Patch notes (free text before the first "diff --git" header; not part of any hunk):
  * .NET/Build.CI.cmd
      - switches the console code page to UTF-8 (chcp 65001) before building;
      - looks for MSBuild under MSBuild\Current (VS2019) first and falls back to
        MSBuild\15.0 (VS2017);
      - exits with code 1 when neither location contains MSBuild.exe, so the
        calling CI step fails instead of continuing with an unset MSBuild variable;
      - restores NuGet packages through the located MSBuild (/t:Restore) instead of
        buildtools\nuget.
  * Arabic/ChoiceDefinitions.cs
      - adds SkinToneRegex and appends it as an optional suffix to the emoji
        alternatives of TrueRegex / FalseRegex (both become static readonly because
        they are now interpolated strings).
  * Arabic/DateTimeDefinitions.cs
      - new generated file (946 added lines follow the last header below).

diff --git a/.NET/Build.CI.cmd b/.NET/Build.CI.cmd
index 637fc43fbb..74fcdb4936 100644
--- a/.NET/Build.CI.cmd
+++ b/.NET/Build.CI.cmd
@@ -3,6 +3,10 @@ ECHO ==============================.NET BUILD START=============================
 @ECHO off
 SETLOCAL EnableDelayedExpansion
 
+ECHO.
+ECHO # Setting encoding to UTF-8
+chcp 65001
+
 ECHO.
 ECHO # Building .NET platform
 REM vswhere is an optional component for Visual Studio and also installed with Build Tools.
@@ -15,17 +19,31 @@ for /f "usebackq tokens=*" %%i in (`!vswhere! -latest -products * -requires Micr
 )
 
 ECHO.
-SET MsBuildVersion=15.0
-ECHO # Finding MSBuild !MsBuildVersion!
+ECHO # Finding MSBuild
+
+SET MsBuildVersion=Current
+ECHO # Trying !MsBuildVersion! for VS2019
 
 if EXIST "%VSInstallDir%\MSBuild\!MsBuildVersion!\Bin\MSBuild.exe" (
-    SET MSBuild="%VSInstallDir%\MSBuild\15.0\Bin\MSBuild.exe" %*
-    ECHO Found MSBuild !MSBuild!
+    SET MSBuild="%VSInstallDir%\MSBuild\!MsBuildVersion!\Bin\MSBuild.exe" %*
 ) else (
-    ECHO "msbuild.exe" could not be found at "!VSInstallDir!"
-    EXIT /B
+    ECHO MSBuild !MsBuildVersion! not found!
+    ECHO.
+
+    SET MsBuildVersion=15.0
+    ECHO # Trying !MsBuildVersion! for VS2017
+
+    if EXIST "%VSInstallDir%\MSBuild\!MsBuildVersion!\Bin\MSBuild.exe" (
+        SET MSBuild="%VSInstallDir%\MSBuild\!MsBuildVersion!\Bin\MSBuild.exe" %*
+    ) else (
+        ECHO "msbuild.exe" could not be found at "!VSInstallDir!"
+        REM Exit with a nonzero code so CI sees the failure. A bare EXIT /B leaves ERRORLEVEL unchanged.
+        EXIT /B 1
+    )
 )
 
+ECHO Found MSBuild !MSBuild!
+
 ECHO.
 ECHO # Check for empty and duplicate inputs in Specs
 Powershell -ExecutionPolicy Bypass "& {buildtools\checkSpec.ps1; exit $LastExitCode }"
@@ -36,7 +54,7 @@ IF %ERRORLEVEL% NEQ 0 (
 
 ECHO.
 ECHO # Restoring NuGet dependencies
-CALL "buildtools\nuget" restore
+CALL !MSBuild! Microsoft.Recognizers.Text.sln /t:Restore
 
 set configuration=Release
 ECHO.
@@ -50,4 +68,4 @@ IF %ERRORLEVEL% NEQ 0 (
     EXIT /b %ERRORLEVEL%
 )
 
-ECHO ============================== .NET BUILD END ==============================
\ No newline at end of file
+ECHO ============================== .NET BUILD END ==============================
diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/ChoiceDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/ChoiceDefinitions.cs
index 65da4ce470..1820d3c380 100644
--- a/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/ChoiceDefinitions.cs
+++ b/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/ChoiceDefinitions.cs
@@ -23,7 +23,8 @@ public static class ChoiceDefinitions
     {
         public const string LangMarker = @"Ara";
         public const string TokenizerRegex = @"[^\u0621-\u064A0-9]";
-        public const string TrueRegex = @"\b(صحيح|نعم|حسنا|موافق|متأكد|بالتأكيد|أتفق|اتفق|بالطبع|هيا)\b|(\uD83D\uDC4D|\uD83D\uDC4C|\u0001f44c)";
-        public const string FalseRegex = @"\b(لست متاحا|لا أستطيع|ليس تماما|أختلف|نختلف|اوافق|لست أوافق|لا|ليس صحيح|ليس|ليست|غير موافق)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90|\u0001F44E|\u0001F590)";
+        public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)";
+        public static readonly string TrueRegex = $@"\b(صحيح|نعم|حسنا|موافق|متأكد|بالتأكيد|أتفق|اتفق|بالطبع|هيا)\b|(\uD83D\uDC4D|\uD83D\uDC4C|\u0001f44c){SkinToneRegex}?";
+        public static readonly string FalseRegex = $@"\b(لست متاحا|لا أستطيع|ليس تماما|أختلف|نختلف|اوافق|لست أوافق|لا|ليس صحيح|ليس|ليست|غير موافق)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90|\u0001F44E|\u0001F590){SkinToneRegex}?";
     }
 }
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/DateTimeDefinitions.cs
new file mode 100644
index 0000000000..205f662641
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/DateTimeDefinitions.cs
@@ -0,0 +1,946 @@
+//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Arabic\Arabic-DateTime.yaml +// - Language: Arabic +// - ClassName: DateTimeDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Arabic +{ + using System; + using System.Collections.Generic; + + public static class DateTimeDefinitions + { + public const string LangMarker = @"Ara"; + public const bool CheckBothBeforeAfter = false; + public static readonly string TillRegex = $@"(?\b(إلى|حتى يوم|حتى|خلال|عبر)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; + public static readonly string RangeConnectorRegex = $@"(?و|خلال|عبر|{BaseDateTime.RangeConnectorSymbolRegex})"; + public const string LastNegPrefix = @"(?القادم|التالي|الآتي|الحالي|الماضي|المقبل|الحاضر|السابق|الأخير)\b"; + public static readonly string StrictRelativeRegex = $@"\b(?القادم|التالي|الآتي|هذا|الحالي|الماضي|السابق|الأخير)\b"; + public const string UpcomingPrefixRegex = @"((هذه\s+)?(المقبل(ة)?))"; + public static readonly string NextPrefixRegex = $@"\b(بعد|القادم(ة)?|{UpcomingPrefixRegex})\b"; + public const string AfterNextSuffixRegex = @"\b(after\s+(the\s+)?next)\b"; + public const string PastPrefixRegex = @"((this\s+)?past)\b"; + public static readonly string PreviousPrefixRegex = $@"(الماضي(ة)?|السابق(ة)?)\b"; + public const string ThisPrefixRegex = @"(هذه|الحالي(ة)?)\b"; + public const string RangePrefixRegex = @"(من|بين)"; + public const string CenturySuffixRegex = @"(^century)\b"; + public const string ReferencePrefixRegex = @"(ذلك|نفس|هذا)\b"; + public const string FutureSuffixRegex = @"\b(الحالي(ة)|القادم(ة)|في 
المستقبل|التالي(ة)|الآتي(ة)|المقبلين|المقبل(ة))\b"; + public const string PastSuffixRegex = @"^\b$"; + public const string DayRegex = @"(?(?:3[0-1]|[1-2]\d|0?[1-9]))"; + public const string ImplicitDayRegex = @"(the\s*)?(?(?:3[0-1]|[0-2]?\d)(?:th|nd|rd|st))\b"; + public const string MonthNumRegex = @"(?1[0-2]|(0)?[1-9])\b"; + public const string WrittenOneToNineRegex = @"(?:واحد|اثنان|ثلاثة|أربعة|خمسة|ستة|سبعة|ثمانية|تسعة)"; + public const string WrittenElevenToNineteenRegex = @"(إحدى عشر|إثنى عشر|ثلاثة عشر|أربعة عشر|خمسة عشر|ستة عشر|سبعة عشر|ثمانية عشر|تسعة عشر)"; + public const string WrittenTensRegex = @"(عشر[وي]ن|ثلاث[وي]ن|أربع[وي]ن|خمس[وي]ن|ست[وي]ن|سبع[وي]ن|ثمان[وي]ن|تسع[وي]ن)"; + public static readonly string WrittenNumRegex = $@"(?:{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s+{WrittenOneToNineRegex})?)"; + public static readonly string WrittenCenturyFullYearRegex = $@"(?(واحد|اثنان\s*)?((,\s+|،\s+)?(الألفين|ألفين|ألفان|ألف))(\s+و)?(\s*(ثلاث|أربع|خمس|ست|سبع|ثمان|تسع)\s*(مائة|مئتان)(\s+و)?)?)(?({WrittenElevenToNineteenRegex})|(({WrittenOneToNineRegex})?(\s+و\s*)?)({WrittenTensRegex})?)?"; + public static readonly string WrittenCenturyOrdinalYearRegex = $@"(?({WrittenElevenToNineteenRegex}|مائة|مائتين)\s+((و)\s*)?({WrittenOneToNineRegex})\s+(و)\s*{WrittenTensRegex})"; + public static readonly string CenturyRegex = $@"\b(?{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}(\s*مائة)?(\s*و)?)\b"; + public static readonly string LastTwoYearNumRegex = $@"(?:zero\s+{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s+{WrittenOneToNineRegex})?)"; + public static readonly string FullTextYearRegex = $@"(?{CenturyRegex})\s*(?{LastTwoYearNumRegex})|(?{WrittenCenturyFullYearRegex})|{WrittenCenturyOrdinalYearRegex}"; + public const string OclockRegex = @"(?(ال)?ساعة|(ال)?ساعات)"; + public const string SpecialDescRegex = @"((?)p\b)"; + public static readonly string AmDescRegex = 
$@"(في\s)?(صباح(ا)?|صباحًا|الصباح|{BaseDateTime.BaseAmDescRegex})"; + public static readonly string PmDescRegex = $@"(في\s)?((ال)?مساء|مساءً|ليلًا|ليلا|(ال)?ليل(ة)?|بعد الظهر|الظهر|ظهرا|{BaseDateTime.BasePmDescRegex})"; + public static readonly string AmPmDescRegex = $@"(في\s)?(صباح(ا)?|صباحًا|الصباح|(ال)?مساء|مساءً|{BaseDateTime.BaseAmPmDescRegex})"; + public static readonly string DescRegex = $@"(:?(:?({OclockRegex}\s+)?(?({AmPmDescRegex}|{AmDescRegex}|{PmDescRegex}|{SpecialDescRegex})))|{OclockRegex})"; + public const string OfPrepositionRegex = @"(\bof\b)"; + public static readonly string TwoDigitYearRegex = $@"\b(?([0-9]\d))(?!(\s*((\:\d)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b"; + public static readonly string YearRegex = $@"(?:{BaseDateTime.FourDigitYearRegex}|{FullTextYearRegex})"; + public const string WeekDayRegex = @"(?(?=يوم\s+)?(الأحد|الإثنين|الاثنين|الثلاثاء|الأربعاء|الخميس|الجمعة|السبت|أحد|إثنين|ثلاثاء|أربعاء|خميس|جمعة|سبت))"; + public const string SingleWeekDayRegex = @"(?(?=يوم\s+)?(الأحد|الإثنين|الاثنين|الثلاثاء|الأربعاء|الخميس|الجمعة|السبت|أحد|إثنين|ثلاثاء|أربعاء|خميس|جمعة|سبت))"; + public const string NextRegex = @"(\s+)?(الآتي|الأخير|التالي|القادم|من الآن|الحالي|المقبل|الحاضر)"; + public static readonly string RelativeMonthRegex = $@"(?(من\s+)?(هذا\s+)?(الشهر|شهر)(\s+)?({NextRegex})?)"; + public const string WrittenMonthRegex = @"(((the\s+)?month of\s+)?(?apr(il)?|aug(ust)?|dec(ember)?|feb(ruary)?|jan(uary)?|july?|june?|mar(ch)?|may|nov(ember)?|oct(ober)?|sept(ember)?|sept?))"; + public static readonly string MonthSuffixRegex = $@"(?(?:(in|of|on)\s+)?({RelativeMonthRegex}|{WrittenMonthRegex}))"; + public const string DateUnitRegex = @"((?(((ال)?(يوم(ا)?|أسبوع(ا)?|شهر(ا)?|سنة|عام(ا)?|قرن|حقبة))|نهاية الأسبوع))|(?((ال)?(يومان|أسبوعان|شهران|سنتان|عامان|قرنان|حقبتان|يومين|أسبوعين|شهرين|سنتين|عامين|قرنين|حقبتين|يومان|أسبوعان|شهران|سنتان|عامان|قرنان|حقبتان|أيام|أسابيع|أشهر|سنوات|أعوام|حقبات|قرون|سنين|شهور)))|((?<=\s+\d{1,4})[ymwd]))\b"; + public const 
string DateTokenPrefix = @"في "; + public const string TimeTokenPrefix = @"عند "; + public const string TokenBeforeDate = @"في "; + public const string TokenBeforeTime = @"عند "; + public const string HalfTokenRegex = @"^(النصف|نصف|والنصف|ونصف)"; + public const string QuarterTokenRegex = @"^(ربع|الربع|وربع|والربع|إلا ربع|إلا الربع)"; + public const string ThreeQuarterTokenRegex = @"^(وثلاثة أرباع|ثلاثة أرباع|إلا الربع)"; + public const string ToTokenRegex = @"\b(إلا)$"; + public const string ToHalfTokenRegex = @"\b(إلا\s+(النصف|نصف))$"; + public const string ForHalfTokenRegex = @"\b(ل(s+)?(نصف))$"; + public const string FromRegex = @"\b(from(\s+the)?)$"; + public const string BetweenTokenRegex = @"\b(between(\s+the)?)$"; + public const string OrdinalNumberRegex = @"((ال)?حادي عشر|ل(ال)?ثاني عشر|(ال)?ثالث عشر|(ال)?رابع عشر|(ال)?خامس عشر|(ال)?خمسة عشر|(ال)?سادس عشر|(ال)?سابع عشر|(ال)?ثامن عشر|(ال)?تاسع عشر|(ال)?عشرون|(ال)?عشرين|(ال)?حادي والعشرون|(ال)?حادية والعشرين|(ال)?حادي والعشرين|(ال)?ثاني والعشرون|(ال)?ثانية والعشرين|(ال)?ثالث والعشرون|(ال)?رابع والعشرون|(ال)?خامس والعشرون|(ال)?سادس والعشرون|(ال)?تاسع والعشرون|(ال)?سابع والعشرون|(ال)?رابع والعشرون|الثامن|الأول|الثالث|الرابع|الخامس|السادس|الثاني|العاشر|السابع)"; + public static readonly string SimpleCasePreMonthRegex = $@"((بين|من)\s+)(({DayRegex}-{DayRegex})\s+)((من|في)\s+)?((الشهر|{SolarMonthRegex}|{LunarMonthRegex}|{ArabicMonthRegex})\s+)({RelativeRegex})?({YearRegex})?"; + public static readonly string SimpleCasesRegex = $@"(((من)\s+)?(({DayRegex}|{OrdinalNumberRegex})\s+)((الشهر|{SolarMonthRegex}|{LunarMonthRegex}|{ArabicMonthRegex})\s+)?((حتى|إلى)\s*)(({DayRegex}|{OrdinalNumberRegex})\s+)((من هذا|من|هذا|في)\s+)?(الشهر|{SolarMonthRegex}|{LunarMonthRegex}|{ArabicMonthRegex})?(\s+({RelativeRegex}))?(\s+{YearRegex})?)|({SimpleCasePreMonthRegex})"; + public static readonly string MonthFrontSimpleCasesRegex = 
$@"(((شهر\s+)?{SolarMonthRegex}|{LunarMonthRegex}|{ArabicMonthRegex})\s+(بين|من)\s+({DayRegex}|{OrdinalNumberRegex})\s+[و]\s*({DayRegex}|{OrdinalNumberRegex}))|({DayRegex}\s*[-\./]\s*{DayRegex}\s+{SolarMonthRegex}|{LunarMonthRegex}|{ArabicMonthRegex})"; + public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+(between\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; + public static readonly string BetweenRegex = $@"((بين|من)\s+)(({DayRegex}|{OrdinalNumberRegex})\s*)((الشهر|{SolarMonthRegex}|{LunarMonthRegex}|{ArabicMonthRegex})\s+)?((حتى|إلى|و|-)\s*)(({DayRegex}|{OrdinalNumberRegex})\s+)((من هذا|من|هذا|في)\s+)?(الشهر|{SolarMonthRegex}|{LunarMonthRegex}|{ArabicMonthRegex})?(\s*([,،-])\s*)?(\s*({RelativeRegex}))?(\s+{YearRegex})?"; + public static readonly string MonthWithYear = $@"((هذا\s+)?(شهر\s+)?({SolarMonthRegex}|{LunarMonthRegex}|{ArabicMonthRegex})[\.]?(\s*)[/\\\-\.,]?(\s*(من عام|من|في|عام))?(\s*)({YearRegex}))|(({SolarMonthRegex}|{LunarMonthRegex}|{ArabicMonthRegex})\s+(هذا\s+)?((عام|العام)\s+)?({RelativeRegex})?)"; + public const string SpecialYearPrefixes = @"(التقويمي(ة)?|(?المالي(ة)?|الدراسي(ة)?))"; + public static readonly string OneWordPeriodRegex = $@"((بعد|في|آخر)\s+(\d+\s+)?((ال)?سنوات|(ال)?أعوام|(ال)?سنين|(ال)?أسابيع|(ال)?أشهر|(ال)?أيام))(\s+\d+)?(\s+{FutureSuffixRegex})?|((هذا\s+)?(شهر\s+)?(الشهر|{SolarMonthRegex}|{LunarMonthRegex}|{ArabicMonthRegex})(\s+{RelativeRegex})?)|((هذا\s+)?((عطلة|خلال)\s+)?((نهاية\s+)?({ArabicWeekRegex}|العام)\s*)((بعد\s+)?{RelativeRegex})?)"; + public static readonly string MonthNumWithYear = $@"\b(({BaseDateTime.FourDigitYearRegex}(\s*)[/\-\.](\s*){MonthNumRegex})|({MonthNumRegex}(\s*)[/\-](\s*){BaseDateTime.FourDigitYearRegex}))\b"; + public static readonly string WeekOfMonthRegex = $@"(\b(?(الأسبوع)\s+((?الأول|الثاني|الثالث|الرابع|الخامس|الأخير)\s+)((من 
هذا|هذا|من)\s+)?(شهر\s+)?(الشهر|{SolarMonthRegex}|{LunarMonthRegex}|{ArabicMonthRegex})[,]?(\s+{YearRegex})?)\b)|(((الأسبوع|أسبوع)\s+)(في\s+)?{DayRegex}\s+({SolarMonthRegex}|{LunarMonthRegex}|{ArabicMonthRegex}))"; + public static readonly string WeekOfYearRegex = $@"(?(الأسبوع)\s+(?الأول|الثاني|الثالث|الرابع|الخامس|الأخير)\s+((من هذا|هذا|من)\s+)?(العام|من عام|عام)\s*({YearRegex}|{RelativeRegex})?)"; + public static readonly string OfYearRegex = $@"\b((of|in)\s+({YearRegex}|{StrictRelativeRegex}\s+year))\b"; + public const string FirstLastRegex = @"\b(the\s+)?((?first)|(?last))\b"; + public static readonly string FollowedDateUnit = $@"^\s*{DateUnitRegex}"; + public static readonly string NumberCombinedWithDateUnit = $@"\b(?\d+(\.\d*)?)(\s)?(-)?{DateUnitRegex}"; + public const string QuarterTermRegex = @"(الربع[- ]+(?الأول|الثاني|الثالث|الرابع))"; + public static readonly string RelativeQuarterTermRegex = $@"\b(الربع)\s+(?{StrictRelativeRegex})\b"; + public static readonly string QuarterRegex = $@"({YearRegex}\s+)?({QuarterTermRegex})(((\s+(من عام|من))?\s+({YearRegex}))|(\s+(هذا|من هذا|)\s+العام))?"; + public static readonly string QuarterRegexYearFront = $@"(?:{YearRegex}|{RelativeRegex}\s+year)('s)?(?:\s*-\s*|\s+(the\s+)?)?{QuarterTermRegex}"; + public const string HalfYearTermRegex = @"(?first|1st|second|2nd)\s+half"; + public static readonly string HalfYearFrontRegex = $@"(?((1[5-9]|20)\d{{2}})|2100)(\s*-\s*|\s+(the\s+)?)?h(?[1-2])"; + public static readonly string HalfYearBackRegex = $@"(the\s+)?(h(?[1-2])|({HalfYearTermRegex}))(\s+of|\s*,\s*)?\s+({YearRegex})"; + public static readonly string HalfYearRelativeRegex = $@"(the\s+)?{HalfYearTermRegex}(\s+of|\s*,\s*)?\s+({RelativeRegex}\s+year)"; + public static readonly string AllHalfYearRegex = $@"({HalfYearFrontRegex})|({HalfYearBackRegex})|({HalfYearRelativeRegex})"; + public const string EarlyPrefixRegex = @"\b(?بداية|مطلع|وقت مبكر|(?قبل))\b"; + public const string MidPrefixRegex = @"\b(?في منتصف|منتصف)\b"; + 
public const string LaterPrefixRegex = @"\b(?نهاية|باقي|بقية|أواخر|(?في وقت لاحق|لاحقا في|بعد))\b"; + public static readonly string PrefixPeriodRegex = $@"({EarlyPrefixRegex}|{MidPrefixRegex}|{LaterPrefixRegex}|{RelativeRegex})"; + public const string PrefixDayRegex = @"\b((?early)|(?mid(dle)?)|(?later?))(\s+in)?(\s+the\s+day)?$"; + public const string SeasonDescRegex = @"(?(ال)?ربيع|(ال)?صيف|(ال)?خريف|(ال)?شتاء)"; + public static readonly string SeasonRegex = $@"\b(?(هذا\s+)?(منتصف\s+)?(({SeasonDescRegex})(\s+{PrefixPeriodRegex})?(\s*عام\s*)?(\s*{YearRegex})?))\b"; + public const string WhichWeekRegex = @"\b(week)(\s*)(?5[0-3]|[1-4]\d|0?[1-9])\b"; + public const string WeekOfRegex = @"(the\s+)?((week)(\s+(of|(commencing|starting|beginning)(\s+on)?))|w/c)(\s+the)?"; + public const string MonthOfRegex = @"(من)(\s*)(شهر)"; + public const string SolarMonthRegex = @"(?يناير|فبراير|مارس|أبريل|مايو|يونيو|يوليو|أغسطس|سبتمبر|أكتوبر|نوفمبر|ديسمبر)"; + public const string LunarMonthRegex = @"(?محرم|صفر|ربيع الأول|ربيع الثاني|جمادى الأول|جمادى الثاني|رجب|شعبان|رمضان|شوال|ذو القعدة|ذو الحجة)"; + public const string ArabicMonthRegex = @"(?كانون الثاني|شباط|آذار|نيسان|حزيران|تموز|آب|أيلول|تشرين الأول|تشرين الثاني|كانون الأول|أيار)"; + public static readonly string MonthRegex = $@"(?{SolarMonthRegex}|{LunarMonthRegex}|{ArabicMonthRegex})"; + public static readonly string DateYearRegex = $@"(?{BaseDateTime.FourDigitYearRegex}|(?الأسبوعين|الاسبوعين|أسابيع|الاسبوع|الأسبوع|الإسبوع|أسبوعين|أسبوعي|اسبوعين|اسبوعي|أسبوع|الاسابيع|الأسابيع)"; + public static readonly string ThisRegex = $@"(?=يوم\s+)?({WeekDayRegex})(\s+)?(من|هذا|)(\s+)?(هذا)?(\s+)({ArabicWeekRegex})((\s+)({RelativeRegex}))?"; + public static readonly string LastDayDateRegex = $@"(?=يوم\s+)?({WeekDayRegex})\s+(الماضي|السابق|الأخير)"; + public static readonly string LastWeekDateRegex = $@"({ArabicWeekRegex})\s+(الماضي|السابق|الأخير)\s+({WeekDayRegex})"; + public const string LastMonthYearDateRegex = @"(قبل\s+)(\d+ 
)?((بضعة|بضع|عدة)\s+)?(سنتين|شهرين|الشهور|أشهر|اشهر|شهر|الشهر|أيام|عامين|عام|أعوام|سنة|سنين|سنوات)"; + public static readonly string SpecificDayRegex = $@"((قبل|بعد)\s+)?((اليوم|يوم)\s+)?(((?<=ب)الأمس|أمس|الأمس|البارحة)|(آخر يوم|الماضي|السابق|الأخير|يومين)|({DayRegex}\s+{MonthRegex}))"; + public static readonly string LastDateRegex = $@"({LastDayDateRegex}|{LastWeekDateRegex})"; + public static readonly string NextDayRegex = $@"(هذا يوم\s+|بعد\s+)?(?=(ال)?يوم\s+)?({WeekDayRegex})((\s+)({NextRegex}))?"; + public static readonly string NextWeekDayRegex = $@"((بعد )|(في هذا ?=)|(هذا ?=))?((ال|لل|ل)?أسبوع(ين)?|{ArabicWeekRegex}|اليوم|يومي|الغد|غداً|غد|غدا)(يوم)?({ArabicWeekRegex})?(\s*(الآتي|الأخير|التالي|القادم|من الآن|الحالي|المقبل|الحاضر))?(\s*{ArabicWeekRegex})?"; + public static readonly string NextWeekRegex = $@"(?=بعد )?(هذا )?({ArabicWeekRegex})\s*({NextRegex})?\s+?(يوم)?(\s+)?({WeekDayRegex})?"; + public static readonly string NextDateRegex = $@"({NextWeekRegex}|{NextDayRegex})"; + public static readonly string CardinalDayOfMonthRegex = $@"(((?<=في )|(إلى |لل|يوم ))((((ال)?عاشر|(ال)?حادي(ة)? والعشرين|(ال)?ثاني(ة)? والعشرين|(ال)?ثالث(ة)? والعشرين|(ال)?رابع(ة)? والعشرين|(ال)?خامس(ة)? والعشرين|(ال)?سادس(ة)? والعشرين|(ال)?سابع(ة)? والعشرين|(ال)?ثامن(ة)? والعشرين|(ال)?تاسع(ة)? والعشرين|(ال)?ثلاثين|(ال)?حادي(ة)? 
والثلاثين|(ال)?أول|(ال)?ثاني|(ال)?ثالث|(ال)?رابع|(ال)?خامس|(ال)?سادس|(ال)?سابع|(ال)?ثامن|(ال)?تاسع))|({DayRegex})))|((?<=يوم )({DayRegex})[\./-]\s+({MonthRegex}))"; + public static readonly string SpecialDayRegex = $@"({NextWeekDayRegex}|{CardinalDayOfMonthRegex}|{SpecificDayRegex}|{LastMonthYearDateRegex})"; + public static readonly string SpecialDayWithNumRegex = $@"\b((?{WrittenNumRegex})\s+days?\s+from\s+(?yesterday|tomorrow|tmr|today))\b"; + public static readonly string RelativeDayRegex = $@"\b(((the\s+)?{RelativeRegex}\s+day))\b"; + public const string SetWeekDayRegex = @"\b(?on\s+)?(?morning|afternoon|evening|night|(sun|mon|tues|wednes|thurs|fri|satur)day)s\b"; + public static readonly string WeekDayOfMonthRegex = $@"(?(the\s+)?(?first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|last)\s+(week\s+{MonthSuffixRegex}[\.]?\s+(on\s+)?{WeekDayRegex}|{WeekDayRegex}\s+{MonthSuffixRegex}))"; + public static readonly string RelativeWeekDayRegex = $@"\b({WrittenNumRegex}\s+{WeekDayRegex}\s+(from\s+now|later))\b"; + public static readonly string SpecialDate = $@"(?=\b(on|at)\s+the\s+){DayRegex}\b"; + public const string DatePreposition = @"\b(في|عند|من)"; + public static readonly string DateExtractorYearTermRegex = $@"(\s+|\s*,\s*|\s+من\s+){DateYearRegex}"; + public const string CardinalDayRegex = @"(?=يوم\s+)?((ال|لل|ل)?عاشر|(ال|لل|ل)?حادي(ة)? و(ال)?عشر[يو]ن|واحد و(ال)?عشر[يو]ن|(ال|لل|ل)?ثاني(ة)? و(ال)?عشر[يو]ن|(ال|لل|ل)?ثالث(ة)? و(ال)?عشر[يو]ن|(ال|لل|ل)?رابع(ة)? و(ال)?عشر[يو]ن|(ال|لل|ل)?خامس(ة)? و(ال)?عشر[يو]ن|(ال|لل|ل)?سادس(ة)? و(ال)?عشر[يو]ن|(ال|لل|ل)?سابع(ة)? و(ال)?عشر[يو]ن|(ال|لل|ل)?ثامن(ة)? و(ال)?عشر[يو]ن|(ال|لل|ل)?تاسع(ة)? و(ال)?عشر[يو]ن|(ال|لل|ل)?ثلاثين|(ال|لل|ل)?حادي(ة)? 
والثلاثين|(ال|لل|ل)?أول|(ال|لل|ل)?ثاني|(ال|لل|ل)?ثالث|(ال|لل|ل)?رابع|(ال|لل|ل)?خامس|(ال|لل|ل)?سادس|(ال|لل|ل)?سابع|(ال|لل|ل)?ثامن|(ال|لل|ل)?تاسع)"; + public static readonly string DateExtractor1 = $@"({CardinalDayRegex})(\s+يوم\s+)({WeekDayRegex})(\s+)(في|من)(\s+)(هذا|هذه)?(\s+)?(الشهر|{MonthRegex})({DateExtractorYearTermRegex}\b)?"; + public static readonly string DateExtractor3 = $@"\b({WeekDayRegex}(\s+|\s*,\s*|\s*،\s*))?({DayRegex}|{CardinalDayRegex})[\.]?(\s+|\s*,\s*|\s+من\s+|\s*-\s*)?{MonthRegex}[\.]?((\s+(في|عند|عام|سنة|من عام|من سنة))?{DateExtractorYearTermRegex})?\b"; + public static readonly string DateExtractor4 = $@"\b{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}[\.]?\s*[/\\\-]\s*{DateYearRegex}"; + public static readonly string DateExtractor5 = $@"\b{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; + public static readonly string DateExtractor6 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({WeekDayRegex}\s+)?{MonthNumRegex}[\-\.]{DayRegex}(?![%])\b"; + public static readonly string DateExtractor7L = $@"\b({WeekDayRegex}\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b"; + public static readonly string DateExtractor7S = $@"({MonthRegex}\s*[-\./]\s*{DayRegex})|(\b({WeekDayRegex}\s+)?{MonthNumRegex}\s*[/\.]\s*{DayRegex}(?![%])\b)"; + public static readonly string DateExtractor8 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({WeekDayRegex}\s+)?{DayRegex}[\\\-]{MonthNumRegex}(?![%])\b"; + public static readonly string DateExtractor9L = $@"\b({WeekDayRegex}\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\b"; + public static readonly string DateExtractor9S = $@"\b({WeekDayRegex}\s+)?{DayRegex}\s*[/-]\s*{MonthNumRegex}(?![%])\b"; + public static readonly string DateExtractorA = $@"\b({WeekDayRegex}\s+)?{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DayRegex}"; + public static readonly string 
OfMonth = $@"^\s*(يوم\s+)?من\s*{MonthRegex}"; + public static readonly string MonthEnd = $@"{MonthRegex}\s*(في)?\s*$"; + public static readonly string WeekDayEnd = $@"(هذا\s+)?{WeekDayRegex}\s*[,،]?\s*$"; + public const string WeekDayStart = @"^[\.]"; + public const string RangeUnitRegex = @"\b(?years?|months?|weeks?)\b"; + public const string HourNumRegex = @"\b(?الأولى|(ال)?واحدة|(ال)?ثانية|(ال)?ثالثة|(ال)?رابعة|(ال)?خامسة|(ال)?سادسة|(ال)?سابعة|(ال)?ثامنة|(ال)?تاسعة|(ال)?عاشرة|(ال)?حادية عشر(ة)?|(ال)?ثانية عشر(ة)?|خمسة عشر|أحد عشر)\b"; + public const string MinuteNumRegex = @"\b(?أربع|خمس|ست|سبع|ثمان|تسع|عشر|عشرة|أحد عشر|إثني عشر|إثنا عشر|ثلاثة عشر|خمسة عشر|ثمانية عشر|أربعة عشر|ستة عشر|سبعة عشر|(ال)?حادية عشر(ة)?|تسعة عشر|عشرون|ثلاثون|أربعون|خمسون|عشرين|ثلاث(ين)?|أربعين|خمسين|واحد|إثنان|ثلاثة|خمسة|ثمانية)\b"; + public const string DeltaMinuteNumRegex = @"(?عشرة|أحد عشر|اثنا عشر|ثلاثة عشر|خمسة عشر|ثمانية عشر|أربعة|ستة|سبعة|تسعة|عشرين|أربعة عشر|ستة عشر|سبعة عشر|تسعة عشر| ثلاثون|أربعون|خمسين|أربعين|خمسون|واحد|اثنان|ثلاثة|خمسة|ثمانية|ثلاث(ين)?|أربع|خمس|ست|سبع|ثمان|تسع|(ال)?واحدة|(ال)?ثانية|(ال)?ثالثة|(ال)?رابعة|(ال)?خامسة|(ال)?سادسة|(ال)?سابعة|(ال)?ثامنة|(ال)?تاسعة|(ال)?عاشرة|(ال)?حادية عشر(ة)?|(ال)?ثانية عشر(ة)?)"; + public const string PmRegex = @"(?(?:(في|حول)\s|ل)?(وقت\s)?(بعد الظهر|بعد الظهيرة|(ال)?مساء|مساءً|منتصف(\s|-)الليل|الغداء|الليل|ليلا))"; + public const string PmRegexFull = @"(?(?:(في|حول)\s|ل)?(وقت\s)?(بعد الظهر|بعد الظهيرة|(ال)?مساء|مساءً|منتصف(\s|-)الليل|الغداء|الليل|ليلا))"; + public const string AmRegex = @"(?(?:(في|حول)\s|ل)?(وقت\s)?((ال)?صباح|صباحا|صباحًا))"; + public const string LunchRegex = @"\b(موعد الغذاء|وقت الغذاء)\b"; + public const string NightRegex = @"\bمنتصف(\s|-)الليل\b"; + public const string CommonDatePrefixRegex = @"^[\.]"; + public static readonly string LessThanOneHour = $@"(?((ال)?ربع|ثلاثة أرباع|(ال)?نصف)|({BaseDateTime.DeltaMinuteRegex}(\s(دقيقة|دقائق))?)|({DeltaMinuteNumRegex}(\s(دقيقة|دقائق))?))"; + public static readonly 
string WrittenTimeRegex = $@"(?((ال)?ساعة\s)?{HourNumRegex}\s+(و(\s)?)?({MinuteNumRegex}|{{LessThanOneHour}}|({MinuteNumRegex}\s+(و(\s)?)?(?عشرون|ثلاثون|أربعون|خمسون|عشرين|ثلاثين|أربعين|خمسين))))"; + public static readonly string TimePrefix = $@"(?(إلا|حتى|و|قبل)?(\s)?{LessThanOneHour})"; + public static readonly string TimeSuffix = $@"(?{AmRegex}|{PmRegex}|{OclockRegex})"; + public static readonly string TimeSuffixFull = $@"(?{AmRegex}|{PmRegexFull}|{OclockRegex})"; + public static readonly string BasicTime = $@"\b(?{WrittenTimeRegex}|{HourNumRegex}|({MinuteNumRegex}(\s(دقيقة|دقائق))?)|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex}(?![%\d]))"; + public const string MidnightRegex = @"(?منتصف(\s|(\s?-\s?))الليل)"; + public const string MidmorningRegex = @"(?منتصف(\s|(\s?-\s?))الصباح)"; + public const string MidafternoonRegex = @"(?منتصف(\s|(\s?-\s?))بعد الظهر)"; + public const string MiddayRegex = @"(?(وقت الغداء\s)?(منتصف(\s|(\s?-\s?)))?(النهار|(الساعة\s)?((((12\s)?الظهر)|(12\s)?الظهيرة)|(12\s)?ظهرا))(\sوقت الغداء)?)"; + public static readonly string MidTimeRegex = $@"(?({MidnightRegex}|{MidmorningRegex}|{MidafternoonRegex}|{MiddayRegex}))"; + public static readonly string AtRegex = $@"\b(?:(?:(?<=\bفي\s+)?(?:{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\.\d)|{MidTimeRegex}))|{MidTimeRegex})\b"; + public static readonly string IshRegex = $@"\b((({BaseDateTime.HourRegex}|{WrittenTimeRegex})(\s|-))?(وقت\s)?((الظهيرة|الظهر|ظهر(ا|اً))))\b"; + public const string TimeUnitRegex = @"([^A-Za-z]{1,}|\b)((?((ال)?(ساعة|دقيقة|ثانية)))|(?((ال)?(ساعات|دقائق|ثوان|ساعتين|دقيقتين|ثانيتين|ساعتان|دقيقتان|ثانيتان))))\b"; + public const string RestrictedTimeUnitRegex = @"(?(ال)?ساعة|(ال)?دقيقة)\b"; + public const string FivesRegex = @"(?(?:fifteen|(?:twen|thir|fou?r|fif)ty(\s*five)?|ten|five))\b"; + public static readonly string HourRegex = $@"\b{BaseDateTime.HourRegex}"; + public const string 
PeriodHourNumRegex = @"(?((واحد|اثنان|اثنين|إثنين|ثلاثة|أربعة|إثنان)?(و(\s+)?(عشرون|عشرين)))|أحد عشر|إثني عشر|((ثلاثة|خمسة|ثمانية|أربعة|ستة|سبعة|تسعة)(عشر)?)|صفر|واحد|اثنان|إثنان|ثنان|اثنين|عشرة|الأولى|(ال)?واحدة|(ال)?ثانية|(ال)?ثالثة|(ال)?رابعة|(ال)?خامسة|(ال)?سادسة|(ال)?سابعة|(ال)?ثامنة|(ال)?تاسعة|(ال)?عاشرة|(ال)?حادية عشر(ة)?|(ال)?ثانية عشر(ة)?|خمسة عشر)"; + public static readonly string ConnectNumRegex = $@"\b{BaseDateTime.HourRegex}(?[0-5][0-9])\s*{DescRegex}"; + public static readonly string TimeRegexWithDotConnector = $@"({BaseDateTime.HourRegex}(\s*\.\s*){BaseDateTime.MinuteRegex})"; + public static readonly string TimeRegex1 = $@"\b({TimePrefix}\s+)?({WrittenTimeRegex}(\s{TimePrefix})?|{HourNumRegex}|{BaseDateTime.HourRegex})(\s*|[.]){DescRegex}"; + public static readonly string TimeRegex2 = $@"(\b{TimePrefix}\s+)?(t)?{BaseDateTime.HourRegex}(\s*)?:(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?(?a)?((\s*{DescRegex})|\b)"; + public static readonly string TimeRegex3 = $@"(\b{TimePrefix}\s+)?{BaseDateTime.HourRegex}\.{BaseDateTime.MinuteRegex}(\s*{DescRegex})"; + public static readonly string TimeRegex4 = $@"\b({TimePrefix}\s+)?{BasicTime}(\s*{DescRegex})?\s+{TimeSuffix}(\s*{DescRegex})?\b"; + public static readonly string TimeRegex5 = $@"\b({DescRegex}\s)?{BasicTime}((\s*{DescRegex})((\s+{TimePrefix})?)|(\s+{TimePrefix}(\s+{TimePrefix})?))(\s{DescRegex})?"; + public static readonly string TimeRegex6 = $@"{BasicTime}(\s*{DescRegex})?\s+{TimeSuffix}\b"; + public static readonly string TimeRegex7 = $@"\b({DescRegex}\s)?(وقت الغداء\s)?{TimeSuffixFull}\s+(في\s+)?{BasicTime}(\s{DescRegex})?(\sوقت الغداء)?(\s{TimePrefix})?((\s*{DescRegex})|\b)?"; + public static readonly string TimeRegex8 = $@".^"; + public static readonly string TimeRegex9 = $@"\b{PeriodHourNumRegex}(\s+|-){FivesRegex}((\s*{DescRegex})|\b)"; + public static readonly string TimeRegex10 = 
$@"\b({TimePrefix}\s+)?{BaseDateTime.HourRegex}(\s*h\s*){BaseDateTime.MinuteRegex}(\s*{DescRegex})?"; + public static readonly string TimeRegex11 = $@"\b((?:({TimeTokenPrefix})?{TimeRegexWithDotConnector}(\s*{DescRegex}))|(?:(?:{TimeTokenPrefix}{TimeRegexWithDotConnector})(?!\s*per\s*cent|%)))"; + public static readonly string FirstTimeRegexInTimeRange = $@"\b{TimeRegexWithDotConnector}(\s*{DescRegex})?"; + public static readonly string PureNumFromTo = $@"({RangePrefixRegex}\s+)?(الساعة\s+)?(({BaseDateTime.HourRegex}(\s*)?:(\s*)?{BaseDateTime.MinuteRegex})|{HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?\s*{TillRegex}\s*(\s+الساعة\s+)?(({BaseDateTime.HourRegex}(\s*)?:(\s*)?{BaseDateTime.MinuteRegex})|{HourRegex}|{PeriodHourNumRegex})(\s+{TimePrefix})?(?\s*({PmRegex}|{AmRegex}|{DescRegex}))?(\s+اليوم)?"; + public static readonly string PureNumBetweenAnd = $@"(بين\s+)(الساعة\s+)?(({BaseDateTime.HourRegex}(\s*)?:(\s*)?{BaseDateTime.MinuteRegex})|{HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?\s*{RangeConnectorRegex}(\s*)?(\s+الساعة\s+)?(({BaseDateTime.HourRegex}(\s*)?:(\s*)?{BaseDateTime.MinuteRegex})|{HourRegex}|{PeriodHourNumRegex})(\s+{TimePrefix})?(?\s*({PmRegex}|{AmRegex}|{DescRegex}))?(\s+اليوم)?"; + public static readonly string SpecificTimeFromTo = $@"({RangePrefixRegex}\s+)?(الساعة\s+)?(?(({BaseDateTime.HourRegex}(\s*)?:(\s*)?{BaseDateTime.MinuteRegex})|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))\s*{TillRegex}\s*(\s+الساعة\s+)?(?(({TimeRegexWithDotConnector}(?\s*{DescRegex}))|({BaseDateTime.HourRegex}(\s*)?:(\s*)?{BaseDateTime.MinuteRegex})|({HourRegex}|{PeriodHourNumRegex})(\s+{TimePrefix})?(\s*(?{DescRegex}))?))(\s+اليوم)?"; + public static readonly string SpecificTimeBetweenAnd = 
$@"(بين\s+)(الساعة\s+)?(?(({BaseDateTime.HourRegex}(\s*)?:(\s*)?{BaseDateTime.MinuteRegex})|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))\s*{RangeConnectorRegex}(\s*)?(\s+الساعة\s+)?(?(({TimeRegexWithDotConnector}(?\s*{DescRegex}))|(({BaseDateTime.HourRegex}(\s*)?:(\s*)?{BaseDateTime.MinuteRegex})|{HourRegex}|{PeriodHourNumRegex})(\s+{TimePrefix})?(\s*(?{DescRegex}))?))(\s+اليوم)?"; + public const string SuffixAfterRegex = @"\b(((at)\s)?(or|and)\s+(above|after|later|greater)(?!\s+than))\b"; + public const string PrepositionRegex = @"(?^(at|on|of)(\s+the)?$)"; + public const string LaterEarlyRegex = @"((?(\s+|-)الباكر)|(?وقت متأخر(\s+|-))|أواخر(\s+|-)|وقت مبكر(\s+|-)|أول(\s+|-)|آخر(\s+|-))"; + public const string MealTimeRegex = @"\b((في|عند)\s+)?(وقت\s)?(?(ال)?إفطار|(ال)?فطور|(ال)?عشاء|(ال)?غذاء)\b"; + public static readonly string UnspecificTimePeriodRegex = $@"({MealTimeRegex})"; + public static readonly string TimeOfDayRegex = $@"\b(?((((في|عند)\s+)?{LaterEarlyRegex}?(من\s+)?(الصباح|بعد الظهر|الليل|المساء|الظهر|الأمسيات){LaterEarlyRegex}?)|{MealTimeRegex}|(((في|عند|خلال)\s+)?(النهار|((ساعة|ساعات)(\s)?العمل)))))\b"; + public static readonly string SpecificTimeOfDayRegex = $@"\b(({StrictRelativeRegex}\s+{TimeOfDayRegex})\b|\btoni(ght|te))s?\b"; + public static readonly string TimeFollowedUnit = $@"^\s*{TimeUnitRegex}"; + public static readonly string TimeNumberCombinedWithUnit = $@"\b(?\d+(\.\d*)?)(\s)?(-)?{TimeUnitRegex}"; + public static readonly string[] BusinessHourSplitStrings = { @"business", @"hour" }; + public const string NowRegex = @"\b(?(right\s+)?now|at th(e|is) minute|as soon as possible|asap|recently|previously)\b"; + public static readonly string NowParseRegex = $@"\b({NowRegex}|^(date)$)\b"; + public const string SuffixRegex = @"^\s*(in the\s+)?(morning|afternoon|evening|night)\b"; + public const string NonTimeContextTokens = @"(building)"; + public const string DateTimeTimeOfDayRegex = @"\b(?morning|(?afternoon|night|evening))\b"; + 
public static readonly string DateTimeSpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{DateTimeTimeOfDayRegex})\b|\btoni(ght|te))\b"; + public static readonly string TimeOfTodayAfterRegex = $@"^\s*(,\s*)?(in\s+)?{DateTimeSpecificTimeOfDayRegex}"; + public static readonly string TimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(at|around|in|on))?\s*$"; + public static readonly string SimpleTimeOfTodayAfterRegex = $@"(?{DateUnitRegex}|(ال)?ساعة|(ال)?ساعات|(ال)?دقيقة|(ال)?دقائق|(ال)?ثانية|(ال)?ثوان|(ال)?ليلة|(ال)?ليال)|ساعت(ين)?(ان)?|دقيقت(ين)?(ان)?|ثانيت(ين)?(ان)?|ليلت(ين)?(ان)?\b"; + public const string SuffixAndRegex = @"(?\s*(و)\s+(?نصف|ربع))"; + public const string PeriodicRegex = @"\b(?((?semi|bi|tri)(\s*|-))?(daily|monthly|weekly|quarterly|yearly|annual(ly)?))\b"; + public static readonly string EachUnitRegex = $@"\b(?(each|every|any|once an?)(?\s+other)?\s+({DurationUnitRegex}|(?quarters?|weekends?)|{WeekDayRegex})|(?weekends))"; + public const string EachPrefixRegex = @"\b(?(each|every|once an?)\s*$)"; + public const string SetEachRegex = @"\b(?(each|every)(?\s+other)?\s*)(?!the|that)\b"; + public static readonly string SetLastRegex = $@"(?following|next|upcoming|this|{LastNegPrefix}last|past|previous|current)"; + public const string EachDayRegex = @"^\s*(each|every)\s*day\b"; + public static readonly string DurationFollowedUnit = $@"(^\s*{DurationUnitRegex}\s+{SuffixAndRegex})|(^\s*{SuffixAndRegex}?\s+?{DurationUnitRegex})"; + public static readonly string NumberCombinedWithDurationUnit = $@"((?\d+(\.\d*)?(\s)?)?({DurationUnitRegex})(\s{WrittenOneToNineRegex})?(\sو)?(\s)?(?\d+(\.\d*)?(\s)?)?(({DurationUnitRegex})?(\s{WrittenOneToNineRegex})?)(\sو)?(\s)?(?\d+(\.\d*)?(\s)?)?({DurationUnitRegex})(\s{WrittenOneToNineRegex})?)"; + public static readonly string AnUnitRegex = $@"\b((?(1/2|½|نصف)))\s+{DurationUnitRegex}(\s(أخرى))?"; + public const string DuringRegex = @"\b((((خلال|على 
مدى|مدة)\s)|ل)+)(?(ال)?عام(ين)?|(ال)?سنتين|(ال)?سنة|(ال)?شهر(ين)?|الأشهر|(ال)?أسبوع(ين)?|(ال)?يوم(ين)?)\b"; + public const string AllRegex = @"(?(طوال\s+))?(?(ال)?عام|(ال)?سنة|(ال)?شهر|(ال)?أسبوع|(ال)?أسابيع|(ال)?أيام|(ال)?يوم)(?(\s+كامل(ة)?))?"; + public const string HalfRegex = @"\b((نصف)\s+)?(?(ال)?ساعة|ساعتين|دقيقة|دقيقتين|ثانية|ثانيتين|(ال)?عام(ين)?|(ال)?سنة|(ال)?شهر(ين)?|(ال)?أسبوع(ين)?|(ال)?يوم(ين)?)(?(\s+)?(و)?نصف)?\b"; + public const string ConjunctionRegex = @"\b((و(\s+ل)?)|مع)\b"; + public const string ArabicThisYearRegex = @"(\s*)?(هذا|هذه)?(لعام|عام|العام|سنة)?(\s*)?"; + public const string ArabicEidDay = @"?(يوم |عيد |ليلة | ليل)?(?=\s*)"; + public static readonly string HolidayList1 = $@"((?=\s*){ArabicEidDay}(إثنين الرماد|رأس السنة الهجرية|الحج|يواندان|الفطر|رأس السنة الجديدة|الأضحى|الأب|الشكر|عيد الميلاد|المولد النبوي|الفصح)(){ArabicThisYearRegex}(العام|السنة)?({YearRegex})?({RelativeRegex})?)"; + public static readonly string HolidayList2 = $@"((?=\s*){ArabicEidDay}(الشباب|الأطفال|الفتيات|العشاق|الأرض|الافتتاح|جرذ الأرض|الحب|الذكرى|الخريف|القمر|الربيع|الفانوس)(){ArabicThisYearRegex}(العام|السنة)?({YearRegex})?({RelativeRegex})?)"; + public static readonly string HolidayList3 = $@"((?=\s*){ArabicEidDay}(حقوق الإنسان|العالمي للأعمال الخيرية|يوم التحرير الأفريقي|حرية الصحافة العالمية|الاستقلال|الرؤساء|كل الأرواح|الشجرة|مارتن لوثر كينج| هالوين|العمال العالمي|الأم)(){ArabicThisYearRegex}(العام|السنة)?({YearRegex})?({RelativeRegex})?)"; + public static readonly string HolidayRegex = $@"({HolidayList1})|({HolidayList2})|({HolidayList3})"; + public const string AMTimeRegex = @"(?morning)"; + public const string PMTimeRegex = @"\b(?afternoon|evening|night)\b"; + public const string NightTimeRegex = @"(night)"; + public const string NowTimeRegex = @"(now|at th(e|is) minute)"; + public const string RecentlyTimeRegex = @"(recently|previously)"; + public const string AsapTimeRegex = @"(as soon as possible|asap)"; + public const string InclusiveModPrepositions 
= @"(?((on|in|at)\s+or\s+)|(\s+or\s+(on|in|at)))"; + public const string AroundRegex = @"(?:\b(?:around|circa)\s*?\b)(\s+the)?"; + public static readonly string BeforeRegex = $@"((\b{InclusiveModPrepositions}?(?:before|in\s+advance\s+of|prior\s+to|(no\s+later|earlier|sooner)\s+than|ending\s+(with|on)|by|(un)?till?|(?as\s+late\s+as)){InclusiveModPrepositions}?\b\s*?)|(?)((?<\s*=)|<))(\s+the)?"; + public static readonly string AfterRegex = $@"((\b{InclusiveModPrepositions}?((after|(starting|beginning)(\s+on)?(?!\sfrom)|(?>\s*=)|>))(\s+the)?"; + public const string SinceRegex = @"(?:(?:\b(?:since|after\s+or\s+equal\s+to|starting\s+(?:from|on|with)|as\s+early\s+as|(any\s+time\s+)from)\b\s*?)|(?=))(\s+the)?"; + public static readonly string SinceRegexExp = $@"({SinceRegex}|\bfrom(\s+the)?\b)"; + public const string AgoRegex = @"\b(ago|before\s+(?yesterday|today))\b"; + public static readonly string LaterRegex = $@"\b(?:later(?!((\s+in)?\s*{OneWordPeriodRegex})|(\s+{TimeOfDayRegex})|\s+than\b)|من الآن|من الان|(from|after)\s+(?tomorrow|tmr|today))\b"; + public const string BeforeAfterRegex = @"\b((?before)|(?from|after))\b"; + public static readonly string ModPrefixRegex = $@"\b({RelativeRegex}|{AroundRegex}|{BeforeRegex}|{AfterRegex}|{SinceRegex})\b"; + public static readonly string ModSuffixRegex = $@"\b({AgoRegex}|{LaterRegex}|{BeforeAfterRegex}|{FutureSuffixRegex}|{PastSuffixRegex})\b"; + public const string InConnectorRegex = @"\b(in)\b"; + public static readonly string SinceYearSuffixRegex = $@"(^\s*{SinceRegex}(\s*(the\s+)?year\s*)?{YearSuffix})"; + public static readonly string WithinNextPrefixRegex = $@"\b(within(\s+the)?(\s+(?{NextPrefixRegex}))?)\b"; + public const string TodayNowRegex = @"\b(today|now)\b"; + public static readonly string MorningStartEndRegex = $@"(^(morning|{AmDescRegex}))|((morning|{AmDescRegex})$)"; + public static readonly string AfternoonStartEndRegex = $@"(^(afternoon|{PmDescRegex}))|((afternoon|{PmDescRegex})$)"; + public const string 
EveningStartEndRegex = @"(^(evening))|((evening)$)"; + public const string NightStartEndRegex = @"(^(over|to)?ni(ght|te))|((over|to)?ni(ght|te)$)"; + public const string InexactNumberRegex = @"بضع(ة)?|عدة|(?((ل))?عدد(\s+من)?)"; + public static readonly string InexactNumberUnitRegex = $@"({InexactNumberRegex})\s+({DurationUnitRegex})|(في\s+)?((ال)?يومين|(ال)?أيام|(ال)?أسابيع|(ال)?أشهر|(ال)?سنوات|(ال)?أعوام|(ال)?سنين)\s+(العديدة|القليلة|الثلاثة|الأربعة|الخمسة|الستة|السبعة|الثمانية|التسعة|العشرة)"; + public static readonly string RelativeTimeUnitRegex = $@"(?:(?:(?:{NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+({TimeUnitRegex}))|((the|my))\s+({RestrictedTimeUnitRegex}))"; + public static readonly string RelativeDurationUnitRegex = $@"(?:(?:(?<=({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+)({DurationUnitRegex}))|((the|my))\s+({RestrictedTimeUnitRegex}))"; + public static readonly string ReferenceDatePeriodRegex = $@"\b{ReferencePrefixRegex}\s+(?week|month|year|decade|weekend)\b"; + public const string ConnectorRegex = @"^(-|,|for|t|around|@)$"; + public const string FromToRegex = @"(\b(from).+(to|and|or)\b.+)"; + public const string SingleAmbiguousMonthRegex = @"^(the\s+)?(may|march)$"; + public const string SingleAmbiguousTermsRegex = @"^(the\s+)?(day|week|month|year)$"; + public const string UnspecificDatePeriodRegex = @"^(week|month|year)$"; + public const string PrepositionSuffixRegex = @"\b(on|in|at|around|from|to)$"; + public const string FlexibleDayRegex = @"(?([A-Za-z]+\s)?[A-Za-z\d]+)"; + public static readonly string ForTheRegex = $@"\b((((?<=for\s+)the\s+{FlexibleDayRegex})|((?<=on\s+)(the\s+)?{FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\s*(,|\.(?!\d)|!|\?|$)))"; + public static readonly string WeekDayAndDayOfMonthRegex = $@"\b{WeekDayRegex}\s+(the\s+{FlexibleDayRegex})\b"; + public static readonly string WeekDayAndDayRegex = $@"\b{WeekDayRegex}\s+(?!(the)){DayRegex}(?!([-:]|(\s+({AmDescRegex}|{PmDescRegex}|{OclockRegex}))))\b"; + 
public const string RestOfDateRegex = @"\b(باقي|بقية)\s+(?الشهر|العام|الأسبوع|العقد)\b"; + public const string RestOfDateTimeRegex = @"\b(rest|remaining)\s+(of\s+)?((the|my|this|current)\s+)?(?day)\b"; + public const string AmbiguousRangeModifierPrefix = @"(from)"; + public static readonly string NumberEndingPattern = $@"^(?:\s+(?meeting|appointment|conference|((skype|teams|zoom|facetime)\s+)?call)\s+to\s+(?{PeriodHourNumRegex}|{HourRegex})([\.]?$|(\.,|,|!|\?)))"; + public const string OneOnOneRegex = @"\b(1\s*:\s*1(?!\d))|(one (on )?one|one\s*-\s*one|one\s*:\s*one)\b"; + public static readonly string LaterEarlyPeriodRegex = $@"(\b(({PrefixPeriodRegex})\s*\b\s*(?{OneWordPeriodRegex}|(?{BaseDateTime.FourDigitYearRegex}))|({UnspecificEndOfRangeRegex}))\b)|({PrefixPeriodRegex}\s+(من هذا|من|هذا)\s+(الشهر|الأسبوع|العام|الاسبوع)(\s+{PrefixPeriodRegex})?)"; + public static readonly string WeekWithWeekDayRangeRegex = $@"\b(?(هذا\s+)?(الأسبوع)\s+(({NextPrefixRegex}|{PreviousPrefixRegex})\s+)?)(((بين)\s+{WeekDayRegex}\s+(و)\s*{WeekDayRegex})|(من)\s+{WeekDayRegex}\s+(إلى)\s+{WeekDayRegex})\b"; + public const string GeneralEndingRegex = @"^\s*((\.,)|\.|,|!|\?)?\s*$"; + public const string MiddlePauseRegex = @"\s*(,)\s*"; + public const string DurationConnectorRegex = @"^\s*(?\s+|و|،|,)\s*$"; + public const string PrefixArticleRegex = @"\bإلى\s+"; + public const string OrRegex = @"\s*((\b|,\s*)(or|and)\b|,)\s*"; + public static readonly string SpecialYearTermsRegex = $@"\b(((ال)?سنة|(ال)?عام)(\s+{SpecialYearPrefixes})?)"; + public static readonly string YearPlusNumberRegex = $@"\b({SpecialYearTermsRegex}\s*((?(\d{{2,4}}))|{FullTextYearRegex}))\b"; + public static readonly string NumberAsTimeRegex = $@"\b({WrittenTimeRegex}|{PeriodHourNumRegex}|{BaseDateTime.HourRegex})\b"; + public static readonly string TimeBeforeAfterRegex = $@"\b(((?<=\b(ب|((قبل|في موعد لا يتجاوز| 
بعد)\s))(وقت\s+)?)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}|{MidTimeRegex}))|{MidTimeRegex})\b"; + public const string DateNumberConnectorRegex = @"^\s*(?\s+at)\s*$"; + public const string DecadeRegex = @"(?(ال)?عشرات|(ال)?عشرينيات|(ال)?عشرينات|(ال)?ثلاثينات|(ال)?أربعينيات|(ال)?أربعينات|(ال)?خمسينيات|(ال)?خمسينات|(ال)?ستينات|(ال)?سبعينيات|(ال)?سبعينات|(ال)?ثمانينات|(ال)?تسعينات|الألفين|ألفين)"; + public static readonly string DecadeWithCenturyRegex = $@"({DecadeRegex})((\s+القرن(\s+(الثماني عشر|التاسع عشر)))|(\s+(و{DecadeRegex})))?"; + public static readonly string RelativeDecadeRegex = $@"\b(?(الثلاثة|الأربعة|الخمسة|الستة|السبعة|الثمانية|التسعة|العشر|\d+)\s+)?((ال)?عقدين|(ال)?عقد|(ال)?عقود)\s+(الماضيين|الماضية|الماضي|القادمين|القادمة|القادم)\b"; + public static readonly string YearPeriodRegex = $@"(((من|بين)\s+)?{YearRegex}\s*({TillRegex}|{RangeConnectorRegex})\s*{YearRegex})"; + public static readonly string StrictTillRegex = $@"(?\b(to|(un)?till?|thru|through)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s+of|\s*,\s*))))"; + public static readonly string StrictRangeConnectorRegex = $@"(?\b(and|through|to)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s+of|\s*,\s*))))"; + public const string StartMiddleEndRegex = @"\b((?((the\s+)?(start|beginning)\s+of\s+)?)(?((the\s+)?middle\s+of\s+)?)(?((the\s+)?end\s+of\s+)?))"; + public static readonly string ComplexDatePeriodRegex = $@"(?:((from|during|in)\s+)?{StartMiddleEndRegex}(?.+)\s*({StrictTillRegex})\s*{StartMiddleEndRegex}(?.+)|((between)\s+){StartMiddleEndRegex}(?.+)\s*({StrictRangeConnectorRegex})\s*{StartMiddleEndRegex}(?.+))"; + public static readonly string FailFastRegex = 
$@"{BaseDateTime.DeltaMinuteRegex}|\b(?:{BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex})|{BaseDateTime.BaseAmPmDescRegex}|\b(?:zero|{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}|{WrittenMonthRegex}|{SeasonDescRegex}|{DecadeRegex}|centur(y|ies)|weekends?|quarters?|hal(f|ves)|yesterday|to(morrow|day|night)|tmr|noonish|\d(-|——)?ish|((the\s+\w*)|\d)(th|rd|nd|st)|(mid\s*(-\s*)?)?(night|morning|afternoon|day)s?|evenings?||noon|lunch(time)?|dinner(time)?|(day|night)time|overnight|dawn|dusk|sunset|hours?|hrs?|h|minutes?|mins?|seconds?|secs?|eo[dmy]|mardi[ -]?gras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|cinco de mayo|all (hallow|souls)|guy fawkes|(st )?patrick|hundreds?|noughties|aughts|thousands?)\b|{WeekDayRegex}|{SetWeekDayRegex}|{NowRegex}|{PeriodicRegex}|\b({DateUnitRegex}|{ImplicitDayRegex})"; + public static readonly Dictionary UnitMap = new Dictionary + { + { @"قرن", @"10Y" }, + { @"حقبة", @"10Y" }, + { @"قرون", @"10Y" }, + { @"حقبات", @"10Y" }, + { @"قرنين", @"20Y" }, + { @"قرنان", @"20Y" }, + { @"حقبتان", @"20Y" }, + { @"حقبتين", @"20Y" }, + { @"سنة", @"Y" }, + { @"العام", @"Y" }, + { @"عام", @"Y" }, + { @"سنوات", @"Y" }, + { @"أعوام", @"Y" }, + { @"عامان", @"2Y" }, + { @"سنتان", @"2Y" }, + { @"سنتين", @"2Y" }, + { @"عامين", @"2Y" }, + { @"الشهر", @"MON" }, + { @"شهر", @"MON" }, + { @"أشهر", @"MON" }, + { @"شهور", @"MON" }, + { @"شهرا", @"MON" }, + { @"شهرين", @"2MON" }, + { @"شهران", @"2MON" }, + { @"quarters", @"3MON" }, + { @"quarter", @"3MON" }, + { @"semesters", @"6MON" }, + { @"semestres", @"6MON" }, + { @"semester", @"6MON" }, + { @"semestre", @"6MON" }, + { @"أسبوع", @"W" }, + { @"أسابيع", @"W" }, + { @"أسبوعا", @"W" }, + { @"أسبوعان", @"2W" }, + { @"أسبوعين", @"2W" }, + { @"نهاية الأسبوع", @"WE" }, + { @"يوم", @"D" }, + { @"أيام", @"D" }, + { @"يوما", @"D" }, + { @"يومان", @"2D" }, + { @"يومين", @"2D" }, + { @"ليال", @"D" }, + { @"ليلة", @"D" }, + { @"ساعة", @"H" }, + 
{ @"ساعات", @"H" }, + { @"ساعتان", @"2H" }, + { @"ساعتين", @"2H" }, + { @"دقيقة", @"M" }, + { @"دقائق", @"M" }, + { @"دقيقتان", @"2M" }, + { @"دقيقتين", @"2M" }, + { @"ثانية", @"S" }, + { @"ثوان", @"S" }, + { @"ثانيتان", @"2S" }, + { @"ثانيتين", @"2S" } + }; + public static readonly Dictionary UnitValueMap = new Dictionary + { + { @"قرن", 315360000 }, + { @"حقبة", 315360000 }, + { @"قرون", 315360000 }, + { @"حقبات", 315360000 }, + { @"قرنين", 630720000 }, + { @"حقبتين", 630720000 }, + { @"قرنان", 630720000 }, + { @"حقبتان", 630720000 }, + { @"سنة", 31536000 }, + { @"العام", 31536000 }, + { @"عام", 31536000 }, + { @"سنوات", 31536000 }, + { @"أعوام", 31536000 }, + { @"عامان", 63072000 }, + { @"سنتان", 63072000 }, + { @"سنتين", 63072000 }, + { @"عامين", 63072000 }, + { @"الشهر", 2592000 }, + { @"شهر", 2592000 }, + { @"أشهر", 2592000 }, + { @"شهور", 2592000 }, + { @"شهرا", 2592000 }, + { @"شهرين", 5184000 }, + { @"شهران", 5184000 }, + { @"نهاية الأسبوع", 172800 }, + { @"أسبوع", 604800 }, + { @"أسابيع", 604800 }, + { @"أسبوعا", 604800 }, + { @"أسبوعان", 1209600 }, + { @"أسبوعين", 1209600 }, + { @"يوم", 86400 }, + { @"أيام", 86400 }, + { @"يوما", 86400 }, + { @"يومان", 172800 }, + { @"يومين", 172800 }, + { @"ليلة", 86400 }, + { @"ليال", 86400 }, + { @"ساعة", 3600 }, + { @"ساعات", 3600 }, + { @"ساعتان", 7200 }, + { @"ساعتين", 7200 }, + { @"دقيقة", 60 }, + { @"دقائق", 60 }, + { @"دقيقتان", 120 }, + { @"دقيقتين", 120 }, + { @"ثانية", 1 }, + { @"ثوان", 1 }, + { @"ثانيتان", 2 }, + { @"ثانيتين", 2 } + }; + public static readonly Dictionary SpecialYearPrefixesMap = new Dictionary + { + { @"fiscal", @"FY" }, + { @"school", @"SY" }, + { @"fy", @"FY" }, + { @"sy", @"SY" } + }; + public static readonly Dictionary SeasonMap = new Dictionary + { + { @"الربيع", @"SP" }, + { @"ربيع", @"SP" }, + { @"الصيف", @"SU" }, + { @"صيف", @"SU" }, + { @"الخريف", @"FA" }, + { @"خريف", @"FA" }, + { @"الشتاء", @"WI" }, + { @"شتاء", @"WI" } + }; + public static readonly Dictionary SeasonValueMap = new 
Dictionary + { + { @"SP", 3 }, + { @"SU", 6 }, + { @"FA", 9 }, + { @"WI", 12 } + }; + public static readonly Dictionary CardinalMap = new Dictionary + { + { @"الأول", 1 }, + { @"الثاني", 2 }, + { @"الثالث", 3 }, + { @"الرابع", 4 }, + { @"الخامس", 5 } + }; + public static readonly Dictionary DayOfWeek = new Dictionary + { + { @"الاثنين", 1 }, + { @"الإثنين", 1 }, + { @"monday", 1 }, + { @"الثلاثاء", 2 }, + { @"tuesday", 2 }, + { @"الأربعاء", 3 }, + { @"wednesday", 3 }, + { @"الخميس", 4 }, + { @"thursday", 4 }, + { @"الجمعة", 5 }, + { @"friday", 5 }, + { @"السبت", 6 }, + { @"saturday", 6 }, + { @"الأحد", 0 }, + { @"sunday", 0 }, + { @"إثنين", 1 }, + { @"ثلاثاء", 2 }, + { @"أربعاء", 3 }, + { @"خميس", 4 }, + { @"جمعة", 5 }, + { @"سبت", 6 }, + { @"أحد", 0 } + }; + public static readonly Dictionary MonthOfYear = new Dictionary + { + { @"يناير", 1 }, + { @"فبراير", 2 }, + { @"مارس", 3 }, + { @"أبريل", 4 }, + { @"مايو", 5 }, + { @"يونيو", 6 }, + { @"يوليو", 7 }, + { @"أغسطس", 8 }, + { @"سبتمبر", 9 }, + { @"أكتوبر", 10 }, + { @"نوفمبر", 11 }, + { @"ديسمبر", 12 }, + { @"محرم", 1 }, + { @"صفر", 2 }, + { @"ربيع الأول", 3 }, + { @"ربيع الثاني", 4 }, + { @"جمادى الأول", 5 }, + { @"جمادى الثاني", 6 }, + { @"رجب", 7 }, + { @"شعبان", 8 }, + { @"رمضان", 9 }, + { @"شوال", 10 }, + { @"ذو القعدة", 11 }, + { @"ذو الحجة", 12 }, + { @"كانون الثاني", 1 }, + { @"شباط", 2 }, + { @"آذار", 3 }, + { @"نيسان", 4 }, + { @"أيار", 5 }, + { @"حزيران", 6 }, + { @"تموز", 7 }, + { @"آب", 8 }, + { @"أيلول", 9 }, + { @"تشرين الأول", 10 }, + { @"تشرين الثاني", 11 }, + { @"كانون الأول", 12 }, + { @"1", 1 }, + { @"2", 2 }, + { @"3", 3 }, + { @"4", 4 }, + { @"5", 5 }, + { @"6", 6 }, + { @"7", 7 }, + { @"8", 8 }, + { @"9", 9 }, + { @"10", 10 }, + { @"11", 11 }, + { @"12", 12 }, + { @"01", 1 }, + { @"02", 2 }, + { @"03", 3 }, + { @"04", 4 }, + { @"05", 5 }, + { @"06", 6 }, + { @"07", 7 }, + { @"08", 8 }, + { @"09", 9 } + }; + public static readonly Dictionary Numbers = new Dictionary + { + { @"صفر", 0 }, + { 
@"واحد", 1 }, + { @"الواحدة", 1 }, + { @"اثنان", 2 }, + { @"الثانية", 2 }, + { @"ثلاثة", 3 }, + { @"ثلاث", 3 }, + { @"الثالثة", 3 }, + { @"أربعة", 4 }, + { @"الرابعة", 4 }, + { @"خمسة", 5 }, + { @"الخامسة", 5 }, + { @"ستة", 6 }, + { @"السادسة", 6 }, + { @"سبعة", 7 }, + { @"السابعة", 7 }, + { @"ثمانية", 8 }, + { @"الثامنة", 8 }, + { @"تسعة", 9 }, + { @"التاسعة", 9 }, + { @"عشرة", 10 }, + { @"العاشرة", 10 }, + { @"أحد عشر", 11 }, + { @"الحادية عشر", 11 }, + { @"اثنا عشر", 12 }, + { @"ثلاثة عشر", 13 }, + { @"أربعة عشر", 14 }, + { @"خمسة عشر", 15 }, + { @"ستة عشر", 16 }, + { @"سبعة عشر", 17 }, + { @"ثمانية عشر", 18 }, + { @"تسعة عشر", 19 }, + { @"عشرون", 20 }, + { @"عشرين", 20 }, + { @"واحد وعشرون", 21 }, + { @"اثنان وعشرون", 22 }, + { @"ثلاثة وعشرون", 23 }, + { @"أربعة وعشرون", 24 }, + { @"خمسة وعشرون", 25 }, + { @"ستة وعشرون", 26 }, + { @"سبعة وعشرون", 27 }, + { @"ثمانية وعشرون", 28 }, + { @"تسعة وعشرون", 29 }, + { @"الثلاثين", 30 }, + { @"ثلاثين", 30 }, + { @"واحد وثلاثون", 31 }, + { @"اثنان وثلاثون", 32 }, + { @"ثلاثة وثلاثون", 33 }, + { @"أربعة وثلاثون", 34 }, + { @"خمسة وثلاثون", 35 }, + { @"ستة وثلاثون", 36 }, + { @"سبعة وثلاثون", 37 }, + { @"ثمانية وثلاثون", 38 }, + { @"تسعة وثلاثون", 39 }, + { @"أربعون", 40 }, + { @"واحد وأربعون", 41 }, + { @"اثنان وأربعون", 42 }, + { @"ثلاثة وأربعون", 43 }, + { @"أربعة وأربعون", 44 }, + { @"خمسة وأربعون", 45 }, + { @"ستة وأربعون", 46 }, + { @"سبعة وأربعون", 47 }, + { @"ثمانية وأربعون", 48 }, + { @"تسعة وأربعون", 49 }, + { @"خمسون", 50 }, + { @"واحد وخمسون", 51 }, + { @"اثنان وخمسون", 52 }, + { @"ثلاثة وخمسون", 53 }, + { @"أربعة وخمسون", 54 }, + { @"خمسة وخمسون", 55 }, + { @"ستة وخمسون", 56 }, + { @"سبعة وخمسون", 57 }, + { @"ثمانية وخمسون", 58 }, + { @"تسعة وخمسون", 59 }, + { @"ستين", 60 }, + { @"واحد وستون", 61 }, + { @"اثنان وستون", 62 }, + { @"ثلاثة وستون", 63 }, + { @"أربعة وستون", 64 }, + { @"خمسة وستون", 65 }, + { @"ستة وستون", 66 }, + { @"سبعة وستون", 67 }, + { @"ثمانية وستون", 68 }, + { @"تسعة وستون", 69 }, + { 
@"السبعون", 70 }, + { @"واحد وسبعون", 71 }, + { @"اثنان وسبعون", 72 }, + { @"ثلاثة وسبعون", 73 }, + { @"أربعة وسبعون", 74 }, + { @"خمسة وسبعون", 75 }, + { @"ستة وسبعون", 76 }, + { @"سبعة وسبعون", 77 }, + { @"ثمانية وسبعون", 78 }, + { @"تسعة وسبعون", 79 }, + { @"ثمانون", 80 }, + { @"واحد وثمانون", 81 }, + { @"اثنان وثمانون", 82 }, + { @"ثلاثة وثمانون", 83 }, + { @"أربعة وثمانون", 84 }, + { @"خمسة وثمانون", 85 }, + { @"ستة وثمانون", 86 }, + { @"سبعة وثمانون", 87 }, + { @"ثمانية وثمانين", 88 }, + { @"تسعة وثمانون", 89 }, + { @"تسعون", 90 }, + { @"واحد وتسعون", 91 }, + { @"اثنان وتسعون", 92 }, + { @"ثلاثة وتسعون", 93 }, + { @"أربعة وتسعون", 94 }, + { @"خمسة وتسعون", 95 }, + { @"ستة وتسعون", 96 }, + { @"سبعة وتسعون", 97 }, + { @"ثمانية وتسعون", 98 }, + { @"تسعة وتسعون", 99 }, + { @"مائة", 100 } + }; + public static readonly Dictionary DayOfMonth = new Dictionary + { + { @"1st", 1 }, + { @"1th", 1 }, + { @"2nd", 2 }, + { @"2th", 2 }, + { @"3rd", 3 }, + { @"3th", 3 }, + { @"4th", 4 }, + { @"5th", 5 }, + { @"6th", 6 }, + { @"7th", 7 }, + { @"8th", 8 }, + { @"9th", 9 }, + { @"10th", 10 }, + { @"11th", 11 }, + { @"11st", 11 }, + { @"12th", 12 }, + { @"12nd", 12 }, + { @"13th", 13 }, + { @"13rd", 13 }, + { @"14th", 14 }, + { @"15th", 15 }, + { @"16th", 16 }, + { @"17th", 17 }, + { @"18th", 18 }, + { @"19th", 19 }, + { @"20th", 20 }, + { @"21st", 21 }, + { @"21th", 21 }, + { @"22nd", 22 }, + { @"22th", 22 }, + { @"23rd", 23 }, + { @"23th", 23 }, + { @"24th", 24 }, + { @"25th", 25 }, + { @"26th", 26 }, + { @"27th", 27 }, + { @"28th", 28 }, + { @"29th", 29 }, + { @"30th", 30 }, + { @"31st", 31 }, + { @"01st", 1 }, + { @"01th", 1 }, + { @"02nd", 2 }, + { @"02th", 2 }, + { @"03rd", 3 }, + { @"03th", 3 }, + { @"04th", 4 }, + { @"05th", 5 }, + { @"06th", 6 }, + { @"07th", 7 }, + { @"08th", 8 }, + { @"09th", 9 } + }; + public static readonly Dictionary DoubleNumbers = new Dictionary + { + { @"half", 0.5 }, + { @"quarter", 0.25 } + }; + public static readonly Dictionary> HolidayNames 
= new Dictionary> + { + { @"easterday", new string[] { @"easterday", @"easter", @"eastersunday" } }, + { @"ashwednesday", new string[] { @"ashwednesday" } }, + { @"palmsunday", new string[] { @"palmsunday" } }, + { @"maundythursday", new string[] { @"maundythursday" } }, + { @"goodfriday", new string[] { @"goodfriday" } }, + { @"eastersaturday", new string[] { @"eastersaturday" } }, + { @"eastermonday", new string[] { @"eastermonday" } }, + { @"ascensionday", new string[] { @"ascensionday" } }, + { @"whitesunday", new string[] { @"whitesunday", @"pentecost", @"pentecostday" } }, + { @"whitemonday", new string[] { @"whitemonday" } }, + { @"trinitysunday", new string[] { @"trinitysunday" } }, + { @"corpuschristi", new string[] { @"corpuschristi" } }, + { @"earthday", new string[] { @"earthday" } }, + { @"fathers", new string[] { @"fatherday", @"fathersday" } }, + { @"mothers", new string[] { @"motherday", @"mothersday" } }, + { @"thanksgiving", new string[] { @"thanksgivingday", @"thanksgiving" } }, + { @"blackfriday", new string[] { @"blackfriday" } }, + { @"cybermonday", new string[] { @"cybermonday" } }, + { @"martinlutherking", new string[] { @"mlkday", @"martinlutherkingday", @"martinlutherkingjrday" } }, + { @"washingtonsbirthday", new string[] { @"washingtonsbirthday", @"washingtonbirthday", @"presidentsday" } }, + { @"canberra", new string[] { @"canberraday" } }, + { @"labour", new string[] { @"labourday", @"laborday" } }, + { @"columbus", new string[] { @"columbusday" } }, + { @"memorial", new string[] { @"memorialday" } }, + { @"yuandan", new string[] { @"yuandan" } }, + { @"maosbirthday", new string[] { @"maosbirthday" } }, + { @"teachersday", new string[] { @"teachersday", @"teacherday" } }, + { @"singleday", new string[] { @"singleday" } }, + { @"allsaintsday", new string[] { @"allsaintsday" } }, + { @"youthday", new string[] { @"youthday" } }, + { @"childrenday", new string[] { @"childrenday", @"childday" } }, + { @"femaleday", new string[] { 
@"femaleday" } }, + { @"treeplantingday", new string[] { @"treeplantingday" } }, + { @"arborday", new string[] { @"arborday" } }, + { @"girlsday", new string[] { @"girlsday" } }, + { @"whiteloverday", new string[] { @"whiteloverday" } }, + { @"loverday", new string[] { @"loverday", @"loversday" } }, + { @"christmas", new string[] { @"christmasday", @"christmas" } }, + { @"xmas", new string[] { @"xmasday", @"xmas" } }, + { @"newyear", new string[] { @"newyear" } }, + { @"newyearday", new string[] { @"newyearday" } }, + { @"newyearsday", new string[] { @"newyearsday" } }, + { @"inaugurationday", new string[] { @"inaugurationday" } }, + { @"groundhougday", new string[] { @"groundhougday" } }, + { @"valentinesday", new string[] { @"valentinesday" } }, + { @"stpatrickday", new string[] { @"stpatrickday", @"stpatricksday", @"stpatrick" } }, + { @"aprilfools", new string[] { @"aprilfools" } }, + { @"stgeorgeday", new string[] { @"stgeorgeday" } }, + { @"mayday", new string[] { @"mayday", @"intlworkersday", @"internationalworkersday", @"workersday" } }, + { @"cincodemayoday", new string[] { @"cincodemayoday" } }, + { @"baptisteday", new string[] { @"baptisteday" } }, + { @"usindependenceday", new string[] { @"usindependenceday" } }, + { @"independenceday", new string[] { @"independenceday" } }, + { @"bastilleday", new string[] { @"bastilleday" } }, + { @"halloweenday", new string[] { @"halloweenday", @"halloween" } }, + { @"allhallowday", new string[] { @"allhallowday" } }, + { @"allsoulsday", new string[] { @"allsoulsday" } }, + { @"guyfawkesday", new string[] { @"guyfawkesday" } }, + { @"veteransday", new string[] { @"veteransday" } }, + { @"christmaseve", new string[] { @"christmaseve" } }, + { @"newyeareve", new string[] { @"newyearseve", @"newyeareve" } }, + { @"juneteenth", new string[] { @"juneteenth", @"freedomday", @"jubileeday" } } + }; + public static readonly Dictionary WrittenDecades = new Dictionary + { + { @"hundreds", 0 }, + { @"tens", 10 }, + { 
@"twenties", 20 }, + { @"thirties", 30 }, + { @"forties", 40 }, + { @"fifties", 50 }, + { @"sixties", 60 }, + { @"seventies", 70 }, + { @"eighties", 80 }, + { @"nineties", 90 } + }; + public static readonly Dictionary SpecialDecadeCases = new Dictionary + { + { @"noughties", 2000 }, + { @"aughts", 2000 }, + { @"two thousands", 2000 } + }; + public const string DefaultLanguageFallback = @"MDY"; + public static readonly IList SuperfluousWordList = new List + { + @"preferably", + @"how about", + @"maybe", + @"perhaps", + @"say", + @"like" + }; + public static readonly string[] DurationDateRestrictions = { @"today", @"now" }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"^(morning|afternoon|evening|night|day)\b", @"\b(good\s+(morning|afternoon|evening|night|day))|(nighty\s+night)\b" }, + { @"\bnow\b", @"\b(^now,)|\b((is|are)\s+now\s+for|for\s+now)\b" }, + { @"\bmay\b", @"\b((((!|\.|\?|,|;|)\s+|^)may i)|(i|you|he|she|we|they)\s+may|(may\s+((((also|not|(also not)|well)\s+)?(be|ask|contain|constitute|e-?mail|take|have|result|involve|get|work|reply|differ))|(or may not))))\b" }, + { @"\b(a|one) second\b", @"\b(? 
MorningTermList = new List + { + @"morning" + }; + public static readonly IList AfternoonTermList = new List + { + @"afternoon" + }; + public static readonly IList EveningTermList = new List + { + @"evening" + }; + public static readonly IList MealtimeBreakfastTermList = new List + { + @"breakfast" + }; + public static readonly IList MealtimeBrunchTermList = new List + { + @"brunch" + }; + public static readonly IList MealtimeLunchTermList = new List + { + @"lunch", + @"lunchtime" + }; + public static readonly IList MealtimeDinnerTermList = new List + { + @"dinner", + @"dinnertime", + @"supper" + }; + public static readonly IList DaytimeTermList = new List + { + @"daytime" + }; + public static readonly IList NightTermList = new List + { + @"night" + }; + public static readonly IList SameDayTerms = new List + { + @"today", + @"otd" + }; + public static readonly IList PlusOneDayTerms = new List + { + @"tomorrow", + @"tmr", + @"day after" + }; + public static readonly IList MinusOneDayTerms = new List + { + @"yesterday", + @"day before" + }; + public static readonly IList PlusTwoDayTerms = new List + { + @"day after tomorrow", + @"day after tmr" + }; + public static readonly IList MinusTwoDayTerms = new List + { + @"day before yesterday" + }; + public static readonly IList FutureTerms = new List + { + @"this", + @"next" + }; + public static readonly IList LastCardinalTerms = new List + { + @"الأخير" + }; + public static readonly IList MonthTerms = new List + { + @"month" + }; + public static readonly IList MonthToDateTerms = new List + { + @"month to date" + }; + public static readonly IList WeekendTerms = new List + { + @"weekend" + }; + public static readonly IList WeekTerms = new List + { + @"week" + }; + public static readonly IList YearTerms = new List + { + @"year" + }; + public static readonly IList GenericYearTerms = new List + { + @"y" + }; + public static readonly IList YearToDateTerms = new List + { + @"year to date" + }; + public const string 
DoubleMultiplierRegex = @"^(bi)(-|\s)?"; + public const string HalfMultiplierRegex = @"^(semi)(-|\s)?"; + public const string DayTypeRegex = @"((week)?da(il)?ys?)$"; + public const string WeekTypeRegex = @"(week(s|ly)?)$"; + public const string WeekendTypeRegex = @"(weekends?)$"; + public const string MonthTypeRegex = @"(month(s|ly)?)$"; + public const string QuarterTypeRegex = @"(quarter(s|ly)?)$"; + public const string YearTypeRegex = @"((years?|annual)(ly)?)$"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/DateTimeDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/DateTimeDefinitions.tt new file mode 100644 index 0000000000..1c742b50ba --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/DateTimeDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Arabic\Arabic-DateTime.yaml"; + this.Language = "Arabic"; + this.ClassName = "DateTimeDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/NumbersDefinitions.cs new file mode 100644 index 0000000000..292db1a45b --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/NumbersDefinitions.cs @@ -0,0 +1,503 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Arabic\Arabic-Numbers.yaml +// - Language: Arabic +// - ClassName: NumbersDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Arabic +{ + using System; + using System.Collections.Generic; + + public static class NumbersDefinitions + { + public const string LangMarker = @"Ara"; + public const bool CompoundNumberLanguage = false; + public const bool MultiDecimalSeparatorCulture = true; + public const string RoundNumberIntegerRegex = @"(?:مائتان|مائة|مائة|مائتين|ثلاثمائه|أربعة مئة|خمسمائة|ستمائة|سبعمائة|ثمان مائة|تسعمائة|تريليون|ترليون|آلاف|تريليونين|تريليونات|مليار|ملياري|مليارات|مليون|مليونان|ملايين|ملايين|ألف|مليونين|ألفين|مئة|الف|ومائتين|الفين|بألفين|مئتان|الآف)"; + public const string ZeroToNineIntegerRegex = @"(وخمسة|و خمسة|بإثنان|وواحد|و واحد|واحد|وأربعة|و أربعة|واثنان|اثنان|إثنان|وثلاثة|و ثلاثة|ثلاثة|واربعة|أربع|أربعة|خمسة|وستة|و ستة|ستة|وسبعة|و سبعة|سبعة|وثمانية|و ثمانية|ثمانية|ثمانٍ|وتسعة|و تسعة|تسع|أحد|اثني|إثني|ثلاث|صفر|سبع|ست|اربع|أربع|السادس|الثامنة|تسعة|اثنين|واحدُ|وإثنين|وواحدُ|الواحد:?)"; + public const string TwoToNineIntegerRegex = @"(?:ثلاث|ثلاثة|سبعة|ثمان|ثمانية|أربع|أربعة|خمسة|تسعة|اثنان|اثنتان|اثنين|اثتنين|اثنتان|إثنان|إثنتان|إثنين|إثتنين|إثنتان|ست|ستة)"; + public const string NegativeNumberTermsRegex = @"(?(سالب|ناقص)(\s+)?)"; + public static readonly string NegativeNumberSignRegex = $@"^{NegativeNumberTermsRegex}.*"; + public const string AnIntRegex = @"(واحد|أحد)(?=\s)"; + public const string TenToNineteenIntegerRegex = @"(?:((ثلاث|ثلاثة|سبعة|ثمان|ثمانية|أربع|أربعة|خمسة|تسعة|اثنان|اثنان|اثنين|اثتنين|اثنتان|إثنان|إثنتان|إثنين|إثتنين|إثنتان|ستة|أحد|أربعة|إثني|اثني)\s(عشر|عشرة)))"; + public const string TensNumberIntegerRegex = @"(عشرة|عشرون|ثلاثون|أربعون|خمسون|ستون|سبعون|ثمانون|تسعين|وعشرين|و عشرين|وثلاثين|و ثلاثين|وأربعين|و أربعين|وخمسين|و خمسين|وستين|وستين|وسبعين|و سبعين|وثمانين|و ثمانين|وتسعين|وتسعين|وعشرون|ثلاثون|وأربعون|و أربعون|وخمسون|و خمسون|وستون|و ستون|وسبعون|و سبعون|وثمانون|و ثمانون|وتسعون|و 
تسعون|عشرين|ثلاثين|أربعين|خمسين|ستين|سبعين|ثمانين|تسعون|العشرون:?)"; + public static readonly string SeparaIntRegex = $@"(?:((({RoundNumberIntegerRegex}\s{RoundNumberIntegerRegex})|{TenToNineteenIntegerRegex}|({ZeroToNineIntegerRegex}(((و)?)\s+(و)?|\s*-\s*){TensNumberIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{RoundNumberIntegerRegex})(\s+{RoundNumberIntegerRegex})*))|(((\s+{RoundNumberIntegerRegex})+))"; + public static readonly string AllIntRegex = $@"(?:({SeparaIntRegex})((\s+(و)?)({SeparaIntRegex})(\s+{RoundNumberIntegerRegex})?)*|((({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}(\s+(و)?|\s*-\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex})?(\s+{RoundNumberIntegerRegex})+)\s+(و)?)*{SeparaIntRegex})"; + public const string PlaceHolderPureNumber = @"\b"; + public const string PlaceHolderDefault = @"\D|\b"; + public static readonly Func NumbersWithPlaceHolder = (placeholder) => $@"(((?(الواحد\s)?((السابق|السابقة|الثانية الى|((الذي)\s*(قبل|قبلا)\s*)?(الأخير)|قبل|بعد|سبق|سبقت|التالي|الحالي|الذي|اخر)(\s))?((تالي|الحالي|السابقة|سابق|قادم|التالي|((الذي)\s*(قبل|قبلا)\s*)?(الأخير)|آخر|أخير|حالي|اخر|الاخير|الأولى)(ة)?)|(الاخر|الاول|الأول|اول|الأولى|((الذي)\s*(قبل|قبلا)\s*)?(الأخير)|السابق|التالي|أخر))"; + public static readonly string BasicOrdinalRegex = $@"({NumberOrdinalRegex}|{RelativeOrdinalRegex})"; + public static readonly string SuffixBasicOrdinalRegex = $@"(?:(((({TensNumberIntegerRegex}(\s+(و)?|\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|({RoundNumberIntegerRegex}|المئة(\s+(و)?)))((\s+{RoundNumberIntegerRegex}|المئة)+|({BasicOrdinalRegex})))\s+(و)?)*({TensNumberIntegerRegex}(\s+|\s*))?{BasicOrdinalRegex}|({TensNumberIntegerRegex}))"; + public static readonly string SuffixRoundNumberOrdinalRegex = $@"(?:({AllIntRegex}\s+){RoundNumberOrdinalRegex})"; + public static readonly string AllOrdinalRegex = $@"(?:{SuffixBasicOrdinalRegex}|{SuffixRoundNumberOrdinalRegex})"; + 
public const string OrdinalNumericRegex = @"(?<=\b)(?:\d{1,3}(\s*,\s*\d{3})*\s*th)(?=\b)"; + public static readonly string OrdinalRoundNumberRegex = $@"({RoundNumberOrdinalRegex})"; + public static readonly string OrdinalEnglishRegex = $@"(?<=\b){AllOrdinalRegex}(?=\b)"; + public const string FractionNotationWithSpacesRegex = @"(((?<={?[\u0600-\u06ff]}|^)-\s*)|(?<=\b))\d+\s+\d+[/]\d+(?=(\b[^/]|$))"; + public const string FractionNotationWithSpacesRegex2 = @"(((?<={?[\u0600-\u06ff]}|^)-\s*)|(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?![\.,]))(?=\b)"; + public static readonly string FractionPrepositionWithinPercentModeRegex = $@"(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?![\.,]))(?=\b)"; + public static readonly string FractionWithOrdinalPrefix = $@"({AllOrdinalRegex})(?=\s*({FractionOrdinalPrefix}))"; + public static readonly string FractionWithPartOfPrefix = $@"((جزء من)\s+)({AllIntRegexWithLocks})"; + public static readonly string AllPointRegex = $@"((\s+{ZeroToNineIntegerRegex})+|(\s+{SeparaIntRegex}))"; + public static readonly string AllFloatRegex = $@"{AllIntRegex}(\s+(نقطة|جزء|جزء من)){AllPointRegex}"; + public static readonly string DoubleWithMultiplierRegex = $@"(((? DoubleDecimalPointRegex = (placeholder) => $@"(((? DoubleWithoutIntegralRegex = (placeholder) => $@"(?<=\s|^)(? 
DoubleWithThousandMarkRegex = (placeholder) => $@"(((?و)"; + public static readonly string NumberWithSuffixPercentage = $@"((?)"; + public const string LessRegex = @"(?:(أقل|اقل|اصغر|أصغر|أخفض|ادنى)(\s*من)?|تحت|(?|=)<)"; + public const string EqualRegex = @"(يساوي|(?)=)"; + public static readonly string MoreOrEqualPrefix = $@"(((ليس|لا)\s+{LessRegex})|(على\s+الأقل))"; + public static readonly string MoreOrEqual = $@"(?:(({MoreRegex}(\s+من)?)\s+(أو|او)?\s+{EqualRegex})|(({MoreOrEqualPrefix}|(تفوق))(\s+(أو|او)?\s+{EqualRegex})?)|(({EqualRegex}\s+(أو|او)?\s+)?({MoreOrEqualPrefix}|تفوق))|>\s*=)"; + public const string MoreOrEqualSuffix = @"((أو|او)\s+(((أكبر|أعظم|أطول|فوق|اكثر|اكثر|اكبر|أكثر)((?!\s+من)|(\s+من(?!(\s*\d+)))))|((فوق|أكبر|أطول|اكثر)(?!\s+من))))"; + public static readonly string LessOrEqualPrefix = $@"((ليس\s+{MoreRegex})|(at\s+most)|(بحد أقصى)|(يصل الى))"; + public static readonly string LessOrEqual = $@"(((لا\s*)?{LessRegex}\s+(أو|او)?\s+{EqualRegex})|({EqualRegex}\s+(أو|او)?\s+(((أقل|اقل|أدنى|اصغر|أصغر|ادنى)(\s+من))|تحت|(?|=)<))|({LessOrEqualPrefix}(\s+(أو|او)?\s+{EqualRegex})?)|(({EqualRegex}\s+(أو|او)?\s+)?{LessOrEqualPrefix})|<\s*=)"; + public const string LessOrEqualSuffix = @"((أ|ا)?و\s+(أقل)((?!\s+من)|(\s+من(?!(\s*\d+)))))"; + public const string NumberSplitMark = @"(?![.،](?!\d+))"; + public const string MoreRegexNoNumberSucceed = @"((أكبر|أعظم|أطول|فوق|اكثر)((?!\s+من)|\s+(من(?!(\s*\d+))))|(فوق|أكبر|أعظم)(?!(\s*\d+)))"; + public const string LessRegexNoNumberSucceed = @"((أقل|أصغر)((?!\s+من)|\s+(من(?!(\s*\d+))))|(تحت|اقل|أقل|أصغر)(?!((\s*\d+)|\s*من)))"; + public const string EqualRegexNoNumberSucceed = @"((يساوي)(?!(\s*\d+)))"; + public static readonly string OneNumberRangeMoreRegex1 = 
$@"({MoreOrEqual})\s*(ال)?(?({NumberSplitMark}.)+)|({EqualRegex}\s*(أو|او)?\s+({MoreRegex}))(\s+(من))\s*(?({NumberSplitMark}.)+)|({EqualRegex}\s+(أو|او)?\s+({MoreRegex}))\s*(?({NumberSplitMark}.)+)|({MoreRegex})(\s+(من))\s*(?({NumberSplitMark}.)+)|({MoreRegex})\s*(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeMoreRegex3 = $@"(?({NumberSplitMark}.)+)\s*(و|أو)\s*({MoreRegex})"; + public static readonly string OneNumberRangeMoreRegex2 = $@"(?({NumberSplitMark}.)+)\s*{MoreOrEqualSuffix}"; + public static readonly string OneNumberRangeMoreSeparateRegex = $@"({MoreRegex}\s*(من)\s+(?({NumberSplitMark}.)+)\s+(أو|او)\s+{EqualRegexNoNumberSucceed})|({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+(أو|او)\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\s+(?({NumberSplitMark}.)+)(\s+(أو|او)\s+){EqualRegexNoNumberSucceed})"; + public static readonly string OneNumberRangeLessRegex1 = $@"(({LessOrEqual})\s*(ال)?(?({NumberSplitMark}.)+))|(لا\s*)?((((أقل|اقل|أدنى|اصغر|أصغر|ادنى)(\s+من))|تحت|(?|=)<))\s*(ال)?(?({NumberSplitMark}.)+)|(لا\s*)?(({LessRegex})\s*(ال)?(?({NumberSplitMark}.)+))"; + public static readonly string OneNumberRangeLessRegex2 = $@"(?({NumberSplitMark}.)+)\s*{LessOrEqualSuffix}"; + public static readonly string OneNumberRangeLessSeparateRegex = $@"({EqualRegex}\s+(?({NumberSplitMark}.)+)\s*(أو|او)\s+{LessRegexNoNumberSucceed})|(((((أقل|اقل|أدنى|اصغر|أصغر|ادنى)(\s+من))|تحت|(?|=)<))\s+(?({NumberSplitMark}.)+)(\s+(أو|او)\s+){EqualRegexNoNumberSucceed})"; + public static readonly string OneNumberRangeEqualRegex = $@"{EqualRegex}\s*(ال)?(?({NumberSplitMark}.)+)"; + public static readonly string TwoNumberRangeRegex1 = $@"بين\s*(ال)?(?({NumberSplitMark}.)+)\s*و\s*(ال)?(?({NumberSplitMark}.)+)"; + public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\s*(،)?\s*((أ|ا)?و|لكن|,)\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"; + public static readonly string TwoNumberRangeRegex3 = 
$@"({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\s*(،)?\s*((أ|ا)?و|لكن|,)\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})"; + public static readonly string TwoNumberRangeRegex4 = $@"((من\s)(?({NumberSplitMark}(?!\bمن\b).)+)\s*{TillRegex}\s*(ال\s+)?(?({NumberSplitMark}.)+))|((من\s)?(?({NumberSplitMark}(?!\bمن\b).)+)\s*{TillRegex}\s*(ال\s+)?(?({NumberSplitMark}.)+))"; + public const string AmbiguousFractionConnectorsRegex = @"(\bمن|بين|من|بين\b)"; + public const char DecimalSeparatorChar = ','; + public const string FractionMarkerToken = @"أكثر"; + public const char NonDecimalSeparatorChar = '،'; + public const string HalfADozenText = @"ستة"; + public const string WordSeparatorToken = @"و"; + public static readonly string[] WrittenDecimalSeparatorTexts = { @"نقطة | فاصلة" }; + public static readonly string[] WrittenGroupSeparatorTexts = { @"punto" }; + public static readonly string[] WrittenIntegerSeparatorTexts = { @"و" }; + public static readonly string[] WrittenFractionSeparatorTexts = { @"و" }; + public const string HalfADozenRegex = @"نصف?\sدستة"; + public static readonly string DigitalNumberRegex = $@"((?<=\b)(مائة|مائتان|دست|دستات|ألف|ألفين|مائتين|ألفين|ثلاثمائة|أربعمائة|خمسمائة|ستمائة|سبعمائة|تسعمائة|ثمانمائة|مليون|مليار|ترليون)(?=\b))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; + public static readonly Dictionary CardinalNumberMap = new Dictionary + { + { @"واحد", 1 }, + { @"صفر", 0 }, + { @"اثنان", 2 }, + { @"اثنين", 2 }, + { @"ثلاث", 3 }, + { @"ثلاثة", 3 }, + { @"أربعة", 4 }, + { @"خمسة", 5 }, + { @"ستة", 6 }, + { @"سبعة", 7 }, + { @"ثمانية", 8 }, + { @"تسعة", 9 }, + { @"عشرة", 10 }, + { @"إحدى عشر", 11 }, + { @"اثنى عشر", 12 }, + { @"دستة", 12 }, + { @"دستات", 12 }, + { @"ثلاثة عشر", 13 }, + { @"أربعة عشر", 14 }, + { @"خمسة عشر", 15 }, + { @"ستة عشر", 16 }, + { @"سبعة عشر", 17 }, + { @"ثمانية عشر", 18 }, + { @"تسعة عشر", 19 }, + { @"عشرون", 20 }, + { @"وعشرون", 20 }, + { @"ثلاثون", 30 }, + { @"وثلاثون", 30 }, + { 
@"أربعون", 40 }, + { @"وأربعون", 40 }, + { @"خمسون", 50 }, + { @"وخمسون", 50 }, + { @"ستون", 60 }, + { @"وستون", 60 }, + { @"سبعون", 70 }, + { @"وسبعون", 70 }, + { @"ثمانون", 80 }, + { @"وثمانون", 80 }, + { @"تسعون", 90 }, + { @"وتسعون", 90 }, + { @"مائة", 100 }, + { @"ومائة", 100 }, + { @"مائتان", 200 }, + { @"ومائتان", 200 }, + { @"مائتين", 200 }, + { @"ومائتين", 200 }, + { @"ثلاثمائة", 300 }, + { @"وثلاثمائة", 300 }, + { @"أربعمائة", 400 }, + { @"وأربعمائة", 400 }, + { @"خمسمائة", 500 }, + { @"وخمسمائة", 500 }, + { @"ستمائة", 600 }, + { @"وستمائة", 600 }, + { @"سبعمائة", 700 }, + { @"وسبعمائة", 700 }, + { @"ثمانمائة", 800 }, + { @"وثمانمائة", 800 }, + { @"تسعمائة", 900 }, + { @"وتسعمائة", 900 }, + { @"ألف", 1000 }, + { @"آلاف", 1000 }, + { @"ألفين", 2000 }, + { @"ألفان", 2000 }, + { @"المليون", 1000000 }, + { @"مليون", 1000000 }, + { @"مليار", 1000000000 }, + { @"المليار", 1000000000 }, + { @"تريليون", 1000000000000 }, + { @"التريليون", 1000000000000 }, + { @"الواحد", 1 }, + { @"الصفر", 0 }, + { @"الاثنان", 2 }, + { @"الاثنين", 2 }, + { @"الثلاثة", 3 }, + { @"الأربعة", 4 }, + { @"الخمسة", 5 }, + { @"الستة", 6 }, + { @"السبعة", 7 }, + { @"الثمانية", 8 }, + { @"التسعة", 9 }, + { @"العشرة", 10 }, + { @"الإحدى عشر", 11 }, + { @"الاثنى عشر", 12 }, + { @"الدستة", 12 }, + { @"الدستات", 12 }, + { @"الثلاثة عشر", 13 }, + { @"الأربعة عشر", 14 }, + { @"الخمسة عشر", 15 }, + { @"الستة عشر", 16 }, + { @"السبعة عشر", 17 }, + { @"الثمانية عشر", 18 }, + { @"التسعة عشر", 19 }, + { @"العشرون", 20 }, + { @"الثلاثون", 30 }, + { @"الأربعون", 40 }, + { @"الخمسون", 50 }, + { @"الستون", 60 }, + { @"السبعون", 70 }, + { @"الثمانون", 80 }, + { @"التسعون", 90 }, + { @"المائة", 100 }, + { @"المائتين", 200 }, + { @"المائتان", 200 }, + { @"الثلاثمائة", 300 }, + { @"الأربعمائة", 400 }, + { @"الخمسمائة", 500 }, + { @"الستمائة", 600 }, + { @"السبعمائة", 700 }, + { @"الثمانمائة", 800 }, + { @"التسعمائة", 900 }, + { @"الألف", 1000 }, + { @"الآلاف", 1000 }, + { @"الألفين", 2000 } + }; + public 
static readonly Dictionary<string, long> OrdinalNumberMap = new Dictionary<string, long>
+        {
+            // Arabic ordinal words mapped to their numeric value. The table also holds the
+            // fraction-denominator forms (half -> 2, quarter -> 4, "one of ten" -> 10) and
+            // plural forms (halves, thirds, ... millions) at the end.
+            // Values are long because the largest entries (10^12) do not fit in an int.
+            // Entries are inserted through Add, so every key must be unique.
+            { @"أول", 1 },
+            { @"أولى", 1 },
+            { @"الأول", 1 },
+            { @"الأولى", 1 },
+            { @"ثاني", 2 },
+            { @"ثانية", 2 },
+            { @"الثاني", 2 },
+            { @"الثانية", 2 },
+            { @"ثان", 2 },
+            { @"النصف", 2 },
+            { @"نصف", 2 },
+            { @"ثلث", 3 },
+            { @"الثالث", 3 },
+            { @"الثالثة", 3 },
+            { @"ثالث", 3 },
+            { @"ثالثة", 3 },
+            { @"الربع", 4 },
+            { @"ربع", 4 },
+            { @"الرابع", 4 },
+            { @"الرابعة", 4 },
+            { @"رابع", 4 },
+            { @"رابعة", 4 },
+            { @"خمس", 5 },
+            { @"الخامس", 5 },
+            { @"الخامسة", 5 },
+            { @"خامس", 5 },
+            { @"خامسة", 5 },
+            { @"سدس", 6 },
+            { @"السادس", 6 },
+            { @"السادسة", 6 },
+            { @"سادس", 6 },
+            { @"سادسة", 6 },
+            { @"سبع", 7 },
+            { @"السابع", 7 },
+            { @"السابعة", 7 },
+            { @"سابع", 7 },
+            { @"سابعة", 7 },
+            { @"ثمن", 8 },
+            { @"الثامن", 8 },
+            { @"الثامنة", 8 },
+            { @"ثامن", 8 },
+            { @"ثامنة", 8 },
+            { @"تسع", 9 },
+            { @"التاسع", 9 },
+            { @"التاسعة", 9 }, // "the ninth" (feminine): same value as the three sibling forms around it
+            { @"تاسع", 9 },
+            { @"تاسعة", 9 },
+            { @"واحد من عشرة", 10 },
+            { @"العاشر", 10 },
+            { @"واحد من إحدى عشر", 11 },
+            { @"الحادية عشرة", 11 },
+            { @"الحادي عشر", 11 },
+            { @"واحد من إثنى عشر", 12 },
+            { @"الثانية عشرة", 12 },
+            { @"الثاني عشر", 12 },
+            { @"واحد من ثلاثة عشر", 13 },
+            { @"الثالثة عشرة", 13 },
+            { @"الثالث عشر", 13 },
+            { @"واحد من أربعة عشر", 14 },
+            { @"الرابعة عشرة", 14 },
+            { @"الرابع عشر", 14 },
+            { @"واحد من خمسة عشر", 15 },
+            { @"الخامسة عشرة", 15 },
+            { @"الخامس عشر", 15 },
+            { @"واحد من ستة عشر", 16 },
+            { @"السادسة عشرة", 16 },
+            { @"السادس عشر", 16 },
+            { @"واحد من سبعة عشر", 17 },
+            { @"السابعة عشرة", 17 },
+            { @"السابع عشر", 17 },
+            { @"واحد من ثمانية عشر", 18 },
+            { @"الثامنة عشرة", 18 },
+            { @"الثامن عشر", 18 },
+            { @"واحد من تسعة عشر", 19 },
+            { @"التاسع عشر", 19 },
+            { @"واحد من عشرين", 20 },
+            { @"العشرون", 20 },
+            { @"العشرين", 20 },
+            { @"الحادي والعشرين", 21 },
+            { @"الثاني والعشرين", 22 },
+            { @"الثالث والعشرين", 23 },
+            { @"الرابع والعشرين", 24 },
+            { @"الخامس والعشرين", 25 },
+            { @"السادس والعشرين", 26 },
+            { @"السابع والعشرين", 27 }, // key kept on one line: a line break inside the verbatim string would become part of the key
+            { @"الثامن والعشرين", 28 },
+            { @"التاسع والعشرين", 29 },
+            { @"واحد من ثلاثين", 30 },
+            { @"الثلاثون", 30 },
+            { @"الثلاثين", 30 },
+            { @"الحادي والثلاثين", 31 },
+            { @"واحد من أربعين", 40 },
+            { @"الأربعون", 40 },
+            { @"الأربعين", 40 },
+            { @"واحد من خمسين", 50 },
+            { @"الخمسون", 50 },
+            { @"الخمسين", 50 },
+            { @"واحد من ستين", 60 },
+            { @"الستون", 60 },
+            { @"الستين", 60 },
+            { @"واحد من سبعين", 70 },
+            { @"السبعون", 70 },
+            { @"السبعين", 70 },
+            { @"واحد من ثمانين", 80 },
+            { @"الثمانون", 80 },
+            { @"الثمانين", 80 },
+            { @"واحد من تسعين", 90 },
+            { @"التسعون", 90 },
+            { @"التسعين", 90 },
+            { @"واحد من مائة", 100 },
+            { @"المائة", 100 },
+            { @"المائتان", 200 },
+            { @"المائتين", 200 },
+            { @"الثلاثمائة", 300 },
+            { @"الأربعمائة", 400 },
+            { @"الخمسمائة", 500 },
+            { @"الستمائة", 600 },
+            { @"السبعمائة", 700 },
+            { @"الثمانمائة", 800 },
+            { @"التسعمائة", 900 }, // completes the 100..800 run above; the same key is 900 in RoundNumberMap
+            { @"الألف", 1000 },
+            { @"واحد من ألف", 1000 },
+            { @"واحد من مليون", 1000000 },
+            { @"المليون", 1000000 },
+            { @"واحد من مليار", 1000000000 },
+            { @"المليار", 1000000000 },
+            { @"واحد من تريليون", 1000000000000 },
+            { @"التريليون", 1000000000000 },
+            { @"أوائل", 1 },
+            { @"أنصاف", 2 },
+            { @"أثلاث", 3 },
+            { @"أرباع", 4 },
+            { @"أخماس", 5 },
+            { @"أسداس", 6 },
+            { @"أسباع", 7 },
+            { @"أثمان", 8 },
+            { @"أتساع", 9 },
+            { @"أعشار", 10 },
+            { @"عشرينات", 20 },
+            { @"ثلاثينات", 30 },
+            { @"أربعينات", 40 },
+            { @"خمسينات", 50 },
+            { @"ستينات", 60 },
+            { @"سبعينات", 70 },
+            { @"ثمانينات", 80 },
+            { @"تسعينات", 90 },
+            { @"مئات", 100 },
+            { @"ألوف", 1000 },
+            { @"ملايين", 1000000 },
+            { @"مليارات", 1000000000 },
+            { @"تريليون", 1000000000000 }
+        };
+        public static readonly Dictionary<string, long> RoundNumberMap = new Dictionary<string, long>
+        {
+            // Round-number words (hundreds, thousand, million, ...) mapped to their value.
+            { @"ترليون", 1000000000000 },
+            { @"مائة", 100 },
+            { @"ألف", 1000 },
+            { @"مليون", 1000000 },
+            { @"مليار", 1000000000 },
+            { @"تريليون", 1000000000000 },
+            { @"مائتين", 200 },
+            { @"مائتان", 200 },
+            { @"ثلاثمائة", 300 },
+            { @"أربعمائة", 400 },
+            { @"خمسمائة", 500 },
+            {
@"ستمائة", 600 },
+            { @"سبعمائة", 700 },
+            { @"ثمانمائة", 800 },
+            { @"تسعمائة", 900 },
+            { @"ألفين", 2000 },
+            { @"دستة", 12 },
+            { @"دستات", 12 },
+            { @"المائة", 100 },
+            { @"الألف", 1000 },
+            { @"المليون", 1000000 },
+            { @"المليار", 1000000000 },
+            { @"التريليون", 1000000000000 },
+            { @"المائتين", 200 },
+            { @"المائتان", 200 },
+            { @"الثلاثمائة", 300 },
+            { @"الأربعمائة", 400 },
+            { @"الخمسمائة", 500 },
+            { @"الستمائة", 600 },
+            { @"السبعمائة", 700 },
+            { @"الثمانمائة", 800 },
+            { @"التسعمائة", 900 },
+            { @"الألفين", 2000 },
+            { @"الدستة", 12 },
+            { @"الدستات", 12 }
+        };
+        public static readonly Dictionary<string, string> AmbiguityFiltersDict = new Dictionary<string, string>
+        {
+            // Both key and value are regex pattern strings.
+            // NOTE(review): presumably a match of the key is dropped when the value pattern
+            // also matches the surrounding text - confirm against the consumer of this map.
+            { @"\bواحد\b", @"\b(الذي|هذا|ذلك|ذاك|أي)\s+(واحد)\b" }
+        };
+        public static readonly Dictionary<string, string> RelativeReferenceOffsetMap = new Dictionary<string, string>
+        {
+            // Relative-ordinal phrase -> signed offset, stored as a string ("-1", "0", "1", "2").
+            // The reference point each offset applies to is given per phrase in
+            // RelativeReferenceRelativeToMap ("current" or "end").
+            { @"الاخر", @"0" },
+            { @"آخر", @"0" },
+            { @"اخر", @"0" },
+            { @"الأخيرة", @"0" },
+            { @"الأخير", @"0" },
+            { @"سبقت الأخيرة", @"-1" },
+            { @"سبقت الأخير", @"-1" },
+            { @"قبل الأخير", @"-1" },
+            { @"قبل الأخيرة", @"-1" },
+            { @"القبل الأخير", @"-1" },
+            { @"قبلا الأخي", @"-1" },
+            { @"التالي", @"1" },
+            { @"بعد التالي", @"2" },
+            { @"قادم", @"1" },
+            { @"قادمة", @"1" },
+            { @"القادم", @"1" },
+            { @"القادمة", @"1" },
+            { @"السابقة", @"-1" },
+            { @"الحالي", @"0" },
+            { @"الحالية", @"0" },
+            { @"قبل الاخير", @"-1" },
+            { @"الواحد قبل الاخير", @"-1" },
+            { @"الثانية الى الاخير", @"-1" },
+            { @"الذي قبلا الأخير", @"-1" },
+            { @"الذي قبل الأخير", @"-1" },
+            { @"الذي قبلا الأخي", @"-1" },
+            { @"السابق", @"-1" },
+            { @"أخر", @"0" },
+            { @"الاخير", @"0" },
+            { @"اول", @"1" },
+            { @"الاول", @"1" },
+            { @"التالية", @"1" } // feminine form of the "next" key above (offset 1, relative to "current"); it is not a "previous" form
+        };
+        public static readonly Dictionary<string, string> RelativeReferenceRelativeToMap = new Dictionary<string, string>
+        {
+            // Relative-ordinal phrase -> reference point ("current" or "end") for the offset
+            // listed under the same key in RelativeReferenceOffsetMap.
+            { @"اول", @"current" },
+            { @"التالية", @"current" },
+            { @"الاول", @"current" },
+            { @"الاخر", @"end" },
+            { @"الاخير", @"end" },
+            { @"أخر", @"end" },
+            { @"آخر", @"end" },
+            { @"اخر", @"end" },
+            { @"الأخيرة", @"end" },
+            { @"الأخير", @"end" },
+            { @"سبقت الأخيرة",
@"current" }, + { @"سبقت الأخير", @"current" }, + { @"قبل الأخير", @"end" }, + { @"قبل الأخيرة", @"current" }, + { @"القبل الأخير", @"current" }, + { @"الذي قبلا الأخي", @"end" }, + { @"التالي", @"current" }, + { @"بعد التالي", @"current" }, + { @"قادم", @"current" }, + { @"قادمة", @"current" }, + { @"القادم", @"current" }, + { @"القادمة", @"current" }, + { @"السابقة", @"current" }, + { @"الحالي", @"current" }, + { @"قبلا الأخي", @"current" }, + { @"الحالية", @"end" }, + { @"قبل الاخير", @"end" }, + { @"الواحد قبل الاخير", @"end" }, + { @"الذي قبل الأخير", @"end" }, + { @"الذي قبلا الأخير", @"end" }, + { @"الثانية الى الاخير", @"end" }, + { @"السابق", @"current" } + }; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/NumbersDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/NumbersDefinitions.tt new file mode 100644 index 0000000000..202dac6259 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/NumbersDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Arabic\Arabic-Numbers.yaml"; + this.Language = "Arabic"; + this.ClassName = "NumbersDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/TimeZoneDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/TimeZoneDefinitions.cs new file mode 100644 index 0000000000..2365a16379 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/TimeZoneDefinitions.cs @@ -0,0 +1,1490 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. 
+// +// Generation parameters: +// - DataFilename: Patterns\Arabic\Arabic-TimeZone.yaml +// - Language: Arabic +// - ClassName: TimeZoneDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Arabic +{ + using System; + using System.Collections.Generic; + + public static class TimeZoneDefinitions + { + public const string DirectUtcRegex = @"\b(utc|gmt)(\s*[+\-\u00B1]?\s*[\d]{1,2}h?(\s*:\s*[\d]{1,2})?)?\b"; + public static readonly IList AbbreviationsList = new List + { + @"ABST", + @"ACDT", + @"ACST", + @"ACT", + @"ADT", + @"AEDT", + @"AEST", + @"AET", + @"AFT", + @"AKDT", + @"AKST", + @"AMST", + @"AMT", + @"AOE", + @"AoE", + @"ARBST", + @"ARST", + @"ART", + @"AST", + @"AWDT", + @"AWST", + @"AZOST", + @"AZOT", + @"AZST", + @"AZT", + @"BIT", + @"BST", + @"BTT", + @"CADT", + @"CAST", + @"CBST", + @"CBT", + @"CCST", + @"CDT", + @"CDTM", + @"CEST", + @"CET", + @"COT", + @"CST", + @"CSTM", + @"CT", + @"CVT", + @"EAT", + @"ECT", + @"EDT", + @"EDTM", + @"EEST", + @"EET", + @"EGST", + @"ESAST", + @"ESAT", + @"EST", + @"ESTM", + @"ET", + @"FJST", + @"FJT", + @"GET", + @"GMT", + @"GNDT", + @"GNST", + @"GST", + @"GTBST", + @"HADT", + @"HAST", + @"HDT", + @"HKT", + @"HST", + @"IRDT", + @"IRKT", + @"IRST", + @"ISDT", + @"ISST", + @"IST", + @"JDT", + @"JST", + @"KRAT", + @"KST", + @"LINT", + @"MAGST", + @"MAGT", + @"MAT", + @"MDT", + @"MDTM", + @"MEST", + @"MOST", + @"MSK", + @"MSK+1", + @"MSK+2", + @"MSK+3", + @"MSK+4", + @"MSK+5", + @"MSK+6", + @"MSK+7", + @"MSK+8", + @"MSK+9", + @"MSK-1", + @"MST", + @"MSTM", + @"MUT", + @"MVST", + @"MYST", + @"NCAST", + @"NDT", + @"NMDT", + @"NMST", + @"NPT", + @"NST", + @"NZDT", + @"NZST", + @"NZT", + @"PDST", + @"PDT", + @"PDTM", + @"PETT", + @"PKT", + @"PSAST", + @"PSAT", + @"PST", + @"PSTM", + @"PT", + @"PYST", + @"PYT", + @"RST", + @"SAEST", + 
@"SAPST", + @"SAST", + @"SAWST", + @"SBT", + @"SGT", + @"SLT", + @"SMST", + @"SNST", + @"SST", + @"TADT", + @"TAST", + @"THA", + @"TIST", + @"TOST", + @"TOT", + @"TRT", + @"TST", + @"ULAT", + @"UTC", + @"VET", + @"VLAT", + @"WAST", + @"WAT", + @"WEST", + @"WET", + @"WPST", + @"YAKT", + @"YEKT" + }; + public static readonly IList FullNameList = new List + { + @"Acre Time", + @"Afghanistan Standard Time", + @"Alaskan Standard Time", + @"Anywhere on Earth", + @"Arab Standard Time", + @"Arabian Standard Time", + @"Arabic Standard Time", + @"Argentina Standard Time", + @"Atlantic Standard Time", + @"AUS Central Standard Time", + @"Australian Central Time", + @"AUS Eastern Standard Time", + @"Australian Eastern Time", + @"Australian Eastern Standard Time", + @"Australian Central Daylight Time", + @"Australian Eastern Daylight Time", + @"Azerbaijan Standard Time", + @"Azores Standard Time", + @"Bahia Standard Time", + @"Bangladesh Standard Time", + @"Belarus Standard Time", + @"Canada Central Standard Time", + @"Cape Verde Standard Time", + @"Caucasus Standard Time", + @"Cen. Australia Standard Time", + @"Central America Standard Time", + @"Central Asia Standard Time", + @"Central Brazilian Standard Time", + @"Central Daylight Time", + @"Europe Central Time", + @"European Central Time", + @"Central Europe Standard Time", + @"Central Europe Std Time", + @"Central European Std Time", + @"Central European Standard Time", + @"Central Pacific Standard Time", + @"Central Standard Time", + @"Central Standard Time (Mexico)", + @"China Standard Time", + @"Dateline Standard Time", + @"E. Africa Standard Time", + @"E. Australia Standard Time", + @"E. Europe Standard Time", + @"E. 
South America Standard Time", + @"Eastern Time", + @"Eastern Daylight Time", + @"Eastern Standard Time", + @"Eastern Standard Time (Mexico)", + @"Egypt Standard Time", + @"Ekaterinburg Standard Time", + @"Fiji Standard Time", + @"FLE Standard Time", + @"Georgian Standard Time", + @"GMT Standard Time", + @"Greenland Standard Time", + @"Greenwich Standard Time", + @"GTB Standard Time", + @"Hawaiian Standard Time", + @"India Standard Time", + @"Iran Standard Time", + @"Israel Standard Time", + @"Jordan Standard Time", + @"Kaliningrad Standard Time", + @"Kamchatka Standard Time", + @"Korea Standard Time", + @"Libya Standard Time", + @"Line Islands Standard Time", + @"Magadan Standard Time", + @"Mauritius Standard Time", + @"Mid-Atlantic Standard Time", + @"Middle East Standard Time", + @"Montevideo Standard Time", + @"Morocco Standard Time", + @"Mountain Standard Time", + @"Mountain Standard Time (Mexico)", + @"Myanmar Standard Time", + @"N. Central Asia Standard Time", + @"Namibia Standard Time", + @"Nepal Standard Time", + @"New Zealand Standard Time", + @"Newfoundland Standard Time", + @"North Asia East Standard Time", + @"North Asia Standard Time", + @"North Korea Standard Time", + @"Pacific SA Standard Time", + @"Pacific Standard Time", + @"Pacific Daylight Time", + @"Pacific Time", + @"Pacific Standard Time", + @"Pacific Standard Time (Mexico)", + @"Pakistan Standard Time", + @"Paraguay Standard Time", + @"Romance Standard Time", + @"Russia Time Zone 1", + @"Russia Time Zone 2", + @"Russia Time Zone 3", + @"Russia Time Zone 4", + @"Russia Time Zone 5", + @"Russia Time Zone 6", + @"Russia Time Zone 7", + @"Russia Time Zone 8", + @"Russia Time Zone 9", + @"Russia Time Zone 10", + @"Russia Time Zone 11", + @"Russian Standard Time", + @"SA Eastern Standard Time", + @"SA Pacific Standard Time", + @"SA Western Standard Time", + @"Samoa Standard Time", + @"SE Asia Standard Time", + @"Singapore Standard Time", + @"Singapore Time", + @"South Africa Standard Time", + @"Sri 
Lanka Standard Time", + @"Syria Standard Time", + @"Taipei Standard Time", + @"Tasmania Standard Time", + @"Tokyo Standard Time", + @"Tonga Standard Time", + @"Turkey Standard Time", + @"Ulaanbaatar Standard Time", + @"US Eastern Standard Time", + @"US Mountain Standard Time", + @"Mountain", + @"Venezuela Standard Time", + @"Vladivostok Standard Time", + @"W. Australia Standard Time", + @"W. Central Africa Standard Time", + @"W. Europe Standard Time", + @"West Asia Standard Time", + @"West Pacific Standard Time", + @"Yakutsk Standard Time", + @"Pacific Daylight Saving Time", + @"Austrialian Western Daylight Time", + @"Austrialian West Daylight Time", + @"Australian Western Daylight Time", + @"Australian West Daylight Time", + @"Colombia Time", + @"Hong Kong Time", + @"Central Europe Time", + @"Central European Time", + @"Central Europe Summer Time", + @"Central European Summer Time", + @"Central Europe Standard Time", + @"Central European Standard Time", + @"Central Europe Std Time", + @"Central European Std Time", + @"West Coast Time", + @"West Coast", + @"Central Time", + @"Central", + @"Pacific", + @"Eastern" + }; + public const string BaseTimeZoneSuffixRegex = @"((\s+|-)(friendly|compatible))?(\s+|-)time(zone)?"; + public static readonly string LocationTimeSuffixRegex = $@"({BaseTimeZoneSuffixRegex})\b"; + public static readonly string TimeZoneEndRegex = $@"({BaseTimeZoneSuffixRegex})$"; + public static readonly IList AmbiguousTimezoneList = new List + { + @"bit", + @"get", + @"art", + @"cast", + @"eat", + @"lint", + @"mat", + @"most", + @"west", + @"vet", + @"wet", + @"cot", + @"pt", + @"et", + @"eastern", + @"pacific", + @"central", + @"mountain", + @"west coast" + }; + public static readonly Dictionary AbbrToMinMapping = new Dictionary + { + { @"abst", 180 }, + { @"acdt", 630 }, + { @"acst", 570 }, + { @"act", -10000 }, + { @"adt", -10000 }, + { @"aedt", 660 }, + { @"aest", 600 }, + { @"aet", 600 }, + { @"aft", 270 }, + { @"akdt", -480 }, + { @"akst", -540 
}, + { @"amst", -10000 }, + { @"amt", -10000 }, + { @"aoe", -720 }, + { @"arbst", 180 }, + { @"arst", 180 }, + { @"art", -180 }, + { @"ast", -10000 }, + { @"awdt", 540 }, + { @"awst", 480 }, + { @"azost", 0 }, + { @"azot", -60 }, + { @"azst", 300 }, + { @"azt", 240 }, + { @"bit", -720 }, + { @"bst", -10000 }, + { @"btt", 360 }, + { @"cadt", -360 }, + { @"cast", 480 }, + { @"cbst", -240 }, + { @"cbt", -240 }, + { @"ccst", -360 }, + { @"cdt", -10000 }, + { @"cdtm", -360 }, + { @"cest", 120 }, + { @"cet", 60 }, + { @"cot", -300 }, + { @"cst", -10000 }, + { @"cstm", -360 }, + { @"ct", -360 }, + { @"cvt", -60 }, + { @"eat", 180 }, + { @"ect", -10000 }, + { @"edt", -240 }, + { @"edtm", -300 }, + { @"eest", 180 }, + { @"eet", 120 }, + { @"egst", 0 }, + { @"esast", -180 }, + { @"esat", -180 }, + { @"est", -300 }, + { @"estm", -300 }, + { @"et", -300 }, + { @"fjst", 780 }, + { @"fjt", 720 }, + { @"get", 240 }, + { @"gmt", 0 }, + { @"gndt", -180 }, + { @"gnst", -180 }, + { @"gst", -10000 }, + { @"gtbst", 120 }, + { @"hadt", -540 }, + { @"hast", -600 }, + { @"hdt", -540 }, + { @"hkt", 480 }, + { @"hst", -600 }, + { @"irdt", 270 }, + { @"irkt", 480 }, + { @"irst", 210 }, + { @"isdt", 120 }, + { @"isst", 120 }, + { @"ist", -10000 }, + { @"jdt", 120 }, + { @"jst", 540 }, + { @"krat", 420 }, + { @"kst", -10000 }, + { @"lint", 840 }, + { @"magst", 720 }, + { @"magt", 660 }, + { @"mat", -120 }, + { @"mdt", -360 }, + { @"mdtm", -420 }, + { @"mest", 120 }, + { @"most", 0 }, + { @"msk+1", 240 }, + { @"msk+2", 300 }, + { @"msk+3", 360 }, + { @"msk+4", 420 }, + { @"msk+5", 480 }, + { @"msk+6", 540 }, + { @"msk+7", 600 }, + { @"msk+8", 660 }, + { @"msk+9", 720 }, + { @"msk-1", 120 }, + { @"msk", 180 }, + { @"mst", -420 }, + { @"mstm", -420 }, + { @"mut", 240 }, + { @"mvst", -180 }, + { @"myst", 390 }, + { @"ncast", 420 }, + { @"ndt", -150 }, + { @"nmdt", 60 }, + { @"nmst", 60 }, + { @"npt", 345 }, + { @"nst", -210 }, + { @"nzdt", 780 }, + { @"nzst", 720 }, + { @"nzt", 720 }, + { @"pdst", 
-420 }, + { @"pdt", -420 }, + { @"pdtm", -480 }, + { @"pett", 720 }, + { @"pkt", 300 }, + { @"psast", -240 }, + { @"psat", -240 }, + { @"pst", -480 }, + { @"pstm", -480 }, + { @"pt", -480 }, + { @"pyst", -10000 }, + { @"pyt", -10000 }, + { @"rst", 60 }, + { @"saest", -180 }, + { @"sapst", -300 }, + { @"sast", 120 }, + { @"sawst", -240 }, + { @"sbt", 660 }, + { @"sgt", 480 }, + { @"slt", 330 }, + { @"smst", 780 }, + { @"snst", 480 }, + { @"sst", -10000 }, + { @"tadt", 600 }, + { @"tast", 600 }, + { @"tha", 420 }, + { @"tist", 480 }, + { @"tost", 840 }, + { @"tot", 780 }, + { @"trt", 180 }, + { @"tst", 540 }, + { @"ulat", 480 }, + { @"utc", 0 }, + { @"vet", -240 }, + { @"vlat", 600 }, + { @"wast", 120 }, + { @"wat", -10000 }, + { @"west", 60 }, + { @"wet", 0 }, + { @"wpst", 600 }, + { @"yakt", 540 }, + { @"yekt", 300 } + }; + public static readonly Dictionary FullToMinMapping = new Dictionary + { + { @"beijing", 480 }, + { @"shanghai", 480 }, + { @"shenzhen", 480 }, + { @"suzhou", 480 }, + { @"tianjian", 480 }, + { @"chengdu", 480 }, + { @"guangzhou", 480 }, + { @"wuxi", 480 }, + { @"xiamen", 480 }, + { @"chongqing", 480 }, + { @"shenyang", 480 }, + { @"china", 480 }, + { @"redmond", -480 }, + { @"seattle", -480 }, + { @"bellevue", -480 }, + { @"afghanistan standard", 270 }, + { @"alaskan standard", -540 }, + { @"anywhere on earth", -720 }, + { @"arab standard", 180 }, + { @"arabian standard", 180 }, + { @"arabic standard", 180 }, + { @"argentina standard", -180 }, + { @"atlantic standard", -240 }, + { @"aus central standard", 570 }, + { @"aus eastern standard", 600 }, + { @"australian eastern", 600 }, + { @"australian eastern standard", 600 }, + { @"australian central daylight", 630 }, + { @"australian eastern daylight", 660 }, + { @"azerbaijan standard", 240 }, + { @"azores standard", -60 }, + { @"bahia standard", -180 }, + { @"bangladesh standard", 360 }, + { @"belarus standard", 180 }, + { @"canada central standard", -360 }, + { @"cape verde standard", -60 }, + { 
@"caucasus standard", 240 }, + { @"cen. australia standard", 570 }, + { @"central australia standard", 570 }, + { @"central america standard", -360 }, + { @"central asia standard", 360 }, + { @"central brazilian standard", -240 }, + { @"central", -360 }, + { @"central daylight", -10000 }, + { @"central daylight saving", -10000 }, + { @"central europe", 60 }, + { @"central european", 60 }, + { @"central europe std", 60 }, + { @"central european std", 60 }, + { @"central europe standard", 60 }, + { @"central european standard", 60 }, + { @"central europe summer", 120 }, + { @"central european summer", 120 }, + { @"central pacific standard", 660 }, + { @"central standard time (mexico)", -360 }, + { @"central standard", -360 }, + { @"china standard", 480 }, + { @"dateline standard", -720 }, + { @"e. africa standard", 180 }, + { @"e. australia standard", 600 }, + { @"e. europe standard", 120 }, + { @"e. south america standard", -180 }, + { @"europe central", 60 }, + { @"european central", 60 }, + { @"eastern", -300 }, + { @"eastern daylight", -10000 }, + { @"eastern daylight saving", -10000 }, + { @"eastern standard time (mexico)", -300 }, + { @"eastern standard", -300 }, + { @"egypt standard", 120 }, + { @"ekaterinburg standard", 300 }, + { @"fiji standard", 720 }, + { @"fle standard", 120 }, + { @"georgian standard", 240 }, + { @"gmt standard", 0 }, + { @"greenland standard", -180 }, + { @"greenwich standard", 0 }, + { @"gtb standard", 120 }, + { @"hawaiian standard", -600 }, + { @"india standard", 330 }, + { @"iran standard", 210 }, + { @"israel standard", 120 }, + { @"jordan standard", 120 }, + { @"kaliningrad standard", 120 }, + { @"kamchatka standard", 720 }, + { @"korea standard", 540 }, + { @"libya standard", 120 }, + { @"line islands standard", 840 }, + { @"magadan standard", 660 }, + { @"mauritius standard", 240 }, + { @"mid-atlantic standard", -120 }, + { @"middle east standard", 120 }, + { @"montevideo standard", -180 }, + { @"morocco standard", 0 }, + { 
@"mountain", -420 }, + { @"mountain daylight", -360 }, + { @"mountain daylight saving", -360 }, + { @"mountain standard", -420 }, + { @"mountain standard time (mexico)", -420 }, + { @"myanmar standard", 390 }, + { @"n. central asia standard", 420 }, + { @"namibia standard", 60 }, + { @"nepal standard", 345 }, + { @"new zealand standard", 720 }, + { @"newfoundland standard", -210 }, + { @"north asia east standard", 480 }, + { @"north asia standard", 420 }, + { @"north korea standard", 510 }, + { @"west coast", -420 }, + { @"pacific", -480 }, + { @"pacific daylight", -420 }, + { @"pacific daylight saving", -420 }, + { @"pacific standard", -480 }, + { @"pacific standard time (mexico)", -480 }, + { @"pacific sa standard", -240 }, + { @"pakistan standard", 300 }, + { @"paraguay standard", -240 }, + { @"romance standard", 60 }, + { @"russia time zone 1", 120 }, + { @"russia time zone 2", 180 }, + { @"russia time zone 3", 240 }, + { @"russia time zone 4", 300 }, + { @"russia time zone 5", 360 }, + { @"russia time zone 6", 420 }, + { @"russia time zone 7", 480 }, + { @"russia time zone 8", 540 }, + { @"russia time zone 9", 600 }, + { @"russia time zone 10", 660 }, + { @"russia time zone 11", 720 }, + { @"russian standard", 180 }, + { @"sa eastern standard", -180 }, + { @"sa pacific standard", -300 }, + { @"sa western standard", -240 }, + { @"samoa standard", -660 }, + { @"se asia standard", 420 }, + { @"singapore standard", 480 }, + { @"singapore", 480 }, + { @"south africa standard", 120 }, + { @"sri lanka standard", 330 }, + { @"syria standard", 120 }, + { @"taipei standard", 480 }, + { @"tasmania standard", 600 }, + { @"tokyo standard", 540 }, + { @"tonga standard", 780 }, + { @"turkey standard", 180 }, + { @"ulaanbaatar standard", 480 }, + { @"us eastern standard", -300 }, + { @"us mountain standard", -420 }, + { @"venezuela standard", -240 }, + { @"vladivostok standard", 600 }, + { @"w. australia standard", 480 }, + { @"w. central africa standard", 60 }, + { @"w. 
europe standard", 0 }, + { @"western european", 0 }, + { @"west europe standard", 0 }, + { @"west europe std", 0 }, + { @"western europe standard", 0 }, + { @"western europe summer", 60 }, + { @"w. europe summer", 60 }, + { @"western european summer", 60 }, + { @"west europe summer", 60 }, + { @"west asia standard", 300 }, + { @"west pacific standard", 600 }, + { @"yakutsk standard", 540 }, + { @"australian western daylight", 540 }, + { @"australian west daylight", 540 }, + { @"austrialian western daylight", 540 }, + { @"austrialian west daylight", 540 }, + { @"australian western daylight saving", 540 }, + { @"australian west daylight saving", 540 }, + { @"austrialian western daylight saving", 540 }, + { @"austrialian west daylight saving", 540 }, + { @"colombia", -300 }, + { @"hong kong", 480 }, + { @"madrid", 60 }, + { @"bilbao", 60 }, + { @"seville", 60 }, + { @"valencia", 60 }, + { @"malaga", 60 }, + { @"las Palmas", 60 }, + { @"zaragoza", 60 }, + { @"alicante", 60 }, + { @"alche", 60 }, + { @"oviedo", 60 }, + { @"gijón", 60 }, + { @"avilés", 60 } + }; + public static readonly IList MajorLocations = new List + { + @"Dominican Republic", + @"Dominica", + @"Guinea Bissau", + @"Guinea-Bissau", + @"Guinea", + @"Equatorial Guinea", + @"Papua New Guinea", + @"New York City", + @"New York", + @"York", + @"Mexico City", + @"New Mexico", + @"Mexico", + @"Aberdeen", + @"Adelaide", + @"Anaheim", + @"Atlanta", + @"Auckland", + @"Austin", + @"Bangkok", + @"Baltimore", + @"Baton Rouge", + @"Beijing", + @"Belfast", + @"Birmingham", + @"Bolton", + @"Boston", + @"Bournemouth", + @"Bradford", + @"Brisbane", + @"Bristol", + @"Calgary", + @"Canberra", + @"Cardiff", + @"Charlotte", + @"Chicago", + @"Christchurch", + @"Colchester", + @"Colorado Springs", + @"Coventry", + @"Dallas", + @"Denver", + @"Derby", + @"Detroit", + @"Dubai", + @"Dublin", + @"Dudley", + @"Dunedin", + @"Edinburgh", + @"Edmonton", + @"El Paso", + @"Glasgow", + @"Gold Coast", + @"Hamilton", + @"Hialeah", + 
@"Houston", + @"Ipswich", + @"Jacksonville", + @"Jersey City", + @"Kansas City", + @"Kingston-upon-Hull", + @"Leeds", + @"Leicester", + @"Lexington", + @"Lincoln", + @"Liverpool", + @"London", + @"Long Beach", + @"Los Angeles", + @"Louisville", + @"Lubbock", + @"Luton", + @"Madison", + @"Manchester", + @"Mansfield", + @"Melbourne", + @"Memphis", + @"Mesa", + @"Miami", + @"Middlesbrough", + @"Milan", + @"Milton Keynes", + @"Minneapolis", + @"Montréal", + @"Montreal", + @"Nashville", + @"New Orleans", + @"Newark", + @"Newcastle-upon-Tyne", + @"Newcastle", + @"Northampton", + @"Norwich", + @"Nottingham", + @"Oklahoma City", + @"Oldham", + @"Omaha", + @"Orlando", + @"Ottawa", + @"Perth", + @"Peterborough", + @"Philadelphia", + @"Phoenix", + @"Plymouth", + @"Portland", + @"Portsmouth", + @"Preston", + @"Québec City", + @"Quebec City", + @"Québec", + @"Quebec", + @"Raleigh", + @"Reading", + @"Redmond", + @"Richmond", + @"Rome", + @"San Antonio", + @"San Diego", + @"San Francisco", + @"San José", + @"Santa Ana", + @"Seattle", + @"Sheffield", + @"Southampton", + @"Southend-on-Sea", + @"Spokane", + @"St Louis", + @"St Paul", + @"St Petersburg", + @"St. Louis", + @"St. Paul", + @"St. 
Petersburg", + @"Stockton-on-Tees", + @"Stockton", + @"Stoke-on-Trent", + @"Sunderland", + @"Swansea", + @"Swindon", + @"Sydney", + @"Tampa", + @"Tauranga", + @"Telford", + @"Toronto", + @"Vancouver", + @"Virginia Beach", + @"Walsall", + @"Warrington", + @"Washington", + @"Wellington", + @"Wolverhampton", + @"Abilene", + @"Akron", + @"Albuquerque", + @"Alexandria", + @"Allentown", + @"Amarillo", + @"Anchorage", + @"Ann Arbor", + @"Antioch", + @"Arlington", + @"Arvada", + @"Athens", + @"Augusta", + @"Aurora", + @"Bakersfield", + @"Beaumont", + @"Bellevue", + @"Berkeley", + @"Billings", + @"Boise", + @"Boulder", + @"Bridgeport", + @"Broken Arrow", + @"Brownsville", + @"Buffalo", + @"Burbank", + @"Cambridge", + @"Cape Coral", + @"Carlsbad", + @"Carrollton", + @"Cary", + @"Cedar Rapids", + @"Centennial", + @"Chandler", + @"Charleston", + @"Chattanooga", + @"Chengdu", + @"Chesapeake", + @"Chongqing", + @"Chula Vista", + @"Cincinnati", + @"Clarksville", + @"Clearwater", + @"Cleveland", + @"Clovis", + @"College Station", + @"Columbia", + @"Columbus", + @"Concord", + @"Coral Springs", + @"Corona", + @"Costa Mesa", + @"Daly City", + @"Davenport", + @"Dayton", + @"Denton", + @"Des Moines", + @"Downey", + @"Durham", + @"Edison", + @"El Cajon", + @"El Monte", + @"Elgin", + @"Elizabeth", + @"Elk Grove", + @"Erie", + @"Escondido", + @"Eugene", + @"Evansville", + @"Everett", + @"Fairfield", + @"Fargo", + @"Farmington Hills", + @"Fayetteville", + @"Fontana", + @"Fort Collins", + @"Fort Lauderdale", + @"Fort Wayne", + @"Fort Worth", + @"Fremont", + @"Fresno", + @"Frisco", + @"Fullerton", + @"Gainesville", + @"Garden Grove", + @"Garland", + @"Gilbert", + @"Glendale", + @"Grand Prairie", + @"Grand Rapids", + @"Green Bay", + @"Greensboro", + @"Gresham", + @"Guangzhou", + @"Hampton", + @"Hartford", + @"Hayward", + @"Henderson", + @"High Point", + @"Hollywood", + @"Honolulu", + @"Huntington Beach", + @"Huntsville", + @"Independence", + @"Indianapolis", + @"Inglewood", + @"Irvine", + 
@"Irving", + @"Jackson", + @"Joliet", + @"Kent", + @"Killeen", + @"Knoxville", + @"Lafayette", + @"Lakeland", + @"Lakewood", + @"Lancaster", + @"Lansing", + @"Laredo", + @"Las Cruces", + @"Las Vegas", + @"Lewisville", + @"Little Rock", + @"Lowell", + @"Macon", + @"McAllen", + @"McKinney", + @"Mesquite", + @"Miami Gardens", + @"Midland", + @"Milwaukee", + @"Miramar", + @"Mobile", + @"Modesto", + @"Montgomery", + @"Moreno Valley", + @"Murfreesboro", + @"Murrieta", + @"Naperville", + @"New Haven", + @"Newport News", + @"Norfolk", + @"Norman", + @"North Charleston", + @"North Las Vegas", + @"Norwalk", + @"Oakland", + @"Oceanside", + @"Odessa", + @"Olathe", + @"Ontario", + @"Orange", + @"Overland Park", + @"Oxnard", + @"Palm Bay", + @"Palmdale", + @"Pasadena", + @"Paterson", + @"Pearland", + @"Pembroke Pines", + @"Peoria", + @"Pittsburgh", + @"Plano", + @"Pomona", + @"Pompano Beach", + @"Providence", + @"Provo", + @"Pueblo", + @"Rancho Cucamonga", + @"Reno", + @"Rialto", + @"Richardson", + @"Riverside", + @"Rochester", + @"Rockford", + @"Roseville", + @"Round Rock", + @"Sacramento", + @"Saint Paul", + @"Salem", + @"Salinas", + @"Salt Lake City", + @"San Bernardino", + @"San Jose", + @"San Mateo", + @"Sandy Springs", + @"Santa Clara", + @"Santa Clarita", + @"Santa Maria", + @"Santa Rosa", + @"Savannah", + @"Scottsdale", + @"Shanghai", + @"Shenyang", + @"Shenzhen", + @"Shreveport", + @"Simi Valley", + @"Sioux Falls", + @"South Bend", + @"Springfield", + @"Stamford", + @"Sterling Heights", + @"Sunnyvale", + @"Surprise", + @"Suzhou", + @"Syracuse", + @"Tacoma", + @"Tallahassee", + @"Temecula", + @"Tempe", + @"Thornton", + @"Thousand Oaks", + @"Tianjing", + @"Toledo", + @"Topeka", + @"Torrance", + @"Tucson", + @"Tulsa", + @"Tyler", + @"Vallejo", + @"Ventura", + @"Victorville", + @"Visalia", + @"Waco", + @"Warren", + @"Waterbury", + @"West Covina", + @"West Jordan", + @"West Palm Beach", + @"West Valley City", + @"Westminster", + @"Wichita", + @"Wichita Falls", + 
@"Wilmington", + @"Winston-Salem", + @"Worcester", + @"Wuxi", + @"Xiamen", + @"Yonkers", + @"Bentonville", + @"Afghanistan", + @"AK", + @"AL", + @"Alabama", + @"Åland", + @"Åland Islands", + @"Alaska", + @"Albania", + @"Algeria", + @"American Samoa", + @"Andorra", + @"Angola", + @"Anguilla", + @"Antarctica", + @"Antigua and Barbuda", + @"AR", + @"Argentina", + @"Arizona", + @"Arkansas", + @"Armenia", + @"Aruba", + @"Australia", + @"Austria", + @"AZ", + @"Azerbaijan", + @"Bahamas", + @"Bahrain", + @"Bangladesh", + @"Barbados", + @"Belarus", + @"Belgium", + @"Belize", + @"Benin", + @"Bermuda", + @"Bhutan", + @"Bolivia", + @"Bonaire", + @"Bosnia", + @"Bosnia and Herzegovina", + @"Botswana", + @"Bouvet Island", + @"Brazil", + @"British Indian Ocean Territory", + @"British Virgin Islands", + @"Brunei", + @"Bulgaria", + @"Burkina Faso", + @"Burundi", + @"CA", + @"Cabo Verde", + @"California", + @"Cambodia", + @"Cameroon", + @"Canada", + @"Cayman Islands", + @"Central African Republic", + @"Chad", + @"Chile", + @"China", + @"Christmas Island", + @"CO", + @"Cocos Islands", + @"Colombia", + @"Colorado", + @"Comoros", + @"Congo", + @"Congo (DRC)", + @"Connecticut", + @"Cook Islands", + @"Costa Rica", + @"Côte d’Ivoire", + @"Croatia", + @"CT", + @"Cuba", + @"Curaçao", + @"Cyprus", + @"Czechia", + @"DE", + @"Delaware", + @"Denmark", + @"Djibouti", + @"Ecuador", + @"Egypt", + @"El Salvador", + @"Eritrea", + @"Estonia", + @"eSwatini", + @"Ethiopia", + @"Falkland Islands", + @"Falklands", + @"Faroe Islands", + @"Fiji", + @"Finland", + @"FL", + @"Florida", + @"France", + @"French Guiana", + @"French Polynesia", + @"French Southern Territories", + @"FYROM", + @"GA", + @"Gabon", + @"Gambia", + @"Georgia", + @"Georgia", + @"Germany", + @"Ghana", + @"Gibraltar", + @"Greece", + @"Greenland", + @"Grenada", + @"Guadeloupe", + @"Guam", + @"Guatemala", + @"Guernsey", + @"Guyana", + @"Haiti", + @"Hawaii", + @"Herzegovina", + @"HI", + @"Honduras", + @"Hong Kong", + @"Hungary", + @"IA", + 
@"Iceland", + @"ID", + @"Idaho", + @"IL", + @"Illinois", + @"IN", + @"India", + @"Indiana", + @"Indonesia", + @"Iowa", + @"Iran", + @"Iraq", + @"Ireland", + @"Isle of Man", + @"Israel", + @"Italy", + @"Ivory Coast", + @"Jamaica", + @"Jan Mayen", + @"Japan", + @"Jersey", + @"Jordan", + @"Kansas", + @"Kazakhstan", + @"Keeling Islands", + @"Kentucky", + @"Kenya", + @"Kiribati", + @"Korea", + @"Kosovo", + @"KS", + @"Kuwait", + @"KY", + @"Kyrgyzstan", + @"LA", + @"Laos", + @"Latvia", + @"Lebanon", + @"Lesotho", + @"Liberia", + @"Libya", + @"Liechtenstein", + @"Lithuania", + @"Louisiana", + @"Luxembourg", + @"MA", + @"Macao", + @"Macedonia", + @"Madagascar", + @"Maine", + @"Malawi", + @"Malaysia", + @"Maldives", + @"Mali", + @"Malta", + @"Marshall Islands", + @"Martinique", + @"Maryland", + @"Massachusetts", + @"Mauritania", + @"Mauritius", + @"Mayotte", + @"MD", + @"ME", + @"MI", + @"Michigan", + @"Micronesia", + @"Minnesota", + @"Mississippi", + @"Missouri", + @"MN", + @"MO", + @"Moldova", + @"Monaco", + @"Mongolia", + @"Montana", + @"Montenegro", + @"Montserrat", + @"Morocco", + @"Mozambique", + @"MS", + @"MT", + @"Myanmar", + @"Namibia", + @"Nauru", + @"NC", + @"ND", + @"NE", + @"Nebraska", + @"Nepal", + @"Netherlands", + @"Nevada", + @"New Caledonia", + @"New Hampshire", + @"New Jersey", + @"New Zealand", + @"NH", + @"Nicaragua", + @"Niger", + @"Nigeria", + @"Niue", + @"NJ", + @"NM", + @"Norfolk Island", + @"North Carolina", + @"North Dakota", + @"North Korea", + @"Northern Mariana Islands", + @"Norway", + @"NV", + @"NY", + @"OH", + @"Ohio", + @"OK", + @"Oklahoma", + @"Oman", + @"OR", + @"Oregon", + @"PA", + @"Pakistan", + @"Palau", + @"Palestinian Authority", + @"Panama", + @"Paraguay", + @"Pennsylvania", + @"Peru", + @"Philippines", + @"Pitcairn Islands", + @"Poland", + @"Portugal", + @"Puerto Rico", + @"Qatar", + @"Réunion", + @"Rhode Island", + @"RI", + @"Romania", + @"Russia", + @"Rwanda", + @"Saba", + @"Saint Barthélemy", + @"Saint Kitts and Nevis", + @"Saint 
Lucia", + @"Saint Martin", + @"Saint Pierre and Miquelon", + @"Saint Vincent and the Grenadines", + @"Samoa", + @"San Marino", + @"São Tomé and Príncipe", + @"Saudi Arabia", + @"SC", + @"SD", + @"Senegal", + @"Serbia", + @"Seychelles", + @"Sierra Leone", + @"Singapore", + @"Sint Eustatius", + @"Sint Maarten", + @"Slovakia", + @"Slovenia", + @"Solomon Islands", + @"Somalia", + @"South Africa", + @"South Carolina", + @"South Dakota", + @"South Sudan", + @"Spain", + @"Sri Lanka", + @"Sudan", + @"Suriname", + @"Svalbard", + @"Swaziland", + @"Sweden", + @"Switzerland", + @"Syria", + @"Taiwan", + @"Tajikistan", + @"Tanzania", + @"Tennessee", + @"Texas", + @"Thailand", + @"Timor-Leste", + @"TN", + @"Togo", + @"Tokelau", + @"Tonga", + @"Trinidad and Tobago", + @"Tunisia", + @"Turkey", + @"Turkmenistan", + @"Turks and Caicos Islands", + @"Tuvalu", + @"TX", + @"U.S. Outlying Islands", + @"US Outlying Islands", + @"U.S. Virgin Islands", + @"US Virgin Islands", + @"Uganda", + @"UK", + @"Ukraine", + @"United Arab Emirates", + @"United Kingdom", + @"United States", + @"Uruguay", + @"US", + @"USA", + @"UT", + @"Utah", + @"Uzbekistan", + @"VA", + @"Vanuatu", + @"Vatican City", + @"Venezuela", + @"Vermont", + @"Vietnam", + @"Virginia", + @"VT", + @"WA", + @"Wallis and Futuna", + @"West Virginia", + @"WI", + @"Wisconsin", + @"WV", + @"WY", + @"Wyoming", + @"Yemen", + @"Zambia", + @"Zimbabwe", + @"Paris", + @"Tokyo", + @"Shanghai", + @"Sao Paulo", + @"Rio de Janeiro", + @"Rio", + @"Brasília", + @"Brasilia", + @"Recife", + @"Milan", + @"Mumbai", + @"Moscow", + @"Frankfurt", + @"Munich", + @"Berlim", + @"Madrid", + @"Lisbon", + @"Warsaw", + @"Johannesburg", + @"Seoul", + @"Istanbul", + @"Kuala Kumpur", + @"Jakarta", + @"Amsterdam", + @"Brussels", + @"Valencia", + @"Seville", + @"Bilbao", + @"Malaga", + @"Las Palmas", + @"Zaragoza", + @"Alicante", + @"Elche", + @"Oviedo", + @"Gijón", + @"Avilés", + @"West Coast", + @"Central", + @"Pacific", + @"Eastern", + @"Mountain" + }; + } +} \ No 
newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/TimeZoneDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/TimeZoneDefinitions.tt new file mode 100644 index 0000000000..c98f9e4a5c --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Arabic/TimeZoneDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Arabic\Arabic-TimeZone.yaml"; + this.Language = "Arabic"; + this.ClassName = "TimeZoneDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/BaseCurrency.cs b/.NET/Microsoft.Recognizers.Definitions.Common/BaseCurrency.cs index c6300320a8..19aebfeebb 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/BaseCurrency.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/BaseCurrency.cs @@ -27,7 +27,7 @@ public static class BaseCurrency { @"__D", @"CENT" }, { @"RUB", @"KOPEK" }, { @"AFN", @"PUL" }, - { @"EUR", @"CENT" }, + { @"EUR", @"CENT|KWARTJE|DUBBELTJE|STUIVER" }, { @"ALL", @"QINDARKE" }, { @"_ALP", @"PENNY" }, { @"GBP", @"PENNY" }, @@ -190,7 +190,8 @@ public static class BaseCurrency { @"UYU", @"CENTESIMO" }, { @"VEF", @"CENTIMO" }, { @"YER", @"FILS" }, - { @"ZMW", @"NGWEE" } + { @"ZMW", @"NGWEE" }, + { @"_XBT", @"MILLIBITCOIN|SATOSHI" } }; public static readonly Dictionary CurrencyFractionalRatios = new Dictionary { @@ -266,7 +267,23 @@ public static class BaseCurrency { @"Kopiyka", 100 }, { @"Tiyin", 100 }, { @"Hào", 10 }, - { @"Ngwee", 100 } + { @"Ngwee", 100 }, + { @"Kwartje", 4 }, + { @"Dubbeltje", 10 }, + { @"Stuiver", 20 }, + { @"Millibitcoin", 1000 }, + { @"Satoshi", 100000000 } + }; + public static readonly Dictionary NonStandardFractionalSubunits = new Dictionary + { + { @"JOD", 1000 }, + { @"KWD", 1000 }, + { @"BHD", 1000 }, + { @"OMR", 1000 }, + { @"YDD", 1000 }, + { @"TND", 1000 }, + { @"MRO", 5 }, 
+ { @"_XBT", 1000 } }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/BaseDateTime.cs b/.NET/Microsoft.Recognizers.Definitions.Common/BaseDateTime.cs index 6667804986..12df527f8d 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/BaseDateTime.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/BaseDateTime.cs @@ -21,23 +21,27 @@ namespace Microsoft.Recognizers.Definitions public static class BaseDateTime { - public const string HourRegex = @"(?2[0-4]|[0-1]?\d)(h)?"; + public const string HourRegex = @"(?2[0-4]|[0-1]?\d)(h)?"; public const string TwoDigitHourRegex = @"(?[0-1]\d|2[0-4])(h)?"; - public const string MinuteRegex = @"(?[0-5]?\d)(?!\d)"; + public const string MinuteRegex = @"(?[0-5]\d)(?!\d)"; public const string TwoDigitMinuteRegex = @"(?[0-5]\d)(?!\d)"; public const string DeltaMinuteRegex = @"(?[0-5]?\d)"; public const string SecondRegex = @"(?[0-5]?\d)"; public const string FourDigitYearRegex = @"\b(?((1\d|20)\d{2})|2100)(?!\.0\b)\b"; + public const string HyphenDateRegex = @"((?[0-9]{4})-?(?1[0-2]|0[1-9])-?(?3[01]|0[1-9]|[12][0-9]))|((?1[0-2]|0[1-9])-?(?3[01]|0[1-9]|[12][0-9])-?(?[0-9]{4}))|((?3[01]|0[1-9]|[12][0-9])-?(?1[0-2]|0[1-9])-?(?[0-9]{4}))"; public static readonly string IllegalYearRegex = $@"([-])({FourDigitYearRegex})([-])"; + public const string InvalidDayNumberPrefix = @"(\d[.,:]|[$£€]\s*)$"; + public const string CheckDecimalRegex = @"(?![,.]\d)"; public const string RangeConnectorSymbolRegex = @"(--|-|—|——|~|–)"; public const string BaseAmDescRegex = @"(am\b|a\s*\.\s*m\s*\.|a[\.]?\s*m\b)"; public const string BasePmDescRegex = @"(pm\b|p\s*\.\s*m\s*\.|p[\.]?\s*m\b)"; public const string BaseAmPmDescRegex = @"(ampm)"; public const string EqualRegex = @"(?)="; + public const string BracketRegex = @"^\s*[\)\]]|[\[\(]\s*$"; public const string MinYearNum = @"1500"; public const string MaxYearNum = @"2100"; public const string MaxTwoDigitYearFutureNum = @"30"; - public const string 
MinTwoDigitYearPastNum = @"70"; + public const string MinTwoDigitYearPastNum = @"40"; public static readonly Dictionary DayOfMonthDictionary = new Dictionary { { @"01", 1 }, diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/BaseEmail.cs b/.NET/Microsoft.Recognizers.Definitions.Common/BaseEmail.cs index 1bf087e660..0e3bbfb132 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/BaseEmail.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/BaseEmail.cs @@ -21,11 +21,12 @@ namespace Microsoft.Recognizers.Definitions public static class BaseEmail { - public const string EmailRegex = @"(([-a-zA-Z0-9_\+\.]+)@([-a-zA-Z\d\.]+)\.([a-zA-Z\.]{2,6}))"; + public const string EmailRegex = @"(([-a-z0-9_\+\.]+)@([-a-z\d\.]+)\.([a-z\.]{2,6}))"; public const string IPv4Regex = @"(?(\d{1,3}\.){3}\d{1,3})"; - public const string NormalSuffixRegex = @"(([0-9A-Za-z][-]*[0-9A-Za-z]*\.)+(?[a-zA-Z][\-a-zA-Z]{0,22}[a-zA-Z]))"; - public const string EmailPrefix = @"(?("")("".+?(?[a-z][\-a-z]{0,22}[a-z]))"; + public const string EmailPrefix = @"(?("")("".+?(? IntegerRegexDefinition = (placeholder, thousandsmark) => $@"(((? DoubleRegexDefinition = (placeholder, thousandsmark, decimalmark) => $@"(((? IntegerRegexDefinition = (placeholder, thousandsmark) => $@"(((? DoubleRegexDefinition = (placeholder, thousandsmark, decimalmark) => $@"(((? 
USPhoneNumberRegex = (WordBoundariesRegex, NonWordBoundariesRegex, EndWordBoundariesRegex) => $@"((((({NonWordBoundariesRegex}\+)|{WordBoundariesRegex})1(\s|-)?)|{WordBoundariesRegex})?(\d{{3}}\)[-\s]?|\(\d{{3}}\)[-\.\s]?|{WordBoundariesRegex}\d{{3}}\s?[-\.]?\s?)|{WordBoundariesRegex})[2-9]\d{{2}}\s?[-\.]?\s?\d{{4}}(\s?(x|X|ext)\s?\d{{3,5}})?(?!(-\s?\d)){EndWordBoundariesRegex}"; public static readonly Func CNPhoneNumberRegex = (WordBoundariesRegex, EndWordBoundariesRegex) => $@"(({WordBoundariesRegex}00\s?)?\+?(86|82|81)\s?-?\s?)?((({WordBoundariesRegex}|(?<=(86|82|81)))\d{{2,5}}\s?-?\s?|\(\d{{2,5}}\)\s?)\d{{4}}\s?-?\s?\d{{4}}(\s?-?\s?\d{{4}})?|(\b|(?<=(86|82|81)))\d{{3}}\s?-?\s?\d{{4}}\s?-?\s?\d{{4}})(?!-){EndWordBoundariesRegex}"; public static readonly Func DKPhoneNumberRegex = (WordBoundariesRegex, EndWordBoundariesRegex) => $@"((\(\s?(\+\s?|00)45\s?\)\s?)|(((? ITPhoneNumberRegex = (WordBoundariesRegex, EndWordBoundariesRegex) => $@"((\(\s?(\+\s?|00)39\s?\)\s?)|(((? NLPhoneNumberRegex = (WordBoundariesRegex, EndWordBoundariesRegex) => $@"((((\(\s?(\+\s?|00)31\s?\)\s?)|(((? ITPhoneNumberRegex = (WordBoundariesRegex, EndWordBoundariesRegex) => $@"((\(\s?(\+\s?|00)39\s?\)\s?)|(((? NLPhoneNumberRegex = (WordBoundariesRegex, EndWordBoundariesRegex) => $@"((((\(\s?(\+\s?|00)31\s?\)\s?)|(((? SpecialPhoneNumberRegex = (WordBoundariesRegex, EndWordBoundariesRegex) => $@"({WordBoundariesRegex}(\d{{3,4}}[/-]\d{{1,4}}[/-]\d{{3,4}}){EndWordBoundariesRegex})"; public const string NoAreaCodeUSPhoneNumberRegex = @"(? 
AmbiguityFiltersDict = new Dictionary + { + { @"^\d{4}-\d{4}$", @"omb(\s*(no(\.)?|number|#))?:?\s+\d{4}-?\d{4}" } + }; public static readonly IList SpecialBoundaryMarkers = new List { '-', diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/BaseQuotedText.cs b/.NET/Microsoft.Recognizers.Definitions.Common/BaseQuotedText.cs new file mode 100644 index 0000000000..63843ffce3 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/BaseQuotedText.cs @@ -0,0 +1,26 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Base-QuotedText.yaml +// - Language: NULL +// - ClassName: BaseQuotedText +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions +{ + using System; + using System.Collections.Generic; + + public static class BaseQuotedText + { + public const string QuotedTextRegex = @"('([a-zA-Z0-9_]+)')"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/BaseQuotedText.tt b/.NET/Microsoft.Recognizers.Definitions.Common/BaseQuotedText.tt new file mode 100644 index 0000000000..fccab806e1 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/BaseQuotedText.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Base-QuotedText.yaml"; + this.Language = null; + this.ClassName = "BaseQuotedText"; +#> +<#@ include file=".\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/BaseUnits.cs b/.NET/Microsoft.Recognizers.Definitions.Common/BaseUnits.cs index 
440db9441b..8ffe422525 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/BaseUnits.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/BaseUnits.cs @@ -26,6 +26,7 @@ public static class BaseUnits public const string SecondRegex = @"(?00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|0|1|2|3|4|5|6|7|8|9)"; public static readonly string PmNonUnitRegex = $@"({HourRegex}\s*:\s*{MinuteRegex}(\s*:\s*{SecondRegex})?\s*pm)"; public const string AmbiguousTimeTerm = @"pm"; - public const string AmbiguousUnitNumberMultiplierRegex = @"(\s[Kk])"; + public const string AmbiguousUnitNumberMultiplierRegex = @"(\s([Kk]|mil))"; + public const string SingleCharUnitRegex = @"^\b(c|f|g|k|l|m|s)(\s*\.|\b)$"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/BuildResources.cmd b/.NET/Microsoft.Recognizers.Definitions.Common/BuildResources.cmd index 24375c96c5..9e1fa676e5 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/BuildResources.cmd +++ b/.NET/Microsoft.Recognizers.Definitions.Common/BuildResources.cmd @@ -1,6 +1,9 @@ @ECHO off SETLOCAL EnableDelayedExpansion +ECHO. +ECHO # Building Resources + SET COMMONASSEMBLYPATH=%1 IF [%1] == [] SET COMMONASSEMBLYPATH=%~dp0bin\Release\net462\Microsoft.Recognizers.Definitions.Common.dll @@ -13,10 +16,23 @@ ECHO. ECHO # Transform All T4 Templates ECHO. 
-FOR /R %%i IN (*.tt) DO (ECHO # Transform %%i to %%~dpni.cs & dotnet tt "%%i" -o "%%~dpni.cs" -r "%COMMONASSEMBLYPATH%") +FOR /R %%i IN (*.tt) DO ( CALL :COMPARER %%i %%~dpni) EXIT /b 0 +:COMPARER +REM Workaround to issue with exitCode/ERRORLEVEL in previous FOR loop +FOR /f "delims=" %%a in ('Powershell -ExecutionPolicy Bypass -Command "& {"..\buildtools\tsComparer.ps1" "%2.tt"}"') DO SET "RET=%%a" + +IF %RET% NEQ 0 ( + ECHO # Transform %1 to %2.cs & dotnet tt "%1" -o "%2.cs" -r "%COMMONASSEMBLYPATH%" +) ELSE ( + ECHO # No need to re-generate %1 +) +GOTO :EOF + :ERROR ECHO Error found Transforming T4 Templates -EXIT /b 1 \ No newline at end of file +EXIT /b 1 + +:EOF \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Bulgarian/ChoiceDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Bulgarian/ChoiceDefinitions.cs index f27fea1222..eda65c7a21 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Bulgarian/ChoiceDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Bulgarian/ChoiceDefinitions.cs @@ -25,7 +25,8 @@ public static class ChoiceDefinitions public const string TokenizerRegex = @"[^\w\d]"; public const string ItIsNotRegex = @"не\s+е\s+"; public const string IAmNotRegex = @"не\s+съм\s+"; - public const string TrueRegex = @"\b(в[яе]рно|определено|несъмнено|точно|добре|да|йеп|д|ok|ок|съглас((ен)|(на)))\b|(\uD83D\uDC4D|\uD83D\uDC4C)"; - public static readonly string FalseRegex = $@"\b({ItIsNotRegex}в[яе]рно|нев[яе]рно|{ItIsNotRegex}ок|{ItIsNotRegex}добре|{IAmNotRegex}съглас((ен)|(на))|не)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)"; + public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)"; + public static readonly string TrueRegex = $@"\b(в[яе]рно|определено|несъмнено|точно|добре|да|йеп|д|ok|ок|съглас((ен)|(на)))\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?"; + public static readonly string FalseRegex = 
$@"\b({ItIsNotRegex}в[яе]рно|нев[яе]рно|{ItIsNotRegex}ок|{ItIsNotRegex}добре|{IAmNotRegex}съглас((ен)|(на))|не)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/ChoiceDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/ChoiceDefinitions.cs index 73aee0c5ee..87e3030d78 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/ChoiceDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/ChoiceDefinitions.cs @@ -23,7 +23,8 @@ public static class ChoiceDefinitions { public const string LangMarker = @"Chs"; public const string TokenizerRegex = @"[^\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff66-\uff9f]"; - public const string TrueRegex = @"(好[的啊呀嘞哇]|没问题|可以|中|好|同意|行|是的|是|对)|(\uD83D\uDC4D|\uD83D\uDC4C)"; - public const string FalseRegex = @"(不行|不好|拒绝|否定|不中|不可以|不是的|不是|不对|不)|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)"; + public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)"; + public static readonly string TrueRegex = $@"(好[的啊呀嘞哇]|没问题|可以|中|好|同意|行|是的|是|对)|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?"; + public static readonly string FalseRegex = $@"(不行|不好|拒绝|否定|不中|不可以|不是的|不是|不对|不)|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/DateTimeDefinitions.cs index e360498463..19004b6743 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/DateTimeDefinitions.cs @@ -21,103 +21,145 @@ namespace Microsoft.Recognizers.Definitions.Chinese public static class DateTimeDefinitions { - public const string MonthRegex = 
@"(?正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月|大年)"; + public const string LangMarker = @"Chi"; + public const string MonthRegex = @"(?正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月|大年(?!龄|纪|级))"; public const string DayRegex = @"(?01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|1|2|3|4|5|6|7|8|9)"; - public const string DateDayRegexInChinese = @"(?初一|三十|一日|十一日|二十一日|三十一日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|一日|十一日|十日|二十一日|二十日|三十一日|三十日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|十日|二十日|三十日|10日|11日|12日|13日|14日|15日|16日|17日|18日|19日|1日|20日|21日|22日|23日|24日|25日|26日|27日|28日|29日|2日|30日|31日|3日|4日|5日|6日|7日|8日|9日|一号|十一号|二十一号|三十一号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|一号|十一号|十号|二十一号|二十号|三十一号|三十号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|十号|二十号|三十号|10号|11号|12号|13号|14号|15号|16号|17号|18号|19号|1号|20号|21号|22号|23号|24号|25号|26号|27号|28号|29号|2号|30号|31号|3号|4号|5号|6号|7号|8号|9号)"; - public const string DayRegexNumInChinese = @"(?一|十一|二十一|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|十|二十|廿|卅)"; + public const string OneToNineIntegerRegex = @"[一二三四五六七八九壹贰叁肆伍陆柒捌玖]"; + public static readonly string DateDayRegexInCJK = $@"(?(([12][0-9]|3[01]|[1-9]|[三叁][十拾][一壹]?|[二贰貳]?[十拾]({OneToNineIntegerRegex})?|{OneToNineIntegerRegex})[日号]|初一|三十))"; + public static readonly string DayRegexNumInCJK = $@"(?[12][0-9]|3[01]|[1-9]|[三叁][十拾][一壹]?|[二贰貳]?[十拾]({OneToNineIntegerRegex})?|{OneToNineIntegerRegex}|廿|卅)"; public const string MonthNumRegex = 
@"(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)"; public const string TwoNumYear = @"50"; public const string YearNumRegex = @"(?((1[5-9]|20)\d{2})|2100)"; public const string SimpleYearRegex = @"(?(\d{2,4}))"; - public const string ZeroToNineIntegerRegexChs = @"[一二三四五六七八九零壹贰叁肆伍陆柒捌玖〇两千俩倆仨]"; - public static readonly string DateYearInChineseRegex = $@"(?({ZeroToNineIntegerRegexChs}{ZeroToNineIntegerRegexChs}{ZeroToNineIntegerRegexChs}{ZeroToNineIntegerRegexChs}|{ZeroToNineIntegerRegexChs}{ZeroToNineIntegerRegexChs}|{ZeroToNineIntegerRegexChs}{ZeroToNineIntegerRegexChs}{ZeroToNineIntegerRegexChs}))"; + public const string ZeroToNineIntegerRegexCJK = @"[一二三四五六七八九零壹贰叁肆伍陆柒捌玖〇两千俩倆仨]"; + public const string DynastyStartYear = @"元"; + public const string RegionTitleRegex = @"(贞观|开元|神龙|洪武|建文|永乐|景泰|天顺|成化|嘉靖|万历|崇祯|顺治|康熙|雍正|乾隆|嘉庆|道光|咸丰|同治|光绪|宣统|民国)"; + public static readonly string DynastyYearRegex = $@"(?{RegionTitleRegex})(?({DynastyStartYear}|\d{{1,3}}|[十拾]?({ZeroToNineIntegerRegexCJK}[十百拾佰]?){{0,3}}))"; + public static readonly string DateYearInCJKRegex = $@"(?({ZeroToNineIntegerRegexCJK}{ZeroToNineIntegerRegexCJK}{ZeroToNineIntegerRegexCJK}{ZeroToNineIntegerRegexCJK}|{ZeroToNineIntegerRegexCJK}{ZeroToNineIntegerRegexCJK}|{ZeroToNineIntegerRegexCJK}{ZeroToNineIntegerRegexCJK}{ZeroToNineIntegerRegexCJK}|{DynastyYearRegex}))"; public const string WeekDayRegex = @"(?周日|周天|周一|周二|周三|周四|周五|周六|星期一|星期二|星期三|星期四|星期五|星期六|星期日|星期天|礼拜一|礼拜二|礼拜三|礼拜四|礼拜五|礼拜六|礼拜日|礼拜天|禮拜一|禮拜二|禮拜三|禮拜四|禮拜五|禮拜六|禮拜日|禮拜天|週日|週天|週一|週二|週三|週四|週五|週六)"; - public const string LunarRegex = @"(农历|初一|正月|大年)"; + public const string WeekDayStartEnd = @"^[.]"; + public const string LunarRegex = @"(农历|初一|正月|大年(?!龄|纪|级))"; public static readonly string DateThisRegex = $@"(这个|这一个|这|这一|本){WeekDayRegex}"; public static readonly string DateLastRegex = $@"(上一个|上个|上一|上|最后一个|最后)(的)?{WeekDayRegex}"; public static readonly string DateNextRegex = $@"(下一个|下个|下一|下)(的)?{WeekDayRegex}"; + public static readonly string 
DateNextNextRegex = $@"(下下|下下[个個]){WeekDayRegex}"; + public static readonly string DateLastLastRegex = $@"(上上|上上[个個]){WeekDayRegex}"; + public const string WeekWithWeekDayRangeRegex = @"^[.]"; + public const string WoMLastRegex = @"最后一"; + public const string WoMPreviousRegex = @"上个"; + public const string WoMNextRegex = @"下个"; + public const string SpecialMonthRegex = @"^[.]"; + public const string SpecialYearRegex = @"^[.]"; + public const string MonthDayRange = @"^[.]"; public const string SpecialDayRegex = @"(最近|前天|后天|昨天|明天|今天|今日|明日|昨日|大后天|大前天|後天|大後天)"; public const string SpecialDayWithNumRegex = @"^[.]"; public static readonly string WeekDayOfMonthRegex = $@"((({MonthRegex}|{MonthNumRegex})的\s*)(?第一个|第二个|第三个|第四个|第五个|最后一个)\s*{WeekDayRegex})"; + public const string WeekDayAndDayRegex = @"^[.]"; public const string ThisPrefixRegex = @"这个|这一个|这|这一|本|今"; public const string LastPrefixRegex = @"上个|上一个|上|上一|去"; public const string NextPrefixRegex = @"下个|下一个|下|下一|明"; public static readonly string RelativeRegex = $@"(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex}))"; - public static readonly string SpecialDate = $@"(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})年)?(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})月)?{DateDayRegexInChinese}"; - public const string DateUnitRegex = @"(?年|个月|周|日|天)"; + public static readonly string SpecialDate = $@"(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})年)?(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})月)?{DateDayRegexInCJK}"; + public const string DateUnitRegex = @"(?年|个月|周|週|日|天)"; public const string BeforeRegex = @"以前|之前|前"; - public const string AfterRegex = @"以后|以後|之后|之後|后|後"; - public static readonly string DateRegexList1 = $@"({LunarRegex}(\s*))?((({SimpleYearRegex}|{DateYearInChineseRegex})年)(\s*))?{MonthRegex}(\s*){DateDayRegexInChinese}((\s*|,|,){WeekDayRegex})?({BeforeRegex}|{AfterRegex})?"; - public static readonly string DateRegexList2 = 
$@"((({SimpleYearRegex}|{DateYearInChineseRegex})年)(\s*))?({LunarRegex}(\s*))?{MonthRegex}(\s*){DateDayRegexInChinese}((\s*|,|,){WeekDayRegex})?({BeforeRegex}|{AfterRegex})?"; - public static readonly string DateRegexList3 = $@"((({SimpleYearRegex}|{DateYearInChineseRegex})年)(\s*))?({LunarRegex}(\s*))?{MonthRegex}(\s*)({DayRegexNumInChinese}|{DayRegex})((\s*|,|,){WeekDayRegex})?({BeforeRegex}|{AfterRegex})?"; - public static readonly string DateRegexList4 = $@"{MonthNumRegex}\s*/\s*{DayRegex}((\s+|\s*,\s*){SimpleYearRegex})?"; - public static readonly string DateRegexList5 = $@"{DayRegex}\s*/\s*{MonthNumRegex}((\s+|\s*,\s*){SimpleYearRegex})?"; + public const string AfterRegex = @"以后|以後|之后|之後|后|後|还剩"; + public const string TimePeriodLeftRegex = @"还剩"; + public static readonly string DateRegexList1 = $@"({LunarRegex}(\s*))?((({SimpleYearRegex}|{DateYearInCJKRegex})年)(\s*))?{MonthRegex}(\s*){DateDayRegexInCJK}((\s*|,|,){WeekDayRegex})?"; + public static readonly string DateRegexList2 = $@"((({SimpleYearRegex}|{DateYearInCJKRegex})年)(\s*))?({LunarRegex}(\s*))?{MonthRegex}(\s*){DateDayRegexInCJK}((\s*|,|,){WeekDayRegex})?"; + public static readonly string DateRegexList3 = $@"((({SimpleYearRegex}|{DateYearInCJKRegex})年)(\s*))?({LunarRegex}(\s*))?{MonthRegex}(\s*)({DayRegexNumInCJK}|{DayRegex})((\s*|,|,){WeekDayRegex})?"; + public static readonly string DateRegexList4 = $@"{MonthNumRegex}\s*/\s*{DayRegex}"; + public static readonly string DateRegexList5 = $@"{DayRegex}\s*/\s*{MonthNumRegex}"; public static readonly string DateRegexList6 = $@"{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}\s*[/\\\-]\s*{SimpleYearRegex}"; - public static readonly string DateRegexList7 = $@"{DayRegex}\s*[/\\\-\.]\s*{MonthNumRegex}\s*[/\\\-\.]\s*{YearNumRegex}"; - public static readonly string DateRegexList8 = $@"{YearNumRegex}\s*[/\\\-\. ]\s*{MonthNumRegex}\s*[/\\\-\. 
]\s*{DayRegex}"; + public static readonly string DateRegexList7 = $@"{DayRegex}\s*[/\\\-\.]\s*{MonthNumRegex}\s*[/\\\-\.]\s*{SimpleYearRegex}"; + public static readonly string DateRegexList8 = $@"{SimpleYearRegex}\s*[/\\\-\. ]\s*{MonthNumRegex}\s*[/\\\-\. ]\s*{DayRegex}"; public const string DatePeriodTillRegex = @"(?到|至|--|-|—|——|~|–)"; + public const string DatePeriodRangeSuffixRegex = @"^\b$"; + public const string DatePeriodRangePrefixRegex = @"从"; public const string DatePeriodTillSuffixRequiredRegex = @"(?与|和)"; - public const string DatePeriodDayRegexInChinese = @"(?初一|三十|一日|十一日|二十一日|三十一日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|一日|十一日|十日|二十一日|二十日|三十一日|三十日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|十日|二十日|三十日|10日|11日|12日|13日|14日|15日|16日|17日|18日|19日|1日|20日|21日|22日|23日|24日|25日|26日|27日|28日|29日|2日|30日|31日|3日|4日|5日|6日|7日|8日|9日|一号|十一号|二十一号|三十一号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|一号|十一号|十号|二十一号|二十号|三十一号|三十号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|十号|二十号|三十号|10号|11号|12号|13号|14号|15号|16号|17号|18号|19号|1号|20号|21号|22号|23号|24号|25号|26号|27号|28号|29号|2号|30号|31号|3号|4号|5号|6号|7号|8号|9号|一|十一|二十一|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|十|二十|三十|廿|卅)"; + public const string DatePeriodDayRegexInCJK = 
@"(?初一|三十|一日|十一日|二十一日|三十一日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|一日|十一日|十日|二十一日|二十日|三十一日|三十日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|十日|二十日|三十日|10日|11日|12日|13日|14日|15日|16日|17日|18日|19日|1日|20日|21日|22日|23日|24日|25日|26日|27日|28日|29日|2日|30日|31日|3日|4日|5日|6日|7日|8日|9日|一号|十一号|二十一号|三十一号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|一号|十一号|十号|二十一号|二十号|三十一号|三十号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|十号|二十号|三十号|10号|11号|12号|13号|14号|15号|16号|17号|18号|19号|1号|20号|21号|22号|23号|24号|25号|26号|27号|28号|29号|2号|30号|31号|3号|4号|5号|6号|7号|8号|9号|一|十一|二十一|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|十|二十|三十|廿|卅)"; public const string DatePeriodThisRegex = @"这个|这一个|这|这一|本"; public const string DatePeriodLastRegex = @"上个|上一个|上|上一"; public const string DatePeriodNextRegex = @"下个|下一个|下|下一"; + public const string DatePeriodNextNextRegex = @"下下|下下[个個]"; + public const string DatePeriodLastLastRegex = @"上上|上上[个個]"; public static readonly string RelativeMonthRegex = $@"(?({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex})\s*月)"; public const string HalfYearRegex = @"((?(上|前)半年)|(?(下|后)半年))"; public static readonly string YearRegex = $@"(({YearNumRegex})(\s*年)?|({SimpleYearRegex})\s*年){HalfYearRegex}?"; public static readonly string StrictYearRegex = $@"({YearRegex}(?=[\u4E00-\u9FFF]|\s|$|\W))"; - public const string YearRegexInNumber = @"(?(\d{3,4}))"; - public static readonly string DatePeriodYearInChineseRegex = 
$@"(?({ZeroToNineIntegerRegexChs}{ZeroToNineIntegerRegexChs}{ZeroToNineIntegerRegexChs}{ZeroToNineIntegerRegexChs}|{ZeroToNineIntegerRegexChs}{ZeroToNineIntegerRegexChs}|{ZeroToNineIntegerRegexChs}{ZeroToNineIntegerRegexChs}{ZeroToNineIntegerRegexChs}))年{HalfYearRegex}?"; + public const string YearRegexInNumber = @"(?(\d{4}))"; + public static readonly string DatePeriodYearInCJKRegex = $@"{DateYearInCJKRegex}年{HalfYearRegex}?"; public static readonly string MonthSuffixRegex = $@"(?({RelativeMonthRegex}|{MonthRegex}))"; - public static readonly string SimpleCasesRegex = $@"((从)\s*)?(({YearRegex}|{DatePeriodYearInChineseRegex})\s*)?{MonthSuffixRegex}({DatePeriodDayRegexInChinese}|{DayRegex})\s*{DatePeriodTillRegex}\s*({DatePeriodDayRegexInChinese}|{DayRegex})((\s+|\s*,\s*){YearRegex})?"; - public static readonly string YearAndMonth = $@"({DatePeriodYearInChineseRegex}|{YearRegex})\s*{MonthRegex}"; + public static readonly string SimpleCasesRegex = $@"((从)\s*)?(({YearRegex}|{DatePeriodYearInCJKRegex})\s*)?{MonthSuffixRegex}({DatePeriodDayRegexInCJK}|{DayRegex})\s*{DatePeriodTillRegex}\s*({DatePeriodDayRegexInCJK}|{DayRegex})((\s+|\s*,\s*){YearRegex})?"; + public static readonly string YearAndMonth = $@"({DatePeriodYearInCJKRegex}|{YearRegex}|(?明年|今年|去年))\s*({MonthRegex}|的?(?第一|第二|第三|第四|第五|第六|第七|第八|第九|第十|第十一|第十二|最后一)\s*个月\s*)"; + public static readonly string SimpleYearAndMonth = $@"({YearNumRegex}[/\\\-]{MonthNumRegex}\b$)"; public static readonly string PureNumYearAndMonth = $@"({YearRegexInNumber}\s*[-\.\/]\s*{MonthNumRegex})|({MonthNumRegex}\s*\/\s*{YearRegexInNumber})"; - public static readonly string OneWordPeriodRegex = $@"(((?(明|今|去)年)\s*)?{MonthRegex}|({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex})(?半)?\s*(周末|周|月|年)|周末|(今|明|去|前|后)年(\s*{HalfYearRegex})?)"; + public static readonly string OneWordPeriodRegex = 
$@"(((?(明|今|去)年)\s*)?{MonthRegex}|({DatePeriodThisRegex}|{DatePeriodLastLastRegex}|{DatePeriodLastRegex}|{DatePeriodNextNextRegex}|{DatePeriodNextRegex})(?半)?\s*([周週]末|[周週]|月|年)|[周週]末|(今|明|去|前|后)年(\s*{HalfYearRegex})?)"; + public const string LaterEarlyPeriodRegex = @"^[.]"; + public const string DatePointWithAgoAndLater = @"^[.]"; public static readonly string WeekOfMonthRegex = $@"(?{MonthSuffixRegex}的(?第一|第二|第三|第四|第五|最后一)\s*周\s*)"; - public const string UnitRegex = @"(?年|(个)?月|周|日|天)"; + public static readonly string WeekOfYearRegex = $@"(?({YearRegex}|{RelativeRegex}年)的(?第一|第二|第三|第四|第五|最后一)\s*周\s*)"; + public const string WeekOfDateRegex = @"^[.]"; + public const string MonthOfDateRegex = @"^[.]"; + public const string RestOfDateRegex = @"^[.]"; + public const string UnitRegex = @"(?年|(?(个)?月|周|週|日|天))"; public static readonly string FollowedUnit = $@"^\s*{UnitRegex}"; public static readonly string NumberCombinedWithUnit = $@"(?\d+(\.\d*)?){UnitRegex}"; public const string DateRangePrepositions = @"((从|在|自)\s*)?"; - public static readonly string YearToYear = $@"({DateRangePrepositions})({DatePeriodYearInChineseRegex}|{YearRegex})\s*({DatePeriodTillRegex}|后|後|之后|之後)\s*({DatePeriodYearInChineseRegex}|{YearRegex})(\s*((之间|之内|期间|中间|间)|前|之前))?"; - public static readonly string YearToYearSuffixRequired = $@"({DateRangePrepositions})({DatePeriodYearInChineseRegex}|{YearRegex})\s*({DatePeriodTillSuffixRequiredRegex})\s*({DatePeriodYearInChineseRegex}|{YearRegex})\s*(之间|之内|期间|中间|间)"; + public static readonly string YearToYear = $@"({DateRangePrepositions})({DatePeriodYearInCJKRegex}|{YearRegex})\s*({DatePeriodTillRegex}|后|後|之后|之後)\s*({DatePeriodYearInCJKRegex}|{YearRegex})(\s*((之间|之内|期间|中间|间)|前|之前))?"; + public static readonly string YearToYearSuffixRequired = $@"({DateRangePrepositions})({DatePeriodYearInCJKRegex}|{YearRegex})\s*({DatePeriodTillSuffixRequiredRegex})\s*({DatePeriodYearInCJKRegex}|{YearRegex})\s*(之间|之内|期间|中间|间)"; public static readonly string 
MonthToMonth = $@"({DateRangePrepositions})({MonthRegex}){DatePeriodTillRegex}({MonthRegex})"; public static readonly string MonthToMonthSuffixRequired = $@"({DateRangePrepositions})({MonthRegex}){DatePeriodTillSuffixRequiredRegex}({MonthRegex})\s*(之间|之内|期间|中间|间)"; + public const string DayToDay = @"^[.]"; + public const string DayRegexForPeriod = @"^[.]"; + public static readonly string FirstLastOfYearRegex = $@"(({DatePeriodYearInCJKRegex}|{YearRegex}|(?明年|今年|去年))的?)((?前)|(?(最后|最後)))"; + public const string ComplexDatePeriodRegex = @"^[.]"; public const string PastRegex = @"(?(之前|前|上|近|过去))"; public const string FutureRegex = @"(?(之后|之後|后|後|(?春|夏|秋|冬)(天|季)?"; - public static readonly string SeasonWithYear = $@"(({YearRegex}|{DatePeriodYearInChineseRegex}|(?明年|今年|去年))(的)?)?{SeasonRegex}"; - public static readonly string QuarterRegex = $@"(({YearRegex}|{DatePeriodYearInChineseRegex}|(?明年|今年|去年))(的)?)?(第(?1|2|3|4|一|二|三|四)季度)"; + public const string WhichWeekRegex = @"^[.]"; + public static readonly string SeasonWithYear = $@"(({YearRegex}|{DatePeriodYearInCJKRegex}|(?明年|今年|去年))(的)?)?{SeasonRegex}"; + public static readonly string QuarterRegex = $@"(({YearRegex}|{DatePeriodYearInCJKRegex}|(?明年|今年|去年))(的)?)?(第(?1|2|3|4|一|二|三|四)季度)"; + public const string CenturyNumRegex = @"^[.]"; public const string CenturyRegex = @"(?\d|1\d|2\d)世纪"; - public const string CenturyRegexInChinese = @"(?一|二|三|四|五|六|七|八|九|十|十一|十二|十三|十四|十五|十六|十七|十八|十九|二十|二十一|二十二)世纪"; + public const string CenturyRegexInCJK = @"(?一|二|三|四|五|六|七|八|九|十|十一|十二|十三|十四|十五|十六|十七|十八|十九|二十|二十一|二十二)世纪"; public static readonly string RelativeCenturyRegex = $@"(?({DatePeriodLastRegex}|{DatePeriodThisRegex}|{DatePeriodNextRegex}))世纪"; - public const string DecadeRegexInChinese = @"(?十|一十|二十|三十|四十|五十|六十|七十|八十|九十)"; - public static readonly string DecadeRegex = $@"(?({CenturyRegex}|{CenturyRegexInChinese}|{RelativeCenturyRegex}))?(?(\d0|{DecadeRegexInChinese}))年代"; + public const string DecadeRegexInCJK = 
@"(?十|一十|二十|三十|四十|五十|六十|七十|八十|九十)"; + public static readonly string DecadeRegex = $@"(?({CenturyRegex}|{CenturyRegexInCJK}|{RelativeCenturyRegex}))?(?(\d0|{DecadeRegexInCJK}))年代"; public const string PrepositionRegex = @"(?^的|在$)"; public const string NowRegex = @"(?现在|马上|立刻|刚刚才|刚刚|刚才|这会儿|当下|此刻)"; public const string NightRegex = @"(?早|晚)"; - public const string TimeOfTodayRegex = @"(今晚|今早|今晨|明晚|明早|明晨|昨晚)(的|在)?"; + public const string TimeOfSpecialDayRegex = @"(今晚|今早|今晨|明晚|明早|明晨|昨晚)(的|在)?"; public const string DateTimePeriodTillRegex = @"(?到|直到|--|-|—|——)"; public const string DateTimePeriodPrepositionRegex = @"(?^\s*的|在\s*$)"; + public const string BeforeAfterRegex = @"^\b$"; public static readonly string HourRegex = $@"\b{BaseDateTime.HourRegex}"; public const string HourNumRegex = @"(?[零〇一二两三四五六七八九]|二十[一二三四]?|十[一二三四五六七八九]?)"; public const string ZhijianRegex = @"^\s*(之间|之内|期间|中间|间)"; public const string DateTimePeriodThisRegex = @"这个|这一个|这|这一"; public const string DateTimePeriodLastRegex = @"上个|上一个|上|上一"; public const string DateTimePeriodNextRegex = @"下个|下一个|下|下一"; - public const string AmPmDescRegex = @"(?(am|a\.m\.|a m|a\. m\.|a\.m|a\. m|a m|pm|p\.m\.|p m|p\. m\.|p\.m|p\. m|p m))"; + public const string AmPmDescRegex = @"(?(am|a\.m\.|a m|a\. m\.|a\.m|a\. m|a m|pm|p\.m\.|p m|p\. m\.|p\.m|p\. 
m|p m|上午|中午|下午|午后|晚上|夜里|夜晚|夜间|深夜|傍晚|晚|早间?))"; public const string TimeOfDayRegex = @"(?凌晨|清晨|早上|早间|早|上午|中午|下午|午后|晚上|夜里|夜晚|半夜|夜间|深夜|傍晚|晚)"; public static readonly string SpecificTimeOfDayRegex = $@"((({DateTimePeriodThisRegex}|{DateTimePeriodNextRegex}|{DateTimePeriodLastRegex})\s+{TimeOfDayRegex})|(今晚|今早|今晨|明晚|明早|明晨|昨晚))"; public const string DateTimePeriodUnitRegex = @"(个)?(?(小时|钟头|分钟|秒钟|时|分|秒))"; public static readonly string DateTimePeriodFollowedUnit = $@"^\s*{DateTimePeriodUnitRegex}"; public static readonly string DateTimePeriodNumberCombinedWithUnit = $@"\b(?\d+(\.\d*)?){DateTimePeriodUnitRegex}"; + public const string DurationAllRegex = @"^[.]"; + public const string DurationHalfRegex = @"^[.]"; + public const string DurationRelativeDurationUnitRegex = @"^[.]"; + public const string AgoLaterRegex = @"^[.]"; + public const string DurationDuringRegex = @"^[.]"; + public const string DurationSomeRegex = @"^[.]"; + public const string DurationMoreOrLessRegex = @"^[.]"; public const string DurationYearRegex = @"((\d{3,4})|0\d|两千)\s*年"; public const string DurationHalfSuffixRegex = @"半"; public static readonly Dictionary DurationSuffixList = new Dictionary { { @"M", @"分钟" }, { @"S", @"秒钟|秒" }, - { @"H", @"个小时|小时|个钟头|钟头" }, + { @"H", @"个小时|小时|个钟头|钟头|时" }, { @"D", @"天" }, - { @"W", @"星期|个星期|周" }, + { @"W", @"星期|个星期|周|週" }, { @"Mon", @"个月" }, { @"Y", @"年" } }; @@ -132,57 +174,71 @@ public static class DateTimeDefinitions @"星期", @"个星期", @"周", + @"週", @"个月", - @"年" + @"年", + @"时" }; - public static readonly string LunarHolidayRegex = $@"(({YearRegex}|{DatePeriodYearInChineseRegex}|(?明年|今年|去年))(的)?)?(?除夕|春节|中秋节|中秋|元宵节|端午节|端午|重阳节)"; - public static readonly string HolidayRegexList1 = $@"(({YearRegex}|{DatePeriodYearInChineseRegex}|(?明年|今年|去年))(的)?)?(?新年|五一|劳动节|元旦节|元旦|愚人节|平安夜|圣诞节|植树节|国庆节|情人节|教师节|儿童节|妇女节|青年节|建军节|女生节|光棍节|双十一|清明节|清明)"; - public static readonly string HolidayRegexList2 = $@"(({YearRegex}|{DatePeriodYearInChineseRegex}|(?明年|今年|去年))(的)?)?(?母亲节|父亲节|感恩节|万圣节)"; + 
public static readonly string DurationUnitRegex = $@"(?{DateUnitRegex}|分钟?|秒钟?|个?小时|时|个?钟头|天|个?星期|周|週|个?月|年)"; + public const string AnUnitRegex = @"^[.]"; + public const string DurationConnectorRegex = @"^\s*(?[多又余零]?)\s*$"; + public const string ConnectorRegex = @"^\s*,\s*$"; + public static readonly string LunarHolidayRegex = $@"(({YearRegex}|{DatePeriodYearInCJKRegex}|(?明年|今年|去年))(的)?)?(?除夕|春节|中秋节|中秋|元宵节|端午节|端午|重阳节)"; + public static readonly string HolidayRegexList1 = $@"(({YearRegex}|{DatePeriodYearInCJKRegex}|(?明年|今年|去年))(的)?)?(?新年|五一|劳动节|元旦节|元旦|愚人节|平安夜|圣诞节|植树节|国庆节|情人节|教师节|儿童节|妇女节|青年节|建军节|女生节|光棍节|双十一|清明节|清明)"; + public static readonly string HolidayRegexList2 = $@"(({YearRegex}|{DatePeriodYearInCJKRegex}|(?明年|今年|去年))(的)?)?(?母亲节|父亲节|感恩节|万圣节)"; public const string SetUnitRegex = @"(?年|月|周|星期|日|天|小时|时|分钟|分|秒钟|秒)"; public static readonly string SetEachUnitRegex = $@"(?(每个|每一|每)\s*{SetUnitRegex})"; public const string SetEachPrefixRegex = @"(?(每)\s*$)"; + public const string SetEachSuffixRegex = @"^[.]"; public const string SetLastRegex = @"(?last|this|next)"; public const string SetEachDayRegex = @"(每|每一)(天|日)\s*$"; + public const string SetEachDateUnitRegex = @"^[.]"; public const string TimeHourNumRegex = @"(00|01|02|03|04|05|06|07|08|09|0|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|1|2|3|4|5|6|7|8|9)"; public const string TimeMinuteNumRegex = @"(00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|0|1|2|3|4|5|6|7|8|9)"; public const string TimeSecondNumRegex = @"(00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|0|1|2|3|4|5|6|7|8|9)"; - public const string TimeHourChsRegex = @"([零〇一二两三四五六七八九]|二十[一二三四]?|十[一二三四五六七八九]?)"; - public const string TimeMinuteChsRegex = @"([二三四五]?十[一二三四五六七八九]?|六十|[零〇一二三四五六七八九])"; - 
public static readonly string TimeSecondChsRegex = $@"{TimeMinuteChsRegex}"; + public const string TimeHourCJKRegex = @"([零〇一二两三四五六七八九]|二十[一二三四]?|十[一二三四五六七八九]?)"; + public const string TimeMinuteCJKRegex = @"([二三四五]?十[一二三四五六七八九]?|六十|[零〇一二三四五六七八九])"; + public static readonly string TimeSecondCJKRegex = $@"{TimeMinuteCJKRegex}"; public const string TimeClockDescRegex = @"(点\s*整|点\s*钟|点|时)"; public const string TimeMinuteDescRegex = @"(分钟|分|)"; public const string TimeSecondDescRegex = @"(秒钟|秒)"; public const string TimeBanHourPrefixRegex = @"(第)"; - public static readonly string TimeHourRegex = $@"(?{TimeHourChsRegex}|{TimeHourNumRegex}){TimeClockDescRegex}"; - public static readonly string TimeMinuteRegex = $@"(?{TimeMinuteChsRegex}|{TimeMinuteNumRegex}){TimeMinuteDescRegex}"; - public static readonly string TimeSecondRegex = $@"(?{TimeSecondChsRegex}|{TimeSecondNumRegex}){TimeSecondDescRegex}"; + public static readonly string TimeHourRegex = $@"(?{TimeHourCJKRegex}|{TimeHourNumRegex}){TimeClockDescRegex}"; + public static readonly string TimeMinuteRegex = $@"(?{TimeMinuteCJKRegex}|{TimeMinuteNumRegex}){TimeMinuteDescRegex}"; + public static readonly string TimeSecondRegex = $@"(?{TimeSecondCJKRegex}|{TimeSecondNumRegex}){TimeSecondDescRegex}"; public const string TimeHalfRegex = @"(?过半|半)"; public const string TimeQuarterRegex = @"(?[一两二三四1-4])\s*(刻钟|刻)"; - public static readonly string TimeChineseTimeRegex = $@"{TimeHourRegex}({TimeQuarterRegex}|{TimeHalfRegex}|((过|又)?{TimeMinuteRegex})({TimeSecondRegex})?)?"; + public static readonly string TimeCJKTimeRegex = $@"{TimeHourRegex}({TimeQuarterRegex}|{TimeHalfRegex}|((过|又)?{TimeMinuteRegex})({TimeSecondRegex})?)?"; public static readonly string TimeDigitTimeRegex = $@"(?{TimeHourNumRegex}):(?{TimeMinuteNumRegex})(:(?{TimeSecondNumRegex}))?"; public const string TimeDayDescRegex = @"(?凌晨|清晨|早上|早间|早|上午|中午|下午|午后|晚上|夜里|夜晚|半夜|午夜|夜间|深夜|傍晚|晚)"; public const string TimeApproximateDescPreffixRegex = 
@"(大[约概]|差不多|可能|也许|约|不超过|不多[于过]|最[多长少]|少于|[超短长多]过|几乎要|将近|差点|快要|接近|至少|起码|超出|不到)"; - public const string TimeApproximateDescSuffixRegex = @"(之前|以前|以后|以後|之后|之後|前|后|後|左右)"; - public static readonly string TimeRegexes1 = $@"{TimeApproximateDescPreffixRegex}?{TimeDayDescRegex}?{TimeChineseTimeRegex}{TimeApproximateDescSuffixRegex}?"; + public const string TimeApproximateDescSuffixRegex = @"(左右)"; + public static readonly string TimeRegexes1 = $@"{TimeApproximateDescPreffixRegex}?{TimeDayDescRegex}?{TimeCJKTimeRegex}{TimeApproximateDescSuffixRegex}?"; public static readonly string TimeRegexes2 = $@"{TimeApproximateDescPreffixRegex}?{TimeDayDescRegex}?{TimeDigitTimeRegex}{TimeApproximateDescSuffixRegex}?(\s*{AmPmDescRegex}?)"; - public static readonly string TimeRegexes3 = $@"差{TimeMinuteRegex}{TimeChineseTimeRegex}"; + public static readonly string TimeRegexes3 = $@"差{TimeMinuteRegex}{TimeCJKTimeRegex}"; public const string TimePeriodTimePeriodConnectWords = @"(起|至|到|–|-|—|~|~)"; - public static readonly string TimePeriodLeftChsTimeRegex = $@"(从)?(?{TimeDayDescRegex}?({TimeChineseTimeRegex}))"; - public static readonly string TimePeriodRightChsTimeRegex = $@"{TimePeriodTimePeriodConnectWords}(?{TimeDayDescRegex}?{TimeChineseTimeRegex})(之间)?"; + public static readonly string TimePeriodLeftCJKTimeRegex = $@"(从)?(?{TimeDayDescRegex}?({TimeCJKTimeRegex}))"; + public static readonly string TimePeriodRightCJKTimeRegex = $@"{TimePeriodTimePeriodConnectWords}(?{TimeDayDescRegex}?{TimeCJKTimeRegex})(之间)?"; public static readonly string TimePeriodLeftDigitTimeRegex = $@"(从)?(?{TimeDayDescRegex}?({TimeDigitTimeRegex}))"; public static readonly string TimePeriodRightDigitTimeRegex = $@"{TimePeriodTimePeriodConnectWords}(?{TimeDayDescRegex}?{TimeDigitTimeRegex})(之间)?"; - public static readonly string TimePeriodShortLeftChsTimeRegex = $@"(从)?(?{TimeDayDescRegex}?({TimeHourChsRegex}))"; + public static readonly string TimePeriodShortLeftCJKTimeRegex = 
$@"(从)?(?{TimeDayDescRegex}?({TimeHourCJKRegex}))"; public static readonly string TimePeriodShortLeftDigitTimeRegex = $@"(从)?(?{TimeDayDescRegex}?({TimeHourNumRegex}))"; - public static readonly string TimePeriodRegexes1 = $@"({TimePeriodLeftDigitTimeRegex}{TimePeriodRightDigitTimeRegex}|{TimePeriodLeftChsTimeRegex}{TimePeriodRightChsTimeRegex})"; - public static readonly string TimePeriodRegexes2 = $@"({TimePeriodShortLeftDigitTimeRegex}{TimePeriodRightDigitTimeRegex}|{TimePeriodShortLeftChsTimeRegex}{TimePeriodRightChsTimeRegex})"; - public const string ParserConfigurationBefore = @"(之前|以前|前)"; - public const string ParserConfigurationAfter = @"(之后|之後|以后|以後|后|後)"; + public static readonly string TimePeriodRegexes1 = $@"({TimePeriodLeftDigitTimeRegex}{TimePeriodRightDigitTimeRegex}|{TimePeriodLeftCJKTimeRegex}{TimePeriodRightCJKTimeRegex})"; + public static readonly string TimePeriodRegexes2 = $@"({TimePeriodShortLeftDigitTimeRegex}{TimePeriodRightDigitTimeRegex}|{TimePeriodShortLeftCJKTimeRegex}{TimePeriodRightCJKTimeRegex})"; + public const string FromToRegex = @"(从|自).+([至到]).+"; + public const string AmbiguousRangeModifierPrefix = @"(从|自)"; + public const string ReferenceDatePeriodRegex = @"^[.]"; + public const string UnspecificDatePeriodRegex = @"^[.]"; + public const string ParserConfigurationBefore = @"((?和|或|及)?(之前|以前)|前)"; + public const string ParserConfigurationAfter = @"((?和|或|及)?(之后|之後|以后|以後)|后|後)"; public const string ParserConfigurationUntil = @"(直到|直至|截至|截止(到)?)"; - public const string ParserConfigurationSincePrefix = @"(自从|自|自打|打)"; - public const string ParserConfigurationSinceSuffix = @"(以来|开始)"; - public const string ParserConfigurationLastWeekDayToken = @"最后一个"; - public const string ParserConfigurationNextMonthToken = @"下一个"; - public const string ParserConfigurationLastMonthToken = @"上一个"; + public const string ParserConfigurationSincePrefix = @"(自从|自|自打|打|从)"; + public const string ParserConfigurationSinceSuffix = @"(以来|开始|起)"; + public 
const string ParserConfigurationAroundPrefix = @"^[.]"; + public const string ParserConfigurationAroundSuffix = @"^[.]"; + public const string ParserConfigurationLastWeekDayRegex = @"最后一个"; + public const string ParserConfigurationNextMonthRegex = @"下一个"; + public const string ParserConfigurationLastMonthRegex = @"上一个"; public const string ParserConfigurationDatePrefix = @" "; public static readonly Dictionary ParserConfigurationUnitMap = new Dictionary { @@ -191,14 +247,17 @@ public static class DateTimeDefinitions { @"个月", @"MON" }, { @"日", @"D" }, { @"周", @"W" }, + { @"週", @"W" }, { @"天", @"D" }, { @"小时", @"H" }, + { @"个小时", @"H" }, { @"时", @"H" }, { @"分钟", @"M" }, { @"分", @"M" }, { @"秒钟", @"S" }, { @"秒", @"S" }, - { @"星期", @"W" } + { @"星期", @"W" }, + { @"个星期", @"W" } }; public static readonly Dictionary ParserConfigurationUnitValueMap = new Dictionary { @@ -230,11 +289,13 @@ public static class DateTimeDefinitions }; public static readonly IList WeekendTerms = new List { - @"周末" + @"周末", + @"週末" }; public static readonly IList WeekTerms = new List { @"周", + @"週", @"星期" }; public static readonly IList YearTerms = new List @@ -245,6 +306,10 @@ public static class DateTimeDefinitions { @"今年" }; + public static readonly IList YearToDateTerms = new List + { + @"今年迄今" + }; public static readonly IList LastYearTerms = new List { @"去年" @@ -296,7 +361,14 @@ public static class DateTimeDefinitions { @"第二", 2 }, { @"第三", 3 }, { @"第四", 4 }, - { @"第五", 5 } + { @"第五", 5 }, + { @"第六", 6 }, + { @"第七", 7 }, + { @"第八", 8 }, + { @"第九", 9 }, + { @"第十", 10 }, + { @"第十一", 11 }, + { @"第十二", 12 } }; public static readonly Dictionary ParserConfigurationDayOfMonth = new Dictionary { @@ -606,6 +678,22 @@ public static class DateTimeDefinitions public const string DateTimePeriodAFRegex = @"(下午|午后|傍晚)"; public const string DateTimePeriodEVRegex = @"(晚上|夜里|夜晚|晚)"; public const string DateTimePeriodNIRegex = @"(半夜|夜间|深夜)"; + public static readonly Dictionary AmbiguityTimeFiltersDict = new 
Dictionary + { + { @"^[.]", @"^[.]" } + }; + public static readonly Dictionary AmbiguityTimePeriodFiltersDict = new Dictionary + { + { @"^[.]", @"^[.]" } + }; + public static readonly Dictionary AmbiguityDateFiltersDict = new Dictionary + { + { @"^[.]", @"^[.]" } + }; + public static readonly Dictionary AmbiguityDateTimeFiltersDict = new Dictionary + { + { @"^[.]", @"^[.]" } + }; public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { { @"早", @"(? DurationUnitValueMap = new Dictionary + public static readonly Dictionary DurationUnitValueMap = new Dictionary { { @"Y", 31536000 }, { @"Mon", 2592000 }, @@ -663,7 +752,7 @@ public static class DateTimeDefinitions { @"晚", 18 }, { @"pm", 12 } }; - public const string DefaultLanguageFallback = @"DMY"; + public const string DefaultLanguageFallback = @"YMD"; public static readonly IList MorningTermList = new List { @"早", @@ -699,5 +788,31 @@ public static class DateTimeDefinitions { @"深夜" }; + public static readonly Dictionary DynastyYearMap = new Dictionary + { + { @"贞观", 627 }, + { @"开元", 713 }, + { @"神龙", 705 }, + { @"洪武", 1368 }, + { @"建文", 1399 }, + { @"永乐", 1403 }, + { @"景泰", 1450 }, + { @"天顺", 1457 }, + { @"成化", 1465 }, + { @"嘉靖", 1522 }, + { @"万历", 1573 }, + { @"崇祯", 1628 }, + { @"顺治", 1644 }, + { @"康熙", 1662 }, + { @"雍正", 1723 }, + { @"乾隆", 1736 }, + { @"嘉庆", 1796 }, + { @"道光", 1821 }, + { @"咸丰", 1851 }, + { @"同治", 1862 }, + { @"光绪", 1875 }, + { @"宣统", 1909 }, + { @"民国", 1912 } + }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/NumbersDefinitions.cs index e204a4a80d..aae7e888e3 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/NumbersDefinitions.cs @@ -21,7 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Chinese public static class NumbersDefinitions { - public const 
string LangMarker = @"Chs"; + public const string LangMarker = @"Chi"; public const bool CompoundNumberLanguage = true; public const bool MultiDecimalSeparatorCulture = false; public const char DecimalSeparatorChar = '.'; @@ -159,24 +159,31 @@ public static class NumbersDefinitions '拾' }; public static readonly string DigitalNumberRegex = $@"((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; - public const string ZeroToNineFullHalfRegex = @"[\d1234567890]"; + public const string ZeroToNineFullHalfRegex = @"[\d]"; public static readonly string DigitNumRegex = $@"{ZeroToNineFullHalfRegex}+"; public const string DozenRegex = @".*打$"; - public const string PercentageRegex = @"(?<=百\s*分\s*之).+|.+(?=个\s*百\s*分\s*点)|.*(?=[%%])"; + public const string PercentageRegex = @"(?<=(((?)"; + public const string MoreRegex = @"((大于|多于|高于|超过|大於|多於|高於|超過|超过)了?|过|>)"; public const string LessRegex = @"(小于|少于|低于|小於|少於|低於|不到|不足|<)"; public const string EqualRegex = @"(等于|等於|=)"; - public static readonly string MoreOrEqual = $@"(({MoreRegex}\s*(或|或者)?\s*{EqualRegex})|至少|最少|不{LessRegex})"; - public const string MoreOrEqualSuffix = @"(或|或者)\s*(以上|之上|更[大多高])"; - public static readonly string LessOrEqual = $@"(({LessRegex}\s*(或|或者)?\s*{EqualRegex})|至多|最多|不{MoreRegex})"; + public static readonly string MoreOrEqual = $@"(({MoreRegex}\s*(或|或者)?\s*{EqualRegex})|(至少|最少){SpeicalCharBeforeNumber}?|不{LessRegex}|≥)"; + public const string MoreOrEqualSuffix = @"(或|或者)\s*(次?以上|之上|更[大多高])"; + public static readonly string LessOrEqual = $@"(({LessRegex}\s*(或|或者)?\s*{EqualRegex})|(至多|最多){SpeicalCharBeforeNumber}?|不{MoreRegex}|≤)"; public const string LessOrEqualSuffix = @"(或|或者)\s*(以下|之下|更[小少低])"; public static readonly string OneNumberRangeMoreRegex1 = $@"({MoreOrEqual}|{MoreRegex})\s*(?((?!([并且而並的同時时]|([,,](?!\d+))|。)).)+)"; public const string OneNumberRangeMoreRegex2 = @"比\s*(?((?!(([,,](?!\d+))|。)).)+)\s*更?[大多高]"; - public const string OneNumberRangeMoreRegex3 = 
@"(?((?!(([,,](?!\d+))|。|[或者])).)+)\s*(或|或者)?\s*([多几余幾餘]|以上|之上|更[大多高])(?![万亿萬億]{1,2})"; + public const string OneNumberRangeMoreRegex3 = @"(?((?!(([,,](?!\d+))|。|[或者])).)+)\s*(或|或者)?\s*([多几余幾餘]|次?以上|之上|更[大多高])([万亿萬億]{0,2})"; public static readonly string OneNumberRangeLessRegex1 = $@"({LessOrEqual}|{LessRegex})\s*(?((?!([并且而並的同時时]|([,,](?!\d+))|。)).)+)"; public const string OneNumberRangeLessRegex2 = @"比\s*(?((?!(([,,](?!\d+))|。)).)+)\s*更?[小少低]"; public const string OneNumberRangeLessRegex3 = @"(?((?!(([,,](?!\d+))|。|[或者])).)+)\s*(或|或者)?\s*(以下|之下|更[小少低])"; @@ -242,9 +250,17 @@ public static class NumbersDefinitions public const string OneNumberRangeLessSeparateRegex = @"^[.]"; public static readonly string OneNumberRangeEqualRegex = $@"{EqualRegex}\s*(?((?!(([,,](?!\d+))|。)).)+)"; public static readonly string TwoNumberRangeRegex1 = $@"((位于|在|位於)|(?=(\d|\+|\-)))\s*(?((?!(([,,](?!\d+))|。)).)+)\s*(和|与|與|{TillRegex})\s*(?((?!(([,,](?!\d+))|。))[^之])+)\s*(之)?(间|間)"; - public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2}|{OneNumberRangeMoreRegex3})\s*(且|并且|而且|並且|((的)?同時)|((的)?同时)|[,,])?\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2}|{OneNumberRangeLessRegex3})"; - public static readonly string TwoNumberRangeRegex3 = $@"({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2}|{OneNumberRangeLessRegex3})\s*(且|并且|而且|並且|((的)?同時)|((的)?同时)|[,,])?\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2}|{OneNumberRangeMoreRegex3})"; + public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2}|{OneNumberRangeMoreRegex3})\s*(且|(并|並)且?|而且|((的)?同時)|((的)?同时)|[,,])?\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2}|{OneNumberRangeLessRegex3})"; + public static readonly string TwoNumberRangeRegex3 = 
$@"({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2}|{OneNumberRangeLessRegex3})\s*(且|(并|並)且?|而且|((的)?同時)|((的)?同时)|[,,])?\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2}|{OneNumberRangeMoreRegex3})"; public static readonly string TwoNumberRangeRegex4 = $@"(?((?!(([,,](?!\d+))|。)).)+)\s*{TillRegex}\s*(?((?!(([,,](?!\d+))|。)).)+)"; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"十", @"十足" }, + { @"伍", @"队伍|入伍|退伍|伍仁" }, + { @"肆", @"放肆|肆意|肆无忌惮" }, + { @"陆", @"大陆|陆地|登陆|海陆" }, + { @"拾", @"拾取|拾起|收拾|拾到|朝花夕拾" } + }; public const string AmbiguousFractionConnectorsRegex = @"^[.]"; public static readonly Dictionary RelativeReferenceOffsetMap = new Dictionary { diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/NumbersWithUnitDefinitions.cs index 3ef4c428ae..148b135fb5 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/NumbersWithUnitDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/NumbersWithUnitDefinitions.cs @@ -246,7 +246,8 @@ public static class NumbersWithUnitDefinitions { @"Fen", @"分钱|分" }, { @"Jiao", @"毛钱|毛|角钱|角" }, { @"Finnish markka", @"芬兰马克" }, - { @"Penni", @"盆尼" } + { @"Penni", @"盆尼" }, + { @"Bitcoin", @"₿|btc|xbt|个比特币|比特币" } }; public static readonly Dictionary CurrencyNameToIsoCodeMap = new Dictionary { @@ -536,7 +537,8 @@ public static class NumbersWithUnitDefinitions { @"Turkish lira", @"₺" }, { @"Euro", @"€" }, { @"Pound", @"£" }, - { @"Costa Rican colón", @"₡" } + { @"Costa Rican colón", @"₡" }, + { @"Bitcoin", @"₿|btc|xbt" } }; public static readonly IList CurrencyAmbiguousValues = new List { @@ -600,21 +602,28 @@ public static class NumbersWithUnitDefinitions { @"Dou", @"市斗|斗" }, { @"Dan", @"市石|石" }, { @"Kilogram", @"千克|公斤|kg" }, - { @"Jin", @"市斤|斤" }, - { @"Milligram", @"毫克|mg" }, - { @"Barrel", @"桶" }, - { @"Pot", @"罐" }, { @"Gram", @"克|g" }, + { @"Milligram", 
@"毫克|mg" }, + { @"Microgram", @"微克|μg" }, { @"Ton", @"公吨|吨|t" }, { @"Pound", @"磅" }, { @"Ounce", @"盎司" }, - { @"Bit", @"比特|位|b" }, - { @"Byte", @"字节|byte" }, - { @"Kilobyte", @"千字节|kb" }, - { @"Megabyte", @"兆字节|mb" }, - { @"Gigabyte", @"十亿字节|千兆字节|gb" }, - { @"Terabyte", @"万亿字节|兆兆字节|tb" }, - { @"Petabyte", @"千兆兆|千万亿字节|pb" } + { @"Jin", @"市斤|斤" }, + { @"Liang", @"两" }, + { @"Barrel", @"桶" }, + { @"Pot", @"罐" }, + { @"Bit", @"比特|位|b|bit" }, + { @"Kilobit", @"千比特|千位|kb|Kb" }, + { @"Megabit", @"兆比特|兆位|mb|Mb" }, + { @"Gigabit", @"十亿比特|千兆比特|十亿位|千兆位|gb|Gb" }, + { @"Terabit", @"万亿比特|兆兆比特|万亿位|兆兆位|tb|Tb" }, + { @"Petabit", @"千兆兆比特|千万亿比特|千兆兆位|千万亿位|pb|Pb" }, + { @"Byte", @"字节|byte|Byte" }, + { @"Kilobyte", @"千字节|kB|KB" }, + { @"Megabyte", @"兆字节|mB|MB" }, + { @"Gigabyte", @"十亿字节|千兆字节|gB|GB" }, + { @"Terabyte", @"万亿字节|兆兆字节|tB|TB" }, + { @"Petabyte", @"千兆兆字节|千万亿字节|pB|PB" } }; public static readonly IList DimensionAmbiguousValues = new List { @@ -654,6 +663,11 @@ public static class NumbersWithUnitDefinitions @"tb", @"pb" }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"五角", @"五角大楼" }, + { @"普尔", @"标准普尔" } + }; public static readonly Dictionary TemperatureSuffixList = new Dictionary { { @"F", @"华氏温度|华氏度|°f" }, @@ -674,5 +688,6 @@ public static class NumbersWithUnitDefinitions @"度", @"k" }; + public const string HalfUnitRegex = @"半"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/QuotedTextDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/QuotedTextDefinitions.cs new file mode 100644 index 0000000000..5afeb6a990 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/QuotedTextDefinitions.cs @@ -0,0 +1,35 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. 
+// +// Generation parameters: +// - DataFilename: Patterns\Chinese\Chinese-QuotedText.yaml +// - Language: Chinese +// - ClassName: QuotedTextDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Chinese +{ + using System; + using System.Collections.Generic; + + public static class QuotedTextDefinitions + { + public const string LangMarker = @"Chs"; + public const string QuotedTextRegex1 = @"(“([^“”]+)”)"; + public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)"; + public const string QuotedTextRegex3 = @"(『([^『』]+)』)"; + public const string QuotedTextRegex4 = @"(「([^「」]+)」)"; + public const string QuotedTextRegex5 = @"(﹃([^﹃﹄]+)﹄)"; + public const string QuotedTextRegex6 = @"(﹁([^﹁﹂]+)﹂)"; + public const string QuotedTextRegex7 = @"(""([^""]+)"")"; + public const string QuotedTextRegex8 = @"(\\'([^\']+)\\')"; + public const string QuotedTextRegex9 = @"(`([^`]+)`)"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/QuotedTextDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/QuotedTextDefinitions.tt new file mode 100644 index 0000000000..49ad314de6 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/QuotedTextDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Chinese\Chinese-QuotedText.yaml"; + this.Language = "Chinese"; + this.ClassName = "QuotedTextDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/ChoiceDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/ChoiceDefinitions.cs index 4c54a56288..08bdbca106 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/ChoiceDefinitions.cs +++ 
b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/ChoiceDefinitions.cs @@ -23,7 +23,8 @@ public static class ChoiceDefinitions { public const string LangMarker = @"Nl"; public const string TokenizerRegex = @"[^\w\d]"; - public const string TrueRegex = @"\b(ja|jawel|jazeker|natuurlijk|vanzelfsprekend|zeker|prima|jep|yes|yep|y|ok|oke|akkoord)\b|(\uD83D\uDC4D|\uD83D\uDC4C)"; - public const string FalseRegex = @"\b(nee|neen|nope|nein|nop|no|niet|nooit)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)"; + public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)"; + public static readonly string TrueRegex = $@"\b(ja|jawel|jazeker|natuurlijk|vanzelfsprekend|zeker|prima|jep|yes|yep|y|ok|oke|akkoord)\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?"; + public static readonly string FalseRegex = $@"\b(nee|neen|nope|nein|nop|no|niet|nooit)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/DateTimeDefinitions.cs index c348cbc5d6..9491af29b4 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/DateTimeDefinitions.cs @@ -21,239 +21,285 @@ namespace Microsoft.Recognizers.Definitions.Dutch public static class DateTimeDefinitions { + public const string LangMarker = @"Dut"; public const bool CheckBothBeforeAfter = false; - public static readonly string TillRegex = $@"(?\b(tot|totdat|gedurende|tijdens|ten tijde van)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; - public static readonly string RangeConnectorRegex = $@"(?\b(en|tot en met|t/m|tot|tot aan)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; + public static readonly string TillRegex = $@"(?\b(tot(dat|\s+en\s+met)?|gedurende|tijdens|ten tijde van)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; + public static readonly string 
RangeConnectorRegex = $@"(?\b(en|t/m|tot(\s+(aan|en\s+met))?)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; public const string ArticleRegex = @"\b(de|het|een)\b"; - public const string RelativeRegex = @"(?dit|deze|volgende?|komende?|aankomende?|aanstaande?|huidige?|vorige?|verleden|voorgaande?|vorige?|laatste|afgelopen)"; - public const string StrictRelativeRegex = @"\b(?dit|deze|volgende?|komende?|aankomende?|aanstaande?|huidige?|vorige?|verleden|voorgaande?|vorige?|laatste|afgelopen)\b"; - public const string UpcomingPrefixRegex = @"((aankomende?|komende?|aanstaande))"; - public static readonly string NextPrefixRegex = $@"\b(volgende?|eerstvolgende|{UpcomingPrefixRegex})\b"; - public const string AfterNextSuffixRegex = @"\b(na\s+((de|het)\s+)?volgende?)\b"; - public const string PastPrefixRegex = @"((deze\s+)?verleden)\b"; - public static readonly string PreviousPrefixRegex = $@"(voorgaand[e]|vorige?|afgelopen|verleden|laatste|{PastPrefixRegex})\b"; + public const string ApostrofRegex = @"(’|‘|'|ʼ)"; + public static readonly string ApostrofsRegex = $@"({ApostrofRegex}\s*s)"; + public const string RelativeRegex = @"\b(?((dit|deze|(erop)?volgende?|(aan)?komende?|aanstaande?|huidige?|vorige?|verleden|voorgaande?|laatste|afgelopen|(op\s+)?de|het)\b)|gister(en)?)"; + public const string StrictRelativeRegex = @"\b(?((dit|deze|(erop)?volgende?|(aan)?komende?|aanstaande?|huidige?|vorige?|verleden|voorgaande?|laatste|afgelopen)\b)|gister(en)?)"; + public const string UpcomingPrefixRegex = @"((deze\s+)?((aan)?komende?|aanstaande?))"; + public static readonly string NextPrefixRegex = $@"\b((erop)?volgende?|eerstvolgende|{UpcomingPrefixRegex})\b"; + public const string AfterNextSuffixRegex = @"\b((na\s+(afloop\s+van\s+)?((de|het)\s+)?volgende?)|over)\b"; + public const string PastPrefixRegex = @"((deze\s+)?(verleden|afgelopen))\b"; + public static readonly string PreviousPrefixRegex = $@"((voorgaand[e]|vorige?|verleden|laatste|{PastPrefixRegex})\b|gister(en)?)"; public 
const string ThisPrefixRegex = @"(dit|deze|huidige?)\b"; - public const string RangePrefixRegex = @"(van|tot|tussen)"; + public const string RangePrefixRegex = @"(van|tussen)"; public const string CenturySuffixRegex = @"(^eeuw|^centennium)\b"; - public const string ReferencePrefixRegex = @"(dezelfde|hetzelfde|dat|die|overeenkomstige)\b"; - public const string FutureSuffixRegex = @"\b(in\s+de\s+)?(toekomst|vanaf)\b"; - public const string DayRegex = @"(de\s*)?(?(3[0-1]|[1-2]\d|0?[1-9])(ste|e|de)?)(?=\b|t)"; - public static readonly string WrittenDayRegex = $@"\b(?({WrittenOneToNineRegex})|({WrittenElevenToNineteenRegex})|(({WrittenOneToNineRegex}en)?twintig)|(((één|een)en)?dertig))\b"; - public const string ImplicitDayRegex = @"(de\s*)?(?(3[0-1]|[0-2]?\d)(ste|e|de))\b"; - public const string MonthNumRegex = @"(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)?\.?\b"; + public const string ReferencePrefixRegex = @"(dezelfde|hetzelfde|dat(zelfde)?|die|overeenkomstige)\b"; + public const string FutureSuffixRegex = @"\b(((in\s+de\s+)?toekomst)|daarna|over|na)\b"; + public const string PastSuffixRegex = @"^\b$"; + public const string DayRegex = @"(de\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:\s*(ste|de|e))?(?=\b|t)"; + public static readonly string WrittenDayRegex = $@"(?({WrittenOneToNineRegex})|({WrittenElevenToNineteenRegex})|(({WrittenOneToNineRegex}(en|ën))?twintig)|(((één|een)(en|ën))?dertig))"; + public static readonly string WrittenCardinalDayRegex = $@"(?<=((de\s+)|\b))(?(éérste|eerste|tweede|derde|vierde|vijfde|zesde|zevende|achtste|negende|tiende|{WrittenElevenToNineteenRegex}de|({WrittenOneToNineRegex}(en|ën))?twintigste|((één|een)(en|ën))?dertigste))"; + public const string ImplicitDayRegex = @"(de\s*)?(?(3[0-1]|[0-2]?\d)(\s*(ste|de|e)))\b"; + public const string MonthNumRegex = @"\b(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)\b"; public const string WrittenOneToNineRegex = @"(één|een|twee|drie|vier|vijf|zes|zeven|acht|negen)"; - public const 
string WrittenElevenToNineteenRegex = @"(elf|twaalf|dertien|veertien|vijftien|zestien|zeventien|achttien|negentien)"; + public const string WrittenElevenToNineteenRegex = @"(elf|elven|twaalf|dertien|veertien|vijftien|zestien|zeventien|achttien|negentien)"; public const string WrittenTensRegex = @"(tien|twintig|dertig|veertig|vijftig|zestig|zeventig|tachtig|negentig)"; public static readonly string WrittenNumRegex = $@"({WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s+{WrittenOneToNineRegex})?)"; - public static readonly string WrittenCenturyFullYearRegex = $@"((twee)\s*duizend(\s+en)?(\s*{WrittenOneToNineRegex}\s+honderd)?)"; - public const string WrittenCenturyOrdinalYearRegex = @"((ee|éé)nentwintig|tweeëntwintig|een|twee|drie|vier|vijf|zes|zeven|acht|negen|tien|elf|twaalf|dertien|veertien|vijftien|zestien|zeventien|achttien|negentien|twintig)"; - public static readonly string CenturyRegex = $@"\b(?{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}(\s+honderd)?(\s+en)?)\b"; - public static readonly string LastTwoYearNumRegex = $@"(zero\s+{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s+{WrittenOneToNineRegex})?)"; - public static readonly string FullTextYearRegex = $@"\b((?{CenturyRegex})\s+(?{LastTwoYearNumRegex})\b|\b(?{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}\s+hundred(\s+and)?))\b"; - public const string OclockRegex = @"(?uur)"; - public const string SpecialDescRegex = @"(p\b)"; - public static readonly string AmDescRegex = $@"({BaseDateTime.BaseAmDescRegex})"; - public static readonly string PmDescRegex = $@"({BaseDateTime.BasePmDescRegex})"; - public static readonly string AmPmDescRegex = $@"({BaseDateTime.BaseAmPmDescRegex})"; - public static readonly string DescRegex = $@"((({OclockRegex}\s+)?(?({AmPmDescRegex}|{AmDescRegex}|{PmDescRegex}|{SpecialDescRegex})))|{OclockRegex})"; - public static readonly string TwoDigitYearRegex = 
$@"\b(?([0-27-9]\d))(?!(\s*((\:)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b"; + public static readonly string WrittenCenturyFullYearRegex = $@"((twee)\s*duizend(\s+en)?(\s*{WrittenOneToNineRegex}\s*honderd)?)"; + public const string WrittenCenturyOrdinalYearRegex = @"((ee|éé)nentwintig|tweeëntwintig|tien|elf|elven|twaalf|dertien|veertien|vijftien|zestien|zeventien|achttien|negentien|twintig|een|twee|drie|vier|vijf|zes|zeven|acht|negen)"; + public static readonly string CenturyRegex = $@"\b(?{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}(\s*honderd)?(\s+en)?)"; + public static readonly string LastTwoYearNumRegex = $@"((zero|nul|en)\s+{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|({WrittenOneToNineRegex}[eë]n)?{WrittenTensRegex})"; + public static readonly string FullTextYearRegex = $@"\b((?{CenturyRegex})\s*(?{LastTwoYearNumRegex})\b|\b(?{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}\s+hundred(\s+and)?))\b"; + public const string OclockRegex = @"(?u(ur)?)\b"; + public const string SpecialDescRegex = @"((?)p\b)"; + public static readonly string AmDescRegex = $@"(:?{BaseDateTime.BaseAmDescRegex})"; + public static readonly string PmDescRegex = $@"(:?{BaseDateTime.BasePmDescRegex})"; + public static readonly string AmPmDescRegex = $@"(:?{BaseDateTime.BaseAmPmDescRegex})"; + public static readonly string DescRegex = $@"(:?(:?({OclockRegex}\s+)?(?({AmPmDescRegex}|{AmDescRegex}|{PmDescRegex}|{SpecialDescRegex}))\.?)|{OclockRegex})"; + public static readonly string PmRegex = $@"(?({ApostrofsRegex}|des)\s+(\bmiddags|avonds|nachts)|((in|tegen|op|om|met)\s+(de\s+)?)(((na)?middag|avond|(midder)?nacht|lunchtijd))|dag)"; + public static readonly string PmRegexFull = 
$@"(?(({ApostrofsRegex}|des)\s+(\bmiddags|avonds|nachts)|((in|tegen|op|om|met)\s+(de\s+)?)?(((na)?middag|(?(({ApostrofsRegex}|des)\s+(ochtends|morgens)|((in|tegen|op)\s+de)(\s+(ochtend|morgen))|(?<=gisteren|morgen|vandaag|(maan|dins|woens|donder|vrij|zater|zon)dag)(ochtend|morgen)|^?ochtend))"; + public static readonly string FullDescRegex = $@"({DescRegex}|{AmRegex}|{PmRegexFull})"; + public static readonly string TwoDigitYearRegex = $@"\b(?([0-24-9]\d))(?!(\s*(([:\.]\d)|keer|uurs?|{AmDescRegex}|{PmDescRegex})))\b"; public static readonly string YearRegex = $@"({BaseDateTime.FourDigitYearRegex}|{FullTextYearRegex})"; - public const string WeekDayRegex = @"\b(?maandag|dinsdag|woensdag|donderdag|vrijdag|zaterdag|zondag|ma|ma\.|di|di\.|wo|wo\.|woe|woe\.|do|do\.|vr|vr\.|vrij|za|za\.|zat|zat\.|zo|zo\.)(en)?\b"; - public const string SingleWeekDayRegex = @"\b(?maandag|dinsdag|woensdag|donderdag|vrijdag|zaterdag|zondag|ma|ma\.|di|di\.|wo|wo\.|woe|woe\.|do|do\.|vr|vr\.|vrij|za|za\.|zat|zat\.|zo|zo\.)(en)?\b"; - public static readonly string RelativeMonthRegex = $@"(?((van\s+)?de\s+)?{RelativeRegex}\s+maand)\b"; - public const string WrittenMonthRegex = @"(((de\s+)?maand\s+)?(?januari|februari|maart|april|mei|juni|juli|augustus|september|oktober|november|december|jan|feb|mar|apr|jun|jul|aug|sep|sept|oct|okt|nov|dec))"; - public static readonly string MonthSuffixRegex = $@"(?((in|van|tijdens|sinds|tot)\s+)?({RelativeMonthRegex}|{WrittenMonthRegex}))"; - public const string DateUnitRegex = @"(?eeuw(en)?|jaar|jaren|maand(en)?|week|weken|(werk)?dag(en)?)\b"; + public const string WeekDayRegex = @"\b(?((ma|di(ns)?|wo(e(ns)?)?|do|vr(ij)?|zat?|zo)(\.|\b))|((?:maan|dins|woens|donder|vrij|zater|zon)(dag(en)?)?(middag)?)\b)"; + public const string SingleWeekDayRegex = @"\b(?(((ma|di(ns)?|wo(e(ns)?)?|do|vr|za)\b(\.)?)|(vrij|zat|zon?)\.(?!$)|(((?((van\s+)?(de\s+)?)?{RelativeRegex}\s+maand)\b"; + public const string WrittenMonthRegex = 
@"(((de\s+)?maand\s+)?(?januari|februari|maart|april|mei|juni|juli|augustus|september|oktober|november|december|jan|feb|mar|mrt|apr|jun|jul|aug|sep|sept|oct|okt|nov|dec))"; + public static readonly string MonthSuffixRegex = $@"(?((in|van|tijdens|sinds|tot|op)\s+)?({RelativeMonthRegex}|{WrittenMonthRegex}))"; + public const string DateUnitRegex = @"(?(eeuw|maand|weekend)(?en)?|jaar|(?jaren|weken)|jr|decennia|mnd|week|(?we[er]k)?dag(?en)?|dgn)\b"; public const string DateTokenPrefix = @"op "; public const string TimeTokenPrefix = @"om "; public const string TokenBeforeDate = @"op "; public const string TokenBeforeTime = @"om "; + public const string HalfTokenRegex = @"^(half)"; + public const string QuarterTokenRegex = @"^(een\s+kwart(\s+jaar)?|kwart|een\s+kwartier|kwartier)"; + public const string ThreeQuarterTokenRegex = @"^(drie\s+kwart|drie\s+kwartier)"; + public const string ToTokenRegex = @"\b(voor)$"; + public const string ToHalfTokenRegex = @"\b(over\s+half)$"; + public const string ForHalfTokenRegex = @"\b(voor\s+half)$"; + public const string FromRegex = @"\b(van(af)?)$"; + public const string BetweenTokenRegex = @"\b(tussen)$"; public static readonly string SimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex}(\s*),?(\s*){MonthSuffixRegex}|{MonthSuffixRegex}\s+{DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; - public static readonly string MonthFrontSimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((van)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; + public static readonly string MonthFrontSimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?(({MonthSuffixRegex}\s+((van)\s+)?({DayRegex})|({DayRegex})\s+((van)\s+)?{MonthSuffixRegex})\s*{TillRegex}\s*({DayRegex})|(op\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})\s+{MonthSuffixRegex})((\s+|\s*,\s*){YearRegex})?\b"; public static readonly string MonthFrontBetweenRegex = 
$@"\b{MonthSuffixRegex}\s+(tussen\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; public static readonly string BetweenRegex = $@"\b(tussen\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b"; - public static readonly string MonthWithYear = $@"\b(({WrittenMonthRegex}(\.)?(\s*)[/\\\-\.,]?(\s+(van|over|in))?(\s*)({YearRegex}|(?volgende?|komende?|aankomende?|huidige?|vorige?|afgelopen|dit)\s+jaar))|(({YearRegex}|(?volgende?|komende?|aankomende?|huidige?|vorige?|afgelopen|dit)\s+jaar)(\s*),?(\s*){WrittenMonthRegex}))\b"; - public static readonly string OneWordPeriodRegex = $@"\b((((de\s+)?maand van\s+)?({StrictRelativeRegex}\s+)?(?januari|februari|maart|april|mei|juni|juli|augustus|september|oktober|december|jan\.?|feb\.?|mar\.?|apr\.?|jun\.?|jul\.?|aug\.?|sep\.?|sept\.?|oct\.?|okt\.?|nov\.?|dec\.?))|(maand|jaar) tot heden|nu|({RelativeRegex}\s+)?(mijn\s+)?(weekend|week|maand|jaar)(?!((\s+van)?\s+\d+|\s+tot\s+heden|nu))(\s+{AfterNextSuffixRegex})?)\b"; + public static readonly string RelativeYearRegex = $@"({YearRegex}|{TwoDigitYearRegex}|(?volgende?|komende?|aanstaande?|aankomende?|huidige?|vorige?|afgelopen|dit)\s+jaar)"; + public static readonly string MonthWithYear = $@"\b(({WrittenMonthRegex}(\.)?(\s*)[/\\\-\.,]?(\s+(van|over|in))?(\s*){RelativeYearRegex})|({RelativeYearRegex}(\s*),?(\s*){WrittenMonthRegex}))\b"; + public static readonly string OneWordPeriodRegex = $@"\b((((de\s+)?maand\s+(van\s+)?)?({StrictRelativeRegex}\s+)?(?januari|februari|maart|april|mei|juni|juli|augustus|september|oktober|november|december|jan\.?|feb\.?|mar\.?|mrt\.?|apr\.?|jun\.?|jul\.?|aug\.?|sep\.?|sept\.?|oct\.?|okt\.?|nov\.?|dec\.?))|(maand|jaar)\s+tot(\s+op)?\s+heden|(({RelativeRegex}\s+)(mijn\s+)?(weekend|(?werkweek)|week|maand|jaar(?!\s+hoger 
dan))|({RelativeRegex}\s+)?(mijn\s+)(weekend|(?werkweek)|week|maand|jaar))(?!((\s+van)?\s+\d+|\s+tot(\s+op)?\s+heden|nu))(\s+{AfterNextSuffixRegex})?)\b"; public static readonly string MonthNumWithYear = $@"\b(({BaseDateTime.FourDigitYearRegex}(\s*)[/\-\.](\s*){MonthNumRegex})|({MonthNumRegex}(\s*)[/\-](\s*){BaseDateTime.FourDigitYearRegex}))\b"; - public static readonly string WeekOfMonthRegex = $@"\b(?(de\s+)?(?eerste|tweede|derde|vierde|vijfde|1e|1ste|2e|2de|3e|3de|4e|4de|5e|5de|laatste)\s+week\s+{MonthSuffixRegex})\b"; - public static readonly string WeekOfYearRegex = $@"\b(?(de\s+)?(?eerste|tweede|derde|vierde|vijfde|1e|1ste|2e|2de|3e|3de|4e|4de|5e|5de|laatste)\s+week(\s+van)?\s+({YearRegex}|{RelativeRegex}\s+jaar))\b"; + public static readonly string WeekOfMonthRegex = $@"\b(?(de\s+)?(?eerste|tweede|derde|vierde|vijfde|1e|1ste|2e|2de|3e|3de|4e|4de|5e|5de|laatste)\s+week\s+{MonthSuffixRegex}(\s+{BaseDateTime.FourDigitYearRegex}|{RelativeRegex}\s+year)?)\b"; + public static readonly string WeekOfYearRegex = $@"(\b(?(de\s+)?(?eerste|tweede|derde|vierde|vijfde|1e|1ste|2e|2de|3e|3de|4e|4de|5e|5de|laatste)\s+week(\s+van)?\s+({YearRegex}|{RelativeRegex}\s+jaar))\b)|(\b({YearRegex}|{RelativeRegex}\s+jaar)\s(?(de\s+)?(?eerste|tweede|derde|vierde|vijfde|1e|1ste|2e|2de|3e|3de|4e|4de|5e|5de|laatste)\s+week)\b)"; + public static readonly string OfYearRegex = $@"\b((of|in)\s+({YearRegex}|{StrictRelativeRegex}\s+year))\b"; + public const string FirstLastRegex = @"\b(the\s+)?((?first)|(?last))\b"; public static readonly string FollowedDateUnit = $@"^\s*{DateUnitRegex}"; public static readonly string NumberCombinedWithDateUnit = $@"\b(?\d+(\.\d*)?){DateUnitRegex}"; - public const string QuarterTermRegex = @"\b(((?eerste|1e|1ste|tweede|2e|2de|derde|3e|3de|vierde|4e|4de)[ -]+kwartaal)|(Q(?[1-4])))\b"; + public const string QuarterTermRegex = @"\b(((?eerste|1e|1ste|tweede|2e|2de|derde|3e|3de|vierde|4e|4de)[ -]+kwartaal)|(k(?[1-4])))\b"; public static readonly string QuarterRegex 
= $@"(het\s+)?{QuarterTermRegex}((\s+van|\s*,\s*)?\s+({YearRegex}|{RelativeRegex}\s+jaar))?"; - public static readonly string QuarterRegexYearFront = $@"({YearRegex}|{RelativeRegex}\s+jaar)('s)?\s+(de\s+)?{QuarterTermRegex}"; + public static readonly string QuarterRegexYearFront = $@"({YearRegex}|({RelativeRegex}\s+jaar))({ApostrofsRegex})?\s+((het|de)\s+)?{QuarterTermRegex}"; public const string HalfYearTermRegex = @"(?eerste|1e|1ste|tweede|2e|2de)\s+(helft)"; public static readonly string HalfYearFrontRegex = $@"(?(de\s+){HalfYearTermRegex}(\s+helft van\s+)((1[5-9]|2[0-1]])\d{{2}}))"; public static readonly string HalfYearBackRegex = $@"(het\s+)?(H(?[1-2])|({HalfYearTermRegex}))(\s+van|\s*,\s*)?\s+({YearRegex})"; public static readonly string HalfYearRelativeRegex = $@"(het\s+)?{HalfYearTermRegex}(\s+van|\s*,\s*)?\s+({RelativeRegex}\s+jaar)"; public static readonly string AllHalfYearRegex = $@"({HalfYearFrontRegex})|({HalfYearBackRegex})|({HalfYearRelativeRegex})"; - public const string EarlyPrefixRegex = @"\b(?(eerder|vroeg(er)?|begin(nend)?|start(end)?)\s+(in|op|van)?)\b"; - public const string MidPrefixRegex = @"\b(?(midden|halverwege|op\s+de\s+helft)\s+(in|op|van)?)\b"; - public const string LaterPrefixRegex = @"\b(?(laat|later|aan\s+het\s+einde|eindigend|afsluitend)(\s+(in|op|van)?))\b"; + public const string EarlyPrefixRegex = @"\b(?((?eerder)|vroeg(er)?|((de|het)\s+)?(begin(nend)?|start(end)?))(\s+(in|op|van)(\s+de)?)?)\b"; + public const string MidPrefixRegex = @"\b(?(het\s+)?(mid(den|-)?|halverwege|op\s+de\s+helft|half)(\s+(in|op|van)(\s+de)?)?)"; + public const string LaterPrefixRegex = @"\b(?(laat|(?later)|(aan\s+)?het\s+einde?(\s+van(\s+de)?)?|eind(e|igend)?|afsluitend)(\s+(in|op|van)(\s+de)?)?)\b"; public static readonly string PrefixPeriodRegex = $@"({EarlyPrefixRegex}|{MidPrefixRegex}|{LaterPrefixRegex})"; - public const string PrefixDayRegex = 
@"\b((?eerder|vroeg(er)?|begin|start)|(?midden|halverwege|op\s+de\s+helft)|(?laat|later|aan\s+het\s+einde))(\s+(op|van))?(\s+de\s+dag)?\b"; + public static readonly string PrefixDayRegex = $@"\b(((?eerder|vroeg(er)?|begin|start)|(?midden|halverwege|op\s+de\s+helft)|(?laat|later))(\s+(in|op|van))?(\s+de\s+dag)?$)|^\s*(((?eerder|vroeg(er)?|begin|start)|(?midden|halverwege|op\s+de\s+helft)|in\s+de|(?laat|later))(\s+(in|op|van))(\s+de\s+dag))\b"; public const string SeasonDescRegex = @"(?lente|voorjaar|zomer|herfst|najaar|winter)"; - public static readonly string SeasonRegex = $@"\b(?({PrefixPeriodRegex}\s+)?({ArticleRegex}\s+)?({RelativeRegex}\s+)?{SeasonDescRegex}((\s+(in|van)|\s*,\s*)?\s+({YearRegex}|({ArticleRegex}\s+)?({RelativeRegex}\s+)?jaar))?)\b"; + public static readonly string SeasonRegex = $@"\b(?({PrefixPeriodRegex}(\s+)?)?({ArticleRegex}\s+)?({RelativeRegex}\s+)?{SeasonDescRegex}((\s+(in|van)|\s*,\s*)?\s+({YearRegex}|({ArticleRegex}\s+)?({RelativeRegex}\s+)?jaar))?)\b"; public const string WhichWeekRegex = @"\b(week)(\s*)(?5[0-3]|[1-4]\d|0?[1-9])\b"; - public const string WeekOfRegex = @"(de\s+)?(week)(\s+van)(\s+de|het)?"; + public const string WeekOfRegex = @"(de\s+)?(week)\s+(van(\s+(de|het))?|(beginnend|die\s+begint|startend|aanvangend)(\s+op)?)"; public const string MonthOfRegex = @"(maand)(\s*)(van)"; - public const string MonthRegex = @"(?januari|februari|maart|april|mei|juni|juli|augustus|september|oktober|december|jan\.?|feb\.?|mar\.?|apr\.?|mei|jun\.?|jul\.?|aug\.?|sep\.?|sept\.?|oct\.?|okt\.?|nov\.?|dec\.?)"; + public const string MonthRegex = @"\b(?(januari|februari|maart|april|mei|juni|juli|augustus|september|oktober|november|december)\b|(jan|feb|mar|mrt|apr|jun|jul|aug|sept|sep|oct|okt|nov|dec)(?:\.|\b))"; public static readonly string DateYearRegex = $@"(?{BaseDateTime.FourDigitYearRegex}|{TwoDigitYearRegex})"; - public static readonly string YearSuffix = $@"(,?\s*({DateYearRegex}|{FullTextYearRegex}))"; - public static readonly string 
OnRegex = $@"(?<=\bop\s+)({DayRegex}(en)?)\b"; - public const string RelaxedOnRegex = @"(?<=\b(op\s+(de\s+|een\s+)?)(10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)(e|ste|de)?)\b"; + public static readonly string YearSuffix = $@"((,|\s*van)?\s*({DateYearRegex}|{FullTextYearRegex}))"; + public static readonly string OnRegex = $@"(?<=\bop\s+)({DayRegex})\b(?!(\.|:)\d+)"; + public const string RelaxedOnRegex = @"\b(?<=op\s+)(?:de\s+)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:\s*(ste|de|e))?\b(?!(\.|:)\d+)"; public const string PrefixWeekDayRegex = @"(\s*((,?\s*op)|[-—–]))"; - public static readonly string ThisRegex = $@"\b((deze(\s+week)?(\s+op)?\s*){WeekDayRegex})|({WeekDayRegex}((\s+van)?\s*deze\s+week))\b"; - public static readonly string LastDateRegex = $@"\b({PreviousPrefixRegex}(\s*week)?\s+{WeekDayRegex})|({WeekDayRegex}(\s+vorige\s+week))\b"; - public static readonly string NextDateRegex = $@"\b({NextPrefixRegex}(\s*week(\s*,?\s*op)?)?\s+{WeekDayRegex})|((op\s+)?{WeekDayRegex}((van\s+)?(de\s+)?volgende)\s*week)\b"; - public static readonly string SpecialDayRegex = $@"\b(eergisteren|overmorgen|((de\s+)?({RelativeRegex})\s+dag)|gisteren|morgen|vandaag)\b"; - public static readonly string SpecialDayWithNumRegex = $@"\b((?{WrittenNumRegex})\s+dagen?\s+(vanaf\s+)(?gisteren|morgen|vandaag))\b"; + public static readonly string ThisRegex = $@"\b((deze(\s+week{PrefixWeekDayRegex}?)?\s*){WeekDayRegex})|({WeekDayRegex}((\s+van)?\s*deze\s+week))\b"; + public static readonly string LastDateRegex = $@"\b({PreviousPrefixRegex}(\s*week{PrefixWeekDayRegex}?)?\s+{WeekDayRegex})|({WeekDayRegex}(\s+van)?(\s+vorige\s+week))\b"; + public const string WeekDayForNextDateRegex = @"\b(?((ma|di(ns)?|wo(e(ns)?)?|do|vr(ij)?|za(t)?|zo)(\.|\b))|((?:maan(?!den)|dins|woens|donder|vrij|zater|zon)(dag)?))"; + public static readonly string NextDateRegex1 = 
$@"\b({NextPrefixRegex}(\s*week{PrefixWeekDayRegex}?)?\s+{WeekDayForNextDateRegex}|(op\s+)?{WeekDayForNextDateRegex}\s+((van\s+)?(de\s+)?{NextPrefixRegex})\s*week|(op\s+)?{NextPrefixRegex}\s*week\s+{WeekDayForNextDateRegex})"; + public static readonly string NextDateRegex2 = $@"\b({NextPrefixRegex}(\s*week(\s*,?\s*op)?)?\s+{WeekDayRegex}|(op\s+)?{WeekDayRegex}\s+((van\s+)?(de\s+)?{NextPrefixRegex})\s*week|(op\s+)?{NextPrefixRegex}\s*week\s+{WeekDayRegex})"; + public static readonly string NextDateRegex = $@"({NextDateRegex1}|{NextDateRegex2})"; + public static readonly string SpecialDayRegex = $@"\b(eergisteren|overmorgen|(de\s+)?dag\s+na\s+morgen|(de\s+)?dag\s+(ervoor|erna)|((de\s+)?({StrictRelativeRegex}|mijn)\s+dag)\b|(de\s+dag(?!\s+van))|gisteren|(deze\s+)?morgen|vandaag|morgen(middag))(?!s\b)"; + public static readonly string SpecialDayWithNumRegex = $@"\b((?{WrittenNumRegex})\s+dag(en)?\s+(gerekend\s+)?(vanaf\s+)(?gisteren|morgen|vandaag))\b"; public static readonly string RelativeDayRegex = $@"\b(((de\s+)?{RelativeRegex}\s+dag))\b"; - public const string SetWeekDayRegex = @"\b(?op\s+({ArticleRegex}\s+))?(?morgen|ochtend|middag|avond|nacht|zondag|maandag|dinsdag|woensdag|donderdag|vrijdag|zaterdag)s\b"; - public static readonly string WeekDayOfMonthRegex = $@"(?(de\s+)?(?eerste|tweede|derde|vierde|vijfde|1e|1ste|2e|2de|3e|3de|4e|4de|5e|5de|laatste)\s+{WeekDayRegex}\s+{MonthSuffixRegex})"; + public const string SetWeekDayRegex = @"\b(?op\s+({ArticleRegex}\s+)?)?(?morgen|ochtend|middag|avond|nacht|zondag|maandag|dinsdag|woensdag|donderdag|vrijdag|zaterdag)((?e)n)\b"; + public static readonly string WeekDayOfMonthRegex = $@"(?((de\s+|\b))?(?eerste|tweede|derde|vierde|vijfde|zesde|tiende|1e|1ste|2e|2de|3e|3de|4e|4de|5e|5de|laatste)\s+{WeekDayRegex}(\s+{MonthSuffixRegex}))"; public static readonly string RelativeWeekDayRegex = $@"\b({WrittenNumRegex}\s+{WeekDayRegex}\s+(vanaf\s+nu|later))\b"; public static readonly string SpecialDate = 
$@"(?=\b(op\s+)(de\s+)?){DayRegex}\b"; - public const string DatePreposition = @"\b(op)"; - public static readonly string DateExtractorYearTermRegex = $@"(\s+|\s*,\s*){DateYearRegex}"; - public static readonly string DateExtractor1 = $@"\b({WeekDayRegex}\s*[,-]?\s*)?(({MonthRegex}(\.)?\s*[/\\.,-]?\s*{DayRegex})|(\({MonthRegex}\s*[-.]\s*{DayRegex}\)))(\s*\(\s*{WeekDayRegex}\s*\))?({DateExtractorYearTermRegex}\b)?"; - public static readonly string DateExtractor3 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?({DayRegex}|{WrittenDayRegex})(\.)?(\s+|\s*,\s*|\s*-\s*){MonthRegex}(\.)?((\s+|\s*,\s*|\s+in\s+){DateYearRegex})?\b"; - public static readonly string DateExtractor4 = $@"\b{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}(\.)?\s*[/\\\-]\s*{DateYearRegex}"; - public static readonly string DateExtractor5 = $@"\b{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}"; - public static readonly string DateExtractor6 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({WeekDayRegex}\s+)?{MonthNumRegex}[\-\.]{DayRegex}(?![%])\b"; + public const string DatePreposition = @"\b(op(\s+de)?)"; + public static readonly string DateExtractorYearTermRegex = $@"(\s+(van\s+)?|\s*[,./-]\s*){DateYearRegex}"; + public static readonly string DateExtractor1 = $@"\b({WeekDayRegex}\s*[,-]?\s*)?(({MonthRegex}(\.)?\s*[/\\.,-]?\s*{DayRegex}(?!\s*({MonthRegex}|\-\s*\d{{2}}\b)))|(\({MonthRegex}\s*[-.]\s*{DayRegex}\))|({DayRegex}(\.)?\s*[/\\.,-]?\s*{MonthRegex}))(\s*\(\s*{WeekDayRegex}\s*\))?({DateExtractorYearTermRegex}(?!\s*{MonthRegex})\b)?"; + public static readonly string DateExtractor3 = $@"\b({WeekDayRegex}(\s+|\s*,\s*)?(de\s+)?)?(({DayRegex}(\s*dag|\.)?)((\s+|\s*[,/-]\s*|\s+van\s+)?{MonthRegex})((\.)?(\s+|\s*[,/-]\s*|\s+in\s+)?{DateYearRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[,./-]?\s*(de\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:\s*(ste|de|e))?(\s*dag|\.)?\s*[,./-]?\s*{MonthRegex})\b"; + public static readonly string DateExtractor4 = 
$@"\b{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}[\.]?\s*[/\\\-]\s*{ApostrofRegex}?{DateYearRegex}"; + public static readonly string DateExtractor5 = $@"\b{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; + public static readonly string DateExtractor6 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({WeekDayRegex}\s+)?{MonthNumRegex}[\.]{DayRegex}(?!([%]|\s*{FullDescRegex}))\b|(?<={DatePreposition}\s+){MonthNumRegex}[\-\.]{DayRegex}(?!([%]|\s*{FullDescRegex}))\b"; public static readonly string DateExtractor7L = $@"\b({WeekDayRegex}\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b"; - public static readonly string DateExtractor7S = $@"\b({WeekDayRegex}\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%])\b"; - public static readonly string DateExtractor8 = $@"(?<={DatePreposition}\s+)({WeekDayRegex}\s+)?{DayRegex}[\\\-]{MonthNumRegex}(?![%])\b"; + public static readonly string DateExtractor7S = $@"\b((?<=(^|{DatePreposition}\s+)){WeekDayRegex}\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{BaseDateTime.CheckDecimalRegex}(?!([%]|\s*{FullDescRegex}))\b"; + public static readonly string DateExtractor8 = $@"\b((?<=(^|{DatePreposition}\s+)){WeekDayRegex}\s+)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:\s*(ste|de|e))?|{DayRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{MonthRegex})"; + public static readonly string OfMonth = $@"(^\s*((van|in)\s+)?)({MonthRegex})"; + public static readonly string MonthEnd = $@"{MonthRegex}(\s+de\s*)?$"; public static readonly string WeekDayEnd = $@"(deze\s+)?{WeekDayRegex}\s*,?\s*$"; - public const string WeekDayStart = @"^[\.]"; - public const string RangeUnitRegex = @"\b(?jaren|jaar|maanden|maand|weken|week)\b"; - public const string HourNumRegex = 
@"\b(?nul|een|één|twee|drie|vier|vijf|zes|zeven|acht|negen|tien|elf|twaalf|dertien|veertien|vijftien|zestien|zeventien|achttien|negentien|twintig|eenentwintig|éénentwintig|tweeentwintig|tweeëntwintig|drieëntwintig|vierentwintig)\b"; - public const string MinuteNumRegex = @"(?nul|een|één|twee|drie|vier|vijf|zes|zeven|acht|negen|tien|elf|twaalf|dertien|veertien|vijftien|zestien|zeventien|achttien|negentien|twintig|eenentwintig|éénentwintig|tweeentwintig|tweeëntwintig|drieëntwintig|vierentwintig|vijfentwintig|zesentwintig|zevenentwintig|achtentwintig|negenentwintig|dertig|eenendertig|tweeëndertig|drieëndertig|vierendertig|vijfendertig|zesendertig|zevenendertig|achtendertig|negenendertig|veertig|eenenveertig|tweeënveertig|drieënveertig|vierenveertig|vijfenveertig|zesenveertig|zevenenveertig|achtenveertig|negenenveertig|eenenvijftig|vijftig|tweeënvijftig|drieënvijftig|vierenvijftig|vijfenvijftig|zesenvijftig|zevenenvijftig|achtenvijftig|negenenvijftig)"; - public const string DeltaMinuteNumRegex = @"(?nul|een|één|twee|drie|vier|vijf|zes|zeven|acht|negen|tien|elf|twaalf|dertien|veertien|vijftien|zestien|zeventien|achttien|negentien|twintig|eenentwintig|éénentwintig|tweeentwintig|tweeëntwintig|drieëntwintig|vierentwintig|vijfentwintig|zesentwintig|zevenentwintig|achtentwintig|negenentwintig|dertig|eenendertig|tweeëndertig|drieëndertig|vierendertig|vijfendertig|zesendertig|zevenendertig|achtendertig|negenendertig|veertig|eenenveertig|tweeënveertig|drieënveertig|vierenveertig|vijfenveertig|zesenveertig|zevenenveertig|achtenveertig|negenenveertig|eenenvijftig|vijftig|tweeënvijftig|drieënvijftig|vierenvijftig|vijfenvijftig|zesenvijftig|zevenenvijftig|achtenvijftig|negenenvijftig)"; - public const string PmRegex = @"(?(('s\s+middags|des\s+middags|'s\s+avonds|des\s+avonds|'s\s+nachts|des\s+nachts)|((in|tegen|op)(\s+de)?)?(\s+((na)?middag|avond|nacht|lunchtijd))))"; - public const string PmRegexFull = 
@"(?(('s\s+middags|des\s+middags|'s\s+avonds|des\s+avonds|'s\s+nachts|des\s+nachts)|((in|tegen|op)(\s+de)?)?(\s+((na)?middag|avond|nacht|lunchtijd))))"; - public const string AmRegex = @"(?(('s\s+ochtends|des\s+ochtends|'s\s+morgens|des\s+morgens)|((in|tegen|op)\s+de)(\s+(ochtend|morgen))))"; + public static readonly string WeekDayStart = $@"^\s+(op\s+)?{WeekDayRegex}\b"; + public const string RangeUnitRegex = @"\b(?ja(ren|ar)|maand(en)?|we(ken|ek)|dag(en)?)\b"; + public const string HourNumRegex = @"\b(?nul|een|één|twee|drie|vier|vijf|zes|zeven|acht|negen|tien|elf|elven|twaalf|dertien|veertien|vijftien|zestien|zeventien|achttien|negentien|twintig|eenentwintig|éénentwintig|tweeentwintig|tweeëntwintig|drieëntwintig|vierentwintig)\b"; + public const string MinuteNumRegex = @"(?nul|een(?=\s+min(uut)?)|één|twee|drie|vier|vijf|zes|zeven|acht|negen|tien|elf|elven|twaalf|dertien|veertien|vijftien|zestien|zeventien|achttien|negentien|twintig|eenentwintig|éénentwintig|tweeentwintig|tweeëntwintig|drieëntwintig|vierentwintig|vij[fv]entwintig|ze(s|ven)entwintig|achtentwintig|negenentwintig|dertig|eenendertig|tweeëndertig|drieëndertig|vierendertig|vijfendertig|ze(s|ven)endertig|achtendertig|negenendertig|veertig|eenenveertig|tweeënveertig|drieënveertig|vierenveertig|vijfenveertig|ze(s|ven)enveertig|achtenveertig|negenenveertig|eenenvijftig|vijftig|tweeënvijftig|drieënvijftig|vierenvijftig|vijfenvijftig|ze(s|ven)envijftig|achtenvijftig|negenenvijftig)"; + public const string DeltaMinuteNumRegex = 
@"(?nul|een(?=\s+min(uut)?)|één|twee|drie|vier|vijf|zes|zeven|acht|negen|tien|elf|elven|twaalf|dertien|veertien|vijftien|zestien|zeventien|achttien|negentien|twintig|eenentwintig|éénentwintig|tweeentwintig|tweeëntwintig|drieëntwintig|vierentwintig|vijfentwintig|vijventwintig|zesentwintig|zevenentwintig|achtentwintig|negenentwintig|dertig|eenendertig|tweeëndertig|drieëndertig|vierendertig|vijfendertig|zesendertig|zevenendertig|achtendertig|negenendertig|veertig|eenenveertig|tweeënveertig|drieënveertig|vierenveertig|vijfenveertig|zesenveertig|zevenenveertig|achtenveertig|negenenveertig|eenenvijftig|vijftig|tweeënvijftig|drieënvijftig|vierenvijftig|vijfenvijftig|zesenvijftig|zevenenvijftig|achtenvijftig|negenenvijftig)(?=\b)"; public const string LunchRegex = @"\b(lunchtijd)\b"; - public const string NightRegex = @"\b('s\s+nachts|des\s+nachts|nacht)\b"; + public static readonly string NightRegex = $@"\b((({ApostrofsRegex}|des)\s+)?nachts|(midder)?nacht)\b"; public const string CommonDatePrefixRegex = @"^[\.]"; - public static readonly string LessThanOneHour = $@"(?((een\s+)?((drie\s?)?kwartier|halfuur))|((DeltaMinuteNumRegex\s+)?(minuten|mins|min\.?)))"; - public static readonly string WrittenTimeRegex = $@"(?(kwart\s+(over\s+|voor\s+){HourNumRegex}|half\s+{HourNumRegex}|{HourNumRegex}\s+uur|{HourNumRegex}\s+uur\s+DeltaMinuteNumRegex))"; - public static readonly string TimePrefix = $@"(?(({DeltaMinuteNumRegex}\s+(over\s+|voor\s+))|(kwart\s+(over\s+|voor\s+)){HourNumRegex}))"; - public static readonly string TimeSuffix = $@"(?{AmRegex}|{PmRegex}|{OclockRegex})"; - public static readonly string TimeSuffixFull = $@"(?{AmRegex}|{PmRegexFull}|{OclockRegex})"; - public static readonly string BasicTime = $@"\b(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex})"; - public const string MidnightRegex = @"(?middernacht|in de nacht|('s|des) nachts)"; - public const string MidmorningRegex = 
@"(?halverwege de ochtend|het midden van de ochtend)"; - public const string MidafternoonRegex = @"(?halverwege de middag|het midden van de middag)"; - public const string MiddayRegex = @"(?middag|namiddag|noen)"; + public static readonly string LessThanOneHour = $@"(?((een\s+)?((drie\s+)?kwart(ier)?|half(uur)?))|{BaseDateTime.DeltaMinuteRegex}(\s+(minuten|mins|min\.?))?|({DeltaMinuteNumRegex}(\s+(minuten|mins|min\.?))?))"; + public static readonly string WrittenTimeRegex = $@"(?({HourNumRegex}\s+{MinuteNumRegex}|(?half)\s+({HourNumRegex})))"; + public static readonly string TimePrefix = $@"(?(half|{LessThanOneHour}\s+(over|voor|na)(\s+half)?)|(uur\s+{LessThanOneHour}))"; + public static readonly string TimeSuffix = $@"(?(({OclockRegex}\s+)?({AmRegex}|{PmRegex}))|{OclockRegex})"; + public static readonly string TimeSuffixFull = $@"(?(({OclockRegex}\s+)?({AmRegex}|{PmRegexFull}))|{OclockRegex})"; + public static readonly string HourDTRegEx = $@"({BaseDateTime.HourRegex})"; + public static readonly string MinuteDTRegEx = $@"({BaseDateTime.MinuteRegex})"; + public static readonly string SecondDTRegEx = $@"({BaseDateTime.SecondRegex})"; + public static readonly string BasicTime = $@"\b(?{WrittenTimeRegex}|{HourNumRegex}|{HourDTRegEx}(:|\.){MinuteDTRegEx}(:{SecondDTRegEx})?|((?half)\s+)?{HourDTRegEx}(?![.,:]?[%\d]))"; + public static readonly string MidnightRegex = $@"(?mid\s*(-\s*)?nacht|middernacht|(in\s+)?de nacht(\s+van)?|({ApostrofsRegex}|des)\s*nachts)"; + public const string MidmorningRegex = @"(?mid\s*(-\s*)?(morgen|ochtend)|halverwege de ochtend|het midden van de ochtend)"; + public const string MidafternoonRegex = @"(?mid\s*(-\s*)?middag|halverwege de middag|het midden van de middag)"; + public static readonly string MiddayRegex = $@"(?(?({MidnightRegex}|{MidmorningRegex}|{MidafternoonRegex}|{MiddayRegex}))"; - public static readonly string AtRegex = 
$@"\b(((?<=\bat\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\.\d)(\s*((?a)|(?p)))?|{MidTimeRegex}))|{MidTimeRegex})\b"; - public static readonly string IshRegex = $@"\b({BaseDateTime.HourRegex}(-|——)?ish|noonish|noon)\b"; - public const string TimeUnitRegex = @"([^A-Za-z]{1,}|\b)(?uren|uur|u|minuten|minuut|min\.?|mins|secondes|seconden|seconde|secs|sec\.?)\b"; + public static readonly string AtRegex = $@"(((?<=\bom\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\.\d)(\s*((?a)|(?p)))?|{MidTimeRegex}))|{MidTimeRegex})\b"; + public static readonly string IshRegex = $@"\b(tegen\s+{BaseDateTime.HourRegex}(-|——|\s*{ApostrofRegex}\s*)?en|middagloos)\b"; + public const string TimeUnitRegex = @"([^A-Za-z]{1,}|\b)(?(min\.|sec\.)|(u(ur)?|minuut|seconde|(?uren|minuten|seconde[ns])|(min|sec)(?s)?)\b)"; public const string RestrictedTimeUnitRegex = @"(?uur|minuut)\b"; - public const string FivesRegex = @"(?(vijf|tien|vijftien|twintig|vijfentwintig|dertig|vijfendertig|veertig|vijfenveertig|vijftig|vijfenvijftig))\b"; + public const string FivesRegex = @"(?(vijf|tien|vijftien|twintig|vijfentwintig|vijventwintig|dertig|vijfendertig|veertig|vijfenveertig|vijftig|vijfenvijftig))\b"; public static readonly string HourRegex = $@"\b{BaseDateTime.HourRegex}"; - public const string PeriodHourNumRegex = @"\b(?nul|een|één|twee|drie|vier|vijf|zes|zeven|acht|negen|tien|elf|twaalf|dertien|veertien|vijftien|zestien|zeventien|achttien|negentien|twintig|eenentwintig|éénentwintig|tweeentwintig|tweeëntwintig|drieëntwintig|vierentwintig)\b"; + public const string PeriodHourNumRegex = @"\b(?nul|een|één|twee|drie|vier|vijf(en)?|zes|zeven|acht|negen|tien|elf|elven|twaalf|dertien|veertien|vijftien|zestien|zeventien|achttien|negentien|twintig|eenentwintig|éénentwintig|tweeentwintig|tweeëntwintig|drieëntwintig|vierentwintig)\b?"; public static readonly string ConnectNumRegex = 
$@"\b{BaseDateTime.HourRegex}(?00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)\s*{DescRegex}"; - public static readonly string TimeRegexWithDotConnector = $@"({BaseDateTime.HourRegex}(\s*\.\s*){BaseDateTime.MinuteRegex})"; - public static readonly string TimeRegex1 = $@"\b({TimePrefix}\s+)?({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})\s*{DescRegex}"; - public static readonly string TimeRegex2 = $@"(\b{TimePrefix}\s+)?(T)?{BaseDateTime.HourRegex}(\s*)?:(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?((\s*{DescRegex})|\b)"; + public static readonly string TimeRegexWithDotConnector = $@"({BaseDateTime.HourRegex}(\s*\.\s*){BaseDateTime.MinuteRegex}(\s*:\s*{BaseDateTime.SecondRegex})?(\s*u\s*)?)"; + public static readonly string TimeRegexFilter = $@"\b((iedere|elke|op)(\s+andere)?\s+)?(week|dag|{SingleWeekDayRegex}|vandaag)\b"; + public static readonly string TimeRegex1 = $@"\b(({TimePrefix}|{AroundRegex})\s+)?(({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})(\s*{DescRegex})|(({AroundRegex})\s+){HourNumRegex}(?!\s+{SingleWeekDayRegex})\b)"; + public static readonly string TimeRegex2 = $@"(\b{TimePrefix}\s+)?(t)?{BaseDateTime.HourRegex}(\s*)?(:|\.)(\s*)?(?[0-5]\d)(?!(\d|\s*(per|pro)\s*cent|%))((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?(\s*u)?((\s*{DescRegex})|\b)"; public static readonly string TimeRegex3 = $@"(\b{TimePrefix}\s+)?{BaseDateTime.HourRegex}\.{BaseDateTime.MinuteRegex}(\s*{DescRegex})"; public static readonly string TimeRegex4 = $@"\b{TimePrefix}\s+{BasicTime}(\s*{DescRegex})?\s+{TimeSuffix}\b"; - public static readonly string TimeRegex5 = $@"\b{TimePrefix}\s+{BasicTime}((\s*{DescRegex})|\b)"; - public static readonly string TimeRegex6 = $@"{BasicTime}(\s*{DescRegex})?\s+{TimeSuffix}\b"; - public static readonly string TimeRegex7 = 
$@"\b{TimeSuffixFull}\s+at\s+{BasicTime}((\s*{DescRegex})|\b)"; - public static readonly string TimeRegex8 = $@"\b{TimeSuffixFull}\s+{BasicTime}((\s*{DescRegex})|\b)"; - public static readonly string TimeRegex9 = $@"\b{PeriodHourNumRegex}\s+{FivesRegex}((\s*{DescRegex})|\b)"; - public static readonly string TimeRegex10 = $@"\b({TimePrefix}\s+)?{BaseDateTime.HourRegex}(\s*h\s*){BaseDateTime.MinuteRegex}(\s*{DescRegex})?"; - public static readonly string TimeRegex11 = $@"\b(({TimeTokenPrefix}{TimeRegexWithDotConnector})(?!\s*per\s*cent|%)|({TimeRegexWithDotConnector}(\s*{DescRegex})))"; - public static readonly string FirstTimeRegexInTimeRange = $@"\b{TimeRegexWithDotConnector}(\s*{DescRegex})?"; - public static readonly string PureNumFromTo = $@"((van|tussen)\s+)?({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?\s*{TillRegex}\s*({HourRegex}|{PeriodHourNumRegex})(?\s*({PmRegex}|{AmRegex}|{DescRegex}))?"; - public static readonly string PureNumBetweenAnd = $@"(tussen\s+)({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?\s*{RangeConnectorRegex}\s*({HourRegex}|{PeriodHourNumRegex})(?\s*({PmRegex}|{AmRegex}|{DescRegex}))?"; - public static readonly string SpecificTimeFromTo = $@"((van|tussen)\s+)?(?({TimeRegex2}|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))\s*{TillRegex}\s*(?({TimeRegex2}|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))"; - public static readonly string SpecificTimeBetweenAnd = $@"(tussen\s+)(?({TimeRegex2}|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))\s*{RangeConnectorRegex}\s*(?({TimeRegex2}|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))"; - public const string PrepositionRegex = @"(?^(om|rond|tegen|op)(\s+de)?$)"; - public const string TimeOfDayRegex = @"\b(?((((in\s+(de)?\s+)?((?vroege(\s+|-))|(?late(\s+|-)))?(ochtend|middag|nacht|avond)))|(((in\s+(the)?\s+)?)(ochtend|middag|avond|nacht)))s?)\b"; - public static readonly string SpecificTimeOfDayRegex = 
$@"\b(({StrictRelativeRegex}\s+{TimeOfDayRegex})\b|\bvannacht)s?\b"; + public static readonly string TimeRegex5 = $@"\b({TimePrefix}\s+{BasicTime}|{BasicTime}\s+{TimePrefix})((\s*({DescRegex}|{TimeSuffix}))|\b)"; + public static readonly string TimeRegex6 = $@"{BasicTime}(\s*u\s*)?(\s*{DescRegex})?\s+{TimeSuffix}\b"; + public static readonly string TimeRegex7 = $@"({TimeSuffixFull}\s+(om\s+)?({TimePrefix}\s+)?(?({PmRegex}|{AmRegex}|{DescRegex})))?\s*{TillRegex}\s*({HourDTRegEx}|{PeriodHourNumRegex})(?\s*({PmRegex}|{AmRegex}|{DescRegex}))?"; + public static readonly string PureNumFromToPrefix = $@"(({PmRegexFull}|{AmRegex})\s+)?({RangePrefixRegex}\s+)({HourDTRegEx}|{PeriodHourNumRegex})(\s+uur)?(\s*(?({PmRegex}|{AmRegex}|{DescRegex})))?\s*{RangeConnectorRegex}\s*({HourDTRegEx}|{PeriodHourNumRegex})(\s+uur)?(?\s*({PmRegex}|{AmRegex}|{DescRegex}))?"; + public static readonly string PureNumFromToWithDateBefore = $@"({RangePrefixRegex}\s+)({HourDTRegEx})(\s+(vandaag|morgen)\s+)?(\s*{RangeConnectorRegex}\s*)({HourDTRegEx})"; + public static readonly string PureNumFromToWithDateAfter = $@"({RangePrefixRegex}\s+)({HourDTRegEx})(\s*{RangeConnectorRegex}\s*)({HourDTRegEx}(\s+(vandaag|morgen))?)"; + public static readonly string PureNumFromTo = $@"({PureNumFromToPrefix}|{PureNumFromToPrefixExcluded})"; + public static readonly string TimeDateFromTo = $@"({PureNumFromToWithDateAfter}|{PureNumFromToWithDateBefore})"; + public static readonly string PureNumBetweenAnd = $@"(tussen\s+)({HourDTRegEx}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?\s*{RangeConnectorRegex}\s*({HourDTRegEx}|{PeriodHourNumRegex})(?\s*({PmRegex}|{AmRegex}|{DescRegex}))?"; + public static readonly string SpecificTimeFromTo = $@"(({PmRegexFull}|{AmRegex})\s+)?({RangePrefixRegex}\s+)?(?({TimeRegex2}|({HourDTRegEx}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))\s*{TillRegex}\s*(?({TimeRegex2}|({HourDTRegEx}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))"; + public static readonly string SpecificTimeBetweenAnd = 
$@"(({PmRegexFull}|{AmRegex})\s+)?(tussen\s+)(?({TimeRegex2}|({HourDTRegEx}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))\s*{RangeConnectorRegex}\s*(?({TimeRegex2}|({HourDTRegEx}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?(\s+{TimeSuffix})?))"; + public const string PrepositionRegex = @"(?^(om|rond|tegen|op|van|deze)(\s+de)?$)"; + public const string EarlyLateRegex = @"\b(((?vroege?|(in\s+het\s+)?(begin))|(?laat|later|late|aan\s+het\s+einde?))((\s+|-)(in\s+de|op\s+de|van\s+de|deze|in|op|van|de))?)"; + public static readonly string TimeOfDayRegex = $@"(?(({EarlyLateRegex}\s+)(aanstaande\s+)?(zondag|maandag|dinsdag|woensdag|donderdag|vrijdag|zaterdag)\s*(ochtend|morgen|(na)?middag|avond|nacht))|(((van\s+deze\s+)|\b(in\s+(de)?\s+)|de\s+)?({EarlyLateRegex}\s+)?({ApostrofsRegex}\s+)?(ochtend(en)?|morgen|middag(en)?|avond(en)?|nacht(\s+van)?)s?((\s+|-)({EarlyLateRegex}))?)|{MealTimeRegex}|((tijdens\s+(de\s+)?)?(kantoor|werk)uren))\b"; + public static readonly string SpecificTimeOfDayRegex = $@"\b((({StrictRelativeRegex}\s+{TimeOfDayRegex})\b|\bvan(ochtend|morgen|middag|avond|nacht)))s?\b"; public static readonly string TimeFollowedUnit = $@"^\s*{TimeUnitRegex}"; public static readonly string TimeNumberCombinedWithUnit = $@"\b(?\d+(\.\d*)?){TimeUnitRegex}"; - public static readonly string[] BusinessHourSplitStrings = { @"business", @"hour" }; - public const string NowRegex = @"\b(?nu|zo snel mogelijk|zo spoedig mogelijk|asap|recent|onlangs|zojuist)\b"; + public static readonly string[] BusinessHourSplitStrings = { @"werk", @"uren" }; + public static readonly string[] BusinessHourSplitStrings2 = { @"kantoor", @"uren" }; + public const string NowRegex = @"(?nu(\s+meteen)?|zo snel mogelijk|zo spoedig mogelijk|asap|recent|onlangs|zojuist)\b"; public const string SuffixRegex = @"^\s*(in de\s+)?(vroege\s+|late\s+)?(ochtend|(na)?middag|avond|nacht)\b"; - public const string DateTimeTimeOfDayRegex = @"\b(?ochtend|(na)?middag|avond|nacht)\b"; - public static readonly string 
DateTimeSpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{DateTimeTimeOfDayRegex})\b|\bvannacht|vanavond|vanmiddag|vanochtend|vanmorgen)\b"; - public static readonly string TimeOfTodayAfterRegex = $@"^\s*(,\s*)?(in\s+de\s+)?{DateTimeSpecificTimeOfDayRegex}"; - public static readonly string TimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(om|rond|tegen|op))?\s*$"; - public static readonly string SimpleTimeOfTodayAfterRegex = $@"({HourNumRegex}|{BaseDateTime.HourRegex})\s*(,\s*)?(in\s+de\s+)?{DateTimeSpecificTimeOfDayRegex}"; - public static readonly string SimpleTimeOfTodayBeforeRegex = $@"\b{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(om|rond|tegen|op))?\s*({HourNumRegex}|{BaseDateTime.HourRegex})\b"; - public const string SpecificEndOfRegex = @"(het\s+)?einde van(\s+de)?\s*$"; - public const string UnspecificEndOfRegex = @"\b(((om|rond|tegen|op)\s+)?het\s+)?(einde\s+van\s+de\s+dag)\b"; + public const string DateTimeTimeOfDayRegex = @"\b(?morgen|ochtend|(na)?middag|avond|nacht)\b"; + public static readonly string DateTimeSpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{DateTimeTimeOfDayRegex})|van(nacht|avond|middag|ochtend|morgen))\b"; + public static readonly string TimeOfTodayAfterRegex = $@"^\s*(,\s*)?((in\s+de)|(op\s+de))?{DateTimeSpecificTimeOfDayRegex}"; + public static readonly string TimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(om|rond|tegen|op\s+de|op))?\s*$"; + public const string NonTimeContextTokens = @"\b(gebouw)"; + public static readonly string SimpleTimeOfTodayAfterRegex = $@"(?vroege(\s+|-))|(?late(\s+|-)))?(?ochtend|(na)?middag|nacht|avond))\b"; - public static readonly string PeriodSpecificTimeOfDayRegex = $@"\b(({StrictRelativeRegex}\s+{PeriodTimeOfDayRegex})\b|\bvannacht|vanavond|vanmiddag|vanochtend)\b"; - public static readonly string PeriodTimeOfDayWithDateRegex = $@"\b(({TimeOfDayRegex}(\s+(om|rond|tegen|op))?))\b"; - public const string LessThanRegex = @"\b(minder\s+dan)\b"; - public 
const string MoreThanRegex = @"\b(meer\s+dan)\b"; - public static readonly string DurationUnitRegex = $@"(?{DateUnitRegex}|uur|uren|u|minuten|mins|m|secondes|secs|s)\b"; - public const string SuffixAndRegex = @"(?\s*(en)\s+(een\s+)?(?half|kwart))"; - public const string PeriodicRegex = @"\b(?dagelijks|maandelijks|wekelijks|twee-wekelijks|jaarlijks)\b"; - public static readonly string EachUnitRegex = $@"(?(iedere|elke)(?\s+andere)?\s*{DurationUnitRegex})"; - public const string EachPrefixRegex = @"\b(?(iedere|elke)\s*$)"; - public const string SetEachRegex = @"\b(?(iedere|elke)\s*)"; - public const string SetLastRegex = @"(?volgende?|komende|aankomende|aanstaande|deze|huidige|aanstaande|vorig|verleden|vorige|laatste)"; - public const string EachDayRegex = @"^\s*(elke)\s*dag\b"; - public static readonly string DurationFollowedUnit = $@"^\s*{SuffixAndRegex}?(\s+|-)?{DurationUnitRegex}"; - public static readonly string NumberCombinedWithDurationUnit = $@"\b(?\d+(\.\d*)?)(-)?{DurationUnitRegex}"; - public static readonly string AnUnitRegex = $@"\b(?(een\s(half|halve)))\s+{DurationUnitRegex}"; + public static readonly string PeriodTimeOfDayRegex = $@"((in\s+(de)?\s+)?({EarlyLateRegex}(\s+|-))?(zondag|maandag|dinsdag|woensdag|donderdag|vrijdag|zaterdag|(eer)?gisteren|morgen)?(?ochtend|(na)?middag|avond|nacht))\b"; + public static readonly string PeriodSpecificTimeOfDayRegex = $@"\b(({StrictRelativeRegex}(\s+)?{PeriodTimeOfDayRegex})\b|\bvan(nacht|avond|(na)?middag|ochtend))\b"; + public static readonly string PeriodTimeOfDayWithDateRegex = $@"(({TimeOfDayRegex}(\s+(om|rond|van|tegen|op(\s+de)?))?))\b"; + public static readonly string PeriodTimeOfDayWithDateRegexWithAnchors = $@"((({TimeOfDayRegex}(\s+(om|rond|van|tegen|op(\s+de)?))?))(?=({MiddlePauseRegex})?\s*$)|(?<=^\s*({MiddlePauseRegex})?)(?!{MealTimeRegex}){TimeOfDayRegex})"; + public const string LessThanRegex = @"\b((binnen\s+)?minder\s+dan)\b"; + public const string MoreThanRegex = 
@"\b((meer|langer)\s+dan|ruim)\b"; + public static readonly string DurationUnitRegex = $@"(?{DateUnitRegex}|(min\.|sec\.)|((?halfuur)|(?kwartier\s+uur)|(?kwartier)|uur|uren|u|minuten|minuut|m(ins?)?|seconde[ns]?|s(ecs?)?|nacht(en)?)\b)(\s+lang\b)?"; + public const string SuffixAndRegex = @"(?\s*(en|ën)(\s*een)?\s*(?hal(f|ve)|kwart|kwartier)|(?(een\s+)?kwartier))"; + public const string PeriodicRegex = @"\b(?dagelijkse?|(drie)?maandelijkse?|wekelijkse?|twee-?wekelijkse?|(half)?jaarlijkse?|kwartaal)\b"; + public static readonly string EachUnitRegex = $@"(?((iedere?|elke?|eenmaal per)(?\s+andere)?\s*({DurationUnitRegex}|(?weekend(en)?))|({DurationUnitRegex}|{WeekDayRegex})\s+om(\s+de)?(?\s+andere)?\s*(week|{DurationUnitRegex})))"; + public const string EachPrefixRegex = @"\b(?(iedere|elke|eenmaal per)\s*$)"; + public static readonly string SetEachRegex = $@"\b(?(iedere|elke|om\s+de)\s*(?\s+andere)?\s*(week\s*(?={WeekDayRegex}))?)"; + public const string SetLastRegex = @"(?volgende?|komende|aankomende|aanstaande|deze|huidige|aanstaande|vorige?|verleden|laatste)"; + public const string EachDayRegex = @"\s*(iedere|elke)\s*dag\b"; + public const string BeforeEachDayRegex = @"(iedere|elke)\s*dag\s*"; + public static readonly string DurationFollowedUnit = $@"^\s*((?(?(?(een\s+)?kwartier)))|{SuffixAndRegex}?(\s+|-)?{DurationUnitRegex})"; + public static readonly string NumberCombinedWithDurationUnit = $@"\b(?\d+([.,:]\d*)?)(-)?{DurationUnitRegex}"; + public static readonly string AnUnitRegex = $@"\b((((nog een|een|nog)\s+(?anderhalf|anderhalve|half|halve)?))|andere)\s*{DurationUnitRegex}"; public const string DuringRegex = @"\b(voor\s+een|gedurende\s+(het|de))\s+(?jaar|maand|week|dag)\b"; - public const string AllRegex = @"\b(?((ge)?hele|volledige|ganse|heel|volledig)(\s+|-)(?jaar|maand|week|dag))\b"; - public const string HalfRegex = @"(((een)\s*)|\b)(?(half|halve)\s+(?jaar|maand|week|dag|uur))\b"; + public const string AllRegex = 
@"\b(?((de|het|een)\s+)?((ge)?hele|volledige|ganse|heel|volledig|volle)(\s+|-)(?jaar|maand|week|dag))\b"; + public const string HalfRegex = @"(((een)\s*)|\b)(?(half|halve)\s+(?jaar|maand|week|dag|uur|halfuur)|(?halfuur))\b"; public const string ConjunctionRegex = @"\b((en(\s+voor)?)|plus)\b"; - public static readonly string HolidayRegex1 = $@"\b(?(goede\s+vrijdag|pasen|kerst|kerstavond|kerstmis|thanksgiving|halloween|nieuwjaar|bevrijdingsdag))(\s+(van\s+|in\s+)?({YearRegex}|{RelativeRegex}\s+jaar))?\b"; - public static readonly string HolidayRegex2 = $@"\b(?(nationale dodenherdenking|nationale herdenking|dodenherdenking|dag van de leraar|dag van de arbeid|martin luther kingdag|mlkdag))(\s+(van\s+|in\s+)?({YearRegex}|{RelativeRegex}\s+jaar))?\b"; - public static readonly string HolidayRegex3 = $@"\b(?(yuandan|valentijnsdag|valentijn|oude?jaarsavond|nieuwjaarsdag|eerste paasdag|tweede paasdag|prinsjesdag|koningsdag|koninginnedag|bevrijdingsdag|hemelvaartsdag|eerste kerstdag|1e kerstdag|tweede kerstdag|2e kerstdag|vaderdag|moederdag|meisjesdag|amerikaanse onafhankelijkheidsdag|onafhankelijkheidsdag|nederlandse veteranendag|veteranendag|boomplantdag|boomfeestdag))(\s+(van\s+|in\s+)?({YearRegex}|{RelativeRegex}\s+jaar))?\b"; - public const string AMTimeRegex = @"(?('s morgens|'s ochtends)|in\s+de\s+(morgen|ochtend))"; - public const string PMTimeRegex = @"\b(?('s middags|'s avonds|'s nachts)|in\s+de\s+(middag|avond|nacht))\b"; - public const string InclusiveModPrepositions = @"(?((in|tegen|tijdens|op)\s+of\s+)|(\s+of\s+(in|tegen|tijdens|op)))"; - public static readonly string BeforeRegex = $@"(\b{InclusiveModPrepositions}?(voor|vóór|vooraf(gaan)?\s+aan|(niet\s+later|vroeger|eerder)\s+dan|eindigend\s+op\s+|tegen|tot|totdat|(?zo\s+laat\s+als)){InclusiveModPrepositions}?\b\s*)|(?)((?<=)|<)"; - public static readonly string AfterRegex = $@"(\b{InclusiveModPrepositions}?((na|(?>=)|>)"; - public const string SinceRegex = 
@"(\b(sinds|na\s+of\s+gelijk\s+aan|startend\s+(vanaf|op|met)|zo\s+vroeg\s+als|ieder\s+moment\s+vanaf)\b\s*)|(?=)"; - public const string AroundRegex = @"(\b(rondom|ongeveer(\s+om)?)\s*\b)"; - public const string AgoRegex = @"\b(geleden|voor\s+(?gisteren|vandaag))\b"; - public const string LaterRegex = @"\b(later|vanaf nu|(vanaf|na)\s+(?morgen|vandaag))\b"; - public const string InConnectorRegex = @"\b(in|over)\b"; - public const string SinceYearSuffixRegex = @"(^\s*{SinceRegex}((vanaf|sedert|sinds)\s+(het\s+)?jaar\s+)?{YearSuffix})"; - public static readonly string WithinNextPrefixRegex = $@"\b(in(\s+de|het)?(\s+(?{NextPrefixRegex}))?)\b"; - public static readonly string MorningStartEndRegex = $@"(^(('s|des)\s+morgens|in de morgen|('s|des)\s+ochtends|in de ochtend{AmDescRegex}))|((('s|des)\s+morgens|in de morgen|('s|des)\s+ochtends|in de ochtend{AmDescRegex})$)"; - public static readonly string AfternoonStartEndRegex = $@"(^(('s|des)\s+middags|in de (na)?middag|{PmDescRegex}))|((('s|des)\s+middags|in de (na)?middag|{PmDescRegex})$)"; - public const string EveningStartEndRegex = @"(^(avond|('s|des)?\s+avonds))|((avond|('s|des)?\s+avonds)$)"; - public const string NightStartEndRegex = @"(^(gedurende de nacht|vannacht|nacht|('s|des)?\s+nachts))|((gedurende de nacht|vannacht|('s|des)?\s+nachts|nacht)$)"; - public const string InexactNumberRegex = @"\b(een aantal|meerdere|enkele|verscheidene|)\b"; + public static readonly string HolidayList1 = $@"(?goede vrijdag|pasen|((eerste|tweede)\s+)?paasdag|paas(zondag|maandag)|kerst(avond|mis)?|thanksgiving|halloween|(islamitisch\s+)?nieuwjaar|oud en nieuw|oud & nieuw|pinksteren|oude?jaar|oude?jaarsavond|silvester|silvesteravond|sinterklaas|sinterklaasfeest|sinterklaasavond|pakjesavond|eid al(-|\s+)fitr|eid al(-|\s+)adha|juneteenth|vrijheidsdag|jubilee\s+day)"; + public static readonly string HolidayList2 = $@"(?black friday|cyber monday|nationale dodenherdenking|nationale 
herdenking|dodenherdenking|dag\s+van\s+de\s+(leraar|leerkracht(en)?|arbeid|aarde)|feest\s+van\s+de\s+arbeid|yuandan|valentijn|sint-maartensfeest|sint-maarten|driekoningen|keti(\s+|-)?koti|ramadan|suikerfeest|offerfeest|allerheiligen|allerheiligenavond|franse nationale feestdag|bestorming van de bastille)"; + public static readonly string HolidayList3 = $@"(?(martin luther king|mlk|dankzeggings|valentijns|nieuwjaars|(eerste|1e|tweede|2e)\s+paas|prinsjes|konings|koninginne|bevrijdings|hemelvaarts|(eerste|1e|tweede|2e)\s+kerst|vader|moeder|meisjes|(amerikaanse|us\s+)?onafhankelijk(heid)?s|(nederlandse\s+)?veteranen|boomplant|(nationale\s+)?boomfeest)dag)"; + public static readonly string HolidayRegex = $@"\b(({StrictRelativeRegex}\s+({HolidayList1}|{HolidayList2}|{HolidayList3}))|(({HolidayList1}|{HolidayList2}|{HolidayList3})(\s+(van\s+)?({YearRegex}|{RelativeRegex}\s+jaar))?))\b"; + public static readonly string AMTimeRegex = $@"(?{ApostrofsRegex}\s*(morgens|ochtends)|in\s+de\s+(morgen|ochtend))"; + public static readonly string PMTimeRegex = $@"(?{ApostrofsRegex}\s*(middags|avonds|nachts)|(in\s+de\s+)?(deze\s+)?((na)?middag|avond|nacht))\b"; + public const string MorningTimeRegex = @"(morgens?|ochtends?)"; + public const string NightTimeRegex = @"(nacht)"; + public const string NowTimeRegex = @"\b(nu)\b"; + public const string RecentlyTimeRegex = @"\b(kort\s+geleden|eerder)\b"; + public const string AsapTimeRegex = @"\b(zo\s+snel\s+mogelijk|zsm)\b"; + public const string InclusiveModPrepositions = @"(?((in|tegen|tijdens|op|om)\s+of\s+)|(\s+of\s+(in|tegen|tijdens|op)))"; + public static readonly string AfterRegex = $@"(\b{InclusiveModPrepositions}?((na(\s+afloop\s+van)?|(?>=)|>)"; + public static readonly string BeforeRegex = $@"(\b(?(al\s+)?zo\s+laat\s+als)){InclusiveModPrepositions}?\b\s*)|(?)((?<=)|<)"; + public const string SinceRegex = 
@"(\b(sinds|na\s+of\s+gelijk\s+aan|(startend|beginnend)\s+(vanaf|op|met)|(al\s+)?zo\s+vroeg\s+als|(elk|ieder)\s+moment\s+vanaf|een\s+tijdstip\s+vanaf)\b\s*)|(?=)"; + public const string AroundRegex = @"(\b(rond(om)?|ongeveer(\s+om)?)\s*\b)"; + public const string AgoRegex = @"\b(geleden|(voor|eerder\s+dan)\s+(?gisteren|vandaag))\b"; + public const string LaterRegex = @"\b(later|vanaf\s+nu|(vanaf|na|sedert)\s+(?morgen|vandaag))\b"; + public const string BeforeAfterRegex = @"\b(gerekend\s+)?((?voor(dat)?)|(?van(af)?|na))\b"; + public static readonly string ModPrefixRegex = $@"\b({RelativeRegex}|{AroundRegex}|{BeforeRegex}|{AfterRegex}|{SinceRegex})\b"; + public static readonly string ModSuffixRegex = $@"\b({AgoRegex}|{LaterRegex}|{BeforeAfterRegex}|{FutureSuffixRegex}|{PastSuffixRegex})\b"; + public const string InConnectorRegex = @"\b(in|over|na)(\s+de)?\b"; + public static readonly string SinceYearSuffixRegex = $@"(^\s*{SinceRegex}((vanaf|sedert|sinds)\s+(het\s+)?jaar\s+)?{YearSuffix})"; + public static readonly string WithinNextPrefixRegex = $@"\b((binnen)(\s+de|het)?(\s+(?{NextPrefixRegex}))?)\b"; + public const string TodayNowRegex = @"\b(vandaag|nu)\b"; + public static readonly string MorningStartEndRegex = $@"(^(({ApostrofsRegex}|des)\s+(morgens|ochtends)|in\s+de\s+(na)?(morgen|ochtend)|deze\s+(morgen|ochtend)|(morgen|ochtend)\s+in\s+het\s+begin|aan\s+het\s+einde?(\s+van(\s+de)?)?\s+(morgen|ochtend)|{AmDescRegex}|(morgen|ochtend)))|((({ApostrofsRegex}|des)\s+(morgens|ochtends)|deze\s+(morgen|ochtend)|in\s+de\s+(na)?(morgen|ochtend)|(morgen|ochtend)\s+in\s+het\s+begin|(morgen|ochtend)\s+aan\s+het\s+einde?|{AmDescRegex}|(morgen|ochtend))$)"; + public static readonly string AfternoonStartEndRegex = 
$@"(^(({ApostrofsRegex}|des)\s+middags|in\s+de\s+(na)?middag|deze\s+middag|aan\s+het\s+einde?(\s+van(\s+de)?)?\s+middag|{PmDescRegex}))|((({ApostrofsRegex}|des)?\s+middags|in\s+de\s+(na)?middag|deze\s+middag|middag\s+in\s+het\s+begin|middag\s+aan\s+het\s+einde?|{PmDescRegex}|middag)$)"; + public static readonly string EveningStartEndRegex = $@"(^(({ApostrofsRegex}|des)\s+avonds|in\s+de\s+(na)?avond|deze\s+avond|avond\s+in\s+het\s+begin|aan\s+het\s+einde?(\s+van(\s+de)?)?\s+avond|{PmDescRegex}|avond))|((({ApostrofsRegex}|des)?\s+avonds|deze\s+avond|in\s+de\s+(na)?avond|avond\s+in\s+het\s+begin|avond\s+aan\s+het\s+einde?|{PmDescRegex}|avond)$)"; + public static readonly string NightStartEndRegex = $@"(^(gedurende de nacht|vannacht|nacht|({ApostrofsRegex}|des)?\s+nachts))|((gedurende\s+de\s+nacht|vannacht|({ApostrofsRegex}|des)?\s+nachts|nacht\s+in\s+het\s+begin|nacht)$)"; + public const string InexactNumberRegex = @"\b((een\s+)?aantal|meerdere|enkele|verscheidene|wat|enige|(?(een\s+)?paar))\b"; public static readonly string InexactNumberUnitRegex = $@"({InexactNumberRegex})\s+({DurationUnitRegex})"; public static readonly string RelativeTimeUnitRegex = $@"((({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+({TimeUnitRegex}))|((de|het|mijn))\s+({RestrictedTimeUnitRegex}))"; public static readonly string RelativeDurationUnitRegex = $@"(((?<=({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+)({DurationUnitRegex}))|((the|my))\s+({RestrictedTimeUnitRegex}))"; public static readonly string ReferenceDatePeriodRegex = $@"\b{ReferencePrefixRegex}\s+(?week|maand|jaar|decennium|weekend)\b"; public const string ConnectorRegex = @"^(-|,|voor|t|rond(om)?|@)$"; - public const string FromToRegex = @"\b(van).+(tot)\b.+"; + public const string FromToRegex = @"\b(van(af)?).+(tot)\b.+"; public const string SingleAmbiguousMonthRegex = @"^(de\s+)?(mei)$"; public const string SingleAmbiguousTermsRegex = @"^(de\s+)?(dag|week|maand|jaar)$"; public const string 
UnspecificDatePeriodRegex = @"^(week|weekend|maand|jaar)$"; - public const string PrepositionSuffixRegex = @"\b(op|in|om|rond(om)?|van|tot)$"; - public const string FlexibleDayRegex = @"(?([A-Za-z]+\s)?[A-Za-z\d]+)"; - public static readonly string ForTheRegex = $@"\b((((?<=for\s+)de\s+{FlexibleDayRegex})|((?<=om\s+)(de\s+)?{FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\s*(,|\.|!|\?|$)))"; + public const string PrepositionSuffixRegex = @"\b((op|in)(\s+de)?|om|rond(om)?|van|tot)$"; + public const string FlexibleDayRegex = @"(?([A-Za-zë]+\s+)?[A-Za-zë\d]+?\s*(ste|de|e))"; + public static readonly string ForTheRegex = $@"\b((((?<=voor\s+)de\s+{FlexibleDayRegex})|((?<=op\s+)de\s+{FlexibleDayRegex}(?<=(ste|de|e))))(?(\s+(tussen|binnen|terug|tegen|aan|uit|mee|bij|vol|uit|aan|op|in|na|af)\s*)?(\s+(ge\w\w\w+|\w\w\w+en)\s*)?(,|\.|!|\?|$)))"; public static readonly string WeekDayAndDayOfMonthRegex = $@"\b{WeekDayRegex}\s+(de\s+{FlexibleDayRegex})\b"; - public static readonly string WeekDayAndDayRegex = $@"\b{WeekDayRegex}\s+(?!(de)){DayRegex}(?!([-]|(\s+({AmDescRegex}|{PmDescRegex}|{OclockRegex}))))\b"; - public const string RestOfDateRegex = @"\brest\s+(van\s+)?((de|het|mijn|dit|deze|huidige)\s+)?(?week|maand|jaar|decennium)\b"; - public const string RestOfDateTimeRegex = @"\brest\s+(van\s+)?((de|het|mijn|dit|deze|huidige)\s+)?(?dag)\b"; - public const string MealTimeRegex = @"\b((tijdens\s+de\s+)?(?lunch))|((om|tegen)\s+lunchtijd)\b"; - public const string AmbiguousRangeModifierPrefix = @"^[.]"; - public static readonly string NumberEndingPattern = $@"^(\s+(?vergadering|afspraak|conferentie|telefoontje|skype-gesprek)\s+om\s+(?{PeriodHourNumRegex}|{HourRegex})((\.)?$|(\.,|,|!|\?)))"; + public static readonly string WeekDayAndDayRegex = $@"\b{WeekDayRegex}\s+{DayRegex}(?!([-]|:\d+|\.\d+|(\s+({AmDescRegex}|{PmDescRegex}|{OclockRegex}))))\b"; + public const string RestOfDateRegex = @"\brest\s+(van\s+)?((de|het|mijn|dit|deze|(de\s+)?huidige)\s+)?(?week|maand|jaar|decennium)\b"; + 
public const string RestOfDateTimeRegex = @"\brest\s+(van\s+)?((de|het|mijn|dit|deze|(de\s+)?huidige)\s+)?(?vandaag|dag)\b"; + public const string MealTimeRegex = @"\b((((tijdens\s+)?de|het)\s+)?(?ontbijt|lunch|avondeten)|((om|tegen|tijdens)\s+)?(?lunchtijd))\b"; + public const string AmbiguousRangeModifierPrefix = @"(voor)"; + public static readonly string PotentialAmbiguousRangeRegex = $@"\b{AmbiguousRangeModifierPrefix}(?!\s+het\s+(einde?|begin(nen)?))(.+\b(boven|later|groter|erna|daarna|hoger|(?{BaseDateTime.RangeConnectorSymbolRegex}))\b)"; + public static readonly string NumberEndingPattern = $@"^(\s+((?vergadering|afspraak|conferentie|telefoontje|skype-gesprek)\s+)?(om|naar)\s+(?{PeriodHourNumRegex}|{HourRegex})((\.)?$|(\.,|,|!|\?)))"; public const string OneOnOneRegex = @"\b(1\s*:\s*1)|(één\s+(op\s)één|één\s*-\s*één|één\s*:\s*één)\b"; - public static readonly string LaterEarlyPeriodRegex = $@"\b({PrefixPeriodRegex})\s*\b\s*(?{OneWordPeriodRegex})\b"; + public static readonly string LaterEarlyPeriodRegex = $@"\b({PrefixPeriodRegex})\s*\b\s*(?{OneWordPeriodRegex}|(?{BaseDateTime.FourDigitYearRegex}))\b"; public static readonly string WeekWithWeekDayRangeRegex = $@"\b((?({NextPrefixRegex}|{PreviousPrefixRegex}|deze)\s+week)((\s+tussen\s+{WeekDayRegex}\s+en\s+{WeekDayRegex})|(\s+van\s+{WeekDayRegex}\s+tot\s+{WeekDayRegex})))\b"; public const string GeneralEndingRegex = @"^\s*((\.,)|\.|,|!|\?)?\s*$"; public const string MiddlePauseRegex = @"\s*(,)\s*"; @@ -262,15 +308,17 @@ public static class DateTimeDefinitions public const string OrRegex = @"\s*((\b|,\s*)(of|en)\b|,)\s*"; public static readonly string YearPlusNumberRegex = $@"\b(jaar\s+((?(\d{{3,4}}))|{FullTextYearRegex}))\b"; public static readonly string NumberAsTimeRegex = $@"\b({WrittenTimeRegex}|{PeriodHourNumRegex}|{BaseDateTime.HourRegex})\b"; - public static readonly string TimeBeforeAfterRegex = $@"\b(((?<=\b(voor|niet later 
dan|na)\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}|{MidTimeRegex}))|{MidTimeRegex})\b"; - public const string DateNumberConnectorRegex = @"^\s*(?\s+op)\s*$"; - public const string DecadeRegex = @"(?(de\s+jaren\s+(vijftig|zestig|zeventig|tachtig|negentig))|(fifties|sixties|seventies|eighties|nineties|zeroes|tens|tweeduizend|21e eeuw|(ee|éé)nentwintigste eeuw))"; - public static readonly string DecadeWithCenturyRegex = $@"(de\s+)?(((?\d|1\d|2\d)?(')?(?\d0)(')?s)|(({CenturyRegex}(\s+|-)(en\s+)?)?{DecadeRegex})|({CenturyRegex}(\s+|-)(en\s+)?(?tien|honderd)))"; - public static readonly string RelativeDecadeRegex = $@"\b((de\s+)?{RelativeRegex}\s+((?[\w,]+)\s+)?decennia?)\b"; - public const string SuffixAfterRegex = @"\b(((bij)\s)?(of|en)\s+(boven|na|later|groter)(?!\s+dan))\b"; + public static readonly string TimeBeforeAfterRegex = $@"\b(((?<=\b(voor|niet later dan|na|door)\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}|{MidTimeRegex}))|{MidTimeRegex})\b"; + public const string DateNumberConnectorRegex = @"^\s*(?\s+om)\s*$"; + public const string DecadeRegex = @"(?(nul|tien|twintig|dertig|veertig|vijftig|zestig|zeventig|tachtig|negentig)|(fifties|sixties|seventies|eighties|nineties|zeroes|tens|tweeduizend|(ee|éé)nentwintigste\s+eeuw))"; + public static readonly string DecadeWithCenturyRegex = $@"\b(de\s+)?(jaren\s+)?((?1\d|2\d|\d)?({ApostrofRegex})?(?\d0)({ApostrofRegex})?s?)(?!%)\b|(({CenturyRegex}(\s+|-)?(en\s+)?|(?<=\b(de|jaren)\s+)){DecadeRegex})|({CenturyRegex}(\s+|-)?(en\s+)?(?tien|honderd)))"; + public static readonly string RelativeDecadeRegex = $@"\b(((de|het)\s+)?{RelativeRegex}\s+((?[\w,]+)\s+)?decenni(a|um)?)\b"; + public const string SuffixAfterRegex = @"\b(((bij)\s)?(of|en)\s+(boven|later|groter|erna|daarna|hoger)(?!\s+dan))\b"; public const string DateAfterRegex = @"\b((of|en)\s+(hoger|later|groter)(?!\s+dan))\b"; - public static readonly string YearPeriodRegex = 
$@"((((vanaf|tijdens|gedurende|in)\s+)?{YearRegex}\s*({TillRegex})\s*{YearRegex})|(((tussen)\s+){YearRegex}\s*({RangeConnectorRegex})\s*{YearRegex}))"; - public static readonly string ComplexDatePeriodRegex = $@"(((vanaf|tijdens|gedurende|in)\s+)?(?.+)\s*({TillRegex})\s*(?.+)|((tussen)\s+)(?.+)\s*({RangeConnectorRegex})\s*(?.+))"; + public static readonly string YearPeriodRegex = $@"((((van(af)?|tijdens|gedurende|in)\s+)?{YearRegex}\s*({TillRegex})\s*{YearRegex})|(((tussen)\s+){YearRegex}\s*({RangeConnectorRegex})\s*{YearRegex}))"; + public const string StartMiddleEndRegex = @"\b((?(((de|het)\s+)?(start|begin)\s+van\s+)?)(?((het\s+)?midden\s+van\s+)?)(?((het\s+)?einde?\s+van\s+)?))"; + public static readonly string ComplexDatePeriodRegex = $@"(((van(af)?|tijdens|gedurende|in(\s+de)?)\s+)?{StartMiddleEndRegex}(?.+)\s*({TillRegex})\s*{StartMiddleEndRegex}(?.+)|((tussen)\s+){StartMiddleEndRegex}(?.+)\s*({RangeConnectorRegex})\s*{StartMiddleEndRegex}(?.+)|(?{WrittenMonthRegex})\s+(?{WrittenMonthRegex}(\s+|\s*,\s*){YearRegex}))"; + public static readonly string ComplexTillRegex = $@"({TillRegex}|{WrittenMonthRegex})"; public static readonly Dictionary UnitMap = new Dictionary { { @"millennium", @"1000Y" }, @@ -280,22 +328,39 @@ public static class DateTimeDefinitions { @"decennia", @"10Y" }, { @"jaren", @"Y" }, { @"jaar", @"Y" }, + { @"jr", @"Y" }, { @"maanden", @"MON" }, { @"maand", @"MON" }, + { @"mnd", @"MON" }, { @"weken", @"W" }, { @"week", @"W" }, + { @"weekend", @"WE" }, + { @"weekenden", @"WE" }, { @"dagen", @"D" }, { @"dag", @"D" }, + { @"werkdagen", @"D" }, + { @"werkdag", @"D" }, + { @"weekdagen", @"D" }, + { @"weekdag", @"D" }, + { @"vandaag", @"D" }, + { @"dgn", @"D" }, + { @"nachten", @"D" }, + { @"nacht", @"D" }, { @"uren", @"H" }, { @"uur", @"H" }, { @"u", @"H" }, { @"minuten", @"M" }, { @"minuut", @"M" }, { @"min", @"M" }, + { @"min.", @"M" }, { @"secondes", @"S" }, + { @"seconden", @"S" }, { @"seconde", @"S" }, { @"secs", @"S" }, - { @"sec", @"S" } + { 
@"sec", @"S" }, + { @"kwartier", @"H" }, + { @"kwartier uur", @"H" }, + { @"halfuur", @"H" } }; public static readonly Dictionary UnitValueMap = new Dictionary { @@ -304,23 +369,39 @@ public static class DateTimeDefinitions { @"decenniën", 315360000 }, { @"jaren", 31536000 }, { @"jaar", 31536000 }, + { @"jr", 31536000 }, { @"maanden", 2592000 }, { @"maand", 2592000 }, + { @"mnd", 2592000 }, { @"weken", 604800 }, { @"week", 604800 }, + { @"weekenden", 172800 }, + { @"weekend", 172800 }, { @"dagen", 86400 }, { @"dag", 86400 }, + { @"vandaag", 86400 }, + { @"dgn", 86400 }, + { @"nachten", 86400 }, + { @"nacht", 86400 }, + { @"werkdagen", 86400 }, + { @"werkdag", 86400 }, + { @"weekdagen", 86400 }, + { @"weekdag", 86400 }, { @"uren", 3600 }, { @"uur", 3600 }, { @"u", 3600 }, { @"minuten", 60 }, { @"minuut", 60 }, { @"min", 60 }, + { @"min.", 60 }, { @"secondes", 1 }, { @"seconden", 1 }, { @"seconde", 1 }, { @"secs", 1 }, - { @"sec", 1 } + { @"sec", 1 }, + { @"kwartier", 3600 }, + { @"kwartier uur", 3600 }, + { @"halfuur", 3600 } }; public static readonly Dictionary SpecialYearPrefixesMap = new Dictionary { @@ -367,10 +448,20 @@ public static class DateTimeDefinitions { @"vrijdag", 5 }, { @"zaterdag", 6 }, { @"zondag", 0 }, + { @"zondagmiddag", 0 }, + { @"maandagen", 1 }, + { @"dinsdagen", 2 }, + { @"woensdagen", 3 }, + { @"donderdagen", 4 }, + { @"vrijdagen", 5 }, + { @"zaterdagen", 6 }, + { @"zondagen", 0 }, { @"ma", 1 }, { @"ma.", 1 }, { @"dins", 2 }, + { @"dins.", 2 }, { @"woens", 3 }, + { @"woens.", 3 }, { @"di", 2 }, { @"di.", 2 }, { @"wo", 3 }, @@ -390,7 +481,26 @@ public static class DateTimeDefinitions { @"zo", 0 }, { @"zo.", 0 }, { @"zon", 0 }, - { @"zon.", 0 } + { @"zon.", 0 }, + { @"monday", 1 }, + { @"tuesday", 2 }, + { @"wednesday", 3 }, + { @"thursday", 4 }, + { @"friday", 5 }, + { @"saturday", 6 }, + { @"sunday", 0 }, + { @"mon", 1 }, + { @"tue", 2 }, + { @"tues", 2 }, + { @"wed", 3 }, + { @"wedn", 3 }, + { @"weds", 3 }, + { @"thu", 4 }, + { @"thur", 4 
}, + { @"thurs", 4 }, + { @"fri", 5 }, + { @"sat", 6 }, + { @"sun", 0 } }; public static readonly Dictionary MonthOfYear = new Dictionary { @@ -409,6 +519,7 @@ public static class DateTimeDefinitions { @"jan", 1 }, { @"feb", 2 }, { @"mar", 3 }, + { @"mrt", 3 }, { @"apr", 4 }, { @"jun", 6 }, { @"jul", 7 }, @@ -422,6 +533,7 @@ public static class DateTimeDefinitions { @"jan.", 1 }, { @"feb.", 2 }, { @"mar.", 3 }, + { @"mrt.", 3 }, { @"apr.", 4 }, { @"jun.", 6 }, { @"jul.", 7 }, @@ -469,6 +581,7 @@ public static class DateTimeDefinitions { @"negen", 9 }, { @"tien", 10 }, { @"elf", 11 }, + { @"elven", 11 }, { @"twaalf", 12 }, { @"dertien", 13 }, { @"veertien", 14 }, @@ -485,6 +598,7 @@ public static class DateTimeDefinitions { @"drieëntwintig", 23 }, { @"vierentwintig", 24 }, { @"vijfentwintig", 25 }, + { @"vijventwintig", 25 }, { @"zesentwintig", 26 }, { @"zevenentwintig", 27 }, { @"achtentwintig", 28 }, @@ -529,10 +643,10 @@ public static class DateTimeDefinitions { @"zevenenzestig", 67 }, { @"achtenzestig", 68 }, { @"negenenzestig", 69 }, - { @"drieënzeventig", 70 }, + { @"zeventig", 70 }, { @"eenenzeventig", 71 }, { @"tweeënzeventig", 72 }, - { @"zeventig", 73 }, + { @"drieënzeventig", 73 }, { @"vierenzeventig", 74 }, { @"vijfenzeventig", 75 }, { @"zesenzeventig", 76 }, @@ -615,8 +729,14 @@ public static class DateTimeDefinitions public static readonly Dictionary DoubleNumbers = new Dictionary { { @"half", 0.5 }, + { @"anderhalf", 1.5 }, + { @"anderhalve", 1.5 }, { @"halve", 0.5 }, + { @"een kwartier", 0.25 }, + { @"kwartier", 0.25 }, { @"kwart", 0.25 }, + { @"driekwart", 0.75 }, + { @"drie kwart", 0.75 }, { @"kwartaal", 0.25 } }; public static readonly Dictionary> HolidayNames = new Dictionary> @@ -626,47 +746,48 @@ public static class DateTimeDefinitions { @"prinsjesdag", new string[] { @"prinsjesdag" } }, { @"dodenherdenking", new string[] { @"nationaledodenherdenking", @"nationaleherdenking", @"dodenherdenking" } }, { @"bevrijdingsdag", new string[] { 
@"bevrijdingsdag" } }, - { @"hemelvaartsdag", new string[] { @"hemelvaartsdag" } }, - { @"teachersday", new string[] { @"dagvandeleraar" } }, + { @"teachersday", new string[] { @"dagvandeleraar", @"dagvandeleerkracht", @"dagvandeleerkrachten" } }, { @"veteransday", new string[] { @"nederlandseveteranendag", @"veteranendag" } }, { @"eerstekerstdag", new string[] { @"eerstekerstdag", @"1ekerstdag" } }, { @"tweedekerstdag", new string[] { @"tweedekerstdag", @"2ekerstdag" } }, - { @"dagvandearbeid", new string[] { @"dagvandearbeid", @"dagvandenarbeid" } }, + { @"dagvandearbeid", new string[] { @"dagvandearbeid", @"feestvandearbeid" } }, + { @"treeplantingday", new string[] { @"nationaleboomfeestdag", @"boomfeestdag", @"boomplantdag" } }, + { @"sinterklaas", new string[] { @"sinterklaas", @"sinterklaasavond", @"sinterklaasfeest", @"pakjesavond" } }, + { @"driekoningen", new string[] { @"driekoningen" } }, + { @"ketikoti", new string[] { @"ketikoti", @"keti-koti" } }, + { @"stmartinsday", new string[] { @"sint-maartensfeest", @"sint-maarten" } }, + { @"ascensionday", new string[] { @"hemelvaartsdag" } }, + { @"whitesunday", new string[] { @"pinksteren" } }, { @"christmas", new string[] { @"kerstfeest", @"kerstmis", @"kerst", @"xmas" } }, - { @"easterday", new string[] { @"pasen", @"paasdag" } }, - { @"fathers", new string[] { @"vaderdag", @"vadersdag" } }, - { @"mothers", new string[] { @"moederdag", @"moedersdag" } }, - { @"singleday", new string[] { @"singleday", @"vrijgezellendag" } }, - { @"femaleday", new string[] { @"femaleday", @"vrouwendag" } }, - { @"treeplantingday", new string[] { @"boomplantdag", @"boomfeestdag" } }, - { @"girlsday", new string[] { @"girlsday", @"meisjesdag" } }, - { @"newyear", new string[] { @"oudennieuw", @"oud&nieuw" } }, + { @"easterday", new string[] { @"pasen", @"eerstepaasdag", @"paasdag", @"paaszondag" } }, + { @"eastermonday", new string[] { @"tweedepaasdag", @"paasmaandag" } }, + { @"fathers", new string[] { @"vaderdag" } }, + { 
@"goodfriday", new string[] { @"goedevrijdag" } }, + { @"mothers", new string[] { @"moederdag" } }, + { @"newyear", new string[] { @"nieuwjaar" } }, { @"newyearday", new string[] { @"nieuwjaarsdag" } }, { @"newyearsday", new string[] { @"nieuwjaarsdag" } }, - { @"newyeareve", new string[] { @"oudejaarsavond", @"oudjaarsavond", @"silvester", @"silvesteravond" } }, + { @"newyeareve", new string[] { @"oudjaar", @"oudejaar", @"oudejaarsavond", @"oudjaarsavond", @"silvester", @"silvesteravond", @"oudennieuw", @"oud&nieuw" } }, { @"valentinesday", new string[] { @"valentijnsdag", @"valetijnsdag" } }, - { @"aprilfools", new string[] { @"1aprilgrap", @"1april" } }, - { @"independenceday", new string[] { @"onafhankelijkheidsdag" } }, + { @"independenceday", new string[] { @"onafhankelijkheidsdag", @"onafhankelijksdag" } }, { @"bastilleday", new string[] { @"fransenationalefeestdag", @"bestormingvandebastille" } }, { @"halloweenday", new string[] { @"halloween", @"allerheiligenavond" } }, { @"allhallowday", new string[] { @"allerheiligen" } }, - { @"allsaintsday", new string[] { @"allerheiligen" } }, { @"allsoulsday", new string[] { @"allerzielen" } }, { @"christmaseve", new string[] { @"kerstavond" } }, - { @"columbus", new string[] { @"columbusday", @"columbusdag" } }, + { @"yuandan", new string[] { @"yuandan" } }, + { @"chinesenewyear", new string[] { @"chineesnieuwjaar", @"lentefestival", @"lentefeest" } }, + { @"ramadan", new string[] { @"ramadan" } }, + { @"eidalfitr", new string[] { @"eidalfitr", @"suikerfeest" } }, + { @"sacrifice", new string[] { @"eidaladha", @"offerfeest" } }, + { @"islamicnewyear", new string[] { @"islamitischnieuwjaar" } }, { @"thanksgiving", new string[] { @"thanksgivingday", @"thanksgiving", @"dankzeggingsdag" } }, { @"martinlutherking", new string[] { @"martinlutherkingday", @"martinlutherkingjrday", @"martinlutherkingdag", @"mlkdag" } }, - { @"washingtonsbirthday", new string[] { @"washingtonsbirthday", @"washingtonbirthday" } }, - { 
@"yuandan", new string[] { @"yuandan" } }, - { @"youthday", new string[] { @"jongerendag" } }, - { @"childrenday", new string[] { @"childrenday", @"childday", @"kinderendag" } }, - { @"stgeorgeday", new string[] { @"sintjoris" } }, - { @"mayday", new string[] { @"dagvandearbeid" } }, - { @"stpatrickday", new string[] { @"stpatrickday", @"stpatricksday" } }, { @"usindependenceday", new string[] { @"amerikaanseonafhankelijkheidsdag", @"usonafhankelijkheidsdag" } }, - { @"groundhougday", new string[] { @"groundhougday", @"bosmarmottendag" } }, - { @"inaugurationday", new string[] { @"inaugurationday", @"inauguratiedag" } }, - { @"arborday", new string[] { @"boomfeestdag" } } + { @"blackfriday", new string[] { @"blackfriday" } }, + { @"cybermonday", new string[] { @"cybermonday" } }, + { @"earthday", new string[] { @"dagvandeaarde" } }, + { @"juneteenth", new string[] { @"jubileeday", @"juneteenth", @"vrijheidsdag" } } }; public static readonly Dictionary WrittenDecades = new Dictionary { @@ -697,13 +818,33 @@ public static class DateTimeDefinitions { @"tachtiger jaren", 80 }, { @"jaren 90", 90 }, { @"jaren negentig", 90 }, - { @"negentiger jaren", 90 } + { @"nul", 0 }, + { @"tien", 10 }, + { @"twintig", 20 }, + { @"twintiger", 20 }, + { @"dertig", 30 }, + { @"dertiger", 30 }, + { @"veertig", 40 }, + { @"veertiger", 40 }, + { @"vijftig", 50 }, + { @"vijftiger", 50 }, + { @"zestig", 60 }, + { @"zestiger", 60 }, + { @"zeventig", 70 }, + { @"zeventiger", 70 }, + { @"tachtig", 80 }, + { @"tachtiger", 80 }, + { @"negentig", 90 }, + { @"negentiger", 90 }, + { @"honderd", 0 } }; public static readonly Dictionary SpecialDecadeCases = new Dictionary { { @"21e eeuw", 2000 }, { @"eenentwintigste eeuw", 2000 }, - { @"tweeduizend", 2000 } + { @"tweeduizend", 2000 }, + { @"jaren nul", 0 }, + { @"nul", 0 } }; public const string DefaultLanguageFallback = @"DMY"; public static readonly IList SuperfluousWordList = new List @@ -714,24 +855,49 @@ public static class DateTimeDefinitions 
@"say", @"like" }; - public static readonly string[] DurationDateRestrictions = { }; + public static readonly string[] DurationDateRestrictions = { @"vandaag", @"nu" }; public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { - { @"\bmorning|afternoon|evening|night|day\b", @"\b(good\s+(morning|afternoon|evening|night|day))|(nighty\s+night)\b" }, - { @"\bnow\b", @"\b(^now,)|\b((is|are)\s+now\s+for|for\s+now)\b" }, - { @"\bmay\b", @"\b((^may i)|(i|you|he|she|we|they)\s+may|(may\s+((((also|not|(also not)|well)\s+)?(be|ask|contain|constitute|email|e-mail|take|have|result|involve|get|work|reply|differ))|(or may not))))\b" } + { @"^\d{4}$", @"(\d\.\d{4}|\d{4}\.\d)" }, + { @"\b(ontbijt|lunch|avondeten)$", @"(? AmbiguityTimeFiltersDict = new Dictionary + { + { @"^(\d{1,2}|\p{L}+)\s+uur$", @"(?(? MorningTermList = new List { - @"morgen" + @"morgen", + @"morgens", + @"ochtend", + @"ochtends" }; public static readonly IList AfternoonTermList = new List { - @"middag" + @"middag", + @"namiddag", + @"voormiddag" }; public static readonly IList EveningTermList = new List { - @"avond" + @"avond", + @"avonden" + }; + public static readonly IList MealtimeBreakfastTermList = new List + { + @"ontbijt" + }; + public static readonly IList MealtimeLunchTermList = new List + { + @"lunch", + @"lunchtijd" + }; + public static readonly IList MealtimeDinnerTermList = new List + { + @"avondeten" }; public static readonly IList DaytimeTermList = new List { @@ -744,23 +910,37 @@ public static class DateTimeDefinitions }; public static readonly IList SameDayTerms = new List { - @"vandaag" + @"vandaag", + @"huidige dag", + @"huidige datum", + @"actuele datum", + @"actuele dag", + @"deze morgen", + @"actuele morgen", + @"vanmorgen" }; public static readonly IList PlusOneDayTerms = new List { @"morgen", @"dag na", - @"volgende dag" + @"dag erna", + @"volgende dag", + @"morgenochtend", + @"morgenavond" }; public static readonly IList MinusOneDayTerms = new List { @"gisteren", @"dag 
voor", - @"vorige dag" + @"dag ervoor", + @"vorige dag", + @"gisterenochtend", + @"gisterenavond" }; public static readonly IList PlusTwoDayTerms = new List { - @"overmorgen" + @"overmorgen", + @"dag na morgen" }; public static readonly IList MinusTwoDayTerms = new List { @@ -771,7 +951,10 @@ public static class DateTimeDefinitions @"deze", @"volgend", @"volgende", - @"dit" + @"eropvolgend", + @"eropvolgende", + @"dit", + @"die" }; public static readonly IList LastCardinalTerms = new List { @@ -780,16 +963,19 @@ public static class DateTimeDefinitions }; public static readonly IList MonthTerms = new List { - @"maand" + @"maand", + @"maanden" }; public static readonly IList MonthToDateTerms = new List { @"maand tot heden", + @"maand tot op heden", @"vanaf vorig maandeinde" }; public static readonly IList WeekendTerms = new List { - @"weekend" + @"weekend", + @"weekenden" }; public static readonly IList WeekTerms = new List { @@ -797,7 +983,8 @@ public static class DateTimeDefinitions }; public static readonly IList YearTerms = new List { - @"jaar" + @"jaar", + @"jaren" }; public static readonly IList GenericYearTerms = new List { @@ -806,7 +993,16 @@ public static class DateTimeDefinitions public static readonly IList YearToDateTerms = new List { @"jaar tot heden", + @"jaar tot op heden", @"vanaf vorig jaareinde" }; + public const string DayTypeRegex = @"^((we[er]k)?dag(en|elijkse?)?)$"; + public const string WeekTypeRegex = @"^(wekelijkse?|week)$"; + public const string WeekendTypeRegex = @"^(weekend(en)?)$"; + public const string BiWeekTypeRegex = @"^(tweewekelijkse?)$"; + public const string MonthTypeRegex = @"^(maand(elijkse?)?)$"; + public const string QuarterTypeRegex = @"^(kwartaal|driemaandelijkse?)$"; + public const string YearTypeRegex = @"^(elk\s+jaar|jaar(lijkse?)?)$"; + public const string SemiYearTypeRegex = @"^(halfjaar(lijkse?)?)$"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumberDefinitions.cs 
b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumbersDefinitions.cs similarity index 68% rename from .NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumberDefinitions.cs rename to .NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumbersDefinitions.cs index 952135cc65..e3649efac8 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumberDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumbersDefinitions.cs @@ -21,18 +21,20 @@ namespace Microsoft.Recognizers.Definitions.Dutch public static class NumbersDefinitions { - public const string LangMarker = @"Nl"; + public const string LangMarker = @"Dut"; public const bool CompoundNumberLanguage = true; public const bool MultiDecimalSeparatorCulture = false; + public const string DigitsNumberRegex = @"-?(\d+|\d{1,3}(\.\d{3})*)"; public const string RoundNumberIntegerRegex = @"(honderd|duizend|miljoen|miljard|biljoen)"; - public const string ZeroToNineIntegerRegex = @"(drie|zeven|acht|vier|vijf|nul|negen|een|één|twee|zes)"; + public const string ZeroToNineIntegerRegex = @"(((een)(?!\s+((honderdste|duizendste|miljoenste|miljardste|biljoenste)|(nulde|eende|eerste|tweede|derde|vierde|vijfd(e|en)|zesde|zevende|achtst(e|en)|negende|tiend(e|en)|elfde|twaalfde|dertiende|veertiende|vijftiende|zestiende|zeventiende|achttiende|negentiende|twintigste|dertigste|veertigste|vijftigste|zestigste|zeventigste|tachtigste|negentigste))))|(één|drie|zeven|acht|vier|vijf|nul|negen|twee|zes))"; + public const string TwoToNineIntegerRegex = @"(drie|zeven|acht|vier|vijf|negen|twee|zes)"; public const string NegativeNumberTermsRegex = @"(?(min|negatief)\s+)"; public static readonly string NegativeNumberSignRegex = $@"^{NegativeNumberTermsRegex}.*"; public const string AnIntRegex = @"(een|één)(?=\s)"; public const string TenToNineteenIntegerRegex = @"(zeventien|dertien|veertien|achttien|negentien|vijftien|zestien|elf|twaalf|tien)"; public const string TensNumberIntegerRegex = 
@"(zeventig|twintig|dertig|tachtig|negentig|veertig|vijftig|zestig)"; public static readonly string SeparaIntRegex = $@"((({TenToNineteenIntegerRegex}|({ZeroToNineIntegerRegex}(en|ën){TensNumberIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{RoundNumberIntegerRegex})(\s*{RoundNumberIntegerRegex})*))|{RoundNumberIntegerRegex}|(({AnIntRegex}(\s*{RoundNumberIntegerRegex})+))"; - public static readonly string AllIntRegex = $@"(((({TenToNineteenIntegerRegex}|({ZeroToNineIntegerRegex}(en|ën){TensNumberIntegerRegex})|{TensNumberIntegerRegex}|({ZeroToNineIntegerRegex}|{AnIntRegex}))?(\s*{RoundNumberIntegerRegex}))\s*(en\s*)?)*{SeparaIntRegex})"; + public static readonly string AllIntRegex = $@"(((({TenToNineteenIntegerRegex}|({ZeroToNineIntegerRegex}(en|ën){TensNumberIntegerRegex})|{TensNumberIntegerRegex}|({ZeroToNineIntegerRegex}|{AnIntRegex}))?(\s*{RoundNumberIntegerRegex}))\s*((en|ën)\s*)?)*{SeparaIntRegex})"; public const string PlaceHolderPureNumber = @"\b"; public const string PlaceHolderDefault = @"\D|\b"; public static readonly Func NumbersWithPlaceHolder = (placeholder) => $@"(((?volgende|vorige?|huidige|laatste?|(de\s+op\s+één\s+na\s+|de\s+een\s+voor\s+de\s+|die\s+voor\s+de\s+|twee\s+na\s+|voor)laatste)"; + public static readonly string SuffixBasicOrdinalRegex = $@"(((({ZeroToNineIntegerRegex}{RoundNumberIntegerRegex})|({RoundNumberIntegerRegex}{ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{RoundNumberIntegerRegex})\s*)*((en|ën)\s*)*{BasicOrdinalRegex})"; public static readonly string SuffixRoundNumberOrdinalRegex = $@"(({AllIntRegex}\s*){RoundNumberOrdinalRegex})"; - public static readonly string AllOrdinalRegex = $@"({SuffixBasicOrdinalRegex}|{SuffixRoundNumberOrdinalRegex})"; - public const string OrdinalSuffixRegex = @"(?<=\b)((\d*(1e|2e|3e|4e|5e|6e|7e|8e|9e|0e))|(1ste|2de|3de|4de|5de|6de|7de|8ste|9de|0de)|([0-9]*1[0-9]de)|([0-9]*[2-9][0-9]ste)|([0-9]*[0](1ste|2de|3de|4de|5de|6de|7de|8ste|9de|0de)))(?=\b)"; + 
public static readonly string AllOrdinalNumberRegex = $@"(?:{SuffixBasicOrdinalRegex}|{SuffixRoundNumberOrdinalRegex})"; + public static readonly string AllOrdinalRegex = $@"(?:{AllOrdinalNumberRegex}|{RelativeOrdinalRegex})"; + public const string OrdinalSuffixRegex = @"(?<=\b)((\d+\s*e)|[18]\s*ste|[092-7]\s*de|([0-9]*1[0-9]\s*de)|([0-9]*[2-9][0-9]\s*ste)|([0-9]*[0]([18]\s*ste|[092-7]\s*de)))(?=\b)"; public const string OrdinalNumericRegex = @"(?<=\b)(\d{1,3}(\s*.\s*\d{3})*\s*e)(?=\b)"; public static readonly string OrdinalRoundNumberRegex = $@"(?anderhalve|anderhalf)|(?driekwart)|half|halve|helft|kwart)"; + public const string FractionHalfRegex = @"([eë]nhalf|[eë]nhalve|ëneenhal(f|ve))$"; + public static readonly string[] OneHalfTokens = { @"een", @"half" }; + public static readonly string FractionMultiplierRegex = $@"(?((\s+en\s+)?(anderhalve|anderhalf|driekwart)|\s+en\s+(een|{TwoToNineIntegerRegex})\s+(half|derde|kwart|vierde|vijfd(e|en)|zesde|zevende|achtst(e|en)|negende|tiend(e|en))))"; + public static readonly string RoundMultiplierWithFraction = $@"(?<=(?(miljoen|miljard|biljoen))(?={FractionMultiplierRegex}?$)"; + public static readonly string RoundMultiplierRegex = $@"\b\s*(van\s+)?({RoundMultiplierWithFraction}|(?(honderd|duizend))$)"; + public static readonly string FractionNounRegex = $@"(?<=\b)(({AllIntRegex}\s+(en\s+)?)?(({AllIntRegex})(\s+|\s*-\s*|\s*/\s*)((({AllOrdinalNumberRegex})|({RoundNumberOrdinalRegex}))n?|halven|vierdes|kwart)|(een\s+(half|kwart)\s+){RoundNumberIntegerRegex}|{FractionUnitsRegex}(\s+{RoundNumberIntegerRegex})?))(?=\b)"; + public static readonly string FractionNounWithArticleRegex = $@"(?<=\b)((({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\s+(en\s)?)?(een)(\s+|\s*-\s*|\s*/\s*)(({AllOrdinalNumberRegex})|({RoundNumberOrdinalRegex})|({FractionUnitsRegex}))|{AllIntRegex}[eë]n(eenhalf|half|halve|helft|kwart))(?=\b)"; public static readonly string FractionPrepositionRegex = 
$@"(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?!,))(?=\b)"; public static readonly string FractionPrepositionWithinPercentModeRegex = $@"(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?!,))(?=\b)"; public static readonly string AllPointRegex = $@"((\s+{ZeroToNineIntegerRegex})+|(\s+{SeparaIntRegex}))"; @@ -70,30 +80,32 @@ public static class NumbersDefinitions public static readonly string NumberWithPrefixPercentage = $@"(percentage van)(\s*)({BaseNumbers.NumberReplaceToken})"; public static readonly string NumberWithPrepositionPercentage = $@"({BaseNumbers.NumberReplaceToken})\s*(uit|in|van|van\s+de)\s*({BaseNumbers.NumberReplaceToken})"; public const string TillRegex = @"(tot|--|-|—|——|~)"; - public const string MoreRegex = @"((groter|hoger|meer)(\s+dan|\s+als)?|boven|over|>)"; + public const string IncludeTillRegex = @"(tot en met)"; + public const string MoreRegex = @"((groter|hoger|meer)((\s+is)?(\s+dan|\s+als))?|boven|over|>)"; public const string LessRegex = @"((minder|lager|kleiner)(\s+dan|\s+als)?|beneden|onder|<)"; - public const string EqualRegex = @"(gelijk(\s+(aan|tot|als))?|(?)=)"; + public const string EqualRegex = @"(gelijk(\s+(aan|tot|als|dan))?|(?)=)"; public static readonly string MoreOrEqualPrefix = $@"((niet\s+{LessRegex})|(tenminste|op zijn minst|minstens))"; - public static readonly string MoreOrEqual = $@"(({MoreRegex}\s+of\s+{EqualRegex})|minstens|niet\s+{LessRegex}|>\s*=)"; - public const string MoreOrEqualSuffix = @"((en|of)\s+(meer|groter|hoger|grotere)((?!\s+dan)|(?!\s+als)))"; + public static readonly string MoreOrEqual = $@"(({MoreRegex}\s+of\s+{EqualRegex})|({EqualRegex}\s+of\s+{MoreRegex})|{MoreOrEqualPrefix}(\s+(of)?\s+{EqualRegex})?|niet\s+{LessRegex}|>\s*=)"; + public const string MoreOrEqualSuffix = @"((en|of)\s+(meer|groter|hoger|grotere)((?!\s+(dan|als))|(\s+(dan|als)(?!(\s*\d+)))))"; public static readonly string LessOrEqualPrefix = $@"((niet\s+{MoreRegex})|(hooguit|op zijn hoogst|op zijn meest))"; - public static readonly string 
LessOrEqual = $@"(({LessRegex}\s+of\s+{EqualRegex})|maximum|niet\s+{MoreRegex}|<\s*=)"; - public const string LessOrEqualSuffix = @"((en|of)\s+(minder|lager|kleiner)((?!\s+dan)|(?!\s+als)))"; + public static readonly string LessOrEqual = $@"(({LessRegex}\s+of\s+{EqualRegex})|({EqualRegex}\s+of\s+{LessRegex})|maximum|niet\s+{MoreRegex}|<\s*=)"; + public const string LessOrEqualSuffix = @"((en|of)\s+(minder|lager|kleiner)((?!\s+(dan|als))|(\s+(dan|als)(?!(\s*\d+)))))"; public const string NumberSplitMark = @"(?![,.](?!\d+))"; public const string MoreRegexNoNumberSucceed = @"((groter|hoger|meer)((?!\s+dan)|\s+(dan(?!(\s*\d+))))|(boven|over)(?!(\s*\d+)))"; public const string LessRegexNoNumberSucceed = @"((minder|lager|kleiner)((?!\s+dan)|\s+(dan(?!(\s*\d+))))|(beneden|onder)(?!(\s*\d+)))"; - public const string EqualRegexNoNumberSucceed = @"(gelijk?((?!\s+(aan|tot))|(\s+(aan|tot)(?!(\s*\d+)))))"; - public static readonly string OneNumberRangeMoreRegex1 = $@"({MoreOrEqual}|{MoreRegex})\s*(de\s+)?(?((?!((\.(?!\d+))|(,(?!\d+)))).)+)"; - public static readonly string OneNumberRangeMoreRegex2 = $@"(?((?!((\.(?!\d+))|(,(?!\d+)))).)+)\s*{MoreOrEqualSuffix}"; + public const string EqualRegexNoNumberSucceed = @"(gelijk((?!\s+(aan|tot))|(\s+(aan|tot)(?!(\s*\d+))))|evenveel(?!(\s*\d+)))"; + public static readonly string OneNumberRangeMoreRegex1 = $@"({MoreOrEqual}|{MoreRegex})\s*(de\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeMoreRegex2 = $@"(?({NumberSplitMark}.)+)\s*{MoreOrEqualSuffix}"; public static readonly string OneNumberRangeMoreSeparateRegex = $@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+of\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\s+(?({NumberSplitMark}.)+)(\s+of\s+){EqualRegexNoNumberSucceed})"; - public static readonly string OneNumberRangeLessRegex1 = $@"({LessOrEqual}|{LessRegex})\s*(de\s+)?(?((?!((\.(?!\d+))|(,(?!\d+)))).)+)"; - public static readonly string OneNumberRangeLessRegex2 = 
$@"(?((?!((\.(?!\d+))|(,(?!\d+)))).)+)\s*{LessOrEqualSuffix}"; + public static readonly string OneNumberRangeLessRegex1 = $@"({LessOrEqual}|{LessRegex})\s*(de\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeLessRegex2 = $@"(?({NumberSplitMark}.)+)\s*{LessOrEqualSuffix}"; public static readonly string OneNumberRangeLessSeparateRegex = $@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+of\s+){LessRegexNoNumberSucceed})|({LessRegex}\s+(?({NumberSplitMark}.)+)(\s+of\s+){EqualRegexNoNumberSucceed})"; - public static readonly string OneNumberRangeEqualRegex = $@"{EqualRegex}\s*(the\s+)?(?((?!((\.(?!\d+))|(,(?!\d+)))).)+)"; - public static readonly string TwoNumberRangeRegex1 = $@"tussen\s*(de\s+)?(?((?!((\.(?!\d+))|(,(?!\d+)))).)+)\s*en\s*(de\s+)?(?((?!((\.(?!\d+))|(,(?!\d+)))).)+)"; - public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\s*(en|maar|,)\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"; + public static readonly string OneNumberRangeEqualRegex = $@"{EqualRegex}\s*(de\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string TwoNumberRangeRegex1 = $@"tussen\s*(de\s+)?(?({NumberSplitMark}.)+)\s+en\s+(de\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\s*(en|(,\s*)?maar|,)\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"; public static readonly string TwoNumberRangeRegex3 = $@"({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\s*(en|maar|,)\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})"; - public static readonly string TwoNumberRangeRegex4 = $@"(van\s+)?(?((?!((\.(?!\d+))|(,(?!\d+))|\van\b)).)+)\s*{TillRegex}\s*(de\s+)?(?((?!((\.(?!\d+))|(,(?!\d+)))).)+)"; + public static readonly string TwoNumberRangeRegex4 = 
$@"(van\s+)?(?({AllIntRegex}|{AllFloatRegex}|{AllOrdinalRegex}|{DigitsNumberRegex}))\s*{TillRegex}\s*(de\s+)?(?({AllIntRegex}|{AllFloatRegex}|{AllOrdinalRegex}|{DigitsNumberRegex}))"; + public static readonly string TwoNumberRangeRegex5 = $@"(van\s+)?(?({AllIntRegex}|{AllFloatRegex}|{AllOrdinalRegex}|{DigitsNumberRegex}))\s*{IncludeTillRegex}\s*(de\s+)?(?({AllIntRegex}|{AllFloatRegex}|{AllOrdinalRegex}|{DigitsNumberRegex}))"; public const string AmbiguousFractionConnectorsRegex = @"^[.]"; public const char DecimalSeparatorChar = ','; public const string FractionMarkerToken = @"van de"; @@ -102,7 +114,7 @@ public static class NumbersDefinitions public const string WordSeparatorToken = @"en"; public static readonly string[] WrittenDecimalSeparatorTexts = { @"komma" }; public static readonly string[] WrittenGroupSeparatorTexts = { @"punt" }; - public static readonly string[] WrittenIntegerSeparatorTexts = { @"en" }; + public static readonly string[] WrittenIntegerSeparatorTexts = { @"en", @"ën" }; public static readonly string[] WrittenFractionSeparatorTexts = { @"uit", @"van de", @"op de", @"en" }; public const string HalfADozenRegex = @"(een\s+)?half\s+dozijn"; public const string GrossRegex = @"(een\s+)?gros"; @@ -150,6 +162,7 @@ public static class NumbersDefinitions { { @"nulde", 0 }, { @"eerste", 1 }, + { @"eende", 1 }, { @"tweede", 2 }, { @"secundair", 2 }, { @"half", 2 }, @@ -177,13 +190,23 @@ public static class NumbersDefinitions { @"achttiende", 18 }, { @"negentiende", 19 }, { @"twintigste", 20 }, + { @"eenentwintigste", 21 }, + { @"vijfentwintigste", 25 }, + { @"vijventwintigste", 25 }, { @"dertigste", 30 }, + { @"vijfendertigste", 35 }, { @"veertigste", 40 }, + { @"vijfenveertigste", 45 }, { @"vijftigste", 50 }, + { @"vijfenvijftigste", 55 }, { @"zestigste", 60 }, + { @"vijfenzestigste", 65 }, { @"zeventigste", 70 }, + { @"vijfenzeventigste", 75 }, { @"tachtigste", 80 }, + { @"vijfentachtigste", 85 }, { @"negentigste", 90 }, + { @"vijfennegentigste", 95 
}, { @"honderdste", 100 }, { @"duizendste", 1000 }, { @"miljoenste", 1000000 }, @@ -227,11 +250,31 @@ public static class NumbersDefinitions }; public static readonly Dictionary RelativeReferenceOffsetMap = new Dictionary { - { @"", @"" } + { @"laatst", @"0" }, + { @"laatste", @"0" }, + { @"volgende", @"1" }, + { @"huidige", @"0" }, + { @"vorige", @"-1" }, + { @"vorig", @"-1" }, + { @"de op één na laatste", @"-1" }, + { @"de een voor de laatste", @"-1" }, + { @"die voor de laatste", @"-1" }, + { @"voorlaatste", @"-1" }, + { @"twee na laatste", @"-2" } }; public static readonly Dictionary RelativeReferenceRelativeToMap = new Dictionary { - { @"", @"" } + { @"laatst", @"end" }, + { @"laatste", @"end" }, + { @"volgende", @"current" }, + { @"huidige", @"current" }, + { @"vorige", @"current" }, + { @"vorig", @"current" }, + { @"de op één na laatste", @"end" }, + { @"de een voor de laatste", @"end" }, + { @"die voor de laatste", @"end" }, + { @"voorlaatste", @"end" }, + { @"twee na laatste", @"end" } }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumberDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumbersDefinitions.tt similarity index 100% rename from .NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumberDefinitions.tt rename to .NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumbersDefinitions.tt diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumbersWithUnitDefinitions.cs index dd2dcf189d..719f80ed20 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumbersWithUnitDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumbersWithUnitDefinitions.cs @@ -154,16 +154,18 @@ public static class NumbersWithUnitDefinitions { @"Kilogram", @"kg|kilogram|kilo" }, { @"Gram", @"g|gram" }, { @"Milligram", @"mg|milligram" }, + { @"Microgram", @"μg|microgram" }, { 
@"Barrel", @"vat|vaten" }, { @"Gallon", @"-gallon|gallon" }, { @"Metric ton", @"metrische ton" }, - { @"Ton", @"-ton|ton" }, + { @"Ton", @"-ton|ton|t" }, { @"Pound", @"-pond|pond" }, { @"Ounce", @"-ons|ons" } }; public static readonly IList AmbiguousWeightUnitList = new List { @"g", + @"t", @"oz" }; public static readonly Dictionary CurrencySuffixList = new Dictionary @@ -173,6 +175,10 @@ public static class NumbersWithUnitDefinitions { @"Pul", @"pul" }, { @"Euro", @"euros|euro|€|eur" }, { @"Cent", @"cents|cent|-cents|-cent" }, + { @"Kwartje", @"kwartje" }, + { @"Dubbeltje", @"dubbeltje" }, + { @"Stuiver", @"stuiver" }, + { @"Tientje", @"tientje" }, { @"Albanian lek", @"albanese lek|leks|lek" }, { @"Qindarkë", @"qindarkë|qindarkës|qindarke|qindarkes" }, { @"Angolan kwanza", @"angolese kwanza|kz|aoa|kwanza|kwanzas|angolese kwanzas" }, @@ -416,7 +422,10 @@ public static class NumbersWithUnitDefinitions { @"Fen", @"fen" }, { @"Jiao", @"jiao|mao" }, { @"Finnish markka", @"finse mark|finse markka|suomen markka|finnish markka|finsk mark|fim|markkaa|markka" }, - { @"Penni", @"penniä|penni" } + { @"Penni", @"penniä|penni" }, + { @"Bitcoin", @"bitcoin|bitcoins|btc|xbt|₿" }, + { @"Millibitcoin", @"millibitcoin|millibitcoins|milibitcoin|milibitcoins" }, + { @"Satoshi", @"satoshi|satoshis" } }; public static readonly Dictionary CurrencyNameToIsoCodeMap = new Dictionary { @@ -523,7 +532,7 @@ public static class NumbersWithUnitDefinitions { @"Rwandan franc", @"RWF" }, { @"Russian ruble", @"RUB" }, { @"Transnistrian ruble", @"PRB" }, - { @"Belarusian ruble", @"BYN" }, + { @"New Belarusian ruble", @"BYN" }, { @"Algerian dinar", @"DZD" }, { @"Bahraini dinar", @"BHD" }, { @"Iraqi dinar", @"IQD" }, @@ -605,7 +614,8 @@ public static class NumbersWithUnitDefinitions { @"British Virgin Islands dollar", @"_BD" }, { @"Ascension pound", @"_AP" }, { @"Alderney pound", @"_ALP" }, - { @"Abkhazian apsar", @"_AA" } + { @"Abkhazian apsar", @"_AA" }, + { @"Bitcoin", @"_XBT" } }; public static 
readonly Dictionary FractionalUnitNameToCodeMap = new Dictionary { @@ -681,7 +691,12 @@ public static class NumbersWithUnitDefinitions { @"Kopiyka", @"KOPIYKA" }, { @"Tiyin", @"TIYIN" }, { @"Hào", @"HAO" }, - { @"Ngwee", @"NGWEE" } + { @"Ngwee", @"NGWEE" }, + { @"Kwartje", @"KWARTJE" }, + { @"Dubbeltje", @"DUBBELTJE" }, + { @"Stuiver", @"STUIVER" }, + { @"Millibitcoin", @"MILLIBITCOIN" }, + { @"Satoshi", @"SATOSHI" } }; public const string CompoundUnitConnectorRegex = @"(?en)"; public static readonly Dictionary CurrencyPrefixList = new Dictionary @@ -722,10 +737,11 @@ public static class NumbersWithUnitDefinitions { @"Samoan tālā", @"ws$" }, { @"Chinese yuan", @"¥" }, { @"Japanese yen", @"¥" }, - { @"Euro", @"€" }, + { @"Euro", @"€|eur" }, { @"Pound", @"£" }, { @"Costa Rican colón", @"₡" }, - { @"Turkish lira", @"₺" } + { @"Turkish lira", @"₺" }, + { @"Bitcoin", @"₿|btc|xbt" } }; public static readonly IList AmbiguousCurrencyUnitList = new List { @@ -775,5 +791,30 @@ public static class NumbersWithUnitDefinitions @"yer", @"yen" }; + public static readonly Dictionary AngleSuffixList = new Dictionary + { + { @"Degree", @"graad|graden|°" }, + { @"Radian", @"radiaal|radialen|rad" }, + { @"Turn", @"draai|draaien|slag|slagen" } + }; + public static readonly IList AmbiguousAngleUnitList = new List + { + @"draai", + @"draaien", + @"slag", + @"slagen" + }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"null", @"null" } + }; + public static readonly Dictionary TemperatureAmbiguityFiltersDict = new Dictionary + { + { @"\b(gra(ad|den)|°)$", @"\b((gra(ad|den)|°)\s*(draai(en|t)?|geroteerd|roterend|rotatie|hoek)|(draai(en|t)?|geroteerd|roterend|rotatie|hoek)(\s+(\p{L}+|\d+)){0,4}\s*(gra(ad|den)\b|°))" } + }; + public static readonly Dictionary DimensionAmbiguityFiltersDict = new Dictionary + { + { @"\b(gra(ad|den)|°)$", @"\b((gra(ad|den)|°)\s*(c(elsius)?|f(ah?renheit)?)|(temperatuur)(\s+(\p{L}+|\d+)){0,4}\s*(gra(ad|den)\b|°))" } + }; } } \ No 
newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/QuotedTextDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/QuotedTextDefinitions.cs new file mode 100644 index 0000000000..1717c7b8e0 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/QuotedTextDefinitions.cs @@ -0,0 +1,35 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Dutch\Dutch-QuotedText.yaml +// - Language: Dutch +// - ClassName: QuotedTextDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Dutch +{ + using System; + using System.Collections.Generic; + + public static class QuotedTextDefinitions + { + public const string LangMarker = @"Dut"; + public const string QuotedTextRegex1 = @"(“([^“”]+)”)"; + public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)"; + public const string QuotedTextRegex3 = @"(„([^„”]+)”)"; + public const string QuotedTextRegex4 = @"(,([^,’']+)[’'])"; + public const string QuotedTextRegex5 = @"(""([^""]+)"")"; + public const string QuotedTextRegex6 = @"(\\'([^\']+)\\')"; + public const string QuotedTextRegex7 = @"(`([^`]+)`)"; + public const string QuotedTextRegex8 = @"(123456)"; + public const string QuotedTextRegex9 = @"(123456)"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/QuotedTextDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/QuotedTextDefinitions.tt new file mode 100644 index 0000000000..35a996ef90 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/QuotedTextDefinitions.tt @@ 
-0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Dutch\Dutch-QuotedText.yaml"; + this.Language = "Dutch"; + this.ClassName = "QuotedTextDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/TimeZoneDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/TimeZoneDefinitions.cs index 9ec3470843..8fcf17d1bd 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/TimeZoneDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/TimeZoneDefinitions.cs @@ -21,7 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Dutch public static class TimeZoneDefinitions { - public const string DirectUtcRegex = @"\b(utc|gmt)(\s*[+\-\u00B1]?\s*[\d]{1,2}(\s*:\s*[\d]{1,2})?)?\b"; + public const string DirectUtcRegex = @"\b(utc|gmt)(\s*[+\-\u00B1]?\s*[\d]{1,2}h?(\s*:\s*[\d]{1,2})?)?\b"; public static readonly IList AbbreviationsList = new List { @"AFT" @@ -30,7 +30,9 @@ public static class TimeZoneDefinitions { @"Afghanistan Standard Time" }; + public const string BaseTimeZoneSuffixRegex = @"((\s+|-)(friendly|compatible))?(\s+|-)time(zone)?"; public const string LocationTimeSuffixRegex = @"((\s+|-)(tijdzone|tijd|timezone|time)\b)"; + public static readonly string TimeZoneEndRegex = $@"({BaseTimeZoneSuffixRegex})$"; public static readonly IList AmbiguousTimezoneList = new List { @"bit", diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/ChoiceDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/English/ChoiceDefinitions.cs index d32fcf6412..833a9dd3cc 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/English/ChoiceDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/ChoiceDefinitions.cs @@ -23,7 +23,8 @@ public static class ChoiceDefinitions { public const string LangMarker = @"Eng"; public const string TokenizerRegex = 
@"[^\w\d]"; - public const string TrueRegex = @"\b(true|yes|yep|yup|yeah|y|sure|ok|agree)\b|(\uD83D\uDC4D|\uD83D\uDC4C|\u0001f44c)"; - public const string FalseRegex = @"\b(false|nope|nop|no|not\s+ok|disagree)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90|\u0001F44E|\u0001F590)"; + public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)"; + public static readonly string TrueRegex = $@"\b(true|yes|yep|yup|yeah|y|sure|ok|agree)\b|(\uD83D\uDC4D|\uD83D\uDC4C|\u0001f44c){SkinToneRegex}?"; + public static readonly string FalseRegex = $@"\b(false|nope|nop|no|not\s+ok|disagree)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90|\u0001F44E|\u0001F590){SkinToneRegex}?"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs index b4f3494768..f13dcec317 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs @@ -21,66 +21,85 @@ namespace Microsoft.Recognizers.Definitions.English public static class DateTimeDefinitions { + public const string LangMarker = @"Eng"; public const bool CheckBothBeforeAfter = false; - public static readonly string TillRegex = $@"(?\b(to|(un)?till?|thru|through)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; - public static readonly string RangeConnectorRegex = $@"(?\b(and|through|to)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; - public const string RelativeRegex = @"\b(?following|next|(up)?coming|this|last|past|previous|current|the)\b"; - public const string StrictRelativeRegex = @"\b(?following|next|(up)?coming|this|last|past|previous|current)\b"; + public static readonly string TillRegex = $@"(?\b(to|(un)?till?|thru|through)\b(\s+the\b)?|{BaseDateTime.RangeConnectorSymbolRegex})"; + public static readonly string RangeConnectorRegex = 
$@"(?\b(and|through|to)\b(\s+the\b)?|{BaseDateTime.RangeConnectorSymbolRegex})"; + public const string LastNegPrefix = @"(?following|next|(up)?coming|this|{LastNegPrefix}last|past|previous|current|the)\b"; + public static readonly string StrictRelativeRegex = $@"\b(?following|next|(up)?coming|this|{LastNegPrefix}last|past|previous|current)\b"; public const string UpcomingPrefixRegex = @"((this\s+)?((up)?coming))"; public static readonly string NextPrefixRegex = $@"\b(following|next|{UpcomingPrefixRegex})\b"; public const string AfterNextSuffixRegex = @"\b(after\s+(the\s+)?next)\b"; public const string PastPrefixRegex = @"((this\s+)?past)\b"; - public static readonly string PreviousPrefixRegex = $@"(last|previous|{PastPrefixRegex})\b"; + public static readonly string PreviousPrefixRegex = $@"({LastNegPrefix}last|previous|{PastPrefixRegex})\b"; public const string ThisPrefixRegex = @"(this|current)\b"; public const string RangePrefixRegex = @"(from|between)"; public const string CenturySuffixRegex = @"(^century)\b"; public const string ReferencePrefixRegex = @"(that|same)\b"; - public const string FutureSuffixRegex = @"\b(in\s+the\s+)?(future|hence)\b"; - public const string DayRegex = @"(the\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9])(?:th|nd|rd|st)?)(?=\b|t)"; + public const string FutureSuffixRegex = @"\b((in\s+the\s+)?future|hence)\b"; + public const string PastSuffixRegex = @"\b((in\s+the\s+)past)\b"; + public const string DayRegex = @"(the\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9])(?:th|nd|rd|st)?)(?=\b|t)"; public const string ImplicitDayRegex = @"(the\s*)?(?(?:3[0-1]|[0-2]?\d)(?:th|nd|rd|st))\b"; public const string MonthNumRegex = @"(?1[0-2]|(0)?[1-9])\b"; public const string WrittenOneToNineRegex = @"(?:one|two|three|four|five|six|seven|eight|nine)"; public const string WrittenElevenToNineteenRegex = @"(?:eleven|twelve|(?:thir|four|fif|six|seven|eigh|nine)teen)"; public const string WrittenTensRegex = @"(?:ten|twenty|thirty|fou?rty|fifty|sixty|seventy|eighty|ninety)"; public 
static readonly string WrittenNumRegex = $@"(?:{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s+{WrittenOneToNineRegex})?)"; - public static readonly string WrittenCenturyFullYearRegex = $@"(?:(one|two)\s+thousand(\s+and)?(\s+{WrittenOneToNineRegex}\s+hundred(\s+and)?)?)"; - public const string WrittenCenturyOrdinalYearRegex = @"(?:twenty(\s+(one|two))?|ten|eleven|twelve|thirteen|fifteen|eigthteen|(?:four|six|seven|nine)(teen)?|one|two|three|five|eight)"; - public static readonly string CenturyRegex = $@"\b(?{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}(\s+hundred)?(\s+and)?)\b"; - public static readonly string LastTwoYearNumRegex = $@"(?:zero\s+{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s+{WrittenOneToNineRegex})?)"; - public static readonly string FullTextYearRegex = $@"\b((?{CenturyRegex})\s+(?{LastTwoYearNumRegex})\b|\b(?{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}\s+hundred(\s+and)?))\b"; + public const string WrittenOneToNineOrdinalRegex = @"(?:first|second|third|fourth|fifth|sixth|seventh|eighth|nine?th)"; + public const string WrittenTensOrdinalRegex = @"(?:tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)"; + public static readonly string WrittenOrdinalRegex = $@"(?:{WrittenOneToNineOrdinalRegex}|{WrittenTensOrdinalRegex}|{WrittenTensRegex}\s+{WrittenOneToNineOrdinalRegex})"; + public static readonly string WrittenOrdinalDayRegex = $@"\b(the\s+)?(?(?{WrittenOneToNineOrdinalRegex}|(?:tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth)|(?:ten|twenty)\s+{WrittenOneToNineOrdinalRegex}|thirty\s+first))\b"; + public static readonly string WrittenCenturyFullYearRegex = $@"(?:(one|two)\s+thousand((\s+and)?\s+{WrittenOneToNineRegex}\s+hundred)?)"; + public const string 
WrittenCenturyOrdinalYearRegex = @"(?:twenty(\s+(one|two))?|ten|eleven|twelve|thirteen|fifteen|eighteen|(?:four|six|seven|nine)(teen)?|one|two|three|five|eight)"; + public static readonly string CenturyRegex = $@"\b(?{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}(\s+hundred)?)\b"; + public static readonly string LastTwoYearNumRegex = $@"(?:(zero\s+)?{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s+{WrittenOneToNineRegex})?)"; + public static readonly string FullTextYearRegex = $@"\b((?{CenturyRegex})(\s+and)?\s+(?{LastTwoYearNumRegex})\b|\b(?{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}\s+hundred))\b"; public const string OclockRegex = @"(?o\s*((’|‘|')\s*)?clock|sharp)"; public const string SpecialDescRegex = @"((?)p\b)"; + public const string TasksModeSpecialDescRegex = @"([0-9]+((?)p\b))"; public static readonly string AmDescRegex = $@"(?:{BaseDateTime.BaseAmDescRegex})"; public static readonly string PmDescRegex = $@"(:?{BaseDateTime.BasePmDescRegex})"; public static readonly string AmPmDescRegex = $@"(:?{BaseDateTime.BaseAmPmDescRegex})"; public static readonly string DescRegex = $@"(:?(:?({OclockRegex}\s+)?(?({AmPmDescRegex}|{AmDescRegex}|{PmDescRegex}|{SpecialDescRegex})))|{OclockRegex})"; - public static readonly string TwoDigitYearRegex = $@"\b(?([0-27-9]\d))(?!(\s*((\:)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b"; + public const string OfPrepositionRegex = @"(\bof\b)"; + public static readonly string TwoDigitYearRegex = $@"\b(?([0-9]\d))(?!(\s*((\:\d)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b"; public static readonly string YearRegex = $@"(?:{BaseDateTime.FourDigitYearRegex}|{FullTextYearRegex})"; public const string WeekDayRegex = @"\b(?(?:sun|mon|tues?|thurs?|fri)(day)?|thu|wedn(esday)?|weds?|sat(urday)?)s?\b"; - public const string SingleWeekDayRegex = @"\b(?sunday|saturday|(?:mon|tues?|thurs?|fri)(day)?|thu|wedn(esday)?|weds?|((?<=on\s+)(sat|sun)))\b"; - public static readonly string 
RelativeMonthRegex = $@"(?(of\s+)?{RelativeRegex}\s+month)\b"; - public const string WrittenMonthRegex = @"(((the\s+)?month of\s+)?(?apr(il)?|aug(ust)?|dec(ember)?|feb(ruary)?|jan(uary)?|july?|june?|mar(ch)?|may|nov(ember)?|oct(ober)?|sept(ember)?|sept?))"; + public const string SingleWeekDayRegex = @"\b(?(?((day\s+)?of\s+)?{RelativeRegex}\s+month)\b"; + public const string MonthRegexNoWordBoundary = @"(?apr(il)?|aug(ust)?|dec(ember)?|feb(ruary)?|jan(uary)?|july?|june?|mar(ch)?|may|nov(ember)?|oct(ober)?|sept(ember)?|sep)(?!\p{L})"; + public static readonly string MonthRegex = $@"\b{MonthRegexNoWordBoundary}"; + public static readonly string WrittenMonthRegex = $@"(((the\s+)?month of\s+)?{MonthRegex})"; public static readonly string MonthSuffixRegex = $@"(?(?:(in|of|on)\s+)?({RelativeMonthRegex}|{WrittenMonthRegex}))"; - public const string DateUnitRegex = @"(?decades?|years?|months?|weeks?|(?business\s+)?days?|fortnights?)\b"; + public const string DateUnitRegex = @"(?(decade|year|(?month|week|fortnight)|(?(business\s+|week\s*))?(?day)|fortnight|weekend)(?s)?|(?<=(^|\s)\d{1,4})[ymwd])\b"; public const string DateTokenPrefix = @"on "; public const string TimeTokenPrefix = @"at "; public const string TokenBeforeDate = @"on "; public const string TokenBeforeTime = @"at "; - public static readonly string SimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex}\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+{DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; - public static readonly string MonthFrontSimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((from)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; - public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+(between\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; - public static readonly string BetweenRegex = 
$@"\b(between\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b"; - public static readonly string MonthWithYear = $@"\b(({WrittenMonthRegex}[\.]?(\s*)[/\\\-\.,]?(\s+(of|in))?(\s*)({YearRegex}|(?following|next|last|this)\s+year))|(({YearRegex}|(?following|next|last|this)\s+year)(\s*),?(\s*){WrittenMonthRegex}))\b"; + public const string HalfTokenRegex = @"^(half)"; + public const string QuarterTokenRegex = @"^((a\s+)?quarter)"; + public const string ThreeQuarterTokenRegex = @"^(three\s+quarters?)"; + public const string ToTokenRegex = @"\b(to)$"; + public const string FromRegex = @"\b(from(\s+the)?)$"; + public const string BetweenTokenRegex = @"\b(between(\s+the)?)$"; + public static readonly string SimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?({DayRegex}|{WrittenOrdinalDayRegex})\s*{TillRegex}\s*(({DayRegex}|{WrittenOrdinalDayRegex})\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+({DayRegex}|{WrittenOrdinalDayRegex}))((\s+|\s*,\s*){YearRegex})?\b"; + public static readonly string MonthFrontSimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((from)\s+)?({DayRegex}|{WrittenOrdinalDayRegex})\s*{TillRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})((\s+|\s*,\s*){YearRegex})?\b"; + public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+(between\s+)({DayRegex}|{WrittenOrdinalDayRegex})\s*{RangeConnectorRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})((\s+|\s*,\s*){YearRegex})?\b"; + public static readonly string BetweenRegex = $@"\b(between\s+)({DayRegex}|{WrittenOrdinalDayRegex})\s*{RangeConnectorRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b"; + public static readonly string MonthWithYear = 
$@"\b((({WrittenMonthRegex}[\.]?|((the\s+)?(?first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|sixth|6th|seventh|7th|eighth|8th|ninth|9th|tenth|10th|eleventh|11th|twelfth|12th|last)\s+month(?=\s+(of|in))))((\s*)[/\\\-\.,]?(\s+(of|in))?(\s*)({YearRegex}|{TwoDigitYearRegex}|(?following|next|last|this)\s+year)|\s+(of|in)\s+{TwoDigitYearRegex}))|(({YearRegex}|(?following|next|last|this)\s+year)(\s*),?(\s*){WrittenMonthRegex}))\b"; public const string SpecialYearPrefixes = @"(calendar|(?fiscal|school))"; - public static readonly string OneWordPeriodRegex = $@"\b((((the\s+)?month of\s+)?({StrictRelativeRegex}\s+)?(?apr(il)?|aug(ust)?|dec(ember)?|feb(ruary)?|jan(uary)?|july?|june?|mar(ch)?|may|nov(ember)?|oct(ober)?|sept(ember)?|sept?))|(month|year) to date|({RelativeRegex}\s+)?(my\s+)?(week(end)?|month|(({SpecialYearPrefixes}\s+)?year))(?!((\s+of)?\s+\d+(?!({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex}))|\s+to\s+date))(\s+{AfterNextSuffixRegex})?)\b"; + public static readonly string OneWordPeriodRegex = $@"\b((((the\s+)?month of\s+)?({StrictRelativeRegex}\s+)?{MonthRegex})|(month|year) to date|(?((un)?till?|to)\s+date)|({RelativeRegex}\s+)?(my\s+)?((?working\s+week|workweek)|week(end)?|month|fortnight|(({SpecialYearPrefixes}\s+)?year))(?!((\s+of)?\s+\d+(?!({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex}))|\s+to\s+date))(\s+{AfterNextSuffixRegex})?)\b"; public static readonly string MonthNumWithYear = $@"\b(({BaseDateTime.FourDigitYearRegex}(\s*)[/\-\.](\s*){MonthNumRegex})|({MonthNumRegex}(\s*)[/\-](\s*){BaseDateTime.FourDigitYearRegex}))\b"; public static readonly string WeekOfMonthRegex = $@"\b(?(the\s+)?(?first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|last)\s+week\s+{MonthSuffixRegex}(\s+{BaseDateTime.FourDigitYearRegex}|{RelativeRegex}\s+year)?)\b"; public static readonly string WeekOfYearRegex = $@"\b(?(the\s+)?(?first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|last)\s+week(\s+of)?\s+({YearRegex}|{RelativeRegex}\s+year))\b"; + 
public static readonly string OfYearRegex = $@"\b((of|in)\s+({YearRegex}|{StrictRelativeRegex}\s+year))\b"; + public const string FirstLastRegex = @"\b(the\s+)?((?first)|(?last))\b"; public static readonly string FollowedDateUnit = $@"^\s*{DateUnitRegex}"; public static readonly string NumberCombinedWithDateUnit = $@"\b(?\d+(\.\d*)?){DateUnitRegex}"; public const string QuarterTermRegex = @"\b(((?first|1st|second|2nd|third|3rd|fourth|4th)[ -]+quarter)|(q(?[1-4])))\b"; - public static readonly string RelativeQuarterTermRegex = $@"\b(?{StrictRelativeRegex})\s+quarter\b"; - public static readonly string QuarterRegex = $@"((the\s+)?{QuarterTermRegex}(?:(\s+of|\s*,\s*)?\s+({YearRegex}|{RelativeRegex}\s+year))?)|{RelativeQuarterTermRegex}"; + public static readonly string RelativeQuarterTermRegex = $@"\b(?{StrictRelativeRegex})\s+((?[\w,]+)\s+)?quarters?\b"; + public static readonly string QuarterRegex = $@"((the\s+)?{QuarterTermRegex}(?:((\s+of)?\s+|\s*[,-]\s*)({YearRegex}|{RelativeRegex}\s+year))?)|{RelativeQuarterTermRegex}"; public static readonly string QuarterRegexYearFront = $@"(?:{YearRegex}|{RelativeRegex}\s+year)('s)?(?:\s*-\s*|\s+(the\s+)?)?{QuarterTermRegex}"; public const string HalfYearTermRegex = @"(?first|1st|second|2nd)\s+half"; public static readonly string HalfYearFrontRegex = $@"(?((1[5-9]|20)\d{{2}})|2100)(\s*-\s*|\s+(the\s+)?)?h(?[1-2])"; @@ -91,58 +110,59 @@ public static class DateTimeDefinitions public const string MidPrefixRegex = @"\b(?mid-?|middle of)\b"; public const string LaterPrefixRegex = @"\b(?late|end of|(?later(\s+in)?))\b"; public static readonly string PrefixPeriodRegex = $@"({EarlyPrefixRegex}|{MidPrefixRegex}|{LaterPrefixRegex})"; - public const string PrefixDayRegex = @"\b((?early)|(?mid(dle)?)|(?later?))(\s+in)?(\s+the\s+day)?$"; + public const string PrefixDayRegex = @"\b((?earl(y|ier))|(?mid(dle)?)|(?later?))(\s+in)?(\s+the\s+day)?$"; public const string SeasonDescRegex = @"(?spring|summer|fall|autumn|winter)"; public static 
readonly string SeasonRegex = $@"\b(?({PrefixPeriodRegex}\s+)?({RelativeRegex}\s+)?{SeasonDescRegex}((\s+of|\s*,\s*)?\s+({YearRegex}|{RelativeRegex}\s+year))?)\b"; - public const string WhichWeekRegex = @"\b(week)(\s*)(?5[0-3]|[1-4]\d|0?[1-9])\b"; - public const string WeekOfRegex = @"(the\s+)?(week)(\s+of)(\s+the)?"; + public static readonly string WhichWeekRegex = $@"\b(week)(\s*)(?5[0-3]|[1-4]\d|0?[1-9])(\s+of\s+({YearRegex}|{RelativeRegex}\s+year))?\b"; + public const string WeekOfRegex = @"(the\s+)?((week)(\s+(of|(commencing|starting|beginning)(\s+on)?))|w/c)(\s+the)?"; public const string MonthOfRegex = @"(month)(\s*)(of)"; - public const string MonthRegex = @"(?apr(il)?|aug(ust)?|dec(ember)?|feb(ruary)?|jan(uary)?|july?|june?|mar(ch)?|may|nov(ember)?|oct(ober)?|sept(ember)?|sept?)"; - public static readonly string DateYearRegex = $@"(?{BaseDateTime.FourDigitYearRegex}|{TwoDigitYearRegex})"; - public static readonly string YearSuffix = $@"(,?\s*({DateYearRegex}|{FullTextYearRegex}))"; + public static readonly string DateYearRegex = $@"(?{BaseDateTime.FourDigitYearRegex}|(?(3[0-1]|[0-2]?\d)(?:th|nd|rd|st))s?)\b"; public const string PrefixWeekDayRegex = @"(\s*((,?\s*on)|[-—–]))"; public static readonly string ThisRegex = $@"\b(this(\s*week{PrefixWeekDayRegex}?)?\s*{WeekDayRegex})|({WeekDayRegex}((\s+of)?\s+this\s*week))\b"; public static readonly string LastDateRegex = $@"\b({PreviousPrefixRegex}(\s*week{PrefixWeekDayRegex}?)?\s*{WeekDayRegex})|({WeekDayRegex}(\s+(of\s+)?last\s*week))\b"; public static readonly string NextDateRegex = $@"\b({NextPrefixRegex}(\s*week{PrefixWeekDayRegex}?)?\s*{WeekDayRegex})|((on\s+)?{WeekDayRegex}((\s+of)?\s+(the\s+following|(the\s+)?next)\s*week))\b"; - public static readonly string SpecialDayRegex = $@"\b((the\s+)?day before yesterday|(the\s+)?day after (tomorrow|tmr)|the\s+day\s+(before|after)(?!=\s+day)|((the\s+)?({RelativeRegex}|my)\s+day)|yesterday|tomorrow|tmr|today)\b"; - public static readonly string 
SpecialDayWithNumRegex = $@"\b((?{WrittenNumRegex})\s+days?\s+from\s+(?yesterday|tomorrow|tmr|today))\b"; + public static readonly string SpecialDayRegex = $@"\b((the\s+)?day before yesterday|(the\s+)?day after (tomorrow|tmrw?)|the\s+day\s+(before|after)(?!=\s+day)|((the\s+)?({RelativeRegex}|my)\s+day)|yesterday|tomorrow|tmrw?|today|otd|current date)\b"; + public static readonly string SpecialDayWithNumRegex = $@"\b((?{WrittenNumRegex})\s+days?\s+from\s+(?yesterday|tomorrow|tmrw?|today|current date))\b"; public static readonly string RelativeDayRegex = $@"\b(((the\s+)?{RelativeRegex}\s+day))\b"; public const string SetWeekDayRegex = @"\b(?on\s+)?(?morning|afternoon|evening|night|(sun|mon|tues|wednes|thurs|fri|satur)day)s\b"; - public static readonly string WeekDayOfMonthRegex = $@"(?(the\s+)?(?first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|last)\s+{WeekDayRegex}\s+{MonthSuffixRegex})"; + public static readonly string WeekDayOfMonthRegex = $@"(?(the\s+)?(?first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|last)\s+(week\s+{MonthSuffixRegex}[\.]?\s+(on\s+)?{WeekDayRegex}|{WeekDayRegex}\s+{MonthSuffixRegex}))"; public static readonly string RelativeWeekDayRegex = $@"\b({WrittenNumRegex}\s+{WeekDayRegex}\s+(from\s+now|later))\b"; public static readonly string SpecialDate = $@"(?=\b(on|at)\s+the\s+){DayRegex}\b"; public const string DatePreposition = @"\b(on|in)"; - public static readonly string DateExtractorYearTermRegex = $@"(\s+|\s*,\s*|\s+of\s+){DateYearRegex}"; - public static readonly string DateExtractor1 = $@"\b({WeekDayRegex}\s*[,-]?\s*)?(({MonthRegex}[\.]?\s*[/\\.,-]?\s*{DayRegex})|(\({MonthRegex}\s*[-.]\s*{DayRegex}\)))(\s*\(\s*{WeekDayRegex}\s*\))?({DateExtractorYearTermRegex}\b)?"; - public static readonly string DateExtractor3 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}[\.]?(\s+|\s*,\s*|\s+of\s+|\s*-\s*){MonthRegex}[\.]?((\s+|\s*,\s*|\s+in\s+){DateYearRegex})?\b"; + public static readonly string DateExtractorYearTermRegex = 
$@"(\s+|\s*[/\\.,-]\s*|\s+of\s+){DateYearRegex}"; + public static readonly string DayPrefix = $@"\b({WeekDayRegex}|{SpecialDayRegex})\b"; + public static readonly string DateExtractor1 = $@"\b({DayPrefix}\s*[,-]?\s*)?(({MonthRegex}[\.]?\s*[/\\.,-]?\s*{DayRegex})|(\({MonthRegex}\s*[-./]\s*{DayRegex}\)))(?!\s*\-\s*\d{{2}}\b)(\s*\(\s*{DayPrefix}\s*\))?({DateExtractorYearTermRegex}\b)?"; + public static readonly string DateExtractor3 = $@"\b({DayPrefix}(\s+|\s*,\s*))?({DayRegex}?[\.]?(\s+|\s*[-,/]\s*|\s+of\s+|\s*)(\b)?{MonthRegexNoWordBoundary}[\.]?((\s+in)?{DateExtractorYearTermRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[-./]?\s*(the\s+)?(?(?:3[0-1]|[1-2]\d|0?[1-9])(?:th|nd|rd|st)?)[\.]?(\s+|\s*[-,/]\s*|\s+of\s+){MonthRegex}[\.]?)\b"; public static readonly string DateExtractor4 = $@"\b{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}[\.]?\s*[/\\\-]\s*{DateYearRegex}"; - public static readonly string DateExtractor5 = $@"\b{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}"; - public static readonly string DateExtractor6 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({WeekDayRegex}\s+)?{MonthNumRegex}[\-\.]{DayRegex}(?![%])\b"; - public static readonly string DateExtractor7L = $@"\b({WeekDayRegex}\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b"; - public static readonly string DateExtractor7S = $@"\b({WeekDayRegex}\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%])\b"; - public static readonly string DateExtractor8 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({WeekDayRegex}\s+)?{DayRegex}[\\\-]{MonthNumRegex}(?![%])\b"; - public static readonly string DateExtractor9L = $@"\b({WeekDayRegex}\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\b"; - public static readonly string DateExtractor9S = $@"\b({WeekDayRegex}\s+)?{DayRegex}\s*/\s*{MonthNumRegex}(?![%])\b"; - public static readonly string DateExtractorA = 
$@"\b({WeekDayRegex}\s+)?{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DayRegex}"; - public static readonly string OfMonth = $@"^\s*of\s*{MonthRegex}"; + public static readonly string DateExtractor5 = $@"\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; + public static readonly string DateExtractor6 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({DayPrefix}\s+)?{MonthNumRegex}[\-\.]{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; + public static readonly string DateExtractor7L = $@"\b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b"; + public static readonly string DateExtractor7S = $@"\b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; + public static readonly string DateExtractor8 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({DayPrefix}\s+)?{DayRegex}[\\\-]{MonthNumRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; + public static readonly string DateExtractor9L = $@"\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\b"; + public static readonly string DateExtractor9S = $@"\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}(?![%])\b"; + public const string DateExtractorNoSep = @"\b((?((1\d|20)\d{2})|2100)(\s+(?1[0-2]|(0)?[1-9])\s+(?(?:3[0-1]|[1-2]\d|0?[1-9]))|(?1[0-2]|(0)?[1-9])(?(?:3[0-1]|[1-2]\d|0?[1-9])))\b)"; + public static readonly string DateExtractorA = 
$@"\b({DayPrefix}(\s*,)?\s+)?(({BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DayRegex})|({MonthRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*(the\s+)?(?(?:3[0-1]|[1-2]\d|0?[1-9])(?:th|nd|rd|st)?))|({DayRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{MonthRegex})|{DateExtractorNoSep})"; + public static readonly string OfMonth = $@"^(\s*(day\s+)?of)?\s*{MonthRegex}"; public static readonly string MonthEnd = $@"{MonthRegex}\s*(the)?\s*$"; public static readonly string WeekDayEnd = $@"(this\s+)?{WeekDayRegex}\s*,?\s*$"; - public const string WeekDayStart = @"^[\.]"; - public const string RangeUnitRegex = @"\b(?years?|months?|weeks?)\b"; + public static readonly string WeekDayStart = $@"^\s+(on\s+)?{WeekDayRegex}\b"; + public const string RangeUnitRegex = @"\b(?years?|months?|weeks?|fortnights?)\b"; public const string HourNumRegex = @"\b(?zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\b"; - public const string MinuteNumRegex = @"(?ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)?|twenty|thirty|forty|fifty|one|two|three|five|eight)"; - public const string DeltaMinuteNumRegex = @"(?ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)?|twenty|thirty|forty|fifty|one|two|three|five|eight)"; - public const string PmRegex = @"(?(((?:at|in|around|on|for)\s+(the\s+)?)?(afternoon|evening|midnight|lunchtime))|((at|in|around|on|for)\s+(the\s+)?night))"; - public const string PmRegexFull = @"(?((?:at|in|around|on|for)\s+(the\s+)?)?(afternoon|evening|(mid)?night|lunchtime))"; - public const string AmRegex = @"(?((?:at|in|around|on|for)\s+(the\s+)?)?(morning))"; + public const string MinuteNumRegex = @"(((?twenty|thirty|fou?rty|fifty)(\s*-?\s*))?(?one|two|three|four|five|six|seven|eight|nine)|(?ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)|twenty|thirty|forty|fifty))"; + public const string 
DeltaMinuteNumRegex = @"(((?twenty|thirty|fou?rty|fifty)(\s*-?\s*))?(?one|two|three|four|five|six|seven|eight|nine)|(?ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)|twenty|thirty|forty|fifty))"; + public const string PmRegex = @"(?(((?:at|in|around|circa|on|for)\s+(the\s+)?)?(((early|late)\s+)?(afternoon|evening)|midnight|lunchtime))|((at|in|around|on|for)\s+(the\s+)?night))"; + public const string PmRegexFull = @"(?((?:at|in|around|circa|on|for)\s+(the\s+)?)?(((early|late)\s+)?(afternoon|evening)|(mid)?night|lunchtime))"; + public const string AmRegex = @"(?((?:at|in|around|circa|on|for)\s+(the\s+)?)?((early|late)\s+)?(morning))"; public const string LunchRegex = @"\blunchtime\b"; public const string NightRegex = @"\b(mid)?night\b"; public const string CommonDatePrefixRegex = @"^[\.]"; - public static readonly string LessThanOneHour = $@"(?(a\s+)?quarter|three quarter(s)?|half( an hour)?|{BaseDateTime.DeltaMinuteRegex}(\s+(minutes?|mins?))|{DeltaMinuteNumRegex}(\s+(minutes?|mins?)))"; - public static readonly string WrittenTimeRegex = $@"(?{HourNumRegex}\s+({MinuteNumRegex}|(?twenty|thirty|fou?rty|fifty)\s+{MinuteNumRegex}))"; - public static readonly string TimePrefix = $@"(?({LessThanOneHour} past|{LessThanOneHour} to))"; + public static readonly string LessThanOneHour = $@"(?(a\s+)?quarter|three quarter(s)?|half( an hour)?|{BaseDateTime.DeltaMinuteRegex}(\s+(minutes?|mins?)|(?=\s+past))|{DeltaMinuteNumRegex}(\s+(minutes?|mins?)|(?=\s+past)))"; + public static readonly string WrittenTimeRegex = $@"(?{HourNumRegex}\s+{MinuteNumRegex}(\s+(minutes?|mins?))?)"; + public static readonly string TimePrefix = $@"(?{LessThanOneHour}\s+(past|to))"; public static readonly string TimeSuffix = $@"(?{AmRegex}|{PmRegex}|{OclockRegex})"; public static readonly string TimeSuffixFull = $@"(?{AmRegex}|{PmRegexFull}|{OclockRegex})"; public static readonly string BasicTime = 
$@"\b(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex}(?![%\d]))"; @@ -151,9 +171,9 @@ public static class DateTimeDefinitions public const string MidafternoonRegex = @"(?mid\s*(-\s*)?afternoon)"; public const string MiddayRegex = @"(?mid\s*(-\s*)?day|((12\s)?noon))"; public static readonly string MidTimeRegex = $@"(?({MidnightRegex}|{MidmorningRegex}|{MidafternoonRegex}|{MiddayRegex}))"; - public static readonly string AtRegex = $@"\b(?:(?:(?<=\bat\s+)(?:{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\.\d)(\s*((?a)|(?p)))?|{MidTimeRegex}))|{MidTimeRegex})\b"; + public static readonly string AtRegex = $@"\b(?:(?:(?<=\b(at|(at)?\s*around|circa)\s+)(?:{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\.\d)(\s*((?a)|(?p)))?|{MidTimeRegex}))|{MidTimeRegex})\b"; public static readonly string IshRegex = $@"\b({BaseDateTime.HourRegex}(-|——)?ish|noon(ish)?)\b"; - public const string TimeUnitRegex = @"([^A-Za-z]{1,}|\b)(?h(ou)?rs?|h|min(ute)?s?|sec(ond)?s?)\b"; + public const string TimeUnitRegex = @"([^a-z]{1,}|\b)(?(h(ou)?r|min(ute)?|sec(ond)?)(?s)?|h)\b"; public const string RestrictedTimeUnitRegex = @"(?hour|minute)\b"; public const string FivesRegex = @"(?(?:fifteen|(?:twen|thir|fou?r|fif)ty(\s*five)?|ten|five))\b"; public static readonly string HourRegex = $@"\b{BaseDateTime.HourRegex}"; @@ -165,99 +185,115 @@ public static class DateTimeDefinitions public static readonly string TimeRegex3 = $@"(\b{TimePrefix}\s+)?{BaseDateTime.HourRegex}\.{BaseDateTime.MinuteRegex}(\s*{DescRegex})"; public static readonly string TimeRegex4 = $@"\b{TimePrefix}\s+{BasicTime}(\s*{DescRegex})?\s+{TimeSuffix}\b"; public static readonly string TimeRegex5 = $@"\b{TimePrefix}\s+{BasicTime}((\s*{DescRegex})|\b)"; - public static readonly string TimeRegex6 = $@"{BasicTime}(\s*{DescRegex})?\s+{TimeSuffix}\b"; + public static readonly string TimeRegex6 = 
$@"({BasicTime})(\s*{DescRegex})?\s+{TimeSuffix}\b"; public static readonly string TimeRegex7 = $@"\b{TimeSuffixFull}\s+(at\s+)?{BasicTime}((\s*{DescRegex})|\b)"; public static readonly string TimeRegex8 = $@".^"; public static readonly string TimeRegex9 = $@"\b{PeriodHourNumRegex}(\s+|-){FivesRegex}((\s*{DescRegex})|\b)"; public static readonly string TimeRegex10 = $@"\b({TimePrefix}\s+)?{BaseDateTime.HourRegex}(\s*h\s*){BaseDateTime.MinuteRegex}(\s*{DescRegex})?"; - public static readonly string TimeRegex11 = $@"\b(?:(?:{TimeTokenPrefix}{TimeRegexWithDotConnector})(?!\s*per\s*cent|%)|(?:{TimeRegexWithDotConnector}(\s*{DescRegex})))"; + public static readonly string TimeRegex11 = $@"\b((?:({TimeTokenPrefix})?{TimeRegexWithDotConnector}(\s*{DescRegex}))|(?:(?:{TimeTokenPrefix}{TimeRegexWithDotConnector})(?!\s*per\s*cent|%)))"; public static readonly string FirstTimeRegexInTimeRange = $@"\b{TimeRegexWithDotConnector}(\s*{DescRegex})?"; public static readonly string PureNumFromTo = $@"({RangePrefixRegex}\s+)?({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?\s*{TillRegex}\s*({HourRegex}|{PeriodHourNumRegex})(?\s*({PmRegex}|{AmRegex}|{DescRegex}))?"; public static readonly string PureNumBetweenAnd = $@"(between\s+)(({BaseDateTime.TwoDigitHourRegex}{BaseDateTime.TwoDigitMinuteRegex})|{HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?\s*{RangeConnectorRegex}\s*(({BaseDateTime.TwoDigitHourRegex}{BaseDateTime.TwoDigitMinuteRegex})|{HourRegex}|{PeriodHourNumRegex})(?\s*({PmRegex}|{AmRegex}|{DescRegex}))?"; public static readonly string SpecificTimeFromTo = $@"({RangePrefixRegex}\s+)?(?(({TimeRegex2}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))\s*{TillRegex}\s*(?(({TimeRegex2}|{TimeRegexWithDotConnector}(?\s*{DescRegex}))|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))"; public static readonly string SpecificTimeBetweenAnd = 
$@"(between\s+)(?(({TimeRegex2}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))\s*{RangeConnectorRegex}\s*(?(({TimeRegex2}|{TimeRegexWithDotConnector}(?\s*{DescRegex}))|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))"; public const string SuffixAfterRegex = @"\b(((at)\s)?(or|and)\s+(above|after|later|greater)(?!\s+than))\b"; - public const string PrepositionRegex = @"(?^(at|on|of)(\s+the)?$)"; - public const string LaterEarlyRegex = @"((?early(\s+|-))|(?late(r?\s+|-)))"; - public const string MealTimeRegex = @"\b(?breakfast|brunch|lunch(time)?|dinner(time)?|supper)\b"; + public const string PrepositionRegex = @"(?^(,\s*)?(at|on|of)(\s+the)?$)"; + public const string LaterEarlyRegex = @"((?earl(y|ier)(\s+|-))|(?late(r?\s+|-)))"; + public const string MealTimeRegex = @"\b(at\s+)?(?breakfast|brunch|lunch(\s*time)?|dinner(\s*time)?|supper)\b"; public static readonly string UnspecificTimePeriodRegex = $@"({MealTimeRegex})"; - public static readonly string TimeOfDayRegex = $@"\b(?((((in\s+(the)?\s+)?{LaterEarlyRegex}?(in\s+(the)?\s+)?(morning|afternoon|night|evening)))|{MealTimeRegex}|(((in\s+(the)?\s+)?)(daytime|business\s+hour)))s?)\b"; - public static readonly string SpecificTimeOfDayRegex = $@"\b(({StrictRelativeRegex}\s+{TimeOfDayRegex})\b|\btonight)s?\b"; + public static readonly string TimeOfDayRegex = $@"\b(?((((in\s+the\s+){LaterEarlyRegex}?(morning|afternoon|night(-?time)?|evening)s)|((in\s+the\s+)?{LaterEarlyRegex}?(in(\s+the)?\s+)?(morning|afternoon|night(-?time)?|evening)))|{MealTimeRegex}|(((in\s+(the)?\s+)?)(daytime|business\s+hours?))))\b"; + public static readonly string SpecificTimeOfDayRegex = $@"\b(({StrictRelativeRegex}\s+{TimeOfDayRegex})\b|\b(?toni(ght|te)))s?\b"; public static readonly string TimeFollowedUnit = $@"^\s*{TimeUnitRegex}"; public static readonly string TimeNumberCombinedWithUnit = $@"\b(?\d+(\.\d*)?){TimeUnitRegex}"; public static readonly string[] BusinessHourSplitStrings = { 
@"business", @"hour" }; - public const string NowRegex = @"\b(?(right\s+)?now|as soon as possible|asap|recently|previously)\b"; + public const string NowRegex = @"\b(?(right\s+)?now|as\s+soon\s+as\s+possible|asap|recently|previously|at\s+(present|this\s+time|th(e|is)\s+minute|the\s+(moment|present\s+time)))\b"; + public static readonly string NowParseRegex = $@"\b({NowRegex}|^(date)$)\b"; public const string SuffixRegex = @"^\s*(in the\s+)?(morning|afternoon|evening|night)\b"; public const string NonTimeContextTokens = @"(building)"; - public const string DateTimeTimeOfDayRegex = @"\b(?morning|afternoon|night|evening)\b"; - public static readonly string DateTimeSpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{DateTimeTimeOfDayRegex})\b|\btonight)\b"; + public const string DateTimeTimeOfDayRegex = @"\b(?morning|(?afternoon|night|evening))\b"; + public static readonly string DateTimeSpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{DateTimeTimeOfDayRegex})\b|\btoni(ght|te))\b"; public static readonly string TimeOfTodayAfterRegex = $@"^\s*(,\s*)?(in\s+)?{DateTimeSpecificTimeOfDayRegex}"; - public static readonly string TimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(at|around|in|on))?\s*$"; + public static readonly string TimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(at|around|circa|in|on))?\s*$"; public static readonly string SimpleTimeOfTodayAfterRegex = $@"(?(?tonight))))\b"; + public static readonly string PeriodSpecificTimeOfDayRegex = $@"\b({LaterEarlyRegex}?this\s+{DateTimeTimeOfDayRegex}|({StrictRelativeRegex}\s+{PeriodTimeOfDayRegex})\b|\b(?toni(ght|te)))\b"; public static readonly string PeriodTimeOfDayWithDateRegex = $@"\b(({PeriodTimeOfDayRegex}(\s+(on|of))?))\b"; + public static readonly string TasksmodeMealTimeofDayRegex = $@"\b((in\s+(the)?\s+)?((?earl(y|ier)(\s+|-))|(?late(r?\s+|-)))?((this\s+)?\b(?lunch(\s*time)?|dinner(\s*time)?|brunch|breakfast)\b))\b"; public const string LessThanRegex = 
@"\b(less\s+than)\b"; public const string MoreThanRegex = @"\b(more\s+than)\b"; - public static readonly string DurationUnitRegex = $@"(?{DateUnitRegex}|h(ou)?rs?|h|min(ute)?s?|sec(ond)?s?)\b"; + public static readonly string DurationUnitRegex = $@"(?{DateUnitRegex}|h(ou)?rs?|h|min(ute)?s?|sec(ond)?s?|nights?)\b"; public const string SuffixAndRegex = @"(?\s*(and)\s+(an?\s+)?(?half|quarter))"; - public const string PeriodicRegex = @"\b(?daily|monthly|weekly|biweekly|yearly|annual(ly)?)\b"; - public static readonly string EachUnitRegex = $@"(?(each|every|once an?)(?\s+other)?\s*{DurationUnitRegex})"; - public const string EachPrefixRegex = @"\b(?(each|(every)|once an?)\s*$)"; - public const string SetEachRegex = @"\b(?(each|(every))\s*)"; - public const string SetLastRegex = @"(?following|next|upcoming|this|last|past|previous|current)"; - public const string EachDayRegex = @"^\s*(each|every)\s*day\b"; + public const string PeriodicRegex = @"\b(?((?semi|bi|tri)(\s*|-))?(daily|monthly|weekly|quarterly|yearly|annual(ly)?))\b"; + public static readonly string EachUnitRegex = $@"\b(?(every|(each|any|once an|one a|once a)\s?)(?\s+(other|alternate|second))?\s*({DurationUnitRegex}|(?quarters?|weekends?)|{WeekDayRegex})|(?weekends))"; + public const string EachPrefixRegex = @"\b(?(each|every|once an?)\s*$)"; + public const string SetEachRegex = @"\b(?(each|every)(?\s+(other|alternate))?\s*)(?!the|that)\b"; + public static readonly string SetLastRegex = $@"(?following|next|upcoming|this|{LastNegPrefix}last|past|previous|current)"; + public const string EachDayRegex = @"\s*((each|every)\s*day)|daily\b"; public static readonly string DurationFollowedUnit = $@"(^\s*{DurationUnitRegex}\s+{SuffixAndRegex})|(^\s*{SuffixAndRegex}?(\s+|-)?{DurationUnitRegex})"; public static readonly string NumberCombinedWithDurationUnit = $@"\b(?\d+(\.\d*)?)(-)?{DurationUnitRegex}"; - public static readonly string AnUnitRegex = $@"(\b((?(half)\s+)?an?|another)|(?(½|half)))\s+{DurationUnitRegex}"; - 
public const string DuringRegex = @"\b(for|during)\s+the\s+(?year|month|week|day)\b"; - public const string AllRegex = @"\b(?(all|full|whole)(\s+|-)(?year|month|week|day))\b"; - public const string HalfRegex = @"((an?\s*)|\b)(?half\s+(?year|month|week|day|hour))\b"; + public static readonly string AnUnitRegex = $@"(\b((?(half)\s+)?an?|another)|(?(1/2|½|half)))\s+{DurationUnitRegex}"; + public const string DuringRegex = @"\b(for|during)\s+the\s+(?year|month|week|day|fortnight)\b"; + public const string AllRegex = @"\b(?(all|full|whole)(\s+|-)(?year|month|week|day|fortnight))\b"; + public const string HalfRegex = @"((an?\s*)|\b)(?half\s+(?year|month|week|fortnight|day|hour))\b"; public const string ConjunctionRegex = @"\b((and(\s+for)?)|with)\b"; - public static readonly string HolidayRegex1 = $@"\b(?mardi gras|(washington|mao)'s birthday|chinese new year|(new\s+(years'|year\s*'s|years?)\s+eve)|(new\s+(years'|year\s*'s|years?)(\s+day)?)|may\s*day|yuan dan|christmas eve|(christmas|xmas)(\s+day)?|black friday|yuandan|easter(\s+(sunday|saturday|monday))?|clean monday|ash wednesday|palm sunday|maundy thursday|good friday|white\s+(sunday|monday)|trinity sunday|pentecost|corpus christi|cyber monday)(\s+(of\s+)?({YearRegex}|{RelativeRegex}\s+year))?\b"; - public static readonly string HolidayRegex2 = $@"\b(?(thanks\s*giving|all saint's|white lover|s(?:ain)?t?. 
(?:patrick|george)(?:')?(?:s)?|us independence|all hallow|all souls|guy fawkes|cinco de mayo|halloween|qingming|dragon boat|april fools|tomb\s*sweeping)(\s+day)?)(\s+(of\s+)?({YearRegex}|{RelativeRegex}\s+year))?\b"; - public static readonly string HolidayRegex3 = $@"(?(?:independence|presidents(?:')?|mlk|martin luther king( jr)?|canberra|ascension|columbus|tree( planting)?|arbor|labou?r|(international|int'l)\s+workers'?|mother's|mothers?|father's|fathers?|female|women('s)?|single|teacher'?s|youth|children|girls|lovers?|earth|inauguration|groundhog|valentine'?s|baptiste|bastille|veterans(?:')?|memorial|mid[ \-]autumn|moon|spring|lantern)\s+day)(\s+(of\s+)?({YearRegex}|{RelativeRegex}\s+year))?"; + public const string HolidayList1 = @"(?mardi gras|(washington|mao)'s birthday|juneteenth|(jubilee|freedom)(\s+day)|chinese new year|(new\s+(years'|year\s*'s|years?)\s+eve)|(new\s+(years'|year\s*'s|years?)(\s+day)?)|may\s*day|yuan dan|christmas eve|(christmas|xmas)(\s+day)?|black friday|yuandan|easter(\s+(sunday|saturday|monday))?|clean monday|ash wednesday|palm sunday|maundy thursday|good friday|white\s+(sunday|monday)|trinity sunday|pentecost|corpus christi|cyber monday)"; + public const string HolidayList2 = @"(?(thanks\s*giving|all saint's|white lover|s(?:ain)?t?(\.)?\s+(?:patrick|george)(?:')?(?:s)?|us independence|all hallow|all souls|guy fawkes|cinco de mayo|halloween|qingming|dragon boat|april fools|tomb\s*sweeping)(\s+day)?)"; + public const string HolidayList3 = @"(?(?:independence|presidents(?:')?|mlk|martin luther king( jr)?|canberra|ascension|columbus|tree( planting)?|arbor|labou?r|((international|int'?l)\s+)?workers'?|mother'?s?|father'?s?|female|women('s)?|single|teacher'?s|youth|children|girls|lovers?|earth|inauguration|groundhog|valentine'?s|baptiste|bastille|veterans(?:')?|memorial|mid[ \-]autumn|moon|spring|lantern)\s+day)"; + public const string HolidayList4 = @"(?ramad(h)?an|ram(a)?zan|ramathan|eid al(-|\s+)adha|eid al(-|\s+)azha|eidul(-|\s+)azha|feast 
of the sacrifice|(islamic|arabic|hijri) new year|eid al(-|\s+)fitr|festival of breaking the fast)"; + public static readonly string HolidayRegex = $@"\b(({StrictRelativeRegex}\s+({HolidayList1}|{HolidayList2}|{HolidayList3}|{HolidayList4}))|(?((the\s+)?weekend\s+of\s+)({HolidayList1}|{HolidayList2}|{HolidayList3}|{HolidayList4})(\s+((of\s+)?({YearRegex}|{RelativeRegex}\s+year)))?)|(({HolidayList1}|{HolidayList2}|{HolidayList3}|{HolidayList4})((?(\s+weekend)(\s+((of\s+)?({YearRegex}|{RelativeRegex}\s+year)))?)|(\s+(of\s+)?({YearRegex}|{RelativeRegex}\s+year)(?\s+weekend)?))?))\b"; + public static readonly string TasksModeHolidayListSupression = $@"(?(?:independence|teacher'?s|youth|children|girls)\s+day)|(?ramad(h)?an|ram(a)?zan|ramathan|eid al(-|\s+)adha|eid al(-|\s+)azha|eidul(-|\s+)azha|feast of the sacrifice|(islamic|arabic|hijri) new year|eid al(-|\s+)fitr|festival of breaking the fast)\b"; public const string AMTimeRegex = @"(?morning)"; public const string PMTimeRegex = @"\b(?afternoon|evening|night)\b"; + public const string NightTimeRegex = @"(night)"; + public const string NowTimeRegex = @"(now|at\s+(present|this\s+time|th(e|is)\s+minute|the\s+(moment|(current|present)\s+time)))"; + public const string RecentlyTimeRegex = @"(recently|previously)"; + public const string AsapTimeRegex = @"(as soon as possible|asap)"; public const string InclusiveModPrepositions = @"(?((on|in|at)\s+or\s+)|(\s+or\s+(on|in|at)))"; + public const string AroundRegex = @"(?:\b(?:around|circa)\s*?\b)(\s+the)?"; public static readonly string BeforeRegex = $@"((\b{InclusiveModPrepositions}?(?:before|in\s+advance\s+of|prior\s+to|(no\s+later|earlier|sooner)\s+than|ending\s+(with|on)|by|(un)?till?|(?as\s+late\s+as)){InclusiveModPrepositions}?\b\s*?)|(?)((?<\s*=)|<))(\s+the)?"; - public static readonly string AfterRegex = $@"((\b{InclusiveModPrepositions}?((after|(starting|beginning)(\s+on)?(?!\sfrom)|(?>\s*=)|>))(\s+the)?"; - public const string SinceRegex = 
@"(?:(?:\b(?:since|after\s+or\s+equal\s+to|starting\s+(?:from|on|with)|as\s+early\s+as|(any\s+time\s+)?from)\b\s*)|(?=))"; - public const string AroundRegex = @"(?:\b(?:around|circa)\s*\b)"; - public const string AgoRegex = @"\b(ago|before\s+(?yesterday|today))\b"; - public static readonly string LaterRegex = $@"\b(?:later(?!((\s+in)?\s*{OneWordPeriodRegex})|(\s+{TimeOfDayRegex}))|from now|(from|after) (?tomorrow|tmr|today))\b"; + public static readonly string AfterRegex = $@"((\b{InclusiveModPrepositions}?((after(\s+on)?(?!\sfrom)|(?>\s*=)|>))(\s+the)?"; + public const string StartingRegex = @"(starting|beginning)(\s+)?(?:from|on|with)?"; + public const string SinceRegex = @"(?:(?:\b(?:since|after\s+or\s+equal\s+to|(starting|beginning)(\s)?(?:from|on|with)?|as\s+early\s+as|(any\s+time\s+)from)\b\s*?)|(?=))(\s+the)?"; + public static readonly string SinceRegexExp = $@"({SinceRegex}|\bfrom(\s+the)?\b)"; + public const string AgoRegex = @"\b(ago|earlier|before\s+(?yesterday|today))\b"; + public static readonly string LaterRegex = $@"\b(?:later(?!((\s+in)?\s*{OneWordPeriodRegex})|(\s+{TimeOfDayRegex})|\s+than\b)|from now|(from|after)\s+(?tomorrow|tmrw?|today))\b"; + public const string BeforeAfterRegex = @"(,?\s*)\b((?before)|(?from|after))\b"; + public static readonly string ModPrefixRegex = $@"\b({RelativeRegex}|{AroundRegex}|{BeforeRegex}|{AfterRegex}|{SinceRegex})\b"; + public static readonly string ModSuffixRegex = $@"\b({AgoRegex}|{LaterRegex}|{BeforeAfterRegex}|{FutureSuffixRegex}|{PastSuffixRegex})\b"; public const string InConnectorRegex = @"\b(in)\b"; public static readonly string SinceYearSuffixRegex = $@"(^\s*{SinceRegex}(\s*(the\s+)?year\s*)?{YearSuffix})"; public static readonly string WithinNextPrefixRegex = $@"\b(within(\s+the)?(\s+(?{NextPrefixRegex}))?)\b"; + public const string ForPrefixRegex = @"((?for.*from.*)|(?\bfrom\b)|(?\bfor\b))"; + public const string TodayNowRegex = @"\b(today|now|current (date|time))\b"; public static readonly string 
MorningStartEndRegex = $@"(^(morning|{AmDescRegex}))|((morning|{AmDescRegex})$)"; public static readonly string AfternoonStartEndRegex = $@"(^(afternoon|{PmDescRegex}))|((afternoon|{PmDescRegex})$)"; public const string EveningStartEndRegex = @"(^(evening))|((evening)$)"; - public const string NightStartEndRegex = @"(^(over|to)?night)|((over|to)?night$)"; + public const string NightStartEndRegex = @"(^(over|to)?ni(ght|te))|((over|to)?ni(ght|te)$)"; public const string InexactNumberRegex = @"\b((a\s+)?few|some|several|(?(a\s+)?couple(\s+of)?))\b"; public static readonly string InexactNumberUnitRegex = $@"({InexactNumberRegex})\s+({DurationUnitRegex})"; public static readonly string RelativeTimeUnitRegex = $@"(?:(?:(?:{NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+({TimeUnitRegex}))|((the|my))\s+({RestrictedTimeUnitRegex}))"; public static readonly string RelativeDurationUnitRegex = $@"(?:(?:(?<=({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+)({DurationUnitRegex}))|((the|my))\s+({RestrictedTimeUnitRegex}))"; - public static readonly string ReferenceDatePeriodRegex = $@"\b{ReferencePrefixRegex}\s+(?week|month|year|decade|weekend)\b"; - public const string ConnectorRegex = @"^(-|,|for|t|around|@)$"; - public const string FromToRegex = @"\b(from).+(to)\b.+"; + public static readonly string ReferenceDatePeriodRegex = $@"\b{ReferencePrefixRegex}\s+(?week(end)?|fortnight|month|year|decade)\b"; + public const string ConnectorRegex = @"^(-|,|for|t|around|circa|@)$"; + public const string FromToRegex = @"(\b(from).+(to|and|or)\b.+)"; public const string SingleAmbiguousMonthRegex = @"^(the\s+)?(may|march)$"; public const string SingleAmbiguousTermsRegex = @"^(the\s+)?(day|week|month|year)$"; - public const string UnspecificDatePeriodRegex = @"^(week(end)?|month|year)$"; - public const string PrepositionSuffixRegex = @"\b(on|in|at|around|from|to)$"; + public const string UnspecificDatePeriodRegex = @"^(week|fortnight|month|year)$"; + public const 
string PrepositionSuffixRegex = @"\b(on|in|at|around|circa|from|to)$"; public const string FlexibleDayRegex = @"(?([A-Za-z]+\s)?[A-Za-z\d]+)"; - public static readonly string ForTheRegex = $@"\b((((?<=for\s+)the\s+{FlexibleDayRegex})|((?<=on\s+)(the\s+)?{FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\s*(,|\.|!|\?|$)))"; + public static readonly string ForTheRegex = $@"\b((((?<=\bfor\s+)the\s+{FlexibleDayRegex})|((?<=\bon\s+)(the\s+)?{FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\s*(,|\.(?!\d)|!|\?|$)))"; public static readonly string WeekDayAndDayOfMonthRegex = $@"\b{WeekDayRegex}\s+(the\s+{FlexibleDayRegex})\b"; public static readonly string WeekDayAndDayRegex = $@"\b{WeekDayRegex}\s+(?!(the)){DayRegex}(?!([-:]|(\s+({AmDescRegex}|{PmDescRegex}|{OclockRegex}))))\b"; - public const string RestOfDateRegex = @"\brest\s+(of\s+)?((the|my|this|current)\s+)?(?week|month|year|decade)\b"; - public const string RestOfDateTimeRegex = @"\brest\s+(of\s+)?((the|my|this|current)\s+)?(?day)\b"; + public const string RestOfDateRegex = @"\b(rest|remaining)\s+(of\s+)?((the|my|this|current)\s+)?(?week|fortnight|month|year|decade)\b"; + public const string RestOfDateTimeRegex = @"\b(rest|remaining)\s+(of\s+)?((the|my|this|current)\s+)?(?day)\b"; public const string AmbiguousRangeModifierPrefix = @"(from)"; - public static readonly string NumberEndingPattern = $@"^(?:\s+(?meeting|appointment|conference|((skype|teams)\s+)?call)\s+to\s+(?{PeriodHourNumRegex}|{HourRegex})([\.]?$|(\.,|,|!|\?)))"; + public static readonly string NumberEndingPattern = $@"^(?:\s+(?meeting|appointment|conference|((skype|teams|zoom|facetime)\s+)?call)\s+to\s+(?{PeriodHourNumRegex}|{HourRegex})([\.]?$|(\.,|,|!|\?)))"; public const string OneOnOneRegex = @"\b(1\s*:\s*1(?!\d))|(one (on )?one|one\s*-\s*one|one\s*:\s*one)\b"; - public static readonly string LaterEarlyPeriodRegex = $@"\b(({PrefixPeriodRegex})\s*\b\s*(?{OneWordPeriodRegex})|({UnspecificEndOfRangeRegex}))\b"; + public static readonly string LaterEarlyPeriodRegex = 
$@"\b(({PrefixPeriodRegex})\s*\b\s*(?{OneWordPeriodRegex}|(?{BaseDateTime.FourDigitYearRegex}))|({UnspecificEndOfRangeRegex}))\b"; public static readonly string WeekWithWeekDayRangeRegex = $@"\b((?({NextPrefixRegex}|{PreviousPrefixRegex}|this)\s+week)((\s+between\s+{WeekDayRegex}\s+and\s+{WeekDayRegex})|(\s+from\s+{WeekDayRegex}\s+to\s+{WeekDayRegex})))\b"; public const string GeneralEndingRegex = @"^\s*((\.,)|\.|,|!|\?)?\s*$"; public const string MiddlePauseRegex = @"\s*(,)\s*"; @@ -269,28 +305,51 @@ public static class DateTimeDefinitions public static readonly string NumberAsTimeRegex = $@"\b({WrittenTimeRegex}|{PeriodHourNumRegex}|{BaseDateTime.HourRegex})\b"; public static readonly string TimeBeforeAfterRegex = $@"\b(((?<=\b(before|no later than|by|after)\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}|{MidTimeRegex}))|{MidTimeRegex})\b"; public const string DateNumberConnectorRegex = @"^\s*(?\s+at)\s*$"; - public const string DecadeRegex = @"(?(?:nough|twen|thir|fou?r|fif|six|seven|eight|nine)ties|two\s+thousands)"; + public const string DecadeRegex = @"(?(?:nough|twen|thir|fou?r|fif|six|seven|eigh|nine)ties|two\s+thousands)"; public static readonly string DecadeWithCenturyRegex = $@"(the\s+)?(((?\d|1\d|2\d)?(')?(?\d0)(')?(\s)?s\b)|(({CenturyRegex}(\s+|-)(and\s+)?)?{DecadeRegex})|({CenturyRegex}(\s+|-)(and\s+)?(?tens|hundreds)))"; public static readonly string RelativeDecadeRegex = $@"\b((the\s+)?{RelativeRegex}\s+((?[\w,]+)\s+)?decades?)\b"; public static readonly string YearPeriodRegex = $@"((((from|during|in)\s+)?{YearRegex}\s*({TillRegex})\s*{YearRegex})|(((between)\s+){YearRegex}\s*({RangeConnectorRegex})\s*{YearRegex}))"; public static readonly string StrictTillRegex = $@"(?\b(to|(un)?till?|thru|through)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s+of|\s*,\s*))))"; public static readonly string StrictRangeConnectorRegex = 
$@"(?\b(and|through|to)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s+of|\s*,\s*))))"; - public static readonly string ComplexDatePeriodRegex = $@"(?:((from|during|in)\s+)?(?.+)\s*({StrictTillRegex})\s*(?.+)|((between)\s+)(?.+)\s*({StrictRangeConnectorRegex})\s*(?.+))"; - public static readonly string FailFastRegex = $@"{BaseDateTime.DeltaMinuteRegex}|\b(?:{BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex})|{BaseDateTime.BaseAmPmDescRegex}|\b(?:zero|{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}|{WrittenMonthRegex}|{SeasonDescRegex}|{DecadeRegex}|centur(y|ies)|weekends?|quarters?|hal(f|ves)|yesterday|to(morrow|day|night)|tmr|noonish|\d(-|——)?ish|((the\s+\w*)|\d)(th|rd|nd|st)|(mid\s*(-\s*)?)?(night|morning|afternoon|day)s?|evenings?||noon|lunch(time)?|dinner(time)?|(day|night)time|overnight|dawn|dusk|sunset|hours?|hrs?|h|minutes?|mins?|seconds?|secs?|eo[dmy]|mardi[ -]?gras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|cinco de mayo|all (hallow|souls)|guy fawkes|(st )?patrick|hundreds?|noughties|aughts|thousands?)\b|{WeekDayRegex}|{SetWeekDayRegex}|{NowRegex}|{PeriodicRegex}|\b({DateUnitRegex}|{ImplicitDayRegex})"; + public const string StartMiddleEndRegex = @"\b((?((the\s+)?(start|beginning)\s+of\s+)?)(?((the\s+)?middle\s+of\s+)?)(?((the\s+)?end\s+of\s+)?))"; + public static readonly string ComplexDatePeriodRegex = $@"(?:((from|during|in)\s+)?{StartMiddleEndRegex}(?.+)\s*({StrictTillRegex})\s*{StartMiddleEndRegex}(?.+)|((between)\s+){StartMiddleEndRegex}(?.+)\s*({StrictRangeConnectorRegex})\s*{StartMiddleEndRegex}(?.+))"; + public static readonly string FailFastRegex = 
$@"{BaseDateTime.DeltaMinuteRegex}|\b(?:{BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex})|{BaseDateTime.BaseAmPmDescRegex}|\b(?:zero|{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}|{WrittenMonthRegex}|{SeasonDescRegex}|{DecadeRegex}|centur(y|ies)|weekends?|quarters?|hal(f|ves)|yesterday|to(morrow|day|night)|tmr|noonish|\d(-|——)?ish|((the\s+\w*)|\d)(th|rd|nd|st)|(mid\s*(-\s*)?)?(night|morning|afternoon|day)s?|evenings?|noon|lunch(time)?|dinner(time)?|(day|night)time|overnight|dawn|dusk|sunset|hours?|hrs?|h|minutes?|mins?|seconds?|secs?|eo[dmy]|mardi[ -]?gras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|cinco de mayo|all (hallow|souls)|guy fawkes|(st )?patrick|hundreds?|noughties|aughts|thousands?)\b|{WeekDayRegex}|{SetWeekDayRegex}|{NowRegex}|{PeriodicRegex}|\b({DateUnitRegex}|{ImplicitDayRegex})"; + public static readonly string TasksModeSupressionRegexes = $@"({AmPmDescRegex}|{TasksModeSpecialDescRegex}|{TasksModeHolidayListSupression}|{DecadeRegex}|{DecadeWithCenturyRegex}|{QuarterRegex}|{QuarterRegexYearFront}|{AllHalfYearRegex}|{SeasonRegex})"; + public const string TasksModeNextPrefix = @"(?next\s+)"; + public static readonly string TasksModeDurationToDatePatterns = $@"\b({TasksModeNextPrefix}((?week)|(?month)|(?year)))\b"; + public static readonly string TimePeriodFromForRegex = $@"(from\s+)(?(({TimeRegex2}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))\s*for\s+(.*?)\s+({DurationUnitRegex})(\s+(.*?)\s+({DurationUnitRegex}))?"; + public static readonly string TimePeriodForFromRegex = $@"for\s+(.*?)\s+({DurationUnitRegex})(\s+(.*?)\s+({DurationUnitRegex}))?\s+(from\s+)(?(({TimeRegex2}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))"; + public static readonly string TimePeriodWithDurationRegex = $@"({TimePeriodFromForRegex}|{TimePeriodForFromRegex})"; public static readonly Dictionary UnitMap = new 
Dictionary { { @"decades", @"10Y" }, { @"decade", @"10Y" }, { @"years", @"Y" }, { @"year", @"Y" }, + { @"y", @"Y" }, { @"months", @"MON" }, { @"month", @"MON" }, - { @"fortnights", @"2W" }, - { @"fortnight", @"2W" }, + { @"m", @"M" }, + { @"quarters", @"3MON" }, + { @"quarter", @"3MON" }, + { @"semesters", @"6MON" }, + { @"semestres", @"6MON" }, + { @"semester", @"6MON" }, + { @"semestre", @"6MON" }, { @"weeks", @"W" }, { @"week", @"W" }, + { @"w", @"W" }, + { @"weekends", @"WE" }, + { @"weekend", @"WE" }, + { @"fortnights", @"2W" }, + { @"fortnight", @"2W" }, + { @"weekdays", @"WD" }, + { @"weekday", @"WD" }, { @"days", @"D" }, { @"day", @"D" }, + { @"d", @"D" }, + { @"nights", @"D" }, + { @"night", @"D" }, { @"hours", @"H" }, { @"hour", @"H" }, { @"hrs", @"H" }, @@ -311,14 +370,22 @@ public static class DateTimeDefinitions { @"decade", 315360000 }, { @"years", 31536000 }, { @"year", 31536000 }, + { @"y", 31536000 }, { @"months", 2592000 }, { @"month", 2592000 }, + { @"m", 2592000 }, { @"fortnights", 1209600 }, { @"fortnight", 1209600 }, + { @"weekends", 172800 }, + { @"weekend", 172800 }, { @"weeks", 604800 }, { @"week", 604800 }, + { @"w", 604800 }, { @"days", 86400 }, { @"day", 86400 }, + { @"d", 86400 }, + { @"nights", 86400 }, + { @"night", 86400 }, { @"hours", 3600 }, { @"hour", 3600 }, { @"hrs", 3600 }, @@ -366,7 +433,21 @@ public static class DateTimeDefinitions { @"fourth", 4 }, { @"4th", 4 }, { @"fifth", 5 }, - { @"5th", 5 } + { @"5th", 5 }, + { @"sixth", 6 }, + { @"6th", 6 }, + { @"seventh", 7 }, + { @"7th", 7 }, + { @"eighth", 8 }, + { @"8th", 8 }, + { @"ninth", 9 }, + { @"9th", 9 }, + { @"tenth", 10 }, + { @"10th", 10 }, + { @"eleventh", 11 }, + { @"11th", 11 }, + { @"twelfth", 12 }, + { @"12th", 12 } }; public static readonly Dictionary DayOfWeek = new Dictionary { @@ -547,8 +628,11 @@ public static class DateTimeDefinitions public static readonly Dictionary DayOfMonth = new Dictionary { { @"1st", 1 }, + { @"1th", 1 }, { @"2nd", 2 }, + { @"2th", 2 }, 
{ @"3rd", 3 }, + { @"3th", 3 }, { @"4th", 4 }, { @"5th", 5 }, { @"6th", 6 }, @@ -584,8 +668,11 @@ public static class DateTimeDefinitions { @"30th", 30 }, { @"31st", 31 }, { @"01st", 1 }, + { @"01th", 1 }, { @"02nd", 2 }, + { @"02th", 2 }, { @"03rd", 3 }, + { @"03th", 3 }, { @"04th", 4 }, { @"05th", 5 }, { @"06th", 6 }, @@ -636,7 +723,7 @@ public static class DateTimeDefinitions { @"arborday", new string[] { @"arborday" } }, { @"girlsday", new string[] { @"girlsday" } }, { @"whiteloverday", new string[] { @"whiteloverday" } }, - { @"loverday", new string[] { @"loverday" } }, + { @"loverday", new string[] { @"loverday", @"loversday" } }, { @"christmas", new string[] { @"christmasday", @"christmas" } }, { @"xmas", new string[] { @"xmasday", @"xmas" } }, { @"newyear", new string[] { @"newyear" } }, @@ -648,7 +735,7 @@ public static class DateTimeDefinitions { @"stpatrickday", new string[] { @"stpatrickday", @"stpatricksday", @"stpatrick" } }, { @"aprilfools", new string[] { @"aprilfools" } }, { @"stgeorgeday", new string[] { @"stgeorgeday" } }, - { @"mayday", new string[] { @"mayday", @"intlworkersday", @"internationalworkersday" } }, + { @"mayday", new string[] { @"mayday", @"intlworkersday", @"internationalworkersday", @"workersday" } }, { @"cincodemayoday", new string[] { @"cincodemayoday" } }, { @"baptisteday", new string[] { @"baptisteday" } }, { @"usindependenceday", new string[] { @"usindependenceday" } }, @@ -660,7 +747,12 @@ public static class DateTimeDefinitions { @"guyfawkesday", new string[] { @"guyfawkesday" } }, { @"veteransday", new string[] { @"veteransday" } }, { @"christmaseve", new string[] { @"christmaseve" } }, - { @"newyeareve", new string[] { @"newyearseve", @"newyeareve" } } + { @"newyeareve", new string[] { @"newyearseve", @"newyeareve" } }, + { @"juneteenth", new string[] { @"juneteenth", @"freedomday", @"jubileeday" } }, + { @"ramadan", new string[] { @"ramadan", @"ramazan", @"ramzan", @"ramadhan", @"ramathan" } }, + { @"sacrifice", new 
string[] { @"eidaladha", @"eidalazha", @"eidulazha", @"feastofthesacrifice" } }, + { @"islamicnewyear", new string[] { @"islamicnewyear", @"hijrinewyear", @"arabicnewyear" } }, + { @"eidalfitr", new string[] { @"eidalfitr", @"festivalofbreakingthefast" } } }; public static readonly Dictionary WrittenDecades = new Dictionary { @@ -687,17 +779,32 @@ public static class DateTimeDefinitions @"preferably", @"how about", @"maybe", + @"perhaps", @"say", @"like" }; - public static readonly string[] DurationDateRestrictions = { @"today", @"now" }; + public static readonly string[] DurationDateRestrictions = { @"today", @"now", @"current date" }; public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { + { @"^\d{4}$", @"(\d\.\d{4}|\d{4}\.\d)" }, { @"^(morning|afternoon|evening|night|day)\b", @"\b(good\s+(morning|afternoon|evening|night|day))|(nighty\s+night)\b" }, { @"\bnow\b", @"\b(^now,)|\b((is|are)\s+now\s+for|for\s+now)\b" }, - { @"\bmay\b", @"\b((((!|\.|\?|,|;|)\s+|^)may i)|(i|you|he|she|we|they)\s+may|(may\s+((((also|not|(also not)|well)\s+)?(be|ask|contain|constitute|e-?mail|take|have|result|involve|get|work|reply|differ))|(or may not))))\b" }, - { @"\b(a|one) second\b", @"\b(? 
AmbiguityTimeFiltersDict = new Dictionary + { + { @"^(\p{L}+|\d{1,2})(\s+(morning|afternoon|evening|night))?$", @"\b(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|\d{1,2})\s+(morning|afternoon|evening|night)\b" } + }; + public static readonly Dictionary AmbiguityDurationFiltersDict = new Dictionary + { + { @"night$", @"\bnight(\s*|-)(club|light|market|shift|work(er)?)s?\b" } }; public static readonly IList MorningTermList = new List { @@ -738,14 +845,22 @@ public static class DateTimeDefinitions { @"night" }; + public static readonly IList NighttimeTermList = new List + { + @"nighttime", + @"night-time" + }; public static readonly IList SameDayTerms = new List { - @"today" + @"today", + @"current date", + @"otd" }; public static readonly IList PlusOneDayTerms = new List { @"tomorrow", @"tmr", + @"tmrw", @"day after" }; public static readonly IList MinusOneDayTerms = new List @@ -756,7 +871,8 @@ public static class DateTimeDefinitions public static readonly IList PlusTwoDayTerms = new List { @"day after tomorrow", - @"day after tmr" + @"day after tmr", + @"day after tmrw" }; public static readonly IList MinusTwoDayTerms = new List { @@ -787,6 +903,11 @@ public static class DateTimeDefinitions { @"week" }; + public static readonly IList FortnightTerms = new List + { + @"fortnight", + @"fourtenight" + }; public static readonly IList YearTerms = new List { @"year" @@ -799,5 +920,15 @@ public static class DateTimeDefinitions { @"year to date" }; + public const string DoubleMultiplierRegex = @"^(bi)(-|\s)?"; + public const string HalfMultiplierRegex = @"^(semi)(-|\s)?"; + public const string DayTypeRegex = @"((week)?da(il)?ys?)$"; + public const string WeekDayTypeRegex = @"(weekday?)$"; + public const string FortNightRegex = @"(fortnight?)$"; + public const string WeekTypeRegex = @"(week(s|ly)?)$"; + public const string WeekendTypeRegex = @"(weekends?)$"; + public const string MonthTypeRegex = @"(month(s|ly)?)$"; + public const string QuarterTypeRegex = 
@"(quarter(s|ly)?)$"; + public const string YearTypeRegex = @"((years?|annual)(ly)?)$"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersDefinitions.cs index b1a297ad56..e341256d3a 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersDefinitions.cs @@ -24,76 +24,90 @@ public static class NumbersDefinitions public const string LangMarker = @"Eng"; public const bool CompoundNumberLanguage = false; public const bool MultiDecimalSeparatorCulture = true; - public const string RoundNumberIntegerRegex = @"(?:hundred|thousand|million|billion|trillion)"; - public const string ZeroToNineIntegerRegex = @"(?:three|seven|eight|four|five|zero|nine|one|two|six)"; + public static readonly IList NonStandardSeparatorVariants = new List + { + @"en-za", + @"en-na", + @"en-zw" + }; + public const string RoundNumberIntegerRegex = @"(?:hundred|thousand|million|mln|billion|bln|trillion|tln|lakh|crore)s?"; + public const string ZeroToNineIntegerRegex = @"(?:three|seven|eight|four|five|zero|n[ao]ught|nine|one|two|six)"; public const string TwoToNineIntegerRegex = @"(?:three|seven|eight|four|five|nine|two|six)"; public const string NegativeNumberTermsRegex = @"(?(minus|negative)\s+)"; public static readonly string NegativeNumberSignRegex = $@"^{NegativeNumberTermsRegex}.*"; public const string AnIntRegex = @"(an?)(?=\s)"; public const string TenToNineteenIntegerRegex = @"(?:seventeen|thirteen|fourteen|eighteen|nineteen|fifteen|sixteen|eleven|twelve|ten)"; public const string TensNumberIntegerRegex = @"(?:seventy|twenty|thirty|eighty|ninety|forty|fifty|sixty)"; - public static readonly string SeparaIntRegex = 
$@"(?:(({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}(\s+(and\s+)?|\s*-\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{RoundNumberIntegerRegex})(\s+{RoundNumberIntegerRegex})*))|(({AnIntRegex}(\s+{RoundNumberIntegerRegex})+))"; + public static readonly string SeparaIntRegex = $@"(?:(({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}(\s+(and\s+)?|\s*-\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex})(\s+{RoundNumberIntegerRegex})*))|(({AnIntRegex}(\s+{RoundNumberIntegerRegex})+))"; public static readonly string AllIntRegex = $@"(?:((({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}(\s+(and\s+)?|\s*-\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{AnIntRegex})(\s+{RoundNumberIntegerRegex})+)\s+(and\s+)?)*{SeparaIntRegex})"; public const string PlaceHolderPureNumber = @"\b"; - public const string PlaceHolderDefault = @"\D|\b"; - public static readonly Func NumbersWithPlaceHolder = (placeholder) => $@"(((? 
NumbersWithPlaceHolder = (placeholder) => $@"(((?(next|previous|current)\s+one|(the\s+second|next)\s+to\s+last|the\s+one\s+before\s+the\s+last(\s+one)?|the\s+last\s+but\s+one|(ante)?penultimate|last|next|previous|current)"; - public static readonly string BasicOrdinalRegex = $@"({NumberOrdinalRegex}|{RelativeOrdinalRegex})"; - public static readonly string SuffixBasicOrdinalRegex = $@"(?:(((({TensNumberIntegerRegex}(\s+(and\s+)?|\s*-\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{AnIntRegex})(\s+{RoundNumberIntegerRegex})+)\s+(and\s+)?)*({TensNumberIntegerRegex}(\s+|\s*-\s*))?{BasicOrdinalRegex})"; + public static readonly string SuffixBasicOrdinalRegex = $@"(?:(((({TensNumberIntegerRegex}(\s+(and\s+)?|\s*-\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{AnIntRegex})(\s+{RoundNumberIntegerRegex})+)\s+(and\s+)?)*({TensNumberIntegerRegex}(\s+|\s*-\s*))?{NumberOrdinalRegex})"; public static readonly string SuffixRoundNumberOrdinalRegex = $@"(?:({AllIntRegex}\s+){RoundNumberOrdinalRegex})"; - public static readonly string AllOrdinalRegex = $@"(?:{SuffixBasicOrdinalRegex}|{SuffixRoundNumberOrdinalRegex})"; + public static readonly string AllOrdinalNumberRegex = $@"(?:{SuffixBasicOrdinalRegex}|{SuffixRoundNumberOrdinalRegex})"; + public static readonly string AllOrdinalRegex = $@"(?:{AllOrdinalNumberRegex}|{RelativeOrdinalRegex})"; public const string OrdinalSuffixRegex = @"(?<=\b)(?:(\d*(1st|2nd|3rd|[4-90]th))|(1[1-2]th))(?=\b)"; public const string OrdinalNumericRegex = @"(?<=\b)(?:\d{1,3}(\s*,\s*\d{3})*\s*th)(?=\b)"; public static readonly string OrdinalRoundNumberRegex = $@"(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?![\.,]))(?=\b)"; + public static readonly string FractionNotationRegex = $@"{BaseNumbers.FractionNotationRegex}"; + public static readonly string FractionMultiplierRegex = 
$@"(?\s+and\s+(a|one|{TwoToNineIntegerRegex})\s+(half|quarter|third|fourth|fifth|sixth|seventh|eighth|nine?th|tenth)s?)"; + public static readonly string RoundMultiplierWithFraction = $@"(?<=(?(?:million|mln|billion|bln|trillion|tln)s?)(?={FractionMultiplierRegex}?$)"; + public static readonly string RoundMultiplierRegex = $@"\b\s*((of\s+)?a\s+)?({RoundMultiplierWithFraction}|(?(?:hundred|thousand|lakh|crore)s?)$)"; + public static readonly string FractionNounRegex = $@"(?<=\b)({AllIntRegex}\s+(and\s+)?)?(({AllIntRegex})(\s+|\s*-\s*)((({AllOrdinalNumberRegex})|({RoundNumberOrdinalRegex}))s|halves|quarters)((\s+of\s+a)?\s+{RoundNumberIntegerRegex})?|(half(\s+a)?|quarter(\s+of\s+a)?)\s+{RoundNumberIntegerRegex})(?=\b)"; + public static readonly string FractionNounWithArticleRegex = $@"(?<=\b)(((({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\s+(and\s+)?)?(an?|one)(\s+|\s*-\s*)(?!\bfirst\b|\bsecond\b)(({AllOrdinalNumberRegex})|({RoundNumberOrdinalRegex})|(half|quarter)(((\s+of)?\s+a)?\s+{RoundNumberIntegerRegex})?))|(half))(?=\b)"; + public static readonly string FractionPrepositionRegex = $@"(?({AllIntRegex})|((?in|out\s+of))\s+(?({AllIntRegex})|(\d+)(?![\.,]))(?=\b)"; public static readonly string FractionPrepositionWithinPercentModeRegex = $@"(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?![\.,]))(?=\b)"; public static readonly string AllPointRegex = $@"((\s+{ZeroToNineIntegerRegex})+|(\s+{SeparaIntRegex}))"; public static readonly string AllFloatRegex = $@"{AllIntRegex}(\s+point){AllPointRegex}"; - public static readonly string DoubleWithMultiplierRegex = $@"(((? DoubleDecimalPointRegex = (placeholder) => $@"(((? DoubleDecimalPointRegex = (placeholder) => $@"(((? 
DoubleWithoutIntegralRegex = (placeholder) => $@"(?<=\s|^)(?and)"; - public static readonly string NumberWithSuffixPercentage = $@"(?)"; + public const string TillRegex = @"((?)"; public const string LessRegex = @"(?:(less|lower|smaller|fewer)(\s+than)?|below|under|(?|=)<)"; public const string EqualRegex = @"(equal(s|ing)?(\s+(to|than))?|(?)=)"; public static readonly string MoreOrEqualPrefix = $@"((no\s+{LessRegex})|(at\s+least))"; - public static readonly string MoreOrEqual = $@"(?:({MoreRegex}\s+(or)?\s+{EqualRegex})|({EqualRegex}\s+(or)?\s+{MoreRegex})|{MoreOrEqualPrefix}(\s+(or)?\s+{EqualRegex})?|({EqualRegex}\s+(or)?\s+)?{MoreOrEqualPrefix}|>\s*=)"; - public const string MoreOrEqualSuffix = @"((and|or)\s+(((more|greater|higher|larger|bigger)((?!\s+than)|(\s+than(?!(\s*\d+)))))|((over|above)(?!\s+than))))"; + public static readonly string MoreOrEqual = $@"(?:({MoreRegex}\s+(or)?\s+{EqualRegex})|({EqualRegex}\s+(or)?\s+{MoreRegex})|{MoreOrEqualPrefix}(\s+(or)?\s+{EqualRegex})?|({EqualRegex}\s+(or)?\s+)?{MoreOrEqualPrefix}|>\s*=|≥)"; + public const string MoreOrEqualSuffix = @"((and|or)\s+(((more|greater|higher|larger|bigger)((?!\s+than)|(\s+than(?!((\s+or\s+equal\s+to)?\s*\d+)))))|((over|above)(?!\s+than))))"; public static readonly string LessOrEqualPrefix = $@"((no\s+{MoreRegex})|(at\s+most)|(up\s+to))"; - public static readonly string LessOrEqual = $@"(({LessRegex}\s+(or)?\s+{EqualRegex})|({EqualRegex}\s+(or)?\s+{LessRegex})|{LessOrEqualPrefix}(\s+(or)?\s+{EqualRegex})?|({EqualRegex}\s+(or)?\s+)?{LessOrEqualPrefix}|<\s*=)"; + public static readonly string LessOrEqual = $@"(({LessRegex}\s+(or)?\s+{EqualRegex})|({EqualRegex}\s+(or)?\s+{LessRegex})|{LessOrEqualPrefix}(\s+(or)?\s+{EqualRegex})?|({EqualRegex}\s+(or)?\s+)?{LessOrEqualPrefix}|<\s*=|≤)"; public const string LessOrEqualSuffix = @"((and|or)\s+(less|lower|smaller|fewer)((?!\s+than)|(\s+than(?!(\s*\d+)))))"; - public const string NumberSplitMark = @"(?![,.](?!\d+))"; + public static readonly string 
NumberSplitMark = $@"(?![,.](?!\d+))(?!\s*\b(and\s+({LessRegex}|{MoreRegex})|but|or|to)\b)"; public const string MoreRegexNoNumberSucceed = @"((bigger|greater|more|higher|larger)((?!\s+than)|\s+(than(?!(\s*\d+))))|(above|over)(?!(\s*\d+)))"; public const string LessRegexNoNumberSucceed = @"((less|lower|smaller|fewer)((?!\s+than)|\s+(than(?!(\s*\d+))))|(below|under)(?!(\s*\d+)))"; public const string EqualRegexNoNumberSucceed = @"(equal(s|ing)?((?!\s+(to|than))|(\s+(to|than)(?!(\s*\d+)))))"; public static readonly string OneNumberRangeMoreRegex1 = $@"({MoreOrEqual}|{MoreRegex})\s*(the\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeMoreRegex1LB = $@"(?({NumberSplitMark}.)+)\s*{MoreOrEqualSuffix}"; public static readonly string OneNumberRangeMoreSeparateRegex = $@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+or\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\s+(?({NumberSplitMark}.)+)(\s+or\s+){EqualRegexNoNumberSucceed})"; public static readonly string OneNumberRangeLessRegex1 = $@"({LessOrEqual}|{LessRegex})\s*(the\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeLessRegex1LB = $@"(?({NumberSplitMark}.)+)\s*{LessOrEqualSuffix}"; public static readonly string OneNumberRangeLessSeparateRegex = $@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+or\s+){LessRegexNoNumberSucceed})|({LessRegex}\s+(?({NumberSplitMark}.)+)(\s+or\s+){EqualRegexNoNumberSucceed})"; - public static readonly string OneNumberRangeEqualRegex = $@"{EqualRegex}\s*(the\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeEqualRegex = $@"(?({NumberSplitMark}.)+)"; public static readonly string TwoNumberRangeRegex1 = $@"between\s*(the\s+)?(?({NumberSplitMark}.)+)\s*and\s*(the\s+)?(?({NumberSplitMark}.)+)"; public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\s*(and|but|,)\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"; public static readonly string 
TwoNumberRangeRegex3 = $@"({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\s*(and|but|,)\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})"; @@ -109,11 +123,13 @@ public static class NumbersDefinitions public static readonly string[] WrittenIntegerSeparatorTexts = { @"and" }; public static readonly string[] WrittenFractionSeparatorTexts = { @"and" }; public const string HalfADozenRegex = @"half\s+a\s+dozen"; - public static readonly string DigitalNumberRegex = $@"((?<=\b)(hundred|thousand|[mb]illion|trillion|dozen(s)?)(?=\b))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; + public static readonly string DigitalNumberRegex = $@"((?<=\b)(hundred|thousand|[mb]illion|trillion|[mbt]ln|lakh|crore|(doz(en)?|dz)s?)(?=\b))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; public static readonly Dictionary CardinalNumberMap = new Dictionary { { @"a", 1 }, { @"zero", 0 }, + { @"naught", 0 }, + { @"nought", 0 }, { @"an", 1 }, { @"one", 1 }, { @"two", 2 }, @@ -129,6 +145,10 @@ public static class NumbersDefinitions { @"twelve", 12 }, { @"dozen", 12 }, { @"dozens", 12 }, + { @"dz", 12 }, + { @"doz", 12 }, + { @"dzs", 12 }, + { @"dozs", 12 }, { @"thirteen", 13 }, { @"fourteen", 14 }, { @"fifteen", 15 }, @@ -147,8 +167,20 @@ public static class NumbersDefinitions { @"hundred", 100 }, { @"thousand", 1000 }, { @"million", 1000000 }, + { @"mln", 1000000 }, { @"billion", 1000000000 }, - { @"trillion", 1000000000000 } + { @"bln", 1000000000 }, + { @"trillion", 1000000000000 }, + { @"tln", 1000000000000 }, + { @"lakh", 100000 }, + { @"crore", 10000000 }, + { @"hundreds", 100 }, + { @"thousands", 1000 }, + { @"millions", 1000000 }, + { @"billions", 1000000000 }, + { @"trillions", 1000000000000 }, + { @"lakhs", 100000 }, + { @"crores", 10000000 } }; public static readonly Dictionary OrdinalNumberMap = new Dictionary { @@ -164,6 +196,7 @@ public static class NumbersDefinitions { @"seventh", 7 }, { @"eighth", 8 }, { @"ninth", 9 }, + { @"nineth", 9 }, { 
@"tenth", 10 }, { @"eleventh", 11 }, { @"twelfth", 12 }, @@ -197,6 +230,7 @@ public static class NumbersDefinitions { @"sevenths", 7 }, { @"eighths", 8 }, { @"ninths", 9 }, + { @"nineths", 9 }, { @"tenths", 10 }, { @"elevenths", 11 }, { @"twelfths", 12 }, @@ -226,8 +260,20 @@ public static class NumbersDefinitions { @"hundred", 100 }, { @"thousand", 1000 }, { @"million", 1000000 }, + { @"mln", 1000000 }, { @"billion", 1000000000 }, + { @"bln", 1000000000 }, { @"trillion", 1000000000000 }, + { @"tln", 1000000000000 }, + { @"lakh", 100000 }, + { @"crore", 10000000 }, + { @"hundreds", 100 }, + { @"thousands", 1000 }, + { @"millions", 1000000 }, + { @"billions", 1000000000 }, + { @"trillions", 1000000000000 }, + { @"lakhs", 100000 }, + { @"crores", 10000000 }, { @"hundredth", 100 }, { @"thousandth", 1000 }, { @"millionth", 1000000 }, @@ -240,8 +286,14 @@ public static class NumbersDefinitions { @"trillionths", 1000000000000 }, { @"dozen", 12 }, { @"dozens", 12 }, + { @"dz", 12 }, + { @"doz", 12 }, + { @"dzs", 12 }, + { @"dozs", 12 }, { @"k", 1000 }, { @"m", 1000000 }, + { @"mm", 1000000 }, + { @"mil", 1000000 }, { @"g", 1000000000 }, { @"b", 1000000000 }, { @"t", 1000000000000 } diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersWithUnitDefinitions.cs index 06ff39ada7..59bc50765f 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersWithUnitDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersWithUnitDefinitions.cs @@ -21,13 +21,22 @@ namespace Microsoft.Recognizers.Definitions.English public static class NumbersWithUnitDefinitions { + public static readonly Dictionary AgePrefixList = new Dictionary + { + { @"Age", @"Age|age" } + }; public static readonly Dictionary AgeSuffixList = new Dictionary { - { @"Year", @"years old|year old|year-old|years-old|-year-old|-years-old|years of age|year of age" }, - { @"Month", 
@"months old|month old|month-old|months-old|-month-old|-months-old|month of age|months of age" }, + { @"Year", @"years old|year old|year-old|years-old|-year-old|-years-old|years of age|year of age|yo" }, + { @"Month", @"months old|month old|month-old|months-old|-month-old|-months-old|month of age|months of age|mo" }, { @"Week", @"weeks old|week old|week-old|weeks-old|-week-old|-weeks-old|week of age|weeks of age" }, { @"Day", @"days old|day old|day-old|days-old|-day-old|-days-old|day of age|days of age" } }; + public static readonly IList AmbiguousAgeUnitList = new List + { + @"yo", + @"mo" + }; public static readonly Dictionary AreaSuffixList = new Dictionary { { @"Square kilometer", @"sq km|sq kilometer|sq kilometre|sq kilometers|sq kilometres|square kilometer|square kilometre|square kilometers|square kilometres|km2|km^2|km²" }, @@ -43,6 +52,10 @@ public static class NumbersWithUnitDefinitions { @"Square yard", @"sq yd|sq yard|sq yards|square yard|square yards|yd2|yd^2|yd²" }, { @"Acre", @"-acre|acre|acres" } }; + public static readonly IList AmbiguousAreaUnitList = new List + { + @"n/a" + }; public static readonly Dictionary CurrencySuffixList = new Dictionary { { @"Abkhazian apsar", @"abkhazian apsar|apsars" }, @@ -142,7 +155,7 @@ public static class NumbersWithUnitDefinitions { @"Halala", @"halalas|halala" }, { @"Samoan tālā", @"samoan tālā|tālā|tala|ws$|samoa|wst|samoan tala" }, { @"Sene", @"sene" }, - { @"São Tomé and Príncipe dobra", @"são tomé and príncipe dobra|dobras|dobra|std" }, + { @"São Tomé and Príncipe dobra", @"são tomé and príncipe dobra|dobras|dobra" }, { @"Sierra Leonean leone", @"sierra leonean leone|sll|leone|le" }, { @"Peseta", @"pesetas|peseta" }, { @"Netherlands guilder", @"florin|netherlands antillean guilder|ang|nederlandse gulden|guilders|guilder|gulden|-guilders|-guilder|dutch guilders|dutch guilder|fl" }, @@ -230,7 +243,7 @@ public static class NumbersWithUnitDefinitions { @"Cuban convertible peso", @"cuban convertible pesos|cuban 
convertible peso|cuc|cuba convertible pesos|cuba convertible peso" }, { @"Cuban peso", @"cuban pesos|cuban peso|cup|cuba pesos|cuba peso" }, { @"Dominican peso", @"dominican pesos|dominican peso|dop|dominica pesos|dominica peso" }, - { @"Mexican peso", @"mexican pesos|mexican peso|mxn|mexico pesos|mexico peso" }, + { @"Mexican peso", @"mexican pesos|mexican peso|mxn|mexico pesos|mexico peso|mxn$|mxn $|mex$" }, { @"Philippine peso", @"piso|philippine pesos|philippine peso|₱|php" }, { @"Uruguayan peso", @"uruguayan pesos|uruguayan peso|uyu" }, { @"Peso", @"pesos|peso" }, @@ -255,7 +268,7 @@ public static class NumbersWithUnitDefinitions { @"Pence", @"pence" }, { @"Shilling", @"shillings|shilling|shilingi|sh" }, { @"Penny", @"pennies|penny" }, - { @"United States dollar", @"united states dollars|united states dollar|united states $|u.s. dollars|u.s. dollar|u s dollar|u s dollars|usd|american dollars|american dollar|us$|us dollar|us dollars|u.s dollar|u.s dollars" }, + { @"United States dollar", @"united states dollars|united states dollar|united states $|u.s. dollars|u.s. 
dollar|u s dollar|u s dollars|usd|american dollars|american dollar|us$|us dollar|us dollars|u.s dollar|u.s dollars|usd$" }, { @"East Caribbean dollar", @"east caribbean dollars|east caribbean dollar|east Caribbean $|xcd" }, { @"Australian dollar", @"australian dollars|australian dollar|australian $|australian$|aud|australia dollars|australia dollar|australia $|australia$" }, { @"Bahamian dollar", @"bahamian dollars|bahamian dollar|bahamian $|bahamian$|bsd|bahamia dollars|bahamia dollar|bahamia $|bahamia$" }, @@ -292,7 +305,10 @@ public static class NumbersWithUnitDefinitions { @"Fen", @"fen" }, { @"Jiao", @"jiao|mao" }, { @"Finnish markka", @"suomen markka|finnish markka|finsk mark|fim|markkaa|markka" }, - { @"Penni", @"penniä|penni" } + { @"Penni", @"penniä|penni" }, + { @"Bitcoin", @"bitcoin|bitcoins|btc|xbt|₿" }, + { @"Millibitcoin", @"millibitcoin|millibitcoins|milibitcoin|milibitcoins" }, + { @"Satoshi", @"satoshi|satoshis" } }; public static readonly Dictionary CurrencyNameToIsoCodeMap = new Dictionary { @@ -356,7 +372,7 @@ public static class NumbersWithUnitDefinitions { @"Qatari riyal", @"QAR" }, { @"Saudi riyal", @"SAR" }, { @"Samoan tālā", @"WST" }, - { @"São Tomé and Príncipe dobra", @"STD" }, + { @"São Tomé and Príncipe dobra", @"STN" }, { @"Sierra Leonean leone", @"SLL" }, { @"Swazi lilangeni", @"SZL" }, { @"Tajikistani somoni", @"TJS" }, @@ -398,7 +414,7 @@ public static class NumbersWithUnitDefinitions { @"Rwandan franc", @"RWF" }, { @"Russian ruble", @"RUB" }, { @"Transnistrian ruble", @"PRB" }, - { @"Belarusian ruble", @"BYN" }, + { @"New Belarusian ruble", @"BYN" }, { @"Algerian dinar", @"DZD" }, { @"Bahraini dinar", @"BHD" }, { @"Iraqi dinar", @"IQD" }, @@ -480,7 +496,8 @@ public static class NumbersWithUnitDefinitions { @"British Virgin Islands dollar", @"_BD" }, { @"Ascension pound", @"_AP" }, { @"Alderney pound", @"_ALP" }, - { @"Abkhazian apsar", @"_AA" } + { @"Abkhazian apsar", @"_AA" }, + { @"Bitcoin", @"_XBT" } }; public static readonly 
Dictionary FractionalUnitNameToCodeMap = new Dictionary { @@ -556,14 +573,20 @@ public static class NumbersWithUnitDefinitions { @"Kopiyka", @"KOPIYKA" }, { @"Tiyin", @"TIYIN" }, { @"Hào", @"HAO" }, - { @"Ngwee", @"NGWEE" } + { @"Ngwee", @"NGWEE" }, + { @"Millibitcoin", @"MILLIBITCOIN" }, + { @"Satoshi", @"SATOSHI" } }; public const string CompoundUnitConnectorRegex = @"(?and)"; + public const string MultiplierRegex = @"\s*\b(thousand|million|billion|trillion)s?\b"; public static readonly Dictionary CurrencyPrefixList = new Dictionary { + { @"Dobra", @"db|std" }, { @"Dollar", @"$" }, - { @"United States dollar", @"united states $|us$|us $|u.s. $|u.s $" }, + { @"Brazilian Real", @"R$" }, + { @"United States dollar", @"united states $|us$|us $|u.s. $|u.s $|usd$" }, { @"East Caribbean dollar", @"east caribbean $" }, + { @"Mexican peso", @"mxn$|mxn $|mex$" }, { @"Australian dollar", @"australian $|australia $" }, { @"Bahamian dollar", @"bahamian $|bahamia $" }, { @"Barbadian dollar", @"barbadian $|barbadin $" }, @@ -601,7 +624,8 @@ public static class NumbersWithUnitDefinitions { @"Euro", @"€" }, { @"Pound", @"£" }, { @"Costa Rican colón", @"₡" }, - { @"Turkish lira", @"₺" } + { @"Turkish lira", @"₺" }, + { @"Bitcoin", @"₿|btc|xbt" } }; public static readonly IList AmbiguousCurrencyUnitList = new List { @@ -619,6 +643,7 @@ public static class NumbersWithUnitDefinitions @"toea", @"vatu", @"yuan", + @"all", @"ang", @"ban", @"bob", @@ -649,7 +674,15 @@ public static class NumbersWithUnitDefinitions @"std", @"try", @"yer", - @"yen" + @"yen", + @"db", + @"pen", + @"ron", + @"mad", + @"zar", + @"gel", + @"satoshi", + @"satoshis" }; public static readonly Dictionary InformationSuffixList = new Dictionary { @@ -671,23 +704,33 @@ public static class NumbersWithUnitDefinitions @"barrel", @"barrels", @"grain", + @"grains", @"pound", @"stone", + @"stones", @"yards", @"yard", @"cord", + @"cords", @"dram", + @"drachm", + @"drachma", @"feet", @"foot", @"gill", @"knot", + @"knots", 
@"peck", + @"pecks", @"cup", + @"cups", @"fps", @"pts", @"in", @"dm", - @"""" + @"""", + @"pinch", + @"pinches" }; public const string BuildPrefix = @"(?<=(\s|^))"; public const string BuildSuffix = @"(?=(\s|\W|$))"; @@ -721,24 +764,33 @@ public static class NumbersWithUnitDefinitions }; public static readonly Dictionary SpeedSuffixList = new Dictionary { - { @"Meter per second", @"meters / second|m/s|meters per second|metres per second|meter per second|metre per second" }, - { @"Kilometer per hour", @"km/h|kilometres per hour|kilometers per hour|kilometer per hour|kilometre per hour" }, + { @"Meter per second", @"meter/second|meters/second|meters / second|m/s|meters per second|metres per second|meter per second|metre per second" }, + { @"Kilometer per hour", @"km/h|kilometres per hour|kilometers per hour|kilometer per hour|kilometre per hour|kph|kmph|km/hr" }, { @"Kilometer per minute", @"km/min|kilometers per minute|kilometres per minute|kilometer per minute|kilometre per minute" }, { @"Kilometer per second", @"km/s|kilometers per second|kilometres per second|kilometer per second|kilometre per second" }, - { @"Mile per hour", @"mph|mile per hour|miles per hour|mi/h|mile / hour|miles / hour|miles an hour" }, - { @"Knot", @"kt|knot|kn" }, - { @"Foot per second", @"ft/s|foot/s|foot per second|feet per second|fps" }, - { @"Foot per minute", @"ft/min|foot/min|foot per minute|feet per minute" }, + { @"Mile per hour", @"mph|mile per hour|miles per hour|mi/h|mile / hour|miles / hour|miles an hour|mi/hr" }, + { @"Knot", @"kt|knot|knots|kn" }, + { @"Foot per second", @"ft/s|foot/s|feet/s|foot per second|feet per second|fps" }, + { @"Foot per minute", @"ft/min|foot/min|feet/min|foot per minute|feet per minute" }, { @"Yard per minute", @"yards per minute|yard per minute|yards / minute|yards/min|yard/min" }, - { @"Yard per second", @"yards per second|yard per second|yards / second|yards/s|yard/s" } + { @"Yard per second", @"yards per second|yard per second|yards / 
second|yards/s|yard/s" }, + { @"Meter per millisecond", @"meter/millisecond|meters/millisecond|meter / millisecond|meters / millisecond|meter per millisecond|meters per millisecond|m/ms" }, + { @"Centimeter per millisecond", @"centimeter/millisecond|centimeters/millisecond|centimeter / millisecond|centimeters / millisecond|centimeter per millisecond|centimeters per millisecond|cm/ms" }, + { @"Kilometer per millisecond", @"kilometer/millisecond|kilometers/millisecond|kilometer / millisecond|kilometers / millisecond|kilometer per millisecond|kilometers per millisecond|km/ms" } + }; + public static readonly IList AmbiguousSpeedUnitList = new List + { + @"knot", + @"knots", + @"fps" }; public static readonly Dictionary TemperatureSuffixList = new Dictionary { - { @"F", @"degrees fahrenheit|degree fahrenheit|deg fahrenheit|degs fahrenheit|fahrenheit|°f|degrees farenheit|degree farenheit|deg farenheit|degs farenheit|degrees f|degree f|deg f|degs f|farenheit|f" }, + { @"F", @"degrees fahrenheit|degree fahrenheit|deg fahrenheit|degs fahrenheit|fahrenheit|°f|° f|degrees farenheit|degree farenheit|deg farenheit|degs farenheit|degrees f|degree f|deg f|degs f|farenheit|f" }, { @"K", @"k|K|kelvin" }, { @"R", @"rankine|°r" }, { @"D", @"delisle|°de" }, - { @"C", @"degrees celsius|degree celsius|deg celsius|degs celsius|celsius|degrees celcius|degree celcius|celcius|deg celcius|degs celcius|degrees centigrade|degree centigrade|centigrade|degrees centigrate|degree centigrate|degs centigrate|deg centigrate|centigrate|degrees c|degree c|deg c|degs c|°c|c" }, + { @"C", @"degrees celsius|degree celsius|deg celsius|degs celsius|celsius|degrees celcius|degree celcius|celcius|deg celcius|degs celcius|degrees centigrade|degree centigrade|centigrade|degrees centigrate|degree centigrate|degs centigrate|deg centigrate|centigrate|degrees c|degree c|deg c|degs c|°c|° c|c" }, { @"Degree", @"degree|degrees|deg.|deg|°" } }; public static readonly IList AmbiguousTemperatureUnitList = new List @@ 
-757,16 +809,26 @@ public static class NumbersWithUnitDefinitions { @"Liter", @"l|litre|liter|liters|litres" }, { @"Deciliter", @"dl|deciliter|decilitre|deciliters|decilitres" }, { @"Centiliter", @"cl|centiliter|centilitre|centiliters|centilitres" }, - { @"Milliliter", @"ml|mls|millilitre|milliliter|millilitres|milliliters" }, + { @"Milliliter", @"ml|mls|millilitre|milliliter|millilitres|milliliters|cc" }, { @"Cubic yard", @"cubic yard|cubic yards" }, { @"Cubic inch", @"cubic inch|cubic inches" }, { @"Cubic foot", @"cubic foot|cubic feet" }, { @"Cubic mile", @"cubic mile|cubic miles" }, { @"Fluid ounce", @"fl oz|fluid ounce|fluid ounces" }, - { @"Teaspoon", @"teaspoon|teaspoons" }, - { @"Tablespoon", @"tablespoon|tablespoons" }, - { @"Pint", @"pint|pints" }, - { @"Volume unit", @"fluid dram|gill|quart|minim|cord|peck|bushel|hogshead|barrels|barrel|bbl" } + { @"Teaspoon", @"teaspoon|teaspoons|teaspoonful|teaspoonfuls|tsp|tsp.|tspn|tspn.|tea spoon|tea spoons|t.|ts." }, + { @"Tablespoon", @"tablespoon|tablespoons|tablespoonful|tablespoonfuls|tbl|tbl.|tbs|tbs.|tbsp|tbsp.|table spoon|table spoons|T.|Tb.|tbls.|tbls" }, + { @"Pint", @"pint|pints|fl pt| fluid pint" }, + { @"Quart", @"quart|quarts|fl qt" }, + { @"Cup", @"cup|cups" }, + { @"Gill", @"gill|gills" }, + { @"Pinch", @"pinch|pinches" }, + { @"Fluid Dram", @"fluid dram|fluid drachm|fluid drachma|fluidram|fluidrams" }, + { @"Barrel", @"barrel|bbl|barrels" }, + { @"Minim", @"minim" }, + { @"Cord", @"cord|cords" }, + { @"Peck", @"peck|pecks" }, + { @"Bushel", @"bushel" }, + { @"Hogshead", @"hogshead" } }; public static readonly IList AmbiguousVolumeUnitList = new List { @@ -774,33 +836,96 @@ public static class NumbersWithUnitDefinitions @"ounce", @"oz", @"cup", + @"cups", @"peck", + @"pecks", @"cord", - @"gill" + @"cords", + @"gill", + @"gills", + @"barrel", + @"barrels", + @"tbl", + @"quart", + @"quarts", + @"pinch", + @"t.", + @"T.", + @"Tb.", + @"ts." 
}; public static readonly Dictionary WeightSuffixList = new Dictionary { - { @"Kilogram", @"kg|kilogram|kilograms|kilo|kilos" }, - { @"Gram", @"g|gram|grams" }, - { @"Milligram", @"mg|milligram|milligrams" }, - { @"Gallon", @"-gallon|gallons|gallon" }, + { @"Kilogram", @"kg|kilogram|kilograms|kilo|kilos|kilogramme|kilogrammes" }, + { @"Gram", @"g|gram|grams|gm|gramme|grammes" }, + { @"Milligram", @"mg|milligram|milligrams|milligramme|milligrammes" }, + { @"Microgram", @"μg|microgram|micrograms|micro gram|micro grams|microgramme|microgrammes|mcg" }, + { @"Gallon", @"-gallon|gallons|gallon|gal" }, { @"Metric ton", @"metric tons|metric ton" }, - { @"Ton", @"-ton|ton|tons|tonne|tonnes" }, + { @"Ton", @"-ton|ton|tons|tonne|tonnes|t" }, { @"Pound", @"pound|pounds|lb|lbs" }, { @"Ounce", @"-ounce|ounce|oz|ounces" }, - { @"Weight unit", @"pennyweight|grain|british long ton|us short hundredweight|stone|dram" } + { @"Grain", @"grain|grains|gr" }, + { @"Pennyweight", @"pennyweight" }, + { @"Long ton (British)", @"british long ton|long ton (british)" }, + { @"Short ton (US)", @"us short ton|short ton (us)" }, + { @"Short hundredweight (US)", @"us short hundredweight|short hundredweight (us)" }, + { @"Stone", @"stone" }, + { @"Dram", @"dram|drachm|drachma|roman drachma|greek drachma" } }; public static readonly IList AmbiguousWeightUnitList = new List { @"g", + @"t", + @"gr", @"oz", @"stone", @"dram", - @"lbs" + @"lbs", + @"gal", + @"grain", + @"grains" + }; + public static readonly Dictionary AngleSuffixList = new Dictionary + { + { @"Degree", @"degree|degrees|deg.|deg|°" }, + { @"Radian", @"radian|radians|rad" }, + { @"Turn", @"turn|turns" } + }; + public static readonly IList AmbiguousAngleUnitList = new List + { + @"turn", + @"turns" }; public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { - { @"\bm\b", @"((('|’)\s*m)|(m\s*('|’)))" } + { @"\bm\b", @"((('|’)\s*m)|(m\s*('|’)))" }, + { @"^\d{5} [cf]$", @"\b([a-z]{2} \d{5} [cf])\b" }, + { @"\b\d+\s*\p{L}+$", 
@"((\d+(\s*\p{L}+[-—–-]|\p{L}+)\d+)|(((\p{L}|\d)[-—–-]\d+\s*|\p{L}\d+)\p{L}+))" }, + { @"^(all|bob|pen|cad|cup|cop|sos|ron|mad|mop|zar|gel)", @"(all|bob|pen|cad|cup|cop|sos|ron|mad|mop|zar|gel)\s*(\d|\p{L})" }, + { @"\d\s*pm\b", @"\b(at|until|since|before|after)\s\d(\d)?\s*pm\b" }, + { @"\bin\b", @"\bin\s*(\d{4}|(jan|febr)uary|march|april|may|ju(ne|ly)|august|october|(sept|nov|dec)ember)" }, + { @"\b(13)?f\b", @"(\bf-series\b|\b13f\s(filings?|be fill?ed))" }, + { @"\d\d\df", @"boeing\s777f" }, + { @"\bc\b", @"\bc\.p\.i(\.)?" }, + { @"\d\s*c\b", @"(dividend (of|by) (\d\.)?\d(\d)?\s*c|(\d\.)?\d(\d)?\s*c/? (a share|per security))" } + }; + public static readonly Dictionary TemperatureAmbiguityFiltersDict = new Dictionary + { + { @"\b(deg(rees?)?|°)$", @"\b((deg(rees?)?|°)\s*(angle|rotation)|(rotat(ion|e[ds]?|ing)|angle)(\s+(\p{L}+|\d+)){0,4}\s*(deg(rees?)?\b|°))" } + }; + public static readonly Dictionary DimensionAmbiguityFiltersDict = new Dictionary + { + { @"\b(deg(rees?)?|°)$", @"\b((deg(rees?)?|°)\s*(c(elsius|entigrate)?|f(ah?renheit)?)|(temperature)(\s+(\p{L}+|\d+)){0,4}\s*(deg(rees?)?\b|°))" }, + { @"\b\d+\s*\p{L}+$", @"((\d+\s*\p{L}+\d+)|(\p{L}\d+\s*\p{L}+))" } + }; + public static readonly Dictionary LengthSubUnitFractionalRatios = new Dictionary + { + { @"Inch", 12 } + }; + public static readonly Dictionary LengthUnitToSubUnitMap = new Dictionary + { + { @"Foot", @"Inch" } }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/PhoneNumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/English/PhoneNumbersDefinitions.cs new file mode 100644 index 0000000000..5f4556cfad --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/PhoneNumbersDefinitions.cs @@ -0,0 +1,27 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. 
+// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\English\English-PhoneNumbers.yaml +// - Language: English +// - ClassName: PhoneNumbersDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.English +{ + using System; + using System.Collections.Generic; + + public static class PhoneNumbersDefinitions + { + public const string NumberReplaceToken = @"@builtin.phonenumber"; + public const string FalsePositivePrefixRegex = @"(account|card)(\s+(#|number))?(\s+is)?:?\s*$"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/PhoneNumbersDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/English/PhoneNumbersDefinitions.tt new file mode 100644 index 0000000000..489a4310a8 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/PhoneNumbersDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\English\English-PhoneNumbers.yaml"; + this.Language = "English"; + this.ClassName = "PhoneNumbersDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/QuotedTextDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/English/QuotedTextDefinitions.cs new file mode 100644 index 0000000000..5e59371563 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/QuotedTextDefinitions.cs @@ -0,0 +1,35 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. 
+// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\English\English-QuotedText.yaml +// - Language: English +// - ClassName: QuotedTextDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.English +{ + using System; + using System.Collections.Generic; + + public static class QuotedTextDefinitions + { + public const string LangMarker = @"Eng"; + public const string QuotedTextRegex1 = @"(“([^“”]+)”)"; + public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)"; + public const string QuotedTextRegex3 = @"(""([^""]+)"")"; + public const string QuotedTextRegex4 = @"(\\'([^\']+)\\')"; + public const string QuotedTextRegex5 = @"(`([^`]+)`)"; + public const string QuotedTextRegex6 = @"(123456)"; + public const string QuotedTextRegex7 = @"(123456)"; + public const string QuotedTextRegex8 = @"(123456)"; + public const string QuotedTextRegex9 = @"(123456)"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/QuotedTextDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/English/QuotedTextDefinitions.tt new file mode 100644 index 0000000000..d63cabc742 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/QuotedTextDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\English\English-QuotedText.yaml"; + this.Language = "English"; + this.ClassName = "QuotedTextDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/TimeZoneDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/English/TimeZoneDefinitions.cs 
index ec79ebd36c..654a95b028 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/English/TimeZoneDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/TimeZoneDefinitions.cs @@ -21,7 +21,7 @@ namespace Microsoft.Recognizers.Definitions.English public static class TimeZoneDefinitions { - public const string DirectUtcRegex = @"\b(utc|gmt)(\s*[+\-\u00B1]?\s*[\d]{1,2}(\s*:\s*[\d]{1,2})?)?\b"; + public const string DirectUtcRegex = @"\b(utc|gmt)(\s*[+\-\u00B1]?\s*[\d]{1,2}h?(\s*:\s*[\d]{1,2})?)?\b"; public static readonly IList AbbreviationsList = new List { @"ABST", @@ -37,6 +37,8 @@ public static class TimeZoneDefinitions @"AKST", @"AMST", @"AMT", + @"AOE", + @"AoE", @"ARBST", @"ARST", @"ART", @@ -180,6 +182,7 @@ public static class TimeZoneDefinitions @"Acre Time", @"Afghanistan Standard Time", @"Alaskan Standard Time", + @"Anywhere on Earth", @"Arab Standard Time", @"Arabian Standard Time", @"Arabic Standard Time", @@ -330,7 +333,9 @@ public static class TimeZoneDefinitions @"Pacific", @"Eastern" }; - public const string LocationTimeSuffixRegex = @"((\s+|-)(timezone|time)\b)"; + public const string BaseTimeZoneSuffixRegex = @"((\s+|-)(friendly|compatible))?(\s+|-)time(zone)?"; + public static readonly string LocationTimeSuffixRegex = $@"({BaseTimeZoneSuffixRegex})\b"; + public static readonly string TimeZoneEndRegex = $@"({BaseTimeZoneSuffixRegex})$"; public static readonly IList AmbiguousTimezoneList = new List { @"bit", @@ -368,6 +373,7 @@ public static class TimeZoneDefinitions { @"akst", -540 }, { @"amst", -10000 }, { @"amt", -10000 }, + { @"aoe", -720 }, { @"arbst", 180 }, { @"arst", 180 }, { @"art", -180 }, @@ -406,7 +412,7 @@ public static class TimeZoneDefinitions { @"esat", -180 }, { @"est", -300 }, { @"estm", -300 }, - { @"et", -240 }, + { @"et", -300 }, { @"fjst", 780 }, { @"fjt", 720 }, { @"get", 240 }, @@ -472,7 +478,7 @@ public static class TimeZoneDefinitions { @"psat", -240 }, { @"pst", -480 }, { @"pstm", -480 }, - { 
@"pt", -420 }, + { @"pt", -480 }, { @"pyst", -10000 }, { @"pyt", -10000 }, { @"rst", 60 }, @@ -523,10 +529,9 @@ public static class TimeZoneDefinitions { @"redmond", -480 }, { @"seattle", -480 }, { @"bellevue", -480 }, - { @"pacific daylight", -420 }, - { @"pacific", -480 }, { @"afghanistan standard", 270 }, { @"alaskan standard", -540 }, + { @"anywhere on earth", -720 }, { @"arab standard", 180 }, { @"arabian standard", 180 }, { @"arabic standard", 180 }, @@ -551,7 +556,9 @@ public static class TimeZoneDefinitions { @"central america standard", -360 }, { @"central asia standard", 360 }, { @"central brazilian standard", -240 }, + { @"central", -360 }, { @"central daylight", -10000 }, + { @"central daylight saving", -10000 }, { @"central europe", 60 }, { @"central european", 60 }, { @"central europe std", 60 }, @@ -571,9 +578,9 @@ public static class TimeZoneDefinitions { @"e. south america standard", -180 }, { @"europe central", 60 }, { @"european central", 60 }, - { @"central", -300 }, - { @"eastern", -240 }, + { @"eastern", -300 }, { @"eastern daylight", -10000 }, + { @"eastern daylight saving", -10000 }, { @"eastern standard time (mexico)", -300 }, { @"eastern standard", -300 }, { @"egypt standard", 120 }, @@ -601,7 +608,9 @@ public static class TimeZoneDefinitions { @"middle east standard", 120 }, { @"montevideo standard", -180 }, { @"morocco standard", 0 }, - { @"mountain", -360 }, + { @"mountain", -420 }, + { @"mountain daylight", -360 }, + { @"mountain daylight saving", -360 }, { @"mountain standard", -420 }, { @"mountain standard time (mexico)", -420 }, { @"myanmar standard", 390 }, @@ -614,9 +623,12 @@ public static class TimeZoneDefinitions { @"north asia standard", 420 }, { @"north korea standard", 510 }, { @"west coast", -420 }, - { @"pacific sa standard", -240 }, + { @"pacific", -480 }, + { @"pacific daylight", -420 }, + { @"pacific daylight saving", -420 }, { @"pacific standard", -480 }, { @"pacific standard time (mexico)", -480 }, + { @"pacific sa 
standard", -240 }, { @"pakistan standard", 300 }, { @"paraguay standard", -240 }, { @"romance standard", 60 }, @@ -666,11 +678,14 @@ public static class TimeZoneDefinitions { @"west asia standard", 300 }, { @"west pacific standard", 600 }, { @"yakutsk standard", 540 }, - { @"pacific daylight saving", -420 }, { @"australian western daylight", 540 }, { @"australian west daylight", 540 }, { @"austrialian western daylight", 540 }, { @"austrialian west daylight", 540 }, + { @"australian western daylight saving", 540 }, + { @"australian west daylight saving", 540 }, + { @"austrialian western daylight saving", 540 }, + { @"austrialian west daylight saving", 540 }, { @"colombia", -300 }, { @"hong kong", 480 }, { @"madrid", 60 }, @@ -1399,11 +1414,14 @@ public static class TimeZoneDefinitions @"U.S. Virgin Islands", @"US Virgin Islands", @"Uganda", + @"UK", @"Ukraine", @"United Arab Emirates", @"United Kingdom", @"United States", @"Uruguay", + @"US", + @"USA", @"UT", @"Utah", @"Uzbekistan", diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/French/ChoiceDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/French/ChoiceDefinitions.cs index 8610a515f3..a64c077213 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/French/ChoiceDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/French/ChoiceDefinitions.cs @@ -23,7 +23,8 @@ public static class ChoiceDefinitions { public const string LangMarker = @"Fr"; public const string TokenizerRegex = @"[^\w\d\u00E0-\u00FC]"; - public const string TrueRegex = @"\b(s[uû]r|ouais|oui|yep|y|sure|approuver|accepter|consentir|d'accord|ça march[eé])\b|(\uD83D\uDC4D|\uD83D\uDC4C)"; - public const string FalseRegex = @"\b(faux|nan|non|pas\s+d'accord|pas\s+concorder|n'est\s+pas\s+(correct|ok)|pas)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)"; + public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)"; + public static readonly string TrueRegex = 
$@"\b(s[uû]r|ouais|oui|yep|y|sure|approuver|accepter|consentir|d'accord|ça march[eé])\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?"; + public static readonly string FalseRegex = $@"\b(faux|nan|non|pas\s+d'accord|pas\s+concorder|n'est\s+pas\s+(correct|ok)|pas)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/French/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/French/DateTimeDefinitions.cs index c69a30bef8..5b0adad63e 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/French/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/French/DateTimeDefinitions.cs @@ -21,113 +21,124 @@ namespace Microsoft.Recognizers.Definitions.French public static class DateTimeDefinitions { + public const string LangMarker = @"Fre"; public const bool CheckBothBeforeAfter = false; - public const string TillRegex = @"(?au|et|(jusqu')?[aà]|avant|--|-|—|——)"; - public const string RangeConnectorRegex = @"(?de la|au|[aà]|et(\s*la)?|--|-|—|——)"; - public const string RelativeRegex = @"(?prochaine?|de|du|ce(tte)?|l[ae]|derni[eè]re|pr[eé]c[eé]dente|au\s+cours+(de|du\s*))"; - public const string StrictRelativeRegex = @"(?prochaine?|derni[eè]re|pr[eé]c[eé]dente|au\s+cours+(de|du\s*))"; - public const string NextSuffixRegex = @"(?prochaines?|prochain|suivante)\b"; - public const string PastSuffixRegex = @"(?derni[eè]re?|pr[eé]c[eé]dente)\b"; + public const string TillRegex = @"(?\b(au|et|(jusqu')?a|avant)\b|(jusqu')?à|--|-|—|——)"; + public const string RangeConnectorRegex = @"(?\b(de\s+la|au|(jusqu')?a|et(\s*la)?)\b|(jusqu')?à|--|-|—|——)"; + public const string RelativeRegex = @"(?prochaine?|de|du|ce(tte)?|l[ae]|derni[eè]re?s?|hier|pr[eé]c[eé]dente|au\s+cours+(de|du\s*))"; + public const string StrictRelativeRegex = @"(?prochaine?|derni[eè]re?s?|hier|pr[eé]c[eé]dente|au\s+cours+(de|du\s*))"; + public const string NextSuffixRegex = 
@"(?prochain(es?)?|suivante)\b"; + public const string PastSuffixRegex = @"(?derni[eè]re?s?|pr[eé]c[eé]dente)\b"; public const string ThisPrefixRegex = @"(?ce(tte)?|au\s+cours+(du|de))\b"; public const string RangePrefixRegex = @"(du|depuis|des?|entre)"; - public const string DayRegex = @"(?01|02|03|04|05|06|07|08|09|10|11e?|12e?|13e?|14e?|15e?|16e?|17e?|18e?|19e?|1er|1|21e?|20e?|22e?|23e?|24e?|25e?|26e?|27e?|28e?|29e?|2e?|30e?|31e?|3e?|4e?|5e?|6e?|7e?|8e?|9e?)(?=\b|t)"; - public const string MonthNumRegex = @"(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)\b"; + public const string DayRegex = @"(?(?:3[0-1]|[1-2]\d|0?[1-9])(e(r)?)?)(?=\b|t)"; + public const string WrittenDayRegex = @"(?((vingt|trente)(\s*-\s*|\s+)et(\s*-\s*|\s+))?un|(vingt(\s*-\s*|\s+))?(deux|trois|quatre|cinq|six|sept|huit|neuf)|dix|onze|douze|treize|quatorze|quinze|seize|dix-(sept|huit|neuf)|vingt|trente)"; + public const string MonthNumRegex = @"(?1[0-2]|(0)?[1-9])\b"; public const string SpecialDescRegex = @"(p\b)"; - public static readonly string AmDescRegex = $@"(h|{BaseDateTime.BaseAmDescRegex})"; - public static readonly string PmDescRegex = $@"(h|{BaseDateTime.BasePmDescRegex})"; - public static readonly string AmPmDescRegex = $@"(h|{BaseDateTime.BaseAmPmDescRegex})"; + public static readonly string AmDescRegex = $@"(h\b|{BaseDateTime.BaseAmDescRegex})"; + public static readonly string PmDescRegex = $@"(h\b|{BaseDateTime.BasePmDescRegex})"; + public static readonly string AmPmDescRegex = $@"(h\b|{BaseDateTime.BaseAmPmDescRegex})"; public static readonly string DescRegex = $@"(?{AmPmDescRegex}|{AmDescRegex}|{PmDescRegex}|{SpecialDescRegex})"; - public static readonly string TwoDigitYearRegex = $@"\b(?([0-27-9]\d))(?!(\s*((\:)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b"; - public const string FullTextYearRegex = @"^[\*]"; + public static readonly string TwoDigitYearRegex = $@"\b(?([0-9]\d))(?!(\s*((\:\d)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b"; + public const string WrittenOneToNineRegex 
= @"(?:une?|deux|trois|quatre|cinq|six|sept|huit|neuf)"; + public const string WrittenElevenToNineteenRegex = @"(?:(seize|quinze|quatorze|treize|douze|onze)|dix\W(neuf|huit|sept))"; + public const string WrittenTensRegex = @"(?:quatre\Wvingt(s|\Wdix)?|soixante(\Wdix)?|dix|vingt|trente|quarante|cinquante|septante|octante|huitante|nonante)"; + public static readonly string WrittenCenturyFullYearRegex = $@"(?:(deux\s+)?mille((\s+{WrittenOneToNineRegex})?\s+cents?)?)"; + public static readonly string WrittenCenturyOrdinalYearRegex = $@"({WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|dix)"; + public static readonly string CenturyRegex = $@"\b(?{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}(\s+cents?)?)\b"; + public static readonly string LastTwoYearNumRegex = $@"(({WrittenTensRegex}(\s+|-))?({WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex})|{WrittenTensRegex})"; + public static readonly string FullTextYearRegex = $@"\b(?(?{CenturyRegex})\s+(?{LastTwoYearNumRegex})\b|\b(?{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}\s+cents))\b"; public static readonly string YearRegex = $@"({BaseDateTime.FourDigitYearRegex}|{FullTextYearRegex})"; - public const string WeekDayRegex = @"(?dimanche|lundi|mardi|mercredi|jeudi|vendredi|samedi|lun|mar|mer|jeu|ven|sam|dim)\b"; + public const string WeekDayRegex = @"(?dimanche|lundi|mardi|mercredi|jeudi|vendredi|samedi|lun(\.)?|mar(\.)?|mer(\.)?|jeu(\.)?|ven(\.)?|sam(\.)?|dim(\.)?)"; public static readonly string RelativeMonthRegex = $@"(?({ThisPrefixRegex}\s+mois)|(mois\s+{PastSuffixRegex})|(mois\s+{NextSuffixRegex}))\b"; - public const string WrittenMonthRegex = @"(?avril|avr\.|avr|ao[uû]t|d[eé]cembre|d[eé]c\.|d[eé]c|f[eé]vrier|f[eé]v|f[eé]vr\.|f[eé]vr|janvier|janv\.|janv|jan|juillet|jul|juil\.|juil|juin|jun|mars?|mai|novembre|nov\.|nov|octobre|oct\.|oct|septembre|sept\.|sept|sep)"; + public const string WrittenMonthRegex = 
@"(?avril|avr(\.)?|ao[uû]t|d[eé]cembre|d[eé]c(\.)?|f[eé]vrier|f[eé]vr?(\.)?|janvier|janv?(\.)?|juillet|jui?[ln](\.)?|mars?(\.)?|mai|novembre|nov(\.)?|octobre|oct(\.)?|septembre|sept?(\.)?(?!\s+heures))"; public static readonly string MonthSuffixRegex = $@"(?(en\s*|le\s*|de\s*|dans\s*)?({RelativeMonthRegex}|{WrittenMonthRegex}))"; - public const string DateUnitRegex = @"(?(l')?ann[eé]es?|an|mois|semaines?|journ[eé]es?|jours?)\b"; + public const string DateUnitRegex = @"(?an(?s)?|(?mois)|((l')?ann[eé]e|semaine|journ[eé]e|jour)(?s)?)\b"; public static readonly string SimpleCasesRegex = $@"\b((d[ue])|entre\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b"; public static readonly string MonthFrontSimpleCasesRegex = $@"\b((d[ue]|entre)\s+)?{MonthSuffixRegex}\s+((d[ue]|entre)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+(entre|d[ue]\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; public static readonly string BetweenRegex = $@"\b(entre\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b"; public const string YearWordRegex = @"\b(?l'ann[ée]e)\b"; - public static readonly string MonthWithYear = $@"\b((?avril|avr\.|avr|ao[uû]t|d[eé]cembre|d[eé]c\.|d[eé]c|f[eé]vrier|f[eé]v|f[eé]vr\.|f[eé]vr|janvier|janv\.|janv|jan|juillet|jul|juil\.|juil|juin|jun|mars?|mai|novembre|nov\.|nov|octobre|oct\.|oct|septembre|sept\.|sept|sep)(\s*),?(\s+de)?(\s*)({YearRegex}|(?cette)\s*{YearWordRegex})|{YearWordRegex}\s*({PastSuffixRegex}|{NextSuffixRegex}))"; - public static readonly string OneWordPeriodRegex = 
$@"\b(({RelativeRegex}\s+)?(?avril|avr\.|avr|ao[uû]t|d[eé]cembre|d[eé]c\.|d[eé]c|f[eé]vrier|f[eé]v|f[eé]vr\.|f[eé]vr|janvier|janv\.|janv|jan|juillet|jul|juil\.|juil|juin|jun|mars?|mai|novembre|nov\.|nov|octobre|oct\.|oct|septembre|sept\.|sept|sep)|(la\s+)?(weekend|(fin de )?semaine|week-end|mois|ans?|l'année)\s+{StrictRelativeRegex}|{RelativeRegex}\s+(weekend|(fin de )?semaine|week-end|mois|ans?|l'année)|weekend|week-end|(mois|l'année))\b"; + public static readonly string MonthWithYear = $@"\b({WrittenMonthRegex}(\s*),?(\s+de)?(\s*)({YearRegex}|{TwoDigitYearRegex}|(?cette)\s*{YearWordRegex})|{YearWordRegex}\s*({PastSuffixRegex}|{NextSuffixRegex}))"; + public static readonly string OneWordPeriodRegex = $@"\b(({RelativeRegex}\s+)?{WrittenMonthRegex}|(la\s+)?(weekend|(fin de )?semaine|week-end|mois|ans?|l'année)\s+{StrictRelativeRegex}|{RelativeRegex}\s+(weekend|(fin de )?semaine|week-end|mois|ans?|l'année)|weekend|week-end|mois|l'année|an)\b"; public static readonly string MonthNumWithYear = $@"({YearRegex}(\s*)[/\-\.](\s*){MonthNumRegex})|({MonthNumRegex}(\s*)[/\-](\s*){YearRegex})"; - public static readonly string WeekOfMonthRegex = $@"(?(le\s+)?(?premier|1er|duexi[èe]me|2|troisi[èe]me|3|quatri[èe]me|4|cinqi[èe]me|5)\s+semaine\s+{MonthSuffixRegex})"; + public static readonly string WeekOfMonthRegex = $@"(?(le\s+)?(?premier|1er|duexi[èe]me|2|troisi[èe]me|3|quatri[èe]me|4|cinqi[èe]me|5)\s+semaine(\s+de)?\s+{MonthSuffixRegex})"; public static readonly string WeekOfYearRegex = $@"(?(le\s+)?(?premier|1er|duexi[èe]me|2|troisi[èe]me|3|quatri[èe]me|4|cinqi[èe]me|5)\s+semaine(\s+de)?\s+({YearRegex}|{RelativeRegex}\s+ann[ée]e))"; + public static readonly string OfYearRegex = $@"\b((of|in)\s+({YearRegex}|{StrictRelativeRegex}\s+year))\b"; + public const string FirstLastRegex = @"\b(the\s+)?((?first)|(?last))\b"; public static readonly string FollowedDateUnit = $@"^\s*{DateUnitRegex}"; public static readonly string NumberCombinedWithDateUnit = 
$@"\b(?\d+(\.\d*)?){DateUnitRegex}"; public static readonly string QuarterRegex = $@"(le\s+)?(?premier|1er|duexi[èe]me|2|troisi[èe]me|3|quatri[èe]me|4)\s+quart(\s+de|\s*,\s*)?\s+({YearRegex}|{RelativeRegex}\s+l'ann[eé]e)"; public static readonly string QuarterRegexYearFront = $@"({YearRegex}|l'année\s+({PastSuffixRegex}|{NextSuffixRegex})|{RelativeRegex}\s+ann[eé]e)\s+(le\s+)?(?premier|1er|duexi[èe]me|2|troisi[èe]me|3|quatri[èe]me|4)\s+quarts"; - public const string AllHalfYearRegex = @"^[.]"; - public const string PrefixDayRegex = @"^[.]"; - public const string CenturySuffixRegex = @"^[.]"; + public const string AllHalfYearRegex = @"^\b$"; + public const string PrefixDayRegex = @"\b((?t[ôo]t\sdans)|(?au\smilieu\sde)|(?tard\sdans))(\s+la\s+journ[ée]e)?$"; + public const string CenturySuffixRegex = @"^\b$"; public static readonly string SeasonRegex = $@"\b((printemps|été|automne|hiver)+\s*({NextSuffixRegex}|{PastSuffixRegex}))|(?({RelativeRegex}\s+)?(?printemps|[ée]t[ée]|automne|hiver)((\s+de|\s*,\s*)?\s+({YearRegex}|{RelativeRegex}\s+l'ann[eé]e))?)\b"; public const string WhichWeekRegex = @"\b(semaine)(\s*)(?5[0-3]|[1-4]\d|0?[1-9])\b"; public const string WeekOfRegex = @"(semaine)(\s*)(de)"; public const string MonthOfRegex = @"(mois)(\s*)(de)"; - public const string MonthRegex = @"(?avril|avr\.|avr|ao[uû]t|d[eé]cembre|d[eé]c\.|d[eé]c|f[eé]vrier|f[eé]vr?\.|f[eé]vr?|janvier|janv\.|janv?|juillet|jul|juil\.|juil|juin|jun|mars?|mai|novembre|nov\.|nov|octobre|oct\.|oct|septembre|sept\.|sept?)"; + public const string MonthRegex = @"(?avril|avr(\.)?|ao[uû]t|d[eé]cembre|d[eé]c(\.)?|f[eé]vrier|f[eé]vr?(\.)?|janvier|janv?(\.)?|juillet|jui?[ln](\.)?|mars?(\.)?|mai|novembre|nov(\.)?|octobre|oct(\.)?|septembre|sept?(\.)?(?!\s+heures))"; public static readonly string OnRegex = $@"(?<=\b(en|sur\s*l[ea]|sur)\s+)({DayRegex}s?)\b"; public const string RelaxedOnRegex = 
@"(?<=\b(en|le|dans|sur\s*l[ea]|du|sur)\s+)((?10e|11e|12e|13e|14e|15e|16e|17e|18e|19e|1er|20e|21e|22e|23e|24e|25e|26e|27e|28e|29e|2e|30e|31e|3e|4e|5e|6e|7e|8e|9e)s?)\b"; public static readonly string ThisRegex = $@"\b((cette(\s*semaine)?\s+){WeekDayRegex})|({WeekDayRegex}(\s+cette\s*semaine))\b"; public static readonly string LastDateRegex = $@"\b(({WeekDayRegex}(\s*(de)?\s*la\s*semaine\s+{PastSuffixRegex}))|({WeekDayRegex}(\s+{PastSuffixRegex})))\b"; public static readonly string NextDateRegex = $@"\b(({WeekDayRegex}(\s+{NextSuffixRegex}))|({WeekDayRegex}(\s*(de)?\s*la\s*semaine\s+{NextSuffixRegex})))\b"; - public const string SpecialDayRegex = @"\b(avant[\s|-]hier|apr[eè]s(-demain|\s*demain)|(le\s)?jour suivant|(le\s+)?dernier jour|hier|lendemain|demain|de la journ[ée]e|aujourd'hui)\b"; - public const string SpecialDayWithNumRegex = @"^[.]"; + public const string SpecialDayRegex = @"\b(avant[\s|-]hier|apr[eè]s(-demain|\s*demain)|(le\s)?jour suivant|(le\s+)?derni[eè]re?s? jour|hier|lendemain|demain|(de\s)?la journ[ée]e|aujourd'hui)\b"; + public const string SpecialDayWithNumRegex = @"^\b$"; public const string StrictWeekDay = @"\b(?dim(anche)?|lun(di)?|mar(di)?|mer(credi)?|jeu(di)?|ven(dredi)?|sam(edi)?)s?\b"; - public const string SetWeekDayRegex = @"\b(?le\s+)?(?matin([ée]e)?|apres-midi|soir([ée]e)?|dimanche|lundi|mardi|mercredi|jeudi|vendredi|samedi)s\b"; + public const string SetWeekDayRegex = @"\b(?le\s+)?(?matin([ée]e)?|apr[eè]s-midi|soir([ée]e)?|dimanche|lundi|mardi|mercredi|jeudi|vendredi|samedi)s\b"; public static readonly string WeekDayOfMonthRegex = $@"(?(le\s+)?(?premier|1er|duexi[èe]me|2|troisi[èe]me|3|quatri[èe]me|4|cinqi[èe]me|5)\s+{WeekDayRegex}\s+{MonthSuffixRegex})"; - public const string RelativeWeekDayRegex = @"^[.]"; - public const string AmbiguousRangeModifierPrefix = @"^[.]"; - public const string NumberEndingPattern = @"^[.]"; - public static readonly string SpecialDate = $@"(?<=\b([àa]|au|le)\s+){DayRegex}(?!:)\b"; + public const string 
RelativeWeekDayRegex = @"^\b$"; + public const string AmbiguousRangeModifierPrefix = @"^\b$"; + public const string NumberEndingPattern = @"^\b$"; + public static readonly string SpecialDate = $@"(?<=\b(au|le)\s+){DayRegex}(?!:)\b"; public static readonly string DateYearRegex = $@"(?{YearRegex}|{TwoDigitYearRegex})"; - public static readonly string DateExtractor1 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{MonthRegex}\s*[/\\\.\-]?\s*{DayRegex}\b"; - public static readonly string DateExtractor2 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}(\s+|\s*,\s*|\s+){MonthRegex}\s*[\.\-]?\s*{DateYearRegex}\b"; - public static readonly string DateExtractor3 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}(\s+|\s*,\s*|\s*-\s*){MonthRegex}((\s+|\s*,\s*){DateYearRegex})?\b"; - public static readonly string DateExtractor4 = $@"\b{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}\s*[/\\\-]\s*{DateYearRegex}"; - public static readonly string DateExtractor5 = $@"\b{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}"; - public static readonly string DateExtractor6 = $@"(?<=\b(le|sur|sur l[ae])\s+){MonthNumRegex}[\-\.\/]{DayRegex}\b"; - public static readonly string DateExtractor7 = $@"\b{DayRegex}\s*/\s*{MonthNumRegex}((\s+|\s*,\s*){DateYearRegex})?\b"; - public static readonly string DateExtractor8 = $@"(?<=\b(le)\s+){DayRegex}[\\\-]{MonthNumRegex}\b"; - public static readonly string DateExtractor9 = $@"\b{DayRegex}\s*/\s*{MonthNumRegex}((\s+|\s*,\s*){DateYearRegex})?\b"; - public static readonly string DateExtractorA = $@"\b{DateYearRegex}\s*[/\\\-\.]\s*{MonthNumRegex}\s*[/\\\-\.]\s*{DayRegex}"; - public static readonly string OfMonth = $@"^\s*de\s*{MonthRegex}"; + public static readonly string DateExtractor1 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{MonthRegex}\s*[/\\\.\-]?\s*{DayRegex}(\s*([/\\\.\-]|\bde\b)?\s*{BaseDateTime.FourDigitYearRegex})?\b"; + public static readonly string DateExtractor2 = 
$@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}(\s+|\s*,\s*|\s+){MonthRegex}\s*([\.\-]|\bde\b)?\s*{DateYearRegex}\b"; + public static readonly string DateExtractor3 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?((?(l')?ann[eé]e(s)?|mois|semaines?)\b"; - public const string HourNumRegex = @"\b(?zero|[aá]\s+une?|deux|trois|quatre|cinq|six|sept|huit|neuf|onze|douze|treize|quatorze|quinze|dix-six|dix-sept|dix-huit|dix-neuf|vingt|vingt-et-un|vingt-deux|vingt-trois|dix)\b"; - public const string MinuteNumRegex = @"(?un|deux|trois|quatre|cinq|six|sept|huit|neuf|onze|douze|treize|quatorze|quinze|seize|dix-sept|dix-huit|dix-neuf|vingt|trente|quarante|cinquante|dix)"; - public const string DeltaMinuteNumRegex = @"(?un|deux|trois|quatre|cinq|six|sept|huit|neuf|onze|douze|treize|quatorze|quinze|seize|dix-sept|dix-huit|dix-neuf|vingt|trente|quarante|cinquante|dix)"; + public const string HourNumRegex = @"\b(?zero|une?(?=\s+heure)|deux|trois|quatre|cinq|six|sept|huit|neuf|onze|douze|treize|quatorze|quinze|dix-six|seize|dix(-|\s+)sept|dix(-|\s+)huit|dix(-|\s+)neuf|vingt|vingt(-|\s+)et(-|\s+)un|vingt(-|\s+)deux|vingt(-|\s+)trois|dix)\b"; + public const string MinuteNumRegex = @"(?((vingt|trente|quarante|cinquante)(\s*(et|-)?\s*))?(un|deux|trois|quatre|cinq|six|sept|huit|neuf)|onze|douze|treize|quatorze|quinze|seize|dix-sept|dix-huit|dix-neuf|vingt|trente|quarante|cinquante|dix)"; + public const string DeltaMinuteNumRegex = @"(?((vingt|trente|quarante|cinquante)(\s*(et|-)?\s*))?(un|deux|trois|quatre|cinq|six|sept|huit|neuf)|onze|douze|treize|quatorze|quinze|seize|dix-sept|dix-huit|dix-neuf|vingt|trente|quarante|cinquante|dix)"; public const string OclockRegex = @"(?heures?|h)"; public const string PmRegex = @"(?(dans l'\s*)?apr[eè]s(\s*|-)midi|(du|ce|de|le)\s*(soir([ée]e)?)|(dans l[ea]\s+)?(nuit|soir[eé]e))"; public const string AmRegex = @"(?(du|de|ce|(du|de|dans)\s*l[ea]|le)?\s*matin[ée]e|(du|de|ce|dans l[ea]|le)?\s*matin)"; - public static readonly string LessThanOneHour = 
$@"(?(une\s+)?quart|trois quart(s)?|demie( heure)?|{BaseDateTime.DeltaMinuteRegex}(\s+(minutes?|mins?))|{DeltaMinuteNumRegex}(\s+(minutes?|mins?)))"; - public static readonly string WrittenTimeRegex = $@"(?{HourNumRegex}\s+({MinuteNumRegex}|(?vingt|trente|quarante|cinquante)\s+{MinuteNumRegex}))"; - public static readonly string TimePrefix = $@"(?(heures\s*et\s+{LessThanOneHour}|et {LessThanOneHour}|{LessThanOneHour} [àa]))"; + public static readonly string LessThanOneHour = $@"(?(une\s+)?quart|trois quart(s)?|demie( heure)?|({BaseDateTime.DeltaMinuteRegex}|{DeltaMinuteNumRegex})(\s+(minutes?|mins?))|(?<=heures?\s+((et|moins)\s+)?)({BaseDateTime.DeltaMinuteRegex}|{DeltaMinuteNumRegex}))"; + public static readonly string WrittenTimeRegex = $@"(?{HourNumRegex}\s+(heures\s+)?(et\s+)?{MinuteNumRegex}(?!\s+heures)(\s+(minutes?|mins?))?)"; + public static readonly string TimePrefix = $@"(?(heures?\s+((et|moins)\s+)?{LessThanOneHour}|(et|moins)\s+{LessThanOneHour}|{LessThanOneHour}\s+[àa]))"; public static readonly string TimeSuffix = $@"(?{AmRegex}|{PmRegex}|{OclockRegex})"; - public static readonly string BasicTime = $@"(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex})"; + public static readonly string BasicTime = $@"(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(:|\s*h\s*){BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex})"; public const string MidnightRegex = @"(?minuit)"; public const string CommonDatePrefixRegex = @"^[\.]"; public const string MorningRegex = @"(?matin([ée]e)?)"; public const string AfternoonRegex = @"(?(d'|l')?apr[eè]s(-|\s*)midi)"; public static readonly string MidmorningRegex = $@"(?milieu\s*d[ue]\s*{MorningRegex})"; - public const string MiddayRegex = @"(?milieu(\s*|-)d[eu]\s*(jour|midi)|apr[eè]s(-|\s*)midi)"; + public const string MiddayRegex = 
@"(?milieu(\s*|-)d[eu]\s*(jour|midi)|apr[eè]s(-|\s*)midi|(?<=à\s+)midi)"; public static readonly string MidafternoonRegex = $@"(?milieu\s*d'+{AfternoonRegex})"; public static readonly string MidTimeRegex = $@"(?({MidnightRegex}|{MidmorningRegex}|{MidafternoonRegex}|{MiddayRegex}))"; - public static readonly string AtRegex = $@"\b(((?<=\b[àa]\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}|{MidTimeRegex}))|{MidTimeRegex})\b"; + public static readonly string AtRegex = $@"\b(((?<=\b[àa]\s+)({WrittenTimeRegex}|{HourNumRegex}(\s+heures)?|{BaseDateTime.HourRegex}|{MidTimeRegex}))|{MidTimeRegex})\b"; public static readonly string IshRegex = $@"\b(peu\s*pr[èe]s\s*{BaseDateTime.HourRegex}|peu\s*pr[èe]s\s*{WrittenTimeRegex}|peu\s*pr[èe]s\s*[àa]\s*{BaseDateTime.HourRegex}|peu pr[èe]s midi)\b"; - public const string TimeUnitRegex = @"(?heures?|hrs?|h|minutes?|mins?|secondes?|secs?)\b"; + public const string TimeUnitRegex = @"(?h|(heure|hr|minute|min|seconde|sec)(?s)?)\b"; public const string RestrictedTimeUnitRegex = @"(?huere|minute)\b"; - public static readonly string ConnectNumRegex = $@"{BaseDateTime.HourRegex}(?00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)\s*{DescRegex}"; + public static readonly string ConnectNumRegex = $@"{BaseDateTime.HourRegex}(?[0-5][0-9])\s*{DescRegex}"; public const string FivesRegex = @"(?(quinze|vingt(\s*|-*(cinq))?|trente(\s*|-*(cinq))?|quarante(\s*|-*(cinq))??|cinquante(\s*|-*(cinq))?|dix|cinq))\b"; public const string PeriodHourNumRegex = @"(?vingt-et-un|vingt-deux|vingt-trois|vingt-quatre|zero|une|deux|trois|quatre|cinq|six|sept|huit|neuf|dix|onze|douze|treize|quatorze|quinze|seize|dix-sept|dix-huit|dix-neuf|vingt)"; - public static readonly string TimeRegex1 = $@"\b({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})\s*{DescRegex}(\s+{TimePrefix})?"; + public static readonly string 
TimeRegex1 = $@"\b({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})\s*{DescRegex}(\s+{TimePrefix})?\b"; public static readonly string TimeRegex2 = $@"(\b{TimePrefix}\s+)?(t)?{BaseDateTime.HourRegex}(\s*)?:(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?((\s*{DescRegex})|\b)"; public static readonly string TimeRegex3 = $@"\b{BaseDateTime.HourRegex}\.{BaseDateTime.MinuteRegex}(\s*{DescRegex})(\s+{TimePrefix})?"; public static readonly string TimeRegex4 = $@"\b{BasicTime}(\s*{DescRegex})?(\s+{TimePrefix})?\s+{TimeSuffix}\b"; - public static readonly string TimeRegex5 = $@"\b{BasicTime}((\s*{DescRegex})|\b)(\s+{TimePrefix})?"; + public static readonly string TimeRegex5 = $@"\b{BasicTime}((\s*{DescRegex})(\s+{TimePrefix})?|\s+{TimePrefix})"; public static readonly string TimeRegex6 = $@"{BasicTime}(\s*{DescRegex})?\s+{TimeSuffix}\b"; public static readonly string TimeRegex7 = $@"\b{TimeSuffix}\s+[àa]\s+{BasicTime}((\s*{DescRegex})|\b)"; public static readonly string TimeRegex8 = $@"\b{TimeSuffix}\s+{BasicTime}((\s*{DescRegex})|\b)"; @@ -137,43 +148,45 @@ public static class DateTimeDefinitions public const string PeriodDescRegex = @"(?pm|am|p\.m\.|a\.m\.|p)"; public const string PeriodPmRegex = @"(?dans l'apr[eè]s-midi|ce soir|d[eu] soir|dans l[ea] soir[eé]e|dans la nuit|d[eu] soir[ée]e)s?"; public const string PeriodAmRegex = @"(?d[eu] matin|matin([ée]e)s?"; - public static readonly string PureNumFromTo = $@"((du|depuis|des?)\s+)?({HourRegex}|{PeriodHourNumRegex})(\s*(?{PeriodDescRegex}))?\s*{TillRegex}\s*({HourRegex}|{PeriodHourNumRegex})\s*(?{PmRegex}|{AmRegex}|{PeriodDescRegex})?"; + public static readonly string PureNumFromTo = $@"((du|depuis|des?)\s+)?(?{PeriodDescRegex}))?\s*{TillRegex}\s*({HourRegex}|{PeriodHourNumRegex})\s*(?{PmRegex}|{AmRegex}|{PeriodDescRegex})?"; public static readonly string PureNumBetweenAnd = 
$@"(entre\s+)({HourRegex}|{PeriodHourNumRegex})(\s*(?{PeriodDescRegex}))?\s*{RangeConnectorRegex}\s*({HourRegex}|{PeriodHourNumRegex})\s*(?{PmRegex}|{AmRegex}|{PeriodDescRegex})?"; - public const string SpecificTimeFromTo = @"^[.]"; - public const string SpecificTimeBetweenAnd = @"^[.]"; + public const string SpecificTimeFromTo = @"^\b$"; + public const string SpecificTimeBetweenAnd = @"^\b$"; public const string PrepositionRegex = @"(?^([aà](\s+?la)?|en|sur(\s*l[ea])?|de)$)"; public const string TimeOfDayRegex = @"\b(?((((dans\s+(l[ea])?\s+)?((?d[eé]but(\s+|-)|t[oô]t(\s+|-)(l[ea]\s*)?)|(?fin\s*|fin de(\s+(la)?)|tard\s*))?(matin([ée]e)?|((d|l)?'?)apr[eè]s[-|\s*]midi|nuit|soir([eé]e)?)))|(((\s+(l[ea])?\s+)?)jour(n[eé]e)?))s?)\b"; - public static readonly string SpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{TimeOfDayRegex})|({TimeOfDayRegex}\s*({NextSuffixRegex}))\b|\bsoir|\bdu soir)s?\b"; + public static readonly string SpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{TimeOfDayRegex})|({TimeOfDayRegex}\s*({NextSuffixRegex}))\b|\b(du )?soir)s?\b"; public static readonly string TimeFollowedUnit = $@"^\s*{TimeUnitRegex}"; public static readonly string TimeNumberCombinedWithUnit = $@"\b(?\d+(\.\d*)?){TimeUnitRegex}"; - public const string NowRegex = @"\b(?(ce\s+)?moment|maintenant|d[eè]s que possible|dqp|r[eé]cemment|auparavant)\b"; - public const string SuffixRegex = @"^\s*(dans\s+l[ea]\s+)|(en\s+)|(du)?(matin([eé]e)?|apr[eè]s-midi|soir[eé]e|nuit)\b"; + public const string NowRegex = @"\b(?(ce\s+)?moment|maintenant|(d[eè]s|aussit[oô]t) que possible|dqp|r[eé]cemment|auparavant|le plus tôt( que)? 
possible)\b"; + public const string SuffixRegex = @"^\s*((dans\s+l[ea]\s+)|(en\s+)|(d(u|\'))?(matin([eé]e)?|apr[eè]s-midi|soir[eé]e|nuit))\b"; public const string DateTimeTimeOfDayRegex = @"\b(?matin([eé]e)?|apr[eè]s-midi|nuit|soir)\b"; public static readonly string DateTimeSpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{DateTimeTimeOfDayRegex})\b|\b(ce(tte)?\s+)(soir|nuit))\b"; public static readonly string TimeOfTodayAfterRegex = $@"^\s*(,\s*)?(en|dans|du\s+)?{DateTimeSpecificTimeOfDayRegex}"; - public static readonly string TimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+([àa]|pour))?\s*$"; + public static readonly string TimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+([àa]|vers|pour))?\s*$"; public static readonly string SimpleTimeOfTodayAfterRegex = $@"({HourNumRegex}|{BaseDateTime.HourRegex})\s*(,\s*)?(en|[àa]\s+)?{DateTimeSpecificTimeOfDayRegex}"; - public static readonly string SimpleTimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+([àa]|vers))?\s*({HourNumRegex}|{BaseDateTime.HourRegex})"; + public static readonly string SimpleTimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+([àa]|vers|pour))?\s*({HourNumRegex}|{BaseDateTime.HourRegex})"; public const string SpecificEndOfRegex = @"(la\s+)?fin(\s+de\s*|\s*de*l[ea])?\s*$"; - public const string UnspecificEndOfRegex = @"^[.]"; - public const string UnspecificEndOfRangeRegex = @"^[.]"; + public const string UnspecificEndOfRegex = @"^\b$"; + public const string UnspecificEndOfRangeRegex = @"^\b$"; public const string PeriodTimeOfDayRegex = @"\b((dans\s+(le)?\s+)?((?d[eé]but(\s+|-|d[ue]|de la)|t[oô]t)|(?tard\s*|fin(\s+|-|d[eu])?))?(?matin|((d|l)?'?)apr[eè]s-midi|nuit|soir([eé]e)?))\b"; public static readonly string PeriodSpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{PeriodTimeOfDayRegex})\b|\b(ce(tte)?\s+)(soir|nuit))\b"; public static readonly string PeriodTimeOfDayWithDateRegex = $@"\b(({TimeOfDayRegex}))\b"; - 
public const string LessThanRegex = @"^[.]"; - public const string MoreThanRegex = @"^[.]"; - public const string DurationUnitRegex = @"(?ann[eé]es?|ans?|mois|semaines?|jours?|heures?|hrs?|h|minutes?|mins?|secondes?|secs?|journ[eé]e)\b"; + public const string LessThanRegex = @"^\b$"; + public const string MoreThanRegex = @"^\b$"; + public const string DecadeRegex = @"(?(?:dix|vingt|trente|quarante|cinquante|soixante-dix|soixante|quatre-vingt-dix|quatre-vingts|deux\s+mille))"; + public static readonly string DecadeWithCenturyInnerRegex = $@"(((?\d|1\d|2\d)?((?\d0)\b)|(?\d0)(?=s))|(({CenturyRegex}(\s+)(et\s+)?)?{DecadeRegex})|({CenturyRegex}(\s+)(et\s+)?(?dix|centaines)))"; + public static readonly string DurationUnitRegex = $@"(?\bann[eé]es?(?!\s+{DecadeWithCenturyInnerRegex})\b|ans?|mois|semaines?|jours?|heures?|hrs?|h|minutes?|mins?|secondes?|secs?|journ[eé]e)\b"; public const string SuffixAndRegex = @"(?\s*(et)\s+(une?\s+)?(?demi|quart))"; public const string PeriodicRegex = @"\b(?quotidien(ne)?|journellement|mensuel(le)?|jours?|hebdomadaire|bihebdomadaire|annuel(lement)?)\b"; public static readonly string EachUnitRegex = $@"(?(chaque|toutes les|tous les)(?\s+autres)?\s*{DurationUnitRegex})"; public const string EachPrefixRegex = @"\b(?(chaque|tous les|(toutes les))\s*$)"; public const string SetEachRegex = @"\b(?(chaque|tous les|(toutes les))\s*)"; - public const string SetLastRegex = @"(?prochain|dernier|derni[eè]re|pass[ée]s|pr[eé]c[eé]dent|courant|en\s*cours)"; - public const string EachDayRegex = @"^\s*(chaque|tous les)\s*(jour|jours)\b"; + public const string SetLastRegex = @"(?prochain|derni[eè]re?s?|pass[ée]s|pr[eé]c[eé]dent|courant|en\s*cours)"; + public const string EachDayRegex = @"\s*(chaque|tous les)\s*(jour|jours)\b"; public static readonly string DurationFollowedUnit = $@"^\s*{SuffixAndRegex}?(\s+|-)?{DurationUnitRegex}"; public static readonly string NumberCombinedWithDurationUnit = $@"\b(?\d+(\.\d*)?)(-)?{DurationUnitRegex}"; public static 
readonly string AnUnitRegex = $@"\b(((?demi\s+)?(-)\s+{DurationUnitRegex}))"; - public const string DuringRegex = @"^[.]"; + public const string DuringRegex = @"^\b$"; public const string AllRegex = @"\b(?toute\s(l['ea])\s?(?ann[eé]e|mois|semaines?|jours?|journ[eé]e))\b"; public const string HalfRegex = @"((une?\s*)|\b)(?demi?(\s*|-)+(?ann[eé]e|ans?|mois|semaine|jour|heure))\b"; public const string ConjunctionRegex = @"\b((et(\s+de|pour)?)|avec)\b"; @@ -185,19 +198,26 @@ public static class DateTimeDefinitions public const string TimeTokenPrefix = @"à "; public const string TokenBeforeDate = @"le "; public const string TokenBeforeTime = @"à "; + public const string HalfTokenRegex = @"\b(demie)$"; + public const string QuarterTokenRegex = @"\b((un\s+)?quart)$"; + public const string ThreeQuarterTokenRegex = @"\b(trois\s+quarts)$"; + public const string ToTokenRegex = @"\b(moins|[aà]$)\b"; public const string AMTimeRegex = @"(?matin([ée]e)?)"; public const string PMTimeRegex = @"\b(?(d'|l')?apr[eè]s-midi|nuit|((\s*ce|du)\s+)?soir)\b"; + public const string AsapTimeRegex = @"((d[eè]s|le plus t[oô]t|aussit[oô]t)( que)? 
possible|dqp)"; public const string BeforeRegex = @"\b(avant)\b"; public const string BeforeRegex2 = @"\b(entre\s*(le|la(s)?)?)\b"; public const string AfterRegex = @"\b(apres)\b"; public const string SinceRegex = @"\b(depuis)\b"; - public const string AroundRegex = @"^[.]"; + public const string AroundRegex = @"\b(vers|à\s+peu\s+près|environ)\b"; public const string AgoPrefixRegex = @"\b(y a)\b"; - public const string LaterRegex = @"\b(plus tard)\b"; - public const string AgoRegex = @"^[.]"; - public const string InConnectorRegex = @"\b(dans|en|sur)\b"; - public const string SinceYearSuffixRegex = @"^[.]"; - public const string WithinNextPrefixRegex = @"^[.]"; + public const string LaterRegex = @"\b(plus\s+tard|à\s+partir\s+(de\s+(maintenant|demain)|d'aujourd'hui)|après\s+(aujourd'hui|demain))\b"; + public static readonly string AgoRegex = $@"\b((il\s+y\s*a)(\s+{AroundRegex})?|auparavant|avant\s+(?hier|aujourd'hui))\b"; + public const string BeforeAfterRegex = @"^\b$"; + public const string InConnectorRegex = @"\b(dans|en)\b"; + public const string SinceYearSuffixRegex = @"^\b$"; + public const string WithinNextPrefixRegex = @"\b(dans\s+les)\b"; + public const string TodayNowRegex = @"\b(aujourd'hui|maintenant)\b"; public const string MorningStartEndRegex = @"(^(matin))|((matin)$)"; public const string AfternoonStartEndRegex = @"(^((d'|l')?apr[eè]s-midi))|(((d'|l')?apr[eè]s-midi)$)"; public const string EveningStartEndRegex = @"(^(soir[ée]e|soir))|((soir[ée]e|soir)$)"; @@ -205,47 +225,48 @@ public static class DateTimeDefinitions public const string InexactNumberRegex = @"\b(quel qu[ée]s|quelqu[ée]s?|plusieurs?|divers)\b"; public static readonly string InexactNumberUnitRegex = $@"({InexactNumberRegex})\s+({DurationUnitRegex})"; public static readonly string RelativeTimeUnitRegex = $@"(((({ThisPrefixRegex})?)\s+({TimeUnitRegex}(\s*{NextSuffixRegex}|{PastSuffixRegex})?))|((le))\s+({RestrictedTimeUnitRegex}))"; - public static readonly string 
RelativeDurationUnitRegex = $@"((\b({DurationUnitRegex})(\s+{NextSuffixRegex}|{PastSuffixRegex})?)|((le|my))\s+({RestrictedTimeUnitRegex}))"; - public const string ReferenceDatePeriodRegex = @"^[.]"; + public static readonly string RelativeDurationUnitRegex = $@"\b(((?<=({NextSuffixRegex}|{PastSuffixRegex})\s+)({DurationUnitRegex}))|(({DurationUnitRegex})(\s+({NextSuffixRegex}|{PastSuffixRegex}))?)|((le|my))\s+({RestrictedTimeUnitRegex}))\b"; + public const string ReferenceDatePeriodRegex = @"^\b$"; public const string UpcomingPrefixRegex = @".^"; public const string NextPrefixRegex = @".^"; public const string PastPrefixRegex = @".^"; public const string PreviousPrefixRegex = @".^"; - public const string RelativeDayRegex = @"^[\.]"; - public const string ConnectorRegex = @"^(,|pour|t|vers)$"; - public const string ConnectorAndRegex = @"\b(et\s*(le|las?)?)\b.+"; + public static readonly string RelativeDayRegex = $@"\b(((la\s+)?{RelativeRegex}\s+journ[ée]e))\b"; + public const string ConnectorRegex = @"^(,|pour|t|vers|le)$"; + public const string ConnectorAndRegex = @"\b(et\s*(le|las?)?)\b"; public const string FromRegex = @"((de|du)?)$"; public const string FromRegex2 = @"((depuis|de)(\s*las?)?)$"; public const string FromToRegex = @"\b(du|depuis|des?).+(au|à|a)\b.+"; public const string SingleAmbiguousMonthRegex = @"^(le\s+)?(may|march)$"; - public const string UnspecificDatePeriodRegex = @"^[.]"; + public const string UnspecificDatePeriodRegex = @"^(semaine|mois|an(n[eé]e)?)$"; public const string PrepositionSuffixRegex = @"\b(du|de|[àa]|vers|dans)$"; public const string FlexibleDayRegex = @"(?([A-Za-z]+\s)?[A-Za-z\d]+)"; public static readonly string ForTheRegex = $@"\b(((pour le {FlexibleDayRegex})|(dans (le\s+)?{FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\s*(,|\.|!|\?|$)))"; - public static readonly string WeekDayAndDayOfMonthRegex = $@"\b{WeekDayRegex}\s+(le\s+{FlexibleDayRegex})\b"; - public static readonly string WeekDayAndDayRegex = 
$@"\b{WeekDayRegex}\s+(?!(the)){DayRegex}(?!([-]|(\s+({AmDescRegex}|{PmDescRegex}|{OclockRegex}))))\b"; + public static readonly string WeekDayAndDayOfMonthRegex = $@"\b({WeekDayRegex}\s+(le\s+{FlexibleDayRegex})|le\s+(?{DayRegex}|{WrittenDayRegex})\s+{WeekDayRegex})\b"; + public static readonly string WeekDayAndDayRegex = $@"\b{WeekDayRegex}\s+(?!(the)){DayRegex}(?!([-:]|(\s+({AmDescRegex}|{PmDescRegex}|{OclockRegex}))))\b"; public const string RestOfDateRegex = @"\b(reste|fin)\s+(d[eu]\s+)?((le|ce(tte)?)\s+)?(?semaine|mois|l'ann[ée]e)\b"; public const string RestOfDateTimeRegex = @"\b(reste|fin)\s+(d[eu]\s+)?((le|ce(tte)?)\s+)?(?jour)\b"; - public const string LaterEarlyPeriodRegex = @"^[.]"; - public const string WeekWithWeekDayRangeRegex = @"^[.]"; - public const string GeneralEndingRegex = @"^[.]"; - public const string MiddlePauseRegex = @"^[.]"; - public const string DurationConnectorRegex = @"^[.]"; + public const string LaterEarlyPeriodRegex = @"^\b$"; + public const string WeekWithWeekDayRangeRegex = @"^\b$"; + public const string GeneralEndingRegex = @"^\b$"; + public const string MiddlePauseRegex = @"^\b$"; + public const string DurationConnectorRegex = @"^\s*(?\s+|et|,)\s*$"; public const string PrefixArticleRegex = @"^[\.]"; - public const string OrRegex = @"^[.]"; - public const string YearPlusNumberRegex = @"^[.]"; - public const string NumberAsTimeRegex = @"^[.]"; - public const string TimeBeforeAfterRegex = @"^[.]"; - public const string DateNumberConnectorRegex = @"^[.]"; - public const string CenturyRegex = @"^[.]"; - public const string DecadeRegex = @"^[.]"; - public const string DecadeWithCenturyRegex = @"^[.]"; - public const string RelativeDecadeRegex = @"^[.]"; - public static readonly string YearSuffix = $@"(,?\s*({DateYearRegex}|{FullTextYearRegex}))"; - public const string SuffixAfterRegex = @"^[.]"; - public const string YearPeriodRegex = @"^[.]"; - public const string FutureSuffixRegex = @"^[.]"; - public const string 
ComplexDatePeriodRegex = @"^[.]"; + public const string OrRegex = @"^\b$"; + public const string YearPlusNumberRegex = @"^\b$"; + public const string NumberAsTimeRegex = @"^\b$"; + public const string TimeBeforeAfterRegex = @"^\b$"; + public const string DateNumberConnectorRegex = @"^\s*(?\s+[aà])\s*$"; + public static readonly string DecadeWithCenturyRegex = $@"(les\s+)?(années)\s+{DecadeWithCenturyInnerRegex}"; + public const string RelativeDecadeRegex = @"^\b$"; + public static readonly string YearSuffix = $@"(,?(\s*à)?\s*({DateYearRegex}|{FullTextYearRegex}))"; + public const string SuffixAfterRegex = @"^\b$"; + public static readonly string YearPeriodRegex = $@"((((du|depuis|des?)\s+)?{YearRegex}\s*({TillRegex})\s*{YearRegex})|(((entre)\s+){YearRegex}\s*({RangeConnectorRegex})\s*{YearRegex}))"; + public const string FutureSuffixRegex = @"\b(dans\s+le\s+futur)\b"; + public static readonly string ModPrefixRegex = $@"\b({RelativeRegex}|{AroundRegex}|{BeforeRegex}|{AfterRegex}|{SinceRegex})\b"; + public static readonly string ModSuffixRegex = $@"\b({AgoRegex}|{LaterRegex}|{BeforeAfterRegex}|{FutureSuffixRegex}|{PastSuffixRegex})\b"; + public const string ComplexDatePeriodRegex = @"^\b$"; + public const string AmbiguousPointRangeRegex = @"^(mar\.?)$"; public static readonly Dictionary UnitMap = new Dictionary { { @"annees", @"Y" }, @@ -349,7 +370,14 @@ public static class DateTimeDefinitions { @"jeu", 4 }, { @"ven", 5 }, { @"sam", 6 }, - { @"dim", 0 } + { @"dim", 0 }, + { @"lun.", 1 }, + { @"mar.", 2 }, + { @"mer.", 3 }, + { @"jeu.", 4 }, + { @"ven.", 5 }, + { @"sam.", 6 }, + { @"dim.", 0 } }; public static readonly Dictionary MonthOfYear = new Dictionary { @@ -358,11 +386,14 @@ public static class DateTimeDefinitions { @"février", 2 }, { @"mars", 3 }, { @"mar", 3 }, + { @"mar.", 3 }, { @"avril", 4 }, { @"avr", 4 }, + { @"avr.", 4 }, { @"mai", 5 }, { @"juin", 6 }, { @"jun", 6 }, + { @"jun.", 6 }, { @"juillet", 7 }, { @"aout", 8 }, { @"août", 8 }, @@ -374,14 +405,18 
@@ public static class DateTimeDefinitions { @"janv", 1 }, { @"janv.", 1 }, { @"jan", 1 }, + { @"jan.", 1 }, { @"fevr", 2 }, { @"fevr.", 2 }, { @"févr.", 2 }, { @"févr", 2 }, { @"fev", 2 }, + { @"fev.", 2 }, { @"juil", 7 }, { @"jul", 7 }, + { @"jul.", 7 }, { @"sep", 9 }, + { @"sep.", 9 }, { @"sept.", 9 }, { @"sept", 9 }, { @"oct", 10 }, @@ -389,6 +424,7 @@ public static class DateTimeDefinitions { @"nov", 11 }, { @"nov.", 11 }, { @"dec", 12 }, + { @"dec.", 12 }, { @"déc.", 12 }, { @"déc", 12 }, { @"1", 1 }, @@ -685,14 +721,35 @@ public static class DateTimeDefinitions public const string NightRegex = @"\b(minuit|nuit)\b"; public static readonly Dictionary WrittenDecades = new Dictionary { - { @"", 0 } + { @"dix", 10 }, + { @"vingt", 20 }, + { @"trente", 30 }, + { @"quarante", 40 }, + { @"cinquante", 50 }, + { @"soixante", 60 }, + { @"soixante-dix", 70 }, + { @"quatre-vingt", 80 }, + { @"quatre-vingt-dix", 90 } }; public static readonly Dictionary SpecialDecadeCases = new Dictionary { - { @"", 0 } + { @"deux mille", 2000 } }; public const string DefaultLanguageFallback = @"DMY"; public static readonly string[] DurationDateRestrictions = { }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"^\d{4}$", @"(\d\.\d{4}|\d{4}\.\d)" }, + { @"^([eé]t[eé])$", @"(? 
AmbiguityTimeFiltersDict = new Dictionary + { + { @"\bheures?\b", @"\b(pour|durée\s+de|pendant|dure|durera)\s+(\S+\s+){1,2}heures?.*$" } + }; public static readonly IList MorningTermList = new List { @"matinee", diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/French/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/French/NumbersDefinitions.cs index dc34d49fa5..7fb24b0809 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/French/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/French/NumbersDefinitions.cs @@ -21,18 +21,19 @@ namespace Microsoft.Recognizers.Definitions.French public static class NumbersDefinitions { - public const string LangMarker = @"Fr"; + public const string LangMarker = @"Fre"; public const bool CompoundNumberLanguage = false; public const bool MultiDecimalSeparatorCulture = true; - public const string RoundNumberIntegerRegex = @"(cent|mille|millions|million|milliard|milliards|billion|billions)"; - public const string ZeroToNineIntegerRegex = @"(et un|un|une|deux|trois|quatre|cinq|six|sept|huit|neuf)"; + public const string RoundNumberIntegerRegex = @"(cent|mille|millions?|milliards?|billions?)"; + public const string ZeroToNineIntegerRegex = @"(une?|deux|trois|quatre|cinq|six|sept|huit|neuf|z[ée]ro)"; + public const string TwoToNineIntegerRegex = @"(deux|trois|quatre|cinq|six|sept|huit|neuf)"; public const string TenToNineteenIntegerRegex = @"((seize|quinze|quatorze|treize|douze|onze)|dix(\Wneuf|\Whuit|\Wsept)?)"; - public const string TensNumberIntegerRegex = @"(quatre\Wvingt(s|\Wdix)?|soixante\Wdix|vingt|trente|quarante|cinquante|soixante|septante|octante|huitante|nonante)"; + public const string TensNumberIntegerRegex = @"(quatre\Wvingt(s|\Wdix)?|soixante(\Wdix)?|vingt|trente|quarante|cinquante|septante|octante|huitante|nonante)"; public const string DigitsNumberRegex = @"\d|\d{1,3}(\.\d{3})"; public const string NegativeNumberTermsRegex = @"^[.]"; public static readonly string 
NegativeNumberSignRegex = $@"^({NegativeNumberTermsRegex}\s+).*"; public static readonly string HundredsNumberIntegerRegex = $@"(({ZeroToNineIntegerRegex}(\s+cent))|cent|((\s+cent\s)+{TensNumberIntegerRegex}))"; - public static readonly string BelowHundredsRegex = $@"(({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}([-\s]+({TenToNineteenIntegerRegex}|{ZeroToNineIntegerRegex}))?))|{ZeroToNineIntegerRegex})"; + public static readonly string BelowHundredsRegex = $@"(({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}((-|(\s+et)?\s+)({TenToNineteenIntegerRegex}|{ZeroToNineIntegerRegex}))?))|{ZeroToNineIntegerRegex})"; public static readonly string BelowThousandsRegex = $@"(({HundredsNumberIntegerRegex}(\s+{BelowHundredsRegex})?|{BelowHundredsRegex}|{TenToNineteenIntegerRegex})|cent\s+{TenToNineteenIntegerRegex})"; public static readonly string SupportThousandsRegex = $@"(({BelowThousandsRegex}|{BelowHundredsRegex})\s+{RoundNumberIntegerRegex}(\s+{RoundNumberIntegerRegex})?)"; public static readonly string SeparaIntRegex = $@"({SupportThousandsRegex}(\s+{SupportThousandsRegex})*(\s+{BelowThousandsRegex})?|{BelowThousandsRegex})"; @@ -43,26 +44,31 @@ public static class NumbersDefinitions public const string NumbersWithDozenSuffix = @"(((?prochain[es]?|pr[eé]c[eé]dent[es]?|(l[’'])?actuel(le)?(\s+une?)?|(l[’'])?avant(\s+|-)derniere?|(ant[eé])?p[eé]nulti[eè]me|derni[eè]r[es]?|suivant[es]?|courant[es]?|cel(le|ui)\s+d['’]avant\s+l[ae]\s+derni[èe]re?)"; public static readonly string ComplexOrdinalRegex = $@"(({OverThousandOrdinalRegex}(\s)?)?{UnderThousandOrdinalRegex}|{OverThousandOrdinalRegex}|{UnderHundredOrdinalRegex})"; public static readonly string SuffixOrdinalRegex = $@"(({AllIntRegex})({SimpleRoundOrdinalRegex}))"; public static readonly string ComplexRoundOrdinalRegex = $@"((({SuffixOrdinalRegex}(\s)?)?{ComplexOrdinalRegex})|{SuffixOrdinalRegex})"; - public static readonly string AllOrdinalRegex = 
$@"({ComplexOrdinalRegex}|{SimpleRoundOrdinalRegex}|{ComplexRoundOrdinalRegex})"; + public static readonly string AllOrdinalNumberRegex = $@"({ComplexOrdinalRegex}|{SimpleRoundOrdinalRegex}|{ComplexRoundOrdinalRegex})"; + public static readonly string AllOrdinalRegex = $@"(?:{AllOrdinalNumberRegex}|{RelativeOrdinalRegex})"; public const string PlaceHolderPureNumber = @"\b"; public const string PlaceHolderDefault = @"\D|\b"; - public const string OrdinalSuffixRegex = @"(?<=\b)((\d*(1er|2e|2eme|3e|3eme|4e|4eme|5e|5eme|6e|6eme|7e|7eme|8e|8eme|9e|9eme|0e|0eme))|(11e|11eme|12e|12eme))(?=\b)"; + public const string OrdinalSuffixRegex = @"(?<=\b)((\d*(11e(me)?|1[eè]re?|[02-9]e(me)?)))(?=\b)"; public static readonly string OrdinalFrenchRegex = $@"(?<=\b){AllOrdinalRegex}(?=\b)"; public const string FractionNotationWithSpacesRegex = @"(((?<=\W|^)-\s*)|(?<=\b))\d+\s+\d+[/]\d+(?=(\b[^/]|$))"; - public const string FractionNotationRegex = @"(((?<=\W|^)-\s*)|(?<=\b))\d+[/]\d+(?=(\b[^/]|$))"; - public static readonly string FractionNounRegex = $@"(?<=\b)({AllIntRegex}\s+((et)\s+)?)?({AllIntRegex})(\s+((et)\s)?)((({AllOrdinalRegex})s?|({SuffixOrdinalRegex})s?)|demis?|tiers?|quarts?)(?=\b)"; - public static readonly string FractionNounWithArticleRegex = $@"(?<=\b)({AllIntRegex}\s+(et\s+)?)?(un|une)(\s+)(({AllOrdinalRegex})|({SuffixOrdinalRegex})|(et\s+)?demis?)(?=\b)"; + public static readonly string FractionNotationRegex = $@"{BaseNumbers.FractionNotationRegex}"; + public static readonly string FractionMultiplierRegex = $@"(?\s+et\s+(demi[es]?|(une?|{TwoToNineIntegerRegex})\s+(demie?|tier|quart|(cinqui|sixi|septi|hui[tr]i|neuvi|dixi)[eè]me)s?))"; + public static readonly string RoundMultiplierWithFraction = $@"(?(millions?|milliards?|billions?))(?={FractionMultiplierRegex}?$)"; + public static readonly string RoundMultiplierRegex = $@"\b\s*({RoundMultiplierWithFraction}|(?(cent|mille))$)"; + public static readonly string FractionNounRegex = 
$@"(?<=\b)({AllIntRegex}\s+((et)\s+)?)?({AllIntRegex}(\s+((et)\s)?)(({AllOrdinalNumberRegex}s?|{SuffixOrdinalRegex}s?)|(demi[es]?|tiers?|quarts?))|(un\s+)?(demi|tier|quart)(\s+(de\s+)?|\s*-\s*){RoundNumberIntegerRegex})(?=\b)"; + public static readonly string FractionNounWithArticleRegex = $@"(?<=\b)(({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\s+(et\s+)?)?((une?)(\s+)(({AllOrdinalNumberRegex})|({SuffixOrdinalRegex})|(et\s+)?demi[es]?)|demi[es]?)(?=\b)"; public static readonly string FractionPrepositionRegex = $@"(?({AllIntRegex})|((?({AllIntRegex})|((\d+)(?!\.)))(?=\b)"; public static readonly string AllPointRegex = $@"((\s+{ZeroToNineIntegerRegex})+|(\s+{SeparaIntRegex}))"; public static readonly string AllFloatRegex = $@"({AllIntRegex}(\s+(virgule|point)){AllPointRegex})"; @@ -73,8 +79,35 @@ public static class NumbersDefinitions public static readonly string DoubleAllFloatRegex = $@"((?<=\b){AllFloatRegex}(?=\b))"; public const string DoubleExponentialNotationRegex = @"(((?)"; + public const string LessRegex = @"(?:(less|plus\s+(bas|petit|jeune)|moins|inf[ée]rieure?s?)(\s+([àa]|d[e'’]|que))?|((en )?dessous)\s+de|under|(?|=)<)"; + public const string EqualRegex = @"(([ée]ga(l(es)?|ux)|au\s+nombre)(\s+([àa]|d[e'’]))?|(?)=)"; + public static readonly string MoreOrEqualPrefix = $@"((pas\s+{LessRegex})|(au\s+moins|[àa] partir d[e'’]))"; + public static readonly string MoreOrEqual = $@"(?:({MoreRegex}\s+(ou)?\s+{EqualRegex})|({EqualRegex}\s+(ou)?\s+{MoreRegex})|{MoreOrEqualPrefix}(\s+(ou)?\s+{EqualRegex})?|({EqualRegex}\s+(ou)?\s+)?{MoreOrEqualPrefix}|>\s*=|≥)"; + public const string MoreOrEqualSuffix = @"((et|ou)\s+(((more|greater|higher|plus(\s+grand)?|sup[ée]rieure?s?)((?!\s+([àa]|que))|(\s+([àa]|que)(?!((\s+ou\s+[èe]ga(l(es)?|ux)\s+[àa])?\s*\d+)))))|((a plus|au-dessus)\s+d[e'’](?!\s+than))))"; + public static readonly string LessOrEqualPrefix = $@"((pas\s+{MoreRegex})|(au\s+plus)|(jusqu'[àa]))"; + public static readonly string LessOrEqual = 
$@"(({LessRegex}\s+(ou)?\s+{EqualRegex})|({EqualRegex}\s+(ou)?\s+{LessRegex})|{LessOrEqualPrefix}(\s+(ou)?\s+{EqualRegex})?|({EqualRegex}\s+(ou)?\s+)?{LessOrEqualPrefix}|<\s*=|≤)"; + public const string LessOrEqualSuffix = @"((et|ou)\s+(less|lower|plus petit|moins|inf[ée]rieure?s?)((?!\s+([àa]|de|que))|(\s+([àa]|d[e'’]|que)(?!(\s*\d+)))))"; + public static readonly string NumberSplitMark = $@"(?![,.](?!\d+))(?!\s*\b(et\s+({LessRegex}|{MoreRegex})|mais|ou|to)\b)"; + public const string MoreRegexNoNumberSucceed = @"((bigger|greater|more|plus(\s+grand)?|sup[ée]rieure?s?)((?!\s+([àa]|que))|\s+(([àa]|que)(?!(\s*\d+))))|((au-dessus|a plus)\s+d[e'’])(?!(\s*\d+)))"; + public const string LessRegexNoNumberSucceed = @"((less|lower|plus petit|moins|inf[ée]rieure?s?)((?!\s+([àa]|d[e'’]|que))|\s+(([àa]|d[e'’]|que)(?!(\s*\d+))))|(((en )?dessous)\s+d[e'’]|under)(?!(\s*\d+)))"; + public const string EqualRegexNoNumberSucceed = @"([èe]ga(l(es)?|ux)((?!\s+([àa]))|(\s+([àa]|que)(?!(\s*\d+)))))"; + public static readonly string OneNumberRangeMoreRegex1 = $@"({MoreOrEqual}|{MoreRegex})\s*(l[ae]\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeMoreRegex1LB = $@"(?({NumberSplitMark}.)+)\s*{MoreOrEqualSuffix}"; + public static readonly string OneNumberRangeMoreSeparateRegex = $@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+ou\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\s+(?({NumberSplitMark}.)+)(\s+ou\s+){EqualRegexNoNumberSucceed})"; + public static readonly string OneNumberRangeLessRegex1 = $@"({LessOrEqual}|{LessRegex})\s*(l[ae]\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeLessRegex1LB = $@"(?({NumberSplitMark}.)+)\s*{LessOrEqualSuffix}"; + public static readonly string OneNumberRangeLessSeparateRegex = $@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+ou\s+){LessRegexNoNumberSucceed})|({LessRegex}\s+(?({NumberSplitMark}.)+)(\s+ou\s+){EqualRegexNoNumberSucceed})"; + public static readonly string OneNumberRangeEqualRegex = 
$@"(?({NumberSplitMark}.)+)"; + public static readonly string TwoNumberRangeRegex1 = $@"entre\s*(l[ae]\s+)?(?({NumberSplitMark}.)+)\s*et\s*(l[ae]\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\s*(et|mais|,)\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"; + public static readonly string TwoNumberRangeRegex3 = $@"({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\s*(et|mais|,)\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})"; + public static readonly string TwoNumberRangeRegex4 = $@"(de\s+)?(?({NumberSplitMark}(?!\bde\b).)+)\s*{TillRegex}\s*(l[ae]\s+)?(?({NumberSplitMark}.)+)"; public const char DecimalSeparatorChar = ','; public const string FractionMarkerToken = @"sur"; public const char NonDecimalSeparatorChar = '.'; @@ -84,8 +117,9 @@ public static class NumbersDefinitions public static readonly string[] WrittenGroupSeparatorTexts = { @"point", @"points" }; public static readonly string[] WrittenIntegerSeparatorTexts = { @"et", @"-" }; public static readonly string[] WrittenFractionSeparatorTexts = { @"et", @"sur" }; - public const string HalfADozenRegex = @"(?<=\b)demi\s+douzaine"; - public static readonly string DigitalNumberRegex = $@"((?<=\b)(cent|mille|million|millions|milliard|milliards|billions|billion|douzaine(s)?)(?=\b))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; + public static readonly string[] OneHalfTokens = { @"un", @"demi" }; + public const string HalfADozenRegex = @"(?<=\b)demie?\s+douzaine"; + public static readonly string DigitalNumberRegex = $@"((?<=\b)(cent|mille|millions?|milliards?|billions?|douzaines?)(?=\b))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; public const string AmbiguousFractionConnectorsRegex = @"^[.]"; public static readonly Dictionary CardinalNumberMap = new Dictionary { @@ -104,12 +138,15 @@ public static class NumbersDefinitions { @"dix", 10 }, { @"onze", 11 }, { @"douze", 
12 }, + { @"douzaine", 12 }, + { @"douzaines", 12 }, { @"treize", 13 }, { @"quatorze", 14 }, { @"quinze", 15 }, { @"seize", 16 }, { @"dix-sept", 17 }, { @"dix-huit", 18 }, + { @"dix-huir", 18 }, { @"dix-neuf", 19 }, { @"vingt", 20 }, { @"trente", 30 }, @@ -157,12 +194,15 @@ public static class NumbersDefinitions { @"premier", 1 }, { @"première", 1 }, { @"premiere", 1 }, + { @"unième", 1 }, + { @"unieme", 1 }, { @"deuxième", 2 }, { @"deuxieme", 2 }, { @"second", 2 }, { @"seconde", 2 }, { @"troisième", 3 }, { @"demi", 2 }, + { @"demie", 2 }, { @"tiers", 3 }, { @"tierce", 3 }, { @"quart", 4 }, @@ -178,10 +218,14 @@ public static class NumbersDefinitions { @"septieme", 7 }, { @"huitième", 8 }, { @"huitieme", 8 }, + { @"huirième", 8 }, + { @"huirieme", 8 }, { @"neuvième", 9 }, { @"neuvieme", 9 }, { @"dixième", 10 }, { @"dixieme", 10 }, + { @"dizième", 10 }, + { @"dizieme", 10 }, { @"onzième", 11 }, { @"onzieme", 11 }, { @"douzième", 12 }, @@ -189,7 +233,7 @@ public static class NumbersDefinitions { @"treizième", 13 }, { @"treizieme", 13 }, { @"quatorzième", 14 }, - { @"quatorizieme", 14 }, + { @"quatorzieme", 14 }, { @"quinzième", 15 }, { @"quinzieme", 15 }, { @"seizième", 16 }, @@ -198,6 +242,8 @@ public static class NumbersDefinitions { @"dix-septieme", 17 }, { @"dix-huitième", 18 }, { @"dix-huitieme", 18 }, + { @"dix-huirième", 18 }, + { @"dix-huirieme", 18 }, { @"dix-neuvième", 19 }, { @"dix-neuvieme", 19 }, { @"vingtième", 20 }, @@ -353,11 +399,143 @@ public static class NumbersDefinitions }; public static readonly Dictionary RelativeReferenceOffsetMap = new Dictionary { - { @"", @"" } + { @"prochain", @"1" }, + { @"prochaine", @"1" }, + { @"prochains", @"1" }, + { @"precedent", @"-1" }, + { @"precedente", @"-1" }, + { @"precédent", @"-1" }, + { @"precédente", @"-1" }, + { @"précedent", @"-1" }, + { @"précedente", @"-1" }, + { @"précédent", @"-1" }, + { @"précédente", @"-1" }, + { @"actuel", @"0" }, + { @"actuelle", @"0" }, + { @"actuel un", @"0" }, + { @"actuelle 
une", @"0" }, + { @"l'actuel", @"0" }, + { @"l'actuelle", @"0" }, + { @"l’actuel", @"0" }, + { @"l’actuelle", @"0" }, + { @"l'actuel un", @"0" }, + { @"l'actuelle une", @"0" }, + { @"l’actuel un", @"0" }, + { @"l’actuelle une", @"0" }, + { @"avant dernier", @"-1" }, + { @"avant derniere", @"-1" }, + { @"avant-dernier", @"-1" }, + { @"avant-derniere", @"-1" }, + { @"l'avant dernier", @"-1" }, + { @"l'avant derniere", @"-1" }, + { @"l'avant-dernier", @"-1" }, + { @"l'avant-derniere", @"-1" }, + { @"l’avant dernier", @"-1" }, + { @"l’avant derniere", @"-1" }, + { @"l’avant-dernier", @"-1" }, + { @"l’avant-derniere", @"-1" }, + { @"celle d'avant la dernière", @"-1" }, + { @"celui d'avant le dernièr", @"-1" }, + { @"celle d'avant la derniere", @"-1" }, + { @"celui d'avant le dernier", @"-1" }, + { @"celle d’avant la dernière", @"-1" }, + { @"celui d’avant le dernièr", @"-1" }, + { @"celle d’avant la derniere", @"-1" }, + { @"celui d’avant le dernier", @"-1" }, + { @"penultieme", @"-1" }, + { @"penultième", @"-1" }, + { @"pénultieme", @"-1" }, + { @"pénultième", @"-1" }, + { @"antepenultieme", @"-2" }, + { @"antépenultieme", @"-2" }, + { @"antepenultième", @"-2" }, + { @"antépenultième", @"-2" }, + { @"antepénultieme", @"-2" }, + { @"antépénultieme", @"-2" }, + { @"antepénultième", @"-2" }, + { @"antépénultième", @"-2" }, + { @"dernier", @"0" }, + { @"dernièr", @"0" }, + { @"derniere", @"0" }, + { @"derniers", @"0" }, + { @"dernière", @"0" }, + { @"dernièrs", @"0" }, + { @"suivant", @"1" }, + { @"suivante", @"1" }, + { @"suivants", @"1" }, + { @"courant", @"0" }, + { @"courante", @"0" }, + { @"courants", @"0" } }; public static readonly Dictionary RelativeReferenceRelativeToMap = new Dictionary { - { @"", @"" } + { @"prochain", @"current" }, + { @"prochaine", @"current" }, + { @"prochains", @"current" }, + { @"precedent", @"current" }, + { @"precedente", @"current" }, + { @"precédent", @"current" }, + { @"precédente", @"current" }, + { @"précedent", @"current" }, + { 
@"précedente", @"current" }, + { @"précédent", @"current" }, + { @"précédente", @"current" }, + { @"actuel", @"current" }, + { @"actuelle", @"current" }, + { @"actuel un", @"current" }, + { @"actuelle une", @"current" }, + { @"l'actuel", @"current" }, + { @"l'actuelle", @"current" }, + { @"l’actuel", @"current" }, + { @"l’actuelle", @"current" }, + { @"l'actuel un", @"current" }, + { @"l'actuelle une", @"current" }, + { @"l’actuel un", @"current" }, + { @"l’actuelle une", @"current" }, + { @"avant dernier", @"end" }, + { @"avant-dernier", @"end" }, + { @"avant derniere", @"end" }, + { @"avant-derniere", @"end" }, + { @"l'avant dernier", @"end" }, + { @"l'avant-dernier", @"end" }, + { @"l'avant derniere", @"end" }, + { @"l'avant-derniere", @"end" }, + { @"l’avant dernier", @"end" }, + { @"l’avant-dernier", @"end" }, + { @"l’avant derniere", @"end" }, + { @"l’avant-derniere", @"end" }, + { @"celle d'avant la dernière", @"end" }, + { @"celui d'avant le dernièr", @"end" }, + { @"celle d'avant la derniere", @"end" }, + { @"celui d'avant le dernier", @"end" }, + { @"celle d’avant la dernière", @"end" }, + { @"celui d’avant le dernièr", @"end" }, + { @"celle d’avant la derniere", @"end" }, + { @"celui d’avant le dernier", @"end" }, + { @"penultieme", @"end" }, + { @"penultième", @"end" }, + { @"pénultieme", @"end" }, + { @"pénultième", @"end" }, + { @"antepenultieme", @"end" }, + { @"antépenultieme", @"end" }, + { @"antepenultième", @"end" }, + { @"antépenultième", @"end" }, + { @"antepénultieme", @"end" }, + { @"antépénultieme", @"end" }, + { @"antepénultième", @"end" }, + { @"antépénultième", @"end" }, + { @"dernier", @"end" }, + { @"dernièr", @"end" }, + { @"derniere", @"end" }, + { @"derniers", @"end" }, + { @"dernière", @"end" }, + { @"dernièrs", @"end" }, + { @"suivant", @"current" }, + { @"suivante", @"current" }, + { @"suivants", @"current" }, + { @"courant", @"current" }, + { @"courante", @"current" }, + { @"courants", @"current" } }; } } \ No newline at end of 
file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/French/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/French/NumbersWithUnitDefinitions.cs index 85dfc35cb4..f101731701 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/French/NumbersWithUnitDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/French/NumbersWithUnitDefinitions.cs @@ -165,13 +165,13 @@ public static class NumbersWithUnitDefinitions { @"Ban", @"bani|-ban|ban" }, { @"Roupie népalaise", @"roupie népalaise|roupie nepalaise|npr" }, { @"Roupie pakistanaise", @"roupie pakistanaise|pkr" }, - { @"Roupie indienne", @"roupie indienne|inr|roupie indien|inr|₹" }, + { @"Roupie indienne", @"roupie indienne|roupies indiennes|inr|roupie indien|inr|₹" }, { @"Roupie seychelloise", @"roupie seychelloise|scr|sr|sre" }, { @"Roupie mauricienne", @"roupie mauricienne|mur" }, { @"Rufiyaa maldives", @"rufiyaa maldives|mvr|.ރ|rf" }, { @"Roupie srilankaise", @"roupie srilankaise|lrk|රු|ரூ" }, { @"Rupiah Indonésie", @"rupia indonésie|rupia indonesie|rupiah|rp|idr" }, - { @"Roupie", @"roupie" }, + { @"Roupie", @"roupie|roupies" }, { @"Couronne danoise", @"couronne danoise|dkk|couronnes danoise|couronne danemark|couronnes danemark" }, { @"Couronne norvégienne", @"couronne norvégienne|couronne norvegienne|couronnes norvégienne|couronnes norvegienne|nok" }, { @"Couronne féroïenne", @"couronne féroïenne|couronne feroienne" }, @@ -269,9 +269,279 @@ public static class NumbersWithUnitDefinitions { @"Yuan Chinois", @"yuan|yuans|yuan chinois|renminbi|cny|rmb|¥" }, { @"Fen", @"fen" }, { @"Jiao", @"jiao" }, - { @"Mark Finlandais", @"marks finlandais|mark finlandais|fim|mark" } + { @"Mark Finlandais", @"marks finlandais|mark finlandais|fim|mark" }, + { @"Bitcoin", @"bitcoin|bitcoins|btc|xbt|₿" }, + { @"Millibitcoin", @"millibitcoin|millibitcoins|milibitcoin|milibitcoins" }, + { @"Satoshi", @"satoshi|satoshis" } }; - public const string CompoundUnitConnectorRegex = 
@"(?[^.])"; + public static readonly Dictionary CurrencyNameToIsoCodeMap = new Dictionary + { + { @"Afghan afghani", @"AFN" }, + { @"Euro", @"EUR" }, + { @"Albanian lek", @"ALL" }, + { @"Kwanza angolais", @"AOA" }, + { @"Armenian dram", @"AMD" }, + { @"Florins d'Aruba", @"AWG" }, + { @"Bangladeshi taka", @"BDT" }, + { @"Ngultrum bhoutanais", @"BTN" }, + { @"Boliviano bolivien", @"BOB" }, + { @"Bosnie-Herzégovine mark convertible", @"BAM" }, + { @"Pula", @"BWP" }, + { @"Réal brésilien", @"BRL" }, + { @"Lev bulgare", @"BGN" }, + { @"Riel cambodgien", @"KHR" }, + { @"Escudo du cap-vert", @"CVE" }, + { @"Colon du costa rica", @"CRC" }, + { @"Kuna croate", @"HRK" }, + { @"Couronne tchèque", @"CZK" }, + { @"Nakfas érythréens", @"ERN" }, + { @"Birr éthiopien", @"ETB" }, + { @"Dalasi gambienne", @"GMD" }, + { @"Lari géorgien", @"GEL" }, + { @"Cedi", @"GHS" }, + { @"Quetzal guatémaltèque", @"GTQ" }, + { @"Gourdes haïtiennes", @"HTG" }, + { @"Lempira hondurien", @"HNL" }, + { @"Forint hongrois", @"HUF" }, + { @"Rial iranien", @"IRR" }, + { @"Yemeni rial", @"YER" }, + { @"Israeli new shekel", @"ILS" }, + { @"Yen Japonais", @"JPY" }, + { @"Tenge kazakh", @"KZT" }, + { @"Shilling kényan", @"KES" }, + { @"Corée du nord won", @"KPW" }, + { @"Won sud-coréen", @"KRW" }, + { @"Som Kirghizie", @"KGS" }, + { @"Kip laotien", @"LAK" }, + { @"Loti", @"LSL" }, + { @"Rand sud-africain", @"ZAR" }, + { @"Pataca macanais", @"MOP" }, + { @"Dinar macédonien", @"MKD" }, + { @"Ariary malagache", @"MGA" }, + { @"Kwacha malawien", @"MWK" }, + { @"Ringitt malaisien", @"MYR" }, + { @"Ouguiya mauritanienne", @"MRO" }, + { @"Togrogs mongoles", @"MNT" }, + { @"Metical mozambique", @"MZN" }, + { @"Kyat birmanie", @"MMK" }, + { @"Cordoba nicaraguayen", @"NIO" }, + { @"Nigerian naira", @"NGN" }, + { @"Livre turque", @"TRY" }, + { @"Rials omanais", @"OMR" }, + { @"Balboa panaméennes", @"PAB" }, + { @"Kina", @"PGK" }, + { @"Guaraní paraguayen", @"PYG" }, + { @"Peruvian sol", @"PEN" }, + { @"Złoty polonais", 
@"PLN" }, + { @"Riyal qatari", @"QAR" }, + { @"Riyal saudi", @"SAR" }, + { @"Tala", @"WST" }, + { @"São Tomé and Príncipe dobra", @"STN" }, + { @"Leone", @"SLL" }, + { @"Lilangeni", @"SZL" }, + { @"Somoni tadjikistan", @"TJS" }, + { @"Baht thaïlandais", @"THB" }, + { @"Hryvnia ukrainien", @"UAH" }, + { @"Vanuatu vatu", @"VUV" }, + { @"Bolívar vénézuélien", @"VEF" }, + { @"Kwacha de Zambie", @"ZMW" }, + { @"Dirham marocain", @"MAD" }, + { @"Dirham des Émirats arabes unis", @"AED" }, + { @"Manat azerbaïdjanais", @"AZN" }, + { @"Manat turkmène", @"TMT" }, + { @"Shilling somalien", @"SOS" }, + { @"Shilling tanzanien", @"TZS" }, + { @"Shilling ougandais", @"UGX" }, + { @"Leu roumain", @"RON" }, + { @"Leu moldave", @"MDL" }, + { @"Roupie népalaise", @"NPR" }, + { @"Roupie pakistanaise", @"PKR" }, + { @"Roupie indienne", @"INR" }, + { @"Roupie seychelloise", @"SCR" }, + { @"Roupie mauricienne", @"MUR" }, + { @"Rufiyaa maldives", @"MVR" }, + { @"Sri Lankan rupee", @"LKR" }, + { @"Rupiah Indonésie", @"IDR" }, + { @"Couronne danoise", @"DKK" }, + { @"Couronne norvégienne", @"NOK" }, + { @"Icelandic króna", @"ISK" }, + { @"Couronne suédoise", @"SEK" }, + { @"Franc CFA de l'Afrique de l'Ouest", @"XOF" }, + { @"Franc CFA d'Afrique centrale", @"XAF" }, + { @"Franc comorien", @"KMF" }, + { @"Franc congolais", @"CDF" }, + { @"Franc burundais", @"BIF" }, + { @"Franc djiboutienne", @"DJF" }, + { @"Franc CFP", @"XPF" }, + { @"Franc guinéen", @"GNF" }, + { @"Franc Suisse", @"CHF" }, + { @"Franc rwandais", @"RWF" }, + { @"Rouble russe", @"RUB" }, + { @"Rouble transnistriens", @"PRB" }, + { @"Nouveau rouble biélorusse", @"BYN" }, + { @"Dinar algérien", @"DZD" }, + { @"Dinar de bahreïn", @"BHD" }, + { @"Dinar iraquien", @"IQD" }, + { @"Dinar jordanien", @"JOD" }, + { @"Dinar koweïtien", @"KWD" }, + { @"Dinar libyen", @"LYD" }, + { @"Dinar serbe", @"RSD" }, + { @"Dinar tunisien", @"TND" }, + { @"Peso argentin", @"ARS" }, + { @"Peso chilien", @"CLP" }, + { @"Peso colombien", @"COP" }, + { 
@"Peso cubains convertibles", @"CUC" }, + { @"Peso cubains", @"CUP" }, + { @"Peso dominicain", @"DOP" }, + { @"Mexican peso", @"MXN" }, + { @"Peso uruguayen", @"UYU" }, + { @"Livre britannique", @"GBP" }, + { @"Livre sainte-hélène", @"SHP" }, + { @"Livre égyptienne", @"EGP" }, + { @"Livre des îles falkland", @"FKP" }, + { @"Livre gibraltar", @"GIP" }, + { @"Livre manx", @"IMP" }, + { @"Livre jersey", @"JEP" }, + { @"Livre libanaise", @"LBP" }, + { @"South Sudanese pound", @"SSP" }, + { @"Livre soudanaise", @"SDG" }, + { @"Livre syrienne", @"SYP" }, + { @"Dollar États-Unis", @"USD" }, + { @"Dollar Australien", @"AUD" }, + { @"Dollar des bahamas", @"BSD" }, + { @"Dollar barbadien", @"BBD" }, + { @"Dollar de belize", @"BZD" }, + { @"Dollar des bermudes", @"BMD" }, + { @"Dollar de brunei", @"BND" }, + { @"Dollar de Singapour", @"SGD" }, + { @"Dollar Canadien", @"CAD" }, + { @"Dollar des îles Caïmans", @"KYD" }, + { @"Dollar néo-zélandais", @"NZD" }, + { @"Dollar de Fidji", @"FJD" }, + { @"Dollar guyanien", @"GYD" }, + { @"Dollar de Hong Kong", @"HKD" }, + { @"Dollar jamaïcain", @"JMD" }, + { @"Dollar libérien", @"LRD" }, + { @"Dollar namibien", @"NAD" }, + { @"Dollar des îles Salomon", @"SBD" }, + { @"Dollar du suriname", @"SRD" }, + { @"Nouveau dollar de Taïwan", @"TWD" }, + { @"Dollar trinidadien", @"TTD" }, + { @"Tuvaluan dollar", @"TVD" }, + { @"Yuan Chinois", @"CNY" }, + { @"Rial", @"__RI" }, + { @"Shiling", @"__S" }, + { @"Som", @"__SO" }, + { @"Dirham", @"__DR" }, + { @"Dinar", @"_DN" }, + { @"Dollar", @"__D" }, + { @"Manat", @"__MA" }, + { @"Rupee", @"__R" }, + { @"Krone", @"__K" }, + { @"Krona", @"__K" }, + { @"Crown", @"__K" }, + { @"Frank", @"__F" }, + { @"Mark", @"__M" }, + { @"Ruble", @"__RB" }, + { @"Peso", @"__PE" }, + { @"Pound", @"__P" }, + { @"Tristan da Cunha pound", @"_TP" }, + { @"South Georgia and the South Sandwich Islands pound", @"_SP" }, + { @"Somaliland shilling", @"_SS" }, + { @"Pitcairn Islands dollar", @"_PND" }, + { @"Palauan dollar", 
@"_PD" }, + { @"Niue dollar", @"_NID" }, + { @"Nauruan dollar", @"_ND" }, + { @"Micronesian dollar", @"_MD" }, + { @"Kiribati dollar", @"_KID" }, + { @"Guernsey pound", @"_GGP" }, + { @"Faroese króna", @"_FOK" }, + { @"Cook Islands dollar", @"_CKD" }, + { @"British Virgin Islands dollar", @"_BD" }, + { @"Ascension pound", @"_AP" }, + { @"Alderney pound", @"_ALP" }, + { @"Abkhazian apsar", @"_AA" }, + { @"Bitcoin", @"_XBT" } + }; + public static readonly Dictionary FractionalUnitNameToCodeMap = new Dictionary + { + { @"Jiao", @"JIAO" }, + { @"Kopek", @"KOPEK" }, + { @"Pul", @"PUL" }, + { @"Cent", @"CENT" }, + { @"Qindarkë", @"QINDARKE" }, + { @"Penny", @"PENNY" }, + { @"Santeem", @"SANTEEM" }, + { @"Cêntimo", @"CENTIMO" }, + { @"Centavo", @"CENTAVO" }, + { @"Luma", @"LUMA" }, + { @"Qəpik", @"QƏPIK" }, + { @"Fils", @"FILS" }, + { @"Poisha", @"POISHA" }, + { @"Kapyeyka", @"KAPYEYKA" }, + { @"Centime", @"CENTIME" }, + { @"Chetrum", @"CHETRUM" }, + { @"Paisa", @"PAISA" }, + { @"Fening", @"FENING" }, + { @"Thebe", @"THEBE" }, + { @"Sen", @"SEN" }, + { @"Stotinka", @"STOTINKA" }, + { @"Fen", @"FEN" }, + { @"Céntimo", @"CENTIMO" }, + { @"Lipa", @"LIPA" }, + { @"Haléř", @"HALER" }, + { @"Øre", @"ØRE" }, + { @"Piastre", @"PIASTRE" }, + { @"Santim", @"SANTIM" }, + { @"Oyra", @"OYRA" }, + { @"Butut", @"BUTUT" }, + { @"Tetri", @"TETRI" }, + { @"Pesewa", @"PESEWA" }, + { @"Fillér", @"FILLER" }, + { @"Eyrir", @"EYRIR" }, + { @"Dinar", @"DINAR" }, + { @"Agora", @"AGORA" }, + { @"Tïın", @"TIIN" }, + { @"Chon", @"CHON" }, + { @"Jeon", @"JEON" }, + { @"Tyiyn", @"TYIYN" }, + { @"Att", @"ATT" }, + { @"Sente", @"SENTE" }, + { @"Dirham", @"DIRHAM" }, + { @"Rappen", @"RAPPEN" }, + { @"Avo", @"AVO" }, + { @"Deni", @"DENI" }, + { @"Iraimbilanja", @"IRAIMBILANJA" }, + { @"Tambala", @"TAMBALA" }, + { @"Laari", @"LAARI" }, + { @"Khoums", @"KHOUMS" }, + { @"Ban", @"BAN" }, + { @"Möngö", @"MONGO" }, + { @"Pya", @"PYA" }, + { @"Kobo", @"KOBO" }, + { @"Kuruş", @"KURUS" }, + { @"Baisa", @"BAISA" }, 
+ { @"Centésimo", @"CENTESIMO" }, + { @"Toea", @"TOEA" }, + { @"Sentimo", @"SENTIMO" }, + { @"Grosz", @"GROSZ" }, + { @"Sene", @"SENE" }, + { @"Halala", @"HALALA" }, + { @"Para", @"PARA" }, + { @"Öre", @"ORE" }, + { @"Diram", @"DIRAM" }, + { @"Satang", @"SATANG" }, + { @"Seniti", @"SENITI" }, + { @"Millime", @"MILLIME" }, + { @"Tennesi", @"TENNESI" }, + { @"Kopiyka", @"KOPIYKA" }, + { @"Tiyin", @"TIYIN" }, + { @"Hào", @"HAO" }, + { @"Ngwee", @"NGWEE" }, + { @"Millibitcoin", @"MILLIBITCOIN" }, + { @"Satoshi", @"SATOSHI" } + }; + public const string CompoundUnitConnectorRegex = @"(?et)"; public static readonly Dictionary CurrencyPrefixList = new Dictionary { { @"Dollar", @"$" }, @@ -301,7 +571,8 @@ public static class NumbersWithUnitDefinitions { @"Yuan Chinois", @"cny|rmb|¥|元" }, { @"Yen Japonais", @"¥|jpy" }, { @"Euro", @"€|eur" }, - { @"Livre", @"£" } + { @"Livre", @"£" }, + { @"Bitcoin", @"₿|btc|xbt" } }; public static readonly IList AmbiguousCurrencyUnitList = new List { @@ -333,7 +604,9 @@ public static class NumbersWithUnitDefinitions @"sos", @"std", @"try", - @"yer" + @"yer", + @"livre", + @"ils" }; public static readonly Dictionary InformationSuffixList = new Dictionary { @@ -355,7 +628,6 @@ public static class NumbersWithUnitDefinitions @"mi", @"barils", @"grain", - @"l", @"pierre", @"fps", @"pts" @@ -382,19 +654,14 @@ public static class NumbersWithUnitDefinitions }; public static readonly IList AmbiguousLengthUnitList = new List { - @"m", + @"m.", @"yard", @"yards", @"pm", + @"pouce", @"pt", @"pts" }; - public static readonly IList AmbuguousLengthUnitList = new List - { - @"m", - @"pouce", - @"pm" - }; public static readonly Dictionary SpeedSuffixList = new Dictionary { { @"Mètre par seconde", @"m/s|metres/seconde|metres par seconde|metre par seconde|metres par secondes|mètre par seconde|mètres par seconde|mètres par secondes" }, @@ -406,13 +673,19 @@ public static class NumbersWithUnitDefinitions { @"Pied par seconde", @"ft/s|pied par seconde|pieds par 
seconde|pied/s|pieds/s" }, { @"Pied par minute", @"pieds/minute|pied/minute|ft/minute|ft/min|pied/min" } }; + public static readonly IList AmbiguousSpeedUnitList = new List + { + @"noeud", + @"noeuds", + @"nuds" + }; public static readonly Dictionary TemperatureSuffixList = new Dictionary { { @"Kelvin", @"k|K|kelvin" }, - { @"F", @"°f|degres f|degrés f|deg f|° f|degrés fahrenheit|degres fahrenheit|fahrenheit" }, + { @"F", @"°f|° f|degres f|degrés f|deg f|degrés fahrenheit|degres fahrenheit|fahrenheit|deg fahrenheit|degs fahrenheit" }, { @"R", @"rankine|°r|° r" }, - { @"C", @"°c|deg c|degrés celsius|degrés c|degres celsius|celsius|deg celsius|degs celsius|centigrade|deg centigrade|degs centigrade|degrés centigrade|degres centigrade|degré centigrade|degre centigrade" }, - { @"Degré", @"degrés|degres|deg.|°| °|degré|degre|deg" } + { @"C", @"°c|° c|degres c|degrés c|deg c|degrés celsius|degres celsius|celsius|deg celsius|degs celsius|centigrade|deg centigrade|degs centigrade|degrés centigrade|degres centigrade|degré centigrade|degre centigrade" }, + { @"Degré", @"degrés|degres|deg.|°|degré|degre|deg|degs" } }; public static readonly Dictionary VolumeSuffixList = new Dictionary { @@ -433,19 +706,15 @@ public static class NumbersWithUnitDefinitions }; public static readonly IList AmbiguousVolumeUnitList = new List { - @"ounce", @"oz", - @"l", - @"cup", - @"peck", - @"cord", - @"gill" + @"l" }; public static readonly Dictionary WeightSuffixList = new Dictionary { { @"Kilogramme", @"kg|kilogramme|kilogrammes|kilo|kilos" }, { @"Gram", @"g|gramme|grammes" }, { @"Milligramme", @"mg|milligramme|milligrammes" }, + { @"Microgramme", @"µg|ug|microgramme|microgrammes" }, { @"Tonne métrique", @"tonne métrique|tonnes métrique|tonnes métriques|tonne metrique|tonnes metrique" }, { @"Tonne", @"tonne|tonnes|-tonnes|-tonne" }, { @"Livre", @"livre|livres" } @@ -453,13 +722,30 @@ public static class NumbersWithUnitDefinitions public static readonly IList AmbiguousWeightUnitList = new List 
{ @"g", - @"oz", - @"stone", - @"dram" + @"oz" + }; + public static readonly Dictionary AngleSuffixList = new Dictionary + { + { @"Degree", @"degré|degrés|degre|degres|deg|°" }, + { @"Radian", @"radian|radians|rad" }, + { @"Turn", @"tour|tours" } + }; + public static readonly IList AmbiguousAngleUnitList = new List + { + @"tour", + @"tours" }; public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { { @"\bcent\b", @"\bpour\s+cent\b" } }; + public static readonly Dictionary TemperatureAmbiguityFiltersDict = new Dictionary + { + { @"\b(deg(r[eé]s?)?|°)$", @"\b((deg(r[eé]s?)?|°)\s*(angle|rotation)|(tourn([eé]|er|ant)|rotation|angle)(\s+(\p{L}+|\d+)){0,4}\s*(deg(r[eé]s?)?\b|°))" } + }; + public static readonly Dictionary DimensionAmbiguityFiltersDict = new Dictionary + { + { @"\b(deg(r[eé]s?)?|°)$", @"\b((deg(r[eé]s?)?|°)\s*(c(elsius|entigrade)?|f(ah?renheit)?)|(temp[eé]rature)(\s+(\p{L}+|\d+)){0,4}\s*(deg(r[eé]s?)?\b|°))" } + }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/French/QuotedTextDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/French/QuotedTextDefinitions.cs new file mode 100644 index 0000000000..988847c3b6 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/French/QuotedTextDefinitions.cs @@ -0,0 +1,35 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\French\French-QuotedText.yaml +// - Language: French +// - ClassName: QuotedTextDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.French +{ + using System; + using System.Collections.Generic; + + public static class QuotedTextDefinitions + { + public const string LangMarker = @"Fr"; + public const string QuotedTextRegex1 = @"(“([^“”]+)”)"; + public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)"; + public const string QuotedTextRegex3 = @"(""([^""]+)"")"; + public const string QuotedTextRegex4 = @"(\\'([^\']+)\\')"; + public const string QuotedTextRegex5 = @"(`([^`]+)`)"; + public const string QuotedTextRegex6 = @"(«([^«»]+)»)"; + public const string QuotedTextRegex7 = @"(‹([^‹›]+)›)"; + public const string QuotedTextRegex8 = @"(123456)"; + public const string QuotedTextRegex9 = @"(123456)---"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/French/QuotedTextDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/French/QuotedTextDefinitions.tt new file mode 100644 index 0000000000..44a6b9bb44 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/French/QuotedTextDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\French\French-QuotedText.yaml"; + this.Language = "French"; + this.ClassName = "QuotedTextDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/German/ChoiceDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/German/ChoiceDefinitions.cs index 9cff65afeb..6b28a94d10 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/German/ChoiceDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/German/ChoiceDefinitions.cs @@ -23,7 +23,8 @@ public static class ChoiceDefinitions { public const string LangMarker = @"Ger"; public const string TokenizerRegex = @"[^\w\d\u00E0-\u00FC]"; - public 
const string TrueRegex = @"\b(wahr|ja|jep|j|sicher|ok|einverstanden|mit\s+Sicherheit|sicherlich|jap|mache ich)\b|(\uD83D\uDC4D|\uD83D\uDC4C)"; - public const string FalseRegex = @"\b(falsch|nein|ne|nö|nicht\s+ok|nicht\s+einverstanden|n)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)"; + public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)"; + public static readonly string TrueRegex = $@"\b(wahr|ja|jep|j|sicher|ok|einverstanden|mit\s+Sicherheit|sicherlich|jap|mache ich)\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?"; + public static readonly string FalseRegex = $@"\b(falsch|nein|ne|nö|nicht\s+ok|nicht\s+einverstanden|n)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/German/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/German/DateTimeDefinitions.cs index b4f1e5fa07..379d2710f5 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/German/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/German/DateTimeDefinitions.cs @@ -21,113 +21,128 @@ namespace Microsoft.Recognizers.Definitions.German public static class DateTimeDefinitions { + public const string LangMarker = @"Ger"; public const bool CheckBothBeforeAfter = false; public const string TillRegex = @"(?zu|bis\s*zum|zum|bis|bis\s*hin(\s*zum)?|--|-|—|——)"; public const string RangeConnectorRegex = @"(?und|--|-|—|——)"; - public const string RelativeRegex = @"(?nächst(er|en|es|e)|kommend(er|en|es|e)|dies(er|em|en|es|e)|letzt(er|en|es|e)|vergangen(er|en|es|e)|vorherig(er|en|es|e)|vorig(er|en|es|e)|dies(er|en|es|e)|jetzig(er|en|es|e)|heutig(er|en|es|e)|aktuell(er|en|es|e)|gestrig(er|en|es|e)|morgig(er|en|es|e)|de[rmsn]|am)"; - public const string StrictRelativeRegex = 
@"(?nächst(er|en|es|e)|kommend(er|en|es|e)|dies(er|em|en|es|e)|letzt(er|en|es|e)|vergangen(er|en|es|e)|vorherig(er|en|es|e)|vorig(er|en|es|e)|dies(er|en|es|e)|jetzig(er|en|es|e)|heutig(er|en|es|e)|aktuell(er|en|es|e)|gestrig(er|en|es|e)|morgig(er|en|es|e))"; + public const string RelativeRegex = @"\b(?(über)?nächste[rmns]?|kommende[rns]?|diese[rmns]?|vorletzte[snm]?|letzte[rmns]?|vergangene[rns]?|vorherige[rns]?|vorige[rns]?|jetzige[rns]?|heutige[rns]?|aktuelle[rns]?|gestrige[rns]?|morgige[rns]?|de[rmsn]|das(?!\s+tag)|am)"; + public const string StrictRelativeRegex = @"\b(?(über)?nächste[rmns]?|kommende[rns]?|diese[rmns]?|letzte[rmns]?|vergangene[rns]?|vorherige[rns]?|vorige[rns]?|jetzige[rns]?|heutige[rns]?|aktuelle[rns]?|gestrige[rns]?|morgige[rns]?)"; public const string UpcomingPrefixRegex = @".^"; - public static readonly string NextPrefixRegex = $@"\b(nächst(er|en|es|e)|kommend(er|en|es|e)|{UpcomingPrefixRegex})\b"; + public static readonly string NextPrefixRegex = $@"\b((über)?nächste[rmns]?|kommende[rns]?|{UpcomingPrefixRegex})\b"; + public const string AfterNextPrefixRegex = @"\bübernächste[rmns]?\b"; public const string PastPrefixRegex = @".^"; - public static readonly string PreviousPrefixRegex = $@"\b(letzt(er|en|es|e)|vergangen(er|en|es|e)|vorherig(er|en|es|e)|vorig(er|en|es|e)|vor|{PastPrefixRegex})\b"; - public const string ThisPrefixRegex = @"\b(dies(er|en|em|es|e)|jetzig(er|en|es|e)|heutig(er|en|es|e)|aktuell(er|en|es|e))\b"; - public const string RangePrefixRegex = @"(von|vom|zwischen)"; - public const string DayRegex = @"(de[rmsn]\s*)?(?(01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9))(\.|\b)"; - public const string MonthNumRegex = @"(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)(\.)?"; + public static readonly string PreviousPrefixRegex = $@"\b(letzte[rmns]?|vergangene[rns]?|vorherige[rns]?|vor(ige[rns]?)?|{PastPrefixRegex})\b"; + public const string ThisPrefixRegex = 
@"\b(diese[rnms]?|jetzige[rns]?|heutige[rns]?|aktuelle[rns]?)\b"; + public const string RangePrefixRegex = @"(vo[nm]|zwischen)"; + public const string PenultimatePrefixRegex = @"\b(vorletzte[snm]?)\b"; + public const string WrittenOneToNineRegex = @"(eins?|zw(een|ei|o)|drei|vier|fünf|fuenf|sechs|sieben|acht|neun)"; + public const string DayRegex = @"(de[rmsn]\s*)?(?(01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)(\.|\b))"; + public static readonly string WrittenDayNumRegex = $@"\b(de[rmsn]\s+)?(?erst|zweit|dritt|viert|fünft|fuenft|sechst|siebt|acht|neunt|zehnt|elft|zwölft|zwoelft|dreizehnt|vierzehnt|fünfzehnt|fuenfzehnt|sechzehnt|siebzehnt|achtzehnt|neunzehnt|({WrittenOneToNineRegex}und)?zwanzigst|(einund)?dreißigst)e[nr]\b"; + public const string MonthNumRegex = @"(?(01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)(\.|\b))"; + public const string WrittenMonthNumRegex = @"\b(?erst|zweit|dritt|viert|fünft|fuenft|sechst|siebt|acht|neunt|zehnt|elft|zw(ö|oe)lft)e[nr]\b"; public static readonly string AmDescRegex = $@"({BaseDateTime.BaseAmDescRegex})"; public static readonly string PmDescRegex = $@"({BaseDateTime.BasePmDescRegex})"; public static readonly string AmPmDescRegex = $@"({BaseDateTime.BaseAmPmDescRegex})"; + public const string OclockRegex = @"(?uhr)"; public static readonly string DescRegex = $@"({OclockRegex})"; - public static readonly string TwoDigitYearRegex = $@"\b(?([0-27-9]\d))(?!(\s*((\:)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b"; + public static readonly string TwoDigitYearRegex = $@"\b(?([0-9]\d))(?!(\s*((\:\d)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b"; + public const string CenturyRegex = @"\b(?((ein|zwei)?tausend(und)?)?((ein|zwei|drei|vier|fünf|sechs|sieben|acht|neun|zehn|elf|zwölf|dreizehn|vierzehn|fünfzehn|sechzehn|siebzehn|achtzehn|neunzehn)hundert))\b"; + public static readonly string WrittenNumRegex = 
$@"(zw(ö|oe)lf|dreizehn|vierzehn|fünfzehn|sechzehn|siebzehn|achtzehn|neunzehn|zwanzig|dreißig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig|elf|zehn|{WrittenOneToNineRegex})"; public static readonly string FullTextYearRegex = $@"\b((?{CenturyRegex})\s+(?((zwanzig|dreißig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig)\s+{WrittenNumRegex})|{WrittenNumRegex}))\b|\b(?{CenturyRegex})\b"; public static readonly string YearRegex = $@"({BaseDateTime.FourDigitYearRegex}|{FullTextYearRegex})"; - public const string WeekDayRegex = @"(?sonntag|montag|dienstag|mittwoch|donnerstag|freitag|samstag|(mo|di|mi|do|fr|sa|so)(\.))"; - public const string SingleWeekDayRegex = @"(?sonntag|montag|dienstag|mittwoch|donnerstag|freitag|samstag|(mo|di|mi|do|fr|sa|so)(\.))"; + public const string WeekDayRegex = @"(?sonntag|montag|dienstag|mittwoch|donnerstag|freitag|samstag|sonnabend|(mo|di|mi|do|fr|sa|so)(\.|\b))"; + public const string SingleWeekDayRegex = @"\b(?sonntag|montag|dienstag|mittwoch|donnerstag|freitag|samstag|sonnabend|(mo|di|mi|do|fr|sa|so)(\.|\b))"; public static readonly string RelativeMonthRegex = $@"(?{RelativeRegex}\s+monat(s)?)"; - public const string WrittenMonthRegex = @"((monat\s*)?(?april|apr\.|august|aug\.|dezember|dez\.|februar|feber|feb\.|januar|j[äa]nner|j[äa]n\.|juli|julei|jul\.|jun[io]|jun\.|märz|mai|november|nov\.|oktober|okt\.|september|sept?\.))"; + public const string WrittenMonthRegex = @"((monat\s*)?(?apr(il|\.)|aug(ust|\.)|dez(ember|\.)|feb(ruar|ber|\.)|j[äa]n(uar|ner|\.)|jul(e?i|l\.)|jun([io]|\.)|märz|mai|nov(ember|\.)|okt(ober|\.)|sept?(ember|\.)))"; public static readonly string MonthSuffixRegex = $@"(?(im\s*|des\s*)?({RelativeMonthRegex}|{WrittenMonthRegex}|{MonthNumRegex}))"; - public const string DateUnitRegex = @"(?jahre|jahr|monate|monat|wochen?|tage|tag)(s)?"; + public const string DateUnitRegex = @"(?tg|woche(?n)?|(jahr|monat|tag)(?e)?)(s)?"; + public const string HalfTokenRegex = @"^(halb)"; + public const string QuarterToTokenRegex = 
@"^(viertel\s+vor|dreiviertel)\s*$"; + public const string QuarterPastTokenRegex = @"^(viertel\s+nach)\s*$"; + public const string ThreeQuarterToTokenRegex = @"^(viertel|dreiviertel\s+vor)\s*$"; + public const string ThreeQuarterPastTokenRegex = @"^(dreiviertel\s+nach)\s*$"; public static readonly string SimpleCasesRegex = $@"((vom|zwischen)\s+)?({DayRegex})(\s*{MonthSuffixRegex})?\s*{TillRegex}\s*({DayRegex})(\s*{MonthSuffixRegex})?((\s+|\s*,\s*){YearRegex})?"; public static readonly string MonthFrontSimpleCasesRegex = $@"((vom|zwischen)\s*)?{MonthSuffixRegex}\s*((vom|zwischen)\s*)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?"; public static readonly string MonthFrontBetweenRegex = $@"({MonthSuffixRegex}\s+(zwischen\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?)"; public static readonly string BetweenRegex = $@"((zwischen\s+)({DayRegex})(\s+{MonthSuffixRegex})?\s*{RangeConnectorRegex}\s*({DayRegex})(\s+{MonthSuffixRegex})((\s+|\s*,\s*){YearRegex})?|(zwischen\s+)({DayRegex})(\s+{MonthSuffixRegex})?\s*{RangeConnectorRegex}\s*({DayRegex})(\s+{MonthSuffixRegex})?((\s+|\s*,\s*){YearRegex})?)"; - public static readonly string MonthWithYear = $@"\b((?april|apr\.|august|aug\.|dezember|dez\.|februar|feber|feb\.|januar|j[äa]nner|j[äa]n\.|juli|julei|jul\.|jun[io]|jun\.|märz|mai|november|nov\.|oktober|okt\.|september|sept?\.)(\s*),?(\s+des)?(\s*)({YearRegex}|(?nächsten|letzten|diese(s|n))\s+jahres))"; - public static readonly string OneWordPeriodRegex = $@"\b((((im\s+)?monat\s+)?({RelativeRegex}\s*(jahr\s*(im\s*)?)?)?(?april|apr\.|august|aug\.|dezember|dez\.|februar|feber|feb\.|januar|j[äa]nner|j[äa]n\.|juli|julei|jul\.|jun[io]|jun\.|märz|mai|november|nov\.|oktober|okt\.|september|sept?\.))|({RelativeRegex}\s+)?(wochenende|woche|monat|jahr)|(monat|jahr))\b"; + public static readonly string MonthWithYear = 
$@"\b((?apr(il|\.)|aug(ust|\.)|dez(ember|\.)|feb(ruar|ber|\.)|januar|j[äa]n(ner|\.)|jul(e?i|l\.)|jun([io]|\.)|märz|mai|nov(ember|\.)|okt(ober|\.)|sept?(ember|\.))(\s*),?(\s+des)?(\s*)({YearRegex}|{TwoDigitYearRegex}|(?nächste[mn]|letzte[mn]|diese(s|n))\s+jahres))"; + public static readonly string OneWordPeriodRegex = $@"\b((((im\s+)?monat\s+)?({RelativeRegex}\s*(jahr\s*(im\s*)?)?)?(?apr(il|\.)|aug(ust|\.)|dez(ember|\.)|feb(ruar|ber|\.)|j[äa]n(uar|ner|\.)|jul(e?i|l\.)|jun([io]|\.)|märz|mai|nov(ember|\.)|okt(ober|\.)|sept?(ember|\.)))|(?unter\s+der\s+woche)|({RelativeRegex}\s+)?((?werktags|arbeitswoche)|woche(nende)?|monat(s)?|jahr|jahres)(?!(\s+\d+(?!({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex}))|\s+bis\s+heute)))\b"; public static readonly string MonthNumWithYear = $@"({YearRegex}(\s*)[/\-\.](\s*){MonthNumRegex})|({MonthNumRegex}(\s*)[/\-\.](\s*){YearRegex})"; - public static readonly string WeekOfMonthRegex = $@"(?((die|der)\s+)(?erst(er|en|es|e)|1\.|zweit(er|en|es|e)|2\.|dritt(er|en|es|e)|3\.|viert(er|en|es|e)|4\.|fünft(er|en|es|e)|5\.|letzt(er|en|es|e))\s+woche\s+(des|diese(s|n)|im)\s+({MonthSuffixRegex}|monat(s)?))"; - public static readonly string WeekOfYearRegex = $@"(?((die|der)\s+)?(?(erst(er|en|es|e)|1\.|zweit(er|en|es|e)|2\.|dritt(er|en|es|e)|3\.|viert(er|en|es|e)|4\.|fünft(er|en|es|e)|5\.|letzt(er|en|es|e))\s+woche\s+(im|diese(s|n)|\s+des)?\s+({YearRegex}|{RelativeRegex}\s+jahr(en|es|e)?)))"; + public static readonly string WeekOfMonthRegex = $@"(?((die|der)\s+)(?erste[rns]?|1\.|zweite[rns]?|2\.|dritte[rns]?|3\.|vierte[rns]?|4\.|fünfte[rns]?|5\.|letzte[rmns]?)\s+woche\s+(des|diese(s|n)|im)\s+({MonthSuffixRegex}|monat(s)?))"; + public static readonly string WeekOfYearRegex = $@"(?((die|der)\s+)?(?(erste[rns]?|1\.|zweite[rns]?|2\.|dritte[rns]?|3\.|vierte[rns]?|4\.|fünfte[rns]?|5\.|letzte[rmns]?)\s+woche\s+(im|diese(s|n)|\s+des)?\s+({YearRegex}|{RelativeRegex}\s+jahr(en|es|e)?)))"; + public static readonly string OfYearRegex = 
$@"\b((of|in)\s+({YearRegex}|{StrictRelativeRegex}\s+year))\b"; + public const string FirstLastRegex = @"\b(the\s+)?((?first)|(?last))\b"; public static readonly string FollowedDateUnit = $@"^\s*{DateUnitRegex}"; public static readonly string NumberCombinedWithDateUnit = $@"\b(?\d+(\.\d*)?){DateUnitRegex}"; - public static readonly string QuarterRegex = $@"((das|dem|im|in dem)\s+)?(?erst(en|es|e)|1\.|zweit(en|es|e)|2\.|dritt(en|es|e)|3\.|viert(en|es|e)|4\.)\s+quartal(\s+(von|des jahres)?|\s*,\s*)?\s+({YearRegex}|{RelativeRegex})"; + public static readonly string QuarterRegex = $@"((das|im|in dem|dem)\s+)?(?erste[rns]?|1\.|zweite[rns]?|2\.|dritte[rns]?|3\.|vierte[rns]?|4\.)\s+quartal(\s+(von|des jahres)?|\s*,\s*)?\s+({YearRegex}|{RelativeRegex})"; public static readonly string QuarterRegexYearFront = $@"(?!)#({YearRegex}|{RelativeRegex}\s+year)\s+(the\s+)?(?first|1st|second|2nd|third|3rd|fourth|4th)\s+quarter"; - public static readonly string AllHalfYearRegex = $@"((das|dem|im|in dem)\s+)?(?erst(en|es|e)|1\.|zweit(en|es|e)|2\.)\s+(halbjahr|hälfte)(\s+(von|des jahres)?|\s*,\s*)?\s+({YearRegex}|{RelativeRegex})"; - public const string PrefixDayRegex = @"\b((?früh)|(?mitten|in der mitte)|(?spät|später))(\s+am\s+tag)?(\s+des\s+tages)?$"; + public static readonly string AllHalfYearRegex = $@"((das|im|in dem|dem)\s+)?(?erste[rns]?|1\.|zweite[rns]?|2\.)\s+(halbjahr|hälfte)(\s+(von|des jahres)?|\s*,\s*)?\s+({YearRegex}|{RelativeRegex})"; + public const string PrefixDayRegex = @"((?früh)|(?mitten|in der mitte)|(?spät(er)?))(\s+am\s+tag)?(\s+des\s+tages)?$"; public const string CenturySuffixRegex = @"(^jahrhundert)\b"; public const string ReferencePrefixRegex = @"((gleich(e|en))|(selb(e|en)))\b"; public static readonly string SeasonRegex = $@"\b(?({RelativeRegex}\s+)?(?frühling|sommer|herbst|winter)((\s+(von|des jahres)?|\s*,\s*)?\s+({YearRegex}|{RelativeRegex}\s+jahr(e(s)?)?))?)\b"; public const string WhichWeekRegex = @"\b(week)(\s*)(?5[0-3]|[1-4]\d|0?[1-9])\b"; public 
const string WeekOfRegex = @"(die\s+)?(woche)(\s+des)"; public const string MonthOfRegex = @"(monat)(\s*)(des)"; - public const string MonthRegex = @"(?april|apr\.|august|aug\.|dezember|dez\.|februar|feber|feb\.|januar|j[äa]nner|j[äa]n\.|juli|julei|jul\.|jun[io]|jun\.|märz|mai|november|nov\.|oktober|okt\.|september|sept?\.)"; + public const string MonthRegex = @"(?apr((il)?\b|\.)|aug((ust)?\b|\.)|dez((ember)?\b|\.)|feb((ruar|ber)?\b|\.)|januar|j[äa]n((ner)?\b|\.)|jul((e?i|l)?\b|l\.)|jun([io]?\b|\.)|märz|mai|nov((ember)?\b|\.)|okt((ober)?\b|\.)|sept?((ember)?\b|\.))"; public static readonly string DateYearRegex = $@"(?{BaseDateTime.FourDigitYearRegex}|{TwoDigitYearRegex})"; public static readonly string OnRegex = $@"(?<=\bam\s+)({DayRegex}s?)\b"; public const string RelaxedOnRegex = @"(?<=\b(am|an dem)\s+)((?10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)([\.]))"; public static readonly string ThisRegex = $@"(((diese((n|m)|(\s*woche))(\s*am)?\s+){WeekDayRegex})|diese(n|r)?\s*(sommer|winter|frühling|herbst))"; public static readonly string LastDateRegex = $@"({PreviousPrefixRegex}(\s*(woche|monat|jahr)?(\s*(am|im))?)?\s+({WeekDayRegex}|sommer|winter|frühling|herbst))|((am\s+)?{WeekDayRegex}(\s+{PreviousPrefixRegex}\s*woche))"; public static readonly string NextDateRegex = $@"({NextPrefixRegex}(\s*(woche|monat|jahr)?(\s*(am|im))?)?\s+({WeekDayRegex}|sommer|winter|frühling|herbst))|((am\s+)?{WeekDayRegex}(\s+{NextPrefixRegex}\s*woche))"; - public static readonly string SpecialDayRegex = $@"(vorgestern|übermorgen|((der\s+)?{RelativeRegex}\s+(tag|morgen))|\bgestern\b|\bmorgen\b|heute|(heutig(e|en|es)?|aktuelle(n|s)?) (datum|tag(s|es)?))"; - public static readonly string SpecialDayWithNumRegex = $@"\b((?{WrittenNumRegex})\s+tage?\s+(von|nach|ab)\s+(?\bgestern\b|\bmorgen\b|heute|(heutig(e|en|es)?|aktuelle(n|s)?) 
(datum|tag(s|es)?)))\b"; - public static readonly string RelativeDayRegex = $@"((((de[rmns])\s+)?{RelativeRegex}\s+tag(e(s)?)?))"; - public const string SetWeekDayRegex = @"\b(?(an)\s+)?(?sonntag|montag|dienstag|mittwoch|donnerstag|freitag|samstag)(s|en)\b"; - public static readonly string WeekDayOfMonthRegex = $@"\b(?((an( dem)?|de[rs]|am)\s+)?(?erst(er|en|e)|1\.|zweit(er|en|e)|2\.|dritt(er|en|e)|3\.|viert(er|en|e)|4\.|fünft(er|en|e)|5\.|letzt(er|en|e))\s+{WeekDayRegex}\s+{MonthSuffixRegex})\b"; + public static readonly string SpecialDayRegex = $@"\b(vorgestern|übermorgen|((der\s+)?{RelativeRegex}\s+(tag(s|es)?|(?{WrittenNumRegex})\s+tage?\s+(von|nach|ab)\s+(?\bgestern\b|\bmorgen\b|heute|(heutige[rns]?|aktuelle[rns]?) (datum|tag(s|es)?)))\b"; + public static readonly string RelativeDayRegex = $@"\b((((de[rmns])\s+)?{RelativeRegex}\s+tag(e(s)?)?))"; + public const string SetWeekDayRegex = @"\b(?(an|immer)\s+)?(?sonntag|montag|dienstag|mittwoch|donnerstag|freitag|samstag|sonnabend)(s|en)\b"; + public static readonly string WeekDayOfMonthRegex = $@"\b(?((an( dem)?|de[rs]|am)\s+)?(?erste[rns]?|1\.|zweite[rns]?|2\.|dritte[rns]?|3\.|vierte[rns]?|4\.|fünfte[rns]?|5\.|letzte[rmns]?)\s+{WeekDayRegex}\s+{MonthSuffixRegex})\b"; public static readonly string RelativeWeekDayRegex = $@"\b({WrittenNumRegex}\s+{WeekDayRegex}e\s+(von\s+jetzt|später))\b"; - public const string WrittenNumRegex = @"(zwölf|zwoelf|dreizehn|vierzehn|fünfzehn|sechzehn|siebzehn|achtzehn|neunzehn|zwanzig|dreißig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig|eins?|zwei|zwo|drei|vier|fünf|fuenf|sechs|sieben|acht|neun|zehn|elf)"; public static readonly string SpecialDate = $@"(?=\b(an( dem)?|am)\s+){DayRegex}\b"; - public static readonly string DateExtractor1 = $@"\b(({WeekDayRegex})(\s+|\s*,\s*))?({DayRegex})((\s*){MonthRegex})((\s+|\s*,\s*)({DateYearRegex}))?\b"; - public static readonly string DateExtractor2 = $@"\b({DayRegex}((\s*){MonthRegex})((\,\s*|\s*){DateYearRegex})?)\b"; + public static readonly 
string DateExtractor1 = $@"\b(({WeekDayRegex})(\s+|\s*,\s*))?({DayRegex}\s*[/\\.,\- ]\s*{MonthRegex}(\s*[/\\.,\- ]\s*{DateYearRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[/\\.,\- ]\s*{DayRegex}\s*[/\\.,\- ]\s*{MonthRegex})\b"; + public static readonly string DateExtractor2 = $@"\b({MonthRegex}\s*[/\\.,\- ]\s*{DayRegex}(?!\s*\-\s*\d{{2}}\b)(\s*[/\\.,\- ]\s*{DateYearRegex})?)\b"; public static readonly string DateExtractor3 = $@"\b({DayRegex}{MonthRegex})"; public static readonly string DateExtractor4 = $@"\b({DayRegex}\s*{MonthNumRegex}\s*{DateYearRegex})\b"; - public static readonly string DateExtractor5 = $@"\b({DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex})\b"; - public static readonly string DateExtractor6 = $@"\b(({WeekDayRegex}\s*)?(in)\s*(\d(\d)?)\s*(woche(n)?))\b"; - public static readonly string DateExtractor7 = $@"({DayRegex}\s*[\.]\s*{MonthNumRegex}[\.])"; - public static readonly string DateExtractor8 = $@"(?<=\b(am)\s+){DayRegex}[/\\\.]{MonthNumRegex}[/\\\.]({DateYearRegex})?\b"; - public static readonly string DateExtractor9 = $@"\b({DayRegex}\s*/\s*{MonthNumRegex}((\s+|\s*,\s*){DateYearRegex})?)\b"; - public static readonly string DateExtractor10 = $@"\b({RelativeRegex}\s*jahr(\s*im)?({MonthNumRegex}|sommer|winter|frühling|herbst)?)\b"; - public static readonly string DateExtractorA = $@"({DateYearRegex}\s*[/\\\-\.]\s*{MonthNumRegex}\s*[/\\\-\.]\s*{DayRegex})"; + public static readonly string DateExtractor5 = $@"\b(({WeekDayRegex})(\s+|\s*,\s*))?({DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex})\b(?!\s*[/\\\-\.]\s*\d+)"; + public static readonly string DateExtractor6 = $@"^[.]"; + public static readonly string DateExtractor7 = $@"({DayRegex}\s*[\.]\s*{MonthNumRegex}[\.]){BaseDateTime.CheckDecimalRegex}"; + public static readonly string DateExtractor8 = $@"(?<=\b(am)\s+){DayRegex}[/\\\.]{MonthNumRegex}([/\\\.]{DateYearRegex})?{BaseDateTime.CheckDecimalRegex}\b"; + public 
static readonly string DateExtractor9 = $@"\b({DayRegex}\s*/\s*{MonthNumRegex}((\s+|\s*,\s*){DateYearRegex})?){BaseDateTime.CheckDecimalRegex}\b"; + public static readonly string DateExtractor10 = $@"^[.]"; + public static readonly string DateExtractor11 = $@"\b(({WeekDayRegex})(\s+|\s*,\s*)|(?<=\bam\s+))({DayRegex}\.|{WrittenDayNumRegex})\s*[/\\.\- ]\s*({MonthNumRegex}\.|{WrittenMonthNumRegex})(\s*[/\\.\- ]\s*{DateYearRegex})?"; + public static readonly string DateExtractorA = $@"({DateYearRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DayRegex}|{MonthRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{DayRegex}|{DayRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{MonthRegex})(?!\s*[/\\\-\.:]\s*\d+)"; public static readonly string OfMonth = $@"^(\s*des\s*|\s*)?{MonthRegex}"; public static readonly string MonthEnd = $@"{MonthRegex}\s*(de[rmn])?\s*$"; public static readonly string WeekDayEnd = $@"{WeekDayRegex}\s*,?\s*$"; - public const string WeekDayStart = @"^[\.]"; + public static readonly string WeekDayStart = $@"^\s+(am\s+)?{WeekDayRegex}\b"; public const string RangeUnitRegex = @"(?jahren?|jahr|monaten?|monat|wochen?|tagen?|tag)"; - public const string OclockRegex = @"(?uhr)"; - public const string HourNumRegex = @"\b(?einundzwanzig|zweiundzwanzig|dreiundzwanzig|vierundzwanzig|zwölf|zwoelf|dreizehn|vierzehn|fünfzehn|sechzehn|siebzehn|achtzehn|neunzehn|zwanzig|'null'|eins?|zwei|zwo|drei|vier|fünf|fuenf|sechs|sieben|acht|neun|zehn|elf)\b"; - public const string MinuteNumRegex = @"(?zwanzig|dreißig|vierzig|fünfzig|zwölf|zwoelf|dreizehn|vierzehn|fünfzehn|fuenfzehn|sechzehn|siebzehn|achtzehn|neunzehn|eins?|zwei|zwo|drei|vier|fünf|fuenf|sechs|sieben|acht|neun|zehn|elf)"; - public const string DeltaMinuteNumRegex = @"(?zwanzig|dreißig|vierzig|fünfzig|zwölf|zwoelf|dreizehn|vierzehn|fünfzehn|fuenfzehn|sechzehn|siebzehn|achtzehn|neunzehn|eins?|zwei|zwo|drei|vier|fünf|fuenf|sechs|sieben|acht|neun|zehn|elf)"; - 
public const string PmRegex = @"((am|gegen|in der)\s+)?(?(nachmittags?|abends?|mitternachts?|\bmittags?|((in der )?nachts?)))"; - public const string PmRegexFull = @"((am|gegen|in der)\s+)?(?(nachmittags?|abends?|mitternachts?|\bmittags?|((in der )?nachts?)))"; - public const string AmRegex = @"(?(((am|gegen)\s+)?(früh|vormittags?)|(morgens|(am|gegen) morgen)))"; + public const string HourNumRegex = @"\b(?einundzwanzig|zweiundzwanzig|dreiundzwanzig|vierundzwanzig|zw(ö|oe)lf|dreizehn|vierzehn|fünfzehn|sechzehn|siebzehn|achtzehn|neunzehn|zwanzig|'null'|eins?|zw(een|ei|o)|drei|vier|fünf|fuenf|sechs|sieben|acht|neun|zehn|elf)\b"; + public const string MinuteNumRegex = @"(?zwanzig|dreißig|vierzig|fünfzig|zw(ö|oe)lf|dreizehn|vierzehn|fünfzehn|fuenfzehn|sechzehn|siebzehn|achtzehn|neunzehn|eins?|zw(een|ei|o)|drei|vier|fünf|fuenf|sechs|sieben|acht|neun|zehn|elf)"; + public const string DeltaMinuteNumRegex = @"(?zwanzig|dreißig|vierzig|fünfzig|zw(ö|oe)lf|dreizehn|vierzehn|fünfzehn|fuenfzehn|sechzehn|siebzehn|achtzehn|neunzehn|eins?|zw(een|ei|o)|drei|vier|fünf|fuenf|sechs|sieben|acht|neun|zehn|elf)"; + public const string PmRegex = @"\b((am|gegen|in der)\s+)?(?(((früh|spät)\s*)?(nachmittags?|abends?)|mitternachts?|\bmittags?|((in der )?nachts?)))"; + public const string PmRegexFull = @"\b((am|gegen|in der)\s+)?(?(((früh|spät)\s*)?(nachmittags?|abends?)|mitternachts?|\bmittags?|((in der )?nachts?)))"; + public const string AmRegex = @"(?(((früh|spät)\s*)?morgens|((am|gegen)\s+)?(früh|vormittags?)|(am|gegen)\s+morgen))"; public const string LunchRegex = @"\b(mittag(essen|s)?)\b"; - public const string NightRegex = @"\b(mitternacht|(nachts?|primetime))\b"; - public const string AmPmPrefixRegex = @"((((um|gegen)\s*)?((am morgen)|morgens|(vor|nach)mittags?|abends?|früh|mitternachts?)|(in der\s*)?nachts?)\s*(um|gegen|von)\s*)"; + public const string NightRegex = @"\b(mitternacht|(nachts?|primetime|abends?))\b"; + public const string AmPmPrefixRegex = @"\b((((um|gegen)\s*)?(?(((?am 
morgen)|((früh|spät)\s*)?morgens|früh|(vor|nach)mittags?)|(?((früh|spät)\s*)?(nachmittags?|abends?)|mitternachts?))|(in der\s*)?(?nachts?)))\s*(um|gegen|von)\s*)"; public const string CommonDatePrefixRegex = @"^[\.]"; - public static readonly string LessThanOneHour = $@"(?(ein(er?)?\s+)?((drei)?viertel|halb(en?)?)(\s*stunden?)?)|{BaseDateTime.DeltaMinuteRegex}(\s+(min(uten?)?))|{DeltaMinuteNumRegex}(\s+(min(uten?)?))"; + public static readonly string LessThanOneHour = $@"\b(?(ein(er?)?\s+)?((drei)?viertel|halb(en?)?)(\s*stunden?)?)|{BaseDateTime.DeltaMinuteRegex}(\s+(min(uten?)?))|{DeltaMinuteNumRegex}(\s+(min(uten?)?))"; public static readonly string WrittenTimeRegex = $@"(um\s*)?(?{HourNumRegex}(\s*{OclockRegex}\s*)({MinuteNumRegex}|{MinuteNumRegex}und(?zwanzig|dreißig|vierzig|fünfzig)))"; public static readonly string TimePrefix = $@"(?({LessThanOneHour})(\s*(vor(\W)?|nach(\W)?))?)"; public static readonly string TimeSuffix = $@"(?{AmRegex}|{PmRegex}|{OclockRegex})"; - public static readonly string TimeSuffixFull = $@"(?{AmRegex}|{PmRegex}|{OclockRegex}|(?nachmittag(s)?|nacht(s)?|abend(s)?))"; + public static readonly string TimeSuffixFull = $@"(?{AmRegex}|{PmRegex}|{OclockRegex}|\b(?nachmittag(s)?|nacht(s)?|abend(s)?))"; public static readonly string BasicTime = $@"(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(:|\s+uhr(\s+und)?\s+){BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|\b{BaseDateTime.HourRegex}(?![%\d]))"; - public const string MidnightRegex = @"(?mitternacht|mitten in der nacht)"; + public const string MidnightRegex = @"(?mitte(r|n in der )nachts?)"; public const string MidmorningRegex = @"(?mitten am vormittag)"; public const string MidafternoonRegex = @"(?mitten am nachmittag)"; - public const string MiddayRegex = @"((?(am )?mittags?)|(?(?<=montag|dienstag|mittwoch|donnerstag|freitag|samstag|sonntag)(mittags?)))"; + public const string MiddayRegex = 
@"((?(am\s+)?mittag(s(zeit)?)?)|(?(?<=montag|dienstag|mittwoch|donnerstag|freitag|samstag|sonnabend|sonntag)(mittags?)))"; public static readonly string MidTimeRegex = $@"(?({MidnightRegex}|{MidmorningRegex}|{MidafternoonRegex}|{MiddayRegex}))"; public static readonly string AtRegex = $@"(((?<=\b(um|gegen)\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?![%\d])|{MidTimeRegex}))|{MidTimeRegex})\b"; public static readonly string IshRegex = $@"\b(noonish)\b"; - public const string TimeUnitRegex = @"(?stunden?|std?|h|minuten?|min|sekunden?|sek|s)\b"; + public const string TimeUnitRegex = @"(?(stunde|minute|sekunde)(?n)?|std?|min|sek|h|s)\b"; public const string RestrictedTimeUnitRegex = @"(?stunde|minute)\b"; public const string FivesRegex = @"(?(fünfzehn|(fünfund)?(zwanzig?|dreißig|vierzig|fünfzig)|zehn|fünf))\b"; public static readonly string HourRegex = $@"\b{BaseDateTime.HourRegex}"; - public const string PeriodHourNumRegex = @"(?einundzwanzig|zweiundzwanzig|dreiundzwanzig|vierundzwanzig|zwölf|zwoelf|dreizehn|vierzehn|fünfzehn|sechzehn|siebzehn|achtzehn|neunzehn|zwanzig|'null'|eins?|zwei|drei|vier|fünf|sechs|sieben|acht|neun|zehn|elf)\b"; + public const string PeriodHourNumRegex = @"(?einundzwanzig|zweiundzwanzig|dreiundzwanzig|vierundzwanzig|zw(ö|oe)lf|dreizehn|vierzehn|fünfzehn|sechzehn|siebzehn|achtzehn|neunzehn|zwanzig|'null'|eins?|zwei|drei|vier|fünf|sechs|sieben|acht|neun|zehn|elf)\b"; public static readonly string ConnectNumRegex = $@"{BaseDateTime.HourRegex}(?[0-5][0-9])\s*{DescRegex}"; public static readonly string TimeRegex1 = $@"({TimePrefix}\s+)?({WrittenTimeRegex}(\s*{DescRegex})?|({HourNumRegex}|{BaseDateTime.HourRegex}\b)\s*{DescRegex})"; public static readonly string TimeRegex2 = $@"({TimePrefix}\s+)?(t)?{BaseDateTime.HourRegex}(\s*)?(:|\s+uhr(\s+und)?\s+)(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?((\s*{DescRegex})|\b)"; @@ -143,12 +158,12 @@ public static class DateTimeDefinitions public static 
readonly string PureNumBetweenAnd = $@"\b(?({PmRegex}|{AmRegex})\s+)?(zwischen\s+)({HourRegex}|{PeriodHourNumRegex})(\s*(?({PmRegex}|{AmRegex}|{DescRegex})))?\s*{RangeConnectorRegex}\s*({HourRegex}|{PeriodHourNumRegex})\s*(?\s*{PmRegex}|{AmRegex}|{DescRegex}){{0,2}}\b"; public static readonly string SpecificTimeFromTo = $@"((?({PmRegex}|{AmRegex})\s+)?(von)\s+)?(?({TimeRegex2}|({HourRegex}|{PeriodHourNumRegex})(\s*(?({PmRegex}|{AmRegex}|{DescRegex})))?))\s*{TillRegex}\s*(?({TimeRegex2}|({HourRegex}|{PeriodHourNumRegex})(\s*(?({PmRegex}|{AmRegex}|{DescRegex})))?))"; public static readonly string SpecificTimeBetweenAnd = $@"(?({PmRegex}|{AmRegex})\s+)?(zwischen\s+)(?({TimeRegex2}|({HourRegex}|{PeriodHourNumRegex})(\s*(?({PmRegex}|{AmRegex}|{DescRegex})))?))\s*{RangeConnectorRegex}\s*(?({TimeRegex2}|({HourRegex}|{PeriodHourNumRegex})(\s*(?({PmRegex}|{AmRegex}|{DescRegex})))?))"; - public const string PrepositionRegex = @"(?^(um|am|vo[mn]|in|zur)(\s+(de[rmn]))?$)"; - public const string TimeOfDayRegex = @"\b(?(((((?(früh( am|er)|am frühen)(\s+|-))|(?(spät( am|er)|am späten)(\s+|-)))?((am )?morgens?(?! (früh|vor|nach|abend|(nacht|primetime)|morgen))|(vor|nach)mittags?|abends?|früh|(nachts?|primetime))))))\b"; + public const string PrepositionRegex = @"(?^(um|am|in|zur)(\s+(de[rmn]))?$)"; + public const string TimeOfDayRegex = @"(?((((?(früh(\s+am|er)?|am frühen))|(?(spät(\s+am|er)?|am späten)))(\s*|-))?((am\s+)?morgens?(?! 
(früh|vor|nach|abend|(nacht|primetime)|morgen))|(vor|nach)mittags?|(?\d+(\,\d*)?){TimeUnitRegex}"; - public const string NowRegex = @"\b(?(genau\s+)?jetzt|momentan|im moment|derzeit|in diesem moment|aktuell|gerade|so früh wie möglich|frühestmöglich|neulich|vorher)\b"; + public const string NowRegex = @"\b(?(genau\s+)?jetzt|momentan|im moment|derzeit|in diesem moment|aktuelle?|gerade|so früh wie möglich|frühestmöglich|neulich|vorher)\b"; public const string SuffixRegex = @"^\s*((am|zur|in der)\s+)?(am morgen|morgens|frühe?|(vor|nach)mittags?|abends?|(nacht|primetime)s?)\b"; public const string DateTimeTimeOfDayRegex = @"\b(?(vor|nach)?mittags?|abends?|(nachts?|primetime|morgen))\b"; public static readonly string DateTimeSpecificTimeOfDayRegex = $@"\b((({RelativeRegex}|heute)\s+{DateTimeTimeOfDayRegex}))\b"; @@ -156,49 +171,53 @@ public static class DateTimeDefinitions public static readonly string TimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(um|gegen|in|on))?\s*$"; public static readonly string SimpleTimeOfTodayAfterRegex = $@"({HourNumRegex}|{BaseDateTime.HourRegex})\s*(,\s*)?(am\s+)?{DateTimeSpecificTimeOfDayRegex}"; public static readonly string SimpleTimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+um)?\s*({HourNumRegex}|{BaseDateTime.HourRegex})"; - public const string SpecificEndOfRegex = @"((das|am|an( dem)?)\s+)?ende(\s+(de[mnsr])?)\s*"; + public const string SpecificEndOfRegex = @"((das|am|an( dem)?)\s+)?\bende(\s+(de[mnsr])?)\s*$"; public const string UnspecificEndOfRegex = @"^[.]"; public const string UnspecificEndOfRangeRegex = @"^[.]"; - public const string PeriodTimeOfDayRegex = @"\b(((?(früh( am|er)|am frühen)(\s+|-))|(?(spät( am|er)|am späten)(\s+|-)))?(?morgens?|früh|(vor|nach)mittags?|(nachts?|primetime)|abends?))\b"; + public const string PeriodTimeOfDayRegex = @"(((?(früh( am|er)?|am frühen)(\s*|-))|(?(spät( am|er)|am 
späten)(\s*|-)))?(?morgens?|früh|(vor|nach)mittags?|(nachts?|primetime)|abends?))"; public static readonly string PeriodSpecificTimeOfDayRegex = $@"(({StrictRelativeRegex}\s+{PeriodTimeOfDayRegex})|heute)"; - public static readonly string PeriodTimeOfDayWithDateRegex = $@"\b((((am|zur|von|in der)\s+)?{TimeOfDayRegex}(\s+am)?))|(?(?<=montag|dienstag|mittwoch|donnerstag|freitag|samstag|sonntag)((vor|nach)?mittags?|abends?|(nachts?|primetime)|morgens?))\b"; + public static readonly string PeriodTimeOfDayWithDateRegex = $@"((((am|zur|von|in der)\s+)?{TimeOfDayRegex}(\s+am)?))|(?(?<=montag|dienstag|mittwoch|donnerstag|freitag|samstag|sonnabend|sonntag)((vor|nach)?mittags?|abends?|(nachts?|primetime)|morgens?))\b"; public const string LessThanRegex = @"\b(weniger\s+als)\b"; public const string MoreThanRegex = @"\b(mehr\s+als)\b"; - public const string DurationUnitRegex = @"(?jahr(e(n|s)?)?|monat(en?|s)?|wochen?|tag(e(n|s)?)?|stunden?|std?|h|min(uten?)?|sek(unden?)?)\b"; + public const string DurationUnitRegex = @"(?jahr(e(n|s)?)?|monat(en?|s)?|wochen?|tag(e(n|s)?)?|tg|stunden?|std?|h|min(uten?)?|sek(unden?)?)\b"; public const string SpecialNumberUnitRegex = @"\b(?beiden)\b"; public const string SuffixAndRegex = @"(?\s*und\s+(eine\s+)?(?halbe|viertel))"; - public const string PeriodicRegex = @"(?(all)?täglich(e(r|n|s)?)?|(all)?monatlich(e(r|n|s)?)?|(all)?wöchentlich(e(r|n|s)?)?|(all)?jährlich(e(r|n|s)?)?)\b"; + public const string PeriodicRegex = @"(?(all)?täglich(e(r|n|s)?)?|(all)?monatlich(e[rns]?)?|(all)?wöchentlich(e[rns]?)?|(all)?jährlich(e[rns]?)?)\b"; public static readonly string EachUnitRegex = $@"(?(jede(s|r|n|m)?|alle)(?\s+andere(n)?)?\s*{DurationUnitRegex})"; - public const string EachPrefixRegex = @"\b(?(jede(r|n|s|m)?|alle)\s*$)"; - public const string SetEachRegex = @"\b(?(jede(r|n|s|m)?|alle)\s*)"; - public const string SetLastRegex = 
@"(?nächste(r|n|s)?|kommende(r|n|s)?|diese(r|n|m|s)?|letzte(r|n|s)?|vorige(r|n|s)?|vorherige(r|n|s)?|jetzige(r|n|s)?|derzeitige(r|n|s)?)\b"; + public const string EachPrefixRegex = @"\b(?(jede(r|n|s|m)?|alle|immer)\s*$)"; + public const string SetEachRegex = @"\b(?(jede(r|n|s|m)?|alle|immer)\s*)"; + public const string SetLastRegex = @"(?(über)?nächste[rmns]?|kommende[rns]?|diese[rnms]?|letzte[rmns]?|vorige[rns]?|vorherige[rns]?|jetzige[rns]?|derzeitige[rns]?)\b"; public const string EachDayRegex = @"\s*(jeden)\s*tag\s*\b"; public const string BeforeEachDayRegex = @"(jeden)\s*tag\s*"; public static readonly string DurationFollowedUnit = $@"(^\s*{SuffixAndRegex}?(\s+|-)?{DurationUnitRegex})"; public static readonly string NumberCombinedWithDurationUnit = $@"\b(?\d+(\.\d*)?)(-)?{DurationUnitRegex}"; - public static readonly string AnUnitRegex = $@"\b(((((ein(e(r|s|n|m)?)?)(?\s+halb(e(s|r|n)?))?))\s+{DurationUnitRegex})|((ein(e(r|s|n|m)?)?)\s+((?viertel)|(?dreiviertel))(?stunde)))"; + public static readonly string AnUnitRegex = $@"\b(((((ein(e[rsnm]?)?)(?\s+halb(e[srn]?))?))\s+{DurationUnitRegex})|((ein(e[rsnm]?)?)\s+((?viertel)|(?dreiviertel))(?stunde)))"; public const string DuringRegex = @"\b(während|im\slaufe)\s+(de[sr])\s+(?jahres|monats|woche|tages)\b"; - public const string AllRegex = @"\b(?ganz(e(n|s|r)?)\s+(?jahr|monat|woche|tag))\b"; - public const string HalfRegex = @"(((ein(e(n|r|s)?)?)\s*)|\b)(?halb(e(n|r|s)?)?\s+(?jahr(e(r|s)?)?|monat(s|e)?|woch(en?)?|tag(e(n|r|s)?)?|stund(en?)?))\b"; + public const string AllRegex = @"\b(?ganz(e[nsr]?)\s+(?jahr|monat|woche|tag))\b"; + public const string HalfRegex = @"(((ein(e[nrs]?)?)\s*)|\b)(?halb(e[nrs]?)?\s+(?jahr(e[rs]?)?|monat[se]?|woch(en?)?|tag(e[nrs]?)?|stund(en?)?))\b"; public const string ConjunctionRegex = @"\b((und(\s+für)?)|mit|für)\b"; - public static readonly string HolidayRegex1 = $@"\b((dieses jahr)\s*)?(?reformations(tag|fest)|gedenktag der reformation|martinstag|st. 
martin|sankt martin|martinsfest|martini|nikolaustag|dreikönigstag|dreikönigsfest|walpurgisnacht|nationalfeiertag|mariä empfängnis|weihnachten|weihnachtstag|erste(r|n)? weihnachtstag|1. weihnachtstag|erste(r|n)? weihnachtsfeiertag|1\. weihnachtsfeiertag|zweite(r|n)? weihnachtstag|zweite(r|n)? weihnachtsfeiertag|2\. weihnachtstag|zweite(r|n)? weihnachtsfeiertag|stefanitag|stafanstag|berchtoldstag|bechtelistag|bächtelistag|berchtelistag|bärzelistag|josefstag|joseftag|josefitag|ostermontag|ostersonntag|bundesfeiertag|bundesfeier|mariä himmelfahrt|tag der deutschen einheit|ostern|vatertag|muttertag|erntedankfest|thanksgiving|martin luther king day|martin luther king jr day|washington's birthday|washington birthday|canberraday|tag der arbeit|columbus day|memorial day|yuandan|mao's birthday|teachersday|teacher day|single day|allerheiligen|tag der jugend|kindertag|frauentag|treeplanting day|tag des baumes|girlsday|white lover day|loverday|weihnachten|weihnachtstag|xmas|neujahr|neujahrstag|neujahr|neujahrstag|neujahr|inauguration day|murmeltiertag|sommeranfang|winteranfang|frühlingsanfang|herbstanfang|valentinstag|st patrick day|erster april|april scherz|georgstag|mayday|maitag|tag der arbeit|maifeiertag|geburt johannes des täufers|us unabhängigkeitstag|unabhängigkeitstag|sturm auf die bastille|halloween|allerheiligen|allerseelen|guy fawkes day|guy fawkes night|veterans day|heiligabend|silvester|pi-tag|pitag|pi (tag|day))(\s+((diesen)\s+)?(im jahr {YearRegex}|{YearRegex}|{RelativeRegex}\s+jahres))?\b"; - public static readonly string HolidayRegex2 = $@"\b((dieses jahr)\s*)?(?martin luther king|martin luther king jr|allerheiligen|tree planting day|white lover|st patrick|st george|independence|us independence|allerheiligen|allerseelen|guy fawkes|silvester|weiberfastnacht|karneval|aschermittwoch|palmensonntag|karfreitag|christi himmelfahrt|pfingstsonntag|pfingstmontag|fronleichnam|rosenmontag|fastnacht|gründonnerstag|himmelfahrt|volkstrauertag|buß und bettag|buß- und 
bettag|buss- und bettag|buss und bettag|totensonntag|erste(r|n)? advent|1\. advent|zweite(r|n)? advent|2\. advent|dritte(r|n)? advent|3\. advent|vierte(r|n)? advent|4\. advent|schweizer buss- und bettag|schweizer buss und bettag|schweizer buß und bettag|schweizer buß- und bettag)(\s+((diesen)\s+)?(im jahr {YearRegex}|{YearRegex}|{RelativeRegex}\s+jahres))?\b"; - public static readonly string HolidayRegex3 = $@"((dieses jahr)\s*)?(?(canberra|columbus|thanks\s*giving|groundhog|bastille|halloween|veterans|memorial|spring|lantern|qingming|dragon boat)\s+(day))(\s+((diesen)\s+)?(im jahr {YearRegex}|{YearRegex}|{RelativeRegex}\s+jahres))?"; + public static readonly string HolidayRegex1 = $@"\b((dieses jahr)\s*)?(?reformations(tag|fest)|gedenktag der reformation|martinstag|st. martin|sankt martin|martinsfest|martini|nikolaustag|dreikönigstag|dreikönigsfest|walpurgisnacht|nationalfeiertag|mariä empfängnis|weihnachten|weihnachts(feier)?tag|erste(r|n)? weihnachtstag|1\. weihnachtstag|erste(r|n)? weihnachtsfeiertag|1\. weihnachtsfeiertag|zweite(r|n)? weihnachtstag|zweite(r|n)? weihnachtsfeiertag|2\. weihnachtstag|zweite(r|n)? weihnachtsfeiertag|2\. 
weihnachtsfeiertag|stefanitag|stafanstag|berchtoldstag|bechtelistag|bächtelistag|berchtelistag|bärzelistag|josefstag|joseftag|josefitag|pfingsten|ostermontag|ostersonntag|bundesfeiertag|bundesfeier|mariä himmelfahrt|tag der deutschen einheit|ostern|vatertag|muttertag|erntedank(fest)?|thanksgiving|martin luther king day|martin luther king jr day|washington's birthday|washington birthday|canberraday|tag der arbeit|columbus day|memorial day|yuandan|mao's birthday|teachersday|teacher day|single day|tag der jugend|kindertag|(Internationaler\s+)?frauentag|treeplanting day|tag des baumes|girlsday|white lover day|loverday|weihnachten|weihnachtstag|xmas|neujahr|neujahrstag|neujahr|neujahrstag|neujahr|inauguration day|murmeltiertag|sommeranfang|winteranfang|frühlingsanfang|herbstanfang|valentinstag|st patrick day|erster april|april scherz|georgstag|mayday|maitag|maifeiertag|geburt johannes des täufers|us unabhängigkeitstag|unabhängigkeitstag|sturm auf die bastille|halloween|guy fawkes day|guy fawkes night|veterans day|heiligabend|silvester|pi-tag|pitag|pi (tag|day))(\s+((diesen)\s+)?(im jahr {YearRegex}|{YearRegex}|(im\s+)?{RelativeRegex}\s+jahr(es)?))?\b"; + public static readonly string HolidayRegex2 = $@"\b((dieses jahr)\s*)?(?martin luther king|martin luther king jr|allerheiligen|tree planting day|white lover|st patrick|st george|independence|us independence|allerseelen|guy fawkes|silvester|weiberfastnacht|karneval|aschermittwoch|palm(en)?sonntag|karsamstag|fastnachtssamstag|fastnachtssonntag|heilige drei könige|barbaratag|reformationstag|weltkindertag|augsburger friedensfest|johannistag|peter und paul|karfreitag|christi himmelfahrt|pfingstsonntag|pfingstmontag|fronleichnam|rosenmontag|fastnacht|gründonnerstag|himmelfahrt|volkstrauertag|buß und bettag|buß- und bettag|buss- und bettag|buss und bettag|toten(sonntag|fest)|ewigkeitssonntag|erste(r|n)? advent|1\. advent|zweite(r|n)? advent|2\. advent|dritte(r|n)? advent|3\. advent|vierte(r|n)? advent|4\. 
advent|schweizer buss- und bettag|schweizer buss und bettag|schweizer buß und bettag|schweizer buß- und bettag)(\s+((diesen)\s+)?(im jahr {YearRegex}|{YearRegex}|(im\s+)?{RelativeRegex}\s+jahr(es)?))?\b"; + public static readonly string HolidayRegex3 = $@"((dieses jahr)\s*)?(?(canberra|columbus|thanks\s*giving|groundhog|bastille|halloween|veterans|memorial|spring|lantern|qingming|dragon boat)\s+(day))(\s+((diesen)\s+)?(im jahr {YearRegex}|{YearRegex}|(im\s+)?{RelativeRegex}\s+jahr(es)?))?"; public const string DateTokenPrefix = @"am "; public const string TimeTokenPrefix = @"um "; public const string TokenBeforeDate = @"am "; public const string TokenBeforeTime = @"um "; + public const string FromRegex = @"\b(vo[mn](\s+de[rmsn])?)$"; + public const string BetweenTokenRegex = @"\b(zwischen(\s+de[rmsn])?)$"; public const string AMTimeRegex = @"(?morgens|vormittags?|früh)"; - public const string PMTimeRegex = @"(?nachmittags?|abends?|nachts?)"; - public const string BeforeRegex = @"(vorher(ige(s|n|r)?)?|bevor|vor(\W)?|vorige(s|n|r)?|bis)"; - public const string AfterRegex = @"(nach(\W)?)"; - public const string SinceRegex = @"\b(seit|ab)\b"; + public const string PMTimeRegex = @"\b(?nachmittags?|abends?|nachts?)"; + public const string BeforeRegex = @"(vorher(ige(s|n|r)?)?|bevor|vor(\W)?|vorige(s|n|r)?|bis)(\s+de[rmsn]\b)?"; + public const string AfterRegex = @"(nach(\W)?)(\s+de[rmsn]\b)?"; + public const string SinceRegex = @"\b(seit|ab)(\s+de[rmsn])?\b"; public const string AgoRegex = @"\b(danach)\b"; public const string AroundRegex = @"(\b(ca\.?|gegen|circa)\s*\b)"; public const string LaterRegex = @"\b(später|von jetzt|(ab|nach) (?morgen|heute))\b"; + public const string BeforeAfterRegex = @"^[.]"; public const string InConnectorRegex = @"\b(in)\b"; public const string SinceYearSuffixRegex = @"^[.]"; public static readonly string WithinNextPrefixRegex = $@"\b(innerhalb|während(\s+der|de(s|m))?(\s+(?{NextPrefixRegex}))?)\b"; + public const string TodayNowRegex = 
@"\b(heute|jetzt)\b"; public const string MorningStartEndRegex = @"(^(früh|vormittag(s)?)|(morgens?|früh|vormittags?)$)"; public const string AfternoonStartEndRegex = @"(^(nachmittags?)|(nachmittags?)$)"; public const string EveningStartEndRegex = @"(^(abends?)|(abends?)$)"; @@ -214,27 +233,26 @@ public static class DateTimeDefinitions public const string UnspecificDatePeriodRegex = @"^(woche(nende)?|monat|jahr)$"; public const string PrepositionSuffixRegex = @"\b(am|in|um|gegen|vo[mn]|zu(m|r))$"; public const string FlexibleDayRegex = @"(?([A-Za-z]+\s)?[A-Za-z\d]+)"; - public static readonly string ForTheRegex = $@"\b(für den {FlexibleDayRegex})"; - public static readonly string WeekDayAndDayOfMonthRegex = $@"\b{WeekDayRegex}\s+((de(r|n))\s+{FlexibleDayRegex})\b"; - public static readonly string WeekDayAndDayRegex = $@"\b{WeekDayRegex}\s+(?!(the)){DayRegex}(?!([-]|(\s+({AmDescRegex}|{{PmDescRegex|{OclockRegex}}}))))\b"; + public static readonly string ForTheRegex = $@"\b(für\s+den\s+{FlexibleDayRegex}(?\s*(,|\.(?!(\d|\s?{WrittenMonthRegex}))|!|\?|$)))"; + public static readonly string WeekDayAndDayOfMonthRegex = $@"\b{WeekDayRegex}(\s*,)?\s+((de(r|n))\s+{FlexibleDayRegex})\b"; + public static readonly string WeekDayAndDayRegex = $@"\b{WeekDayRegex}\s+(?!de[nr]){DayRegex}(?!([-:]|(\s+({AmDescRegex}|{PmDescRegex}|{OclockRegex}))))\b"; public const string RestOfDateRegex = @"\brest\s+((de[rs]|dieser)\s+)((aktuellen|jetzigen)\s+)?(?woche|monats|jahres)\b"; public const string RestOfDateTimeRegex = @"\brest\s+((des|diesen)\s+)((aktuellen|heutigen)\s+)?(?tages)\b"; public const string MealTimeRegex = @"\b((zu(m|r))\s+)?(?(essenszeit|mittagessen|mittag))\b"; public const string AmbiguousRangeModifierPrefix = @"^[.]"; public static readonly string NumberEndingPattern = $@"^(\s+(?meeting|termin|telefonkonferenz|conference|skype call|call)\s+to\s+(?{PeriodHourNumRegex}|{HourRegex})((\.)?$|(\.,|,|!|\?)))"; public const string OneOnOneRegex = @"\b(1\s*:\s*1)|(one (on 
)?one|one\s*-\s*one|one\s*:\s*one)\b"; - public static readonly string LaterEarlyPeriodRegex = $@"\b((?spät(e(r|n)?)?)|(?früh(e(r|n)?)))\s+(?{OneWordPeriodRegex})\b"; + public static readonly string LaterEarlyPeriodRegex = $@"\b(((?ende|spät(e(r|n)?)?)|(?früh(e(r|n)?)?))\s+(?{OneWordPeriodRegex}|(?{BaseDateTime.FourDigitYearRegex}))|(?{OneWordPeriodRegex}|(?{BaseDateTime.FourDigitYearRegex}))\s+((?ende|spät(e(r|n)?)?)|(?früh(e(r|n)?)?)))\b"; public static readonly string WeekWithWeekDayRangeRegex = $@"\b((?({NextPrefixRegex}|{PreviousPrefixRegex}|diese(r|n|m))\s+woche)((\s+zwischen\s+{WeekDayRegex}\s+und\s+{WeekDayRegex})|(\s+vo(n|m)\s+{WeekDayRegex}\s+(bis\s+)?zum\s+{WeekDayRegex})))\b"; public const string GeneralEndingRegex = @"\s*((\.,)|\.|,|!|\?)?\s*"; public const string MiddlePauseRegex = @"\s*(,)\s*"; public const string DurationConnectorRegex = @"^\s*(?\s+|und|für|,)\s*$"; public const string PrefixArticleRegex = @"\b(der|dem|des)?\s+"; public const string OrRegex = @"\s*((\b|,\s*)(oder|und)\b|,)\s*"; - public const string YearPlusNumberRegex = @"\b(Jahr\s+(?(\d{3,4})))\b"; + public static readonly string YearPlusNumberRegex = $@"\b(jahr\s+((?(\d{{2,4}}))|{FullTextYearRegex}))\b"; public static readonly string NumberAsTimeRegex = $@"\b({WrittenTimeRegex}|{PeriodHourNumRegex}|{BaseDateTime.HourRegex})\b"; public static readonly string TimeBeforeAfterRegex = $@"\b(((?<=\b(vor|nach)\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}|{MidTimeRegex}))|{MidTimeRegex})\b"; public const string DateNumberConnectorRegex = @"\s*(?am)\s*"; - public const string CenturyRegex = @"\b(?((ein|zwei)?tausend(und)?)?((ein|zwei|drei|vier|fünf|sechs|sieben|acht|neun|zehn|elf|zwölf|dreizehn|vierzehn|fünfzehn|sechzehn|siebzehn|achtzehn|neunzehn)hundert))\b"; public const string DecadeRegex = @"(?zwanziger|dreißiger|vierziger|fünfziger|sechziger|siebziger|achtziger|neunziger|zweitausender)(n)?"; public static readonly string DecadeWithCenturyRegex = 
$@"(die\s+)?(((?\d|1\d|2\d)?(')?(?\d0)(')?s)|(({CenturyRegex}(\s+|-)(und\s+)?)?{DecadeRegex})|({CenturyRegex}(\s+|-)(und\s+)?(?zehner|hunderter)))"; public static readonly string RelativeDecadeRegex = $@"\b((das|die\s+)?{RelativeRegex}\s+((?[\w,]+)\s+)?jahrzehnte?)\b"; @@ -242,6 +260,9 @@ public static class DateTimeDefinitions public const string SuffixAfterRegex = @"\b(o\s+(nach|in der Vergangenheit))\b"; public static readonly string YearPeriodRegex = $@"((((von|während|zwischen)\s+)?{YearRegex}\s*({TillRegex})\s*{YearRegex})|(((zwischen)\s+){YearRegex}\s*({RangeConnectorRegex})\s*{YearRegex}))"; public const string FutureSuffixRegex = @"\b(in\s+der\s+)?(zukunft|zukünftig)\b"; + public const string PastSuffixRegex = @"^\b$"; + public static readonly string ModPrefixRegex = $@"\b({RelativeRegex}|{AroundRegex}|{BeforeRegex}|{AfterRegex}|{SinceRegex})\b"; + public static readonly string ModSuffixRegex = $@"\b({AgoRegex}|{LaterRegex}|{BeforeAfterRegex}|{FutureSuffixRegex}|{PastSuffixRegex})\b"; public static readonly string ComplexDatePeriodRegex = $@"(((von|während|in)\s+)?(?.+)\s*({TillRegex})\s*(?.+)|((zwischen)\s+)(?.+)\s*({RangeConnectorRegex})\s*(?.+))"; public static readonly Dictionary UnitMap = new Dictionary { @@ -259,6 +280,7 @@ public static class DateTimeDefinitions { @"tages", @"D" }, { @"tage", @"D" }, { @"tag", @"D" }, + { @"tg", @"D" }, { @"stunden", @"H" }, { @"stunde", @"H" }, { @"h", @"H" }, @@ -283,6 +305,7 @@ public static class DateTimeDefinitions { @"tagen", 86400 }, { @"tage", 86400 }, { @"tag", 86400 }, + { @"tg", 86400 }, { @"stunden", 3600 }, { @"stunde", 3600 }, { @"std", 3600 }, @@ -350,6 +373,7 @@ public static class DateTimeDefinitions { @"donnerstag", 4 }, { @"freitag", 5 }, { @"samstag", 6 }, + { @"sonnabend", 6 }, { @"sonntag", 0 }, { @"mo.", 1 }, { @"di.", 2 }, @@ -432,7 +456,21 @@ public static class DateTimeDefinitions { @"06", 6 }, { @"07", 7 }, { @"08", 8 }, - { @"09", 9 } + { @"09", 9 }, + { @"erst", 1 }, + { @"zweit", 2 }, + 
{ @"dritt", 3 }, + { @"viert", 4 }, + { @"fünft", 5 }, + { @"fuenft", 5 }, + { @"sechst", 6 }, + { @"siebt", 7 }, + { @"acht", 8 }, + { @"neunt", 9 }, + { @"zehnt", 10 }, + { @"elft", 11 }, + { @"zwölft", 12 }, + { @"zwoelft", 12 } }; public static readonly Dictionary Numbers = new Dictionary { @@ -440,6 +478,7 @@ public static class DateTimeDefinitions { @"eins", 1 }, { @"ein", 1 }, { @"eine", 1 }, + { @"zween", 2 }, { @"zwei", 2 }, { @"zwo", 2 }, { @"drei", 3 }, @@ -544,37 +583,37 @@ public static class DateTimeDefinitions }; public static readonly Dictionary DayOfMonth = new Dictionary { - { @"1. ", 1 }, - { @"2. ", 2 }, - { @"3. ", 3 }, - { @"4. ", 4 }, - { @"5. ", 5 }, - { @"6. ", 6 }, - { @"7. ", 7 }, - { @"8. ", 8 }, - { @"9. ", 9 }, - { @"10. ", 10 }, - { @"11. ", 11 }, - { @"12. ", 12 }, - { @"13. ", 13 }, - { @"14. ", 14 }, - { @"15. ", 15 }, - { @"16. ", 16 }, - { @"17. ", 17 }, - { @"18. ", 18 }, - { @"19. ", 19 }, - { @"20. ", 20 }, - { @"21. ", 21 }, - { @"22. ", 22 }, - { @"23. ", 23 }, - { @"24. ", 24 }, - { @"25. ", 25 }, - { @"26. ", 26 }, - { @"27. ", 27 }, - { @"28. ", 28 }, - { @"29. ", 29 }, - { @"30. ", 30 }, - { @"31. 
", 31 }, + { @"1.", 1 }, + { @"2.", 2 }, + { @"3.", 3 }, + { @"4.", 4 }, + { @"5.", 5 }, + { @"6.", 6 }, + { @"7.", 7 }, + { @"8.", 8 }, + { @"9.", 9 }, + { @"10.", 10 }, + { @"11.", 11 }, + { @"12.", 12 }, + { @"13.", 13 }, + { @"14.", 14 }, + { @"15.", 15 }, + { @"16.", 16 }, + { @"17.", 17 }, + { @"18.", 18 }, + { @"19.", 19 }, + { @"20.", 20 }, + { @"21.", 21 }, + { @"22.", 22 }, + { @"23.", 23 }, + { @"24.", 24 }, + { @"25.", 25 }, + { @"26.", 26 }, + { @"27.", 27 }, + { @"28.", 28 }, + { @"29.", 29 }, + { @"30.", 30 }, + { @"31.", 31 }, { @"1", 1 }, { @"2", 2 }, { @"3", 3 }, @@ -605,7 +644,42 @@ public static class DateTimeDefinitions { @"28", 28 }, { @"29", 29 }, { @"30", 30 }, - { @"31", 31 } + { @"31", 31 }, + { @"erst", 1 }, + { @"zweit", 2 }, + { @"dritt", 3 }, + { @"viert", 4 }, + { @"fünft", 5 }, + { @"fuenft", 5 }, + { @"sechst", 6 }, + { @"siebt", 7 }, + { @"acht", 8 }, + { @"neunt", 9 }, + { @"zehnt", 10 }, + { @"elft", 11 }, + { @"zwölft", 12 }, + { @"zwoelft", 12 }, + { @"dreizehnt", 13 }, + { @"vierzehnt", 14 }, + { @"fünfzehnt", 15 }, + { @"fuenfzehnt", 15 }, + { @"sechzehnt", 16 }, + { @"siebzehnt", 17 }, + { @"achtzehnt", 18 }, + { @"neunzehnt", 19 }, + { @"zwanzigst", 20 }, + { @"einundzwanzigst", 21 }, + { @"zweiundzwanzigst", 22 }, + { @"dreiundzwanzigst", 23 }, + { @"vierundzwanzigst", 24 }, + { @"fünfundzwanzigst", 25 }, + { @"fuenfundzwanzigst", 25 }, + { @"sechsundzwanzigst", 26 }, + { @"siebenundzwanzigst", 27 }, + { @"achtundzwanzigst", 28 }, + { @"neunundzwanzigst", 29 }, + { @"dreißigst", 30 }, + { @"einunddreißigst", 31 } }; public static readonly Dictionary DoubleNumbers = new Dictionary { @@ -621,23 +695,23 @@ public static class DateTimeDefinitions { @"walpurgisnight", new string[] { @"walpurgisnacht" } }, { @"austriannationalday", new string[] { @"nationalfeiertag" } }, { @"immaculateconception", new string[] { @"mariäempfängnis" } }, - { @"firstchristmasday", new string[] { @"weihnachten", @"weihnachtstag", 
@"erstenweihnachtstag", @"erstenweihnachtstag", @"ersterweihnachtstag", @"1.weihnachtstag", @"ersterweihnachtsfeiertag", @"1.weihnachtsfeiertag" } }, - { @"secondchristmasday", new string[] { @"zweiterweihnachtstag", @"zweiteweihnachtsfeiertag", @"zweitenweihnachtsfeiertag", @"zweiterweihnachtsfeiertag", @"2.weihnachtstag", @"zweiterweihnachtsfeiertag", @"stefanitag", @"stafanstag" } }, + { @"firstchristmasday", new string[] { @"weihnachten", @"weihnachtstag", @"weihnachtsfeiertag", @"erstenweihnachtstag", @"erstenweihnachtsfeiertag", @"ersterweihnachtstag", @"ersterweihnachtsfeiertag", @"erstenweihnachtstag", @"erstenweihnachtsfeiertag", @"1.weihnachtstag", @"1.weihnachtsfeiertag" } }, + { @"secondchristmasday", new string[] { @"zweiterweihnachtstag", @"zweiterweihnachtsfeiertag", @"zweiteweihnachtstag", @"zweiteweihnachtsfeiertag", @"zweitenweihnachtstag", @"zweitenweihnachtsfeiertag", @"2.weihnachtstag", @"2.weihnachtsfeiertag", @"stefanitag", @"stafanstag" } }, { @"berchtoldsday", new string[] { @"berchtoldstag", @"bechtelistag", @"bächtelistag", @"berchtelistag", @"bärzelistag" } }, { @"saintjosephsday", new string[] { @"josefstag", @"joseftag", @"josefitag" } }, + { @"easterday", new string[] { @"ostern" } }, { @"eastermonday", new string[] { @"ostermontag" } }, { @"eastersunday", new string[] { @"ostersonntag" } }, { @"swissnationalday", new string[] { @"bundesfeiertag", @"bundesfeier" } }, { @"assumptionofmary", new string[] { @"mariähimmelfahrt" } }, { @"germanunityday", new string[] { @"tagderdeutscheneinheit" } }, - { @"easterday", new string[] { @"ostern" } }, { @"fathers", new string[] { @"vatertag", @"männertag" } }, { @"mothers", new string[] { @"muttertag" } }, - { @"thanksgiving", new string[] { @"erntedankfest", @"thanksgiving" } }, + { @"thanksgiving", new string[] { @"erntedankfest", @"erntedank", @"thanksgiving" } }, { @"martinlutherking", new string[] { @"martinlutherkingday", @"martinlutherkingjrday" } }, { @"washingtonsbirthday", new 
string[] { @"washingtonsbirthday", @"washingtonbirthday" } }, { @"canberra", new string[] { @"canberraday" } }, - { @"labour", new string[] { @"tag der arbeit" } }, + { @"labour", new string[] { @"tagderarbeit" } }, { @"columbus", new string[] { @"columbusday" } }, { @"memorial", new string[] { @"memorialday" } }, { @"yuandan", new string[] { @"yuandan" } }, @@ -646,7 +720,7 @@ public static class DateTimeDefinitions { @"singleday", new string[] { @"singleday" } }, { @"allsaintsday", new string[] { @"allerheiligen" } }, { @"youthday", new string[] { @"tag der jugend" } }, - { @"childrenday", new string[] { @"kindertag" } }, + { @"childrenday", new string[] { @"kindertag", @"weltkindertag" } }, { @"femaleday", new string[] { @"frauentag" } }, { @"treeplantingday", new string[] { @"treeplantingday" } }, { @"arborday", new string[] { @"tag des baumes" } }, @@ -665,7 +739,7 @@ public static class DateTimeDefinitions { @"stgeorgeday", new string[] { @"georgstag" } }, { @"mayday", new string[] { @"mayday", @"maitag", @"tagderarbeit", @"maifeiertag" } }, { @"laborday", new string[] { @"mayday", @"maitag", @"tagderarbeit", @"maifeiertag" } }, - { @"cincodemayoday", new string[] { @"cinco de mayo" } }, + { @"cincodemayoday", new string[] { @"cincodemayo" } }, { @"baptisteday", new string[] { @"geburtjohannesdestäufers" } }, { @"usindependenceday", new string[] { @"usunabhängigkeitstag" } }, { @"independenceday", new string[] { @"unabhängigkeitstag" } }, @@ -681,10 +755,10 @@ public static class DateTimeDefinitions { @"weiberfastnacht", new string[] { @"weiberfastnacht" } }, { @"carnival", new string[] { @"karneval" } }, { @"ashwednesday", new string[] { @"aschermittwoch" } }, - { @"palmsunday", new string[] { @"palmensonntag" } }, + { @"palmsunday", new string[] { @"palmensonntag", @"palmsonntag" } }, { @"goodfriday", new string[] { @"karfreitag" } }, { @"ascensionofchrist", new string[] { @"christihimmelfahrt" } }, - { @"whitsunday", new string[] { @"pfingstsonntag" } }, + 
{ @"whitesunday", new string[] { @"pfingstsonntag", @"pfingsten" } }, { @"whitemonday", new string[] { @"pfingstmontag" } }, { @"corpuschristi", new string[] { @"fronleichnam" } }, { @"rosenmontag", new string[] { @"rosenmontag" } }, @@ -692,7 +766,7 @@ public static class DateTimeDefinitions { @"holythursday", new string[] { @"gründonnerstag", @"himmelfahrt" } }, { @"memorialdaygermany", new string[] { @"volkstrauertag" } }, { @"dayofrepentance", new string[] { @"bußundbettag", @"buß-undbettag", @"bussundbettag" } }, - { @"totensonntag", new string[] { @"totensonntag" } }, + { @"totensonntag", new string[] { @"totensonntag", @"totenfest", @"ewigkeitssonntag" } }, { @"firstadvent", new string[] { @"ersteadvent", @"erstenadvent", @"ersteradvent", @"1.advent" } }, { @"secondadvent", new string[] { @"zweiteadvent", @"zweitenadvent", @"zweiteradvent", @"2.advent" } }, { @"thirdadvent", new string[] { @"dritteadvent", @"drittenadvent", @"dritteradvent", @"3.advent" } }, @@ -701,7 +775,15 @@ public static class DateTimeDefinitions { @"beginningofsummer", new string[] { @"sommeranfang" } }, { @"beginningofwinter", new string[] { @"winteranfang" } }, { @"beginningofspring", new string[] { @"frühlingsanfang" } }, - { @"beginningoffall", new string[] { @"herbstanfang" } } + { @"beginningoffall", new string[] { @"herbstanfang" } }, + { @"eastersaturday", new string[] { @"karsamstag" } }, + { @"fastnachtssamstag", new string[] { @"fastnachtssamstag" } }, + { @"fastnachtssonntag", new string[] { @"fastnachtssonntag" } }, + { @"heiligedreikönige", new string[] { @"heiligedreikönige" } }, + { @"barbaratag", new string[] { @"barbaratag" } }, + { @"augsburgerfriedensfest", new string[] { @"augsburgerfriedensfest" } }, + { @"johannistag", new string[] { @"johannistag" } }, + { @"peterundpaul", new string[] { @"peterundpaul" } } }; public static readonly Dictionary WrittenDecades = new Dictionary { @@ -732,8 +814,13 @@ public static class DateTimeDefinitions public static readonly 
string[] DurationDateRestrictions = { @"today", @"now" }; public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { - { @"\b(morgen|nachmittag|abend|nacht|tag)\b", @"\b(gut(en?)?\s+(morgen|nachmittag|abend|nacht|tag))\b" } + { @"^\d{4}$", @"(\d\.\d{4}|\d{4}\.\d)" }, + { @"\b(morgen|nachmittag|abend|nacht|tag)\b", @"\b(gut(en?)?\s+(morgen|nachmittag|abend|nacht|tag))\b" }, + { @"^(apr|aug|dez|feb|j[äa]n|jul|jun|märz|mai|nov|okt|sept?)$", @"([$%£&!?@#])(apr|aug|dez|feb|j[äa]n|jul|jun|märz|mai|nov|okt|sept?)|(apr|aug|dez|feb|j[äa]n|jul|jun|märz|mai|nov|okt|sept?)([$%£&@#])" }, + { @"^(mo|di|mi|do|fr|sa|so)$", @"\b(mo|di|mi|do|fr|sa|so)\b" }, + { @"^((früh|spät)(\s+am|er)?\s*)?(abends?|morgens?|nachts?|(vor|nach)mittags?|früh|primetime)$", @"((? MorningTermList = new List { @"morgen", @@ -755,6 +842,7 @@ public static class DateTimeDefinitions public static readonly IList NightTermList = new List { @"nacht", + @"nachts", @"primetime" }; public static readonly IList SameDayTerms = new List @@ -806,13 +894,15 @@ public static class DateTimeDefinitions @"nächster", @"nächstes", @"nächsten", + @"nächstem", @"nächste" }; public static readonly IList LastCardinalTerms = new List { @"letzter", @"letztes", - @"letzten" + @"letzten", + @"letztem" }; public static readonly IList MonthTerms = new List { diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/German/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/German/NumbersDefinitions.cs index 58aa49cd4e..0c45978fdf 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/German/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/German/NumbersDefinitions.cs @@ -24,36 +24,45 @@ public static class NumbersDefinitions public const string LangMarker = @"Ger"; public const bool CompoundNumberLanguage = true; public const bool MultiDecimalSeparatorCulture = false; - public const string ZeroToNineIntegerRegex = 
@"(drei|sieben|acht|vier|fuenf|fünf|null|neun|eins|(ein(?!($|\.|,|!|\?)))|eine|einer|einen|zwei|zwo|sechs)"; - public const string RoundNumberIntegerRegex = @"(hundert|einhundert|tausend|(\s*million\s*)|(\s*millionen\s*)|(\s*mio\s*)|(\s*milliarde\s*)|(\s*milliarden\s*)|(\s*mrd\s*)|(\s*billion\s*)|(\s*billionen\s*))"; - public const string AnIntRegex = @"(eine|ein)(?=\s)"; - public const string TenToNineteenIntegerRegex = @"(siebzehn|dreizehn|vierzehn|achtzehn|neunzehn|fuenfzehn|sechzehn|elf|zwoelf|zwölf|zehn)"; - public const string TensNumberIntegerRegex = @"(siebzig|zwanzig|dreißig|achtzig|neunzig|vierzig|fuenfzig|fünfzig|sechzig)"; + public const string ZeroToNineIntegerRegex = @"(drei|sieben|acht|vier|fuenf|fünf|null|neun|eins|(ein(?!($|\.|,|!|\?)))|eine[rn]?|zwei|zwo|sechs)"; + public const string TwoToNineIntegerRegex = @"(drei|sieben|acht|vier|fuenf|fünf|neun|zwei|zwo|sechs)"; + public const string RoundNumberIntegerRegex = @"((ein)?hundert|tausend|((million(en)?|mio|milliarden?|mrd|billion(en)?)))"; + public const string AnIntRegex = @"(eine?)(?=\s)"; + public const string TenToNineteenIntegerRegex = @"(siebzehn|dreizehn|vierzehn|achtzehn|neunzehn|fünfzehn|fuenfzehn|sechzehn|elf|zwoelf|zwölf|zehn)"; + public const string TensNumberIntegerRegex = @"(siebzig|zwanzig|dreißig|achtzig|neunzig|vierzig|fuenfzig|fünfzig|sechzig|hundert|tausend)"; public const string NegativeNumberTermsRegex = @"^[.]"; public static readonly string NegativeNumberSignRegex = $@"^({NegativeNumberTermsRegex}\s+).*"; public static readonly string SeparaIntRegex = $@"((({TenToNineteenIntegerRegex}|({ZeroToNineIntegerRegex}und{TensNumberIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex})(\s*{RoundNumberIntegerRegex})*))|(({AnIntRegex}(\s*{RoundNumberIntegerRegex})+))"; - public static readonly string AllIntRegex = 
$@"(((({TenToNineteenIntegerRegex}|({ZeroToNineIntegerRegex}und{TensNumberIntegerRegex})|{TensNumberIntegerRegex}|({ZeroToNineIntegerRegex}|{AnIntRegex}))?(\s*{RoundNumberIntegerRegex})))*{SeparaIntRegex})"; + public static readonly string AllIntRegex = $@"(((({TenToNineteenIntegerRegex}|({ZeroToNineIntegerRegex}und{TensNumberIntegerRegex})|{TensNumberIntegerRegex}|({ZeroToNineIntegerRegex}|{AnIntRegex}))?(\s*{RoundNumberIntegerRegex}\s*)))*{SeparaIntRegex})"; public const string PlaceHolderPureNumber = @"\b"; public const string PlaceHolderDefault = @"\D|\b"; - public static readonly Func NumbersWithPlaceHolder = (placeholder) => $@"(((? NumbersWithPlaceHolder = (placeholder) => $@"(((?(nächste|vorherige|aktuelle|jetzige|(vor|dritt)?letzte)[nr]?|zuletzt|früher)"; public const string BasicOrdinalRegex = @"(zuerst|erst(er|es|en|e)|zweit(er|es|en|e)?|dritt(er|es|en|el|e)?|viert(er|es|en|el|e)?|fünft(er|es|en|el|e)?|fuenft(er|es|en|el|e)?|sechst(er|es|en|el|e)?|siebt(er|es|en|el|e)?|acht(er|es|en|el|e)?|neunt(er|es|en|el|e)?|zehnt(er|es|en|el|e)?|elft(er|es|en|el|e)?|zwölft(er|es|en|el|e)?|zwoelft(er|es|en|el|e)?|dreizehnt(er|es|en|el|e)?|vierzehnt(er|es|en|el|e)?|fünfzehnt(er|es|en|el|e)?|fuenfzehnt(er|es|en|el|e)?|sechzehnt(er|es|en|el|e)?|siebzehnt(er|es|en|el|e)?|achtzehnt(er|es|en|el|e)?|neunzehnt(er|es|en|el|e)?|zwanzigst(er|es|en|el|e)?|dreißigst(er|es|en|el|e)?|vierziegt(er|es|en|el|e)?|fünfzigst(er|es|en|el|e)?|fuenfzigst(er|es|en|el|e)?|sechzigst(er|es|en|el|e)?|siebzigst(er|es|en|el|e)?|achtzigst(er|es|en|el|e)?|neunzigst(er|es|en|el|e)?)"; public static readonly string SuffixBasicOrdinalRegex = $@"({BasicOrdinalRegex}|({ZeroToNineIntegerRegex}(und|\s){BasicOrdinalRegex}))"; public static readonly string SuffixRoundNumberOrdinalRegex = $@"(({AllIntRegex}\s*){RoundNumberOrdinalRegex})"; - public static readonly string AllOrdinalRegex = $@"(({AllIntRegex}\s*)*{SuffixBasicOrdinalRegex}|{SuffixRoundNumberOrdinalRegex})"; + public static readonly string 
AllOrdinalNumberRegex = $@"(({AllIntRegex}\s*)*{SuffixBasicOrdinalRegex}|{SuffixRoundNumberOrdinalRegex})"; + public static readonly string AllOrdinalRegex = $@"(?:{AllOrdinalNumberRegex}|{RelativeOrdinalRegex})"; public const string OrdinalSuffixRegex = @"^[\.]"; public const string OrdinalNumericRegex = @"(?<=\b)(\d{1,3}\.)(?=(\s+|^))"; public static readonly string OrdinalRoundNumberRegex = $@"(?anderthalb|einundhalb)|(?dreiviertel))"; + public const string FractionHalfRegex = @"(einhalb(es)?)$"; + public static readonly string[] OneHalfTokens = { @"ein", @"halb", @"halbes" }; + public static readonly string FractionMultiplierRegex = $@"(?(\s+und\s+)?(anderthalb|einundhalb|dreiviertel)|(\s+und\s+)?(eine?|{TwoToNineIntegerRegex})\s*(halbe?|(dritt|viert|fünft|fuenft|sechst|siebt|acht|neunt|zehnt)(er|es|en|el|e)?))"; + public static readonly string RoundMultiplierWithFraction = $@"(?<=(?(million(en)?|mio|milliarden?|mrd|billion(en)?))(?={FractionMultiplierRegex}?$)"; + public static readonly string RoundMultiplierRegex = $@"\b\s*((von\s+)?ein(er|es|en|el|e)?\s+)?({RoundMultiplierWithFraction}|(?(?:hundert|tausend))$)"; + public static readonly string FractionNounRegex = $@"(?<=\b)({AllIntRegex}\s+(und\s+)?)?(({AllIntRegex})(\s*|\s*-\s*)((({AllOrdinalNumberRegex})|({RoundNumberOrdinalRegex}))|halb(e[rs]?)?|hälfte)(\s+{RoundNumberIntegerRegex})?|(eine\s+(halbe|viertel)\s+){RoundNumberIntegerRegex}|{FractionUnitsRegex}(\s+{RoundNumberIntegerRegex})?)(?=\b)"; + public static readonly string FractionNounWithArticleRegex = $@"(?<=\b)((({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\s+(und\s+)?)?eine?(\s+|\s*-\s*)({AllOrdinalNumberRegex}|{RoundNumberOrdinalRegex}|{FractionUnitsRegex}|({AllIntRegex}ein)?(halb(e[rs]?)?|hälfte))|{AllIntRegex}ein(halb)(\s+{RoundNumberIntegerRegex})?)(?=\b)"; public static readonly string FractionPrepositionRegex = $@"(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?!\.))(?=\b)"; public static readonly string AllPointRegex = 
$@"((\s*{ZeroToNineIntegerRegex})+|(\s*{SeparaIntRegex}))"; public static readonly string AllFloatRegex = $@"({AllIntRegex}(\s*komma\s*){AllPointRegex})"; @@ -64,8 +73,34 @@ public static class NumbersDefinitions public static readonly Func DoubleWithoutIntegralRegex = (placeholder) => $@"(?<=\s|^)(?und)"; public static readonly string NumberWithSuffixPercentage = $@"(?)"; + public const string LessRegex = @"(?:(weniger|winziger|kleiner|wenig)(\s+als)?|darunter|unter|(?|=)<)"; + public const string EqualRegex = @"(gleich(\s+(als|zu))?|(?)=)"; + public static readonly string MoreOrEqualPrefix = $@"((nicht\s+{LessRegex})|(als\s+letzte(r)?))"; + public static readonly string MoreOrEqual = $@"(?:({MoreRegex}\s+(oder)?\s+{EqualRegex})|({EqualRegex}\s+(oder)?\s+{MoreRegex})|{MoreOrEqualPrefix}(\s+(oder)?\s+{EqualRegex})?|({EqualRegex}\s+(oder)?\s+)?{MoreOrEqualPrefix}|>\s*=|≥)"; + public const string MoreOrEqualSuffix = @"((und|oder)\s+(((mehr|größer|höher)((?!\s+als)|(\s+als(?!(\s*\d+)))))|((über|darüber)(?!\s+als))))"; + public static readonly string LessOrEqualPrefix = $@"((nicht\s+{MoreRegex})|(at\s+viele)|(bis\s+zu))"; + public static readonly string LessOrEqual = $@"(({LessRegex}\s+(oder)?\s+{EqualRegex})|({EqualRegex}\s+(oder)?\s+{LessRegex})|{LessOrEqualPrefix}(\s+(oder)?\s+{EqualRegex})?|({EqualRegex}\s+(oder)?\s+)?{LessOrEqualPrefix}|<\s*=|≤)"; + public const string LessOrEqualSuffix = @"((und|oder)\s+(weniger|geringer|kleiner|winziger)((?!\s+als)|(\s+als(?!(\s*\d+)))))"; + public const string NumberSplitMark = @"(?![,.](?!\d+))"; + public const string MoreRegexNoNumberSucceed = @"((größer|mehr|höhrer|breiter)((?!\s+als)|\s+(als(?!(\s*\d+))))|((dar)?über)(?!(\s*\d+)))"; + public const string LessRegexNoNumberSucceed = @"((kleiner|weniger|winziger)((?!\s+als)|\s+(als(?!(\s*\d+))))|((dar)?unter)(?!(\s*\d+)))"; + public const string EqualRegexNoNumberSucceed = @"(gleich(s|ing)?((?!\s+(zu|als))|(\s+(zu|als)(?!(\s*\d+)))))"; + public static readonly string 
OneNumberRangeMoreRegex1 = $@"({MoreOrEqual}|{MoreRegex})\s*(der\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeMoreRegex2 = $@"(?({NumberSplitMark}.)+)\s*{MoreOrEqualSuffix}"; + public static readonly string OneNumberRangeMoreSeparateRegex = $@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+or\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\s+(?({NumberSplitMark}.)+)(\s+oder\s+){EqualRegexNoNumberSucceed})"; + public static readonly string OneNumberRangeLessRegex1 = $@"({LessOrEqual}|{LessRegex})\s*(the\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeLessRegex2 = $@"(?({NumberSplitMark}.)+)\s*{LessOrEqualSuffix}"; + public static readonly string OneNumberRangeLessSeparateRegex = $@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+or\s+){LessRegexNoNumberSucceed})|({LessRegex}\s+(?({NumberSplitMark}.)+)(\s+oder\s+){EqualRegexNoNumberSucceed})"; + public static readonly string OneNumberRangeEqualRegex = $@"{EqualRegex}\s*(the\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string TwoNumberRangeRegex1 = $@"zwischen\s*(der\s+)?(?({NumberSplitMark}.)+)\s*und\s*(der\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\s*(und|aber|,)\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"; + public static readonly string TwoNumberRangeRegex3 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\s*(und|aber|,)\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"; + public static readonly string TwoNumberRangeRegex4 = $@"(von\s+)?(?({NumberSplitMark}(?!\bvon\b).)+)\s*{TillRegex}\s*(der\s+)?(?({NumberSplitMark}.)+)"; public const string AmbiguousFractionConnectorsRegex = @"^[.]"; public const char DecimalSeparatorChar = ','; public const string FractionMarkerToken = @"over"; @@ -75,7 +110,7 @@ public static class NumbersDefinitions public static readonly string[] WrittenDecimalSeparatorTexts = { @"komma" }; public 
static readonly string[] WrittenGroupSeparatorTexts = { @"punkt" }; public static readonly string[] WrittenIntegerSeparatorTexts = { @"und" }; - public static readonly string[] WrittenFractionSeparatorTexts = { @"durch" }; + public static readonly string[] WrittenFractionSeparatorTexts = { @"durch", @"und" }; public const string HalfADozenRegex = @"ein\s+halbes\s+dutzend"; public static readonly string DigitalNumberRegex = $@"((?<=\b)(hundert|tausend|million(en)?|mio|milliarde(n)?|mrd|billion(en)?|dutzend(e)?)(?=\b))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; public static readonly Dictionary CardinalNumberMap = new Dictionary @@ -381,15 +416,59 @@ public static class NumbersDefinitions }; public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { - { @"^[.]", @"" } + { @"^(tausend|hundert)$", @"(ed(ward(\s+m(\.)?)?)?|mary(\s+c(\.)?)?|joachim|claudia|franz|maria|klaus|prof(\.|essor)?|dr(\.)?|herr|fr[äa]u(lein)?|frl?\.)\s+(tausend|hundert)" } }; public static readonly Dictionary RelativeReferenceOffsetMap = new Dictionary { - { @"", @"" } + { @"letzte", @"0" }, + { @"letzten", @"0" }, + { @"letzter", @"0" }, + { @"nächste", @"1" }, + { @"nächsten", @"1" }, + { @"nächster", @"1" }, + { @"vorherige", @"-1" }, + { @"vorherigen", @"-1" }, + { @"vorheriger", @"-1" }, + { @"aktuelle", @"0" }, + { @"aktuellen", @"0" }, + { @"aktueller", @"0" }, + { @"jetzige", @"0" }, + { @"jetzigen", @"0" }, + { @"jetziger", @"0" }, + { @"vorletzte", @"-1" }, + { @"vorletzten", @"-1" }, + { @"vorletzter", @"-1" }, + { @"drittletzte", @"-2" }, + { @"drittletzten", @"-2" }, + { @"drittletzter", @"-2" }, + { @"zuletzt", @"0" }, + { @"früher", @"-1" } }; public static readonly Dictionary RelativeReferenceRelativeToMap = new Dictionary { - { @"", @"" } + { @"letzte", @"end" }, + { @"letzten", @"end" }, + { @"letzter", @"end" }, + { @"nächste", @"current" }, + { @"nächsten", @"current" }, + { @"nächster", @"current" }, + { @"vorherige", @"current" }, + { 
@"vorherigen", @"current" }, + { @"vorheriger", @"current" }, + { @"aktuelle", @"current" }, + { @"aktuellen", @"current" }, + { @"aktueller", @"current" }, + { @"jetzige", @"current" }, + { @"jetzigen", @"current" }, + { @"jetziger", @"current" }, + { @"vorletzte", @"end" }, + { @"vorletzten", @"end" }, + { @"vorletzter", @"end" }, + { @"drittletzte", @"end" }, + { @"drittletzten", @"end" }, + { @"drittletzter", @"end" }, + { @"zuletzt", @"end" }, + { @"früher", @"current" } }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/German/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/German/NumbersWithUnitDefinitions.cs index 496fb64242..422a3f3b9f 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/German/NumbersWithUnitDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/German/NumbersWithUnitDefinitions.cs @@ -173,7 +173,7 @@ public static class NumbersWithUnitDefinitions { @"United Arab Emirates dirham", @"vae dirham|vae-dirham|dirham der vereinigten arabischen emirate|د.إ|aed" }, { @"Azerbaijani manat", @"aserbaidschan-manat|azn" }, { @"Turkmenistan manat", @"turkmenistan-manat|tmt" }, - { @"Manat", @"manat" }, + { @"Manat", @"manat|manats" }, { @"Qəpik", @"qəpik" }, { @"Somali shilling", @"somalia-schilling|sh.so.|sos" }, { @"Somaliland shilling", @"somaliland-schilling" }, @@ -191,7 +191,7 @@ public static class NumbersWithUnitDefinitions { @"Maldivian rufiyaa", @"maledivischer rufiyaa|maledivische rufiyaa|maledivischen rufiyaa|mvr|.ރ" }, { @"Sri Lankan rupee", @"sri-lanka-rupie|sri-lanka-rupien|lkr|රු|ரூ" }, { @"Indonesian rupiah", @"indonesischer rupiah|indonesische rupiah|indonesischen rupiah|rupiah|perak|rp|idr" }, - { @"Rupee", @"rupie|rs" }, + { @"Rupee", @"rupie|rupien|rs" }, { @"Danish krone", @"dänische krone|dänischen krone|dänischer kronen|dänische kronen|dänischen kronen|daenische krone|daenischen krone|daenischer kronen|daenische kronen|daenischen 
kronen|dkk" }, { @"Norwegian krone", @"norwegische krone|norwegischen krone|norwegischer kronen|norwegische kronen|norwegischen kronen|nok" }, { @"Faroese króna", @"färöische króna|färöische krone|färöischen krone|färöischer kronen|färöische kronen|färöischen kronen" }, @@ -244,7 +244,7 @@ public static class NumbersWithUnitDefinitions { @"Mexican peso", @"mexikanischer peso|mexikanische peso|mexikanischen peso|mxn" }, { @"Philippine peso", @"piso|philippinischer peso|philippinische peso|philippinischen peso|₱|php" }, { @"Uruguayan peso", @"uruguayischer peso|uruguayische peso|uruguayischen peso|uyu" }, - { @"Peso", @"peso" }, + { @"Peso", @"peso|pesos" }, { @"Centavo", @"centavos|centavo" }, { @"Alderney pound", @"alderney pfund|alderney £" }, { @"British pound", @"britischer pfund|britische pfund|britischen pfund|british £|gbp|pfund sterling" }, @@ -303,9 +303,278 @@ public static class NumbersWithUnitDefinitions { @"Fen", @"fen" }, { @"Jiao", @"jiao" }, { @"Finnish markka", @"suomen markka|finnish markka|finsk mark|fim|markkaa|markka|finnische mark|finnischen mark" }, - { @"Penni", @"penniä|penni" } + { @"Penni", @"penniä|penni" }, + { @"Bitcoin", @"bitcoin|bitcoins|btc|xbt|₿" }, + { @"Millibitcoin", @"millibitcoin|millibitcoins|milibitcoin|milibitcoins" } }; - public const string CompoundUnitConnectorRegex = @"(?[^.])"; + public static readonly Dictionary CurrencyNameToIsoCodeMap = new Dictionary + { + { @"Afghan afghani", @"AFN" }, + { @"Euro", @"EUR" }, + { @"Albanian lek", @"ALL" }, + { @"Angolan kwanza", @"AOA" }, + { @"Armenian dram", @"AMD" }, + { @"Aruban florin", @"AWG" }, + { @"Bangladeshi taka", @"BDT" }, + { @"Bhutanese ngultrum", @"BTN" }, + { @"Bolivian boliviano", @"BOB" }, + { @"Bosnia and Herzegovina convertible mark", @"BAM" }, + { @"Botswana pula", @"BWP" }, + { @"Brazilian real", @"BRL" }, + { @"Bulgarian lev", @"BGN" }, + { @"Cambodian riel", @"KHR" }, + { @"Cape Verdean escudo", @"CVE" }, + { @"Costa Rican colón", @"CRC" }, + { @"Croatian 
kuna", @"HRK" }, + { @"Czech koruna", @"CZK" }, + { @"Eritrean nakfa", @"ERN" }, + { @"Ethiopian birr", @"ETB" }, + { @"Gambian dalasi", @"GMD" }, + { @"Georgian lari", @"GEL" }, + { @"Ghanaian cedi", @"GHS" }, + { @"Guatemalan quetzal", @"GTQ" }, + { @"Haitian gourde", @"HTG" }, + { @"Honduran lempira", @"HNL" }, + { @"Hungarian forint", @"HUF" }, + { @"Iranian rial", @"IRR" }, + { @"Yemeni rial", @"YER" }, + { @"Israeli new shekel", @"ILS" }, + { @"Japanese yen", @"JPY" }, + { @"Kazakhstani tenge", @"KZT" }, + { @"Kenyan shilling", @"KES" }, + { @"North Korean won", @"KPW" }, + { @"South Korean won", @"KRW" }, + { @"Kyrgyzstani som", @"KGS" }, + { @"Lao kip", @"LAK" }, + { @"Lesotho loti", @"LSL" }, + { @"South African rand", @"ZAR" }, + { @"Macanese pataca", @"MOP" }, + { @"Macedonian denar", @"MKD" }, + { @"Malagasy ariary", @"MGA" }, + { @"Malawian kwacha", @"MWK" }, + { @"Malaysian ringgit", @"MYR" }, + { @"Mauritanian ouguiya", @"MRO" }, + { @"Mongolian tögrög", @"MNT" }, + { @"Mozambican metical", @"MZN" }, + { @"Burmese kyat", @"MMK" }, + { @"Nicaraguan córdoba", @"NIO" }, + { @"Nigerian naira", @"NGN" }, + { @"Turkish lira", @"TRY" }, + { @"Omani rial", @"OMR" }, + { @"Panamanian balboa", @"PAB" }, + { @"Papua New Guinean kina", @"PGK" }, + { @"Paraguayan guaraní", @"PYG" }, + { @"Peruvian sol", @"PEN" }, + { @"Polish złoty", @"PLN" }, + { @"Qatari riyal", @"QAR" }, + { @"Saudi riyal", @"SAR" }, + { @"Samoan tālā", @"WST" }, + { @"São Tomé and Príncipe dobra", @"STN" }, + { @"Sierra Leonean leone", @"SLL" }, + { @"Swazi lilangeni", @"SZL" }, + { @"Tajikistani somoni", @"TJS" }, + { @"Thai baht", @"THB" }, + { @"Ukrainian hryvnia", @"UAH" }, + { @"Vanuatu vatu", @"VUV" }, + { @"Venezuelan bolívar", @"VEF" }, + { @"Zambian kwacha", @"ZMW" }, + { @"Moroccan dirham", @"MAD" }, + { @"United Arab Emirates dirham", @"AED" }, + { @"Azerbaijani manat", @"AZN" }, + { @"Turkmenistan manat", @"TMT" }, + { @"Somali shilling", @"SOS" }, + { @"Tanzanian shilling", 
@"TZS" }, + { @"Ugandan shilling", @"UGX" }, + { @"Romanian leu", @"RON" }, + { @"Moldovan leu", @"MDL" }, + { @"Nepalese rupee", @"NPR" }, + { @"Pakistani rupee", @"PKR" }, + { @"Indian rupee", @"INR" }, + { @"Seychellois rupee", @"SCR" }, + { @"Mauritian rupee", @"MUR" }, + { @"Maldivian rufiyaa", @"MVR" }, + { @"Sri Lankan rupee", @"LKR" }, + { @"Indonesian rupiah", @"IDR" }, + { @"Danish krone", @"DKK" }, + { @"Norwegian krone", @"NOK" }, + { @"Icelandic króna", @"ISK" }, + { @"Swedish krona", @"SEK" }, + { @"West African CFA franc", @"XOF" }, + { @"Central African CFA franc", @"XAF" }, + { @"Comorian franc", @"KMF" }, + { @"Congolese franc", @"CDF" }, + { @"Burundian franc", @"BIF" }, + { @"Djiboutian franc", @"DJF" }, + { @"CFP franc", @"XPF" }, + { @"Guinean franc", @"GNF" }, + { @"Swiss franc", @"CHF" }, + { @"Rwandan franc", @"RWF" }, + { @"Russian ruble", @"RUB" }, + { @"Transnistrian ruble", @"PRB" }, + { @"New Belarusian ruble", @"BYN" }, + { @"Algerian dinar", @"DZD" }, + { @"Bahraini dinar", @"BHD" }, + { @"Iraqi dinar", @"IQD" }, + { @"Jordanian dinar", @"JOD" }, + { @"Kuwaiti dinar", @"KWD" }, + { @"Libyan dinar", @"LYD" }, + { @"Serbian dinar", @"RSD" }, + { @"Tunisian dinar", @"TND" }, + { @"Argentine peso", @"ARS" }, + { @"Chilean peso", @"CLP" }, + { @"Colombian peso", @"COP" }, + { @"Cuban convertible peso", @"CUC" }, + { @"Cuban peso", @"CUP" }, + { @"Dominican peso", @"DOP" }, + { @"Mexican peso", @"MXN" }, + { @"Uruguayan peso", @"UYU" }, + { @"British pound", @"GBP" }, + { @"Saint Helena pound", @"SHP" }, + { @"Egyptian pound", @"EGP" }, + { @"Falkland Islands pound", @"FKP" }, + { @"Gibraltar pound", @"GIP" }, + { @"Manx pound", @"IMP" }, + { @"Jersey pound", @"JEP" }, + { @"Lebanese pound", @"LBP" }, + { @"South Sudanese pound", @"SSP" }, + { @"Sudanese pound", @"SDG" }, + { @"Syrian pound", @"SYP" }, + { @"United States dollar", @"USD" }, + { @"Australian dollar", @"AUD" }, + { @"Bahamian dollar", @"BSD" }, + { @"Barbadian dollar", 
@"BBD" }, + { @"Belize dollar", @"BZD" }, + { @"Bermudian dollar", @"BMD" }, + { @"Brunei dollar", @"BND" }, + { @"Singapore dollar", @"SGD" }, + { @"Canadian dollar", @"CAD" }, + { @"Cayman Islands dollar", @"KYD" }, + { @"New Zealand dollar", @"NZD" }, + { @"Fijian dollar", @"FJD" }, + { @"Guyanese dollar", @"GYD" }, + { @"Hong Kong dollar", @"HKD" }, + { @"Jamaican dollar", @"JMD" }, + { @"Liberian dollar", @"LRD" }, + { @"Namibian dollar", @"NAD" }, + { @"Solomon Islands dollar", @"SBD" }, + { @"Surinamese dollar", @"SRD" }, + { @"New Taiwan dollar", @"TWD" }, + { @"Trinidad and Tobago dollar", @"TTD" }, + { @"Tuvaluan dollar", @"TVD" }, + { @"Chinese yuan", @"CNY" }, + { @"Rial", @"__RI" }, + { @"Shiling", @"__S" }, + { @"Som", @"__SO" }, + { @"Dirham", @"__DR" }, + { @"Dinar", @"_DN" }, + { @"Dollar", @"__D" }, + { @"Manat", @"__MA" }, + { @"Rupee", @"__R" }, + { @"Krone", @"__K" }, + { @"Krona", @"__K" }, + { @"Crown", @"__K" }, + { @"Frank", @"__F" }, + { @"Mark", @"__M" }, + { @"Ruble", @"__RB" }, + { @"Peso", @"__PE" }, + { @"Pound", @"__P" }, + { @"Tristan da Cunha pound", @"_TP" }, + { @"South Georgia and the South Sandwich Islands pound", @"_SP" }, + { @"Somaliland shilling", @"_SS" }, + { @"Pitcairn Islands dollar", @"_PND" }, + { @"Palauan dollar", @"_PD" }, + { @"Niue dollar", @"_NID" }, + { @"Nauruan dollar", @"_ND" }, + { @"Micronesian dollar", @"_MD" }, + { @"Kiribati dollar", @"_KID" }, + { @"Guernsey pound", @"_GGP" }, + { @"Faroese króna", @"_FOK" }, + { @"Cook Islands dollar", @"_CKD" }, + { @"British Virgin Islands dollar", @"_BD" }, + { @"Ascension pound", @"_AP" }, + { @"Alderney pound", @"_ALP" }, + { @"Abkhazian apsar", @"_AA" }, + { @"Bitcoin", @"_XBT" } + }; + public static readonly Dictionary FractionalUnitNameToCodeMap = new Dictionary + { + { @"Jiao", @"JIAO" }, + { @"Kopek", @"KOPEK" }, + { @"Pul", @"PUL" }, + { @"Cent", @"CENT" }, + { @"Qindarkë", @"QINDARKE" }, + { @"Penny", @"PENNY" }, + { @"Santeem", @"SANTEEM" }, + { 
@"Cêntimo", @"CENTIMO" }, + { @"Centavo", @"CENTAVO" }, + { @"Luma", @"LUMA" }, + { @"Qəpik", @"QƏPIK" }, + { @"Fils", @"FILS" }, + { @"Poisha", @"POISHA" }, + { @"Kapyeyka", @"KAPYEYKA" }, + { @"Centime", @"CENTIME" }, + { @"Chetrum", @"CHETRUM" }, + { @"Paisa", @"PAISA" }, + { @"Fening", @"FENING" }, + { @"Thebe", @"THEBE" }, + { @"Sen", @"SEN" }, + { @"Stotinka", @"STOTINKA" }, + { @"Fen", @"FEN" }, + { @"Céntimo", @"CENTIMO" }, + { @"Lipa", @"LIPA" }, + { @"Haléř", @"HALER" }, + { @"Øre", @"ØRE" }, + { @"Piastre", @"PIASTRE" }, + { @"Santim", @"SANTIM" }, + { @"Oyra", @"OYRA" }, + { @"Butut", @"BUTUT" }, + { @"Tetri", @"TETRI" }, + { @"Pesewa", @"PESEWA" }, + { @"Fillér", @"FILLER" }, + { @"Eyrir", @"EYRIR" }, + { @"Dinar", @"DINAR" }, + { @"Agora", @"AGORA" }, + { @"Tïın", @"TIIN" }, + { @"Chon", @"CHON" }, + { @"Jeon", @"JEON" }, + { @"Tyiyn", @"TYIYN" }, + { @"Att", @"ATT" }, + { @"Sente", @"SENTE" }, + { @"Dirham", @"DIRHAM" }, + { @"Rappen", @"RAPPEN" }, + { @"Avo", @"AVO" }, + { @"Deni", @"DENI" }, + { @"Iraimbilanja", @"IRAIMBILANJA" }, + { @"Tambala", @"TAMBALA" }, + { @"Laari", @"LAARI" }, + { @"Khoums", @"KHOUMS" }, + { @"Ban", @"BAN" }, + { @"Möngö", @"MONGO" }, + { @"Pya", @"PYA" }, + { @"Kobo", @"KOBO" }, + { @"Kuruş", @"KURUS" }, + { @"Baisa", @"BAISA" }, + { @"Centésimo", @"CENTESIMO" }, + { @"Toea", @"TOEA" }, + { @"Sentimo", @"SENTIMO" }, + { @"Grosz", @"GROSZ" }, + { @"Sene", @"SENE" }, + { @"Halala", @"HALALA" }, + { @"Para", @"PARA" }, + { @"Öre", @"ORE" }, + { @"Diram", @"DIRAM" }, + { @"Satang", @"SATANG" }, + { @"Seniti", @"SENITI" }, + { @"Millime", @"MILLIME" }, + { @"Tennesi", @"TENNESI" }, + { @"Kopiyka", @"KOPIYKA" }, + { @"Tiyin", @"TIYIN" }, + { @"Hào", @"HAO" }, + { @"Ngwee", @"NGWEE" }, + { @"Millibitcoin", @"MILLIBITCOIN" }, + { @"Satoshi", @"SATOSHI" } + }; + public const string CompoundUnitConnectorRegex = @"(?und)"; public static readonly Dictionary CurrencyPrefixList = new Dictionary { { @"Dollar", @"$" }, @@ -347,7 +616,8 
@@ public static class NumbersWithUnitDefinitions { @"Euro", @"€" }, { @"Pound", @"£" }, { @"Costa Rican colón", @"₡" }, - { @"Turkish lira", @"₺" } + { @"Turkish lira", @"₺" }, + { @"Bitcoin", @"₿|btc|xbt" } }; public static readonly IList AmbiguousCurrencyUnitList = new List { @@ -436,6 +706,7 @@ public static class NumbersWithUnitDefinitions }; public const string BuildPrefix = @"(?<=(\s|^))"; public const string BuildSuffix = @"(?=(\s|\W|$))"; + public const string ConnectorToken = @"-"; public static readonly Dictionary LengthSuffixList = new Dictionary { { @"Kilometer", @"km|kilometer|kilometern" }, @@ -511,7 +782,7 @@ public static class NumbersWithUnitDefinitions { @"Teaspoon", @"teelöffel|teeloeffel" }, { @"Tablespoon", @"esslöffel|essloeffel" }, { @"Pint", @"pinte" }, - { @"Volume unit", @"fluid dram|Fluid drachm|Flüssigdrachme|Gill|Quart|Minim|Barrel|Cord|Peck|Beck|Scheffel|Hogshead|Oxhoft" } + { @"Volume unit", @"fluid dram|fluid drachm|flüssigdrachme|gill|quart|minim|cord|peck|beck|scheffel|hogshead|oxhoft" } }; public static readonly IList AmbiguousVolumeUnitList = new List { @@ -528,6 +799,7 @@ public static class NumbersWithUnitDefinitions { @"Kilogram", @"kg|kilogramm|kilo" }, { @"Gram", @"g|gramm" }, { @"Milligram", @"mg|milligramm" }, + { @"Microgram", @"μg|mikrogramm" }, { @"Barrel", @"barrel" }, { @"Gallon", @"gallone|gallonen" }, { @"Metric ton", @"metrische tonne|metrische tonnen" }, @@ -543,5 +815,27 @@ public static class NumbersWithUnitDefinitions @"stone", @"dram" }; + public static readonly Dictionary AngleSuffixList = new Dictionary + { + { @"Degree", @"grad|°" }, + { @"Radian", @"radiant|rad" }, + { @"Turn", @"turn" } + }; + public static readonly IList AmbiguousAngleUnitList = new List + { + @"turn" + }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"null", @"null" } + }; + public static readonly Dictionary TemperatureAmbiguityFiltersDict = new Dictionary + { + { @"\b(grad|°)$", 
@"\b((grad|°)\s*(winkel|dreh(ung|en|t)|gedreht)|(dreh(ung|en|t)|gedreht|winkel)(\s+(\p{L}+|\d+)){0,4}\s*(grad\b|°))" } + }; + public static readonly Dictionary DimensionAmbiguityFiltersDict = new Dictionary + { + { @"\b(grad|°)$", @"\b((grad|°)\s*(c(elsius|entigrate)?|f(ah?renheit)?)|(temperatur)(\s+(\p{L}+|\d+)){0,4}\s*(grad\b|°))" } + }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/German/QuotedTextDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/German/QuotedTextDefinitions.cs new file mode 100644 index 0000000000..cee55ee8ae --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/German/QuotedTextDefinitions.cs @@ -0,0 +1,35 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\German\German-QuotedText.yaml +// - Language: German +// - ClassName: QuotedTextDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.German +{ + using System; + using System.Collections.Generic; + + public static class QuotedTextDefinitions + { + public const string LangMarker = @"Ger"; + public const string QuotedTextRegex1 = @"(„([^„“]+)“)"; + public const string QuotedTextRegex2 = @"(‚([^‚‘]+)‘)"; + public const string QuotedTextRegex3 = @"(""([^""]+)"")"; + public const string QuotedTextRegex4 = @"(\\'([^\']+)\\')"; + public const string QuotedTextRegex5 = @"(`([^`]+)`)"; + public const string QuotedTextRegex6 = @"(«([^«»]+)»)"; + public const string QuotedTextRegex7 = @"(‹([^‹›]+)›)"; + public const string QuotedTextRegex8 = @"(»([^»«]+)«)"; + public const string QuotedTextRegex9 = @"(›([^›‹]+)‹)"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/German/QuotedTextDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/German/QuotedTextDefinitions.tt new file mode 100644 index 0000000000..6585704096 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/German/QuotedTextDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\German\German-QuotedText.yaml"; + this.Language = "German"; + this.ClassName = "QuotedTextDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/ChoiceDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/ChoiceDefinitions.cs index d7275ed1e0..52751e861b 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/ChoiceDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/ChoiceDefinitions.cs @@ -22,8 +22,9 @@ namespace Microsoft.Recognizers.Definitions.Hindi public static class ChoiceDefinitions { public const string LangMarker = @"Hin"; - public const string 
TokenizerRegex = @"[^\w\d]"; - public const string TrueRegex = @"\b(true|yes|yep|y|sure|ok|agree)\b|(\uD83D\uDC4D|\uD83D\uDC4C|\u0001f44c)"; - public const string FalseRegex = @"\b(false|nope|nop|no|not\s+ok|disagree)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90|\u0001F44E|\u0001F590)"; + public const string TokenizerRegex = @"[^\w\d\u0900-\u097f]"; + public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)"; + public static readonly string TrueRegex = $@"\b(true|yes|yep|y|sure|ok|agree|बिलकुल|हाँ|हां|सहमत)\b|(\uD83D\uDC4D|\uD83D\uDC4C|\u0001f44c){SkinToneRegex}?"; + public static readonly string FalseRegex = $@"\b(false|nope|nop|no|not\s+ok|disagree)\b|((नहीं|ठीक\s+नहीं|असहमत)\b|(ना))|(\uD83D\uDC4E|\u270B|\uD83D\uDD90|\u0001F44E|\u0001F590){SkinToneRegex}?"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/DateTimeDefinitions.cs new file mode 100644 index 0000000000..81bcfb679f --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/DateTimeDefinitions.cs @@ -0,0 +1,1524 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Hindi\Hindi-DateTime.yaml +// - Language: Hindi +// - ClassName: DateTimeDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Hindi +{ + using System; + using System.Collections.Generic; + + public static class DateTimeDefinitions + { + public const string LangMarker = @"Hin"; + public const bool CheckBothBeforeAfter = true; + public static readonly string TillRegex = $@"(?\b(तक|द्वारा|से\s+लेकर|(तारीख\s+)?से|to)|{BaseDateTime.RangeConnectorSymbolRegex})"; + public static readonly string RangeConnectorRegex = $@"(?\b(और|and|through|to|से\s+लेकर)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; + public const string RelativeRegex = @"\b(?परसों\s+वापस\s+गय\s+था|अगला|अगले|अगली|दूसरे|आने\s+वा(ले|ला)|आगामी|पिछला|पिछले|पिछली|आखिरी|अंतिम|यह|इसी|इस|वर्तमान|अभी\s+(के|वाला)|इस|उस|चालू)"; + public const string StrictRelativeRegex = @"\b(?अगला|अगले|अगली|दूसरे|आने\s+वा(ले|ला)|आगामी|पिछला|पिछली|पिछले|आखिरी|अंतिम|वर्तमान|अभी\s+(के|वाला)|इसी|इस|उस)"; + public const string UpcomingPrefixRegex = @"(((इस|इसी)\s+)?(आने\s+(वाले|वाला)|आगामी))"; + public static readonly string NextPrefixRegex = $@"\b(अगला|अगले|अगली|{UpcomingPrefixRegex}|{FutureSuffixRegex})"; + public static readonly string NextPrefixRegexNoWeek = $@"\b(अगला|अगले|अगली|{UpcomingPrefixRegex}|{FutureSuffixRegex})(?!\s+(सप्ताह|हफ्ते|हफ़्ते))"; + public const string AfterNextSuffixRegex = @"(के\s+बाद)"; + public const string PastPrefixRegex = @"(((इस|इसी)\s+)?पिछला|पिछले|पिछली|के बाद)"; + public static readonly string PreviousPrefixRegex = $@"(आखिरी|अंतिम|पिछला|पिछले|पिछली|{PastPrefixRegex})"; + public const string ThisPrefixRegex = @"((इस|इसी)|यह|वर्तमान|अभी\s+(के|वाला))"; + public const string RangePrefixRegex = @"(से\s+लेकर|से|(तक\s+)?के\s+बीच|(?<=के\s+)बीच|तक)"; + public const string CenturySuffixRegex = @"^(सन|सदी|शताब्दी)"; + public const string ReferencePrefixRegex = @"(उस|उसी)"; + public const string FutureSuffixRegex = @"\b((आने\s+वा(ले|ला)\s+)?(भविष्य|बाद)(\s+(में|मे))?|आज\s+से)"; + public const string 
PastSuffixRegex = @"^\b$"; + public const string DayRegex = @"(उस\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?!\d+)(?:ला|ली|रा|था|वां|वीं|वें|वाँ|वा|ठा|th|nd|rd|st)?(?=तारीख|दिन)?"; + public const string ImplicitDayRegex = @"(उस\s*)?(?(?:3[0-1]|[0-2]?\d))(?:ला|ली|रा|था|वां|वीं|वें|वाँ|वा|ठा)(?=\b|\s*(तारीख|दिन))?"; + public const string MonthNumRegex = @"(?1[0-2]|(0)?[1-9])\b"; + public const string WrittenOneToNineRegex = @"(?:सात|आठ|फ़ोर|चार|पांच|पाँच|नौ|दो|छह|एक|तीन)"; + public const string WrittenElevenToNineteenRegex = @"(?:सत्रह|तेरह|चौदह|अठारह|उन्नीस|पंद्रह|सोलह|ग्यारह|बारह)"; + public const string WrittenTwentyOneToTwentyNineIntegerRegex = @"(सत्ताईस|तेईस|चौबीस|अट्ठाईस|अट्ठाइस|उनतीस|पच्चीस|छब्बीस|इक्कीस|बाईस)"; + public const string WrittenThirtyOneToThirtyNineIntegerRegex = @"(सैंतीस|तैंतीस|चौंतीस|अड़तीस|उनतालीस|पैंतीस|छ्त्तीस|इकतीस|इकत्तीस|बत्तीस)"; + public const string WrittenFourtyOneToFourtyNineIntegerRegex = @"(सैंतालीस|तैंतालीस|चौंतालीस|अड़तालीस|उनचास|पैंतालीस|छियालीस|इकतालीस|बयालीस)"; + public const string WrittenFiftyOneToFiftyNineIntegerRegex = @"(सत्तावन|तिरेपन|चौबन|अट्ठावन|उनसठ|पचपन|छप्पन|इक्याबन|बावन)"; + public const string WrittenSixtyOneToSixtyNineIntegerRegex = @"((सड़|सड़|तिर|चौं|अड़|उनहत्तर|पैं|छिया|इक|बा|एक)(सठ))"; + public const string WrittenSeventyOneToSeventyNineIntegerRegex = @"(?:सतहत्तर|तिहत्तर|अठहत्तर|चौहत्तर|उनासी|पचहत्तर|छिहत्तर|इकहत्तर|अठत्तर|बहत्तर)"; + public const string WrittenEightyOneToEightyNineIntegerRegex = @"(सतासी|तिरासी|चौरासी|अठासी|नवासी|पचासी|छियासी|इक्यासी|बयासी)"; + public const string WrittenNinetyOneToNinetyNineIntegerRegex = @"(सत्तानवे|तिरानवे|चौरानवे|अट्ठानवे|निन्यानवे|पचानवे|पंचानबे|छियानवे|इक्यानबे|बानवे)"; + public const string WrittenTensRegex = @"(?:सत्तर|बीस|तीस|अस्सी|नब्बे|चालीस|पचास|साठ|दस)"; + public static readonly string AllWrittenNumericalRegex = 
$@"({WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTwentyOneToTwentyNineIntegerRegex}|{WrittenThirtyOneToThirtyNineIntegerRegex}|{WrittenFourtyOneToFourtyNineIntegerRegex}|{WrittenFiftyOneToFiftyNineIntegerRegex}|{WrittenSixtyOneToSixtyNineIntegerRegex}|{WrittenSeventyOneToSeventyNineIntegerRegex}|{WrittenEightyOneToEightyNineIntegerRegex}|{WrittenNinetyOneToNinetyNineIntegerRegex}|{WrittenTensRegex})"; + public static readonly string WrittenNumRegex = $@"(?:{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s+{WrittenOneToNineRegex})?|WrittenTwentyToTwentyNineIntegerRegex)"; + public static readonly string WrittenCenturyFullYearRegex = $@"(?:(एक|दो)\s+(हजार|हज़ार)(\s+और)?(\s+{WrittenOneToNineRegex}(\s+सौ)?(\s+और)?)?)"; + public const string WrittenCenturyOrdinalYearRegex = @"(?:(बीस|इक्कीस|बाईस)(वां|वीं|वें|वाँ)?|(?:सत्रह|तेरह|चौदह|अठारह|उन्नीस|पंद्रह|सोलह|ग्यारह|बारह|दस)(वां|वीं|वें|वाँ)?|(सात|आठ|फ़ोर|चार|पांच|पाँच|नौ|दो|छह|एक|तीन)(वां|वीं|वें|वाँ)?)"; + public static readonly string CenturyRegex = $@"\b(?{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}(\s+सौ)?(\s+और)?)"; + public static readonly string LastTwoYearNumRegex = $@"(?:(शून्य\s+)?{AllWrittenNumericalRegex}|{WrittenTensRegex}(\s+(और\s+)?){WrittenOneToNineRegex})"; + public static readonly string FullTextYearRegex = $@"\b((?{CenturyRegex})\s+(?{LastTwoYearNumRegex})|\b(?{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}\s+सौ(\s+और)?))"; + public const string OclockRegex = @"(?अराउंड|लगभग|बजे|ओक्लॉक|o\s*((’|‘|')\s*)?clock)"; + public static readonly string AmDescRegex = $@"(?:{BaseDateTime.BaseAmDescRegex}|पू\.|सुबह|पूर्वाहन|ए\.एम\.|एएम)"; + public static readonly string PmDescRegex = $@"(:?{BaseDateTime.BasePmDescRegex}|अप\.|पीएम|पी\.एम\.|अपराह्न|अपराहन)"; + public static readonly string AmPmDescRegex = $@"(?:{BaseDateTime.BaseAmPmDescRegex})"; + public static readonly string DescRegex = 
$@"(?:(?:({OclockRegex}\s+)?(?({AmPmDescRegex}|{AmDescRegex}|{PmDescRegex})))|{OclockRegex})"; + public static readonly string DescRegexA = $@"(?:(?:({OclockRegex}\s+)?(?({AmPmDescRegex}|(?:{BaseDateTime.BaseAmDescRegex})|(?:{BaseDateTime.BasePmDescRegex}))))|{OclockRegex})"; + public static readonly string TwoDigitYearRegex = $@"\b(?([0-9]\d))(?!(\s*((\:\d)|{AmDescRegex}|{PmDescRegex}|{OclockRegex}|\.\d)))"; + public static readonly string YearRegex = $@"(?:{BaseDateTime.FourDigitYearRegex}|{FullTextYearRegex})"; + public const string WeekDayRegex = @"\b(?(?:रवि|इत|एत|सोम|मंगल|म्गल|गुरु|((बृ|वृ)हस्पति)|वीर|शुक्र)(वार)?|बुध(वार)?|बीफ़े|शनि(वार)?|संडे|मंडे)"; + public const string SingleWeekDayRegex = @"\b(?रविवार|इतवार|एतवार|शनिवार|संडे|मंडे|(?:सोम|मंगल|म्गल|गुरु|((बृ|वृ)हस्पति)|वीर|शुक्र)(वार)?|बुध(वार)?|बीफ़े|((शनि|रवि)(?<=को\s+)))\b"; + public static readonly string RelativeMonthRegex = $@"(?{RelativeRegex}\s+(माह|महि(ने|ना)|महीनों|महीने)(\s+(का|की|के))?)"; + public static readonly string WrittenMonthRegex = $@"\b({MonthRegex}(\s+(का|के(?!\s+बीच)|की)(\s+(माह|महि(ने|ना)))?)?)"; + public static readonly string MonthSuffixRegex = $@"(?({RelativeMonthRegex}|{WrittenMonthRegex})(\s*(का|के|की))?)"; + public const string DateUnitRegex = @"(?(?decade|(?दशकों)|दशक|decades)|(?साल|(?वर्षों)|वर्ष?)|(?माह|(?महीनों|महीना)|महीने?)|(?(?हफ़्तों|हफ्तों)|हफ़्ते|हफ्ता|(?!सप्ताहांत)सप्ताह?|हफ्ते)|(?(व्यावसायिक|कार्य|व्यापारिक|व्यापार\s+के)\s?)?(?(दिवस|(?दिनों)|दिन)|(?<=रो)ज|^ज$)|(?(?fortnights)fortnight|पखवाड़े|पखवाड़ा|पखवाड़े))"; + public const string DateTokenPrefix = @"को "; + public const string TimeTokenPrefix = @"at "; + public const string TokenBeforeDate = @"को|की"; + public const string TokenBeforeTime = @"at "; + public const string HalfTokenRegex = @"^(साढ़े|साढ़े)"; + public const string QuarterTokenRegex = @"^(सव|पौने)"; + public const string ThreeQuarterTokenRegex = @"^(पैंतालीस)"; + public const string ToTokenRegex = @"\b(बाकी|पौने)$"; + public static readonly string 
SimpleCasesRegex = $@"\b((({YearRegex}(\s+|\s*,\s*))(में\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex}\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+{DayRegex})(?!\d+)(\s+{RangePrefixRegex})?)|(({DayRegex})\s*{TillRegex}\s*({DayRegex}\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+{DayRegex})(?!\d+)(\s+{RangePrefixRegex})?((\s+|\s*,\s*){YearRegex})?)|((?अगला|अगले|अगली|आने\s+वा(ले|ला)|आगामी|इस|आखिरी|अंतिम)\s+साल))|(({YearRegex}|(?अगला|अगले|अगली|आने\s+वा(ले|ला)|आगामी|इस|आखिरी|अंतिम)\s+साल)(\s+के)?(\s*),?(\s*){WrittenMonthRegex}))"; + public const string SpecialYearPrefixes = @"(calendar|(?fiscal|school))"; + public static readonly string OneWordPeriodRegex = $@"\b(?अप्रील|अप्रैल|अप्र|अगस्त|अग|दिसम्बर|दिसंबर|दिसं|दिस|फरवरी|फ़रवरी|फर|फ़र|फ़ेब|फेब्रू|जनवरी|जन|जुलाई|जु|जुल|जून|जू|मार्च|मा|मई|नवंबर|नवम्बर|नवं|अक्तूबर|अक्टूबर|आक्ट|अक्टू|सितंबर|सितम्बर|सितं|सित)(\.|(?=[/\\.,-])|(?![\u0900-\u097f])))(\s+(का|की|के)\s+(माह|((महि|मही)(ने|ना))))?))"; + public static readonly string MonthNumWithYear = $@"\b(({BaseDateTime.FourDigitYearRegex}(\s*)[/\-\.](\s*){MonthNumRegex})|({MonthNumRegex}(\s*)[/\-](\s*){BaseDateTime.FourDigitYearRegex}))"; + public static readonly string WeekOfMonthRegex = $@"\b(?{MonthSuffixRegex}(\s+({BaseDateTime.FourDigitYearRegex}|{RelativeRegex}\s*साल)\s*)?(\s*(का|के|की)\s*)?(?पहला|पहली|पहले|1st|दूसरा|दूसरे|दूसरी|2nd|तीसरा|तीसरे|तीसरी|3rd|चौथा|चौथी|4th|पाँचवाँ|पांचवां|5th|आखिरी|अंतिम)\s+(हफ़्ते|हफ्ते|(?!सप्ताहांत)सप्ताह))(\s+में)?"; + public static readonly string WeekOfYearRegex = $@"\b(?({YearRegex}|{RelativeRegex}\s+(साल|वर्ष)))(\s+(का|की|के))?\s+(?पहला|पहली|पहले|1st|दूसरा|दूसरे|दूसरी|2nd|तीसरा|तीसरे|तीसरी|3rd|चौथा|चौथी|4th|पाँचवाँ|पांचवां|5th|आखिरी|अंतिम)\s+(हफ़्ते|हफ्ते|(?!सप्ताहांत)सप्ताह)"; + public static readonly string OfYearRegex = $@"\b((of|in)\s+({YearRegex}|{StrictRelativeRegex}\s+year))\b"; + public const string FirstLastRegex = @"\b(the\s+)?((?first)|(?last))\b"; + public static readonly string FollowedDateUnit = $@"^\s*{DateUnitRegex}"; + public 
static readonly string NumberCombinedWithDateUnit = $@"\b(?\d+(\.\d*)?){DateUnitRegex}"; + public static readonly string QuarterTermRegex = $@"\b(((?पहला|पहली|पहले|1st|दूसरा|दूसरे|दूसरी|2nd|तीसरा|तीसरे|तीसरी|3rd|चौथा|चौथी|4th)[ -]+तिमाही)(\s*{BaseDateTime.FourDigitYearRegex})?)"; + public static readonly string RelativeQuarterTermRegex = $@"\b(?{StrictRelativeRegex})\s+तिमाही\b"; + public static readonly string QuarterRegex = $@"((?:({YearRegex}|{RelativeRegex}\s*(साल|वर्ष)\s*)(\s*(का|की|के)|\s*,\s*)?\s+)?{QuarterTermRegex})|{RelativeQuarterTermRegex}"; + public static readonly string QuarterRegexYearFront = $@"(?:{YearRegex}|{RelativeRegex}\s+साल)(?:\s*-\s*|\s+(इस\s+)?)?{QuarterTermRegex}"; + public const string HalfYearTermRegex = @"(?पहली|1st|second|2nd)\s+half"; + public static readonly string HalfYearFrontRegex = $@"(?((1[5-9]|20)\d{{2}})|2100)(\s*-\s*|\s+(the\s+)?)?h(?[1-2])"; + public static readonly string HalfYearBackRegex = $@"(the\s+)?(h(?[1-2])|({HalfYearTermRegex}))(\s+of|\s*,\s*)?\s+({YearRegex})"; + public static readonly string HalfYearRelativeRegex = $@"(the\s+)?{HalfYearTermRegex}(\s+of|\s*,\s*)?\s+({RelativeRegex}\s+साल)"; + public static readonly string AllHalfYearRegex = $@"({HalfYearFrontRegex})|({HalfYearBackRegex})|({HalfYearRelativeRegex})"; + public const string EarlyPrefixRegex = @"\b(?(?पूर्व|(इससे|उससे) पहले)|((के|की)\s+)शुरुआत(?=\s+(में|के|मे))?|सवेरे|प्रारंभिक|early|beginning of|start of)"; + public const string MidPrefixRegex = @"\b(?के बीच|बीच के|बीच में|mid-?|बीच|के दौरान)"; + public const string LaterPrefixRegex = @"\b(?देर से|देरी से|के अंत|के खत्म|के बाद|(?बाद(?=\s+(में|के|मे))?))"; + public static readonly string PrefixPeriodRegex = $@"({EarlyPrefixRegex}|{MidPrefixRegex}|{LaterPrefixRegex})"; + public const string PrefixDayRegex = @"\b(\s+the\s+day)(\s+in)??((?early)|(?mid(dle)?)|(?later?))$"; + public const string SeasonDescRegex = @"(?वसंत|spring|गर्मी|गर्मियों|fall|autumn|शरद\s+ऋतु|winter|सर्दियों)"; + public static 
readonly string SeasonRegex = $@"\b(?({RelativeRegex}\s+)?(({YearRegex}|{RelativeRegex}\s+साल)(\s+(के|की)|\s*,\s*)?\s+)?{SeasonDescRegex})(\s+{PrefixPeriodRegex})?"; + public const string WhichWeekRegex = @"(\b(सप्ताह)(\s*)(?5[0-3]|[1-4]\d|0?[1-9])\b)|(\b(?5[0-3]|[1-4]\d|0?[1-9])(\s*(वें)?\s*)(हफ़्ते|हफ्ते|(?!सप्ताहांत)सप्ताह(?!\s+में)))"; + public const string WeekOfRegex = @"(\s*(के|का|वाले|तारीख वाली)\s+)(हफ़्ते|हफ्ते|(?!सप्ताहांत)सप्ताह)"; + public const string MonthOfRegex = @"(\s*(का|की|के)\s+)?((महि|मही(ने|ना))|माह)"; + public const string MonthRegex = @"\b(?अप्रील|अप्रैल|अगस्त|मई|दिसम्बर|दिसंबर|फरवरी|जनवरी|जुलाई|जून|मार्च|नवंबर|नवम्बर|अक्तूबर|अक्टूबर|आक्ट|सितंबर|सितम्बर|(अप्र|अग|दिस|फर|फ़ेब|फेब्रू|फ़र|जन|जु|जू|मा|नवं|नव|अक्टू|सितं|सित)(\.|(?=[/\\.,-])|(?![\u0900-\u097f]))|(?<=(3[0-1]|[0-2]?\d)(ली)?\s+)(अप्र|अग|दिस|फर|फ़ेब|फेब्रू|फ़र|जन|जु|जू|मा|नवं|नव|अक्टू|सितं|सित))"; + public const string AmbiguousMonthP0Regex = @"\b((((!|\.|\?|,|;|)\s+|^)मे आई)|(आई|you|he|she|we|they)\s+मे|(मे\s+((((also|not|(also not)|well)\s+)?(be|ask|contain|constitute|e-?mail|take|have|result|involve|get|work|reply|differ))|(or मे नहीं))))"; + public static readonly string DateYearRegex = $@"(?{BaseDateTime.FourDigitYearRegex}|{TwoDigitYearRegex})"; + public static readonly string YearSuffix = $@"(^(\s*तारीख)?,?\s*(सन\s+)?({DateYearRegex}|{FullTextYearRegex})|({DateYearRegex}|{FullTextYearRegex}),?\s*$)"; + public static readonly string OnRegex = $@"({DayRegex})(?=\b\s+को)"; + public const string RelaxedOnRegex = @"(?(3[0-1]|[0-2]?\d)(?:ला|रा|था|वां|वीं|वें|वाँ|वा|ठा))(?=\s+(दिन\sको|तक|के|दिन|को))"; + public const string PrefixWeekDayRegex = @"(\s*([-—–]|(,?\s*(पर|में|के|को))))"; + public static readonly string ThisRegex = $@"\b(इस(\s*(हफ्ते|हफ़्ते|(?!सप्ताहांत)सप्ताह){PrefixWeekDayRegex}?)?\s*{WeekDayRegex})|({WeekDayRegex}((\s+(का|की|के))?\s+इस\s*(हफ्ते|हफ़्ते)))"; + public static readonly string LastDateRegex = 
$@"\b({PreviousPrefixRegex}(\s*(हफ़्ते|हफ्ते|(?!सप्ताहांत)सप्ताह){PrefixWeekDayRegex}?)?\s*{WeekDayRegex})|({WeekDayRegex}(\s+((का|की|के)\s+)?(आखिरी|अंतिम)\s*(हफ़्ते|हफ्ते|(?!सप्ताहांत)सप्ताह)))"; + public static readonly string NextDateRegex = $@"\b({NextPrefixRegex}(\s*(हफ़्ते|हफ्ते|(?!सप्ताहांत)सप्ताह){PrefixWeekDayRegex}?)?\s*{WeekDayRegex})|({WeekDayRegex}((\s+(का|की|के|को))?\s+(अगला|अगले|अगली|आने\s+वाले|आने\s+वाला)\s*(हफ़्ते|हफ्ते|(?!सप्ताहांत)सप्ताह)))"; + public static readonly string SpecialDayRegex = $@"(?परसों|\bकल\b|आज)\s+से\s+(?{AllWrittenNumericalRegex})\s+दिन(\s+(बाद)?))"; + public static readonly string RelativeDayRegex = $@"\b(((the\s+)?{RelativeRegex}\s+दिन))"; + public const string SetWeekDayRegex = @"\b(?को\s+)?(?morning|afternoon|evening|night|रात|(sun|mon|tues|wednes|thurs|fri|satur)day)s\b"; + public static readonly string WeekDayOfMonthRegex = $@"(?(the\s+)?({MonthSuffixRegex}\s+)(?पहला|पहली|पहले|1st|दूसरा|दूसरे|दूसरी|2nd|तीसरा|तीसरे|तीसरी|3rd|चौथा|चौथी|4th|पाँचवाँ|पांचवां|5th|आखिरी|अंतिम)\s+(तारीख|{WeekDayRegex}))"; + public static readonly string RelativeWeekDayRegex = $@"\b((अभी\s+से\s+अगले|अब\s+से|बाद|अगले)\s+{AllWrittenNumericalRegex}\s+{WeekDayRegex}(\s+के\s+बाद)?)"; + public static readonly string SpecialDate = $@"({DayRegex}(?=\s*(को|पर)))"; + public const string DatePreposition = @"\b(को|में)"; + public static readonly string DateExtractorYearTermRegex = $@"(\s+|\s*[/\\.,-]\s*|\s+(का|की|के|को)\s+){DateYearRegex}"; + public static readonly string DateExtractor1 = $@"((({DateYearRegex}(\s+|\s*,\s*|\s+(का|की|के|को)\s+))({MonthRegex}[\.]?\s*[/\\.,-]?\s*{DayRegex}(?!(\:)?\d+)))|(\b({WeekDayRegex}\s*[\(,-]?\s*)?(({MonthRegex}[\.]?\s*[/\\.,-]?\s*{DayRegex})|(\({MonthRegex}\s*[-.]\s*{DayRegex}\)))(\s*\))?(\s*\(\s*{WeekDayRegex}\s*\))?({DateExtractorYearTermRegex}\b)?))"; + public static readonly string DateExtractor3 = 
$@"\b({WeekDayRegex}(\s+|\s*,\s*))?({DayRegex}[\.]?(\s+|\s*[/\\.,-]\s*|\s+(का|की|के|को)\s+){MonthRegex}[\.]?((\s+|\s*[/\\.,-]\s*|\s+में\s+){DateYearRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[/\\.,\- ]\s*{DayRegex}(\s+|\s*[/\\.,-]\s*|\s+(का|की|के|को)\s+){MonthRegex}[\.]?)"; + public static readonly string DateExtractor4 = $@"\b{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}[\.]?\s*[/\\\-]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; + public static readonly string DateExtractor5 = $@"\b{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; + public static readonly string DateExtractor6 = $@"({StrictRelativeRegex}\s+)?({WeekDayRegex}\s+)?{MonthNumRegex}[\-\.]{DayRegex}(?![%])(?=\s+{DatePreposition})"; + public static readonly string DateExtractor7L = $@"\b({WeekDayRegex}\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])"; + public static readonly string DateExtractor7S = $@"\b({WeekDayRegex}\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{BaseDateTime.CheckDecimalRegex}(?![%])"; + public static readonly string DateExtractor8 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({WeekDayRegex}\s+)?{DayRegex}[\\\-]{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}(?![%])"; + public static readonly string DateExtractor9L = $@"\b({WeekDayRegex}\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])"; + public static readonly string DateExtractor9S = $@"\b({WeekDayRegex}\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}(?![%])"; + public static readonly string DateExtractorA = $@"\b({WeekDayRegex}\s+)?({BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DayRegex}|{MonthRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{DayRegex}|{DayRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{MonthRegex})(?!\s*[/\\\-\.]\s*\d+)"; + public static readonly string OfMonth = $@"^\s*{MonthRegex}(?=\s*(को|की)?)"; + 
public static readonly string MonthEnd = $@"({MonthRegex}(\s+की)?|{RelativeMonthRegex})\s*$"; + public static readonly string WeekDayEnd = $@"(इस\s+)?{WeekDayRegex}\s*,?\s*$"; + public const string WeekDayStart = @"^[\.]"; + public const string RangeUnitRegex = @"\b(?वर्षों|सालों|महीनों|(?!सप्ताहांत)सप्ताह|हफ़्ते)\b"; + public const string HourNumRegex = @"(?शून्य|एक|दो|तीन|फ़ोर|चार|पांच|पाँच|छह|सात|आठ|नौ|दस|ग्यारह|बारह|तेरह|चौदह|पंद्रह|सोलह|सत्रह|अठारह|उन्नीस|बीस|इक्कीस|बाईस|तेईस|चौबीस|zero|one|two|three|five|eight|ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)?|twenty|वन|टू|थ्री|फोर|फ़ाइव|सिक्स|सेवन|एइट|नाइन|टेन|इलेवन|ट्वेल्व|थर्टीन|फ़ोर्टीन|फ़िफ़्टीन|सिक्सटीन|सेवेनटीन|एइटीन|नाइनटीन|ट्वेन्टी|ट्वेंटी)(?![\u0900-\u097f])"; + public const string MinuteNumRegex = @"(?शून्य|एक|दो|तीन|फ़ोर|चार|पांच|पाँच|छह|सात|आठ|नौ|दस|ग्यारह|बारह|तेरह|चौदह|पंद्रह|सोलह|सत्रह|अठारह|उन्नीस|बीस|इक्कीस|बाईस|तेईस|चौबीस|पच्चीस|छब्बीस|सत्ताईस|अट्ठाईस|अट्ठाइस|उनतीस|तीस|इकतीस|इकत्तीस|बत्तीस|तैंतीस|चौंतीस|पैंतीस|छ्त्तीस|सैंतीस|अड़तीस|उनतालीस|चालीस|इकतालीस|बयालीस|तैंतालीस|चौंतालीस|पैंतालीस|पैंतालिस|पेंतालिस|छियालीस|सैंतालीस|अड़तालीस|उनचास|पचास|इक्याबन|बावन|तिरेपन|चौबन|पचपन|छप्पन|सत्तावन|अट्ठावन|उनसठ|ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)?|twenty|thirty|forty|fifty|one|two|three|five|eight|टेन|इलेवन|ट्वेल्व|थर्टीन|फ़ोर्टीन|फ़िफ़्टीन|सिक्सटीन|सेवेनटीन|एइटीन|नाइनटीन|ट्वेन्टी|ट्वेंटी|वन|टू|थ्री|फोर|फ़ाइव|सिक्स|सेवन|एइट|नाइन|थर्टी|फ़ोर्टी|फ़िफ़्टी)(?![\u0900-\u097f])"; + public const string DeltaMinuteNumRegex = 
@"(?शून्य|एक|दो|तीन|फ़ोर|चार|पांच|पाँच|छह|सात|आठ|नौ|दस|ग्यारह|बारह|तेरह|चौदह|पंद्रह|सोलह|सत्रह|अठारह|उन्नीस|बीस|इक्कीस|बाईस|तेईस|चौबीस|पच्चीस|छब्बीस|सत्ताईस|अट्ठाईस|अट्ठाइस|उनतीस|तीस|इकतीस|इकत्तीस|बत्तीस|तैंतीस|चौंतीस|पैंतीस|छ्त्तीस|सैंतीस|अड़तीस|उनतालीस|चालीस|इकतालीस|बयालीस|तैंतालीस|चौंतालीस|पैंतालीस|पैंतालिस|पेंतालिस|छियालीस|सैंतालीस|अड़तालीस|उनचास|पचास|इक्याबन|बावन|तिरेपन|चौबन|पचपन|छप्पन|सत्तावन|अट्ठावन|उनसठ|ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)?|twenty|thirty|forty|fifty|one|two|three|five|eight|टेन|इलेवन|ट्वेल्व|थर्टीन|फ़ोर्टीन|फ़िफ़्टीन|सिक्सटीन|सेवेनटीन|एइटीन|नाइनटीन|ट्वेन्टी|ट्वेंटी|वन|टू|थ्री|फोर|फ़ाइव|सिक्स|सेवन|एइट|नाइन|थर्टी|फ़ोर्टी|फ़िफ़्टी)"; + public const string PmRegex = @"(?(दोपहर\s+)?खाने\s+के\s+वक़्त\s+तक|(दोपहर|सायं|शामों|शाम|संध्या|सायंकाल|evening|((आधी|अर्ध)\s+)?रात)(\s+((के(?!\s+लिए)|को|में|की)(?!\s+दौरान)|(?=(\s+(के|को|में|की)\s+दौरान))))?|आधी\s*(-\s*)?रात|अर्ध\s*(-\s*)?रात्रि|लंचटाइम|लंच\s+के\s+समय|दोपहर\s+खाने\s+के\s+समय)"; + public const string RangePmRegex = @"(?(दोपहर\s+)?खाने\s+के\s+वक़्त\s+तक|(दोपहर|सायं|शामों|शाम|संध्या|सायंकाल|evening|(अर्ध\s+)?रात)(\s+((के(?!\s+लिए)|को|में|की)(?!\s+दौरान)|(?=(\s+(के|को|में|की)\s+दौरान))))?|अर्ध\s*(-\s*)?रात्रि|लंचटाइम|लंच\s+के\s+समय|दोपहर\s+खाने\s+के\s+समय)"; + public const string PmRegexFull = @"(?(दोपहर\s+)?खाने\s+के\s+वक़्त\s+तक|(दोपहर|सायं|शामों|शाम|संध्या|सायंकाल|अपराह्न|अपराहन|evening|((आधी|अर्ध)\s+)?रात)(\s+((के(?!\s+लिए)|को|में|की)(?!\s+दौरान)|(?=(\s+(के|को|में|की)\s+दौरान))))?|आधी\s*(-\s*)?रात|अर्ध\s*(-\s*)?रात्रि|लंचटाइम|लंच\s+के\s+समय)"; + public const string AmRegex = @"(?सवेरे|पूर्वाह्न|(सुबह(?!\s+(के\s+दौरान))|morning|दिन(?!\s+हैं)|पूर्वाहन|प्रातः)(\s*(-सुबह|(?:के|को|में|की)))?)"; + public const string LunchRegex = @"(खाने\s+के\s+वक़्त\s+तक|लंचटाइम|लंच\s+के\s+समय)"; + public const string NightRegex = @"((आधी\s*(-\s*)?)?रात|अर्ध\s*(-\s*)?रात्रि)"; + public const string CommonDatePrefixRegex = @"^[\.]"; + public static readonly string LessThanOneHour = 
$@"(?सवा|पौने|साढ़े|साढ़े|{BaseDateTime.DeltaMinuteRegex}(\s+(मिनट?|मि\.?|घण्टे))|{DeltaMinuteNumRegex}(\s+(मिनट?|मि\.?|घण्टे)))"; + public static readonly string WrittenTimeRegex = $@"(?{HourNumRegex}\s+({MinuteNumRegex}|(?बीस|तीस|चालीस|पचास|twenty|thirty|fou?rty|fifty)\s+{MinuteNumRegex}))"; + public static readonly string TimePrefix = $@"(?((बजकर|बजने\s+(में|से))\s+)?{LessThanOneHour}(\s+(बाकी|पहले|होने के\s+{MinuteNumRegex}\s+मिनट\s+बाद))?)"; + public static readonly string TimeSuffix = $@"(?{AmRegex}|{PmRegex}|{OclockRegex})"; + public static readonly string TimeSuffixFull = $@"(?{AmRegex}|{PmRegexFull}|{OclockRegex})"; + public static readonly string BasicTime = $@"\b(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex}(?![%\d]))"; + public const string MidnightRegex = @"(?आधी\s*(-\s*)?रात|अर्ध\s*(-\s*)?रात्रि|मध्य\s*रात्रि)"; + public const string MidmorningRegex = @"(?(मध्य|बीच)\s*(-\s*)?सुबह|दिन\s+चढ़ने\s+से\s+पूर्व)"; + public const string MidafternoonRegex = @"(?देर\s*दोपहर|दोपहर(\s*(देर|के\s*आसपास))|भरी\s*दुपहरी)"; + public const string MiddayRegex = @"(?दिन\s*के\s*मध्य|दिन\s*के\s*बीच|दोपहर\s+में|दोपहर(\s+के\s+खाने\s+के\s+वक़्त)?(\s*(12|बारह)\s*बजे)?|मध्याह्न|दिन\s+(के\s+)?चढ़ते\s+ही|दिन\s+चढ़ने\s+पर)"; + public static readonly string MidTimeRegex = $@"(?({MidafternoonRegex}|{MiddayRegex}|{MidnightRegex}|{MidmorningRegex}))"; + public static readonly string AtRegex = $@"\b(?:(?<=\bकी\s+)(?:{MidTimeRegex}|{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\.\d))|{MidTimeRegex})"; + public static readonly string IshRegex = $@"\b({BaseDateTime.HourRegex}((-|——)?ish|दोपहर(\s*((के\s*आसपास)|देर))?|(\s+बजे)?\s+के\s+आसपास)|लगभग\s+लंच\s+के\s+समय)"; + public const string TimeUnitRegex = @"([\u0900-\u097f]{1,}|\b)(?h(ou)?r|h|(?घंटों|घंटे|घंटा|आर्स|सेकंड्स|घण्टों|मिनटों|seconds|hours)|मिनट|सेकंड|sec(ond)?)"; + public const string RestrictedTimeUnitRegex = 
@"(?hour|घंटे|घंटा|घंटों|घण्टे|घण्टों|मिनट|आर्स|minute|मि|घण्टे)"; + public const string FivesRegex = @"(?पाँच|पांच|दस|पंद्रह|बीस|पच्चीस|तीस|पैंतीस|चालीस|पैंतालीस|पचास|पचपन)"; + public static readonly string HourRegex = $@"\b{BaseDateTime.HourRegex}"; + public const string HindiHourRegex = @"(?२[०-४]|[०-१]?[०-९])(h)?"; + public const string HindiMinRegex = @"(?[०-५]?[०-९])(?!\d)"; + public const string HindiSecRegex = @"(?[०-५]?[०-९])"; + public const string PeriodHourNumRegex = @"\b(?एक|दो|तीन|फ़ोर|चार|पांच|पाँच|छह|सात|आठ|नौ|दस|ग्यारह|बारह|तेरह|चौदह|पंद्रह|सोलह|सत्रह|अठारह|उन्नीस|बीस|इक्कीस|बाईस|तेईस|चौबीस|twenty(\s+(one|two|three|four))?|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)?|zero|one|two|three|five|eight|ten|वन|टू|थ्री|फोर|फ़ाइव|सिक्स|सेवन|एइट|नाइन|टेन|इलेवन|ट्वेल्व|थर्टीन|फ़ोर्टीन|फ़िफ़्टीन|सिक्सटीन|सेवेनटीन|एइटीन|नाइनटीन|ट्वेन्टी|ट्वेंटी)(?![\u0900-\u097f])"; + public static readonly string ConnectNumRegex = $@"\b(({DescRegex}\s+)?(({TimePrefix}|{TimeSuffix})\s+)?{BaseDateTime.HourRegex}(?[0-5][0-9])\s*{DescRegex}|{DescRegex}\s+{BaseDateTime.HourRegex}(?[0-5][0-9]))"; + public static readonly string TimeRegexWithDotConnector = $@"(({DescRegex}\s+)?{BaseDateTime.HourRegex}(\s*\.\s*){BaseDateTime.MinuteRegex})"; + public static readonly string TimeRegex1 = $@"\b({DescRegex}\s+)?(({TimePrefix}|{TimeSuffix})\s+)?({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})(\s*|[.])({DescRegex}|{MinuteNumRegex})"; + public static readonly string TimeRegex2 = $@"\b({DescRegex}\s+)?(({TimePrefix}|{TimeSuffix})\s+)?(t)?({BaseDateTime.HourRegex}|{HindiHourRegex})(\s*)?:(\s*)?({BaseDateTime.MinuteRegex}|{HindiMinRegex})((\s*)?:(\s*)?({BaseDateTime.SecondRegex}|{HindiSecRegex}))?((\s*{DescRegex})|\b)"; + public static readonly string TimeRegex2A = 
$@"\b({DescRegexA}\s+)?({BaseDateTime.HourRegex}|{HindiHourRegex})(\s*)?:(\s*)?({BaseDateTime.MinuteRegex}|{HindiMinRegex})((\s*)?:(\s*)?({BaseDateTime.SecondRegex}|{HindiSecRegex}))?((\s*{DescRegexA})|\b)"; + public static readonly string TimeRegex3 = $@"\b({DescRegex}\s+)?({TimePrefix}\s+)?{BaseDateTime.HourRegex}\.{BaseDateTime.MinuteRegex}(\s*{DescRegex})"; + public static readonly string TimeRegex4 = $@"\b({DescRegex}\s+)?({TimeSuffix}\s+)?{TimePrefix}\s+{BasicTime}(\s*{DescRegex})?(\s+{TimeSuffix})?"; + public static readonly string TimeRegex5 = $@"\b({DescRegex}\s+)?{TimePrefix}\s+{BasicTime}((\s*{DescRegex})|\s+{TimePrefix}|\b)|({TimeSuffix}\s+)?({LessThanOneHour}\s+)?{BasicTime}\s+{TimePrefix}"; + public static readonly string TimeRegex6 = $@"((({DescRegex}\s+)?({TimeSuffix}\s+){BasicTime}(\s*{DescRegex})?(\s+{TimeSuffix})?)|(({DescRegex}\s+)?({TimeSuffix}\s+)?{BasicTime}(\s*{DescRegex})?(\s+{TimeSuffix})))"; + public static readonly string TimeRegex7 = $@"\b({DescRegex}\s+)?{TimeSuffixFull}(\s+{TimePrefix}\s+)?\s+{BasicTime}((\s*{DescRegex})|\b)"; + public static readonly string TimeRegex8 = $@"((?{DescRegex})\s*)?({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?\s*{TillRegex}\s*(?{DescRegex}\s*)?({HourRegex}|{PeriodHourNumRegex})(?\s*{DescRegex})?(?![/\\\-\.\d])(\s+{RangePrefixRegex})?"; + public static readonly string PureNumBetweenAnd = $@"(?{DescRegex})\s*)?(({BaseDateTime.TwoDigitHourRegex}{BaseDateTime.TwoDigitMinuteRegex})|{HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?\s*{RangeConnectorRegex}\s*(?{DescRegex}\s*)?(({BaseDateTime.TwoDigitHourRegex}{BaseDateTime.TwoDigitMinuteRegex})|{HourRegex}|{PeriodHourNumRegex})(?\s*{DescRegex})?(?![/\\\-\.\d])(\s+{RangePrefixRegex})?"; + public static readonly string SpecificTimeFromTo = 
$@"(?{DescRegexA})\s*)?(?(({TimeRegex2A}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegexA}))?))\s*{TillRegex}\s*(?{DescRegexA}\s*)?(?(({TimeRegex2A}|{TimeRegexWithDotConnector}(?\s*{DescRegexA}))|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegexA}))?))(?![/\\\-\.\d])(\s+{{RangePrefixRegex}})?"; + public static readonly string SpecificTimeBetweenAnd = $@"(?{DescRegexA})\s*)?(?(({TimeRegex2A}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegexA}))?))\s*{RangeConnectorRegex}\s*(?{DescRegexA}\s*)?(?(({TimeRegex2A}|{TimeRegexWithDotConnector}(?\s*{DescRegexA}))|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegexA}))?))(?![/\\\-\.\d])(\s+{RangePrefixRegex})?"; + public const string SuffixAfterRegex = @"\b(((को|तक)\s+)?(या|और)\s+(above|after|(उसके\s+)?बाद|greater)?(?!\s+than)?)\b"; + public const string PrepositionRegex = @"(?^(at|(को|के),?|on|of)?(\s+the)?$)"; + public const string LaterEarlyRegex = @"((?((को|की)\s+)?(जल्दी|तड़के|सुबह(?!\s+देर))(\s+से)?-?)|(?((को|की)\s+)?(प्रहर\s+)?देर(\s+से)?-?))"; + public const string MealTimeRegex = @"\b(?खाने\s+के\s+वक़्त\s+तक|लंचटाइम|लंच\s+के\s+समय)"; + public static readonly string UnspecificTimePeriodRegex = $@"({MealTimeRegex})"; + public static readonly string TimeOfDayRegex = $@"\b(?(रोज\s+)?{LaterEarlyRegex}\s*(सुबह|दोपहर|शाम|{AmRegex}|{RangePmRegex})(?:\s+(को|में))?|(रोज\s+)?(सुबह|दोपहर|{AmRegex}|{RangePmRegex}|संध्या\s+प्रहर)(\s+{LaterEarlyRegex})?(?:\s+(को|में))?|शाम|(काम|व्यापार)\s+के\s+(समय|घंटों)(\s+में)?)"; + public static readonly string SpecificTimeOfDayRegex = $@"\b(({StrictRelativeRegex}\s+{TimeOfDayRegex})|\b(tonight|आज(\s+रात)?))s?"; + public static readonly string TimeFollowedUnit = $@"^\s*{TimeUnitRegex}"; + public static readonly string TimeNumberCombinedWithUnit = $@"\b(?\d+(\.\d*)?){TimeUnitRegex}"; + public static readonly string[] BusinessHourSplitStrings = { @"business", @"hour", @"व्यापारिक", @"घण्टे", @"व्यापार", @"काम" }; + public const string 
NowRegex = @"\b(?अभी|अब|ज़ल्दी\s+से\s+ज़ल्दी|जल्द\s+से\s+जल्द|जितनी\s+जल्दी\s+हो\s+सके|यथाशीघ्र|हाल\s+ही\s+में|पहले से)"; + public const string SuffixRegex = @"^\s*(in the\s+)?(morning|सुबह|afternoon|दोपहर|दिन|evening|संध्या|शाम|सायं|सायंकाल|night|रात)\b"; + public const string NonTimeContextTokens = @"(building)"; + public const string DateTimeTimeOfDayRegex = @"\b(?morning|सुबह|afternoon|दोपहर|night|रात|evening|संध्या|शाम|सायं|सायंकाल)(?![\u0900-\u097f])"; + public static readonly string DateTimeSpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{DateTimeTimeOfDayRegex})\b|\bआज(\s+रात)?((\s+(आधी|अर्ध))?\s*रात)?)"; + public static readonly string TimeOfTodayAfterRegex = $@"^\s*(,\s*)?(in\s+)?{DateTimeSpecificTimeOfDayRegex}"; + public static readonly string TimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(at|around|लगभग|in|on))?\s*$"; + public static readonly string SimpleTimeOfTodayAfterRegex = $@"(?{DateUnitRegex}|((घं|मि)(\.|(?=[/\\.,-])))|घण्टों|घण्टे(\sऔर)?|कार्य\s?दिवस(\sऔर)?|घंटों|घंटे|घंटा|घं|आर्स|h|मिनटों|मिनट|मिन\.|min\.|min(ute)?s?|सेकंड्स|सेकंड|सेकेंड|sec(ond)?s?)"; + public const string SuffixAndRegex = @"(?\s*(और)\s+((एक)?\s+)?(?आधे|साढ़े|तिमाही)|(?साढ़े|आधे|तिमाही))"; + public const string PeriodicRegex = @"\b(?(?दैनिक|रोज़)|(?मासिक)|(?साप्ताहिक)|(?वार्षिक|ऐनुअली\s+एक\s+बार|सालाना|सालान|साल\s+में\s+एक\s+बार)|(?हफ्ते\s+में\s+दो\s+बार))"; + public static readonly string EachUnitRegex = $@"(?(प्रत्येक\s+से|हरेक|हर\s+तीसरे|हर(\s+एक)?|प्रति|रो(?=ज))(?\s+other)?\s*(?!दिन\s+(सुबह|दोपहर|संध्या|रात)){DurationUnitRegex})"; + public const string EachPrefixRegex = @"\b(?(से\s+प्रत्येक|रोजाना|रोज|डेली|हरेक|हर\s+तीसरे|हर(\s+(एक|रोज|दिन))?)\s*$)"; + public const string SetEachRegex = @"\b(?((से\s+)?प्रत्येक|प्रतिदिन|रोजाना|रोज|डेली|हरेक|हर(\s+(एक|रोज|दिन))?)\s*)"; + public const string SetLastRegex = @"(?निम्नलिखित|अगले|आगामी|इस|पिछले|पिछले|पिछले|वर्तमान)"; + public const string EachDayRegex = @"\s*(हर दिन|रोज़|रोज|प्रतिदिन)\b"; + public 
const string EachDayRegexPrefix = @"\b(हर दिन|रोज़|प्रतिदिन)\s*$"; + public static readonly string DurationFollowedUnit = $@"(^\s*{DurationUnitRegex}\s+{SuffixAndRegex})|(^\s*{SuffixAndRegex}?(\s+|-)?{DurationUnitRegex})"; + public static readonly string NumberCombinedWithDurationUnit = $@"\b(?\d+(\.\d*)?)(-)?{DurationUnitRegex}"; + public static readonly string AnUnitRegex = $@"\b((?आधे|आधा)|एक(\sऔर)?)\s+{DurationUnitRegex}"; + public const string DuringRegex = @"\b(?साल|महीनों|महीना|(?!सप्ताहांत)सप्ताह|हफ़्ते|हफ्ते|दिन|घंटे|घंटा|आर्स)\s+(के लिए|के लिये|दौरान)"; + public const string AllRegex = @"\b((?(all|full|whole|पूरे|पूरा|सारा|सारे)(\s+|-))(?year|वर्ष|साल|month|माह|महीनों|महीना|महीने|week|सप्ताह|हफ्ते|हफ़्ते|हफ्ता|day|दिन)(\s+भर)?|(?year|वर्ष|साल|month|माह|महीनों|महीना|महीने|week|सप्ताह|हफ्ते|हफ़्ते|हफ्ता|day|दिन)(?\s+भर))"; + public const string HalfRegex = @"((an?\s*)|\b)(?(साढ़े|आधे|आधा)\s+(?year|वर्ष|साल|month|महीनों|महीने|week|(?!सप्ताहांत)सप्ताह|हफ़्ते|हफ्ते|day|दिनों|दिन|hour|घंटे|घण्टे|घंटा|आर्स))"; + public const string ConjunctionRegex = @"\b((and(\s+for)?)|with)\b"; + public static readonly string HolidayRegex1 = $@"\b(({YearRegex}|{RelativeRegex}\s+(year|साल|वर्ष?))\s+((की|के)\s+)?)?(?mardi gras|(washington|mao)'s birthday|chinese new year|(new\s+(years'|year\s*'s|years?)\s+eve)|(new\s+(years'|year\s*'s|years?)(\s+day)?)|नया\s+साल|नए\s+साल\s+की\s+शाम|नववर्ष\s+की\s+पूर्वसंध्या|न्यू\s+इयर\s+ईव|may\s*day|yuan dan|christmas eve|((christmas|xmas|क्रिसमस)(\s+के)?(\s+वाले)?(\s+(day|दिन))?)|गांधी\s+जयंती(\sके\s?दिन)?|black friday|ब्लैक\s+फ़्राइड|yuandan|ईस्टर(\s+(संडे|रवि|सोम|शनि)(वार)?)?|clean monday|ash wednesday|palm sunday|maundy thursday|good friday|white\s+(sunday|monday)|trinity sunday|pentecost|corpus christi|cyber monday|सायबर\s+मंडे)(\s+(of\s+)?({YearRegex}|{RelativeRegex}\s+(year|साल|वर्ष?)))?"; + public static readonly string HolidayRegex2 = 
$@"\b(({YearRegex}|{RelativeRegex}\s+(year|साल|वर्ष?))\s+((की|के)\s+)?)?(?(thanks\s*giving|रक्षा(\s)?बंधन(\sके)?|होली|नवरा(त्रों|त्र)(\sके\s+पहले)?|वैशाखी|बैसाखी(\sहर\sसाल)?|दिवाली|all saint's|white lover|s(?:ain)?t?. (?:patrick|george)(?:')?(?:s)?|सेंट\s+पैट्रिक्स|us independence|all hallow|all souls|guy fawkes|cinco de mayo|halloween|हैलोवीन|qingming|dragon boat|april fools|tomb\s*sweeping)(\s+(day|दिन|डे))?|थैंक्स\s+गिविंग\s+के\s+दिन|थैंक्स\s*गिविंग)(\s+(of\s+)?({YearRegex}|{RelativeRegex}\s+(year|साल|वर्ष?)))?"; + public static readonly string HolidayRegex3 = $@"\b(({YearRegex}|{RelativeRegex}\s+(year|साल|वर्ष?))\s+((की|के)\s+)?)?(?(?:independence|स्वतंत्रता|स्वतंत्रता|गणतंत्र|योग|presidents(?:')?|प्रेसिडेंट्स|mlk|martin luther king( jr)?|मार्टिन\s+लूथर\s+किंग|एम\.\s+एल\.\s+के\.|canberra|ascension|columbus|tree( planting)?|arbor|labou?r|मजदूर|(अंतर्राष्ट्रीय|अंतरराष्ट्रीय)\s+मजदूर?|(international|int'l)\s+workers'?|श्रमिक|मई|mother's|mothers?|father's|फादर्स|fathers?|female|women('s)?|single|teacher'?s|youth|children|girls|lovers?|earth|पृथ्वी|inauguration|groundhog|valentine'?s|baptiste|bastille|veterans(?:')?|memorial|mid[ \-]autumn|moon|spring|lantern)\s+(day|दिवस|डे))(\s+(of\s+)?({YearRegex}|{RelativeRegex}\s+(year|साल|वर्ष?)))?"; + public const string AMTimeRegex = @"(?morning|सुबह)"; + public const string PMTimeRegex = @"\b(?afternoon|evening|night|दोपहर|शाम|रात)\b"; + public const string NowTimeRegex = @"(now|अब)"; + public const string RecentlyTimeRegex = @"(recently|previously|हाल\s+ही(\s+में)?|पहले(\s+से)?)"; + public const string AsapTimeRegex = @"(as\s+soon\s+as\s+possible|asap|ज़ल्दी\s+से\s+ज़ल्दी|जितनी\s+जल्दी\s+हो\s+सके|जल्द\s+से\s+जल्द)"; + public const string InclusiveModPrepositions = @"(?((को|में|बीच में|in|से|पर|at)\s+(अथवा|या)\s+)|(\s+(अथवा|या)\s+(को|में|बीच में|in|से|पर))|या)"; + public static readonly string BeforeRegex = 
$@"((({InclusiveModPrepositions}\s*)?\s*(?:(((से|के|उससे)\s+)?पहले(\s+नहीं(?!\sजा))?)|तक(?!\sया)|पहले\s+से|(उससे|के|से)\s+पूर्व|(बाद में नही|पूर्व)\s+से|ending\s+(with|on)|by|(un)?till?|(?as\s+late\s+as))(\s*{InclusiveModPrepositions})?\b)|(?)((?<\s*=)|<))(\s+the)?"; + public static readonly string AfterRegex = $@"((({InclusiveModPrepositions}\s*)?(((\s+)?के बाद|(starting|beginning)(\s+on)?(?!\sfrom)|(?>\s*=)|>))(\s+the)?"; + public const string SinceRegex = @"(?:(?:\b(?:(से|के) बाद\s+(अथवा|या)\s+के बराबर|से|starting\s+(?:from|on|with)|as\s+early\s+as|(any\s+time\s+)?from)\b)|(?=))"; + public const string AroundRegex = @"(?:\b(?:around|circa|लगभग|(के\s+)?आसपास))"; + public const string AgoRegex = @"((?(कल से पहले)|कल|आज)(\s+(से|के)\s*पहले)?|पहले)"; + public static readonly string LaterRegex = $@"\b(?:(?(कल|अब|आज)\s+से)\s+बाद|बाद(?!((\s+में)?\s*{OneWordPeriodRegex})|(\s+{TimeOfDayRegex}))|(?कल|आज) (से|बाद)|अब\s+से|के\s+बाद)"; + public const string BeforeAfterRegex = @"^[.]"; + public static readonly string ModPrefixRegex = $@"\b({RelativeRegex}|{AroundRegex}|{BeforeRegex}|{AfterRegex}|{SinceRegex})\b"; + public static readonly string ModSuffixRegex = $@"\b({AgoRegex}|{LaterRegex}|{BeforeAfterRegex}|{FutureSuffixRegex}|{PastSuffixRegex})\b"; + public const string InConnectorRegex = @"\b(में|को)"; + public static readonly string SinceYearSuffixRegex = $@"(^\s*{SinceRegex}(\s*(the\s+)?year\s*)?{YearSuffix})"; + public static readonly string WithinNextPrefixRegex = $@"\b(((?{NextPrefixRegex}?के)\s+)?(अंदर|भीतर))"; + public const string TodayNowRegex = @"\b(आज|अभी)(?![\u0900-\u097f])"; + public static readonly string MorningStartEndRegex = $@"(^(सुबह|{AmDescRegex}))|((सुबह|{AmDescRegex})$)"; + public static readonly string AfternoonStartEndRegex = $@"(^(दोपहर|{PmDescRegex}))|((दोपहर|{PmDescRegex})$)"; + public const string EveningStartEndRegex = @"(^(शाम))|((शाम)$)"; + public const string NightStartEndRegex = @"(^(रातों|आज)?\s*रात)|((over|to)?रात$)"; + public const 
string InexactNumberRegex = @"\b(कुछ|कई|कुछ ही|कुछेक|(?(का\s+)?(एक\s+)?जोड़ा))"; + public static readonly string InexactNumberUnitRegex = $@"({InexactNumberRegex})\s+({DurationUnitRegex})"; + public static readonly string RelativeTimeUnitRegex = $@"(?:(?:(?:{NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+({TimeUnitRegex}))|((the|my))\s+({RestrictedTimeUnitRegex}))"; + public static readonly string RelativeDurationUnitRegex = $@"(?:(?:(?<=({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+)({DurationUnitRegex}))|((the|my))\s+({RestrictedTimeUnitRegex}))"; + public static readonly string ReferenceDatePeriodRegex = $@"\b{ReferencePrefixRegex}\s+(?सप्ताहांत|सप्ताह|हफ्ते|हफ़्ते|month|महीने|year|साल|दशक|weekend|वीकेंड)"; + public const string ConnectorRegex = @"^(-|,|के\s+(लिए|लिये)\s+|t|लगभग|तारीख\s+को|around|@)$"; + public const string FromTokenRegex = @"\bसे\b"; + public const string BetweenTokenRegex = @"\bबीच$"; + public const string PluralTokenRegex = @"रोज"; + public const string FromToRegex = @"\b(से|अब से).+(से|to)\b.+"; + public const string SingleAmbiguousMonthRegex = @"^(यह\s+)?(मे|मार्च)$"; + public const string SingleAmbiguousTermsRegex = @"^(यह\s+)?(day|(?!सप्ताहांत)सप्ताह|हफ्ते|हफ़्ते|month|year)$"; + public const string UnspecificDatePeriodRegex = @"^(सप्ताहांत|सप्ताह|हफ्ते|हफ़्ते|month|year)$"; + public const string PrepositionSuffixRegex = @"\b(on|in|at|around|से|अब से|to|लगभग)$"; + public const string WrittenDayRegex = @"(?सात|दूसरी|आठ|फ़ोर|चार|पांच|पाँच|नौ|दो|छह|एक(सठ)?|तीन|सत्रह|तेरह|चौदह|अठारह|उन्नीस|पंद्रह|सोलह|ग्यारह|बारह|दस|सत्ताईस|तेईस|चौबीस|अट्ठाईस|अट्ठाइस|उनतीस|पच्चीस|छब्बीस|इक्कीस|बाईस|बीस|तीस|इकत्तीस|इकतीस)(?![\u0900-\u097f])"; + public static readonly string FlexibleDayRegex = $@"(?({WeekDayRegex}+\s+)?({WrittenDayRegex}|(उस\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))))(?:ला|ली|रा|था|वां|वीं|वें|वाँ|वा|ठा|th|nd|rd|st)?"; + public static readonly string ForTheRegex = 
$@"\b(((उस\s+)?{FlexibleDayRegex}(?=\s+(तारीख\s+)?(के|को))|((यह\s+)?{FlexibleDayRegex}(?<=(?:ला|रा|था|वां|वीं|वें|वाँ|वा|ठा))(?<=\s+को\s+)))(?\s*(,|\.|!|\?|$))?)"; + public static readonly string WeekDayAndDayOfMonthRegex = $@"\b({WeekDayRegex}\s+(से\s+)?({FlexibleDayRegex})|{FlexibleDayRegex}(\s+तारीख)?\s+{WeekDayRegex})"; + public static readonly string WeekDayAndDayRegex = $@"\b{WeekDayRegex}\s+(से\s+)?(?!(the)){DayRegex}(?!([-:]|(\s+({AmDescRegex}|{PmDescRegex}|{OclockRegex}))))\b"; + public const string RestOfDateRegex = @"\b(बाकी\s+(के\s+)?(?(?!सप्ताहांत)(?(?!सप्ताहांत)(?(दिन|तारीख|समय))(\s+आज)?|\b((इस|उस|the|my|this|चालू|आज)\s+)?(?(दिन|तारीख|समय))((\s+)?(के\s+बाकी\s+बचे\sभाग))"; + public const string AmbiguousRangeModifierPrefix = @"^[.]"; + public static readonly string NumberEndingPattern = $@"^(?:\s+((वाले|वाला)\s+)?(?meeting|appointment|अपॉइंटमेंट|conference|call|skype call)\s+(को|से)\s+(?{PeriodHourNumRegex}|{HourRegex})([\.]?$|(\.,|,|!|\?)|\s*में\s*बदल\s*देती\s*हूँ\s*))"; + public const string OneOnOneRegex = @"\b(1\s*:\s*1(?!\d))|(one (on )?one|one\s*-\s*one|one\s*:\s*one)\b"; + public static readonly string LaterEarlyPeriodRegex = $@"\b((?{OneWordPeriodRegex})|({UnspecificEndOfRangeRegex}))\s*({PrefixPeriodRegex})"; + public static readonly string WeekWithWeekDayRangeRegex = $@"\b(((?({NextPrefixRegex}|{PreviousPrefixRegex}|इस)\s+((?!सप्ताहांत)सप्ताह|हफ़्ते|हफ्ते))((\s+between\s+{WeekDayRegex}\s+and\s+{WeekDayRegex})|(\s+{WeekDayRegex}\s+और\s+{WeekDayRegex}\s+के\s+बीच)|(\s+{WeekDayRegex}\s+से\s+{WeekDayRegex})))|(((between\s+{WeekDayRegex}\s+and\s+{WeekDayRegex})|({WeekDayRegex}\s+(और|से)\s+{WeekDayRegex}\s+(के\s+बीच|तक))|(from\s+{WeekDayRegex}\s+to\s+{WeekDayRegex}))\s+(?({NextPrefixRegex}|{PreviousPrefixRegex}|this)\s+((?!सप्ताहांत)सप्ताह|हफ़्ते|हफ्ते))))"; + public const string GeneralEndingRegex = @"^\s*((\.,)|\.|,|!|\?)?\s*$"; + public const string MiddlePauseRegex = @"\s*(,)\s*"; + public const string DurationConnectorRegex = 
@"^\s*(?\s+|and|और|,)\s*$"; + public const string PrefixArticleRegex = @"\bthe\s+"; + public const string OrRegex = @"\s*((\b|,\s*)(or|and)\b|,)\s*"; + public static readonly string SpecialYearTermsRegex = $@"\b((({SpecialYearPrefixes}\s+)?(year|साल))|(cy|(?fy|sy)))"; + public static readonly string YearPlusNumberRegex = $@"\b({SpecialYearTermsRegex}\s*((?(\d{{2,4}}))|{FullTextYearRegex}))\b"; + public static readonly string NumberAsTimeRegex = $@"\b({WrittenTimeRegex}|{PeriodHourNumRegex}|{BaseDateTime.HourRegex})\b"; + public static readonly string TimeBeforeAfterRegex = $@"\b(((?<=\b(before|no later than|by|after)\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}|{MidTimeRegex}))|{MidTimeRegex})\b"; + public const string DateNumberConnectorRegex = @"^\s*(?\s+(at|को))\s*$"; + public const string DecadeRegex = @"(?((?:दस|बीस|तीस|चालीस|पचास|साठ|सत्तर|अस्सी|नब्बे)|(दो\s+हजार)?)(\s+के\s+दशक)|इस\s+सदी\s+के\s+पहले\s+दशक|20वीं\s+सदी\s+के\s+पहले\s+दशक)"; + public static readonly string DecadeWithCenturyRegex = $@"(((?\d|1\d|2\d)?(?\d0)(\s+के दशक))|(({CenturyRegex}(\s+|-)(और\s+)?)?{DecadeRegex})|({CenturyRegex}(\s+|-)(और\s+)?(?दसवें|सौवां)))"; + public static readonly string RelativeDecadeRegex = $@"\b({RelativeRegex}\s+((?[\w,\u0900-\u097f]+)\s+)?(\s+के\s+)?(दशकों?|दशक))"; + public static readonly string YearPeriodRegex = $@"((((from|during|in)\s+)?{YearRegex}\s*({TillRegex})\s*{YearRegex})|(((between)\s+){YearRegex}\s*({RangeConnectorRegex})\s*{YearRegex}))"; + public static readonly string StrictTillRegex = $@"(?\b(से)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s+of|\s*,\s*))))"; + public static readonly string StrictRangeConnectorRegex = $@"(?\b(और|से)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s+of|\s*,\s*))))"; + public static readonly string ComplexDatePeriodRegex = $@"(?:(?.+)\s*({StrictTillRegex})\s*(?.+)(\s+(तक))|(?.+)\s*({StrictRangeConnectorRegex})\s*(?.+)(\s+(के बीच)))"; + public static readonly 
string FailFastRegex = $@"{BaseDateTime.DeltaMinuteRegex}|\b(?:{BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex})|{BaseDateTime.BaseAmPmDescRegex}|\b(?:zero|{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}|{WrittenMonthRegex}|{SeasonDescRegex}|{DecadeRegex}|centur(y|ies)|weekends?|quarters?|hal(f|ves)|yesterday|to(morrow|day|night)|tmr|noonish|\d(-|——)?ish|((the\s+\w*)|\d)(th|rd|nd|st)|(mid\s*(-\s*)?)?(night|morning|afternoon|day)s?|evenings?||noon|lunch(time)?|dinner(time)?|(day|night)time|overnight|dawn|dusk|sunset|hours?|hrs?|h|घण्टे?|minutes?|मि\.?|mins?|seconds?|secs?|eo[dmy]|mardi[ -]?gras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|cinco de mayo|all (hallow|souls)|guy fawkes|(st )?patrick|hundreds?|noughties|aughts|thousands?)\b|{WeekDayRegex}|{SetWeekDayRegex}|{NowRegex}|{PeriodicRegex}|\b({DateUnitRegex}|{ImplicitDayRegex})"; + public static readonly Dictionary UnitMap = new Dictionary + { + { @"दशकों", @"10Y" }, + { @"दशक", @"10Y" }, + { @"साल", @"Y" }, + { @"वर्षों", @"Y" }, + { @"वर्ष", @"Y" }, + { @"महीनों", @"MON" }, + { @"महीना", @"MON" }, + { @"महीने", @"MON" }, + { @"माह", @"MON" }, + { @"पखवाड़ा", @"2W" }, + { @"पखवाड़े", @"2W" }, + { @"पखवाड़े", @"2W" }, + { @"हफ़्तों", @"W" }, + { @"सप्ताह", @"W" }, + { @"हफ़्ते", @"W" }, + { @"हफ्ते", @"W" }, + { @"हफ्ता", @"W" }, + { @"हफ़्ता", @"W" }, + { @"हफ्तों", @"W" }, + { @"दिन", @"D" }, + { @"दिनों", @"D" }, + { @"ज", @"D" }, + { @"hours", @"H" }, + { @"घंटे", @"H" }, + { @"घंटा", @"H" }, + { @"hour", @"H" }, + { @"घण्टे", @"H" }, + { @"घण्टों", @"H" }, + { @"घंटों", @"H" }, + { @"hrs", @"H" }, + { @"आर्स", @"H" }, + { @"hr", @"H" }, + { @"h", @"H" }, + { @"घं", @"H" }, + { @"घं.", @"H" }, + { @"minutes", @"M" }, + { @"मिनटों", @"M" }, + { @"minute", @"M" }, + { @"मिनट", @"M" }, + { @"मि", @"M" }, + { @"मि.", @"M" }, + { @"मिन", @"M" }, + { @"mins", @"M" }, + { @"min", @"M" }, + { @"seconds", @"S" }, + { @"second", @"S" }, 
+ { @"सेकंड", @"S" }, + { @"सेकेंड", @"S" }, + { @"secs", @"S" }, + { @"sec", @"S" }, + { @"सेकंड्स", @"S" } + }; + public static readonly Dictionary UnitValueMap = new Dictionary + { + { @"दशकों", 315360000 }, + { @"दशक", 315360000 }, + { @"साल", 31536000 }, + { @"वर्षों", 31536000 }, + { @"वर्ष", 31536000 }, + { @"महीना", 2592000 }, + { @"महीने", 2592000 }, + { @"महीनों", 2592000 }, + { @"माह", 2592000 }, + { @"पखवाड़ा", 1209600 }, + { @"पखवाड़े", 1209600 }, + { @"पखवाड़े", 1209600 }, + { @"हफ़्तों", 604800 }, + { @"सप्ताह", 604800 }, + { @"हफ्ते", 604800 }, + { @"हफ़्ते", 604800 }, + { @"हफ्ता", 604800 }, + { @"हफ़्ता", 604800 }, + { @"हफ्तों", 604800 }, + { @"दिन", 86400 }, + { @"दिनों", 86400 }, + { @"दिवस", 86400 }, + { @"ज", 86400 }, + { @"hours", 3600 }, + { @"घंटे", 3600 }, + { @"घंटा", 3600 }, + { @"hour", 3600 }, + { @"घण्टे", 3600 }, + { @"घण्टों", 3600 }, + { @"घंटों", 3600 }, + { @"hrs", 3600 }, + { @"आर्स", 3600 }, + { @"hr", 3600 }, + { @"h", 3600 }, + { @"घं", 3600 }, + { @"घं.", 3600 }, + { @"minutes", 60 }, + { @"मिनटों", 60 }, + { @"minute", 60 }, + { @"मिनट", 60 }, + { @"मि", 60 }, + { @"मि.", 60 }, + { @"मिन", 60 }, + { @"mins", 60 }, + { @"min", 60 }, + { @"seconds", 1 }, + { @"second", 1 }, + { @"सेकंड", 1 }, + { @"सेकेंड", 1 }, + { @"सेकंड्स", 1 }, + { @"secs", 1 }, + { @"sec", 1 } + }; + public static readonly Dictionary SpecialYearPrefixesMap = new Dictionary + { + { @"fiscal", @"FY" }, + { @"school", @"SY" }, + { @"fy", @"FY" }, + { @"sy", @"SY" } + }; + public static readonly Dictionary SeasonMap = new Dictionary + { + { @"spring", @"SP" }, + { @"वसंत", @"SP" }, + { @"summer", @"SU" }, + { @"गर्मी", @"SU" }, + { @"गर्मियों", @"SU" }, + { @"fall", @"FA" }, + { @"autumn", @"FA" }, + { @"शरद\s+ऋतु", @"FA" }, + { @"winter", @"WI" }, + { @"सर्दियों", @"WI" } + }; + public static readonly Dictionary SeasonValueMap = new Dictionary + { + { @"SP", 3 }, + { @"SU", 6 }, + { @"FA", 9 }, + { @"WI", 12 } + }; + public static readonly Dictionary 
CardinalMap = new Dictionary + { + { @"पहला", 1 }, + { @"पहली", 1 }, + { @"पहले", 1 }, + { @"1st", 1 }, + { @"दूसरा", 2 }, + { @"दूसरे", 2 }, + { @"दूसरी", 2 }, + { @"2nd", 2 }, + { @"तीसरा", 3 }, + { @"तीसरे", 3 }, + { @"तीसरी", 3 }, + { @"3rd", 3 }, + { @"चौथा", 4 }, + { @"चौथी", 4 }, + { @"4th", 4 }, + { @"पाँचवाँ", 5 }, + { @"पांचवां", 5 }, + { @"5th", 5 }, + { @"first", 1 }, + { @"second", 2 }, + { @"third", 3 }, + { @"fourth", 4 }, + { @"fifth", 5 } + }; + public static readonly Dictionary DayOfWeek = new Dictionary + { + { @"सोमवार", 1 }, + { @"मंगलवार", 2 }, + { @"म्गलवार", 2 }, + { @"बुधवार", 3 }, + { @"गुरुवार", 4 }, + { @"वृहस्पतिवार", 4 }, + { @"बृहस्पतिवार", 4 }, + { @"वीरवार", 4 }, + { @"शुक्रवार", 5 }, + { @"शनिवार", 6 }, + { @"रविवार", 0 }, + { @"इतवार", 0 }, + { @"एतवार", 0 }, + { @"सोम", 1 }, + { @"मंडे", 1 }, + { @"मंगल", 2 }, + { @"म्गल", 2 }, + { @"बुध", 3 }, + { @"गुरु", 4 }, + { @"बीफ़े", 4 }, + { @"शुक्र", 5 }, + { @"शनि", 6 }, + { @"रवि", 0 }, + { @"संडे", 0 }, + { @"monday", 1 }, + { @"tuesday", 2 }, + { @"wednesday", 3 }, + { @"thursday", 4 }, + { @"friday", 5 }, + { @"saturday", 6 }, + { @"sunday", 0 }, + { @"mon", 1 }, + { @"tue", 2 }, + { @"tues", 2 }, + { @"wed", 3 }, + { @"wedn", 3 }, + { @"weds", 3 }, + { @"thu", 4 }, + { @"thur", 4 }, + { @"thurs", 4 }, + { @"fri", 5 }, + { @"sat", 6 }, + { @"sun", 0 } + }; + public static readonly Dictionary MonthOfYear = new Dictionary + { + { @"जनवरी", 1 }, + { @"फ़रवरी", 2 }, + { @"फरवरी", 2 }, + { @"फेब्रूवरी", 2 }, + { @"मार्च", 3 }, + { @"अप्रैल", 4 }, + { @"अप्रील", 4 }, + { @"मई", 5 }, + { @"जून", 6 }, + { @"जुलाई", 7 }, + { @"अगस्त", 8 }, + { @"सितम्बर", 9 }, + { @"सितंबर", 9 }, + { @"अक्तूबर", 10 }, + { @"अक्टूबर", 10 }, + { @"अक्टू", 10 }, + { @"नवंबर", 11 }, + { @"दिसंबर", 12 }, + { @"दिसम्बर", 12 }, + { @"जन", 1 }, + { @"जन.", 1 }, + { @"फ़ेब", 2 }, + { @"फ़ेब.", 2 }, + { @"फेब", 2 }, + { @"फेब.", 2 }, + { @"फ़र", 2 }, + { @"फ़र.", 2 }, + { @"फर", 2 }, + { @"फर.", 2 }, + { @"मा.", 3 
}, + { @"मा", 3 }, + { @"अप्र.", 4 }, + { @"अप्र", 4 }, + { @"जू.", 6 }, + { @"जु.", 7 }, + { @"जुल", 7 }, + { @"जुल.", 7 }, + { @"अग", 8 }, + { @"अग.", 8 }, + { @"सित", 9 }, + { @"सित.", 9 }, + { @"सेप्ट", 9 }, + { @"सेप्ट.", 9 }, + { @"सितं", 9 }, + { @"सितं.", 9 }, + { @"आक्ट.", 10 }, + { @"आक्ट", 10 }, + { @"अक्टू.", 10 }, + { @"नवं", 11 }, + { @"नवं.", 11 }, + { @"नव", 11 }, + { @"नव.", 11 }, + { @"दिस", 12 }, + { @"दिस.", 12 }, + { @"january", 1 }, + { @"february", 2 }, + { @"march", 3 }, + { @"april", 4 }, + { @"may", 5 }, + { @"june", 6 }, + { @"july", 7 }, + { @"august", 8 }, + { @"september", 9 }, + { @"october", 10 }, + { @"november", 11 }, + { @"december", 12 }, + { @"jan", 1 }, + { @"feb", 2 }, + { @"mar", 3 }, + { @"apr", 4 }, + { @"jun", 6 }, + { @"jul", 7 }, + { @"aug", 8 }, + { @"sep", 9 }, + { @"sept", 9 }, + { @"oct", 10 }, + { @"nov", 11 }, + { @"dec", 12 }, + { @"1", 1 }, + { @"2", 2 }, + { @"3", 3 }, + { @"4", 4 }, + { @"5", 5 }, + { @"6", 6 }, + { @"7", 7 }, + { @"8", 8 }, + { @"9", 9 }, + { @"10", 10 }, + { @"11", 11 }, + { @"12", 12 }, + { @"01", 1 }, + { @"02", 2 }, + { @"03", 3 }, + { @"04", 4 }, + { @"05", 5 }, + { @"06", 6 }, + { @"07", 7 }, + { @"08", 8 }, + { @"09", 9 } + }; + public static readonly Dictionary Numbers = new Dictionary + { + { @"शून्य", 0 }, + { @"एक", 1 }, + { @"दो", 2 }, + { @"तीन", 3 }, + { @"फ़ोर", 4 }, + { @"चार", 4 }, + { @"पाँच", 5 }, + { @"पांच", 5 }, + { @"छह", 6 }, + { @"सात", 7 }, + { @"आठ", 8 }, + { @"नौ", 9 }, + { @"दस", 10 }, + { @"ग्यारह", 11 }, + { @"बारह", 12 }, + { @"दर्जन", 12 }, + { @"दर्जनों", 12 }, + { @"डज़न", 12 }, + { @"तेरह", 13 }, + { @"चौदह", 14 }, + { @"पंद्रह", 15 }, + { @"सोलह", 16 }, + { @"सत्रह", 17 }, + { @"अठारह", 18 }, + { @"उन्नीस", 19 }, + { @"बीस", 20 }, + { @"इक्कीस", 21 }, + { @"बाईस", 22 }, + { @"बाइस", 22 }, + { @"तेईस", 23 }, + { @"तेइस", 23 }, + { @"चौबीस", 24 }, + { @"पच्चीस", 25 }, + { @"छब्बीस", 26 }, + { @"सत्ताईस", 27 }, + { @"सत्ताइस", 27 }, + { @"अट्ठाईस", 28 }, + { 
@"अट्ठाइस", 28 }, + { @"उनतीस", 29 }, + { @"तीस", 30 }, + { @"इकतीस", 31 }, + { @"इकत्तीस", 31 }, + { @"बत्तीस", 32 }, + { @"तैंतीस", 33 }, + { @"चौंतीस", 34 }, + { @"पैंतीस", 35 }, + { @"पैंतीसवां", 35 }, + { @"छ्त्तीस", 36 }, + { @"सैंतीस", 37 }, + { @"अड़तीस", 38 }, + { @"उनतालीस", 39 }, + { @"चालीस", 40 }, + { @"इकतालीस", 41 }, + { @"बयालीस", 42 }, + { @"तैंतालीस", 43 }, + { @"चौंतालीस", 44 }, + { @"पैंतालीस", 45 }, + { @"पैंतालिस", 45 }, + { @"पेंतालिस", 45 }, + { @"छियालीस", 46 }, + { @"सैंतालीस", 47 }, + { @"अड़तालीस", 48 }, + { @"उनचास", 49 }, + { @"पचास", 50 }, + { @"इक्याबन", 51 }, + { @"बावन", 52 }, + { @"तिरेपन", 53 }, + { @"चौबन", 54 }, + { @"पचपन", 55 }, + { @"छप्पन", 56 }, + { @"सत्तावन", 57 }, + { @"अट्ठावन", 58 }, + { @"उनसठ", 59 }, + { @"साठ", 60 }, + { @"एकसठ", 61 }, + { @"इकसठ", 61 }, + { @"बासठ", 62 }, + { @"तिरसठ", 63 }, + { @"चौंसठ", 64 }, + { @"पैंसठ", 65 }, + { @"छियासठ", 66 }, + { @"सड़सठ", 67 }, + { @"सड़सठ", 67 }, + { @"अड़सठ", 68 }, + { @"उनहत्तर", 69 }, + { @"सत्तर", 70 }, + { @"इकहत्तर", 71 }, + { @"बहत्तर", 72 }, + { @"तिहत्तर", 73 }, + { @"चौहत्तर", 74 }, + { @"पचहत्तर", 75 }, + { @"छिहत्तर", 76 }, + { @"सतहत्तर", 77 }, + { @"अठहत्तर", 78 }, + { @"उनासी", 79 }, + { @"अस्सी", 80 }, + { @"इक्यासी", 81 }, + { @"बयासी", 82 }, + { @"तिरासी", 83 }, + { @"चौरासी", 84 }, + { @"पचासी", 85 }, + { @"छियासी", 86 }, + { @"सतासी", 87 }, + { @"अठासी", 88 }, + { @"नवासी", 89 }, + { @"नब्बे", 90 }, + { @"इक्यानबे", 91 }, + { @"बानवे", 92 }, + { @"तिरानवे", 93 }, + { @"चौरानवे", 94 }, + { @"पचानवे", 95 }, + { @"पंचानबे", 95 }, + { @"छियानवे", 96 }, + { @"सत्तानवे", 97 }, + { @"अट्ठानवे", 98 }, + { @"निन्यानवे", 99 }, + { @"सौ", 100 }, + { @"वन", 1 }, + { @"टू", 2 }, + { @"थ्री", 3 }, + { @"फोर", 4 }, + { @"फ़ाइव", 5 }, + { @"सिक्स", 6 }, + { @"सेवन", 7 }, + { @"एइट", 8 }, + { @"नैन", 9 }, + { @"टेन", 10 }, + { @"इलेवन", 11 }, + { @"ट्वेल्व", 12 }, + { @"थर्टीन", 13 }, + { @"फ़ोर्टीन", 14 }, + { @"फ़िफ़्टीन", 15 }, + { @"सिक्सटीन", 16 }, + { 
@"सेवेनटीन", 17 }, + { @"एइटीन", 18 }, + { @"नईनटीन", 19 }, + { @"ट्वेन्टी", 20 }, + { @"ट्वेंटी", 20 }, + { @"थर्टी", 30 }, + { @"फ़ोर्टी", 40 }, + { @"फ़िफ़्टी", 50 }, + { @"सिक्सटी", 60 }, + { @"सेवेंटी", 70 }, + { @"एइट्टी", 80 }, + { @"नैनटी", 90 }, + { @"०", 0 }, + { @"१", 1 }, + { @"२", 2 }, + { @"३", 3 }, + { @"४", 4 }, + { @"५", 5 }, + { @"६", 6 }, + { @"७", 7 }, + { @"८", 8 }, + { @"९", 9 }, + { @"००", 0 }, + { @"०१", 1 }, + { @"०२", 2 }, + { @"०३", 3 }, + { @"०४", 4 }, + { @"०५", 5 }, + { @"०६", 6 }, + { @"०७", 7 }, + { @"०८", 8 }, + { @"०९", 9 }, + { @"१०", 10 }, + { @"११", 11 }, + { @"१२", 12 }, + { @"१३", 13 }, + { @"१४", 14 }, + { @"१५", 15 }, + { @"१६", 16 }, + { @"१७", 17 }, + { @"१८", 18 }, + { @"१९", 19 }, + { @"२०", 20 }, + { @"२१", 21 }, + { @"२२", 22 }, + { @"२३", 23 }, + { @"२४", 24 }, + { @"२५", 25 }, + { @"२६", 26 }, + { @"२७", 27 }, + { @"२८", 28 }, + { @"२९", 29 }, + { @"३०", 30 }, + { @"३१", 31 }, + { @"३२", 32 }, + { @"३३", 33 }, + { @"३४", 34 }, + { @"३५", 35 }, + { @"३६", 36 }, + { @"३७", 37 }, + { @"३८", 38 }, + { @"३९", 39 }, + { @"४०", 40 }, + { @"४१", 41 }, + { @"४२", 42 }, + { @"४३", 43 }, + { @"४४", 44 }, + { @"४५", 45 }, + { @"४६", 46 }, + { @"४७", 47 }, + { @"४८", 48 }, + { @"४९", 49 }, + { @"५०", 50 }, + { @"५१", 51 }, + { @"५२", 52 }, + { @"५३", 53 }, + { @"५४", 54 }, + { @"५५", 55 }, + { @"५६", 56 }, + { @"५७", 57 }, + { @"५८", 58 }, + { @"५९", 59 }, + { @"६०", 60 }, + { @"६१", 61 }, + { @"६२", 62 }, + { @"६३", 63 }, + { @"६४", 64 }, + { @"६५", 65 }, + { @"६६", 66 }, + { @"६७", 67 }, + { @"६८", 68 }, + { @"६९", 69 }, + { @"७०", 70 }, + { @"७१", 71 }, + { @"७२", 72 }, + { @"७३", 73 }, + { @"७४", 74 }, + { @"७५", 75 }, + { @"७६", 76 }, + { @"७७", 77 }, + { @"७८", 78 }, + { @"७९", 79 }, + { @"८०", 80 }, + { @"८१", 81 }, + { @"८२", 82 }, + { @"८३", 83 }, + { @"८४", 84 }, + { @"८५", 85 }, + { @"८६", 86 }, + { @"८७", 87 }, + { @"८८", 88 }, + { @"८९", 89 }, + { @"९०", 90 }, + { @"९१", 91 }, + { @"९२", 92 }, + { @"९३", 93 }, + { 
@"९४", 94 }, + { @"९५", 95 }, + { @"९६", 96 }, + { @"९७", 97 }, + { @"९८", 98 }, + { @"९९", 99 }, + { @"१००", 100 }, + { @"zero", 0 }, + { @"one", 1 }, + { @"a", 1 }, + { @"an", 1 }, + { @"two", 2 }, + { @"three", 3 }, + { @"four", 4 }, + { @"five", 5 }, + { @"six", 6 }, + { @"seven", 7 }, + { @"eight", 8 }, + { @"nine", 9 }, + { @"ten", 10 }, + { @"eleven", 11 }, + { @"twelve", 12 }, + { @"thirteen", 13 }, + { @"fourteen", 14 }, + { @"fifteen", 15 }, + { @"sixteen", 16 }, + { @"seventeen", 17 }, + { @"eighteen", 18 }, + { @"nineteen", 19 }, + { @"twenty", 20 }, + { @"twenty one", 21 }, + { @"twenty two", 22 }, + { @"twenty three", 23 }, + { @"twenty four", 24 }, + { @"twenty five", 25 }, + { @"twenty six", 26 }, + { @"twenty seven", 27 }, + { @"twenty eight", 28 }, + { @"twenty nine", 29 }, + { @"thirty", 30 }, + { @"thirty one", 31 }, + { @"thirty two", 32 }, + { @"thirty three", 33 }, + { @"thirty four", 34 }, + { @"thirty five", 35 }, + { @"thirty six", 36 }, + { @"thirty seven", 37 }, + { @"thirty eight", 38 }, + { @"thirty nine", 39 }, + { @"forty", 40 }, + { @"forty one", 41 }, + { @"forty two", 42 }, + { @"forty three", 43 }, + { @"forty four", 44 }, + { @"forty five", 45 }, + { @"forty six", 46 }, + { @"forty seven", 47 }, + { @"forty eight", 48 }, + { @"forty nine", 49 }, + { @"fifty", 50 }, + { @"fifty one", 51 }, + { @"fifty two", 52 }, + { @"fifty three", 53 }, + { @"fifty four", 54 }, + { @"fifty five", 55 }, + { @"fifty six", 56 }, + { @"fifty seven", 57 }, + { @"fifty eight", 58 }, + { @"fifty nine", 59 }, + { @"sixty", 60 }, + { @"sixty one", 61 }, + { @"sixty two", 62 }, + { @"sixty three", 63 }, + { @"sixty four", 64 }, + { @"sixty five", 65 }, + { @"sixty six", 66 }, + { @"sixty seven", 67 }, + { @"sixty eight", 68 }, + { @"sixty nine", 69 }, + { @"seventy", 70 }, + { @"seventy one", 71 }, + { @"seventy two", 72 }, + { @"seventy three", 73 }, + { @"seventy four", 74 }, + { @"seventy five", 75 }, + { @"seventy six", 76 }, + { @"seventy seven", 77 
}, + { @"seventy eight", 78 }, + { @"seventy nine", 79 }, + { @"eighty", 80 }, + { @"eighty one", 81 }, + { @"eighty two", 82 }, + { @"eighty three", 83 }, + { @"eighty four", 84 }, + { @"eighty five", 85 }, + { @"eighty six", 86 }, + { @"eighty seven", 87 }, + { @"eighty eight", 88 }, + { @"eighty nine", 89 }, + { @"ninety", 90 }, + { @"ninety one", 91 }, + { @"ninety two", 92 }, + { @"ninety three", 93 }, + { @"ninety four", 94 }, + { @"ninety five", 95 }, + { @"ninety six", 96 }, + { @"ninety seven", 97 }, + { @"ninety eight", 98 }, + { @"ninety nine", 99 }, + { @"one hundred", 100 } + }; + public static readonly Dictionary DayOfMonth = new Dictionary + { + { @"1st", 1 }, + { @"2nd", 2 }, + { @"3rd", 3 }, + { @"4th", 4 }, + { @"5th", 5 }, + { @"6th", 6 }, + { @"7th", 7 }, + { @"8th", 8 }, + { @"9th", 9 }, + { @"10th", 10 }, + { @"11th", 11 }, + { @"11st", 11 }, + { @"12th", 12 }, + { @"12nd", 12 }, + { @"13th", 13 }, + { @"13rd", 13 }, + { @"14th", 14 }, + { @"15th", 15 }, + { @"16th", 16 }, + { @"17th", 17 }, + { @"18th", 18 }, + { @"19th", 19 }, + { @"20th", 20 }, + { @"21st", 21 }, + { @"21th", 21 }, + { @"22nd", 22 }, + { @"22th", 22 }, + { @"23rd", 23 }, + { @"23th", 23 }, + { @"24th", 24 }, + { @"25th", 25 }, + { @"26th", 26 }, + { @"27th", 27 }, + { @"28th", 28 }, + { @"29th", 29 }, + { @"30th", 30 }, + { @"31st", 31 }, + { @"01st", 1 }, + { @"02nd", 2 }, + { @"03rd", 3 }, + { @"04th", 4 }, + { @"05th", 5 }, + { @"06th", 6 }, + { @"07th", 7 }, + { @"08th", 8 }, + { @"09th", 9 } + }; + public static readonly Dictionary DoubleNumbers = new Dictionary + { + { @"half", 0.5 }, + { @"साढ़े", 0.5 }, + { @"साढ़े", 0.5 }, + { @"quarter", 0.25 }, + { @"तिमाही", 0.25 }, + { @"ढाई", 2.5 } + }; + public static readonly Dictionary> HolidayNames = new Dictionary> + { + { @"indianindependence", new string[] { @"स्वतंत्रतादिवस", @"स्वतंत्रता", @"आजादी" } }, + { @"republicday", new string[] { @"गणतंत्रदिवस", @"गणतंत्र", @"गणतंत्रता" } }, + { @"yogaday", new string[] { 
@"योगदिवस", @"योग" } }, + { @"holi", new string[] { @"होली" } }, + { @"diwali", new string[] { @"दिवाली" } }, + { @"gandhijayanti", new string[] { @"गांधीजयंती", @"गांधीजयंतीकेदिन" } }, + { @"easterday", new string[] { @"easterday", @"easter", @"eastersunday", @"ईस्टरसंडे" } }, + { @"ashwednesday", new string[] { @"ashwednesday" } }, + { @"palmsunday", new string[] { @"palmsunday" } }, + { @"maundythursday", new string[] { @"maundythursday" } }, + { @"goodfriday", new string[] { @"goodfriday" } }, + { @"eastersaturday", new string[] { @"eastersaturday" } }, + { @"eastermonday", new string[] { @"eastermonday" } }, + { @"ascensionday", new string[] { @"ascensionday" } }, + { @"whitesunday", new string[] { @"whitesunday", @"pentecost", @"pentecostday" } }, + { @"whitemonday", new string[] { @"whitemonday" } }, + { @"trinitysunday", new string[] { @"trinitysunday" } }, + { @"corpuschristi", new string[] { @"corpuschristi" } }, + { @"earthday", new string[] { @"earthday", @"पृथ्वीदिवस" } }, + { @"fathers", new string[] { @"fatherday", @"fathersday", @"फादर्सडे" } }, + { @"mothers", new string[] { @"motherday", @"mothersday" } }, + { @"thanksgiving", new string[] { @"thanksgivingday", @"thanksgiving", @"थैंक्सगिविंगकेदिन", @"थैंक्सगिविंग" } }, + { @"blackfriday", new string[] { @"blackfriday", @"ब्लैकफ़्राइड" } }, + { @"cybermonday", new string[] { @"cybermonday", @"सायबरमंडे" } }, + { @"martinlutherking", new string[] { @"mlkday", @"martinlutherkingday", @"martinlutherkingjrday", @"मार्टिनलूथरकिंगदिवस", @"एमएलकेदिवस" } }, + { @"washingtonsbirthday", new string[] { @"washingtonsbirthday", @"washingtonbirthday", @"presidentsday", @"प्रेसिडेंट्सडे" } }, + { @"canberra", new string[] { @"canberraday" } }, + { @"labour", new string[] { @"labourday", @"laborday" } }, + { @"columbus", new string[] { @"columbusday" } }, + { @"memorial", new string[] { @"memorialday" } }, + { @"yuandan", new string[] { @"yuandan" } }, + { @"maosbirthday", new string[] { @"maosbirthday" } }, + { 
@"teachersday", new string[] { @"teachersday", @"teacherday" } }, + { @"singleday", new string[] { @"singleday" } }, + { @"allsaintsday", new string[] { @"allsaintsday" } }, + { @"youthday", new string[] { @"youthday" } }, + { @"childrenday", new string[] { @"childrenday", @"childday" } }, + { @"femaleday", new string[] { @"femaleday" } }, + { @"treeplantingday", new string[] { @"treeplantingday" } }, + { @"arborday", new string[] { @"arborday" } }, + { @"girlsday", new string[] { @"girlsday" } }, + { @"whiteloverday", new string[] { @"whiteloverday" } }, + { @"loverday", new string[] { @"loverday" } }, + { @"christmas", new string[] { @"christmasday", @"christmas", @"क्रिसमस", @"क्रिसमसकेदिन", @"क्रिसमसकेवालेदिन" } }, + { @"xmas", new string[] { @"xmasday", @"xmas" } }, + { @"newyear", new string[] { @"newyear", @"नयासाल" } }, + { @"newyearday", new string[] { @"newyearday" } }, + { @"newyearsday", new string[] { @"newyearsday" } }, + { @"inaugurationday", new string[] { @"inaugurationday" } }, + { @"groundhougday", new string[] { @"groundhougday" } }, + { @"valentinesday", new string[] { @"valentinesday" } }, + { @"stpatrickday", new string[] { @"stpatrickday", @"stpatricksday", @"stpatrick", @"सेंटपैट्रिक्सडे", @"सेंटपैट्रिक्स" } }, + { @"aprilfools", new string[] { @"aprilfools" } }, + { @"stgeorgeday", new string[] { @"stgeorgeday" } }, + { @"mayday", new string[] { @"mayday", @"intlworkersday", @"internationalworkersday", @"मजदूरदिवस", @"श्रमिकदिवस", @"अंतर्राष्ट्रीयमजदूरदिवस", @"मईदिवस" } }, + { @"cincodemayoday", new string[] { @"cincodemayoday" } }, + { @"baptisteday", new string[] { @"baptisteday" } }, + { @"usindependenceday", new string[] { @"usindependenceday" } }, + { @"independenceday", new string[] { @"independenceday" } }, + { @"bastilleday", new string[] { @"bastilleday" } }, + { @"halloweenday", new string[] { @"halloweenday", @"halloween", @"हैलोवीन" } }, + { @"allhallowday", new string[] { @"allhallowday" } }, + { @"allsoulsday", new string[] { 
@"allsoulsday" } }, + { @"guyfawkesday", new string[] { @"guyfawkesday" } }, + { @"veteransday", new string[] { @"veteransday" } }, + { @"christmaseve", new string[] { @"christmaseve" } }, + { @"newyeareve", new string[] { @"newyearseve", @"newyeareve", @"नएसालकीशाम", @"नववर्षकीपूर्वसंध्या", @"न्यूइयरईव" } }, + { @"rakshabandhan", new string[] { @"रक्षाबंधनकेदिन", @"रक्षाबंधन" } }, + { @"vaishakhi", new string[] { @"बैसाखीहरसाल", @"बैसाखी", @"वैशाखी" } } + }; + public static readonly Dictionary> HoliDiwaliRakshabandhanBaisakhiDates = new Dictionary> + { + { 1900, new int[] { 3, 16, 10, 23, 8, 10, 4, 13 } }, + { 1901, new int[] { 3, 05, 11, 11, 8, 29, 4, 13 } }, + { 1902, new int[] { 3, 24, 10, 31, 8, 19, 4, 13 } }, + { 1903, new int[] { 3, 13, 10, 20, 8, 08, 4, 13 } }, + { 1904, new int[] { 3, 02, 11, 07, 8, 25, 4, 13 } }, + { 1905, new int[] { 3, 21, 10, 28, 8, 15, 4, 13 } }, + { 1906, new int[] { 3, 10, 10, 17, 8, 04, 4, 13 } }, + { 1907, new int[] { 3, 29, 11, 05, 8, 23, 4, 13 } }, + { 1908, new int[] { 3, 18, 10, 25, 8, 12, 4, 13 } }, + { 1909, new int[] { 3, 07, 11, 13, 8, 31, 4, 13 } }, + { 1910, new int[] { 3, 25, 11, 02, 8, 20, 4, 13 } }, + { 1911, new int[] { 3, 14, 10, 22, 8, 09, 4, 13 } }, + { 1912, new int[] { 3, 03, 11, 09, 8, 27, 4, 13 } }, + { 1913, new int[] { 3, 22, 10, 29, 8, 16, 4, 13 } }, + { 1914, new int[] { 3, 12, 10, 19, 8, 05, 4, 13 } }, + { 1915, new int[] { 3, 01, 11, 07, 8, 24, 4, 13 } }, + { 1916, new int[] { 3, 19, 10, 26, 8, 13, 4, 13 } }, + { 1917, new int[] { 3, 08, 11, 14, 8, 03, 4, 13 } }, + { 1918, new int[] { 3, 27, 11, 03, 8, 22, 4, 13 } }, + { 1919, new int[] { 3, 16, 10, 23, 8, 11, 4, 13 } }, + { 1920, new int[] { 3, 04, 11, 10, 8, 29, 4, 13 } }, + { 1921, new int[] { 3, 23, 10, 30, 8, 18, 4, 13 } }, + { 1922, new int[] { 3, 13, 10, 20, 8, 07, 4, 13 } }, + { 1923, new int[] { 3, 03, 11, 08, 8, 26, 4, 13 } }, + { 1924, new int[] { 3, 21, 10, 28, 8, 14, 4, 13 } }, + { 1925, new int[] { 3, 10, 10, 17, 8, 04, 4, 13 } }, + { 1926, 
new int[] { 3, 29, 11, 05, 8, 23, 4, 13 } }, + { 1927, new int[] { 3, 18, 10, 25, 8, 13, 4, 14 } }, + { 1928, new int[] { 3, 06, 11, 12, 8, 30, 4, 13 } }, + { 1929, new int[] { 3, 25, 11, 01, 8, 20, 4, 13 } }, + { 1930, new int[] { 3, 14, 10, 21, 8, 09, 4, 13 } }, + { 1931, new int[] { 3, 04, 11, 09, 8, 28, 4, 14 } }, + { 1932, new int[] { 3, 22, 10, 29, 8, 16, 4, 13 } }, + { 1933, new int[] { 3, 12, 10, 19, 8, 05, 4, 13 } }, + { 1934, new int[] { 3, 01, 11, 07, 8, 24, 4, 13 } }, + { 1935, new int[] { 3, 20, 10, 27, 8, 14, 4, 14 } }, + { 1936, new int[] { 3, 08, 11, 14, 8, 03, 4, 13 } }, + { 1937, new int[] { 3, 26, 11, 03, 8, 21, 4, 13 } }, + { 1938, new int[] { 3, 16, 10, 23, 8, 11, 4, 13 } }, + { 1939, new int[] { 3, 05, 11, 11, 8, 29, 4, 14 } }, + { 1940, new int[] { 3, 23, 10, 30, 8, 17, 4, 13 } }, + { 1941, new int[] { 3, 13, 10, 20, 8, 07, 4, 13 } }, + { 1942, new int[] { 3, 03, 11, 08, 8, 26, 4, 13 } }, + { 1943, new int[] { 3, 21, 10, 29, 8, 15, 4, 14 } }, + { 1944, new int[] { 3, 10, 10, 17, 8, 04, 4, 13 } }, + { 1945, new int[] { 2, 26, 11, 04, 8, 23, 4, 13 } }, + { 1946, new int[] { 3, 17, 10, 24, 8, 12, 4, 13 } }, + { 1947, new int[] { 3, 07, 11, 12, 8, 31, 4, 14 } }, + { 1948, new int[] { 3, 25, 11, 01, 8, 19, 4, 13 } }, + { 1949, new int[] { 3, 14, 10, 21, 8, 08, 4, 13 } }, + { 1950, new int[] { 3, 04, 11, 09, 8, 27, 4, 13 } }, + { 1951, new int[] { 3, 23, 10, 30, 8, 17, 4, 14 } }, + { 1952, new int[] { 3, 11, 10, 18, 8, 05, 4, 13 } }, + { 1953, new int[] { 2, 28, 11, 06, 8, 24, 4, 13 } }, + { 1954, new int[] { 3, 19, 10, 26, 8, 14, 4, 13 } }, + { 1955, new int[] { 3, 08, 11, 14, 8, 03, 4, 14 } }, + { 1956, new int[] { 3, 26, 11, 02, 8, 21, 4, 13 } }, + { 1957, new int[] { 3, 16, 10, 23, 8, 10, 4, 13 } }, + { 1958, new int[] { 3, 05, 11, 11, 8, 29, 4, 13 } }, + { 1959, new int[] { 3, 24, 10, 31, 8, 18, 4, 14 } }, + { 1960, new int[] { 3, 13, 10, 20, 8, 06, 4, 13 } }, + { 1961, new int[] { 3, 02, 11, 08, 8, 26, 4, 13 } }, + { 1962, new int[] { 3, 21, 
10, 28, 8, 15, 4, 13 } }, + { 1963, new int[] { 3, 10, 10, 17, 8, 05, 4, 14 } }, + { 1964, new int[] { 3, 28, 11, 04, 8, 23, 4, 13 } }, + { 1965, new int[] { 3, 17, 10, 24, 8, 12, 4, 13 } }, + { 1966, new int[] { 3, 07, 11, 12, 8, 30, 4, 14 } }, + { 1967, new int[] { 3, 26, 11, 02, 8, 19, 4, 14 } }, + { 1968, new int[] { 3, 14, 10, 21, 8, 08, 4, 13 } }, + { 1969, new int[] { 3, 04, 11, 09, 8, 27, 4, 13 } }, + { 1970, new int[] { 3, 23, 10, 30, 8, 17, 4, 14 } }, + { 1971, new int[] { 3, 12, 10, 19, 8, 06, 4, 14 } }, + { 1972, new int[] { 2, 29, 11, 06, 8, 24, 4, 13 } }, + { 1973, new int[] { 3, 18, 10, 26, 8, 13, 4, 13 } }, + { 1974, new int[] { 3, 08, 11, 14, 8, 03, 4, 14 } }, + { 1975, new int[] { 3, 27, 11, 03, 8, 21, 4, 14 } }, + { 1976, new int[] { 3, 16, 10, 23, 8, 09, 4, 13 } }, + { 1977, new int[] { 3, 05, 11, 11, 8, 28, 4, 13 } }, + { 1978, new int[] { 3, 24, 10, 31, 8, 18, 4, 14 } }, + { 1979, new int[] { 3, 13, 10, 21, 8, 08, 4, 14 } }, + { 1980, new int[] { 3, 01, 11, 07, 8, 26, 4, 13 } }, + { 1981, new int[] { 3, 20, 10, 27, 8, 15, 4, 13 } }, + { 1982, new int[] { 3, 09, 11, 15, 8, 04, 4, 14 } }, + { 1983, new int[] { 3, 28, 11, 04, 8, 23, 4, 14 } }, + { 1984, new int[] { 3, 17, 10, 24, 8, 11, 4, 13 } }, + { 1985, new int[] { 3, 07, 11, 12, 8, 30, 4, 13 } }, + { 1986, new int[] { 3, 26, 11, 02, 8, 19, 4, 14 } }, + { 1987, new int[] { 3, 15, 10, 22, 8, 09, 4, 14 } }, + { 1988, new int[] { 3, 03, 11, 09, 8, 27, 4, 13 } }, + { 1989, new int[] { 3, 22, 10, 29, 8, 17, 4, 13 } }, + { 1990, new int[] { 3, 11, 10, 18, 8, 06, 4, 14 } }, + { 1991, new int[] { 2, 28, 11, 06, 8, 25, 4, 14 } }, + { 1992, new int[] { 3, 18, 10, 25, 8, 13, 4, 13 } }, + { 1993, new int[] { 3, 08, 11, 13, 8, 02, 4, 13 } }, + { 1994, new int[] { 3, 27, 11, 03, 8, 21, 4, 14 } }, + { 1995, new int[] { 3, 17, 10, 24, 8, 10, 4, 14 } }, + { 1996, new int[] { 3, 05, 11, 11, 8, 28, 4, 13 } }, + { 1997, new int[] { 3, 24, 10, 31, 8, 18, 4, 13 } }, + { 1998, new int[] { 3, 13, 10, 20, 8, 07, 4, 
14 } }, + { 1999, new int[] { 3, 02, 11, 08, 8, 26, 4, 14 } }, + { 2000, new int[] { 3, 20, 10, 27, 8, 15, 4, 13 } }, + { 2001, new int[] { 3, 09, 11, 15, 8, 04, 4, 13 } }, + { 2002, new int[] { 3, 28, 11, 04, 8, 22, 4, 14 } }, + { 2003, new int[] { 3, 18, 10, 25, 8, 12, 4, 14 } }, + { 2004, new int[] { 3, 06, 11, 12, 8, 29, 4, 13 } }, + { 2005, new int[] { 3, 25, 11, 02, 8, 19, 4, 14 } }, + { 2006, new int[] { 3, 14, 10, 22, 8, 09, 4, 14 } }, + { 2007, new int[] { 3, 03, 11, 09, 8, 28, 4, 14 } }, + { 2008, new int[] { 3, 21, 10, 28, 8, 16, 4, 13 } }, + { 2009, new int[] { 3, 11, 10, 18, 8, 05, 4, 14 } }, + { 2010, new int[] { 2, 28, 11, 06, 8, 24, 4, 14 } }, + { 2011, new int[] { 3, 19, 10, 26, 8, 13, 4, 14 } }, + { 2012, new int[] { 3, 08, 11, 13, 8, 02, 4, 13 } }, + { 2013, new int[] { 3, 27, 11, 03, 8, 20, 4, 14 } }, + { 2014, new int[] { 3, 16, 10, 23, 8, 10, 4, 14 } }, + { 2015, new int[] { 3, 05, 11, 11, 8, 29, 4, 14 } }, + { 2016, new int[] { 3, 23, 10, 30, 8, 18, 4, 13 } }, + { 2017, new int[] { 3, 12, 10, 19, 8, 07, 4, 14 } }, + { 2018, new int[] { 3, 02, 11, 07, 8, 26, 4, 14 } }, + { 2019, new int[] { 3, 21, 10, 28, 8, 15, 4, 14 } }, + { 2020, new int[] { 3, 09, 11, 15, 8, 03, 4, 13 } }, + { 2021, new int[] { 3, 28, 11, 04, 8, 22, 4, 14 } }, + { 2022, new int[] { 3, 18, 10, 25, 8, 11, 4, 14 } }, + { 2023, new int[] { 3, 07, 11, 13, 8, 30, 4, 14 } }, + { 2024, new int[] { 3, 25, 11, 01, 8, 19, 4, 13 } }, + { 2025, new int[] { 3, 14, 10, 21, 8, 09, 4, 14 } }, + { 2026, new int[] { 3, 03, 11, 09, 8, 28, 4, 14 } }, + { 2027, new int[] { 3, 22, 10, 29, 8, 17, 4, 14 } }, + { 2028, new int[] { 3, 11, 10, 18, 8, 05, 4, 13 } }, + { 2029, new int[] { 2, 28, 11, 06, 8, 23, 4, 14 } }, + { 2030, new int[] { 3, 19, 10, 26, 8, 13, 4, 14 } }, + { 2031, new int[] { 3, 09, 11, 14, 8, 02, 4, 14 } }, + { 2032, new int[] { 3, 27, 11, 03, 8, 20, 4, 13 } }, + { 2033, new int[] { 3, 16, 10, 23, 8, 10, 4, 14 } }, + { 2034, new int[] { 3, 05, 11, 11, 8, 29, 4, 14 } }, + { 2035, 
new int[] { 3, 23, 10, 31, 8, 18, 4, 14 } }, + { 2036, new int[] { 3, 12, 10, 19, 8, 06, 4, 13 } }, + { 2037, new int[] { 3, 01, 11, 07, 8, 25, 4, 14 } }, + { 2038, new int[] { 3, 21, 10, 28, 8, 14, 4, 14 } }, + { 2039, new int[] { 3, 10, 11, 16, 8, 04, 4, 14 } }, + { 2040, new int[] { 3, 28, 11, 04, 8, 22, 4, 13 } }, + { 2041, new int[] { 3, 17, 10, 25, 8, 11, 4, 14 } }, + { 2042, new int[] { 3, 06, 11, 12, 8, 30, 4, 14 } }, + { 2043, new int[] { 3, 25, 11, 01, 8, 20, 4, 14 } }, + { 2044, new int[] { 3, 13, 10, 20, 8, 08, 4, 13 } }, + { 2045, new int[] { 3, 03, 11, 08, 8, 27, 4, 14 } }, + { 2046, new int[] { 3, 22, 10, 29, 8, 16, 4, 14 } }, + { 2047, new int[] { 3, 12, 10, 19, 8, 05, 4, 14 } }, + { 2048, new int[] { 2, 29, 11, 06, 8, 23, 4, 14 } }, + { 2049, new int[] { 3, 19, 10, 26, 8, 13, 4, 14 } }, + { 2050, new int[] { 3, 08, 11, 14, 8, 02, 4, 14 } }, + { 2051, new int[] { 3, 27, 11, 03, 8, 21, 4, 14 } }, + { 2052, new int[] { 3, 15, 10, 22, 8, 10, 4, 14 } }, + { 2053, new int[] { 3, 04, 11, 10, 8, 29, 4, 14 } }, + { 2054, new int[] { 3, 23, 10, 30, 8, 18, 4, 14 } }, + { 2055, new int[] { 3, 13, 10, 20, 8, 07, 4, 14 } }, + { 2056, new int[] { 3, 02, 11, 07, 8, 25, 4, 14 } }, + { 2057, new int[] { 3, 21, 10, 28, 8, 14, 4, 14 } }, + { 2058, new int[] { 3, 10, 11, 16, 8, 04, 4, 14 } }, + { 2059, new int[] { 3, 29, 11, 05, 8, 23, 4, 14 } }, + { 2060, new int[] { 3, 17, 10, 24, 8, 11, 4, 14 } }, + { 2061, new int[] { 3, 06, 11, 12, 8, 30, 4, 14 } }, + { 2062, new int[] { 3, 25, 11, 01, 8, 20, 4, 14 } }, + { 2063, new int[] { 3, 14, 10, 21, 8, 09, 4, 14 } }, + { 2064, new int[] { 3, 03, 11, 08, 8, 26, 4, 14 } }, + { 2065, new int[] { 3, 22, 10, 29, 8, 16, 4, 14 } }, + { 2066, new int[] { 3, 11, 10, 19, 8, 05, 4, 14 } }, + { 2067, new int[] { 3, 01, 11, 06, 8, 24, 4, 14 } }, + { 2068, new int[] { 3, 18, 10, 26, 8, 13, 4, 14 } }, + { 2069, new int[] { 3, 07, 11, 13, 9, 01, 4, 14 } }, + { 2070, new int[] { 3, 26, 11, 02, 8, 21, 4, 14 } }, + { 2071, new int[] { 3, 16, 
10, 23, 8, 10, 4, 14 } }, + { 2072, new int[] { 3, 04, 11, 10, 8, 28, 4, 14 } }, + { 2073, new int[] { 3, 23, 10, 30, 8, 17, 4, 14 } }, + { 2074, new int[] { 3, 13, 10, 20, 8, 06, 4, 14 } }, + { 2075, new int[] { 3, 02, 11, 08, 8, 25, 4, 14 } }, + { 2076, new int[] { 3, 20, 10, 27, 8, 14, 4, 14 } }, + { 2077, new int[] { 3, 09, 11, 15, 8, 04, 4, 14 } }, + { 2078, new int[] { 3, 28, 11, 04, 8, 23, 4, 14 } }, + { 2079, new int[] { 3, 17, 10, 24, 8, 12, 4, 14 } }, + { 2080, new int[] { 3, 05, 11, 11, 8, 30, 4, 14 } }, + { 2081, new int[] { 3, 25, 11, 01, 8, 19, 4, 14 } }, + { 2082, new int[] { 3, 14, 10, 21, 8, 08, 4, 14 } }, + { 2083, new int[] { 3, 04, 11, 09, 8, 27, 4, 14 } }, + { 2084, new int[] { 3, 22, 10, 29, 8, 15, 4, 14 } }, + { 2085, new int[] { 3, 11, 10, 18, 8, 05, 4, 14 } }, + { 2086, new int[] { 2, 28, 11, 06, 8, 24, 4, 14 } }, + { 2087, new int[] { 3, 19, 10, 26, 8, 14, 4, 15 } }, + { 2088, new int[] { 3, 07, 11, 13, 8, 31, 4, 14 } }, + { 2089, new int[] { 3, 26, 11, 02, 8, 21, 4, 14 } }, + { 2090, new int[] { 3, 15, 10, 23, 8, 10, 4, 14 } }, + { 2091, new int[] { 3, 05, 11, 11, 8, 28, 4, 15 } }, + { 2092, new int[] { 3, 23, 10, 30, 8, 17, 4, 14 } }, + { 2093, new int[] { 3, 13, 10, 20, 8, 06, 4, 14 } }, + { 2094, new int[] { 3, 02, 11, 08, 8, 25, 4, 14 } }, + { 2095, new int[] { 3, 21, 10, 28, 8, 15, 4, 15 } }, + { 2096, new int[] { 3, 09, 11, 14, 8, 04, 4, 14 } }, + { 2097, new int[] { 3, 27, 11, 04, 8, 22, 4, 14 } }, + { 2098, new int[] { 3, 17, 10, 24, 8, 11, 4, 14 } }, + { 2099, new int[] { 3, 06, 11, 12, 8, 30, 4, 15 } } + }; + public static readonly Dictionary WrittenDecades = new Dictionary + { + { @"सौ", 100 }, + { @"सत्तर", 70 }, + { @"बीस", 20 }, + { @"तीस", 30 }, + { @"अस्सी", 80 }, + { @"नब्बे", 90 }, + { @"चालीस", 40 }, + { @"पचास", 50 }, + { @"साठ", 60 }, + { @"दस", 10 }, + { @"सौ के दशक", 100 }, + { @"सत्तर के दशक", 70 }, + { @"बीस के दशक", 20 }, + { @"तीस के दशक", 30 }, + { @"अस्सी के दशक", 80 }, + { @"नब्बे के दशक", 90 }, + { @"चालीस 
के दशक", 40 }, + { @"पचास के दशक", 50 }, + { @"साठ के दशक", 60 }, + { @"दस के दशक", 10 }, + { @"hundreds", 0 }, + { @"tens", 10 }, + { @"twenties", 20 }, + { @"thirties", 30 }, + { @"forties", 40 }, + { @"fifties", 50 }, + { @"sixties", 60 }, + { @"seventies", 70 }, + { @"eighties", 80 }, + { @"nineties", 90 } + }; + public static readonly Dictionary SpecialDecadeCases = new Dictionary + { + { @"noughties", 2000 }, + { @"aughts", 2000 }, + { @"two thousands", 2000 }, + { @"दो हजार", 2000 }, + { @"दो हजार के दशक", 2000 }, + { @"इस सदी के पहले दशक", 2000 }, + { @"20वीं सदी के पहले दशक", 2000 } + }; + public const string DefaultLanguageFallback = @"MDY"; + public static readonly IList SuperfluousWordList = new List + { + @"preferably", + @"how about", + @"maybe", + @"say", + @"like" + }; + public static readonly string[] DurationDateRestrictions = { @"आज", @"today", @"now" }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"\bदिन\b", @"\bदिन-ब-दिन\b" }, + { @"^\d{1,4}-\d{1,4}$", @"\d{1,4}-\d{1,4}-\d|\d-\d{1,4}-\d{1,4}" }, + { @"^\d{1,4}-\d{1,4}-\d{1,4}$", @"\d{1,4}-\d{1,4}-\d{1,4}-\d|\d-\d{1,4}-\d{1,4}-\d{1,4}" } + }; + public static readonly IList MorningTermList = new List + { + @"सुबह" + }; + public static readonly IList AfternoonTermList = new List + { + @"दोपहर", + @"दोपहर" + }; + public static readonly IList EveningTermList = new List + { + @"सायं", + @"शाम", + @"संध्या", + @"सायंकाल" + }; + public static readonly IList MealtimeBreakfastTermList = new List + { + @"नाश्ता", + @"जलपान", + @"ब्रेकफ़ास्ट", + @"ब्रेकफास्ट", + @"breakfast" + }; + public static readonly IList MealtimeBrunchTermList = new List + { + @"brunch", + @"ब्रंच" + }; + public static readonly IList MealtimeLunchTermList = new List + { + @"lunch", + @"lunchtime", + @"लंच", + @"लंच टाइम", + @"लंचटाइम", + @"दोपहर का भोजन", + @"कलेवा" + }; + public static readonly IList MealtimeDinnerTermList = new List + { + @"dinner", + @"dinnertime", + @"supper", + 
@"रात्रिभोजन", + @"डिनर", + @"सपर", + @"डिनरटाइम" + }; + public static readonly IList DaytimeTermList = new List + { + @"दिन" + }; + public static readonly IList NightTermList = new List + { + @"रात" + }; + public static readonly IList SameDayTerms = new List + { + @"आज" + }; + public static readonly IList PlusOneDayTerms = new List + { + @"tomorrow", + @"tmr", + @"day after", + @"कल", + @"कल से" + }; + public static readonly IList MinusOneDayTerms = new List + { + @"yesterday", + @"day before", + @"कल", + @"कल वापस गया था", + @"कल से पहले" + }; + public static readonly IList PlusTwoDayTerms = new List + { + @"day after tomorrow", + @"day after tmr", + @"परसों" + }; + public static readonly IList MinusTwoDayTerms = new List + { + @"day before yesterday", + @"परसों वापस गया था" + }; + public static readonly IList FutureTerms = new List + { + @"इस", + @"अगला", + @"अगले", + @"अगली", + @"दूसरे" + }; + public static readonly IList LastCardinalTerms = new List + { + @"पिछले", + @"पिछला", + @"पिछले", + @"पिछली", + @"आखिरी", + @"अंतिम" + }; + public static readonly IList MonthTerms = new List + { + @"महीना", + @"महीने", + @"महीनों", + @"माह" + }; + public static readonly IList MonthToDateTerms = new List + { + @"month to date" + }; + public static readonly IList WeekendTerms = new List + { + @"सप्ताहांत", + @"वीकेंड" + }; + public static readonly IList WeekTerms = new List + { + @"week", + @"हफ़्ते", + @"हफ्ते", + @"सप्ताह" + }; + public static readonly IList YearTerms = new List + { + @"साल", + @"वर्षों", + @"वर्ष" + }; + public static readonly IList GenericYearTerms = new List + { + @"y" + }; + public static readonly IList YearToDateTerms = new List + { + @"year to date" + }; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/DateTimeDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/DateTimeDefinitions.tt new file mode 100644 index 0000000000..0532a8b156 --- /dev/null +++ 
b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/DateTimeDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Hindi\Hindi-DateTime.yaml"; + this.Language = "Hindi"; + this.ClassName = "DateTimeDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/NumbersDefinitions.cs index 9fda65b397..511dcb6ead 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/NumbersDefinitions.cs @@ -22,99 +22,290 @@ namespace Microsoft.Recognizers.Definitions.Hindi public static class NumbersDefinitions { public const string LangMarker = @"Hin"; - public const bool CompoundNumberLanguage = false; - public const bool MultiDecimalSeparatorCulture = true; - public const string RoundNumberIntegerRegex = @"(?:hundred|thousand|million|billion|trillion)"; - public const string ZeroToNineIntegerRegex = @"(?:three|seven|eight|four|five|zero|nine|one|two|six)"; - public const string TwoToNineIntegerRegex = @"(?:three|seven|eight|four|five|nine|two|six)"; - public const string NegativeNumberTermsRegex = @"(?(minus|negative)\s+)"; + public const bool CompoundNumberLanguage = true; + public const bool MultiDecimalSeparatorCulture = false; + public const string RoundNumberIntegerRegex = @"(सौ|हजार|हज़ार|लाख|करोड़|अरब|खरब)"; + public const string ZeroToNineIntegerRegex = @"(सात|आठ|फ़ोर|चार|पांच|पाँच|शून्य|नौ|दो|छह|एक(सठ)?|तीन|०|१|२|३|४|५|६|७|८|९)"; + public const string TwoToNineIntegerRegex = @"(तीन|सात|आठ|फ़ोर|चार|पाँच|पांच|नौ|दो|छह)"; + public const string NegativeNumberTermsRegex = @"(?(माइनस|निगेटिव)\s+)"; public static readonly string NegativeNumberSignRegex = $@"^{NegativeNumberTermsRegex}.*"; - public const string AnIntRegex = @"(an?)(?=\s)"; 
- public const string TenToNineteenIntegerRegex = @"(?:seventeen|thirteen|fourteen|eighteen|nineteen|fifteen|sixteen|eleven|twelve|ten)"; - public const string TensNumberIntegerRegex = @"(?:seventy|twenty|thirty|eighty|ninety|forty|fifty|sixty)"; - public static readonly string SeparaIntRegex = $@"(?:(({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}(\s+(and\s+)?|\s*-\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex})(\s+{RoundNumberIntegerRegex})*))|(({AnIntRegex}(\s+{RoundNumberIntegerRegex})+))"; - public static readonly string AllIntRegex = $@"(?:((({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}(\s+(and\s+)?|\s*-\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{AnIntRegex})(\s+{RoundNumberIntegerRegex})+)\s+(and\s+)?)*{SeparaIntRegex})"; + public const string AnIntRegex = @"(एक|1|१)(?=\s)"; + public const string TenToNineteenIntegerRegex = @"(सत्रह|तेरह|चौदह|अठारह|उन्नीस|पंद्रह|सोलह|ग्यारह|बारह|दस)"; + public const string TwentyToTwentyNineIntegerRegex = @"(सत्ताईस|तेईस|चौबीस|अट्ठाईस|अट्ठाइस|उनतीस|पच्चीस|छब्बीस|इक्कीस|बाईस|बीस)"; + public const string ThirtyToThirtyNineIntegerRegex = @"(सैंतीस|तैंतीस|चौंतीस|अड़तीस|उनतालीस|पैंतीस|छ्त्तीस|इकतीस|इकत्तीस|बत्तीस|तीस)"; + public const string FourtyToFourtyNineIntegerRegex = @"(सैंतालीस|तैंतालीस|चौंतालीस|अड़तालीस|उनचास|पैंतालीस|छियालीस|इकतालीस|बयालीस|चालीस)"; + public const string FiftyToFiftyNineIntegerRegex = @"(सत्तावन|तिरेपन|चौबन|अट्ठावन|उनसठ|पचपन|छप्पन|इक्याबन|बावन|पचास)"; + public const string SixtyToSixtyNineIntegerRegex = @"((सड़|सड़|तिर|चौं|अड़|उनहत्तर|पैं|छिया|इक|बा|साठ|एक)(सठ))"; + public const string SeventyToSeventyNineIntegerRegex = @"(?:सतहत्तर|तिहत्तर|चौहत्तर|अठहत्तर|उनासी|पचहत्तर|छिहत्तर|इकहत्तर|अठत्तर|बहत्तर|सत्तर)"; + public const string EightyToEightyNineIntegerRegex = @"(सतासी|तिरासी|चौरासी|अठासी|नवासी|पचासी|छियासी|इक्यासी|बयासी|अस्सी)"; + public const string NinetyToNinetyNineIntegerRegex = 
@"(सत्तानवे|तिरानवे|चौरानवे|अट्ठानवे|निन्यानवे|पचानवे|पंचानबे|छियानवे|इक्यानबे|बानवे|नब्बे)"; + public const string TensNumberIntegerRegex = @"(सत्तर|बीस|तीस|अस्सी|नब्बे|चालीस|पचास|साठ)"; + public const string DigitsNumberRegex = @"\d|\d{1,3}(\.\d{3})"; + public static readonly string AllNumericalIntRegex = $@"({ZeroToNineIntegerRegex}|{TenToNineteenIntegerRegex}|{TwentyToTwentyNineIntegerRegex}|{ThirtyToThirtyNineIntegerRegex}|{FourtyToFourtyNineIntegerRegex}|{FiftyToFiftyNineIntegerRegex}|{SixtyToSixtyNineIntegerRegex}|{SeventyToSeventyNineIntegerRegex}|{EightyToEightyNineIntegerRegex}|{NinetyToNinetyNineIntegerRegex})"; + public static readonly string SeparaIntRegex = $@"(?:((({AllNumericalIntRegex}|({TensNumberIntegerRegex}(\s+(और\s+)?){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex})(\s+({RoundNumberIntegerRegex}|{RoundNumberHinglishIntegerRegex}))*)|(({AnIntRegex}?(\s+({RoundNumberIntegerRegex}))+))|({RoundNumberIntegerRegex})|({AllNumericalIntRegex})))"; + public static readonly string AllIntRegex = $@"(?:((({AllNumericalIntRegex}|({TensNumberIntegerRegex}(\s+(और\s+)?){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{AnIntRegex})(\s+({RoundNumberIntegerRegex}|{RoundNumberHinglishIntegerRegex}))+)\s+(और\s+)?)*({SeparaIntRegex}))"; public const string PlaceHolderPureNumber = @"\b"; public const string PlaceHolderDefault = @"\D|\b"; - public static readonly Func NumbersWithPlaceHolder = (placeholder) => $@"(((? 
NumbersWithPlaceHolder = (placeholder) => $@"(((?(next|previous|current)\s+one|(the\s+second|next)\s+to\s+last|the\s+one\s+before\s+the\s+last(\s+one)?|the\s+last\s+but\s+one|(ante)?penultimate|last|next|previous|current)"; + public static readonly string RoundNumberIntegerRegexWithLocks = $@"(?<=\b)\d+\s+({RoundNumberIntegerRegex}|{RoundNumberHinglishIntegerRegex})"; + public const string NumbersWithDozenSuffix = @"\d+\s+दर्जन(नों)?"; + public const string AdditionTermsRegex = @"(?<=\s)(और|व|तथा|एवं|प्लस|plus|जमा)(?=\s)"; + public static readonly string AllIntRegexWithLocks = $@"(?<=\b){AllIntRegex}"; + public const string RoundNumberHinglishIntegerRegex = @"(?:हंड्रेड|थाउजेंड|मिलियन|बिलियन|ट्रिलियन)"; + public const string OnetoNineHinglishIntegerRegex = @"(?:वन|टू|थ्री|फोर|फ़ाइव|सिक्स|सेवन|एइट|नाइन)"; + public const string ElevenToNineteenHinglishIntegerRegex = @"(?:इलेवन|ट्वेल्व|थर्टीन|फ़ोर्टीन|फ़िफ़्टीन|सिक्सटीन|सेवेनटीन|एइटीन|नाइनटीन)"; + public const string TensHinglishIntegerRegex = @"(?:टेन|ट्वेन्टी|ट्वेंटी|थर्टी|फ़ोर्टी|फ़िफ़्टी|सिक्सटी|सेवेंटी|एइट्टी|नाइनटी)"; + public static readonly string HinglishIntegerRegex = $@"(({TensHinglishIntegerRegex}\s+{OnetoNineHinglishIntegerRegex})|{OnetoNineHinglishIntegerRegex}|{ElevenToNineteenHinglishIntegerRegex}|{TensHinglishIntegerRegex})"; + public static readonly string CompoundHindiNumberRegex = $@"((({HinglishIntegerRegex}\s+)?({RoundNumberHinglishIntegerRegex}\s+)?(अंड\s+)?)+({HinglishIntegerRegex}|{RoundNumberHinglishIntegerRegex})|{HinglishIntegerRegex}|{RoundNumberHinglishIntegerRegex})"; + public static readonly string NegativeHinglishRegex = $@"(({NegativeNumberTermsRegex})?{CompoundHindiNumberRegex})"; + public static readonly string AllIntRegexWithDozenSuffixLocks = $@"(?<=\b)(((आधा\s+|एक\s+)दर्जन)|({AllIntRegex}\s+दर्जन(नों)?)|({CompoundHindiNumberRegex}\s+डज़न))"; + public const string RoundNumberEnglishIntegerRegex = @"(?:hundred|thousand|million|billion|trillion)"; + public const string 
ZeroToNineEnglishIntegerRegex = @"(?:three|seven|eight|four|five|zero|nine|one|two|six)"; + public const string TenToNineteenEnglishIntegerRegex = @"(?:seventeen|thirteen|fourteen|eighteen|nineteen|fifteen|sixteen|eleven|twelve|ten)"; + public const string TensNumberEnglishIntegerRegex = @"(?:seventy|twenty|thirty|eighty|ninety|forty|fifty|sixty)"; + public static readonly string EnglishIntegerRegex = $@"(({TensNumberEnglishIntegerRegex}\s+{ZeroToNineEnglishIntegerRegex})|{ZeroToNineEnglishIntegerRegex}|{TenToNineteenEnglishIntegerRegex}|{TensNumberEnglishIntegerRegex})"; + public static readonly string CompoundEnglishNumberRegex = $@"((({EnglishIntegerRegex}\s+)?({RoundNumberEnglishIntegerRegex}\s+)?(and\s+)?)+({EnglishIntegerRegex}|{RoundNumberEnglishIntegerRegex})|({EnglishIntegerRegex}\s+{RoundNumberEnglishIntegerRegex})|{EnglishIntegerRegex}|{RoundNumberEnglishIntegerRegex})"; + public const string DecimalUnitsRegex = @"(?:डेढ़|डेढ़|डेढ|ढाई|सवा|सावा)"; + public static readonly string DecimalUnitsWithRoundNumberRegex = $@"({DecimalUnitsRegex}\s+({{AllNumericalIntRegex}}\s+)?{RoundNumberIntegerRegex}|{DecimalUnitsRegex})"; + public const string RoundNumberOrdinalRegex = @"(?:(सौ|हजार|हज़ार|लाख|करोड़|अरब|खरब)(वां|वीं|वें|वाँ))"; + public const string OneToNineOrdinalRegex = @"(?:पहला|(?((अंतिम वाले|आखिरी वाले)\s+(से पहले वाला|का पिछला(\s+वाला)?))|((आखिरी|अंतिम)\s+(से पहले का|के बगल वाला))|((पिछला|पिछले|पिछली)\s+पर केवल एक))"; + public const string CompoundThreeWordRelativeOrdinalRegex = @"(?((आखिरी से|आखिरी का|आखिरी के|आखिरी की|अंतिम से|अंतिम का|अंतिम के|अंतिम की)\s+(पहला|पहले|पहली|दूसरा|दूसरे|दूसरी)))"; + public const string CompoundTwoWordRelativeOrdinalRegex = @"(?((पिछला|पिछले|पिछली|अगला|अगले|अगली|अंतिम|आखिरी|अभी)\s+(वाला|वाले|वाली))|((अगला|अगले|अगली)\s+चीज)|((आखिरी|अंतिम)\s+(का|के|की)))"; + public const string SimpleRelativeOrdinalRegex = @"(?अंतिम|आखिरी|अगला|अगले|अगली|पिछला|पिछले|पिछली|वर्तमान|लास्ट|प्रीवियस|नेक्स्ट|करेंट|last|previous|next|current)"; + 
public static readonly string RelativeOrdinalRegex = $@"({CompoundFiveWordRelativeOrdinalRegex}|{CompoundThreeWordRelativeOrdinalRegex}|{CompoundTwoWordRelativeOrdinalRegex}|{SimpleRelativeOrdinalRegex})"; + public const string RoundNumberHinglishOrdinalRegex = @"(?:हंड्रेडथ|थाउजेंडथ|मिलियनथ|बिलियनथ|ट्रिलियनथ)"; + public const string OnetoNineHinglishOrdinalRegex = @"(?:फ़र्स्ट|सेकेंड|थर्ड|फोर्थ|फिफ्थ|सिक्स्थ|सेवंथ|एइथ|नाइन्थ)"; + public const string ElevenToNineteenHinglishOrdinalRegex = @"(?:इलेवेन्थ|ट्वेल्फ्थ|थरटीन्थ|फोर्टीन्थ|फिफ्टीन्थ|सिक्सटीन्थ|सेवेंटीन्थ|एइटीन्थ|नाइटीन्थ)"; + public const string TensHinglishOrdinalRegex = @"(?:टेन्थ|ट्वेन्टीएथ|ट्वेंटीएथ|थरटिएथ|फ़ोर्टीएथ|फ़िफ़्टीएथ|सिक्सटिएथ|सेवेंटिएथ|एइटिएथ|नाइन्टीएथ)"; + public const string TensHinglishNumberRegex = @"(?:ट्वेन्टी|ट्वेंटी|थर्टी|फोर्टी|फिफ्टी|फ़िफ़्टी|सिक्सटी|सेवेंटी|एइट्टी|नाइनटी)"; + public static readonly string HinglishOrdinalRegex = $@"(({TensHinglishNumberRegex}\s+{OnetoNineHinglishOrdinalRegex})|{OnetoNineHinglishOrdinalRegex}|{ElevenToNineteenHinglishOrdinalRegex}|{TensHinglishOrdinalRegex})"; + public static readonly string CompoundHindiOrdinalRegex = $@"((({HinglishOrdinalRegex}\s+)?({RoundNumberHinglishOrdinalRegex}\s+)?(अंड\s+)?)+({HinglishOrdinalRegex}|{RoundNumberHinglishOrdinalRegex})|{HinglishOrdinalRegex}|{RoundNumberHinglishOrdinalRegex})"; + public const string RoundNumberEnglishOrdinalRegex = @"(?:hundredth|thousandth|millionth|billionth|trillionth)"; + public const string OnetoNineEnglishOrdinalRegex = @"(?:first|second|third|fourth|fifth|sixth|seventh|eighth|ninth)"; + public const string ElevenToNineteenEnglishOrdinalRegex = @"(?:eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth)"; + public const string TensEnglishOrdinalRegex = @"(?:tenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)"; + public static readonly string EnglishOrdinalRegex = 
$@"(({TensNumberEnglishIntegerRegex}\s+{OnetoNineEnglishOrdinalRegex})|{OnetoNineEnglishOrdinalRegex}|{ElevenToNineteenEnglishOrdinalRegex}|{TensEnglishOrdinalRegex})"; + public static readonly string CompoundEnglishOrdinalRegex = $@"((({EnglishIntegerRegex}\s+)?({RoundNumberEnglishIntegerRegex}\s+)?(and\s+)?)+({EnglishOrdinalRegex}|{RoundNumberEnglishOrdinalRegex})|({EnglishIntegerRegex}\s+{RoundNumberEnglishOrdinalRegex})|{EnglishOrdinalRegex})"; public static readonly string BasicOrdinalRegex = $@"({NumberOrdinalRegex}|{RelativeOrdinalRegex})"; - public static readonly string SuffixBasicOrdinalRegex = $@"(?:(((({TensNumberIntegerRegex}(\s+(and\s+)?|\s*-\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{AnIntRegex})(\s+{RoundNumberIntegerRegex})+)\s+(and\s+)?)*({TensNumberIntegerRegex}(\s+|\s*-\s*))?{BasicOrdinalRegex})"; + public static readonly string SuffixBasicOrdinalRegex = $@"(?:((({AllNumericalIntRegex}|{TensNumberIntegerRegex})(\s+({RoundNumberIntegerRegex})(\s+))+)(({NumberOrdinalRegex}))))"; public static readonly string SuffixRoundNumberOrdinalRegex = $@"(?:({AllIntRegex}\s+){RoundNumberOrdinalRegex})"; - public static readonly string AllOrdinalRegex = $@"(?:{SuffixBasicOrdinalRegex}|{SuffixRoundNumberOrdinalRegex})"; - public const string OrdinalSuffixRegex = @"(?<=\b)(?:(\d*(1st|2nd|3rd|[4-90]th))|(1[1-2]th))(?=\b)"; - public const string OrdinalNumericRegex = @"(?<=\b)(?:\d{1,3}(\s*,\s*\d{3})*\s*th)(?=\b)"; - public static readonly string OrdinalRoundNumberRegex = $@"(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?![\.,]))(?=\b)"; - public static readonly string FractionPrepositionWithinPercentModeRegex = $@"(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?![\.,]))(?=\b)"; - public static readonly string AllPointRegex = $@"((\s+{ZeroToNineIntegerRegex})+|(\s+{SeparaIntRegex}))"; - public static readonly string AllFloatRegex = $@"{AllIntRegex}(\s+point){AllPointRegex}"; + public static readonly string FractionNotationRegex = 
$@"{BaseNumbers.FractionNotationRegex}"; + public static readonly string FractionNounRegex = $@"(?<=\b)(((({AllNumericalIntRegex})(\s?)((({RoundNumberIntegerRegex})|({RoundNumberOrdinalRegex}))\s?)?)((और\s)?))+((आधा|आधे|चौथाई|तिहाई)))(?=\b)"; + public static readonly string FractionNounWithArticleRegex = $@"(?<=\b)((({AllIntRegex}\s+(और\s+)?)?(एक)(\s+|\s*-\s*)(?!\bफ़र्स्ट)(({AllOrdinalRegex})|({RoundNumberOrdinalRegex})|(आधा|आधे|चौथाई|तिहाई)))|(आधा|आधे))"; + public static readonly string FractionPrepositionRegex = $@"(?(({AllIntRegex}\s+)|((?(({AllOrdinalRegex})|({CompoundNumberOrdinals})|(\d+)|({AllIntRegex})|({AllNumericalIntRegex})))"; + public static readonly string FractionPrepositionInverseRegex = $@"(?(({AllIntRegex}\s+)|((?(({AllOrdinalRegex})|({CompoundNumberOrdinals})|(\d+)|({AllIntRegex})|({AllNumericalIntRegex})))"; + public static readonly string FractionPrepositionWithinPercentModeRegex = $@"(?<=\b)(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?![\.,]))(?=\b)"; + public static readonly string NegativeCompoundNumberOrdinals = $@"({NegativeNumberTermsRegex}(?({AllNumericalIntRegex}\s+)?({RoundNumberIntegerRegex}\s+)?(और\s+)?)+((का|के|की)(\s+))?(?({NumberOrdinalRegex}|{RoundNumberOrdinalRegex})))"; + public static readonly string NegativeFractionRegex = $@"({NegativeNumberTermsRegex}{FractionNounWithArticleRegex}|{NegativeNumberTermsRegex}{FractionPrepositionRegex})"; + public static readonly string FractionRegex = $@"((?<=\b)?(({AllIntRegex}\s+)(और\s+)?({FractionPrepositionRegex}))|(?<=\b)?({FractionPrepositionRegex}))"; + public static readonly string AllPointRegex = $@"((\s+{AllNumericalIntRegex})+|(\s+{SeparaIntRegex}))"; + public static readonly string AllFloatRegex = $@"{AllIntRegex}(\s+(पॉइंट|दशमलव)){AllPointRegex}"; public static readonly string DoubleWithMultiplierRegex = $@"(((? DoubleDecimalPointRegex = (placeholder) => $@"(((? 
DoubleWithoutIntegralRegex = (placeholder) => $@"(?<=\s|^)(?and)"; - public static readonly string NumberWithSuffixPercentage = $@"(?और)"; + public static readonly string NumberWithSuffixPercentage = $@"(?)"; - public const string LessRegex = @"(?:(less|lower|smaller|fewer)(\s+than)?|below|under|(?|=)<)"; - public const string EqualRegex = @"(equal(s|ing)?(\s+(to|than))?|(?)=)"; - public static readonly string MoreOrEqualPrefix = $@"((no\s+{LessRegex})|(at\s+least))"; - public static readonly string MoreOrEqual = $@"(?:({MoreRegex}\s+(or)?\s+{EqualRegex})|({EqualRegex}\s+(or)?\s+{MoreRegex})|{MoreOrEqualPrefix}(\s+(or)?\s+{EqualRegex})?|({EqualRegex}\s+(or)?\s+)?{MoreOrEqualPrefix}|>\s*=)"; - public const string MoreOrEqualSuffix = @"((and|or)\s+(((more|greater|higher|larger|bigger)((?!\s+than)|(\s+than(?!(\s*\d+)))))|((over|above)(?!\s+than))))"; - public static readonly string LessOrEqualPrefix = $@"((no\s+{MoreRegex})|(at\s+most)|(up\s+to))"; - public static readonly string LessOrEqual = $@"(({LessRegex}\s+(or)?\s+{EqualRegex})|({EqualRegex}\s+(or)?\s+{LessRegex})|{LessOrEqualPrefix}(\s+(or)?\s+{EqualRegex})?|({EqualRegex}\s+(or)?\s+)?{LessOrEqualPrefix}|<\s*=)"; - public const string LessOrEqualSuffix = @"((and|or)\s+(less|lower|smaller|fewer)((?!\s+than)|(\s+than(?!(\s*\d+)))))"; + public const string TillRegex = @"(के|से|--|-|—|——|~|–)"; + public const string MoreRegex = @"(?:((उससे|(?)"; + public const string LessRegex = @"(?:((उससे|उसके|(?|=)<)"; + public const string EqualRegex = @"((उसके|इसके|के)\s*(समान|बराबर)|(?)=)"; + public static readonly string MoreOrEqualPrefix = $@"(({LessRegex}\s*नहीं)|(कम\s+से\s+कम))"; + public static readonly string MoreOrEqual = $@"(?:{MoreOrEqualPrefix}|({MoreRegex}\s+(या)?\s+{EqualRegex})|({EqualRegex}\s+(या)?\s+{MoreRegex})|({MoreOrEqualPrefix}\s+(या)?\s+{EqualRegex})|({EqualRegex}\s+(या)?\s+{MoreOrEqualPrefix})|>\s*=)"; + public static readonly string MoreOrEqualNew = 
$@"(?:({MoreOrEqualPrefix}(\s+(या)?\s+{EqualRegex})?)|(({EqualRegex}\s+(या)?\s+)?{MoreOrEqualPrefix})|>\s*=)"; + public const string MoreOrEqualSuffix = @"((और|या)\s+((((?!(उससे|से)\s+)|((?!(\d+\s*))(उससे|से)))(ज़्यादा\s*बड़ी|ज़्यादा(?!\s+से\s+ज़्यादा)|बड़ी|अधिक))|((?!(उससे|से|के)\s+)(ऊपर|पार))))"; + public static readonly string LessOrEqualPrefix = $@"(({MoreRegex}\s*नहीं)|(ज़्यादा\s+से\s+ज़्यादा)|तक)"; + public static readonly string LessOrEqual = $@"({LessOrEqualPrefix}|({LessRegex}\s+(या)?\s+{EqualRegex})|({EqualRegex}\s+(या)?\s+{LessRegex})|({LessOrEqualPrefix}\s+(या)?\s+{EqualRegex})|({EqualRegex}\s+(या)?\s+{LessOrEqualPrefix})|<\s*=)"; + public static readonly string LessOrEqualNew = $@"(({LessOrEqualPrefix}(\s+(या)?\s+{EqualRegex})?)|(({EqualRegex}\s+(या)?\s+)?{LessOrEqualPrefix})|<\s*=)"; + public const string LessOrEqualSuffix = @"((और|या)\s+((?!(उससे|उसके|से|के)\s+)|((?!(\d+\s*))(उससे|उसके|से|के)\s+))(कम(?!\s+से\s+कम)|छोटी|नीचे))"; public const string NumberSplitMark = @"(?![,.](?!\d+))"; - public const string MoreRegexNoNumberSucceed = @"((bigger|greater|more|higher|larger)((?!\s+than)|\s+(than(?!(\s*\d+))))|(above|over)(?!(\s*\d+)))"; - public const string LessRegexNoNumberSucceed = @"((less|lower|smaller|fewer)((?!\s+than)|\s+(than(?!(\s*\d+))))|(below|under)(?!(\s*\d+)))"; - public const string EqualRegexNoNumberSucceed = @"(equal(s|ing)?((?!\s+(to|than))|(\s+(to|than)(?!(\s*\d+)))))"; - public static readonly string OneNumberRangeMoreRegex1 = $@"({MoreOrEqual}|{MoreRegex})\s*(the\s+)?(?({NumberSplitMark}.)+)"; + public const string MoreRegexNoNumberSucceed = @"(((?!(उससे|से)\s+)|((?!(\d+\s*))(उससे|से))\s+)(ज़्यादा(\s*बड़ी)?|बड़ी|अधिक)|(?!(\d+\s*))(ऊपर|पार))"; + public const string LessRegexNoNumberSucceed = @"(((?!(उससे|उसके|से|के)\s+)|((?!(\d+\s*))(उससे|उसके|से|के))\s+)(कम|छोटी|नीचे)|(?!(\d+\s*))(कम|नीचे))"; + public const string EqualRegexNoNumberSucceed = @"(((?!(उसके|इसके|के)\s+)|((?!(\d+\s*))(उसके|इसके|के)\s+))(समान|बराबर))"; + public static 
readonly string NumberGroupCondition1 = $@"(((-|माइनस|निगेटिव)\s*)?(\b(?({NumberSplitMark}.)+)(\s+(या)?\s+{EqualRegex})|({MoreOrEqualNew}|{MoreRegex})\s*(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeMoreRegex1 = $@"(?({NumberSplitMark}.)+)\s*साल\s*({MoreRegex}|{MoreOrEqualNew})|(?{NumberGroupCondition2})\s*({MoreRegex}|{MoreOrEqualNew})"; public static readonly string OneNumberRangeMoreRegex2 = $@"(?({NumberSplitMark}.)+)\s*{MoreOrEqualSuffix}"; - public static readonly string OneNumberRangeMoreSeparateRegex = $@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+or\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\s+(?({NumberSplitMark}.)+)(\s+or\s+){EqualRegexNoNumberSucceed})"; - public static readonly string OneNumberRangeLessRegex1 = $@"({LessOrEqual}|{LessRegex})\s*(the\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeMoreSeparateRegex = $@"((?({NumberSplitMark}.)+)\s+{EqualRegex}(\s+या\s+){MoreRegexNoNumberSucceed})|((?({NumberSplitMark}.)+)\s+{MoreRegex}(\s+या\s+){EqualRegexNoNumberSucceed})"; + public static readonly string OneNumberRangeLessRegex0 = $@"({LessOrEqualNew}|{LessRegex})\s*(?({NumberSplitMark}.)+)(\s+(या)?\s+{EqualRegex})|({LessOrEqualNew}|{LessRegex})\s*(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeLessRegex1 = $@"(?({NumberSplitMark}.)+)\s*साल\s*({LessRegex}|{LessOrEqualNew})|(?{NumberGroupCondition2})\s*({LessRegex}|{LessOrEqualNew})"; public static readonly string OneNumberRangeLessRegex2 = $@"(?({NumberSplitMark}.)+)\s*{LessOrEqualSuffix}"; - public static readonly string OneNumberRangeLessSeparateRegex = $@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+or\s+){LessRegexNoNumberSucceed})|({LessRegex}\s+(?({NumberSplitMark}.)+)(\s+or\s+){EqualRegexNoNumberSucceed})"; - public static readonly string OneNumberRangeEqualRegex = $@"{EqualRegex}\s*(the\s+)?(?({NumberSplitMark}.)+)"; - public static readonly string TwoNumberRangeRegex1 = 
$@"between\s*(the\s+)?(?({NumberSplitMark}.)+)\s*and\s*(the\s+)?(?({NumberSplitMark}.)+)"; - public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\s*(and|but|,)\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"; - public static readonly string TwoNumberRangeRegex3 = $@"({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\s*(and|but|,)\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})"; - public static readonly string TwoNumberRangeRegex4 = $@"(from\s+)?(?({NumberSplitMark}(?!\bfrom\b).)+)\s*{TillRegex}\s*(the\s+)?(?({NumberSplitMark}.)+)"; - public const string AmbiguousFractionConnectorsRegex = @"(\bin\b)"; + public static readonly string OneNumberRangeLessSeparateRegex = $@"((?({NumberSplitMark}.)+)\s*{EqualRegex}(\s+या\s+){LessRegexNoNumberSucceed})|((?({NumberSplitMark}.)+)\s*{LessRegex}(\s+या\s+){EqualRegexNoNumberSucceed})"; + public static readonly string OneNumberRangeEqualRegex = $@"(?({NumberSplitMark}.)+)\s*साल\s*{EqualRegex}|(?{NumberGroupCondition2})\s*{EqualRegex}"; + public static readonly string TwoNumberRangeRegex1 = $@"(?({NumberSplitMark}.)+)\s*और\s*(?({NumberSplitMark}.)+)\s*के\s*बीच"; + public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreSeparateRegex}|{OneNumberRangeMoreRegex0}|{OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\s*(है\s*)?(और|पर|,)\s*({OneNumberRangeLessSeparateRegex}|{OneNumberRangeLessRegex0}|{OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"; + public static readonly string TwoNumberRangeRegex3 = $@"({OneNumberRangeLessSeparateRegex}|{OneNumberRangeLessRegex0}|{OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\s*(है\s*)?(और|पर|,)\s*({OneNumberRangeMoreSeparateRegex}|{OneNumberRangeMoreRegex0}|{OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})"; + public static readonly string TwoNumberRangeRegex4 = $@"(?({NumberSplitMark}(?!\b\s+से\b).)+)\s*{TillRegex}\s*(हरेक|लेकर)?\s*(?({NumberSplitMark}.)+)"; + public 
const string AmbiguousFractionConnectorsRegex = @"(\bमें(?![\u0900-\u097f]))"; public const char DecimalSeparatorChar = '.'; - public const string FractionMarkerToken = @"over"; + public const string FractionMarkerToken = @"भाग"; public const char NonDecimalSeparatorChar = ','; - public const string HalfADozenText = @"six"; - public const string WordSeparatorToken = @"and"; - public static readonly string[] WrittenDecimalSeparatorTexts = { @"point" }; - public static readonly string[] WrittenGroupSeparatorTexts = { @"punto" }; - public static readonly string[] WrittenIntegerSeparatorTexts = { @"and" }; - public static readonly string[] WrittenFractionSeparatorTexts = { @"and" }; - public const string HalfADozenRegex = @"half\s+a\s+dozen"; - public static readonly string DigitalNumberRegex = $@"((?<=\b)(hundred|thousand|[mb]illion|trillion|dozen(s)?)(?=\b))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; + public const string HalfADozenText = @"छह"; + public const string WordSeparatorToken = @"और"; + public static readonly string[] WrittenDecimalSeparatorTexts = { @"पॉइंट", @"दशमलव" }; + public static readonly string[] WrittenGroupSeparatorTexts = { @"कोमा" }; + public static readonly string[] WrittenIntegerSeparatorTexts = { @"और" }; + public static readonly string[] WrittenFractionSeparatorTexts = { @"और" }; + public const string HalfADozenRegex = @"(आधा\s+)दर्जन"; + public static readonly string DigitalNumberRegex = $@"((?<=\b)(सौ|हजार|हज़ार|लाख|करोड़|अरब|खरब|दर्जन|हंड्रेड|थाउजेंड|मिलियन|बिलियन|ट्रिलियन|hundred|thousand|million|billion|trillion))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; public static readonly Dictionary CardinalNumberMap = new Dictionary { - { @"a", 1 }, - { @"zero", 0 }, - { @"an", 1 }, + { @"शून्य", 0 }, + { @"एक", 1 }, + { @"दो", 2 }, + { @"तीन", 3 }, + { @"फ़ोर", 4 }, + { @"चार", 4 }, + { @"पाँच", 5 }, + { @"पांच", 5 }, + { @"छह", 6 }, + { @"सात", 7 }, + { @"आठ", 8 }, + { @"नौ", 9 }, + { @"दस", 10 }, + { @"ग्यारह", 
11 }, + { @"बारह", 12 }, + { @"दर्जन", 12 }, + { @"दर्जनों", 12 }, + { @"डज़न", 12 }, + { @"तेरह", 13 }, + { @"चौदह", 14 }, + { @"पंद्रह", 15 }, + { @"सोलह", 16 }, + { @"सत्रह", 17 }, + { @"अठारह", 18 }, + { @"उन्नीस", 19 }, + { @"बीस", 20 }, + { @"इक्कीस", 21 }, + { @"बाईस", 22 }, + { @"बाइस", 22 }, + { @"तेईस", 23 }, + { @"तेइस", 23 }, + { @"चौबीस", 24 }, + { @"पच्चीस", 25 }, + { @"छब्बीस", 26 }, + { @"सत्ताईस", 27 }, + { @"सत्ताइस", 27 }, + { @"अट्ठाईस", 28 }, + { @"अट्ठाइस", 28 }, + { @"उनतीस", 29 }, + { @"तीस", 30 }, + { @"इकतीस", 31 }, + { @"इकत्तीस", 31 }, + { @"बत्तीस", 32 }, + { @"तैंतीस", 33 }, + { @"चौंतीस", 34 }, + { @"पैंतीस", 35 }, + { @"पैंतीसवां", 35 }, + { @"छ्त्तीस", 36 }, + { @"सैंतीस", 37 }, + { @"अड़तीस", 38 }, + { @"उनतालीस", 39 }, + { @"चालीस", 40 }, + { @"इकतालीस", 41 }, + { @"बयालीस", 42 }, + { @"तैंतालीस", 43 }, + { @"चौंतालीस", 44 }, + { @"पैंतालीस", 45 }, + { @"छियालीस", 46 }, + { @"सैंतालीस", 47 }, + { @"अड़तालीस", 48 }, + { @"उनचास", 49 }, + { @"पचास", 50 }, + { @"इक्याबन", 51 }, + { @"बावन", 52 }, + { @"तिरेपन", 53 }, + { @"चौबन", 54 }, + { @"पचपन", 55 }, + { @"छप्पन", 56 }, + { @"सत्तावन", 57 }, + { @"अट्ठावन", 58 }, + { @"उनसठ", 59 }, + { @"साठ", 60 }, + { @"एकसठ", 61 }, + { @"इकसठ", 61 }, + { @"बासठ", 62 }, + { @"तिरसठ", 63 }, + { @"चौंसठ", 64 }, + { @"पैंसठ", 65 }, + { @"छियासठ", 66 }, + { @"सड़सठ", 67 }, + { @"सड़सठ", 67 }, + { @"अड़सठ", 68 }, + { @"उनहत्तर", 69 }, + { @"सत्तर", 70 }, + { @"इकहत्तर", 71 }, + { @"बहत्तर", 72 }, + { @"तिहत्तर", 73 }, + { @"चौहत्तर", 74 }, + { @"पचहत्तर", 75 }, + { @"छिहत्तर", 76 }, + { @"सतहत्तर", 77 }, + { @"अठहत्तर", 78 }, + { @"अठत्तर", 78 }, + { @"उनासी", 79 }, + { @"अस्सी", 80 }, + { @"इक्यासी", 81 }, + { @"बयासी", 82 }, + { @"तिरासी", 83 }, + { @"चौरासी", 84 }, + { @"पचासी", 85 }, + { @"छियासी", 86 }, + { @"सतासी", 87 }, + { @"अठासी", 88 }, + { @"नवासी", 89 }, + { @"नब्बे", 90 }, + { @"इक्यानबे", 91 }, + { @"बानवे", 92 }, + { @"तिरानवे", 93 }, + { @"चौरानवे", 94 }, + { @"पचानवे", 95 }, + { 
@"पंचानबे", 95 }, + { @"छियानवे", 96 }, + { @"सत्तानवे", 97 }, + { @"अट्ठानवे", 98 }, + { @"निन्यानवे", 99 }, + { @"सौ", 100 }, + { @"हजार", 1000 }, + { @"हज़ार", 1000 }, + { @"लाख", 100000 }, + { @"करोड़", 10000000 }, + { @"अरब", 1000000000 }, + { @"खरब", 100000000000 }, + { @"हंड्रेड", 100 }, + { @"थाउजेंड", 1000 }, + { @"मिलियन", 1000000 }, + { @"बिलियन", 1000000000 }, + { @"ट्रिलियन", 1000000000000 }, + { @"hundred", 100 }, + { @"thousand", 1000 }, + { @"million", 1000000 }, + { @"billion", 1000000000 }, + { @"trillion", 1000000000000 }, { @"one", 1 }, { @"two", 2 }, { @"three", 3 }, @@ -127,8 +318,6 @@ public static class NumbersDefinitions { @"ten", 10 }, { @"eleven", 11 }, { @"twelve", 12 }, - { @"dozen", 12 }, - { @"dozens", 12 }, { @"thirteen", 13 }, { @"fourteen", 14 }, { @"fifteen", 15 }, @@ -144,36 +333,98 @@ public static class NumbersDefinitions { @"seventy", 70 }, { @"eighty", 80 }, { @"ninety", 90 }, - { @"hundred", 100 }, - { @"thousand", 1000 }, - { @"million", 1000000 }, - { @"billion", 1000000000 }, - { @"trillion", 1000000000000 } + { @"वन", 1 }, + { @"टू", 2 }, + { @"थ्री", 3 }, + { @"फोर", 4 }, + { @"फ़ाइव", 5 }, + { @"सिक्स", 6 }, + { @"सेवन", 7 }, + { @"एइट", 8 }, + { @"नैन", 9 }, + { @"टेन", 10 }, + { @"इलेवन", 11 }, + { @"ट्वेल्व", 12 }, + { @"थर्टीन", 13 }, + { @"फ़ोर्टीन", 14 }, + { @"फ़िफ़्टीन", 15 }, + { @"सिक्सटीन", 16 }, + { @"सेवेनटीन", 17 }, + { @"एइटीन", 18 }, + { @"नईनटीन", 19 }, + { @"ट्वेन्टी", 20 }, + { @"ट्वेंटी", 20 }, + { @"थर्टी", 30 }, + { @"फ़ोर्टी", 40 }, + { @"फ़िफ़्टी", 50 }, + { @"सिक्सटी", 60 }, + { @"सेवेंटी", 70 }, + { @"एइट्टी", 80 }, + { @"नैनटी", 90 } }; public static readonly Dictionary OrdinalNumberMap = new Dictionary { + { @"प्रथम", 1 }, + { @"पहला", 1 }, + { @"पहले", 1 }, + { @"पहली", 1 }, + { @"दूसरा", 2 }, + { @"दूसरे", 2 }, + { @"दूसरी", 2 }, + { @"आधा", 2 }, + { @"आधे", 2 }, + { @"तीसरा", 3 }, + { @"तीसरी", 3 }, + { @"तिहाई", 3 }, + { @"तीसरे", 3 }, + { @"चौथा", 4 }, + { @"चौथी", 4 }, + { @"चौथाई", 4 
}, + { @"छठवा", 6 }, + { @"साँतवा", 7 }, + { @"आँठवा", 8 }, + { @"इकत्तीसवां", 31 }, + { @"फ़र्स्ट", 1 }, + { @"सेकेंड", 2 }, + { @"थर्ड", 3 }, + { @"फोर्थ", 4 }, + { @"फिफ्थ", 5 }, + { @"पाँचवाँ", 5 }, + { @"पांचवां", 5 }, + { @"सिक्स्थ", 6 }, + { @"सेवंथ", 7 }, + { @"एइथ", 8 }, + { @"नाइन्थ", 9 }, + { @"इलेवेन्थ", 11 }, + { @"ट्वेल्फ्थ", 12 }, + { @"थरटीन्थ", 13 }, + { @"फोर्टीन्थ", 14 }, + { @"फिफ्टीन्थ", 15 }, + { @"सिक्सटीन्थ", 16 }, + { @"सेवेंटीन्थ", 17 }, + { @"एइटीन्थ", 18 }, + { @"नाइटीन्थ", 19 }, + { @"टेन्थ", 10 }, + { @"ट्वेन्टीएथ", 20 }, + { @"ट्वेंटीएथ", 20 }, + { @"थरटिएथ", 30 }, + { @"फ़ोर्टीएथ", 40 }, + { @"फ़िफ़्टीएथ", 50 }, + { @"सिक्सटिएथ", 60 }, + { @"सेवेंटिएथ", 70 }, + { @"एइटिएथ", 80 }, + { @"नैनटिएथ", 90 }, + { @"हंड्रेडथ", 100 }, + { @"थाउजेंडथ", 1000 }, + { @"मिलियनथ", 1000000 }, + { @"बिलियनथ", 1000000000 }, + { @"ट्रिलियनथ", 1000000000000 }, { @"first", 1 }, { @"second", 2 }, - { @"secondary", 2 }, - { @"half", 2 }, { @"third", 3 }, - { @"fourth", 4 }, - { @"quarter", 4 }, { @"fifth", 5 }, - { @"sixth", 6 }, - { @"seventh", 7 }, - { @"eighth", 8 }, { @"ninth", 9 }, - { @"tenth", 10 }, - { @"eleventh", 11 }, { @"twelfth", 12 }, - { @"thirteenth", 13 }, - { @"fourteenth", 14 }, - { @"fifteenth", 15 }, - { @"sixteenth", 16 }, - { @"seventeenth", 17 }, - { @"eighteenth", 18 }, - { @"nineteenth", 19 }, { @"twentieth", 20 }, { @"thirtieth", 30 }, { @"fortieth", 40 }, @@ -181,70 +432,54 @@ public static class NumbersDefinitions { @"sixtieth", 60 }, { @"seventieth", 70 }, { @"eightieth", 80 }, - { @"ninetieth", 90 }, - { @"hundredth", 100 }, - { @"thousandth", 1000 }, - { @"millionth", 1000000 }, - { @"billionth", 1000000000 }, - { @"trillionth", 1000000000000 }, - { @"firsts", 1 }, - { @"halves", 2 }, - { @"thirds", 3 }, - { @"fourths", 4 }, - { @"quarters", 4 }, - { @"fifths", 5 }, - { @"sixths", 6 }, - { @"sevenths", 7 }, - { @"eighths", 8 }, - { @"ninths", 9 }, - { @"tenths", 10 }, - { @"elevenths", 11 }, - { @"twelfths", 12 }, - { 
@"thirteenths", 13 }, - { @"fourteenths", 14 }, - { @"fifteenths", 15 }, - { @"sixteenths", 16 }, - { @"seventeenths", 17 }, - { @"eighteenths", 18 }, - { @"nineteenths", 19 }, - { @"twentieths", 20 }, - { @"thirtieths", 30 }, - { @"fortieths", 40 }, - { @"fiftieths", 50 }, - { @"sixtieths", 60 }, - { @"seventieths", 70 }, - { @"eightieths", 80 }, - { @"ninetieths", 90 }, - { @"hundredths", 100 }, - { @"thousandths", 1000 }, - { @"millionths", 1000000 }, - { @"billionths", 1000000000 }, - { @"trillionths", 1000000000000 } + { @"ninetieth", 90 } }; public static readonly Dictionary RoundNumberMap = new Dictionary { + { @"सौ", 100 }, + { @"सैकड़ा", 100 }, + { @"हजार", 1000 }, + { @"हज़ार", 1000 }, + { @"लाख", 100000 }, + { @"करोड़", 10000000 }, + { @"अरब", 1000000000 }, + { @"खरब", 100000000000 }, + { @"हंड्रेड", 100 }, + { @"थाउजेंड", 1000 }, + { @"मिलियन", 1000000 }, + { @"बिलियन", 1000000000 }, + { @"ट्रिलियन", 1000000000000 }, { @"hundred", 100 }, { @"thousand", 1000 }, { @"million", 1000000 }, { @"billion", 1000000000 }, { @"trillion", 1000000000000 }, - { @"hundredth", 100 }, - { @"thousandth", 1000 }, - { @"millionth", 1000000 }, - { @"billionth", 1000000000 }, - { @"trillionth", 1000000000000 }, - { @"hundredths", 100 }, - { @"thousandths", 1000 }, - { @"millionths", 1000000 }, - { @"billionths", 1000000000 }, - { @"trillionths", 1000000000000 }, - { @"dozen", 12 }, - { @"dozens", 12 }, - { @"k", 1000 }, - { @"m", 1000000 }, - { @"g", 1000000000 }, - { @"b", 1000000000 }, - { @"t", 1000000000000 } + { @"दर्जन", 12 }, + { @"दर्जनों", 12 }, + { @"डज़न", 12 }, + { @"k", 1000 } + }; + public static readonly Dictionary DecimalUnitsMap = new Dictionary + { + { @"डेढ", 1.5 }, + { @"डेढ़", 1.5 }, + { @"डेढ़", 1.5 }, + { @"सवा", 1.25 }, + { @"सावा", 1.25 }, + { @"ढाई", 2.5 } + }; + public static readonly Dictionary ZeroToNineMap = new Dictionary + { + { '०', 0 }, + { '१', 1 }, + { '२', 2 }, + { '३', 3 }, + { '४', 4 }, + { '५', 5 }, + { '६', 6 }, + { '७', 7 }, + { '८', 
8 }, + { '९', 9 } }; public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { @@ -252,37 +487,150 @@ public static class NumbersDefinitions }; public static readonly Dictionary RelativeReferenceOffsetMap = new Dictionary { + { @"अंतिम", @"0" }, + { @"आखिरी", @"0" }, + { @"अगला", @"1" }, + { @"अगले", @"1" }, + { @"अगली", @"1" }, + { @"पिछला", @"-1" }, + { @"पिछले", @"-1" }, + { @"पिछली", @"-1" }, + { @"वर्तमान", @"0" }, + { @"पिछला वाला", @"-1" }, + { @"पिछले वाले", @"-1" }, + { @"पिछली वाली", @"-1" }, + { @"अगला वाला", @"1" }, + { @"अगले वाले", @"1" }, + { @"अगली वाली", @"1" }, + { @"अंतिम वाला", @"0" }, + { @"अंतिम वाले", @"0" }, + { @"अंतिम वाली", @"0" }, + { @"आखिरी वाला", @"0" }, + { @"आखिरी वाले", @"0" }, + { @"आखिरी वाली", @"0" }, + { @"अभी वाला", @"0" }, + { @"आखिरी से पहला", @"-1" }, + { @"आखिरी से पहले", @"-1" }, + { @"आखिरी से पहली", @"-1" }, + { @"आखिरी से दूसरा", @"-2" }, + { @"आखिरी से दूसरे", @"-2" }, + { @"आखिरी से दूसरी", @"-2" }, + { @"आखिरी का पहला", @"0" }, + { @"आखिरी के पहले", @"0" }, + { @"आखिरी की पहली", @"0" }, + { @"आखिरी का दूसरा", @"-1" }, + { @"आखिरी के दूसरे", @"-1" }, + { @"आखिरी की दूसरी", @"-1" }, + { @"अंतिम से दूसरा", @"-1" }, + { @"अंतिम से दूसरे", @"-1" }, + { @"अंतिम से दूसरी", @"-1" }, + { @"अंतिम का पहला", @"0" }, + { @"अंतिम के पहले", @"0" }, + { @"अंतिम की पहली", @"0" }, + { @"अंतिम का दूसरा", @"-1" }, + { @"अंतिम के दूसरे", @"-1" }, + { @"अंतिम की दूसरी", @"-1" }, + { @"आखिरी का", @"0" }, + { @"आखिरी के", @"0" }, + { @"आखिरी की", @"0" }, + { @"अंतिम का", @"0" }, + { @"अंतिम के", @"0" }, + { @"अंतिम की", @"0" }, + { @"आखिरी से पहले का", @"-1" }, + { @"अंतिम से पहले का", @"-1" }, + { @"आखिरी के बगल वाला", @"-1" }, + { @"अंतिम के बगल वाला", @"-1" }, + { @"आखिरी वाले का पिछला वाला", @"-1" }, + { @"अंतिम वाले का पिछला वाला", @"-1" }, + { @"आखिरी वाले का पिछला", @"-1" }, + { @"अंतिम वाले का पिछला", @"-1" }, + { @"आखिरी वाले से पहले वाला", @"-1" }, + { @"अंतिम वाले से पहले वाला", @"-1" }, + { @"अगली चीज", @"1" }, 
+ { @"पिछला पर केवल एक", @"-1" }, + { @"लास्ट", @"0" }, + { @"प्रीवियस", @"-1" }, + { @"नेक्स्ट", @"1" }, + { @"करेंट", @"0" }, { @"last", @"0" }, - { @"next one", @"1" }, - { @"current", @"0" }, - { @"current one", @"0" }, - { @"previous one", @"-1" }, - { @"the second to last", @"-1" }, - { @"the one before the last one", @"-1" }, - { @"the one before the last", @"-1" }, - { @"next to last", @"-1" }, - { @"penultimate", @"-1" }, - { @"the last but one", @"-1" }, - { @"antepenultimate", @"-2" }, + { @"previous", @"-1" }, { @"next", @"1" }, - { @"previous", @"-1" } + { @"current", @"0" } }; public static readonly Dictionary RelativeReferenceRelativeToMap = new Dictionary { + { @"अंतिम", @"end" }, + { @"आखिरी", @"end" }, + { @"अगला", @"current" }, + { @"अगली", @"current" }, + { @"अगले", @"current" }, + { @"पिछला", @"current" }, + { @"पिछले", @"current" }, + { @"पिछली", @"current" }, + { @"वर्तमान", @"current" }, + { @"पिछला वाला", @"current" }, + { @"पिछले वाले", @"current" }, + { @"पिछली वाली", @"current" }, + { @"अगला वाला", @"current" }, + { @"अगले वाले", @"current" }, + { @"अगली वाली", @"current" }, + { @"अभी वाला", @"current" }, + { @"अंतिम वाला", @"end" }, + { @"अंतिम वाले", @"end" }, + { @"अंतिम वाली", @"end" }, + { @"आखिरी वाला", @"end" }, + { @"आखिरी वाले", @"end" }, + { @"आखिरी वाली", @"end" }, + { @"आखिरी से पहला", @"end" }, + { @"आखिरी से पहले", @"end" }, + { @"आखिरी से पहली", @"end" }, + { @"आखिरी से दूसरा", @"end" }, + { @"आखिरी से दूसरे", @"end" }, + { @"आखिरी से दूसरी", @"end" }, + { @"आखिरी का पहला", @"end" }, + { @"आखिरी के पहले", @"end" }, + { @"आखिरी की पहली", @"end" }, + { @"आखिरी का दूसरा", @"end" }, + { @"आखिरी के दूसरे", @"end" }, + { @"आखिरी की दूसरी", @"end" }, + { @"अंतिम से पहला", @"end" }, + { @"अंतिम से पहले", @"end" }, + { @"अंतिम से पहली", @"end" }, + { @"अंतिम से दूसरा", @"end" }, + { @"अंतिम से दूसरे", @"end" }, + { @"अंतिम से दूसरी", @"end" }, + { @"अंतिम का पहला", @"end" }, + { @"अंतिम के पहले", @"end" }, + { @"अंतिम की पहली", 
@"end" }, + { @"अंतिम का दूसरा", @"end" }, + { @"अंतिम के दूसरे", @"end" }, + { @"अंतिम की दूसरी", @"end" }, + { @"आखिरी का", @"end" }, + { @"आखिरी के", @"end" }, + { @"आखिरी की", @"end" }, + { @"अंतिम का", @"end" }, + { @"अंतिम के", @"end" }, + { @"अंतिम की", @"end" }, + { @"आखिरी से पहले का", @"end" }, + { @"अंतिम से पहले का", @"end" }, + { @"आखिरी के बगल वाला", @"end" }, + { @"अंतिम के बगल वाला", @"end" }, + { @"आखिरी वाले का पिछला वाला", @"end" }, + { @"अंतिम वाले का पिछला वाला", @"end" }, + { @"आखिरी वाले का पिछला", @"end" }, + { @"अंतिम वाले का पिछला", @"end" }, + { @"आखिरी वाले से पहले वाला", @"end" }, + { @"अंतिम वाले से पहले वाला", @"end" }, + { @"अगली चीज", @"current" }, + { @"पिछला पर केवल एक", @"end" }, + { @"लास्ट", @"end" }, + { @"प्रीवियस", @"current" }, + { @"नेक्स्ट", @"current" }, + { @"करेंट", @"current" }, { @"last", @"end" }, - { @"next one", @"current" }, - { @"previous one", @"current" }, - { @"current", @"current" }, - { @"current one", @"current" }, - { @"the second to last", @"end" }, - { @"the one before the last one", @"end" }, - { @"the one before the last", @"end" }, - { @"next to last", @"end" }, - { @"penultimate", @"end" }, - { @"the last but one", @"end" }, - { @"antepenultimate", @"end" }, + { @"previous", @"current" }, { @"next", @"current" }, - { @"previous", @"current" } + { @"current", @"current" } }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/NumbersWithUnitDefinitions.cs index 69378ff842..2076cc5f69 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/NumbersWithUnitDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/NumbersWithUnitDefinitions.cs @@ -23,276 +23,279 @@ public static class NumbersWithUnitDefinitions { public static readonly Dictionary AgeSuffixList = new Dictionary { - { @"Year", @"years old|year 
old|year-old|years-old|-year-old|-years-old|years of age|year of age" }, - { @"Month", @"months old|month old|month-old|months-old|-month-old|-months-old|month of age|months of age" }, - { @"Week", @"weeks old|week old|week-old|weeks-old|-week-old|-weeks-old|week of age|weeks of age" }, - { @"Day", @"days old|day old|day-old|days-old|-day-old|-days-old|day of age|days of age" } + { @"Year", @"years|year|year का|years का|यर्स के|यर्स का|यर्स की|यर्स के|यर का|यर की|यर के|यर|साल|साल पुराना|साल पुरानी|साल पुराने|साल की|साल का|साल के|वर्ष पुराणी|वर्ष पुराने|वर्ष पुराना|वर्ष का|वर्ष के|वर्ष की थी|साल पुराना था|साल पुरानी थी|साल पुराने थे|साल की थी|साल का था|साल के थे|वर्ष पुराना था|वर्ष का था|वर्ष के थे|साल की आयु|साल का आयु|साल के आयु|की उम्र|के उम्र|साल से ज़्यादा पुरानी|साल की उमर|" }, + { @"Month", @"months|month|महीने पुराना|महीने की|महीने का|महीने के|महीने की उम्र|महीने|महीने पहले की" }, + { @"Week", @"week|weeks|weeks का|weeks की|week के|वीक्स का|वीक्स|सप्ताह पुराना|सप्ताह की|सप्ताह का|सप्ताह के|हफ़्ता पुराना|हफ्ते की|हफ्ते का|हफ्ते के|हफ़्ते की उम्र|हफ़्ता पुरानी|हफ़्ते पुरानी" }, + { @"Day", @"day|days|दिन पुराना|दिन की|दिन का|दिन के|दिन पुरानी" } }; public static readonly Dictionary AreaSuffixList = new Dictionary { - { @"Square kilometer", @"sq km|sq kilometer|sq kilometre|sq kilometers|sq kilometres|square kilometer|square kilometre|square kilometers|square kilometres|km2|km^2|km²" }, - { @"Square hectometer", @"sq hm|sq hectometer|sq hectometre|sq hectometers|sq hectometres|square hectometer|square hectometre|square hectometers|square hectometres|hm2|hm^2|hm²|hectare|hectares" }, - { @"Square decameter", @"sq dam|sq decameter|sq decametre|sq decameters|sq decametres|square decameter|square decametre|square decameters|square decametres|sq dekameter|sq dekametre|sq dekameters|sq dekametres|square dekameter|square dekametre|square dekameters|square dekametres|dam2|dam^2|dam²" }, - { @"Square meter", @"sq m|sq meter|sq metre|sq meters|sq metres|sq metre|square 
meter|square meters|square metre|square metres|m2|m^2|m²" }, - { @"Square decimeter", @"sq dm|sq decimeter|sq decimetre|sq decimeters|sq decimetres|square decimeter|square decimetre|square decimeters|square decimetres|dm2|dm^2|dm²" }, - { @"Square centimeter", @"sq cm|sq centimeter|sq centimetre|sq centimeters|sq centimetres|square centimeter|square centimetre|square centimeters|square centimetres|cm2|cm^2|cm²" }, - { @"Square millimeter", @"sq mm|sq millimeter|sq millimetre|sq millimeters|sq millimetres|square millimeter|square millimetre|square millimeters|square millimetres|mm2|mm^2|mm²" }, - { @"Square inch", @"sq in|sq inch|square inch|square inches|in2|in^2|in²" }, - { @"Square foot", @"sqft|sq ft|sq foot|sq feet|square foot|square feet|feet2|feet^2|feet²|ft2|ft^2|ft²" }, - { @"Square mile", @"sq mi|sq mile|sqmiles|square mile|square miles|mi2|mi^2|mi²" }, - { @"Square yard", @"sq yd|sq yard|sq yards|square yard|square yards|yd2|yd^2|yd²" }, - { @"Acre", @"-acre|acre|acres" } + { @"Square kilometer", @"sq km|sq kilometer|sq kilometre|sq kilometers|sq kilometres|square kilometer|square kilometre|square kilometers|square kilometres|km2|km^2|km²|वर्ग किमी|वर्ग किलोमीटर|वर्ग कि.मी.|वर्ग कि. 
मी.|वर्ग किलो मीटर|वर्ग कीलोमीटर|वर्ग कीलो मीटर|कीमी2|कीमी²" }, + { @"Square hectometer", @"sq hm|sq hectometer|sq hectometre|sq hectometers|sq hectometres|square hectometer|square hectometre|square hectometers|square hectometres|hm2|hm^2|hm²|hectare|hectares|वर्ग एचएम|वर्ग हेक्टोमीटर|वर्ग हेक्टोमिटर|वर्ग हेक्टो मीटर|वर्ग हेक्टो मिटर|एचएम2|हेक्टेयर" }, + { @"Square decameter", @"sq dam|sq decameter|sq decametre|sq decameters|sq decametres|square decameter|square decametre|square decameters|square decametres|sq dekameter|sq dekametre|sq dekameters|sq dekametres|square dekameter|square dekametre|square dekameters|square dekametres|dam2|dam^2|dam²|वर्ग डेकामीटर|वर्ग डेका मीटर|वर्ग डेका मिटर" }, + { @"Square meter", @"sq m|sq meter|sq metre|sq meters|sq metres|sq metre|square meter|square meters|square metre|square metres|m2|m^2|m²|वर्ग मीटर|मीटर वर्ग|स्क्वेयर मीटर|मी²|मी.²|मी2" }, + { @"Square decimeter", @"sq dm|sq decimeter|sq decimetre|sq decimeters|sq decimetres|square decimeter|square decimetre|square decimeters|square decimetres|dm2|dm^2|dm²|वर्ग डेसीमीटर|वर्ग डेसी मीटर" }, + { @"Square centimeter", @"sq cm|sq centimeter|sq centimetre|sq centimeters|sq centimetres|square centimeter|square centimetre|square centimeters|square centimetres|cm2|cm^2|cm²|वर्ग सेंटीमीटर|वर्ग सेमी|वर्ग से.मी.|वर्ग सेंटीमीटर|वर्ग सेन्टीमीटर|वर्ग सेण्टीमीटर|वर्ग सेंटी मीटर|वर्ग सेन्टी मीटर|वर्ग से. मी.|सेमी²|से.मी.²" }, + { @"Square millimeter", @"sq mm|sq millimeter|sq millimetre|sq millimeters|sq millimetres|square millimeter|square millimetre|square millimeters|square millimetres|mm2|mm^2|mm²|वर्ग मिमी|वर्ग मि.मी.|वर्ग मिलीमीटर|वर्ग मिलिमीटर|वर्ग मीलीमीटर|वर्ग मिली मीटर|वर्ग मीली मीटर|वर्ग मि. 
मी.|मिमी²" }, + { @"Square inch", @"sq in|sq inch|square inch|square inches|in2|in^2|in²|वर्ग इंच|इंच²" }, + { @"Square foot", @"sqft|sq ft|sq foot|sq feet|square foot|square feet|feet2|feet^2|feet²|ft2|ft^2|ft²|वर्ग फुट|वर्ग फ़ीट|वर्ग फीट|वर्ग फ़ुट|फ़ुट²|फुट²|स्क्वेयर फ़ुट|स्क्वेयर फुट" }, + { @"Square mile", @"sq mi|sq mile|sqmiles|square mile|square miles|mi2|mi^2|mi²|वर्ग मील|वर्ग माइल|वर्ग माइल्स|वर्ग मील²|स्क्वेयर मील|स्क्वेयर माइल|स्क्वेयर माइल्स" }, + { @"Square yard", @"sq yd|sq yard|sq yards|square yard|square yards|yd2|yd^2|yd²|वर्ग गज|स्क्वेयर यार्ड|स्क्वेयर गज|गज²|यार्ड²" }, + { @"Acre", @"-acre|acre|acres|-एकड़" } }; public static readonly Dictionary CurrencySuffixList = new Dictionary { - { @"Abkhazian apsar", @"abkhazian apsar|apsars" }, - { @"Afghan afghani", @"afghan afghani|؋|afn|afghanis|afghani" }, - { @"Pul", @"pul" }, - { @"Euro", @"euros|euro|€|eur" }, - { @"Cent", @"cents|cent|-cents|-cent" }, - { @"Albanian lek", @"albanian lek|leks|lek" }, - { @"Qindarkë", @"qindarkë|qindarkës|qindarke|qindarkes" }, - { @"Angolan kwanza", @"angolan kwanza|kz|aoa|kwanza|kwanzas|angolan kwanzas" }, - { @"Armenian dram", @"armenian drams|armenian dram" }, - { @"Aruban florin", @"aruban florin|ƒ|awg|aruban florins" }, - { @"Bangladeshi taka", @"bangladeshi taka|৳|bdt|taka|takas|bangladeshi takas" }, - { @"Paisa", @"poisha|paisa" }, - { @"Bhutanese ngultrum", @"bhutanese ngultrum|nu.|btn" }, - { @"Chetrum", @"chetrums|chetrum" }, - { @"Bolivian boliviano", @"bolivian boliviano|bob|bs.|bolivia boliviano|bolivia bolivianos|bolivian bolivianos" }, - { @"Bosnia and Herzegovina convertible mark", @"bosnia and herzegovina convertible mark|bam" }, - { @"Fening", @"fenings|fenings" }, - { @"Botswana pula", @"botswana pula|bwp|pula|pulas|botswana pulas" }, - { @"Thebe", @"thebe" }, - { @"Brazilian real", @"brazilian real|r$|brl|brazil real|brazil reals|brazilian reals" }, - { @"Bulgarian lev", @"bulgarian lev|bgn|лв|bulgaria lev|bulgaria levs|bulgarian levs" }, - { 
@"Stotinka", @"stotinki|stotinka" }, - { @"Cambodian riel", @"cambodian riel|khr|៛|cambodia riel|cambodia riels|cambodian riels" }, - { @"Cape Verdean escudo", @"cape verdean escudo|cve" }, - { @"Costa Rican colón", @"costa rican colón|costa rican colóns|crc|₡|costa rica colón|costa rica colóns|costa rican colon|costa rican colons|costa rica colon|costa rica colons" }, - { @"Salvadoran colón", @"svc|salvadoran colón|salvadoran colóns|salvador colón|salvador colóns|salvadoran colon|salvadoran colons|salvador colon|salvador colons" }, - { @"Céntimo", @"céntimo" }, - { @"Croatian kuna", @"croatian kuna|kn|hrk|croatia kuna|croatian kunas|croatian kuna kunas" }, - { @"Lipa", @"lipa" }, - { @"Czech koruna", @"czech koruna|czk|kč|czech korunas" }, - { @"Haléř", @"haléř" }, - { @"Eritrean nakfa", @"eritrean nakfa|nfk|ern|eritrean nakfas" }, - { @"Ethiopian birr", @"ethiopian birr|etb" }, + { @"Abkhazian apsar", @"abkhazian apsar|apsars|अब्खाज़ियन् अप्सर|अप्सरस" }, + { @"Afghan afghani", @"afghan afghani|afn|afghanis|afghani|अफगान अफघानी|؋|अफन|अफघानीस|अफघानी" }, + { @"Pul", @"pul|पुल" }, + { @"Euro", @"euros|euro|€|eur|यूरोज़|यूरो|यूर" }, + { @"Cent", @"cents|cent|-cents|-cent|सेंट्स|सेंट|-सेंट्स|-सेंट" }, + { @"Albanian lek", @"albanian lek|leks|lek|अल्बानियन लेक|लेक्स|लेक" }, + { @"Qindarkë", @"qindarkë|qindarkës|qindarke|qindarkes|किन्दारकइ|किन्दारकेस" }, + { @"Angolan kwanza", @"angolan kwanza|kz|kwanza|kwanzas|angolan kwanzas|अंगोलन क्वांज़ा|क्ज़|aoa|क्वांज़ा|क्वान्जास|अंगोलन क्वान्जास" }, + { @"Armenian dram", @"armenian drams|armenian dram|आर्मेनियन ड्रामस|आर्मेनियन ड्राम" }, + { @"Aruban florin", @"aruban florin|awg|aruban florins|अरुबान फ़्लोरिन|ƒ|awg|अरुबान फ़्लोरिंस" }, + { @"Bangladeshi taka", @"bangladeshi taka|৳|bdt|taka|takas|bangladeshi takas|बंगलादेशी टाका|৳|bdt|टाका|टाकास|बंगलादेशी टाकास" }, + { @"Paisa", @"poisha|paisa|पोइशा|पैसा" }, + { @"Bhutanese ngultrum", @"bhutanese ngultrum|btn|भूटानी गलट्रुम|nu.|भटन" }, + { @"Chetrum", 
@"chetrums|chetrum|चैतरूम्स|चैतरूम" }, + { @"Bolivian boliviano", @"bolivian boliviano|bolivia boliviano|bolivia bolivianos|bolivian bolivianos|बोलिवियन बोलिवियानो|bob|bs.|बोलिविया बोलिवियानो|बोलिविया बोलीवियानोस|बोलिवियन बोलीवियानोस" }, + { @"Bosnia and Herzegovina convertible mark", @"bosnia and herzegovina convertible mark|बॉस्निया और हर्ज़ेगोविना कन्वर्टिबल मार्क|bam" }, + { @"Fening", @"fenings|fenings|फेनिंग|फेनिंग्स" }, + { @"Botswana pula", @"botswana pula|pula|pulas|botswana pulas|बोट्सवाना पुला|bwp|पुला|पुलास|बोट्सवाना पुलास" }, + { @"Thebe", @"thebe|थेबे" }, + { @"Brazilian real", @"brazilian real|brazil real|brazil reals|brazilian reals|ब्राजिलियन रियाल|r$|brl|ब्राज़ील रियाल|ब्राज़ील रियाल्स|ब्राजिलियन रियाल्स" }, + { @"Bulgarian lev", @"bulgarian lev|bulgaria lev|bulgaria levs|bulgarian levs|बुल्गारियन लेव|bgn|лв|बुल्गारिया लेव|बुल्गारिया लेव्स|बुल्गारियन लेव्स" }, + { @"Stotinka", @"stotinki|stotinka|स्टोटिन्की|स्टोटिन्का" }, + { @"Cambodian riel", @"cambodian riel|cambodia riel|cambodia riels|cambodian riels|कम्बोडियन रील|khr|៛|कंबोडिया रील|कंबोडिया रील्स|कम्बोडियन रील्स" }, + { @"Cape Verdean escudo", @"cape verdean escudo|केप वरदीन एस्कुडो|cve" }, + { @"Costa Rican colón", @"costa rican colón|costa rican colóns|costa rica colón|costa rica colóns|costa rican colon|costa rican colons|costa rica colon|costa rica colons|कोस्टा रिकान कोलोन|कोस्टा रिकान कोलोन्स|crc|₡|कोस्टा रिका कोलोन|कोस्टा रिका कोलोन्स" }, + { @"Salvadoran colón", @"svc|salvadoran colón|salvadoran colóns|salvador colón|salvador colóns|salvadoran colon|salvadoran colons|salvador colon|salvador colons|सल्वाडोरान कोलोन|सल्वाडोरान कोलोन्स|सल्वाडोर कोलोन|सल्वाडोर कोलोन्स" }, + { @"Céntimo", @"céntimo|सेंतिमो" }, + { @"Croatian kuna", @"croatian kuna|croatia kuna|croatian kunas|croatian kuna kunas|क्रोएशियन कुना|kn|hrk|क्रोएशिया कुना|क्रोएशियन कुनास|क्रोएशियन कुना कुनास" }, + { @"Lipa", @"lipa|लीपा" }, + { @"Czech koruna", @"czech koruna|czech korunas|चेक कोरुना|czk|kč|चेक कोरुनास" }, + { 
@"Haléř", @"haléř|हेल्लर" }, + { @"Eritrean nakfa", @"eritrean nakfa|eritrean nakfas|एरिट्रान नाफ्का|nfk|ern|एरिट्रान नाफ्कास" }, + { @"Ethiopian birr", @"ethiopian birr|इथोपियन बीर|etb" }, { @"Gambian dalasi", @"gmd" }, - { @"Butut", @"bututs|butut" }, - { @"Georgian lari", @"georgian lari|lari|gel|₾" }, - { @"Tetri", @"tetri" }, - { @"Ghanaian cedi", @"ghanaian cedi|ghs|₵|gh₵" }, - { @"Pesewa", @"pesewas|pesewa" }, - { @"Guatemalan quetzal", @"guatemalan quetzal|gtq|guatemala quetzal" }, - { @"Haitian gourde", @"haitian gourde|htg" }, - { @"Honduran lempira", @"honduran lempira|hnl" }, - { @"Hungarian forint", @"hungarian forint|huf|ft|hungary forint|hungary forints|hungarian forints" }, - { @"Fillér", @"fillér" }, - { @"Iranian rial", @"iranian rial|irr|iran rial|iran rials|iranian rials" }, - { @"Yemeni rial", @"yemeni rial|yer|yemeni rials" }, - { @"Israeli new shekel", @"₪|ils|agora" }, - { @"Lithuanian litas", @"ltl|lithuanian litas|lithuan litas|lithuanian lit|lithuan lit" }, - { @"Japanese yen", @"japanese yen|jpy|yen|-yen|¥|yens|japanese yens|japan yen|japan yens" }, - { @"Kazakhstani tenge", @"kazakhstani tenge|kazakh tenge|kazak tenge|kzt" }, - { @"Kenyan shilling", @"kenyan shilling|kes" }, - { @"North Korean won", @"north korean won|kpw|north korean wons" }, - { @"South Korean won", @"south korean won|krw|south korean wons" }, - { @"Korean won", @"korean won|₩|korean wons" }, - { @"Kyrgyzstani som", @"kyrgyzstani som|kgs" }, - { @"Uzbekitan som", @"uzbekitan som|uzs" }, - { @"Lao kip", @"lao kip|lak|₭n|₭" }, - { @"Att", @"att" }, - { @"Lesotho loti", @"lesotho loti|lsl|loti" }, - { @"Sente", @"sente|lisente" }, - { @"South African rand", @"south african rand|zar|south africa rand|south africa rands|south african rands" }, - { @"Macanese pataca", @"macanese pataca|mop$|mop" }, - { @"Avo", @"avos|avo" }, - { @"Macedonian denar", @"macedonian denar|mkd|ден" }, - { @"Deni", @"deni" }, - { @"Malagasy ariary", @"malagasy ariary|mga" }, - { @"Iraimbilanja", 
@"iraimbilanja" }, - { @"Malawian kwacha", @"malawian kwacha|mk|mwk" }, - { @"Tambala", @"tambala" }, - { @"Malaysian ringgit", @"malaysian ringgit|rm|myr|malaysia ringgit|malaysia ringgits|malaysian ringgits" }, - { @"Mauritanian ouguiya", @"mauritanian ouguiya|um|mro|mauritania ouguiya|mauritania ouguiyas|mauritanian ouguiyas" }, - { @"Khoums", @"khoums" }, + { @"Butut", @"bututs|butut|बुटुट्स|बुटुट" }, + { @"Georgian lari", @"georgian lari|lari|जॉर्जियन लारी|लारी|gel|₾" }, + { @"Tetri", @"tetri|तेतरी" }, + { @"Ghanaian cedi", @"ghanaian cedi|घानायन सीडी|ghs|₵|gh₵" }, + { @"Pesewa", @"pesewas|pesewa|पसेवास|पसेवा" }, + { @"Guatemalan quetzal", @"guatemalan quetzal|gtq|guatemala quetzal|ग्वाटेमेलियन क्वेटज़ल|gtq|ग्वाटेमाला क्वेटज़ल" }, + { @"Haitian gourde", @"haitian gourde|htg|हेतिअन गॉर्ड|htg" }, + { @"Honduran lempira", @"honduran lempira|होंडुरं लेम्पिरा|hnl" }, + { @"Hungarian forint", @"hungarian forint|hungary forint|hungary forints|hungarian forints|हंगेरियन फ़ोरिंट|huf|ft|हंगरी फ़ोरिंट|हंगरी फोरिंट्स|हंगेरियन फोरिंट्स" }, + { @"Fillér", @"fillér|फिलर" }, + { @"Iranian rial", @"iranian rial|iran rial|iran rials|iranian rials|ईरानियन रियाल|irr|ईरान रियाल|ईरान रियाल्स|ईरानियन रियाल्स" }, + { @"Yemeni rial", @"yemeni rial|yemeni rials|येमेनी रियाल|yer|येमेनी रियाल्स" }, + { @"Israeli new shekel", @"agora|₪|ils|अगोरा" }, + { @"Lithuanian litas", @"lithuanian litas|lithuan litas|lithuanian lit|lithuan lit|ltl|लिथुअनिअन लिटास|लिथुयन लिटास|लिथुअनिअन लिट्|लिथुयन लिट्" }, + { @"Japanese yen", @"japanese yen|yen|-yen|¥|yens|japanese yens|japan yen|japan yens|येन|जापानी येन|jpy|-येन|¥|येन्स|जापानी येन्स|जापान येन|जापान येन्स|" }, + { @"Kazakhstani tenge", @"kazakhstani tenge|kazakh tenge|kazak tenge|कज़ाखस्तानी तेंगे|कजाख तेंगे|कज़ाक तेंगे|kzt" }, + { @"Kenyan shilling", @"kenyan shilling|केन्यन शिलिंग|kes" }, + { @"North Korean won", @"north korean won|north korean wons|उत्तर कोरिया वोन|kpw|उत्तर कोरिया वोनस" }, + { @"South Korean won", @"south korean won|south 
korean wons|दक्षिण कोरिया वोन|krw|दक्षिण कोरिया वोनस" }, + { @"Korean won", @"korean won|korean wons|कोरियाई वोन|₩|कोरियाई वोनस" }, + { @"Kyrgyzstani som", @"kyrgyzstani som|किर्ग़िज़स्तानी सोम|kgs" }, + { @"Uzbekitan som", @"uzbekitan som|उज़्बेकिस्तान सोम|uzs" }, + { @"Lao kip", @"lao kip|lak|लाओ किप|लाक|₭n|₭" }, + { @"Att", @"att|अट्ट" }, + { @"Lesotho loti", @"lesotho loti|लेसोथो लोटी|lsl|loti|लोटी" }, + { @"Sente", @"sente|lisente|सेन्टे|लिसेन्टे" }, + { @"South African rand", @"south african rand|zar|south africa rand|south africa rands|south african rands|सोउत् अफ़्रिcअन् रन्ड्|ज़र्|सोउत् अफ़्रिcअ रन्ड्|सोउत् अफ़्रिcअन् रन्ड्स्" }, + { @"Macanese pataca", @"macanese pataca|mop$|mop|मैकाणी पाटाका|मोप्" }, + { @"Avo", @"avos|avo|अवोस्|अवो" }, + { @"Macedonian denar", @"macedonian denar|mkd|ден|मcएडोनिअन् डेनर्" }, + { @"Deni", @"deni|डेनि" }, + { @"Malagasy ariary", @"malagasy ariary|mga|मैलागासी अरीरी" }, + { @"Iraimbilanja", @"iraimbilanja|इरैम्बिलन्ज" }, + { @"Malawian kwacha", @"malawian kwacha|mk|mwk|मलावी क्वाचा|" }, + { @"Tambala", @"tambala|टम्बल|टम्बला" }, + { @"Malaysian ringgit", @"malaysian ringgit|rm|myr|malaysia ringgit|malaysia ringgits|malaysian ringgits|मलेशियाई रिंग्गित|मलेशिया रिंगित|मलेशिया रिंगित|मलेशिया रिंगितस|मलेशियाई रिंगिट|" }, + { @"Mauritanian ouguiya", @"mauritanian ouguiya|um|mro|mauritania ouguiya|mauritania ouguiyas|mauritanian ouguiyas|मॉरिटानियन औगुइया|मॉरिटानिया औगुइया|मॉरिटानिया औगुइयास|मॉरिटानियन औगुइया|मॉरिटानियन औगुइयास" }, + { @"Khoums", @"khoums|खोमस|खुम्स" }, { @"Mongolian tögrög", @"mongolian tögrög|mnt|₮|mongolia tögrög|mongolia tögrögs|mongolian tögrögs|mongolian togrog|mongolian togrogs|mongolia togrog|mongolia togrogs" }, - { @"Mozambican metical", @"mozambican metical|mt|mzn|mozambica metical|mozambica meticals|mozambican meticals" }, - { @"Burmese kyat", @"burmese kyat|ks|mmk" }, - { @"Pya", @"pya" }, - { @"Nicaraguan córdoba", @"nicaraguan córdoba|nio" }, - { @"Nigerian naira", @"nigerian 
naira|naira|ngn|₦|nigeria naira|nigeria nairas|nigerian nairas" }, - { @"Kobo", @"kobo" }, - { @"Turkish lira", @"turkish lira|try|tl|turkey lira|turkey liras|turkish liras" }, - { @"Kuruş", @"kuruş" }, - { @"Omani rial", @"omani rial|omr|ر.ع." }, - { @"Panamanian balboa", @"panamanian balboa|b/.|pab" }, - { @"Centesimo", @"centesimo" }, - { @"Papua New Guinean kina", @"papua new guinean kina|kina|pgk" }, - { @"Toea", @"toea" }, - { @"Paraguayan guaraní", @"paraguayan guaraní|₲|pyg" }, - { @"Peruvian sol", @"peruvian sol|soles|sol|peruvian nuevo sol" }, - { @"Polish złoty", @"złoty|polish złoty|zł|pln|zloty|polish zloty|poland zloty|poland złoty" }, - { @"Grosz", @"groszy|grosz|grosze" }, - { @"Qatari riyal", @"qatari riyal|qar|qatari riyals|qatar riyal|qatar riyals" }, - { @"Saudi riyal", @"saudi riyal|sar|saudi riyals" }, - { @"Riyal", @"riyal|riyals|rial|﷼" }, - { @"Dirham", @"dirham|dirhem|dirhm" }, - { @"Halala", @"halalas|halala" }, - { @"Samoan tālā", @"samoan tālā|tālā|tala|ws$|samoa|wst|samoan tala" }, - { @"Sene", @"sene" }, - { @"São Tomé and Príncipe dobra", @"são tomé and príncipe dobra|dobras|dobra|std" }, - { @"Sierra Leonean leone", @"sierra leonean leone|sll|leone|le" }, - { @"Peseta", @"pesetas|peseta" }, - { @"Netherlands guilder", @"florin|netherlands antillean guilder|ang|nederlandse gulden|guilders|guilder|gulden|-guilders|-guilder|dutch guilders|dutch guilder|fl" }, - { @"Swazi lilangeni", @"swazi lilangeni|lilangeni|szl|emalangeni" }, - { @"Tajikistani somoni", @"tajikistani somoni|tjs|somoni" }, - { @"Diram", @"dirams|diram" }, - { @"Thai baht", @"thai baht|฿|thb|baht" }, - { @"Satang", @"satang|satangs" }, - { @"Tongan paʻanga", @"tongan paʻanga|paʻanga|tongan pa'anga|pa'anga" }, - { @"Seniti", @"seniti" }, - { @"Ukrainian hryvnia", @"ukrainian hryvnia|hyrvnia|uah|₴|ukrain hryvnia|ukrain hryvnias|ukrainian hryvnias" }, - { @"Vanuatu vatu", @"vanuatu vatu|vatu|vuv" }, - { @"Venezuelan bolívar", @"venezuelan bolívar|venezuelan 
bolívars|bs.f.|vef|bolívar fuerte|venezuelan bolivar|venezuelan bolivars|venezuela bolivar|venezuela bolivarsvenezuelan bolivar|venezuelan bolivars" }, - { @"Vietnamese dong", @"vietnamese dong|vnd|đồng|vietnam dong|vietnamese dongs|vietnam dongs" }, - { @"Zambian kwacha", @"zambian kwacha|zk|zmw|zambia kwacha|kwachas|zambian kwachas" }, - { @"Moroccan dirham", @"moroccan dirham|mad|د.م." }, - { @"United Arab Emirates dirham", @"united arab emirates dirham|د.إ|aed" }, - { @"Azerbaijani manat", @"azerbaijani manat|azn" }, + { @"Mozambican metical", @"mozambican metical|mt|mzn|mozambica metical|mozambica meticals|mozambican meticals|मोज़ाम्बिक मेटिकल|मोज़ाम्बिका मेटिकल|मोज़ाम्बिका मेटिकलस|मोज़ाम्बिकान मेटिकल्स" }, + { @"Burmese kyat", @"burmese kyat|ks|mmk|बर्मी कायत" }, + { @"Pya", @"pya|प्या|पया" }, + { @"Nicaraguan córdoba", @"nicaraguan córdoba|nio|निकारागुअन कोर्डोबा|निकारागुआ कोर्डोबा" }, + { @"Nigerian naira", @"nigerian naira|naira|ngn|₦|nigeria naira|nigeria nairas|nigerian nairas|नाइजीरियाई नायरा|नाइजीरियाई नाइरा|नाइरा|नायरा|निगरिया नायरा|निगरिया नाइरा|नाइजीरियाई नाइरास|नाइजीरियाई नायरास" }, + { @"Kobo", @"kobo|कोबो" }, + { @"Turkish lira", @"turkish lira|try|tl|turkey lira|turkey liras|turkish liras|तुर्की लीरा|तुर्की लीरा|तुर्की लीरा|तुर्की लीरास" }, + { @"Kuruş", @"kuruş|कुरूस" }, + { @"Omani rial", @"omani rial|omr|ر.ع.|ओमानी रियाल|ओमान रियाल" }, + { @"Panamanian balboa", @"panamanian balboa|b/.|pab|पनामानियन बाल्बोआ|पनामा बाल्बोआ" }, + { @"Centesimo", @"centesimo|सएन्टेसिमो" }, + { @"Papua New Guinean kina", @"papua new guinean kina|kina|pgk|पापुआ न्यू गिनी किना|पापुआ न्यू गिनीयन|किना" }, + { @"Toea", @"toea|टू ईए|टोएया" }, + { @"Paraguayan guaraní", @"paraguayan guaraní|₲|pyg|पराग्वे ग्वारानी|परागुआयन गुआरानी" }, + { @"Peruvian sol", @"peruvian sol|soles|sol|peruvian nuevo sol|पेरुवियन सोल|सोलस|सोल|पेरुवियन न्यूवो|सोल" }, + { @"Polish złoty", @"złoty|polish złoty|zł|pln|zloty|polish zloty|poland zloty|poland złoty|पोलिश ज़्लॉटी|ज़्लॉटी|" }, + { 
@"Grosz", @"groszy|grosz|grosze|ग्रोज़ी|ग्रौस्ज़|ग्रौस्ज़े|" }, + { @"Qatari riyal", @"qatari riyal|qar|qatari riyals|qatar riyal|qatar riyals|क़तारी रियाल|क़तारी रियालस|क़तार रियाल|क़तार रियालस" }, + { @"Saudi riyal", @"saudi riyal|sar|saudi riyals|साउदी रियाल|साउदी रियालस" }, + { @"Riyal", @"riyal|riyals|rial|﷼|रियाल|रियालस" }, + { @"Dirham", @"dirham|dirhem|dirhm|दिरहम|डेर्हैम" }, + { @"Halala", @"halalas|halala|हलल|हललस्" }, + { @"Samoan tālā", @"samoan tālā|tālā|tala|ws$|samoa|wst|samoan tala|सामोन ताल|समोआ ताल|ताल" }, + { @"Sene", @"sene|सेने" }, + { @"São Tomé and Príncipe dobra", @"são tomé and príncipe dobra|dobras|dobra|std|साओ टोमे और प्रिंसिपे डोबरा|डोबरा|डोबरास" }, + { @"Sierra Leonean leone", @"sierra leonean leone|sll|leone|le|सिएरा लियोन लियोन|लियोन|सिएरा लियोनियाई लियोन" }, + { @"Peseta", @"pesetas|peseta|पेसेटा|पेसेटास|पसेटा" }, + { @"Netherlands guilder", @"florin|netherlands antillean guilder|ang|nederlandse gulden|guilders|guilder|gulden|-guilders|-guilder|dutch guilders|dutch guilder|fl|फ्लोरिन|नीदरलैंड एंटीलियन गिल्डर|आंग|नेदरलॅंड्स गुल्डन|गिल्डर|गिल्डरस|डच गिल्डर" }, + { @"Swazi lilangeni", @"swazi lilangeni|lilangeni|szl|emalangeni|स्वाज़ी लिलंगेंनी|लिलंगेंनी|एमलैंगेनी" }, + { @"Tajikistani somoni", @"tajikistani somoni|tjs|somoni|ताजिकिस्तानी सोमोनी|सोमोनी" }, + { @"Diram", @"dirams|diram|दिराम|दिरामस" }, + { @"Thai baht", @"thai baht|฿|thb|baht|ईलैण्ड की मुद्रा|थाई बात|बात" }, + { @"Satang", @"satang|satangs|सातंग|सातंगस" }, + { @"Tongan paʻanga", @"tongan paʻanga|paʻanga|tongan pa'anga|pa'anga|टोंगन पांगा|पांगा|" }, + { @"Seniti", @"seniti|सेनिटी" }, + { @"Ukrainian hryvnia", @"ukrainian hryvnia|hyrvnia|uah|₴|ukrain hryvnia|ukrain hryvnias|ukrainian hryvnias|साझेदार रिव्निया|रिव्निया|यूक्रेन के रिव्निया|यूक्रेन रिव्निया|यूक्रेन रिव्नियास|यूक्रेन के रिव्नियास" }, + { @"Vanuatu vatu", @"vanuatu vatu|vatu|vuv|वानुअतु वातु|वातु||वूव" }, + { @"Venezuelan bolívar", @"venezuelan bolívar|venezuelan bolívars|bs.f.|vef|bolívar fuerte|venezuelan 
bolivar|venezuelan bolivars|venezuela bolivar|venezuela bolivarsvenezuelan bolivar|venezuelan bolivars|वेनेज़ुएलन बोलिवर|वेनेज़ुएलन|बोलिवरस|वेफ|बोलिवर फुर्ते|वेनेजुएला बोलिवर्स" }, + { @"Vietnamese dong", @"vietnamese dong|vnd|đồng|vietnam dong|vietnamese dongs|vietnam dongs|वियतनामी डोंग|डोंग|वियतनामी डोंग|वियतनामी डोंगस|वियतनामी डोंग" }, + { @"Zambian kwacha", @"zambian kwacha|zk|zmw|zambia kwacha|kwachas|zambian kwachas|जाम्बिया केवाचा|क्वाचा|जाम्बिया क्वाचा|जाम्बिया केवाचास|जाम्बिया क्वाचास" }, + { @"Moroccan dirham", @"moroccan dirham|mad|د.م.|मोरोचन दिरहम|मोरक्कन दिरहम" }, + { @"United Arab Emirates dirham", @"united arab emirates dirham|د.إ|aed|संयुक्त अरब अमीरात दिरहम|" }, + { @"Azerbaijani manat", @"azerbaijani manat|azn|अज़रभैजानि मनात" }, { @"Turkmenistan manat", @"turkmenistan manat|turkmenistan new manat|tmt" }, - { @"Manat", @"manats|manat" }, + { @"Manat", @"manats|manat|तुर्कमेनिस्तान मैनाट|तुर्कमेनिस्तान नई मनत|टीएमटी मनत" }, { @"Qəpik", @"qəpik" }, - { @"Somali shilling", @"somali shillings|somali shilling|shilin soomaali|-shilin soomaali|scellino|shilin|sh.so.|sos" }, - { @"Somaliland shilling", @"somaliland shillings|somaliland shilling|soomaaliland shilin" }, - { @"Tanzanian shilling", @"tanzanian shilling|tanzanian shillings|tsh|tzs|tanzania shilling|tanzania shillings" }, - { @"Ugandan shilling", @"ugandan shilling|ugandan shillings|ugx|uganda shilling|uganda shillings" }, - { @"Romanian leu", @"romanian leu|lei|ron|romania leu" }, - { @"Moldovan leu", @"moldovan leu|mdl|moldova leu" }, - { @"Leu", @"leu" }, - { @"Ban", @"bani|-ban|ban" }, - { @"Nepalese rupee", @"nepalese rupees|nepalese rupee|npr" }, - { @"Pakistani rupee", @"pakistani rupees|pakistani rupee|pkr" }, - { @"Indian rupee", @"indian rupees|indian rupee|inr|₹|india rupees|india rupee" }, - { @"Seychellois rupee", @"seychellois rupees|seychellois rupee|scr|sr|sre" }, - { @"Mauritian rupee", @"mauritian rupees|mauritian rupee|mur" }, - { @"Maldivian rufiyaa", @"maldivian 
rufiyaas|maldivian rufiyaa|mvr|.ރ|maldive rufiyaas|maldive rufiyaa" }, - { @"Sri Lankan rupee", @"sri lankan rupees|sri lankan rupee|lkr|රු|ரூ" }, - { @"Indonesian rupiah", @"indonesian rupiah|rupiah|perak|rp|idr" }, - { @"Rupee", @"rupee|rupees|rs" }, - { @"Danish krone", @"danish krone|dkk|denmark krone|denmark krones|danish krones" }, - { @"Norwegian krone", @"norwegian krone|nok|norway krone|norway krones|norwegian krones" }, - { @"Faroese króna", @"faroese króna|faroese krona" }, - { @"Icelandic króna", @"icelandic króna|isk|icelandic krona|iceland króna|iceland krona" }, - { @"Swedish krona", @"swedish krona|sek|swedan krona" }, - { @"Krone", @"kronor|krona|króna|krone|krones|kr|-kr" }, - { @"Øre", @"Øre|oyra|eyrir" }, - { @"West African CFA franc", @"west african cfa franc|xof|west africa cfa franc|west africa franc|west african franc" }, - { @"Central African CFA franc", @"central african cfa franc|xaf|central africa cfa franc|central african franc|central africa franc" }, - { @"Comorian franc", @"comorian franc|kmf" }, - { @"Congolese franc", @"congolese franc|cdf" }, - { @"Burundian franc", @"burundian franc|bif" }, - { @"Djiboutian franc", @"djiboutian franc|djf" }, - { @"CFP franc", @"cfp franc|xpf" }, - { @"Guinean franc", @"guinean franc|gnf" }, - { @"Swiss franc", @"swiss francs|swiss franc|chf|sfr." }, - { @"Rwandan franc", @"Rwandan franc|rwf|rf|r₣|frw" }, - { @"Belgian franc", @"belgian franc|bi.|b.fr.|bef|belgium franc" }, - { @"Rappen", @"rappen|-rappen" }, - { @"Franc", @"francs|franc|fr.|fs" }, - { @"Centime", @"centimes|centime|santim" }, - { @"Russian ruble", @"russian ruble|₽|rub|russia ruble|russia ₽|russian ₽|russian rubles|russia rubles" }, - { @"New Belarusian ruble", @"new belarusian ruble|byn|new belarus ruble|new belarus rubles|new belarusian rubles" }, - { @"Old Belarusian ruble", @"old belarusian ruble|byr|old belarus ruble|old belarus rubles|old belarusian rubles" }, - { @"Transnistrian ruble", @"transnistrian ruble|prb|р." 
}, - { @"Belarusian ruble", @"belarusian ruble|belarus ruble|belarus rubles|belarusian rubles" }, - { @"Kopek", @"kopek|kopeks" }, - { @"Kapyeyka", @"kapyeyka" }, - { @"Ruble", @"rubles|ruble|br" }, - { @"Algerian dinar", @"algerian dinar|د.ج|dzd|algerian dinars|algeria dinar|algeria dinars" }, - { @"Bahraini dinar", @"bahraini dinars|bahraini dinar|bhd|.د.ب" }, - { @"Santeem", @"santeem|santeems" }, - { @"Iraqi dinar", @"iraqi dinars|iraqi dinar|iraq dinars|iraq dinar|iqd|ع.د" }, - { @"Jordanian dinar", @"jordanian dinars|jordanian dinar|د.ا|jod|jordan dinar|jordan dinars" }, - { @"Kuwaiti dinar", @"kuwaiti dinars|kuwaiti dinar|kwd|د.ك" }, - { @"Libyan dinar", @"libyan dinars|libyan dinar|libya dinars|libya dinar|lyd" }, - { @"Serbian dinar", @"serbian dinars|serbian dinar|din.|rsd|дин.|serbia dinars|serbia dinar" }, - { @"Tunisian dinar", @"tunisian dinars|tunisian dinar|tnd|tunisia dinars|tunisia dinar" }, - { @"Yugoslav dinar", @"yugoslav dinars|yugoslav dinar|yun" }, - { @"Dinar", @"dinars|dinar|denar|-dinars|-dinar" }, - { @"Fils", @"fils|fulūs|-fils|-fil" }, - { @"Para", @"para|napa" }, - { @"Millime", @"millimes|millime" }, - { @"Argentine peso", @"argentine peso|ars|argetina peso|argetina pesos|argentine pesos" }, - { @"Chilean peso", @"chilean pesos|chilean peso|clp|chile peso|chile peso" }, - { @"Colombian peso", @"colombian pesos|colombian peso|cop|colombia peso|colombia pesos" }, - { @"Cuban convertible peso", @"cuban convertible pesos|cuban convertible peso|cuc|cuba convertible pesos|cuba convertible peso" }, - { @"Cuban peso", @"cuban pesos|cuban peso|cup|cuba pesos|cuba peso" }, - { @"Dominican peso", @"dominican pesos|dominican peso|dop|dominica pesos|dominica peso" }, - { @"Mexican peso", @"mexican pesos|mexican peso|mxn|mexico pesos|mexico peso" }, - { @"Philippine peso", @"piso|philippine pesos|philippine peso|₱|php" }, - { @"Uruguayan peso", @"uruguayan pesos|uruguayan peso|uyu" }, - { @"Peso", @"pesos|peso" }, - { @"Centavo", 
@"centavos|centavo" }, - { @"Alderney pound", @"alderney pounds|alderney pound|alderney £" }, - { @"British pound", @"british pounds|british pound|british £|gbp|pound sterling|pound sterlings|sterling|pound scot|pound scots" }, - { @"Guernsey pound", @"guernsey pounds|guernsey £|ggp" }, - { @"Ascension pound", @"ascension pounds|ascension pound|ascension £" }, - { @"Saint Helena pound", @"saint helena pounds|saint helena pound|saint helena £|shp" }, - { @"Egyptian pound", @"egyptian pounds|egyptian pound|egyptian £|egp|ج.م|egypt pounds|egypt pound" }, - { @"Falkland Islands pound", @"falkland islands pounds|falkland islands pound|falkland islands £|fkp|falkland island pounds|falkland island pound|falkland island £" }, - { @"Gibraltar pound", @"gibraltar pounds|gibraltar pound|gibraltar £|gip" }, - { @"Manx pound", @"manx pounds|manx pound|manx £|imp" }, - { @"Jersey pound", @"jersey pounds|jersey pound|jersey £|jep" }, - { @"Lebanese pound", @"lebanese pounds|lebanese pound|lebanese £|lebanan pounds|lebanan pound|lebanan £|lbp|ل.ل" }, - { @"South Georgia and the South Sandwich Islands pound", @"south georgia and the south sandwich islands pounds|south georgia and the south sandwich islands pound|south georgia and the south sandwich islands £" }, - { @"South Sudanese pound", @"south sudanese pounds|south sudanese pound|south sudanese £|ssp|south sudan pounds|south sudan pound|south sudan £" }, - { @"Sudanese pound", @"sudanese pounds|sudanese pound|sudanese £|ج.س.|sdg|sudan pounds|sudan pound|sudan £" }, - { @"Syrian pound", @"syrian pounds|syrian pound|syrian £|ل.س|syp|syria pounds|syria pound|syria £" }, - { @"Tristan da Cunha pound", @"tristan da cunha pounds|tristan da cunha pound|tristan da cunha £" }, - { @"Pound", @"pounds|pound|-pounds|-pound|£" }, - { @"Pence", @"pence" }, - { @"Shilling", @"shillings|shilling|shilingi|sh" }, - { @"Penny", @"pennies|penny" }, - { @"United States dollar", @"united states dollars|united states dollar|united states $|u.s. 
dollars|u.s. dollar|u s dollar|u s dollars|usd|american dollars|american dollar|us$|us dollar|us dollars|u.s dollar|u.s dollars" }, - { @"East Caribbean dollar", @"east caribbean dollars|east caribbean dollar|east Caribbean $|xcd" }, - { @"Australian dollar", @"australian dollars|australian dollar|australian $|australian$|aud|australia dollars|australia dollar|australia $|australia$" }, - { @"Bahamian dollar", @"bahamian dollars|bahamian dollar|bahamian $|bahamian$|bsd|bahamia dollars|bahamia dollar|bahamia $|bahamia$" }, - { @"Barbadian dollar", @"barbadian dollars|barbadian dollar|barbadian $|bbd" }, - { @"Belize dollar", @"belize dollars|belize dollar|belize $|bzd" }, - { @"Bermudian dollar", @"bermudian dollars|bermudian dollar|bermudian $|bmd|bermudia dollars|bermudia dollar|bermudia $" }, - { @"British Virgin Islands dollar", @"british virgin islands dollars|british virgin islands dollar|british virgin islands $|bvi$|virgin islands dollars|virgin islands dolalr|virgin islands $|virgin island dollars|virgin island dollar|virgin island $" }, - { @"Brunei dollar", @"brunei dollar|brunei $|bnd" }, - { @"Sen", @"sen" }, - { @"Singapore dollar", @"singapore dollars|singapore dollar|singapore $|s$|sgd" }, - { @"Canadian dollar", @"canadian dollars|canadian dollar|canadian $|cad|can$|c$|canada dollars|canada dolllar|canada $" }, - { @"Cayman Islands dollar", @"cayman islands dollars|cayman islands dollar|cayman islands $|kyd|ci$|cayman island dollar|cayman island doolars|cayman island $" }, - { @"New Zealand dollar", @"new zealand dollars|new zealand dollar|new zealand $|nz$|nzd|kiwi" }, - { @"Cook Islands dollar", @"cook islands dollars|cook islands dollar|cook islands $|cook island dollars|cook island dollar|cook island $" }, - { @"Fijian dollar", @"fijian dollars|fijian dollar|fijian $|fjd|fiji dollars|fiji dollar|fiji $" }, - { @"Guyanese dollar", @"guyanese dollars|guyanese dollar|gyd|gy$" }, - { @"Hong Kong dollar", @"hong kong dollars|hong kong dollar|hong 
kong $|hk$|hkd|hk dollars|hk dollar|hk $|hongkong$" }, - { @"Jamaican dollar", @"jamaican dollars|jamaican dollar|jamaican $|j$|jamaica dollars|jamaica dollar|jamaica $|jmd" }, - { @"Kiribati dollar", @"kiribati dollars|kiribati dollar|kiribati $" }, - { @"Liberian dollar", @"liberian dollars|liberian dollar|liberian $|liberia dollars|liberia dollar|liberia $|lrd" }, - { @"Micronesian dollar", @"micronesian dollars|micronesian dollar|micronesian $" }, - { @"Namibian dollar", @"namibian dollars|namibian dollar|namibian $|nad|n$|namibia dollars|namibia dollar|namibia $" }, - { @"Nauruan dollar", @"nauruan dollars|nauruan dollar|nauruan $" }, - { @"Niue dollar", @"niue dollars|niue dollar|niue $" }, - { @"Palauan dollar", @"palauan dollars|palauan dollar|palauan $" }, - { @"Pitcairn Islands dollar", @"pitcairn islands dollars|pitcairn islands dollar|pitcairn islands $|pitcairn island dollars|pitcairn island dollar|pitcairn island $" }, - { @"Solomon Islands dollar", @"solomon islands dollars|solomon islands dollar|solomon islands $|si$|sbd|solomon island dollars|solomon island dollar|solomon island $" }, - { @"Surinamese dollar", @"surinamese dollars|surinamese dollar|surinamese $|srd" }, - { @"New Taiwan dollar", @"new taiwan dollars|new taiwan dollar|nt$|twd|ntd" }, - { @"Trinidad and Tobago dollar", @"trinidad and tobago dollars|trinidad and tobago dollar|trinidad and tobago $|trinidad $|trinidad dollar|trinidad dollars|trinidadian dollar|trinidadian dollars|trinidadian $|ttd" }, - { @"Tuvaluan dollar", @"tuvaluan dollars|tuvaluan dollar|tuvaluan $" }, - { @"Dollar", @"dollars|dollar|$" }, - { @"Chinese yuan", @"yuan|kuai|chinese yuan|renminbi|cny|rmb|¥|元" }, - { @"Fen", @"fen" }, - { @"Jiao", @"jiao|mao" }, - { @"Finnish markka", @"suomen markka|finnish markka|finsk mark|fim|markkaa|markka" }, - { @"Penni", @"penniä|penni" } + { @"Somali shilling", @"somali shillings|somali shilling|shilin soomaali|-shilin soomaali|scellino|shilin|sh.so.|sos|सोमालि शिलिंग|सोमालि 
शिलिंगस|शिलिन सुमाली|स्केलिनो|शिलिन" }, + { @"Somaliland shilling", @"somaliland shillings|somaliland shilling|soomaaliland shilin|सोमालीलैंड शिलिंग|सोमालीलैंड शिलिंग" }, + { @"Tanzanian shilling", @"tanzanian shilling|tanzanian shillings|tsh|tzs|tanzania shilling|tanzania shillings|तंजानिया शिलिंग|तंजानिया शिलिंगस|तंजानिया शिलिंग्स" }, + { @"Ugandan shilling", @"ugandan shilling|ugandan shillings|ugx|uganda shilling|uganda shillings|युगांडा शिलिंग|युगांडा शिलिंग" }, + { @"Romanian leu", @"romanian leu|lei|romania leu|रोमानियन लेउ|लेइ|ron|रोमानिया लेउ" }, + { @"Moldovan leu", @"moldovan leu|moldova leu|मोल्डोवन लेउ|mdl|मोल्डोवा लेउ" }, + { @"Leu", @"leu|लेउ" }, + { @"Ban", @"bani|-ban|ban|बानी|-बान|बान" }, + { @"Nepalese rupee", @"nepalese rupees|nepalese rupee|नेपाली रुपये|नेपाली रुपया|npr" }, + { @"Pakistani rupee", @"pakistani rupees|pakistani rupee|पाकिस्तानी रुपये|पाकिस्तानी रुपया|pkr" }, + { @"Indian rupee", @"indian rupees|indian rupee|भारतीय रुपये|भारतीय रुपया|inr|₹|india rupees|india rupee" }, + { @"Seychellois rupee", @"seychellois rupees|seychellois rupee|सेशेल्स रुपये|सेशेल्स रुपया|scr|sr|sre" }, + { @"Mauritian rupee", @"mauritian rupees|mauritian rupee|मॉरीशस रुपये|मॉरीशस रुपया|mur" }, + { @"Maldivian rufiyaa", @"maldivian rufiyaas|maldivian rufiyaa|mvr|माल्डिवियन रुफियास|माल्डिवियन रुफ़िया|mvr|.ރ|maldive rufiyaas|maldive rufiyaa" }, + { @"Sri Lankan rupee", @"sri lankan rupees|sri lankan rupee|श्री लंकन रुपये|श्री लंकन रुपया|lkr|රු|ரூ" }, + { @"Indonesian rupiah", @"indonesian rupiah|rupiah|perak|इन्डोनेशियाई रुपयाः|रुपयाः|पेराक|rp|idr" }, + { @"Rupee", @"rupee|rupees|rs|रुपया|रुपये|रु." 
}, + { @"Danish krone", @"danish krone|डेनिश क्रौन|dkk|डेनमार्क क्रौन|डेनमार्क क्रोन्स|डेनिश क्रोन्स|denmark krone|denmark krones|danish krones" }, + { @"Norwegian krone", @"नार्वेजियन क्रौन|norwegian krone|nok|norway krone|norway krones|norwegian krones|नॉर्वे क्रौन|नॉर्वे क्रोन्स|नार्वेजियन क्रोन्स" }, + { @"Faroese króna", @"फिरोज़ी क्रोना|faroese króna|faroese krona" }, + { @"Icelandic króna", @"आइसलैंडिक क्रोना|icelandic króna|isk|icelandic krona|iceland króna|iceland krona|आइसलैंड क्रोना" }, + { @"Swedish krona", @"swedish krona|sek|swedan krona|स्वीडिश क्रोना|स्वीडन क्रोना" }, + { @"Krone", @"क्रोनर|क्रोना|क्रौन|क्रोन्स|kronor|krona|króna|krone|krones|kr|-kr" }, + { @"Øre", @"Øre|oyra|eyrir|ओएरा|इरीर" }, + { @"West African CFA franc", @"पश्चिम अफ़्रीकी cfa फ्रैंक|west african cfa franc|xof|west africa cfa franc|west africa franc|west african franc|पश्चिम अफ्रीका cfa फ्रैंक|पश्चिम अफ्रीका फ्रैंक|पश्चिम अफ़्रीकी फ्रैंक" }, + { @"Central African CFA franc", @"मध्य अफ़्रीकी cfa फ्रैंक|central african cfa franc|xaf|central africa cfa franc|central african franc|central africa franc|मध्य अफ्रीका cfa फ्रैंक|मध्य अफ़्रीकी फ्रैंक|मध्य अफ्रीका फ्रैंक" }, + { @"Comorian franc", @"कोमोरियाई फ्रैंक|comorian franc|kmf" }, + { @"Congolese franc", @"कॉंगोलीस फ्रैंक|congolese franc|cdf" }, + { @"Burundian franc", @"बुरूंडीएन फ्रैंक|burundian franc|bif" }, + { @"Djiboutian franc", @"जिबूती फ्रैंक|djiboutian franc|djf" }, + { @"CFP franc", @"cfp फ्रैंक|cfp franc|xpf" }, + { @"Guinean franc", @"गिनी फ्रैंक|guinean franc|gnf" }, + { @"Swiss franc", @"स्विस फ्रैंक्स|स्विस फ्रैंक|swiss francs|swiss franc|chf|sfr." 
}, + { @"Rwandan franc", @"रवांडन फ्रैंक|Rwandan franc|rwf|rf|r₣|frw" }, + { @"Belgian franc", @"बेल्जियन फ्रैंक|belgian franc|bi.|b.fr.|bef|belgium franc|बेल्जियम फ्रैंक" }, + { @"Rappen", @"rappen|-rappen|राप्पेन|-राप्पेन" }, + { @"Franc", @"francs|franc|fr.|fs|फ्रैंक्स|फ्रैंक|फ़्रैंक|फ़्रैंक" }, + { @"Centime", @"centimes|centime|santim|सेंटीम्स|सेंटीम|सांटीम" }, + { @"Russian ruble", @"रुस्सियन रूबल|russian ruble|₽|rub|russia ruble|russia ₽|russian ₽|russian rubles|russia rubles|रूस रूबल|रूस ₽|रुस्सियन ₽|रुस्सियन रूबल्स|रूस रूबल्स" }, + { @"New Belarusian ruble", @"नया बेलारूसियन रूबल|new belarusian ruble|byn|new belarus ruble|new belarus rubles|new belarusian rubles|नया बेलारूस रूबल|नया बेलारूस रूबल्स|नया बेलारूसियन रूबल्स" }, + { @"Old Belarusian ruble", @"ओल्ड बेलारूसियन रूबल|old belarusian ruble|byr|old belarus ruble|old belarus rubles|old belarusian rubles|ओल्ड बेलारूस रूबल|ओल्ड बेलारूस रूबल्स|ओल्ड बेलारूसियन रूबल्स" }, + { @"Transnistrian ruble", @"ट्रांसनिस्ट्रियन रूबल|transnistrian ruble|prb|р." 
}, + { @"Belarusian ruble", @"belarusian ruble|belarus ruble|belarus rubles|belarusian rubles|बेलारूसियन रूबल|बेलारूस रूबल|बेलारूस रूबल्स|बेलारूसियन रूबल्स" }, + { @"Kopek", @"कोपेक|कोपेक्स|kopek|kopeks" }, + { @"Kapyeyka", @"काप्येका|kapyeyka" }, + { @"Ruble", @"रूबल्स|रूबल|rubles|ruble|br" }, + { @"Algerian dinar", @"एलजीरियन दीनार|algerian dinar|د.ج|dzd|algerian dinars|algeria dinar|algeria dinars|एलजीरियन दिनार्स|अल्जीरिया दीनार|अल्जीरिया दिनार्स" }, + { @"Bahraini dinar", @"बहरैनी दिनार्स|बहरैनी दीनार|bahraini dinars|bahraini dinar|bhd|.د.ب" }, + { @"Santeem", @"santeem|santeems|संतीम|संतीम्स" }, + { @"Iraqi dinar", @"इराकी दीनार|इराकी दीनार|इराक दिनार्स|इराक दीनार|iraqi dinars|iraqi dinar|iraq dinars|iraq dinar|iqd|ع.د" }, + { @"Jordanian dinar", @"जोर्डनियन दिनार्स|जोर्डनियन दीनार|jordanian dinars|jordanian dinar|د.ا|jod|jordan dinar|jordan dinars|जॉर्डन दीनार|जॉर्डन दिनार्स" }, + { @"Kuwaiti dinar", @"कुवैती दिनार्स|कुवैती दीनार|kuwaiti dinars|kuwaiti dinar|kwd|د.ك" }, + { @"Libyan dinar", @"लिबयन दिनार्स|लिबयन दीनार|लीबिया दिनार्स|लीबिया दीनार|libyan dinars|libyan dinar|libya dinars|libya dinar|lyd" }, + { @"Serbian dinar", @"सर्बियन दिनार्स|सर्बियन दीनार|serbian dinars|serbian dinar|din.|rsd|дин.|serbia dinars|serbia dinar|सर्बिया दिनार्स|सर्बिया दीनार" }, + { @"Tunisian dinar", @"टुनिशियन दिनार्स|टुनिशियन दीनार|tunisian dinars|tunisian dinar|tnd|tunisia dinars|tunisia dinar|टुनिशिया दिनार्स|टुनिशिया दीनार" }, + { @"Yugoslav dinar", @"यूगोस्लाव दिनार्स|यूगोस्लाव दीनार|yugoslav dinars|yugoslav dinar|yun" }, + { @"Dinar", @"दिनार्स|दीनार|देनार|-दिनार्स|-दीनार|dinars|dinar|denar|-dinars|-dinar" }, + { @"Fils", @"फिल्स|फुलूस|-फिल्स|-फिल|fils|fulūs|-fils|-fil" }, + { @"Para", @"पारा|नापा|para|napa" }, + { @"Millime", @"मिलिम्स|मिल्लीम|millimes|millime" }, + { @"Argentine peso", @"अर्जेंटाइन पेसो|argentine peso|ars|argetina peso|argetina pesos|argentine pesos|अर्जेंटीना पेसो|अर्जेंटीना पेसोस|अर्जेंटाइन पेसोस" }, + { @"Chilean peso", @"चिलियन पेसोस|चिलियन 
पेसो|chilean pesos|chilean peso|clp|chile peso|chile peso|चिली पेसो|चिली पेसोस" }, + { @"Colombian peso", @"कोलम्बियाई पेसोस|कोलम्बियाई पेसो|colombian pesos|colombian peso|cop|colombia peso|colombia pesos|कोलंबिया पेसो|कोलंबिया पेसोस" }, + { @"Cuban convertible peso", @"क्युबन कन्वर्टिबल पेसोस|क्युबन कन्वर्टिबल पेसो|cuban convertible pesos|cuban convertible peso|cuc|cuba convertible pesos|cuba convertible peso|क्यूबा कन्वर्टिबल पेसोस|क्यूबा कन्वर्टिबल पेसो" }, + { @"Cuban peso", @"क्युबन पेसोस|क्युबन पेसो|cuban pesos|cuban peso|cup|cuba pesos|cuba peso|क्यूबा पेसोस|क्यूबा पेसो" }, + { @"Dominican peso", @"डोमिनिकन पेसोस|डोमिनिकन पेसो|dominican pesos|dominican peso|dop|dominica pesos|dominica peso|डॉमिनिका पेसोस|डॉमिनिका पेसो" }, + { @"Mexican peso", @"मेक्सिकन पेसोस|मेक्सिकन पेसो|mexican pesos|mexican peso|mxn|mexico pesos|mexico peso|मेक्सिको पेसोस|मेक्सिको पेसो" }, + { @"Philippine peso", @"पीसो|फिलिप्पीन पेसोस|फिलिप्पीन पेसो|piso|philippine pesos|philippine peso|₱|php" }, + { @"Uruguayan peso", @"उरुगायन पेसोस|उरुगायन पेसो|uruguayan pesos|uruguayan peso|uyu" }, + { @"Peso", @"पेसोस|पेसो|pesos|peso" }, + { @"Centavo", @"सेंतावोस|सेंतावो|centavos|centavo" }, + { @"Alderney pound", @"आल्डरने पाउंड्स|आल्डरने पाउंड|आल्डरने £|alderney pounds|alderney pound|alderney £" }, + { @"British pound", @"ब्रिटिश पाउंड्स|ब्रिटिश पाउंड|ब्रिटिश £|british pounds|british pound|british £|gbp|pound sterling|pound sterlings|sterling|pound scot|pound scots|पाउंड स्टरलिंग|पाउंड स्टरलिंग्स|स्टरलिंग|पाउंड स्कोट|पाउंड स्कोट्स" }, + { @"Guernsey pound", @"गर्नजी पाउंड्स|गर्नजी ggp|guernsey pounds|guernsey £|ggp" }, + { @"Ascension pound", @"एसेंशन पाउंड्स|एसेंशन पाउंड|एसेंशन £|ascension pounds|ascension pound|ascension £" }, + { @"Saint Helena pound", @"संत हेलेना पाउंड्स|संत हेलेना पाउंड|संत हेलेना shp|saint helena pounds|saint helena pound|saint helena £|shp" }, + { @"Egyptian pound", @"मिस्त्री पाउंड्स|मिस्त्री पाउंड|मिस्त्री egp|ج.م|मिस्र पाउंड्स|मिस्र पाउंड|egyptian pounds|egyptian 
pound|egyptian £|egp|ج.م|egypt pounds|egypt pound" }, + { @"Falkland Islands pound", @"फ़ॉकलैंड आइलैंड पाउंड्स|फ़ॉकलैंड आइलैंड पाउंड|फ़ॉकलैंड आइलैंड fkp|फ़ॉकलैंड आइलैंड पाउंड्स|फ़ॉकलैंड आइलैंड पाउंड|फ़ॉकलैंड आइलैंड £|falkland islands pounds|falkland islands pound|falkland islands £|fkp|falkland island pounds|falkland island pound|falkland island £" }, + { @"Gibraltar pound", @"गिब्राल्टर पाउंड्स|गिब्राल्टर पाउंड|गिब्राल्टर gip|gibraltar pounds|gibraltar pound|gibraltar £|gip" }, + { @"Manx pound", @"मैंक्स पाउंड्स|मैंक्स पाउंड|मैंक्स imp|manx pounds|manx pound|manx £|imp" }, + { @"Jersey pound", @"जर्सी पाउंड्स|जर्सी पाउंड|जर्सी jep|jersey pounds|jersey pound|jersey £|jep" }, + { @"Lebanese pound", @"लेबनीज पाउंड्स|लेबनीज पाउंड|लेबनीज लेबनान पाउंड्स|लेबनान पाउंड|लेबनान £| lebanese pounds|lebanese pound|lebanese £|lebanan pounds|lebanan pound|lebanan £|lbp|ل.ل" }, + { @"South Georgia and the South Sandwich Islands pound", @"दक्षिण जॉर्जिया और दक्षिण सैंडविच इसलैंड्स पाउंड्स|दक्षिण जॉर्जिया और दक्षिण सैंडविच इसलैंड्स पाउंड|दक्षिण जॉर्जिया और दक्षिण सैंडविच इसलैंड्स £|south georgia and the south sandwich islands pounds|south georgia and the south sandwich islands pound|south georgia and the south sandwich islands £" }, + { @"South Sudanese pound", @"दक्षिण सूडानी पाउंड्स|दक्षिण सूडानी पाउंड|दक्षिण सूडानी ssp|दक्षिण सूडान पाउंड्स|दक्षिण सूडान पाउंड|दक्षिण सूडान £|south sudanese pounds|south sudanese pound|south sudanese £|ssp|south sudan pounds|south sudan pound|south sudan £" }, + { @"Sudanese pound", @"सूडानी पाउंड्स|सूडानी पाउंड|सूडानी £|sudanese pounds|sudanese pound|sudanese £|ج.س.|sdg|sudan pounds|sudan pound|sudan £|सूडान पाउंड्स|सूडान पाउंड|सूडान £" }, + { @"Syrian pound", @"सीरियन पाउंड्स|सीरियन पाउंड|सीरियन £|syrian pounds|syrian pound|syrian £|ل.س|syp|syria pounds|syria pound|syria £|सीरिया पाउंड्स|सीरिया पाउंड|सीरिया £" }, + { @"Tristan da Cunha pound", @"त्रिस्तान डा कुन्हा पाउंड्स|त्रिस्तान डा कुन्हा पाउंड|त्रिस्तान डा कुन्हा £|tristan da cunha 
pounds|tristan da cunha pound|tristan da cunha £" }, + { @"Pound", @"पाउंड्स|पाउंड|-पाउंड्स|-पाउंड|£ |£|pounds|pound|-pounds|-pound|£" }, + { @"Pence", @"पेंस|pence" }, + { @"Shilling", @"शिलिंग्स|शिलिंग|शिलिंगी|sh|shillings|shilling|shilingi|sh" }, + { @"Penny", @"पेन्नीस|पेन्नी|पेनी|pennies|penny" }, + { @"United States dollar", @"united states dollars|united states dollar|united states $|u.s. dollars|u.s. dollar|u s dollar|u s dollars|usd|american dollars|american dollar|us$|us dollar|us dollars|u.s dollar|u.s dollars|अमेरिकी डॉलर|यू. एस. $|अमरीकी डॉलर|अमरीकी डॉलरस|संयुक्त राज्य अमेरिका डॉलर|संयुक्त राज्य अमेरिका डॉलरस|अमरीकी $|यू एस डॉलर|यू.एस. डॉलर|यू एस डॉलरस|यू.एस. डॉलरस|अमेरिकी$" }, + { @"East Caribbean dollar", @"east caribbean dollars|east caribbean dollar|east Caribbean $|xcd|पूर्वी कैरिबियन डॉलर|पूर्वी कैरिबियन डॉलरस|पूर्वी कैरिबियन $" }, + { @"Australian dollar", @"australian dollars|australian dollar|australian $|australian$|aud|australia dollars|australia dollar|australia $|australia$|ऑस्ट्रेलियाई डॉलर|ऑस्ट्रेलियाई डॉलरस|ऑस्ट्रेलियाई $|ऑस्ट्रेलियाई$|ऑस्ट्रेलिया डॉलर|ऑस्ट्रेलिया डॉलरस|ऑस्ट्रेलिया$|ऑस्ट्रेलिया $|" }, + { @"Bahamian dollar", @"bahamian dollars|bahamian dollar|bahamian $|bahamian$|bsd|bahamia dollars|bahamia dollar|bahamia $|bahamia$|बाहमियन डॉलरस|बाहमियन डॉलर|बाहमियन$|बाहमियन $|बाहमिअ $|बाहमिअ$|" }, + { @"Barbadian dollar", @"barbadian dollars|barbadian dollar|barbadian $|bbd|बारबेडियन डॉलरस|बारबेडियन डॉलर|बारबेडियन $|बारबेडियन$" }, + { @"Belize dollar", @"belize dollars|belize dollar|belize $|bzd|बेलीज़ डॉलरस|बेलीज़ डॉलर|बेलीज़ $|बेलीज़ डॉलर|बेलीज़$|बेलीज़ $|बेलीज़$" }, + { @"Bermudian dollar", @"bermudian dollars|bermudian dollar|bermudian $|bmd|bermudia dollars|bermudia dollar|bermudia $|बेरमुडियन डॉलर|बेरमुडियन डॉलरस|बेरमुडियन$|बेरमुडियन $|बरमूडा डॉलर|बरमूडा डॉलरस|बरमूडा$|बरमूडा $" }, + { @"British Virgin Islands dollar", @"british virgin islands dollars|british virgin islands dollar|british virgin islands $|bvi$|virgin islands 
dollars|virgin islands dollar|virgin islands $|virgin island dollars|virgin island dollar|virgin island $|ब्रिटिश वर्जिन आइलैंड्स डॉलर|ब्रिटिश वर्जिन आइलैंड्स डॉलरस|ब्रिटिश वर्जिन आइलैंड्स $ |वर्जिन आइलैंड्स $|वर्जिन आइलैंड्स डॉलरस|वर्जिन आइलैंड्स डॉलर" }, + { @"Brunei dollar", @"brunei dollar|brunei $|bnd|ब्रुनेई डॉलर|ब्रुनेई डॉलरस|ब्रुनेई$|ब्रुनेई $" }, + { @"Sen", @"sen|सेन" }, + { @"Singapore dollar", @"singapore dollars|singapore dollar|singapore $|s$|sgd|सिंगापूर डॉलर|सिंगापूर डॉलरस|सिंगापूर$|सिंगापूर $" }, + { @"Canadian dollar", @"canadian dollars|canadian dollar|canadian $|cad|can$|c$|canada dollars|canada dollar|canada $|कैनेडियन डॉलर|कैनेडियन डॉलरस|कैनेडियन $|कैनेडियन$|कैन$|कनाडा डॉलर|कनाडा डॉलरस|कनाडा $|कनाडा$" }, + { @"Cayman Islands dollar", @"cayman islands dollars|cayman islands dollar|cayman islands $|kyd|ci$|cayman island dollar|cayman island doolars|cayman island $|केमैन द्वीप डॉलर|केमैन द्वीप डॉलरस|केमैन द्वीप $|केमैन द्वीप$|केमैन द्वीप डॉलरस|केमैन द्वीप डॉलर|" }, + { @"New Zealand dollar", @"new zealand dollars|new zealand dollar|new zealand $|nz$|nzd|kiwi|न्यू जीलैंड डॉलर|न्यू जीलैंड डॉलरस|न्यू जीलैंड $|न्यू जीलैंड$|कीवी" }, + { @"Cook Islands dollar", @"cook islands dollars|cook islands dollar|cook islands $|cook island dollars|cook island dollar|cook island $|कुक आइलैंड्स डॉलर|कुक आइलैंड्स डॉलरस|कुक आइलैंड्स $|कुक आइलैंड डॉलर|कुक आइलैंड डॉलरस|कुक आइलैंड $" }, + { @"Fijian dollar", @"fijian dollars|fijian dollar|fijian $|fjd|fiji dollars|fiji dollar|fiji $|फिजियन डॉलरस|फिजियन डॉलर|फिजियन $|फिजि डॉलरस|फिजि डॉलर|फिजि $" }, + { @"Guyanese dollar", @"guyanese dollars|guyanese dollar|gyd|gy$|गुयाना डॉलर|गुयाना डॉलरस|गुयाना $" }, + { @"Hong Kong dollar", @"hong kong dollars|hong kong dollar|hong kong $|hk$|hkd|hk dollars|hk dollar|hk $|hongkong$|होन्ग कोंग डॉलर|होन्ग कोंग डॉलरस|होन्ग कोंग $|होन्गकोंग डॉलरस" }, + { @"Jamaican dollar", @"jamaican dollars|jamaican dollar|jamaican $|j$|jamaica dollars|jamaica dollar|jamaica $|jmd|जमैका डॉलर|जमैका 
डॉलरस|जमैकन डॉलरस|जमैकन डॉलर|जमैका $|जमैकन $" }, + { @"Kiribati dollar", @"kiribati dollars|kiribati dollar|kiribati $|किरिबाती डॉलरस|किरिबाती डॉलरस|किरिबाती $" }, + { @"Liberian dollar", @"liberian dollars|liberian dollar|liberian $|liberia dollars|liberia dollar|liberia $|lrd|लाइबेरिया डॉलरस|लाइबेरिया डॉलर|लाइबेरिया $" }, + { @"Micronesian dollar", @"micronesian dollars|micronesian dollar|micronesian $|मैक्रोनेशियन डॉलर|मैक्रोनेशियन डॉलरस|मैक्रोनेशियन $" }, + { @"Namibian dollar", @"namibian dollars|namibian dollar|namibian $|nad|n$|namibia dollars|namibia dollar|namibia $|नामीबियाई डॉलर|नामीबियाई डॉलरस|नामीबियाई $|" }, + { @"Nauruan dollar", @"nauruan dollars|nauruan dollar|nauruan $|नाउरू डॉलरस|नाउरू डॉलर|नाउरू $" }, + { @"Niue dollar", @"niue dollars|niue dollar|niue $|नियू डॉलरस|नियू डॉलर|नियू $" }, + { @"Palauan dollar", @"palauan dollars|palauan dollar|palauan $|पलाउअन डॉलर|पलाउअन $|पलाउअन डॉलरस|पलाउअन $" }, + { @"Pitcairn Islands dollar", @"pitcairn islands dollars|pitcairn islands dollar|pitcairn islands $|pitcairn island dollars|pitcairn island dollar|pitcairn island $|पिटकेर्न डॉलरस|पिटकेर्न डॉलर|पिटकेर्न $" }, + { @"Solomon Islands dollar", @"solomon islands dollars|solomon islands dollar|solomon islands $|si$|sbd|solomon island dollars|solomon island dollar|solomon island $|सोलोमन द्वीप डॉलर|सोलोमन द्वीप डॉलरस|सोलोमन द्वीप $" }, + { @"Surinamese dollar", @"surinamese dollars|surinamese dollar|surinamese $|srd|सूरीनाम डॉलर|सूरीनाम डॉलरस|सूरीनाम $|सूरीनामी डॉलरस|सूरीनामी डॉलरस|सूरीनामी $" }, + { @"New Taiwan dollar", @"new taiwan dollars|new taiwan dollar|nt$|twd|ntd|नया ताईवान डॉलरस|नया ताईवान डॉलर|नया ताईवान $" }, + { @"Trinidad and Tobago dollar", @"trinidad and tobago dollars|trinidad and tobago dollar|trinidad and tobago $|trinidad $|trinidad dollar|trinidad dollars|trinidadian dollar|trinidadian dollars|trinidadian $|ttd|ट्रिनिडाड और टोबैगो डॉलर|ट्रिनिडाड और टोबैगो डॉलरस|ट्रिनिडाड और टोबैगो $|ट्रिनिडाड डॉलर|ट्रिनिडाड डॉलरस|ट्रिनिडाड $" }, + { 
@"Tuvaluan dollar", @"tuvaluan dollars|tuvaluan dollar|tuvaluan $|तुवालुअन डॉलर|तुवालुअन डॉलरस|तुवालुअन $" }, + { @"Dollar", @"dollars|dollar|$|डॉलर" }, + { @"Chinese yuan", @"yuan|kuai|chinese yuan|renminbi|cny|rmb|¥|元|युआन|चीनी युआन|रॅन्मिन्बी|कुआई" }, + { @"Fen", @"fen|फ़ेन|फेन" }, + { @"Jiao", @"jiao|mao|जिआओ|माओ" }, + { @"Finnish markka", @"suomen markka|finnish markka|finsk mark|fim|markkaa|markka|सुओमें मार्का|फिनिश मार्का|मार्का|फ़िन्निश मार्का" }, + { @"Penni", @"penniä|penni" }, + { @"Bitcoin", @"bitcoin|bitcoins|btc|xbt|₿" }, + { @"Millibitcoin", @"millibitcoin|millibitcoins|milibitcoin|milibitcoins" }, + { @"Satoshi", @"satoshi|satoshis" } }; public static readonly Dictionary CurrencyNameToIsoCodeMap = new Dictionary { @@ -398,7 +401,7 @@ public static class NumbersWithUnitDefinitions { @"Rwandan franc", @"RWF" }, { @"Russian ruble", @"RUB" }, { @"Transnistrian ruble", @"PRB" }, - { @"Belarusian ruble", @"BYN" }, + { @"New Belarusian ruble", @"BYN" }, { @"Algerian dinar", @"DZD" }, { @"Bahraini dinar", @"BHD" }, { @"Iraqi dinar", @"IQD" }, @@ -480,7 +483,8 @@ public static class NumbersWithUnitDefinitions { @"British Virgin Islands dollar", @"_BD" }, { @"Ascension pound", @"_AP" }, { @"Alderney pound", @"_ALP" }, - { @"Abkhazian apsar", @"_AA" } + { @"Abkhazian apsar", @"_AA" }, + { @"Bitcoin", @"_XBT" } }; public static readonly Dictionary FractionalUnitNameToCodeMap = new Dictionary { @@ -556,13 +560,15 @@ public static class NumbersWithUnitDefinitions { @"Kopiyka", @"KOPIYKA" }, { @"Tiyin", @"TIYIN" }, { @"Hào", @"HAO" }, - { @"Ngwee", @"NGWEE" } + { @"Ngwee", @"NGWEE" }, + { @"Millibitcoin", @"MILLIBITCOIN" }, + { @"Satoshi", @"SATOSHI" } }; - public const string CompoundUnitConnectorRegex = @"(?and)"; + public const string CompoundUnitConnectorRegex = @"(?और)"; public static readonly Dictionary CurrencyPrefixList = new Dictionary { { @"Dollar", @"$" }, - { @"United States dollar", @"united states $|us$|us $|u.s. 
$|u.s $" }, + { @"United States dollar", @"united states $|us$|us $|u.s. $|u.s $|यू. एस. $" }, { @"East Caribbean dollar", @"east caribbean $" }, { @"Australian dollar", @"australian $|australia $" }, { @"Bahamian dollar", @"bahamian $|bahamia $" }, @@ -571,7 +577,7 @@ public static class NumbersWithUnitDefinitions { @"Bermudian dollar", @"bermudian $" }, { @"British Virgin Islands dollar", @"british virgin islands $|bvi$|virgin islands $|virgin island $|british virgin island $" }, { @"Brunei dollar", @"brunei $|b$" }, - { @"Sen", @"sen" }, + { @"Sen", @"sen|सेन" }, { @"Singapore dollar", @"singapore $|s$" }, { @"Canadian dollar", @"canadian $|can$|c$|c $|canada $" }, { @"Cayman Islands dollar", @"cayman islands $|ci$|cayman island $" }, @@ -601,7 +607,8 @@ public static class NumbersWithUnitDefinitions { @"Euro", @"€" }, { @"Pound", @"£" }, { @"Costa Rican colón", @"₡" }, - { @"Turkish lira", @"₺" } + { @"Turkish lira", @"₺" }, + { @"Bitcoin", @"₿|btc|xbt" } }; public static readonly IList AmbiguousCurrencyUnitList = new List { @@ -649,36 +656,51 @@ public static class NumbersWithUnitDefinitions @"std", @"try", @"yer", - @"yen" + @"yen", + @"सेन", + @"यूरो", + @"लेक", + @"पैसा", + @"बात", + @"पारा", + @"यूर" }; public static readonly Dictionary InformationSuffixList = new Dictionary { - { @"Bit", @"-bit|bit|bits" }, - { @"Kilobit", @"kilobit|kilobits|kb|Kb|kbit" }, - { @"Megabit", @"megabit|megabits|mb|Mb|mbit" }, - { @"Gigabit", @"gigabit|gigabits|gb|Gb|gbit" }, - { @"Terabit", @"terabit|terabits|tb|Tb|tbit" }, - { @"Petabit", @"petabit|petabits|pb|Pb|pbit" }, - { @"Byte", @"-byte|byte|bytes" }, - { @"Kilobyte", @"-kilobyte|-kilobytes|kilobyte|kB|KB|kilobytes|kilo byte|kilo bytes|kbyte" }, - { @"Megabyte", @"-megabyte|-megabytes|megabyte|mB|MB|megabytes|mega byte|mega bytes|mbyte" }, - { @"Gigabyte", @"-gigabyte|-gigabytes|gigabyte|gB|GB|gigabytes|giga byte|giga bytes|gbyte" }, - { @"Terabyte", @"-terabyte|-terabytes|terabyte|tB|TB|terabytes|tera byte|tera 
bytes|tbyte" }, - { @"Petabyte", @"-petabyte|-petabytes|petabyte|pB|PB|petabytes|peta byte|peta bytes|pbyte" } + { @"Bit", @"-bit|bit|bits|-बिट|बिट|बिट्स" }, + { @"Kilobit", @"kilobit|kilobits|kb|Kb|kbit|किलोबिट|किलोबिट्स|केबिट्स|कीलोबिट|कीलोबिट्स" }, + { @"Megabit", @"megabit|megabits|mb|Mb|mbit|मेगाबिट|मेगाबिट्स|एमबिट|एमबिट्स" }, + { @"Gigabit", @"gigabit|gigabits|gb|Gb|gbit|गिगाबिट|गीगाबिट|गिगाबिट्स|गीगाबिट्स" }, + { @"Terabit", @"terabit|terabits|tb|Tb|tbit|टेराबिट|टेराबिट|टेराबिट्स|टेराबिट्स" }, + { @"Petabit", @"petabit|petabits|pb|Pb|pbit|पेटाबिट|पेटाबिट|पेटाबिट्स|पेटाबिट्स" }, + { @"Byte", @"-byte|byte|bytes|बाइट|बाईट" }, + { @"Kilobyte", @"-kilobyte|-kilobytes|kilobyte|kB|KB|kilobytes|kilo byte|kilo bytes|kbyte|किलोबाइट|किलोबाईट|कीलोबाइट|कीलोबाईट|केबी|किलो बाइट|किलो बाईट|कीलो बाइट|कीलो बाईट" }, + { @"Megabyte", @"-megabyte|-megabytes|megabyte|mB|MB|megabytes|mega byte|mega bytes|mbyte|मेगाबाइट|मेगाबाईट|एमबी|मेगा बाइट|मेगा बाईट" }, + { @"Gigabyte", @"-gigabyte|-gigabytes|gigabyte|gB|GB|gigabytes|giga byte|giga bytes|gbyte|गिगाबाइट|गीगाबाइट|गिगाबाईट|गीगाबाईट|जीबी|गिगा बाइट|गीगा बाईट" }, + { @"Terabyte", @"-terabyte|-terabytes|terabyte|tB|TB|terabytes|tera byte|tera bytes|tbyte|टेराबाइट|टेराबाईट|टीबी|टेरा बाइट|टेरा बाईट" }, + { @"Petabyte", @"-petabyte|-petabytes|petabyte|pB|PB|petabytes|peta byte|peta bytes|pbyte|पेटाबाइट|पेटाबाईट|पीबी|पेटा बाइट|पेटा बाईट" } }; public static readonly IList AmbiguousDimensionUnitList = new List { + @"ग्राम", + @"ग्रा", + @"इं", @"barrel", @"barrels", + @"बैरल", @"grain", @"pound", + @"पाउंड", @"stone", @"yards", @"yard", @"cord", @"dram", @"feet", + @"फ़ीट", + @"फीट", @"foot", + @"फुट", @"gill", @"knot", @"peck", @@ -687,27 +709,32 @@ public static class NumbersWithUnitDefinitions @"pts", @"in", @"dm", - @"""" + @"""", + @"नॉट", + @"कप", + @"सेर", + @"पसेरी", + @"ड्राम" }; public const string BuildPrefix = @"(?<=(\s|^))"; - public const string BuildSuffix = @"(?=(\s|\W|$))"; + public const string BuildSuffix = 
@"(?=(\s|\W|\b|$))"; public static readonly Dictionary LengthSuffixList = new Dictionary { - { @"Kilometer", @"km|kilometer|kilometre|kilometers|kilometres|kilo meter|kilo meters|kilo metres|kilo metre" }, - { @"Hectometer", @"hm|hectometer|hectometre|hectometers|hectometres|hecto meter|hecto meters|hecto metres|hecto metre" }, - { @"Decameter", @"dam|decameter|decametre|decameters|decametres|deca meter|deca meters|deca metres|deca metre" }, - { @"Meter", @"m|meter|metre|meters|metres" }, - { @"Decimeter", @"dm|decimeter|decimeters|decimetre|decimetres|deci meter|deci meters|deci metres|deci metre" }, - { @"Centimeter", @"cm|centimeter|centimeters|centimetre|centimetres|centi meter|centi meters|centi metres|centi metre" }, - { @"Millimeter", @"mm|millimeter|millimeters|millimetre|millimetres|milli meter|milli meters|milli metres|milli metre" }, - { @"Micrometer", @"μm|micrometer|micrometre|micrometers|micrometres|micro meter|micro meters|micro metres|micro metre" }, - { @"Nanometer", @"nm|nanometer|nanometre|nanometers|nanometres|nano meter|nano meters|nano metres|nano metre" }, - { @"Picometer", @"pm|picometer|picometre|picometers|picometres|pico meter|pico meters|pico metres|pico metre" }, - { @"Mile", @"-mile|mile|miles" }, - { @"Yard", @"yard|yards" }, - { @"Inch", @"-inch|inch|inches|in|""" }, - { @"Foot", @"-foot|foot|feet|ft" }, - { @"Light year", @"light year|light-year|light years|light-years" }, + { @"Kilometer", @"km|kilometer|kilometre|kilometers|kilometres|kilo meter|kilo meters|kilo metres|kilo metre|किमी|किलोमीटर|कीलोमीटर|किलोमिटर|किलो मीटर|कीलो मीटर|किलो मिटर|कि.मी.|कि. मी." 
}, + { @"Hectometer", @"hm|hectometer|hectometre|hectometers|hectometres|hecto meter|hecto meters|hecto metres|hecto metre|एचएम|हेक्टोमीटर|हेक्टोमिटर|हेक्टो मीटर|हेक्टो मिटर" }, + { @"Decameter", @"dam|decameter|decametre|decameters|decametres|deca meter|deca meters|deca metres|deca metre|डेकामीटर|डेकामिटर|डेका मीटर|डेका मिटर" }, + { @"Meter", @"m|meter|metre|meters|metres|मीटर|मी|मी." }, + { @"Decimeter", @"dm|decimeter|decimeters|decimetre|decimetres|deci meter|deci meters|deci metres|deci metre|डेसीमीटर|डेसिमीटर|डेसीमिटर|डेमी" }, + { @"Centimeter", @"cm|centimeter|centimeters|centimetre|centimetres|centi meter|centi meters|centi metres|centi metre|सेमी|से.मी.|सेंटीमीटर|सेन्टीमीटर|सेण्टीमीटर|सेंटी मीटर|सेन्टी मीटर|से. मी." }, + { @"Millimeter", @"mm|millimeter|millimeters|millimetre|millimetres|milli meter|milli meters|milli metres|milli metre|मिमी|मि.मी.|मिलीमीटर|मिलिमीटर|मीलीमीटर|मिली मीटर|मीली मीटर|मि. मी." }, + { @"Micrometer", @"μm|micrometer|micrometre|micrometers|micrometres|micro meter|micro meters|micro metres|micro metre|माइक्रोमीटर|माइक्रो मीटर" }, + { @"Nanometer", @"nm|nanometer|nanometre|nanometers|nanometres|nano meter|nano meters|nano metres|nano metre|नैनोमीटर|नैनो मीटर" }, + { @"Picometer", @"pm|picometer|picometre|picometers|picometres|pico meter|pico meters|pico metres|pico metre|पिकोमीटर|पीकोमीटर|पीको मिटर|पिको मीटर|पीएम" }, + { @"Mile", @"-mile|mile|miles|मील|-मील|माइल|माइल्स" }, + { @"Yard", @"yard|yards|गज" }, + { @"Inch", @"-inch|inch|inches|in|""|इंच|-इंच|इं\b|इं."
}, + { @"Foot", @"-foot|foot|feet|ft|फुट|फ़ीट|फीट|फ़ुट|-फुट" }, + { @"Light year", @"light year|light-year|light years|light-years|लाइट यर|लाइट यर|प्रकाश वर्ष|लाइट इयर" }, { @"Pt", @"pt|pts" } }; public static readonly IList AmbiguousLengthUnitList = new List @@ -717,61 +744,64 @@ public static class NumbersWithUnitDefinitions @"yards", @"pm", @"pt", - @"pts" + @"pts", + @"पीएम", + @"इं" }; public static readonly Dictionary SpeedSuffixList = new Dictionary { - { @"Meter per second", @"meters / second|m/s|meters per second|metres per second|meter per second|metre per second" }, - { @"Kilometer per hour", @"km/h|kilometres per hour|kilometers per hour|kilometer per hour|kilometre per hour" }, - { @"Kilometer per minute", @"km/min|kilometers per minute|kilometres per minute|kilometer per minute|kilometre per minute" }, - { @"Kilometer per second", @"km/s|kilometers per second|kilometres per second|kilometer per second|kilometre per second" }, - { @"Mile per hour", @"mph|mile per hour|miles per hour|mi/h|mile / hour|miles / hour|miles an hour" }, - { @"Knot", @"kt|knot|kn" }, - { @"Foot per second", @"ft/s|foot/s|foot per second|feet per second|fps" }, - { @"Foot per minute", @"ft/min|foot/min|foot per minute|feet per minute" }, - { @"Yard per minute", @"yards per minute|yard per minute|yards / minute|yards/min|yard/min" }, - { @"Yard per second", @"yards per second|yard per second|yards / second|yards/s|yard/s" } + { @"Meter per second", @"meters / second|m/s|meters per second|metres per second|meter per second|metre per second |मीटर/सेकेंड|मीटर / सेकेंड|मी/से|मी./से.|मीटर प्रति सेकेंड|मीटर पर सेकेंड" }, + { @"Kilometer per hour", @"km/h|kilometres per hour|kilometers per hour|kilometer per hour|kilometre per hour|किमी/घं|किलोमीटर प्रति घंटा|किमी./घं.|किमीटर/घंटा|किमीटर / घंटा|किलोमीटर प्रति घं.|किलोमीटर पर घंटा|किलोमीटर / घंटे|किलोमीटर प्रति घंटे|किलोमीटर पर घंटे" }, + { @"Kilometer per minute", @"km/min|kilometers per minute|kilometres per minute|kilometer per 
minute|kilometre per minute|किमि/मि|किमी/मि|कि.मी./मि.|किलोमीटर प्रति मिनट|किलोमीटर पर मिनट|किलोमीटर हर मिनट|किलोमीटर प्रत्येक मिनट" }, + { @"Kilometer per second", @"km/s|kilometers per second|kilometres per second|kilometer per second|kilometre per second|किमि/से|किमी/से|कि.मी./से.|किलोमीटर प्रति सेकेंड|किलोमीटर पर सेकेंड|किलोमीटर हर सेकेंड|किलोमीटर प्रत्येक सेकेंड" }, + { @"Mile per hour", @"mph|mile per hour|miles per hour|mi/h|mile / hour|miles / hour|miles an hour|एमपीएच|मील प्रति घंटा|मील प्रति घंटे|मी/घं|मी./घं.|मील / घंटा|मील / घंटे|मील हर घंटे|मील हर घंटा|मील प्रत्येक घंटे|मील प्रत्येक घंटा" }, + { @"Knot", @"kt|knot|kn|नॉट" }, + { @"Foot per second", @"ft/s|foot/s|foot per second|feet per second|fps|फ़ी/से|फी/से|फी./से.|फूट/से|फूट/से|फूट/से.|फूट प्रति सेकेंड|फ़ूट प्रति सेकेंड|फूट प्रत्येक सेकेंड|फ़ूट प्रत्येक सेकेंड|फूट हर सेकेंड|फ़ूट हर सेकेंड|फूट हरेक सेकेंड|फ़ूट हरेक सेकेंड|फीट प्रति सेकेंड|फ़ीट प्रति सेकेंड|फीट प्रत्येक सेकेंड|फ़ीट प्रत्येक सेकेंड|फीट हर सेकेंड|फ़ीट हर सेकेंड|फीट हरेक सेकेंड|फ़ीट हरेक सेकेंड|एफ़पीएस" }, + { @"Foot per minute", @"ft/min|foot/min|foot per minute|feet per minute|फ़ी/मि|फी/मि|फी./मि.|फूट/मि|फूट/मि|फूट/मि.|फूट प्रति मिनट|फ़ूट प्रति मिनट|फूट प्रत्येक मिनट|फ़ूट प्रत्येक मिनट|फूट हर मिनट|फ़ूट हर मिनट|फूट हरेक मिनट|फ़ूट हरेक मिनट|फीट प्रति मिनट|फ़ीट प्रति मिनट|फीट प्रत्येक मिनट|फ़ीट प्रत्येक मिनट|फीट हर मिनट|फ़ीट हर मिनट|फीट हरेक मिनट|फ़ीट हरेक मिनट" }, + { @"Yard per minute", @"yards per minute|yard per minute|yards / minute|yards/min|yard/min|ग/मि|ग/मि|ग./मि.|गज/मि|गज/मि|गज/मि.|गज प्रति मिनट|गज प्रत्येक मिनट|गज हर मिनट|गज हरेक मिनट|गज पर मिनट|गज / मिनट" }, + { @"Yard per second", @"yards per second|yard per second|yards / second|yards/s|yard/s|ग/से|ग/से|ग./से.|गज/से|गज/से|गज/से.|गज प्रति सेकेंड|गज प्रत्येक सेकेंड|गज हर सेकेंड|गज हरेक सेकेंड|गज पर सेकेंड|गज / सेकेंड" } }; public static readonly Dictionary TemperatureSuffixList = new Dictionary { - { @"F", @"degrees fahrenheit|degree fahrenheit|deg fahrenheit|degs
fahrenheit|fahrenheit|°f|degrees farenheit|degree farenheit|deg farenheit|degs farenheit|degrees f|degree f|deg f|degs f|farenheit|f" }, - { @"K", @"k|K|kelvin" }, - { @"R", @"rankine|°r" }, - { @"D", @"delisle|°de" }, - { @"C", @"degrees celsius|degree celsius|deg celsius|degs celsius|celsius|degrees celcius|degree celcius|celcius|deg celcius|degs celcius|degrees centigrade|degree centigrade|centigrade|degrees centigrate|degree centigrate|degs centigrate|deg centigrate|centigrate|degrees c|degree c|deg c|degs c|°c|c" }, - { @"Degree", @"degree|degrees|deg.|deg|°" } + { @"F", @"degree fahrenheit|degrees fahrenheit|fahrenheit|डिग्रीज़ फ़ॉरेनहाइट|डिग्री फ़ॉरेनहाइट|डि. फ़ॉरेनहाइट|फ़ॉरेनहाइट|°फ़ॉ|डिग्रीज़ फ़ॉ.|डिग्री फ़ॉ.|डि. फ़ॉ.|फ़ॉ." }, + { @"K", @"kelvin|के.|केल्विन" }, + { @"R", @"rankine|रैंकिन" }, + { @"D", @"delisle|डेलील" }, + { @"C", @"degree celsius|degrees celsius|celsius|डिग्रीज़ सेल्सियस|डिग्री सेल्सियस|डि. सेल्सियस|सेल्सियस|डिग्रीज़ सेल्सयस|डिग्री सेल्सयस|सेल्सयस|डि. सेल्सयस|डिग्रीज़ सेंटिग्रेड|डिग्री सेंटिग्रेड|सेंटिग्रेड|डिग्रीज़ सेंटिग्रेट|डिग्री सेंटिग्रेट|डि. सेंटिग्रेट|सेंटिग्रेट|डिग्रीज़ से.|डिग्री से.|डि. से.|°से|से." }, + { @"Degree", @"degrees|degree|डिग्री|डिग्रीज़|डि.|डि|°" } }; public static readonly IList AmbiguousTemperatureUnitList = new List { - @"c", - @"f", - @"k" + @"से.", + @"फ़ॉ.", + @"के."
}; public static readonly Dictionary VolumeSuffixList = new Dictionary { - { @"Cubic meter", @"m3|cubic meter|cubic meters|cubic metre|cubic metres" }, - { @"Cubic centimeter", @"cubic centimeter|cubic centimetre|cubic centimeters|cubic centimetres" }, - { @"Cubic millimiter", @"cubic millimiter|cubic millimitre|cubic millimiters|cubic millimitres" }, - { @"Hectoliter", @"hectoliter|hectolitre|hectoliters|hectolitres" }, - { @"Decaliter", @"decaliter|decalitre|dekaliter|dekalitre|decaliters|decalitres|dekaliters|dekalitres" }, - { @"Liter", @"l|litre|liter|liters|litres" }, - { @"Deciliter", @"dl|deciliter|decilitre|deciliters|decilitres" }, - { @"Centiliter", @"cl|centiliter|centilitre|centiliters|centilitres" }, - { @"Milliliter", @"ml|mls|millilitre|milliliter|millilitres|milliliters" }, - { @"Cubic yard", @"cubic yard|cubic yards" }, - { @"Cubic inch", @"cubic inch|cubic inches" }, - { @"Cubic foot", @"cubic foot|cubic feet" }, - { @"Cubic mile", @"cubic mile|cubic miles" }, - { @"Fluid ounce", @"fl oz|fluid ounce|fluid ounces" }, - { @"Teaspoon", @"teaspoon|teaspoons" }, - { @"Tablespoon", @"tablespoon|tablespoons" }, - { @"Pint", @"pint|pints" }, - { @"Volume unit", @"fluid dram|gill|quart|minim|cord|peck|bushel|hogshead|barrels|barrel|bbl" } + { @"Cubic meter", @"m3|cubic meter|cubic meters|cubic metre|cubic metres|क्यूब मीटर|मीटर क्यूब|क्यूबिक मीटर|मी3|घन मीटर" }, + { @"Cubic centimeter", @"cubic centimeter|cubic centimetre|cubic centimeters|cubic centimetres|क्यूबिक सेंटीमीटर|क्यूबिक सेमी|सेमी3|क्यूबिक सेन्टीमीटर|क्यूबिक से.मी." }, + { @"Cubic millimiter", @"cubic millimiter|cubic millimitre|cubic millimiters|cubic millimitres|क्यूबिक मिलिमीटर|क्यूबिक मिमी|क्यूबिक मि.मी." }, + { @"Hectoliter", @"hectoliter|hectolitre|hectoliters|hectolitres|क्यूबिक हेक्टोलीटर" }, + { @"Decaliter", @"decaliter|decalitre|dekaliter|dekalitre|decaliters|decalitres|dekaliters|dekalitres|डेकालीटर" }, + { @"Liter", @"l|litre|liter|liters|litres|लीटर|ली." 
}, + { @"Deciliter", @"dl|deciliter|decilitre|deciliters|decilitres|डेसीलीटर" }, + { @"Centiliter", @"cl|centiliter|centilitre|centiliters|centilitres|सेंटीलीटर|सें.ली." }, + { @"Milliliter", @"ml|mls|millilitre|milliliter|millilitres|milliliters|मिलीलीटर|मिली लीटर|मि.ली." }, + { @"Cubic yard", @"cubic yard|cubic yards|क्यूबिक यार्ड|घन गज" }, + { @"Cubic inch", @"cubic inch|cubic inches|क्यूबिक इंच|घन इंच" }, + { @"Cubic foot", @"cubic foot|cubic feet|क्यूबिक फीट|घन फीट|क्यूबिक फ़ीट|घन फ़ीट" }, + { @"Cubic mile", @"cubic mile|cubic miles|क्यूबिक माइल|क्यूबिक मील|घन माइल|घन मील" }, + { @"Fluid ounce", @"fl oz|fluid ounce|fluid ounces|द्रव्य आउंस|द्रव्य औंस|फ़्लूइड आउंस" }, + { @"Teaspoon", @"teaspoon|teaspoons|टीस्पून|चाय की चम्मच" }, + { @"Tablespoon", @"tablespoon|tablespoons|टेबल स्पून" }, + { @"Pint", @"pint|pints|पिंट" }, + { @"Volume unit", @"fluid dram|gill|quart|minim|cord|peck|bushel|hogshead|barrels|barrel|bbl|बैरल|फ़्लुइड ड्रम|फ़्लुईड ड्राम|सेर|पसेरी|गिल|क्वार्ट|बुशेल|हॉग्सहेड|बीबीएल" } }; public static readonly IList AmbiguousVolumeUnitList = new List { @"l", @"ounce", + @"आउंस", @"oz", @"cup", @"peck", @@ -781,14 +811,15 @@ public static class NumbersWithUnitDefinitions public static readonly Dictionary WeightSuffixList = new Dictionary { { @"Kilogram", @"kg|kilogram|kilograms|kilo|kilos" }, - { @"Gram", @"g|gram|grams" }, - { @"Milligram", @"mg|milligram|milligrams" }, - { @"Gallon", @"-gallon|gallons|gallon" }, - { @"Metric ton", @"metric tons|metric ton" }, - { @"Ton", @"-ton|ton|tons|tonne|tonnes" }, - { @"Pound", @"pound|pounds|lb|lbs" }, - { @"Ounce", @"-ounce|ounce|oz|ounces" }, - { @"Weight unit", @"pennyweight|grain|british long ton|us short hundredweight|stone|dram" } + { @"Gram", @"g|gram|grams|ग्रा.|ग्रा|ग्राम" }, + { @"Milligram", @"mg|milligram|milligrams|मिलीग्राम|मिग्रा|मि.ग्रा|मि. ग्रा.|मीलीग्राम|एमजी|एम.जी." 
}, + { @"Microgram", @"μg|microgram|micrograms|micro gram|micro grams|microgramme|microgrammes|माइक्रोग्राम|माइक्रो ग्राम" }, + { @"Gallon", @"-gallon|gallons|gallon|गैलन|-गैलन" }, + { @"Metric ton", @"metric tons|metric ton|मीट्रिक टन" }, + { @"Ton", @"-ton|ton|tons|tonne|tonnes|टन" }, + { @"Pound", @"pound|pounds|lb|lbs|पाउंड" }, + { @"Ounce", @"-ounce|ounce|oz|ounces|आउंस|-आउंस" }, + { @"Weight unit", @"pennyweight|grain|british long ton|us short hundredweight|stone|dram|ग्रेन|ब्रिटिश लॉंग|स्टोन|ड्राम|ड्रम" } }; public static readonly IList AmbiguousWeightUnitList = new List { @@ -796,7 +827,8 @@ public static class NumbersWithUnitDefinitions @"oz", @"stone", @"dram", - @"lbs" + @"lbs", + @"ग्राम" }; public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/QuotedTextDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/QuotedTextDefinitions.cs new file mode 100644 index 0000000000..6b892fc764 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/QuotedTextDefinitions.cs @@ -0,0 +1,35 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Hindi\Hindi-QuotedText.yaml +// - Language: Hindi +// - ClassName: QuotedTextDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License.
+// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Hindi +{ + using System; + using System.Collections.Generic; + + public static class QuotedTextDefinitions + { + public const string LangMarker = @"Hin"; + public const string QuotedTextRegex1 = @"(“([^“”]+)”)"; + public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)"; + public const string QuotedTextRegex3 = @"(""([^""]+)"")"; + public const string QuotedTextRegex4 = @"(\\'([^\']+)\\')"; + public const string QuotedTextRegex5 = @"(`([^`]+)`)"; + public const string QuotedTextRegex6 = @"(123456)"; + public const string QuotedTextRegex7 = @"(123456)"; + public const string QuotedTextRegex8 = @"(123456)"; + public const string QuotedTextRegex9 = @"(123456)"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/QuotedTextDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/QuotedTextDefinitions.tt new file mode 100644 index 0000000000..d76967bee6 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/QuotedTextDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Hindi\Hindi-QuotedText.yaml"; + this.Language = "Hindi"; + this.ClassName = "QuotedTextDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/TimeZoneDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/TimeZoneDefinitions.cs new file mode 100644 index 0000000000..5292acd555 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/TimeZoneDefinitions.cs @@ -0,0 +1,1490 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. 
+// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Hindi\Hindi-TimeZone.yaml +// - Language: Hindi +// - ClassName: TimeZoneDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Hindi +{ + using System; + using System.Collections.Generic; + + public static class TimeZoneDefinitions + { + public const string DirectUtcRegex = @"\b(utc|gmt)(\s*[+\-\u00B1]?\s*[\d]{1,2}h?(\s*:\s*[\d]{1,2})?)?\b"; + public static readonly IList AbbreviationsList = new List + { + @"ABST", + @"ACDT", + @"ACST", + @"ACT", + @"ADT", + @"AEDT", + @"AEST", + @"AET", + @"AFT", + @"AKDT", + @"AKST", + @"AMST", + @"AMT", + @"AOE", + @"AoE", + @"ARBST", + @"ARST", + @"ART", + @"AST", + @"AWDT", + @"AWST", + @"AZOST", + @"AZOT", + @"AZST", + @"AZT", + @"BIT", + @"BST", + @"BTT", + @"CADT", + @"CAST", + @"CBST", + @"CBT", + @"CCST", + @"CDT", + @"CDTM", + @"CEST", + @"CET", + @"COT", + @"CST", + @"CSTM", + @"CT", + @"CVT", + @"EAT", + @"ECT", + @"EDT", + @"EDTM", + @"EEST", + @"EET", + @"EGST", + @"ESAST", + @"ESAT", + @"EST", + @"ESTM", + @"ET", + @"FJST", + @"FJT", + @"GET", + @"GMT", + @"GNDT", + @"GNST", + @"GST", + @"GTBST", + @"HADT", + @"HAST", + @"HDT", + @"HKT", + @"HST", + @"IRDT", + @"IRKT", + @"IRST", + @"ISDT", + @"ISST", + @"IST", + @"JDT", + @"JST", + @"KRAT", + @"KST", + @"LINT", + @"MAGST", + @"MAGT", + @"MAT", + @"MDT", + @"MDTM", + @"MEST", + @"MOST", + @"MSK", + @"MSK+1", + @"MSK+2", + @"MSK+3", + @"MSK+4", + @"MSK+5", + @"MSK+6", + @"MSK+7", + @"MSK+8", + @"MSK+9", + @"MSK-1", + @"MST", + @"MSTM", + @"MUT", + @"MVST", + @"MYST", + @"NCAST", + @"NDT", + @"NMDT", + @"NMST", + @"NPT", + @"NST", + @"NZDT", + @"NZST", + @"NZT", + @"PDST", + @"PDT", + @"PDTM", + @"PETT", + @"PKT", + 
@"PSAST", + @"PSAT", + @"PST", + @"PSTM", + @"PT", + @"PYST", + @"PYT", + @"RST", + @"SAEST", + @"SAPST", + @"SAST", + @"SAWST", + @"SBT", + @"SGT", + @"SLT", + @"SMST", + @"SNST", + @"SST", + @"TADT", + @"TAST", + @"THA", + @"TIST", + @"TOST", + @"TOT", + @"TRT", + @"TST", + @"ULAT", + @"UTC", + @"VET", + @"VLAT", + @"WAST", + @"WAT", + @"WEST", + @"WET", + @"WPST", + @"YAKT", + @"YEKT" + }; + public static readonly IList FullNameList = new List + { + @"Acre Time", + @"Afghanistan Standard Time", + @"Alaskan Standard Time", + @"Anywhere on Earth", + @"Arab Standard Time", + @"Arabian Standard Time", + @"Arabic Standard Time", + @"Argentina Standard Time", + @"Atlantic Standard Time", + @"AUS Central Standard Time", + @"Australian Central Time", + @"AUS Eastern Standard Time", + @"Australian Eastern Time", + @"Australian Eastern Standard Time", + @"Australian Central Daylight Time", + @"Australian Eastern Daylight Time", + @"Azerbaijan Standard Time", + @"Azores Standard Time", + @"Bahia Standard Time", + @"Bangladesh Standard Time", + @"Belarus Standard Time", + @"Canada Central Standard Time", + @"Cape Verde Standard Time", + @"Caucasus Standard Time", + @"Cen. Australia Standard Time", + @"Central America Standard Time", + @"Central Asia Standard Time", + @"Central Brazilian Standard Time", + @"Central Daylight Time", + @"Europe Central Time", + @"European Central Time", + @"Central Europe Standard Time", + @"Central Europe Std Time", + @"Central European Std Time", + @"Central European Standard Time", + @"Central Pacific Standard Time", + @"Central Standard Time", + @"Central Standard Time (Mexico)", + @"China Standard Time", + @"Dateline Standard Time", + @"E. Africa Standard Time", + @"E. Australia Standard Time", + @"E. Europe Standard Time", + @"E. 
South America Standard Time", + @"Eastern Time", + @"Eastern Daylight Time", + @"Eastern Standard Time", + @"Eastern Standard Time (Mexico)", + @"Egypt Standard Time", + @"Ekaterinburg Standard Time", + @"Fiji Standard Time", + @"FLE Standard Time", + @"Georgian Standard Time", + @"GMT Standard Time", + @"Greenland Standard Time", + @"Greenwich Standard Time", + @"GTB Standard Time", + @"Hawaiian Standard Time", + @"India Standard Time", + @"Iran Standard Time", + @"Israel Standard Time", + @"Jordan Standard Time", + @"Kaliningrad Standard Time", + @"Kamchatka Standard Time", + @"Korea Standard Time", + @"Libya Standard Time", + @"Line Islands Standard Time", + @"Magadan Standard Time", + @"Mauritius Standard Time", + @"Mid-Atlantic Standard Time", + @"Middle East Standard Time", + @"Montevideo Standard Time", + @"Morocco Standard Time", + @"Mountain Standard Time", + @"Mountain Standard Time (Mexico)", + @"Myanmar Standard Time", + @"N. Central Asia Standard Time", + @"Namibia Standard Time", + @"Nepal Standard Time", + @"New Zealand Standard Time", + @"Newfoundland Standard Time", + @"North Asia East Standard Time", + @"North Asia Standard Time", + @"North Korea Standard Time", + @"Pacific SA Standard Time", + @"Pacific Standard Time", + @"Pacific Daylight Time", + @"Pacific Time", + @"Pacific Standard Time", + @"Pacific Standard Time (Mexico)", + @"Pakistan Standard Time", + @"Paraguay Standard Time", + @"Romance Standard Time", + @"Russia Time Zone 1", + @"Russia Time Zone 2", + @"Russia Time Zone 3", + @"Russia Time Zone 4", + @"Russia Time Zone 5", + @"Russia Time Zone 6", + @"Russia Time Zone 7", + @"Russia Time Zone 8", + @"Russia Time Zone 9", + @"Russia Time Zone 10", + @"Russia Time Zone 11", + @"Russian Standard Time", + @"SA Eastern Standard Time", + @"SA Pacific Standard Time", + @"SA Western Standard Time", + @"Samoa Standard Time", + @"SE Asia Standard Time", + @"Singapore Standard Time", + @"Singapore Time", + @"South Africa Standard Time", + @"Sri 
Lanka Standard Time", + @"Syria Standard Time", + @"Taipei Standard Time", + @"Tasmania Standard Time", + @"Tokyo Standard Time", + @"Tonga Standard Time", + @"Turkey Standard Time", + @"Ulaanbaatar Standard Time", + @"US Eastern Standard Time", + @"US Mountain Standard Time", + @"Mountain", + @"Venezuela Standard Time", + @"Vladivostok Standard Time", + @"W. Australia Standard Time", + @"W. Central Africa Standard Time", + @"W. Europe Standard Time", + @"West Asia Standard Time", + @"West Pacific Standard Time", + @"Yakutsk Standard Time", + @"Pacific Daylight Saving Time", + @"Austrialian Western Daylight Time", + @"Austrialian West Daylight Time", + @"Australian Western Daylight Time", + @"Australian West Daylight Time", + @"Colombia Time", + @"Hong Kong Time", + @"Central Europe Time", + @"Central European Time", + @"Central Europe Summer Time", + @"Central European Summer Time", + @"Central Europe Standard Time", + @"Central European Standard Time", + @"Central Europe Std Time", + @"Central European Std Time", + @"West Coast Time", + @"West Coast", + @"Central Time", + @"Central", + @"Pacific", + @"Eastern" + }; + public const string BaseTimeZoneSuffixRegex = @"((\s+|-)(friendly|compatible))?(\s+|-)time(zone)?"; + public static readonly string LocationTimeSuffixRegex = $@"({BaseTimeZoneSuffixRegex})\b"; + public static readonly string TimeZoneEndRegex = $@"({BaseTimeZoneSuffixRegex})$"; + public static readonly IList AmbiguousTimezoneList = new List + { + @"bit", + @"get", + @"art", + @"cast", + @"eat", + @"lint", + @"mat", + @"most", + @"west", + @"vet", + @"wet", + @"cot", + @"pt", + @"et", + @"eastern", + @"pacific", + @"central", + @"mountain", + @"west coast" + }; + public static readonly Dictionary AbbrToMinMapping = new Dictionary + { + { @"abst", 180 }, + { @"acdt", 630 }, + { @"acst", 570 }, + { @"act", -10000 }, + { @"adt", -10000 }, + { @"aedt", 660 }, + { @"aest", 600 }, + { @"aet", 600 }, + { @"aft", 270 }, + { @"akdt", -480 }, + { @"akst", -540 
}, + { @"amst", -10000 }, + { @"amt", -10000 }, + { @"aoe", -720 }, + { @"arbst", 180 }, + { @"arst", 180 }, + { @"art", -180 }, + { @"ast", -10000 }, + { @"awdt", 540 }, + { @"awst", 480 }, + { @"azost", 0 }, + { @"azot", -60 }, + { @"azst", 300 }, + { @"azt", 240 }, + { @"bit", -720 }, + { @"bst", -10000 }, + { @"btt", 360 }, + { @"cadt", -360 }, + { @"cast", 480 }, + { @"cbst", -240 }, + { @"cbt", -240 }, + { @"ccst", -360 }, + { @"cdt", -10000 }, + { @"cdtm", -360 }, + { @"cest", 120 }, + { @"cet", 60 }, + { @"cot", -300 }, + { @"cst", -10000 }, + { @"cstm", -360 }, + { @"ct", -360 }, + { @"cvt", -60 }, + { @"eat", 180 }, + { @"ect", -10000 }, + { @"edt", -240 }, + { @"edtm", -300 }, + { @"eest", 180 }, + { @"eet", 120 }, + { @"egst", 0 }, + { @"esast", -180 }, + { @"esat", -180 }, + { @"est", -300 }, + { @"estm", -300 }, + { @"et", -300 }, + { @"fjst", 780 }, + { @"fjt", 720 }, + { @"get", 240 }, + { @"gmt", 0 }, + { @"gndt", -180 }, + { @"gnst", -180 }, + { @"gst", -10000 }, + { @"gtbst", 120 }, + { @"hadt", -540 }, + { @"hast", -600 }, + { @"hdt", -540 }, + { @"hkt", 480 }, + { @"hst", -600 }, + { @"irdt", 270 }, + { @"irkt", 480 }, + { @"irst", 210 }, + { @"isdt", 120 }, + { @"isst", 120 }, + { @"ist", -10000 }, + { @"jdt", 120 }, + { @"jst", 540 }, + { @"krat", 420 }, + { @"kst", -10000 }, + { @"lint", 840 }, + { @"magst", 720 }, + { @"magt", 660 }, + { @"mat", -120 }, + { @"mdt", -360 }, + { @"mdtm", -420 }, + { @"mest", 120 }, + { @"most", 0 }, + { @"msk+1", 240 }, + { @"msk+2", 300 }, + { @"msk+3", 360 }, + { @"msk+4", 420 }, + { @"msk+5", 480 }, + { @"msk+6", 540 }, + { @"msk+7", 600 }, + { @"msk+8", 660 }, + { @"msk+9", 720 }, + { @"msk-1", 120 }, + { @"msk", 180 }, + { @"mst", -420 }, + { @"mstm", -420 }, + { @"mut", 240 }, + { @"mvst", -180 }, + { @"myst", 390 }, + { @"ncast", 420 }, + { @"ndt", -150 }, + { @"nmdt", 60 }, + { @"nmst", 60 }, + { @"npt", 345 }, + { @"nst", -210 }, + { @"nzdt", 780 }, + { @"nzst", 720 }, + { @"nzt", 720 }, + { @"pdst", 
-420 }, + { @"pdt", -420 }, + { @"pdtm", -480 }, + { @"pett", 720 }, + { @"pkt", 300 }, + { @"psast", -240 }, + { @"psat", -240 }, + { @"pst", -480 }, + { @"pstm", -480 }, + { @"pt", -480 }, + { @"pyst", -10000 }, + { @"pyt", -10000 }, + { @"rst", 60 }, + { @"saest", -180 }, + { @"sapst", -300 }, + { @"sast", 120 }, + { @"sawst", -240 }, + { @"sbt", 660 }, + { @"sgt", 480 }, + { @"slt", 330 }, + { @"smst", 780 }, + { @"snst", 480 }, + { @"sst", -10000 }, + { @"tadt", 600 }, + { @"tast", 600 }, + { @"tha", 420 }, + { @"tist", 480 }, + { @"tost", 840 }, + { @"tot", 780 }, + { @"trt", 180 }, + { @"tst", 540 }, + { @"ulat", 480 }, + { @"utc", 0 }, + { @"vet", -240 }, + { @"vlat", 600 }, + { @"wast", 120 }, + { @"wat", -10000 }, + { @"west", 60 }, + { @"wet", 0 }, + { @"wpst", 600 }, + { @"yakt", 540 }, + { @"yekt", 300 } + }; + public static readonly Dictionary FullToMinMapping = new Dictionary + { + { @"beijing", 480 }, + { @"shanghai", 480 }, + { @"shenzhen", 480 }, + { @"suzhou", 480 }, + { @"tianjian", 480 }, + { @"chengdu", 480 }, + { @"guangzhou", 480 }, + { @"wuxi", 480 }, + { @"xiamen", 480 }, + { @"chongqing", 480 }, + { @"shenyang", 480 }, + { @"china", 480 }, + { @"redmond", -480 }, + { @"seattle", -480 }, + { @"bellevue", -480 }, + { @"afghanistan standard", 270 }, + { @"alaskan standard", -540 }, + { @"anywhere on earth", -720 }, + { @"arab standard", 180 }, + { @"arabian standard", 180 }, + { @"arabic standard", 180 }, + { @"argentina standard", -180 }, + { @"atlantic standard", -240 }, + { @"aus central standard", 570 }, + { @"aus eastern standard", 600 }, + { @"australian eastern", 600 }, + { @"australian eastern standard", 600 }, + { @"australian central daylight", 630 }, + { @"australian eastern daylight", 660 }, + { @"azerbaijan standard", 240 }, + { @"azores standard", -60 }, + { @"bahia standard", -180 }, + { @"bangladesh standard", 360 }, + { @"belarus standard", 180 }, + { @"canada central standard", -360 }, + { @"cape verde standard", -60 }, + { 
@"caucasus standard", 240 }, + { @"cen. australia standard", 570 }, + { @"central australia standard", 570 }, + { @"central america standard", -360 }, + { @"central asia standard", 360 }, + { @"central brazilian standard", -240 }, + { @"central", -360 }, + { @"central daylight", -10000 }, + { @"central daylight saving", -10000 }, + { @"central europe", 60 }, + { @"central european", 60 }, + { @"central europe std", 60 }, + { @"central european std", 60 }, + { @"central europe standard", 60 }, + { @"central european standard", 60 }, + { @"central europe summer", 120 }, + { @"central european summer", 120 }, + { @"central pacific standard", 660 }, + { @"central standard time (mexico)", -360 }, + { @"central standard", -360 }, + { @"china standard", 480 }, + { @"dateline standard", -720 }, + { @"e. africa standard", 180 }, + { @"e. australia standard", 600 }, + { @"e. europe standard", 120 }, + { @"e. south america standard", -180 }, + { @"europe central", 60 }, + { @"european central", 60 }, + { @"eastern", -300 }, + { @"eastern daylight", -10000 }, + { @"eastern daylight saving", -10000 }, + { @"eastern standard time (mexico)", -300 }, + { @"eastern standard", -300 }, + { @"egypt standard", 120 }, + { @"ekaterinburg standard", 300 }, + { @"fiji standard", 720 }, + { @"fle standard", 120 }, + { @"georgian standard", 240 }, + { @"gmt standard", 0 }, + { @"greenland standard", -180 }, + { @"greenwich standard", 0 }, + { @"gtb standard", 120 }, + { @"hawaiian standard", -600 }, + { @"india standard", 330 }, + { @"iran standard", 210 }, + { @"israel standard", 120 }, + { @"jordan standard", 120 }, + { @"kaliningrad standard", 120 }, + { @"kamchatka standard", 720 }, + { @"korea standard", 540 }, + { @"libya standard", 120 }, + { @"line islands standard", 840 }, + { @"magadan standard", 660 }, + { @"mauritius standard", 240 }, + { @"mid-atlantic standard", -120 }, + { @"middle east standard", 120 }, + { @"montevideo standard", -180 }, + { @"morocco standard", 0 }, + { 
@"mountain", -420 }, + { @"mountain daylight", -360 }, + { @"mountain daylight saving", -360 }, + { @"mountain standard", -420 }, + { @"mountain standard time (mexico)", -420 }, + { @"myanmar standard", 390 }, + { @"n. central asia standard", 420 }, + { @"namibia standard", 60 }, + { @"nepal standard", 345 }, + { @"new zealand standard", 720 }, + { @"newfoundland standard", -210 }, + { @"north asia east standard", 480 }, + { @"north asia standard", 420 }, + { @"north korea standard", 510 }, + { @"west coast", -420 }, + { @"pacific", -480 }, + { @"pacific daylight", -420 }, + { @"pacific daylight saving", -420 }, + { @"pacific standard", -480 }, + { @"pacific standard time (mexico)", -480 }, + { @"pacific sa standard", -240 }, + { @"pakistan standard", 300 }, + { @"paraguay standard", -240 }, + { @"romance standard", 60 }, + { @"russia time zone 1", 120 }, + { @"russia time zone 2", 180 }, + { @"russia time zone 3", 240 }, + { @"russia time zone 4", 300 }, + { @"russia time zone 5", 360 }, + { @"russia time zone 6", 420 }, + { @"russia time zone 7", 480 }, + { @"russia time zone 8", 540 }, + { @"russia time zone 9", 600 }, + { @"russia time zone 10", 660 }, + { @"russia time zone 11", 720 }, + { @"russian standard", 180 }, + { @"sa eastern standard", -180 }, + { @"sa pacific standard", -300 }, + { @"sa western standard", -240 }, + { @"samoa standard", -660 }, + { @"se asia standard", 420 }, + { @"singapore standard", 480 }, + { @"singapore", 480 }, + { @"south africa standard", 120 }, + { @"sri lanka standard", 330 }, + { @"syria standard", 120 }, + { @"taipei standard", 480 }, + { @"tasmania standard", 600 }, + { @"tokyo standard", 540 }, + { @"tonga standard", 780 }, + { @"turkey standard", 180 }, + { @"ulaanbaatar standard", 480 }, + { @"us eastern standard", -300 }, + { @"us mountain standard", -420 }, + { @"venezuela standard", -240 }, + { @"vladivostok standard", 600 }, + { @"w. australia standard", 480 }, + { @"w. central africa standard", 60 }, + { @"w. 
europe standard", 0 }, + { @"western european", 0 }, + { @"west europe standard", 0 }, + { @"west europe std", 0 }, + { @"western europe standard", 0 }, + { @"western europe summer", 60 }, + { @"w. europe summer", 60 }, + { @"western european summer", 60 }, + { @"west europe summer", 60 }, + { @"west asia standard", 300 }, + { @"west pacific standard", 600 }, + { @"yakutsk standard", 540 }, + { @"australian western daylight", 540 }, + { @"australian west daylight", 540 }, + { @"austrialian western daylight", 540 }, + { @"austrialian west daylight", 540 }, + { @"australian western daylight saving", 540 }, + { @"australian west daylight saving", 540 }, + { @"austrialian western daylight saving", 540 }, + { @"austrialian west daylight saving", 540 }, + { @"colombia", -300 }, + { @"hong kong", 480 }, + { @"madrid", 60 }, + { @"bilbao", 60 }, + { @"seville", 60 }, + { @"valencia", 60 }, + { @"malaga", 60 }, + { @"las Palmas", 60 }, + { @"zaragoza", 60 }, + { @"alicante", 60 }, + { @"alche", 60 }, + { @"oviedo", 60 }, + { @"gijón", 60 }, + { @"avilés", 60 } + }; + public static readonly IList MajorLocations = new List + { + @"Dominican Republic", + @"Dominica", + @"Guinea Bissau", + @"Guinea-Bissau", + @"Guinea", + @"Equatorial Guinea", + @"Papua New Guinea", + @"New York City", + @"New York", + @"York", + @"Mexico City", + @"New Mexico", + @"Mexico", + @"Aberdeen", + @"Adelaide", + @"Anaheim", + @"Atlanta", + @"Auckland", + @"Austin", + @"Bangkok", + @"Baltimore", + @"Baton Rouge", + @"Beijing", + @"Belfast", + @"Birmingham", + @"Bolton", + @"Boston", + @"Bournemouth", + @"Bradford", + @"Brisbane", + @"Bristol", + @"Calgary", + @"Canberra", + @"Cardiff", + @"Charlotte", + @"Chicago", + @"Christchurch", + @"Colchester", + @"Colorado Springs", + @"Coventry", + @"Dallas", + @"Denver", + @"Derby", + @"Detroit", + @"Dubai", + @"Dublin", + @"Dudley", + @"Dunedin", + @"Edinburgh", + @"Edmonton", + @"El Paso", + @"Glasgow", + @"Gold Coast", + @"Hamilton", + @"Hialeah", + 
@"Houston", + @"Ipswich", + @"Jacksonville", + @"Jersey City", + @"Kansas City", + @"Kingston-upon-Hull", + @"Leeds", + @"Leicester", + @"Lexington", + @"Lincoln", + @"Liverpool", + @"London", + @"Long Beach", + @"Los Angeles", + @"Louisville", + @"Lubbock", + @"Luton", + @"Madison", + @"Manchester", + @"Mansfield", + @"Melbourne", + @"Memphis", + @"Mesa", + @"Miami", + @"Middlesbrough", + @"Milan", + @"Milton Keynes", + @"Minneapolis", + @"Montréal", + @"Montreal", + @"Nashville", + @"New Orleans", + @"Newark", + @"Newcastle-upon-Tyne", + @"Newcastle", + @"Northampton", + @"Norwich", + @"Nottingham", + @"Oklahoma City", + @"Oldham", + @"Omaha", + @"Orlando", + @"Ottawa", + @"Perth", + @"Peterborough", + @"Philadelphia", + @"Phoenix", + @"Plymouth", + @"Portland", + @"Portsmouth", + @"Preston", + @"Québec City", + @"Quebec City", + @"Québec", + @"Quebec", + @"Raleigh", + @"Reading", + @"Redmond", + @"Richmond", + @"Rome", + @"San Antonio", + @"San Diego", + @"San Francisco", + @"San José", + @"Santa Ana", + @"Seattle", + @"Sheffield", + @"Southampton", + @"Southend-on-Sea", + @"Spokane", + @"St Louis", + @"St Paul", + @"St Petersburg", + @"St. Louis", + @"St. Paul", + @"St. 
Petersburg", + @"Stockton-on-Tees", + @"Stockton", + @"Stoke-on-Trent", + @"Sunderland", + @"Swansea", + @"Swindon", + @"Sydney", + @"Tampa", + @"Tauranga", + @"Telford", + @"Toronto", + @"Vancouver", + @"Virginia Beach", + @"Walsall", + @"Warrington", + @"Washington", + @"Wellington", + @"Wolverhampton", + @"Abilene", + @"Akron", + @"Albuquerque", + @"Alexandria", + @"Allentown", + @"Amarillo", + @"Anchorage", + @"Ann Arbor", + @"Antioch", + @"Arlington", + @"Arvada", + @"Athens", + @"Augusta", + @"Aurora", + @"Bakersfield", + @"Beaumont", + @"Bellevue", + @"Berkeley", + @"Billings", + @"Boise", + @"Boulder", + @"Bridgeport", + @"Broken Arrow", + @"Brownsville", + @"Buffalo", + @"Burbank", + @"Cambridge", + @"Cape Coral", + @"Carlsbad", + @"Carrollton", + @"Cary", + @"Cedar Rapids", + @"Centennial", + @"Chandler", + @"Charleston", + @"Chattanooga", + @"Chengdu", + @"Chesapeake", + @"Chongqing", + @"Chula Vista", + @"Cincinnati", + @"Clarksville", + @"Clearwater", + @"Cleveland", + @"Clovis", + @"College Station", + @"Columbia", + @"Columbus", + @"Concord", + @"Coral Springs", + @"Corona", + @"Costa Mesa", + @"Daly City", + @"Davenport", + @"Dayton", + @"Denton", + @"Des Moines", + @"Downey", + @"Durham", + @"Edison", + @"El Cajon", + @"El Monte", + @"Elgin", + @"Elizabeth", + @"Elk Grove", + @"Erie", + @"Escondido", + @"Eugene", + @"Evansville", + @"Everett", + @"Fairfield", + @"Fargo", + @"Farmington Hills", + @"Fayetteville", + @"Fontana", + @"Fort Collins", + @"Fort Lauderdale", + @"Fort Wayne", + @"Fort Worth", + @"Fremont", + @"Fresno", + @"Frisco", + @"Fullerton", + @"Gainesville", + @"Garden Grove", + @"Garland", + @"Gilbert", + @"Glendale", + @"Grand Prairie", + @"Grand Rapids", + @"Green Bay", + @"Greensboro", + @"Gresham", + @"Guangzhou", + @"Hampton", + @"Hartford", + @"Hayward", + @"Henderson", + @"High Point", + @"Hollywood", + @"Honolulu", + @"Huntington Beach", + @"Huntsville", + @"Independence", + @"Indianapolis", + @"Inglewood", + @"Irvine", + 
@"Irving", + @"Jackson", + @"Joliet", + @"Kent", + @"Killeen", + @"Knoxville", + @"Lafayette", + @"Lakeland", + @"Lakewood", + @"Lancaster", + @"Lansing", + @"Laredo", + @"Las Cruces", + @"Las Vegas", + @"Lewisville", + @"Little Rock", + @"Lowell", + @"Macon", + @"McAllen", + @"McKinney", + @"Mesquite", + @"Miami Gardens", + @"Midland", + @"Milwaukee", + @"Miramar", + @"Mobile", + @"Modesto", + @"Montgomery", + @"Moreno Valley", + @"Murfreesboro", + @"Murrieta", + @"Naperville", + @"New Haven", + @"Newport News", + @"Norfolk", + @"Norman", + @"North Charleston", + @"North Las Vegas", + @"Norwalk", + @"Oakland", + @"Oceanside", + @"Odessa", + @"Olathe", + @"Ontario", + @"Orange", + @"Overland Park", + @"Oxnard", + @"Palm Bay", + @"Palmdale", + @"Pasadena", + @"Paterson", + @"Pearland", + @"Pembroke Pines", + @"Peoria", + @"Pittsburgh", + @"Plano", + @"Pomona", + @"Pompano Beach", + @"Providence", + @"Provo", + @"Pueblo", + @"Rancho Cucamonga", + @"Reno", + @"Rialto", + @"Richardson", + @"Riverside", + @"Rochester", + @"Rockford", + @"Roseville", + @"Round Rock", + @"Sacramento", + @"Saint Paul", + @"Salem", + @"Salinas", + @"Salt Lake City", + @"San Bernardino", + @"San Jose", + @"San Mateo", + @"Sandy Springs", + @"Santa Clara", + @"Santa Clarita", + @"Santa Maria", + @"Santa Rosa", + @"Savannah", + @"Scottsdale", + @"Shanghai", + @"Shenyang", + @"Shenzhen", + @"Shreveport", + @"Simi Valley", + @"Sioux Falls", + @"South Bend", + @"Springfield", + @"Stamford", + @"Sterling Heights", + @"Sunnyvale", + @"Surprise", + @"Suzhou", + @"Syracuse", + @"Tacoma", + @"Tallahassee", + @"Temecula", + @"Tempe", + @"Thornton", + @"Thousand Oaks", + @"Tianjing", + @"Toledo", + @"Topeka", + @"Torrance", + @"Tucson", + @"Tulsa", + @"Tyler", + @"Vallejo", + @"Ventura", + @"Victorville", + @"Visalia", + @"Waco", + @"Warren", + @"Waterbury", + @"West Covina", + @"West Jordan", + @"West Palm Beach", + @"West Valley City", + @"Westminster", + @"Wichita", + @"Wichita Falls", + 
@"Wilmington", + @"Winston-Salem", + @"Worcester", + @"Wuxi", + @"Xiamen", + @"Yonkers", + @"Bentonville", + @"Afghanistan", + @"AK", + @"AL", + @"Alabama", + @"Åland", + @"Åland Islands", + @"Alaska", + @"Albania", + @"Algeria", + @"American Samoa", + @"Andorra", + @"Angola", + @"Anguilla", + @"Antarctica", + @"Antigua and Barbuda", + @"AR", + @"Argentina", + @"Arizona", + @"Arkansas", + @"Armenia", + @"Aruba", + @"Australia", + @"Austria", + @"AZ", + @"Azerbaijan", + @"Bahamas", + @"Bahrain", + @"Bangladesh", + @"Barbados", + @"Belarus", + @"Belgium", + @"Belize", + @"Benin", + @"Bermuda", + @"Bhutan", + @"Bolivia", + @"Bonaire", + @"Bosnia", + @"Bosnia and Herzegovina", + @"Botswana", + @"Bouvet Island", + @"Brazil", + @"British Indian Ocean Territory", + @"British Virgin Islands", + @"Brunei", + @"Bulgaria", + @"Burkina Faso", + @"Burundi", + @"CA", + @"Cabo Verde", + @"California", + @"Cambodia", + @"Cameroon", + @"Canada", + @"Cayman Islands", + @"Central African Republic", + @"Chad", + @"Chile", + @"China", + @"Christmas Island", + @"CO", + @"Cocos Islands", + @"Colombia", + @"Colorado", + @"Comoros", + @"Congo", + @"Congo (DRC)", + @"Connecticut", + @"Cook Islands", + @"Costa Rica", + @"Côte d’Ivoire", + @"Croatia", + @"CT", + @"Cuba", + @"Curaçao", + @"Cyprus", + @"Czechia", + @"DE", + @"Delaware", + @"Denmark", + @"Djibouti", + @"Ecuador", + @"Egypt", + @"El Salvador", + @"Eritrea", + @"Estonia", + @"eSwatini", + @"Ethiopia", + @"Falkland Islands", + @"Falklands", + @"Faroe Islands", + @"Fiji", + @"Finland", + @"FL", + @"Florida", + @"France", + @"French Guiana", + @"French Polynesia", + @"French Southern Territories", + @"FYROM", + @"GA", + @"Gabon", + @"Gambia", + @"Georgia", + @"Georgia", + @"Germany", + @"Ghana", + @"Gibraltar", + @"Greece", + @"Greenland", + @"Grenada", + @"Guadeloupe", + @"Guam", + @"Guatemala", + @"Guernsey", + @"Guyana", + @"Haiti", + @"Hawaii", + @"Herzegovina", + @"HI", + @"Honduras", + @"Hong Kong", + @"Hungary", + @"IA", + 
@"Iceland", + @"ID", + @"Idaho", + @"IL", + @"Illinois", + @"IN", + @"India", + @"Indiana", + @"Indonesia", + @"Iowa", + @"Iran", + @"Iraq", + @"Ireland", + @"Isle of Man", + @"Israel", + @"Italy", + @"Ivory Coast", + @"Jamaica", + @"Jan Mayen", + @"Japan", + @"Jersey", + @"Jordan", + @"Kansas", + @"Kazakhstan", + @"Keeling Islands", + @"Kentucky", + @"Kenya", + @"Kiribati", + @"Korea", + @"Kosovo", + @"KS", + @"Kuwait", + @"KY", + @"Kyrgyzstan", + @"LA", + @"Laos", + @"Latvia", + @"Lebanon", + @"Lesotho", + @"Liberia", + @"Libya", + @"Liechtenstein", + @"Lithuania", + @"Louisiana", + @"Luxembourg", + @"MA", + @"Macao", + @"Macedonia", + @"Madagascar", + @"Maine", + @"Malawi", + @"Malaysia", + @"Maldives", + @"Mali", + @"Malta", + @"Marshall Islands", + @"Martinique", + @"Maryland", + @"Massachusetts", + @"Mauritania", + @"Mauritius", + @"Mayotte", + @"MD", + @"ME", + @"MI", + @"Michigan", + @"Micronesia", + @"Minnesota", + @"Mississippi", + @"Missouri", + @"MN", + @"MO", + @"Moldova", + @"Monaco", + @"Mongolia", + @"Montana", + @"Montenegro", + @"Montserrat", + @"Morocco", + @"Mozambique", + @"MS", + @"MT", + @"Myanmar", + @"Namibia", + @"Nauru", + @"NC", + @"ND", + @"NE", + @"Nebraska", + @"Nepal", + @"Netherlands", + @"Nevada", + @"New Caledonia", + @"New Hampshire", + @"New Jersey", + @"New Zealand", + @"NH", + @"Nicaragua", + @"Niger", + @"Nigeria", + @"Niue", + @"NJ", + @"NM", + @"Norfolk Island", + @"North Carolina", + @"North Dakota", + @"North Korea", + @"Northern Mariana Islands", + @"Norway", + @"NV", + @"NY", + @"OH", + @"Ohio", + @"OK", + @"Oklahoma", + @"Oman", + @"OR", + @"Oregon", + @"PA", + @"Pakistan", + @"Palau", + @"Palestinian Authority", + @"Panama", + @"Paraguay", + @"Pennsylvania", + @"Peru", + @"Philippines", + @"Pitcairn Islands", + @"Poland", + @"Portugal", + @"Puerto Rico", + @"Qatar", + @"Réunion", + @"Rhode Island", + @"RI", + @"Romania", + @"Russia", + @"Rwanda", + @"Saba", + @"Saint Barthélemy", + @"Saint Kitts and Nevis", + @"Saint 
Lucia", + @"Saint Martin", + @"Saint Pierre and Miquelon", + @"Saint Vincent and the Grenadines", + @"Samoa", + @"San Marino", + @"São Tomé and Príncipe", + @"Saudi Arabia", + @"SC", + @"SD", + @"Senegal", + @"Serbia", + @"Seychelles", + @"Sierra Leone", + @"Singapore", + @"Sint Eustatius", + @"Sint Maarten", + @"Slovakia", + @"Slovenia", + @"Solomon Islands", + @"Somalia", + @"South Africa", + @"South Carolina", + @"South Dakota", + @"South Sudan", + @"Spain", + @"Sri Lanka", + @"Sudan", + @"Suriname", + @"Svalbard", + @"Swaziland", + @"Sweden", + @"Switzerland", + @"Syria", + @"Taiwan", + @"Tajikistan", + @"Tanzania", + @"Tennessee", + @"Texas", + @"Thailand", + @"Timor-Leste", + @"TN", + @"Togo", + @"Tokelau", + @"Tonga", + @"Trinidad and Tobago", + @"Tunisia", + @"Turkey", + @"Turkmenistan", + @"Turks and Caicos Islands", + @"Tuvalu", + @"TX", + @"U.S. Outlying Islands", + @"US Outlying Islands", + @"U.S. Virgin Islands", + @"US Virgin Islands", + @"Uganda", + @"UK", + @"Ukraine", + @"United Arab Emirates", + @"United Kingdom", + @"United States", + @"Uruguay", + @"US", + @"USA", + @"UT", + @"Utah", + @"Uzbekistan", + @"VA", + @"Vanuatu", + @"Vatican City", + @"Venezuela", + @"Vermont", + @"Vietnam", + @"Virginia", + @"VT", + @"WA", + @"Wallis and Futuna", + @"West Virginia", + @"WI", + @"Wisconsin", + @"WV", + @"WY", + @"Wyoming", + @"Yemen", + @"Zambia", + @"Zimbabwe", + @"Paris", + @"Tokyo", + @"Shanghai", + @"Sao Paulo", + @"Rio de Janeiro", + @"Rio", + @"Brasília", + @"Brasilia", + @"Recife", + @"Milan", + @"Mumbai", + @"Moscow", + @"Frankfurt", + @"Munich", + @"Berlim", + @"Madrid", + @"Lisbon", + @"Warsaw", + @"Johannesburg", + @"Seoul", + @"Istanbul", + @"Kuala Kumpur", + @"Jakarta", + @"Amsterdam", + @"Brussels", + @"Valencia", + @"Seville", + @"Bilbao", + @"Malaga", + @"Las Palmas", + @"Zaragoza", + @"Alicante", + @"Elche", + @"Oviedo", + @"Gijón", + @"Avilés", + @"West Coast", + @"Central", + @"Pacific", + @"Eastern", + @"Mountain" + }; + } +} \ No 
newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/TimeZoneDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/TimeZoneDefinitions.tt new file mode 100644 index 0000000000..b9b81a3045 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/TimeZoneDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Hindi\Hindi-TimeZone.yaml"; + this.Language = "Hindi"; + this.ClassName = "TimeZoneDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Italian/ChoiceDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Italian/ChoiceDefinitions.cs index 950d3c83ab..32ffe75615 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Italian/ChoiceDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Italian/ChoiceDefinitions.cs @@ -23,7 +23,8 @@ public static class ChoiceDefinitions { public const string LangMarker = @"Ita"; public const string TokenizerRegex = @"[^\w\d\u00E0-\u00FC]"; - public const string TrueRegex = @"\b(vero|s[iì]|certo|sicuro|ok|d'accordo|(?\b(fino\s+a(l(l[aoe'])?|gli|i)?|a(l(l[aoe'])?|gli|i)?|e\s+(il?|l[aoe']|gli))\b|--|-|—|——|~)"; - public const string RestrictedTillRegex = @"(?\b(fino\s+a(l(l[aoe'])?|gli|i)?)\b|--|-|—|——|~)"; - public static readonly string RangeConnectorRegex = $@"(?\b(e(\s+l[aoe']|gli|i)?|a(l(l[aoe'])?|gli|i)?)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; + public const string TillRegex = @"(?\b(fino\s+a(l(l[aoe'])?|gli|i|d)?|a(l(l[aoe'])?|gli|i|d)?|e\s+(il?|l[aoe']|gli))\b|--|-|—|——|~)"; + public const string RestrictedTillRegex = @"(?\b(fino\s+a(l(l[aoe'])?|gli|i|d)?)\b|--|-|—|——|~)"; + public static readonly string RangeConnectorRegex = $@"(?\b(e(\s+l[aoe']|gli|i|d)?|a(l(l[aoe'])?|gli|i|d)?)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; public const string 
RelativeRegex = @"\b(?((il?|l[ae])\s+)?prossim['oaie]|((il?|l[ae])\s+)?seguent[ei]|((il?|l[ae])\s+)?successiv[oaei]|in\s+arrivo|quest['oaie]|(l'|gli\s+|le\s+)?ultim['oaie]|((il?|l[ae])\s+)?passat[oaie]|((il?|l[ae])\s+)?precedent[ei]|((lo|l[ae]|gli)\s+)?scors[oaie]|((il?|l[ae])\s+)?corrent[ei]|quest['oaei]|il?|l[oae']|gli)"; public const string StrictRelativeRegex = @"(?prossim['oaie]|seguent[ei]|successiv[oaei]|in\s+arrivo|quest['oaie]|ultim['oaie]|passat[oaie]|precedent[ei]|scors[oaie]|corrent[ei]|quest['oaei])"; public const string NextSuffixRegex = @"(?prossim[oaei]|seguent[ei]|successiv[oaei]|imminent[ei]|in\s+arrivo)\b"; @@ -46,7 +47,7 @@ public static class DateTimeDefinitions public const string MonthNumRegex = @"(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)\b"; public const string WeekDayRegex = @"\b(?domenica|lunedì|martedì|mercoledì|giovedì|venerdì|sabato|lun|mar|mer|gio|ven|sab|dom)\b"; public static readonly string RelativeMonthRegex = $@"(?((di|del|dell[o'])\s+)?(({ThisPrefixRegex}\s+mese)|(mese\s+({PastSuffixRegex}|{NextSuffixRegex}))|(({PastSuffixRegex}|{NextSuffixRegex})\s+mese)))\b"; - public const string MonthRegex = @"(?aprile|apr|agosto|ago|dicembre|dic|febbraio|feb|gennaio|gen|luglio|lug|giugno|giu|marzo|mar|maggio|mag|novembre|nov|ottobre|ott|settembre|set|sett)\b"; + public const string MonthRegex = @"(?apr(ile)?|ago(sto)?|dic(embre)?|feb(braio)?|gen(naio)?|lug(lio)?|giu(gno)?|mar(zo)?|mag(gio)?|nov(embre)?|ott(obre)?|set(tembre|t)?)\b"; public static readonly string EngMonthRegex = $@"(((il\s+)?mese di\s+)?{MonthRegex})"; public static readonly string MonthSuffixRegex = $@"(?(in\s+|nel(l['o])?\s*|di\s+|del(l['o])?\s*|a([ld]|ll['o])?\s*|(il|l[o'])\s*)?({RelativeMonthRegex}|{EngMonthRegex}))"; public const string SpecialDescRegex = @"((?)p\b)"; @@ -63,22 +64,24 @@ public static class DateTimeDefinitions public static readonly string RelativeDecadeRegex = 
$@"\b((((il?|l'|gli)\s*)?{RelativeRegex}\s+((?[\w,]+)\s+)?decennio?)|(((il?|l'|gli)\s*)?((?[\w,]+)\s+)?decennio?\s+{RelativeRegex}))\b"; public static readonly string LastTwoYearNumRegex = $@"({WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}({WrittenOneToNineRegex})?)"; public static readonly string FullTextYearRegex = $@"\b((?{WrittenCenturyFullYearRegex})(?{LastTwoYearNumRegex})?)\b"; - public static readonly string TwoDigitYearRegex = $@"\b(?([0-27-9]\d))(?!(\s*((\:)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b"; + public static readonly string TwoDigitYearRegex = $@"\b(?([0-9]\d))(?!(\s*((\:\d)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b"; public static readonly string DateYearRegex = $@"(?{BaseDateTime.FourDigitYearRegex}|{TwoDigitYearRegex})"; public static readonly string YearSuffix = $@"(,?\s*({DateYearRegex}|{FullTextYearRegex}))"; public static readonly string YearRegex = $@"({BaseDateTime.FourDigitYearRegex}|{FullTextYearRegex})"; - public const string DateUnitRegex = @"\b(?decennio?|ann[oi]|mes[ei]|settiman[ae]|giorn[oi])\b"; + public const string DateUnitRegex = @"\b(?decennio|mese|settimana|(ann|giorn)(o|(?i))|(?decenni|mesi|settimane))\b"; public static readonly string SimpleCasesRegex = $@"\b((dal?|[tf]ra)\s+)?({DayRegex})(\s+{MonthSuffixRegex})?\s*{TillRegex}\s*({DayRegex}\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+{DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; public static readonly string MonthFrontSimpleCasesRegex = $@"\b((dal?|[tf]ra)\s+)?{MonthSuffixRegex}\s+((dal?|[tf]ra)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+([tf]ra\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; public static readonly string BetweenRegex = $@"\b([tf]ra\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b"; public const string YearWordRegex = @"\b(?l'anno)\b"; - 
public static readonly string MonthWithYear = $@"\b({MonthRegex}(\.)?(\s*)[/\\\-\.,]?(((\s+del)?\s+{YearRegex})|((\s+(del|di|il))?\s+(?prossim['o]|passato|quest['o])\s*anno)|((\s+(del)?l')anno\s+(?prossimo|passato))))"; + public static readonly string MonthWithYear = $@"\b({MonthRegex}(\.)?(\s*)[/\\\-\.,]?(((\s+del)?\s+{YearRegex}|{TwoDigitYearRegex})|((\s+(del|di|il))?\s+(?prossim['o]|passato|quest['o])\s*anno)|((\s+(del)?l')anno\s+(?prossimo|passato))))"; public const string SpecialYearPrefixes = @"(?fiscale|scolastico)"; public static readonly string OneWordPeriodRegex = $@"\b((((il|l[o'])\s*)?((mese di\s+)|({RelativeRegex}\s*))?{MonthRegex}(\s+{RelativeRegex})?)|dall'inizio\s+del(l')\s*(mese|anno)|({RelativeRegex}\s*)?(mi[ao]\s+)?(weekend|finesettimana|settimana|mese|anno)(\s+{RelativeRegex})?(?!((\s+di|del)?\s+\d+))(\s+{AfterNextSuffixRegex})?)\b"; public static readonly string MonthNumWithYear = $@"({YearRegex}[/\-\.]{MonthNumRegex})|({MonthNumRegex}[/\-]{YearRegex})"; public static readonly string WeekOfMonthRegex = $@"\b(?(l[a']\s*)?(?prima|seconda|terza|quarta|quinta|ultima)\s+settimana\s+{MonthSuffixRegex}(\s+{BaseDateTime.FourDigitYearRegex}|{RelativeRegex}\s+year)?)\b"; public static readonly string WeekOfYearRegex = $@"\b(?(l[a']\s*)?(?prima|seconda|terza|quarta|quinta|ultima)\s+settimana(\s+(di|del(l[o'])?))?\s*({YearRegex}|({RelativeRegex}\s*anno)|(anno\s+(({NextSuffixRegex})|({PastSuffixRegex})))))"; + public static readonly string OfYearRegex = $@"\b((of|in)\s+({YearRegex}|{StrictRelativeRegex}\s+year))\b"; + public const string FirstLastRegex = @"\b(the\s+)?((?first)|(?last))\b"; public static readonly string FollowedDateUnit = $@"^\s*{DateUnitRegex}"; public static readonly string NumberCombinedWithDateUnit = $@"\b(?\d+([\.,]\d*)?)\s*{DateUnitRegex}"; public static readonly string QuarterRegex = 
$@"\b((il|l')\s*)?(?primo|secondo|terzo|quarto|ultimo)\s+trimestre((\s+(di|del(l[o'])?)|\s*,\s*)?\s*({YearRegex}|({RelativeRegex}\s*anno)|(anno\s+(({NextSuffixRegex})|({PastSuffixRegex})))))?"; @@ -108,16 +111,16 @@ public static class DateTimeDefinitions public static readonly string WeekDayOfMonthRegex = $@"(?((la|il)\s+)?(?prim[ao]|second[ao]|terz[ao]|quart[ao]|quint[ao]|ultim[ao])\s+{WeekDayRegex}\s+{MonthSuffixRegex})"; public static readonly string RelativeWeekDayRegex = $@"\b({WrittenNumRegex}\s+{WeekDayRegex}\s+(da\s+ora|dopo))\b"; public static readonly string SpecialDate = $@"(?<=\b(il|l'|al(l')?)\s*){DayRegex}\b"; - public static readonly string DateExtractor1 = $@"\b((quest[oa]\s+)?{WeekDayRegex}\s*[,-]?\s*)?(({MonthRegex}(\.)?\s*[/\\.,-]?\s*{DayRegex})|(\({MonthRegex}\s*[-.]\s*{DayRegex}\))|({DayRegex}\s*[/\\.,-]?\s*{MonthRegex}(\.)?))(\s*\(\s*{WeekDayRegex}\s*\))?"; - public static readonly string DateExtractor2 = $@"({DateExtractor1}(\s+|\s*[\-/,]\s*|\s+del\s+)({DateYearRegex}))\b"; - public static readonly string DateExtractor3 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}(\.)?(\s+|\s*,\s*|\s+di\s+|\s*-\s*){MonthRegex}(\.)?((\s+|\s*,\s*){DateYearRegex})?\b"; - public static readonly string DateExtractor4 = $@"\b((il|l')\s*)?{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}(\.)?\s*[/\\\-]\s*{DateYearRegex}"; - public static readonly string DateExtractor5 = $@"\b{DayRegex}\s*[/\\\-]\s*({MonthNumRegex}|{MonthRegex}(\.)?)\s*[/\\\-]\s*{DateYearRegex}"; - public static readonly string DateExtractor6 = $@"(?in punto)"; public const string OclockPrefix = @"(?ore)"; public const string PmRegex = @"\b(?(((di|del|a(l(l')?)?|nel|il|per|verso)\s+(il\s+)?)?(pomeriggio|ora di pranzo))|(((di|della|nella|a|alla|verso|la|per|in)\s+)?(la\s+)?(notte|serata|sera|mezzanotte)))\b"; + public const string RangePmRegex = @"\b(?(((di|del|a(l(l')?)?|nel|il|per|verso)\s+(il\s+)?)?(pomeriggio))|(((di|della|nella|a|alla|verso|la|per|in)\s+)?(la\s+)?(notte|serata|sera)))\b"; public const 
string AmRegex = @"\b(?((((la|alla|verso|per|della|di|in)\s+(la\s+)?)?(mattinata|mattina))|(((il|al|verso|per|del|di)\s+(il\s+)?)?(mattino))))\b"; public const string LunchRegex = @"\b(ora di pranzo)\b"; public const string NightRegex = @"\b(mezzanotte|notte)\b"; - public static readonly string LessThanOneHour = $@"(?(un\s+quarto|tre quarti?|mezz[oa]|mezz'ora|{BaseDateTime.DeltaMinuteRegex}(\s+(minut[oi]|min))?|{DeltaMinuteNumRegex}(\s+(minut[oi]|min))|(?<=(e|meno)\s+){DeltaMinuteNumRegex}))"; + public static readonly string LessThanOneHour = $@"(?(un\s+quarto|tre quarti?|mezz[oa]|mezz'ora|{BaseDateTime.DeltaMinuteRegex}|{DeltaMinuteNumRegex}))"; public static readonly string EngTimeRegex = $@"(?{HourNumRegex}\s+e\s+({MinuteNumRegex}|(?venti?|trenta?|quaranta?|cinquanta?){MinuteNumRegex}))"; - public static readonly string TimePrefix = $@"(?(e\s+{LessThanOneHour}|{LessThanOneHour}\s+(minut[oi]|min)\s+all[e']|meno {LessThanOneHour}))"; + public static readonly string TimePrefix = $@"(?((e|meno)\s+{LessThanOneHour}(\s+(minut[oi]|min))?|{LessThanOneHour}\s+(minut[oi]|min)\s+all[e']))"; public static readonly string TimeSuffix = $@"(?{AmRegex}|{PmRegex}|{OclockRegex})"; public static readonly string BasicTime = $@"\b(?{EngTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex})"; public const string MidnightRegex = @"(?mezzanotte|mezza notte)"; @@ -151,13 +155,13 @@ public static class DateTimeDefinitions public const string PrepRegex = @"\b((\.|,)(?!\d+)|!|\?|$|circa|presso|vicino|sopra|sotto|lontano|dentro|fuori|di|del(l['oae])?|dei|degli|a|al(l['oae])?|ai|agli|da|dal(l['oae])?|dai|dagli|in|nel(l['oae])?|nei|negli|su|sul(l['oae])?|sui|sugli)\b"; public static readonly string AtRegex = 
$@"\b(((?<=\b(a)\s+){MidTimeRegex}|{MidTimeRegex}\b)|((?<=\ball[e']\s*){EngTimeRegex}\b)|((?<=\b\d+\s+all[e']\s*)({HourNumRegex}|{BaseDateTime.HourRegex})\b(?=\s*(e|{DescRegex}|{PrepRegex})\b))|((?<=([^\d\s]|^)\s*\ball[e']\s*)({HourNumRegex}|{BaseDateTime.HourRegex})\b(?![\.,]\d+)))"; public static readonly string IshRegex = $@"\b(((poco\s+(prima|dopo)(\s+(di|dell[e']))?)|circa(\s+a(ll['e])?)?|più o meno(\s+a(ll['e])?)?)\s*(({BaseDateTime.HourRegex}|{PeriodHourNumRegex})(?!\s*({DescRegex}|[:\.,/e]\s*\d+))|mezzogiorno)|(({BaseDateTime.HourRegex}|{PeriodHourNumRegex})(?!\s*({DescRegex}|[:\.,/e]\s*\d+))(\s+circa)|mezzogiorno(\s+circa)|verso\s+mezzogiorno))\b"; - public const string TimeUnitRegex = @"(\b\d+([\.,]\d*)?|\b)(?or[ae]|hrs|hr|h|minut[oi]|mins|min|second[oi]|secs|sec)\b"; + public const string TimeUnitRegex = @"(\b\d+([\.,]\d*)?|\b)(?or(a|(?e))|(minut|second)(o|(?i))|h|(hr|min|sec)(?s)?)\b"; public const string RestrictedTimeUnitRegex = @"(?ore|minuti)\b"; public static readonly string ConnectNumRegex = $@"{BaseDateTime.HourRegex}(?00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)\s*{DescRegex}"; public const string AmbiguousRangeModifierPrefix = @"^[.]"; public static readonly string NumberEndingPattern = $@"^(\s+(?riunione|appuntamento|conferenza|chiamata|chiamata skype)\s+all['e]\s*(?{PeriodHourNumRegex}|{HourRegex})((\.)?$|(\.,|,|!|\?)))"; public static readonly string TimeRegex1 = $@"(((((?<=\b(da|al)?(le|l'|ore)\s*)({EngTimeRegex}))|((?<=\b(da|al)?(le|l'|ore)\s*)({HourNumRegex}|{BaseDateTime.HourRegex})(?![\.,]\d+)(?=\s*({PrepRegex}))))|(({TimePrefix}\s+)({EngTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}))|(({EngTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})\s+{TimePrefix}))((\s*{DescRegex})|\b))"; - public static readonly string TimeRegex2 = 
$@"({BaseDateTime.HourRegex})(\s*)?:(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?((\s*{DescRegex})|\b)"; + public static readonly string TimeRegex2 = $@"(t)?({BaseDateTime.HourRegex})(\s*)?:(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?((\s*{DescRegex})|\b)"; public static readonly string TimeRegex3 = $@"\b{BaseDateTime.HourRegex}\.{BaseDateTime.MinuteRegex}(\s*{DescRegex})(\s+{TimePrefix})?"; public static readonly string TimeRegex4 = $@"\b({BasicTime}(\s*{DescRegex})?(\s+{TimePrefix})?(\s*{DescRegex})?\s+{TimeSuffix}|{OclockPrefix}\s+{BasicTime}(\s*{DescRegex})?(\s+{TimePrefix})?(\s*{DescRegex})?)\b"; public static readonly string TimeRegex5 = $@"\b(({BasicTime}\s*{DescRegex}(\s+{TimePrefix})?)|({BasicTime}(\s+{TimePrefix})((\s*{DescRegex})|\b))|((?<=\b(da|al)?(le|l'|ore)\s*)(\b(?{EngTimeRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?)|\b(?{HourNumRegex}|{BaseDateTime.HourRegex})(?![\.,]\d+)(?=\s*({PrepRegex})\b))))"; @@ -228,9 +232,11 @@ public static class DateTimeDefinitions public const string AgoPrefixRegex = @"\b(di)\b"; public const string LaterRegex = @"\b(dopo|da\s+adesso|da\s+questo\s+momento)\b"; public const string AgoRegex = @"\b(fa|prima|addietro)\b"; + public const string BeforeAfterRegex = @"^[.]"; public const string InConnectorRegex = @"\b(in|tra|fra|a)\b"; public static readonly string SinceYearSuffixRegex = $@"(^\s*{SinceRegex}\s*(ann[oi]\s*)?({DateYearRegex}|{FullTextYearRegex}))"; public static readonly string WithinNextPrefixRegex = $@"\b(entro(\s+(?{NextPrefixRegex}))?)\b"; + public const string TodayNowRegex = @"\b(oggi|adesso)\b"; public static readonly string MorningStartEndRegex = $@"(^(((di\s+|questa\s+|sta)?mattin[oa]|mattinata)|{AmDescRegex}))|((((di\s+|questa\s+|sta)?mattin[oa]|mattinata)|{AmDescRegex})$)"; public static readonly string AfternoonStartEndRegex = 
$@"(^((((di|al)\s+|questo\s+|sto)?pomeriggio)|{PmDescRegex}))|(((((di|il)\s+|questo\s+|sto)?pomeriggio)|{PmDescRegex})$)"; public const string EveningStartEndRegex = @"(^((di\s+|questa\s+|sta)?sera|serata))|(((di\s+|questa\s+|sta)?sera|serata)$)"; @@ -250,10 +256,10 @@ public static class DateTimeDefinitions public const string SingleAmbiguousTermsRegex = @"(?([A-Za-z]+\s)?({WrittenDayRegex}|{DayRegex}))"; + public static readonly string FlexibleDayRegex = $@"(?({WrittenDayRegex}|{DayRegex}))"; public static readonly string ForTheRegex = $@"\b((((?<=per\s+il\s+){FlexibleDayRegex})|((?\s*(,|\.|!|\?|$)))"; public static readonly string WeekDayAndDayOfMonthRegex = $@"\b{WeekDayRegex},?\s+((il\s+)?{FlexibleDayRegex})\b(?!\s+{MonthRegex}\b)"; - public static readonly string WeekDayAndDayRegex = $@"\b{WeekDayRegex}\s*,?\s+(?!(il|l')){DayRegex}(?!([-]|(\s+({AmDescRegex}|{{PmDescRegex|{OclockRegex}}}))))\b"; + public static readonly string WeekDayAndDayRegex = $@"\b{WeekDayRegex}\s*,?\s+(?!(il|l')){DayRegex}(?!([-:]|(\s+({AmDescRegex}|{{PmDescRegex|{OclockRegex}}}))))\b"; public const string RestOfDateRegex = @"\bresto\s+((di|del(l[a'])?)\s*)?((quest[oa']|corrente|mia)\s*)?(?settimana|mese|anno|decennio)(\s+corrente)?\b"; public const string RestOfDateTimeRegex = @"\bresto\s+((di|del(la)?)\s+)?((quest[oa]|corrente|mi[oa])\s+)?(?giorn(o|ata))(\s+corrente)?\b"; public const string SuffixAfterRegex = @"\b(((a)\s)?(o|e)\s+(sopra|dopo|più\s+tardi|più\s+grande)(?!\s+di))\b"; @@ -271,7 +277,10 @@ public static class DateTimeDefinitions public const string DateAfterRegex = @"\b((e|o)\s+(dopo|passato|oltre))\b"; public static readonly string YearPeriodRegex = $@"(((dal|durante|nel|[tf]ra)\s+(il\s+)?)?{YearRegex}\s*{TillRegex}\s*{YearRegex})"; public const string FutureSuffixRegex = @"\b(((in|nel)\s+)?(futuro|seguito))|(da\s+questo\s+momento)\b"; + public static readonly string ModPrefixRegex = $@"\b({RelativeRegex}|{AroundRegex}|{BeforeRegex}|{AfterRegex}|{SinceRegex})\b"; + public 
static readonly string ModSuffixRegex = $@"\b({AgoRegex}|{LaterRegex}|{BeforeAfterRegex}|{FutureSuffixRegex}|{PastSuffixRegex})\b"; public static readonly string ComplexDatePeriodRegex = $@"(((da(l(l[aeo'])?|i)?|durante|in|nel(l[aeo'])?|negli)\s*)?(?.+)\s*({TillRegex})\s*(?.+)|(([tf]ra)\s+)(?.+)\s*({RangeConnectorRegex})\s*(?.+))"; + public const string AmbiguousPointRangeRegex = @"^(mar)$"; public static readonly Dictionary UnitMap = new Dictionary { { @"decenni", @"10Y" }, @@ -643,7 +652,11 @@ public static class DateTimeDefinitions public static readonly string[] DurationDateRestrictions = { }; public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { - { @"\bgiorno|pomeriggio|sera|notte\b", @"\b(buona?\s*(giorno|pomeriggio|sera|notte))\b" } + { @"^\d{4}$", @"(\d\.\d{4}|\d{4}\.\d)" }, + { @"\bgiorno|pomeriggio|sera|notte\b", @"\b(buona?\s*(giorno|pomeriggio|sera|notte))\b" }, + { @"^(apr|ago|dic|feb|gen|lug|giu|mar|mag|nov|ott|sett?)$", @"([$%£&!?@#])(apr|ago|dic|feb|gen|lug|giu|mar|mag|nov|ott|sett?)|(apr|ago|dic|feb|gen|lug|giu|mar|mag|nov|ott|sett?)([$%£&@#])" }, + { @"^\d{1,4}-\d{1,4}$", @"\d{1,4}-\d{1,4}-\d|\d-\d{1,4}-\d{1,4}" }, + { @"^\d{1,4}-\d{1,4}-\d{1,4}$", @"\d{1,4}-\d{1,4}-\d{1,4}-\d|\d-\d{1,4}-\d{1,4}-\d{1,4}" } }; public static readonly IList MorningTermList = new List { diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Italian/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Italian/NumbersDefinitions.cs index b67bc1b07c..40af7c9d6b 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Italian/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Italian/NumbersDefinitions.cs @@ -25,14 +25,14 @@ public static class NumbersDefinitions public const bool CompoundNumberLanguage = true; public const bool MultiDecimalSeparatorCulture = false; public const string DigitsNumberRegex = @"\d|\d{1,3}(\.\d{3})"; - public const string RoundNumberIntegerRegex = 
@"(cento?|mille?|mila|milion[ei]?|miliard[oi]?|bilion[ei]?|trilion[ei]?)"; - public const string ZeroToNineIntegerRegex = @"(un[oa]?|due?|tre?|quattro?|cinque?|sei|sette?|otto?|nove?|zero)"; - public const string TwoToNineIntegerRegex = @"(due?|tre?|quattro?|cinque?|sei|sette?|otto?|nove?)"; + public const string RoundNumberIntegerRegex = @"(cent(o|(?!\b)|(?='))|mill(e|(?!\b)|(?='))|mila|miliard([oi]|(?!\b)|(?='))|(milion|bilion|trilion)([ei]|(?!\b)|(?=')))"; + public const string ZeroToNineIntegerRegex = @"(un[oa]?|due|tre|quattro|cinque|sei|sette|otto|nove|zero)"; + public const string TwoToNineIntegerRegex = @"(due|tre|quattro|cinque|sei|sette|otto|nove)"; public const string NegativeNumberTermsRegex = @"(?meno\s+)"; public static readonly string NegativeNumberSignRegex = $@"^{NegativeNumberTermsRegex}.*"; public const string AnIntRegex = @"(un)(?=\s)"; - public const string TenToNineteenIntegerRegex = @"(diciassette?|tredici?|quattordici?|diciotto?|diciannove?|quindici?|sedici?|undici?|dodici?|dieci?)"; - public const string TensNumberIntegerRegex = @"(settanta?|venti?|trenta?|ottanta?|novanta?|quaranta?|cinquanta?|sessanta?)"; + public const string TenToNineteenIntegerRegex = @"(diciott(o|(?!\b)|(?='))|(diciassett|diciannov)(e|(?!\b)|(?='))|(tredic|quattordic|quindic|sedic|undic|dodic|diec)(i|(?!\b)|(?=')))"; + public const string TensNumberIntegerRegex = @"(vent(i|(?!\b)|(?='))|(settant|trent|ottant|novant|quarant|cinquant|sessant)(a|(?!\b)|(?=')))"; public static readonly string SeparaIntRegex = $@"((({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}{ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex})(\s*{RoundNumberIntegerRegex})*))|((({AnIntRegex})?(\s*{RoundNumberIntegerRegex})+))"; public static readonly string AllIntRegex = 
$@"(((({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}{ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|({AnIntRegex})?)(\s*{RoundNumberIntegerRegex})+)\s*(e\s+)?)*{SeparaIntRegex})"; public const string PlaceHolderPureNumber = @"\b"; @@ -46,19 +46,24 @@ public static class NumbersDefinitions public const string RoundNumberOrdinalRegex = @"(centesim[oaie]|millesim[oaie]|milionesim[oaie]|miliardesim[oaie]|bilionesim[oaie]|trilionesim[oaie])"; public const string OneToNineOrdinalRegex = @"(prim[oaie]|second[oaie]|terz[oaie]|quart[oaie]|quint[oaie]|sest[oaie]|settim[oaie]|ottav[oaie]|non[oaie])"; public const string NumberOrdinalRegex = @"(prim[oaie]|second[oaie]|terz[oaie]|quart[oaie]|quint[oaie]|sest[oaie]|settim[oaie]|ottav[oaie]|non[oaie]|decim[oaie]|undicesim[oaie]|dodicesim[oaie]|tredicesim[oaie]|quattordicesim[oaie]|quindicesim[oaie]|sedicesim[oaie]|diciassettesim[oaie]|diciottesim[oaie]|diciannovesim[oaie]|ventesim[oaie]|trentesim[oaie]|quarantesim[oaie]|cinquantesim[oaie]|sessantesim[oaie]|settantesim[oaie]|ottantesim[oaie]|novantesim[oaie])"; - public const string RelativeOrdinalRegex = @"(precedente|seguente|penultim[oa]|terzultim[oa]|ultim[oa])"; - public static readonly string BasicOrdinalRegex = $@"(({NumberOrdinalRegex}|{RelativeOrdinalRegex})(?!\s*({TwoToNineIntegerRegex}|([2-9]+))\b))"; - public static readonly string SuffixBasicOrdinalRegex = $@"((((({TensNumberIntegerRegex}{ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|({AnIntRegex})|{RoundNumberIntegerRegex})(\s*{RoundNumberIntegerRegex})*)\s*(e\s+)?)*({TensNumberIntegerRegex}?{ZeroToNineIntegerRegex}esim[oaie]|{BasicOrdinalRegex}))"; + public const string OneToNineOrdinalCompoundRegex = @"(un|du|tre|quattr|cinqu|sei|sett|ott|nov)esim[oaie]"; + public const string RelativeOrdinalRegex = 
@"(?precedent[ei]|seguent[ei]|prossim[aoei]|corrent[ei]|successiv[aoei]|penultim[oaei]|terzultim[oaei]|(l')?(ultim[oaei]|attual[ei])|quell[ao]\s+prima\s+dell'ultim[ao])"; + public static readonly string BasicOrdinalRegex = $@"({NumberOrdinalRegex}(?!\s*({TwoToNineIntegerRegex}|([2-9]+))\b))"; + public static readonly string SuffixBasicOrdinalRegex = $@"((((({TensNumberIntegerRegex}{ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|({AnIntRegex})|{RoundNumberIntegerRegex})(\s*{RoundNumberIntegerRegex})*)\s*(e\s+)?)*({TensNumberIntegerRegex}?{OneToNineOrdinalCompoundRegex}|{BasicOrdinalRegex}))"; public static readonly string SuffixRoundNumberOrdinalRegex = $@"(({AllIntRegex}\s*)?{RoundNumberOrdinalRegex})"; - public static readonly string AllOrdinalRegex = $@"({SuffixRoundNumberOrdinalRegex}|{SuffixBasicOrdinalRegex})"; + public static readonly string AllOrdinalNumberRegex = $@"({SuffixRoundNumberOrdinalRegex}|{SuffixBasicOrdinalRegex})"; + public static readonly string AllOrdinalRegex = $@"(?:{AllOrdinalNumberRegex}|{RelativeOrdinalRegex})"; public const string OrdinalSuffixRegex = @"(?<=\b)(\d+(°|(esi)?m[oaie]))"; public const string OrdinalNumericRegex = @"(?<=\b)(\d{1,3}(\s*,\s*\d{3})*(°|(esi)?m[oaie]))"; public static readonly string OrdinalRoundNumberRegex = $@"(?\s+e\s+(mezzo|(un|{TwoToNineIntegerRegex})\s+(mezz[oi]|quart[oi]|terz[oi]|quint[oi]|sest[oi]|settim[oi]|ottav[oi]|non[oi]|decim[oi])))"; + public static readonly string RoundMultiplierWithFraction = $@"(?(?:milion[ei]|miliard[oi]|bilion[ei]|trillion[ei]))(?={FractionMultiplierRegex}?$)"; + public static readonly string RoundMultiplierRegex = $@"\b\s*({RoundMultiplierWithFraction}|(?(cento|mille|mila))$)"; + public static readonly string FractionNounRegex = $@"(?<=\b)({AllIntRegex}\s+(e\s+)?)?(({AllIntRegex})(\s+|\s*-\s*)(?!\bprimo\b|\bsecondo\b)(mezzi|({AllOrdinalNumberRegex})|({RoundNumberOrdinalRegex}))|(mezzo|un\s+quarto\s+di)\s+{RoundNumberIntegerRegex})(?=\b)"; + public 
static readonly string FractionNounWithArticleRegex = $@"(?<=\b)((({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\s+(e\s+)?)?((un)(\s+|\s*-\s*)(?!\bprimo\b|\bsecondo\b)({AllOrdinalNumberRegex}|{RoundNumberOrdinalRegex})|(un\s+)?mezzo))(?=\b)"; public static readonly string FractionPrepositionRegex = $@"(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?!\.))(?=\b)"; public static readonly string AllPointRegex = $@"((\s+{ZeroToNineIntegerRegex})+|(\s+{SeparaIntRegex}))"; public static readonly string AllFloatRegex = $@"({AllIntRegex}(\s+(virgola|punto)){AllPointRegex})"; @@ -106,6 +111,7 @@ public static class NumbersDefinitions public static readonly string[] WrittenGroupSeparatorTexts = { @"punto" }; public static readonly string[] WrittenIntegerSeparatorTexts = { @"e", @"-" }; public static readonly string[] WrittenFractionSeparatorTexts = { @"e" }; + public static readonly string[] OneHalfTokens = { @"un", @"mezzo" }; public const string HalfADozenRegex = @"mezza\s+dozzina"; public static readonly string DigitalNumberRegex = $@"((?<=\b)(cento|mille|milione|milioni|miliardo|miliardi|bilione|bilioni|trilione|trilioni|dozzina|dozzine)(?=\b))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; public const string AmbiguousFractionConnectorsRegex = @"(\bnel\b)"; @@ -155,6 +161,7 @@ public static class NumbersDefinitions { @"novanta", 90 }, { @"novant", 90 }, { @"cento", 100 }, + { @"cent", 100 }, { @"mille", 1000 }, { @"mila", 1000 }, { @"milione", 1000000 }, @@ -406,81 +413,75 @@ public static class NumbersDefinitions }; public static readonly Dictionary RelativeReferenceOffsetMap = new Dictionary { - { @"ultimo", @"0" }, - { @"ultima", @"0" }, - { @"ultimi", @"0" }, - { @"ultime", @"0" }, - { @"successivo", @"1" }, - { @"successiva", @"1" }, - { @"successivi", @"1" }, - { @"successive", @"1" }, - { @"prossimo", @"1" }, - { @"prossima", @"1" }, - { @"prossimi", @"1" }, - { @"prossime", @"1" }, - { @"seguente", @"1" }, - { @"seguenti", @"1" }, { @"precedente", 
@"-1" }, { @"precedenti", @"-1" }, - { @"penultimo", @"-1" }, + { @"seguente", @"1" }, + { @"seguenti", @"1" }, + { @"prossima", @"1" }, + { @"prossimo", @"1" }, + { @"prossime", @"1" }, + { @"prossimi", @"1" }, + { @"corrente", @"0" }, + { @"correnti", @"0" }, + { @"attuale", @"0" }, + { @"attuali", @"0" }, + { @"l'attuale", @"0" }, + { @"successiva", @"1" }, + { @"successivo", @"1" }, + { @"successive", @"1" }, + { @"successivi", @"1" }, { @"penultima", @"-1" }, - { @"penultimi", @"-1" }, + { @"penultimo", @"-1" }, { @"penultime", @"-1" }, - { @"terz'ultimo", @"-2" }, - { @"terz'ultima", @"-2" }, - { @"terz'ultimi", @"-2" }, - { @"terz'ultime", @"-2" }, - { @"terzultimo", @"-2" }, + { @"penultimi", @"-1" }, + { @"quello prima dell'ultimo", @"-1" }, + { @"quella prima dell'ultima", @"-1" }, { @"terzultima", @"-2" }, - { @"terzultimi", @"-2" }, + { @"terzultimo", @"-2" }, { @"terzultime", @"-2" }, - { @"quart'ultimo", @"-3" }, - { @"quart'ultima", @"-3" }, - { @"quart'ultimi", @"-3" }, - { @"quart'ultime", @"-3" }, - { @"quartultimo", @"-3" }, - { @"quartultima", @"-3" }, - { @"quartultimi", @"-3" }, - { @"quartultime", @"-3" } + { @"terzultimi", @"-2" }, + { @"ultima", @"0" }, + { @"ultimo", @"0" }, + { @"ultime", @"0" }, + { @"ultimi", @"0" }, + { @"l'ultima", @"0" }, + { @"l'ultimo", @"0" } }; public static readonly Dictionary RelativeReferenceRelativeToMap = new Dictionary { - { @"ultimo", @"end" }, - { @"ultima", @"end" }, - { @"ultimi", @"end" }, - { @"ultime", @"end" }, - { @"successivo", @"current" }, - { @"successiva", @"current" }, - { @"successivi", @"current" }, - { @"successive", @"current" }, - { @"prossimo", @"current" }, - { @"prossima", @"current" }, - { @"prossimi", @"current" }, - { @"prossime", @"current" }, - { @"seguente", @"current" }, - { @"seguenti", @"current" }, { @"precedente", @"current" }, { @"precedenti", @"current" }, - { @"penultimo", @"end" }, + { @"seguente", @"current" }, + { @"seguenti", @"current" }, + { @"prossima", @"current" 
}, + { @"prossimo", @"current" }, + { @"prossime", @"current" }, + { @"prossimi", @"current" }, + { @"corrente", @"current" }, + { @"correnti", @"current" }, + { @"attuale", @"current" }, + { @"attuali", @"current" }, + { @"l'attuale", @"current" }, + { @"successiva", @"current" }, + { @"successivo", @"current" }, + { @"successive", @"current" }, + { @"successivi", @"current" }, { @"penultima", @"end" }, - { @"penultimi", @"end" }, + { @"penultimo", @"end" }, { @"penultime", @"end" }, - { @"terz'ultimo", @"end" }, - { @"terz'ultima", @"end" }, - { @"terz'ultimi", @"end" }, - { @"terz'ultime", @"end" }, - { @"terzultimo", @"end" }, + { @"penultimi", @"end" }, + { @"quello prima dell'ultimo", @"end" }, + { @"quella prima dell'ultima", @"end" }, { @"terzultima", @"end" }, - { @"terzultimi", @"end" }, + { @"terzultimo", @"end" }, { @"terzultime", @"end" }, - { @"quart'ultimo", @"end" }, - { @"quart'ultima", @"end" }, - { @"quart'ultimi", @"end" }, - { @"quart'ultime", @"end" }, - { @"quartultimo", @"end" }, - { @"quartultima", @"end" }, - { @"quartultimi", @"end" }, - { @"quartultime", @"end" } + { @"terzultimi", @"end" }, + { @"ultima", @"end" }, + { @"ultimo", @"end" }, + { @"ultime", @"end" }, + { @"ultimi", @"end" }, + { @"l'ultima", @"end" }, + { @"l'ultimo", @"end" } }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Italian/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Italian/NumbersWithUnitDefinitions.cs index 16f0fded6c..97a1c75e26 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Italian/NumbersWithUnitDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Italian/NumbersWithUnitDefinitions.cs @@ -290,12 +290,282 @@ public static class NumbersWithUnitDefinitions { @"Fen", @"fen" }, { @"Jiao", @"jiao" }, { @"Finnish markka", @"suomen markka|finnish markka|finsk mark|fim|markkaa|markka|marco finlandese|marchi finlandesi" }, - { @"Penni", @"penniä|penni" } + { 
@"Penni", @"penniä|penni" }, + { @"Bitcoin", @"bitcoin|bitcoins|btc|xbt|₿" }, + { @"Millibitcoin", @"millibitcoin|millibitcoins|milibitcoin|milibitcoins" }, + { @"Satoshi", @"satoshi|satoshis" } }; - public const string CompoundUnitConnectorRegex = @"(?[^.])"; + public static readonly Dictionary CurrencyNameToIsoCodeMap = new Dictionary + { + { @"Afghan afghani", @"AFN" }, + { @"Euro", @"EUR" }, + { @"Albanian lek", @"ALL" }, + { @"Angolan kwanza", @"AOA" }, + { @"Armenian dram", @"AMD" }, + { @"Aruban florin", @"AWG" }, + { @"Bangladeshi taka", @"BDT" }, + { @"Bhutanese ngultrum", @"BTN" }, + { @"Bolivian boliviano", @"BOB" }, + { @"Bosnia and Herzegovina convertible mark", @"BAM" }, + { @"Botswana pula", @"BWP" }, + { @"Brazilian real", @"BRL" }, + { @"Bulgarian lev", @"BGN" }, + { @"Cambodian riel", @"KHR" }, + { @"Cape Verdean escudo", @"CVE" }, + { @"Costa Rican colón", @"CRC" }, + { @"Croatian kuna", @"HRK" }, + { @"Czech koruna", @"CZK" }, + { @"Eritrean nakfa", @"ERN" }, + { @"Ethiopian birr", @"ETB" }, + { @"Gambian dalasi", @"GMD" }, + { @"Georgian lari", @"GEL" }, + { @"Ghanaian cedi", @"GHS" }, + { @"Guatemalan quetzal", @"GTQ" }, + { @"Haitian gourde", @"HTG" }, + { @"Honduran lempira", @"HNL" }, + { @"Hungarian forint", @"HUF" }, + { @"Iranian rial", @"IRR" }, + { @"Yemeni rial", @"YER" }, + { @"Israeli new shekel", @"ILS" }, + { @"Japanese yen", @"JPY" }, + { @"Kazakhstani tenge", @"KZT" }, + { @"Kenyan shilling", @"KES" }, + { @"North Korean won", @"KPW" }, + { @"South Korean won", @"KRW" }, + { @"Kyrgyzstani som", @"KGS" }, + { @"Lao kip", @"LAK" }, + { @"Lesotho loti", @"LSL" }, + { @"South African rand", @"ZAR" }, + { @"Macanese pataca", @"MOP" }, + { @"Macedonian denar", @"MKD" }, + { @"Malagasy ariary", @"MGA" }, + { @"Malawian kwacha", @"MWK" }, + { @"Malaysian ringgit", @"MYR" }, + { @"Mauritanian ouguiya", @"MRO" }, + { @"Mongolian tögrög", @"MNT" }, + { @"Mozambican metical", @"MZN" }, + { @"Burmese kyat", @"MMK" }, + { @"Nicaraguan 
córdoba", @"NIO" }, + { @"Nigerian naira", @"NGN" }, + { @"Turkish lira", @"TRY" }, + { @"Omani rial", @"OMR" }, + { @"Panamanian balboa", @"PAB" }, + { @"Papua New Guinean kina", @"PGK" }, + { @"Paraguayan guaraní", @"PYG" }, + { @"Peruvian sol", @"PEN" }, + { @"Polish złoty", @"PLN" }, + { @"Qatari riyal", @"QAR" }, + { @"Saudi riyal", @"SAR" }, + { @"Samoan tālā", @"WST" }, + { @"São Tomé and Príncipe dobra", @"STN" }, + { @"Sierra Leonean leone", @"SLL" }, + { @"Swazi lilangeni", @"SZL" }, + { @"Tajikistani somoni", @"TJS" }, + { @"Thai baht", @"THB" }, + { @"Ukrainian hryvnia", @"UAH" }, + { @"Vanuatu vatu", @"VUV" }, + { @"Venezuelan bolívar", @"VEF" }, + { @"Zambian kwacha", @"ZMW" }, + { @"Moroccan dirham", @"MAD" }, + { @"United Arab Emirates dirham", @"AED" }, + { @"Azerbaijani manat", @"AZN" }, + { @"Turkmenistan manat", @"TMT" }, + { @"Somali shilling", @"SOS" }, + { @"Tanzanian shilling", @"TZS" }, + { @"Ugandan shilling", @"UGX" }, + { @"Romanian leu", @"RON" }, + { @"Moldovan leu", @"MDL" }, + { @"Nepalese rupee", @"NPR" }, + { @"Pakistani rupee", @"PKR" }, + { @"Indian rupee", @"INR" }, + { @"Seychellois rupee", @"SCR" }, + { @"Mauritian rupee", @"MUR" }, + { @"Maldivian rufiyaa", @"MVR" }, + { @"Sri Lankan rupee", @"LKR" }, + { @"Indonesian rupiah", @"IDR" }, + { @"Danish krone", @"DKK" }, + { @"Norwegian krone", @"NOK" }, + { @"Icelandic króna", @"ISK" }, + { @"Swedish krona", @"SEK" }, + { @"West African CFA franc", @"XOF" }, + { @"Central African CFA franc", @"XAF" }, + { @"Comorian franc", @"KMF" }, + { @"Congolese franc", @"CDF" }, + { @"Burundian franc", @"BIF" }, + { @"Djiboutian franc", @"DJF" }, + { @"CFP franc", @"XPF" }, + { @"Guinean franc", @"GNF" }, + { @"Swiss franc", @"CHF" }, + { @"Rwandan franc", @"RWF" }, + { @"Russian ruble", @"RUB" }, + { @"Transnistrian ruble", @"PRB" }, + { @"New Belarusian ruble", @"BYN" }, + { @"Algerian dinar", @"DZD" }, + { @"Bahraini dinar", @"BHD" }, + { @"Iraqi dinar", @"IQD" }, + { @"Jordanian dinar", 
@"JOD" }, + { @"Kuwaiti dinar", @"KWD" }, + { @"Libyan dinar", @"LYD" }, + { @"Serbian dinar", @"RSD" }, + { @"Tunisian dinar", @"TND" }, + { @"Argentine peso", @"ARS" }, + { @"Chilean peso", @"CLP" }, + { @"Colombian peso", @"COP" }, + { @"Cuban convertible peso", @"CUC" }, + { @"Cuban peso", @"CUP" }, + { @"Dominican peso", @"DOP" }, + { @"Mexican peso", @"MXN" }, + { @"Uruguayan peso", @"UYU" }, + { @"British pound", @"GBP" }, + { @"Saint Helena pound", @"SHP" }, + { @"Egyptian pound", @"EGP" }, + { @"Falkland Islands pound", @"FKP" }, + { @"Gibraltar pound", @"GIP" }, + { @"Manx pound", @"IMP" }, + { @"Jersey pound", @"JEP" }, + { @"Lebanese pound", @"LBP" }, + { @"South Sudanese pound", @"SSP" }, + { @"Sudanese pound", @"SDG" }, + { @"Syrian pound", @"SYP" }, + { @"United States dollar", @"USD" }, + { @"Australian dollar", @"AUD" }, + { @"Bahamian dollar", @"BSD" }, + { @"Barbadian dollar", @"BBD" }, + { @"Belize dollar", @"BZD" }, + { @"Bermudian dollar", @"BMD" }, + { @"Brunei dollar", @"BND" }, + { @"Singapore dollar", @"SGD" }, + { @"Canadian dollar", @"CAD" }, + { @"Cayman Islands dollar", @"KYD" }, + { @"New Zealand dollar", @"NZD" }, + { @"Fijian dollar", @"FJD" }, + { @"Guyanese dollar", @"GYD" }, + { @"Hong Kong dollar", @"HKD" }, + { @"Jamaican dollar", @"JMD" }, + { @"Liberian dollar", @"LRD" }, + { @"Namibian dollar", @"NAD" }, + { @"Solomon Islands dollar", @"SBD" }, + { @"Surinamese dollar", @"SRD" }, + { @"New Taiwan dollar", @"TWD" }, + { @"Trinidad and Tobago dollar", @"TTD" }, + { @"Tuvaluan dollar", @"TVD" }, + { @"Chinese yuan", @"CNY" }, + { @"Rial", @"__RI" }, + { @"Shiling", @"__S" }, + { @"Som", @"__SO" }, + { @"Dirham", @"__DR" }, + { @"Dinar", @"_DN" }, + { @"Dollar", @"__D" }, + { @"Manat", @"__MA" }, + { @"Rupee", @"__R" }, + { @"Krone", @"__K" }, + { @"Krona", @"__K" }, + { @"Crown", @"__K" }, + { @"Frank", @"__F" }, + { @"Mark", @"__M" }, + { @"Ruble", @"__RB" }, + { @"Peso", @"__PE" }, + { @"Pound", @"__P" }, + { @"Tristan da 
Cunha pound", @"_TP" }, + { @"South Georgia and the South Sandwich Islands pound", @"_SP" }, + { @"Somaliland shilling", @"_SS" }, + { @"Pitcairn Islands dollar", @"_PND" }, + { @"Palauan dollar", @"_PD" }, + { @"Niue dollar", @"_NID" }, + { @"Nauruan dollar", @"_ND" }, + { @"Micronesian dollar", @"_MD" }, + { @"Kiribati dollar", @"_KID" }, + { @"Guernsey pound", @"_GGP" }, + { @"Faroese króna", @"_FOK" }, + { @"Cook Islands dollar", @"_CKD" }, + { @"British Virgin Islands dollar", @"_BD" }, + { @"Ascension pound", @"_AP" }, + { @"Alderney pound", @"_ALP" }, + { @"Abkhazian apsar", @"_AA" }, + { @"Bitcoin", @"_XBT" } + }; + public static readonly Dictionary FractionalUnitNameToCodeMap = new Dictionary + { + { @"Jiao", @"JIAO" }, + { @"Kopek", @"KOPEK" }, + { @"Pul", @"PUL" }, + { @"Cent", @"CENT" }, + { @"Qindarkë", @"QINDARKE" }, + { @"Penny", @"PENNY" }, + { @"Santeem", @"SANTEEM" }, + { @"Cêntimo", @"CENTIMO" }, + { @"Centavo", @"CENTAVO" }, + { @"Luma", @"LUMA" }, + { @"Qəpik", @"QƏPIK" }, + { @"Fils", @"FILS" }, + { @"Poisha", @"POISHA" }, + { @"Kapyeyka", @"KAPYEYKA" }, + { @"Centime", @"CENTIME" }, + { @"Chetrum", @"CHETRUM" }, + { @"Paisa", @"PAISA" }, + { @"Fening", @"FENING" }, + { @"Thebe", @"THEBE" }, + { @"Sen", @"SEN" }, + { @"Stotinka", @"STOTINKA" }, + { @"Fen", @"FEN" }, + { @"Céntimo", @"CENTIMO" }, + { @"Lipa", @"LIPA" }, + { @"Haléř", @"HALER" }, + { @"Øre", @"ØRE" }, + { @"Piastre", @"PIASTRE" }, + { @"Santim", @"SANTIM" }, + { @"Oyra", @"OYRA" }, + { @"Butut", @"BUTUT" }, + { @"Tetri", @"TETRI" }, + { @"Pesewa", @"PESEWA" }, + { @"Fillér", @"FILLER" }, + { @"Eyrir", @"EYRIR" }, + { @"Dinar", @"DINAR" }, + { @"Agora", @"AGORA" }, + { @"Tïın", @"TIIN" }, + { @"Chon", @"CHON" }, + { @"Jeon", @"JEON" }, + { @"Tyiyn", @"TYIYN" }, + { @"Att", @"ATT" }, + { @"Sente", @"SENTE" }, + { @"Dirham", @"DIRHAM" }, + { @"Rappen", @"RAPPEN" }, + { @"Avo", @"AVO" }, + { @"Deni", @"DENI" }, + { @"Iraimbilanja", @"IRAIMBILANJA" }, + { @"Tambala", @"TAMBALA" }, + 
{ @"Laari", @"LAARI" }, + { @"Khoums", @"KHOUMS" }, + { @"Ban", @"BAN" }, + { @"Möngö", @"MONGO" }, + { @"Pya", @"PYA" }, + { @"Kobo", @"KOBO" }, + { @"Kuruş", @"KURUS" }, + { @"Baisa", @"BAISA" }, + { @"Centésimo", @"CENTESIMO" }, + { @"Toea", @"TOEA" }, + { @"Sentimo", @"SENTIMO" }, + { @"Grosz", @"GROSZ" }, + { @"Sene", @"SENE" }, + { @"Halala", @"HALALA" }, + { @"Para", @"PARA" }, + { @"Öre", @"ORE" }, + { @"Diram", @"DIRAM" }, + { @"Satang", @"SATANG" }, + { @"Seniti", @"SENITI" }, + { @"Millime", @"MILLIME" }, + { @"Tennesi", @"TENNESI" }, + { @"Kopiyka", @"KOPIYKA" }, + { @"Tiyin", @"TIYIN" }, + { @"Hào", @"HAO" }, + { @"Ngwee", @"NGWEE" }, + { @"Millibitcoin", @"MILLIBITCOIN" }, + { @"Satoshi", @"SATOSHI" } + }; + public const string CompoundUnitConnectorRegex = @"(?e)"; public static readonly Dictionary CurrencyPrefixList = new Dictionary { - { @"Dollar", @"$" }, + { @"Dollar", @"$|dollari" }, { @"United States dollar", @"$ stati uniti|us$|us $|u.s. $|u.s $" }, { @"East Caribbean dollar", @"east caribbean $" }, { @"Australian dollar", @"$ australiano|$ australia" }, @@ -334,7 +604,8 @@ public static class NumbersWithUnitDefinitions { @"Euro", @"€" }, { @"Pound", @"£" }, { @"Costa Rican colón", @"₡" }, - { @"Turkish lira", @"₺" } + { @"Turkish lira", @"₺" }, + { @"Bitcoin", @"₿|btc|xbt" } }; public static readonly IList AmbiguousCurrencyUnitList = new List { @@ -493,13 +764,13 @@ public static class NumbersWithUnitDefinitions { @"Pollice cubo", @"pollice cubo|pollice cubico|pollici cubici|pollici cubi" }, { @"Piede cubo", @"piede cubo|piedi cubi|piede cubico|piedi cubici" }, { @"Miglia cubica", @"miglio cubo|miglio cubico|miglia cubica|miglia cubiche" }, - { @"Oncia", @"oncia|once" }, { @"Oncia liquida", @"oncia fluida|once fluide|oncia liquida|once liquide" }, { @"Cucchiaino", @"cucchiaino|cucchiaini" }, { @"Cucchiaio", @"cucchiaio|cucchiai" }, { @"Tazza", @"tazza|tazze" }, { @"Pinta", @"pinta|pinte" }, - { @"Gallone", @"gallone|galloni" } + { @"Gallone", 
@"gallone|galloni" }, + { @"Barile", @"barili|barile" } }; public static readonly IList AmbiguousVolumeUnitList = new List { @@ -515,8 +786,7 @@ public static class NumbersWithUnitDefinitions { @"Chilogrammo", @"kg|kilogrammo|chilogrammo|chilogrammi|kilogrammi|kilo|kili|chilo|chili" }, { @"Grammo", @"g|grammo|grammi|gr" }, { @"Milligrammo", @"mg|milligrammo|milligrammi" }, - { @"Barile", @"barili|barile" }, - { @"Gallone", @"gallone|galloni" }, + { @"Microgrammo", @"μg|microgrammo|microgrammi" }, { @"Tonnellata", @"tonnellata|tonnellate" }, { @"Libbra", @"libbra|libbre" }, { @"Oncia", @"oncia|once" } @@ -526,9 +796,28 @@ public static class NumbersWithUnitDefinitions @"g", @"oz" }; + public static readonly Dictionary AngleSuffixList = new Dictionary + { + { @"Degree", @"grado|gradi|°" }, + { @"Radian", @"radiante|radianti|rad" }, + { @"Turn", @"giro|giri" } + }; + public static readonly IList AmbiguousAngleUnitList = new List + { + @"giro", + @"giri" + }; public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { { @"\bl\b", @"l\s*('|’)" } }; + public static readonly Dictionary TemperatureAmbiguityFiltersDict = new Dictionary + { + { @"\b(grad[oi]|°)$", @"\b((grad[oi]|°)\s*(angolo|rotazione)|(ruota(re|t[oiae]|ndo)?|angolo|rotazioe)(\s+(\p{L}+|\d+)){0,4}\s*(grad[oi]\b|°))" } + }; + public static readonly Dictionary DimensionAmbiguityFiltersDict = new Dictionary + { + { @"\b(grad[oi]|°)$", @"\b((grad[oi]|°)\s*(c(elsius|entigrado)?|f(ah?renheit)?)|(temperatura)(\s+(\p{L}+|\d+)){0,4}\s*(grad[oi]\b|°))" } + }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Italian/QuotedTextDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Italian/QuotedTextDefinitions.cs new file mode 100644 index 0000000000..ebdf60b568 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Italian/QuotedTextDefinitions.cs @@ -0,0 +1,35 @@ +//------------------------------------------------------------------------------ 
+// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Italian\Italian-QuotedText.yaml +// - Language: Italian +// - ClassName: QuotedTextDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Italian +{ + using System; + using System.Collections.Generic; + + public static class QuotedTextDefinitions + { + public const string LangMarker = @"Ita"; + public const string QuotedTextRegex1 = @"(“([^“”]+)”)"; + public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)"; + public const string QuotedTextRegex3 = @"(""([^""]+)"")"; + public const string QuotedTextRegex4 = @"(\\'([^\']+)\\')"; + public const string QuotedTextRegex5 = @"(`([^`]+)`)"; + public const string QuotedTextRegex6 = @"(«([^«»]+)»)"; + public const string QuotedTextRegex7 = @"(‹([^‹›]+)›)"; + public const string QuotedTextRegex8 = @"(123456)"; + public const string QuotedTextRegex9 = @"(123456)"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Italian/QuotedTextDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Italian/QuotedTextDefinitions.tt new file mode 100644 index 0000000000..5cbc2252ff --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Italian/QuotedTextDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Italian\Italian-QuotedText.yaml"; + this.Language = "Italian"; + this.ClassName = "QuotedTextDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/ChoiceDefinitions.cs 
b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/ChoiceDefinitions.cs index dec1094ed0..fec41639a8 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/ChoiceDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/ChoiceDefinitions.cs @@ -23,7 +23,8 @@ public static class ChoiceDefinitions { public const string LangMarker = @"Jpn"; public const string TokenizerRegex = @"[^\w\d\u3040-\u309f\u30a0-\u30ff\uff00-\uff9f\u4e00-\u9faf\u3400-\u4dbf]"; - public const string TrueRegex = @"(はい(!)*|そうです|よい(です)*)|(\uD83D\uDC4D|\uD83D\uDC4C)"; - public const string FalseRegex = @"(いいえ|ではありません|ではない|じゃない|じゃありません)|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)"; + public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)"; + public static readonly string TrueRegex = $@"(はい(!)*|そうです|よい(です)*)|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?"; + public static readonly string FalseRegex = $@"(いいえ|ではありません|ではない|じゃない|じゃありません)|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs index 7fd0558829..ead5c9e65e 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs @@ -21,114 +21,173 @@ namespace Microsoft.Recognizers.Definitions.Japanese public static class DateTimeDefinitions { - public const string MonthRegex = @"(?正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月)"; - public const string MonthRegexForPeriod = @"(?正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月)(?=\b|t|まで|から)?"; - public const string MonthNumRegexForPeriod = 
@"(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)(?=\b|t|まで|から)?"; - public const string DayRegex = @"(?01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|1|2|3|4|5|6|7|8|9)日?"; - public const string DayRegexForPeriod = @"(?01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)日?(?=\b|t|まで|から)?"; - public const string DateDayRegexInJapanese = @"(?初一|三十|一日|十一日|二十一日|三十一日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|一日|十一日|十日|二十一日|二十日|三十一日|三十日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|十日|二十日|三十日|10日|11日|12日|13日|14日|15日|16日|17日|18日|19日|1日|20日|21日|22日|23日|24日|25日|26日|27日|28日|29日|2日|30日|31日|3日|4日|5日|6日|7日|8日|9日|一号|十一号|二十一号|三十一号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|一号|十一号|十号|二十一号|二十号|三十一号|三十号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|十号|二十号|三十号|10号|11号|12号|13号|14号|15号|16号|17号|18号|19号|1号|20号|21号|22号|23号|24号|25号|26号|27号|28号|29号|2号|30号|31号|3号|4号|5号|6号|7号|8号|9号)"; - public const string DayRegexNumInJapanese = @"(?一|十一|二十一|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|十|二十|廿|卅)"; - public const string MonthNumRegex = @"(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)"; + public const string LangMarker = @"Jpn"; + public const string MonthRegex = @"(?(正|一|二|三|四|五|六|七|八|九|十|十一|十二|0?[1-9]|1[0-2])\s*(か月(?!で)|月間?))"; + public const string MonthRegexForPeriod = @"(?正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|(0?[1-9]|1[0-2])か?月)(?=\b|t|まで|から)?"; + public const string MonthNumRegexForPeriod = @"(?0?[1-9]|1[0-2])(?=\b|t|まで|から)?"; + public const string DayRegex = @"(?[0-2]?[1-9]|[1-3]0|31)((日|目)(?!かかる|待つ|泊まる|経つ|都合)間?)?"; + public const 
string DayRegexForPeriod = @"(?3[01]|[0-2]?\d|(三十一?|(一|二)?十?[一二三四五六七八九]))((\s*日(?!かかる|待つ|泊まる|経つ))目?)?(?=\b|t|まで|から)?"; + public const string DayNumberRegex = @"(二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|二十一|三十一|十二|十三|十四|十五|十六|十七|十八|十一|十|二十|廿(?!日市市)|三十|一|二|三|四|五|六|七|八|九)"; + public static readonly string DateDayRegexInCJK = $@"(?初一|({DayNumberRegex}|3[01]|[0-2]?\d)(\s*日|号)(?!かかる|待つ|泊まる|経つ))目?"; + public const string DayRegexNumInCJK = @"(?一|十一|二十一|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|廿(?!日市市)|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|十|二十|廿|卅)"; + public const string MonthNumRegex = @"(?0?[1-9]|1[0-2])"; public const string TwoNumYear = @"50"; - public const string YearNumRegex = @"(?((1[5-9]|20)\d{2})|2100)年?"; - public const string SimpleYearRegex = @"(?(\d{2,4}))年?"; - public const string ZeroToNineIntegerRegexJap = @"[一二三四五六七八九零壹贰叁肆伍陆柒捌玖〇两千俩倆仨]"; - public static readonly string DateYearInJapaneseRegex = $@"(?({ZeroToNineIntegerRegexJap}{ZeroToNineIntegerRegexJap}{ZeroToNineIntegerRegexJap}{ZeroToNineIntegerRegexJap}|{ZeroToNineIntegerRegexJap}{ZeroToNineIntegerRegexJap}|{ZeroToNineIntegerRegexJap}{ZeroToNineIntegerRegexJap}{ZeroToNineIntegerRegexJap}))"; - public const string WeekDayRegex = @"(?日曜日?|月曜日?|火曜日?|水曜日?|木曜日?|金曜日?|土曜日?)\s?"; - public const string LunarRegex = @"(农历|初一|正月|大年)"; - public static readonly string DateThisRegex = $@"(这个|这一个|这|这一|本){WeekDayRegex}"; - public static readonly string DateLastRegex = $@"(上一个|上个|上一|上|最后一个|最后)(的)?{WeekDayRegex}"; - public static readonly string DateNextRegex = $@"(下一个|下个|下一|下)(的)?{WeekDayRegex}"; - public const string SpecialMonthRegex = @"(先月|来月|今月|前月|再来月|昨月|先々月)"; - public const string SpecialYearRegex = @"(昨年|来年|今年|前年|ことし|らいねん)"; - public const string SpecialDayRegex = @"(最近|前天|后天|昨天|明天|今天|今日|明日|昨日|大后天|大前天|後天|大後天|きょう|あす|あした|きのう|明後日|一昨日|この日|前日|二日前|おととい|あさって)"; - public const string SpecialDayWithNumRegex = @"^[.]"; - public static 
readonly string WeekDayOfMonthRegex = $@"((({MonthRegex}|{MonthNumRegex})的\s*)(?第一个|第二个|第三个|第四个|第五个|最后一个)\s*{WeekDayRegex})"; - public const string ThisPrefixRegex = @"这个|这一个|这|这一|本|今"; - public const string LastPrefixRegex = @"上个|上一个|上|上一|去"; - public const string NextPrefixRegex = @"下个|下一个|下|下一|明"; + public const string YearNumRegex = @"((?((?\d{2,4})(?!\$|ドル|円|¥))(\s*年)?"; + public const string ZeroToNineIntegerRegexCJK = @"[一二三四五六七八九十廿零壹贰叁肆伍陆柒捌玖〇两千俩倆仨]"; + public const string DynastyStartYear = @"元"; + public const string RegionTitleRegex = @"(昭和|平成|令和|大正|明治|寛政|享和|文化|文政|天保|弘化|嘉永|安政|万延|文久|元治|慶応)"; + public static readonly string DynastyYearRegex = $@"((?{RegionTitleRegex})(?({DynastyStartYear}|\d{{1,2}}|({ZeroToNineIntegerRegexCJK}){{1,3}}))年?)|(((?慶応)|(?明治)|(?大正)|(?昭和)|(?平成)|(?令和))(\d+|元|{ZeroToNineIntegerRegexCJK})年)"; + public static readonly string DateYearInCJKRegex = $@"(?({ZeroToNineIntegerRegexCJK}{{2,4}}|{DynastyYearRegex}))年?"; + public const string WeekDayRegex = @"(前の?)?(週(間)?の?)?(?(日|月|火|水|木|金|土)曜日?)"; + public static readonly string WeekDayStartEnd = $@"(^(の)?{WeekDayRegex}|{WeekDayRegex}$)"; + public const string LunarRegex = @"(农历|初一|正月|大年|旧暦)"; + public static readonly string DateThisRegex = $@"(这个|这一个|这|这一|本|(?今週)|これ?)(的|の)?({WeekDayRegex}|日)"; + public static readonly string DateLastRegex = $@"(上一个|上个|上一|上|最后一个|最后|前の?|(?先週)|最後)(的|の)?({WeekDayRegex}|日)"; + public static readonly string DateNextRegex = $@"(下一个|下个|下一|下|(?(来|翌)週)|次)(的|の)?{WeekDayRegex}"; + public static readonly string WeekWithWeekDayRangeRegex = $@"({DateThisRegex}|{DateNextRegex}|{DateLastRegex})(から)({WeekDayRegex})"; + public const string WoMLastRegex = @"過去|去|最後|先"; + public const string WoMPreviousRegex = @"前"; + public const string WoMNextRegex = @"次|翌|来|これから(の)?"; + public const string SpecialMonthRegex = @"(先月|来月|今月|前月|再来月|昨月|先々月|ぜんげつ|(せん)?せんげつ|さくげつ|らいげつ|こんげつ)"; + public const string SpecialYearRegex = @"(ことし|さ?らいねん|きょねん|さくねん)"; + public const string SpecialDayRegex = 
@"((いっ)?さくじつ|おとつい|最近(?!の)|前天|后天|明日から二日((?今日)から(?1日半)(の間)?)|((?今日)から(?2日半)(の間)?)|(?本日)|昨日の2日前|昨日から4日|今日から二日|今日から4日|昨日から2日間|昨天|明天|今天|(?日曜日?|月曜日?|火曜日?|水曜日?|木曜日?|金曜日?|土曜日?))?"; + public static readonly string WeekDayOfMonthRegex = $@"((({SpecialMonthRegex}|{MonthRegex}|{MonthNumRegex}|((这个|这一个|这|这一|本|今|上个|上一个|上|上一|去|下个|下一个|下|下一|明)月))(的|の)?\s*)?(第|最)?(?([初一二三四五])|最後|最終|([1-5])|最后一)(个|の|\s)*{WeekDayRegex})"; + public static readonly string WeekDayAndDayRegex = $@"({DayRegexForPeriod}(の|的)?(\s|,)*{WeekDayRegex})"; + public const string ThisPrefixRegex = @"这个|这一个|这|这一|本|今|こ"; + public const string LastPrefixRegex = @"上个|上一个|上|上一|去|過去|ここ|最後|前|先|昨|最終"; + public const string NextPrefixRegex = @"下个|下一个|下|下一|明(?!治)|次|再?来|向こう|これから(の)?|翌|向こう"; public static readonly string RelativeRegex = $@"(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex}))"; - public static readonly string SpecialDate = $@"(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})年)?(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})月)?{DateDayRegexInJapanese}"; - public const string DateUnitRegex = @"(?年|个月|周|日|天)"; - public const string BeforeRegex = @"以前|之前|前"; - public const string AfterRegex = @"以后|以後|之后|之後|后|後"; - public static readonly string DateRegexList1 = $@"({SimpleYearRegex}[/\\\-]?{MonthRegex}[/\\\-]?{DayRegexForPeriod}\s*({WeekDayRegex})?)"; - public static readonly string DateRegexList2 = $@"({SimpleYearRegex}{MonthRegexForPeriod}\s*)"; - public static readonly string DateRegexList3 = $@"((({SimpleYearRegex}|{DateYearInJapaneseRegex})年)(\s*))?({LunarRegex}(\s*))?{MonthRegex}(\s*)({DateDayRegexInJapanese}|{DayRegex})((\s*|,|,){WeekDayRegex})?({BeforeRegex}|{AfterRegex})?"; - public static readonly string DateRegexList4 = $@"{MonthNumRegex}\s*/\s*{DayRegex}((\s+|\s*,\s*){SimpleYearRegex})?"; - public static readonly string DateRegexList5 = $@"{DayRegex}\s*/\s*{MonthNumRegex}((\s+|\s*,\s*){SimpleYearRegex})?"; - public static readonly string DateRegexList6 = 
$@"{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}\s*[/\\\-]\s*{SimpleYearRegex}"; + public static readonly string SpecialDate = $@"(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})年)?(の|的)?(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})(の|的)?月)?(の|的)?{DateDayRegexInCJK}"; + public const string DateUnitRegex = @"(?年|个月|月|周|(?営業)日|(?あとで?)|(?の早い時間)"; + public static readonly string DateRegexList1 = $@"({LunarRegex}(的|の|\s)*)?(({SimpleYearRegex}|{DateYearInCJKRegex})[/\\\-の的]?(\s*{MonthRegex})[/\\\-の的]?(\s*{DayRegexForPeriod})((\s|,)*{WeekDayRegex})?)"; + public static readonly string DateRegexList2 = $@"((?到|至|--|-|—|——|~|–)"; + public static readonly string DateRegexList8 = $@"{YearNumRegex}\s*[/\\\-\. ]\s*{MonthNumRegex}\s*[/\\\-\. ]\s*{DayRegexForPeriod}((\s|,)*{WeekDayRegex})?"; + public static readonly string DateRegexList9 = $@"({LunarRegex}(的|の|\s)*)?((\s*{MonthRegex}[/\\\-の的]?{DayRegexForPeriod}((\s|,)*{WeekDayRegex})?)|((\s*{MonthRegex}[/\\\-の的]?){DayRegexForPeriod}(の|的)?((\s|,)*{WeekDayRegex})))"; + public static readonly string DateRegexList10 = $@"(({SimpleYearRegex}|{DateYearInCJKRegex})[/\\\-]{MonthNumRegex}[/\\\-]{DayRegexForPeriod})"; + public static readonly string DateRegexList11 = $@"(({SimpleYearRegex}|{DateYearInCJKRegex})[/\\\-]{MonthNumRegexForPeriod})"; + public const string DatePeriodTillRegex = @"(?到|至|から|--|-|—|——|~|–)(?!\d泊)"; + public const string DatePeriodRangeSuffixRegex = @"(に?まで|の間)"; + public const string DatePeriodRangePrefixRegex = @"^\b$"; public const string DatePeriodTillSuffixRequiredRegex = @"(?与|和)"; - public const string DatePeriodDayRegexInJapanese = 
@"(?初一|三十|一日|十一日|二十一日|三十一日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|一日|十一日|十日|二十一日|二十日|三十一日|三十日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|十日|二十日|三十日|10日|11日|12日|13日|14日|15日|16日|17日|18日|19日|1日|20日|21日|22日|23日|24日|25日|26日|27日|28日|29日|2日|30日|31日|3日|4日|5日|6日|7日|8日|9日|一号|十一号|二十一号|三十一号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|一号|十一号|十号|二十一号|二十号|三十一号|三十号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|十号|二十号|三十号|10号|11号|12号|13号|14号|15号|16号|17号|18号|19号|1号|20号|21号|22号|23号|24号|25号|26号|27号|28号|29号|2号|30号|31号|3号|4号|5号|6号|7号|8号|9号|一|十一|二十一|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|十|二十|三十||廿|卅)"; - public const string DatePeriodThisRegex = @"这个|这一个|这|这一|本"; - public const string DatePeriodLastRegex = @"上个|上一个|上|上一"; - public const string DatePeriodNextRegex = @"下个|下一个|下|下一"; + public const string DatePeriodDayRegexInCJK = @"(?(二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|二十二|二十三|二十一|十一|三十一|十二|十三|十四|十五|十六|十七|十八|十九|十|二十|三十|一|十|二|三|四|五|六|七|八|九|3[0-1]|[1-2]\d|0?[1-9])日|初一|三十|(一|十一|二十一|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|十|二十|三十|3[0-1]|[1-2]\d|0?[1-9])号|一|十一|二十一|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|十|二十|三十|廿(?!日市市)|卅)目?"; + public const string DatePeriodThisRegex = @"(?再来|以降)|下个|下一个|下|下一|最初|来|向こう|これから(の)?|翌|今後|次(の)?|の後"; + public const string DateRangePrepositions = @"((ひと|こ|私の|その|この|これらの|それらの)\s*)?"; public static readonly string RelativeMonthRegex = $@"(?({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex})\s*月)"; - 
public static readonly string YearRegex = $@"(({YearNumRegex})(\s*年)?|({SimpleYearRegex})\s*年)(?=[\u4E00-\u9FFF]|\s|$|\W)"; - public static readonly string StrictYearRegex = $@"{YearRegex}"; + public const string HalfYearRegex = @"((?の?(上|前)半期?)|(?の?(下|后|後)半期?))"; + public static readonly string YearRegex = $@"((({YearNumRegex})(\s*年)?|({SimpleYearRegex})\s*年)(に)?{HalfYearRegex}?)|({DynastyYearRegex})"; + public static readonly string StrictYearRegex = $@"(((ひと|こ|その|この|これらの|それらの)\s*)?{YearRegex})"; public const string YearRegexInNumber = @"(?(\d{3,4}))"; - public static readonly string DatePeriodYearInJapaneseRegex = $@"(?({ZeroToNineIntegerRegexJap}{ZeroToNineIntegerRegexJap}{ZeroToNineIntegerRegexJap}{ZeroToNineIntegerRegexJap}|{ZeroToNineIntegerRegexJap}{ZeroToNineIntegerRegexJap}|{ZeroToNineIntegerRegexJap}{ZeroToNineIntegerRegexJap}{ZeroToNineIntegerRegexJap}))年"; + public static readonly string DatePeriodYearInCJKRegex = $@"(?({ZeroToNineIntegerRegexCJK}{{2,4}}))年{HalfYearRegex}?"; public static readonly string MonthSuffixRegex = $@"(?({RelativeMonthRegex}|{MonthRegex}))"; - public static readonly string SimpleCasesRegex = $@"((从)\s*)?(({YearRegex}|{DatePeriodYearInJapaneseRegex})\s*)?{MonthSuffixRegex}({DatePeriodDayRegexInJapanese}|{DayRegex})\s*{DatePeriodTillRegex}\s*({DatePeriodDayRegexInJapanese}|{DayRegex})((\s+|\s*,\s*){YearRegex})?"; - public static readonly string YearAndMonth = $@"({YearNumRegex}{MonthRegex}(\b|から)$)"; - public static readonly string SimpleYearAndMonth = $@"({YearNumRegex}[/\\\-]{MonthNumRegex}(\b|から)$)"; - public static readonly string PureNumYearAndMonth = $@"({YearRegexInNumber}\s*[-\.\/]\s*{MonthNumRegex})|({MonthNumRegex}\s*\/\s*{YearRegexInNumber})"; - public static readonly string OneWordPeriodRegex = $@"(((明年|今年|去年)\s*)?{MonthRegex}|({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex})\s*(周末|周|月|年)|周末|今年|明年|去年|前年|后年)"; - public static readonly string WeekOfMonthRegex = 
$@"(?{MonthSuffixRegex}的(?第一|第二|第三|第四|第五|最后一)\s*周\s*)"; - public const string UnitRegex = @"(?年|(个)?月|周|日|天)"; + public static readonly string SimpleCasesRegex = $@"({DateRangePrepositions})(({YearRegex}|{DatePeriodYearInCJKRegex})\s*)?{MonthSuffixRegex}({DatePeriodDayRegexInCJK}|{DayRegex})\s*{DatePeriodTillRegex}\s*({DatePeriodDayRegexInCJK}|{DayRegex})(?!\d)((\s+|\s*,\s*){YearRegex})?(までの間|まで|の間)?"; + public static readonly string YearAndMonth = $@"(({YearNumRegex}|{DateYearInCJKRegex})の?\s*{MonthRegex}(\b|から)?)"; + public static readonly string SimpleYearAndMonth = $@"({DateRangePrepositions})({YearNumRegex}[/\\\-]{MonthNumRegex}(\b|から)$)"; + public static readonly string PureNumYearAndMonth = $@"({DateRangePrepositions})({YearRegexInNumber}\s*[-\.\/]\s*{MonthNumRegex})(?!\d)|({MonthNumRegex}\s*\/\s*{YearRegexInNumber})"; + public static readonly string OneWordPeriodRegex = $@"({DateRangePrepositions})((((周末|週(間)?|日間?|明年|(?(今|再来|翌|去|前|后|来)年))(,|の(残り)?)?\s*)?{MonthRegex}|(({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex})の?\s*)?(数|\d\d?|{ZeroToNineIntegerRegexCJK}|(?半))?(?ヶ?((?営業)日|(?半)|(?の残りの日|いっぱい)?)|(({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex})({MonthRegex}(?!で)|{DayRegex})))(?後に|以内に|初来)?"; + public const string LaterEarlyPeriodRegex = @"((?来|翌)|(?今|同じ)|(?この|去|先|前(の)?))?(?(?週(間)?)|(?(正|一|二|三|四|五|六|七|八|九|十|十一|十二|0?[1-9]|1[0-2]))?((?(?の下旬|この後|の後半)|の終わり(ごろ)?|末|下旬)|(?(の)?(半ば|中旬))|(?(の)?初め|のはじめ|早くに|初旬|(?ちょっと前に|上旬(に)?)))"; + public const string DatePointWithAgoAndLater = @"((?今日)|(?昨日)|(?明日))(から|の)(\d)(?週間|日)((?以内)|(?以上)(?前)|(?以上(あと)?))"; + public static readonly string WeekOfMonthRegex = $@"({DateRangePrepositions})((?({YearRegex}\s*)?{MonthSuffixRegex}(的|の))(?第一|第二|第三|第四|第五|最后一|第\d|{DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex})?\s*の?(週|周)\s*)"; + public static readonly string WeekOfYearRegex = 
$@"({DateRangePrepositions})(?({YearRegex}|{RelativeRegex}年)(的|の)(?第一|第二|第三|第四|第五|最后一|第\d|{DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex})?\s*の?(週|周)\s*)"; + public static readonly string WeekOfDateRegex = $@"(({DateRangePrepositions})({MonthSuffixRegex}({DayRegex})(的|の))第?\s*の?(週|周)s*)|({DayRegex}日の?(週(間)?))"; + public static readonly string MonthOfDateRegex = $@"({DateRangePrepositions})({MonthSuffixRegex}({DayRegex})(的|の))第?\s*の?(月)s*"; + public const string RestOfDateRegex = @"((当|この|今)(?日)の)?(?残りの?)(?時間|日|週|月|年)"; + public const string UnitRegex = @"(?ヶ?(年|(个)?月|周|週間|日|天))"; public static readonly string FollowedUnit = $@"^\s*{UnitRegex}"; public static readonly string NumberCombinedWithUnit = $@"(?\d+(\.\d*)?){UnitRegex}"; - public static readonly string YearMonthDayRange = $@"({YearNumRegex}[/\\\-]?({MonthRegex}|{MonthNumRegex})[/\\\-]?{DayRegexForPeriod}から{YearNumRegex}[/\\\-]?({MonthRegex}|{MonthNumRegex})[/\\\-]?{DayRegexForPeriod}まで)"; - public static readonly string YearMonthRange = $@"({YearNumRegex}[/\\\-]?({MonthRegex}|{MonthNumRegex})から{YearNumRegex}[/\\\-]?({MonthRegex}|{MonthNumRegex})まで)"; - public static readonly string MonthDayRange = $@"(({MonthRegex}|{MonthNumRegex})[/\\\-]?{DayRegexForPeriod}から({MonthRegex}|{MonthNumRegex})[/\\\-]?{DayRegexForPeriod}まで)"; - public static readonly string YearToYear = $@"({YearNumRegex}から{YearNumRegex}まで)"; - public static readonly string MonthToMonth = $@"({MonthRegex}から{MonthRegex}まで)"; - public static readonly string DayToDay = $@"({DayRegex}から{DayRegex}まで)"; - public const string PastRegex = @"(?(前|上|之前|近|过去))"; - public const string FutureRegex = @"(?(后|後|(?春|夏|秋|冬)(天|季)?"; - public static readonly string SeasonWithYear = $@"(({YearRegex}|{DatePeriodYearInJapaneseRegex}|(?明年|今年|去年))(的)?)?{SeasonRegex}"; - public static readonly string QuarterRegex = $@"(({YearRegex}|{DatePeriodYearInJapaneseRegex}|(?明年|今年|去年))(的)?)?(第(?1|2|3|4|一|二|三|四)季度)"; - public const string CenturyRegex = 
@"(?\d|1\d|2\d)世纪"; - public const string CenturyRegexInJapanese = @"(?一|二|三|四|五|六|七|八|九|十|十一|十二|十三|十四|十五|十六|十七|十八|十九|二十|二十一|二十二)世纪"; - public static readonly string RelativeCenturyRegex = $@"(?({DatePeriodLastRegex}|{DatePeriodThisRegex}|{DatePeriodNextRegex}))世纪"; - public const string DecadeRegexInJapanese = @"(?十|一十|二十|三十|四十|五十|六十|七十|八十|九十)"; - public static readonly string DecadeRegex = $@"(?({CenturyRegex}|{CenturyRegexInJapanese}|{RelativeCenturyRegex}))?(?(\d0|{DecadeRegexInJapanese}))年代"; - public const string PrepositionRegex = @"(?^的|在$)"; - public const string NowRegex = @"(?现在|马上|立刻|刚刚才|刚刚|刚才)"; - public const string NightRegex = @"(?早|晚)"; - public const string TimeOfTodayRegex = @"(今晚|今早|今晨|明晚|明早|明晨|昨晚)(的|在)?"; - public const string DateTimePeriodTillRegex = @"(?到|直到|--|-|—|——)"; - public const string DateTimePeriodPrepositionRegex = @"(?^\s*的|在\s*$)"; + public static readonly string YearMonthDayRange = $@"({YearNumRegex}[/\\\-]?({MonthRegex}|{MonthNumRegex})[/\\\-]?({DayRegexForPeriod}|{DateDayRegexInCJK})から{YearNumRegex}[/\\\-]?({MonthRegex}|{MonthNumRegex})[/\\\-]?({DayRegexForPeriod}|{DateDayRegexInCJK})(までの間|まで|の間|にわたって))|(({YearNumRegex})?({MonthRegex}|{MonthNumRegex})[/\\\-]?({DayRegexForPeriod}|{DateDayRegexInCJK}){WeekDayRegex}?から({MonthRegex}|{MonthNumRegex})?({DayRegexForPeriod}|{DateDayRegexInCJK}){WeekDayRegex}(までの間|まで|の間|にわたって))"; + public static readonly string YearMonthRange = $@"({YearNumRegex}[/\\\-]?({MonthRegex}|{MonthNumRegex})から{YearNumRegex}[/\\\-]?({MonthRegex}|{MonthNumRegex})(までの間|まで|の間|にわたって))"; + public static readonly string MonthDayRange = $@"({YearNumRegex})?({MonthRegex}|{MonthNumRegex})[/\\\-]?(({DayRegexForPeriod}|{DateDayRegexInCJK})|{WeekDayRegex})から(({DayRegexForPeriod}|{DateDayRegexInCJK})|{WeekDayRegex})(までの間|まで|の間|にわたって)"; + public static readonly string YearToYear = 
$@"({DateRangePrepositions})(({SpecialYearRegex}|{DatePeriodYearInCJKRegex}|{YearNumRegex})から({SpecialYearRegex}|{DatePeriodYearInCJKRegex}|{YearNumRegex})(ま(での間|で)?|の間|にわたって))"; + public const string YearToYearSuffixRequired = @"^[.]"; + public static readonly string MonthToMonth = $@"({DateRangePrepositions})(({SimpleYearRegex}?({SpecialMonthRegex}|{MonthRegex})(SpecialDayRegex}}|{DayRegex})?から({SpecialMonthRegex}|{MonthRegex})(SpecialDayRegex}}|{DayRegex})?(までの間|まで|の間))|({SimpleYearRegex}{MonthRegexForPeriod}から{SimpleYearRegex}{MonthRegexForPeriod}(までの間|まで|の間))|({SimpleYearRegex}[/\\\-](?{MonthNumRegexForPeriod})から{SimpleYearRegex}[/\\\-](?{MonthNumRegexForPeriod})(までの間|まで|の間)))"; + public const string MonthToMonthSuffixRequired = @"^[.]"; + public static readonly string DayToDay = $@"({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})?(({SpecialMonthRegex}|{MonthRegex})の?)?(({SpecialDayRegex}|{DayRegex}|{WeekDayRegex})から(({SpecialMonthRegex}|{MonthRegex})の?)?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})?((今月|来月|{MonthRegex})の?)?({SpecialDayRegex}|{DayRegex}|{WeekDayRegex})(までの間|まで|の間))|{SpecialDayRegex}"; + public static readonly string FirstLastOfYearRegex = $@"(({DatePeriodYearInCJKRegex}|{YearRegex}|(?再来年|翌年|来年|今年|去年))的?)((?前)|(?(最后|最後|最終)))"; + public static readonly string ComplexDatePeriodRegex = $@"({DateRangePrepositions})(?.+)(から)(?.+)(までの間|(?まで)|(?(この|(?((?以内に)|後に|向こう|后|次の|今後|今日の午後|これから(の)?|(?春(?!節)|夏|秋|冬)(天|季)?(の)?((?半ば)|(?初め|のはじめ)|(?終わり(ごろ)?|末|下旬))?"; + public const string WhichWeekRegex = @"第(?5[0-3]|[1-4]\d|0?[1-9])週"; + public static readonly string SeasonWithYear = $@"({DateRangePrepositions})(({YearRegex}|{DatePeriodYearInCJKRegex}|(?再来年|翌年|来年|今年|去年))(的|の)?)?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})?{SeasonRegex}"; + public static readonly string QuarterRegex = 
$@"({DateRangePrepositions})((({YearRegex}|{DatePeriodYearInCJKRegex}|(?再来年|翌年|来年|今年|去年))(的|の)?)(第(?1|2|3|4|一|二|三|四)(四半期|クォーター)?)|(第(?1|2|3|4|一|二|三|四)(四半期|クォーター)))|(({DatePeriodLastRegex}|{DatePeriodThisRegex}|{DatePeriodNextRegex})(四半期|クォーター))"; + public const string CenturyNumRegex = @"(?\d|1\d|2\d)世紀"; + public const string CenturyRegexInCJK = @"(?一|二|三|四|五|六|七|八|九|十|十一|十二|十三|十四|十五|十六|十七|十八|十九|二十|二十一|二十二)世紀"; + public static readonly string CenturyRegex = $@"({CenturyNumRegex}|{CenturyRegexInCJK})"; + public static readonly string RelativeCenturyRegex = $@"(?({DatePeriodLastRegex}|{DatePeriodThisRegex}|{DatePeriodNextRegex}))世紀"; + public const string DecadeRegexInCJK = @"(?十|一十|二十|三十|四十|五十|六十|七十|八十|九十)"; + public static readonly string DecadeRegex = $@"({DateRangePrepositions})(?({CenturyRegex}|{CenturyRegexInCJK}|{RelativeCenturyRegex}))?の?(?(?\d{{2}}(?=\d)))?(?((\d{{1}}0)|{DecadeRegexInCJK}))年代(のごろ)?"; + public const string PrepositionRegex = @"(?^(,?(夜の|的|の(?朝|夜|午後|晩)?|t),?|在)$)"; + public const string NowRegex = @"(?出来る限り早く|できるだけ早く|现在|马上|立刻|刚刚才|刚刚|刚才|今日中|今(?!日)(すぐ)?)"; + public const string NightRegex = @"(?早|晚|夜|泊(?=の?予約))"; + public const string TomorrowRegex = @"(?(?昨日の?(午前|午後|中|夜|泊(?=の?予約)|朝)?)"; + public const string TodayRegex = @"(?(今朝の?|今朝の午前|今晩|今晚|今早|今晨|明晚|明早|明晨|昨晚|今夜|昨夜)(的|在)?)"; + public const string FromNowRegex = @"((?今)から)"; + public static readonly string SpecialDayHourRegex = $@"((?{TimeHourCJKRegex}|{TimeHourNumRegex})(時間?|(:00)))"; + public static readonly string SpecialDayMinuteRegex = $@"((?{TimeMinuteCJKRegex}|{TimeMinuteNumRegex})分間?)"; + public static readonly string SpecialDaySecondRegex = $@"((?{TimeSecondCJKRegex}|{TimeSecondNumRegex})秒間?)"; + public const string SpecialDayModRegex = @"((?過ぎに|以降)|(?で)|(?弱|たらず)|(?以上))"; + public static readonly string SpecialDayEndOfRegex = $@"((?明日の終わり|今?({WeekDayRegex}の?終わり))|(?日の終わり|一日の終わり|その日の終わり))"; + public static readonly string TimeOfSpecialDayRegex = 
$@"(({SpecialDayEndOfRegex}|{WeekDayRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex})(\d日)?(と)?(({SpecialDayHourRegex}{SpecialDayMinuteRegex}?{SpecialDaySecondRegex}?)|({SpecialDayMinuteRegex}{SpecialDaySecondRegex}?)){SpecialDayModRegex}?)|(({SpecialDayHourRegex}(の?うちに)))|(({SpecialDayEndOfRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex}){SpecialDayModRegex}?)|({WeekDayRegex}(\d日)?(と)?{SpecialDayModRegex})|({FromNowRegex}\d+(分|時|秒)後)"; + public const string NowTimeRegex = @"(现在|今)"; + public const string RecentlyTimeRegex = @"(刚刚才?|刚才)"; + public const string AsapTimeRegex = @"(出来る限り早く|立刻|马上)"; + public const string DateTimePeriodTillRegex = @"(? 到|至|から|--|-|—|——|~)"; + public const string DateTimePeriodFromPrefixRegex = @"(从)"; + public const string DateTimePeriodFromSuffixRegex = @"(の間|まで(の間)?)"; + public const string DateTimePeriodConnectorRegex = @"(和|与|到)"; + public const string DateTimePeriodPrepositionRegex = @"(?^\s*(的|の(?!午)|在)\s*$)"; + public const string BeforeAfterRegex = @"(?[零〇一二两三四五六七八九]|二十[一二三四]?|十[一二三四五六七八九]?)"; public const string ZhijianRegex = @"^\s*(之间|之内|期间|中间|间)"; - public const string DateTimePeriodThisRegex = @"这个|这一个|这|这一"; - public const string DateTimePeriodLastRegex = @"上个|上一个|上|上一"; + public const string DateTimePeriodThisRegex = @"这个|这一个|这|这一|今後|今から|これから"; + public const string DateTimePeriodLastRegex = @"上个|上一个|上|上一|昨"; public const string DateTimePeriodNextRegex = @"下个|下一个|下|下一"; - public const string AmPmDescRegex = @"(?(am|a\.m\.|a m|a\. m\.|a\.m|a\. m|a m|pm|p\.m\.|p m|p\. m\.|p\.m|p\. m|p m))"; - public const string TimeOfDayRegex = @"(?凌晨|清晨|早上|早|上午|中午|下午|午后|晚上|夜里|夜晚|半夜|夜间|深夜|傍晚|晚)"; - public static readonly string SpecificTimeOfDayRegex = $@"((({DateTimePeriodThisRegex}|{DateTimePeriodNextRegex}|{DateTimePeriodLastRegex})\s+{TimeOfDayRegex})|(今晚|今早|今晨|明晚|明早|明晨|昨晚))"; - public const string DateTimePeriodUnitRegex = @"(个)?(?(小时|分钟|秒钟|时|分|秒))"; + public const string AmPmDescRegex = @"(?(am|a\.m\.|a m|a\. 
m\.|a\.m|a\. m|a m|pm|p\.m\.|p m|p\. m\.|p\.m|p\. m|p m|夜|晚|晩|午後|午后|午前(半ば|中)?|正午|真昼|夜中|深夜|昼食時|夕方に|朝|午後|昼(?!食)))"; + public const string TimeOfDayRegex = @"(?凌晨|清晨|早上|早|上午|中午|下午|午后|晚上|夜里|夜晚|半夜|夜间|深夜|傍晚|晩|泊(?=の?予約)|夜|((?ぎりぎり)|(今夜|今晩|今朝|今早|今晨|明晚|明早|明晨|昨晚)|(({FutureRegex}|{PastRegex})(?(日|月|火|水|木|金|土)曜日?)の(午前|午後|中|夜|泊(?=の?予約)|朝)((?(([零〇一二两三四五六七八九]|二十[一二三四]?|十[一二三四五六七八九]?)(つ)?)|([0-1]?\d|2[0-4]))時間?)((?([二三四五]?十[一二三四五六七八九]?|六十|[零〇一二三四五六七八九])|([0-5]?\d))分間?)?((?([二三四五]?十[一二三四五六七八九]?|六十|[零〇一二三四五六七八九])|([0-5]?\d))秒間?)?まで)|(({FutureRegex}|{PastRegex})の?(?数)((時|分|秒)間?)))"; + public const string DateTimePeriodUnitRegex = @"(?(時|分|秒)間?)"; public static readonly string DateTimePeriodFollowedUnit = $@"^\s*{DateTimePeriodUnitRegex}"; public static readonly string DateTimePeriodNumberCombinedWithUnit = $@"\b(?\d+(\.\d*)?){DateTimePeriodUnitRegex}"; + public const string PlusOneDayRegex = @"あす|あした|明日|来|次|翌"; + public const string MinusOneDayRegex = @"きのう|最後の日|前日|昨|昨日の?"; + public const string PlusTwoDayRegex = @"后天|後天|明後日|あさって|今日から二日"; + public const string MinusTwoDayRegex = @"前天|一昨日|二日前|おととい"; + public const string PlusThreeDayRegex = @"大后天|大後天|明日から二日|昨日から4日"; + public const string MinusThreeDayRegex = @"大前天|昨日の2日前|昨日から2日間"; + public const string PlusFourDayRegex = @"今日から4日"; + public const string DurationAllRegex = @"(まる)"; + public const string DurationHalfRegex = @"^[.]"; + public const string DurationRelativeDurationUnitRegex = @"(?数ヶ|数)|(?(?以内)|(?後|(?(?(?(?数(?((か|ヶ)?(時|月|日(?!都合)|週|年|周|週|週|秒|分|営業日|年)間?))(たらず|以上)?)"; + public const string DurationMoreOrLessRegex = @"(?たらず|以下|を下回る)|(?以上|を上回る)"; public const string DurationYearRegex = @"((\d{3,4})|0\d|两千)\s*年"; public const string DurationHalfSuffixRegex = @"半"; public static readonly Dictionary DurationSuffixList = new Dictionary { - { @"M", @"分钟" }, - { @"S", @"秒钟|秒" }, - { @"H", @"个小时|小时" }, - { @"D", @"天" }, - { @"W", @"星期|个星期|周" }, - { @"Mon", @"个月" }, - { @"Y", @"年" } + { @"M", @"分|分間" }, + { @"S", @"秒钟|秒|秒間" }, + { @"H", 
@"時|時間" }, + { @"D", @"天|日|泊|日間" }, + { @"BD", @"営業日" }, + { @"W", @"星期|个星期|周|週間|週" }, + { @"MON", @"ひと月|月間|か月間|ヶ月|ヶ月間|个月|か月|月" }, + { @"Y", @"年|年間" } }; public static readonly IList DurationAmbiguousUnits = new List { @@ -138,76 +197,121 @@ public static class DateTimeDefinitions @"个小时", @"小时", @"天", + @"日", + @"泊", @"星期", @"个星期", @"周", @"个月", - @"年" - }; - public static readonly string LunarHolidayRegex = $@"(({YearRegex}|{DatePeriodYearInJapaneseRegex}|(?明年|今年|去年))(的)?)?(?除夕|春节|中秋节|中秋|元宵节|端午节|端午|重阳节)"; - public static readonly string HolidayRegexList1 = $@"(({YearRegex}|{DatePeriodYearInJapaneseRegex}|(?明年|今年|去年))(的)?)?(?新年|五一|劳动节|元旦节|元旦|愚人节|圣诞节|植树节|国庆节|情人节|教师节|儿童节|妇女节|青年节|建军节|女生节|光棍节|双十一|清明节|清明)"; - public static readonly string HolidayRegexList2 = $@"(({YearRegex}|{DatePeriodYearInJapaneseRegex}|(?明年|今年|去年))(的)?)?(?母亲节|父亲节|感恩节|万圣节)"; - public const string SetUnitRegex = @"(?年|月|周|星期|日|天|小时|时|分钟|分|秒钟|秒)"; - public static readonly string SetEachUnitRegex = $@"(?(每个|每一|每)\s*{SetUnitRegex})"; - public const string SetEachPrefixRegex = @"(?(每)\s*$)"; + @"年", + @"時", + @"時間", + @"月" + }; + public static readonly string DurationUnitRegex = $@"(?年|个月|月|周|時間?|泊|(?営業)日|天|週間?|星期|个星期|か月|(?別)の?(?日|年|月|時間?)"; + public const string DurationConnectorRegex = @"^\s*(?[と]?|,)\s*$"; + public const string ConnectorRegex = @"^\s*[,-]\s*$"; + public static readonly string LunarHolidayRegex = $@"(({YearRegex}|{DatePeriodYearInCJKRegex}|(?明年|今年|去年|来年))(的)?)?(?除夕|春节|旧暦の正月初一|中秋(節|节)?|元宵(节|節)|端午(节|の節句)?|重(阳节|陽節))"; + public static readonly string HolidayRegexList1 = $@"(旧暦の)?(({YearRegex}|{DatePeriodYearInCJKRegex}|(?明年|今年|去年|来年))(的|の)?)?(?新年|五一|劳动节|国際的な労働者の日|メーデー|元旦节|元旦|の?独立記念日|大晦日|愚人节|エイプリルフール|圣诞节|クリスマス(の日|イブ)?|感謝祭(の日)?|クリーンマンデイ|父の日|植树节|国庆节|国慶節|情人节|バレンタインデー|教(师节|師の日)|儿童节|妇女节|青年(节|の日)|建军节|建軍節|女生节|光棍节|双十一|清明(节|節)?|キング牧師記念日|旧正月|ガールズデー|(こども|子ども|子供)の日|お正月|植樹祭|シングルデー|シングルズデー|国際婦人デー|ダブル十一|復活祭|イースター)(の\d日)?"; + public static readonly string HolidayRegexList2 = 
$@"(({YearRegex}|{DatePeriodYearInCJKRegex}|(?明年|今年|去年|来年))(的)?)?(?母(亲节|の日)|父亲节|感恩节|万圣节|ハロウィン)"; + public const string SetUnitRegex = @"(?年|月|隔週|週|日|時|分|秒)"; + public static readonly string SetEachUnitRegex = $@"((?(毎个|毎一|毎|各)\s*(?年|月|週|日|時|分|秒))|(?隔週))"; + public const string SetEachPrefixRegex = @"((?毎|隔|各|ごとに)\s*$)"; + public const string SetEachSuffixRegex = @"(^\s*(?ごとに))"; public const string SetLastRegex = @"(?last|this|next)"; - public const string SetEachDayRegex = @"(每|每一)(天|日)\s*$"; - public const string TimeHourNumRegex = @"(00|01|02|03|04|05|06|07|08|09|0|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|1|2|3|4|5|6|7|8|9)"; - public const string TimeMinuteNumRegex = @"(00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|0|1|2|3|4|5|6|7|8|9)"; - public const string TimeSecondNumRegex = @"(00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|0|1|2|3|4|5|6|7|8|9)"; - public const string TimeHourJapRegex = @"([零〇一二两三四五六七八九]|二十[一二三四]?|十[一二三四五六七八九]?)"; - public const string TimeMinuteJapRegex = @"([二三四五]?十[一二三四五六七八九]?|六十|[零〇一二三四五六七八九])"; - public static readonly string TimeSecondJapRegex = $@"{TimeMinuteJapRegex}"; - public const string TimeClockDescRegex = @"(点\s*整|点\s*钟|点|时)"; - public const string TimeMinuteDescRegex = @"(分钟|分|)"; - public const string TimeSecondDescRegex = @"(秒钟|秒)"; + public const string SetEachDayRegex = @"(毎|各|毎一)(天|日)\s*$"; + public const string SetEachDateUnitRegex = @"(毎)(年|月|週)\s*$"; + public const string TimeHourNumRegex = @"(?{TimeHourJapRegex}|{TimeHourNumRegex}){TimeClockDescRegex}"; - public static readonly string TimeMinuteRegex = $@"(?{TimeMinuteJapRegex}|{TimeMinuteNumRegex}){TimeMinuteDescRegex}"; - public static readonly string TimeSecondRegex = 
$@"(?{TimeSecondJapRegex}|{TimeSecondNumRegex}){TimeSecondDescRegex}"; + public static readonly string TimeHourRegex = $@"(?{TimeHourCJKRegex}|{TimeHourNumRegex}){TimeClockDescRegex}"; + public static readonly string TimeMinuteRegex = $@"(?{TimeMinuteCJKRegex}|{TimeMinuteNumRegex}){TimeMinuteDescRegex}"; + public static readonly string TimeSecondRegex = $@"(?{TimeSecondCJKRegex}|{TimeSecondNumRegex}){TimeSecondDescRegex}"; public const string TimeHalfRegex = @"(?过半|半)"; public const string TimeQuarterRegex = @"(?[一两二三四1-4])\s*(刻钟|刻)"; - public static readonly string TimeJapaneseTimeRegex = $@"{TimeHourRegex}({TimeQuarterRegex}|{TimeHalfRegex}|((过|又)?{TimeMinuteRegex})({TimeSecondRegex})?)?"; - public static readonly string TimeDigitTimeRegex = $@"(?{TimeHourNumRegex}):(?{TimeMinuteNumRegex})(:(?{TimeSecondNumRegex}))?"; - public const string TimeDayDescRegex = @"(?凌晨|清晨|早上|早|上午|中午|下午|午后|晚上|夜里|夜晚|半夜|午夜|夜间|深夜|傍晚|晚)"; - public const string TimeApproximateDescPreffixRegex = @"(大[约概]|差不多|可能|也许|约|不超过|不多[于过]|最[多长少]|少于|[超短长多]过|几乎要|将近|差点|快要|接近|至少|起码|超出|不到)"; - public const string TimeApproximateDescSuffixRegex = @"(之前|以前|以后|以後|之后|之後|前|后|後|左右)"; - public static readonly string TimeRegexes1 = $@"{TimeApproximateDescPreffixRegex}?{TimeDayDescRegex}?{TimeJapaneseTimeRegex}{TimeApproximateDescSuffixRegex}?"; - public static readonly string TimeRegexes2 = $@"{TimeApproximateDescPreffixRegex}?{TimeDayDescRegex}?{TimeDigitTimeRegex}{TimeApproximateDescSuffixRegex}?(\s*{AmPmDescRegex}?)"; - public static readonly string TimeRegexes3 = $@"差{TimeMinuteRegex}{TimeJapaneseTimeRegex}"; - public const string TimePeriodTimePeriodConnectWords = @"(起|至|到|–|-|—|~|~)"; - public static readonly string TimePeriodLeftJapTimeRegex = $@"(从)?(?{TimeDayDescRegex}?({TimeJapaneseTimeRegex}))"; - public static readonly string TimePeriodRightJapTimeRegex = $@"{TimePeriodTimePeriodConnectWords}(?{TimeDayDescRegex}?{TimeJapaneseTimeRegex})(之间)?"; - public static readonly string TimePeriodLeftDigitTimeRegex 
= $@"(从)?(?{TimeDayDescRegex}?({TimeDigitTimeRegex}))"; - public static readonly string TimePeriodRightDigitTimeRegex = $@"{TimePeriodTimePeriodConnectWords}(?{TimeDayDescRegex}?{TimeDigitTimeRegex})(之间)?"; - public static readonly string TimePeriodShortLeftJapTimeRegex = $@"(从)?(?{TimeDayDescRegex}?({TimeHourJapRegex}))"; - public static readonly string TimePeriodShortLeftDigitTimeRegex = $@"(从)?(?{TimeDayDescRegex}?({TimeHourNumRegex}))"; - public static readonly string TimePeriodRegexes1 = $@"({TimePeriodLeftDigitTimeRegex}{TimePeriodRightDigitTimeRegex}|{TimePeriodLeftJapTimeRegex}{TimePeriodRightJapTimeRegex})"; - public static readonly string TimePeriodRegexes2 = $@"({TimePeriodShortLeftDigitTimeRegex}{TimePeriodRightDigitTimeRegex}|{TimePeriodShortLeftJapTimeRegex}{TimePeriodRightJapTimeRegex})"; - public const string ParserConfigurationBefore = @"(之前|以前|前)"; - public const string ParserConfigurationAfter = @"(之后|之後|以后|以後|后|後)"; + public static readonly string LessThanHalfHourRegex = $@"(?([0-2]?\d)|(二?十[一二三四五六七八九]?|[零〇一二三四五六七八九]))({TimeMinuteDescRegex})"; + public static readonly string TimeCJKTimeRegex = $@"{TimeHourRegex}({TimeQuarterRegex}|({TimeHalfRegex}({TimeSecondRegex})?)|((((过|又)?{TimeMinuteRegex})({TimeSecondRegex})?)|({TimeSecondRegex})))?"; + public static readonly string TimeDigitTimeRegex = $@"(?{TimeHourNumRegex}):(?{TimeMinuteNumRegex})(:(?{TimeSecondNumRegex}))?({AmPmDescRegex})?"; + public static readonly string LessTimeRegex = $@"(({TimeHourRegex}|(?{TimeHourNumRegex}):){LessThanHalfHourRegex}前)({AmPmDescRegex})?"; + public static readonly string TimeDayDescRegex = $@"(?(正午|夜中|午前半ば|(昼食時)|真昼)|((?<=({TimeDigitTimeRegex}|{TimeCJKTimeRegex})(の)?)(早朝(に)?|午後(に)?|晚|晩|(深)?夜(に)?|未明|午前(中)?|日中|白昼|(未|早)?朝(に)?|昼前に|昼すぎに|夕方前に|夕方に|営業時間内に|昼(?!食)))|((早朝(に)?|午後(に)?|晚|晩|(深)?夜(に)?|泊(?=の?予約)|未明|(早朝)?午前(中)?|日中|白昼|(未|早)?朝(に)?|昼前に|昼すぎに|夕方前に|夕方に|営業時間内に|昼(?!食))(?=(の)?({TimeDigitTimeRegex}|{TimeCJKTimeRegex}))))"; + public const string 
TimeApproximateDescPreffixRegex = @"(ぐらい|おそらく|多分|ほとんど|まもなく|昨日の|昨日|来週の|来週|昼食時|昼食|真)"; + public const string TimeApproximateDescSuffixRegex = @"(過ぎに|過ぎ|丁度に|丁度|きっかりに|きっかり|を過ぎた頃に|を過ぎた頃|ちょっと前に|ちょっと前|近くに|近く|昼食時|昼食|ぐらい|時かっきり|頃|かっきり)"; + public static readonly string TimeRegexes1 = $@"{TimeApproximateDescPreffixRegex}?({TimeDayDescRegex}(の)?)?({TimeDigitTimeRegex}|{TimeCJKTimeRegex})((の)?{TimeDayDescRegex})?{TimeApproximateDescSuffixRegex}?"; + public static readonly string TimeRegexes2 = $@"({TimeApproximateDescPreffixRegex}(の)?)?{TimeDayDescRegex}((の)?{TimeApproximateDescSuffixRegex})?"; + public static readonly string TimeRegexes3 = $@"({TimeDayDescRegex}(の)?)?({LessTimeRegex})((の)?{TimeDayDescRegex})?"; + public const string TimePeriodTimePeriodConnectWords = @"(まで(の間)?|の間|–|-|—|~|~)"; + public static readonly string TimePeriodLeftCJKTimeRegex = $@"(?{TimeDayDescRegex}?({TimeCJKTimeRegex}))(から)?"; + public static readonly string TimePeriodRightCJKTimeRegex = $@"{TimePeriodTimePeriodConnectWords}?(?{TimeDayDescRegex}?{TimeCJKTimeRegex}){TimePeriodTimePeriodConnectWords}?"; + public static readonly string TimePeriodLeftDigitTimeRegex = $@"(?{TimeDayDescRegex}?({TimeDigitTimeRegex}))(から)?"; + public static readonly string TimePeriodRightDigitTimeRegex = $@"{TimePeriodTimePeriodConnectWords}?(?{TimeDayDescRegex}?{TimeDigitTimeRegex}){TimePeriodTimePeriodConnectWords}?"; + public static readonly string TimePeriodShortLeftCJKTimeRegex = $@"(?{TimeDayDescRegex}?({TimeHourCJKRegex}))(から)?"; + public static readonly string TimePeriodShortLeftDigitTimeRegex = $@"(?{TimeDayDescRegex}?({TimeHourNumRegex}))(から)?"; + public static readonly string TimePeriodRegexes1 = $@"({TimePeriodLeftDigitTimeRegex}{TimePeriodRightDigitTimeRegex}|{TimePeriodLeftCJKTimeRegex}{TimePeriodRightCJKTimeRegex})"; + public static readonly string TimePeriodRegexes2 = 
$@"(((早朝(に)?|午後(に)?|(深)?夜(に)?|未明|午前(中)?|日中|白昼|(未|早)?朝(に)?|昼前に|昼すぎに|夕方前に|夕方に|営業時間内に|昼(?!食))({TimePeriodShortLeftDigitTimeRegex}{TimePeriodRightDigitTimeRegex}|{TimePeriodShortLeftCJKTimeRegex}{TimePeriodRightCJKTimeRegex}))|((早朝(に)?|午後(に)?|(深)?夜(に)?|未明|午前(中)?|日中|白昼|(未|早)?朝(に)?|昼前に|昼すぎに|夕方前に|夕方に|営業時間内に|昼(?!食))(?=((?!({TimeCJKTimeRegex}|{TimeDigitTimeRegex})(から)?)))))"; + public const string FromToRegex = @"^[.]"; + public const string AmbiguousRangeModifierPrefix = @"^[.]"; + public const string UnspecificDatePeriodRegex = @"^(の?(分|日|週|周|月|年|時間))$"; + public const string ReferenceDatePeriodRegex = @"(同じ|その)(?月|週末|年|週)"; + public const string ParserConfigurationBefore = @"((?(または|及び|と)そ)?の前|またはそれ以前|之前|以前|前|まで|以前)"; + public const string ParserConfigurationAfter = @"(の後から|(?または)それ以降|之后|之後|以后|以後|后|の?後|以降)"; public const string ParserConfigurationUntil = @"(直到|直至|截至|截止(到)?)"; - public const string ParserConfigurationSincePrefix = @"(自从|自|自打|打)"; - public const string ParserConfigurationSinceSuffix = @"(以来|开始)"; - public const string ParserConfigurationLastWeekDayToken = @"最后一个"; - public const string ParserConfigurationNextMonthToken = @"下一个"; - public const string ParserConfigurationLastMonthToken = @"上一个"; + public const string ParserConfigurationSincePrefix = @"(自从|自|自打|打|早ければ)"; + public const string ParserConfigurationSinceSuffix = @"(またはその後|以来|开始|(? 
ParserConfigurationUnitMap = new Dictionary { { @"年", @"Y" }, { @"月", @"MON" }, { @"个月", @"MON" }, - { @"日", @"D" }, + { @"か月", @"MON" }, + { @"ヶ月", @"MON" }, { @"周", @"W" }, + { @"週", @"W" }, + { @"週間", @"W" }, + { @"星期", @"W" }, + { @"隔週", @"W" }, + { @"个星期", @"W" }, + { @"日", @"D" }, + { @"日間", @"D" }, + { @"営業日", @"BD" }, { @"天", @"D" }, + { @"泊", @"D" }, { @"小时", @"H" }, + { @"時間", @"H" }, { @"时", @"H" }, { @"分钟", @"M" }, + { @"分間", @"M" }, { @"分", @"M" }, { @"秒钟", @"S" }, { @"秒", @"S" }, - { @"星期", @"W" } + { @"まる", @"whole" }, + { @"まるひと", @"whole" }, + { @"もう", @"another" }, + { @"別", @"another" }, + { @"数", @"some" }, + { @"たらず", @"less" }, + { @"以上", @"more" }, + { @"以下", @"less" }, + { @"を上回る", @"more" }, + { @"を下回る", @"less" } }; public static readonly Dictionary ParserConfigurationUnitValueMap = new Dictionary { @@ -233,6 +337,62 @@ public static class DateTimeDefinitions { @"secs", 1 }, { @"sec", 1 } }; + public static readonly IList MonthTerms = new List + { + @"月", + @"月間", + @"月の前半", + @"月の後半" + }; + public static readonly IList WeekendTerms = new List + { + @"周末", + @"週末", + @"週の週末" + }; + public static readonly IList WeekTerms = new List + { + @"周", + @"週", + @"週間", + @"週の前半", + @"週の後半" + }; + public static readonly IList YearTerms = new List + { + @"年", + @"年間", + @"去年", + @"今年", + @"来年" + }; + public static readonly IList ThisYearTerms = new List + { + @"今年" + }; + public static readonly IList YearToDateTerms = new List + { + @"年初来" + }; + public static readonly IList LastYearTerms = new List + { + @"去年", + @"前の年" + }; + public static readonly IList NextYearTerms = new List + { + @"来年", + @"翌年" + }; + public static readonly IList YearAfterNextTerms = new List + { + @"后年", + @"再来年" + }; + public static readonly IList YearBeforeLastTerms = new List + { + @"前年" + }; public static readonly Dictionary ParserConfigurationSeasonMap = new Dictionary { { @"春", @"SP" }, @@ -250,6 +410,8 @@ public static class DateTimeDefinitions public static readonly 
Dictionary ParserConfigurationCardinalMap = new Dictionary { { @"一", 1 }, + { @"初", 1 }, + { @"最初", 1 }, { @"二", 2 }, { @"三", 3 }, { @"四", 4 }, @@ -268,7 +430,14 @@ public static class DateTimeDefinitions { @"第二", 2 }, { @"第三", 3 }, { @"第四", 4 }, - { @"第五", 5 } + { @"第五", 5 }, + { @"最後", 5 }, + { @"最終", 5 }, + { @"第1", 1 }, + { @"第2", 2 }, + { @"第3", 3 }, + { @"第4", 4 }, + { @"第5", 5 } }; public static readonly Dictionary ParserConfigurationDayOfMonth = new Dictionary { @@ -312,6 +481,15 @@ public static class DateTimeDefinitions { @"29", 29 }, { @"30", 30 }, { @"31", 31 }, + { @"01日", 1 }, + { @"02日", 2 }, + { @"03日", 3 }, + { @"04日", 4 }, + { @"05日", 5 }, + { @"06日", 6 }, + { @"07日", 7 }, + { @"08日", 8 }, + { @"09日", 9 }, { @"1日", 1 }, { @"2日", 2 }, { @"3日", 3 }, @@ -346,6 +524,7 @@ public static class DateTimeDefinitions { @"一日", 1 }, { @"十一日", 11 }, { @"二十日", 20 }, + { @"廿日", 20 }, { @"十日", 10 }, { @"二十一日", 21 }, { @"三十一日", 31 }, @@ -373,7 +552,39 @@ public static class DateTimeDefinitions { @"二十七日", 27 }, { @"二十八日", 28 }, { @"二十九日", 29 }, - { @"三十日", 30 } + { @"三十日", 30 }, + { @"一", 1 }, + { @"十一", 11 }, + { @"二十", 20 }, + { @"廿", 20 }, + { @"十", 10 }, + { @"二十一", 21 }, + { @"三十一", 31 }, + { @"二", 2 }, + { @"三", 3 }, + { @"四", 4 }, + { @"五", 5 }, + { @"六", 6 }, + { @"七", 7 }, + { @"八", 8 }, + { @"九", 9 }, + { @"十二", 12 }, + { @"十三", 13 }, + { @"十四", 14 }, + { @"十五", 15 }, + { @"十六", 16 }, + { @"十七", 17 }, + { @"十八", 18 }, + { @"十九", 19 }, + { @"二十二", 22 }, + { @"二十三", 23 }, + { @"二十四", 24 }, + { @"二十五", 25 }, + { @"二十六", 26 }, + { @"二十七", 27 }, + { @"二十八", 28 }, + { @"二十九", 29 }, + { @"三十", 30 } }; public static readonly Dictionary ParserConfigurationDayOfWeek = new Dictionary { @@ -390,7 +601,8 @@ public static class DateTimeDefinitions { @"木曜日", 4 }, { @"金曜日", 5 }, { @"土曜日", 6 }, - { @"日曜日", 0 } + { @"日曜日", 0 }, + { @"最後の日", 6 } }; public static readonly Dictionary ParserConfigurationMonthOfYear = new Dictionary { @@ -427,6 +639,7 @@ public static class 
DateTimeDefinitions { @"十月", 10 }, { @"十一月", 11 }, { @"十二月", 12 }, + { @"正月", 13 }, { @"1月", 1 }, { @"2月", 2 }, { @"3月", 3 }, @@ -439,6 +652,18 @@ public static class DateTimeDefinitions { @"10月", 10 }, { @"11月", 11 }, { @"12月", 12 }, + { @"1か月", 1 }, + { @"2か月", 2 }, + { @"3か月", 3 }, + { @"4か月", 4 }, + { @"5か月", 5 }, + { @"6か月", 6 }, + { @"7か月", 7 }, + { @"8か月", 8 }, + { @"9か月", 9 }, + { @"10か月", 10 }, + { @"11か月", 11 }, + { @"12か月", 12 }, { @"01月", 1 }, { @"02月", 2 }, { @"03月", 3 }, @@ -449,30 +674,82 @@ public static class DateTimeDefinitions { @"08月", 8 }, { @"09月", 9 } }; - public const string DateTimeSimpleAmRegex = @"(?早|晨)"; - public const string DateTimeSimplePmRegex = @"(?晚)"; - public const string DateTimePeriodMORegex = @"(凌晨|清晨|早上|早|上午)"; + public const string DateTimeSimpleAmRegex = @"(?早|晨|am)"; + public const string DateTimeSimplePmRegex = @"(?晚|晩|pm)"; + public const string DateTimePeriodMORegex = @"(朝|凌晨|清晨|早上|早|上午)"; + public const string DateTimePeriodMIRegex = @"昼(?!食)"; public const string DateTimePeriodAFRegex = @"(中午|下午|午后|傍晚)"; - public const string DateTimePeriodEVRegex = @"(晚上|夜里|夜晚|晚)"; - public const string DateTimePeriodNIRegex = @"(半夜|夜间|深夜)"; - public static readonly Dictionary DurationUnitValueMap = new Dictionary + public const string DateTimePeriodEVRegex = @"(晚上|夜里|夜晚|晚|晩)"; + public const string DateTimePeriodNIRegex = @"(半夜|夜间|深夜|夜|泊(?=の?予約))"; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"^\d{1,2}\.\d{1,2}$", @"\d{1,2}\.\d{1,2}(?!\s*に[戻残]|から|で)" } + }; + public static readonly Dictionary AmbiguityDateFiltersDict = new Dictionary + { + { @"^今週$", @"今週" }, + { @"^[1一]日$", @"[1一]日" } + }; + public static readonly Dictionary AmbiguityDateTimeFiltersDict = new Dictionary + { + { @"から.+まで", @"" } + }; + public static readonly Dictionary AmbiguityDatePeriodFiltersDict = new Dictionary + { + { @"^年$", @"年" }, + { @"(よい|いい)([0-9]|[一二三四五六七八九十])?か?(日|週|月|年)間?", 
@"(よい|いい)([0-9]|[一二三四五六七八九十])?か?(日|週|月|年)間?" } + }; + public static readonly Dictionary AmbiguityTimeFiltersDict = new Dictionary + { + { @"^(\d+|[一二三四五六七八九十廿])時$", @"(\d+|[一二三四五六七八九十廿])時間" } + }; + public static readonly Dictionary AmbiguityTimePeriodFiltersDict = new Dictionary + { + { @"^早$", @"早" } + }; + public static readonly Dictionary AmbiguityDurationFiltersDict = new Dictionary + { + { @"月", @"(? DurationUnitValueMap = new Dictionary { { @"Y", 31536000 }, - { @"Mon", 2592000 }, + { @"MON", 2592000 }, { @"W", 604800 }, { @"D", 86400 }, + { @"BD", 86400 }, { @"H", 3600 }, { @"M", 60 }, - { @"S", 1 } + { @"S", 1 }, + { @"whole", 1 }, + { @"another", 1 }, + { @"some", 2 }, + { @"more", 3 }, + { @"less", 4 } }; public static readonly Dictionary HolidayNoFixedTimex = new Dictionary { - { @"父亲节", @"-06-WXX-6-3" }, + { @"父亲节", @"-06-WXX-7-3" }, + { @"父の日", @"-06-WXX-7-3" }, { @"母亲节", @"-05-WXX-7-2" }, - { @"感恩节", @"-11-WXX-4-4" } + { @"母の日", @"-05-WXX-7-2" }, + { @"感恩节", @"-11-WXX-4-4" }, + { @"感謝祭の日", @"-11-WXX-4-4" }, + { @"感謝祭", @"-11-WXX-4-4" }, + { @"キング牧師記念日", @"-01-WXX-1-3" } }; public const string MergedBeforeRegex = @"(前|之前)$"; - public const string MergedAfterRegex = @"(后|後|之后|之後)$"; + public const string MergedAfterRegex = @"(后|後|之后|之後|以降)$"; public static readonly Dictionary TimeNumberDictionary = new Dictionary { { '零', 0 }, @@ -487,22 +764,284 @@ public static class DateTimeDefinitions { '九', 9 }, { '〇', 0 }, { '两', 2 }, - { '十', 10 } + { '十', 10 }, + { '廿', 20 } }; public static readonly Dictionary TimeLowBoundDesc = new Dictionary { - { @"中午", 11 }, - { @"下午", 12 }, - { @"午后", 12 }, - { @"晚上", 18 }, - { @"夜里", 18 }, - { @"夜晚", 18 }, - { @"夜间", 18 }, - { @"深夜", 18 }, - { @"傍晚", 18 }, + { @"泊", 18 }, + { @"夜", 18 }, { @"晚", 18 }, - { @"pm", 12 } + { @"晩", 18 }, + { @"午後", 12 }, + { @"午后", 12 }, + { @"pm", 12 }, + { @"午前半ば", 10 }, + { @"正午", 12 }, + { @"真昼", 12 }, + { @"夜中", 0 }, + { @"深夜", 0 }, + { @"昼食時", 11 }, + { @"夕方に", 12 } }; public const 
string DefaultLanguageFallback = @"DMY"; + public static readonly IList MorningTermList = new List + { + @"午前半ば", + @"午前中", + @"午前", + @"朝", + @"未明", + @"昼前に", + @"昼前", + @"早朝に", + @"早朝" + }; + public static readonly IList MidDayTermList = new List + { + @"正午", + @"真昼", + @"昼" + }; + public static readonly IList AfternoonTermList = new List + { + @"午后", + @"午後", + @"午後に", + @"夕方前に", + @"昼すぎに", + @"昼すぎ" + }; + public static readonly IList EveningTermList = new List + { + @"夕方に", + @"夕方", + @"晚", + @"晩", + @"晚上", + @"夜里", + @"傍晚", + @"夜晚" + }; + public static readonly IList DaytimeTermList = new List + { + @"日中", + @"昼食時", + @"昼" + }; + public static readonly IList NightTermList = new List + { + @"深夜", + @"夜に", + @"夜", + @"泊", + @"夜中", + @"夜間" + }; + public static readonly IList BusinessHourTermList = new List + { + @"営業時間内に", + @"営業時間内" + }; + public static readonly IList EarlyHourTermList = new List + { + @"早朝に", + @"早朝", + @"昼すぎに", + @"昼すぎ" + }; + public static readonly IList LateHourTermList = new List + { + @"昼前に", + @"夕方前に" + }; + public static readonly Dictionary DynastyYearMap = new Dictionary + { + { @"贞观", 627 }, + { @"昭和", 1926 }, + { @"平成", 1989 }, + { @"令和", 2019 }, + { @"大正", 1912 }, + { @"明治", 1868 }, + { @"寛政", 1789 }, + { @"享和", 1801 }, + { @"文化", 1804 }, + { @"文政", 1818 }, + { @"天保", 1830 }, + { @"弘化", 1844 }, + { @"嘉永", 1848 }, + { @"安政", 1854 }, + { @"万延", 1860 }, + { @"文久", 1861 }, + { @"元治", 1864 }, + { @"慶応", 1865 }, + { @"平成二", 1990 }, + { @"平成元", 1989 }, + { @"昭和二", 1927 }, + { @"大正二", 1913 }, + { @"大正元", 1912 }, + { @"慶応4", 1868 }, + { @"明治元", 1868 }, + { @"明治2", 1869 }, + { @"明治3", 1870 }, + { @"明治4", 1871 }, + { @"明治5", 1872 }, + { @"明治6", 1873 }, + { @"明治7", 1874 }, + { @"明治8", 1875 }, + { @"明治9", 1876 }, + { @"明治10", 1877 }, + { @"明治11", 1878 }, + { @"明治12", 1879 }, + { @"明治13", 1880 }, + { @"明治14", 1881 }, + { @"明治15", 1882 }, + { @"明治16", 1883 }, + { @"明治17", 1884 }, + { @"明治18", 1885 }, + { @"明治19", 1886 }, + { @"明治20", 1887 }, + { 
@"明治21", 1888 }, + { @"明治22", 1889 }, + { @"明治23", 1890 }, + { @"明治24", 1891 }, + { @"明治25", 1892 }, + { @"明治26", 1893 }, + { @"明治27", 1894 }, + { @"明治28", 1895 }, + { @"明治29", 1896 }, + { @"明治30", 1897 }, + { @"明治31", 1898 }, + { @"明治32", 1899 }, + { @"明治33", 1900 }, + { @"明治34", 1901 }, + { @"明治35", 1902 }, + { @"明治36", 1903 }, + { @"明治37", 1904 }, + { @"明治38", 1905 }, + { @"明治39", 1906 }, + { @"明治40", 1907 }, + { @"明治41", 1908 }, + { @"明治42", 1909 }, + { @"明治43", 1910 }, + { @"明治44", 1911 }, + { @"明治45", 1912 }, + { @"大正2", 1913 }, + { @"大正3", 1914 }, + { @"大正4", 1915 }, + { @"大正5", 1916 }, + { @"大正6", 1917 }, + { @"大正7", 1918 }, + { @"大正8", 1919 }, + { @"大正9", 1920 }, + { @"大正10", 1921 }, + { @"大正11", 1922 }, + { @"大正12", 1923 }, + { @"大正13", 1924 }, + { @"大正14", 1925 }, + { @"大正15", 1926 }, + { @"昭和元", 1926 }, + { @"昭和64", 1989 }, + { @"昭和2", 1927 }, + { @"昭和3", 1928 }, + { @"昭和4", 1929 }, + { @"昭和5", 1930 }, + { @"昭和6", 1931 }, + { @"昭和7", 1932 }, + { @"昭和8", 1933 }, + { @"昭和9", 1934 }, + { @"昭和10", 1935 }, + { @"昭和11", 1936 }, + { @"昭和12", 1937 }, + { @"昭和13", 1938 }, + { @"昭和14", 1939 }, + { @"昭和15", 1940 }, + { @"昭和16", 1941 }, + { @"昭和17", 1942 }, + { @"昭和18", 1943 }, + { @"昭和19", 1944 }, + { @"昭和20", 1945 }, + { @"昭和21", 1946 }, + { @"昭和22", 1947 }, + { @"昭和23", 1948 }, + { @"昭和24", 1949 }, + { @"昭和25", 1950 }, + { @"昭和26", 1951 }, + { @"昭和27", 1952 }, + { @"昭和28", 1953 }, + { @"昭和29", 1954 }, + { @"昭和30", 1955 }, + { @"昭和31", 1956 }, + { @"昭和32", 1957 }, + { @"昭和33", 1958 }, + { @"昭和34", 1959 }, + { @"昭和35", 1960 }, + { @"昭和36", 1961 }, + { @"昭和37", 1962 }, + { @"昭和38", 1963 }, + { @"昭和39", 1964 }, + { @"昭和40", 1965 }, + { @"昭和41", 1966 }, + { @"昭和42", 1967 }, + { @"昭和43", 1968 }, + { @"昭和44", 1969 }, + { @"昭和45", 1970 }, + { @"昭和46", 1971 }, + { @"昭和47", 1972 }, + { @"昭和48", 1973 }, + { @"昭和49", 1974 }, + { @"昭和50", 1975 }, + { @"昭和51", 1976 }, + { @"昭和52", 1977 }, + { @"昭和53", 1978 }, + { @"昭和54", 1979 }, + { @"昭和55", 1980 }, + { @"昭和56", 1981 }, + { 
@"昭和57", 1982 }, + { @"昭和58", 1983 }, + { @"昭和59", 1984 }, + { @"昭和60", 1985 }, + { @"昭和61", 1986 }, + { @"昭和62", 1987 }, + { @"昭和63", 1988 }, + { @"平成2", 1990 }, + { @"平成3", 1991 }, + { @"平成4", 1992 }, + { @"平成5", 1993 }, + { @"平成6", 1994 }, + { @"平成7", 1995 }, + { @"平成8", 1996 }, + { @"平成9", 1997 }, + { @"平成10", 1998 }, + { @"平成11", 1999 }, + { @"平成12", 2000 }, + { @"平成13", 2001 }, + { @"平成14", 2002 }, + { @"平成15", 2003 }, + { @"平成16", 2004 }, + { @"平成17", 2005 }, + { @"平成18", 2006 }, + { @"平成19", 2007 }, + { @"平成20", 2008 }, + { @"平成21", 2009 }, + { @"平成22", 2010 }, + { @"平成23", 2011 }, + { @"平成24", 2012 }, + { @"平成25", 2013 }, + { @"平成26", 2014 }, + { @"平成27", 2015 }, + { @"平成28", 2016 }, + { @"平成29", 2017 }, + { @"平成30", 2018 }, + { @"平成31", 2019 }, + { @"令和元", 2019 }, + { @"令和2", 2020 }, + { @"令和3", 2021 } + }; + public const string DayTypeRegex = @"^(天|日)$"; + public const string WeekTypeRegex = @"^(周|星期|週)$"; + public const string BiWeekTypeRegex = @"^(隔週)$"; + public const string MonthTypeRegex = @"^(月)$"; + public const string YearTypeRegex = @"^(年)$"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/NumbersDefinitions.cs index 1ae6bafb7c..43f1f462d5 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/NumbersDefinitions.cs @@ -26,7 +26,7 @@ public static class NumbersDefinitions public const bool MultiDecimalSeparatorCulture = false; public const char DecimalSeparatorChar = '.'; public const string FractionMarkerToken = @""; - public const char NonDecimalSeparatorChar = ' '; + public const char NonDecimalSeparatorChar = ','; public const string HalfADozenText = @""; public const string WordSeparatorToken = @""; public const char ZeroChar = '零'; @@ -51,6 +51,7 @@ public static class NumbersDefinitions public static readonly 
Dictionary ZeroToNineMap = new Dictionary { { '零', 0 }, + { '〇', 0 }, { '一', 1 }, { '二', 2 }, { '三', 3 }, @@ -100,7 +101,37 @@ public static class NumbersDefinitions { @"万万", @"億" }, { @"億万", @"兆" }, { @"万億", @"兆" }, - { @" ", @"" } + { @" ", @"" }, + { @"れい", @"〇" }, + { @"ゼロ", @"〇" }, + { @"マル", @"〇" }, + { @"いち", @"一" }, + { @"いっ", @"一" }, + { @"に", @"二" }, + { @"さん", @"三" }, + { @"し", @"四" }, + { @"よん", @"四" }, + { @"ご", @"五" }, + { @"ろく", @"六" }, + { @"ろっ", @"六" }, + { @"しち", @"七" }, + { @"なな", @"七" }, + { @"はち", @"八" }, + { @"はっ", @"八" }, + { @"きゅう", @"九" }, + { @"く", @"九" }, + { @"じゅう", @"十" }, + { @"ひゃく", @"百" }, + { @"ぴゃく", @"百" }, + { @"びゃく", @"百" }, + { @"せん", @"千" }, + { @"ぜん", @"千" }, + { @"まん", @"万" }, + { @"ひゃくまん", @"百万" }, + { @"ぴゃくまん", @"百万" }, + { @"びゃくまん", @"百万" }, + { @"せんまん", @"千万" }, + { @"ぜんまん", @"千万" } }; public static readonly IList RoundDirectList = new List { @@ -112,102 +143,156 @@ public static class NumbersDefinitions { '十' }; + public const string RoundNumberIntegerRegex = @"(十|百|千|万(?!万)|億|兆)"; + public const string RoundNumberIntegerHiraganaRegex = @"(じゅう|[ひぴび]ゃく|[せぜ]ん|まん|[ひぴび]ゃくまん|[せぜ]んまん)"; + public static readonly string AllMultiplierLookupRegex = $@"({BaseNumbers.MultiplierLookupRegex}|ミリリットル(入れら)?|キロメートル|メートル|ミリメート)"; public static readonly string DigitalNumberRegex = $@"((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; - public const string ZeroToNineFullHalfRegex = @"[\d1234567890]"; + public const string ZeroToNineFullHalfRegex = @"[\d]"; public static readonly string DigitNumRegex = $@"{ZeroToNineFullHalfRegex}+"; public const string DozenRegex = @".*ダース$"; + public const string PercentageSymbol = @"(パ\s*ー\s*セ\s*ン\s*ト|[%%])"; public const string PercentageRegex = @".+(?=パ\s*ー\s*セ\s*ン\s*ト)|.*(?=[%%])"; - public static readonly string DoubleAndRoundRegex = $@"{ZeroToNineFullHalfRegex}+(\.{ZeroToNineFullHalfRegex}+)?\s*[万億]{{1,2}}(\s*(以上))?"; + public static readonly string DoubleAndRoundRegex = 
$@"{ZeroToNineFullHalfRegex}+(\.{ZeroToNineFullHalfRegex}+)?\s*{RoundNumberIntegerRegex}{{1,2}}(\s*(以上))?"; public const string FracSplitRegex = @"[はと]|分\s*の"; - public const string ZeroToNineIntegerRegex = @"[一二三四五六七八九]"; + public const string ZeroToNineIntegerRegex = @"[零〇一二三四五六七八九]"; + public const string ZeroToNineIntegerHiraganaRegex = @"(れい|ゼロ|マル|い[ちっ]|に|さん|し|よん|ご|ろ[くっ]|しち|なな|は[ちっ]|きゅう|く)"; + public const string HalfUnitRegex = @"半"; public const string NegativeNumberTermsRegex = @"(マ\s*イ\s*ナ\s*ス)"; - public const string NegativeNumberTermsRegexNum = @"(?((最後)(から1つ前のこと|から(3|2|1)番目|(から1つ前)(のもの)|から三番目|から二番目|(から(一|1)つ前)(のもの|のこと)?|(から1つ)?(前))?|(次のもの)(前)?|(前(?=の))(のもの)?|(現在)(のこと)?|次|二位))"; + public static readonly string AllOrdinalRegex = $@"({OrdinalRegex}|{RelativeOrdinalRegex})"; + public static readonly string AllFractionNumber = $@"((({NegativeNumberTermsRegex}{{0,1}})|{NegativeNumberTermsRegexNum})(({ZeroToNineFullHalfRegex}+|{AllIntRegex})\s*[はと]{{0,1}}\s*)?{NegativeNumberTermsRegex}{{0,1}}({ZeroToNineFullHalfRegex}+|{AllIntRegex})\s*分\s*の\s*{NegativeNumberTermsRegex}{{0,1}}({ZeroToNineFullHalfRegex}+|{AllIntRegex})+)|半(分|数)"; public static readonly string FractionNotationSpecialsCharsRegex = $@"({NegativeNumberTermsRegexNum}\s*)?{ZeroToNineFullHalfRegex}+\s+{ZeroToNineFullHalfRegex}+[//]{ZeroToNineFullHalfRegex}+"; public static readonly string FractionNotationRegex = $@"({NegativeNumberTermsRegexNum}\s*)?{ZeroToNineFullHalfRegex}+[//]{ZeroToNineFullHalfRegex}+"; - public static readonly string PercentagePointRegex = $@"(?)"; - public const string LessRegex = @"(小なり|小さい|低い|<)"; - public const string EqualRegex = @"(等しい|イコール|=)"; - public static readonly string MoreOrEqual = $@"((大なりかイコール)|(大きいかイコール)|(大なりか等しい)|(大きいか等しい)|小さくない|以上|最低)"; + public const string MoreRegex = @"(大なり|を超える|大きい|高い|大きく|(?)"; + public const string LessRegex = @"(小なり|小さい|低い|(?|=)<)"; + public const string EqualRegex = @"(等しい|イコール|(?)=)"; + public const string MoreOrEqualPrefixRegex = 
@"(少なくとも)"; + public const string LessOrEqualPrefixRegex = @"(多くて)"; + public static readonly string MoreOrEqual = $@"(({MoreRegex}(か){EqualRegex})|小さくない|以上|最低)"; public const string MoreOrEqualSuffix = @"(より(大なりイコール|小さくない))"; - public static readonly string LessOrEqual = $@"(({LessRegex}\s*(或|或者)?\s*{EqualRegex})|(小なりかイコール)|(小なりか等しい)|(小さいかイコール)|(小さいか等しい)|(小さいか等しい)|大さくない|以下|最大)"; + public static readonly string LessOrEqual = $@"(({LessRegex}\s*(或|或者)?\s*{EqualRegex})|({LessRegex}(か){EqualRegex})|大さくない|以下|最大)"; public const string LessOrEqualSuffix = @"(小なりイコール|大さくない)"; - public static readonly string OneNumberRangeMoreRegex1 = $@"(?((?!(((,|、)(?!\d+))|((,|、)(?!\d+))|。)).)+)\s*((より)\s*(({MoreOrEqual}|{MoreRegex}))|超える|を超える)"; - public const string OneNumberRangeMoreRegex2 = @"(?((?!((,|、(?!\d+))|(,|、(?!\d+))|。)).)+)\s*(より)?(大なり)"; - public const string OneNumberRangeMoreRegex3 = @"(?((?!((,(?!\d+))|(,(?!\d+))|。)).)+)\s*(以上|最低)(?![万億]{1,2})"; - public static readonly string OneNumberRangeMoreRegex4 = $@"({MoreOrEqual}|{MoreRegex})\s*(?((?!(と|は|((と)?同時に)|((と)?そして)|が|,|、|,|(,(?!\d+))|(,(?!\d+))|。)).)+)"; + public static readonly string OneNumberRangeMoreRegex1 = $@"(?(((?!((,(?!\d+))|。|は)).)+))\s*((より)\s*(({MoreOrEqual}|{MoreRegex})))|(?((?!((,(?!\d+))|。|は)).)+)\s*({MoreRegex})"; + public const string OneNumberRangeMoreRegex3 = @"(?((?!((,(?!\d+))|。)).)+)\s*(以上|最低)(?![万億]{1,2})"; + public static readonly string OneNumberRangeMoreRegex4 = $@"({MoreOrEqualPrefixRegex})\s*(?((?!(と|は|((と)?同時に)|((と)?そして)|が|,|(,(?!\d+))|。)).)*)"; + public const string OneNumberRangeMoreRegex5 = @"(?((?!((,(?!\d+))|。)).)+)\s*((もしくはそれ)(以上)(?![万億]{1,2}))"; public const string OneNumberRangeMoreSeparateRegex = @"^[.]"; public const string OneNumberRangeLessSeparateRegex = @"^[.]"; - public static readonly string OneNumberRangeLessRegex1 = $@"(?((?!(((,|、)(?!\d+))|((,|、)(?!\d+))|。)).)+)\s*(より)\s*({LessOrEqual}|{LessRegex})"; - public const string OneNumberRangeLessRegex2 = 
@"(?((?!((,(?!\d+))|(,(?!\d+))|。)).)+)\s*(より)?(小な)"; - public const string OneNumberRangeLessRegex3 = @"(?((?!((,(?!\d+))|(,(?!\d+))|。)).)+)\s*(以下|未満)(?![万億]{1,2})"; - public static readonly string OneNumberRangeLessRegex4 = $@"({LessOrEqual}|{LessRegex})\s*(?((?!(と|は|((と)?同時に)|((と)?そして)|が|,|、|,|(,(?!\d+))|(,(?!\d+))|。)).)+)"; - public static readonly string OneNumberRangeEqualRegex = $@"(((?((?!((,(?!\d+))|(,(?!\d+))|。)).)+)\s*(に)\s*{EqualRegex})|({EqualRegex}\s*(?((?!((,(?!\d+))|(,(?!\d+))|。)).)+)))"; - public static readonly string TwoNumberRangeRegex1 = $@"(?((?!((,(?!\d+))|(,(?!\d+))|。)).)+)\s*(と|{TillRegex})\s*(?((?!((,(?!\d+))|(,(?!\d+))|。)).)+)\s*(の間)"; - public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2}|{OneNumberRangeMoreRegex3}|{OneNumberRangeMoreRegex4})\s*(と|は|((と)?同時に)|((と)?そして)|が|,|、|,)?\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2}|{OneNumberRangeLessRegex3}|{OneNumberRangeLessRegex4})"; - public static readonly string TwoNumberRangeRegex3 = $@"({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2}|{OneNumberRangeLessRegex3}|{OneNumberRangeLessRegex4})\s*(と|は|((と)?同時に)|((と)?そして)|が|,|、|,)?\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2}|{OneNumberRangeMoreRegex3}|{OneNumberRangeMoreRegex4})"; - public static readonly string TwoNumberRangeRegex4 = $@"(?((?!((,(?!\d+))|(,(?!\d+))|。)).)+)\s*{TillRegex}\s*(?((?!((,(?!\d+))|(,(?!\d+))|。)).)+)"; + public static readonly string OneNumberRangeLessRegex1 = $@"(?(((?!(((,)(?!\d+))|。|(\D)))|(?:[-]|(分の))).)+)\s*(より)\s*({LessOrEqual}|{LessRegex})|(?((?!((,(?!\d+))|。)).)+)\s*(小な)"; + public const string OneNumberRangeLessRegex3 = @"(?(((?!((,(?!\d+))|。)).)+))\s*(以下|未満)(の間)?(?![万億]{1,2})"; + public static readonly string OneNumberRangeLessRegex4 = $@"({LessOrEqual}|{LessRegex}|{LessOrEqualPrefixRegex})\s*(?((?!(と|は|((と)?同時に)|((と)?そして)|が|の|,|(,(?!\d+))|。)).)+)"; + public static readonly string OneNumberRangeEqualRegex = 
$@"(((?((?!((,(?!\d+))|。)).)+)\s*(に)\s*{EqualRegex})|({EqualRegex}\s*(?((?!((,(?!\d+))|。)).)+)))"; + public static readonly string TwoNumberRangeMoreSuffix = $@"({MoreOrEqualPrefixRegex}\s*(?((?!(と|は|((と)?同時に)|((と)?そして)|が|,|(,(?!\d+))|。)).)*))(,{LessOrEqualPrefixRegex})"; + public static readonly string TwoNumberRangeRegex1 = $@"(?((?!((,(?!\d+))|。)).)+)\s*(と|{TillRegex})\s*(?((?!((,(?!\d+))|。)).)+)\s*(の間|未満)"; + public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex3}|{OneNumberRangeMoreRegex4})\s*(と|((と)?同時に)|((と)?そして)|が|,)?\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex3}|{OneNumberRangeLessRegex4})"; + public static readonly string TwoNumberRangeRegex3 = $@"({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex3}|{OneNumberRangeLessRegex4})\s*(と|((と)?同時に)|((と)?そして)|が|,)?\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex3}|{OneNumberRangeMoreRegex4})"; + public static readonly string TwoNumberRangeRegex4 = $@"(?((?!((,(?!\d+))|。)).)+)\s*{TillRegex}\s*(?((?!((,(?!\d+))|。)).)+)"; public const string AmbiguousFractionConnectorsRegex = @"^[.]"; public static readonly Dictionary RelativeReferenceOffsetMap = new Dictionary { - { @"", @"" } + { @"前", @"-1" }, + { @"現在", @"0" }, + { @"次", @"1" }, + { @"最後", @"0" }, + { @"最後から三番目", @"-2" }, + { @"最後から二番目", @"-1" }, + { @"最後から一つ前", @"-1" }, + { @"最後から一つ前のもの", @"-1" }, + { @"最後から一つ前のこと", @"-1" }, + { @"最後から1つ前のこと", @"-1" }, + { @"最後から1つ前のもの", @"-1" }, + { @"最後から1つ前", @"-1" }, + { @"現在のこと", @"0" }, + { @"前のもの", @"-1" }, + { @"次のもの", @"1" }, + { @"最後から3番目", @"-2" }, + { @"最後から2番目", @"-1" } }; public static readonly Dictionary RelativeReferenceRelativeToMap = new Dictionary { - { @"", @"" } + { @"前", @"current" }, + { @"現在", @"current" }, + { @"次", @"current" }, + { @"最後", @"end" }, + { @"最後から三番目", @"end" }, + { @"最後から二番目", @"end" }, + { @"最後から一つ前", @"end" }, + { @"最後から一つ前のもの", @"end" }, + { @"最後から一つ前のこと", @"end" }, + { @"現在のこと", @"current" }, + { @"最後から1つ前のこと", 
@"end" }, + { @"最後から1つ前のもの", @"end" }, + { @"最後から1つ前", @"end" }, + { @"前のもの", @"current" }, + { @"次のもの", @"current" }, + { @"最後から3番目", @"end" }, + { @"最後から2番目", @"end" } + }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"^に$", @"に" } }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/NumbersWithUnitDefinitions.cs index a3c325a25c..46fe4c52a7 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/NumbersWithUnitDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/NumbersWithUnitDefinitions.cs @@ -35,6 +35,7 @@ public static class NumbersWithUnitDefinitions public const string BuildPrefix = @""; public const string BuildSuffix = @""; public const string ConnectorToken = @""; + public const bool CheckFirstSuffix = true; public static readonly Dictionary CurrencySuffixList = new Dictionary { { @"Afghan afghani", @"アフガニ" }, @@ -168,7 +169,7 @@ public static class NumbersWithUnitDefinitions { @"Djiboutian franc", @"ジブチ・フラン" }, { @"CFP franc", @"CFPフラン" }, { @"Guinean franc", @"ギニア・フラン" }, - { @"Swiss franc", @"スイス・フラン" }, + { @"Swiss franc", @"スイス・フラン|スイスフラン" }, { @"Rwandan franc", @"ルワンダ・フラン" }, { @"Belgian franc", @"ベルギー・フラン" }, { @"Rappen", @"Rappen" }, @@ -217,7 +218,7 @@ public static class NumbersWithUnitDefinitions { @"Pound", @"ポンド" }, { @"Pence", @"ペンス" }, { @"Shilling", @"シリング" }, - { @"United States dollar", @"ドル|USドル" }, + { @"United States dollar", @"米ドル|USドル|ドル" }, { @"East Caribbean dollar", @"東カリブ・ドル" }, { @"Australian dollar", @"オーストラリア・ドル|オーストラリアドル" }, { @"Bahamian dollar", @"バハマ・ドル" }, @@ -234,7 +235,7 @@ public static class NumbersWithUnitDefinitions { @"Guyanese dollar", @"ガイアナ・ドル|ガイアナ・ドル" }, { @"Hong Kong dollar", @"香港ドル" }, { @"Macau Pataca", @"マカオ・パタカ|マカオ・パタカ" }, - { @"New Taiwan dollar", @"ニュー台湾ドル|ニュー台湾ドル" }, + { @"New Taiwan dollar", 
@"ニュー台湾ドル|ニュー台湾ドル|台湾ドル" }, { @"Jamaican dollar", @"ジャマイカ・ドル|ジャマイカドル" }, { @"Kiribati dollar", @"キリバス・ドル" }, { @"Liberian dollar", @"リベリア・ドル|リベリアドル" }, @@ -242,7 +243,8 @@ public static class NumbersWithUnitDefinitions { @"Surinamese dollar", @"スリナム・ドル|スリナムドル" }, { @"Trinidad and Tobago dollar", @"トリニダード・トバゴ・ドル|トリニダードトバゴ・ドル" }, { @"Tuvaluan dollar", @"ツバル・ドル|ツバルドル" }, - { @"Chinese yuan", @"人民元" }, + { @"Dollar", @"どる|$" }, + { @"Chinese yuan", @"人民元|元" }, { @"Fen", @"分" }, { @"Jiao", @"角" }, { @"Finnish markka", @"フィンランド・マルカ" }, @@ -531,7 +533,7 @@ public static class NumbersWithUnitDefinitions { @"Solomon Islands dollar", @"si$|si $" }, { @"New Taiwan dollar", @"nt$|nt $" }, { @"Samoan tālā", @"ws$" }, - { @"Chinese yuan", @"¥" }, + { @"Chinese yuan", @"¥|人民元" }, { @"Japanese yen", @"¥|\" }, { @"Turkish lira", @"₺" }, { @"Euro", @"€" }, @@ -542,7 +544,150 @@ public static class NumbersWithUnitDefinitions { @"円", @"銭", - @"\" + @"分", + @"レク", + @"プル", + @"ブル", + @"\", + @"元" }; + public static readonly Dictionary DimensionSuffixList = new Dictionary + { + { @"Meter", @"米|公尺|m|メートル" }, + { @"Kilometer", @"千米|公里|km|キロメートル" }, + { @"Decimeter", @"分米|公寸|dm|デシメートル" }, + { @"Millimeter", @"ミリ" }, + { @"Centimeter", @"釐米|厘米|公分|cm|センチ" }, + { @"Micrometer", @"毫米|公釐|mm" }, + { @"Microns", @"微米" }, + { @"Picometer", @"皮米|ピクトメーター|pm" }, + { @"Nanometer", @"纳米" }, + { @"Mile", @"英里|マイル" }, + { @"Inch", @"英寸|インチ" }, + { @"Foot", @"呎|英尺|フィート" }, + { @"Yard", @"码" }, + { @"Knot", @"海里" }, + { @"Light year", @"光年" }, + { @"Meter per second", @"米每秒|米/秒|m/s|秒速メートル|毎秒メートル" }, + { @"Kilometer per hour", @"公里每小时|千米每小时|公里/小时|千米/小时|km/h|時速キロメートル" }, + { @"Kilometer per minute", @"公里每分钟|千米每分钟|公里/分钟|千米/分钟|km/min|分速キロメートル" }, + { @"Kilometer per second", @"公里每秒|千米每秒|公里/秒|千米/秒|km/s|秒速キロメートル|毎秒キロメートル" }, + { @"Mile per hour", @"英里每小时|英里/小时|時速マイル" }, + { @"Foot per second", @"英尺每小时|英尺/小时" }, + { @"Foot per minute", @"英尺每分钟|英尺/分钟" }, + { @"Yard per minute", @"码每分|码/分" }, + { @"Yard per second", 
@"码每秒|码/秒" }, + { @"Square centimetre", @"平方厘米" }, + { @"Square decimeter", @"平方分米" }, + { @"Square meter", @"平方米|平方メートル" }, + { @"Square kilometer", @"平方公里|平方キロメートル" }, + { @"Acre", @"英亩|公亩|エーカー" }, + { @"Hectare", @"公顷" }, + { @"Mu", @"亩|市亩|ムー" }, + { @"Liter", @"公升|升|l" }, + { @"Milliliter", @"毫升|ml|ミリリットル" }, + { @"Cubic meter", @"立方米" }, + { @"Cubic decimeter", @"立方分米" }, + { @"Cubic millimeter", @"立方毫米" }, + { @"Cubic foot", @"立方英尺|立方フィート" }, + { @"Gallon", @"加仑|ガロン" }, + { @"Pint", @"品脱" }, + { @"Dou", @"市斗|斗" }, + { @"Dan", @"市石|石" }, + { @"Kilogram", @"千克|公斤|kg|キログラム" }, + { @"Gram", @"克|g" }, + { @"Milligram", @"毫克|mg" }, + { @"Microgram", @"微克|μg" }, + { @"Ton", @"公吨|吨|t|トン" }, + { @"Metric ton", @"メートルトン" }, + { @"Pound", @"磅|ポンド" }, + { @"Ounce", @"盎司|オンス" }, + { @"Jin", @"市斤|斤" }, + { @"Liang", @"两" }, + { @"Barrel", @"桶" }, + { @"Pot", @"罐" }, + { @"Bit", @"比特|位|b|bit|ビット" }, + { @"Kilobit", @"千比特|千位|kb|Kb" }, + { @"Megabit", @"兆比特|兆位|mb|Mb|メガバイト" }, + { @"Gigabit", @"十亿比特|千兆比特|十亿位|千兆位|gb|Gb" }, + { @"Terabit", @"万亿比特|兆兆比特|万亿位|兆兆位|tb|Tb" }, + { @"Petabit", @"千兆兆比特|千万亿比特|千兆兆位|千万亿位|pb|Pb" }, + { @"Byte", @"字节|byte|Byte" }, + { @"Kilobyte", @"千字节|kB|KB" }, + { @"Megabyte", @"兆字节|mB|MB" }, + { @"Gigabyte", @"十亿字节|千兆字节|gB|GB" }, + { @"Terabyte", @"万亿字节|兆兆字节|tB|TB" }, + { @"Petabyte", @"千兆兆字节|千万亿字节|pB|PB" } + }; + public static readonly Dictionary DimensionPrefixList = new Dictionary + { + { @"split_unit", @"時速|分速|秒速|毎秒" } + }; + public static readonly IList DimensionAmbiguousValues = new List + { + @"丈", + @"位", + @"克", + @"分", + @"升", + @"寸", + @"尺", + @"斗", + @"斤", + @"桶", + @"毫", + @"石", + @"码", + @"磅", + @"米", + @"罐", + @"里", + @"m", + @"km", + @"dm", + @"cm", + @"mm", + @"l", + @"ml", + @"kg", + @"mg", + @"g", + @"t", + @"b", + @"byte", + @"kb", + @"mb", + @"gb", + @"tb", + @"pb", + @"時速", + @"トン" + }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"五角", @"五角大楼" }, + { @"普尔", @"标准普尔" } + }; + public static readonly 
Dictionary TemperatureSuffixList = new Dictionary + { + { @"F", @"華氏|華氏温度|華氏温度の|°f" }, + { @"K", @"开尔文温度|开氏度|凯氏度|K|k" }, + { @"R", @"兰氏温度|°r" }, + { @"C", @"摂氏|摂氏温度|°c|℃" }, + { @"Degree", @"度" } + }; + public static readonly Dictionary TemperaturePrefixList = new Dictionary + { + { @"F", @"華氏|華氏温度|華氏温度の|华氏" }, + { @"K", @"开氏温度|开氏" }, + { @"R", @"兰氏温度|兰氏" }, + { @"C", @"摂氏温度|摂氏" } + }; + public static readonly IList TemperatureAmbiguousValues = new List + { + @"度", + @"k" + }; + public const string HalfUnitRegex = @"半"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/QuotedTextDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/QuotedTextDefinitions.cs new file mode 100644 index 0000000000..7f7ef61b80 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/QuotedTextDefinitions.cs @@ -0,0 +1,35 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Japanese\Japanese-QuotedText.yaml +// - Language: Japanese +// - ClassName: QuotedTextDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Japanese +{ + using System; + using System.Collections.Generic; + + public static class QuotedTextDefinitions + { + public const string LangMarker = @"Jpn"; + public const string QuotedTextRegex1 = @"(『([^『』]+)』)"; + public const string QuotedTextRegex2 = @"(「([^「」]+)」)"; + public const string QuotedTextRegex3 = @"(﹃([^﹃﹄]+)﹄)"; + public const string QuotedTextRegex4 = @"(﹁([^﹁⋮]+)⋮)"; + public const string QuotedTextRegex5 = @"(""([^""]+)"")"; + public const string QuotedTextRegex6 = @"(\\'([^\']+)\\')"; + public const string QuotedTextRegex7 = @"(`([^`]+)`)"; + public const string QuotedTextRegex8 = @"(123456)"; + public const string QuotedTextRegex9 = @"(123456)"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/QuotedTextDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/QuotedTextDefinitions.tt new file mode 100644 index 0000000000..f5860b3495 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/QuotedTextDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Japanese\Japanese-QuotedText.yaml"; + this.Language = "Japanese"; + this.ClassName = "QuotedTextDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Korean/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Korean/DateTimeDefinitions.cs new file mode 100644 index 0000000000..c6c1c90a7f --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Korean/DateTimeDefinitions.cs @@ -0,0 +1,850 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. 
+// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Korean\Korean-DateTime.yaml +// - Language: Korean +// - ClassName: DateTimeDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Korean +{ + using System; + using System.Collections.Generic; + + public static class DateTimeDefinitions + { + public const string LangMarker = @"Kor"; + public const string MonthRegex = @"(?(정월|(일|이|삼|사|오|유|육|칠|팔|구|십|시|십일|십이)(월|개월))|((0?\d|1[0-2])(월|개월)))"; + public const string DayRegex = @"(?[0-2]?\d|3[0-1]|하루|이틀|사흘|나흘|닷새|엿새|이레|여드레|아흐레|열흘)"; + public const string OneToNineIntegerRegex = @"[일이삼사오육륙칠팔구]"; + public static readonly string DateDayRegexInCJK = $@"(?(([12][0-9]|3[01]|[1-9]|삼십일?|[이]?[십]({OneToNineIntegerRegex})?|{OneToNineIntegerRegex})[일]))"; + public static readonly string DayRegexNumInCJK = $@"(?[12][0-9]|3[01]|[1-9]|삼십일?|[이]?[십]({OneToNineIntegerRegex})?|{OneToNineIntegerRegex})"; + public const string MonthNumRegex = @"(?(0?\d|1[0-2]))"; + public const string TwoNumYear = @"50"; + public const string YearNumRegex = @"(?((1[5-9]|20)\d{2})|2100)"; + public const string SimpleYearRegex = @"(?(\d{2,4}))"; + public const string ZeroToNineIntegerRegexCJK = @"[일이삼사오육륙칠팔구영공십]"; + public const string DynastyStartYear = @"元"; + public const string RegionTitleRegex = @"(贞观|开元|神龙|洪武|建文|永乐|景泰|天顺|成化|嘉靖|万历|崇祯|顺治|康熙|雍正|乾隆|嘉庆|道光|咸丰|同治|光绪|宣统|民国)"; + public static readonly string DynastyYearRegex = $@"(?{RegionTitleRegex})(?({DynastyStartYear}|\d{{1,3}}|[十拾]?({ZeroToNineIntegerRegexCJK}[十百拾佰]?){{0,3}}))"; + public static readonly string DateYearInCJKRegex = $@"(?({ZeroToNineIntegerRegexCJK}{{2,4}}|[일이]천{ZeroToNineIntegerRegexCJK}{{1,2}}))"; + public const string WeekDayRegex = 
@"(?일요일|월요일|화요일|수요일|목요일|금요일|토요일)"; + public const string WeekDayStartEnd = @"^[.]"; + public const string LunarRegex = @"음력"; + public static readonly string DateThisRegex = $@"(이번(\s+)?(주\s+)?){WeekDayRegex}"; + public static readonly string DateLastRegex = $@"((저번|지난)(\s+)?(주\s+)?){WeekDayRegex}"; + public static readonly string DateNextRegex = $@"(다음(\s+)?(주\s+)?){WeekDayRegex}"; + public const string WeekWithWeekDayRangeRegex = @"^[.]"; + public const string SpecialMonthRegex = @"^[.]"; + public const string SpecialYearRegex = @"^[.]"; + public const string MonthDayRange = @"^[.]"; + public const string SpecialDayRegex = @"(최근|그저께|그제|((내일)?\s?모레)|그끄저께|어제|내일|오늘|금일|작일|익일|당일|명일|전일|다음 날|마지막 날|며칠|글피|그글피)"; + public static readonly string DurationFromSpecialDayRegex = $@"({SpecialDayRegex}|지금(으로)?)\s*((부터)\s*(\d+|{ZeroToNineIntegerRegexCJK}+)\s*(?!월){DateUnitRegex})(\s*후)?"; + public const string SpecialDayWithNumRegex = @"(하루|이틀|사흘|나흘|닷새|엿새)"; + public static readonly string WeekDayOfMonthRegex = $@"(((((이번|저번|지난|다음)\s)?{MonthRegex}|((이번|저번|지난|다음)\s)?{MonthNumRegex}월|(이번|저번|지난|다음)\s*달)의?\s*)?(?첫\s?번?째|두\s?번째|둘째|세\s?번째|셋째|네\s?번째|넷째|다섯\s?번?째|다섯째|여섯\s?번?째|여섯째|일곱\s?번?째|여덟\s?번?째|아홉\s?번?째|열\s?번?\s?째|마지막)\s*주?\s*{WeekDayRegex})"; + public const string WeekDayAndDayRegex = @"^[.]"; + public const string ThisPrefixRegex = @"이번?|금"; + public const string LastPrefixRegex = @"저번|지난"; + public const string NextPrefixRegex = @"다음|다가오는"; + public static readonly string RelativeRegex = $@"(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex}))"; + public static readonly string SpecialDate = $@"(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})년)?({RelativeRegex}\s달의?\s)?{DateDayRegexInCJK}"; + public const string DateUnitRegex = @"(?년|개?월|주|(?--|-|—|——|~|–|로?부터|에서|[과와]|들어|,)"; + public const string DatePeriodRangeSuffixRegex = @"(사이|까지|안|이?전|이내|간)"; + public const string DatePeriodRangePrefixRegex = @"^\b$"; + public const string DatePeriodTillSuffixRequiredRegex = 
@"(?로?부터|에서|과)"; + public const string DatePeriodDayRegexInCJK = @"(?[1-9]\s*일|[12][0-9]\s*일|3[01]\s*일|[일이삼사오육칠팔구십]\s*일|이?십[일이삼사오륙칠팔구]?\s*일|삼십일?\s*일|0[1-9]\s*일|며칠)"; + public const string DatePeriodThisRegex = @"이번?"; + public const string DatePeriodLastRegex = @"저번?|지난|과거"; + public const string DatePeriodNextRegex = @"다?다음|다가오는|마지막"; + public const string WoMLastRegex = @"마지막"; + public const string WoMPreviousRegex = @"이전"; + public const string WoMNextRegex = @"다음"; + public static readonly string RelativeMonthRegex = $@"(?({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex}|같은)\s*달)"; + public const string HalfYearRegex = @"((?[상전]반기)|(?[하후]반기))"; + public static readonly string YearRegex = $@"(({YearNumRegex}|{SimpleYearRegex})\s*년)(\s*{HalfYearRegex})?"; + public static readonly string StrictYearRegex = $@"((({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex})\s+)?{YearRegex}(?=[\u4E00-\u9FFF]|\s|$|\W))"; + public const string YearRegexInNumber = @"(?(\d{4}))"; + public static readonly string DatePeriodYearInCJKRegex = $@"{DateYearInCJKRegex}년{HalfYearRegex}?"; + public static readonly string MonthSuffixRegex = $@"(?({RelativeMonthRegex}|{MonthRegex}))"; + public static readonly string SimpleCasesRegex = $@"(({YearRegex}|{DatePeriodYearInCJKRegex})\s*)?{MonthSuffixRegex}(\s+{DatePeriodDayRegexInCJK}|{DayRegex})\s*{DatePeriodTillRegex}?\s*(({YearRegex}|{DatePeriodYearInCJKRegex})\s*)?{MonthSuffixRegex}(\s+{DatePeriodDayRegexInCJK}|{DayRegex})(까지|\s+사이)?(\s*{DatePeriodRangeSuffixRegex})?"; + public static readonly string YearAndMonth = $@"({DatePeriodYearInCJKRegex}|{YearRegex})\s*{MonthRegex}"; + public static readonly string SimpleYearAndMonth = $@"({YearNumRegex}[/\\\-]{MonthNumRegex}\b$)"; + public static readonly string PureNumYearAndMonth = $@"({YearRegexInNumber}\s*[-\.\/]\s*{MonthNumRegex})|({MonthNumRegex}\s*\/\s*{YearRegexInNumber})"; + public static readonly string OneWordPeriodRegex = 
$@"((({MonthRegex}\s*)({DateDayRegexInCJK}\s*)|(그\s*)?(일|사건))(이 있[는던]|의)\s+[달주해])|(((?(작년|((다음|올)\s?해)|금년|내년))|({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex}))\s*)?{MonthRegex}(\s+달)?(\s*(초|말))?(\s+([1-9]|[12]\d|3[01])일,\s+([1-9]|[12]\d|3[01])일)?|(({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex}|그|다가오는|지난|올)\s+(몇\s+)?(며칠|날|주말?|해|달)(\s+([말안]|([초후]반?)))?(\s+휴가)?(에\s좀\s더\s일찍)?)|(금년|올\s?해|작년|내년)(\s*({HalfYearRegex}|[초말안]))?|((다다음주|금주|주말)(\s+[말안])?)|((앞으로)\s+(하루|이틀|사흘|나흘|닷새|엿새|이레|여드레|아흐레|열흘)(\s+동안)?)|((다가오는|다음)\s+(({DateDayRegexInCJK})|{MonthRegex}|((하루|이틀|사흘|나흘|닷새|엿새|이레|여드레|아흐레|열흘)|[주달])))|((({YearRegex})|(\d{{1,2}}년)\s*)?((({MonthRegex})|((한|두|세|네|다섯|여섯|일곱|여덟|아홉|열|열한|열두)\s+달))\s*)?({DateDayRegexInCJK})?(\s+이내))|((같은|이번|올)\s+[달주해])|((어제|오늘|내일)로?부터(\s+최소)?\s+((\d{{1,2}}주)|(하루|이틀|사흘|나흘|닷새|엿새|이레|여드레|아흐레|열흘)|(\d{{1,2}}일)|(오늘|내일))((\s+[후내안전])|(까지)|(\s+이내)))|(\d{{1,2}}번 째 주)|(\d{{1,2}}\s*세기)|(\d{{4}}-\d{{4}})"; + public const string LaterEarlyPeriodRegex = @"^[.]"; + public const string DatePointWithAgoAndLater = @"^[.]"; + public static readonly string WeekOfMonthRegex = $@"(?{MonthSuffixRegex}의?\s+(?첫\s?번?째|두\s?번?째|둘째|세\s?번?째|셋째|네\s?번?째|넷째|마지막)\s*주\s*)"; + public static readonly string WeekOfYearRegex = $@"(?(?내년|금년|작년|((다음|올)\s*해)|{YearRegex})의?\s+(?첫\s?번?째|두\s?번?째|둘째|세\s?번?째|셋째|네\s?번?째|넷째|마지막)\s*주\s*)"; + public const string WeekOfDateRegex = @"^[.]"; + public const string MonthOfDateRegex = @"^[.]"; + public const string RestOfDateRegex = @"^[.]"; + public const string UnitRegex = @"(?년|(개)?월(\s달)?|달|주|일)"; + public static readonly string FollowedUnit = $@"^\s*{UnitRegex}"; + public static readonly string NumberCombinedWithUnit = $@"(?\d+(\.\d*)?){UnitRegex}"; + public const string DateRangePrepositions = @"((从|在|自)\s*)?"; + public static readonly string YearToYear = $@"({DatePeriodYearInCJKRegex}|{YearRegex})\s*{DatePeriodTillRegex}\s*({DatePeriodYearInCJKRegex}|{YearRegex})"; + public static readonly string 
YearToYearSuffixRequired = $@"({DateRangePrepositions})({DatePeriodYearInCJKRegex}|{YearRegex})\s*({DatePeriodTillSuffixRequiredRegex})\s*({DatePeriodYearInCJKRegex}|{YearRegex})\s*(까지|사이)"; + public static readonly string MonthToMonth = $@"({DateRangePrepositions})({MonthRegex}){DatePeriodTillRegex}({MonthRegex})"; + public static readonly string MonthToMonthSuffixRequired = $@"({DateRangePrepositions})({MonthRegex}){DatePeriodTillSuffixRequiredRegex}({MonthRegex})\s*(까지|사이)"; + public const string DayToDay = @"^[.]"; + public const string DayRegexForPeriod = @"^[.]"; + public static readonly string WeekToWeek = $@"(\d+\s*주{DatePeriodTillRegex}\s+\d+\s*주\s*{DatePeriodRangeSuffixRegex})"; + public static readonly string RelativePeriodRegex = $@"(({SpecialDayRegex}|지금|올 해)\s*{DatePeriodTillRegex}\s*((\d+\s*주)|(하루|이틀|사흘|나흘|닷새|엿새|이흐레|여드레|아흐레|열흘)|(지금))(\s*반)?(\s*[전후뒤앞])?\s*{DatePeriodRangeSuffixRegex})"; + public static readonly string FirstLastOfYearRegex = $@"(({DatePeriodYearInCJKRegex}|{YearRegex}|(?明年|今年|去年))的?)((?前)|(?(最后|最後)))"; + public const string ComplexDatePeriodRegex = @"^[.]"; + public const string PastRegex = @"(?지난|과거)"; + public const string FutureRegex = @"(?앞으로|지금부터)"; + public const string SeasonRegex = @"(한\s*)?(?봄|여름|가을|겨울)"; + public const string WhichWeekRegex = @"^[.]"; + public static readonly string SeasonWithYear = $@"(({YearRegex}|{DatePeriodYearInCJKRegex}|(?내년|올\s?해|금년|작년)|(이번?|저번?|다음|그))의?\s+)?{SeasonRegex}\s*(초|중반|말)?"; + public static readonly string QuarterRegex = $@"(({YearRegex}|{DatePeriodYearInCJKRegex}|(?내년|올\s?해|금년|작년))(의)?\s+)?((?1|2|3|4|일|이|삼|사)사?분기)"; + public const string CenturyNumRegex = @"^[.]"; + public const string CenturyRegex = @"(?\d|1\d|2\d)\s*세기"; + public const string CenturyRegexInCJK = @"(?일|이|삼|사|오|육|칠|팔|구|이?십(일|이|삼|사|오|육|칠|팔|구))\s*세기"; + public static readonly string RelativeCenturyRegex = $@"(?({DatePeriodLastRegex}|{DatePeriodThisRegex}|{DatePeriodNextRegex}))세기"; + public const string DecadeRegexInCJK = 
@"(?십|일십|이십|삼십|사십|오십|육십|칠십|팔십|구십)"; + public static readonly string DecadeRegex = $@"(?({CenturyRegex}|{CenturyRegexInCJK}|{RelativeCenturyRegex}))?(?(\d0|{DecadeRegexInCJK}|\d{{3}}0))년대"; + public const string PrepositionRegex = @"(?^的|在$)"; + public const string NowRegex = @"(?지금)"; + public const string NightRegex = @"(?이른|늦은)"; + public const string TimeOfSpecialDayRegex = @"(今晚|今早|今晨|明晚|明早|明晨|昨晚)(的|在)?"; + public const string DateTimePeriodTillRegex = @"(?到|直到|--|-|—|——)"; + public const string DateTimePeriodPrepositionRegex = @"(?^\s*的|在\s*$)"; + public const string BeforeAfterRegex = @"^\b$"; + public static readonly string HourRegex = $@"\b{BaseDateTime.HourRegex}"; + public const string HourNumRegex = @"(?[한두세네]|다섯|여섯|일곱|여덟|아홉|스무|스물[한두세네]|열([한두세네]|다섯|여섯|일곱|여덟|아홉)?)"; + public const string ZhijianRegex = @"^\s*(까지)"; + public const string DateTimePeriodThisRegex = @"这个|这一个|这|这一"; + public const string DateTimePeriodLastRegex = @"上个|上一个|上|上一"; + public const string DateTimePeriodNextRegex = @"下个|下一个|下|下一"; + public const string AmPmDescRegex = @"(?(am|a\.m\.|a m|a\. m\.|a\.m|a\. m|a m|pm|p\.m\.|p m|p\. m\.|p\.m|p\. 
m|p m))"; + public const string TimeOfDayRegex = @"(?凌晨|清晨|早上|早间|早|上午|中午|下午|午后|晚上|夜里|夜晚|半夜|夜间|深夜|傍晚|晚)"; + public static readonly string SpecificTimeOfDayRegex = $@"((({DateTimePeriodThisRegex}|{DateTimePeriodNextRegex}|{DateTimePeriodLastRegex})\s+{TimeOfDayRegex})|(今晚|今早|今晨|明晚|明早|明晨|昨晚))"; + public const string DateTimePeriodUnitRegex = @"(个)?(?(小时|钟头|分钟|秒钟|时|分|秒))"; + public static readonly string DateTimePeriodFollowedUnit = $@"^\s*{DateTimePeriodUnitRegex}"; + public static readonly string DateTimePeriodNumberCombinedWithUnit = $@"\b(?\d+(\.\d*)?){DateTimePeriodUnitRegex}"; + public const string DurationAllRegex = @"(내내|종일)"; + public const string DurationHalfRegex = @"ㅂ"; + public const string DurationRelativeDurationUnitRegex = @"(지난|저번|작(?=년))"; + public const string AgoLaterRegex = @"^[.]"; + public const string DurationDuringRegex = @"(동안)"; + public const string DurationSomeRegex = @"^\b$"; + public const string DurationMoreOrLessRegex = @"(더|이상|이하|초과|미만)"; + public static readonly string DurationYearRegex = $@"(\d+|{ZeroToNineIntegerRegexCJK})\s*년\s*간"; + public const string DurationHalfSuffixRegex = @"반"; + public static readonly Dictionary DurationSuffixList = new Dictionary + { + { @"M", @"분" }, + { @"S", @"초" }, + { @"H", @"시|시간" }, + { @"D", @"일|칠|날" }, + { @"BD", @"영업일 기준으로" }, + { @"QD", @"한나절" }, + { @"W", @"주|주일" }, + { @"MON", @"개월|월|달" }, + { @"Y", @"년" }, + { @"P1D", @"하루" }, + { @"P2D", @"이틀" }, + { @"P3D", @"사흘" }, + { @"P4D", @"나흘" }, + { @"P5D", @"닷새" }, + { @"P6D", @"엿새" }, + { @"P7D", @"이레" }, + { @"P8D", @"여드레" }, + { @"P9D", @"아흐레" }, + { @"P10D", @"열흘" } + }; + public static readonly IList DurationAmbiguousUnits = new List + { + @"분", + @"초", + @"시", + @"시간", + @"일", + @"주", + @"주일", + @"달", + @"월", + @"년", + @"시" + }; + public const string DurationUnitRegex = @"(?(년|개?월|달|주일?|(?\s*그리고\s*|\s+|,\s*)"; + public const string ConnectorRegex = @"^\s*,\s*$"; + public static readonly string DurationMoreOrLessThanSurfix = 
$@"(?\s*(이상|이하|초과|미만))"; + public static readonly string LunarHolidayRegex = $@"(({YearRegex}|{DatePeriodYearInCJKRegex}|(?내년|금년|작년))(의)?\s)?(?섣달그믐날?|음력설|구정|추석|한가위|정월대보름|단오|석가탄신일)"; + public static readonly string HolidayRegexList1 = $@"(({YearRegex}|{DatePeriodYearInCJKRegex}|(?내년|금년|작년))(의)?\s)?(?새해|설날|양력설|신정|근로자의 날|만우절|크리스마스 이브|크리스마스|식목일|건국기념일|발렌타인데이|밸런타인데이|스승의 날|교사의 날|어린이날|국제 여성의 날|세계 여성의 날|삼일절|3.1절|3·1절|현충일|광복절|개천절|한글날|기독탄신일)"; + public static readonly string HolidayRegexList2 = $@"(({YearRegex}|{DatePeriodYearInCJKRegex}|(?내년|금년|작년))(의)?\s)?(?추수감사절|할로윈|제헌절|국군의 날|유엔의 날|아버지의 날|클린 먼데이|마틴 루터 킹 데이|메이데이|부활절|국제 노동자의 날)"; + public const string SetUnitRegex = @"(?년|월|달|주일?|일|시간|시|분|초)"; + public static readonly string SetEachUnitRegex = $@"(?{SetUnitRegex}\s?(마다))"; + public const string SetEachPrefixRegex = @"(?(매)\s*$)"; + public const string SetEachSuffixRegex = @"^[.]"; + public const string SetLastRegex = @"(?last|this|next)"; + public const string SetEachDayRegex = @"(每|每一)(天|日)\s*$"; + public const string SetEachDateUnitRegex = @"^[.]"; + public const string TimeHourNumRegex = @"(00|01|02|03|04|05|06|07|08|09|0|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|1|2|3|4|5|6|7|8|9)"; + public const string TimeMinuteNumRegex = @"(00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|0|1|2|3|4|5|6|7|8|9)"; + public const string TimeSecondNumRegex = @"(00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|0|1|2|3|4|5|6|7|8|9)"; + public const string TimeHourCJKRegex = @"([零〇一二两三四五六七八九]|二十[一二三四]?|十[一二三四五六七八九]?)"; + public const string TimeMinuteCJKRegex = @"([二三四五]?十[一二三四五六七八九]?|六十|[零〇一二三四五六七八九])"; + public static readonly string TimeSecondCJKRegex = $@"{TimeMinuteCJKRegex}"; + public const string TimeClockDescRegex = 
@"(点\s*整|点\s*钟|点|时)"; + public const string TimeMinuteDescRegex = @"(分钟|分|)"; + public const string TimeSecondDescRegex = @"(秒钟|秒)"; + public const string TimeBanHourPrefixRegex = @"(第)"; + public static readonly string TimeHourRegex = $@"(?{TimeHourCJKRegex}|{TimeHourNumRegex}){TimeClockDescRegex}"; + public static readonly string TimeMinuteRegex = $@"(?{TimeMinuteCJKRegex}|{TimeMinuteNumRegex}){TimeMinuteDescRegex}"; + public static readonly string TimeSecondRegex = $@"(?{TimeSecondCJKRegex}|{TimeSecondNumRegex}){TimeSecondDescRegex}"; + public const string TimeHalfRegex = @"(?过半|半)"; + public const string TimeQuarterRegex = @"(?[一两二三四1-4])\s*(刻钟|刻)"; + public static readonly string TimeCJKTimeRegex = $@"{TimeHourRegex}({TimeQuarterRegex}|{TimeHalfRegex}|((过|又)?{TimeMinuteRegex})({TimeSecondRegex})?)?"; + public static readonly string TimeDigitTimeRegex = $@"(?{TimeHourNumRegex}):(?{TimeMinuteNumRegex})(:(?{TimeSecondNumRegex}))?"; + public const string TimeDayDescRegex = @"(?凌晨|清晨|早上|早间|早|上午|中午|下午|午后|晚上|夜里|夜晚|半夜|午夜|夜间|深夜|傍晚|晚)"; + public const string TimeApproximateDescPreffixRegex = @"(大[约概]|差不多|可能|也许|约|不超过|不多[于过]|最[多长少]|少于|[超短长多]过|几乎要|将近|差点|快要|接近|至少|起码|超出|不到)"; + public const string TimeApproximateDescSuffixRegex = @"(左右)"; + public static readonly string TimeRegexes1 = $@"{TimeApproximateDescPreffixRegex}?{TimeDayDescRegex}?{TimeCJKTimeRegex}{TimeApproximateDescSuffixRegex}?"; + public static readonly string TimeRegexes2 = $@"{TimeApproximateDescPreffixRegex}?{TimeDayDescRegex}?{TimeDigitTimeRegex}{TimeApproximateDescSuffixRegex}?(\s*{AmPmDescRegex}?)"; + public static readonly string TimeRegexes3 = $@"差{TimeMinuteRegex}{TimeCJKTimeRegex}"; + public const string TimePeriodTimePeriodConnectWords = @"(起|至|到|–|-|—|~|~)"; + public static readonly string TimePeriodLeftCJKTimeRegex = $@"(从)?(?{TimeDayDescRegex}?({TimeCJKTimeRegex}))"; + public static readonly string TimePeriodRightCJKTimeRegex = 
$@"{TimePeriodTimePeriodConnectWords}(?{TimeDayDescRegex}?{TimeCJKTimeRegex})(之间)?"; + public static readonly string TimePeriodLeftDigitTimeRegex = $@"(从)?(?{TimeDayDescRegex}?({TimeDigitTimeRegex}))"; + public static readonly string TimePeriodRightDigitTimeRegex = $@"{TimePeriodTimePeriodConnectWords}(?{TimeDayDescRegex}?{TimeDigitTimeRegex})(之间)?"; + public static readonly string TimePeriodShortLeftCJKTimeRegex = $@"(从)?(?{TimeDayDescRegex}?({TimeHourCJKRegex}))"; + public static readonly string TimePeriodShortLeftDigitTimeRegex = $@"(从)?(?{TimeDayDescRegex}?({TimeHourNumRegex}))"; + public static readonly string TimePeriodRegexes1 = $@"({TimePeriodLeftDigitTimeRegex}{TimePeriodRightDigitTimeRegex}|{TimePeriodLeftCJKTimeRegex}{TimePeriodRightCJKTimeRegex})"; + public static readonly string TimePeriodRegexes2 = $@"({TimePeriodShortLeftDigitTimeRegex}{TimePeriodRightDigitTimeRegex}|{TimePeriodShortLeftCJKTimeRegex}{TimePeriodRightCJKTimeRegex})"; + public const string FromToRegex = @"(으?로?부터|과|에서).+(까지).+"; + public const string AmbiguousRangeModifierPrefix = @"(从|自)"; + public const string ReferenceDatePeriodRegex = @"^[.]"; + public const string UnspecificDatePeriodRegex = @"^[.]"; + public const string ParserConfigurationBefore = @"((?和|或|及)?(之前|以前)|前)"; + public const string ParserConfigurationAfter = @"((?和|或|及)?(之后|之後|以后|以後)|后|後)"; + public const string ParserConfigurationUntil = @"(直到|直至|截至|截止(到)?)"; + public const string ParserConfigurationSincePrefix = @"(自从|自|自打|打|从)"; + public const string ParserConfigurationAroundPrefix = @"^[.]"; + public const string ParserConfigurationAroundSuffix = @"^[.]"; + public const string ParserConfigurationSinceSuffix = @"(以来|开始|起)"; + public const string ParserConfigurationLastWeekDayRegex = @"最后一个"; + public const string ParserConfigurationNextMonthRegex = @"下一个"; + public const string ParserConfigurationLastMonthRegex = @"上一个"; + public const string ParserConfigurationDatePrefix = @" "; + public static readonly Dictionary 
ParserConfigurationUnitMap = new Dictionary + { + { @"년", @"Y" }, + { @"월", @"MON" }, + { @"달", @"MON" }, + { @"일", @"D" }, + { @"날", @"D" }, + { @"칠", @"D" }, + { @"영업일 기준으로", @"BD" }, + { @"한나절", @"QD" }, + { @"주", @"W" }, + { @"주일", @"W" }, + { @"시", @"H" }, + { @"시간", @"H" }, + { @"분", @"M" }, + { @"초", @"S" }, + { @"하루", @"P1D" }, + { @"이틀", @"P2D" }, + { @"사흘", @"P3D" }, + { @"나흘", @"P4D" }, + { @"닷새", @"P5D" }, + { @"엿새", @"P6D" }, + { @"이레", @"P7D" }, + { @"여드레", @"P8D" }, + { @"아흐레", @"P9D" }, + { @"열흘", @"P10D" }, + { @"종일", @"whole" }, + { @"내내", @"whole" }, + { @"몇", @"some" }, + { @"여러", @"some" }, + { @"더", @"more" }, + { @"이상", @"more" }, + { @"이하", @"less" }, + { @"초과", @"more" }, + { @"미만", @"less" } + }; + public static readonly Dictionary ParserConfigurationUnitValueMap = new Dictionary + { + { @"years", 31536000 }, + { @"year", 31536000 }, + { @"months", 2592000 }, + { @"month", 2592000 }, + { @"weeks", 604800 }, + { @"week", 604800 }, + { @"days", 86400 }, + { @"day", 86400 }, + { @"hours", 3600 }, + { @"hour", 3600 }, + { @"hrs", 3600 }, + { @"hr", 3600 }, + { @"h", 3600 }, + { @"minutes", 60 }, + { @"minute", 60 }, + { @"mins", 60 }, + { @"min", 60 }, + { @"seconds", 1 }, + { @"second", 1 }, + { @"secs", 1 }, + { @"sec", 1 } + }; + public static readonly IList MonthTerms = new List + { + @"월", + @"달" + }; + public static readonly IList WeekendTerms = new List + { + @"주말" + }; + public static readonly IList WeekTerms = new List + { + @"주", + @"주일" + }; + public static readonly IList YearTerms = new List + { + @"년" + }; + public static readonly IList ThisYearTerms = new List + { + @"금년", + @"올해" + }; + public static readonly IList YearToDateTerms = new List + { + @"올해 초부터 현재까지" + }; + public static readonly IList LastYearTerms = new List + { + @"작년" + }; + public static readonly IList NextYearTerms = new List + { + @"내년" + }; + public static readonly IList YearAfterNextTerms = new List + { + @"내후년" + }; + public static readonly IList 
YearBeforeLastTerms = new List + { + @"재작년" + }; + public static readonly Dictionary ParserConfigurationSeasonMap = new Dictionary + { + { @"봄", @"SP" }, + { @"여름", @"SU" }, + { @"가을", @"FA" }, + { @"겨울", @"WI" } + }; + public static readonly Dictionary ParserConfigurationSeasonValueMap = new Dictionary + { + { @"SP", 3 }, + { @"SU", 6 }, + { @"FA", 9 }, + { @"WI", 12 } + }; + public static readonly Dictionary ParserConfigurationCardinalMap = new Dictionary + { + { @"일", 1 }, + { @"이", 2 }, + { @"삼", 3 }, + { @"사", 4 }, + { @"오", 5 }, + { @"1", 1 }, + { @"2", 2 }, + { @"3", 3 }, + { @"4", 4 }, + { @"5", 5 }, + { @"첫 번째", 1 }, + { @"두 번째", 2 }, + { @"세 번째", 3 }, + { @"네 번째", 4 }, + { @"다섯 번째", 5 }, + { @"첫째", 1 }, + { @"둘째", 2 }, + { @"셋째", 3 }, + { @"넷째", 4 }, + { @"다섯째", 5 }, + { @"마지막", 5 } + }; + public static readonly Dictionary ParserConfigurationDayOfMonth = new Dictionary + { + { @"01", 1 }, + { @"02", 2 }, + { @"03", 3 }, + { @"04", 4 }, + { @"05", 5 }, + { @"06", 6 }, + { @"07", 7 }, + { @"08", 8 }, + { @"09", 9 }, + { @"1", 1 }, + { @"2", 2 }, + { @"3", 3 }, + { @"4", 4 }, + { @"5", 5 }, + { @"6", 6 }, + { @"7", 7 }, + { @"8", 8 }, + { @"9", 9 }, + { @"10", 10 }, + { @"11", 11 }, + { @"12", 12 }, + { @"13", 13 }, + { @"14", 14 }, + { @"15", 15 }, + { @"16", 16 }, + { @"17", 17 }, + { @"18", 18 }, + { @"19", 19 }, + { @"20", 20 }, + { @"21", 21 }, + { @"22", 22 }, + { @"23", 23 }, + { @"24", 24 }, + { @"25", 25 }, + { @"26", 26 }, + { @"27", 27 }, + { @"28", 28 }, + { @"29", 29 }, + { @"30", 30 }, + { @"31", 31 }, + { @"1일", 1 }, + { @"2일", 2 }, + { @"3일", 3 }, + { @"4일", 4 }, + { @"5일", 5 }, + { @"6일", 6 }, + { @"7일", 7 }, + { @"8일", 8 }, + { @"9일", 9 }, + { @"10일", 10 }, + { @"11일", 11 }, + { @"12일", 12 }, + { @"13일", 13 }, + { @"14일", 14 }, + { @"15일", 15 }, + { @"16일", 16 }, + { @"17일", 17 }, + { @"18일", 18 }, + { @"19일", 19 }, + { @"20일", 20 }, + { @"21일", 21 }, + { @"22일", 22 }, + { @"23일", 23 }, + { @"24일", 24 }, + { @"25일", 25 }, + { @"26일", 26 }, 
+ { @"27일", 27 }, + { @"28일", 28 }, + { @"29일", 29 }, + { @"30일", 30 }, + { @"31일", 31 }, + { @"일일", 1 }, + { @"십일일", 11 }, + { @"이십일", 21 }, + { @"십일", 11 }, + { @"이십일일", 21 }, + { @"삼십일일", 31 }, + { @"이일", 2 }, + { @"삼일", 3 }, + { @"사일", 4 }, + { @"오일", 5 }, + { @"육일", 6 }, + { @"칠일", 7 }, + { @"팔일", 8 }, + { @"구일", 9 }, + { @"십이일", 12 }, + { @"십삼일", 13 }, + { @"십사일", 14 }, + { @"십오일", 15 }, + { @"십육일", 16 }, + { @"십칠일", 17 }, + { @"십팔일", 18 }, + { @"십구일", 19 }, + { @"이십이일", 22 }, + { @"이십삼일", 23 }, + { @"이십사일", 24 }, + { @"이십오일", 25 }, + { @"이십육일", 26 }, + { @"이십칠일", 27 }, + { @"이십팔일", 28 }, + { @"이십구일", 29 }, + { @"삼십일", 31 }, + { @"초하루", 32 }, + { @"1번", 1 }, + { @"2번", 2 }, + { @"3번", 3 }, + { @"4번", 4 }, + { @"5번", 5 }, + { @"6번", 6 }, + { @"7번", 7 }, + { @"8번", 8 }, + { @"9번", 9 }, + { @"10번", 10 }, + { @"11번", 11 }, + { @"12번", 12 }, + { @"13번", 13 }, + { @"14번", 14 }, + { @"15번", 15 }, + { @"16번", 16 }, + { @"17번", 17 }, + { @"18번", 18 }, + { @"19번", 19 }, + { @"20번", 20 }, + { @"21번", 21 }, + { @"22번", 22 }, + { @"23번", 23 }, + { @"24번", 24 }, + { @"25번", 25 }, + { @"26번", 26 }, + { @"27번", 27 }, + { @"28번", 28 }, + { @"29번", 29 }, + { @"30번", 30 }, + { @"31번", 31 }, + { @"일번", 1 }, + { @"십일번", 11 }, + { @"이십번", 20 }, + { @"십번", 10 }, + { @"이십일번", 21 }, + { @"삼십일번", 31 }, + { @"이번", 2 }, + { @"삼번", 3 }, + { @"사번", 4 }, + { @"오번", 5 }, + { @"육번", 6 }, + { @"칠번", 7 }, + { @"팔번", 8 }, + { @"구번", 9 }, + { @"십이번", 12 }, + { @"십삼번", 13 }, + { @"십사번", 14 }, + { @"십오번", 15 }, + { @"십육번", 16 }, + { @"십칠번", 17 }, + { @"십팔번", 18 }, + { @"십구번", 19 }, + { @"이십이번", 22 }, + { @"이십삼번", 23 }, + { @"이십사번", 24 }, + { @"이십오번", 25 }, + { @"이십육번", 26 }, + { @"이십칠번", 27 }, + { @"이십팔번", 28 }, + { @"이십구번", 29 }, + { @"삼십번", 30 }, + { @"삼십", 30 }, + { @"일", 1 }, + { @"이십", 20 }, + { @"십", 10 }, + { @"이", 2 }, + { @"삼", 3 }, + { @"사", 4 }, + { @"오", 5 }, + { @"육", 6 }, + { @"칠", 7 }, + { @"팔", 8 }, + { @"구", 9 }, + { @"십이", 12 }, + { @"십삼", 13 }, + { @"십사", 14 }, + { @"십오", 15 }, 
+ { @"십육", 16 }, + { @"십칠", 17 }, + { @"십팔", 18 }, + { @"십구", 19 }, + { @"이십이", 22 }, + { @"이십삼", 23 }, + { @"이십사", 24 }, + { @"이십오", 25 }, + { @"이십육", 26 }, + { @"이십칠", 27 }, + { @"이십팔", 28 }, + { @"이십구", 29 } + }; + public static readonly Dictionary ParserConfigurationDayOfWeek = new Dictionary + { + { @"월요일", 1 }, + { @"화요일", 2 }, + { @"수요일", 3 }, + { @"목요일", 4 }, + { @"금요일", 5 }, + { @"토요일", 6 }, + { @"일요일", 0 } + }; + public static readonly Dictionary ParserConfigurationMonthOfYear = new Dictionary + { + { @"1", 1 }, + { @"2", 2 }, + { @"3", 3 }, + { @"4", 4 }, + { @"5", 5 }, + { @"6", 6 }, + { @"7", 7 }, + { @"8", 8 }, + { @"9", 9 }, + { @"10", 10 }, + { @"11", 11 }, + { @"12", 12 }, + { @"01", 1 }, + { @"02", 2 }, + { @"03", 3 }, + { @"04", 4 }, + { @"05", 5 }, + { @"06", 6 }, + { @"07", 7 }, + { @"08", 8 }, + { @"09", 9 }, + { @"한", 1 }, + { @"두", 2 }, + { @"세", 3 }, + { @"네", 4 }, + { @"다섯", 5 }, + { @"여섯", 6 }, + { @"일곱", 7 }, + { @"여덟", 8 }, + { @"아홉", 9 }, + { @"열", 10 }, + { @"얼한", 11 }, + { @"열두", 12 }, + { @"일월", 1 }, + { @"이월", 2 }, + { @"삼월", 3 }, + { @"사월", 4 }, + { @"오월", 5 }, + { @"유월", 6 }, + { @"육월", 6 }, + { @"칠월", 7 }, + { @"팔월", 8 }, + { @"구월", 9 }, + { @"시월", 10 }, + { @"십월", 10 }, + { @"십일월", 11 }, + { @"십이월", 12 }, + { @"1월", 1 }, + { @"2월", 2 }, + { @"3월", 3 }, + { @"4월", 4 }, + { @"5월", 5 }, + { @"6월", 6 }, + { @"7월", 7 }, + { @"8월", 8 }, + { @"9월", 9 }, + { @"10월", 10 }, + { @"11월", 11 }, + { @"12월", 12 }, + { @"01월", 1 }, + { @"02월", 2 }, + { @"03월", 3 }, + { @"04월", 4 }, + { @"05월", 5 }, + { @"06월", 6 }, + { @"07월", 7 }, + { @"08월", 8 }, + { @"09월", 9 }, + { @"1개월", 1 }, + { @"2개월", 2 }, + { @"3개월", 3 }, + { @"4개월", 4 }, + { @"5개월", 5 }, + { @"6개월", 6 }, + { @"7개월", 7 }, + { @"8개월", 8 }, + { @"9개월", 9 }, + { @"10개월", 10 }, + { @"11개월", 11 }, + { @"12개월", 12 }, + { @"새해", 13 } + }; + public const string DateTimeSimpleAmRegex = @"(?早|晨)"; + public const string DateTimeSimplePmRegex = @"(?晚)"; + public const string 
DateTimePeriodMORegex = @"(凌晨|清晨|早上|早间|早|上午)"; + public const string DateTimePeriodMIRegex = @"(中午)"; + public const string DateTimePeriodAFRegex = @"(下午|午后|傍晚)"; + public const string DateTimePeriodEVRegex = @"(晚上|夜里|夜晚|晚)"; + public const string DateTimePeriodNIRegex = @"(半夜|夜间|深夜)"; + public static readonly Dictionary AmbiguityTimeFiltersDict = new Dictionary + { + { @"^[.]", @"^[.]" } + }; + public static readonly Dictionary AmbiguityTimePeriodFiltersDict = new Dictionary + { + { @"^[.]", @"^[.]" } + }; + public static readonly Dictionary AmbiguityDateFiltersDict = new Dictionary + { + { @"^[.]", @"^[.]" } + }; + public static readonly Dictionary AmbiguityDateTimeFiltersDict = new Dictionary + { + { @"^[.]", @"^[.]" } + }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"早", @"(? DurationUnitValueMap = new Dictionary + { + { @"Y", 31536000 }, + { @"MON", 2592000 }, + { @"W", 604800 }, + { @"D", 86400 }, + { @"BD", 5 }, + { @"QD", 21600 }, + { @"H", 3600 }, + { @"M", 60 }, + { @"S", 1 }, + { @"P1D", 86400 }, + { @"P2D", 172800 }, + { @"P3D", 259200 }, + { @"P4D", 345600 }, + { @"P5D", 432000 }, + { @"P6D", 518400 }, + { @"P7D", 604800 }, + { @"P8D", 691200 }, + { @"P9D", 777600 }, + { @"P10D", 864000 }, + { @"whole", 1 }, + { @"some", 2 }, + { @"more", 3 }, + { @"less", 4 } + }; + public static readonly Dictionary HolidayNoFixedTimex = new Dictionary + { + { @"父亲节", @"-06-WXX-6-3" }, + { @"母亲节", @"-05-WXX-7-2" }, + { @"感恩节", @"-11-WXX-4-4" } + }; + public const string MergedBeforeRegex = @"(이?전)$"; + public const string MergedAfterRegex = @"((이?후)|뒤)$"; + public static readonly Dictionary TimeNumberDictionary = new Dictionary + { + { '영', 0 }, + { '일', 1 }, + { '이', 2 }, + { '삼', 3 }, + { '사', 4 }, + { '오', 5 }, + { '육', 6 }, + { '칠', 7 }, + { '팔', 8 }, + { '구', 9 }, + { '공', 0 }, + { '십', 10 }, + { '한', 1 }, + { '두', 2 }, + { '세', 3 }, + { '네', 4 }, + { '열', 10 } + }; + public static readonly Dictionary TimeLowBoundDesc = new 
Dictionary + { + { @"오전", 11 }, + { @"정오", 12 }, + { @"오후", 13 }, + { @"午后", 12 }, + { @"晚上", 18 }, + { @"夜里", 18 }, + { @"夜晚", 18 }, + { @"夜间", 18 }, + { @"深夜", 18 }, + { @"傍晚", 18 }, + { @"晚", 18 }, + { @"pm", 12 } + }; + public const string DefaultLanguageFallback = @"YMD"; + public static readonly IList MorningTermList = new List + { + @"早", + @"上午", + @"早间", + @"早上", + @"清晨" + }; + public static readonly IList MidDayTermList = new List + { + @"中午", + @"正午" + }; + public static readonly IList AfternoonTermList = new List + { + @"下午", + @"午后" + }; + public static readonly IList EveningTermList = new List + { + @"晚", + @"晚上", + @"夜里", + @"傍晚", + @"夜晚" + }; + public static readonly IList DaytimeTermList = new List + { + @"白天", + @"日间" + }; + public static readonly IList NightTermList = new List + { + @"深夜" + }; + public static readonly Dictionary DynastyYearMap = new Dictionary + { + { @"贞观", 627 }, + { @"开元", 713 }, + { @"神龙", 705 }, + { @"洪武", 1368 }, + { @"建文", 1399 }, + { @"永乐", 1403 }, + { @"景泰", 1450 }, + { @"天顺", 1457 }, + { @"成化", 1465 }, + { @"嘉靖", 1522 }, + { @"万历", 1573 }, + { @"崇祯", 1628 }, + { @"顺治", 1644 }, + { @"康熙", 1662 }, + { @"雍正", 1723 }, + { @"乾隆", 1736 }, + { @"嘉庆", 1796 }, + { @"道光", 1821 }, + { @"咸丰", 1851 }, + { @"同治", 1862 }, + { @"光绪", 1875 }, + { @"宣统", 1909 }, + { @"民国", 1912 } + }; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Korean/DateTimeDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Korean/DateTimeDefinitions.tt new file mode 100644 index 0000000000..8e3209daa0 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Korean/DateTimeDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Korean\Korean-DateTime.yaml"; + this.Language = "Korean"; + this.ClassName = "DateTimeDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git 
a/.NET/Microsoft.Recognizers.Definitions.Common/Korean/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Korean/NumbersDefinitions.cs index b5d15502e1..33ef3241df 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Korean/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Korean/NumbersDefinitions.cs @@ -29,7 +29,7 @@ public static class NumbersDefinitions public const char NonDecimalSeparatorChar = ' '; public const string HalfADozenText = @""; public const string WordSeparatorToken = @""; - public const char ZeroChar = '?'; + public const char ZeroChar = '영'; public const char PairChar = '?'; public static readonly Dictionary RoundNumberMap = new Dictionary { @@ -53,14 +53,25 @@ public static class NumbersDefinitions { { '영', 0 }, { '일', 1 }, + { '하', 1 }, + { '한', 1 }, { '이', 2 }, + { '두', 2 }, { '삼', 3 }, + { '셋', 3 }, + { '세', 3 }, { '사', 4 }, + { '네', 4 }, { '오', 5 }, + { '다', 5 }, { '육', 6 }, + { '여', 6 }, { '칠', 7 }, + { '곱', 7 }, { '팔', 8 }, + { '덟', 8 }, { '구', 9 }, + { '홉', 9 }, { '빵', 0 }, { '0', 0 }, { '1', 1 }, @@ -108,13 +119,43 @@ public static class NumbersDefinitions }; public static readonly Dictionary UnitMap = new Dictionary { - { @" ", @"" }, - { @"몇", @"" } + { @"첫", @"일" }, + { @"처음", @"일" }, + { @"여섯", @"육" }, + { @"하나", @"일" }, + { @"둘", @"이" }, + { @"셋", @"삼" }, + { @"넷", @"사" }, + { @"다섯", @"오" }, + { @"일곱", @"칠" }, + { @"여덟", @"팔" }, + { @"아홉", @"구" }, + { @"스물", @"이십" }, + { @"서른", @"삼십" }, + { @"마흔", @"사십" }, + { @"쉰", @"오십" }, + { @"예순", @"육십" }, + { @"일흔", @"칠십" }, + { @"여든", @"팔십" }, + { @"아흔", @"구십" }, + { @"온", @"백" }, + { @"즈믄", @"천" }, + { @"다스", @"십이" }, + { @"이십오일", @"이십오" }, + { @"스무", @"이십" }, + { @"번째", @"" }, + { @"등", @"" }, + { @"이백십", @"백백십" }, + { @"삼백십", @"백백백십" }, + { @"십세", @"십" }, + { @" ", @"" } }; public static readonly IList RoundDirectList = new List { '빵', - '열' + '열', + '조', + '번' }; public static readonly IList TenChars = new List { @@ -122,44 +163,54 @@ 
public static class NumbersDefinitions }; public static readonly string DigitalNumberRegex = $@"((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; public const string ZeroToNineFullHalfRegex = @"[\d1234567890]"; - public static readonly string DigitNumRegex = $@"{ZeroToNineFullHalfRegex}+"; + public static readonly string DigitNumRegex = $@"{ZeroToNineFullHalfRegex}+|반"; public const string DozenRegex = @".*타$"; - public const string PercentageRegex = @"(?<=백\s*분\s*의).+|.+(?=퍼\s*센\s*트)|.*(?=[%%])"; - public static readonly string DoubleAndRoundRegex = $@"{ZeroToNineFullHalfRegex}+(\.{ZeroToNineFullHalfRegex}+)?\s*[만억]{{1,2}}(\s*(이상))?"; - public const string FracSplitRegex = @"[와|과]|분\s*의"; - public const string ZeroToNineIntegerRegex = @"[일이삼사오육륙칠팔구영공]"; + public const string PercentageRegex = @"(?<=백\s*분\s*의).+|.+(?=퍼\s*센\s*트*)|.*(?=[%%])|.+(?=프\s*로*)"; + public static readonly string DoubleAndRoundRegex = $@"{ZeroToNineFullHalfRegex}+(\.{ZeroToNineFullHalfRegex}+)?{RoundNumberIntegerRegex}{{1,2}}(\s*(이상))?"; + public const string FracSplitRegex = @"(와|과|분\s*의|중)"; + public const string ZeroToNineIntegerRegex = @"(영|령|공|(?((끝(에서)?\s*((세|두)번째)?(바로)?\s*(전의)?(\s*것)?)|뒤에서 세번째|다음(\s*것)?|이전 것|현재|(((마지막)((에)?\s*((서)?\s*(두번째|((바로)?\s*(것|전)))|의 옆))?)|지금)(의 것)?))"; + public static readonly string OrdinalNumbersRegex = $@"({ZeroToNineFullHalfRegex}+\s*(번째))|({ZeroToNineFullHalfRegex}+(?:일))"; + public static readonly string OrdinalKoreanRegex = $@"({OrdinalRegex}|{RelativeOrdinalRegex}|{OrdinalNumbersRegex})"; + public static readonly string AllFractionNumber = $@"(반)|({NegativeNumberTermsRegex}?((({ZeroToNineFullHalfRegex}+|{AllIntRegex}+|{ZeroToNineIntegerRegex}+|{RoundNumberIntegerRegex}+)\s*(와|과)\s*)+)?{NegativeNumberTermsRegex}?({ZeroToNineFullHalfRegex}+|{AllIntRegex}+|{ZeroToNineIntegerRegex}+|{RoundNumberIntegerRegex}+)\s*(분\s*의|중)\s*{NegativeNumberTermsRegex}?({ZeroToNineFullHalfRegex}+|{AllIntRegex}+|{ZeroToNineIntegerRegex}+|{RoundNumberIntegerRegex}+))"; 
+ public static readonly string FractionNotationSpecialsCharsRegex = $@"(?)"; - public const string LessRegex = @"(미만|적|낮|작|더적|더낮|더적|<)"; - public const string EqualRegex = @"(동일|같|=)"; - public static readonly string MoreOrEqual = $@"(({MoreRegex}\s*(거나)?\s*{EqualRegex}))"; - public const string MoreOrEqualSuffix = @"\s*(이상)"; - public static readonly string LessOrEqual = $@"(({LessRegex}\s*(거나)?\s*{EqualRegex}))"; - public const string LessOrEqualSuffix = @"\s*(이상)"; + public static readonly string SimpleIntegerPercentageRegex = $@"(?=|>|(이\s)?넘는(다)?|초과이다|크고|(살)?이 넘는다|크고|보다 크다|보다 높(거나)?|(을 초과하는)|크(거나(\s같(다|고)?)?))((습)?니다|(아|네|군)?요)?"; + public const string LessRegex = @"(미만|마리|적|낮|작|더적|더낮|더적|<|아래|작다|같)((네|아|어|군)요|습니다|은|다)?"; + public const string EqualRegex = @"(동일|같(고)?|=|(해당하는)|작은|그와 같다)"; + public const string RangePrefixLessRegex = @"(최대|까지최소|(?|=)<|≤)"; + public const string RangePrefixMoreRegex = @"((?|≥|>=|개에서 최소)"; + public static readonly string MoreOrEqual = $@"(({MoreRegex}\s*(거나)?\s*{EqualRegex})|최소)"; + public static readonly string MoreOrEqual2 = $@"(\s*(거나)?\s*(그보다)\s*{MoreRegex})"; + public const string MoreOrEqualSuffix = @"\s*(이상|세 이상|개 이상|(과 같)?거나 그보다 많다|거나 그보다 크다)"; + public static readonly string LessOrEqual = $@"(?:(이|보다|과|≤)?)?\s*(({LessRegex}\s*(거나)?\s*{EqualRegex}(은|다)?)|≤)"; + public const string LessOrEqualSuffix = @"\s*((달하는|또는 그 미만|또는 그보다 적게|거나 그보다 작다|또는 그보다 작은|점 이하|이하|이하이다))"; + public const string OneNumberRangeMoreSeparateRegex = @"(>=|≥|과 같|이거나)"; + public const string OneNumberRangeLessSeparateRegex = @"같거나|약"; + public static readonly string OneNumberRangeEqualRegex = $@"((?((?!((\s(?!\d+))|(,(?!\d+))|。)).)+)\s*(과|에)+\s*{EqualRegex})(다|개)?|((?((?!((\s(?!\d+))|(,(?!\d+))|。)).)+)\s*(년에)\s*(\d+)(은|이다))|(정확히|같음|평등|그냥|저스트)(?\s*[+-]?(\d+[\.,]?\d+))"; + public static readonly string OneNumberRangeEqualRegex2 = $@"((?(((?!((\s(?!\d+))|(,(?!\d+))|。)))(?<=((는\s+))))([\d]|{AllIntRegex})+)(이다))"; + public static readonly string 
OneNumberRangeMoreRegex1 = $@"((?((?!(((?!\d+))|((,)(?!\d+))|。)).)+[\.,]?(((?!(((?!\d+))|((,)(?!\d+))|。)).)+)?)\s*(이|보다)+\s*(그|그보다)?\s*({MoreOrEqual}|{MoreRegex}|{MoreOrEqualSuffix})(은|다)?)"; + public static readonly string OneNumberRangeMoreRegex2 = $@"((?(((?!(((?!([십백천만억조경열]|(영|령|공|일|이(?!다)|두|삼|사|오|육|(?(스물|서른|마흔|쉰|예순|일흔|여든|아흔|온|즈믄|다스|스무|이십오일)|(첫|처음|여섯|하나|둘|셋|넷|다섯|여섯|일곱|여덟|아홉)+))((\s등\s)?보다)?(\s살이)?\s({MoreRegex})|(최소\s*)?(?((?!((、(?!\d+))|(、(?!\d+))|。))|(?!\s+).)+)\s*(((혹은\s*그)?){MoreRegex}|((혹은\s*그)?){MoreOrEqualSuffix})"; + public static readonly string OneNumberRangeMoreRegex3 = $@"(({RangePrefixMoreRegex}|{MoreOrEqual})\s*(?(((?!(((?!\d+))|((,)(?!\d+))|。)).)+)))|(최소\s*(?\s*[+-]?(\d+[\.,]?\d+)))"; + public static readonly string OneNumberRangeMoreRegex4 = $@"((?((?!((\s(?!\d+))|(,(?!\d+))|。)).)+)\s*(과|에)+\s*{EqualRegex}){MoreOrEqual2}(다)?"; + public static readonly string OneNumberRangeMoreRegex5 = $@"(?((?![,.](?!\d+)).)+)\s*((또는)\s+(그){MoreOrEqualSuffix})"; + public static readonly string OneNumberRangeMoreRegexFraction = $@"((?(((\d+)[//]).)+)(이)\s*{MoreRegex})"; + public static readonly string OneNumberRangeLessRegex1 = $@"((?((?!(((?!\d+))|((,)(?!\d+))|。)).)+)\s*({LessOrEqual}|{LessOrEqualSuffix}|{LessRegex}))|((?((?!(((?!\d+))|((,)(?!\d+))|。)).)+[\.,]?(((?!(((?!\d+))|((,)(?!\d+))|。)).)+)?)\s*(이|보다|거나|또는)+\s*(그|그보다)?\s*{LessRegex})|{OneNumberRangeLessRegex5}"; + public static readonly string TwoNumberRangeRegex = $@"({RangePrefixLessRegex})\s*(?(((?!((\s(?!\d+))|((,)(?!\d+))|。)).)+))\s*({RangePrefixMoreRegex}\s*(?\s*[+-]?(\d+[\.,]?\d+)))(개)?"; + public static readonly string OneNumberRangeLessRegex3 = $@"(?:({RangePrefixLessRegex}))\s*(?(((?!((\s(?!\d+))|((,)(?!\d+))|。)).)+))|((약)(?\s*[+-]?(\d+[\.,]?\d+))\s*(미만|개 이하가))"; + public static readonly string OneNumberRangeLessRegex4 = $@"(?(((?!((\s(?!\d+))|((,)(?!\d+))|。)).)+))\s*(에)(?:({LessOrEqualSuffix}))"; + public const string OneNumberRangeLessRegex5 = 
@"((?((이분의\s|약\s*)|(스물|서른|마흔|쉰|예순|일흔|여든|아흔|온|즈믄|다스|스무|이십오일)|(첫|처음|여섯|하나|둘|셋|넷|다섯|여섯|일곱|여덟|아홉)+|[영령공일이두삼사오육칠팔구]+|[십백천만억조경열]+)+)\s*(또는|등)?\s*(살이|그|그보다|(위)?보다(는)?)?\s*(낮다|낮은|미만|마리 미만|적게|밑|(개 )?이하|작다|작거나)(\s?같다?)?)"; + public static readonly string TwoNumberRangeRegex1 = $@"(?((?!((,(?!\d+))|(,(?!\d+))|。)).)+)(위?)\s*(과|와|{TillRegex})\s*(?((?!((,(?!\d+))|(,(?!\d+))|。)).)+)(위?)\s*(사이)"; + public static readonly string TwoNumberRangeRegex2 = $@"(({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\s*(과|지만|또는|,|、|,)?\s*({OneNumberRangeLessRegex1}))"; + public static readonly string TwoNumberRangeRegex3 = $@"({OneNumberRangeLessRegex1})\s*(과|또는|,|、|,)?\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})"; + public static readonly string TwoNumberRangeRegex4 = $@"(?((?!((,(?!\d+))|(,(?!\d+))|。)).)+)\s*{TillRegex}\s*(?((?!((,(?!\d+))|(,(?!\d+))|。)).)+)(까지)?"; + public static readonly string TwoNumberRangeRegex5 = $@"(?((마이너스\s?)?([십백천만억조경열]|(영|령|공|일|이(?!다)|두|삼|사|오|육|(?((마이너스\s?)?(([십백천만억조경열]|(영|령|공|일|이(?!다)|두|삼|사|오|육|(?((?!((,(?!\d+))|(,(?!\d+))|。|\D)).)+)\s*{TillRegex}+\s*(?((?!((,(?!\d+))|(,(?!\d+))|。)).)+)\s*(까지))"; + public static readonly string TwoNumberRangeRegex7 = $@"({OneNumberRangeMoreRegex2}\s*{OneNumberRangeLessRegex5})"; + public const string InexactNumberUnitRegex = @"(몇(?!.+\?)|며|여러)"; public static readonly Dictionary RelativeReferenceOffsetMap = new Dictionary { - { @"", @"" } + { @"마지막", @"0" }, + { @"다음", @"1" }, + { @"뒤에서 세번째", @"-2" }, + { @"마지막에서 두번째", @"-1" }, + { @"마지막의 옆", @"-1" }, + { @"마지막에서 바로 전의 것", @"-1" }, + { @"이전 것", @"-1" }, + { @"다음 것", @"1" }, + { @"마지막에서 바로 전", @"-1" }, + { @"지금의 것", @"0" }, + { @"현재", @"0" }, + { @"끝", @"0" }, + { @"끝에서 세번째", @"-2" }, + { @"끝에서 두번째", @"-1" }, + { @"끝에서 바로 전의 것", @"-1" } }; public static readonly Dictionary RelativeReferenceRelativeToMap = new Dictionary { - { @"", @"" } + { @"마지막", @"end" }, + { @"다음", @"current" }, + { @"뒤에서 세번째", @"end" }, + { @"마지막에서 두번째", @"end" }, + { @"마지막의 옆", @"end" }, + 
{ @"마지막에서 바로 전의 것", @"end" }, + { @"이전 것", @"current" }, + { @"다음 것", @"current" }, + { @"마지막에서 바로 전", @"end" }, + { @"지금의 것", @"current" }, + { @"현재", @"current" }, + { @"끝", @"end" }, + { @"끝에서 세번째", @"end" }, + { @"끝에서 두번째", @"end" }, + { @"끝에서 바로 전의 것", @"end" } }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Korean/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Korean/NumbersWithUnitDefinitions.cs new file mode 100644 index 0000000000..02c1a4f94f --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Korean/NumbersWithUnitDefinitions.cs @@ -0,0 +1,683 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Korean\Korean-NumbersWithUnit.yaml +// - Language: Korean +// - ClassName: NumbersWithUnitDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Korean +{ + using System; + using System.Collections.Generic; + + public static class NumbersWithUnitDefinitions + { + public static readonly IList AgeAmbiguousValues = new List + { + @"살", + @"세", + @"월", + @"개월" + }; + public static readonly Dictionary AgeSuffixList = new Dictionary + { + { @"Year", @"살|세" }, + { @"Month", @"개월" }, + { @"Week", @"" }, + { @"Day", @"" } + }; + public const string BuildPrefix = @""; + public const string BuildSuffix = @""; + public const string ConnectorToken = @""; + public const bool CheckFirstSuffix = true; + public static readonly Dictionary CurrencySuffixList = new Dictionary + { + { @"Afghan afghani", @"아프가니" }, + { @"Pul", @"普尔" }, + { @"Euro", @"유로" }, + { @"Cent", @"센트" }, + { @"Albanian lek", @"렉" }, + { @"Angolan kwanza", @"콴자" }, + { @"Armenian dram", @"아르메니아 드람" }, + { @"Aruban florin", @"아루반 플로린" }, + { @"Bangladeshi taka", @"타카" }, + { @"Paisa", @"파이사" }, + { @"Bhutanese ngultrum", @"눌트럼" }, + { @"Chetrum", @"切特鲁姆" }, + { @"Bolivian boliviano", @"볼리비아노" }, + { @"Bosnia and Herzegovina convertible mark", @"마커" }, + { @"Botswana pula", @"풀라" }, + { @"Thebe", @"thebe" }, + { @"Brazilian real", @"브라질리안 레알" }, + { @"Bulgarian lev", @"불가리안 레프" }, + { @"Stotinka", @"斯托丁卡" }, + { @"Cambodian riel", @"캄보디아 릴" }, + { @"Cape Verdean escudo", @"카보베르데 이스쿠두" }, + { @"Costa Rican colón", @"콜론" }, + { @"Croatian kuna", @"크로아티안 쿠나" }, + { @"Lipa", @"利巴" }, + { @"Eritrean nakfa", @"낙파" }, + { @"Ethiopian birr", @"에디오피안 비르" }, + { @"Gambian dalasi", @"달라시" }, + { @"Butut", @"부투트" }, + { @"Georgian lari", @"라리" }, + { @"Tetri", @"特特里|泰特里" }, + { @"Ghanaian cedi", @"세디" }, + { @"Pesewa", @"比塞瓦" }, + { @"Guatemalan quetzal", @"케트살" }, + { @"Haitian gourde", @"海地古德" }, + { @"Honduran lempira", @"洪都拉斯伦皮拉" }, + { @"Hungarian forint", @"匈牙利福林|匈牙利货币|匈牙利福林币" }, + { @"Iranian rial", @"伊朗里亚尔|伊朗莱尔" }, + { @"Yemeni 
rial", @"叶门莱尔|叶门里亚尔" }, + { @"Israeli new shekel", @"₪|ils|以色列币|以色列新克尔|谢克尔" }, + { @"Japanese yen", @"엔|옌" }, + { @"Sen", @"日本銭" }, + { @"Kazakhstani tenge", @"텡게" }, + { @"Kenyan shilling", @"케냐 실링" }, + { @"North Korean won", @"북한 원" }, + { @"South Korean won", @"원" }, + { @"Korean won", @"₩" }, + { @"Kyrgyzstani som", @"섬" }, + { @"Lao kip", @"킵" }, + { @"Att", @"att" }, + { @"Lesotho loti", @"로티" }, + { @"South African rand", @"란드" }, + { @"Macedonian denar", @"데나르" }, + { @"Deni", @"第尼" }, + { @"Malagasy ariary", @"마다가스카르 아리아" }, + { @"Iraimbilanja", @"伊莱姆比拉贾" }, + { @"Malawian kwacha", @"콰차" }, + { @"Tambala", @"坦巴拉" }, + { @"Malaysian ringgit", @"말레이시아 링겟" }, + { @"Mauritanian ouguiya", @"우기야" }, + { @"Khoums", @"库姆斯" }, + { @"Mozambican metical", @"메티칼" }, + { @"Burmese kyat", @"차트" }, + { @"Pya", @"缅分" }, + { @"Nigerian naira", @"나이라" }, + { @"Kobo", @"考包" }, + { @"Turkish lira", @"터키리라" }, + { @"Kuruş", @"库鲁" }, + { @"Omani rial", @"리알 오마니" }, + { @"Panamanian balboa", @"발보아" }, + { @"Centesimo", @"意大利分|乌拉圭分|巴拿马分" }, + { @"Papua New Guinean kina", @"키나" }, + { @"Toea", @"托亚|托伊" }, + { @"Peruvian sol", @"솔" }, + { @"Polish złoty", @"P즐로티" }, + { @"Grosz", @"格罗希" }, + { @"Qatari riyal", @"카타르 리알" }, + { @"Saudi riyal", @"沙特里亚尔" }, + { @"Riyal", @"里亚尔" }, + { @"Dirham", @"迪拉姆" }, + { @"Halala", @"哈拉" }, + { @"Samoan tālā", @"탈라" }, + { @"Sierra Leonean leone", @"리온" }, + { @"Peseta", @"페세타" }, + { @"Swazi lilangeni", @"릴랑게니" }, + { @"Tajikistani somoni", @"소모니" }, + { @"Thai baht", @"바트" }, + { @"Satang", @"萨当" }, + { @"Tongan paʻanga", @"파앙가" }, + { @"Ukrainian hryvnia", @"흐리브냐" }, + { @"Vanuatu vatu", @"바투" }, + { @"Vietnamese dong", @"베트남 동" }, + { @"Indonesian rupiah", @"루피아" }, + { @"Netherlands guilder", @"네덜란드 휠던|네덜란드 길더" }, + { @"Surinam florin", @"苏里南盾" }, + { @"Guilder", @"盾" }, + { @"Zambian kwacha", @"赞比亚克瓦查" }, + { @"Moroccan dirham", @"모로코 디르함" }, + { @"United Arab Emirates dirham", @"아랍에미리트 디르함" }, + { @"Azerbaijani manat", @"아제르바이잔 마나트" }, + 
{ @"Turkmenistan manat", @"투르크메니스탄 마나트" }, + { @"Manat", @"마나트" }, + { @"Somali shilling", @"소말리아 실링" }, + { @"Somaliland shilling", @"소말릴랜드 실링" }, + { @"Tanzanian shilling", @"탄자니아 실링" }, + { @"Ugandan shilling", @"우간다 실링" }, + { @"Romanian leu", @"루마니아 레우" }, + { @"Moldovan leu", @"몰도바 레우" }, + { @"Leu", @"레우" }, + { @"Ban", @"巴尼" }, + { @"Nepalese rupee", @"네팔 루피" }, + { @"Pakistani rupee", @"파키스탄 루피" }, + { @"Indian rupee", @"인디안 루피" }, + { @"Seychellois rupee", @"세이셜 루피" }, + { @"Mauritian rupee", @"모리셔스 루피" }, + { @"Maldivian rufiyaa", @"루피야" }, + { @"Sri Lankan rupee", @"스리랑카 루피" }, + { @"Rupee", @"루피" }, + { @"Czech koruna", @"체코 코루나" }, + { @"Danish krone", @"덴마크 크로네" }, + { @"Norwegian krone", @"노르웨이 크로네" }, + { @"Faroese króna", @"페로 제도 크로나" }, + { @"Icelandic króna", @"아이슬란드 크로나" }, + { @"Swedish krona", @"스웨덴 크로나" }, + { @"Krone", @"克朗" }, + { @"Øre", @"奥依拉|奥拉|埃利" }, + { @"West African CFA franc", @"非共体法郎" }, + { @"Central African CFA franc", @"中非法郎|中非金融合作法郎" }, + { @"Comorian franc", @"科摩罗法郎" }, + { @"Congolese franc", @"콩고 프랑" }, + { @"Burundian franc", @"브룬디 프랑" }, + { @"Djiboutian franc", @"지투비 프랑" }, + { @"CFP franc", @"CFP 프랑" }, + { @"Guinean franc", @"기나아 프랑" }, + { @"Swiss franc", @"스위스 프랑" }, + { @"Rwandan franc", @"르완다 프랑" }, + { @"Belgian franc", @"比利时法郎" }, + { @"Rappen", @"瑞士分|瑞士生丁" }, + { @"Franc", @"프랑" }, + { @"Centime", @"生丁|仙士" }, + { @"Russian ruble", @"러시아 루블" }, + { @"Transnistrian ruble", @"德涅斯特卢布" }, + { @"Belarusian ruble", @"벨라루시안 루블" }, + { @"Kopek", @"戈比" }, + { @"Ruble", @"루블" }, + { @"Algerian dinar", @"알제르 디나르" }, + { @"Bahraini dinar", @"바레인 디나르" }, + { @"Iraqi dinar", @"이라크 디나르" }, + { @"Jordanian dinar", @"요르단 디나르" }, + { @"Kuwaiti dinar", @"쿠웨이트 디나르" }, + { @"Libyan dinar", @"리비야 디나르" }, + { @"Serbian dinar", @"세르비아 디나르" }, + { @"Tunisian dinar", @"튀니지 디나르" }, + { @"Dinar", @"디나르" }, + { @"Fils", @"费尔" }, + { @"Para", @"帕拉" }, + { @"Millime", @"米利姆" }, + { @"Argentine peso", @"아르헨티나 페소" }, + { @"Chilean peso", @"칠레니아 
페소" }, + { @"Colombian peso", @"콜롬비안 페소" }, + { @"Cuban peso", @"쿠반 페소" }, + { @"Dominican peso", @"도미니칸 페소" }, + { @"Mexican peso", @"멕시코 페소" }, + { @"Philippine peso", @"필리핀 페소" }, + { @"Uruguayan peso", @"우루과이 페소" }, + { @"Peso", @"페소" }, + { @"Centavo", @"仙|菲辅币" }, + { @"Alderney pound", @"奥尔德尼镑" }, + { @"British pound", @"영국 파운드" }, + { @"Guernsey pound", @"건지섬 파운드" }, + { @"Saint Helena pound", @"세인트 헬레나 파운드" }, + { @"Egyptian pound", @"이집트 파운드" }, + { @"Falkland Islands pound", @"포크랜드 파운드" }, + { @"Gibraltar pound", @"지브롤터 파운드" }, + { @"Manx pound", @"맨섬 파운드" }, + { @"Jersey pound", @"저지섬 파운드" }, + { @"Lebanese pound", @"레바논 파운드" }, + { @"South Sudanese pound", @"남수단 파운드" }, + { @"Sudanese pound", @"수단 파운드" }, + { @"Syrian pound", @"시리아 파운드" }, + { @"Pound", @"파운드" }, + { @"Pence", @"펜스" }, + { @"Shilling", @"先令" }, + { @"Penny", @"페니" }, + { @"United States dollar", @"US 달러|미국 달러" }, + { @"East Caribbean dollar", @"동카리브 달러" }, + { @"Australian dollar", @"오스트레일리아 달러|호주 달러" }, + { @"Bahamian dollar", @"바하미안 달러" }, + { @"Barbadian dollar", @"바베이도스 달러" }, + { @"Belize dollar", @"벨리즈 달러" }, + { @"Bermudian dollar", @"버뮤디안 달러" }, + { @"Brunei dollar", @"브루나이 달러" }, + { @"Singapore dollar", @"싱가폴 달러" }, + { @"Canadian dollar", @"캐나다 달러" }, + { @"Cayman Islands dollar", @"케이맨제도 달러" }, + { @"New Zealand dollar", @"뉴질랜드 달러" }, + { @"Cook Islands dollar", @"쿡 제도 달러" }, + { @"Fijian dollar", @"피지 달러" }, + { @"Guyanese dollar", @"과야나 달러" }, + { @"Hong Kong dollar", @"홍콩 달러" }, + { @"Macau Pataca", @"파타카" }, + { @"New Taiwan dollar", @"신 대만 달러" }, + { @"Jamaican dollar", @"자메이칸 달러" }, + { @"Kiribati dollar", @"키리바시 달러" }, + { @"Liberian dollar", @"리베리아 달러" }, + { @"Namibian dollar", @"나미비야 달러" }, + { @"Surinamese dollar", @"수리단 달러" }, + { @"Trinidad and Tobago dollar", @"드리니아드 토바고 달러" }, + { @"Tuvaluan dollar", @"투발루 달러" }, + { @"Dollar", @"달러" }, + { @"Chinese yuan", @"위안|중국 위안" }, + { @"Fen", @"分钱|分" }, + { @"Jiao", @"毛钱|毛|角钱|角" }, + { @"Finnish markka", @"핀란드 마르카" }, 
+ { @"Penni", @"盆尼" } + }; + public static readonly Dictionary CurrencyNameToIsoCodeMap = new Dictionary + { + { @"Afghan afghani", @"AFN" }, + { @"Euro", @"EUR" }, + { @"Albanian lek", @"ALL" }, + { @"Angolan kwanza", @"AOA" }, + { @"Armenian dram", @"AMD" }, + { @"Aruban florin", @"AWG" }, + { @"Bangladeshi taka", @"BDT" }, + { @"Bhutanese ngultrum", @"BTN" }, + { @"Bolivian boliviano", @"BOB" }, + { @"Bosnia and Herzegovina convertible mark", @"BAM" }, + { @"Botswana pula", @"BWP" }, + { @"Brazilian real", @"BRL" }, + { @"Bulgarian lev", @"BGN" }, + { @"Cambodian riel", @"KHR" }, + { @"Cape Verdean escudo", @"CVE" }, + { @"Costa Rican colón", @"CRC" }, + { @"Croatian kuna", @"HRK" }, + { @"Czech koruna", @"CZK" }, + { @"Eritrean nakfa", @"ERN" }, + { @"Ethiopian birr", @"ETB" }, + { @"Gambian dalasi", @"GMD" }, + { @"Georgian lari", @"GEL" }, + { @"Ghanaian cedi", @"GHS" }, + { @"Guatemalan quetzal", @"GTQ" }, + { @"Haitian gourde", @"HTG" }, + { @"Honduran lempira", @"HNL" }, + { @"Hungarian forint", @"HUF" }, + { @"Iranian rial", @"IRR" }, + { @"Yemeni rial", @"YER" }, + { @"Israeli new shekel", @"ILS" }, + { @"Japanese yen", @"JPY" }, + { @"Kazakhstani tenge", @"KZT" }, + { @"Kenyan shilling", @"KES" }, + { @"North Korean won", @"KPW" }, + { @"South Korean won", @"KRW" }, + { @"Kyrgyzstani som", @"KGS" }, + { @"Lao kip", @"LAK" }, + { @"Lesotho loti", @"LSL" }, + { @"South African rand", @"ZAR" }, + { @"Macanese pataca", @"MOP" }, + { @"Macedonian denar", @"MKD" }, + { @"Malagasy ariary", @"MGA" }, + { @"Malawian kwacha", @"MWK" }, + { @"Malaysian ringgit", @"MYR" }, + { @"Mauritanian ouguiya", @"MRO" }, + { @"Mongolian tögrög", @"MNT" }, + { @"Mozambican metical", @"MZN" }, + { @"Burmese kyat", @"MMK" }, + { @"Nicaraguan córdoba", @"NIO" }, + { @"Nigerian naira", @"NGN" }, + { @"Turkish lira", @"TRY" }, + { @"Omani rial", @"OMR" }, + { @"Panamanian balboa", @"PAB" }, + { @"Papua New Guinean kina", @"PGK" }, + { @"Paraguayan guaraní", @"PYG" }, + { @"Peruvian 
sol", @"PEN" }, + { @"Polish złoty", @"PLN" }, + { @"Qatari riyal", @"QAR" }, + { @"Saudi riyal", @"SAR" }, + { @"Samoan tālā", @"WST" }, + { @"São Tomé and Príncipe dobra", @"STD" }, + { @"Sierra Leonean leone", @"SLL" }, + { @"Swazi lilangeni", @"SZL" }, + { @"Tajikistani somoni", @"TJS" }, + { @"Thai baht", @"THB" }, + { @"Ukrainian hryvnia", @"UAH" }, + { @"Vanuatu vatu", @"VUV" }, + { @"Venezuelan bolívar", @"VEF" }, + { @"Zambian kwacha", @"ZMW" }, + { @"Moroccan dirham", @"MAD" }, + { @"United Arab Emirates dirham", @"AED" }, + { @"Azerbaijani manat", @"AZN" }, + { @"Turkmenistan manat", @"TMT" }, + { @"Somali shilling", @"SOS" }, + { @"Tanzanian shilling", @"TZS" }, + { @"Ugandan shilling", @"UGX" }, + { @"Romanian leu", @"RON" }, + { @"Moldovan leu", @"MDL" }, + { @"Nepalese rupee", @"NPR" }, + { @"Pakistani rupee", @"PKR" }, + { @"Indian rupee", @"INR" }, + { @"Seychellois rupee", @"SCR" }, + { @"Mauritian rupee", @"MUR" }, + { @"Maldivian rufiyaa", @"MVR" }, + { @"Sri Lankan rupee", @"LKR" }, + { @"Indonesian rupiah", @"IDR" }, + { @"Danish krone", @"DKK" }, + { @"Norwegian krone", @"NOK" }, + { @"Icelandic króna", @"ISK" }, + { @"Swedish krona", @"SEK" }, + { @"West African CFA franc", @"XOF" }, + { @"Central African CFA franc", @"XAF" }, + { @"Comorian franc", @"KMF" }, + { @"Congolese franc", @"CDF" }, + { @"Burundian franc", @"BIF" }, + { @"Djiboutian franc", @"DJF" }, + { @"CFP franc", @"XPF" }, + { @"Guinean franc", @"GNF" }, + { @"Swiss franc", @"CHF" }, + { @"Rwandan franc", @"RWF" }, + { @"Russian ruble", @"RUB" }, + { @"Transnistrian ruble", @"PRB" }, + { @"Belarusian ruble", @"BYN" }, + { @"Algerian dinar", @"DZD" }, + { @"Bahraini dinar", @"BHD" }, + { @"Iraqi dinar", @"IQD" }, + { @"Jordanian dinar", @"JOD" }, + { @"Kuwaiti dinar", @"KWD" }, + { @"Libyan dinar", @"LYD" }, + { @"Serbian dinar", @"RSD" }, + { @"Tunisian dinar", @"TND" }, + { @"Argentine peso", @"ARS" }, + { @"Chilean peso", @"CLP" }, + { @"Colombian peso", @"COP" }, + { 
@"Cuban convertible peso", @"CUC" }, + { @"Cuban peso", @"CUP" }, + { @"Dominican peso", @"DOP" }, + { @"Mexican peso", @"MXN" }, + { @"Uruguayan peso", @"UYU" }, + { @"British pound", @"GBP" }, + { @"Saint Helena pound", @"SHP" }, + { @"Egyptian pound", @"EGP" }, + { @"Falkland Islands pound", @"FKP" }, + { @"Gibraltar pound", @"GIP" }, + { @"Manx pound", @"IMP" }, + { @"Jersey pound", @"JEP" }, + { @"Lebanese pound", @"LBP" }, + { @"South Sudanese pound", @"SSP" }, + { @"Sudanese pound", @"SDG" }, + { @"Syrian pound", @"SYP" }, + { @"United States dollar", @"USD" }, + { @"Australian dollar", @"AUD" }, + { @"Bahamian dollar", @"BSD" }, + { @"Barbadian dollar", @"BBD" }, + { @"Belize dollar", @"BZD" }, + { @"Bermudian dollar", @"BMD" }, + { @"Brunei dollar", @"BND" }, + { @"Singapore dollar", @"SGD" }, + { @"Canadian dollar", @"CAD" }, + { @"Cayman Islands dollar", @"KYD" }, + { @"New Zealand dollar", @"NZD" }, + { @"Fijian dollar", @"FJD" }, + { @"Guyanese dollar", @"GYD" }, + { @"Hong Kong dollar", @"HKD" }, + { @"Jamaican dollar", @"JMD" }, + { @"Liberian dollar", @"LRD" }, + { @"Namibian dollar", @"NAD" }, + { @"Solomon Islands dollar", @"SBD" }, + { @"Surinamese dollar", @"SRD" }, + { @"New Taiwan dollar", @"TWD" }, + { @"Trinidad and Tobago dollar", @"TTD" }, + { @"Tuvaluan dollar", @"TVD" }, + { @"Chinese yuan", @"CNY" }, + { @"Rial", @"__RI" }, + { @"Shiling", @"__S" }, + { @"Som", @"__SO" }, + { @"Dirham", @"__DR" }, + { @"Dinar", @"_DN" }, + { @"Dollar", @"__D" }, + { @"Manat", @"__MA" }, + { @"Rupee", @"__R" }, + { @"Krone", @"__K" }, + { @"Krona", @"__K" }, + { @"Crown", @"__K" }, + { @"Frank", @"__F" }, + { @"Mark", @"__M" }, + { @"Ruble", @"__RB" }, + { @"Peso", @"__PE" }, + { @"Pound", @"__P" }, + { @"Tristan da Cunha pound", @"_TP" }, + { @"South Georgia and the South Sandwich Islands pound", @"_SP" }, + { @"Somaliland shilling", @"_SS" }, + { @"Pitcairn Islands dollar", @"_PND" }, + { @"Palauan dollar", @"_PD" }, + { @"Niue dollar", @"_NID" }, + { 
@"Nauruan dollar", @"_ND" }, + { @"Micronesian dollar", @"_MD" }, + { @"Kiribati dollar", @"_KID" }, + { @"Guernsey pound", @"_GGP" }, + { @"Faroese króna", @"_FOK" }, + { @"Cook Islands dollar", @"_CKD" }, + { @"British Virgin Islands dollar", @"_BD" }, + { @"Ascension pound", @"_AP" }, + { @"Alderney pound", @"_ALP" }, + { @"Abkhazian apsar", @"_AA" } + }; + public static readonly Dictionary FractionalUnitNameToCodeMap = new Dictionary + { + { @"Jiao", @"JIAO" }, + { @"Kopek", @"KOPEK" }, + { @"Pul", @"PUL" }, + { @"Cent", @"CENT" }, + { @"Qindarkë", @"QINDARKE" }, + { @"Penny", @"PENNY" }, + { @"Santeem", @"SANTEEM" }, + { @"Cêntimo", @"CENTIMO" }, + { @"Centavo", @"CENTAVO" }, + { @"Luma", @"LUMA" }, + { @"Qəpik", @"QƏPIK" }, + { @"Fils", @"FILS" }, + { @"Poisha", @"POISHA" }, + { @"Kapyeyka", @"KAPYEYKA" }, + { @"Centime", @"CENTIME" }, + { @"Chetrum", @"CHETRUM" }, + { @"Paisa", @"PAISA" }, + { @"Fening", @"FENING" }, + { @"Thebe", @"THEBE" }, + { @"Sen", @"SEN" }, + { @"Stotinka", @"STOTINKA" }, + { @"Fen", @"FEN" }, + { @"Céntimo", @"CENTIMO" }, + { @"Lipa", @"LIPA" }, + { @"Haléř", @"HALER" }, + { @"Øre", @"ØRE" }, + { @"Piastre", @"PIASTRE" }, + { @"Santim", @"SANTIM" }, + { @"Oyra", @"OYRA" }, + { @"Butut", @"BUTUT" }, + { @"Tetri", @"TETRI" }, + { @"Pesewa", @"PESEWA" }, + { @"Fillér", @"FILLER" }, + { @"Eyrir", @"EYRIR" }, + { @"Dinar", @"DINAR" }, + { @"Agora", @"AGORA" }, + { @"Tïın", @"TIIN" }, + { @"Chon", @"CHON" }, + { @"Jeon", @"JEON" }, + { @"Tyiyn", @"TYIYN" }, + { @"Att", @"ATT" }, + { @"Sente", @"SENTE" }, + { @"Dirham", @"DIRHAM" }, + { @"Rappen", @"RAPPEN" }, + { @"Avo", @"AVO" }, + { @"Deni", @"DENI" }, + { @"Iraimbilanja", @"IRAIMBILANJA" }, + { @"Tambala", @"TAMBALA" }, + { @"Laari", @"LAARI" }, + { @"Khoums", @"KHOUMS" }, + { @"Ban", @"BAN" }, + { @"Möngö", @"MONGO" }, + { @"Pya", @"PYA" }, + { @"Kobo", @"KOBO" }, + { @"Kuruş", @"KURUS" }, + { @"Baisa", @"BAISA" }, + { @"Centésimo", @"CENTESIMO" }, + { @"Toea", @"TOEA" }, + { 
@"Sentimo", @"SENTIMO" }, + { @"Grosz", @"GROSZ" }, + { @"Sene", @"SENE" }, + { @"Halala", @"HALALA" }, + { @"Para", @"PARA" }, + { @"Öre", @"ORE" }, + { @"Diram", @"DIRAM" }, + { @"Satang", @"SATANG" }, + { @"Seniti", @"SENITI" }, + { @"Millime", @"MILLIME" }, + { @"Tennesi", @"TENNESI" }, + { @"Kopiyka", @"KOPIYKA" }, + { @"Tiyin", @"TIYIN" }, + { @"Hào", @"HAO" }, + { @"Ngwee", @"NGWEE" } + }; + public const string CompoundUnitConnectorRegex = @"(?又|再)"; + public static readonly Dictionary CurrencyPrefixList = new Dictionary + { + { @"Dollar", @"$" }, + { @"United States dollar", @"us$" }, + { @"British Virgin Islands dollar", @"bvi$" }, + { @"Brunei dollar", @"b$" }, + { @"Sen", @"sen" }, + { @"Singapore dollar", @"s$" }, + { @"Canadian dollar", @"can$|c$|c $" }, + { @"Cayman Islands dollar", @"ci$" }, + { @"New Zealand dollar", @"nz$|nz $" }, + { @"Guyanese dollar", @"gy$|gy $|g$|g $" }, + { @"Hong Kong dollar", @"hk$|hkd|hk $" }, + { @"Jamaican dollar", @"j$" }, + { @"Namibian dollar", @"nad|n$|n $" }, + { @"Solomon Islands dollar", @"si$|si $" }, + { @"New Taiwan dollar", @"nt$|nt $" }, + { @"Samoan tālā", @"ws$" }, + { @"Chinese yuan", @"¥" }, + { @"Japanese yen", @"¥" }, + { @"Turkish lira", @"₺" }, + { @"Euro", @"€" }, + { @"Pound", @"£" }, + { @"Costa Rican colón", @"₡" } + }; + public static readonly IList CurrencyAmbiguousValues = new List + { + @"원", + @"란드", + @"엔", + @"센트", + @"프랑", + @"휠던" + }; + public static readonly Dictionary DimensionSuffixList = new Dictionary + { + { @"Meter", @"미터|m" }, + { @"Kilometer", @"킬로미터|km" }, + { @"Decimeter", @"데시미터|dm" }, + { @"Centimeter", @"센티미터|cm" }, + { @"Millimeter", @"밀리미터|mm" }, + { @"Micrometer", @"마이크로미터|μm" }, + { @"Picometer", @"피코미터|pm" }, + { @"Nanometer", @"나노미터|nm" }, + { @"Li", @"里|市里" }, + { @"Zhang", @"丈" }, + { @"Chi", @"市尺|尺" }, + { @"Cun", @"市寸|寸" }, + { @"Fen", @"市分|分" }, + { @"Hao", @"毫" }, + { @"Mile", @"마일" }, + { @"Inch", @"인치" }, + { @"Foot", @"피트" }, + { @"Yard", @"야드" }, + { @"Knot", 
@"노트" }, + { @"Light year", @"광년" }, + { @"Meter per second", @"미터/초|m/s" }, + { @"Kilometer per hour", @"킬로미터/시|km/h" }, + { @"Kilometer per minute", @"킬로미터/분|km/min" }, + { @"Kilometer per second", @"킬로미터/초|km/s" }, + { @"Mile per hour", @"마일/시|mi/h" }, + { @"Foot per second", @"피트/초|ft/s" }, + { @"Foot per minute", @"피트/분|ft/min" }, + { @"Yard per minute", @"야드/분|yd/min" }, + { @"Yard per second", @"야드/초|ys/s" }, + { @"Square centimetre", @"제곱센티미터" }, + { @"Square decimeter", @"제곱데시미터" }, + { @"Square meter", @"제곱미터|평방미터|평방 미터" }, + { @"Square kilometer", @"제곱킬로미터|제곱 킬로미터" }, + { @"Acre", @"에이커" }, + { @"Hectare", @"헥타르" }, + { @"Mu", @"묘" }, + { @"Liter", @"리터|l" }, + { @"Milliliter", @"밀리리터|ml" }, + { @"Cubic meter", @"세제곱미터" }, + { @"Cubic decimeter", @"세제곱데시미터" }, + { @"Cubic millimeter", @"세제곱밀리미터" }, + { @"Cubic foot", @"세제곱피트|입방 피트" }, + { @"Gallon", @"갤런" }, + { @"Pint", @"파인트" }, + { @"Dou", @"市斗|斗" }, + { @"Dan", @"市石|石" }, + { @"Kilogram", @"킬로그램|kg" }, + { @"Jin", @"市斤|斤" }, + { @"Milligram", @"밀리그램|mg" }, + { @"Microgram", @"마이크로그램|μg" }, + { @"Barrel", @"배럴" }, + { @"Pot", @"罐" }, + { @"Gram", @"그램|g" }, + { @"Ton", @"톤|미터톤|t" }, + { @"Pound", @"파운드" }, + { @"Ounce", @"온스" }, + { @"Liang", @"两" }, + { @"Bit", @"비트|b|bit" }, + { @"Kilobit", @"킬로비트|kb|Kb" }, + { @"Megabit", @"메가비트|mb|Mb" }, + { @"Gigabit", @"기가비트|gb|Gb" }, + { @"Terabit", @"테라비트|tb|Tb" }, + { @"Petabit", @"페타비트|pb|Pb" }, + { @"Byte", @"바이트|byte|Byte" }, + { @"Kilobyte", @"킬로바이트|kB|KB" }, + { @"Megabyte", @"메가바이트|mB|MB" }, + { @"Gigabyte", @"기가바이트|gB|GB" }, + { @"Terabyte", @"테라바이트|tB|TB" }, + { @"Petabyte", @"페타바이트|pB|PB" } + }; + public static readonly IList DimensionAmbiguousValues = new List + { + @"묘", + @"피트", + @"미터", + @"배럴", + @"톤", + @"m", + @"km", + @"dm", + @"cm", + @"mm", + @"l", + @"ml", + @"kg", + @"mg", + @"g", + @"t", + @"b", + @"byte", + @"kb", + @"mb", + @"gb", + @"tb", + @"pb" + }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { 
@"날", @"떠날" }, + { @"일", @"종일" } + }; + public static readonly Dictionary TemperatureSuffixList = new Dictionary + { + { @"F", @"°f" }, + { @"K", @"K|k" }, + { @"R", @"°r" }, + { @"C", @"°c" }, + { @"Degree", @"도" } + }; + public static readonly Dictionary TemperaturePrefixList = new Dictionary + { + { @"F", @"화씨온도|화씨" }, + { @"K", @"절대온도|절대" }, + { @"R", @"란씨온도|란씨" }, + { @"C", @"섭씨온도|섭씨" } + }; + public static readonly IList TemperatureAmbiguousValues = new List + { + @"도", + @"k" + }; + public const string HalfUnitRegex = @"반"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Korean/NumbersWithUnitDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Korean/NumbersWithUnitDefinitions.tt new file mode 100644 index 0000000000..17d63f4373 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Korean/NumbersWithUnitDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Korean\Korean-NumbersWithUnit.yaml"; + this.Language = "Korean"; + this.ClassName = "NumbersWithUnitDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Korean/QuotedTextDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Korean/QuotedTextDefinitions.cs new file mode 100644 index 0000000000..4e17211fdd --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Korean/QuotedTextDefinitions.cs @@ -0,0 +1,35 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Korean\Korean-QuotedText.yaml +// - Language: Korean +// - ClassName: QuotedTextDefinitions +// +// +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Korean +{ + using System; + using System.Collections.Generic; + + public static class QuotedTextDefinitions + { + public const string LangMarker = @"Kor"; + public const string QuotedTextRegex1 = @"(“([^“”]+)”)"; + public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)"; + public const string QuotedTextRegex3 = @"(《([^《》]+)》)"; + public const string QuotedTextRegex4 = @"(〈([^〈〉]+)〉)"; + public const string QuotedTextRegex5 = @"(﹃([^﹃﹄]+)﹄)"; + public const string QuotedTextRegex6 = @"(﹁([^﹁﹂]+)﹂)"; + public const string QuotedTextRegex7 = @"(""([^""]+)"")"; + public const string QuotedTextRegex8 = @"(\\'([^\']+)\\')"; + public const string QuotedTextRegex9 = @"(`([^`]+)`)"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Korean/QuotedTextDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Korean/QuotedTextDefinitions.tt new file mode 100644 index 0000000000..91ce3e1625 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Korean/QuotedTextDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Korean\Korean-QuotedText.yaml"; + this.Language = "Korean"; + this.ClassName = "QuotedTextDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Microsoft.Recognizers.Definitions.Common.csproj b/.NET/Microsoft.Recognizers.Definitions.Common/Microsoft.Recognizers.Definitions.Common.csproj index 6bd6d96f6d..c794e4c2a2 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Microsoft.Recognizers.Definitions.Common.csproj +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Microsoft.Recognizers.Definitions.Common.csproj @@ -2,13 +2,18 @@ net462 + 9 
false false ../Recognizers-Text.ruleset OnOutputUpdated - + + + true + ..\buildtools\35MSSharedLib1024.snk + true $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + + - + all runtime; build; native; contentfiles; analyzers - + all runtime; build; native; contentfiles; analyzers - + @@ -37,6 +43,16 @@ True ChoiceDefinitions.tt + + True + True + DateTimeDefinitions.tt + + + True + True + NumbersDefinitions.tt + True True @@ -62,6 +78,11 @@ True BaseHashTag.tt + + True + True + BaseQuotedText.tt + True True @@ -92,6 +113,11 @@ True BaseURL.tt + + ChoiceDefinitions.tt + True + True + True True @@ -117,13 +143,13 @@ True True - - URLDefinitions.tt + + IpDefinitions.tt True True - - IpDefinitions.tt + + URLDefinitions.tt True True @@ -137,10 +163,10 @@ True DateTimeDefinitions.tt - + True True - NumberDefinitions.tt + NumbersDefinitions.tt True @@ -172,6 +198,11 @@ True True + + True + True + PhoneNumbersDefinitions.tt + True True @@ -197,7 +228,7 @@ True True - + True True ChoiceDefinitions.tt @@ -222,6 +253,11 @@ True True + + DateTimeDefinitions.tt + True + True + True True @@ -232,15 +268,15 @@ True NumbersWithUnitDefinitions.tt - + True True - DateTimeDefinitions.tt + ChoiceDefinitions.tt - + True True - ChoiceDefinitions.tt + DateTimeDefinitions.tt True @@ -277,6 +313,11 @@ True NumbersDefinitions.tt + + NumbersWithUnitDefinitions.tt + True + True + True True @@ -297,6 +338,11 @@ True True + + True + True + PhoneNumbersDefinitions.tt + True True @@ -322,7 +368,27 @@ True True - + + DateTimeDefinitions.tt + True + True + + + NumbersDefinitions.tt + True + True + + + NumbersWithUnitDefinitions.tt + True + True + + + True + True + TimeZoneDefinitions.tt + + ChoiceDefinitions.tt True True @@ -347,6 +413,10 @@ + + + + @@ -389,7 +459,7 @@ - + @@ -403,6 +473,8 @@ + + @@ -423,6 +495,8 @@ + + @@ -445,6 +519,8 @@ + + @@ -453,6 +529,8 @@ + + @@ -463,6 +541,16 @@ + + + + + + + + + + diff --git 
a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/ChoiceDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/ChoiceDefinitions.cs index e6226b0cf8..b679b7245f 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/ChoiceDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/ChoiceDefinitions.cs @@ -23,7 +23,8 @@ public static class ChoiceDefinitions { public const string LangMarker = @"Por"; public const string TokenizerRegex = @"[^\w\d\u00E0-\u00FC]"; - public const string TrueRegex = @"\b(verdade|verdadeir[oa]|sim|isso|claro|ok)\b|(\uD83D\uDC4D|\uD83D\uDC4C)"; - public const string FalseRegex = @"\b(falso|n[aã]o|incorreto|nada disso)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)"; + public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)"; + public static readonly string TrueRegex = $@"\b(verdade|verdadeir[oa]|sim|isso|claro|ok)\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?"; + public static readonly string FalseRegex = $@"\b(falso|n[aã]o|incorreto|nada disso)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/DateTimeDefinitions.cs index c815bd06e1..b8ea9bb20a 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/DateTimeDefinitions.cs @@ -21,34 +21,47 @@ namespace Microsoft.Recognizers.Definitions.Portuguese public static class DateTimeDefinitions { + public const string LangMarker = @"Por"; public const bool CheckBothBeforeAfter = false; - public const string TillRegex = @"(?ate|as|às|até|ateh|a|ao|--|-|—|——)(\s+(o|[aà](s)?))?"; - public const string AndRegex = @"(?e|e\s*o|--|-|—|——)"; - public const string DayRegex = 
@"(?01|02|03|04|05|06|07|08|09|1|10|11|12|13|14|15|16|17|18|19|2|20|21|22|23|24|25|26|27|28|29|3|30|31|4|5|6|7|8|9)(?=\b|t)"; - public const string MonthNumRegex = @"(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)\b"; + public const string TillRegex = @"(?\b(at[eé]h?|[aà]s|ao?)\b|--|-|—|——)(\s+\b(o|[aà](s)?)\b)?"; + public static readonly string RangeConnectorRegex = $@"(?(e\s*(([àa]s?)|o)?)|{BaseDateTime.RangeConnectorSymbolRegex})"; + public const string DayRegex = @"(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?=\b|t)"; + public const string WrittenDayRegex = @"(?(vinte\s+e\s+)?(um|dois|tr[eê]s|quatro|cinco|seis|sete|oito|nove)|dez|onze|doze|treze|(c|qu)atorze|quinze|dez[ae](s(seis|sete)|nove)|dezoito|vinte|trinta(\s+e\s+um)?)"; + public const string MonthNumRegex = @"(?1[0-2]|(0)?[1-9])\b"; public static readonly string AmDescRegex = $@"({BaseDateTime.BaseAmDescRegex})"; public static readonly string PmDescRegex = $@"({BaseDateTime.BasePmDescRegex})"; public static readonly string AmPmDescRegex = $@"({BaseDateTime.BaseAmPmDescRegex})"; - public static readonly string DescRegex = $@"(?({AmDescRegex}|{PmDescRegex}))"; - public const string RangePrefixRegex = @"((desde|de|da|das|entre)\s+(a(s)?\s+)?)"; - public static readonly string TwoDigitYearRegex = $@"\b(?([0-27-9]\d))(?!(\s*((\:)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b"; - public const string FullTextYearRegex = @"^[\*]"; + public const string OclockRegex = @"(?em\s+ponto)"; + public static readonly string DescRegex = $@"((horas\s+)?(?{AmDescRegex}|{PmDescRegex})|{OclockRegex})"; + public const string OfPrepositionRegex = @"(\bd(o|a|e)s?\b)"; + public const string AfterNextSuffixRegex = @"\b(que\s+vem|passad[oa])\b"; + public const string RangePrefixRegex = @"((de(sde)?|das?|entre)\s+(a(s)?\s+)?)"; + public static readonly string TwoDigitYearRegex = $@"\b(?([0-9]\d))(?!(\s*((\:\d)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b"; + public const string RelativeRegex = 
@"(?((n?est[ae]s?|pr[oó]xim[oa]s?|([uú]ltim[ao]s?))(\s+fina(l|is)\s+d[eao])?)|(fina(l|is)\s+d[eao]))\b"; + public const string StrictRelativeRegex = @"(?((n?est[ae]|pr[oó]xim[oa]|([uú]ltim(o|as|os)))(\s+fina(l|is)\s+d[eao])?)|(fina(l|is)\s+d[eao]))\b"; + public const string WrittenOneToNineRegex = @"(uma?|dois|duas|tr[eê]s|quatro|cinco|seis|sete|oito|nove)"; + public const string WrittenOneHundredToNineHundredRegex = @"(duzent[oa]s|trezent[oa]s|[cq]uatrocent[ao]s|quinhent[ao]s|seiscent[ao]s|setecent[ao]s|oitocent[ao]s|novecent[ao]s|cem|(?((dois\s+)?mil)((\s+e)?\s+{WrittenOneHundredToNineHundredRegex})?((\s+e)?\s+{WrittenOneToNinetyNineRegex})?)"; public static readonly string YearRegex = $@"({BaseDateTime.FourDigitYearRegex}|{FullTextYearRegex})"; public const string RelativeMonthRegex = @"(?([nd]?es[st]e|pr[óo]ximo|passsado|[uú]ltimo)\s+m[eê]s)\b"; public const string MonthRegex = @"(?abr(il)?|ago(sto)?|dez(embro)?|fev(ereiro)?|jan(eiro)?|ju[ln](ho)?|mar([çc]o)?|maio?|nov(embro)?|out(ubro)?|sep?t(embro)?)"; public static readonly string MonthSuffixRegex = $@"(?((em|no)\s+|d[eo]\s+)?({RelativeMonthRegex}|{MonthRegex}))"; - public const string DateUnitRegex = @"(?anos?|meses|m[êe]s|semanas?|dias?)\b"; + public const string DateUnitRegex = @"(?(?m[êe]s)(?es)?|(ano|(?semana|dia))(?s)?)\b"; public const string PastRegex = @"(?\b(passad[ao](s)?|[uú]ltim[oa](s)?|anterior(es)?|h[aá]|pr[ée]vi[oa](s)?)\b)"; - public const string FutureRegex = @"(?\b(seguinte(s)?|pr[oó]xim[oa](s)?|dentro\s+de|em|daqui\s+a)\b)"; + public const string FutureRegex = @"(?\b(seguinte(s)?|pr[oó]xim[oa](s)?|daqui\s+a)\b)"; public static readonly string SimpleCasesRegex = $@"\b((desde\s+[oa]|desde|d[oa])\s+)?(dia\s+)?({DayRegex})\s*{TillRegex}\s*(o dia\s+)?({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b"; public static readonly string MonthFrontSimpleCasesRegex = 
$@"\b{MonthSuffixRegex}\s+((desde\s+[oa]|desde|d[oa])\s+)?(dia\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; - public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+((entre|entre\s+[oa]s?)\s+)(dias?\s+)?({DayRegex})\s*{AndRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; - public static readonly string DayBetweenRegex = $@"\b((entre|entre\s+[oa]s?)\s+)(dia\s+)?({DayRegex})\s*{AndRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b"; - public const string OneWordPeriodRegex = @"\b(((pr[oó]xim[oa]?|[nd]?es[st]e|aquel[ea]|[uú]ltim[oa]?|em)\s+)?(?abr(il)?|ago(sto)?|dez(embro)?|fev(ereiro)?|jan(eiro)?|ju[ln](ho)?|mar([çc]o)?|maio?|nov(embro)?|out(ubro)?|sep?t(embro)?)|(?<=\b(de|do|da|o|a)\s+)?(pr[oó]xim[oa](s)?|[uú]ltim[oa]s?|est(e|a))\s+(fim de semana|fins de semana|semana|m[êe]s|ano)|fim de semana|fins de semana|(m[êe]s|anos)? [àa] data)\b"; - public static readonly string MonthWithYearRegex = $@"\b(((pr[oó]xim[oa](s)?|[nd]?es[st]e|aquele|[uú]ltim[oa]?|em)\s+)?(?abr(il)?|ago(sto)?|dez(embro)?|fev(ereiro)?|jan(eiro)?|ju[ln](ho)?|mar([çc]o)?|maio?|nov(embro)?|out(ubro)?|sep?t(embro)?)\s+((de|do|da|o|a)\s+)?({YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|[nd]?es[st]e)\s+ano))\b"; + public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+((entre|entre\s+[oa]s?)\s+)(dias?\s+)?({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; + public static readonly string DayBetweenRegex = $@"\b((entre|entre\s+[oa]s?)\s+)(dia\s+)?({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b"; + public const string SpecialYearPrefixes = @"((do\s+)?calend[aá]rio|civil|(?fiscal|escolar|letivo))"; + public static readonly string OneWordPeriodRegex = 
$@"\b(((pr[oó]xim[oa]?|[nd]?es[st]e|aquel[ea]|[uú]ltim[oa]?|em)\s+)?(?abr(il)?|ago(sto)?|dez(embro)?|fev(ereiro)?|jan(eiro)?|ju[ln](ho)?|mar([çc]o)?|maio?|nov(embro)?|out(ubro)?|sep?t(embro)?)|({RelativeRegex}\s+)?(ano\s+{SpecialYearPrefixes}|{SpecialYearPrefixes}\s+ano)|(?<=\b(de|do|da|o|a)\s+)?(pr[oó]xim[oa](s)?|[uú]ltim[oa]s?|est(e|a))\s+(fim de semana|fins de semana|semana|m[êe]s|ano)|fim de semana|fins de semana|(m[êe]s|anos)? [àa] data)\b"; + public static readonly string MonthWithYearRegex = $@"\b((((pr[oó]xim[oa](s)?|[nd]?es[st]e|aquele|[uú]ltim[oa]?|em)\s+)?{MonthRegex}|((n?o\s+)?(?primeiro|1o|segundo|2o|terceiro|3o|[cq]uarto|4o|quinto|5o|sexto|6o|s[eé]timo|7o|oitavo|8o|nono|9o|d[eé]cimo(\s+(primeiro|segundo))?|10o|11o|12o|[uú]ltimo)\s+m[eê]s(?=\s+(d[aeo]|[ao]))))\s+((d[aeo]|[ao])\s+)?({YearRegex}|{TwoDigitYearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|[nd]?es[st]e)\s+ano))\b"; public static readonly string MonthNumWithYearRegex = $@"({YearRegex}(\s*?)[/\-\.](\s*?){MonthNumRegex})|({MonthNumRegex}(\s*?)[/\-](\s*?){YearRegex})"; public static readonly string WeekOfMonthRegex = $@"(?(a|na\s+)?(?primeira?|1a|segunda|2a|terceira|3a|[qc]uarta|4a|quinta|5a|[uú]ltima)\s+semana\s+{MonthSuffixRegex})"; public static readonly string WeekOfYearRegex = $@"(?(a|na\s+)?(?primeira?|1a|segunda|2a|terceira|3a|[qc]uarta|4a|quinta|5a|[uú]ltima?)\s+semana(\s+d[oe]?)?\s+({YearRegex}|(?pr[oó]ximo|[uú]ltimo|[nd]?es[st]e)\s+ano))"; + public static readonly string OfYearRegex = $@"\b((d[aeo]?|[ao])\s*({YearRegex}|{StrictRelativeRegex}\s+ano))\b"; + public const string FirstLastRegex = @"\b(n?[ao]s?\s+)?((?primeir[ao]s?)|(?[uú]ltim[ao]s?))\b"; public static readonly string FollowedDateUnit = $@"^\s*{DateUnitRegex}"; public static readonly string NumberCombinedWithDateUnit = $@"\b(?\d+(\.\d*)?){DateUnitRegex}"; public static readonly string QuarterRegex = 
$@"(n?o\s+)?(?primeiro|1[oº]|segundo|2[oº]|terceiro|3[oº]|[qc]uarto|4[oº])\s+trimestre(\s+d[oe]|\s*,\s*)?\s+({YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|[nd]?es[st]e)\s+ano)"; @@ -56,99 +69,106 @@ public static class DateTimeDefinitions public const string AllHalfYearRegex = @"^[.]"; public const string PrefixDayRegex = @"^[.]"; public static readonly string SeasonRegex = $@"\b(?(([uú]ltim[oa]|[nd]?es[st][ea]|n?[oa]|(pr[oó]xim[oa]s?|seguinte))\s+)?(?primavera|ver[ãa]o|outono|inverno)((\s+)?(seguinte|((de\s+|,)?\s*{YearRegex})|((do\s+)?(?pr[oó]ximo|[uú]ltimo|[nd]?es[st]e)\s+ano)))?)\b"; - public const string WhichWeekRegex = @"\b(semana)(\s*)(?5[0-3]|[1-4]\d|0?[1-9])\b"; + public static readonly string WhichWeekRegex = $@"\b(semana)(\s*)(?5[0-3]|[1-4]\d|0?[1-9])(\s+(de|do)\s+({YearRegex}|(?pr[oó]ximo|[uú]ltimo|[nd]?es[st]e)\s+ano|ano\s+(?passado)))?\b"; public const string WeekOfRegex = @"(semana)(\s*)((do|da|de))"; public const string MonthOfRegex = @"(mes)(\s*)((do|da|de))"; public const string RangeUnitRegex = @"\b(?anos?|meses|m[êe]s|semanas?)\b"; + public const string BeforeAfterRegex = @"^[.]"; + public const string UpcomingPrefixRegex = @".^"; + public static readonly string NextPrefixRegex = $@"(pr[oó]xim[oa]s?|seguinte|{UpcomingPrefixRegex})\b"; public const string InConnectorRegex = @"\b(em)\b"; public const string SinceYearSuffixRegex = @"^[.]"; - public const string WithinNextPrefixRegex = @"^[.]"; + public static readonly string WithinNextPrefixRegex = $@"\b(dentro\s+d(e|as)(\s+(?{NextPrefixRegex}))?)\b"; + public const string TodayNowRegex = @"\b(hoje|agora)\b"; public const string CenturySuffixRegex = @"^[.]"; - public const string RelativeRegex = @"^[.]"; - public const string StrictRelativeRegex = @"^[.]"; - public const string FromRegex = @"((desde|de)(\s*a(s)?)?)$"; - public const string ConnectorAndRegex = @"(e\s*([àa](s)?)?)$"; + public const string FromRegex = @"(de(sde)?(\s*a(s)?)?)$"; public const string BetweenRegex = @"(entre\s*([oa](s)?)?)"; 
public const string WeekDayRegex = @"\b(?(domingos?|(segunda|ter[çc]a|quarta|quinta|sexta)s?([-\s+]feiras?)?|s[aá]bados?|(2|3|4|5|6)[aª])\b|(dom|seg|ter[cç]|qua|qui|sex|sab)\b(\.?(?=\s|,|;|$)))"; public static readonly string OnRegex = $@"(?<=\b(em|no)\s+)({DayRegex}s?)\b"; - public const string RelaxedOnRegex = @"(?<=\b(em|n[oa]|d[oa])\s+)(dia\s+)?((?10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)s?)\b"; + public static readonly string RelaxedOnRegex = $@"((?<=\b(em|[nd][oa])\s+)(dia\s+)?({DayRegex}s?)|dia\s+{DayRegex}s?)\b(?!\s*[/\\\-\.,:\s]\s*(\d|(de\s+)?{MonthRegex}))"; public static readonly string ThisRegex = $@"\b(([nd]?es[st][ea]\s*){WeekDayRegex})|({WeekDayRegex}\s*([nd]?es[st]a\s+semana))\b"; - public static readonly string LastDateRegex = $@"\b(([uú]ltim[ao])\s*{WeekDayRegex})|({WeekDayRegex}(\s+(([nd]?es[st]a|na|da)\s+([uú]ltima\s+)?semana)))\b"; + public static readonly string LastDateRegex = $@"\b(([uú]ltim[ao])\s*{WeekDayRegex})|({WeekDayRegex}(\s+(([nd]?es[st]a|[nd]a)\s+([uú]ltima\s+)?semana)))\b"; public static readonly string NextDateRegex = $@"\b(((pr[oó]xim[oa]|seguinte)\s*){WeekDayRegex})|({WeekDayRegex}((\s+(pr[oó]xim[oa]|seguinte))|(\s+(da\s+)?(semana\s+seguinte|pr[oó]xima\s+semana))))\b"; public const string SpecialDayRegex = @"\b((d?o\s+)?(dia\s+antes\s+de\s+ontem|antes\s+de\s+ontem|anteontem)|((d?o\s+)?(dia\s+|depois\s+|dia\s+depois\s+)?de\s+amanh[aã])|(o\s)?dia\s+seguinte|(o\s)?pr[oó]ximo\s+dia|(o\s+)?[uú]ltimo\s+dia|ontem|amanh[ãa]|hoje)|(do\s+dia$)\b"; public const string SpecialDayWithNumRegex = @"^[.]"; public const string ForTheRegex = @".^"; - public const string WeekDayAndDayOfMonthRegex = @".^"; - public const string WeekDayAndDayRegex = @".^"; + public static readonly string FlexibleDayRegex = $@"(?([a-z]+\s)?({WrittenDayRegex}|{DayRegex}))"; + public static readonly string WeekDayAndDayOfMonthRegex = $@"\b{WeekDayRegex}\s+(dia\s+{FlexibleDayRegex})\b"; + public static readonly string 
WeekDayAndDayRegex = $@"\b{WeekDayRegex}\s+({DayRegex})(?!([-:/]|\.\d|(\s+({AmDescRegex}|{PmDescRegex}|{OclockRegex}))))\b"; public static readonly string WeekDayOfMonthRegex = $@"(?(n?[ao]\s+)?(?primeir[ao]|1[ao]|segund[ao]|2[ao]|terceir[ao]|3[ao]|[qc]uart[ao]|4[ao]|quint[ao]|5[ao]|[uú]ltim[ao])\s+{WeekDayRegex}\s+{MonthSuffixRegex})"; public const string RelativeWeekDayRegex = @"^[.]"; public const string AmbiguousRangeModifierPrefix = @"^[.]"; public const string NumberEndingPattern = @"^[.]"; public static readonly string SpecialDateRegex = $@"(?<=\bno\s+){DayRegex}\b"; - public static readonly string OfMonthRegex = $@"^\s*de\s*{MonthSuffixRegex}"; + public static readonly string OfMonthRegex = $@"^(\s*de)?\s*{MonthSuffixRegex}"; public static readonly string MonthEndRegex = $@"({MonthRegex}\s*(o)?\s*$)"; public static readonly string WeekDayEnd = $@"{WeekDayRegex}\s*,?\s*$"; - public const string WeekDayStart = @"^[\.]"; + public const string WeekDayStart = @"^\b$"; public static readonly string DateYearRegex = $@"(?{YearRegex}|{TwoDigitYearRegex})"; - public static readonly string DateExtractor1 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}?((\s*(de)|[/\\\.\-])\s*)?{MonthRegex}\b"; - public static readonly string DateExtractor2 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}\s*([\.\-]|de)?\s*{MonthRegex}?(\s*(,|de)\s*){YearRegex}\b"; - public static readonly string DateExtractor3 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}(\s+|\s*,\s*|\s+de\s+|\s*-\s*){MonthRegex}((\s+|\s*(,|de)\s*){YearRegex})?\b"; - public static readonly string DateExtractor4 = $@"\b{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}\s*[/\\\-]\s*{DateYearRegex}"; - public static readonly string DateExtractor5 = $@"\b{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}"; - public static readonly string DateExtractor6 = $@"(?<=\b(em|no|o)\s+){MonthNumRegex}[\-\.]{DayRegex}\b"; - public static readonly string DateExtractor7 = 
$@"\b{MonthNumRegex}\s*/\s*{DayRegex}((\s+|\s*(,|de)\s*){DateYearRegex})?\b"; - public static readonly string DateExtractor8 = $@"(?<=\b(em|no|o)\s+){DayRegex}[\\\-]{MonthNumRegex}\b"; - public static readonly string DateExtractor9 = $@"\b{DayRegex}\s*/\s*{MonthNumRegex}((\s+|\s*(,|de)\s*){DateYearRegex})?\b"; - public static readonly string DateExtractor10 = $@"\b{YearRegex}\s*[/\\\-\.]\s*{MonthNumRegex}\s*[/\\\-\.]\s*{DayRegex}"; + public static readonly string DateExtractor1 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}((\s*(de)|[/\\\.\- ])\s*)?{MonthRegex}\b"; + public static readonly string DateExtractor2 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?({DayRegex}(\s*([/\.\-]|de)?\s*{MonthRegex}|\s+de\s+{MonthNumRegex})(\s*([,./-]|de|\s+)\s*){DateYearRegex}|{BaseDateTime.FourDigitYearRegex}\s*[/\.\- ]\s*{DayRegex}\s*[/\.\- ]\s*{MonthRegex})\b"; + public static readonly string DateExtractor3 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{MonthRegex}(\s*[/\.\- ]\s*|\s+de\s+){DayRegex}(?!\s*\-\s*\d{{2}}\b)((\s*[/\.\- ]\s*|\s+de\s+){DateYearRegex})?\b"; + public static readonly string DateExtractor4 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}\s*[/\\\-]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; + public static readonly string DateExtractor5 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; + public static readonly string DateExtractor6 = $@"(?<=\b(em|no|o)\s+){MonthNumRegex}[\-\.]{DayRegex}{BaseDateTime.CheckDecimalRegex}\b"; + public static readonly string DateExtractor7 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{MonthNumRegex}\s*/\s*{DayRegex}((\s+|\s*(,|de)\s*){DateYearRegex})?{BaseDateTime.CheckDecimalRegex}\b"; + public static readonly string DateExtractor8 = $@"(?<=\b(em|no|o)\s+){DayRegex}[\\\-]{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}\b"; + public static readonly string DateExtractor9 = 
$@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}\s*/\s*{MonthNumRegex}((\s+|\s*(,|de)\s*){DateYearRegex})?{BaseDateTime.CheckDecimalRegex}\b"; + public static readonly string DateExtractor10 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?({YearRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DayRegex}|{MonthRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{DayRegex}|{DayRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{MonthRegex})(?!\s*[/\\\-\.:]\s*\d+)"; public static readonly string DateExtractor11 = $@"(?<=\b(dia)\s+){DayRegex}"; public const string HourNumRegex = @"\b(?zero|uma|duas|tr[êe]s|[qc]uatro|cinco|seis|sete|oito|nove|dez|onze|doze)\b"; public const string MinuteNumRegex = @"(?um|dois|tr[êe]s|[qc]uatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|catorze|quatorze|quinze|dez[ea]sseis|dez[ea]sete|dezoito|dez[ea]nove|vinte|trinta|[qc]uarenta|cin[qc]uenta)"; public const string DeltaMinuteNumRegex = @"(?um|dois|tr[êe]s|[qc]uatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|catorze|quatorze|quinze|dez[ea]sseis|dez[ea]sete|dezoito|dez[ea]nove|vinte|trinta|[qc]uarenta|cin[qc]uenta)"; - public const string OclockRegex = @"(?em\s+ponto)"; - public const string PmRegex = @"(?((pela|de|da|\b[àa]\b|na)\s+(tarde|noite)))|((depois\s+do|ap[óo]s\s+o)\s+(almo[çc]o|meio dia|meio-dia))"; - public const string AmRegex = @"(?(pela|de|da|na)\s+(manh[ãa]|madrugada))"; + public const string PmRegex = @"(horas\s+)?(?((pela|de|da|\b[àa]\b|na)\s+(tarde|noite)))|((depois\s+do|ap[óo]s\s+o)\s+(almo[çc]o|meio dia|meio-dia))"; + public const string AmRegex = @"(horas\s+)?(?(pela|de|da|na)\s+(manh[ãa]|madrugada))"; public const string AmTimeRegex = @"(?([dn]?es[st]a|(pela|de|da|na))\s+(manh[ãa]|madrugada))"; public const string PmTimeRegex = @"(?(([dn]?es[st]a|\b[àa]\b|(pela|de|da|na))\s+(tarde|noite)))|((depois\s+do|ap[óo]s\s+o)\s+(almo[çc]o|meio dia|meio-dia))"; - public static readonly string LessThanOneHour = 
$@"(?((\s+e\s+)?(quinze|(um\s+|dois\s+|tr[êes]\s+)?quartos?)|quinze|(\s*)(um\s+|dois\s+|tr[êes]\s+)?quartos?|(\s+e\s+)(meia|trinta)|{BaseDateTime.DeltaMinuteRegex}(\s+(minuto|minutos|min|mins))|{DeltaMinuteNumRegex}(\s+(minuto|minutos|min|mins))))"; + public static readonly string LessThanOneHour = $@"(?((\s+e\s+)?(quinze|(um\s+|dois\s+|tr[êes]\s+)?quartos?)|quinze|(\s*)(um\s+|dois\s+|tr[êes]\s+)?quartos?|(\s+e\s+)(meia|trinta)|({BaseDateTime.DeltaMinuteRegex}|{DeltaMinuteNumRegex})(\s+(minuto|minutos|min|mins))?))"; + public const string LessThanOneHourSuffix = @"(?((\s+e\s+)?(quinze|(um\s+|dois\s+|tr[êes]\s+)?quartos?)|quinze|(\s*)(um\s+|dois\s+|tr[êes]\s+)?quartos?|(\s+e\s+)(meia|trinta)))"; public const string TensTimeRegex = @"(?dez|vinte|trinta|[qc]uarenta|cin[qc]uenta)"; - public static readonly string WrittenTimeRegex = $@"(?({HourNumRegex}\s*((e|menos)\s+)?({MinuteNumRegex}|({TensTimeRegex}((\s*e\s+)?{MinuteNumRegex})?)))|(({MinuteNumRegex}|({TensTimeRegex}((\s*e\s+)?{MinuteNumRegex})?))\s*((para as|pras|antes da|antes das)\s+)?({HourNumRegex}|{BaseDateTime.HourRegex})))"; - public static readonly string TimePrefix = $@"(?{LessThanOneHour}(\s+(passad[ao]s)\s+(as)?|\s+depois\s+(das?|do)|\s+pras?|\s+(para|antes)?\s+([àa]s?))?)"; + public static readonly string WrittenTimeRegex = $@"(?({HourNumRegex}\s*((e|menos)\s+)?(({TensTimeRegex}((\s*e\s+)?{MinuteNumRegex}))|{MinuteNumRegex}))|(({MinuteNumRegex}|({TensTimeRegex}((\s*e\s+)?{MinuteNumRegex})?))\s*((para as|pras|antes da|antes das)\s+)?({HourNumRegex}|{BaseDateTime.HourRegex})))"; + public static readonly string TimePrefix = $@"(?{LessThanOneHour}(\s+(passad[ao]s)\s+(as)?|\s+depois\s+(das?|do)|\s+pras?|\s+(para|antes)?\s+([àa]s?)))"; public static readonly string TimeSuffix = $@"(?({LessThanOneHour}\s+)?({AmRegex}|{PmRegex}|{OclockRegex}))"; public static readonly string BasicTime = 
$@"(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex})"; - public static readonly string AtRegex = $@"\b(?<=\b([aà]s?)\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})\b"; - public static readonly string ConnectNumRegex = $@"({BaseDateTime.HourRegex}(?00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)\s*{DescRegex})"; + public const string MidnightRegex = @"(?meia\s*(-\s*)?noite)"; + public const string MidmorningRegex = @"(?meio\s+da\s+manhã)"; + public const string MidEarlyMorning = @"(?meio\s+da\s+madrugada)"; + public const string MidafternoonRegex = @"(?meio\s+da\s+tarde)"; + public const string MiddayRegex = @"(?meio\s*(-\s*)?dia)"; + public static readonly string MidTimeRegex = $@"(?({MidnightRegex}|{MidmorningRegex}|{MidEarlyMorning}|{MidafternoonRegex}|{MiddayRegex}))"; + public static readonly string AtRegex = $@"\b(((?<=\b(d?[aà]s?)\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(\s+e\s+{BaseDateTime.MinuteRegex})?)(\s+horas?|\s*h\b)?|(?<=\b(s(er)?[aã]o|v[aã]o\s+ser|^[eé]h?)\s+|^\s*)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})(\s+horas?|\s*h\b))(\s+{OclockRegex})?|{MidTimeRegex})\b"; + public static readonly string ConnectNumRegex = $@"({BaseDateTime.HourRegex}(?[0-5][0-9])\s*{DescRegex})"; public static readonly string TimeRegex1 = $@"(\b{TimePrefix}\s+)?({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})\s*({DescRegex})"; public static readonly string TimeRegex2 = $@"(\b{TimePrefix}\s+)?(t)?{BaseDateTime.HourRegex}(\s*)?:(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?((\s*{DescRegex})|\b)"; public static readonly string TimeRegex3 = $@"(\b{TimePrefix}\s+)?{BaseDateTime.HourRegex}\.{BaseDateTime.MinuteRegex}(\s*{DescRegex})"; - public static readonly 
string TimeRegex4 = $@"\b(({DescRegex}?)|({BasicTime}?)({DescRegex}?))({TimePrefix}\s*)({HourNumRegex}|{BaseDateTime.HourRegex})?(\s+{TensTimeRegex}(\s+e\s+)?{MinuteNumRegex}?)?({OclockRegex})?\b"; - public static readonly string TimeRegex5 = $@"\b({TimePrefix}|{BasicTime}{TimePrefix})\s+(\s*{DescRegex})?{BasicTime}?\s*{TimeSuffix}\b"; + public static readonly string TimeRegex4 = $@"\b(({DescRegex}\s*)?(({TimePrefix}\s*)({HourNumRegex}|{BaseDateTime.HourRegex})|({HourNumRegex}|{BaseDateTime.HourRegex})(\s+{TensTimeRegex}(\s+e\s+)?{MinuteNumRegex}?)|{BasicTime}(\s*{DescRegex})?(?{LessThanOneHourSuffix}))(\s*({DescRegex}|{OclockRegex}))?)\b"; + public static readonly string TimeRegex5 = $@"\b({TimePrefix}|{BasicTime}(?{LessThanOneHourSuffix}))\s+(\s*{DescRegex})?{BasicTime}?\s*{TimeSuffix}\b"; public static readonly string TimeRegex6 = $@"({BasicTime}(\s*{DescRegex})?\s+{TimeSuffix}\b)"; public static readonly string TimeRegex7 = $@"\b{TimeSuffix}\s+[àa]s?\s+{BasicTime}((\s*{DescRegex})|\b)"; public static readonly string TimeRegex8 = $@"\b{TimeSuffix}\s+{BasicTime}((\s*{DescRegex})|\b)"; - public static readonly string TimeRegex9 = $@"\b(?{HourNumRegex}\s+({TensTimeRegex}\s*)?(e\s+)?{MinuteNumRegex}?)\b"; - public const string TimeRegex10 = @"(\b([àa]|ao?)|na|de|da|pela)\s+(madrugada|manh[ãa]|meio\s*dia|meia\s*noite|tarde|noite)"; - public static readonly string TimeRegex11 = $@"\b({WrittenTimeRegex})({DescRegex}?)\b"; + public static readonly string TimeRegex9 = $@"\b(?{HourNumRegex}\s+({TensTimeRegex}\s*)(e\s+)?{MinuteNumRegex}?)\b"; + public static readonly string TimeRegex11 = $@"\b({WrittenTimeRegex})(\s+{DescRegex})?\b"; public static readonly string TimeRegex12 = $@"(\b{TimePrefix}\s+)?{BaseDateTime.HourRegex}(\s*h\s*){BaseDateTime.MinuteRegex}(\s*{DescRegex})?"; - public const string PrepositionRegex = @"(?([àa]s?|em|por|pelo|pela|no|na|de|d[oa]?)?$)"; + public const string PrepositionRegex = @"(?([àa]s?|em|por|pel[ao]|n[ao]|de|d[ao]?)?$)"; public const 
string NowRegex = @"\b(?((logo|exatamente)\s+)?agora(\s+mesmo)?|neste\s+momento|(assim\s+que|t[ãa]o\s+cedo\s+quanto)\s+(poss[ií]vel|possas?|possamos)|o\s+mais\s+(cedo|r[aá]pido)\s+poss[íi]vel|recentemente|previamente)\b"; - public const string SuffixRegex = @"^\s*((e|a|em|por|pelo|pela|no|na|de)\s+)?(manh[ãa]|madrugada|meio\s*dia|tarde|noite)\b"; - public const string TimeOfDayRegex = @"\b(?manh[ãa]|madrugada|tarde|noite|((depois\s+do|ap[óo]s\s+o)\s+(almo[çc]o|meio dia|meio-dia)))\b"; + public const string SuffixRegex = @"^\s*((e|a|em|por|pel[ao]|n[ao]|de)\s+)?(manh[ãa]|madrugada|meio\s*dia|tarde|noite)\b"; + public const string TimeOfDayRegex = @"\b(?manh[ãa]|madrugada|tarde|noite|((depois\s+do|ap[óo]s\s+o)\s+(almo[çc]o|meio[ -]dia)))\b"; public static readonly string SpecificTimeOfDayRegex = $@"\b(((((a)?\s+|[nd]?es[st]a|seguinte|pr[oó]xim[oa]|[uú]ltim[oa])\s+)?{TimeOfDayRegex}))\b"; - public static readonly string TimeOfTodayAfterRegex = $@"^\s*(,\s*)?([àa]|em|por|pelo|pela|de|no|na?\s+)?{SpecificTimeOfDayRegex}"; - public static readonly string TimeOfTodayBeforeRegex = $@"({SpecificTimeOfDayRegex}(\s*,)?(\s+(a\s+la(s)?|para))?\s*)"; - public static readonly string SimpleTimeOfTodayAfterRegex = $@"({HourNumRegex}|{BaseDateTime.HourRegex})\s*(,\s*)?((en|de(l)?)?\s+)?{SpecificTimeOfDayRegex}"; - public static readonly string SimpleTimeOfTodayBeforeRegex = $@"({SpecificTimeOfDayRegex}(\s*,)?(\s+(a\s+la|para))?\s*({HourNumRegex}|{BaseDateTime.HourRegex}))"; - public const string SpecificEndOfRegex = @"((no|ao)\s+)?(fi(m|nal)|t[ée]rmin(o|ar))(\s+d?o(\s+dia)?(\s+de)?)?\s*$"; + public static readonly string TimeOfTodayAfterRegex = $@"^\s*(,\s*)?([àa]|em|por|pel[ao]|de|no|na?\s+)?{SpecificTimeOfDayRegex}"; + public static readonly string TimeOfTodayBeforeRegex = $@"({SpecificTimeOfDayRegex}(\s*,)?(\s+([àa]s|para))?\s*)"; + public static readonly string SimpleTimeOfTodayAfterRegex = $@"({HourNumRegex}|{BaseDateTime.HourRegex})\s*(,\s*)?{SpecificTimeOfDayRegex}"; + public 
static readonly string SimpleTimeOfTodayBeforeRegex = $@"({SpecificTimeOfDayRegex}(\s*,)?(\s+([àa]s|((cerca|perto|ao\s+redor|por\s+volta)\s+(de|das))))?\s*({HourNumRegex}|{BaseDateTime.HourRegex}))"; + public const string SpecificEndOfRegex = @"([na]o\s+)?(fi(m|nal)|t[ée]rmin(o|ar))(\s+d?o(\s+dia)?(\s+de)?)?\s*$"; public const string UnspecificEndOfRegex = @"^[.]"; public const string UnspecificEndOfRangeRegex = @"^[.]"; - public const string UnitRegex = @"(?anos|ano|meses|m[êe]s|semanas|semana|dias|dia|horas|hora|h|hr|hrs|hs|minutos|minuto|mins|min|segundos|segundo|segs|seg)\b"; - public const string ConnectorRegex = @"^(,|t|para [ao]|para as|pras|cerca de|cerca das|perto de|perto das|quase)$"; - public const string TimeHourNumRegex = @"(?vinte e um|vinte e dois|vinte e tr[êe]s|vinte e quatro|zero|um|uma|dois|duas|tr[êe]s|quatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|quatorze|catorze|quinze|dez[ea]sseis|dez[ea]ssete|dezoito|dez[ea]nove|vinte)"; - public static readonly string PureNumFromTo = $@"((desde|de|da|das)\s+(a(s)?\s+)?)?({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}))?\s*{TillRegex}\s*({BaseDateTime.HourRegex}|{TimeHourNumRegex})\s*(?{PmRegex}|{AmRegex}|{DescRegex})?"; - public static readonly string PureNumBetweenAnd = $@"(entre\s+((a|as)?\s+)?)({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}))?\s*e\s*(a(s)?\s+)?({BaseDateTime.HourRegex}|{TimeHourNumRegex})\s*(?{PmRegex}|{AmRegex}|{DescRegex})?"; + public const string UnitRegex = @"(?anos?|meses|m[êe]s|semanas?|dias?|horas?|hrs?|hs?|minutos?|mins?|segundos?|segs?)\b"; + public const string ConnectorRegex = @"^(,|t|para [ao]|para as|pras|(cerca|perto|ao\s+redor|por\s+volta)\s+(de|das)|quase)$"; + public const string TimeHourNumRegex = @"(?vinte( e (um|dois|tr[êe]s|quatro))?|zero|uma?|dois|duas|tr[êe]s|quatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|quatorze|catorze|quinze|dez([ea]sseis|[ea]ssete|oito|[ea]nove))"; + public static readonly string PureNumFromTo = 
$@"(((desde|de|da|das)\s+(a(s)?\s+)?)?({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}|horas))?\s*{TillRegex}(?{DescRegex}|horas))?\s*{TillRegex})\s*({BaseDateTime.HourRegex}|{TimeHourNumRegex})\s*(?{PmRegex}|{AmRegex}|{DescRegex}|horas)?"; + public static readonly string PureNumBetweenAnd = $@"(entre\s+((a|as)?\s+)?)({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}|horas))?\s*e\s*(a(s)?\s+)?({BaseDateTime.HourRegex}|{TimeHourNumRegex})\s*(?{PmRegex}|{AmRegex}|{DescRegex}|horas)?"; public const string SpecificTimeFromTo = @"^[.]"; public const string SpecificTimeBetweenAnd = @"^[.]"; - public const string TimeUnitRegex = @"(?horas|hora|h|minutos|minuto|mins|min|segundos|segundo|secs|sec)\b"; + public const string TimeUnitRegex = @"(?(hora|minuto|min|segundo|se[cg])(?s)?|h)\b"; public static readonly string TimeFollowedUnit = $@"^\s*{TimeUnitRegex}"; public static readonly string TimeNumberCombinedWithUnit = $@"\b(?\d+(\,\d*)?)\s*{TimeUnitRegex}"; public static readonly string DateTimePeriodNumberCombinedWithUnit = $@"\b(?\d+(\.\d*)?)\s*{TimeUnitRegex}"; @@ -166,19 +186,19 @@ public static class DateTimeDefinitions public const string ConjunctionRegex = @"^[.]"; public const string InexactNumberRegex = @"\b(poucos|pouco|algum|alguns|v[áa]rios)\b"; public static readonly string InexactNumberUnitRegex = $@"\b(poucos|pouco|algum|alguns|v[áa]rios)\s+{UnitRegex}"; - public static readonly string HolidayRegex1 = $@"\b(?sexta-feira santa|sexta-feira da paix[ãa]o|quarta-feira de cinzas|carnaval|dia (de|de los) presidentes?|ano novo chin[eê]s|ano novo|v[ée]spera de ano novo|natal|v[ée]spera de natal|dia de a[cç][ãa]o de gra[çc]as|a[cç][ãa]o de gra[çc]as|yuandan|halloween|dia das bruxas|p[áa]scoa)(\s+(d[eo]?\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|[nd]?es[st][ea]|[uú]ltim[oa]?|em))\s+ano))?\b"; - public static readonly string HolidayRegex2 = $@"\b(?(dia( d[eoa]s?)? 
)?(martin luther king|todos os santos|trabalho|s[ãa]o (patr[íi]cio|francisco|jorge|jo[ãa]o)|independ[êe]ncia|trabalhador|trabalho))(\s+(d[eo]?\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|[nd]?es[st][ea]|[uú]ltim[oa]?|em))\s+ano))?\b"; - public static readonly string HolidayRegex3 = $@"\b(?(dia( d[eoa]s?)? )(trabalhador|trabalhadores|trabalho|m[ãa]es?|pais?|mulher(es)?|crian[çc]as?|marmota|professor|professores))(\s+(d[eo]?\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|[nd]?es[st][ea]|[uú]ltim[oa]?|em))\s+ano))?\b"; - public const string BeforeRegex = @"(antes(\s+(de|dos?|das?)?)?)"; - public const string AfterRegex = @"((depois|ap[óo]s)(\s*(de|d?os?|d?as?)?)?)"; + public static readonly string HolidayRegex1 = $@"\b(?sexta-feira santa|sexta-feira da paix[ãa]o|quarta-feira de cinzas|carnaval|dia dos? presidentes?|ano novo chin[eê]s|ano novo|v[ée]spera de ano novo|natal|v[ée]spera de natal|dia de a[cç][ãa]o de gra[çc]as|a[cç][ãa]o de gra[çc]as|yuandan|halloween|dia das bruxas|p[áa]scoa)(\s+(d[eo]?\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|[nd]?es[st][ea]|[uú]ltim[oa]?|em))\s+ano))?\b"; + public static readonly string HolidayRegex2 = $@"\b(?(dia\s+(d[eoa]s?\s+)?)?(martin luther king|todos os santos|s[ãa]o (patr[íi]cio|francisco|jorge|jo[ãa]o)|independ[êe]ncia))(\s+(d[eo]?\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|[nd]?es[st][ea]|[uú]ltim[oa]?|em))\s+ano))?\b"; + public static readonly string HolidayRegex3 = $@"\b(?(dia\s+d[eoa]s?\s+)(trabalh(o|ador(es)?)|m[ãa]es?|pais?|mulher(es)?|crian[çc]as?|marmota|professor(es)?))(\s+(d[eo]?\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|[nd]?es[st][ea]|[uú]ltim[oa]?|em))\s+ano))?\b"; + public const string BeforeRegex = @"(antes(\s+(d(e\s+)?[aeo]s?)?)?|at[ée]h?(\s+[oàa]s?\b)?)"; + public const string AfterRegex = @"((depois|ap[óo]s|a\s+partir)(\s*(de|d?[oa]s?)?)?)"; public const string SinceRegex = @"(desde(\s+(as?|o))?)"; - public const string AroundRegex = @"^[.]"; - public const string PeriodicRegex = 
@"\b(?di[áa]ri[ao]|diariamente|mensalmente|semanalmente|quinzenalmente|anualmente)\b"; + public const string AroundRegex = @"(?:\b(?:cerca|perto|ao\s+redor|por\s+volta)\s*?\b)(\s+(de|das))?"; + public const string PeriodicRegex = @"\b(?di[áa]ri[ao]|(diaria|mensal|semanal|quinzenal|(bi|tri|se)mestral|anual)(mente)?)\b"; public const string EachExpression = @"cada|tod[oa]s?\s*([oa]s)?"; public static readonly string EachUnitRegex = $@"(?({EachExpression})\s*{UnitRegex})"; public static readonly string EachPrefixRegex = $@"(?({EachExpression})\s*$)"; public static readonly string EachDayRegex = $@"\s*({EachExpression})\s*dias\s*\b"; - public static readonly string BeforeEachDayRegex = $@"({EachExpression})\s*dias(\s+(as|ao))?\s*\b"; + public static readonly string BeforeEachDayRegex = $@"({EachExpression})\s*dias(\s+a[so])?\s*\b"; public static readonly string SetEachRegex = $@"(?({EachExpression})\s*)"; public const string LaterEarlyPeriodRegex = @"^[.]"; public const string WeekWithWeekDayRangeRegex = @"^[.]"; @@ -186,13 +206,14 @@ public static class DateTimeDefinitions public const string MiddlePauseRegex = @"^[.]"; public const string PrefixArticleRegex = @"^[\.]"; public const string OrRegex = @"^[.]"; - public const string YearPlusNumberRegex = @"^[.]"; - public const string NumberAsTimeRegex = @"^[.]"; + public static readonly string SpecialYearTermsRegex = $@"\b(({SpecialYearPrefixes}\s+anos?\s+|anos?\s+({SpecialYearPrefixes}\s+)?)(d[oe]\s+)?)"; + public static readonly string YearPlusNumberRegex = $@"\b({SpecialYearTermsRegex}((?(\d{{2,4}}))|{FullTextYearRegex}))\b"; + public static readonly string NumberAsTimeRegex = $@"\b({WrittenTimeRegex}|({TimeHourNumRegex}|{BaseDateTime.HourRegex})(?\s*horas)?)\b"; public const string TimeBeforeAfterRegex = @"^[.]"; public const string DateNumberConnectorRegex = @"^[.]"; public const string ComplexDatePeriodRegex = @"^[.]"; - public const string AgoRegex = @"\b(antes|atr[áa]s|no passado)\b"; - public const string 
LaterRegex = @"\b(depois d[eoa]s?|ap[óo]s (as)?|desde (as|o)|desde|no futuro|mais tarde)\b"; + public const string AgoRegex = @"\b(antes(\s+d[eoa]s?\s+(?hoje|ontem|manhã))?|atr[áa]s|no passado)\b"; + public const string LaterRegex = @"\b(depois(\s+d[eoa]s?\s+(agora|(?hoje|ontem|manhã)))?|ap[óo]s (as)?|desde( (as|o))?|no futuro|mais tarde)\b"; public const string Tomorrow = @"amanh[ãa]"; public static readonly Dictionary UnitMap = new Dictionary { @@ -246,7 +267,9 @@ public static class DateTimeDefinitions }; public static readonly Dictionary SpecialYearPrefixesMap = new Dictionary { - { @"", @"" } + { @"fiscal", @"FY" }, + { @"escolar", @"SY" }, + { @"letivo", @"SY" } }; public static readonly Dictionary SeasonMap = new Dictionary { @@ -286,7 +309,43 @@ public static class DateTimeDefinitions { @"quinto", 5 }, { @"quinta", 5 }, { @"5o", 5 }, - { @"5a", 5 } + { @"5a", 5 }, + { @"sexto", 6 }, + { @"sexta", 6 }, + { @"6o", 6 }, + { @"6a", 6 }, + { @"setimo", 7 }, + { @"sétimo", 7 }, + { @"setima", 7 }, + { @"sétima", 7 }, + { @"7o", 7 }, + { @"7a", 7 }, + { @"oitavo", 8 }, + { @"oitava", 8 }, + { @"8o", 8 }, + { @"8a", 8 }, + { @"nono", 9 }, + { @"nona", 9 }, + { @"9o", 9 }, + { @"9a", 9 }, + { @"decimo", 10 }, + { @"décimo", 10 }, + { @"decima", 10 }, + { @"décima", 10 }, + { @"10o", 10 }, + { @"10a", 10 }, + { @"decimo primeiro", 11 }, + { @"décimo primeiro", 11 }, + { @"decima primeira", 11 }, + { @"décima primeira", 11 }, + { @"11o", 11 }, + { @"11a", 11 }, + { @"decimo segundo", 12 }, + { @"décimo segundo", 12 }, + { @"decima segunda", 12 }, + { @"décima segunda", 12 }, + { @"12o", 12 }, + { @"12a", 12 } }; public static readonly Dictionary DayOfWeek = new Dictionary { @@ -446,7 +505,10 @@ public static class DateTimeDefinitions { @"vinte e sete", 27 }, { @"vinte e oito", 28 }, { @"vinte e nove", 29 }, - { @"trinta", 30 } + { @"trinta", 30 }, + { @"trinta e um", 31 }, + { @"quarenta", 40 }, + { @"cinquenta", 50 } }; public static readonly Dictionary> HolidayNames 
= new Dictionary> { @@ -470,7 +532,6 @@ public static class DateTimeDefinitions { @"pai", @"-06-WXX-7-3" }, { @"mae", @"-05-WXX-7-2" }, { @"acaodegracas", @"-11-WXX-4-4" }, - { @"trabalho", @"-05-WXX-1-1" }, { @"memoria", @"-03-WXX-2-4" } }; public static readonly Dictionary DoubleNumbers = new Dictionary @@ -482,14 +543,13 @@ public static class DateTimeDefinitions public const string TimeTokenPrefix = @"as "; public const string TokenBeforeDate = @"o "; public const string TokenBeforeTime = @"as "; - public const string UpcomingPrefixRegex = @".^"; - public static readonly string NextPrefixRegex = $@"(pr[oó]xim[oa]|seguinte|{UpcomingPrefixRegex})\b"; public const string PastPrefixRegex = @".^"; - public static readonly string PreviousPrefixRegex = $@"([uú]ltim[oa]|{PastPrefixRegex})\b"; + public static readonly string PreviousPrefixRegex = $@"([uú]ltim[oa]s?|passad[oa]s?|{PastPrefixRegex})\b"; public const string ThisPrefixRegex = @"([nd]?es[st][ea])\b"; public const string RelativeDayRegex = @"^[\.]"; public const string RestOfDateRegex = @"^[\.]"; - public const string RelativeDurationUnitRegex = @"^[\.]"; + public static readonly string DurationUnitRegex = $@"(?{DateUnitRegex}|{TimeUnitRegex}|noites?)\b"; + public static readonly string RelativeDurationUnitRegex = $@"(?:(?<=({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+)({DurationUnitRegex}))"; public const string ReferenceDatePeriodRegex = @"^[.]"; public const string FromToRegex = @"\b(from).+(to)\b.+"; public const string SingleAmbiguousMonthRegex = @"^(the\s+)?(may|march)$"; @@ -499,16 +559,18 @@ public static class DateTimeDefinitions public const string SetWeekDayRegex = @"^[\.]"; public const string NightRegex = @"\b(meia noite|noite|de noite)\b"; public const string CommonDatePrefixRegex = @"\b(dia)\s+$"; - public const string DurationUnitRegex = @"^[\.]"; public const string DurationConnectorRegex = @"^[.]"; public const string CenturyRegex = @"^[.]"; public const string DecadeRegex = 
@"^[.]"; public const string DecadeWithCenturyRegex = @"^[.]"; - public const string RelativeDecadeRegex = @"^[.]"; - public static readonly string YearSuffix = $@"(,?\s*({YearRegex}|{FullTextYearRegex}))"; - public const string SuffixAfterRegex = @"^[.]"; - public const string YearPeriodRegex = @"^[.]"; - public const string FutureSuffixRegex = @"^[.]"; + public static readonly string RelativeDecadeRegex = $@"\b((n?as?\s+)?{RelativeRegex}\s+((?[\w,]+)\s+)?(d[eé]cada)s?)\b"; + public static readonly string YearSuffix = $@"((,|\sde)?\s*({YearRegex}|{FullTextYearRegex}))"; + public const string SuffixAfterRegex = @"^\b$"; + public static readonly string YearPeriodRegex = $@"((((de(sde)?(\s*a(s)?)?)\s+)?{YearRegex}\s*({TillRegex})\s*{YearRegex})|(((entre\s*([oa](s)?)?)\s+){YearRegex}\s*({RangeConnectorRegex})\s*{YearRegex}))"; + public const string FutureSuffixRegex = @"\b(seguinte(s)?|pr[oó]xim[oa](s)?|no\s+futuro)\b"; + public const string PastSuffixRegex = @"^\b$"; + public static readonly string ModPrefixRegex = $@"\b({RelativeRegex}|{AroundRegex}|{BeforeRegex}|{AfterRegex}|{SinceRegex})\b"; + public static readonly string ModSuffixRegex = $@"\b({AgoRegex}|{LaterRegex}|{BeforeAfterRegex}|{FutureSuffixRegex}|{PastSuffixRegex})\b"; public static readonly Dictionary WrittenDecades = new Dictionary { { @"", 0 } @@ -521,7 +583,14 @@ public static class DateTimeDefinitions public static readonly string[] DurationDateRestrictions = { }; public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { - { @"null", @"null" } + { @"^\d{4}$", @"(\d\.\d{4}|\d{4}\.\d)" }, + { @"^(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)$", @"([$%£&!?@#])(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)|(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)([$%£&@#])" }, + { @"^\d{1,4}-\d{1,4}$", @"\d{1,4}-\d{1,4}-\d|\d-\d{1,4}-\d{1,4}" }, + { @"^\d{1,4}-\d{1,4}-\d{1,4}$", @"\d{1,4}-\d{1,4}-\d{1,4}-\d|\d-\d{1,4}-\d{1,4}-\d{1,4}" } + }; + public static readonly Dictionary 
AmbiguityTimeFiltersDict = new Dictionary + { + { @"horas?$", @"\b((por|duração\s+de|durante)\s+(\S+\s+){1,2}horas?|horas?\s+(\S+\s+){0,2}dur(ação|ou|a(rá|va)?))\b" } }; public static readonly IList EarlyMorningTermList = new List { @@ -594,6 +663,10 @@ public static class DateTimeDefinitions { @"semana" }; + public static readonly IList FortnightTerms = new List + { + @"quinzena" + }; public static readonly IList YearTerms = new List { @"ano", @@ -622,5 +695,20 @@ public static class DateTimeDefinitions { 'õ', 'o' }, { 'ç', 'c' } }; + public const string DayTypeRegex = @"(diari([ao]|amente))$"; + public const string WeekTypeRegex = @"(semanal(mente)?)$"; + public const string BiWeekTypeRegex = @"(quinzenal(mente)?)$"; + public const string MonthTypeRegex = @"(mensal(mente)?)$"; + public const string BiMonthTypeRegex = @"(bimestral(mente)?)$"; + public const string QuarterTypeRegex = @"(trimestral(mente)?)$"; + public const string SemiAnnualTypeRegex = @"(semestral(mente)?)$"; + public const string YearTypeRegex = @"(anual(mente)?)$"; + public static readonly IList ThisTerms = new List + { + @"esse", + @"essa", + @"este", + @"esta" + }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/NumbersDefinitions.cs index 90604f28b7..a9aba931a0 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/NumbersDefinitions.cs @@ -25,9 +25,10 @@ public static class NumbersDefinitions public const bool CompoundNumberLanguage = false; public const bool MultiDecimalSeparatorCulture = false; public const string HundredsNumberIntegerRegex = 
@"(quatrocent[ao]s|trezent[ao]s|seiscent[ao]s|setecent[ao]s|oitocent[ao]s|novecent[ao]s|duzent[ao]s|quinhent[ao]s|cem|(?pr[oó]xim[ao]s?|[uú]ltim[ao]\s+mas\s+um|anterior\s+ao\s+últim[ao]|(pen)?[uú]ltim[ao]s?|antepen[uú]ltim[ao]s?|seguintes?|anterior(es)?|atua(l|is))"; public static readonly string ComplexOrdinalRegex = $@"(({OverThousandOrdinalRegex}(\s)?)?{UnderThousandOrdinalRegex}|{OverThousandOrdinalRegex})"; public static readonly string SuffixRoundOrdinalRegex = $@"(({AllIntRegex})({SimpleRoundOrdinalRegex}))"; public static readonly string ComplexRoundOrdinalRegex = $@"((({SuffixRoundOrdinalRegex}(\s)?)?{ComplexOrdinalRegex})|{SuffixRoundOrdinalRegex})"; - public static readonly string AllOrdinalRegex = $@"{ComplexOrdinalRegex}|{SimpleRoundOrdinalRegex}|{ComplexRoundOrdinalRegex}"; - public const string OrdinalSuffixRegex = @"(?<=\b)(\d*(1[oaº]|2[oaº]|3[oaº]|4[oaº]|5[oaº]|6[oaº]|7[oaº]|8[oaº]|9[oaº]|0[oaº]|1.º|2.º|3.º|4.º|5.º|6.º|7.º|8.º|9.º))(?=\b)"; + public static readonly string AllOrdinalNumberRegex = $@"{ComplexOrdinalRegex}|{SimpleRoundOrdinalRegex}|{ComplexRoundOrdinalRegex}"; + public static readonly string AllOrdinalRegex = $@"(?:{AllOrdinalNumberRegex}|{RelativeOrdinalRegex})"; + public const string OrdinalSuffixRegex = @"(?<=\b)(\d*((1|2|3|4|5|6|7|8|9|0)[oaºª]|(1|2|3|4|5|6|7|8|9)(\.[ºª])))(?=\b)"; public static readonly string OrdinalEnglishRegex = $@"(?<=\b){AllOrdinalRegex}(?=\b)"; - public const string FractionNotationRegex = @"(((?<=\W|^)-\s*)|(?<=\b))\d+[/]\d+(?=(\b[^/]|$))"; + public static readonly string FractionNotationRegex = $@"{BaseNumbers.FractionNotationRegex}"; public const string FractionNotationWithSpacesRegex = @"(((?<=\W|^)-\s*)|(?<=\b))\d+\s+\d+[/]\d+(?=(\b[^/]|$))"; - public static readonly string FractionNounRegex = $@"(?<=\b)({AllIntRegex}\s+((e|com)\s+)?)?({AllIntRegex})(\s+((e|com)\s)?)((({AllOrdinalRegex})s?|({SpecialFractionInteger})|({SuffixRoundOrdinalRegex})s?)|mei[oa]?|ter[çc]o?)(?=\b)"; - public static readonly 
string FractionNounWithArticleRegex = $@"(?<=\b)({AllIntRegex}\s+(e\s+)?)?(um|um[as])(\s+)(({AllOrdinalRegex})|({SuffixRoundOrdinalRegex})|(e\s+)?mei[oa]?)(?=\b)"; + public static readonly string FractionMultiplierRegex = $@"(?\s+(e|com)\s+(meio|(um|{TwoToNineIntegerRegex})\s+(meio|terç[oa]|quart[oa]|quint[oa]|sext[oa]|s[eé]tim[oa]|oitav[oa]|non[oa]|d[eé]cim[oa])s?))"; + public static readonly string RoundMultiplierWithFraction = $@"(?(?:(mil(h([ãa]o|[õo]es))|bilh([ãa]o|[õo]es)|trilh([ãa]o|[õo]es)|qua[td]rilh([ãa]o|[õo]es)|quintilh([ãa]o|[õo]es))))(?={FractionMultiplierRegex}?$)"; + public static readonly string RoundMultiplierRegex = $@"\b\s*({RoundMultiplierWithFraction}|(?(mil))$)"; + public static readonly string FractionNounRegex = $@"(?<=\b)({AllIntRegex}\s+((e|com)\s+)?)?(({AllIntRegex})(\s+((e|com)\s)?)((({AllOrdinalNumberRegex})s?|({SpecialFractionInteger})|({SuffixRoundOrdinalRegex})s?)|mei[oa]?|ter[çc]o?)|(meio|um\s+quarto\s+de)\s+{RoundNumberIntegerRegex})(?=\b)"; + public static readonly string FractionNounWithArticleRegex = $@"(?<=\b)(({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\s+(e\s+)?)?((um|um[as])(\s+)(({AllOrdinalNumberRegex})|({SuffixRoundOrdinalRegex})|(e\s+)?mei[oa]?)|mei[oa]?)(?=\b)"; public static readonly string FractionPrepositionRegex = $@"(?({AllIntRegex})|((?({AllIntRegex})|((\d+)(?!\.)))(?=\b)"; - public static readonly string AllFloatRegex = $@"{AllIntRegex}(\s+(vírgula|virgula|e|ponto)){AllPointRegex}"; + public static readonly string AllFloatRegex = $@"(?)"; + public const string LessRegex = @"(mais\s+baix[oa]\s+que|(meno(s|r(es)?)|inferior(es)?|abaixo)(\s+(que|de|a)|(?=\s+ou\b))|(?|=)<)"; + public const string EqualRegex = @"((igua(l|is)|equivalente(s)?|equivale(ndo)?)(\s+(ao?|que|d[eao]))?|(?)=)"; + public static readonly string MoreOrEqualPrefix = $@"((n[ãa]o\s+{LessRegex})|((pelo|ao)\s+menos|(como(\s+o)?|no)\s+m[íi]nimo))"; + public static readonly string MoreOrEqual = 
$@"(({MoreRegex}\s+(ou)?\s+{EqualRegex})|({EqualRegex}\s+(ou|e)\s+{MoreRegex})|{MoreOrEqualPrefix}(\s+(ou)\s+{EqualRegex})?|({EqualRegex}\s+(ou)\s+)?{MoreOrEqualPrefix}|>\s*=)"; + public const string MoreOrEqualSuffix = @"((\b(e|ou)\b\s+(mais|maior(es)?|superior(es)?)((?!\s+(alt[oa]|baix[oa]|que|d[eao]|ao?))|(\s+(que|d[eao]|ao?)(?!(\s*\d+)))))|(como(\s+o)?|no)\s+m[íi]nimo|(pelo|ao)\s+menos)\b"; + public static readonly string LessOrEqualPrefix = $@"((n[ãa]o\s+{MoreRegex})|((como(\s+o)?|no)\s+m[aá]ximo))"; + public static readonly string LessOrEqual = $@"(({LessRegex}\s+(ou)?\s+{EqualRegex})|({EqualRegex}\s+(ou)?\s+{LessRegex})|{LessOrEqualPrefix}(\s+(ou)?\s+{EqualRegex})?|({EqualRegex}\s+(ou)?\s+)?{LessOrEqualPrefix}|<\s*=)"; + public const string LessOrEqualSuffix = @"((\b(e|ou)\b\s+(meno(s|r(es)?|inferior(es)?))((?!\s+(alt[oa]|baix[oa]|que|d[eao]|ao?))|(\s+(que|d[eao]|ao?)(?!(\s*\d+)))))|(como(\s+o)?|no)\s+m[áa]ximo)\b"; + public static readonly string NumberSplitMark = $@"(?![,.](?!\d+))(?!\s*\b(((e)\s+)?({LessRegex}|{MoreRegex}|{EqualRegex}|n[ãa]o|d[eao])|mas|[ao])\b)"; + public const string MoreRegexNoNumberSucceed = @"(\b(mais|maior(es)?|superior(es)?)((?!\s+(que|d[eao]|ao?))|\s+((que|d[eao])(?!(\s*\d+))))|((por\s+|a)cima)(?!(\s*\d+)))\b"; + public const string LessRegexNoNumberSucceed = @"(\b(meno(s|r(es)?)|inferior(es)?)((?!\s+(que|d[eao]|ao?))|\s+((que|d[eao]|ao?)(?!(\s*\d+))))|((por\s+|a)baixo)(?!(\s*\d+)))\b"; + public const string EqualRegexNoNumberSucceed = @"(\b(igua(l|is)|equivalentes?|equivale(ndo)?)((?!\s+(ao?|que|d[eao]))|(\s+(ao?|que|d[eao])(?!(\s*\d+)))))\b"; + public static readonly string OneNumberRangeMoreRegex1 = $@"({MoreOrEqual}|{MoreRegex})\s*(([ao]s?)\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeMoreRegex1LB = $@"(?({NumberSplitMark}.)+)\s*{MoreOrEqualSuffix}"; + public static readonly string OneNumberRangeMoreSeparateRegex = 
$@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+ou\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\s+(?({NumberSplitMark}.)+)(\s+ou\s+){EqualRegexNoNumberSucceed})"; + public static readonly string OneNumberRangeLessRegex1 = $@"({LessOrEqual}|{LessRegex})\s*([ao]s?\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeLessRegex1LB = $@"(?({NumberSplitMark}.)+)\s*{LessOrEqualSuffix}"; + public static readonly string OneNumberRangeLessSeparateRegex = $@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+ou\s+){LessRegexNoNumberSucceed})|({LessRegex}\s+(?({NumberSplitMark}.)+)(\s+ou\s+){EqualRegexNoNumberSucceed})"; + public static readonly string OneNumberRangeEqualRegex = $@"{EqualRegex}\s*([ao]s?\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string TwoNumberRangeRegex1 = $@"\bentre\s*([ao]s?\s+)?(?({NumberSplitMark}.)+)\s*e\s*([ao]s?\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\s*(\be\b|mas|,)\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"; + public static readonly string TwoNumberRangeRegex3 = $@"({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\s*(\be\b|mas|,)\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})"; + public static readonly string TwoNumberRangeRegex4 = $@"(\bde(sde)?\s+)?(\b[ao]s?\s+)?\b(?!\s+)(?({NumberSplitMark}(?!\b(entre|de(sde)?|es)\b).)+)\b\s*{TillRegex}\s*([ao]s?\s+)?\b(?!\s+)(?({NumberSplitMark}.)+)\b"; + public const string AmbiguousFractionConnectorsRegex = @"(\b(em|de)\b)"; public const char DecimalSeparatorChar = ','; public const string FractionMarkerToken = @"sobre"; public const char NonDecimalSeparatorChar = '.'; @@ -87,9 +120,10 @@ public static class NumbersDefinitions public static readonly string[] WrittenIntegerSeparatorTexts = { @"e" }; public static readonly string[] WrittenFractionSeparatorTexts = { @"com" }; public static readonly string[] WrittenFractionSuffix = { @"avo", @"ava" 
}; + public static readonly string[] OneHalfTokens = { @"um", @"meio" }; public const char PluralSuffix = 's'; public const string HalfADozenRegex = @"meia\s+d[uú]zia"; - public static readonly string DigitalNumberRegex = $@"((?<=\b)(mil|cem|milh[oõ]es|milh[aã]o|bilh[oõ]es|bilh[aã]o|trilh[oõ]es|trilh[aã]o|milhares|centena|centenas|dezena|dezenas?)(?=\b))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; + public static readonly string DigitalNumberRegex = $@"((?<=\b)(mil(hares)?|ce(m|ntenas?)|[bmt]ilh([aã]o|[oõ]es)|dezenas?)(?=\b))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; public static readonly Dictionary CardinalNumberMap = new Dictionary { { @"zero", 0 }, @@ -177,6 +211,8 @@ public static class NumbersDefinitions { @"segunda", 2 }, { @"terceiro", 3 }, { @"terceira", 3 }, + { @"terço", 3 }, + { @"terça", 3 }, { @"quarto", 4 }, { @"quarta", 4 }, { @"quinto", 5 }, @@ -259,6 +295,10 @@ public static class NumbersDefinitions { @"quadringentesimo", 400 }, { @"quadringentésima", 400 }, { @"quadringentesima", 400 }, + { @"quadrigentésimo", 400 }, + { @"quadrigentesimo", 400 }, + { @"quadrigentésima", 400 }, + { @"quadrigentesima", 400 }, { @"quingentésimo", 500 }, { @"quingentesimo", 500 }, { @"quingentésima", 500 }, @@ -307,6 +347,7 @@ public static class NumbersDefinitions public static readonly Dictionary PrefixCardinalMap = new Dictionary { { @"hum", 1 }, + { @"um", 1 }, { @"dois", 2 }, { @"tres", 3 }, { @"três", 3 }, @@ -396,11 +437,97 @@ public static class NumbersDefinitions }; public static readonly Dictionary RelativeReferenceOffsetMap = new Dictionary { - { @"", @"" } + { @"proxima", @"1" }, + { @"proximo", @"1" }, + { @"próxima", @"1" }, + { @"próximo", @"1" }, + { @"proximas", @"1" }, + { @"proximos", @"1" }, + { @"próximas", @"1" }, + { @"próximos", @"1" }, + { @"ultima", @"0" }, + { @"ultimo", @"0" }, + { @"última", @"0" }, + { @"último", @"0" }, + { @"ultimas", @"0" }, + { @"ultimos", @"0" }, + { @"últimas", @"0" }, + { @"últimos", 
@"0" }, + { @"penultima", @"-1" }, + { @"penultimo", @"-1" }, + { @"penúltima", @"-1" }, + { @"penúltimo", @"-1" }, + { @"penultimas", @"-1" }, + { @"penultimos", @"-1" }, + { @"penúltimas", @"-1" }, + { @"penúltimos", @"-1" }, + { @"ultima mas um", @"-1" }, + { @"ultimo mas um", @"-1" }, + { @"última mas um", @"-1" }, + { @"último mas um", @"-1" }, + { @"anterior ao último", @"-1" }, + { @"anterior ao última", @"-1" }, + { @"antepenultima", @"-2" }, + { @"antepenultimo", @"-2" }, + { @"antepenúltima", @"-2" }, + { @"antepenúltimo", @"-2" }, + { @"antepenultimas", @"-2" }, + { @"antepenultimos", @"-2" }, + { @"antepenúltimas", @"-2" }, + { @"antepenúltimos", @"-2" }, + { @"seguinte", @"1" }, + { @"seguintes", @"1" }, + { @"anterior", @"-1" }, + { @"anteriores", @"-1" }, + { @"atual", @"0" }, + { @"atuais", @"0" } }; public static readonly Dictionary RelativeReferenceRelativeToMap = new Dictionary { - { @"", @"" } + { @"proxima", @"current" }, + { @"proximo", @"current" }, + { @"próxima", @"current" }, + { @"próximo", @"current" }, + { @"proximas", @"current" }, + { @"proximos", @"current" }, + { @"próximas", @"current" }, + { @"próximos", @"current" }, + { @"ultima", @"end" }, + { @"ultimo", @"end" }, + { @"última", @"end" }, + { @"último", @"end" }, + { @"ultimas", @"end" }, + { @"ultimos", @"end" }, + { @"últimas", @"end" }, + { @"últimos", @"end" }, + { @"penultima", @"end" }, + { @"penultimo", @"end" }, + { @"penúltima", @"end" }, + { @"penúltimo", @"end" }, + { @"penultimas", @"end" }, + { @"penultimos", @"end" }, + { @"penúltimas", @"end" }, + { @"penúltimos", @"end" }, + { @"ultima mas um", @"end" }, + { @"ultimo mas um", @"end" }, + { @"última mas um", @"end" }, + { @"último mas um", @"end" }, + { @"anterior ao último", @"end" }, + { @"anterior ao última", @"end" }, + { @"antepenultima", @"end" }, + { @"antepenultimo", @"end" }, + { @"antepenúltima", @"end" }, + { @"antepenúltimo", @"end" }, + { @"antepenultimas", @"end" }, + { @"antepenultimos", @"end" }, 
+ { @"antepenúltimas", @"end" }, + { @"antepenúltimos", @"end" }, + { @"seguinte", @"current" }, + { @"seguintes", @"current" }, + { @"anterior", @"current" }, + { @"anteriores", @"current" }, + { @"atual", @"current" }, + { @"atuais", @"current" } }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/NumbersWithUnitDefinitions.cs index b3abf00c56..42c6eeca64 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/NumbersWithUnitDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/NumbersWithUnitDefinitions.cs @@ -208,7 +208,7 @@ public static class NumbersWithUnitDefinitions { @"Coroa feroesa", @"coroa feroesa|coroas feroesas|fkr" }, { @"Libra das Malvinas", @"libra das malvinas|libras das malvinas|fk£|fkp" }, { @"Dólar das Ilhas Salomão", @"dólar das ilhas salomão|dolar das ilhas salomao|dólares das ilhas salomão|dolares das ilhas salomao|sbd" }, - { @"Novo shekel israelense", @"novo shekel|novos shekeles|novo shequel|novo siclo|novo xéquel|shekeles novos|novos sheqalim|sheqalim novos|ils" }, + { @"Novo shekel israelense", @"novo shekel|novos shekeles|novo shequel|novo siclo|novo xéquel|shekeles novos|novos sheqalim|sheqalim novos|ils|₪" }, { @"Agora", @"agora|agorot" }, { @"Dólar jamaicano", @"dólar jamaicano|dolar jamaicano|dólares jamaicanos|dolares jamaicanos|j$|ja$|jmd" }, { @"Yen", @"yen|iene|yenes|ienes|jpy" }, @@ -361,12 +361,282 @@ public static class NumbersWithUnitDefinitions { @"Franco djibutiano", @"franco djibutiano|francos djibutianos|franco jibutiano|francos jibutianos|djf" }, { @"Dinar iugoslavo", @"dinar iugoslavo|dinares iugoslavos|dinar jugoslavo|dinares jugoslavos|yud" }, { @"Kwacha zambiano", @"kwacha zambiano|kwacha zambianos|kwachas zambianos|zmw" }, - { @"Ngwee zambiano", @"ngwee zambiano|ngwee zambianos|ngwees zambianos" } + { @"Ngwee zambiano", @"ngwee 
zambiano|ngwee zambianos|ngwees zambianos" }, + { @"Bitcoin", @"bitcoin|bitcoins|btc|xbt|₿" }, + { @"Millibitcoin", @"millibitcoin|millibitcoins|milibitcoin|milibitcoins" }, + { @"Satoshi", @"satoshi|satoshis" } }; - public const string CompoundUnitConnectorRegex = @"(?[^.])"; + public static readonly Dictionary CurrencyNameToIsoCodeMap = new Dictionary + { + { @"Afegani afegão", @"AFN" }, + { @"Euro", @"EUR" }, + { @"Lek albanês", @"ALL" }, + { @"Kwanza angolano", @"AOA" }, + { @"Dram armênio", @"AMD" }, + { @"Florim arubano", @"AWG" }, + { @"Taka bengali", @"BDT" }, + { @"Ngultrum butanês", @"BTN" }, + { @"Boliviano", @"BOB" }, + { @"Marco da Bósnia e Herzegovina", @"BAM" }, + { @"Pula", @"BWP" }, + { @"Real brasileiro", @"BRL" }, + { @"Lev búlgaro", @"BGN" }, + { @"Riel cambojano", @"KHR" }, + { @"Escudo cabo-verdiano", @"CVE" }, + { @"Colón costarriquenho", @"CRC" }, + { @"Kuna croata", @"HRK" }, + { @"Coroa checa", @"CZK" }, + { @"Nakfa", @"ERN" }, + { @"Birr etíope", @"ETB" }, + { @"Dalasi gambiano", @"GMD" }, + { @"Lari georgiano", @"GEL" }, + { @"Cedi", @"GHS" }, + { @"Quetzal guatemalteco", @"GTQ" }, + { @"Gurde haitiano", @"HTG" }, + { @"Lempira hondurenha", @"HNL" }, + { @"Florim húngaro", @"HUF" }, + { @"Rial iraniano", @"IRR" }, + { @"Rial iemenita", @"YER" }, + { @"Novo shekel israelense", @"ILS" }, + { @"Yen", @"JPY" }, + { @"Tengue cazaque", @"KZT" }, + { @"Xelim queniano", @"KES" }, + { @"Won norte-coreano", @"KPW" }, + { @"Won sul-coreano", @"KRW" }, + { @"Som quirguiz", @"KGS" }, + { @"Quipe laosiano", @"LAK" }, + { @"Loti do Lesoto", @"LSL" }, + { @"Rand sul-africano", @"ZAR" }, + { @"Pataca macaense", @"MOP" }, + { @"Dinar macedônio", @"MKD" }, + { @"Ariary malgaxe", @"MGA" }, + { @"Kwacha do Malawi", @"MWK" }, + { @"Ringuite malaio", @"MYR" }, + { @"Uguia", @"MRO" }, + { @"Tugrik mongol", @"MNT" }, + { @"Metical moçambicao", @"MZN" }, + { @"Quiate mianmarense", @"MMK" }, + { @"Córdova nicaraguense", @"NIO" }, + { @"Naira", @"NGN" }, + { @"Lira 
turca", @"TRY" }, + { @"Rial omanense", @"OMR" }, + { @"Balboa panamenho", @"PAB" }, + { @"Kina", @"PGK" }, + { @"Guarani", @"PYG" }, + { @"Novo Sol", @"PEN" }, + { @"Złoty", @"PLN" }, + { @"Rial catariano", @"QAR" }, + { @"Rial saudita", @"SAR" }, + { @"Tala samoano", @"WST" }, + { @"São Tomé and Príncipe dobra", @"STN" }, + { @"Leone serra-leonino", @"SLL" }, + { @"Lilangeni", @"SZL" }, + { @"Somoni tajique", @"TJS" }, + { @"Baht tailandês", @"THB" }, + { @"Grívnia", @"UAH" }, + { @"Vatu", @"VUV" }, + { @"Bolívar forte venezuelano", @"VEF" }, + { @"Kwacha zambiano", @"ZMW" }, + { @"Dirame marroquino", @"MAD" }, + { @"Dirham dos Emirados Árabes Unidos", @"AED" }, + { @"Manat azeri", @"AZN" }, + { @"Manat turcomeno", @"TMT" }, + { @"Xelim somali", @"SOS" }, + { @"Xelim tanzaniano", @"TZS" }, + { @"Xelim ugandês", @"UGX" }, + { @"Leu romeno", @"RON" }, + { @"Leu moldávio", @"MDL" }, + { @"Rupia nepalesa", @"NPR" }, + { @"Rupia paquistanesa", @"PKR" }, + { @"Rupia indiana", @"INR" }, + { @"Rupia seichelense", @"SCR" }, + { @"Rupia maurícia", @"MUR" }, + { @"Rupia maldiva", @"MVR" }, + { @"Rupia do Sri Lanka", @"LKR" }, + { @"Rupia indonésia", @"IDR" }, + { @"Coroa dinamarquesa", @"DKK" }, + { @"Coroa norueguesa", @"NOK" }, + { @"Coroa islandesa", @"ISK" }, + { @"Coroa sueca", @"SEK" }, + { @"Franco CFA da África Ocidental", @"XOF" }, + { @"Franco CFA da África Central", @"XAF" }, + { @"Franco comorense", @"KMF" }, + { @"Franco congolês", @"CDF" }, + { @"Burundian franc", @"BIF" }, + { @"Franco djibutiano", @"DJF" }, + { @"Franco CFP", @"XPF" }, + { @"Franco da Guiné", @"GNF" }, + { @"Franco suíço", @"CHF" }, + { @"Franco ruandês", @"RWF" }, + { @"Rublo russo", @"RUB" }, + { @"Transnistrian ruble", @"PRB" }, + { @"New Belarusian ruble", @"BYN" }, + { @"Dinar argelino", @"DZD" }, + { @"Dinar bareinita", @"BHD" }, + { @"Dinar iraquiano", @"IQD" }, + { @"Dinar jordaniano", @"JOD" }, + { @"Dinar kuwaitiano", @"KWD" }, + { @"Dinar libio", @"LYD" }, + { @"Dinar sérvio", 
@"RSD" }, + { @"Dinar tunisiano", @"TND" }, + { @"Peso argentino", @"ARS" }, + { @"Chilean peso", @"CLP" }, + { @"Peso colombiano", @"COP" }, + { @"Peso cubano convertível", @"CUC" }, + { @"Peso cubano", @"CUP" }, + { @"Peso dominicano", @"DOP" }, + { @"Peso mexicano", @"MXN" }, + { @"Peso uruguaio", @"UYU" }, + { @"Libra esterlina", @"GBP" }, + { @"Libra de Santa Helena", @"SHP" }, + { @"Libra egípcia", @"EGP" }, + { @"Libra das Malvinas", @"FKP" }, + { @"Libra de Gibraltar", @"GIP" }, + { @"Libra manesa", @"IMP" }, + { @"Libra de Jersey", @"JEP" }, + { @"Libra libanesa", @"LBP" }, + { @"Libra sul-sudanesa", @"SSP" }, + { @"Libra sudanesa", @"SDG" }, + { @"Libra síria", @"SYP" }, + { @"Dólar estadunidense", @"USD" }, + { @"Dólar australiano", @"AUD" }, + { @"Dólar bahamense", @"BSD" }, + { @"Dólar de Barbados", @"BBD" }, + { @"Dólar de Belize", @"BZD" }, + { @"Dólar bermudense", @"BMD" }, + { @"Dólar de Brunei", @"BND" }, + { @"Dólar de Cingapura", @"SGD" }, + { @"Dólar canadense", @"CAD" }, + { @"Dólar das Ilhas Cayman", @"KYD" }, + { @"Dólar neozelandês", @"NZD" }, + { @"Dólar fijiano", @"FJD" }, + { @"Dólar guianense", @"GYD" }, + { @"Dólar de Hong Kong", @"HKD" }, + { @"Dólar jamaicano", @"JMD" }, + { @"Dólar liberiano", @"LRD" }, + { @"Dólar namibiano", @"NAD" }, + { @"Dólar das Ilhas Salomão", @"SBD" }, + { @"Dólar surinamês", @"SRD" }, + { @"Novo dólar taiwanês", @"TWD" }, + { @"Dólar de Trinidade e Tobago", @"TTD" }, + { @"Tuvaluan dólar", @"TVD" }, + { @"Yuan chinês", @"CNY" }, + { @"Rial", @"__RI" }, + { @"Xelim", @"__S" }, + { @"Som", @"__SO" }, + { @"Dirame", @"__DR" }, + { @"Dinar", @"_DN" }, + { @"Dólar", @"__D" }, + { @"Manat", @"__MA" }, + { @"Rupia", @"__R" }, + { @"Coroa", @"__K" }, + { @"Krona", @"__K" }, + { @"Franco", @"__F" }, + { @"Marco", @"__M" }, + { @"Rublo", @"__RB" }, + { @"Peso", @"__PE" }, + { @"Libra", @"__P" }, + { @"Tristan da Cunha libra", @"_TP" }, + { @"South Georgia and the South Sandwich Islands libra", @"_SP" }, + { 
@"Somaliland xelim", @"_SS" }, + { @"Pitcairn Islands dólar", @"_PND" }, + { @"Palauan dólar", @"_PD" }, + { @"Niue dólar", @"_NID" }, + { @"Nauruan dólar", @"_ND" }, + { @"Micronesian dólar", @"_MD" }, + { @"Kiribati dólar", @"_KID" }, + { @"Guernsey libra", @"_GGP" }, + { @"Faroese króna", @"_FOK" }, + { @"Cook Islands dólar", @"_CKD" }, + { @"British Virgin Islands dólar", @"_BD" }, + { @"Ascension libra", @"_AP" }, + { @"Alderney libra", @"_ALP" }, + { @"Abkhazian apsar", @"_AA" }, + { @"Bitcoin", @"_XBT" } + }; + public static readonly Dictionary FractionalUnitNameToCodeMap = new Dictionary + { + { @"Jiao", @"JIAO" }, + { @"Kopek", @"KOPEK" }, + { @"Pul", @"PUL" }, + { @"Cent", @"CENT" }, + { @"Qindarke", @"QINDARKE" }, + { @"Peni", @"PENNY" }, + { @"Santeem", @"SANTEEM" }, + { @"Cêntimo", @"CENTIMO" }, + { @"Centavo", @"CENT" }, + { @"Luma", @"LUMA" }, + { @"Qəpik", @"QƏPIK" }, + { @"Fils", @"FILS" }, + { @"Poisha", @"POISHA" }, + { @"Kapyeyka", @"KAPYEYKA" }, + { @"Centime", @"CENTIME" }, + { @"Chetrum", @"CHETRUM" }, + { @"Paisa", @"PAISA" }, + { @"Fening", @"FENING" }, + { @"Thebe", @"THEBE" }, + { @"Sen", @"SEN" }, + { @"Stotinka", @"STOTINKA" }, + { @"Fen", @"FEN" }, + { @"Céntimo", @"CENT" }, + { @"Lipa", @"LIPA" }, + { @"Haléř", @"HALER" }, + { @"Øre", @"ØRE" }, + { @"Piastre", @"PIASTRE" }, + { @"Santim", @"SANTIM" }, + { @"Oyra", @"OYRA" }, + { @"Butut", @"BUTUT" }, + { @"Tetri", @"TETRI" }, + { @"Pesewa", @"PESEWA" }, + { @"Fillér", @"FILLER" }, + { @"Eyrir", @"EYRIR" }, + { @"Dinar", @"DINAR" }, + { @"Agora", @"AGORA" }, + { @"Tïın", @"TIIN" }, + { @"Chon", @"CHON" }, + { @"Jeon", @"JEON" }, + { @"Tyiyn", @"TYIYN" }, + { @"Att", @"ATT" }, + { @"Sente", @"SENTE" }, + { @"Dirham", @"DIRHAM" }, + { @"Rappen", @"RAPPEN" }, + { @"Avo", @"AVO" }, + { @"Deni", @"DENI" }, + { @"Iraimbilanja", @"IRAIMBILANJA" }, + { @"Tambala", @"TAMBALA" }, + { @"Laari", @"LAARI" }, + { @"Khoums", @"KHOUMS" }, + { @"Ban", @"BAN" }, + { @"Möngö", @"MONGO" }, + { @"Pya", 
@"PYA" }, + { @"Kobo", @"KOBO" }, + { @"Kuruş", @"KURUS" }, + { @"Baisa", @"BAISA" }, + { @"Centésimo", @"CENTESIMO" }, + { @"Toea", @"TOEA" }, + { @"Sentimo", @"SENTIMO" }, + { @"Grosz", @"GROSZ" }, + { @"Sene", @"SENE" }, + { @"Halala", @"HALALA" }, + { @"Para", @"PARA" }, + { @"Öre", @"ORE" }, + { @"Diram", @"DIRAM" }, + { @"Satang", @"SATANG" }, + { @"Seniti", @"SENITI" }, + { @"Millime", @"MILLIME" }, + { @"Tennesi", @"TENNESI" }, + { @"Kopiyka", @"KOPIYKA" }, + { @"Tiyin", @"TIYIN" }, + { @"Hào", @"HAO" }, + { @"Ngwee", @"NGWEE" }, + { @"Millibitcoin", @"MILLIBITCOIN" }, + { @"Satoshi", @"SATOSHI" } + }; + public const string CompoundUnitConnectorRegex = @"\b(?e|com)\b"; + public const string MultiplierRegex = @"\s*\b(mil(h([ãa]o|[õo]es))?|bilh([ãa]o|[õo]es)|trilh([ãa]o|[õo]es)|qua[td]rilh([ãa]o|[õo]es)|quintilh([ãa]o|[õo]es))\b"; public static readonly Dictionary CurrencyPrefixList = new Dictionary { - { @"Dólar", @"$" }, + { @"Dólar", @"$|dólar|dolar|dólares|dolares" }, { @"Dólar estadunidense", @"us$|u$d|usd$|usd" }, { @"Dólar do Caribe Oriental", @"ec$|xcd" }, { @"Dólar australiano", @"a$|aud" }, @@ -396,11 +666,13 @@ public static class NumbersWithUnitDefinitions { @"Florim", @"ƒ" }, { @"Libra", @"£" }, { @"Colón costarriquenho", @"₡" }, - { @"Lira turca", @"₺" } + { @"Lira turca", @"₺" }, + { @"Bitcoin", @"₿|btc|xbt" } }; public static readonly IList AmbiguousCurrencyUnitList = new List { - @"le" + @"le", + @"agora" }; public static readonly Dictionary InformationSuffixList = new Dictionary { @@ -436,7 +708,8 @@ public static class NumbersWithUnitDefinitions @"áreas", @"areas", @"milha", - @"milhas" + @"milhas", + @"""" }; public const string BuildPrefix = @"(?<=(\s|^|\P{L}))"; public const string BuildSuffix = @"(?=(\s|\P{L}|$))"; @@ -455,7 +728,7 @@ public static class NumbersWithUnitDefinitions { @"Picômetro", @"pm|picometro|picômetro|picómetro|picometros|picômetros|picómetros" }, { @"Milha", @"mi|milha|milhas" }, { @"Jarda", @"yd|jarda|jardas" }, - 
{ @"Polegada", @"polegada|polegadas|""" }, + { @"Polegada", @"polegada|polegadas|""|in" }, { @"Pé", @"pé|pe|pés|pes|ft" }, { @"Ano luz", @"ano luz|anos luz|al" } }; @@ -463,7 +736,10 @@ public static class NumbersWithUnitDefinitions { @"mi", @"milha", - @"milhas" + @"milhas", + @"""", + @"in", + @"um" }; public static readonly Dictionary SpeedSuffixList = new Dictionary { @@ -471,7 +747,7 @@ public static class NumbersWithUnitDefinitions { @"Quilômetro por hora", @"km/h|quilômetro por hora|quilómetro por hora|quilometro por hora|quilômetros por hora|quilómetros por hora|quilometros por hora|quilômetro/hora|quilómetro/hora|quilometro/hora|quilômetros/hora|quilómetros/hora|quilometros/hora" }, { @"Quilômetro por minuto", @"km/min|quilômetro por minuto|quilómetro por minuto|quilometro por minuto|quilômetros por minuto|quilómetros por minuto|quilometros por minuto|quilômetro/minuto|quilómetro/minuto|quilometro/minuto|quilômetros/minuto|quilómetros/minuto|quilometros/minuto" }, { @"Quilômetro por segundo", @"km/seg|quilômetro por segundo|quilómetro por segundo|quilometro por segundo|quilômetros por segundo|quilómetros por segundo|quilometros por segundo|quilômetro/segundo|quilómetro/segundo|quilometro/segundo|quilômetros/segundo|quilómetros/segundo|quilometros/segundo" }, - { @"Milha por hora", @"mph|milha por hora|mi/h|milha/hora|milhas/hora|milhas por hora" }, + { @"Milha por hora", @"mph|milha por hora|mi/h|milha/hora|milhas/hora|milhas por hora|mi por hora|mi/hora" }, { @"Nó", @"kt|nó|nós|kn" }, { @"Pé por segundo", @"ft/s|pé/s|pe/s|ft/seg|pé/seg|pe/seg|pé por segundo|pe por segundo|pés por segundo|pes por segundo" }, { @"Pé por minuto", @"ft/min|pé/mind|pe/min|pé por minuto|pe por minuto|pés por minuto|pes por minuto" }, @@ -489,8 +765,8 @@ public static class NumbersWithUnitDefinitions { { @"Kelvin", @"k|K|kelvin" }, { @"Grau Rankine", @"r|°r|°ra|grau rankine|graus rankine| rankine" }, - { @"Grau Celsius", @"°c|grau c|grau celsius|graus c|graus 
celsius|celsius|grau centígrado|grau centrigrado|graus centígrados|graus centigrados|centígrado|centígrados|centigrado|centigrados" }, - { @"Grau Fahrenheit", @"°f|grau f|graus f|grau fahrenheit|graus fahrenheit|fahrenheit" }, + { @"Grau Celsius", @"°c|° c|ºc|º c|grau c|grau celsius|graus c|graus celsius|celsius|grau centígrado|grau centrigrado|graus centígrados|graus centigrados|centígrado|centígrados|centigrado|centigrados" }, + { @"Grau Fahrenheit", @"°f|° f|ºf|º f|grau f|graus f|grau fahrenheit|graus fahrenheit|fahrenheit" }, { @"Grau", @"°|graus|grau" } }; public static readonly Dictionary VolumeSuffixList = new Dictionary @@ -515,8 +791,8 @@ public static class NumbersWithUnitDefinitions }; public static readonly Dictionary WeightSuffixList = new Dictionary { - { @"Tonelada métrica", @"tonelada métrica|tonelada metrica|toneladas métricas|toneladas metricas" }, - { @"Tonelada", @"ton|tonelada|toneladas" }, + { @"Tonelada métrica", @"tonelada métrica|tonelada metrica|toneladas métricas|toneladas metricas|t métrica|t metrica|t métricas|t metricas|t.métrica|t.metrica|t.métricas|t.metricas|t. metrica|t. 
métrica" }, + { @"Tonelada", @"ton|tonelada|toneladas|t" }, { @"Quilograma", @"kg|quilograma|quilogramas|quilo|quilos|kilo|kilos" }, { @"Hectograma", @"hg|hectograma|hectogramas" }, { @"Decagrama", @"dag|decagrama|decagramas" }, @@ -532,5 +808,34 @@ public static class NumbersWithUnitDefinitions { @"Grão", @"grão|grao|grãos|graos|gr" }, { @"Quilate", @"ct|quilate|quilates" } }; + public static readonly IList AmbiguousWeightUnitList = new List + { + @"g", + @"t" + }; + public static readonly Dictionary AngleSuffixList = new Dictionary + { + { @"Degree", @"grau|graus|°" }, + { @"Radian", @"radiano|radianos|rad" }, + { @"Turn", @"volta|voltas" } + }; + public static readonly IList AmbiguousAngleUnitList = new List + { + @"volta", + @"voltas" + }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"\b\d+\s*\p{L}+$", @"((\d+(\s*\p{L}+[-—–-]|\p{L}+)\d+)|(((\p{L}|\d)[-—–-]\d+\s*|\p{L}\d+)\p{L}+))" }, + { @"\bum$", @"\p{L}\s+um\b" } + }; + public static readonly Dictionary TemperatureAmbiguityFiltersDict = new Dictionary + { + { @"\b(graus?|°)$", @"\b((graus?|°)\s*(ângulo|rotação)|(gira(r|do|ndo)?|ângulo|rotação)(\s+(\p{L}+|\d+)){0,4}\s*(graus?\b|°))" } + }; + public static readonly Dictionary DimensionAmbiguityFiltersDict = new Dictionary + { + { @"\b(graus?|°)$", @"\b((graus?|°)\s*(c(elsius|entígrado)?|f(ah?renheit)?)|(temperatura)(\s+(\p{L}+|\d+)){0,4}\s*(graus?\b|°))" } + }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/PhoneNumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/PhoneNumbersDefinitions.cs new file mode 100644 index 0000000000..15fd2d6b8a --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/PhoneNumbersDefinitions.cs @@ -0,0 +1,27 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. 
+// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Portuguese\Portuguese-PhoneNumbers.yaml +// - Language: Portuguese +// - ClassName: PhoneNumbersDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Portuguese +{ + using System; + using System.Collections.Generic; + + public static class PhoneNumbersDefinitions + { + public const string NumberReplaceToken = @"@builtin.phonenumber"; + public const string FalsePositivePrefixRegex = @"conta(\s+(número|bancária(\s+número)?))?(\s+é)?:?\s*$"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/PhoneNumbersDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/PhoneNumbersDefinitions.tt new file mode 100644 index 0000000000..a68d843f5d --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/PhoneNumbersDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Portuguese\Portuguese-PhoneNumbers.yaml"; + this.Language = "Portuguese"; + this.ClassName = "PhoneNumbersDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/QuotedTextDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/QuotedTextDefinitions.cs new file mode 100644 index 0000000000..77c10461ed --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/QuotedTextDefinitions.cs @@ -0,0 +1,35 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. 
+// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Portuguese\Portuguese-QuotedText.yaml +// - Language: Portuguese +// - ClassName: QuotedTextDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Portuguese +{ + using System; + using System.Collections.Generic; + + public static class QuotedTextDefinitions + { + public const string LangMarker = @"Por"; + public const string QuotedTextRegex1 = @"(“([^“”]+)”)"; + public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)"; + public const string QuotedTextRegex3 = @"(""([^""]+)"")"; + public const string QuotedTextRegex4 = @"(\\'([^\']+)\\')"; + public const string QuotedTextRegex5 = @"(`([^`]+)`)"; + public const string QuotedTextRegex6 = @"(«([^«»]+)»)"; + public const string QuotedTextRegex7 = @"(‹([^‹›]+)›)"; + public const string QuotedTextRegex8 = @"(123456)"; + public const string QuotedTextRegex9 = @"(123456)"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/QuotedTextDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/QuotedTextDefinitions.tt new file mode 100644 index 0000000000..af37816e96 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/QuotedTextDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Portuguese\Portuguese-QuotedText.yaml"; + this.Language = "Portuguese"; + this.ClassName = "QuotedTextDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/ChoiceDefinitions.cs 
b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/ChoiceDefinitions.cs index 48e2411d93..dc4f865e84 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/ChoiceDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/ChoiceDefinitions.cs @@ -23,7 +23,8 @@ public static class ChoiceDefinitions { public const string LangMarker = @"Spa"; public const string TokenizerRegex = @"[^\w\d\u00E0-\u00FC]"; - public const string TrueRegex = @"\b(verdad|verdadero|sí|sip|s|si|cierto|por supuesto|ok)\b|(\uD83D\uDC4D|\uD83D\uDC4C)"; - public const string FalseRegex = @"\b(falso|no|nop|n|no)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)"; + public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)"; + public static readonly string TrueRegex = $@"\b(verdad|verdadero|sí|sip|s|si|cierto|por supuesto|ok)\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?"; + public static readonly string FalseRegex = $@"\b(falso|no|nop|n|no)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs index 99be1a612f..e0a069fa1d 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs @@ -21,211 +21,259 @@ namespace Microsoft.Recognizers.Definitions.Spanish public static class DateTimeDefinitions { + public const string LangMarker = @"Spa"; public const bool CheckBothBeforeAfter = false; - public const string TillRegex = @"(?hasta|al|a|--|-|—|——)(\s+(el|la(s)?))?"; - public const string AndRegex = @"(?y|y\s*el|--|-|—|——)"; - public const string DayRegex = @"(?01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)(?=\b|t)"; - public const string MonthNumRegex = 
@"(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)\b"; + public static readonly string TillRegex = $@"(?\b(hasta|hacia|al?)\b(\s+(el|la(s)?)\b)?|{BaseDateTime.RangeConnectorSymbolRegex})"; + public static readonly string StrictTillRegex = $@"(?\b(hasta|hacia|al?)(\s+(el|la(s)?))?\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*[qt][1-4](?!(\s+de|\s*,\s*))))"; + public static readonly string RangeConnectorRegex = $@"(?\b(y\s*(el|(la(s)?)?))\b|{BaseDateTime.RangeConnectorSymbolRegex})"; + public const string WrittenDayRegex = @"(?uno|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce|trece|catorce|quince|dieciséis|diecisiete|dieciocho|diecinueve|veinte|veintiuno|veintidós|veintitrés|veinticuatro|veinticinco|veintiséis|veintisiete|veintiocho|veintinueve|treinta(\s+y\s+uno)?)"; + public const string DayRegex = @"\b(?01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)(?:\.[º°])?(?=\b|t)"; + public const string MonthNumRegex = @"(?1[0-2]|(0)?[1-9])\b"; + public const string OclockRegex = @"(?en\s+punto)"; public static readonly string AmDescRegex = $@"({BaseDateTime.BaseAmDescRegex})"; public static readonly string PmDescRegex = $@"({BaseDateTime.BasePmDescRegex})"; public static readonly string AmPmDescRegex = $@"({BaseDateTime.BaseAmPmDescRegex})"; public static readonly string DescRegex = $@"(?({AmDescRegex}|{PmDescRegex}))"; - public const string OfPrepositionRegex = @"(do|da|del|de)"; - public const string AfterNextSuffixRegex = @"\b(que\s+viene|pasad[oa])\b"; - public const string RangePrefixRegex = @"((desde|de|entre)\s+(la(s)?\s+)?)"; - public static readonly string TwoDigitYearRegex = $@"\b(?([0-27-9]\d))(?!(\s*((\:)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b"; - public const string RelativeRegex = @"(?((esta|este|pr[oó]xim[oa]|([uú]ltim(o|as|os)))(\s+fin(ales)?\s+de(\s+la)?)?)|(fin(ales)?\s+de(\s+la)?))\b"; - public const string StrictRelativeRegex = 
@"(?((esta|este|pr[oó]xim[oa]|([uú]ltim(o|as|os)))(\s+fin(ales)?\s+de(\s+la)?)?)|(fin(ales)?\s+de(\s+la)?))\b"; - public const string WrittenOneToNineRegex = @"(uno|un|una|dos|tres|cuatro|cinco|seis|siete|ocho|nueve)"; - public const string WrittenOneHundredToNineHundredRegex = @"(cien|ciento|doscient[oa]s|trescient[oa]s|cuatrocient[ao]s|quinient[ao]s|seiscient[ao]s|setecient[ao]s|ochocient[ao]s|novecient[ao]s)"; - public static readonly string WrittenOneToNinetyNineRegex = $@"(uno|un|una|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce|trece|catorce|quince|dieciséis|dieciseis|diecisiete|dieciocho|diecinueve|veinte|veintiuno|veintiún|veintiun|veintiuna|veintidós|veintidos|veintitrés|veintitres|veinticuatro|veinticinco|veintiséis|veintisiete|veintiocho|veintinueve|((treinta|cuarenta|cincuenta|sesenta|setenta|ochenta|noventa)(\s+y\s+{WrittenOneToNineRegex})?))"; - public static readonly string FullTextYearRegex = $@"(((dos\s+)?mil)(\s+{WrittenOneHundredToNineHundredRegex})?(\s+{WrittenOneToNinetyNineRegex})?)"; + public const string OfPrepositionRegex = @"(\bd(o|al?|el?)\b)"; + public const string AfterNextSuffixRegex = @"\b(despu[eé]s\s+de\s+la\s+pr[oó]xima)\b"; + public const string NextSuffixRegex = @"\b(que\s+viene|pr[oó]xim[oa]|siguiente)\b"; + public const string PreviousSuffixRegex = @"\b(pasad[ao]|anterior(?!\s+(al?|del?)\b))\b"; + public static readonly string RelativeSuffixRegex = $@"({AfterNextSuffixRegex}|{NextSuffixRegex}|{PreviousSuffixRegex})"; + public const string RangePrefixRegex = @"((de(l|sde)?|entre)(\s+la(s)?)?)"; + public static readonly string TwoDigitYearRegex = $@"\b(?([0-9]\d))(?!(\s*((\:\d)|{AmDescRegex}|{PmDescRegex}|\.\d))|\.?[º°ª])\b"; + public const string RelativeRegex = @"(?est[ae]s?|pr[oó]xim[oa]s?|siguiente|(([uú]ltim|pasad)[ao]s?))\b"; + public const string StrictRelativeRegex = @"(?est[ae]|pr[oó]xim[oa]|siguiente|(([uú]ltim|pasad)(o|as|os)))\b"; + public const string WrittenOneToNineRegex = 
@"(un[ao]?|dos|tres|cuatro|cinco|seis|siete|ocho|nueve)"; + public const string WrittenOneHundredToNineHundredRegex = @"(doscient[oa]s|trescient[oa]s|cuatrocient[ao]s|quinient[ao]s|seiscient[ao]s|setecient[ao]s|ochocient[ao]s|novecient[ao]s|cien(to)?)"; + public static readonly string WrittenOneToNinetyNineRegex = $@"(((treinta|cuarenta|cincuenta|sesenta|setenta|ochenta|noventa)(\s+y\s+{WrittenOneToNineRegex})?)|diez|once|doce|trece|catorce|quince|dieciséis|dieciseis|diecisiete|dieciocho|diecinueve|veinte|veintiuno|veintiún|veintiun|veintiuna|veintidós|veintidos|veintitrés|veintitres|veinticuatro|veinticinco|veintiséis|veintisiete|veintiocho|veintinueve|un[ao]?|dos|tres|cuatro|cinco|seis|siete|ocho|nueve)"; + public static readonly string FullTextYearRegex = $@"\b(?((dos\s+)?mil)(\s+{WrittenOneHundredToNineHundredRegex})?(\s+{WrittenOneToNinetyNineRegex})?)"; public static readonly string YearRegex = $@"({BaseDateTime.FourDigitYearRegex}|{FullTextYearRegex})"; - public const string RelativeMonthRegex = @"(?((este|pr[oó]ximo|([uú]ltim(o|as|os)))\s+mes)|(mes\s+((que\s+viene)|pasado)))\b"; - public const string MonthRegex = @"\b(?abril|abr|agosto|ago|diciembre|dic|febrero|feb|enero|ene|julio|jul|junio|jun|marzo|mar|mayo|may|noviembre|nov|octubre|oct|septiembre|setiembre|sept|set|sep)\b"; - public static readonly string MonthSuffixRegex = $@"(?((del|de|la|el)\s+)?({RelativeMonthRegex}|{MonthRegex}))"; - public const string DateUnitRegex = @"(?años?|mes(es)?|semanas?|d[ií]as?)\b"; + public const string RelativeMonthRegex = @"(?(de\s+)?((este|pr[oó]ximo|([uú]ltim(o|as|os)))\s+mes)|(del\s+)?(mes\s+((que\s+viene)|pasado)))\b"; + public const string MonthRegex = @"\b(?abr(\.|(il)?\b)|ago(\.|(sto)?\b)|dic(\.|(iembre)?\b)|feb(\.|(rero)?\b)|ene(\.|(ro)?\b)|ju[ln](\.|(io)?\b)|mar(\.|(zo)?\b)|may(\.|(o)?\b)|nov(\.|(iembre)?\b)|oct(\.|(ubre)?\b)|sep?t(\.|(iembre)?\b)|sep(\.|\b))"; + public static readonly string MonthSuffixRegex = 
$@"(?((del?|la|el)\s+)?({RelativeMonthRegex}|{MonthRegex}))"; + public const string DateUnitRegex = @"(?(a[nñ]o|(?semana))(?s)?|(?mes)(?es)?|(?d[ií]a)(?s)?(?\s+(h[aá]biles|laborales))?)\b"; public const string PastRegex = @"(?\b(pasad(a|o)(s)?|[uú]ltim[oa](s)?|anterior(es)?|previo(s)?)\b)"; - public const string FutureRegex = @"(?\b(siguiente(s)?|pr[oó]xim[oa](s)?|dentro\s+de|en)\b)"; - public static readonly string SimpleCasesRegex = $@"\b((desde\s+el|desde|del|de)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*)(en\s+|del\s+|de\s+)?{YearRegex})?\b"; - public static readonly string MonthFrontSimpleCasesRegex = $@"\b{MonthSuffixRegex}\s+((desde\s+el|desde|del)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*)(en\s+|del\s+|de\s+)?{YearRegex})?\b"; - public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+((entre|entre\s+el)\s+)({DayRegex})\s*{AndRegex}\s*({DayRegex})((\s+|\s*,\s*)(en\s+|del\s+|de\s+)?{YearRegex})?\b"; - public static readonly string DayBetweenRegex = $@"\b((entre|entre\s+el)\s+)({DayRegex})(\s+{MonthSuffixRegex})?\s*{AndRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*)(en\s+|del\s+|de\s+)?{YearRegex})?\b"; - public static readonly string OneWordPeriodRegex = $@"\b(((((la|el)\s+)?mes\s+(({OfPrepositionRegex})\s+)?)|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\s+))?({MonthRegex})|((la|el)\s+)?((({RelativeRegex}\s+){DateUnitRegex}(\s+{AfterNextSuffixRegex})?)|{DateUnitRegex}(\s+{AfterNextSuffixRegex}))|va\s+de\s+{DateUnitRegex})"; - public static readonly string MonthWithYearRegex = $@"\b(((pr[oó]xim[oa](s)?|este|esta|[uú]ltim[oa]?)\s+)?({MonthRegex})(\s+|(\s*[,-]\s*))((de|del|de la)\s+)?({YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\s+año))\b"; - public static readonly string MonthNumWithYearRegex = $@"({YearRegex}(\s*?)[/\-\.~](\s*?){MonthNumRegex})|({MonthNumRegex}(\s*?)[/\-\.~](\s*?){YearRegex})"; - public static readonly string WeekOfMonthRegex = 
$@"(?(la\s+)?(?primera?|1ra|segunda|2da|tercera?|3ra|cuarta|4ta|quinta|5ta|[uú]ltima)\s+semana\s+{MonthSuffixRegex})"; - public static readonly string WeekOfYearRegex = $@"(?(la\s+)?(?primera?|1ra|segunda|2da|tercera?|3ra|cuarta|4ta|quinta|5ta|[uú]ltima?|([12345]ª))\s+semana(\s+del?)?\s+({YearRegex}|(?pr[oó]ximo|[uú]ltimo|este)\s+año))"; + public const string FutureRegex = @"\b(siguiente(s)?|pr[oó]xim[oa](s)?)\b"; + public static readonly string SimpleCasesRegex = $@"\b((desde(\s+el)?|entre|del?)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*)((en|del?)\s+)?{YearRegex})?\b"; + public static readonly string MonthFrontSimpleCasesRegex = $@"\b{MonthSuffixRegex}\s+((desde(\s+el)?|entre|del)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*)((en|del?)\s+)?{YearRegex})?\b"; + public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+((entre(\s+el)?)\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*)((en|del?)\s+)?{YearRegex})?\b"; + public static readonly string DayBetweenRegex = $@"\b((entre(\s+el)?)\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*)((en|del?)\s+)?{YearRegex})?\b"; + public const string SpecialYearPrefixes = @"((del\s+)?calend[aá]rio|(?fiscal|escolar))"; + public static readonly string OneWordPeriodRegex = $@"\b(((((la|el)\s+)?mes\s+(({OfPrepositionRegex})\s+)?)|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\s+))?({MonthRegex})|((el\s+)?{RelativeRegex}\s+)?(({SpecialYearPrefixes}\s+)año|año\s+{SpecialYearPrefixes})|(((la|el)\s+)?((({RelativeRegex}\s+)({DateUnitRegex}|(fin\s+de\s+)?semana|finde)(\s+{RelativeSuffixRegex})?)|{DateUnitRegex}(\s+{RelativeSuffixRegex}))|va\s+de\s+{DateUnitRegex}|((año|mes)(\s+(a|hasta)\s+la\s+fecha)?|((el\s+)?fin\s+de\s+)?semana|(el\s+)?finde))\b)"; + public static readonly string MonthWithYearRegex = 
$@"\b((((pr[oó]xim[oa](s)?|est?[ae]|[uú]ltim[oa]?)\s+)?{MonthRegex}|((el\s+)?(?primero?|1(er|ro)|segundo|2do|tercero?|3(er|ro)|uarto|4to|quinto|5to|sexto|6to|s[eé]ptimo|7mo|octavo|8vo|noveno|9no|d[eé]cimo|10mo|und[eé]cimo|11mo|duod[eé]cimo|12mo|[uú]ltimo)\s+mes(?=\s+(del?|en))))((\s+|(\s*[,-]\s*))((de(l|\s+la)?|en)\s+)?({YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\s+año)|\s+(del?|en)\s+{TwoDigitYearRegex}))\b"; + public static readonly string MonthNumWithYearRegex = $@"\b(({YearRegex}(\s*?)[/\-\.~](\s*?){MonthNumRegex})|({MonthNumRegex}(\s*?)[/\-\.~](\s*?){YearRegex}))\b"; + public static readonly string WeekOfMonthRegex = $@"(?(la\s+)?(?primera?|1ra|segunda|2da|tercera?|3ra|cuarta|4ta|quinta|5ta|([12345](\.)?ª)|[uú]ltima)\s+semana\s+{MonthSuffixRegex}((\s+de)?\s+({BaseDateTime.FourDigitYearRegex}|{RelativeRegex}\s+año))?)\b"; + public static readonly string WeekOfYearRegex = $@"(?(la\s+)?(?primera?|1ra|segunda|2da|tercera?|3ra|cuarta|4ta|quinta|5ta|[uú]ltima?|([12345]ª))\s+semana(\s+(del?|en))?\s+({YearRegex}|(?pr[oó]ximo|[uú]ltimo|este)\s+año))"; + public static readonly string OfYearRegex = $@"\b((del?)\s+({YearRegex}|{StrictRelativeRegex}\s+año))\b"; + public const string FirstLastRegex = @"\b((el|las?|los?)\s+)?((?primer([ao]s?)?)|(?[uú]ltim[ao]s?))\b"; public static readonly string FollowedDateUnit = $@"^\s*{DateUnitRegex}"; public static readonly string NumberCombinedWithDateUnit = $@"\b(?\d+(\.\d*)?){DateUnitRegex}"; - public const string QuarterTermRegex = @"(?primer|1er|segundo|2do|tercer|3ro|4to|((1|2|3|4)º))\s+(cuatrimestre|cuarto)"; - public static readonly string QuarterRegex = $@"(el\s+)?{QuarterTermRegex}((\s+del?|\s*,\s*)?\s+({YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\s+a[ñn]o|a[ñn]o(\s+{AfterNextSuffixRegex})))?"; - public static readonly string QuarterRegexYearFront = $@"({YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\s+a[ñn]o)\s+(el\s+)?{QuarterTermRegex}"; - public const string AllHalfYearRegex = @"^[.]"; - public static readonly string 
EarlyPrefixRegex = $@"\b(?((comienzos?|inicios?)\s+({OfPrepositionRegex})))\b"; - public static readonly string MidPrefixRegex = $@"\b(?(mediados\s+({OfPrepositionRegex})))\b"; - public static readonly string LaterPrefixRegex = $@"\b(?((fines|finales)\s+({OfPrepositionRegex})))\b"; + public const string QuarterTermRegex = @"\b((?primer|1er|segundo|2do|tercer|3ro|4to|([1234](\.)?º))\s+(trimestre|cuarto)|[tq](?[1-4]))\b"; + public static readonly string RelativeQuarterTermRegex = $@"\b((?{StrictRelativeRegex})\s+(trimestre|cuarto)|(trimestre|cuarto)\s+(?(actual|pr[oó]ximo|siguiente|pasado|anterior)))\b"; + public static readonly string QuarterRegex = $@"(el\s+)?{QuarterTermRegex}((\s+(del?\s+)?|\s*[,-]\s*)({YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\s+a[ñn]o|a[ñn]o(\s+{RelativeSuffixRegex}))|\s+del\s+a[ñn]o)?|{RelativeQuarterTermRegex}"; + public static readonly string QuarterRegexYearFront = $@"({YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\s+a[ñn]o)(?:\s*-\s*|\s+(el\s+)?)?{QuarterTermRegex}"; + public static readonly string AllHalfYearRegex = $@"\b(?primer|1er|segundo|2do|[12](\.)?º)\s+semestre(\s+(de\s+)?({YearRegex}|{RelativeRegex}\s+año))?\b"; + public static readonly string EarlyPrefixRegex = $@"\b(?(?m[aá]s\s+temprano(\s+(del?|en))?)|((comienzos?|inicios?|principios?|temprano)\s+({OfPrepositionRegex}(\s+d[ií]a)?)))(\s+(el|las?|los?))?\b"; + public static readonly string MidPrefixRegex = $@"\b(?(media[dn]os\s+({OfPrepositionRegex})))(\s+(el|las?|los?))?\b"; + public static readonly string LaterPrefixRegex = $@"\b(?((fin(al)?(es)?|[uú]ltimos)\s+({OfPrepositionRegex}))|(?m[aá]s\s+tarde(\s+(del?|en))?))(\s+(el|las?|los?))?\b"; public static readonly string PrefixPeriodRegex = $@"({EarlyPrefixRegex}|{MidPrefixRegex}|{LaterPrefixRegex})"; - public const string PrefixDayRegex = @"^[.]"; - public const string CenturySuffixRegex = @"^[.]"; - public static readonly string SeasonRegex = 
$@"\b(?(([uú]ltim[oa]|est[ea]|el|la|(pr[oó]xim[oa]s?|siguiente)|{PrefixPeriodRegex})\s+)?(?primavera|verano|otoño|invierno)((\s+del?|\s*,\s*)?\s+({YearRegex}|(?pr[oó]ximo|[uú]ltimo|este)\s+año))?)\b"; - public const string WhichWeekRegex = @"\b(semana)(\s*)(?5[0-3]|[1-4]\d|0?[1-9])\b"; - public static readonly string WeekOfRegex = $@"((del|de|la|el)\s+)?(semana)(\s*)({OfPrepositionRegex})"; + public static readonly string PrefixDayRegex = $@"\b((?(comienzos?|inicios?|principios?|temprano))|(?mediados)|(?(fin((al)?es)?|m[aá]s\s+tarde)))(\s+(en|{OfPrepositionRegex}))?(\s+([ae]l)(\s+d[ií]a)?)?$"; + public const string CenturySuffixRegex = @"(^siglo)\b"; + public static readonly string SeasonRegex = $@"\b(?(([uú]ltim[oa]|est[ea]|el|la|(pr[oó]xim[oa]s?|siguiente)|{PrefixPeriodRegex})\s+)?(?primavera|verano|otoño|invierno)((\s+(del?|en)|\s*,\s*)?\s+({YearRegex}|(?pr[oó]ximo|[uú]ltimo|este)\s+año))?)\b"; + public static readonly string WhichWeekRegex = $@"\b(semana)(\s*)(?5[0-3]|[1-4]\d|0?[1-9])(\s+del?\s+({YearRegex}|(?pr[oó]ximo|[uú]ltimo|este)\s+año|año\s+(?pasado)))?\b"; + public static readonly string WeekOfRegex = $@"((del?|el|la)\s+)?(semana)(\s*)({OfPrepositionRegex}|que\s+(inicia|comienza)\s+el|(que\s+va|a\s+partir)\s+del)"; public static readonly string MonthOfRegex = $@"(mes)(\s+)({OfPrepositionRegex})"; - public const string RangeUnitRegex = @"\b(?años|año|meses|mes|semanas|semana)\b"; - public const string InConnectorRegex = @"\b(in)\b"; - public const string SinceYearSuffixRegex = @"^[.]"; - public const string WithinNextPrefixRegex = @"\b(dentro\s+de)\b"; - public const string FromRegex = @"((desde|de)(\s*la(s)?)?)$"; - public const string ConnectorAndRegex = @"(y\s*(la(s)?)?)$"; - public const string BetweenRegex = @"(entre\s*(la(s)?)?)"; - public const string WeekDayRegex = @"\b(?domingos?|lunes|martes|mi[eé]rcoles|jueves|viernes|s[aá]bados?|lun|mar|mi[eé]|jue|vie|s[aá]b|dom|lu|ma|mi|ju|vi|s[aá]|do)\b"; - public static readonly string OnRegex = 
$@"(?<=\ben\s+)({DayRegex}s?)\b"; - public const string RelaxedOnRegex = @"(?<=\b(en|el|del)\s+)((?10|11|12|13|14|15|16|17|18|19|1st|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)s?)\b"; - public static readonly string ThisRegex = $@"\b((este\s*){WeekDayRegex})|({WeekDayRegex}\s*((de\s+)?esta\s+semana))\b"; - public static readonly string LastDateRegex = $@"\b(([uú]ltimo)\s*{WeekDayRegex})|({WeekDayRegex}(\s+((de\s+)?(esta|la)\s+([uú]ltima\s+)?semana)))\b"; - public static readonly string NextDateRegex = $@"\b(((pr[oó]ximo|siguiente)\s*){WeekDayRegex})|({WeekDayRegex}(\s+(de\s+)?(la\s+)?(pr[oó]xima|siguiente)(\s*semana)))\b"; - public const string SpecialDayRegex = @"\b((el\s+)?(d[ií]a\s+antes\s+de\s+ayer|anteayer)|((el\s+)?d[ií]a\s+(despu[eé]s\s+)?de\s+mañana|pasado\s+mañana)|(el\s)?d[ií]a siguiente|(el\s)?pr[oó]ximo\s+d[ií]a|(el\s+)?[uú]ltimo d[ií]a|(d)?el d[ií]a|ayer|mañana|hoy)\b"; + public const string RangeUnitRegex = @"\b(?a[nñ]os?|mes(es)?|semanas?)\b"; + public const string BeforeAfterRegex = @"^[.]"; + public const string InConnectorRegex = @"\b(en)(?=\s*$)\b"; + public const string TodayNowRegex = @"\b(hoy|ahora|este entonces)\b"; + public const string FromRegex = @"((\bde(sde)?)(\s*la(s)?)?)$"; + public const string BetweenRegex = @"(\bentre\s*(la(s)?)?)"; + public const string WeekDayRegex = @"\b(?(domingos?|lunes|martes|mi[eé]rcoles|jueves|viernes|s[aá]bados?)\b|(lun|mar|mi[eé]|jue|vie|s[aá]b|dom|lu|ma|mi|ju|vi|s[aá]|do)(\.|\b))(?!ñ)"; + public static readonly string OnRegex = $@"((?<=\b(e[ln])\s+)|(\be[ln]\s+d[ií]a\s+))({DayRegex}s?)(?![.,]\d)\b"; + public const string RelaxedOnRegex = @"(?<=\b(en|d?el)\s+)((?10|11|12|13|14|15|16|17|18|19|1st|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)s?)(?![.,]\d)\b"; + public const string SpecialDayRegex = 
@"\b((el\s+)?(d[ií]a\s+antes\s+de\s+ayer|anteayer)|((el\s+)?d[ií]a\s+(despu[eé]s\s+)?de\s+mañana|pasado\s+mañana)|(el\s)?d[ií]a\s+(siguiente|anterior)|(el\s)?pr[oó]ximo\s+d[ií]a|(el\s+)?[uú]ltimo\s+d[ií]a|(d)?el\s+d[ií]a(?!\s+(de|internacional))|ayer|mañana|hoy)\b"; public const string SpecialDayWithNumRegex = @"^[.]"; - public const string ForTheRegex = @"^[.]"; - public const string WeekDayAndDayOfMonthRegex = @"^[.]"; - public const string WeekDayAndDayRegex = @"^[.]"; - public static readonly string WeekDayOfMonthRegex = $@"(?(el\s+)?(?primer|1er|segundo|2do|tercer|3er|cuarto|4to|quinto|5to|[uú]ltimo)\s+{WeekDayRegex}\s+{MonthSuffixRegex})"; + public static readonly string FlexibleDayRegex = $@"(?([a-z]+\s)?({WrittenDayRegex}|{DayRegex}))"; + public static readonly string ForTheRegex = $@"\b((((?<=para\s+el\s+){FlexibleDayRegex})|((?\s*(,|\.(?![º°ª])|!|\?|-|$))(?!\d))"; + public static readonly string WeekDayAndDayOfMonthRegex = $@"\b{WeekDayRegex}\s+((el\s+(d[ií]a\s+)?){FlexibleDayRegex})\b"; + public static readonly string WeekDayAndDayRegex = $@"\b{WeekDayRegex}\s+({DayRegex}|{WrittenDayRegex})(?!([-:/]|\.\d|(\s+({AmDescRegex}|{PmDescRegex}|{OclockRegex}))))\b"; + public static readonly string WeekDayOfMonthRegex = $@"(?(el\s+)?(?primera?|1era?|segund[ao]|2d[ao]|tercera?|3era?|cuart[ao]|4t[ao]|quint[ao]|5t[ao]|((1|2|3|4|5)(\.)?[ºª])|[uú]ltim[ao])\s+(semana\s+{MonthSuffixRegex}\s+el\s+{WeekDayRegex}|{WeekDayRegex}\s+{MonthSuffixRegex}))"; public const string RelativeWeekDayRegex = @"^[.]"; public const string AmbiguousRangeModifierPrefix = @"^[.]"; public const string NumberEndingPattern = @"^[.]"; + public const string DateTokenPrefix = @"en "; + public const string TimeTokenPrefix = @"a las "; + public const string TokenBeforeDate = @"el "; + public const string TokenBeforeTime = @"a las "; + public const string HalfTokenRegex = @"^((y\s+)?media)"; + public const string QuarterTokenRegex = @"^((y\s+)?cuarto|(?menos\s+cuarto))"; + public const string 
PastTokenRegex = @"\b(pasad[ao]s(\s+(de\s+)?las)?)$"; + public const string ToTokenRegex = @"\b((para|antes)(\s+(de\s+)?las?)|(?^menos))$"; public static readonly string SpecialDateRegex = $@"(?<=\b(en)\s+el\s+){DayRegex}\b"; - public static readonly string OfMonthRegex = $@"^\s*de\s*{MonthSuffixRegex}"; + public static readonly string OfMonthRegex = $@"^\s*((d[ií]a\s+)?d[eo]\s+)?{MonthSuffixRegex}"; public static readonly string MonthEndRegex = $@"({MonthRegex}\s*(el)?\s*$)"; public static readonly string WeekDayEnd = $@"{WeekDayRegex}\s*,?\s*$"; - public const string WeekDayStart = @"^[\.]"; - public static readonly string DateYearRegex = $@"(?{YearRegex}|{TwoDigitYearRegex})"; - public static readonly string DateExtractor1 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}?((\s*(de)|[/\\\.\-])\s*)?{MonthRegex}\b"; - public static readonly string DateExtractor2 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}\s*([\.\-]|de)\s*{MonthRegex}(\s*,\s*|\s*(del?)\s*){DateYearRegex}\b"; - public static readonly string DateExtractor3 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}(\s+|\s*,\s*|\s+de\s+|\s*-\s*){MonthRegex}((\s+|\s*,\s*){DateYearRegex})?\b"; - public static readonly string DateExtractor4 = $@"\b{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}\s*[/\\\-]\s*{DateYearRegex}"; - public static readonly string DateExtractor5 = $@"\b{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}"; - public static readonly string DateExtractor6 = $@"(?<=\b(en|el)\s+){MonthNumRegex}[\-\.]{DayRegex}\b"; - public static readonly string DateExtractor7 = $@"\b{MonthNumRegex}\s*/\s*{DayRegex}((\s+|\s*,\s*|\s+de\s+){DateYearRegex})?\b"; - public static readonly string DateExtractor8 = $@"(?<=\b(en|el)\s+){DayRegex}[\\\-]{MonthNumRegex}\b"; - public static readonly string DateExtractor9 = $@"\b{DayRegex}\s*/\s*{MonthNumRegex}((\s+|\s*,\s*|\s+de\s+){DateYearRegex})?\b"; - public static readonly string DateExtractor10 = 
$@"\b{YearRegex}\s*[/\\\-\.]\s*{MonthNumRegex}\s*[/\\\-\.]\s*{DayRegex}"; + public const string WeekDayStart = @"^\b$"; + public static readonly string DateYearRegex = $@"(?{YearRegex}|(?2[0-4]|[0-1]?\d)"; public const string HourNumRegex = @"\b(?cero|una|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce)\b"; - public const string MinuteNumRegex = @"(?un|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce|trece|catorce|quince|dieciseis|diecisiete|dieciocho|diecinueve|veinte|treinta|cuarenta|cincuenta)"; - public const string DeltaMinuteNumRegex = @"(?un|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce|trece|catorce|quince|dieciseis|diecisiete|dieciocho|diecinueve|veinte|treinta|cuarenta|cincuenta)"; - public const string OclockRegex = @"(?en\s+punto)"; + public const string MinuteNumRegex = @"(?uno?|d[óo]s|tr[eé]s|cuatro|cinco|s[eé]is|siete|ocho|nueve|diez|once|doce|trece|catorce|quince|diecis[eé]is|diecisiete|dieciocho|diecinueve|veinte|treinta|cuarenta|cincuenta)"; + public const string DeltaMinuteNumRegex = @"(?uno?|d[óo]s|tr[eé]s|cuatro|cinco|s[eé]is|siete|ocho|nueve|diez|once|doce|trece|catorce|quince|diecis[eé]is|diecisiete|dieciocho|diecinueve|veinte|treinta|cuarenta|cincuenta)"; public const string PmRegex = @"(?((por|de|a|en)\s+la)\s+(tarde|noche))"; public const string AmRegex = @"(?((por|de|a|en)\s+la)\s+(mañana|madrugada))"; public const string AmTimeRegex = @"(?(esta|(por|de|a|en)\s+la)\s+(mañana|madrugada))"; public const string PmTimeRegex = @"(?(esta|(por|de|a|en)\s+la)\s+(tarde|noche))"; - public static readonly string LessThanOneHour = $@"(?((\s+y\s+)?cuarto|(\s*)menos cuarto|(\s+y\s+)media|{BaseDateTime.DeltaMinuteRegex}(\s+(minuto|minutos|min|mins))|{DeltaMinuteNumRegex}(\s+(minuto|minutos|min|mins))))"; + public const string NightTimeRegex = @"(noche)"; + public const string LastNightTimeRegex = @"(anoche)"; + public const string NowTimeRegex = @"(ahora|mismo|momento)"; + public const string RecentlyTimeRegex = 
@"(mente)"; + public const string AsapTimeRegex = @"(posible|pueda[ns]?|podamos)"; + public static readonly string LessThanOneHour = $@"(?((\s+y\s+)?cuarto|(\s*)menos cuarto|(\s+y\s+)media|{BaseDateTime.DeltaMinuteRegex}(\s+(minutos?|mins?))|{DeltaMinuteNumRegex}(\s+(minutos?|mins?))))"; public const string TensTimeRegex = @"(?diez|veint(i|e)|treinta|cuarenta|cincuenta)"; - public static readonly string WrittenTimeRegex = $@"(?{HourNumRegex}\s*((y|menos)\s+)?({MinuteNumRegex}|({TensTimeRegex}((\s*y\s+)?{MinuteNumRegex})?)))"; + public static readonly string WrittenTimeRegex = $@"(?{HourNumRegex}\s*((y|(?menos))\s+)?(({TensTimeRegex}(\s*y\s+)?)?{MinuteNumRegex}))"; public static readonly string TimePrefix = $@"(?{LessThanOneHour}(\s+(pasad[ao]s)\s+(de\s+las|las)?|\s+(para|antes\s+de)?\s+(las?))?)"; public static readonly string TimeSuffix = $@"(?({LessThanOneHour}\s+)?({AmRegex}|{PmRegex}|{OclockRegex}))"; + public static readonly string GeneralDescRegex = $@"({DescRegex}|(?{AmRegex}|{PmRegex}))"; public static readonly string BasicTime = $@"(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex})"; - public static readonly string AtRegex = $@"\b(?<=\b(a las?)\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})\b(\s*\bh\b)?"; - public static readonly string ConnectNumRegex = $@"({BaseDateTime.HourRegex}(?00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)\s*{DescRegex})"; + public const string MidTimeRegex = @"(?((?media\s*noche)|(?media\s*madrugada)|(?media\s*mañana)|(?media\s*tarde)|(?medio\s*d[ií]a)))"; + public static readonly string AtRegex = $@"\b((?<=\b((a|de(sde)?)\s+las?|al)\s+)(({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})\b(\s*\bh\b)?(DescRegex)?|{MidTimeRegex})|{MidTimeRegex})"; + public static readonly string 
ConnectNumRegex = $@"({BaseDateTime.HourRegex}(?[0-5][0-9])\s*{DescRegex})"; + public static readonly string TimeRegexWithDotConnector = $@"({BaseDateTime.HourRegex}\.{BaseDateTime.MinuteRegex})"; public static readonly string TimeRegex1 = $@"(\b{TimePrefix}\s+)?({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})\s*({DescRegex}|\s*\bh\b)"; - public static readonly string TimeRegex2 = $@"(\b{TimePrefix}\s+)?(t)?{BaseDateTime.HourRegex}(\s*)?:(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?((\s*{DescRegex})|\b)"; - public static readonly string TimeRegex3 = $@"(\b{TimePrefix}\s+)?{BaseDateTime.HourRegex}\.{BaseDateTime.MinuteRegex}(\s*({DescRegex}|\bh\b))"; - public static readonly string TimeRegex4 = $@"\b(({DescRegex}?)|({BasicTime}?)({DescRegex}?))({TimePrefix}\s*)({HourNumRegex}|{BaseDateTime.HourRegex})?(\s+{TensTimeRegex}(\s+y\s+)?{MinuteNumRegex}?)?({OclockRegex})?\b"; + public static readonly string TimeRegex2 = $@"(\b{TimePrefix}\s+)?(t)?{BaseDateTime.HourRegex}(\s*)?:(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?(\s*({DescRegex}|\bh\b)|\b)"; + public static readonly string TimeRegex3 = $@"\b(({TimePrefix}\s+)?{TimeRegexWithDotConnector}(\s*({DescRegex}|{TimeSuffix}|\bh\b))|((las\s+{TimeRegexWithDotConnector})(?!\s*(por\s+cien(to)?|%))(\s*({DescRegex}|{TimeSuffix}|\bh\b)|\b)))"; + public static readonly string TimeRegex4 = $@"\b(({DescRegex}?)|({BasicTime}\s*)?({GeneralDescRegex}?)){TimePrefix}(\s*({HourNumRegex}|{BaseDateTime.HourRegex}))?(\s+{TensTimeRegex}(\s*(y\s+)?{MinuteNumRegex})?)?(\s*({OclockRegex}|{DescRegex}|\bh\b)|\b)"; public static readonly string TimeRegex5 = $@"\b({TimePrefix}|{BasicTime}{TimePrefix})\s+(\s*{DescRegex})?{BasicTime}?\s*{TimeSuffix}\b"; public static readonly string TimeRegex6 = $@"({BasicTime}(\s*{DescRegex})?\s+{TimeSuffix}\b)"; - public static readonly string TimeRegex7 = $@"\b{TimeSuffix}\s+a\s+las\s+{BasicTime}((\s*{DescRegex})|\b)"; + public static readonly 
string TimeRegex7 = $@"\b{TimeSuffix}\s+a\s+las\s+{BasicTime}((\s*{DescRegex}|\bh\b)|\b)"; public static readonly string TimeRegex8 = $@"\b{TimeSuffix}\s+{BasicTime}((\s*{DescRegex})|\b)"; - public static readonly string TimeRegex9 = $@"\b(?{HourNumRegex}\s+({TensTimeRegex}\s*)?(y\s+)?{MinuteNumRegex}?)\b"; - public const string TimeRegex10 = @"(a\s+la|al)\s+(madrugada|mañana|medio\s*d[ií]a|tarde|noche)"; - public static readonly string TimeRegex11 = $@"\b({WrittenTimeRegex})({DescRegex}?)\b"; + public static readonly string TimeRegex9 = $@"\b(?{HourNumRegex}\s+({TensTimeRegex}\s*)(y\s+)?{MinuteNumRegex}?)\b"; + public static readonly string TimeRegex11 = $@"\b({WrittenTimeRegex})(\s+{DescRegex})?\b"; public static readonly string TimeRegex12 = $@"(\b{TimePrefix}\s+)?{BaseDateTime.HourRegex}(\s*h\s*){BaseDateTime.MinuteRegex}(\s*{DescRegex})?"; - public const string PrepositionRegex = @"\b(?(a(l)?|en|de(l)?)?(\s*(la(s)?|el|los))?$)\b"; - public const string NowRegex = @"\b(?(justo\s+)?ahora(\s+mismo)?|en\s+este\s+momento|tan\s+pronto\s+como\s+sea\s+posible|tan\s+pronto\s+como\s+(pueda|puedas|podamos|puedan)|lo\s+m[aá]s\s+pronto\s+posible|recientemente|previamente)\b"; - public const string SuffixRegex = @"^\s*(((y|a|en|por)\s+la|al)\s+)?(mañana|madrugada|medio\s*d[ií]a|tarde|noche)\b"; - public const string TimeOfDayRegex = @"\b(?mañana|madrugada|(pasado\s+(el\s+)?)?medio\s?d[ií]a|tarde|noche|anoche)\b"; - public static readonly string SpecificTimeOfDayRegex = $@"\b(((((a\s+)?la|esta|siguiente|pr[oó]xim[oa]|[uú]ltim[oa])\s+)?{TimeOfDayRegex}))\b"; + public const string PrepositionRegex = @"(?^(,\s*)?(a(l)?|en|de(l)?)?(\s*(la(s)?|el|los))?$)"; + public const string LaterEarlyRegex = @"((?temprano)|(?fin(al)?(\s+de)?|m[aá]s\s+tarde))"; + public const string NowRegex = 
@"\b(?(justo\s+)?ahora(\s+mismo)?|en\s+este\s+momento|tan\s+pronto\s+como\s+sea\s+posible|tan\s+pronto\s+como\s+(pueda|puedas|podamos|puedan)|lo\s+m[aá]s\s+pronto\s+posible|recientemente|previamente|este entonces)\b"; + public const string SuffixRegex = @"^\s*(((y|a|en|por)\s+la|al)\s+)?(mañana|madrugada|medio\s*d[ií]a|(?(({LaterEarlyRegex}\s+)((del?|en|por)(\s+(el|los?|las?))?\s+)?)?(mañana|madrugada|pasado\s+(el\s+)?medio\s?d[ií]a|(?años|año|meses|mes|semanas|semana|d[ií]as|d[ií]a|horas|hora|h|hr|hrs|hs|minutos|minuto|mins|min|segundos|segundo|segs|seg)\b"; - public const string ConnectorRegex = @"^(,|t|para la|para las|cerca de la|cerca de las)$"; - public const string TimeHourNumRegex = @"(?veintiuno|veintidos|veintitres|veinticuatro|cero|uno|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce|trece|catorce|quince|diecis([eé])is|diecisiete|dieciocho|diecinueve|veinte)"; - public static readonly string PureNumFromTo = $@"((desde|de)\s+(la(s)?\s+)?)?({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}))?\s*{TillRegex}\s*({BaseDateTime.HourRegex}|{TimeHourNumRegex})\s*(?{PmRegex}|{AmRegex}|{DescRegex})?"; - public static readonly string PureNumBetweenAnd = $@"(entre\s+(la(s)?\s+)?)({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}))?\s*y\s*(la(s)?\s+)?({BaseDateTime.HourRegex}|{TimeHourNumRegex})\s*(?{PmRegex}|{AmRegex}|{DescRegex})?"; - public static readonly string TimeRegexWithDotConnector = $@"({BaseDateTime.HourRegex}(\s*\.\s*){BaseDateTime.MinuteRegex})"; - public static readonly string SpecificTimeFromTo = $@"({RangePrefixRegex})?(?(({TimeRegex2}|{TimeRegexWithDotConnector}(\s*{DescRegex})?)|({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}))?))\s*{TillRegex}\s*(?(({TimeRegex2}|{TimeRegexWithDotConnector}(\s*{DescRegex})?)|({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}))?))"; - public static readonly string SpecificTimeBetweenAnd = 
$@"({BetweenRegex})(?(({TimeRegex1}|{TimeRegex2}|{TimeRegexWithDotConnector}(\s*{DescRegex})?)|({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}))?))\s*{ConnectorAndRegex}\s*(?(({TimeRegex1}|{TimeRegex2}|{TimeRegexWithDotConnector}(\s*{DescRegex})?)|({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}))?))"; - public const string TimeUnitRegex = @"([^A-Za-z]{1,}|\b)(?horas|hora|h|minutos|minuto|mins|min|segundos|segundo|secs|sec)\b"; + public const string DateTimeTimeOfDayRegex = @"\b(?mañana|madrugada|(?pasado\s+(el\s+)?medio\s?d[ií]a|tarde|noche))\b"; + public static readonly string PeriodTimeOfDayRegex = $@"\b((en\s+(el|la|lo)?\s+)?({LaterEarlyRegex}\s+)?(est[ae]\s+)?{DateTimeTimeOfDayRegex})\b"; + public static readonly string PeriodSpecificTimeOfDayRegex = $@"\b(({LaterEarlyRegex}\s+)?est[ae]\s+{DateTimeTimeOfDayRegex}|({StrictRelativeRegex}\s+{PeriodTimeOfDayRegex})|anoche)\b"; + public const string UnitRegex = @"(?a[nñ]os?|(bi|tri|cuatri|se)mestre|mes(es)?|semanas?|fin(es)?\s+de\s+semana|finde|d[ií]as?|horas?|hra?s?|hs?|minutos?|mins?|segundos?|segs?|noches?)\b"; + public const string ConnectorRegex = @"^(,|t|(para|y|a|en|por) las?|(\s*,\s*)?((cerca|alrededor)\s+)?(de\s+las?|del))$"; + public const string TimeHourNumRegex = @"(?veint(i(uno|dos|tres|cuatro)|e)|cero|uno|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce|trece|catorce|quince|dieci(s([eé])is|siete|ocho|nueve))"; + public static readonly string PureNumFromTo = $@"((\b(desde|de)\s+(la(s)?\s+)?)?({BaseDateTime.HourRegex}|{TimeHourNumRegex})(?!\s+al?\b)(\s*(?{DescRegex}))?|(\b(desde|de)\s+(la(s)?\s+)?)({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}))?)\s*{TillRegex}\s*({BaseDateTime.HourRegex}|{TimeHourNumRegex})\s*(?{PmRegex}|{AmRegex}|{DescRegex})?"; + public static readonly string PureNumBetweenAnd = 
$@"(\bentre\s+(la(s)?\s+)?)(({BaseDateTime.TwoDigitHourRegex}{BaseDateTime.TwoDigitMinuteRegex})|{BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}))?\s*{RangeConnectorRegex}\s*(({BaseDateTime.TwoDigitHourRegex}{BaseDateTime.TwoDigitMinuteRegex})|{BaseDateTime.HourRegex}|{TimeHourNumRegex})\s*(?{PmRegex}|{AmRegex}|{DescRegex})?"; + public static readonly string SpecificTimeFromTo = $@"({RangePrefixRegex}\s+)?(?(({TimeRegex2}|{TimeRegexWithDotConnector}(\s*{DescRegex})?)|({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}))?))\s*{TillRegex}\s*(?(({TimeRegex2}|{TimeRegexWithDotConnector}(\s*{DescRegex})?)|({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}))?))"; + public static readonly string SpecificTimeBetweenAnd = $@"({BetweenRegex}\s+)(?(({TimeRegex1}|{TimeRegex2}|{TimeRegexWithDotConnector}(\s*{DescRegex})?)|({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}))?))\s*{RangeConnectorRegex}\s*(?(({TimeRegex1}|{TimeRegex2}|{TimeRegexWithDotConnector}(\s*{DescRegex})?)|({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\s*(?{DescRegex}))?))"; + public const string TimeUnitRegex = @"([^A-Za-z]{1,}|\b)(?(hora|minuto|min|segundo|se[cg])(?s)?|h)\b"; public static readonly string TimeFollowedUnit = $@"^\s*{TimeUnitRegex}"; public static readonly string TimeNumberCombinedWithUnit = $@"\b(?\d+(\,\d*)?)\s*{TimeUnitRegex}"; public static readonly string DateTimePeriodNumberCombinedWithUnit = $@"\b(?\d+(\.\d*)?)\s*{TimeUnitRegex}"; - public const string PeriodTimeOfDayWithDateRegex = @"\b(((y|a|en|por)\s+(la\s+)?|al\s+)?(((?primeras\s+horas\s+)|(?(últimas|altas)\s+horas\s+))?(de\s+la\s+)?(?(mañana|madrugada|(pasado\s+(el\s+)?)?medio\s?d[ií]a|tarde|noche|anoche))))(\s+(del|de))?\b"; + public static readonly string PeriodTimeOfDayWithDateRegex = 
$@"\b(((y|a|en|por)\s+(la\s+)?|al\s+)?((((?primeras\s+horas\s+)|(?(últimas|altas)\s+horas\s+))(de\s+la\s+)?|{LaterEarlyRegex}\s+(est[ae]\s+)?)?(?(mañana|madrugada|pasado\s+(el\s+)?medio\s?d[ií]a|(?\s*(y)\s+((un|uno|una)\s+)?(?media|cuarto))"; + public const string SuffixAndRegex = @"(?\s*(y)\s+((un[ao]?)\s+)?(?media|cuarto))"; public static readonly string FollowedUnit = $@"^\s*{UnitRegex}"; public static readonly string DurationNumberCombinedWithUnit = $@"\b(?\d+(\,\d*)?){UnitRegex}"; - public static readonly string AnUnitRegex = $@"\b(un(a)?)\s+{UnitRegex}"; + public static readonly string AnUnitRegex = $@"\b(una?|otr[ao])\s+{UnitRegex}"; public const string DuringRegex = @"^[.]"; - public const string AllRegex = @"\b(?tod[oa]?\s+(el|la)\s+(?año|mes|semana|d[ií]a))\b"; + public const string AllRegex = @"\b(?tod[oa]?\s+(el|la)\s+(?año|mes|semana|d[ií]a)|((una?|el|la)\s+)?(?año|mes|semana|d[ií]a)\s+enter[ao])\b"; public const string HalfRegex = @"\b(?medi[oa]\s+(?ano|mes|semana|d[íi]a|hora))\b"; public const string ConjunctionRegex = @"^[.]"; - public const string InexactNumberRegex = @"\b(pocos|poco|algo|varios)\b"; - public static readonly string InexactNumberUnitRegex = $@"\b(pocos|poco|algo|varios)\s+{UnitRegex}"; - public static readonly string HolidayRegex1 = $@"\b(?viernes santo|mi[eé]rcoles de ceniza|martes de carnaval|d[ií]a (de|de los) presidentes?|clebraci[oó]n de mao|año nuevo chino|año nuevo|noche vieja|(festividad de )?los mayos|d[ií]a de los inocentes|navidad|noche buena|d[ií]a de acci[oó]n de gracias|acci[oó]n de gracias|yuandan|halloween|noches de brujas|pascuas)(\s+(del?\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?|en))\s+año))?\b"; - public static readonly string HolidayRegex2 = $@"\b(?(d[ií]a( del?( la)?)? 
)?(martin luther king|todos los santos|blanco|san patricio|san valent[ií]n|san jorge|cinco de mayo|independencia|raza|trabajador))(\s+(del?\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?|en))\s+año))?\b"; - public static readonly string HolidayRegex3 = $@"\b(?(d[ií]a( del?( las?)?)? )(trabajador|madres?|padres?|[aá]rbol|mujer(es)?|solteros?|niños?|marmota|san valent[ií]n|maestro))(\s+(del?\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?|en))\s+año))?\b"; - public const string BeforeRegex = @"(antes(\s+del?(\s+las?)?)?)"; - public const string AfterRegex = @"(despues(\s*del?(\s+las?)?)?)"; - public const string SinceRegex = @"(desde(\s+(las?|el))?)"; - public const string AroundRegex = @"^[.]"; - public const string PeriodicRegex = @"\b(?a\s*diario|diariamente|mensualmente|semanalmente|quincenalmente|anualmente)\b"; - public const string EachExpression = @"cada|tod[oa]s\s*(l[oa]s)?"; - public static readonly string EachUnitRegex = $@"(?({EachExpression})\s*{UnitRegex})"; + public const string InexactNumberRegex = @"\b(pocos?|algo|vari[ao]s|algun[ao]s|un[ao]s)\b"; + public static readonly string InexactNumberUnitRegex = $@"({InexactNumberRegex})\s+{UnitRegex}"; + public static readonly string HolidayRegex1 = $@"\b(?viernes\s+(santo|negro)|mi[eé]rcoles de ceniza|martes de carnaval|d[ií]a (de|de los) presidentes?|clebraci[oó]n de mao|año nuevo chino|año nuevo|noche vieja|(festividad de )?los mayos|d[ií]a de los inocentes|navidad|noche buena|d[ií]a de acci[oó]n de gracias|acci[oó]n de gracias|yuandan|halloween|noches de brujas|pascuas)(\s+(del?\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?|en))\s+año))?\b"; + public static readonly string HolidayRegex2 = $@"\b(?(d[ií]a( del?( la)?)? 
)?(martin luther king|todos los santos|tierra|blanco|san patricio|san valent[ií]n|san jorge|cinco de mayo|independencia|raza|trabajador))(\s+(del?\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?|en))\s+año))?\b"; + public static readonly string HolidayRegex3 = $@"\b(?(d[ií]a( internacional)?( del?( l[ao]s?)?)? )(trabajador(es)?|madres?|padres?|[aá]rbol|mujer(es)?|solteros?|niños?|marmota|san valent[ií]n|maestro))(\s+(del?\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?|en))\s+año))?\b"; + public const string BeforeRegex = @"(\b((ante(s|rior)|m[aá]s\s+temprano|no\s+m[aá]s\s+tard(e|ar)|hasta|(?tan\s+tarde\s+como))(\s+(del?|a|que)(\s+(el|las?|los?))?)?)|(?)((?<\s*=)|<))"; + public const string AfterRegex = @"((\b(despu[eé]s|(año\s+)?posterior|m[aá]s\s+tarde|a\s+primeros)(\s*(del?|en|a)(\s+(el|las?|los?))?)?|(empi?en?zando|comenzando)(\s+(el|las?|los?))?)\b|(?>\s*=)|>))"; + public const string SinceRegex = @"\b(((cualquier\s+tiempo\s+)?(desde|a\s+partir\s+del?)|tan\s+(temprano|pronto)\s+como(\s+(de|a))?)(\s+(el|las?|los?))?)\b"; + public static readonly string SinceRegexExp = $@"({SinceRegex}|\bde\b)"; + public const string AroundRegex = @"(?:\b(?:cerca|alrededor|aproximadamente)(\s+(de\s+(las?|el)|del?))?\s*\b)"; + public const string PeriodicRegex = @"\b(?a\s*diario|diaria(s|mente)|(bi|tri)?(semanal|quincenal|mensual|(se|tri)mestral|anual)(es|mente)?)\b"; + public const string EachExpression = @"\b(cada|tod[oa]s\s*(l[oa]s)?)\b\s*(?!\s*l[oa]\b)"; + public static readonly string EachUnitRegex = $@"(?({EachExpression})\s*({UnitRegex}|(?fin(es)?\s+de\s+semana|finde)\b))"; public static readonly string EachPrefixRegex = $@"(?({EachExpression})\s*$)"; public static readonly string EachDayRegex = $@"\s*({EachExpression})\s*d[ií]as\s*\b"; public static readonly string BeforeEachDayRegex = $@"({EachExpression})\s*d[ií]as(\s+a\s+las?)?\s*\b"; public static readonly string SetEachRegex = $@"(?({EachExpression})\s*)"; - public static readonly string 
LaterEarlyPeriodRegex = $@"\b(({PrefixPeriodRegex})\s+(?{OneWordPeriodRegex})|({UnspecificEndOfRangeRegex}))\b"; - public const string RelativeWeekRegex = @"(((la|el)\s+)?(((esta|este|pr[oó]xim[oa]|[uú]ltim(o|as|os))\s+semana(s)?)|(semana(s)?\s+(que\s+viene|pasad[oa]))))"; + public static readonly string LaterEarlyPeriodRegex = $@"\b(({PrefixPeriodRegex})\s+(?{OneWordPeriodRegex}|(?{BaseDateTime.FourDigitYearRegex}))|({UnspecificEndOfRangeRegex}))\b"; + public const string RelativeWeekRegex = @"(((la|el)\s+)?(((est[ae]|pr[oó]xim[oa]|[uú]ltim(o|as|os))\s+semanas?)|(semanas?\s+(que\s+viene|pasad[oa]))))"; public static readonly string WeekWithWeekDayRangeRegex = $@"\b((({RelativeWeekRegex})((\s+entre\s+{WeekDayRegex}\s+y\s+{WeekDayRegex})|(\s+de\s+{WeekDayRegex}\s+a\s+{WeekDayRegex})))|((entre\s+{WeekDayRegex}\s+y\s+{WeekDayRegex})|(de\s+{WeekDayRegex}\s+a\s+{WeekDayRegex})){OfPrepositionRegex}\s+{RelativeWeekRegex})\b"; - public const string GeneralEndingRegex = @"^[.]"; + public const string GeneralEndingRegex = @"^\s*((\.,)|\.|,|!|\?)?\s*$"; public const string MiddlePauseRegex = @"^[.]"; - public const string PrefixArticleRegex = @"^[\.]"; + public const string PrefixArticleRegex = @"\b(e[ln]\s+(d[ií]a\s+)?)"; public const string OrRegex = @"^[.]"; - public static readonly string YearPlusNumberRegex = $@"\b(años?\s+((?(\d{{2,4}}))|{FullTextYearRegex}))\b"; - public const string NumberAsTimeRegex = @"^[.]"; - public const string TimeBeforeAfterRegex = @"^[.]"; - public const string DateNumberConnectorRegex = @"^[.]"; + public static readonly string SpecialYearTermsRegex = $@"\b(({SpecialYearPrefixes}\s+a[nñ]os?\s+|a[nñ]os?\s+({SpecialYearPrefixes}\s+)?)(de\s+)?)"; + public static readonly string YearPlusNumberRegex = $@"\b({SpecialYearTermsRegex}((?(\d{{2,4}}))|{FullTextYearRegex}))\b"; + public static readonly string NumberAsTimeRegex = $@"\b({WrittenTimeRegex}|{HourRegex}(?\s*h(oras)?)?)\b"; + public static readonly string TimeBeforeAfterRegex = 
$@"\b((?<=\b(antes|no\s+m[aá]s\s+tard(e|ar)\s+(de|a\s+las?)|por| después)\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}|{MidTimeRegex}))\b"; + public const string DateNumberConnectorRegex = @"^\s*(?a\s+las)\s*$"; public const string CenturyRegex = @"^[.]"; - public const string DecadeRegex = @"(?diez|veinte|treinta|cuarenta|cincuenta|sesenta|setenta|ochenta|noventa)"; - public static readonly string DecadeWithCenturyRegex = $@"(los\s+)?((((d[ée]cada(\s+de)?)\s+)(((?\d|1\d|2\d)?(?\d0))))|a[ñn]os\s+((((dos\s+)?mil\s+)?({WrittenOneHundredToNineHundredRegex}\s+)?{DecadeRegex})|((dos\s+)?mil\s+)?({WrittenOneHundredToNineHundredRegex})(\s+{DecadeRegex}?)|((dos\s+)?mil)(\s+{WrittenOneHundredToNineHundredRegex}\s+)?{DecadeRegex}?))"; - public static readonly string RelativeDecadeRegex = $@"\b(((el|las?)\s+)?{RelativeRegex}\s+(((?[\d]+)|{WrittenOneToNineRegex})\s+)?d[eé]cadas?)\b"; - public const string ComplexDatePeriodRegex = @"^[.]"; - public static readonly string YearSuffix = $@"(,?\s*({YearRegex}|{FullTextYearRegex}))"; - public const string AgoRegex = @"\b(antes\s+de\s+(?hoy|ayer|mañana)|antes)\b"; - public const string LaterRegex = @"\b(despu[eé]s|desde\s+ahora|a\s+partir\s+de\s+(?hoy|ayer|mañana))\b"; + public const string DecadeRegex = @"(?diez|veinte|treinta|cuarenta|cincuenta|se[st]enta|ochenta|noventa)"; + public static readonly string DecadeWithCenturyRegex = $@"(los\s+)?((((d[ée]cada(\s+de)?)\s+)(((?\d|1\d|2\d)?(?\d0))))|a[ñn]os\s+((?\d|1\d|2\d)?(?\d0)\b)|a[ñn]os\s+(((?((dos\s+)?mil\s+)?({WrittenOneHundredToNineHundredRegex}\s+)?)?{DecadeRegex})|(?((dos\s+)?mil\s+)?({WrittenOneHundredToNineHundredRegex}))(\s+{DecadeRegex}?)|(?((dos\s+)?mil)(\s+{WrittenOneHundredToNineHundredRegex}\s+)?){DecadeRegex}?))"; + public static readonly string RelativeDecadeRegex = $@"\b(((el|las?)\s+)?{RelativeRegex}\s+((?[\w,]+)\s+)?(d[eé]cada|decenio)s?)\b"; + public static readonly string ComplexDatePeriodRegex = 
$@"(?:((de(sde)?)\s+)?(?.+)\s*({StrictTillRegex})\s*(?.+)|((entre)\s+)(?.+)\s*({RangeConnectorRegex})\s*(?.+))"; + public const string AmbiguousPointRangeRegex = @"^(mar\.?)$"; + public static readonly string YearSuffix = $@"((,|\sdel?)?\s*({YearRegex}|{FullTextYearRegex}))"; + public static readonly string SinceYearSuffixRegex = $@"(^\s*{SinceRegex}(\s*(el\s+)?año\s*)?{YearSuffix})"; + public const string AgoRegex = @"\b(antes\s+de\s+(?hoy|ayer|mañana)|antes|hace)\b"; + public const string LaterRegex = @"\b(despu[eé]s(?!\s+de\b)|desde\s+ahora|(a\s+partir|despu[eé]s)\s+de\s+(ahora|(?hoy|ayer|mañana)))\b"; public const string Tomorrow = @"mañana"; public static readonly Dictionary UnitMap = new Dictionary { { @"años", @"Y" }, { @"año", @"Y" }, + { @"anos", @"Y" }, + { @"ano", @"Y" }, { @"meses", @"MON" }, { @"mes", @"MON" }, + { @"trimestre", @"3MON" }, + { @"trimestres", @"3MON" }, + { @"cuatrimestre", @"4MON" }, + { @"cuatrimestres", @"4MON" }, + { @"semestre", @"6MON" }, + { @"semestres", @"6MON" }, + { @"bimestre", @"2MON" }, + { @"bimestres", @"2MON" }, { @"semanas", @"W" }, { @"semana", @"W" }, + { @"fin de semana", @"WE" }, + { @"fines de semana", @"WE" }, + { @"finde", @"WE" }, { @"dias", @"D" }, { @"dia", @"D" }, { @"días", @"D" }, { @"día", @"D" }, { @"jornada", @"D" }, + { @"noche", @"D" }, + { @"noches", @"D" }, { @"horas", @"H" }, { @"hora", @"H" }, { @"hrs", @"H" }, + { @"hras", @"H" }, + { @"hra", @"H" }, { @"hr", @"H" }, { @"h", @"H" }, { @"minutos", @"M" }, @@ -241,17 +289,26 @@ public static class DateTimeDefinitions { { @"años", 31536000 }, { @"año", 31536000 }, + { @"anos", 31536000 }, + { @"ano", 31536000 }, { @"meses", 2592000 }, { @"mes", 2592000 }, { @"semanas", 604800 }, { @"semana", 604800 }, + { @"fin de semana", 172800 }, + { @"fines de semana", 172800 }, + { @"finde", 172800 }, { @"dias", 86400 }, { @"dia", 86400 }, { @"días", 86400 }, { @"día", 86400 }, + { @"noche", 86400 }, + { @"noches", 86400 }, { @"horas", 3600 }, { @"hora", 3600 
}, { @"hrs", 3600 }, + { @"hras", 3600 }, + { @"hra", 3600 }, { @"hr", 3600 }, { @"h", 3600 }, { @"minutos", 60 }, @@ -265,7 +322,8 @@ public static class DateTimeDefinitions }; public static readonly Dictionary SpecialYearPrefixesMap = new Dictionary { - { @"", @"" } + { @"fiscal", @"FY" }, + { @"escolar", @"SY" } }; public static readonly Dictionary SeasonMap = new Dictionary { @@ -289,24 +347,75 @@ public static class DateTimeDefinitions { @"1er", 1 }, { @"1ro", 1 }, { @"1ra", 1 }, + { @"1.º", 1 }, + { @"1º", 1 }, + { @"1ª", 1 }, { @"segundo", 2 }, { @"segunda", 2 }, { @"2do", 2 }, { @"2da", 2 }, + { @"2.º", 2 }, + { @"2º", 2 }, + { @"2ª", 2 }, { @"tercer", 3 }, { @"tercero", 3 }, { @"tercera", 3 }, { @"3er", 3 }, { @"3ro", 3 }, { @"3ra", 3 }, + { @"3.º", 3 }, + { @"3º", 3 }, + { @"3ª", 3 }, { @"cuarto", 4 }, { @"cuarta", 4 }, { @"4to", 4 }, { @"4ta", 4 }, + { @"4.º", 4 }, + { @"4º", 4 }, + { @"4ª", 4 }, { @"quinto", 5 }, { @"quinta", 5 }, { @"5to", 5 }, - { @"5ta", 5 } + { @"5ta", 5 }, + { @"5.º", 5 }, + { @"5º", 5 }, + { @"5ª", 5 }, + { @"sexto", 6 }, + { @"sexta", 6 }, + { @"6to", 6 }, + { @"6ta", 6 }, + { @"septimo", 7 }, + { @"séptimo", 7 }, + { @"septima", 7 }, + { @"séptima", 7 }, + { @"7mo", 7 }, + { @"7ma", 7 }, + { @"octavo", 8 }, + { @"octava", 8 }, + { @"8vo", 8 }, + { @"8va", 8 }, + { @"noveno", 9 }, + { @"novena", 9 }, + { @"9no", 9 }, + { @"9na", 9 }, + { @"decimo", 10 }, + { @"décimo", 10 }, + { @"decima", 10 }, + { @"décima", 10 }, + { @"10mo", 10 }, + { @"10ma", 10 }, + { @"undecimo", 11 }, + { @"undécimo", 11 }, + { @"undecima", 11 }, + { @"undécima", 11 }, + { @"11mo", 11 }, + { @"11ma", 11 }, + { @"duodecimo", 12 }, + { @"duodécimo", 12 }, + { @"duodecima", 12 }, + { @"duodécima", 12 }, + { @"12mo", 12 }, + { @"12ma", 12 } }; public static readonly Dictionary DayOfWeek = new Dictionary { @@ -317,14 +426,33 @@ public static class DateTimeDefinitions { @"jueves", 4 }, { @"viernes", 5 }, { @"sabado", 6 }, + { @"sábado", 6 }, { @"domingo", 0 }, 
+ { @"dom", 0 }, + { @"lun", 1 }, + { @"mar", 2 }, + { @"mie", 3 }, + { @"mié", 3 }, + { @"jue", 4 }, + { @"vie", 5 }, + { @"sab", 6 }, + { @"sáb", 6 }, + { @"dom.", 0 }, + { @"lun.", 1 }, + { @"mar.", 2 }, + { @"mie.", 3 }, + { @"mié.", 3 }, + { @"jue.", 4 }, + { @"vie.", 5 }, + { @"sab.", 6 }, + { @"sáb.", 6 }, + { @"do", 0 }, { @"lu", 1 }, { @"ma", 2 }, { @"mi", 3 }, { @"ju", 4 }, { @"vi", 5 }, - { @"sa", 6 }, - { @"do", 0 } + { @"sa", 6 } }; public static readonly Dictionary MonthOfYear = new Dictionary { @@ -350,10 +478,25 @@ public static class DateTimeDefinitions { @"jul", 7 }, { @"ago", 8 }, { @"sept", 9 }, + { @"sep", 9 }, { @"set", 9 }, { @"oct", 10 }, { @"nov", 11 }, { @"dic", 12 }, + { @"ene.", 1 }, + { @"feb.", 2 }, + { @"mar.", 3 }, + { @"abr.", 4 }, + { @"may.", 5 }, + { @"jun.", 6 }, + { @"jul.", 7 }, + { @"ago.", 8 }, + { @"sept.", 9 }, + { @"sep.", 9 }, + { @"set.", 9 }, + { @"oct.", 10 }, + { @"nov.", 11 }, + { @"dic.", 12 }, { @"1", 1 }, { @"2", 2 }, { @"3", 3 }, @@ -383,10 +526,13 @@ public static class DateTimeDefinitions { @"una", 1 }, { @"uno", 1 }, { @"dos", 2 }, + { @"dós", 2 }, { @"tres", 3 }, + { @"trés", 3 }, { @"cuatro", 4 }, { @"cinco", 5 }, { @"seis", 6 }, + { @"séis", 6 }, { @"siete", 7 }, { @"ocho", 8 }, { @"nueve", 9 }, @@ -404,6 +550,7 @@ public static class DateTimeDefinitions { @"dieciocho", 18 }, { @"diecinueve", 19 }, { @"veinte", 20 }, + { @"veinti", 20 }, { @"ventiuna", 21 }, { @"ventiuno", 21 }, { @"veintiun", 21 }, @@ -421,14 +568,16 @@ public static class DateTimeDefinitions { @"veintisiete", 27 }, { @"veintiocho", 28 }, { @"veintinueve", 29 }, - { @"treinta", 30 } + { @"treinta", 30 }, + { @"cuarenta", 40 }, + { @"cincuenta", 50 } }; public static readonly Dictionary> HolidayNames = new Dictionary> { { @"padres", new string[] { @"diadelpadre" } }, { @"madres", new string[] { @"diadelamadre" } }, { @"acciondegracias", new string[] { @"diadegracias", @"diadeacciondegracias", @"acciondegracias" } }, - { @"trabajador", new 
string[] { @"diadeltrabajador" } }, + { @"trabajador", new string[] { @"diadeltrabajador", @"diadelostrabajadores", @"diainternacionaldeltrabajador", @"diainternacionaldelostrabajadores" } }, { @"delaraza", new string[] { @"diadelaraza", @"diadeladiversidadcultural" } }, { @"memoria", new string[] { @"diadelamemoria" } }, { @"pascuas", new string[] { @"diadepascuas", @"pascuas" } }, @@ -437,17 +586,22 @@ public static class DateTimeDefinitions { @"añonuevo", new string[] { @"añonuevo", @"diadeañonuevo" } }, { @"nochevieja", new string[] { @"nochevieja", @"diadenochevieja" } }, { @"yuandan", new string[] { @"yuandan" } }, + { @"earthday", new string[] { @"diadelatierra" } }, { @"maestro", new string[] { @"diadelmaestro" } }, { @"todoslossantos", new string[] { @"todoslossantos" } }, { @"niño", new string[] { @"diadelniño" } }, - { @"mujer", new string[] { @"diadelamujer" } } + { @"mujer", new string[] { @"diadelamujer" } }, + { @"independencia", new string[] { @"diadelaindependencia", @"diadeindependencia", @"independencia" } }, + { @"blackfriday", new string[] { @"viernesnegro" } }, + { @"goodfriday", new string[] { @"viernessanto" } }, + { @"stpatrickday", new string[] { @"sanpatricio", @"diadesanpatricio" } }, + { @"valentinesday", new string[] { @"sanvalentin", @"diadesanvalentin" } } }; public static readonly Dictionary VariableHolidaysTimexDictionary = new Dictionary { { @"padres", @"-06-WXX-7-3" }, { @"madres", @"-05-WXX-7-2" }, { @"acciondegracias", @"-11-WXX-4-4" }, - { @"trabajador", @"-05-WXX-1-1" }, { @"delaraza", @"-10-WXX-1-2" }, { @"memoria", @"-03-WXX-2-4" } }; @@ -456,37 +610,48 @@ public static class DateTimeDefinitions { @"mitad", 0.5 }, { @"cuarto", 0.25 } }; - public const string DateTokenPrefix = @"en "; - public const string TimeTokenPrefix = @"a las "; - public const string TokenBeforeDate = @"el "; - public const string TokenBeforeTime = @"la "; - public const string UpcomingPrefixRegex = @".^"; - public static readonly string 
NextPrefixRegex = $@"(pr[oó]xim[oa]|siguiente|{UpcomingPrefixRegex})\b"; - public const string PastPrefixRegex = @".^"; - public static readonly string PreviousPrefixRegex = $@"([uú]ltim[oa]|{PastPrefixRegex})\b"; - public const string PreviousSuffixRegex = @"\b(pasad[ao])\b"; - public const string ThisPrefixRegex = @"(est[ea])\b"; + public const string UpcomingPrefixRegex = @"((este\s+))"; + public static readonly string NextPrefixRegex = $@"\b({UpcomingPrefixRegex}?pr[oó]xim[oa]s?|siguiente|que\s+viene)\b"; + public const string PastPrefixRegex = @"((este\s+))"; + public static readonly string PreviousPrefixRegex = $@"\b({PastPrefixRegex}?pasad[oa]s?(?!(\s+el)?\s+medio\s*d[ií]a)|[uú]ltim[oa]s?|anterior)\b"; + public const string ThisPrefixRegex = @"(est?[ea]|actual)\b"; + public const string PrefixWeekDayRegex = @"(\s*((,?\s*el)|[-—–]))"; + public static readonly string ThisRegex = $@"\b((est[ae]\s*)(semana{PrefixWeekDayRegex}?)?\s*{WeekDayRegex})|({WeekDayRegex}\s*((de\s+)?esta\s+semana))\b"; + public static readonly string LastDateRegex = $@"\b(({PreviousPrefixRegex}\s+(semana{PrefixWeekDayRegex}?)?|(la\s+)?semana\s+{PreviousPrefixRegex}{PrefixWeekDayRegex})\s*{WeekDayRegex})|(este\s+)?({WeekDayRegex}\s+([uú]ltimo|pasado|anterior))|({WeekDayRegex}(\s+((de\s+)?((esta|la)\s+([uú]ltima\s+)?semana)|(de\s+)?(la\s+)?semana\s+(pasada|anterior))))\b"; + public static readonly string NextDateRegex = $@"\b((({NextPrefixRegex}\s+)(semana{PrefixWeekDayRegex}?)?|(la\s+)?semana\s+{NextPrefixRegex}{PrefixWeekDayRegex})\s*{WeekDayRegex})|(este\s+)?({WeekDayRegex}\s+(pr[oó]ximo|siguiente|que\s+viene))|({WeekDayRegex}(\s+(de\s+)?(la\s+)?((pr[oó]xima|siguiente)\s+semana|semana\s+(pr[oó]xima|siguiente))))\b"; public const string RelativeDayRegex = @"(?((este|pr[oó]ximo|([uú]ltim(o|as|os)))\s+días)|(días\s+((que\s+viene)|pasado)))\b"; - public const string RestOfDateRegex = @"\bresto\s+((del|de)\s+)?((la|el|est[ae])\s+)?(?semana|mes|año|decada)(\s+actual)?\b"; - public const string 
RelativeDurationUnitRegex = @"^[\.]"; - public const string ReferencePrefixRegex = @"(mism[ao]|aquel)\b"; + public const string RestOfDateRegex = @"\bresto\s+((del|de)\s+)?((la|el|est?[ae])\s+)?(?semana|mes|año|decada)(\s+actual)?\b"; + public static readonly string WithinNextPrefixRegex = $@"\b(dentro\s+de((\s+(el|l[ao]s?))?\s+(?{NextPrefixRegex}))?)(?=\s*$)\b"; + public static readonly string DurationUnitRegex = $@"(?{DateUnitRegex}|horas?|hra?s?|hs?|minutos?|mins?|segundos?|segs?|noches?)\b"; + public const string DurationConnectorRegex = @"^\s*(?\s+|y|,)\s*$"; + public static readonly string RelativeDurationUnitRegex = $@"(?:(?<=({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+)({DurationUnitRegex}))"; + public const string ReferencePrefixRegex = @"(mism[ao]|aquel|est?e)\b"; public static readonly string ReferenceDatePeriodRegex = $@"\b{ReferencePrefixRegex}\s+({DateUnitRegex}|fin\s+de\s+semana)\b"; public const string FromToRegex = @"\b(from).+(to)\b.+"; public const string SingleAmbiguousMonthRegex = @"^(the\s+)?(may|march)$"; - public const string UnspecificDatePeriodRegex = @"^[.]"; - public const string PrepositionSuffixRegex = @"\b(on|in|at|around|for|during|since|from|to)$"; - public const string RestOfDateTimeRegex = @"\bresto\s+((del|de)\s+)?((la|el|est[ae])\s+)?(?(día|jornada))(\s+de\s+hoy)?\b"; + public const string UnspecificDatePeriodRegex = @"^[\.]"; + public const string PrepositionSuffixRegex = @"\b(en|el|la|cerca|alrededor|desde|durante|hasta|hacia)$"; + public const string RestOfDateTimeRegex = @"\bresto\s+((del?)\s+)?((la|el|est[ae])\s+)?(?(día|jornada))(\s+de\s+hoy)?\b"; public const string SetWeekDayRegex = @"^[\.]"; public const string NightRegex = @"\b(medionoche|noche)\b"; public const string CommonDatePrefixRegex = @"^[\.]"; - public const string DurationUnitRegex = @"^[\.]"; - public const string DurationConnectorRegex = @"^[.]"; - public const string SuffixAfterRegex = @"^[.](?!$)"; - public const string YearPeriodRegex = 
@"^[.]"; - public const string FutureSuffixRegex = @"\b(despu[ée]s)\b"; + public const string SuffixAfterRegex = @"\b((a\s+)?(o|y)\s+(arriba|despu[eé]s|posterior|mayor|m[aá]s\s+tarde)(?!\s+(que|de)))\b"; + public static readonly string YearPeriodRegex = $@"((((de(sde)?|durante|en)\s+)?{YearRegex}\s*({TillRegex})\s*{YearRegex})|(((entre)\s+){YearRegex}\s*({RangeConnectorRegex})\s*{YearRegex}))"; + public const string FutureSuffixRegex = @"\b(siguiente(s)?|pr[oó]xim[oa](s)?|(en\s+el\s+)?futuro)\b"; + public const string PastSuffixRegex = @"^\b$"; + public static readonly string ModPrefixRegex = $@"\b({RelativeRegex}|{AroundRegex}|{BeforeRegex}|{AfterRegex}|{SinceRegex})\b"; + public static readonly string ModSuffixRegex = $@"\b({AgoRegex}|{LaterRegex}|{BeforeAfterRegex}|{FutureSuffixRegex}|{PastSuffixRegex})\b"; public static readonly Dictionary WrittenDecades = new Dictionary { - { @"", 0 } + { @"diez", 10 }, + { @"veinte", 20 }, + { @"treinta", 30 }, + { @"cuarenta", 40 }, + { @"cincuenta", 50 }, + { @"sesenta", 60 }, + { @"setenta", 70 }, + { @"ochenta", 80 }, + { @"noventa", 90 } }; public static readonly Dictionary SpecialDecadeCases = new Dictionary { @@ -494,18 +659,41 @@ public static class DateTimeDefinitions }; public const string DefaultLanguageFallback = @"DMY"; public static readonly string[] DurationDateRestrictions = { @"hoy" }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"^\d{4}$", @"(\d\.\d{4}|\d{4}\.\d)" }, + { @"^(este\s+)?mi(\s+([uú]ltimo|pasado|anterior|pr[oó]ximo|siguiente|que\s+viene))?$", @"\b(este\s+)?mi(\s+([uú]ltimo|pasado|anterior|pr[oó]ximo|siguiente|que\s+viene))?\b" }, + { @"^a[nñ]o$", @"(? 
EarlyMorningTermList = new List { @"madrugada" }; public static readonly IList MorningTermList = new List { - @"mañana" + @"mañana", + @"la mañana" }; public static readonly IList AfternoonTermList = new List { @"pasado mediodia", - @"pasado el mediodia" + @"pasado el mediodia", + @"pasado mediodía", + @"pasado el mediodía", + @"pasado medio dia", + @"pasado el medio dia", + @"pasado medio día", + @"pasado el medio día" }; public static readonly IList EveningTermList = new List { @@ -530,7 +718,8 @@ public static class DateTimeDefinitions public static readonly IList MinusOneDayTerms = new List { @"ayer", - @"ultimo dia" + @"ultimo dia", + @"dia anterior" }; public static readonly IList PlusTwoDayTerms = new List { @@ -550,25 +739,34 @@ public static class DateTimeDefinitions public static readonly IList MonthToDateTerms = new List { @"mes a la fecha", - @"meses a la fecha" + @"mes hasta la fecha" }; public static readonly IList WeekendTerms = new List { - @"fin de semana" + @"finde", + @"fin de semana", + @"fines de semana" }; public static readonly IList WeekTerms = new List { @"semana" }; + public static readonly IList FortnightTerms = new List + { + @"quincena", + @"la quincena" + }; public static readonly IList YearTerms = new List { @"año", - @"años" + @"años", + @"ano", + @"anos" }; public static readonly IList YearToDateTerms = new List { @"año a la fecha", - @"años a la fecha" + @"año hasta la fecha" }; public static readonly Dictionary SpecialCharactersEquivalent = new Dictionary { @@ -578,5 +776,18 @@ public static class DateTimeDefinitions { 'ó', 'o' }, { 'ú', 'u' } }; + public const string DoubleMultiplierRegex = @"^(bi)(-|\s)?"; + public const string DayTypeRegex = @"(d[ií]as?|diari(o|as|amente))$"; + public const string WeekTypeRegex = @"(semanas?|semanalmente)$"; + public const string BiWeekTypeRegex = @"(quincenalmente)$"; + public const string WeekendTypeRegex = @"(fin(es)?\s+de\s+semana|finde)$"; + public const string MonthTypeRegex = 
@"(mes(es)?|mensual(es|mente)?)$"; + public const string QuarterTypeRegex = @"(trimestral(es|mente)?)$"; + public const string SemiAnnualTypeRegex = @"(semestral(es|mente)?)$"; + public const string YearTypeRegex = @"(a[nñ]os?|anual(mente)?)$"; + public static readonly IList ThisTerms = new List + { + @"esta" + }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/NumbersDefinitions.cs index e002efb218..e9525e3069 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/NumbersDefinitions.cs @@ -24,12 +24,25 @@ public static class NumbersDefinitions public const string LangMarker = @"Spa"; public const bool CompoundNumberLanguage = false; public const bool MultiDecimalSeparatorCulture = true; + public static readonly IList NonStandardSeparatorVariants = new List + { + @"es-mx", + @"es-do", + @"es-sv", + @"es-gt", + @"es-hn", + @"es-ni", + @"es-pa", + @"es-pr" + }; public const string HundredsNumberIntegerRegex = @"(cuatrocient[ao]s|trescient[ao]s|seiscient[ao]s|setecient[ao]s|ochocient[ao]s|novecient[ao]s|doscient[ao]s|quinient[ao]s|(?(? 
NumbersWithPlaceHolder = (placeholder) => $@"(((?(antes\s+de|anterior\s+a)(l|\s+la)\s+[uú]ltim[ao]|((ante)?pen)?[uú]ltim[ao]s?|pr[oó]xim[ao]s?|anterior(es)?|actual(es)?|siguientes?)"; public static readonly string ComplexOrdinalRegex = $@"(({OverThousandOrdinalRegex}(\s)?)?{UnderThousandOrdinalRegex}|{OverThousandOrdinalRegex})"; public static readonly string SufixRoundOrdinalRegex = $@"(({AllIntRegex})({SimpleRoundOrdinalRegex}))"; public static readonly string ComplexRoundOrdinalRegex = $@"((({SufixRoundOrdinalRegex}(\s)?)?{ComplexOrdinalRegex})|{SufixRoundOrdinalRegex})"; - public static readonly string AllOrdinalRegex = $@"{ComplexOrdinalRegex}|{SimpleRoundOrdinalRegex}|{ComplexRoundOrdinalRegex}"; - public const string OrdinalSuffixRegex = @"(?<=\b)(\d*(1r[oa]|2d[oa]|3r[oa]|4t[oa]|5t[oa]|6t[oa]|7m[oa]|8v[oa]|9n[oa]|0m[oa]|11[vm][oa]|12[vm][oa]))(?=\b)"; + public static readonly string AllOrdinalNumberRegex = $@"{ComplexOrdinalRegex}|{SimpleRoundOrdinalRegex}|{ComplexRoundOrdinalRegex}"; + public static readonly string AllOrdinalRegex = $@"(?:{AllOrdinalNumberRegex}s?|{RelativeOrdinalRegex})"; + public const string OrdinalSuffixRegex = @"(?<=\b)(\d*((1(er|r[oa])|2d[oa]|3r[oa]|4t[oa]|5t[oa]|6t[oa]|7m[oa]|8v[oa]|9n[oa]|0m[oa]|11[vm][oa]|12[vm][oa])|\d\.?[ºª]))(?=\b)"; public static readonly string OrdinalNounRegex = $@"(?<=\b){AllOrdinalRegex}(?=\b)"; public static readonly string SpecialFractionInteger = $@"((({AllIntRegex})i?({ZeroToNineIntegerRegex})|({AllIntRegex}))a?v[oa]s?)"; - public const string FractionNotationRegex = @"(((?<=\W|^)-\s*)|(?<=\b))\d+[/]\d+(?=(\b[^/]|$))"; + public static readonly string FractionNotationRegex = $@"{BaseNumbers.FractionNotationRegex}"; public const string FractionNotationWithSpacesRegex = @"(((?<=\W|^)-\s*)|(?<=\b))\d+\s+\d+[/]\d+(?=(\b[^/]|$))"; - public static readonly string FractionNounRegex = 
$@"(?<=\b)({AllIntRegex}\s+((y|con)\s+)?)?({AllIntRegex})(\s+((y|con)\s)?)((({AllOrdinalRegex})s?|({SpecialFractionInteger})|({SufixRoundOrdinalRegex})s?)|medi[oa]s?|tercios?)(?=\b)"; - public static readonly string FractionNounWithArticleRegex = $@"(?<=\b)({AllIntRegex}\s+(y\s+)?)?(un|un[oa])(\s+)(({AllOrdinalRegex})|({SufixRoundOrdinalRegex})|(y\s+)?medi[oa]s?)(?=\b)"; + public static readonly string FractionMultiplierRegex = $@"(?\s+(y|con)\s+(medio|(un|{TwoToNineIntegerRegex})\s+(medio|terci[oa]?|cuart[oa]|quint[oa]|sext[oa]|s[eé]ptim[oa]|octav[oa]|noven[oa]|d[eé]cim[oa])s?))"; + public static readonly string RoundMultiplierWithFraction = $@"(?(?:(mil\s+millones|mill[oó]n(es)?|bill[oó]n(es)?|trill[oó]n(es)?|cuatrill[oó]n(es)?|quintill[oó]n(es)?|sextill[oó]n(es)?|septill[oó]n(es)?)))(?={FractionMultiplierRegex}?$)"; + public static readonly string RoundMultiplierRegex = $@"\b\s*({RoundMultiplierWithFraction}|(?(mil))$)"; + public static readonly string FractionNounRegex = $@"(?<=\b)({AllIntRegex}\s+((y|con)\s+)?)?({AllIntRegex}\s+((({AllOrdinalNumberRegex}|{SufixRoundOrdinalRegex})s|{SpecialFractionInteger})|((y|con)\s+)?(medi[oa]s?|tercios?))|(medio|un\s+cuarto\s+de)\s+{RoundNumberIntegerRegex})(?=\b)"; + public static readonly string FractionNounWithArticleRegex = $@"(?<=\b)(({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\s+((y|con)\s+)?)?((un|un[oa])(\s+)(({AllOrdinalNumberRegex})|({SufixRoundOrdinalRegex}))|(un[ao]?\s+)?medi[oa]s?|mitad)(?=\b)"; public static readonly string FractionPrepositionRegex = $@"(?({AllIntRegex})|((?({AllIntRegex})|((\d+)(?!\.)))(?=\b)"; public static readonly string AllPointRegex = $@"((\s+{ZeroToNineIntegerRegex})+|(\s+{AllIntRegex}))"; public static readonly string AllFloatRegex = $@"{AllIntRegex}(\s+(coma|con)){AllPointRegex}"; @@ -75,32 +93,34 @@ public static class NumbersDefinitions public static readonly string DoubleAllFloatRegex = $@"((?<=\b){AllFloatRegex}(?=\b))"; public const string DoubleExponentialNotationRegex = 
@"(((?)"; - public const string LessRegex = @"((menos|menor|menores|por\s+debajo)(\s+(que|de|del))?|más\s+baj[oa]\s+que|(?|=)<)"; - public const string EqualRegex = @"((igual(es)?|equivalente(s)?|equivale|equivalen|son)(\s+(a|que|de|al|del))?|(?)=)"; + public const string MoreRegex = @"(más\s+(alt[oa]s?|grandes)\s+que|(m[áa]s|mayor(es)?|superior(es)?|por\s+encima)\b((\s+(que|del?|al?))|(?=\s+o\b))|(?)"; + public const string LessRegex = @"((meno(s|r(es)?)|inferior(es)?|por\s+debajo)((\s+(que|del?|al?)|(?=\s+o\b)))|más\s+baj[oa]\s+que|(?|=)<)"; + public const string EqualRegex = @"((igual(es)?|equivalente(s)?|equivalen?)(\s+(al?|que|del?))?|(?)=)"; public static readonly string MoreOrEqualPrefix = $@"((no\s+{LessRegex})|(por\s+lo\s+menos|como\s+m[íi]nimo|al\s+menos))"; public static readonly string MoreOrEqual = $@"(({MoreRegex}\s+(o)?\s+{EqualRegex})|({EqualRegex}\s+(o|y)\s+{MoreRegex})|{MoreOrEqualPrefix}(\s+(o)\s+{EqualRegex})?|({EqualRegex}\s+(o)\s+)?{MoreOrEqualPrefix}|>\s*=)"; - public const string MoreOrEqualSuffix = @"((\b(y|o)\b\s+(m[áa]s|mayor|mayores)((?!\s+(alt[oa]|baj[oa]|que|de|del))|(\s+(que|de|del)(?!(\s*\d+)))))|como\s+m[áa]ximo|por\s+lo\s+menos|al\s+menos)"; - public static readonly string LessOrEqualPrefix = $@"((no\s+{MoreRegex})|(como\s+máximo|como\s+maximo|como\s+mucho))"; + public const string MoreOrEqualSuffix = @"((\b(y|o)\b\s+(m[áa]s|mayor(es)?|superior(es)?)((?!\s+(alt[oa]|baj[oa]|que|del?|al?))|(\s+(que|del?|al?)(?!(\s*\d+)))))|como\s+m[íi]nimo|por\s+lo\s+menos|al\s+menos)\b"; + public static readonly string LessOrEqualPrefix = $@"((no\s+{MoreRegex})|(como\s+(m[aá]ximo|mucho)))"; public static readonly string LessOrEqual = $@"(({LessRegex}\s+(o)?\s+{EqualRegex})|({EqualRegex}\s+(o)?\s+{LessRegex})|{LessOrEqualPrefix}(\s+(o)?\s+{EqualRegex})?|({EqualRegex}\s+(o)?\s+)?{LessOrEqualPrefix}|<\s*=)"; - public const string LessOrEqualSuffix = 
@"((\b(y|o)\b\s+(menos|menor|menores)((?!\s+(alt[oa]|baj[oa]|que|de|del))|(\s+(que|de|del)(?!(\s*\d+)))))|como\s+m[íi]nimo)"; - public const string NumberSplitMark = @"(?![,.](?!\d+))"; - public const string MoreRegexNoNumberSucceed = @"((m[áa]s|mayor|mayores)((?!\s+(que|de|del))|\s+((que|de|del)(?!(\s*\d+))))|(por encima)(?!(\s*\d+)))"; - public const string LessRegexNoNumberSucceed = @"((menos|menor|menores)((?!\s+(que|de|del))|\s+((que|de|del)(?!(\s*\d+))))|(por debajo)(?!(\s*\d+)))"; - public const string EqualRegexNoNumberSucceed = @"((igual|iguales|equivalente|equivalentes|equivale|equivalen)((?!\s+(a|que|de|al|del))|(\s+(a|que|de|al|del)(?!(\s*\d+)))))"; - public static readonly string OneNumberRangeMoreRegex1 = $@"({MoreOrEqual}|{MoreRegex})\s*((el|la|los|las)\s+)?(?({NumberSplitMark}.)+)"; + public const string LessOrEqualSuffix = @"((\b(y|o)\b\s+(meno(s|r(es)?|inferior(es)?))((?!\s+(alt[oa]|baj[oa]|que|del?|al?))|(\s+(que|del?|al?)(?!(\s*\d+)))))|como\s+m[áa]ximo)\b"; + public static readonly string NumberSplitMark = $@"(?![,.](?!\d+))(?!\s*\b(((y|e)\s+)?({LessRegex}|{MoreRegex}|{EqualRegex}|no|de)|pero|o|a)\b)"; + public const string MoreRegexNoNumberSucceed = @"(\b(m[áa]s|mayor(es)?|superior(es)?)((?!\s+(que|del?|al?))|\s+((que|del?)(?!(\s*\d+))))|(por encima)(?!(\s*\d+)))\b"; + public const string LessRegexNoNumberSucceed = @"(\b(meno(s|r(es)?)|inferior(es)?)((?!\s+(que|del?|al?))|\s+((que|del?|al?)(?!(\s*\d+))))|(por debajo)(?!(\s*\d+)))\b"; + public const string EqualRegexNoNumberSucceed = @"(\b(igual(es)?|equivalentes?|equivalen?)((?!\s+(al?|que|del?))|(\s+(al?|que|del?)(?!(\s*\d+)))))\b"; + public static readonly string OneNumberRangeMoreRegex1 = $@"({MoreOrEqual}|{MoreRegex})\s*((el|las?|los)\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeMoreRegex1LB = $@"(?({NumberSplitMark}.)+)\s*{MoreOrEqualSuffix}"; public static readonly string OneNumberRangeMoreSeparateRegex = 
$@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+o\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\s+(?({NumberSplitMark}.)+)(\s+o\s+){EqualRegexNoNumberSucceed})"; - public static readonly string OneNumberRangeLessRegex1 = $@"({LessOrEqual}|{LessRegex})\s*((el|la|los|las)\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeLessRegex1 = $@"({LessOrEqual}|{LessRegex})\s*((el|las?|los)\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeLessRegex1LB = $@"(?({NumberSplitMark}.)+)\s*{LessOrEqualSuffix}"; public static readonly string OneNumberRangeLessSeparateRegex = $@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+o\s+){LessRegexNoNumberSucceed})|({LessRegex}\s+(?({NumberSplitMark}.)+)(\s+o\s+){EqualRegexNoNumberSucceed})"; - public static readonly string OneNumberRangeEqualRegex = $@"{EqualRegex}\s*((el|la|los|las)\s+)?(?({NumberSplitMark}.)+)"; - public static readonly string TwoNumberRangeRegex1 = $@"entre\s*((el|la|los|las)\s+)?(?({NumberSplitMark}.)+)\s*y\s*((el|la|los|las)\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeEqualRegex = $@"{EqualRegex}\s*((el|las?|los)\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string TwoNumberRangeRegex1 = $@"\bentre\s*((el|las?|los)\s+)?(?({NumberSplitMark}.)+)\s*y\s*((el|las?|los)\s+)?(?({NumberSplitMark}.)+)"; public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\s*(\by\b|\be\b|pero|,)\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"; public static readonly string TwoNumberRangeRegex3 = $@"({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\s*(\by\b|\be\b|pero|,)\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})"; - public static readonly string TwoNumberRangeRegex4 = $@"((de|desde)\s+)?((el|la|los|las)\s+)?(?({NumberSplitMark}(?!\b(entre|de|desde|es)\b).)+)\s*{TillRegex}\s*((el|la|los|las)\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string 
TwoNumberRangeRegex4 = $@"(\bde(sde)?\s+)?(\b(el|las?|los)\s+)?\b(?!\s+)(?({NumberSplitMark}(?!\b(entre|de(sde)?|es)\b).)+)\b\s*{TillRegex}\s*((el|las?|los)\s+)?\b(?!\s+)(?({NumberSplitMark}.)+)\b"; public const string AmbiguousFractionConnectorsRegex = @"(\b(en|de)\b)"; public const char DecimalSeparatorChar = ','; public const string FractionMarkerToken = @"sobre"; @@ -111,8 +131,9 @@ public static class NumbersDefinitions public static readonly string[] WrittenGroupSeparatorTexts = { @"punto" }; public static readonly string[] WrittenIntegerSeparatorTexts = { @"y" }; public static readonly string[] WrittenFractionSeparatorTexts = { @"con" }; + public static readonly string[] OneHalfTokens = { @"un", @"medio" }; public const string HalfADozenRegex = @"media\s+docena"; - public static readonly string DigitalNumberRegex = $@"((?<=\b)(mil|millones|mill[oó]n|billones|bill[oó]n|trillones|trill[oó]n|docenas?)(?=\b))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; + public static readonly string DigitalNumberRegex = $@"((?<=\b)(mil(l[oó]n(es)?)?|bill[oó]n(es)?|trill[oó]n(es)?|(docena|dz|doz)s?)(?=\b))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; public static readonly Dictionary CardinalNumberMap = new Dictionary { { @"cero", 0 }, @@ -132,6 +153,10 @@ public static class NumbersDefinitions { @"doce", 12 }, { @"docena", 12 }, { @"docenas", 12 }, + { @"dz", 12 }, + { @"doz", 12 }, + { @"dzs", 12 }, + { @"dozs", 12 }, { @"trece", 13 }, { @"catorce", 14 }, { @"quince", 15 }, @@ -203,6 +228,7 @@ public static class NumbersDefinitions { @"segunda", 2 }, { @"medio", 2 }, { @"media", 2 }, + { @"mitad", 2 }, { @"tercero", 3 }, { @"tercera", 3 }, { @"tercer", 3 }, @@ -215,6 +241,8 @@ public static class NumbersDefinitions { @"sexta", 6 }, { @"septimo", 7 }, { @"septima", 7 }, + { @"séptimo", 7 }, + { @"séptima", 7 }, { @"octavo", 8 }, { @"octava", 8 }, { @"noveno", 9 }, @@ -225,8 +253,12 @@ public static class NumbersDefinitions { @"décima", 10 }, { 
@"undecimo", 11 }, { @"undecima", 11 }, + { @"undécimo", 11 }, + { @"undécima", 11 }, { @"duodecimo", 12 }, { @"duodecima", 12 }, + { @"duodécimo", 12 }, + { @"duodécima", 12 }, { @"decimotercero", 13 }, { @"decimotercera", 13 }, { @"decimocuarto", 14 }, @@ -243,44 +275,211 @@ public static class NumbersDefinitions { @"decimonovena", 19 }, { @"vigesimo", 20 }, { @"vigesima", 20 }, + { @"vigésimo", 20 }, + { @"vigésima", 20 }, { @"trigesimo", 30 }, { @"trigesima", 30 }, + { @"trigésimo", 30 }, + { @"trigésima", 30 }, { @"cuadragesimo", 40 }, { @"cuadragesima", 40 }, + { @"cuadragésimo", 40 }, + { @"cuadragésima", 40 }, { @"quincuagesimo", 50 }, { @"quincuagesima", 50 }, + { @"quincuagésimo", 50 }, + { @"quincuagésima", 50 }, { @"sexagesimo", 60 }, { @"sexagesima", 60 }, + { @"sexagésimo", 60 }, + { @"sexagésima", 60 }, { @"septuagesimo", 70 }, { @"septuagesima", 70 }, + { @"septuagésimo", 70 }, + { @"septuagésima", 70 }, { @"octogesimo", 80 }, { @"octogesima", 80 }, + { @"octogésimo", 80 }, + { @"octogésima", 80 }, { @"nonagesimo", 90 }, { @"nonagesima", 90 }, + { @"nonagésimo", 90 }, + { @"nonagésima", 90 }, { @"centesimo", 100 }, { @"centesima", 100 }, + { @"centésimo", 100 }, + { @"centésima", 100 }, { @"ducentesimo", 200 }, { @"ducentesima", 200 }, + { @"ducentésimo", 200 }, + { @"ducentésima", 200 }, { @"tricentesimo", 300 }, { @"tricentesima", 300 }, + { @"tricentésimo", 300 }, + { @"tricentésima", 300 }, { @"cuadringentesimo", 400 }, { @"cuadringentesima", 400 }, + { @"cuadringentésimo", 400 }, + { @"cuadringentésima", 400 }, { @"quingentesimo", 500 }, { @"quingentesima", 500 }, + { @"quingentésimo", 500 }, + { @"quingentésima", 500 }, { @"sexcentesimo", 600 }, { @"sexcentesima", 600 }, + { @"sexcentésimo", 600 }, + { @"sexcentésima", 600 }, { @"septingentesimo", 700 }, { @"septingentesima", 700 }, + { @"septingentésimo", 700 }, + { @"septingentésima", 700 }, { @"octingentesimo", 800 }, { @"octingentesima", 800 }, + { @"octingentésimo", 800 }, + { 
@"octingentésima", 800 }, { @"noningentesimo", 900 }, { @"noningentesima", 900 }, + { @"noningentésimo", 900 }, + { @"noningentésima", 900 }, { @"milesimo", 1000 }, { @"milesima", 1000 }, + { @"milésimo", 1000 }, + { @"milésima", 1000 }, { @"millonesimo", 1000000 }, { @"millonesima", 1000000 }, + { @"millonésimo", 1000000 }, + { @"millonésima", 1000000 }, { @"billonesimo", 1000000000000 }, - { @"billonesima", 1000000000000 } + { @"billonesima", 1000000000000 }, + { @"billonésimo", 1000000000000 }, + { @"billonésima", 1000000000000 }, + { @"primeros", 1 }, + { @"primeras", 1 }, + { @"segundos", 2 }, + { @"segundas", 2 }, + { @"terceros", 3 }, + { @"terceras", 3 }, + { @"tercios", 3 }, + { @"cuartos", 4 }, + { @"cuartas", 4 }, + { @"quintos", 5 }, + { @"quintas", 5 }, + { @"sextos", 6 }, + { @"sextas", 6 }, + { @"septimos", 7 }, + { @"septimas", 7 }, + { @"séptimos", 7 }, + { @"séptimas", 7 }, + { @"octavos", 8 }, + { @"octavas", 8 }, + { @"novenos", 9 }, + { @"novenas", 9 }, + { @"decimos", 10 }, + { @"décimos", 10 }, + { @"decimas", 10 }, + { @"décimas", 10 }, + { @"undecimos", 11 }, + { @"undecimas", 11 }, + { @"undécimos", 11 }, + { @"undécimas", 11 }, + { @"duodecimos", 12 }, + { @"duodecimas", 12 }, + { @"duodécimos", 12 }, + { @"duodécimas", 12 }, + { @"decimoterceros", 13 }, + { @"decimoterceras", 13 }, + { @"decimocuartos", 14 }, + { @"decimocuartas", 14 }, + { @"decimoquintos", 15 }, + { @"decimoquintas", 15 }, + { @"decimosextos", 16 }, + { @"decimosextas", 16 }, + { @"decimoseptimos", 17 }, + { @"decimoseptimas", 17 }, + { @"decimoctavos", 18 }, + { @"decimoctavas", 18 }, + { @"decimonovenos", 19 }, + { @"decimonovenas", 19 }, + { @"vigesimos", 20 }, + { @"vigesimas", 20 }, + { @"vigésimos", 20 }, + { @"vigésimas", 20 }, + { @"trigesimos", 30 }, + { @"trigesimas", 30 }, + { @"trigésimos", 30 }, + { @"trigésimas", 30 }, + { @"cuadragesimos", 40 }, + { @"cuadragesimas", 40 }, + { @"cuadragésimos", 40 }, + { @"cuadragésimas", 40 }, + { @"quincuagesimos", 50 
}, + { @"quincuagesimas", 50 }, + { @"quincuagésimos", 50 }, + { @"quincuagésimas", 50 }, + { @"sexagesimos", 60 }, + { @"sexagesimas", 60 }, + { @"sexagésimos", 60 }, + { @"sexagésimas", 60 }, + { @"septuagesimos", 70 }, + { @"septuagesimas", 70 }, + { @"septuagésimos", 70 }, + { @"septuagésimas", 70 }, + { @"octogesimos", 80 }, + { @"octogesimas", 80 }, + { @"octogésimos", 80 }, + { @"octogésimas", 80 }, + { @"nonagesimos", 90 }, + { @"nonagesimas", 90 }, + { @"nonagésimos", 90 }, + { @"nonagésimas", 90 }, + { @"centesimos", 100 }, + { @"centesimas", 100 }, + { @"centésimos", 100 }, + { @"centésimas", 100 }, + { @"ducentesimos", 200 }, + { @"ducentesimas", 200 }, + { @"ducentésimos", 200 }, + { @"ducentésimas", 200 }, + { @"tricentesimos", 300 }, + { @"tricentesimas", 300 }, + { @"tricentésimos", 300 }, + { @"tricentésimas", 300 }, + { @"cuadringentesimos", 400 }, + { @"cuadringentesimas", 400 }, + { @"cuadringentésimos", 400 }, + { @"cuadringentésimas", 400 }, + { @"quingentesimos", 500 }, + { @"quingentesimas", 500 }, + { @"quingentésimos", 500 }, + { @"quingentésimas", 500 }, + { @"sexcentesimos", 600 }, + { @"sexcentesimas", 600 }, + { @"sexcentésimos", 600 }, + { @"sexcentésimas", 600 }, + { @"septingentesimos", 700 }, + { @"septingentesimas", 700 }, + { @"septingentésimos", 700 }, + { @"septingentésimas", 700 }, + { @"octingentesimos", 800 }, + { @"octingentesimas", 800 }, + { @"octingentésimos", 800 }, + { @"octingentésimas", 800 }, + { @"noningentesimos", 900 }, + { @"noningentesimas", 900 }, + { @"noningentésimos", 900 }, + { @"noningentésimas", 900 }, + { @"milesimos", 1000 }, + { @"milesimas", 1000 }, + { @"milésimos", 1000 }, + { @"milésimas", 1000 }, + { @"millonesimos", 1000000 }, + { @"millonesimas", 1000000 }, + { @"millonésimos", 1000000 }, + { @"millonésimas", 1000000 }, + { @"billonesimos", 1000000000000 }, + { @"billonesimas", 1000000000000 }, + { @"billonésimos", 1000000000000 }, + { @"billonésimas", 1000000000000 } }; public static readonly 
Dictionary PrefixCardinalMap = new Dictionary { @@ -356,6 +555,10 @@ public static class NumbersDefinitions { @"trillonesimo", 1000000000000000000 }, { @"docena", 12 }, { @"docenas", 12 }, + { @"dz", 12 }, + { @"doz", 12 }, + { @"dzs", 12 }, + { @"dozs", 12 }, { @"k", 1000 }, { @"m", 1000000 }, { @"g", 1000000000 }, @@ -368,11 +571,101 @@ public static class NumbersDefinitions }; public static readonly Dictionary RelativeReferenceOffsetMap = new Dictionary { - { @"", @"" } + { @"proxima", @"1" }, + { @"proximo", @"1" }, + { @"proximas", @"1" }, + { @"proximos", @"1" }, + { @"próxima", @"1" }, + { @"próximo", @"1" }, + { @"próximas", @"1" }, + { @"próximos", @"1" }, + { @"anterior", @"-1" }, + { @"anteriores", @"-1" }, + { @"actual", @"0" }, + { @"actuales", @"0" }, + { @"siguiente", @"1" }, + { @"siguientes", @"1" }, + { @"ultima", @"0" }, + { @"ultimo", @"0" }, + { @"última", @"0" }, + { @"último", @"0" }, + { @"ultimas", @"0" }, + { @"ultimos", @"0" }, + { @"últimas", @"0" }, + { @"últimos", @"0" }, + { @"penultima", @"-1" }, + { @"penultimo", @"-1" }, + { @"penúltima", @"-1" }, + { @"penúltimo", @"-1" }, + { @"penultimas", @"-1" }, + { @"penultimos", @"-1" }, + { @"penúltimas", @"-1" }, + { @"penúltimos", @"-1" }, + { @"antepenultima", @"-2" }, + { @"antepenultimo", @"-2" }, + { @"antepenúltima", @"-2" }, + { @"antepenúltimo", @"-2" }, + { @"antepenultimas", @"-2" }, + { @"antepenultimos", @"-2" }, + { @"antepenúltimas", @"-2" }, + { @"antepenúltimos", @"-2" }, + { @"antes de la ultima", @"-1" }, + { @"antes del ultimo", @"-1" }, + { @"antes de la última", @"-1" }, + { @"antes del último", @"-1" }, + { @"anterior al ultimo", @"-1" }, + { @"anterior a la ultima", @"-1" }, + { @"anterior al último", @"-1" }, + { @"anterior a la última", @"-1" } }; public static readonly Dictionary RelativeReferenceRelativeToMap = new Dictionary { - { @"", @"" } + { @"proxima", @"current" }, + { @"proximo", @"current" }, + { @"proximas", @"current" }, + { @"proximos", @"current" }, 
+ { @"próxima", @"current" }, + { @"próximo", @"current" }, + { @"próximas", @"current" }, + { @"próximos", @"current" }, + { @"anterior", @"current" }, + { @"anteriores", @"current" }, + { @"actual", @"current" }, + { @"actuales", @"current" }, + { @"siguiente", @"current" }, + { @"siguientes", @"current" }, + { @"ultima", @"end" }, + { @"ultimo", @"end" }, + { @"última", @"end" }, + { @"último", @"end" }, + { @"ultimas", @"end" }, + { @"ultimos", @"end" }, + { @"últimas", @"end" }, + { @"últimos", @"end" }, + { @"penultima", @"end" }, + { @"penultimo", @"end" }, + { @"penúltima", @"end" }, + { @"penúltimo", @"end" }, + { @"penultimas", @"end" }, + { @"penultimos", @"end" }, + { @"penúltimas", @"end" }, + { @"penúltimos", @"end" }, + { @"antepenultima", @"end" }, + { @"antepenultimo", @"end" }, + { @"antepenúltima", @"end" }, + { @"antepenúltimo", @"end" }, + { @"antepenultimas", @"end" }, + { @"antepenultimos", @"end" }, + { @"antepenúltimas", @"end" }, + { @"antepenúltimos", @"end" }, + { @"antes de la ultima", @"end" }, + { @"antes del ultimo", @"end" }, + { @"antes de la última", @"end" }, + { @"antes del último", @"end" }, + { @"anterior al ultimo", @"end" }, + { @"anterior a la ultima", @"end" }, + { @"anterior al último", @"end" }, + { @"anterior a la última", @"end" } }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/NumbersWithUnitDefinitions.cs index f476aa4460..7cc9787f6c 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/NumbersWithUnitDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/NumbersWithUnitDefinitions.cs @@ -23,7 +23,7 @@ public static class NumbersWithUnitDefinitions { public static readonly Dictionary AgeSuffixList = new Dictionary { - { @"Año", @"años|año" }, + { @"Año", @"años|anos|año|ano" }, { @"Mes", @"meses|mes" }, { @"Semana", @"semanas|semana" }, { @"Día", 
@"dias|días|día|dia" } @@ -31,7 +31,9 @@ public static class NumbersWithUnitDefinitions public static readonly IList AmbiguousAgeUnitList = new List { @"años", + @"anos", @"año", + @"ano", @"meses", @"mes", @"semanas", @@ -62,7 +64,7 @@ public static class NumbersWithUnitDefinitions }; public static readonly Dictionary CurrencySuffixList = new Dictionary { - { @"Dólar", @"dólar|dólares" }, + { @"Dólar", @"dólar|dólares|dolar|dolares" }, { @"Peso", @"peso|pesos" }, { @"Rublo", @"rublo|rublos" }, { @"Libra", @"libra|libras" }, @@ -252,7 +254,7 @@ public static class NumbersWithUnitDefinitions { @"Rupia de Mauricio", @"rupia de Mauricio|rupias de Mauricio|mur" }, { @"Uguiya", @"uguiya|uguiyas|mro" }, { @"Jum", @"jum|jums" }, - { @"Peso mexicano", @"peso mexicano|pesos mexicanos|mxn" }, + { @"Peso mexicano", @"peso mexicano|pesos mexicanos|mxn|mxn$|mxn $|mex$" }, { @"Centavo mexicano", @"centavo mexicano|centavos mexicanos" }, { @"Leu moldavo", @"leu moldavo|lei moldavos|mdl" }, { @"Ban moldavo", @"ban moldavo|bani moldavos" }, @@ -302,7 +304,7 @@ public static class NumbersWithUnitDefinitions { @"Sene", @"sene" }, { @"Libra de Santa Helena", @"libra de santa helena|libras de santa helena|shp" }, { @"Penique de Santa Helena", @"penique de santa helena|peniques de santa helena" }, - { @"Dobra", @"dobra|db|std" }, + { @"Dobra", @"dobra" }, { @"Dinar serbio", @"dinar serbio|dinares serbios|rsd" }, { @"Para serbio", @"para serbio|para serbios" }, { @"Rupia de Seychelles", @"rupia de seychelles|rupias de seychelles|scr" }, @@ -369,12 +371,284 @@ public static class NumbersWithUnitDefinitions { @"Franco yibutiano", @"franco yibutiano|francos yibutianos|djf" }, { @"Dinar yugoslavo", @"dinar yugoslavo|dinares yugoslavos|yud" }, { @"Kwacha zambiano", @"kwacha zambiano|kwacha zambianos|zmw" }, - { @"Ngwee zambiano", @"ngwee zambiano|ngwee zambianos" } + { @"Ngwee zambiano", @"ngwee zambiano|ngwee zambianos" }, + { @"Bitcoin", @"bitcoin|bitcoins|btc|xbt|₿" }, + { @"Millibitcoin", 
@"millibitcoin|millibitcoins|milibitcoin|milibitcoins" }, + { @"Satoshi", @"satoshi|satoshis" } }; - public const string CompoundUnitConnectorRegex = @"(?[^.])"; + public static readonly Dictionary CurrencyNameToIsoCodeMap = new Dictionary + { + { @"Afgani afgano", @"AFN" }, + { @"Euro", @"EUR" }, + { @"Lek albanés", @"ALL" }, + { @"Kwanza angoleño", @"AOA" }, + { @"Dram armenio", @"AMD" }, + { @"Florín arubeño", @"AWG" }, + { @"Taka bangladeshí", @"BDT" }, + { @"Ngultrum butanés", @"BTN" }, + { @"Boliviano", @"BOB" }, + { @"Marco bosnioherzegovino", @"BAM" }, + { @"Pula", @"BWP" }, + { @"Real brasileño", @"BRL" }, + { @"Lev búlgaro", @"BGN" }, + { @"Riel camboyano", @"KHR" }, + { @"Escudo caboverdiano", @"CVE" }, + { @"Colón costarricense", @"CRC" }, + { @"Kuna croata", @"HRK" }, + { @"Corona checa", @"CZK" }, + { @"Nakfa", @"ERN" }, + { @"Birr etíope", @"ETB" }, + { @"Dalasi", @"GMD" }, + { @"Lari georgiano", @"GEL" }, + { @"Cedi", @"GHS" }, + { @"Quetzal guatemalteco", @"GTQ" }, + { @"Gourde haitiano", @"HTG" }, + { @"Lempira hondureño", @"HNL" }, + { @"Forinto húngaro", @"HUF" }, + { @"Rial iraní", @"IRR" }, + { @"Rial yemení", @"YER" }, + { @"Nuevo shéquel", @"ILS" }, + { @"Yen", @"JPY" }, + { @"Tenge kazajo", @"KZT" }, + { @"Chelín keniano", @"KES" }, + { @"Won norcoreano", @"KPW" }, + { @"Won surcoreano", @"KRW" }, + { @"Som kirguís", @"KGS" }, + { @"Kip laosiano", @"LAK" }, + { @"Loti", @"LSL" }, + { @"Rand sudafricano", @"ZAR" }, + { @"Pataca macaense", @"MOP" }, + { @"Denar macedonio", @"MKD" }, + { @"Ariary malgache", @"MGA" }, + { @"Kwacha malauí", @"MWK" }, + { @"Ringgit malayo", @"MYR" }, + { @"Uguiya", @"MRO" }, + { @"Tugrik mongol", @"MNT" }, + { @"Metical mozambiqueño", @"MZN" }, + { @"Kyat birmano", @"MMK" }, + { @"Córdoba nicaragüense", @"NIO" }, + { @"Naira", @"NGN" }, + { @"Lira turca", @"TRY" }, + { @"Rial omaní", @"OMR" }, + { @"Balboa panameño", @"PAB" }, + { @"Kina", @"PGK" }, + { @"Guaraní", @"PYG" }, + { @"Sol", @"PEN" }, + { @"Złoty", 
@"PLN" }, + { @"Riyal qatarí", @"QAR" }, + { @"Riyal saudí", @"SAR" }, + { @"Tala", @"WST" }, + { @"São Tomé and Príncipe dobra", @"STN" }, + { @"Leone", @"SLL" }, + { @"Lilangeni", @"SZL" }, + { @"Somoni tayiko", @"TJS" }, + { @"Baht tailandés", @"THB" }, + { @"Grivna", @"UAH" }, + { @"Vatu", @"VUV" }, + { @"Bolívar fuerte", @"VEF" }, + { @"Kwacha zambiano", @"ZMW" }, + { @"Dirham marroquí", @"MAD" }, + { @"Dirham de los Emiratos Árabes Unidos", @"AED" }, + { @"Manat azerí", @"AZN" }, + { @"Manat turkmeno", @"TMT" }, + { @"Chelín somalí", @"SOS" }, + { @"Chelín tanzano", @"TZS" }, + { @"Chelín ugandés", @"UGX" }, + { @"Leu rumano", @"RON" }, + { @"Leu moldavo", @"MDL" }, + { @"Rupia nepalí", @"NPR" }, + { @"Rupia pakistaní", @"PKR" }, + { @"Rupia india", @"INR" }, + { @"Rupia de Seychelles", @"SCR" }, + { @"Rupia de Mauricio", @"MUR" }, + { @"Rupia de Maldivas", @"MVR" }, + { @"Rupia de Sri Lanka", @"LKR" }, + { @"Rupia indonesia", @"IDR" }, + { @"Corona danesa", @"DKK" }, + { @"Corona noruega", @"NOK" }, + { @"Corona islandesa", @"ISK" }, + { @"Corona sueca", @"SEK" }, + { @"Franco CFA de África Occidental", @"XOF" }, + { @"Franco CFA de África Central", @"XAF" }, + { @"Franco comorano", @"KMF" }, + { @"Franco congoleño", @"CDF" }, + { @"Burundian franc", @"BIF" }, + { @"Franco yibutiano", @"DJF" }, + { @"Franco CFP", @"XPF" }, + { @"Franco guineano", @"GNF" }, + { @"Franco suizo", @"CHF" }, + { @"Franco ruandés", @"RWF" }, + { @"Rublo ruso", @"RUB" }, + { @"Transnistrian ruble", @"PRB" }, + { @"New Belarusian ruble", @"BYN" }, + { @"Dinar argelino", @"DZD" }, + { @"Dinar bahreiní", @"BHD" }, + { @"Dinar iraquí", @"IQD" }, + { @"Dinar jordano", @"JOD" }, + { @"Kuwaiti dinar", @"KWD" }, + { @"Dinar libio", @"LYD" }, + { @"Dinar serbio", @"RSD" }, + { @"Dinar tunecino", @"TND" }, + { @"Peso argentino", @"ARS" }, + { @"Chilean peso", @"CLP" }, + { @"Peso colombiano", @"COP" }, + { @"Peso cubano convertible", @"CUC" }, + { @"Peso cubano", @"CUP" }, + { @"Peso 
dominicano", @"DOP" }, + { @"Peso mexicano", @"MXN" }, + { @"Peso uruguayo", @"UYU" }, + { @"Libra esterlina", @"GBP" }, + { @"Libra de Santa Helena", @"SHP" }, + { @"Libra egipcia", @"EGP" }, + { @"Libra malvinense", @"FKP" }, + { @"Libra gibraltareña", @"GIP" }, + { @"Libra manesa", @"IMP" }, + { @"Libra de Jersey", @"JEP" }, + { @"Libra libanesa", @"LBP" }, + { @"Libra sursudanesa", @"SSP" }, + { @"Libra sudanesa", @"SDG" }, + { @"Libra siria", @"SYP" }, + { @"Dólar estadounidense", @"USD" }, + { @"Dólar australiano", @"AUD" }, + { @"Dólar bahameño", @"BSD" }, + { @"Dólar de Barbados", @"BBD" }, + { @"Dólar beliceño", @"BZD" }, + { @"Dólar bermudeño", @"BMD" }, + { @"Dólar de Brunéi", @"BND" }, + { @"Dólar de Singapur", @"SGD" }, + { @"Dólar canadiense", @"CAD" }, + { @"Dólar de las Islas Caimán", @"KYD" }, + { @"Dólar neozelandés", @"NZD" }, + { @"Dólar fiyiano", @"FJD" }, + { @"Dólar guyanés", @"GYD" }, + { @"Dólar de Hong Kong", @"HKD" }, + { @"Dólar jamaiquino", @"JMD" }, + { @"Dólar liberiano", @"LRD" }, + { @"Dólar namibio", @"NAD" }, + { @"Dólar de las Islas Salomón", @"SBD" }, + { @"Dólar surinamés", @"SRD" }, + { @"Nuevo dólar taiwanés", @"TWD" }, + { @"Dólar trinitense", @"TTD" }, + { @"Tuvaluan dollar", @"TVD" }, + { @"Yuan chino", @"CNY" }, + { @"Rial", @"__RI" }, + { @"Chelín", @"__S" }, + { @"Som", @"__SO" }, + { @"Dirham", @"__DR" }, + { @"Dinar", @"_DN" }, + { @"Dólar", @"__D" }, + { @"Manat", @"__MA" }, + { @"Rupia", @"__R" }, + { @"Corona", @"__K" }, + { @"Franco", @"__F" }, + { @"Marco", @"__M" }, + { @"Rublo", @"__RB" }, + { @"Peso", @"__PE" }, + { @"Libra", @"__P" }, + { @"Tristan da Cunha libra", @"_TP" }, + { @"South Georgia and the South Sandwich Islands libra", @"_SP" }, + { @"Somaliland chelín", @"_SS" }, + { @"Pitcairn Islands dólar", @"_PND" }, + { @"Palauan dólar", @"_PD" }, + { @"Niue dólar", @"_NID" }, + { @"Nauruan dólar", @"_ND" }, + { @"Micronesian dólar", @"_MD" }, + { @"Kiribati dólar", @"_KID" }, + { @"Guernsey libra", 
@"_GGP" }, + { @"Faroese corona", @"_FOK" }, + { @"Cook Islands dólar", @"_CKD" }, + { @"British Virgin Islands dólar", @"_BD" }, + { @"Ascension libra", @"_AP" }, + { @"Alderney libra", @"_ALP" }, + { @"Abkhazian apsar", @"_AA" }, + { @"Bitcoin", @"_XBT" } + }; + public static readonly Dictionary FractionalUnitNameToCodeMap = new Dictionary + { + { @"Jiao", @"JIAO" }, + { @"Kópek", @"KOPEK" }, + { @"Kopek", @"KOPEK" }, + { @"Pul", @"PUL" }, + { @"Cent", @"CENT" }, + { @"Qindarka", @"QINDARKE" }, + { @"Penique", @"PENNY" }, + { @"Santeem", @"SANTEEM" }, + { @"Cêntimo", @"CENT" }, + { @"Centavo", @"CENT" }, + { @"Luma", @"LUMA" }, + { @"Qəpik", @"QƏPIK" }, + { @"Fils", @"FILS" }, + { @"Poisha", @"POISHA" }, + { @"Kapyeyka", @"KAPYEYKA" }, + { @"Kopyeyka", @"KOPYEYKA" }, + { @"Centime", @"CENTIME" }, + { @"Chetrum", @"CHETRUM" }, + { @"Paisa", @"PAISA" }, + { @"Feningas", @"FENING" }, + { @"Thebe", @"THEBE" }, + { @"Sen", @"SEN" }, + { @"Stotinka", @"STOTINKA" }, + { @"Fen", @"FEN" }, + { @"Céntimo", @"CENT" }, + { @"Lipa", @"LIPA" }, + { @"Haléř", @"HALER" }, + { @"Øre", @"ØRE" }, + { @"Piastre", @"PIASTRE" }, + { @"Santim", @"SANTIM" }, + { @"Oyra", @"OYRA" }, + { @"Butut", @"BUTUT" }, + { @"Tetri", @"TETRI" }, + { @"Pesewa", @"PESEWA" }, + { @"Fillér", @"FILLER" }, + { @"Eyrir", @"EYRIR" }, + { @"Dinar", @"DINAR" }, + { @"Agora", @"AGORA" }, + { @"Tïın", @"TIIN" }, + { @"Chon", @"CHON" }, + { @"Jeon", @"JEON" }, + { @"Tyiyn", @"TYIYN" }, + { @"Att", @"ATT" }, + { @"Sente", @"SENTE" }, + { @"Dirham", @"DIRHAM" }, + { @"Rappen", @"RAPPEN" }, + { @"Avo", @"AVO" }, + { @"Deni", @"DENI" }, + { @"Iraimbilanja", @"IRAIMBILANJA" }, + { @"Tambala", @"TAMBALA" }, + { @"Laari", @"LAARI" }, + { @"Khoums", @"KHOUMS" }, + { @"Ban", @"BAN" }, + { @"Möngö", @"MONGO" }, + { @"Pya", @"PYA" }, + { @"Kobo", @"KOBO" }, + { @"Kuruş", @"KURUS" }, + { @"Baisa", @"BAISA" }, + { @"Centésimo", @"CENTESIMO" }, + { @"Toea", @"TOEA" }, + { @"Sentimo", @"SENTIMO" }, + { @"Grosz", @"GROSZ" }, + 
{ @"Sene", @"SENE" }, + { @"Halala", @"HALALA" }, + { @"Para", @"PARA" }, + { @"Öre", @"ORE" }, + { @"Diram", @"DIRAM" }, + { @"Satang", @"SATANG" }, + { @"Seniti", @"SENITI" }, + { @"Millime", @"MILLIME" }, + { @"Tennesi", @"TENNESI" }, + { @"Kopiyka", @"KOPIYKA" }, + { @"Tiyin", @"TIYIN" }, + { @"Hào", @"HAO" }, + { @"Ngwee", @"NGWEE" }, + { @"Millibitcoin", @"MILLIBITCOIN" }, + { @"Satoshi", @"SATOSHI" } + }; + public const string CompoundUnitConnectorRegex = @"(?y|con)"; + public const string MultiplierRegex = @"\s*\b(((mil\s+)?mi|bi|cuatri|quinti|sexti|septi)ll[oó]n|mil)(es)?\b"; public static readonly Dictionary CurrencyPrefixList = new Dictionary { - { @"Dólar", @"$" }, + { @"Dobra", @"db|std" }, + { @"Dólar", @"$|dólar|dólares|dolar|dolares" }, { @"Dólar estadounidense", @"us$|u$d|usd" }, { @"Dólar del Caribe Oriental", @"ec$|xcd" }, { @"Dólar australiano", @"a$|aud" }, @@ -395,6 +669,7 @@ public static class NumbersWithUnitDefinitions { @"Dólar namibio", @"n$|nad" }, { @"Dólar de las Islas Salomón", @"si$|sbd" }, { @"Nuevo dólar taiwanés", @"nt$|twd" }, + { @"Peso mexicano", @"mxn|mxn$|mxn $|mex$" }, { @"Real brasileño", @"r$|brl" }, { @"Guaraní", @"₲|gs.|pyg" }, { @"Dólar trinitense", @"tt$|ttd" }, @@ -404,11 +679,14 @@ public static class NumbersWithUnitDefinitions { @"Florín", @"ƒ" }, { @"Libra", @"£" }, { @"Colón costarricense", @"₡" }, - { @"Lira turca", @"₺" } + { @"Lira turca", @"₺" }, + { @"Bitcoin", @"₿|btc|xbt" } }; public static readonly IList AmbiguousCurrencyUnitList = new List { - @"le" + @"le", + @"db", + @"std" }; public static readonly Dictionary InformationSuffixList = new Dictionary { @@ -442,7 +720,8 @@ public static class NumbersWithUnitDefinitions @"área", @"áreas", @"pie", - @"pies" + @"pies", + @"""" }; public static readonly Dictionary LengthSuffixList = new Dictionary { @@ -460,13 +739,14 @@ public static class NumbersWithUnitDefinitions { @"Yarda", @"yd|yarda|yardas" }, { @"Pulgada", @"pulgada|pulgadas|""" }, { @"Pie", 
@"pie|pies|ft" }, - { @"Año luz", @"año luz|años luz|al" } + { @"Año luz", @"año luz|ano luz|años luz|anos luz|al" } }; public static readonly IList AmbiguousLengthUnitList = new List { @"mi", @"área", - @"áreas" + @"áreas", + @"""" }; public const string BuildPrefix = @"(?<=(\s|^|\P{L}))"; public const string BuildSuffix = @"(?=(\s|\P{L}|$))"; @@ -493,8 +773,8 @@ public static class NumbersWithUnitDefinitions { { @"Kelvin", @"k|K|kelvin" }, { @"Rankine", @"r|rankine" }, - { @"Grado Celsius", @"°c|grados c|grado celsius|grados celsius|celsius|grado centígrado|grados centígrados|centígrado|centígrados" }, - { @"Grado Fahrenheit", @"°f|grados f|grado fahrenheit|grados fahrenheit|fahrenheit" }, + { @"Grado Celsius", @"°c|° c|ºc|º c|grado c|grados c|grado celsius|grados celsius|celsius|grado centígrado|grados centígrados|centígrado|centígrados" }, + { @"Grado Fahrenheit", @"°f|° f|ºf|º f|grado f|grados f|grado fahrenheit|grados fahrenheit|fahrenheit" }, { @"Grado Réaumur", @"°r|°re|grados r|grado réaumur|grados réaumur|réaumur" }, { @"Grado Delisle", @"°d|grados d|grado delisle|grados delisle|delisle" }, { @"Grado", @"°|grados|grado" } @@ -521,8 +801,8 @@ public static class NumbersWithUnitDefinitions }; public static readonly Dictionary WeightSuffixList = new Dictionary { - { @"Tonelada métrica", @"tonelada métrica|toneladas métricas" }, - { @"Tonelada", @"ton|tonelada|toneladas" }, + { @"Tonelada métrica", @"tonelada métrica|toneladas métricas|t.métrica|t métrica|t. métrica|tonelada metrica|toneladas metricas|t.metrica|t metrica|t. 
metrica" }, + { @"Tonelada", @"ton|tonelada|toneladas|t" }, { @"Kilogramo", @"kg|kilogramo|kilogramos" }, { @"Hectogramo", @"hg|hectogramo|hectogramos" }, { @"Decagramo", @"dag|decagramo|decagramos" }, @@ -538,5 +818,33 @@ public static class NumbersWithUnitDefinitions { @"Grano", @"grano|granos" }, { @"Quilate", @"ct|quilate|quilates" } }; + public static readonly IList AmbiguousWeightUnitList = new List + { + @"g", + @"t" + }; + public static readonly Dictionary AngleSuffixList = new Dictionary + { + { @"Degree", @"grado|grados|°" }, + { @"Radian", @"radián|radian|radianes|rad" }, + { @"Turn", @"vuelta|vueltas" } + }; + public static readonly IList AmbiguousAngleUnitList = new List + { + @"vuelta", + @"vueltas" + }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"null", @"null" } + }; + public static readonly Dictionary TemperatureAmbiguityFiltersDict = new Dictionary + { + { @"\b(grados?|°)$", @"\b((grados?|°)\s*(angolo|rotaci[oó]n)|(gira(r|do|torio)?|angolo|rotaci[oó]n)(\s+(\p{L}+|\d+)){0,4}\s*(grados?\b|°))" } + }; + public static readonly Dictionary DimensionAmbiguityFiltersDict = new Dictionary + { + { @"\b(grados?|°)$", @"\b((grados?|°)\s*(c(elsius|entígrado)?|f(ah?renheit)?)|(temperatura)(\s+(\p{L}+|\d+)){0,4}\s*(grados?\b|°))" } + }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/QuotedTextDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/QuotedTextDefinitions.cs new file mode 100644 index 0000000000..8eb6534041 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/QuotedTextDefinitions.cs @@ -0,0 +1,35 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. 
+// +// Generation parameters: +// - DataFilename: Patterns\Spanish\Spanish-QuotedText.yaml +// - Language: Spanish +// - ClassName: QuotedTextDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Spanish +{ + using System; + using System.Collections.Generic; + + public static class QuotedTextDefinitions + { + public const string LangMarker = @"Spa"; + public const string QuotedTextRegex1 = @"(“([^“”]+)”)"; + public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)"; + public const string QuotedTextRegex3 = @"(""([^""]+)"")"; + public const string QuotedTextRegex4 = @"(\\'([^\']+)\\')"; + public const string QuotedTextRegex5 = @"(`([^`]+)`)"; + public const string QuotedTextRegex6 = @"(«([^«»]+)»)"; + public const string QuotedTextRegex7 = @"(123456)"; + public const string QuotedTextRegex8 = @"(123456)"; + public const string QuotedTextRegex9 = @"(123456)"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/QuotedTextDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/QuotedTextDefinitions.tt new file mode 100644 index 0000000000..1f8c0f5ac9 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/QuotedTextDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Spanish\Spanish-QuotedText.yaml"; + this.Language = "Spanish"; + this.ClassName = "QuotedTextDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/ChoiceDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/ChoiceDefinitions.cs index 5fe4de56b6..ef54973c36 100644 --- 
a/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/ChoiceDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/ChoiceDefinitions.cs @@ -23,7 +23,8 @@ public static class ChoiceDefinitions { public const string LangMarker = @"Swe"; public const string TokenizerRegex = @"[^\w\d\u00E0-\u00FC]"; - public const string TrueRegex = @"\b(sant|ja|yes|y|j|ok|japp|jupp|jepp|absolut|säkert|instämmer|javisst|kör)\b|(\uD83D\uDC4D|\uD83D\uDC4C)"; - public const string FalseRegex = @"\b(falskt|nej|näpp|nope|misstycker|aldrig|n|nä|absolut\s+inte|glöm det|instämmer\s+inte|vill\s+inte|avstår)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)"; + public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)"; + public static readonly string TrueRegex = $@"\b(sant|ja|yes|y|j|ok|japp|jupp|jepp|absolut|säkert|instämmer|javisst|kör)\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?"; + public static readonly string FalseRegex = $@"\b(falskt|nej|näpp|nope|misstycker|aldrig|n|nä|absolut\s+inte|glöm det|instämmer\s+inte|vill\s+inte|avstår)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/DateTimeDefinitions.cs new file mode 100644 index 0000000000..c312315034 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/DateTimeDefinitions.cs @@ -0,0 +1,1144 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Swedish\Swedish-DateTime.yaml +// - Language: Swedish +// - ClassName: DateTimeDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Swedish +{ + using System; + using System.Collections.Generic; + + public static class DateTimeDefinitions + { + public const string LangMarker = @"Swe"; + public const bool CheckBothBeforeAfter = false; + public static readonly string TillRegex = $@"(?#TillRegex)(?\b(till|tom)\b(\s+den\b)?|{BaseDateTime.RangeConnectorSymbolRegex})"; + public static readonly string RangeConnectorRegex = $@"(?#RangeConnectorRegex)(?\b(och|tom)\b(\s+den\b)?|{BaseDateTime.RangeConnectorSymbolRegex})"; + public const string LastNegPrefix = @"(?#LastNegPrefix)(?efter|nästa|kommande|de(nn|tt)a|de(n|t)\s+här|{LastNegPrefix}senaste|förra|tidigare|innevarande)\b"; + public static readonly string StrictRelativeRegex = $@"(?#StrictRelativeRegex)\b(?efter|nästa|kommande|de(nn|tt)a|de(n|t)\s+här|{LastNegPrefix}senaste|förra|tidigare|innevarande)\b"; + public const string UpcomingPrefixRegex = @"(?#UpcomingPrefixRegex)((de(n|t)\s+)?(kommande|här))"; + public static readonly string NextPrefixRegex = $@"(?#NextPrefixRegex)\b(efter|nästa|{UpcomingPrefixRegex})\b"; + public const string AfterNextSuffixRegex = @"(?#AfterNextSuffixRegex)\b(efter\s+nästa)\b"; + public const string PastPrefixRegex = @"(?#PastPrefixRegex)((de(n|t)\s+)?förra)\b"; + public static readonly string PreviousPrefixRegex = $@"(?#PreviousPrefixRegex)({LastNegPrefix}förra|föregående|{PastPrefixRegex})\b"; + public const string ThisPrefixRegex = @"(?#ThisPrefixRegex)(innevarande|de(nn|tt)a|de(n|t)\s+här)\b"; + public const string RangePrefixRegex = @"(?#RangePrefixRegex)(från|mellan)"; + public const string CenturySuffixRegex = @"(?#CenturySuffixRegex)(^sekel)\b"; + public const string ReferencePrefixRegex = @"(?#ReferencePrefixRegex)(samma)\b"; + public const string FutureSuffixRegex = @"(?#FutureSuffixRegex)\b((i\s+)?framtiden)\b"; + public const string PastSuffixRegex = 
@"(?#PastSuffixRegex)\b((i\s+)dåtid(en)?)\b"; + public const string DayRegex = @"(?#DayRegex)(den\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9])(?:\:(a|e))?)(?=\b|t)"; + public const string ImplicitDayRegex = @"(?#ImplicitDayRegex)(den\s*)?(?(?:3[0-1]|[0-2]?\d)(?:\:(a|e)))\b"; + public const string MonthNumRegex = @"(?#MonthNumRegex)(?1[0-2]|(0)?[1-9])\b"; + public const string WrittenOneToNineRegex = @"(?#WrittenOneToNineRegex)(?:e(n|tt)|två|tre|fyra|fem|sex|sju|åtta|nio)"; + public const string WrittenElevenToNineteenRegex = @"(?#WrittenElevenToNineteenRegex)(?:elva|tolv|(?:tret|fjor|fem|sex|sjut|ar|nit)ton)"; + public const string WrittenTensRegex = @"(?#WrittenTensRegex)(?:tio|tjugo|trettio|fyrtio|femtio|sextio|sjuttio|åttio|nittio)"; + public static readonly string WrittenNumRegex = $@"(?#WrittenNumRegex)(?:{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s*{WrittenOneToNineRegex})?)"; + public static readonly string WrittenCenturyFullYearRegex = $@"(?#WrittenCenturyFullYearRegex)(?:(ett|två)\s+tusen(\s*{WrittenOneToNineRegex}\s*hundra)?)"; + public const string WrittenCenturyOrdinalYearRegex = @"(?#WrittenCenturyOrdinalYearRegex)(?:tjugo(\s+(ett|två))?|tio|elva|tolv|tretton|femton|arton|tjugo|(?:fjor|sex|sjut|nit)(ton)?|ett|två|tre|fyra|fem|sju|åtta|nio)"; + public static readonly string CenturyRegex = $@"(?#CenturyRegex)\b(?{WrittenCenturyFullYearRegex}|(ett|två)tusen|{WrittenCenturyOrdinalYearRegex}(\s*hundra)?)"; + public static readonly string LastTwoYearNumRegex = $@"(?#LastTwoYearNumRegex)(?:(noll\s+)?{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s*{WrittenOneToNineRegex})?)"; + public static readonly string FullTextYearRegex = $@"(?#FullTextYearRegex)\b((?{CenturyRegex})(\s+och)?\s*(?{LastTwoYearNumRegex})\b|\b(?{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}\s*hundra))\b"; + public const string OclockRegex = @"(?#OclockRegex)(?o\s*((’|‘|')\s*)?clock|sharp|prick)"; + public const string 
SpecialDescRegex = @"(?#SpecialDescRegex)((?)p\b)"; + public static readonly string AmDescRegex = $@"(?#AmDescRegex)(?:{BaseDateTime.BaseAmDescRegex})"; + public static readonly string PmDescRegex = $@"(?#PmDescRegex)(:?{BaseDateTime.BasePmDescRegex})"; + public static readonly string AmPmDescRegex = $@"(?#AmPmDescRegex)(:?{BaseDateTime.BaseAmPmDescRegex})"; + public static readonly string DescRegex = $@"(?#DescRegex)(:?(:?({OclockRegex}\s+)?(?({AmPmDescRegex}|{AmDescRegex}|{PmDescRegex}|{SpecialDescRegex})))|{OclockRegex})"; + public const string OfPrepositionRegex = @"(?#OfPrepositionRegex)(\bi\b)"; + public static readonly string TwoDigitYearRegex = $@"(?#TwoDigitYearRegex)\b(?([0-9]\d))(?!(\s*((\:\d)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b"; + public static readonly string YearRegex = $@"(?#YearRegex)(?:{BaseDateTime.FourDigitYearRegex}|{FullTextYearRegex})"; + public const string WeekDayRegex = @"(?#WeekDayRegex)\b(?(?:sö(n)?|må(n)?|ti(s)?|on(s)?|to(rs)?|fr(e)?|lö(r)?))(d(ags?(en|ar)?)?)?\b"; + public const string SingleWeekDayRegex = @"(?#SingleWeekDayRegex)\b(?(?((dag\s+)?i\s+)?{RelativeRegex}\s+månad(en)?)\b"; + public const string MonthRegex = @"(?#MonthRegex)\b(?apr(il)?|aug(usti)?|dec(ember)?|feb(ruari)?|jan(uari)?|juli?|juni?|mar(s)?|maj|nov(ember)?|okt(ober)?|sept(ember)?|sep)(?!\p{L})"; + public static readonly string WrittenMonthRegex = $@"(?#WrittenMonthRegex)((i\s+)?{MonthRegex}(\s+månad)?)"; + public static readonly string MonthSuffixRegex = $@"(?#MonthSuffixRegex)(?(?:(i|under)\s+)?({RelativeMonthRegex}|{WrittenMonthRegex}))"; + public const string DateUnitRegex = @"(?#DateUnitRegex)(?(decenni(um)?|år|(?månad|vecka?)|(?(arbets\s*|vecka?\s*))?(?dag)|weekend|helg)(?(s|or|er|ar|e?n))?|(?<=\s+\d{1,4})[ymwd])\b"; + public const string DateTokenPrefix = @"den "; + public const string TimeTokenPrefix = @"kl "; + public const string TokenBeforeDate = @"den "; + public const string TokenBeforeTime = @"kl "; + public const string HalfTokenRegex = 
@"(?#HalfTokenRegex)^(halv)"; + public const string QuarterTokenRegex = @"(?#QuarterTokenRegex)^((en\s+)?kvart)"; + public const string ThreeQuarterTokenRegex = @"(?#ThreeQuarterTokenRegex)^(tre\s+kvart(ar)?)"; + public const string ToTokenRegex = @"(?#ToTokenRegex)\b(till)$"; + public const string FromRegex = @"(?#FromRegex)\b(från(\s+den)?)$"; + public const string BetweenTokenRegex = @"(?#BetweenTokenRegex)\b(mellan(\s+den)?)$"; + public static readonly string SimpleCasesRegex = $@"(?#SimpleCasesRegex)\b({RangePrefixRegex}\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex}\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+{DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; + public static readonly string MonthFrontSimpleCasesRegex = $@"(?#MonthFrontSimpleCasesRegex)\b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((från)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; + public static readonly string MonthFrontBetweenRegex = $@"(?#MonthFrontBetweenRegex)\b{MonthSuffixRegex}\s+(mellan\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b"; + public static readonly string BetweenRegex = $@"(?#BetweenRegex)\b(mellan\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b"; + public static readonly string MonthWithYear = $@"(?#MonthWithYear)\b((({WrittenMonthRegex}[\.]?|((den\s+)?(?först(e|a)|1:(e|a)|andr(e|a)|2:(a|e)|tredje|3:e|fjärde|4:e|femte|5:e|sjätte|6:e|sjunde|7:e|åttonde|8:e|nionde|9:e|tionde|10:e|elfte|11:e|tolfte|12:e|senaste)\s+månaden(?=\s+(of|in))))((\s*)[/\\\-\.,]?(\s+(of|in))?(\s*)({YearRegex}|(?kommande|nästa|senaste|detta)\s+år(et)?)|\s+(of|in)\s+{TwoDigitYearRegex}))|(({YearRegex}|(?kommande|nästa|senaste|detta)\s+år(et)?)(\s*),?(\s*){WrittenMonthRegex}))\b"; + public const string SpecialYearPrefixes = @"(?#SpecialYearPrefixes)(kalender|(?räkenskaps|skol|läs))"; + public static readonly string OneWordPeriodRegex = $@"(?#OneWordPeriodRegex)\b((((the\s+)?month 
of\s+)?({StrictRelativeRegex}\s+)?{MonthRegex})|(month|year) to date|(?((un)?till?|to)\s+date)|({RelativeRegex}\s+)?(my\s+)?((?working\s+week|workweek)|week(end)?|month|fortnight|(({SpecialYearPrefixes}\s+)?year))(?!((\s+of)?\s+\d+(?!({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex}))|\s+to\s+date))(\s+{AfterNextSuffixRegex})?)\b"; + public static readonly string MonthNumWithYear = $@"(?#MonthNumWithYear)\b(({BaseDateTime.FourDigitYearRegex}(\s*)[/\-\.](\s*){MonthNumRegex})|({MonthNumRegex}(\s*)[/\-](\s*){BaseDateTime.FourDigitYearRegex}))\b"; + public static readonly string WeekOfMonthRegex = $@"(?#WeekOfMonthRegex)\b(?(den\s+)?(?första|1:a|andra|2:a|tredje|3:e|fjärde|4:e|femte|5:e|sista)\s+veckan\s+{MonthSuffixRegex}(\s+{BaseDateTime.FourDigitYearRegex}|{RelativeRegex}\s+år)?)\b"; + public static readonly string WeekOfYearRegex = $@"(?#WeekOfYearRegex)\b(?(den\s+)?(?första|1:a|andra|2:a|tredje|3:e|fjärde|4:e|femte|5:e|sista)\s+veckan(\s+i)?\s+({YearRegex}|{RelativeRegex}\s+år))\b"; + public static readonly string OfYearRegex = $@"(?#OfYearRegex)\b((i)\s+({YearRegex}|{StrictRelativeRegex}\s+år))\b"; + public const string FirstLastRegex = @"(?#FirstLastRegex)\b(den\s+)?((?första)|(?sista))\b"; + public static readonly string FollowedDateUnit = $@"(?#FollowedDateUnit)^\s*{DateUnitRegex}"; + public static readonly string NumberCombinedWithDateUnit = $@"(?#NumberCombinedWithDateUnit)\b(?\d+(\.\d*)?){DateUnitRegex}"; + public const string QuarterTermRegex = @"(?#QuarterTermRegex)\b(((?första|1:a|andra|2:a|tredje|3:e|fjärde|4:e)[ -]+kvartalet)|(q(?[1-4])))\b"; + public static readonly string RelativeQuarterTermRegex = $@"(?#RelativeQuarterTermRegex)\b(?{StrictRelativeRegex})\s+kvartalet\b"; + public static readonly string QuarterRegex = $@"(?#QuarterRegex)((det\s+)?{QuarterTermRegex}(?:((\s+under)?\s+|\s*[,-]\s*)({YearRegex}|{RelativeRegex}\s+year))?)|{RelativeQuarterTermRegex}"; + public static readonly string QuarterRegexYearFront = 
$@"(?#QuarterRegexYearFront)(?:{YearRegex}|{RelativeRegex}\s+year)('s)?(?:\s*-\s*|\s+(det\s+)?)?{QuarterTermRegex}"; + public const string HalfYearTermRegex = @"(?#HalfYearTermRegex)(?första|1:a|andra|2:a)\s+halvåret"; + public static readonly string HalfYearFrontRegex = $@"(?#HalfYearFrontRegex)(?((1[5-9]|20)\d{{2}})|2100)(\s*-\s*|\s+(det\s+)?)?h(?[1-2])"; + public static readonly string HalfYearBackRegex = $@"(?#HalfYearBackRegex)(det\s+)?(h(?[1-2])|({HalfYearTermRegex}))(\s+under|\s*,\s*)?\s+({YearRegex})"; + public static readonly string HalfYearRelativeRegex = $@"(?#HalfYearRelativeRegex)(det\s+)?{HalfYearTermRegex}(\s+under|\s*,\s*)?\s+({RelativeRegex}\s+år)"; + public static readonly string AllHalfYearRegex = $@"(?#AllHalfYearRegex)({HalfYearFrontRegex})|({HalfYearBackRegex})|({HalfYearRelativeRegex})"; + public const string EarlyPrefixRegex = @"(?#EarlyPrefixRegex)\b(?early|början av|början på|(?tidigare(\s+under)?))\b"; + public const string MidPrefixRegex = @"(?#MidPrefixRegex)\b(?mitten av)\b"; + public const string LaterPrefixRegex = @"(?#LaterPrefixRegex)\b(?slutet på|slutet av|(?senare(\s+under)?))\b"; + public static readonly string PrefixPeriodRegex = $@"(?#PrefixPeriodRegex)({EarlyPrefixRegex}|{MidPrefixRegex}|{LaterPrefixRegex})"; + public const string PrefixDayRegex = @"(?#PrefixDayRegex)\b((?tidig(t|are))|(?mitt på)|(?senare?))(\s+på)?(\s+dagen)?$"; + public const string SeasonDescRegex = @"(?#SeasonDescRegex)(?våren|sommaren|hösten|vintern)"; + public static readonly string SeasonRegex = $@"(?#SeasonRegex)\b(?({PrefixPeriodRegex}\s+)?({RelativeRegex}\s+)?{SeasonDescRegex}((\s+|\s*,\s*)?\s+({YearRegex}|{RelativeRegex}\s+året))?)\b"; + public const string WhichWeekRegex = @"(?#WhichWeekRegex)\b(vecka)(\s*)(?5[0-3]|[1-4]\d|0?[1-9])\b"; + public const string WeekOfRegex = @"(?#WeekOfRegex)((veckan)(\s+(som|(börjar|startar|inleds)(\s+den)?))|w/c)(\s+den)?"; + public const string MonthOfRegex = @"(?#MonthOfRegex)(månaden)(\s*)(i)"; + public static 
readonly string DateYearRegex = $@"(?#DateYearRegex)(?{BaseDateTime.FourDigitYearRegex}|(?(3[0-1]|[0-2]?\d)(?:(a|e)))s?)\b"; + public const string PrefixWeekDayRegex = @"(?#PrefixWeekDayRegex)(\s*((,?\s*på)|[-—–]))"; + public static readonly string ThisRegex = $@"(?#ThisRegex)\b(denna(\s*vecka{PrefixWeekDayRegex}?)?\s*{WeekDayRegex})|({WeekDayRegex}((\s+i)?\s+(denna|den\s+här)\s*veckan?))\b|(på\s+){WeekDayRegex}$"; + public static readonly string LastDateRegex = $@"(?#LastDateRegex)\b({PreviousPrefixRegex}(\s*vecka{PrefixWeekDayRegex}?)?\s*{WeekDayRegex})|({WeekDayRegex}(\s+(den\s+)?senaste\s*veckan))\b|({WeekDayRegex}\s+(i\s+)?{PreviousPrefixRegex}\s*veckan)\b|(i\s+{WeekDayRegex})\b"; + public static readonly string NextDateRegex1 = $@"(?#NextDateRegex1)\b({NextPrefixRegex}(\s*vecka{PrefixWeekDayRegex}?)?\s*{WeekDayRegex})"; + public static readonly string NextDateRegex2 = $@"(?#NextDateRegex2)\b((på\s+)?{WeekDayRegex}((\s+i)?\s+(den\s+kommande|nästa)\s*vecka))\b"; + public static readonly string NextDateRegex = $@"(?#NextDateRegex)({NextDateRegex2}|{NextDateRegex1})"; + public static readonly string SpecialDayRegex = $@"(?#SpecialDayRegex)\b(i förrgår|i övermorgon|dagen\s+(före|efter)(?!=\s+dag)|((den\s+)?({RelativeRegex}|min)\s+dag)|igår|imorgon|idag)\b"; + public static readonly string SpecialDayWithNumRegex = $@"(?#SpecialDayWithNumRegex)\b((?{WrittenNumRegex})\s+dag(ar)?\s+från\s+(?igår|imorgon|idag))\b"; + public static readonly string RelativeDayRegex = $@"(?#RelativeDayRegex)\b(((den\s+)?{RelativeRegex}\s+dag(en)?))\b"; + public const string SetWeekDayRegex = @"(?#SetWeekDayRegex)\b(?på\s+)?(?morgonen|eftermiddagen|kvällen|natten|(sön|mån|tis|ons|tors|fre|lör)dag(en)?)\b"; + public static readonly string WeekDayOfMonthRegex = $@"(?#WeekDayOfMonthRegex)(?(den\s+)?(?första|1:a|andra|2:a|tredje|3:e|fjärde|4:e|femte|5:e|sista)\s+(veckan\s+{MonthSuffixRegex}[\.]?\s+(på\s+)?{WeekDayRegex}|{WeekDayRegex}\s+{MonthSuffixRegex}))"; + public static readonly string 
RelativeWeekDayRegex = $@"(?#RelativeWeekDayRegex)\b({WrittenNumRegex}\s+{WeekDayRegex}\s+(från\s+nu|senare))\b"; + public static readonly string SpecialDate = $@"(?#SpecialDate)(?=\b(den)s+){DayRegex}\b"; + public const string DatePreposition = @"(?#DatePreposition)\b(den|i)"; + public static readonly string DateExtractorYearTermRegex = $@"(?#DateExtractorYearTermRegex)(\s+|\s*[/\\.,-]\s*|\s+under\s+){DateYearRegex}"; + public static readonly string DayPrefix = $@"(?#DayPrefix)\b({WeekDayRegex}|{SpecialDayRegex})\b"; + public static readonly string DateExtractor1 = $@"(?#DateExtractor1)\b({DayPrefix}\s*[,-]?\s*)?(({DayRegex}[\.]?\s*[/\\.,-]?\s*{MonthRegex})|(\({DayRegex}\s*[-./ ]\s*{MonthRegex}\)))(\s*\(\s*{DayPrefix}\s*\))?({DateExtractorYearTermRegex}\b)?"; + public static readonly string DateExtractor3 = $@"(?#DateExtractor3)\b({DayPrefix}(\s+|\s*,\s*))?({DayRegex}[\.]?(\s+|\s*[-,/]\s*|\s+i\s+){MonthRegex}[\.]?((\s+i)?{DateExtractorYearTermRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[-./]?\s*(den\s+)?(?(?:3[0-1]|[1-2]\d|0?[1-9])(?:(a|e))?)[\.]?(\s+|\s*[-,/]\s*|\s+i\s+){MonthRegex}[\.]?)\b"; + public static readonly string DateExtractor4 = $@"(?#DateExtractor4)\b{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}[\.]?\s*[/\\\-]\s*{DateYearRegex}"; + public static readonly string DateExtractor5 = $@"(?#DateExtractor5)\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; + public static readonly string DateExtractor6 = $@"(?#DateExtractor6)(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({DayPrefix}\s+)?{MonthNumRegex}[\-\.]{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; + public static readonly string DateExtractor7L = $@"(?#DateExtractor7L)\b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b"; + public static readonly string DateExtractor7S = 
$@"(?#DateExtractor7S)\b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; + public static readonly string DateExtractor8 = $@"(?#DateExtractor8)(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({DayPrefix}\s+)?{DayRegex}[\\\-]{MonthNumRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; + public static readonly string DateExtractor9L = $@"(?#DateExtractor9L)\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\b"; + public static readonly string DateExtractor9S = $@"(?#DateExtractor9S)\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}(?![%])\b"; + public const string DateExtractorNoSep = @"(?#DateExtractorNoSep)\b((?((1\d|20)\d{2})|2100)(\s+(?1[0-2]|(0)?[1-9])\s+(?(?:3[0-1]|[1-2]\d|0?[1-9]))|(?1[0-2]|(0)?[1-9])(?(?:3[0-1]|[1-2]\d|0?[1-9])))\b)"; + public static readonly string DateExtractorA = $@"(?#DateExtractorA)\b({DayPrefix}(\s*,)?\s+)?(({BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DayRegex})|({MonthRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*(the\s+)?(?(?:3[0-1]|[1-2]\d|0?[1-9])(?:th|nd|rd|st)?))|({DayRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{MonthRegex})|{DateExtractorNoSep})"; + public static readonly string OfMonth = $@"(?#OfMonth)^(\s*(dagen\s+)?i)?\s*{MonthRegex}"; + public static readonly string MonthEnd = $@"(?#MonthEnd)'{MonthRegex}\s*(den)?\s*$'"; + public static readonly string WeekDayEnd = $@"(?#WeekDayEnd)(den\s+här\s+)?{WeekDayRegex}\s*,?(\bden\b)?\s*$"; + public static readonly string WeekDayStart = $@"(?#WeekDayStart)^\s+(på\s+)?{WeekDayRegex}(\s+den)?\b"; + public const string RangeUnitRegex = @"(?#RangeUnitRegex)\b(?år|månad(er)?|veck(a|or)?)\b"; + public const string HourNumRegex = @"(?#HourNumRegex)\b(?noll|ett|två|tre|fyra|fem|sex|sju|åtta|nio|tio|elva|tolv)\b"; + public const string MinuteNumRegex = 
@"(?#MinuteNumRegex)(((?tjugo|trettio|fyrtio|femtio)(\s*-?\s*))?(?ett|två|tre|fyra|fem|sex|sju|åtta|nio)|(?tio|elva|tolv|tretton|femton|årton|(fjor|sex|sjut|nit)(ton)|tjugo|trettio|fyrtio|femtio))"; + public const string DeltaMinuteNumRegex = @"(?#DeltaMinuteNumRegex)(((?tjugo|trettio|fyrtio|femtio)(\s*-?\s*))?(?ett|två|tre|fyra|fem|sex|sju|åtta|nio)|(?tio|elva|tolv|tretton|femton|arton|(fjor|sex|sjut|nit)(ton)|tjugo|trettio|fyrtio|femtio))"; + public const string PmRegex = @"(?#PmRegex)(?(((?:kl|runt|circa|kring)\s+(den\s+)?)?(((tidig|sen)\s+)?(eftermiddag|kväll)|midnatt|middag|lunch))|((kl|runt|circa|kring|till)\s+natt(en)?))"; + public const string PmRegexFull = @"(?#PmRegexFull)(?((?:kl|om|runt|circa)\s+(the\s+)?)?(((tidig|sen)\s+)?(eftermiddag|kväll)|(mid)?natt|middag|lunch))"; + public const string AmRegex = @"(?#AmRegex)(?((?:kl|om|runt|circa)\s+(the\s+)?)?((tidig|sen)\s+)?(morgon))"; + public const string LunchRegex = @"(?#LunchRegex)\blunch\b"; + public const string NightRegex = @"(?#NightRegex)\b(mid)?natt\b"; + public const string CommonDatePrefixRegex = @"(?#CommonDatePrefixRegex)^[\.]"; + public static readonly string LessThanOneHour = $@"(?#LessThanOneHour)(?(en\s+)?kvart|tre kvart|halvtimme?|{BaseDateTime.DeltaMinuteRegex}(\s+(minut(er)?|mins?))|{DeltaMinuteNumRegex}(\s+(minut(er)?|mins?)))"; + public static readonly string WrittenTimeRegex = $@"(?#WrittenTimeRegex)(?{HourNumRegex}\s+{MinuteNumRegex}(\s+(minut(er)?|mins?))?)"; + public static readonly string TimePrefix = $@"(?#TimePrefix)(?{LessThanOneHour}\s+(över|i))"; + public static readonly string TimeSuffix = $@"(?#TimeSuffix)(?{AmRegex}|{PmRegex}|{OclockRegex})"; + public static readonly string TimeSuffixFull = $@"(?#TimeSuffixFull)(?{AmRegex}|{PmRegexFull}|{OclockRegex})"; + public static readonly string BasicTime = $@"(?#BasicTime)\b(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex}(?![%\d]))"; + 
public const string MidnightRegex = @"(?#MidnightRegex)(?mid\s*(-\s*)?natt)"; + public const string MidmorningRegex = @"(?#MidmorningRegex)(?tidig\s+förmiddag)"; + public const string MidafternoonRegex = @"(?#MidafternoonRegex)(?mid\s*(-\s*)?eftermiddag)"; + public const string MiddayRegex = @"(?#MiddayRegex)(?middag|mitt\s+på\s+dagen|((12\s)?lunch(tid)?))"; + public static readonly string MidTimeRegex = $@"(?#MidTimeRegex)(?({MidnightRegex}|{MidmorningRegex}|{MidafternoonRegex}|{MiddayRegex}))"; + public static readonly string AtRegex = $@"(?#AtRegex)\b(?:(?:(?<=\b(at|(at)?\s*runt|vid)\s+)(?:{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\.\d)(\s*((?a)|(?p)))?|{MidTimeRegex}))|{MidTimeRegex})\b"; + public static readonly string IshRegex = $@"(?#IshRegex)'\b({BaseDateTime.HourRegex}(-|——)?tid|lunch(tid)?|middag(stid)?)\b'"; + public const string TimeUnitRegex = @"(?#TimeUnitRegex)([^a-z]{1,}|\b)(?(tim(me)?|min(ut)?|s(ek(und)?)?)(?mar|er)?|h)\b"; + public const string RestrictedTimeUnitRegex = @"(?#RestrictedTimeUnitRegex)(?timme|minut)\b"; + public const string FivesRegex = @"(?#FivesRegex)(?(?:femton|(?:tjugo|trettio|fyrtio|femtio)(\s*fem)?|tio|fem))\b"; + public static readonly string HourRegex = $@"(?#HourRegex)\b{BaseDateTime.HourRegex}"; + public const string PeriodHourNumRegex = @"(?#PeriodHourNumRegex)\b(?tjugo(\s+(en|ett|två|tre|fyra))?|elva|tolv|tretton|femton|arton|(fjor|sex|sjut|nit)(ton)?|noll|ett|en|två|tre|fem|åtta|tio)\b"; + public static readonly string ConnectNumRegex = $@"(?#ConnectNumRegex)'\b{BaseDateTime.HourRegex}(?[0-5][0-9])\s*{DescRegex}'"; + public static readonly string TimeRegexWithDotConnector = $@"(?#TimeRegexWithDotConnector)({BaseDateTime.HourRegex}(\s*\.\s*){BaseDateTime.MinuteRegex})"; + public static readonly string TimeRegex1 = $@"(?#TimeRegex1)\b({TimePrefix}\s+)?({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})(\s*|[.]){DescRegex}"; + public static readonly string TimeRegex2 = 
$@"(?#TimeRegex2)(\b{TimePrefix}\s+)?(t)?{BaseDateTime.HourRegex}(\s*)?:(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?(?a)?((\s*{DescRegex})|\b)"; + public static readonly string TimeRegex3 = $@"(?#TimeRegex3)(\b{TimePrefix}\s+)?{BaseDateTime.HourRegex}\.{BaseDateTime.MinuteRegex}(\s*{DescRegex})"; + public static readonly string TimeRegex4 = $@"(?#TimeRegex4)\b{TimePrefix}\s+{BasicTime}(\s*{DescRegex})?\s+{TimeSuffix}\b"; + public static readonly string TimeRegex5 = $@"(?#TimeRegex5)\b{TimePrefix}\s+{BasicTime}((\s*{DescRegex})|\b)"; + public static readonly string TimeRegex6 = $@"(?#TimeRegex6)({BasicTime})(\s*{DescRegex})?\s+{TimeSuffix}\b"; + public static readonly string TimeRegex7 = $@"(?#TimeRegex7)\b{TimeSuffixFull}\s+(kl\s+)?{BasicTime}((\s*{DescRegex})|\b)"; + public static readonly string TimeRegex8 = $@"(?#TimeRegex8).^"; + public static readonly string TimeRegex9 = $@"(?#TimeRegex9)\b{PeriodHourNumRegex}(\s+|-){FivesRegex}((\s*{DescRegex})|\b)"; + public static readonly string TimeRegex10 = $@"(?#TimeRegex10)\b({TimePrefix}\s+)?{BaseDateTime.HourRegex}(\s*h|t\s*){BaseDateTime.MinuteRegex}(\s*{DescRegex})?"; + public static readonly string TimeRegex11 = $@"(?#TimeRegex11)\b((?:({TimeTokenPrefix})?{TimeRegexWithDotConnector}(\s*{DescRegex}))|(?:(?:{TimeTokenPrefix}{TimeRegexWithDotConnector})(?!\s*procent|%)))"; + public static readonly string FirstTimeRegexInTimeRange = $@"(?#FirstTimeRegexInTimeRange)\b{TimeRegexWithDotConnector}(\s*{DescRegex})?"; + public static readonly string PureNumFromTo = $@"(?#PureNumFromTo)({RangePrefixRegex}\s+)?({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?\s*{TillRegex}\s*({HourRegex}|{PeriodHourNumRegex})(?\s*({PmRegex}|{AmRegex}|{DescRegex}))?"; + public static readonly string PureNumBetweenAnd = 
$@"(?#PureNumBetweenAnd)(mellan\s+)(({BaseDateTime.TwoDigitHourRegex}{BaseDateTime.TwoDigitMinuteRegex})|{HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?\s*{RangeConnectorRegex}\s*(({BaseDateTime.TwoDigitHourRegex}{BaseDateTime.TwoDigitMinuteRegex})|{HourRegex}|{PeriodHourNumRegex})(?\s*({PmRegex}|{AmRegex}|{DescRegex}))?"; + public static readonly string SpecificTimeFromTo = $@"(?#SpecificTimeFromTo)({RangePrefixRegex}\s+)?(?(({TimeRegex2}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))\s*{TillRegex}\s*(?(({TimeRegex2}|{TimeRegexWithDotConnector}(?\s*{DescRegex}))|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))"; + public static readonly string SpecificTimeBetweenAnd = $@"(?#SpecificTimeBetweenAnd)(mellan\s+)(?(({TimeRegex2}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))\s*{RangeConnectorRegex}\s*(?(({TimeRegex2}|{TimeRegexWithDotConnector}(?\s*{DescRegex}))|({HourRegex}|{PeriodHourNumRegex})(\s*(?{DescRegex}))?))"; + public const string SuffixAfterRegex = @"(?#SuffixAfterRegex)\b(((kl)\s)?(eller|och)\s+(över|efter|senare|större)(?!\s+än))\b"; + public const string PrepositionRegex = @"(?#PrepositionRegex)(?^(,\s*)?(kl|på)(\s+den)?$)"; + public const string LaterEarlyRegex = @"(?#LaterEarlyRegex)((?tidig(are)(\s+|-))|(?efte(r?\s+|-)))"; + public const string MealTimeRegex = @"(?#MealTimeRegex)\b(at\s+)?(?breakfast|brunch|lunch(\s*tid)?|middags(\s*tid)?|kvällsmat)\b"; + public static readonly string UnspecificTimePeriodRegex = $@"(?#UnspecificTimePeriodRegex)({MealTimeRegex})"; + public static readonly string TimeOfDayRegex = $@"(?#TimeOfDayRegex)\b(?((((in\s+the\s+)?{LaterEarlyRegex}?(in(\s+the)?\s+)?(morgon(en)?|eftermiddag(en)?|natt(en)?|kväll(en)?)))|{MealTimeRegex}|(((in\s+(the)?\s+)?)(dagtid|kontorstid)))s?)\b"; + public static readonly string SpecificTimeOfDayRegex = $@"(?#SpecificTimeOfDayRegex)\b(({StrictRelativeRegex}\s+{TimeOfDayRegex})\b|\bikväll)s?\b"; + public 
static readonly string TimeFollowedUnit = $@"(?#TimeFollowedUnit)^\s*{TimeUnitRegex}"; + public static readonly string TimeNumberCombinedWithUnit = $@"(?#TimeNumberCombinedWithUnit)\b(?\d+(\.\d*)?){TimeUnitRegex}"; + public static readonly string[] BusinessHourSplitStrings = { @"arbets", @"tid" }; + public const string NowRegex = @"(?#NowRegex)\b(?(just\s+)?nu|så\s+snart\s+som\s+möjligt|asap|nyligen|tidigare|i\s+(present|denna\s+stund|denna\s+minut|detta\s+(ögonblick|present\s+time)))\b"; + public static readonly string NowParseRegex = $@"(?#NowParseRegex)\b({NowRegex}|^(date)$)\b"; + public const string SuffixRegex = @"(?#SuffixRegex)^\s*(på\s+)?(morgonen|eftermiddagen|kvällen|natten)\b"; + public const string NonTimeContextTokens = @"(?#NonTimeContextTokens)(building)"; + public const string DateTimeTimeOfDayRegex = @"(?#DateTimeTimeOfDayRegex)\b(?morning|(?eftermiddag(en)?|natt(en)?|kväll(en)?))\b"; + public static readonly string DateTimeSpecificTimeOfDayRegex = $@"(?#DateTimeSpecificTimeOfDayRegex)\b(({RelativeRegex}\s+{DateTimeSpecificTimeOfDayRegexPlaceholder})\b|\bikväll)\b"; + public static readonly string TimeOfTodayAfterRegex = $@"(?#TimeOfTodayAfterRegex)^\s*(,\s*)?(på\s+)?{DateTimeSpecificTimeOfDayRegex}"; + /* Matches a specific time-of-day expression, optionally followed by a comma and a preposition (på|runt|omkring|vid), anchored to the end of the input. The pattern must not be wrapped in literal quote characters: a quote after the end anchor can never match. */ public static readonly string TimeOfTodayBeforeRegex = $@"(?#TimeOfTodayBeforeRegex){DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(på|runt|omkring|vid))?\s*$"; + public static readonly string SimpleTimeOfTodayAfterRegex = $@"(?#SimpleTimeOfTodayAfterRegex)(?{DateUnitRegex}|tim(me|mar)?|h|min(ut(er)?)?|sek(und(er)?)?|nätter)\b"; + public const string SuffixAndRegex = @"(?#SuffixAndRegex)(?\s*(och)\s+(en?\s+)?(?halv|kvart))"; + public const string PeriodicRegex = @"(?#PeriodicRegex)\b(?((?semi|varannan|tri)(\s*|-))?(dagligen|månatligen|veckovis|kvartalsvis|årlig(en)?))\b"; + public static readonly string EachUnitRegex = $@"(?#EachUnitRegex)\b(?(varje|(varje|var|en 
gång\s+(i|per|varje|var)?)\s?)(?\s+(annan|alternate|second))?\s*({DurationUnitRegex}|(?kvartal|weekends?|veckoslut|helg)|{WeekDayRegex})|(?weekends))"; + public const string EachPrefixRegex = @"(?#EachPrefixRegex)\b(?(varje|var|en gång\s+(i|per|varje|var)?)\s*$)"; + public const string SetEachRegex = @"(?#SetEachRegex)\b(?(varje|var)(?\s+(annan|alternate))?\s*)(?!den|det)\b"; + public static readonly string SetLastRegex = $@"(?#SetLastRegex)(?följande|nästa|kommande|den\s+här|{LastNegPrefix}senaste|förra|tidigare|nuvarande)"; + public const string EachDayRegex = @"(?#EachDayRegex)^\s*(var|varje)\s*dag\b"; + public static readonly string DurationFollowedUnit = $@"(?#DurationFollowedUnit)(^\s*{DurationUnitRegex}\s+{SuffixAndRegex})|(^\s*{SuffixAndRegex}?(\s+|-)?{DurationUnitRegex})"; + public static readonly string NumberCombinedWithDurationUnit = $@"(?#NumberCombinedWithDurationUnit)\b(?\d+(\.\d*)?)(-)?{DurationUnitRegex}"; + public static readonly string AnUnitRegex = $@"(?#AnUnitRegex)(\b((?(en)\s+)?halv|another)|(?(1/2|½|en halv)))\s+{DurationUnitRegex}"; + public const string DuringRegex = @"(?#DuringRegex)\b(under|i)\s+(?år(et)?|månad(en)?|veckan?|dag(en)?)\b"; + public const string AllRegex = @"(?#AllRegex)\b(?(hela)(\s+|-)(?år(et)?|månad(en)?|veckan?|dag(en)?))\b"; + public const string HalfRegex = @"(?#HalfRegex)((e(n|tt)\s*)|\b)(?halvt?\s+(?år|månad|vecka|dag|timm(e|a)))\b"; + public const string ConjunctionRegex = @"(?#ConjunctionRegex)\b((och(\s+under)?)|med)\b"; + public const string HolidayList1 = @"(?#HolidayList1)(?första maj|nyårsdagen|nyårsafton|trettondedag(en|\s+jul)|julafton|juldagen|annandag\s+jul|skärtorsdag(en)?|långfredag(en)?|påskafton|påskdagen|annandag\s+påsk|mardi gras|(washington|mao)'s birthday|juneteenth|(jubilee|freedom)(\s+day)|chinese new year|(new\s+(years'|year\s*'s|years?)\s+eve)|(new\s+(years'|year\s*'s|years?)(\s+day)?)|may\s*day|yuan dan|christmas eve|(christmas|xmas)(\s+day)?|black 
friday|yuandan|easter(\s+(sunday|saturday|monday))?|clean monday|ash wednesday|palm sunday|maundy thursday|good friday|white\s+(sunday|monday)|trinity sunday|pentecost|corpus christi|cyber monday)"; + public const string HolidayList2 = @"(?#HolidayList2)(?(valborg(smässoafton)?|kristi himmelsfärdsdag|pingst(afton|dagen)|midsommar(afton|dagen)|allahelgonaafton|alla helgons dag|thanks\s*giving|all saint's|white lover|s(?:ain)?t?(\.)?\s+(?:patrick|george)(?:')?(?:s)?|us independence|all hallow|all souls|guy fawkes|cinco de mayo|halloween|qingming|dragon boat|april fools|tomb\s*sweeping)(\s+day)?)"; + public const string HolidayList3 = @"(?#HolidayList3)(?(nationaldagen|sveriges nationaldag|mlk|martin luther king( jr)?|canberra|ascension|columbus|tree( planting)?|arbor|labou?r|((international|int'?l)\s+)?workers'?|mother'?s?|father'?s?|female|women('s)?|single|teacher'?s|youth|children|girls|lovers?|earth|inauguration|groundhog|valentine'?s|baptiste|bastille|veterans(?:')?|memorial|mid[ \-]autumn|moon|spring|lantern)\s+day)"; + public const string HolidayList4 = @"(?#HolidayList4)(?ramad(h)?an|ram(a)?zan|ramathan|eid al(-|\s+)adha|eid al(-|\s+)azha|eidul(-|\s+)azha|feast of the sacrifice|(islamic|arabic|hijri) new year|eid al(-|\s+)fitr|festival of breaking the fast)"; + public static readonly string HolidayRegex = $@"(?#HolidayRegex)\b(({StrictRelativeRegex}\s+({HolidayList1}|{HolidayList2}|{HolidayList3}|{HolidayList4}))|(?((the\s+)?weekend\s+of\s+)({HolidayList1}|{HolidayList2}|{HolidayList3}|{HolidayList4})(\s+((of\s+)?({YearRegex}|{RelativeRegex}\s+year)))?)|(({HolidayList1}|{HolidayList2}|{HolidayList3}|{HolidayList4})((?(\s+weekend)(\s+((of\s+)?({YearRegex}|{RelativeRegex}\s+year)))?)|(\s+(of\s+)?({YearRegex}|{RelativeRegex}\s+year)(?\s+weekend)?))?))\b"; + public const string AMTimeRegex = @"(?#AMTimeRegex)(?morgon(en)?|förmiddag(en)?)"; + public const string PMTimeRegex = @"(?#PMTimeRegex)\b(?eftermiddag(en)?|kväll(en)?|natt(en)?)\b"; + public const string 
NightTimeRegex = @"(?#NightTimeRegex)(natten)"; + public const string NowTimeRegex = @"(?#NowTimeRegex)(nu|vid\s+(denna tidpunkt|denna\s+minut|(ögonblick(et)?|(aktuell|nuvarande)\s+tidpunkt)))"; + public const string RecentlyTimeRegex = @"(?#RecentlyTimeRegex)(senaste|tidigare)"; + public const string AsapTimeRegex = @"(?#AsapTimeRegex)(så\s+snart\s+(som|(som)?det\s+är)\s+möjligt|asap|snarast)"; + public const string InclusiveModPrepositions = @"(?#InclusiveModPrepositions)(?((den|kl|vid)\s+eller\s+)|(\s+eller\s+(den|kl|vid)))"; + public const string AroundRegex = @"(?#AroundRegex)(?:\b(?:runt|cirka|omkring)\s*?\b)(\s+den)?"; + public static readonly string BeforeRegex = $@"(?#BeforeRegex)((\b{InclusiveModPrepositions}?(?:före|innan|(ej\s+senare\s+än|tidigare)\s+än|slutar\s+(med|den)|by|tom|till|(?så\s+sent\s+som)){InclusiveModPrepositions}?\b\s*?)|(?)((?<\s*=)|<))(\s+den)?"; + public static readonly string AfterRegex = $@"(?#AfterRegex)((\b{InclusiveModPrepositions}?((efter|(\s+med)?(start|början)(?!\sfrån)|(?>\s*=)|>))(\s+den)?"; + public const string SinceRegex = @"(?#SinceRegex)(?:(?:\b(?:sedan|senare\s+än\s+eller\s+lika\s+med|((med|som)\s+börja(n|r)|(som)?\s+startar)\s+(?:från|den|med)|så\s+tidigt\s+som|(any\s+time\s+)from)\b\s*?)|(?=))(\s+den)?"; + public static readonly string SinceRegexExp = $@"(?#SinceRegexExp)({SinceRegex}|\bfrån(\s+den)?\b)"; + public const string AgoRegex = @"(?#AgoRegex)\b(sedan|tidigare|före\s+(?igår|idag))\b"; + public static readonly string LaterRegex = $@"(?#LaterRegex)\b(?:senare(?!((\s+under)?\s*{OneWordPeriodRegex})|(\s+{TimeOfDayRegex})|\s+än\b)|från nu|(från|efter)\s+(?imorgon|idag))\b"; + public const string BeforeAfterRegex = @"(?#BeforeAfterRegex)\b((?före)|(?från|efter))\b"; + public static readonly string ModPrefixRegex = $@"(?#ModPrefixRegex)\b({RelativeRegex}|{AroundRegex}|{BeforeRegex}|{AfterRegex}|{SinceRegex})\b"; + public static readonly string ModSuffixRegex = 
$@"(?#ModSuffixRegex)\b({AgoRegex}|{LaterRegex}|{BeforeAfterRegex}|{FutureSuffixRegex}|{PastSuffixRegex})\b"; + public const string InConnectorRegex = @"(?#InConnectorRegex)\b(om)\b"; + public static readonly string SinceYearSuffixRegex = $@"(?#SinceYearSuffixRegex)(^\s*{SinceRegex}(\s*år(et)?\s*)?{YearSuffix})"; + public static readonly string WithinNextPrefixRegex = $@"(?#WithinNextPrefixRegex)\b(inom(\s+de(n|t))?(\s+(?{NextPrefixRegex}))?)\b"; + public const string TodayNowRegex = @"(?#TodayNowRegex)\b(idag|nu|nuvarande (datum|tid))\b"; + public static readonly string MorningStartEndRegex = $@"(?#MorningStartEndRegex)(^(morgon|{AmDescRegex}))|((morgon|{AmDescRegex})$)"; + public static readonly string AfternoonStartEndRegex = $@"(?#AfternoonStartEndRegex)(^(afternoon|{PmDescRegex}))|((afternoon|{PmDescRegex})$)"; + public const string EveningStartEndRegex = @"(?#EveningStartEndRegex)(^(evening))|((evening)$)"; + public const string NightStartEndRegex = @"(?#NightStartEndRegex)(^(over|to)?ni(ght|te))|((over|to)?ni(ght|te)$)"; + public const string InexactNumberRegex = @"(?#InexactNumberRegex)\b((för\s+)?några|flera|(?(ett\s+)?(par|antal)))\b"; + public static readonly string InexactNumberUnitRegex = $@"(?#InexactNumberUnitRegex)({InexactNumberRegex})\s+({DurationUnitRegex})"; + public static readonly string RelativeTimeUnitRegex = $@"(?#RelativeTimeUnitRegex)(?:(?:(?:{NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+({TimeUnitRegex}))|((the|my))\s+({RestrictedTimeUnitRegex}))"; + public static readonly string RelativeDurationUnitRegex = $@"(?#RelativeDurationUnitRegex)(?:(?:(?<=({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+)({DurationUnitRegex}))|((the|my))\s+({RestrictedTimeUnitRegex}))"; + public static readonly string ReferenceDatePeriodRegex = $@"(?#ReferenceDatePeriodRegex)\b{ReferencePrefixRegex}\s+(?week(end)?|fortnight|month|year|decade)\b"; + public const string ConnectorRegex = @"(?#ConnectorRegex)^(-|,|for|t|runt|cirka|@)$"; 
+ public const string FromToRegex = @"(?#FromToRegex)(\b(från).+(till|och|eller)\b.+)"; + public const string SingleAmbiguousMonthRegex = @"(?#SingleAmbiguousMonthRegex)^(the\s+)?(may|march)$"; + public const string SingleAmbiguousTermsRegex = @"(?#SingleAmbiguousTermsRegex)^(the\s+)?(dag|vecka|månad|år)$"; + public const string UnspecificDatePeriodRegex = @"(?#UnspecificDatePeriodRegex)^(vecka|fortnight|månad|år)$"; + public const string PrepositionSuffixRegex = @"(?#PrepositionSuffixRegex)\b(on|in|at|around|circa|from|to)$"; + public const string FlexibleDayRegex = @"(?#FlexibleDayRegex)(?([A-Öa-ö]+\s)?[A-Öa-ö\d]+)"; + public static readonly string ForTheRegex = $@"(?#ForTheRegex)\b((((?<=\btill\s+)den\s+{FlexibleDayRegex})|((?\s*(,|\.(?!\d)|!|\?|$)))"; + public static readonly string WeekDayAndDayOfMonthRegex = $@"(?#WeekDayAndDayOfMonthRegex)\b{WeekDayRegex}\s+(den\s+{FlexibleDayRegex})\b"; + public static readonly string WeekDayAndDayRegex = $@"(?#WeekDayAndDayRegex)\b{WeekDayRegex}\s+(?!(den)){DayRegex}(?!([-:]|(\s+({AmDescRegex}|{PmDescRegex}|{OclockRegex}))))\b"; + public const string RestOfDateRegex = @"(?#RestOfDateRegex)\b(rest|remaining)\s+(of\s+)?((the|my|this|current)\s+)?(?vecka|fortnight|månad|år|decennium)\b"; + public const string RestOfDateTimeRegex = @"\b(rest|remaining)\s+(av\s+)?((den|min|denna|nuvarande)\s+)?(?dag)\b"; + public const string AmbiguousRangeModifierPrefix = @"(?#AmbiguousRangeModifierPrefix)(från)"; + public static readonly string NumberEndingPattern = $@"(?#NumberEndingPattern)^(?:\s+(?möte|avtalad tid|konferens|sammanträde|((skype|teams|zoom|facetime)\s+)?samtal)\s+till\s+(?{PeriodHourNumRegex}|{HourRegex})([\.]?$|(\.,|,|!|\?)))"; + public const string OneOnOneRegex = @"(?#OneOnOneRegex)\b(1\s*:\s*1(?!\d))|(one (on )?one|one\s*-\s*one|one\s*:\s*one)\b"; + public static readonly string LaterEarlyPeriodRegex = 
$@"(?#LaterEarlyPeriodRegex)\b(({PrefixPeriodRegex})\s*\b\s*(?{OneWordPeriodRegex}|(?{BaseDateTime.FourDigitYearRegex}))|({UnspecificEndOfRangeRegex}))\b"; + public static readonly string WeekWithWeekDayRangeRegex = $@"(?#WeekWithWeekDayRangeRegex)\b((?({NextPrefixRegex}|{PreviousPrefixRegex}|this)\s+week)((\s+mellan\s+{WeekDayRegex}\s+och\s+{WeekDayRegex})|(\s+från\s+{WeekDayRegex}\s+till\s+{WeekDayRegex})))\b"; + public const string GeneralEndingRegex = @"(?#GeneralEndingRegex)^\s*((\.,)|\.|,|!|\?)?\s*$"; + public const string MiddlePauseRegex = @"(?#MiddlePauseRegex)\s*(,)\s*"; + public const string DurationConnectorRegex = @"(?#DurationConnectorRegex)^\s*(?\s+|och|,)\s*$"; + public const string PrefixArticleRegex = @"(?#PrefixArticleRegex)\bthe\s+"; + public const string OrRegex = @"(?#OrRegex)\s*((\b|,\s*)(eller|och)\b|,)\s*"; + public static readonly string SpecialYearTermsRegex = $@"(?#SpecialYearTermsRegex)\b((({SpecialYearPrefixes}\s+)?year)|(cy|(?fy|sy)))"; + public static readonly string YearPlusNumberRegex = $@"(?#YearPlusNumberRegex)\b({SpecialYearTermsRegex}\s*((?(\d{{2,4}}))|{FullTextYearRegex}))\b"; + public static readonly string NumberAsTimeRegex = $@"(?#NumberAsTimeRegex)\b({WrittenTimeRegex}|{PeriodHourNumRegex}|{BaseDateTime.HourRegex})\b"; + public static readonly string TimeBeforeAfterRegex = $@"(?#TimeBeforeAfterRegex)\b(((?<=\b(before|no later than|by|after)\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}|{MidTimeRegex}))|{MidTimeRegex})\b"; + public const string DateNumberConnectorRegex = @"(?#DateNumberConnectorRegex)^\s*(?\s+at)\s*$"; + public const string DecadeRegex = @"(?#DecadeRegex)(?(?:nough|twen|thir|fou?r|fif|six|seven|eigh|nine)ties|two\s+thousands)"; + public static readonly string DecadeWithCenturyRegex = $@"(?#DecadeWithCenturyRegex)(the\s+)?(((?\d|1\d|2\d)?(')?(?\d0)(')?(\s)?s\b)|(({CenturyRegex}(\s+|-)(and\s+)?)?{DecadeRegex})|({CenturyRegex}(\s+|-)(and\s+)?(?tens|hundreds)))"; + public static readonly string 
RelativeDecadeRegex = $@"(?#RelativeDecadeRegex)\b((the\s+)?{RelativeRegex}\s+((?[\w,]+)\s+)?decades?)\b"; + public static readonly string YearPeriodRegex = $@"(?#YearPeriodRegex)((((from|during|in)\s+)?{YearRegex}\s*({TillRegex})\s*{YearRegex})|(((between)\s+){YearRegex}\s*({RangeConnectorRegex})\s*{YearRegex}))"; + public static readonly string StrictTillRegex = $@"(?#StrictTillRegex)(?\b(to|(un)?till?|thru|through)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s+of|\s*,\s*))))"; + public static readonly string StrictRangeConnectorRegex = $@"(?#StrictRangeConnectorRegex)(?\b(and|through|to)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s+of|\s*,\s*))))"; + public const string StartMiddleEndRegex = @"(?#StartMiddleEndRegex)\b((?((the\s+)?(start|beginning)\s+of\s+)?)(?((the\s+)?middle\s+of\s+)?)(?((the\s+)?end\s+of\s+)?))"; + public static readonly string ComplexDatePeriodRegex = $@"(?#ComplexDatePeriodRegex)(?:((from|during|in)\s+)?{StartMiddleEndRegex}(?.+)\s*({StrictTillRegex})\s*{StartMiddleEndRegex}(?.+)|((between)\s+){StartMiddleEndRegex}(?.+)\s*({StrictRangeConnectorRegex})\s*{StartMiddleEndRegex}(?.+))"; + /* Alternation of tokens that indicate possible date/time content. NOTE(review): presumably used as a quick pre-filter, judging by the name - confirm against callers. The alternation must not be wrapped in literal quote characters, otherwise its first and last alternatives only match next to an apostrophe. */ public static readonly string FailFastRegex = $@"(?#FailFastRegex){BaseDateTime.DeltaMinuteRegex}|\b(?:{BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex})|{BaseDateTime.BaseAmPmDescRegex}|\b(?:zero|{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}|{WrittenMonthRegex}|{SeasonDescRegex}|{DecadeRegex}|centur(y|ies)|weekends?|quarters?|hal(f|ves)|yesterday|to(morrow|day|night)|tmr|noonish|\d(-|——)?ish|((the\s+\w*)|\d)(th|rd|nd|st)|(mid\s*(-\s*)?)?(night|morning|afternoon|day)s?|evenings?|noon|lunch(time)?|dinner(time)?|(day|night)time|overnight|dawn|dusk|sunset|hours?|hrs?|h|minutes?|mins?|seconds?|secs?|eo[dmy]|mardi[ -]?gras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|cinco de mayo|all (hallow|souls)|guy fawkes|(st 
)?patrick|hundreds?|noughties|aughts|thousands?)\b|{WeekDayRegex}|{SetWeekDayRegex}|{NowRegex}|{PeriodicRegex}|\b({DateUnitRegex}|{ImplicitDayRegex})"; + /* Maps Swedish and English time-unit words to unit codes (10Y, Y, MON, 3MON, 6MON, 2W, W, WE, D, H, M, S). Singular and plural forms of a unit share one code. */ public static readonly Dictionary UnitMap = new Dictionary + { + { @"decennium", @"10Y" }, + { @"decennier", @"10Y" }, + { @"decenniet", @"10Y" }, + { @"år", @"Y" }, + { @"åren", @"Y" }, + { @"månad", @"MON" }, + { @"månader", @"MON" }, /* plural of "månad": same code as "månad"/"months" */ + { @"kvartal", @"3MON" }, + { @"termin", @"6MON" }, + { @"terminer", @"6MON" }, + { @"vecka", @"W" }, + { @"veckor", @"W" }, + { @"helg", @"WE" }, + { @"helger", @"WE" }, + { @"veckoslut", @"WE" }, + { @"dag", @"D" }, + { @"dagar", @"D" }, + { @"arbetsdag", @"D" }, + { @"arbetsdagar", @"D" }, + { @"veckodag", @"D" }, + { @"veckodagar", @"D" }, + { @"natt", @"D" }, + { @"nätter", @"D" }, + { @"timme", @"H" }, + { @"timmar", @"H" }, + { @"tim", @"H" }, + { @"t", @"H" }, + { @"minut", @"M" }, + { @"minuter", @"M" }, + { @"m", @"M" }, + { @"sekund", @"S" }, + { @"sekunder", @"S" }, + { @"sek", @"S" }, + { @"s", @"S" }, + { @"decades", @"10Y" }, + { @"decade", @"10Y" }, + { @"years", @"Y" }, + { @"year", @"Y" }, + { @"months", @"MON" }, + { @"month", @"MON" }, + { @"quarters", @"3MON" }, + { @"quarter", @"3MON" }, + { @"semesters", @"6MON" }, + { @"semestres", @"6MON" }, + { @"semester", @"6MON" }, + { @"semestre", @"6MON" }, + { @"weeks", @"W" }, + { @"week", @"W" }, + { @"weekends", @"WE" }, + { @"weekend", @"WE" }, + { @"fortnights", @"2W" }, + { @"fortnight", @"2W" }, + { @"weekdays", @"D" }, + { @"weekday", @"D" }, + { @"days", @"D" }, + { @"day", @"D" }, + { @"nights", @"D" }, + { @"night", @"D" }, + { @"hours", @"H" }, + { @"hour", @"H" }, + { @"hrs", @"H" }, + { @"hr", @"H" }, + { @"h", @"H" }, + { @"minutes", @"M" }, + { @"minute", @"M" }, + { @"mins", @"M" }, + { @"min", @"M" }, + { @"seconds", @"S" }, + { @"second", @"S" }, + { @"secs", @"S" }, + { @"sec", @"S" } + }; + public static readonly Dictionary UnitValueMap = new Dictionary + { + { @"decennier", 315360000 }, + { 
@"decennium", 315360000 }, + { @"år", 31536000 }, + { @"månad", 2592000 }, + { @"månader", 2592000 }, + { @"helg", 172800 }, + { @"helger", 172800 }, + { @"veckoslut", 172800 }, + { @"vecka", 604800 }, + { @"veckor", 604800 }, + { @"arbetsdag", 86400 }, + { @"arbetsdagar", 86400 }, + { @"dag", 86400 }, + { @"dagar", 86400 }, + { @"natt", 86400 }, + { @"nätter", 86400 }, + { @"timme", 3600 }, + { @"timmar", 3600 }, + { @"tim", 3600 }, + { @"t", 3600 }, + { @"minut", 60 }, + { @"minuter", 60 }, + { @"m", 60 }, + { @"sekund", 1 }, + { @"sekunder", 1 }, + { @"sek", 1 }, + { @"s", 1 }, + { @"decades", 315360000 }, + { @"decade", 315360000 }, + { @"years", 31536000 }, + { @"year", 31536000 }, + { @"months", 2592000 }, + { @"month", 2592000 }, + { @"fortnights", 1209600 }, + { @"fortnight", 1209600 }, + { @"weekends", 172800 }, + { @"weekend", 172800 }, + { @"weeks", 604800 }, + { @"week", 604800 }, + { @"days", 86400 }, + { @"day", 86400 }, + { @"nights", 86400 }, + { @"night", 86400 }, + { @"hours", 3600 }, + { @"hour", 3600 }, + { @"hrs", 3600 }, + { @"hr", 3600 }, + { @"h", 3600 }, + { @"minutes", 60 }, + { @"minute", 60 }, + { @"mins", 60 }, + { @"min", 60 }, + { @"seconds", 1 }, + { @"second", 1 }, + { @"secs", 1 }, + { @"sec", 1 } + }; + public static readonly Dictionary SpecialYearPrefixesMap = new Dictionary + { + { @"skol", @"SY" }, + { @"räkenskaps", @"FY" }, + { @"fiscal", @"FY" }, + { @"school", @"SY" }, + { @"fy", @"FY" }, + { @"sy", @"SY" } + }; + public static readonly Dictionary SeasonMap = new Dictionary + { + { @"vår", @"SP" }, + { @"sommar", @"SU" }, + { @"höst", @"FA" }, + { @"vinter", @"WI" }, + { @"spring", @"SP" }, + { @"summer", @"SU" }, + { @"fall", @"FA" }, + { @"autumn", @"FA" }, + { @"winter", @"WI" } + }; + public static readonly Dictionary SeasonValueMap = new Dictionary + { + { @"SP", 3 }, + { @"SU", 6 }, + { @"FA", 9 }, + { @"WI", 12 } + }; + public static readonly Dictionary CardinalMap = new Dictionary + { + { @"första", 1 }, + { 
@"förste", 1 }, + { @"andra", 2 }, + { @"andre", 2 }, + { @"tredje", 3 }, + { @"fjärde", 4 }, + { @"femte", 5 }, + { @"sjätte", 6 }, + { @"sjunde", 7 }, + { @"åttonde", 8 }, + { @"nionde", 9 }, + { @"tionde", 10 }, + { @"elfte", 11 }, + { @"tolfte", 12 }, + { @"first", 1 }, + { @"1st", 1 }, + { @"second", 2 }, + { @"2nd", 2 }, + { @"third", 3 }, + { @"3rd", 3 }, + { @"fourth", 4 }, + { @"4th", 4 }, + { @"fifth", 5 }, + { @"5th", 5 }, + { @"sixth", 6 }, + { @"6th", 6 }, + { @"seventh", 7 }, + { @"7th", 7 }, + { @"eighth", 8 }, + { @"8th", 8 }, + { @"ninth", 9 }, + { @"9th", 9 }, + { @"tenth", 10 }, + { @"10th", 10 }, + { @"eleventh", 11 }, + { @"11th", 11 }, + { @"twelfth", 12 }, + { @"12th", 12 } + }; + public static readonly Dictionary DayOfWeek = new Dictionary + { + { @"måndagen", 1 }, + { @"måndag", 1 }, + { @"månd", 1 }, + { @"mån", 1 }, + { @"må", 1 }, + { @"tisdagen", 2 }, + { @"tisdag", 2 }, + { @"tisd", 2 }, + { @"tis", 2 }, + { @"ti", 2 }, + { @"onsdagen", 3 }, + { @"onsdag", 3 }, + { @"onsd", 3 }, + { @"ons", 3 }, + { @"on", 3 }, + { @"torsdagen", 4 }, + { @"torsdag", 4 }, + { @"torsd", 4 }, + { @"tors", 4 }, + { @"tor", 4 }, + { @"to", 4 }, + { @"fredagen", 5 }, + { @"fredag", 5 }, + { @"fred", 5 }, + { @"fre", 5 }, + { @"fr", 5 }, + { @"lördagen", 6 }, + { @"lördag", 6 }, + { @"lörd", 6 }, + { @"lör", 6 }, + { @"lö", 6 }, + { @"söndagen", 0 }, + { @"söndag", 0 }, + { @"sönd", 0 }, + { @"sön", 0 }, + { @"sö", 0 }, + { @"monday", 1 }, + { @"tuesday", 2 }, + { @"wednesday", 3 }, + { @"thursday", 4 }, + { @"friday", 5 }, + { @"saturday", 6 }, + { @"sunday", 0 }, + { @"mon", 1 }, + { @"tue", 2 }, + { @"tues", 2 }, + { @"wed", 3 }, + { @"wedn", 3 }, + { @"weds", 3 }, + { @"thu", 4 }, + { @"thur", 4 }, + { @"thurs", 4 }, + { @"fri", 5 }, + { @"sat", 6 }, + { @"sun", 0 } + }; + public static readonly Dictionary MonthOfYear = new Dictionary + { + { @"januari", 1 }, + { @"februari", 2 }, + { @"mars", 3 }, + { @"april", 4 }, + { @"maj", 5 }, + { @"juni", 6 }, + { 
@"juli", 7 }, + { @"augusti", 8 }, + { @"september", 9 }, + { @"oktober", 10 }, + { @"november", 11 }, + { @"december", 12 }, + { @"january", 1 }, + { @"february", 2 }, + { @"march", 3 }, + { @"may", 5 }, + { @"june", 6 }, + { @"july", 7 }, + { @"august", 8 }, + { @"october", 10 }, + { @"jan", 1 }, + { @"feb", 2 }, + { @"mar", 3 }, + { @"apr", 4 }, + { @"jun", 6 }, + { @"jul", 7 }, + { @"aug", 8 }, + { @"sep", 9 }, + { @"sept", 9 }, + { @"okt", 10 }, + { @"oct", 10 }, + { @"nov", 11 }, + { @"dec", 12 }, + { @"1", 1 }, + { @"2", 2 }, + { @"3", 3 }, + { @"4", 4 }, + { @"5", 5 }, + { @"6", 6 }, + { @"7", 7 }, + { @"8", 8 }, + { @"9", 9 }, + { @"10", 10 }, + { @"11", 11 }, + { @"12", 12 }, + { @"01", 1 }, + { @"02", 2 }, + { @"03", 3 }, + { @"04", 4 }, + { @"05", 5 }, + { @"06", 6 }, + { @"07", 7 }, + { @"08", 8 }, + { @"09", 9 } + }; + /* Swedish number words mapped to their integer values; every value in the covered range needs an entry, and each compound must agree with its unit word (e.g. "fyra" = 4). */ public static readonly Dictionary Numbers = new Dictionary + { + { @"noll", 0 }, + { @"ett", 1 }, + { @"en", 1 }, + { @"två", 2 }, + { @"tre", 3 }, + { @"fyra", 4 }, + { @"fem", 5 }, + { @"sex", 6 }, + { @"sju", 7 }, + { @"åtta", 8 }, + { @"nio", 9 }, + { @"tio", 10 }, + { @"elva", 11 }, + { @"tolv", 12 }, + { @"tretton", 13 }, + { @"fjorton", 14 }, + { @"femton", 15 }, + { @"sexton", 16 }, + { @"sjutton", 17 }, + { @"arton", 18 }, + { @"nitton", 19 }, + { @"tjugo", 20 }, + { @"tjugoett", 21 }, + { @"tjugoen", 21 }, + { @"tjugotvå", 22 }, { @"tjugotre", 23 }, + { @"tjugofyra", 24 }, + { @"tjugofem", 25 }, + { @"tjugosex", 26 }, + { @"tjugosju", 27 }, + { @"tjugoåtta", 28 }, + { @"tjugonio", 29 }, + { @"trettio", 30 }, + { @"trettioett", 31 }, + { @"trettioen", 31 }, + { @"trettiotvå", 32 }, + { @"trettiotre", 33 }, + { @"trettiofyra", 34 }, /* trettio (30) + fyra (4) */ + { @"trettiofem", 35 }, + { @"trettiosex", 36 }, + { @"trettiosju", 37 }, + { @"trettioåtta", 38 }, + { @"trettionio", 39 }, + { @"fyrtio", 40 }, + { @"fyrtioett", 41 }, + { @"fyrtioen", 41 }, + { @"fyrtiotvå", 42 }, + { @"fyrtiotre", 43 }, + { @"fyrtiofyra", 44 }, + { @"fyrtiofem", 45 }, + { @"fyrtiosex", 46 }, + { 
@"fyrtiosju", 47 }, + { @"fyrtioåtta", 48 }, + { @"fyrtionio", 49 }, + { @"femtio", 50 }, + { @"femtioett", 51 }, + { @"femtioen", 51 }, + { @"femtiotvå", 52 }, + { @"femtiotre", 53 }, + { @"femtiofyra", 54 }, + { @"femtiofem", 55 }, + { @"femtiosex", 56 }, + { @"femtiosju", 57 }, + { @"femtioåtta", 58 }, + { @"femtionio", 59 }, + { @"sextio", 60 }, + { @"sextioett", 61 }, + { @"sextioen", 61 }, + { @"sextiotvå", 62 }, + { @"sextiotre", 63 }, + { @"sextiofyra", 64 }, + { @"sextiofem", 65 }, + { @"sextiosex", 66 }, + { @"sextiosju", 67 }, + { @"sextioåtta", 68 }, + { @"sextionio", 69 }, + { @"sjuttio", 70 }, + { @"sjuttioett", 71 }, + { @"sjuttioen", 71 }, + { @"sjuttiotvå", 72 }, + { @"sjuttiotre", 73 }, + { @"sjuttiofyra", 74 }, + { @"sjuttiofem", 75 }, + { @"sjuttiosex", 76 }, + { @"sjuttiosju", 77 }, + { @"sjuttioåtta", 78 }, + { @"sjuttionio", 79 }, + { @"åttio", 80 }, + { @"åttioett", 81 }, + { @"åttioen", 81 }, + { @"åttiotvå", 82 }, + { @"åttiotre", 83 }, + { @"åttiofyra", 84 }, + { @"åttiofem", 85 }, + { @"åttiosex", 86 }, + { @"åttiosju", 87 }, + { @"åttioåtta", 88 }, + { @"åttionio", 89 }, + { @"nittio", 90 }, + { @"nittioett", 91 }, + { @"nittioen", 91 }, + { @"nittiotvå", 92 }, + { @"nittiotre", 93 }, + { @"nittiofyra", 94 }, + { @"nittiofem", 95 }, + { @"nittiosex", 96 }, + { @"nittiosju", 97 }, + { @"nittioåtta", 98 }, + { @"nittionio", 99 }, + { @"etthundra", 100 } + }; + public static readonly Dictionary DayOfMonth = new Dictionary + { + { @"1", 1 }, + { @"2", 2 }, + { @"3", 3 }, + { @"4", 4 }, + { @"5", 5 }, + { @"6", 6 }, + { @"7", 7 }, + { @"8", 8 }, + { @"9", 9 }, + { @"10", 10 }, + { @"11", 11 }, + { @"12", 12 }, + { @"13", 13 }, + { @"14", 14 }, + { @"15", 15 }, + { @"16", 16 }, + { @"17", 17 }, + { @"18", 18 }, + { @"19", 19 }, + { @"20", 20 }, + { @"21", 21 }, + { @"22", 22 }, + { @"23", 23 }, + { @"24", 24 }, + { @"25", 25 }, + { @"26", 26 }, + { @"27", 27 }, + { @"28", 28 }, + { @"29", 29 }, + { @"30", 30 }, + { @"31", 31 }, + { @"01", 1 
}, + { @"02", 2 }, + { @"03", 3 }, + { @"04", 4 }, + { @"05", 5 }, + { @"06", 6 }, + { @"07", 7 }, + { @"08", 8 }, + { @"09", 9 }, + { @"1:e", 1 }, + { @"1:a", 1 }, + { @"2:e", 2 }, + { @"2:a", 2 }, + { @"3:e", 3 }, + { @"4:e", 4 }, + { @"5:e", 5 }, + { @"6:e", 6 }, + { @"7:e", 7 }, + { @"8:e", 8 }, + { @"9:e", 9 }, + { @"10:e", 10 }, + { @"11:e", 11 }, + { @"12:e", 12 }, + { @"13:e", 13 }, + { @"14:e", 14 }, + { @"15:e", 15 }, + { @"16:e", 16 }, + { @"17:e", 17 }, + { @"18:e", 18 }, + { @"19:e", 19 }, + { @"20:e", 20 }, + { @"21:e", 21 }, + { @"21:a", 21 }, + { @"22:e", 22 }, + { @"22:a", 22 }, + { @"23:e", 23 }, + { @"24:e", 24 }, + { @"25:e", 25 }, + { @"26:e", 26 }, + { @"27:e", 27 }, + { @"28:e", 28 }, + { @"29:e", 29 }, + { @"30:e", 30 }, + { @"31:e", 31 }, + { @"31:a", 31 }, + { @"01:e", 1 }, + { @"01:a", 1 }, + { @"02:e", 2 }, + { @"02:a", 2 }, + { @"03:e", 3 }, + { @"04:e", 4 }, + { @"05:e", 5 }, + { @"06:e", 6 }, + { @"07:e", 7 }, + { @"08:e", 8 }, + { @"09:e", 9 } + }; + public static readonly Dictionary DoubleNumbers = new Dictionary + { + { @"half", 0.5 }, + { @"quarter", 0.25 } + }; + public static readonly Dictionary> HolidayNames = new Dictionary> + { + { @"easterday", new string[] { @"easterday", @"easter", @"eastersunday" } }, + { @"ashwednesday", new string[] { @"ashwednesday" } }, + { @"palmsunday", new string[] { @"palmsunday" } }, + { @"maundythursday", new string[] { @"maundythursday" } }, + { @"goodfriday", new string[] { @"goodfriday" } }, + { @"eastersaturday", new string[] { @"eastersaturday" } }, + { @"eastermonday", new string[] { @"eastermonday" } }, + { @"ascensionday", new string[] { @"ascensionday" } }, + { @"whitesunday", new string[] { @"whitesunday", @"pentecost", @"pentecostday" } }, + { @"whitemonday", new string[] { @"whitemonday" } }, + { @"trinitysunday", new string[] { @"trinitysunday" } }, + { @"corpuschristi", new string[] { @"corpuschristi" } }, + { @"earthday", new string[] { @"earthday" } }, + { @"fathers", new 
string[] { @"fatherday", @"fathersday" } }, + { @"mothers", new string[] { @"motherday", @"mothersday" } }, + { @"thanksgiving", new string[] { @"thanksgivingday", @"thanksgiving" } }, + { @"blackfriday", new string[] { @"blackfriday" } }, + { @"cybermonday", new string[] { @"cybermonday" } }, + { @"martinlutherking", new string[] { @"mlkday", @"martinlutherkingday", @"martinlutherkingjrday" } }, + { @"washingtonsbirthday", new string[] { @"washingtonsbirthday", @"washingtonbirthday", @"presidentsday" } }, + { @"canberra", new string[] { @"canberraday" } }, + { @"labour", new string[] { @"labourday", @"laborday" } }, + { @"columbus", new string[] { @"columbusday" } }, + { @"memorial", new string[] { @"memorialday" } }, + { @"yuandan", new string[] { @"yuandan" } }, + { @"maosbirthday", new string[] { @"maosbirthday" } }, + { @"teachersday", new string[] { @"teachersday", @"teacherday" } }, + { @"singleday", new string[] { @"singleday" } }, + { @"allsaintsday", new string[] { @"allsaintsday" } }, + { @"youthday", new string[] { @"youthday" } }, + { @"childrenday", new string[] { @"childrenday", @"childday" } }, + { @"femaleday", new string[] { @"femaleday" } }, + { @"treeplantingday", new string[] { @"treeplantingday" } }, + { @"arborday", new string[] { @"arborday" } }, + { @"girlsday", new string[] { @"girlsday" } }, + { @"whiteloverday", new string[] { @"whiteloverday" } }, + { @"loverday", new string[] { @"loverday", @"loversday" } }, + { @"christmas", new string[] { @"christmasday", @"christmas" } }, + { @"xmas", new string[] { @"xmasday", @"xmas" } }, + { @"newyear", new string[] { @"newyear" } }, + { @"newyearday", new string[] { @"newyearday" } }, + { @"newyearsday", new string[] { @"newyearsday" } }, + { @"inaugurationday", new string[] { @"inaugurationday" } }, + { @"groundhougday", new string[] { @"groundhougday" } }, + { @"valentinesday", new string[] { @"valentinesday" } }, + { @"stpatrickday", new string[] { @"stpatrickday", @"stpatricksday", 
@"stpatrick" } }, + { @"aprilfools", new string[] { @"aprilfools" } }, + { @"stgeorgeday", new string[] { @"stgeorgeday" } }, + { @"mayday", new string[] { @"mayday", @"intlworkersday", @"internationalworkersday", @"workersday" } }, + { @"cincodemayoday", new string[] { @"cincodemayoday" } }, + { @"baptisteday", new string[] { @"baptisteday" } }, + { @"usindependenceday", new string[] { @"usindependenceday" } }, + { @"independenceday", new string[] { @"independenceday" } }, + { @"bastilleday", new string[] { @"bastilleday" } }, + { @"halloweenday", new string[] { @"halloweenday", @"halloween" } }, + { @"allhallowday", new string[] { @"allhallowday" } }, + { @"allsoulsday", new string[] { @"allsoulsday" } }, + { @"guyfawkesday", new string[] { @"guyfawkesday" } }, + { @"veteransday", new string[] { @"veteransday" } }, + { @"christmaseve", new string[] { @"christmaseve" } }, + { @"newyeareve", new string[] { @"newyearseve", @"newyeareve" } }, + { @"juneteenth", new string[] { @"juneteenth", @"freedomday", @"jubileeday" } }, + { @"ramadan", new string[] { @"ramadan", @"ramazan", @"ramzan", @"ramadhan", @"ramathan" } }, + { @"sacrifice", new string[] { @"eidaladha", @"eidalazha", @"eidulazha", @"feastofthesacrifice" } }, + { @"islamicnewyear", new string[] { @"islamicnewyear", @"hijrinewyear", @"arabicnewyear" } }, + { @"eidalfitr", new string[] { @"eidalfitr", @"festivalofbreakingthefast" } } + }; + public static readonly Dictionary WrittenDecades = new Dictionary + { + { @"hundratalet", 0 }, + { @"10-talet", 10 }, + { @"10-tal", 10 }, + { @"tiotalet", 10 }, + { @"tiotal", 10 }, + { @"20-talet", 20 }, + { @"20-tal", 20 }, + { @"tjugotalet", 20 }, + { @"tjugotal", 20 }, + { @"30-talet", 30 }, + { @"30-tal", 30 }, + { @"trettiotalet", 30 }, + { @"trettiotal", 30 }, + { @"40-talet", 40 }, + { @"40-tal", 40 }, + { @"fyrtiotalet", 40 }, + { @"fyrtiotal", 40 }, + { @"50-talet", 50 }, + { @"50-tal", 50 }, + { @"femtiotalet", 50 }, + { @"femtiotal", 50 }, + { @"60-talet", 60 
}, + { @"60-tal", 60 }, + { @"sextiotalet", 60 }, + { @"sextiotal", 60 }, + { @"70-talet", 70 }, + { @"70-tal", 70 }, + { @"sjuttiotalet", 70 }, + { @"sjuttiotal", 70 }, + { @"80-talet", 80 }, + { @"80-tal", 80 }, + { @"åttiotalet", 80 }, + { @"åttiotal", 80 }, + { @"90-talet", 90 }, + { @"90-tal", 90 }, + { @"nittiotalet", 90 }, + { @"nittiotal", 90 } + }; + public static readonly Dictionary SpecialDecadeCases = new Dictionary + { + { @"00-talet", 2000 }, + { @"2000-talet", 2000 }, + { @"2000-tal", 2000 }, + { @"tvåtusentalet", 2000 }, + { @"tvåtusental", 2000 }, + { @"tjugohundratalet", 2000 }, + { @"tjugohundratal", 2000 } + }; + public const string DefaultLanguageFallback = @"MDY"; + public static readonly IList SuperfluousWordList = new List + { + @"preferably", + @"how about", + @"maybe", + @"perhaps", + @"say", + @"like" + }; + public static readonly string[] DurationDateRestrictions = { @"today", @"now", @"current date" }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"^\d{4}$", @"(\d\.\d{4}|\d{4}\.\d)" }, + { @"^(morning|afternoon|evening|night|day)\b", @"\b(good\s+(morning|afternoon|evening|night|day))|(nighty\s+night)\b" }, + { @"\bnow\b", @"\b(^now,)|\b((is|are)\s+now\s+for|for\s+now)\b" }, + { @"\bmay$", @"\b((((!|\.|\?|,|;|)\s+|^)may i)|(i|you|he|she|we|they)\s+may|(may\s+((((also|not|(also not)|well)\s+)?(be|ask|contain|constitute|e-?mail|take|have|result|involve|get|work|reply|differ))|(or may not)))|(? 
AmbiguityTimeFiltersDict = new Dictionary + { + { @"^(\p{L}+|\d{1,2})(\s+(morning|afternoon|evening|night))?$", @"\b(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|\d{1,2})\s+(morning|afternoon|evening|night)\b" } + }; + public static readonly Dictionary AmbiguityDurationFiltersDict = new Dictionary + { + { @"night$", @"\bnight(\s*|-)(club|light|market|shift|work(er)?)s?\b" } + }; + public static readonly IList MorningTermList = new List + { + @"morgon", + @"morgonen", + @"morning" + }; + public static readonly IList AfternoonTermList = new List + { + @"afternoon", + @"eftermiddag", + @"eftermiddagen" + }; + public static readonly IList EveningTermList = new List + { + @"evening", + @"kväll", + @"kvällen" + }; + public static readonly IList MealtimeBreakfastTermList = new List + { + @"breakfast", + @"frukost", + @"frukosten" + }; + public static readonly IList MealtimeBrunchTermList = new List + { + @"brunch", + @"brunchen" + }; + public static readonly IList MealtimeLunchTermList = new List + { + @"lunch", + @"lunchtime", + @"lunchen" + }; + public static readonly IList MealtimeDinnerTermList = new List + { + @"dinner", + @"dinnertime", + @"supper", + @"kvällsmat", + @"kvällsmaten" + }; + public static readonly IList DaytimeTermList = new List + { + @"daytime", + @"dagtid" + }; + public static readonly IList NightTermList = new List + { + @"night", + @"natt", + @"natten" + }; + public static readonly IList NighttimeTermList = new List + { + @"nighttime", + @"night-time" + }; + public static readonly IList SameDayTerms = new List + { + @"today", + @"current date", + @"otd", + @"idag", + @"i dag" + }; + public static readonly IList PlusOneDayTerms = new List + { + @"tomorrow", + @"tmr", + @"day after", + @"imorgon", + @"i morgon" + }; + public static readonly IList MinusOneDayTerms = new List + { + @"yesterday", + @"day before", + @"igår", + @"i går", + @"dagen innan", + @"dagen före", + @"dag innan", + @"dag före" + }; + public static readonly IList 
PlusTwoDayTerms = new List + { + @"day after tomorrow", + @"day after tmr", + @"övermorgon", + @"i övermorgon" + }; + public static readonly IList MinusTwoDayTerms = new List + { + @"day before yesterday", + @"förrgår", + @"i förrgår" + }; + public static readonly IList FutureTerms = new List + { + @"this", + @"next", + @"kommande", + @"denna", + @"nästa" + }; + public static readonly IList LastCardinalTerms = new List + { + @"last", + @"förra", + @"senaste" + }; + public static readonly IList MonthTerms = new List + { + @"month", + @"månad" + }; + public static readonly IList MonthToDateTerms = new List + { + @"month to date" + }; + public static readonly IList WeekendTerms = new List + { + @"weekend", + @"helg", + @"veckoslut" + }; + public static readonly IList WeekTerms = new List + { + @"week", + @"vecka" + }; + public static readonly IList FortnightTerms = new List + { + @"fortnight", + @"fourtenight", + @"fjorton dagar" + }; + public static readonly IList YearTerms = new List + { + @"year", + @"år" + }; + public static readonly IList GenericYearTerms = new List + { + @"y" + }; + public static readonly IList YearToDateTerms = new List + { + @"year to date" + }; + public const string DoubleMultiplierRegex = @"^(bi)(-|\s)?"; + public const string HalfMultiplierRegex = @"^(semi)(-|\s)?"; + public const string DayTypeRegex = @"((week)?da(il)?ys?)$"; + public const string WeekTypeRegex = @"(week(s|ly)?)$"; + public const string WeekendTypeRegex = @"(weekends?)$"; + public const string MonthTypeRegex = @"(month(s|ly)?)$"; + public const string QuarterTypeRegex = @"(quarter(s|ly)?)$"; + public const string YearTypeRegex = @"((years?|annual)(ly)?)$"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/DateTimeDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/DateTimeDefinitions.tt new file mode 100644 index 0000000000..6639e7e7b1 --- /dev/null +++ 
b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/DateTimeDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Swedish\Swedish-DateTime.yaml"; + this.Language = "Swedish"; + this.ClassName = "DateTimeDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/NumbersDefinitions.cs new file mode 100644 index 0000000000..4bcc36a6a9 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/NumbersDefinitions.cs @@ -0,0 +1,565 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Swedish\Swedish-Numbers.yaml +// - Language: Swedish +// - ClassName: NumbersDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Swedish +{ + using System; + using System.Collections.Generic; + + public static class NumbersDefinitions + { + public const string LangMarker = @"Swe"; + public const bool CompoundNumberLanguage = true; + public const bool MultiDecimalSeparatorCulture = true; + public const string RoundNumberIntegerRegex = @"(?#RoundNumberIntegerRegex)(hundra|tusen|miljon(er)?|miljard(er)?|biljon(er)?|biljard(er)?|triljon(er)?)"; + public const string ZeroToNineIntegerRegex = @"(?#ZeroToNineIntegerRegex)(tre|sju|åtta|fyra|fem|noll|nio|ett|en|två|sex)"; + public const string TwoToNineIntegerRegex = @"(?#TwoToNineIntegerRegex)(tre|sju|åtta|fyra|fem|nio|två|sex)"; + public const string NegativeNumberTermsRegex = @"(?#NegativeNumberTermsRegex)(?((minus|negativ(t)?)\s+))"; + public static readonly string NegativeNumberSignRegex = $@"(?#NegativeNumberSignRegex)^({NegativeNumberTermsRegex}).*"; + public const string AnIntRegex = @"(?#AnIntRegex)(e(n|tt))(?=\s)"; + public const string TenToNineteenIntegerRegex = @"(?#TenToNineteenIntegerRegex)(sjutton|tretton|fjorton|arton|nitton|femton|sexton|elva|tolv|tio)"; + public const string TensNumberIntegerRegex = @"(?#TensNumberIntegerRegex)(sjuttio|tjugo|trettio|åttio|nittio|fyrtio|femtio|sextio)"; + public static readonly string SeparaIntRegex = $@"(?#SeparaIntRegex)((({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}(\s+(och\s+)?|\s*-\s*)?{ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex})(\s*{RoundNumberIntegerRegex})*))|(({AnIntRegex}(\s*{RoundNumberIntegerRegex})+))"; + public static readonly string AllIntRegex = $@"(?#AllIntRegex)(((({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}(\s+(och\s+)?|\s*-\s*)?{ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|({ZeroToNineIntegerRegex}|{AnIntRegex}))?(\s*{RoundNumberIntegerRegex})))*{SeparaIntRegex})"; + public const string 
PlaceHolderPureNumber = @"(?#PlaceHolderPureNumber)\b"; + public const string PlaceHolderDefault = @"(?#PlaceHolderDefault)\D|\b"; + public static readonly Func NumbersWithPlaceHolder = (placeholder) => $@"(?#NumbersWithPlaceHolder)(((?(\bnäst(a|e)|\bföregående|\bnäst\s+sist(a|e)|\bsist(a|e)|\bnuvarande|\b(före|efter)\s+nuvarande|\bförr(a|e)|\btredje\s+från\s+slutet|\bsenaste|\btidigare|\bföre\s+den\s+sist(a|e)|\b(innan|efter|före)\s+sist(a|e)))"; + public static readonly string SuffixBasicOrdinalRegex = $@"(?#SuffixBasicOrdinalRegex)((((({TensNumberIntegerRegex}(\s+(och\s+)?|\s*-?\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}?|{AnIntRegex})(\s*{RoundNumberIntegerRegex})+)\s*(och\s+)?)*({TensNumberIntegerRegex}(\s+|\s*-?\s*))?{NumberOrdinalRegex})"; + public static readonly string SuffixRoundNumberOrdinalRegex = $@"(?#SuffixRoundNumberOrdinalRegex)(({AllIntRegex}\s*){RoundNumberOrdinalRegex})"; + public static readonly string AllOrdinalNumberRegex = $@"({SuffixRoundNumberOrdinalRegex}|{SuffixBasicOrdinalRegex})"; + public static readonly string AllOrdinalRegex = $@"(?#AllOrdinalRegex)({RelativeOrdinalRegex}|{AllOrdinalNumberRegex})"; + public const string OrdinalSuffixRegex = @"(?#OrdinalSuffixRegex)(?<=\b)(?:(\d*(1:(e|a)|2:(a|e)|3:e|4:e|5:e|6:e|7:e|8:e|9:e|0:e))|(11:e|12:e))(?=\b)"; + public const string OrdinalNumericRegex = @"(?#OrdinalNumericRegex)(?<=\b)(?:\d{1,3}(\s*,\s*\d{3})*\s*(:(e|a)))(?=\b)"; + public static readonly string OrdinalRoundNumberRegex = $@"(?#OrdinalRoundNumberRegex)(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?![\.,]))(?=\b)"; + public static readonly string FractionPrepositionWithinPercentModeRegex = $@"(?#FractionPrepositionWithinPercentModeRegex)(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?![\.,]))(?=\b)"; + public static readonly string AllPointRegex = $@"(?#AllPointRegex)((\s+{ZeroToNineIntegerRegex})+|(\s+{SeparaIntRegex}))"; + public static readonly string AllFloatRegex = 
$@"(?#AllFloatRegex){AllIntRegex}(\s+komma){AllPointRegex}"; + public static readonly string DoubleWithMultiplierRegex = $@"(?#DoubleWithMultiplierRegex)(((? DoubleDecimalPointRegex = (placeholder) => $@"(?#DoubleDecimalPointRegex)(((? DoubleWithoutIntegralRegex = (placeholder) => $@"(?#DoubleWithoutIntegralRegex)(?<=\s|^)(?och)"; + public static readonly string NumberWithSuffixPercentage = $@"(?#NumberWithSuffixPercentage)(?)"; + public const string LessRegex = @"(?:(less|lower|smaller|fewer)(\s+than)?|below|under|(?|=)<)"; + public const string EqualRegex = @"(equal(s|ing)?(\s+(to|than))?|(?)=)"; + public static readonly string MoreOrEqualPrefix = $@"((no\s+{LessRegex})|(at\s+least))"; + public static readonly string MoreOrEqual = $@"(?:({MoreRegex}\s+(or)?\s+{EqualRegex})|({EqualRegex}\s+(or)?\s+{MoreRegex})|{MoreOrEqualPrefix}(\s+(or)?\s+{EqualRegex})?|({EqualRegex}\s+(or)?\s+)?{MoreOrEqualPrefix}|>\s*=)"; + public const string MoreOrEqualSuffix = @"((and|or)\s+(((more|greater|higher|larger|bigger)((?!\s+than)|(\s+than(?!(\s*\d+)))))|((over|above)(?!\s+than))))"; + public static readonly string LessOrEqualPrefix = $@"((no\s+{MoreRegex})|(at\s+most)|(up\s+to))"; + public static readonly string LessOrEqual = $@"(({LessRegex}\s+(or)?\s+{EqualRegex})|({EqualRegex}\s+(or)?\s+{LessRegex})|{LessOrEqualPrefix}(\s+(or)?\s+{EqualRegex})?|({EqualRegex}\s+(or)?\s+)?{LessOrEqualPrefix}|<\s*=)"; + public const string LessOrEqualSuffix = @"((and|or)\s+(less|lower|smaller|fewer)((?!\s+than)|(\s+than(?!(\s*\d+)))))"; + public const string NumberSplitMark = @"(?![,.](?!\d+))"; + public const string MoreRegexNoNumberSucceed = @"((bigger|greater|more|higher|larger)((?!\s+than)|\s+(than(?!(\s*\d+))))|(above|over)(?!(\s*\d+)))"; + public const string LessRegexNoNumberSucceed = @"((less|lower|smaller|fewer)((?!\s+than)|\s+(than(?!(\s*\d+))))|(below|under)(?!(\s*\d+)))"; + public const string EqualRegexNoNumberSucceed = @"(equal(s|ing)?((?!\s+(to|than))|(\s+(to|than)(?!(\s*\d+)))))"; 
+ public static readonly string OneNumberRangeMoreRegex1 = $@"({MoreOrEqual}|{MoreRegex})\s*(the\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeMoreRegex2 = $@"(?({NumberSplitMark}.)+)\s*{MoreOrEqualSuffix}"; + public static readonly string OneNumberRangeMoreSeparateRegex = $@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+or\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\s+(?({NumberSplitMark}.)+)(\s+or\s+){EqualRegexNoNumberSucceed})"; + public static readonly string OneNumberRangeLessRegex1 = $@"({LessOrEqual}|{LessRegex})\s*(the\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string OneNumberRangeLessRegex2 = $@"(?({NumberSplitMark}.)+)\s*{LessOrEqualSuffix}"; + public static readonly string OneNumberRangeLessSeparateRegex = $@"({EqualRegex}\s+(?({NumberSplitMark}.)+)(\s+or\s+){LessRegexNoNumberSucceed})|({LessRegex}\s+(?({NumberSplitMark}.)+)(\s+or\s+){EqualRegexNoNumberSucceed})"; + public static readonly string OneNumberRangeEqualRegex = $@"{EqualRegex}\s*(the\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string TwoNumberRangeRegex1 = $@"between\s*(the\s+)?(?({NumberSplitMark}.)+)\s*and\s*(the\s+)?(?({NumberSplitMark}.)+)"; + public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\s*(and|but|,)\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"; + public static readonly string TwoNumberRangeRegex3 = $@"({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\s*(and|but|,)\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})"; + public static readonly string TwoNumberRangeRegex4 = $@"(from\s+)?(?({NumberSplitMark}(?!\bfrom\b).)+)\s*{TillRegex}\s*(the\s+)?(?({NumberSplitMark}.)+)"; + public const string AmbiguousFractionConnectorsRegex = @"^[.]"; + public const char DecimalSeparatorChar = ','; + public const string FractionMarkerToken = @"av"; + public const char NonDecimalSeparatorChar = '.'; + public const string HalfADozenText = @"sex"; + public 
const string HalfATjogText = @"tio"; + public const string WordSeparatorToken = @"och"; + public static readonly string[] WrittenDecimalSeparatorTexts = { @"komma" }; + public static readonly string[] WrittenGroupSeparatorTexts = { @"punkt" }; + public static readonly string[] WrittenIntegerSeparatorTexts = { @"och" }; + public static readonly string[] WrittenFractionSeparatorTexts = { @"och" }; + public const string HalfADozenRegex = @"ett\s+halvt\s+dussin"; + public static readonly string DigitalNumberRegex = $@"((?<=\b)(hundra|tusen|miljon|miljoner|miljard|miljarder|biljon|biljoner|triljon|triljoner|biljard|biljarder|dussin|tjog)(?=\b))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))"; + public static readonly Dictionary CardinalNumberMap = new Dictionary + { + { @"ingen", 0 }, + { @"inga", 0 }, + { @"noll", 0 }, + { @"en", 1 }, + { @"ett", 1 }, + { @"två", 2 }, + { @"tre", 3 }, + { @"fyra", 4 }, + { @"fem", 5 }, + { @"sex", 6 }, + { @"sju", 7 }, + { @"åtta", 8 }, + { @"nio", 9 }, + { @"tio", 10 }, + { @"elva", 11 }, + { @"tolv", 12 }, + { @"dussin", 12 }, + { @"dussintal", 12 }, + { @"dussintals", 12 }, + { @"tretton", 13 }, + { @"fjorton", 14 }, + { @"femton", 15 }, + { @"sexton", 16 }, + { @"sjutton", 17 }, + { @"arton", 18 }, + { @"nitton", 19 }, + { @"tjugo", 20 }, + { @"tjog", 20 }, + { @"tjogvis", 20 }, + { @"trettio", 30 }, + { @"tretti", 30 }, + { @"fyrtio", 40 }, + { @"femtio", 50 }, + { @"sextio", 60 }, + { @"sjuttio", 70 }, + { @"åttio", 80 }, + { @"nittio", 90 }, + { @"hundra", 100 }, + { @"tusen", 1000 }, + { @"miljon", 1000000 }, + { @"miljoner", 1000000 }, + { @"miljard", 1000000000 }, + { @"miljarder", 1000000000 }, + { @"biljon", 1000000000000 }, + { @"biljoner", 1000000000000 }, + { @"biljard", 1000000000000000 }, + { @"biljarder", 1000000000000000 }, + { @"triljon", 1000000000000000000 }, + { @"triljoner", 1000000000000000000 } + }; + public static readonly Dictionary OrdinalNumberMap = new Dictionary + { + { @"första", 1 }, + { 
@"förste", 1 }, + { @"etta", 1 }, + { @"ettan", 1 }, + { @"andra", 2 }, + { @"andre", 2 }, + { @"sekundära", 2 }, + { @"sekundäre", 2 }, + { @"tvåa", 2 }, + { @"tvåan", 2 }, + { @"halva", 2 }, + { @"halvan", 2 }, + { @"halvt", 2 }, + { @"halv", 2 }, + { @"halvor", 2 }, + { @"hälft", 2 }, + { @"hälften", 2 }, + { @"tredje", 3 }, + { @"tertiära", 3 }, + { @"tertiäre", 3 }, + { @"fjärde", 4 }, + { @"kvart", 4 }, + { @"kvarten", 4 }, + { @"kvarts", 4 }, + { @"femte", 5 }, + { @"sjätte", 6 }, + { @"sjunde", 7 }, + { @"åttonde", 8 }, + { @"nionde", 9 }, + { @"tionde", 10 }, + { @"elfte", 11 }, + { @"tolfte", 12 }, + { @"trettonde", 13 }, + { @"fjortonde", 14 }, + { @"femtonde", 15 }, + { @"sextonde", 16 }, + { @"sjuttonde", 17 }, + { @"artonde", 18 }, + { @"nittonde", 19 }, + { @"tjugonde", 20 }, + { @"trettionde", 30 }, + { @"fyrtionde", 40 }, + { @"femtionde", 50 }, + { @"sextionde", 60 }, + { @"sjuttionde", 70 }, + { @"åttionde", 80 }, + { @"nittionde", 90 }, + { @"hundrade", 100 }, + { @"tusende", 1000 }, + { @"miljonte", 1000000 }, + { @"miljardte", 1000000000 }, + { @"biljonte", 1000000000000 }, + { @"biljardte", 1000000000000000 }, + { @"triljonte", 1000000000000000000 } + }; + public static readonly Dictionary RoundNumberMap = new Dictionary + { + { @"hundra", 100 }, + { @"tusen", 1000 }, + { @"miljon", 1000000 }, + { @"milj", 1000000 }, + { @"miljoner", 1000000 }, + { @"miljard", 1000000000 }, + { @"miljarder", 1000000000 }, + { @"biljon", 1000000000000 }, + { @"biljoner", 1000000000000 }, + { @"biljard", 1000000000000000 }, + { @"bijarder", 1000000000000000 }, + { @"triljon", 1000000000000000000 }, + { @"triljoner", 1000000000000000000 }, + { @"hundrade", 100 }, + { @"tusende", 1000 }, + { @"miljonte", 1000000 }, + { @"miljardte", 1000000000 }, + { @"biljonte", 1000000000000 }, + { @"biljardte", 1000000000000000 }, + { @"triljonte", 1000000000000000000 }, + { @"hundratals", 100 }, + { @"tusentals", 1000 }, + { @"miljontals", 1000000 }, + { @"miljardtals", 
1000000000 }, + { @"biljontals", 1000000000000 }, + { @"biljardtals", 1000000000000000 }, + { @"triljontals", 1000000000000000000 }, + { @"dussin", 12 }, + { @"tjog", 20 }, + { @"dussintals", 12 }, + { @"k", 1000 }, + { @"m", 1000000 }, + { @"g", 1000000000 }, + { @"b", 1000000000 }, + { @"t", 1000000000000 } + }; + /* Swedish written ordinal stems and fraction words mapped to integer values. Compound stems are spelled tens-word + ordinal stem (trettioförst, trettioandr, trettiotred, ...). */ + public static readonly Dictionary SwedishWrittenFractionLookupMap = new Dictionary + { + { @"tjugoförst", 21 }, + { @"tjugoandr", 22 }, + { @"tjugotred", 23 }, + { @"tjugofjärd", 24 }, + { @"tjugofemt", 25 }, + { @"tjugosjätted", 26 }, + { @"tjugosjund", 27 }, + { @"tjugoåttond", 28 }, + { @"tjugoniond", 29 }, + { @"trettioförst", 31 }, + { @"trettioandr", 32 }, + { @"trettiotred", 33 }, + { @"trettiofjärd", 34 }, + { @"trettiofemt", 35 }, + { @"trettiosjätted", 36 }, + { @"trettiosjund", 37 }, + { @"trettioåttond", 38 }, + { @"trettioniond", 39 }, + { @"fyrtioförst", 41 }, + { @"fyrtioandr", 42 }, + { @"fyrtiotred", 43 }, + { @"fyrtiofjärd", 44 }, + { @"fyrtiofemt", 45 }, + { @"fyrtiosjätted", 46 }, + { @"fyrtiosjund", 47 }, + { @"fyrtioåttond", 48 }, + { @"fyrtioniond", 49 }, + { @"femtioförst", 51 }, + { @"femtioandr", 52 }, + { @"femtiotred", 53 }, + { @"femtiofjärd", 54 }, + { @"femtiofemt", 55 }, + { @"femtiosjätted", 56 }, + { @"femtiosjund", 57 }, + { @"femtioåttond", 58 }, + { @"femtioniond", 59 }, + { @"sextioförst", 61 }, + { @"sextioandr", 62 }, + { @"sextiotred", 63 }, + { @"sextiofjärd", 64 }, + { @"sextiofemt", 65 }, + { @"sextiosjätted", 66 }, + { @"sextiosjund", 67 }, + { @"sextioåttond", 68 }, + { @"sextioniond", 69 }, + { @"sjuttioförst", 71 }, + { @"sjuttioandr", 72 }, + { @"sjuttiotred", 73 }, + { @"sjuttiofjärd", 74 }, + { @"sjuttiofemt", 75 }, + { @"sjuttiosjätted", 76 }, + { @"sjuttiosjund", 77 }, + { @"sjuttioåttond", 78 }, + { @"sjuttioniond", 79 }, + { @"åttioförst", 81 }, + { @"åttioandr", 82 }, + { @"åttiotred", 83 }, + { @"åttiofjärd", 84 }, + { @"åttiofemt", 85 }, + { @"åttiosjätted", 86 }, + { @"åttiosjund", 87 }, + { 
@"åttioåttond", 88 }, + { @"åttioniond", 89 }, + { @"nittioförst", 91 }, + { @"nittioandr", 92 }, + { @"nittiotred", 93 }, + { @"nittiofjärd", 94 }, + { @"nittiofemt", 95 }, + { @"nittiosjätted", 96 }, + { @"nittiosjund", 97 }, + { @"nittioåttond", 98 }, + { @"nittioniond", 99 }, + { @"förstadelar", 1 }, + { @"förstedelar", 1 }, + { @"förstedel", 1 }, + { @"förstadel", 1 }, + { @"andradelar", 2 }, + { @"andredelar", 2 }, + { @"tredjedelar", 3 }, + { @"tredjedel", 3 }, + { @"tredjedels", 3 }, + { @"fjärdedelar", 4 }, + { @"fjärdedel", 4 }, + { @"fjärdedels", 4 }, + { @"kvartar", 4 }, + { @"kvart", 4 }, + { @"kvarts", 4 }, + { @"femtedelar", 5 }, + { @"femtedel", 5 }, + { @"femtedels", 5 }, + { @"sjättedelar", 6 }, + { @"sjättedel", 6 }, + { @"sjättedels", 6 }, + { @"sjundedelar", 7 }, + { @"sjundedel", 7 }, + { @"sjundedels", 7 }, + { @"åttondelar", 8 }, + { @"åttondedelar", 8 }, + { @"åttondel", 8 }, + { @"åttondedel", 8 }, + { @"åttondels", 8 }, + { @"åttondedels", 8 }, + { @"niondelar", 9 }, + { @"niondedelar", 9 }, + { @"niondel", 9 }, + { @"niondedel", 9 }, + { @"niondels", 9 }, + { @"niondedels", 9 }, + { @"tiondelar", 10 }, + { @"tiondedelar", 10 }, + { @"tiondel", 10 }, + { @"tiondedel", 10 }, + { @"tiondels", 10 }, + { @"tiondedels", 10 }, + { @"elftedelar", 11 }, + { @"elftedel", 11 }, + { @"elftedels", 11 }, + { @"tolftedelar", 12 }, + { @"tolftedel", 12 }, + { @"tolftedels", 12 }, + { @"trettondelar", 13 }, + { @"trettondedelar", 13 }, + { @"trettondel", 13 }, + { @"trettondedel", 13 }, + { @"trettondels", 13 }, + { @"trettondedels", 13 }, + { @"fjortondelar", 14 }, + { @"fjortondedelar", 14 }, + { @"fjortondel", 14 }, + { @"fjortondedel", 14 }, + { @"fjortondels", 14 }, + { @"fjortondedels", 14 }, + { @"femtondelar", 15 }, + { @"femtondedelar", 15 }, + { @"femtondel", 15 }, + { @"femtondedel", 15 }, + { @"femtondels", 15 }, + { @"femtondedels", 15 }, + { @"sextondelar", 16 }, + { @"sextondedelar", 16 }, + { @"sextondel", 16 }, + { @"sextondedel", 16 }, 
+ { @"sextondels", 16 }, + { @"sextondedels", 16 }, + { @"sjuttondelar", 17 }, + { @"sjuttondedelar", 17 }, + { @"sjuttondel", 17 }, + { @"sjuttondedel", 17 }, + { @"sjuttondels", 17 }, + { @"sjuttondedels", 17 }, + { @"artondelar", 18 }, + { @"artondedelar", 18 }, + { @"artondel", 18 }, + { @"artondedel", 18 }, + { @"artondels", 18 }, + { @"artondedels", 18 }, + { @"nittondelar", 19 }, + { @"nittondedelar", 19 }, + { @"nittondel", 19 }, + { @"nittondedel", 19 }, + { @"nittondels", 19 }, + { @"nittondedels", 19 }, + { @"tjugondelar", 20 }, + { @"tjugondedelar", 20 }, + { @"tjugondel", 20 }, + { @"tjugondedel", 20 }, + { @"tjugondels", 20 }, + { @"tjugondedels", 20 }, + { @"trettiondelar", 30 }, + { @"trettiondedelar", 30 }, + { @"trettiondel", 30 }, + { @"trettiondedel", 30 }, + { @"trettiondels", 30 }, + { @"trettiondedels", 30 }, + { @"fyrtiondelar", 40 }, + { @"fyrtiondedelar", 40 }, + { @"fyrtiondel", 40 }, + { @"fyrtiondedel", 40 }, + { @"fyrtiondels", 40 }, + { @"fyrtiondedels", 40 }, + { @"femtiondelar", 50 }, + { @"femtiondedelar", 50 }, + { @"femtiondel", 50 }, + { @"femtiondedel", 50 }, + { @"femtiondels", 50 }, + { @"femtiondedels", 50 }, + { @"sextiondelar", 60 }, + { @"sextiondedelar", 60 }, + { @"sextiondedels", 60 }, + { @"sextiondels", 60 }, + { @"sextiondel", 60 }, + { @"sextiondedel", 60 }, + { @"sjuttiondelar", 70 }, + { @"sjuttiondedelar", 70 }, + { @"sjuttiondel", 70 }, + { @"sjuttiondedel", 70 }, + { @"sjuttiondels", 70 }, + { @"sjuttiondedels", 70 }, + { @"åttiondelar", 80 }, + { @"åttiondedelar", 80 }, + { @"åttiondel", 80 }, + { @"åttiondedel", 80 }, + { @"åttiondels", 80 }, + { @"åttiondedels", 80 }, + { @"nittiondelar", 90 }, + { @"nittiondedelar", 90 }, + { @"nittiondel", 90 }, + { @"nittiondedel", 90 }, + { @"nittiondels", 90 }, + { @"nittiondedels", 90 }, + { @"hundradelar", 100 }, + { @"hundradedelar", 100 }, + { @"hundradel", 100 }, + { @"hundradedel", 100 }, + { @"hundradels", 100 }, + { @"hundradedels", 100 }, + { @"tusendelar", 
1000 }, + { @"tusendedelar", 1000 }, + { @"tusendel", 1000 }, + { @"tusendedel", 1000 }, + { @"tusendels", 1000 }, + { @"tusendedels", 1000 }, + { @"miljondelar", 1000000 }, + { @"miljontedelar", 1000000 }, + { @"miljondel", 1000000 }, + { @"miljontedel", 1000000 }, + { @"miljontedels", 1000000 }, + { @"miljondels", 1000000 }, + { @"miljarddelar", 1000000000 }, + { @"miljarddel", 1000000000 }, + { @"miljarddels", 1000000000 }, + { @"biljondelar", 1000000000000 }, + { @"biljondel", 1000000000000 }, + { @"biljontedel", 1000000000000 }, + { @"biljondels", 1000000000000 }, + { @"biljarddelar", 1000000000000000 }, + { @"biljarddel", 1000000000000000 }, + { @"biljarddels", 1000000000000000 }, + { @"triljondelar", 1000000000000000000 }, + { @"triljontedelar", 1000000000000000000 }, + { @"triljontedels", 1000000000000000000 }, + { @"triljondels", 1000000000000000000 }, + { @"triljondel", 1000000000000000000 } + }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"\ben\b", @"\b(en)\s+(en)\b" }, + { @"m", @"\dm\b" } + }; + public static readonly Dictionary RelativeReferenceOffsetMap = new Dictionary + { + { @"sista", @"0" }, + { @"siste", @"0" }, + { @"senaste", @"0" }, + { @"nästa", @"1" }, + { @"näste", @"1" }, + { @"efter nuvarande", @"1" }, + { @"nuvarande", @"0" }, + { @"föregående", @"-1" }, + { @"före nuvarande", @"-1" }, + { @"förra", @"-1" }, + { @"tidigare", @"-1" }, + { @"näst sista", @"-1" }, + { @"näst siste", @"-1" }, + { @"före den sista", @"-1" }, + { @"före den siste", @"-1" }, + { @"före sista", @"-1" }, + { @"före siste", @"-1" }, + { @"innan siste", @"-1" }, + { @"innan sista", @"-1" }, + { @"efter sista", @"-1" }, + { @"efter siste", @"-1" }, + { @"tredje från slutet", @"-2" } + }; + public static readonly Dictionary RelativeReferenceRelativeToMap = new Dictionary + { + { @"sista", @"end" }, + { @"siste", @"end" }, + { @"senaste", @"end" }, + { @"nästa", @"current" }, + { @"näste", @"current" }, + { @"efter nuvarande", 
@"current" }, + { @"nuvarande", @"current" }, + { @"föregående", @"current" }, + { @"före nuvarande", @"current" }, + { @"förra", @"current" }, + { @"tidigare", @"current" }, + { @"näst sista", @"end" }, + { @"näst siste", @"end" }, + { @"före den sista", @"end" }, + { @"före den siste", @"end" }, + { @"före siste", @"end" }, + { @"före sista", @"end" }, + { @"innan siste", @"end" }, + { @"innan sista", @"end" }, + { @"efter sista", @"end" }, + { @"efter siste", @"end" }, + { @"tredje från slutet", @"end" } + }; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/NumbersDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/NumbersDefinitions.tt new file mode 100644 index 0000000000..001c0c8ece --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/NumbersDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Swedish\Swedish-Numbers.yaml"; + this.Language = "Swedish"; + this.ClassName = "NumbersDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/NumbersWithUnitDefinitions.cs new file mode 100644 index 0000000000..9ff13d7cde --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/NumbersWithUnitDefinitions.cs @@ -0,0 +1,803 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Swedish\Swedish-NumbersWithUnit.yaml +// - Language: Swedish +// - ClassName: NumbersWithUnitDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Swedish +{ + using System; + using System.Collections.Generic; + + public static class NumbersWithUnitDefinitions + { + public static readonly Dictionary AgeSuffixList = new Dictionary + { + { @"Year", @"år gammal|år gammalt|år gamla|års ålder|-årigt|-årig|-åringen|-åring|-års|års|år" }, + { @"Month", @"månader gammal|månader gammalt|månad gammalt|månader gammal|månad gammal|månader gamla|månad gamla|månads ålder|månaders ålder|-månads|månad|-månaders|månaders|månader" }, + { @"Week", @"veckor gammal|vecka gammal|veckor gammalt|vecka gammalt|veckor gamla|vecka gamla|veckas ålder|veckors ålder|vecka|-veckors|veckors|veckor" }, + { @"Day", @"dagar gammal|dagar gammalt|dag gammal|dag gammalt|dagar gamla|dag gamla|dags ålder|dagars ålder|dagar|-dagars|dagars|dagsgammalt|dagsgammal|dag" } + }; + public static readonly Dictionary AreaSuffixList = new Dictionary + { + { @"Square kilometer", @"kvadratkilometer|kvadratkilometrar|kvadratkilometers|km2|km^2|km²" }, + { @"Square hectometer", @"kvadrathektometer|kvadrathektometrar|hm2|hm^2|hm²|hektar|ha" }, + { @"Square decameter", @"kvadratdekameter|kvadratdekametrar|dam2|dam^2|dam²" }, + { @"Square meter", @"kvm|kvadratmeter|kvadratmetrar|m2|m^2|m²" }, + { @"Square decimeter", @"kvadratdecimeter|kvadratdecimetrar|dm2|dm^2|dm²" }, + { @"Square centimeter", @"kvadratcentimeter|kvadratcentimetrar|cm2|cm^2|cm²" }, + { @"Square millimeter", @"kvadratmillimeter|kvadratmillimetrar|mm2|mm^2|mm²" }, + { @"Square inch", @"kvadrattum|tum2|tum^2|tum²" }, + { @"Square foot", @"kvadratfot|fot2|fot^2|fot²|ft2|ft^2|ft²" }, + { @"Square mile", @"kvadratmile|mi2|mi^2|mi²" }, + { @"Square yard", @"kvadratyard|yd2|yd^2|yd²" }, + { @"Acre", @"-acre|acre|acres" } + }; + public static readonly Dictionary InformationSuffixList = new Dictionary + { + { @"Bit", @"-bit|bit|bits" }, + { 
@"Kilobit", @"kilobit|kilobits|kb|Kb|kbit" }, + { @"Megabit", @"megabit|megabits|mb|Mb|mbit" }, + { @"Gigabit", @"gigabit|gigabits|gb|Gb|gbit" }, + { @"Terabit", @"terabit|terabits|tb|Tb|tbit" }, + { @"Petabit", @"petabit|petabits|pb|Pb|pbit" }, + { @"Byte", @"-byte|byte|bytes" }, + { @"Kilobyte", @"-kilobyte|-kilobytes|kilobyte|kB|KB|kilobytes|kilo byte|kilo bytes|kbyte" }, + { @"Megabyte", @"-megabyte|-megabytes|megabyte|mB|MB|megabytes|mega byte|mega bytes|mbyte" }, + { @"Gigabyte", @"-gigabyte|-gigabytes|gigabyte|gB|GB|gigabytes|giga byte|giga bytes|gbyte" }, + { @"Terabyte", @"-terabyte|-terabytes|terabyte|tB|TB|terabytes|tera byte|tera bytes|tbyte" }, + { @"Petabyte", @"-petabyte|-petabytes|petabyte|pB|PB|petabytes|peta byte|peta bytes|pbyte" } + }; + public static readonly IList AmbiguousDimensionUnitList = new List + { + @"dm", + @"fat", + @"""" + }; + public const string BuildPrefix = @"(?<=(\s|^))"; + public const string BuildSuffix = @"(?=(\s|\W|$))"; + public static readonly Dictionary LengthSuffixList = new Dictionary + { + { @"Kilometer", @"km|kilometer|kilometrarna|kilometrar|-kilometers" }, + { @"Hectometer", @"hm|hektometer|hektometrar" }, + { @"Decameter", @"dam|dekameter|dekametrar" }, + { @"Meter", @"m|meter|metrarna|metrar|-meters" }, + { @"Decimeter", @"dm|decimeter|decimetrarna|decimetrar|-decimeters" }, + { @"Centimeter", @"cm|-centimeters|centimeter|centimetrarna|centimetrar" }, + { @"Millimeter", @"mm|-millimeters|millimeter|millimetrarna|millimetrarna|millimetrar" }, + { @"Micrometer", @"μm|mikrometer|mikrometrarna|-mikrometers" }, + { @"Nanometer", @"nm|nanometer|nanometrarna|nanometrar" }, + { @"Picometer", @"pm|pikometer|pikometrarna|pikometrar" }, + { @"Mile", @"-mile|mile|miles" }, + { @"Yard", @"yard|yards" }, + { @"Inch", @"-inch|inches|inch|-tums|-tum|tum|tums|""" }, + { @"Foot", @"-fot|fot|ft|-fots" }, + { @"Light year", @"ljusår" }, + { @"Pt", @"pt|pts" } + }; + public static readonly IList AmbiguousLengthUnitList = new List + { 
+ @"pm", + @"pt", + @"pts" + }; + public static readonly Dictionary SpeedSuffixList = new Dictionary + { + { @"Meter per second", @"meter / sekund|meter/sekund|m/s|meter per sekund|metrar per sekund|sekundmetrar|sekundmeter" }, + { @"Kilometer per hour", @"km/h|kilometer per timme|kilometer i timmen" }, + { @"Kilometer per minute", @"km/min|kilometer per minut|kilometer i minuten|kilometrar per minut" }, + { @"Kilometer per second", @"km/s|kilometer per sekund|kilometres per second|kilometer per second|kilometre per second" }, + { @"Mile per hour", @"mph|miles i timmen|miles per timme|mi/h" }, + { @"Knot", @"kt|knot|kn" }, + { @"Foot per second", @"ft/s|fot/s|fot per sekund|fps" }, + { @"Foot per minute", @"ft/min|fot/min|fot per minut" }, + { @"Yard per minute", @"yards per minut|yard per minut" }, + { @"Yard per second", @"yards per sekund|yard i sekunden|yards/s|yard/s" } + }; + public static readonly Dictionary TemperatureSuffixList = new Dictionary + { + { @"F", @"grader fahrenheit|grad fahrenheit|gr fahrenheit|gr. fahrenheit|°f|grad f|grader f|fahrenheit|f" }, + { @"K", @"k|K|kelvin" }, + { @"R", @"rankine|°ra|°r" }, + { @"D", @"delisle|°de|°d" }, + { @"C", @"grader celsius|grad celsius|gr. celsius|gr celsius|grader c|gr. 
c|gr c|°c|celsiusgrader|celsius|c" }, + { @"Degree", @"gr.|grader|grad|°" } + }; + public static readonly IList AmbiguousTemperatureUnitList = new List + { + @"c", + @"f", + @"k" + }; + public static readonly Dictionary VolumeSuffixList = new Dictionary + { + { @"Cubic meter", @"m3|kubikmeter|kbm" }, + { @"Cubic centimeter", @"kubikcentimeter|ccm" }, + { @"Cubic millimiter", @"kubikmillimeter" }, + { @"Hectoliter", @"hektoliter|hektolitrar" }, + { @"Decaliter", @"dekaliter" }, + { @"Liter", @"l|liter|litrar" }, + { @"Deciliter", @"dl|deciliter|deciliters|decilitrat" }, + { @"Centiliter", @"cl|centiliter" }, + { @"Milliliter", @"ml|milliliter" }, + { @"Cubic yard", @"kubikyard" }, + { @"Cubic inch", @"kubiktum" }, + { @"Cubic foot", @"kubikfot" }, + { @"Cubic mile", @"kubikmile" }, + { @"Fluid ounce", @"fl oz" }, + { @"Teaspoon", @"tesked|teskedar" }, + { @"Tablespoon", @"matsked|matskedar" }, + { @"Pint", @"pint|pints" }, + { @"Volume unit", @"kopp" } + }; + public static readonly IList AmbiguousVolumeUnitList = new List + { + @"kopp" + }; + public static readonly Dictionary WeightSuffixList = new Dictionary + { + { @"Kilogram", @"kg|kilogram|kilo|kilon" }, + { @"Barrel", @"-fats|fat" }, + { @"Gram", @"g|gram|-grams" }, + { @"Milligram", @"mg|milligram|-milligrams" }, + { @"Microgram", @"μg|mikrogram|-mikrograms" }, + { @"Gallon", @"-gallon|gallons|gallon" }, + { @"Metric ton", @"metric tons|metric ton" }, + { @"Ton", @"-ton|ton|-tons|tons|ton" }, + { @"Pound", @"pound|pounds|lb|lbs|pund|-punds" }, + { @"Ounce", @"-ounce|ounce|oz|ounces|uns" }, + { @"Weight unit", @"pennyweight|grain|british long ton|us short hundredweight|stone|dram" } + }; + public static readonly IList AmbiguousWeightUnitList = new List + { + @"g" + }; + public static readonly Dictionary AngleSuffixList = new Dictionary + { + { @"Degree", @"grad|grader|gr.|gr|°" }, + { @"Radian", @"radian|radianer|rad" }, + { @"Turn", @"varv" } + }; + public static readonly IList AmbiguousAngleUnitList = new 
List + { + @"varv" + }; + public static readonly Dictionary CurrencySuffixList = new Dictionary + { + { @"Abkhazian apsar", @"abchazisk apsar|apsar|аԥсар|āpsār" }, + { @"Afghan afghani", @"afghani|؋|afn|afghanis|afghani|افغانۍ|افغانی" }, + { @"Euro", @"euros|euro|€|eur" }, + { @"Cent", @"cents|cent|-cents|-cent" }, + { @"Albanian lek", @"albansk lek|lekë|lek" }, + { @"Qindarkë", @"qindarkë|qindarkës|qindarke|qindarkes" }, + { @"Angolan kwanza", @"angolansk kwanza|kz|aoa|kwanza|kwanzas|angolanska kwanzas" }, + { @"Armenian dram", @"dram" }, + { @"Aruban florin", @"arubansk florin|ƒ|awg|arubanska floriner|florin|floriner|arubaanse florin" }, + { @"Bangladeshi taka", @"taka|৳|bdt|takas|tākā|tk" }, + { @"Paisa", @"paisa" }, + { @"Bhutanese ngultrum", @"ngultrum|nu.|btn|ngultrums|dNgul Tam" }, + { @"Chetrum", @"chhertum|chhertums" }, + { @"Bolivian boliviano", @"boliviano|bob|bs.|bolivianos" }, + { @"Bosnia and Herzegovina convertible mark", @"konvertibilna marka|bam|конвертибилна марка|konvertibilnih maraka" }, + { @"Fening", @"feninga" }, + { @"Botswana pula", @"pula|pulas" }, + { @"Thebe", @"thebe|thebes" }, + { @"Brazilian real", @"real|r$|brl" }, + { @"Bulgarian lev", @"lev|bgn|лв|bălgarska lev|leva" }, + { @"Stotinka", @"stotinki" }, + { @"Cambodian riel", @"riel|khr|៛|រៀល" }, + { @"Cape Verdean escudo", @"kapverdisk escudo|kapverdiska escudos|cve" }, + { @"Costa Rican colón", @"costaricansk colón|crc|costaricansk colon" }, + { @"Salvadoran colón", @"salvadoransk colón|salvadoransk colon|svc" }, + { @"Céntimo", @"céntimo|centimo" }, + { @"Croatian kuna", @"Kroatisk kuna|hrvatska kuna|hrk|kn" }, + { @"Lipa", @"lipa" }, + { @"Czech koruna", @"tjeckisk krona|tjeckiska kronor|czk|kč|tjeckisk koruna|tjeckiska koruna|tjeckiska korunas|koruna česká" }, + { @"Haléř", @"haléř" }, + { @"Eritrean nakfa", @"eritrean nakfa|nfk|ern|eritrean nakfas|nakfas|nakfa" }, + { @"Ethiopian birr", @"ethiopian birr|etb" }, + { @"Gambian dalasi", @"gmd|dalasi" }, + { @"Butut", 
@"bututs|butut" }, + { @"Georgian lari", @"georgisk lari|lari|gel|₾|ქართული ლარი|kartuli lari" }, + { @"Tetri", @"tetri" }, + { @"Ghanaian cedi", @"ghana cedi|ghs|₵|gh₵" }, + { @"Pesewa", @"pesewas|pesewa" }, + { @"Guatemalan quetzal", @"quetzal|quetzales|gtq|quetzal guatemalteco" }, + { @"Haitian gourde", @"gourde|haitisk gourde|htg" }, + { @"Honduran lempira", @"lempira|hnl|lempira hondureño" }, + { @"Hungarian forint", @"ungersk forint|huf|ft|forint|magyar forint" }, + { @"Fillér", @"fillér" }, + { @"Iranian rial", @"iransk rial|iransk riyal|irr|iranska rial|iranska riyal|irani rial" }, + { @"Yemeni rial", @"jemenitisk rial|jemenitisk riyal|yer|yemeni rials|yemeni rial|ر.ي" }, + { @"Israeli new shekel", @"₪|ils|agora|שקל חדש|sheqel khadash|nis" }, + { @"Lithuanian litas", @"ltl|litas" }, + { @"Japanese yen", @"japanska yen|japanska yen|jpy|yen|-yen|¥" }, + { @"Kazakhstani tenge", @"tenge|kzt|тиын|теңг" }, + { @"Kenyan shilling", @"kenyansk shilling|kes" }, + { @"North Korean won", @"nordkoreansk won|kpw|₩n" }, + { @"South Korean won", @"sydkoreansk won|krw|" }, + { @"Korean won", @"koreansk won|₩|koreanska won|wŏn|원" }, + { @"Kyrgyzstani som", @"kirgizistansk som|kgs|Кыргыз сом" }, + { @"Uzbekitan som", @"uzbekistansk som|uzs|Ўзбек сўм" }, + { @"Lao kip", @"kip|lak|₭n|₭" }, + { @"Att", @"att" }, + { @"Lesotho loti", @"loti|maloti|lsl" }, + { @"Sente", @"sente|lisente" }, + { @"South African rand", @"rand" }, + { @"Macanese pataca", @"pataca|mop$|mop" }, + { @"Avo", @"avos|avo" }, + { @"Macedonian denar", @"makedonisk denar|mkd|ден|Mdi" }, + { @"Deni", @"deni|denari" }, + { @"Malagasy ariary", @"ariary|mga|ma|Ariary malgache" }, + { @"Iraimbilanja", @"iraimbilanja" }, + { @"Malawian kwacha", @"malawisk kwacha|mk|mwk" }, + { @"Tambala", @"tambala" }, + { @"Malaysian ringgit", @"malaysisk ringgit|malaysisk dollar|rm|myr" }, + { @"Mauritanian ouguiya", @"mauretansk ouguiya|ouguiya|um|mro" }, + { @"Khoums", @"khoums" }, + { @"Mongolian tögrög", 
@"tögrög|tugrik|mnt|₮|төгрөг" }, + { @"Mozambican metical", @"metical|metical moçambicano nuevo|mt|mzn" }, + { @"Burmese kyat", @"kyat|kyap|ks|mmk|" }, + { @"Pya", @"pya" }, + { @"Nicaraguan córdoba", @"córdoba|nio|C$|córdoba oro nicaragüense" }, + { @"Nigerian naira", @"naira|naira|ngn|₦|nigerian naira" }, + { @"Kobo", @"kobo" }, + { @"Turkish lira", @"turkiska lira|try|tl|turkisk lira|türk Lirası" }, + { @"Kuruş", @"kuruş" }, + { @"Omani rial", @"omansk rial|omani rial|omr|ر.ع." }, + { @"Panamanian balboa", @"balboa|b/.|pab" }, + { @"Centesimo", @"centésimo|centésimos|centesimos|centesimo" }, + { @"Papua New Guinean kina", @"kina|pgk" }, + { @"Toea", @"toea" }, + { @"Paraguayan guaraní", @"guaraní|guarani|₲|pyg" }, + { @"Peruvian sol", @"sol|nuevo sol peruano|pen" }, + { @"Polish złoty", @"złoty|polsk złoty|zł|pln|zloty|złoty polski" }, + { @"Grosz", @"groszy|grosz|grosze" }, + { @"Qatari riyal", @"qatarisk rial|qatarisk riyal|qar|ر.ق" }, + { @"Saudi riyal", @"saudiarabisk riyal|saudiarabisk rial|sar|riyāl suʿūdī" }, + { @"Riyal", @"riyal|rial|﷼" }, + { @"Dirham", @"dirham|dirhem|dirhm|درهم" }, + { @"Halala", @"hallalas|hallala|هللة" }, + { @"Samoan tālā", @"tālā|tala|ws$|wst" }, + { @"Sene", @"sene" }, + { @"São Tomé and Príncipe dobra", @"dobra|dobras|std|dobra são-tomeano" }, + { @"Sierra Leonean leone", @"sierra leonean leone|sll|leone|le" }, + { @"Peseta", @"pesetas|peseta" }, + { @"Netherlands guilder", @"antillergulden|antilliaanse gulden|ang|nederländska gulden|-gulden|holländska gulden|fl|naƒ" }, + { @"Swazi lilangeni", @"lilangeni|szl|emalangeni" }, + { @"Tajikistani somoni", @"somoni|tjs|сомонӣ" }, + { @"Diram", @"diram|дирам" }, + { @"Thai baht", @"฿t|thb|baht|บาทไทย" }, + { @"Satang", @"satang|สตางค์" }, + { @"Tongan paʻanga", @"paʻanga|T$|pa''anga" }, + { @"Seniti", @"seniti" }, + { @"Ukrainian hryvnia", @"ukrainska hryvnia|ukrainsk hryvnia|hyrvnia|uah|₴|гривня" }, + { @"Vanuatu vatu", @"vatu|vuv" }, + { @"Venezuelan bolívar", 
@"bolívar|bs.f.|vef|fuerte" }, + { @"Vietnamese dong", @"dong|vnd|đồng|₫" }, + { @"Zambian kwacha", @"zambisk kwacha|zk|zmw|zambian kwacha" }, + { @"Moroccan dirham", @"marockansk dirham|mad|د.م." }, + { @"United Arab Emirates dirham", @"emiratisk dirham|د.إ|aed" }, + { @"Azerbaijani manat", @"azerbajdzjansk manat|azn|Azərbaycan manatı" }, + { @"Turkmenistan manat", @"turkmensk manat|turkmensk ny manat|tmt" }, + { @"Manat", @"manats|manat" }, + { @"Qəpik", @"qəpik|gepik" }, + { @"Somali shilling", @"somalisk shilling|shilin soomaali|sh.so.|sos" }, + { @"Somaliland shilling", @"somaliländsk shilling|somaliländsk shilin" }, + { @"Tanzanian shilling", @"tanzanisk shilling|tsh|tzs|shilingi tanzania" }, + { @"Ugandan shilling", @"ugandisk shilling|ugandiska shilling|ugx" }, + { @"Romanian leu", @"rumänsk leu|lei|ron|rumänska leu" }, + { @"Moldovan leu", @"moldavisk leu|mdl|moldaviska leu" }, + { @"Leu", @"leu" }, + { @"Ban", @"bani|-ban|ban" }, + { @"Nepalese rupee", @"nepalesisk rupie|npr" }, + { @"Pakistani rupee", @"pakistansk rupie|pkr" }, + { @"Indian rupee", @"indiska rupier|indisk rupie|inr|₹" }, + { @"Seychellois rupee", @"seychelliska rupier|scr|sr|sre" }, + { @"Mauritian rupee", @"mauritiska rupier|mauritisk rupie|mur" }, + { @"Maldivian rufiyaa", @"rufiyah|mvr|.ރ" }, + { @"Sri Lankan rupee", @"lankesiska rupier|lankesisk rupee|lkr|රු|ரூ" }, + { @"Indonesian rupiah", @"rupiah|perak|rp|idr" }, + { @"Rupee", @"rupie|rupier|rs" }, + { @"Danish krone", @"danska kronor|dansk krona|dkk" }, + { @"Norwegian krone", @"norska kronor|norsk krona|nok" }, + { @"Faroese króna", @"färöiska kronor|färöisk krona|fok" }, + { @"Icelandic króna", @"isländska kronor|isländsk krona" }, + { @"Swedish krona", @"svenska kronor|svensk krona|sek|spänn" }, + { @"Krone", @"kronor|krona|króna|kr|-kr" }, + { @"Øre", @"Øre|oyra|eyrir|öre" }, + { @"West African CFA franc", @"västafrikanska cfa franc|xof|västafrikansk cfa franc" }, + { @"Central African CFA franc", @"centralafrikanska cfa 
franc|xaf|centralafrikansk cfa franc" }, + { @"Comorian franc", @"komorisk franc|kmf" }, + { @"Congolese franc", @"kongolesisk franc|cdf" }, + { @"Burundian franc", @"burundisk francbif" }, + { @"Djiboutian franc", @"djiboutisk franc|djf" }, + { @"CFP franc", @"cfp franc|xpf" }, + { @"Guinean franc", @"guinesisk franc|gnf" }, + { @"Swiss franc", @"schweizisk franc|schweizerfranc|chf|sfr." }, + { @"Rwandan franc", @"rwandisk franc|rwandiska franc|rwf|rf|r₣|frw" }, + { @"Belgian franc", @"belgian franc|bi.|b.fr.|bef|belgium franc" }, + { @"Rappen", @"rappen|-rappen" }, + { @"Franc", @"francs|franc|fr.|fs" }, + { @"Centime", @"centimes|centime|santim" }, + { @"Russian ruble", @"ryska rubel|rysk rubel|₽|rub" }, + { @"New Belarusian ruble", @"ny belarusisk rubel|byn" }, + { @"Old Belarusian ruble", @"gammal belarusisk rubel|byr" }, + { @"Transnistrian ruble", @"transnistrisk rubel|prb|р." }, + { @"Belarusian ruble", @"belarusisk rubel" }, + { @"Kopek", @"kopek|kopeks" }, + { @"Kapyeyka", @"kapyeyka" }, + { @"Ruble", @"rubel|br" }, + { @"Algerian dinar", @"algerisk dinar|د.ج|dzd|algeriska dinarer" }, + { @"Bahraini dinar", @"bahrainsk dinar|bahrainska dinarer|bhd|.د.ب" }, + { @"Santeem", @"santeem|santeems" }, + { @"Iraqi dinar", @"iraqi dinars|iraqi dinar|iraq dinars|iraq dinar|iqd|ع.د" }, + { @"Jordanian dinar", @"jordanska dinarer|jordansk dinar|د.ا|jod" }, + { @"Kuwaiti dinar", @"kuwaitiska dinarer|kuwaitisk dinar|kwd|د.ك" }, + { @"Libyan dinar", @"libyska dinarer|libysk dinar|lyd" }, + { @"Serbian dinar", @"serbiska dinarer|serbisk dinar|din.|rsd|дин.|Српски Динар" }, + { @"Tunisian dinar", @"tunisiska dinarer|tunisisk dinar|tnd" }, + { @"Yugoslav dinar", @"yugoslav dinars|yugoslav dinar|yun" }, + { @"Dinar", @"dinarer|dinar|-dinarer|-dinar" }, + { @"Fils", @"fils|fulūs|-fils|-fil" }, + { @"Para", @"para|napa" }, + { @"Millime", @"millimes|millime" }, + { @"Argentine peso", @"argentinska pesos|argentinsk peso|ars" }, + { @"Chilean peso", @"chilenska pesos|chilensk 
peso|clp" }, + { @"Colombian peso", @"colombianska pesos|colombiansk peso|cop" }, + { @"Cuban convertible peso", @"cuban convertible pesos|cuban convertible peso|cuc|cuba convertible pesos|cuba convertible peso" }, + { @"Cuban peso", @"kubanska pesos|kubansk pesocup" }, + { @"Dominican peso", @"dominikanska pesos|dominikansk peso|dop" }, + { @"Mexican peso", @"mexikanska pesos|mexikansk peso|mxn" }, + { @"Philippine peso", @"piso|filippinska pesos|filippinsk peso|₱|php" }, + { @"Uruguayan peso", @"uruguayanska pesos|uruguayansk peso|uyu" }, + { @"Peso", @"pesos|peso" }, + { @"Centavo", @"centavos|centavo" }, + { @"Alderney pound", @"alderney pounds|alderney pound|alderney £" }, + { @"British pound", @"brittiskt pund|brittiskta pund|brittiskt £|gbp|pound sterling|pound sterlings|sterling|pound scot|pound scots" }, + { @"Guernsey pound", @"guernsey pounds|guernsey £|ggp" }, + { @"Ascension pound", @"ascension pounds|ascension pound|ascension £" }, + { @"Saint Helena pound", @"saint helena pounds|saint helena pound|saint helena £|shp" }, + { @"Egyptian pound", @"egyptiska pund|egyptiskt pund|egyptiskt £|egp|ج.م" }, + { @"Falkland Islands pound", @"falkland islands pounds|falkland islands pound|falkland islands £|fkp|falkland island pounds|falkland island pound|falkland island £" }, + { @"Gibraltar pound", @"gibraltar pounds|gibraltar pound|gibraltar £|gip" }, + { @"Manx pound", @"manx pounds|manx pound|manx £|imp" }, + { @"Jersey pound", @"jersey pounds|jersey pound|jersey £|jep" }, + { @"Lebanese pound", @"libanesiska pund|libanesiskt pund|libanesiskt £|lbp|ل.ل" }, + { @"South Georgia and the South Sandwich Islands pound", @"south georgia and the south sandwich islands pounds|south georgia and the south sandwich islands pound|south georgia and the south sandwich islands £" }, + { @"South Sudanese pound", @"sydsudanesiska pund|sydsudanesiskt pund|sydsudanesiskt £|ssp" }, + { @"Sudanese pound", @"sudanesiska pund|sudanesiskt pund|sudanesiskt £|ج.س.|sdg" }, + { @"Syrian 
pound", @"syriska pund|syriskt pund|syriskt £|ل.س|syp" }, + { @"Tristan da Cunha pound", @"tristan da cunha pounds|tristan da cunha pound|tristan da cunha £" }, + { @"Pound", @"pund|-pund|£" }, + { @"Pence", @"pence" }, + { @"Shilling", @"shillings|shilling|shilingi|sh" }, + { @"Penny", @"pennies|penny" }, + { @"United States dollar", @"amerikanska dollar|amerikans dollar|amerikanska $|amerikans $|u.s. dollars|u.s. dollar|u s dollar|u s dollars|usd|american dollars|american dollar|us$|us dollar|us dollars|u.s dollar|u.s dollars" }, + { @"East Caribbean dollar", @"east caribbean dollars|east caribbean dollar|east Caribbean $|xcd" }, + { @"Australian dollar", @"australiska dollar|australisk dollar|australian dollars|australian dollar|australian $|australian$|aud|australia dollars|australia dollar|australia $|australia$|aud|au$" }, + { @"Bahamian dollar", @"bahamian dollars|bahamian dollar|bahamian $|bahamian$|bsd|bahamia dollars|bahamia dollar|bahamia $|bahamia$" }, + { @"Barbadian dollar", @"barbadian dollars|barbadian dollar|barbadian $|bbd" }, + { @"Belize dollar", @"belize dollars|belize dollar|belize $|bzd" }, + { @"Bermudian dollar", @"bermudian dollars|bermudian dollar|bermudian $|bmd|bermudia dollars|bermudia dollar|bermudia $" }, + { @"British Virgin Islands dollar", @"british virgin islands dollars|british virgin islands dollar|british virgin islands $|bvi$|virgin islands dollars|virgin islands dolalr|virgin islands $|virgin island dollars|virgin island dollar|virgin island $" }, + { @"Brunei dollar", @"brunei dollar|brunei $|bnd" }, + { @"Sen", @"sen" }, + { @"Singapore dollar", @"singapore dollars|singapore dollar|singapore $|s$|sgd" }, + { @"Canadian dollar", @"canadian dollars|canadian dollar|canadian $|cad|can$|c$|canada dollars|canada dolllar|canada $" }, + { @"Cayman Islands dollar", @"cayman islands dollars|cayman islands dollar|cayman islands $|kyd|ci$|cayman island dollar|cayman island doolars|cayman island $" }, + { @"New Zealand dollar", @"new 
zealand dollars|new zealand dollar|new zealand $|nz$|nzd|kiwi" }, + { @"Cook Islands dollar", @"cook islands dollars|cook islands dollar|cook islands $|cook island dollars|cook island dollar|cook island $" }, + { @"Fijian dollar", @"fijian dollars|fijian dollar|fijian $|fjd|fiji dollars|fiji dollar|fiji $" }, + { @"Guyanese dollar", @"guyanese dollars|guyanese dollar|gyd|gy$" }, + { @"Hong Kong dollar", @"hong kong dollars|hong kong dollar|hong kong $|hk$|hkd|hk dollars|hk dollar|hk $|hongkong$" }, + { @"Jamaican dollar", @"jamaican dollars|jamaican dollar|jamaican $|j$|jamaica dollars|jamaica dollar|jamaica $|jmd" }, + { @"Kiribati dollar", @"kiribati dollars|kiribati dollar|kiribati $" }, + { @"Liberian dollar", @"liberian dollars|liberian dollar|liberian $|liberia dollars|liberia dollar|liberia $|lrd" }, + { @"Micronesian dollar", @"micronesian dollars|micronesian dollar|micronesian $" }, + { @"Namibian dollar", @"namibian dollars|namibian dollar|namibian $|nad|n$|namibia dollars|namibia dollar|namibia $" }, + { @"Nauruan dollar", @"nauruan dollars|nauruan dollar|nauruan $" }, + { @"Niue dollar", @"niue dollars|niue dollar|niue $" }, + { @"Palauan dollar", @"palauan dollars|palauan dollar|palauan $" }, + { @"Pitcairn Islands dollar", @"pitcairn islands dollars|pitcairn islands dollar|pitcairn islands $|pitcairn island dollars|pitcairn island dollar|pitcairn island $" }, + { @"Solomon Islands dollar", @"solomon islands dollars|solomon islands dollar|solomon islands $|si$|sbd|solomon island dollars|solomon island dollar|solomon island $" }, + { @"Surinamese dollar", @"surinamese dollars|surinamese dollar|surinamese $|srd" }, + { @"New Taiwan dollar", @"new taiwan dollars|new taiwan dollar|nt$|twd|ntd" }, + { @"Trinidad and Tobago dollar", @"trinidad and tobago dollars|trinidad and tobago dollar|trinidad and tobago $|trinidad $|trinidad dollar|trinidad dollars|trinidadian dollar|trinidadian dollars|trinidadian $|ttd" }, + { @"Tuvaluan dollar", @"tuvaluan 
dollars|tuvaluan dollar|tuvaluan $" }, + { @"Dollar", @"dollars|dollar|$" }, + { @"Chinese yuan", @"yuán|yuan|kuai|chinese yuan|renminbi|cny|rmb|¥|元|人民币|人民幣" }, + { @"Fen", @"fen" }, + { @"Jiao", @"jiao|mao" }, + { @"Finnish markka", @"suomen markka|finska mark|finsk mark|fim|markkaa|markka" }, + { @"Penni", @"penniä|penni" }, + { @"Bitcoin", @"bitcoin|bitcoins|btc|xbt|₿" }, + { @"Millibitcoin", @"millibitcoin|millibitcoins|milibitcoin|milibitcoins" }, + { @"Satoshi", @"satoshi|satoshis" } + }; + public static readonly Dictionary CurrencyNameToIsoCodeMap = new Dictionary + { + { @"Afghan afghani", @"AFN" }, + { @"Euro", @"EUR" }, + { @"Albanian lek", @"ALL" }, + { @"Angolan kwanza", @"AOA" }, + { @"Armenian dram", @"AMD" }, + { @"Aruban florin", @"AWG" }, + { @"Bangladeshi taka", @"BDT" }, + { @"Bhutanese ngultrum", @"BTN" }, + { @"Bolivian boliviano", @"BOB" }, + { @"Bosnia and Herzegovina convertible mark", @"BAM" }, + { @"Botswana pula", @"BWP" }, + { @"Brazilian real", @"BRL" }, + { @"Bulgarian lev", @"BGN" }, + { @"Cambodian riel", @"KHR" }, + { @"Cape Verdean escudo", @"CVE" }, + { @"Costa Rican colón", @"CRC" }, + { @"Croatian kuna", @"HRK" }, + { @"Czech koruna", @"CZK" }, + { @"Eritrean nakfa", @"ERN" }, + { @"Ethiopian birr", @"ETB" }, + { @"Gambian dalasi", @"GMD" }, + { @"Georgian lari", @"GEL" }, + { @"Ghanaian cedi", @"GHS" }, + { @"Guatemalan quetzal", @"GTQ" }, + { @"Haitian gourde", @"HTG" }, + { @"Honduran lempira", @"HNL" }, + { @"Hungarian forint", @"HUF" }, + { @"Iranian rial", @"IRR" }, + { @"Yemeni rial", @"YER" }, + { @"Israeli new shekel", @"ILS" }, + { @"Japanese yen", @"JPY" }, + { @"Kazakhstani tenge", @"KZT" }, + { @"Kenyan shilling", @"KES" }, + { @"North Korean won", @"KPW" }, + { @"South Korean won", @"KRW" }, + { @"Kyrgyzstani som", @"KGS" }, + { @"Lao kip", @"LAK" }, + { @"Lesotho loti", @"LSL" }, + { @"South African rand", @"ZAR" }, + { @"Macanese pataca", @"MOP" }, + { @"Macedonian denar", @"MKD" }, + { @"Malagasy ariary", @"MGA" 
}, + { @"Malawian kwacha", @"MWK" }, + { @"Malaysian ringgit", @"MYR" }, + { @"Mauritanian ouguiya", @"MRO" }, + { @"Mongolian tögrög", @"MNT" }, + { @"Mozambican metical", @"MZN" }, + { @"Burmese kyat", @"MMK" }, + { @"Nicaraguan córdoba", @"NIO" }, + { @"Nigerian naira", @"NGN" }, + { @"Turkish lira", @"TRY" }, + { @"Omani rial", @"OMR" }, + { @"Panamanian balboa", @"PAB" }, + { @"Papua New Guinean kina", @"PGK" }, + { @"Paraguayan guaraní", @"PYG" }, + { @"Peruvian sol", @"PEN" }, + { @"Polish złoty", @"PLN" }, + { @"Qatari riyal", @"QAR" }, + { @"Saudi riyal", @"SAR" }, + { @"Samoan tālā", @"WST" }, + { @"São Tomé and Príncipe dobra", @"STD" }, + { @"Sierra Leonean leone", @"SLL" }, + { @"Swazi lilangeni", @"SZL" }, + { @"Tajikistani somoni", @"TJS" }, + { @"Thai baht", @"THB" }, + { @"Ukrainian hryvnia", @"UAH" }, + { @"Vanuatu vatu", @"VUV" }, + { @"Venezuelan bolívar", @"VEF" }, + { @"Zambian kwacha", @"ZMW" }, + { @"Moroccan dirham", @"MAD" }, + { @"United Arab Emirates dirham", @"AED" }, + { @"Azerbaijani manat", @"AZN" }, + { @"Turkmenistan manat", @"TMT" }, + { @"Somali shilling", @"SOS" }, + { @"Tanzanian shilling", @"TZS" }, + { @"Ugandan shilling", @"UGX" }, + { @"Romanian leu", @"RON" }, + { @"Moldovan leu", @"MDL" }, + { @"Nepalese rupee", @"NPR" }, + { @"Pakistani rupee", @"PKR" }, + { @"Indian rupee", @"INR" }, + { @"Seychellois rupee", @"SCR" }, + { @"Mauritian rupee", @"MUR" }, + { @"Maldivian rufiyaa", @"MVR" }, + { @"Sri Lankan rupee", @"LKR" }, + { @"Indonesian rupiah", @"IDR" }, + { @"Danish krone", @"DKK" }, + { @"Norwegian krone", @"NOK" }, + { @"Icelandic króna", @"ISK" }, + { @"Swedish krona", @"SEK" }, + { @"West African CFA franc", @"XOF" }, + { @"Central African CFA franc", @"XAF" }, + { @"Comorian franc", @"KMF" }, + { @"Congolese franc", @"CDF" }, + { @"Burundian franc", @"BIF" }, + { @"Djiboutian franc", @"DJF" }, + { @"CFP franc", @"XPF" }, + { @"Guinean franc", @"GNF" }, + { @"Swiss franc", @"CHF" }, + { @"Rwandan franc", @"RWF" 
}, + { @"Russian ruble", @"RUB" }, + { @"Transnistrian ruble", @"PRB" }, + { @"New Belarusian ruble", @"BYN" }, + { @"Algerian dinar", @"DZD" }, + { @"Bahraini dinar", @"BHD" }, + { @"Iraqi dinar", @"IQD" }, + { @"Jordanian dinar", @"JOD" }, + { @"Kuwaiti dinar", @"KWD" }, + { @"Libyan dinar", @"LYD" }, + { @"Serbian dinar", @"RSD" }, + { @"Tunisian dinar", @"TND" }, + { @"Argentine peso", @"ARS" }, + { @"Chilean peso", @"CLP" }, + { @"Colombian peso", @"COP" }, + { @"Cuban convertible peso", @"CUC" }, + { @"Cuban peso", @"CUP" }, + { @"Dominican peso", @"DOP" }, + { @"Mexican peso", @"MXN" }, + { @"Uruguayan peso", @"UYU" }, + { @"British pound", @"GBP" }, + { @"Saint Helena pound", @"SHP" }, + { @"Egyptian pound", @"EGP" }, + { @"Falkland Islands pound", @"FKP" }, + { @"Gibraltar pound", @"GIP" }, + { @"Manx pound", @"IMP" }, + { @"Jersey pound", @"JEP" }, + { @"Lebanese pound", @"LBP" }, + { @"South Sudanese pound", @"SSP" }, + { @"Sudanese pound", @"SDG" }, + { @"Syrian pound", @"SYP" }, + { @"United States dollar", @"USD" }, + { @"Australian dollar", @"AUD" }, + { @"Bahamian dollar", @"BSD" }, + { @"Barbadian dollar", @"BBD" }, + { @"Belize dollar", @"BZD" }, + { @"Bermudian dollar", @"BMD" }, + { @"Brunei dollar", @"BND" }, + { @"Singapore dollar", @"SGD" }, + { @"Canadian dollar", @"CAD" }, + { @"Cayman Islands dollar", @"KYD" }, + { @"New Zealand dollar", @"NZD" }, + { @"Fijian dollar", @"FJD" }, + { @"Guyanese dollar", @"GYD" }, + { @"Hong Kong dollar", @"HKD" }, + { @"Jamaican dollar", @"JMD" }, + { @"Liberian dollar", @"LRD" }, + { @"Namibian dollar", @"NAD" }, + { @"Solomon Islands dollar", @"SBD" }, + { @"Surinamese dollar", @"SRD" }, + { @"New Taiwan dollar", @"TWD" }, + { @"Trinidad and Tobago dollar", @"TTD" }, + { @"Tuvaluan dollar", @"TVD" }, + { @"Chinese yuan", @"CNY" }, + { @"Rial", @"__RI" }, + { @"Shiling", @"__S" }, + { @"Som", @"__SO" }, + { @"Dirham", @"__DR" }, + { @"Dinar", @"_DN" }, + { @"Dollar", @"__D" }, + { @"Manat", @"__MA" }, + { 
@"Rupee", @"__R" }, + { @"Krone", @"__K" }, + { @"Krona", @"__K" }, + { @"Crown", @"__K" }, + { @"Frank", @"__F" }, + { @"Mark", @"__M" }, + { @"Ruble", @"__RB" }, + { @"Peso", @"__PE" }, + { @"Pound", @"__P" }, + { @"Tristan da Cunha pound", @"_TP" }, + { @"South Georgia and the South Sandwich Islands pound", @"_SP" }, + { @"Somaliland shilling", @"_SS" }, + { @"Pitcairn Islands dollar", @"_PND" }, + { @"Palauan dollar", @"_PD" }, + { @"Niue dollar", @"_NID" }, + { @"Nauruan dollar", @"_ND" }, + { @"Micronesian dollar", @"_MD" }, + { @"Kiribati dollar", @"_KID" }, + { @"Guernsey pound", @"_GGP" }, + { @"Faroese króna", @"_FOK" }, + { @"Cook Islands dollar", @"_CKD" }, + { @"British Virgin Islands dollar", @"_BD" }, + { @"Ascension pound", @"_AP" }, + { @"Alderney pound", @"_ALP" }, + { @"Abkhazian apsar", @"_AA" }, + { @"Bitcoin", @"_XBT" } + }; + public static readonly Dictionary FractionalUnitNameToCodeMap = new Dictionary + { + { @"Jiao", @"JIAO" }, + { @"Kopek", @"KOPEK" }, + { @"Pul", @"PUL" }, + { @"Cent", @"CENT" }, + { @"Qindarkë", @"QINDARKE" }, + { @"Penny", @"PENNY" }, + { @"Santeem", @"SANTEEM" }, + { @"Cêntimo", @"CENTIMO" }, + { @"Centavo", @"CENTAVO" }, + { @"Luma", @"LUMA" }, + { @"Qəpik", @"QƏPIK" }, + { @"Fils", @"FILS" }, + { @"Poisha", @"POISHA" }, + { @"Kapyeyka", @"KAPYEYKA" }, + { @"Centime", @"CENTIME" }, + { @"Chetrum", @"CHETRUM" }, + { @"Paisa", @"PAISA" }, + { @"Fening", @"FENING" }, + { @"Thebe", @"THEBE" }, + { @"Sen", @"SEN" }, + { @"Stotinka", @"STOTINKA" }, + { @"Fen", @"FEN" }, + { @"Céntimo", @"CENTIMO" }, + { @"Lipa", @"LIPA" }, + { @"Haléř", @"HALER" }, + { @"Øre", @"ØRE" }, + { @"Piastre", @"PIASTRE" }, + { @"Santim", @"SANTIM" }, + { @"Oyra", @"OYRA" }, + { @"Butut", @"BUTUT" }, + { @"Tetri", @"TETRI" }, + { @"Pesewa", @"PESEWA" }, + { @"Fillér", @"FILLER" }, + { @"Eyrir", @"EYRIR" }, + { @"Dinar", @"DINAR" }, + { @"Agora", @"AGORA" }, + { @"Tïın", @"TIIN" }, + { @"Chon", @"CHON" }, + { @"Jeon", @"JEON" }, + { @"Tyiyn", 
@"TYIYN" }, + { @"Att", @"ATT" }, + { @"Sente", @"SENTE" }, + { @"Dirham", @"DIRHAM" }, + { @"Rappen", @"RAPPEN" }, + { @"Avo", @"AVO" }, + { @"Deni", @"DENI" }, + { @"Iraimbilanja", @"IRAIMBILANJA" }, + { @"Tambala", @"TAMBALA" }, + { @"Laari", @"LAARI" }, + { @"Khoums", @"KHOUMS" }, + { @"Ban", @"BAN" }, + { @"Möngö", @"MONGO" }, + { @"Pya", @"PYA" }, + { @"Kobo", @"KOBO" }, + { @"Kuruş", @"KURUS" }, + { @"Baisa", @"BAISA" }, + { @"Centésimo", @"CENTESIMO" }, + { @"Toea", @"TOEA" }, + { @"Sentimo", @"SENTIMO" }, + { @"Grosz", @"GROSZ" }, + { @"Sene", @"SENE" }, + { @"Halala", @"HALALA" }, + { @"Para", @"PARA" }, + { @"Öre", @"ORE" }, + { @"Diram", @"DIRAM" }, + { @"Satang", @"SATANG" }, + { @"Seniti", @"SENITI" }, + { @"Millime", @"MILLIME" }, + { @"Tennesi", @"TENNESI" }, + { @"Kopiyka", @"KOPIYKA" }, + { @"Tiyin", @"TIYIN" }, + { @"Hào", @"HAO" }, + { @"Ngwee", @"NGWEE" }, + { @"Millibitcoin", @"MILLIBITCOIN" }, + { @"Satoshi", @"SATOSHI" } + }; + public const string CompoundUnitConnectorRegex = @"(?och)"; + public static readonly Dictionary CurrencyPrefixList = new Dictionary + { + { @"Dollar", @"$" }, + { @"United States dollar", @"united states $|us$|us $|u.s. 
$|u.s $|usd" }, + { @"East Caribbean dollar", @"east caribbean $" }, + { @"Australian dollar", @"australian $|australia $" }, + { @"Bahamian dollar", @"bahamian $|bahamia $" }, + { @"Barbadian dollar", @"barbadian $|barbadin $" }, + { @"Belize dollar", @"belize $" }, + { @"Bermudian dollar", @"bermudian $" }, + { @"British Virgin Islands dollar", @"british virgin islands $|bvi$|virgin islands $|virgin island $|british virgin island $" }, + { @"Brunei dollar", @"brunei $|b$" }, + { @"Sen", @"sen" }, + { @"Singapore dollar", @"singapore $|s$" }, + { @"Canadian dollar", @"canadian $|can$|c$|c $|canada $" }, + { @"Cayman Islands dollar", @"cayman islands $|ci$|cayman island $" }, + { @"New Zealand dollar", @"new zealand $|nz$|nz $" }, + { @"Cook Islands dollar", @"cook islands $|cook island $" }, + { @"Fijian dollar", @"fijian $|fiji $" }, + { @"Guyanese dollar", @"gy$|gy $|g$|g $" }, + { @"Hong Kong dollar", @"hong kong $|hk$|hkd|hk $" }, + { @"Indian rupee", @"₹" }, + { @"Jamaican dollar", @"jamaican $|j$|jamaica $" }, + { @"Kiribati dollar", @"kiribati $" }, + { @"Liberian dollar", @"liberian $|liberia $" }, + { @"Micronesian dollar", @"micronesian $" }, + { @"Namibian dollar", @"namibian $|nad|n$|namibia $" }, + { @"Nauruan dollar", @"nauruan $" }, + { @"Niue dollar", @"niue $" }, + { @"Palauan dollar", @"palauan $" }, + { @"Pitcairn Islands dollar", @"pitcairn islands $|pitcairn island $" }, + { @"Solomon Islands dollar", @"solomon islands $|si$|si $|solomon island $" }, + { @"Surinamese dollar", @"surinamese $|surinam $" }, + { @"New Taiwan dollar", @"nt$|nt $" }, + { @"Trinidad and Tobago dollar", @"trinidad and tobago $|trinidad $|trinidadian $" }, + { @"Tuvaluan dollar", @"tuvaluan $" }, + { @"Samoan tālā", @"ws$" }, + { @"Chinese yuan", @"¥" }, + { @"Japanese yen", @"¥" }, + { @"Euro", @"€" }, + { @"Pound", @"£" }, + { @"Costa Rican colón", @"₡" }, + { @"Turkish lira", @"₺" }, + { @"Bitcoin", @"₿|btc|xbt" } + }; + public static readonly IList 
AmbiguousCurrencyUnitList = new List + { + @"att", + @"din.", + @"kiwi", + @"kina", + @"kobo", + @"lari", + @"lipa", + @"napa", + @"para", + @"sfr.", + @"taka", + @"tala", + @"toea", + @"vatu", + @"yuan", + @"ang", + @"ban", + @"bob", + @"btn", + @"byr", + @"cad", + @"cop", + @"cup", + @"dop", + @"gip", + @"jod", + @"kgs", + @"lak", + @"lei", + @"mga", + @"mop", + @"nad", + @"omr", + @"pul", + @"sar", + @"sbd", + @"scr", + @"sdg", + @"sek", + @"sen", + @"sol", + @"sos", + @"std", + @"try", + @"yer", + @"yen" + }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"null", @"null" } + }; + public static readonly Dictionary TemperatureAmbiguityFiltersDict = new Dictionary + { + { @"\b(gr(ad(er)?)?|°)$", @"\b((gr(ad(er)?)?|°)\s*(vinkel|rotation)|(rot(ation|eras?)|vinkeln?)(\s+(\p{L}+|\d+)){0,4}\s*(gr(ad(er)?)?\b|°))" } + }; + public static readonly Dictionary DimensionAmbiguityFiltersDict = new Dictionary + { + { @"\b(gr(ad(er)?)?|°)$", @"\b((gr(ad(er)?)?|°)\s*(c(elsius)?|f(ah?renheit)?)|(temperatur(en)?)(\s+(\p{L}+|\d+)){0,4}\s*(gr(ad(er)?)?\b|°))" } + }; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/NumbersWithUnitDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/NumbersWithUnitDefinitions.tt new file mode 100644 index 0000000000..0b29b8de06 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/NumbersWithUnitDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Swedish\Swedish-NumbersWithUnit.yaml"; + this.Language = "Swedish"; + this.ClassName = "NumbersWithUnitDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/QuotedTextDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/QuotedTextDefinitions.cs new file mode 100644 index 
0000000000..27dd74fcf9 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/QuotedTextDefinitions.cs @@ -0,0 +1,35 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Swedish\Swedish-QuotedText.yaml +// - Language: Swedish +// - ClassName: QuotedTextDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Swedish +{ + using System; + using System.Collections.Generic; + + public static class QuotedTextDefinitions + { + public const string LangMarker = @"Swe"; + public const string QuotedTextRegex1 = @"(“([^“”]+)”)"; + public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)"; + public const string QuotedTextRegex3 = @"(""([^""]+)"")"; + public const string QuotedTextRegex4 = @"('([^']+)')"; + public const string QuotedTextRegex5 = @"(`([^`]+)`)"; + public const string QuotedTextRegex6 = @"(123456)"; + public const string QuotedTextRegex7 = @"(123456)"; + public const string QuotedTextRegex8 = @"(123456)"; + public const string QuotedTextRegex9 = @"(123456)"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/QuotedTextDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/QuotedTextDefinitions.tt new file mode 100644 index 0000000000..c3ad2f03f9 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/QuotedTextDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Swedish\Swedish-QuotedText.yaml"; + this.Language = "Swedish"; + this.ClassName = 
"QuotedTextDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/TimeZoneDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/TimeZoneDefinitions.cs new file mode 100644 index 0000000000..18d9f536b0 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/TimeZoneDefinitions.cs @@ -0,0 +1,2086 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Swedish\Swedish-TimeZone.yaml +// - Language: Swedish +// - ClassName: TimeZoneDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Swedish +{ + using System; + using System.Collections.Generic; + + public static class TimeZoneDefinitions + { + public const string LangMarker = @"Swe"; + public const string DirectUtcRegex = @"\b(utc|gmt)(\s*[+\-\u00B1]?\s*[\d]{1,2}h?(\s*:\s*[\d]{1,2})?)?\b"; + public static readonly IList AbbreviationsList = new List + { + @"ACST", + @"ACT", + @"ACWST", + @"ADT", + @"AEDT", + @"AEST", + @"AFT", + @"AKDT", + @"AKST", + @"ALMT", + @"AMST", + @"AMT", + @"ANAT", + @"AQTT", + @"ART", + @"AST", + @"AWST", + @"AZOST", + @"AZOT", + @"AZST", + @"AZT", + @"BNT", + @"BOT", + @"BRST", + @"BRT", + @"BST", + @"BTT", + @"CAT", + @"CCT", + @"CDT", + @"CEST", + @"CET", + @"CHADT", + @"CHAST", + @"CHOT", + @"ChST", + @"CHUT", + @"CKT", + @"CLST", + @"CLT", + @"COT", + @"CST", + @"CVT", + @"CXT", + @"EASST", + @"EAST", + @"EAT", + @"ECT", + @"EDT", + @"EEST", + @"EET", + @"EGST", + @"EGT", + @"EST", + @"FET", + @"FJST", + @"FJT", + @"FKST", 
+ @"FNT", + @"GALT", + @"GAMT", + @"GET", + @"GFT", + @"GILT", + @"GMT", + @"GST", + @"GYT", + @"HDT", + @"HKT", + @"HOVT", + @"HST", + @"ICT", + @"IDT", + @"IRDT", + @"IRKT", + @"IRST", + @"IST", + @"JST", + @"KGT", + @"KOST", + @"KRAT", + @"KST", + @"LHDT", + @"LHST", + @"LINT", + @"MAGT", + @"MART", + @"MDT", + @"MHT", + @"MMT", + @"MSK", + @"MST", + @"MUT", + @"MVT", + @"MYT", + @"NCT", + @"NDT", + @"NFT", + @"NPT", + @"NRT", + @"NST", + @"NUT", + @"NZDT", + @"NZST", + @"OMST", + @"ORAT", + @"PDT", + @"PET", + @"PETT", + @"PGT", + @"PHOT", + @"PHT", + @"PKT", + @"PMDT", + @"PMST", + @"PONT", + @"PST", + @"PWT", + @"PYST", + @"PYT", + @"QYZT", + @"RET", + @"SAKT", + @"SAMT", + @"SAST", + @"SBT", + @"SCT", + @"SGT", + @"SRT", + @"SST", + @"TAHT", + @"TJT", + @"TLT", + @"TMT", + @"TOT", + @"TRT", + @"TVT", + @"ULAST", + @"ULAT", + @"UYST", + @"UYT", + @"UZT", + @"VET", + @"VLAT", + @"VUT", + @"WAST", + @"WAT", + @"WEST", + @"WET", + @"WFT", + @"WGST", + @"WGT", + @"WIB", + @"WIT", + @"WITA", + @"WST", + @"WT", + @"YAKT", + @"YEKT", + @"ABST", + @"ACDT", + @"AET", + @"AOE", + @"ARBST", + @"ARST", + @"AWDT", + @"BIT", + @"CADT", + @"CAST", + @"CBST", + @"CBT", + @"CCST", + @"CDTM", + @"CSTM", + @"CT", + @"EDTM", + @"ESAST", + @"ESAT", + @"ESTM", + @"ET", + @"GNDT", + @"GNST", + @"GTBST", + @"HADT", + @"HAST", + @"ISDT", + @"ISST", + @"JDT", + @"MAGST", + @"MAT", + @"MDTM", + @"MEST", + @"MOST", + @"MSK+1", + @"MSK+2", + @"MSK+3", + @"MSK+4", + @"MSK+5", + @"MSK+6", + @"MSK+7", + @"MSK+8", + @"MSK+9", + @"MSK-1", + @"MSTM", + @"MVST", + @"MYST", + @"NCAST", + @"NMDT", + @"NMST", + @"NZT", + @"PDST", + @"PDTM", + @"PSAST", + @"PSAT", + @"PSTM", + @"PT", + @"RST", + @"SAEST", + @"SAPST", + @"SAWST", + @"SLT", + @"SMST", + @"SNST", + @"TADT", + @"TAST", + @"THA", + @"TIST", + @"TOST", + @"TST", + @"UTC", + @"WPST" + }; + public static readonly IList FullNameList = new List + { + @"Centralaustralisk sommartid", + @"Centralaustralisk normaltid", + @"Acretid", + @"Central 
västaustralisk normaltid", + @"Central vastaustralisk normaltid", + @"Atlantisk sommartid", + @"Östaustralisk sommartid", + @"Ostaustralisk sommartid", + @"Östaustralisk normaltid", + @"Ostaustralisk normaltid", + @"Afghansk tid", + @"Alaska sommartid", + @"Alaska normaltid", + @"Almatytid", + @"Amazonsk sommartid", + @"Armenisk tid", + @"Amazonsk tid", + @"Anadyrtid", + @"Aqtöbe/Aktiubinsk tid", + @"Aqtobe/Aktiubinsk tid", + @"Argentinsk tid", + @"Arabisk normaltid", + @"Atlantisk normaltid", + @"Västaustralisk normaltid", + @"Vastaustralisk normaltid", + @"Azorisk sommartid", + @"Azorisk tid", + @"Azerbaijansk sommartid", + @"Azerbaijansk tid", + @"Bruneisk tid", + @"Boliviansk tid", + @"Brasiliansk sommartid", + @"Brasiliansk tid", + @"Brittisk sommartid", + @"Bangladeshisk normaltid", + @"Bhutansk tid", + @"Centralafrikansk tid", + @"Kokosöarnas tid", + @"Kokosoarnas tid", + @"Centralamerikansk sommartid", + @"Kubansk sommartid", + @"Centraleuropeisk sommartid", + @"Centraleuropeisk tid", + @"Chathamöarnas sommartid", + @"Chathamoarnas sommartid", + @"Chathamöarna normaltid", + @"Chathamoarna normaltid", + @"Tjojbalsantid", + @"Chamorro normaltid", + @"Chuuktid", + @"Cooköarnas tid", + @"Cookoarnas tid", + @"Chilensk sommartid", + @"Chilensk normaltid", + @"Colombiansk tid", + @"Centralamerikansk normaltid", + @"Kubansk normaltid", + @"Kinesisk normaltid", + @"Kapverdiansk tid", + @"Julötid", + @"Julotid", + @"Påsköns sommartid", + @"Paskons sommartid", + @"Påsköns normaltid", + @"Paskons normaltid", + @"Östafrikansk tid", + @"Ostafrikansk tid", + @"Equadoriansk tid", + @"Östamerikansk sommartid", + @"Ostamerikansk sommartid", + @"Östeuropeisk sommartid", + @"Osteuropeisk sommartid", + @"Östeuropeisk tid", + @"Osteuropeisk tid", + @"Östgrönländsk sommartid", + @"Ostgronlandsk sommartid", + @"Östgrönländsk tid", + @"Ostgronlandsk tid", + @"Östamerikansk normaltid", + @"Ostamerikansk normaltid", + @"Ytterligare-östeuropeisk tid", + @"Ytterligare-osteuropeisk 
tid", + @"Fiji sommartid", + @"Fijitid", + @"Falklandsöarnas sommartid", + @"Falklandsoarnas sommartid", + @"Fernando de Noronha tid", + @"Galápagostid", + @"Gambierstid", + @"Georgisk normaltid", + @"Franska Guyana tid", + @"Gilbertöarnas tid", + @"Gilbertoarnas tid", + @"Greenwichtid", + @"Gulf normaltid", + @"Guyanatid", + @"Hawaiisk sommartid", + @"Hongkongtid", + @"Hovdtid", + @"Hawaiisk normaltid", + @"Indokinesisk normaltid", + @"Israelisk sommartid", + @"Iransk sommartid", + @"Irkutsktid", + @"Iransk normaltid", + @"Irländsk normaltid", + @"Irlandsk normaltid", + @"Indisk normaltid", + @"Israelisk normaltid", + @"Japansk normaltid", + @"Kirgizistansk tid", + @"Kosraetid", + @"Krasnoyarsk tid", + @"Koreansk normaltid", + @"Lord Howe sommartid", + @"Lord Howe normaltid", + @"Linjeöarnas tid", + @"Linjeoarnas tid", + @"Magadantid", + @"Marquesastid", + @"Amerikansk-Mountain sommartid", + @"Marshallöarnas tid", + @"Marshalloarnas tid", + @"Burmesisk tid", + @"Moskva normaltid", + @"Amerikansk-Mountain normaltid", + @"Mauritisk tid", + @"Maldivisk tid", + @"Malaysisk tid", + @"Nykaledonsk tid", + @"Newfoundland sommartid", + @"Norfolktid", + @"Nepalesisk tid", + @"Naurutid", + @"Newfoundland normaltid", + @"Niuetid", + @"Nyzeeländsk sommartid", + @"Nyzeelandsk sommartid", + @"Nyzeeländsk normaltid", + @"Nyzeelandsk normaltid", + @"Omsk normaltid", + @"Oraltid", + @"Amerikansk-Stillahavs sommartid", + @"Peruansk tid", + @"Kamchatka tid", + @"Papua Nya Guineansk tid", + @"Phoenixöarnas tid", + @"Phoenixoarnas tid", + @"Filippinsk tid", + @"Pakistansk normaltid", + @"Saint-Pierre och Miquelon sommartid", + @"Saint-Pierre och Miquelon normaltid", + @"Pohnpei normaltid", + @"Pitcairnöarnas normaltid", + @"Pitcairnoarnas normaltid", + @"Amerikansk-Stillahavs normaltid", + @"Palauisk tid", + @"Paraguayansk sommartid", + @"Pyongyang tid", + @"Paraguayansk tid", + @"Qyzylordatid", + @"Réuniontid", + @"Reuniontid", + @"Sachalintid", + @"Samaratid", + @"Sydafrikansk 
normaltid", + @"Solomonöarnas tid", + @"Solomonoarnas tid", + @"Seychellisk tid", + @"Singaporiansk tid", + @"Surinamesisk tid", + @"Samoansk normaltid", + @"Tahitisk tid", + @"Tadzjikisk tid", + @"Östtimorstid", + @"Osttimorstid", + @"Turkmenistansk tid", + @"Tongatid", + @"Turkisk tid", + @"Tuvalutid", + @"Ulaanbaatar sommartid", + @"Ulaanbaatartid", + @"Uruguayansk sommartid", + @"Uruguayansk tid", + @"Uzbekistansk tid", + @"Venezuelansk normaltid", + @"Vladivostoktid", + @"Vanuatuansk tid", + @"Västafrikansk sommartid", + @"Vastafrikansk sommartid", + @"Västafrikansk tid", + @"Vastafrikansk tid", + @"Västeuropeisk sommartid", + @"Vasteuropeisk sommartid", + @"Västeuropeisk tid", + @"Vasteuropeisk tid", + @"Wallis och Futuna tid", + @"Västgrönländsk sommartid", + @"Vastgronlandsk sommartid", + @"Västgrönländsk tid", + @"Vastgronlandsk tid", + @"Västindonesisk tid", + @"Vastindonesisk tid", + @"Östindonesisk tid", + @"Ostindonesisk tid", + @"Centralindonesisk tid", + @"Västsahara sommartid", + @"Vastsahara sommartid", + @"Västsamoansk tid", + @"Vastsamoansk tid", + @"Västsahara normaltid", + @"Vastsahara normaltid", + @"Jakutsktid", + @"Yekaterinburgtid", + @"Acre Time", + @"Afghanistan Standard Time", + @"Alaskan Standard Time", + @"Anywhere on Earth", + @"Arab Standard Time", + @"Arabian Standard Time", + @"Arabic Standard Time", + @"Argentina Standard Time", + @"Atlantic Standard Time", + @"AUS Central Standard Time", + @"Australian Central Time", + @"AUS Eastern Standard Time", + @"Australian Eastern Time", + @"Australian Eastern Standard Time", + @"Australian Central Daylight Time", + @"Australian Eastern Daylight Time", + @"Azerbaijan Standard Time", + @"Azores Standard Time", + @"Bahia Standard Time", + @"Bangladesh Standard Time", + @"Belarus Standard Time", + @"Canada Central Standard Time", + @"Cape Verde Standard Time", + @"Caucasus Standard Time", + @"Cen. 
Australia Standard Time", + @"Central America Standard Time", + @"Central Asia Standard Time", + @"Central Brazilian Standard Time", + @"Central Daylight Time", + @"Europe Central Time", + @"European Central Time", + @"Central Europe Standard Time", + @"Central Europe Std Time", + @"Central European Std Time", + @"Central European Standard Time", + @"Central Pacific Standard Time", + @"Central Standard Time", + @"Central Standard Time (Mexico)", + @"China Standard Time", + @"Dateline Standard Time", + @"E. Africa Standard Time", + @"E. Australia Standard Time", + @"E. Europe Standard Time", + @"E. South America Standard Time", + @"Eastern Time", + @"Eastern Daylight Time", + @"Eastern Standard Time", + @"Eastern Standard Time (Mexico)", + @"Egypt Standard Time", + @"Ekaterinburg Standard Time", + @"Fiji Standard Time", + @"FLE Standard Time", + @"Georgian Standard Time", + @"GMT Standard Time", + @"Greenland Standard Time", + @"Greenwich Standard Time", + @"GTB Standard Time", + @"Hawaiian Standard Time", + @"India Standard Time", + @"Iran Standard Time", + @"Israel Standard Time", + @"Jordan Standard Time", + @"Kaliningrad Standard Time", + @"Kamchatka Standard Time", + @"Korea Standard Time", + @"Libya Standard Time", + @"Line Islands Standard Time", + @"Magadan Standard Time", + @"Mauritius Standard Time", + @"Mid-Atlantic Standard Time", + @"Middle East Standard Time", + @"Montevideo Standard Time", + @"Morocco Standard Time", + @"Mountain Standard Time", + @"Mountain Standard Time (Mexico)", + @"Myanmar Standard Time", + @"N. 
Central Asia Standard Time", + @"Namibia Standard Time", + @"Nepal Standard Time", + @"New Zealand Standard Time", + @"Newfoundland Standard Time", + @"North Asia East Standard Time", + @"North Asia Standard Time", + @"North Korea Standard Time", + @"Pacific SA Standard Time", + @"Pacific Standard Time", + @"Pacific Daylight Time", + @"Pacific Time", + @"Pacific Standard Time", + @"Pacific Standard Time (Mexico)", + @"Pakistan Standard Time", + @"Paraguay Standard Time", + @"Romance Standard Time", + @"Russia Time Zone 1", + @"Russia Time Zone 2", + @"Russia Time Zone 3", + @"Russia Time Zone 4", + @"Russia Time Zone 5", + @"Russia Time Zone 6", + @"Russia Time Zone 7", + @"Russia Time Zone 8", + @"Russia Time Zone 9", + @"Russia Time Zone 10", + @"Russia Time Zone 11", + @"Russian Standard Time", + @"SA Eastern Standard Time", + @"SA Pacific Standard Time", + @"SA Western Standard Time", + @"Samoa Standard Time", + @"SE Asia Standard Time", + @"Singapore Standard Time", + @"Singapore Time", + @"South Africa Standard Time", + @"Sri Lanka Standard Time", + @"Syria Standard Time", + @"Taipei Standard Time", + @"Tasmania Standard Time", + @"Tokyo Standard Time", + @"Tonga Standard Time", + @"Turkey Standard Time", + @"Ulaanbaatar Standard Time", + @"US Eastern Standard Time", + @"US Mountain Standard Time", + @"Mountain", + @"Venezuela Standard Time", + @"Vladivostok Standard Time", + @"W. Australia Standard Time", + @"W. Central Africa Standard Time", + @"W. 
Europe Standard Time", + @"West Asia Standard Time", + @"West Pacific Standard Time", + @"Yakutsk Standard Time", + @"Pacific Daylight Saving Time", + @"Austrialian Western Daylight Time", + @"Austrialian West Daylight Time", + @"Australian Western Daylight Time", + @"Australian West Daylight Time", + @"Colombia Time", + @"Hong Kong Time", + @"Central Europe Time", + @"Central European Time", + @"Central Europe Summer Time", + @"Central European Summer Time", + @"Central Europe Standard Time", + @"Central European Standard Time", + @"Central Europe Std Time", + @"Central European Std Time", + @"West Coast Time", + @"West Coast", + @"Central Time", + @"Central", + @"Pacific", + @"Eastern" + }; + public const string BaseTimeZoneSuffixRegex = @"((\s+|-)(friendly|compatible))?(\s*|-)?(ti(d|me))(zone?)?"; + public static readonly string LocationTimeSuffixRegex = $@"({BaseTimeZoneSuffixRegex})\b"; + public static readonly string TimeZoneEndRegex = $@"({BaseTimeZoneSuffixRegex})$"; + public static readonly IList AmbiguousTimezoneList = new List + { + @"bit", + @"get", + @"art", + @"cast", + @"eat", + @"lint", + @"mat", + @"most", + @"west", + @"vet", + @"wet", + @"cot", + @"pt", + @"et", + @"eastern", + @"pacific", + @"central", + @"mountain", + @"west coast" + }; + public static readonly Dictionary AbbrToMinMapping = new Dictionary + { + { @"acwst", 525 }, + { @"almt", 360 }, + { @"anat", 720 }, + { @"aqtt", 300 }, + { @"bnt", 480 }, + { @"bot", -240 }, + { @"brst", -120 }, + { @"brt", -180 }, + { @"cat", 120 }, + { @"cct", 390 }, + { @"chadt", 825 }, + { @"chast", 765 }, + { @"chot", 480 }, + { @"chst", 600 }, + { @"chut", 600 }, + { @"ckt", -600 }, + { @"clst", -180 }, + { @"clt", -240 }, + { @"cxt", 420 }, + { @"easst", -300 }, + { @"east", -360 }, + { @"egt", -60 }, + { @"fet", 180 }, + { @"fkst", -180 }, + { @"fnt", -120 }, + { @"galt", -360 }, + { @"gamt", -540 }, + { @"gft", -180 }, + { @"gilt", 720 }, + { @"gyt", -240 }, + { @"hovt", 420 }, + { @"ict", 420 }, + { 
@"idt", 180 }, + { @"kgt", 360 }, + { @"kost", 660 }, + { @"lhdt", 660 }, + { @"lhst", 630 }, + { @"mart", -10000 }, + { @"mht", 720 }, + { @"mmt", 390 }, + { @"mvt", 300 }, + { @"myt", 480 }, + { @"nct", 660 }, + { @"nft", 660 }, + { @"nrt", 720 }, + { @"nut", -660 }, + { @"omst", 360 }, + { @"orat", 300 }, + { @"pet", -300 }, + { @"pgt", 600 }, + { @"phot", 780 }, + { @"pht", 480 }, + { @"pmdt", -120 }, + { @"pmst", -180 }, + { @"pont", 660 }, + { @"pwt", 540 }, + { @"qyzt", 360 }, + { @"ret", 240 }, + { @"sakt", 660 }, + { @"samt", 240 }, + { @"sct", 240 }, + { @"srt", -180 }, + { @"taht", -600 }, + { @"tjt", 300 }, + { @"tlt", 540 }, + { @"tmt", 300 }, + { @"tvt", 720 }, + { @"ulast", 540 }, + { @"uyst", -120 }, + { @"uyt", -180 }, + { @"uzt", 300 }, + { @"vut", 660 }, + { @"wft", 720 }, + { @"wgst", -120 }, + { @"wgt", -180 }, + { @"wib", 420 }, + { @"wit", 540 }, + { @"wita", 480 }, + { @"wst", -10000 }, + { @"wt", 0 }, + { @"abst", 180 }, + { @"acdt", 630 }, + { @"acst", 570 }, + { @"act", -10000 }, + { @"adt", -10000 }, + { @"aedt", 660 }, + { @"aest", 600 }, + { @"aet", 600 }, + { @"aft", 270 }, + { @"akdt", -480 }, + { @"akst", -540 }, + { @"amst", -10000 }, + { @"amt", -10000 }, + { @"aoe", -720 }, + { @"arbst", 180 }, + { @"arst", 180 }, + { @"art", -180 }, + { @"ast", -10000 }, + { @"awdt", 540 }, + { @"awst", 480 }, + { @"azost", 0 }, + { @"azot", -60 }, + { @"azst", 300 }, + { @"azt", 240 }, + { @"bit", -720 }, + { @"bst", -10000 }, + { @"btt", 360 }, + { @"cadt", -360 }, + { @"cast", 480 }, + { @"cbst", -240 }, + { @"cbt", -240 }, + { @"ccst", -360 }, + { @"cdt", -10000 }, + { @"cdtm", -360 }, + { @"cest", 120 }, + { @"cet", 60 }, + { @"cot", -300 }, + { @"cst", -10000 }, + { @"cstm", -360 }, + { @"ct", -360 }, + { @"cvt", -60 }, + { @"eat", 180 }, + { @"ect", -10000 }, + { @"edt", -240 }, + { @"edtm", -300 }, + { @"eest", 180 }, + { @"eet", 120 }, + { @"egst", 0 }, + { @"esast", -180 }, + { @"esat", -180 }, + { @"est", -300 }, + { @"estm", -300 }, 
+ { @"et", -300 }, + { @"fjst", 780 }, + { @"fjt", 720 }, + { @"get", 240 }, + { @"gmt", 0 }, + { @"gndt", -180 }, + { @"gnst", -180 }, + { @"gst", -10000 }, + { @"gtbst", 120 }, + { @"hadt", -540 }, + { @"hast", -600 }, + { @"hdt", -540 }, + { @"hkt", 480 }, + { @"hst", -600 }, + { @"irdt", 270 }, + { @"irkt", 480 }, + { @"irst", 210 }, + { @"isdt", 120 }, + { @"isst", 120 }, + { @"ist", -10000 }, + { @"jdt", 120 }, + { @"jst", 540 }, + { @"krat", 420 }, + { @"kst", -10000 }, + { @"lint", 840 }, + { @"magst", 720 }, + { @"magt", 660 }, + { @"mat", -120 }, + { @"mdt", -360 }, + { @"mdtm", -420 }, + { @"mest", 120 }, + { @"most", 0 }, + { @"msk+1", 240 }, + { @"msk+2", 300 }, + { @"msk+3", 360 }, + { @"msk+4", 420 }, + { @"msk+5", 480 }, + { @"msk+6", 540 }, + { @"msk+7", 600 }, + { @"msk+8", 660 }, + { @"msk+9", 720 }, + { @"msk-1", 120 }, + { @"msk", 180 }, + { @"mst", -420 }, + { @"mstm", -420 }, + { @"mut", 240 }, + { @"mvst", -180 }, + { @"myst", 390 }, + { @"ncast", 420 }, + { @"ndt", -150 }, + { @"nmdt", 60 }, + { @"nmst", 60 }, + { @"npt", 345 }, + { @"nst", -210 }, + { @"nzdt", 780 }, + { @"nzst", 720 }, + { @"nzt", 720 }, + { @"pdst", -420 }, + { @"pdt", -420 }, + { @"pdtm", -480 }, + { @"pett", 720 }, + { @"pkt", 300 }, + { @"psast", -240 }, + { @"psat", -240 }, + { @"pst", -480 }, + { @"pstm", -480 }, + { @"pt", -480 }, + { @"pyst", -10000 }, + { @"pyt", -10000 }, + { @"rst", 60 }, + { @"saest", -180 }, + { @"sapst", -300 }, + { @"sast", 120 }, + { @"sawst", -240 }, + { @"sbt", 660 }, + { @"sgt", 480 }, + { @"slt", 330 }, + { @"smst", 780 }, + { @"snst", 480 }, + { @"sst", -10000 }, + { @"tadt", 600 }, + { @"tast", 600 }, + { @"tha", 420 }, + { @"tist", 480 }, + { @"tost", 840 }, + { @"tot", 780 }, + { @"trt", 180 }, + { @"tst", 540 }, + { @"ulat", 480 }, + { @"utc", 0 }, + { @"vet", -240 }, + { @"vlat", 600 }, + { @"wast", 120 }, + { @"wat", -10000 }, + { @"west", 60 }, + { @"wet", 0 }, + { @"wpst", 600 }, + { @"yakt", 540 }, + { @"yekt", 300 } + }; + 
public static readonly Dictionary FullToMinMapping = new Dictionary + { + { @"centralaustralisk sommar", 630 }, + { @"centralaustralisk normal", 570 }, + { @"acre", -300 }, + { @"central västaustralisk normal", 525 }, + { @"central vastaustralisk normal", 525 }, + { @"atlantisk sommar", -180 }, + { @"östaustralisk sommar", 660 }, + { @"ostaustralisk sommar", 660 }, + { @"östaustralisk normal", 600 }, + { @"ostaustralisk normal", 600 }, + { @"afghansk", 270 }, + { @"alaska sommar", -480 }, + { @"alaska normal", -540 }, + { @"almaty", 360 }, + { @"amazonsk sommar", -180 }, + { @"armenisk", 240 }, + { @"amazonsk", -240 }, + { @"anadyr", 720 }, + { @"aqtöbe/aktiubinsk", 300 }, + { @"aqtobe/aktiubinsk", 300 }, + { @"argentinsk", -180 }, + { @"arabisk normal", 180 }, + { @"atlantisk normal", -240 }, + { @"västaustralisk normal", 480 }, + { @"vastaustralisk normal", 480 }, + { @"azorisk sommar", 0 }, + { @"azorisk", -60 }, + { @"azerbaijansk sommar", 300 }, + { @"azerbaijansk", 240 }, + { @"bruneisk", 480 }, + { @"boliviansk", -240 }, + { @"brasiliansk sommar", -120 }, + { @"brasiliansk", -180 }, + { @"brittisk sommar", 60 }, + { @"bangladeshisk normal", 360 }, + { @"bhutansk", 360 }, + { @"centralafrikansk", 120 }, + { @"kokosöarnas", 390 }, + { @"kokosoarnas", 390 }, + { @"centralamerikansk sommar", -300 }, + { @"kubansk sommar", -240 }, + { @"centraleuropeisk sommar", 120 }, + { @"centraleuropeisk", 60 }, + { @"chathamöarnas sommar", 825 }, + { @"chathamoarnas sommar", 825 }, + { @"chathamöarna normal", 765 }, + { @"chathamoarna normal", 765 }, + { @"tjojbalsan", 480 }, + { @"chamorro normal", 600 }, + { @"chuuk", 600 }, + { @"cooköarnas", -600 }, + { @"cookoarnas", -600 }, + { @"chilensk sommar", -180 }, + { @"chilensk normal", -240 }, + { @"colombiansk", -300 }, + { @"centralamerikansk normal", -360 }, + { @"kubansk normal", -300 }, + { @"kinesisk normal", 480 }, + { @"kapverdiansk", -60 }, + { @"julö", 420 }, + { @"julo", 420 }, + { @"påsköns sommar", -300 }, + { 
@"paskons sommar", -300 }, + { @"påsköns normal", -360 }, + { @"paskons normal", -360 }, + { @"östafrikansk", 180 }, + { @"ostafrikansk", 180 }, + { @"equadoriansk", -300 }, + { @"östamerikansk sommar", -240 }, + { @"ostamerikansk sommar", -240 }, + { @"östeuropeisk sommar", 180 }, + { @"osteuropeisk sommar", 180 }, + { @"östeuropeisk", 120 }, + { @"osteuropeisk", 120 }, + { @"östgrönländsk sommar", 0 }, + { @"ostgronlandsk sommar", 0 }, + { @"östgrönländsk", -60 }, + { @"ostgronlandsk", -60 }, + { @"östamerikansk normal", -300 }, + { @"ostamerikansk normal", -300 }, + { @"ytterligare-östeuropeisk", 180 }, + { @"ytterligare-osteuropeisk", 180 }, + { @"fiji sommar", 780 }, + { @"fiji", 720 }, + { @"falklandsöarnas sommar", -180 }, + { @"falklandsoarnas sommar", -180 }, + { @"fernando de noronha", -120 }, + { @"galápagos", -360 }, + { @"gambiers", -540 }, + { @"georgisk normal", 240 }, + { @"franska guyana", -180 }, + { @"gilbertöarnas", 720 }, + { @"gilbertoarnas", 720 }, + { @"greenwich", 0 }, + { @"gulf normal", 240 }, + { @"guyana", -240 }, + { @"hawaiisk sommar", -540 }, + { @"hongkong", 480 }, + { @"hovd", 420 }, + { @"hawaiisk normal", -600 }, + { @"indokinesisk normal", 420 }, + { @"israelisk sommar", 180 }, + { @"iransk sommar", 270 }, + { @"irkutsk", 480 }, + { @"iransk normal", 210 }, + { @"irländsk normal", 60 }, + { @"irlandsk normal", 60 }, + { @"indisk normal", 330 }, + { @"israelisk normal", 120 }, + { @"japansk normal", 540 }, + { @"kirgizistansk", 360 }, + { @"kosrae", 660 }, + { @"krasnoyarsk", 420 }, + { @"koreansk normal", 540 }, + { @"lord howe sommar", 660 }, + { @"lord howe normal", 630 }, + { @"linjeöarnas", 840 }, + { @"linjeoarnas", 840 }, + { @"magadan", 660 }, + { @"marquesas", -10000 }, + { @"amerikansk-mountain sommar", -360 }, + { @"marshallöarnas", 720 }, + { @"marshalloarnas", 720 }, + { @"burmesisk", 390 }, + { @"moskva normal", 180 }, + { @"amerikansk-mountain normal", -420 }, + { @"mauritisk", 240 }, + { @"maldivisk", 300 }, + { 
@"malaysisk", 480 }, + { @"nykaledonsk", 660 }, + { @"newfoundland sommar", -150 }, + { @"norfolk", 660 }, + { @"nepalesisk", 345 }, + { @"nauru", 720 }, + { @"newfoundland normal", -210 }, + { @"niue", -660 }, + { @"nyzeeländsk sommar", 780 }, + { @"nyzeelandsk sommar", 780 }, + { @"nyzeeländsk normal", 720 }, + { @"nyzeelandsk normal", 720 }, + { @"omsk normal", 360 }, + { @"oral", 300 }, + { @"amerikansk-stillahavs sommar", -420 }, + { @"peruansk", -300 }, + { @"kamchatka", 720 }, + { @"papua nya guineansk", 600 }, + { @"phoenixöarnas", 780 }, + { @"phoenixoarnas", 780 }, + { @"filippinsk", 480 }, + { @"pakistansk normal", 300 }, + { @"saint-pierre och miquelon sommar", -120 }, + { @"saint-pierre och miquelon normal", -180 }, + { @"pohnpei normal", 660 }, + { @"pitcairnöarnas normal", -480 }, + { @"pitcairnoarnas normal", -480 }, + { @"amerikansk-stillahavs normal", -480 }, + { @"palauisk", 540 }, + { @"paraguayansk sommar", -180 }, + { @"pyongyang", 510 }, + { @"paraguayansk", -240 }, + { @"qyzylorda", 360 }, + { @"réunion", 240 }, + { @"reunion", 240 }, + { @"sachalin", 660 }, + { @"samara", 240 }, + { @"sydafrikansk normal", 120 }, + { @"solomonöarnas", 660 }, + { @"solomonoarnas", 660 }, + { @"seychellisk", 240 }, + { @"singaporiansk", 480 }, + { @"surinamesisk", -180 }, + { @"samoansk normal", -660 }, + { @"tahitisk", -600 }, + { @"tadzjikisk", 300 }, + { @"östtimors", 540 }, + { @"osttimors", 540 }, + { @"turkmenistansk", 300 }, + { @"tonga", 780 }, + { @"turkisk", 180 }, + { @"tuvalu", 720 }, + { @"ulaanbaatar sommar", 540 }, + { @"ulaanbaatar", 480 }, + { @"uruguayansk sommar", -120 }, + { @"uruguayansk", -180 }, + { @"uzbekistansk", 300 }, + { @"venezuelansk normal", -240 }, + { @"vladivostok", 600 }, + { @"vanuatuansk", 660 }, + { @"västafrikansk sommar", 120 }, + { @"vastafrikansk sommar", 120 }, + { @"västafrikansk", 60 }, + { @"vastafrikansk", 60 }, + { @"västeuropeisk sommar", 60 }, + { @"vasteuropeisk sommar", 60 }, + { @"västeuropeisk", 0 }, + { 
@"vasteuropeisk", 0 }, + { @"wallis och futuna", 720 }, + { @"västgrönländsk sommar", -120 }, + { @"vastgronlandsk sommar", -120 }, + { @"västgrönländsk", -180 }, + { @"vastgronlandsk", -180 }, + { @"västindonesisk", 420 }, + { @"vastindonesisk", 420 }, + { @"östindonesisk", 540 }, + { @"ostindonesisk", 540 }, + { @"centralindonesisk", 480 }, + { @"västsahara sommar", 60 }, + { @"vastsahara sommar", 60 }, + { @"västsamoansk", 780 }, + { @"vastsamoansk", 780 }, + { @"västsahara normal", 0 }, + { @"vastsahara normal", 0 }, + { @"jakutsk", 540 }, + { @"yekaterinburg", 300 }, + { @"beijing", 480 }, + { @"shanghai", 480 }, + { @"shenzhen", 480 }, + { @"suzhou", 480 }, + { @"tianjian", 480 }, + { @"chengdu", 480 }, + { @"guangzhou", 480 }, + { @"wuxi", 480 }, + { @"xiamen", 480 }, + { @"chongqing", 480 }, + { @"shenyang", 480 }, + { @"china", 480 }, + { @"redmond", -480 }, + { @"seattle", -480 }, + { @"bellevue", -480 }, + { @"afghanistan standard", 270 }, + { @"alaskan standard", -540 }, + { @"anywhere on earth", -720 }, + { @"arab standard", 180 }, + { @"arabian standard", 180 }, + { @"arabic standard", 180 }, + { @"argentina standard", -180 }, + { @"atlantic standard", -240 }, + { @"aus central standard", 570 }, + { @"aus eastern standard", 600 }, + { @"australian eastern", 600 }, + { @"australian eastern standard", 600 }, + { @"australian central daylight", 630 }, + { @"australian eastern daylight", 660 }, + { @"azerbaijan standard", 240 }, + { @"azores standard", -60 }, + { @"bahia standard", -180 }, + { @"bangladesh standard", 360 }, + { @"belarus standard", 180 }, + { @"canada central standard", -360 }, + { @"cape verde standard", -60 }, + { @"caucasus standard", 240 }, + { @"cen. 
australia standard", 570 }, + { @"central australia standard", 570 }, + { @"central america standard", -360 }, + { @"central asia standard", 360 }, + { @"central brazilian standard", -240 }, + { @"central", -360 }, + { @"central daylight", -10000 }, + { @"central daylight saving", -10000 }, + { @"central europe", 60 }, + { @"central european", 60 }, + { @"central europe std", 60 }, + { @"central european std", 60 }, + { @"central europe standard", 60 }, + { @"central european standard", 60 }, + { @"central europe summer", 120 }, + { @"central european summer", 120 }, + { @"central pacific standard", 660 }, + { @"central standard time (mexico)", -360 }, + { @"central standard", -360 }, + { @"china standard", 480 }, + { @"dateline standard", -720 }, + { @"e. africa standard", 180 }, + { @"e. australia standard", 600 }, + { @"e. europe standard", 120 }, + { @"e. south america standard", -180 }, + { @"europe central", 60 }, + { @"european central", 60 }, + { @"eastern", -300 }, + { @"eastern daylight", -10000 }, + { @"eastern daylight saving", -10000 }, + { @"eastern standard time (mexico)", -300 }, + { @"eastern standard", -300 }, + { @"egypt standard", 120 }, + { @"ekaterinburg standard", 300 }, + { @"fiji standard", 720 }, + { @"fle standard", 120 }, + { @"georgian standard", 240 }, + { @"gmt standard", 0 }, + { @"greenland standard", -180 }, + { @"greenwich standard", 0 }, + { @"gtb standard", 120 }, + { @"hawaiian standard", -600 }, + { @"india standard", 330 }, + { @"iran standard", 210 }, + { @"israel standard", 120 }, + { @"jordan standard", 120 }, + { @"kaliningrad standard", 120 }, + { @"kamchatka standard", 720 }, + { @"korea standard", 540 }, + { @"libya standard", 120 }, + { @"line islands standard", 840 }, + { @"magadan standard", 660 }, + { @"mauritius standard", 240 }, + { @"mid-atlantic standard", -120 }, + { @"middle east standard", 120 }, + { @"montevideo standard", -180 }, + { @"morocco standard", 0 }, + { @"mountain", -420 }, + { @"mountain 
daylight", -360 }, + { @"mountain daylight saving", -360 }, + { @"mountain standard", -420 }, + { @"mountain standard time (mexico)", -420 }, + { @"myanmar standard", 390 }, + { @"n. central asia standard", 420 }, + { @"namibia standard", 60 }, + { @"nepal standard", 345 }, + { @"new zealand standard", 720 }, + { @"newfoundland standard", -210 }, + { @"north asia east standard", 480 }, + { @"north asia standard", 420 }, + { @"north korea standard", 510 }, + { @"west coast", -420 }, + { @"pacific", -480 }, + { @"pacific daylight", -420 }, + { @"pacific daylight saving", -420 }, + { @"pacific standard", -480 }, + { @"pacific standard time (mexico)", -480 }, + { @"pacific sa standard", -240 }, + { @"pakistan standard", 300 }, + { @"paraguay standard", -240 }, + { @"romance standard", 60 }, + { @"russia time zone 1", 120 }, + { @"russia time zone 2", 180 }, + { @"russia time zone 3", 240 }, + { @"russia time zone 4", 300 }, + { @"russia time zone 5", 360 }, + { @"russia time zone 6", 420 }, + { @"russia time zone 7", 480 }, + { @"russia time zone 8", 540 }, + { @"russia time zone 9", 600 }, + { @"russia time zone 10", 660 }, + { @"russia time zone 11", 720 }, + { @"russian standard", 180 }, + { @"sa eastern standard", -180 }, + { @"sa pacific standard", -300 }, + { @"sa western standard", -240 }, + { @"samoa standard", -660 }, + { @"se asia standard", 420 }, + { @"singapore standard", 480 }, + { @"singapore", 480 }, + { @"south africa standard", 120 }, + { @"sri lanka standard", 330 }, + { @"syria standard", 120 }, + { @"taipei standard", 480 }, + { @"tasmania standard", 600 }, + { @"tokyo standard", 540 }, + { @"tonga standard", 780 }, + { @"turkey standard", 180 }, + { @"ulaanbaatar standard", 480 }, + { @"us eastern standard", -300 }, + { @"us mountain standard", -420 }, + { @"venezuela standard", -240 }, + { @"vladivostok standard", 600 }, + { @"w. australia standard", 480 }, + { @"w. central africa standard", 60 }, + { @"w. 
europe standard", 0 }, + { @"western european", 0 }, + { @"west europe standard", 0 }, + { @"west europe std", 0 }, + { @"western europe standard", 0 }, + { @"western europe summer", 60 }, + { @"w. europe summer", 60 }, + { @"western european summer", 60 }, + { @"west europe summer", 60 }, + { @"west asia standard", 300 }, + { @"west pacific standard", 600 }, + { @"yakutsk standard", 540 }, + { @"australian western daylight", 540 }, + { @"australian west daylight", 540 }, + { @"austrialian western daylight", 540 }, + { @"austrialian west daylight", 540 }, + { @"australian western daylight saving", 540 }, + { @"australian west daylight saving", 540 }, + { @"austrialian western daylight saving", 540 }, + { @"austrialian west daylight saving", 540 }, + { @"colombia", -300 }, + { @"hong kong", 480 }, + { @"madrid", 60 }, + { @"bilbao", 60 }, + { @"seville", 60 }, + { @"valencia", 60 }, + { @"malaga", 60 }, + { @"las Palmas", 60 }, + { @"zaragoza", 60 }, + { @"alicante", 60 }, + { @"alche", 60 }, + { @"oviedo", 60 }, + { @"gijón", 60 }, + { @"avilés", 60 } + }; + public static readonly IList MajorLocations = new List + { + @"Dominican Republic", + @"Dominikanska republiken", + @"Dominica", + @"Guinea Bissau", + @"Guinea-Bissau", + @"Guinea", + @"Equatorial Guinea", + @"Ekvatorialguinea", + @"Papua New Guinea", + @"Papua Nya Guinea", + @"New York City", + @"New York", + @"York", + @"Mexico City", + @"New Mexico", + @"Mexico", + @"Aberdeen", + @"Adelaide", + @"Anaheim", + @"Atlanta", + @"Auckland", + @"Austin", + @"Bangkok", + @"Baltimore", + @"Baton Rouge", + @"Beijing", + @"Belfast", + @"Birmingham", + @"Bolton", + @"Boston", + @"Bournemouth", + @"Bradford", + @"Brisbane", + @"Bristol", + @"Calgary", + @"Canberra", + @"Cardiff", + @"Charlotte", + @"Chicago", + @"Christchurch", + @"Colchester", + @"Colorado Springs", + @"Coventry", + @"Dallas", + @"Denver", + @"Derby", + @"Detroit", + @"Dubai", + @"Dublin", + @"Dudley", + @"Dunedin", + @"Edinburgh", + @"Edmonton", + @"El 
Paso", + @"Glasgow", + @"Gold Coast", + @"Guldkusten", + @"Hamilton", + @"Hialeah", + @"Houston", + @"Ipswich", + @"Jacksonville", + @"Jersey City", + @"Kansas City", + @"Kingston-upon-Hull", + @"Leeds", + @"Leicester", + @"Lexington", + @"Lincoln", + @"Liverpool", + @"London", + @"Long Beach", + @"Los Angeles", + @"Louisville", + @"Lubbock", + @"Luton", + @"Madison", + @"Manchester", + @"Mansfield", + @"Melbourne", + @"Memphis", + @"Mesa", + @"Miami", + @"Middlesbrough", + @"Milan", + @"Milano", + @"Milton Keynes", + @"Minneapolis", + @"Montréal", + @"Montreal", + @"Nashville", + @"New Orleans", + @"Newark", + @"Newcastle-upon-Tyne", + @"Newcastle", + @"Northampton", + @"Norwich", + @"Nottingham", + @"Oklahoma City", + @"Oldham", + @"Omaha", + @"Orlando", + @"Ottawa", + @"Perth", + @"Peterborough", + @"Philadelphia", + @"Phoenix", + @"Plymouth", + @"Portland", + @"Portsmouth", + @"Preston", + @"Québec City", + @"Quebec City", + @"Québec", + @"Quebec", + @"Raleigh", + @"Reading", + @"Redmond", + @"Richmond", + @"Rome", + @"Rom", + @"San Antonio", + @"San Diego", + @"San Francisco", + @"San José", + @"Santa Ana", + @"Seattle", + @"Sheffield", + @"Southampton", + @"Southend-on-Sea", + @"Spokane", + @"St Louis", + @"St Paul", + @"St Petersburg", + @"St. Louis", + @"St. Paul", + @"St. 
Petersburg", + @"Stockton-on-Tees", + @"Stockton", + @"Stoke-on-Trent", + @"Sunderland", + @"Swansea", + @"Swindon", + @"Sydney", + @"Tampa", + @"Tauranga", + @"Telford", + @"Toronto", + @"Vancouver", + @"Virginia Beach", + @"Walsall", + @"Warrington", + @"Washington", + @"Wellington", + @"Wolverhampton", + @"Abilene", + @"Akron", + @"Albuquerque", + @"Alexandria", + @"Allentown", + @"Amarillo", + @"Anchorage", + @"Ann Arbor", + @"Antioch", + @"Arlington", + @"Arvada", + @"Athens", + @"Athen", + @"Augusta", + @"Aurora", + @"Bakersfield", + @"Beaumont", + @"Bellevue", + @"Berkeley", + @"Billings", + @"Boise", + @"Boulder", + @"Bridgeport", + @"Broken Arrow", + @"Brownsville", + @"Buffalo", + @"Burbank", + @"Cambridge", + @"Cape Coral", + @"Carlsbad", + @"Carrollton", + @"Cary", + @"Cedar Rapids", + @"Centennial", + @"Chandler", + @"Charleston", + @"Chattanooga", + @"Chengdu", + @"Chesapeake", + @"Chongqing", + @"Chula Vista", + @"Cincinnati", + @"Clarksville", + @"Clearwater", + @"Cleveland", + @"Clovis", + @"College Station", + @"Columbia", + @"Columbus", + @"Concord", + @"Coral Springs", + @"Corona", + @"Costa Mesa", + @"Daly City", + @"Davenport", + @"Dayton", + @"Denton", + @"Des Moines", + @"Downey", + @"Durham", + @"Edison", + @"El Cajon", + @"El Monte", + @"Elgin", + @"Elizabeth", + @"Elk Grove", + @"Erie", + @"Escondido", + @"Eugene", + @"Evansville", + @"Everett", + @"Fairfield", + @"Fargo", + @"Farmington Hills", + @"Fayetteville", + @"Fontana", + @"Fort Collins", + @"Fort Lauderdale", + @"Fort Wayne", + @"Fort Worth", + @"Fremont", + @"Fresno", + @"Frisco", + @"Fullerton", + @"Gainesville", + @"Garden Grove", + @"Garland", + @"Gilbert", + @"Glendale", + @"Grand Prairie", + @"Grand Rapids", + @"Green Bay", + @"Greensboro", + @"Gresham", + @"Guangzhou", + @"Hampton", + @"Hartford", + @"Hayward", + @"Henderson", + @"High Point", + @"Hollywood", + @"Honolulu", + @"Huntington Beach", + @"Huntsville", + @"Independence", + @"Indianapolis", + @"Inglewood", + 
@"Irvine", + @"Irving", + @"Jackson", + @"Joliet", + @"Kent", + @"Killeen", + @"Knoxville", + @"Lafayette", + @"Lakeland", + @"Lakewood", + @"Lancaster", + @"Lansing", + @"Laredo", + @"Las Cruces", + @"Las Vegas", + @"Lewisville", + @"Little Rock", + @"Lowell", + @"Macon", + @"McAllen", + @"McKinney", + @"Mesquite", + @"Miami Gardens", + @"Midland", + @"Milwaukee", + @"Miramar", + @"Mobile", + @"Modesto", + @"Montgomery", + @"Moreno Valley", + @"Murfreesboro", + @"Murrieta", + @"Naperville", + @"New Haven", + @"Newport News", + @"Norfolk", + @"Norman", + @"North Charleston", + @"North Las Vegas", + @"Norwalk", + @"Oakland", + @"Oceanside", + @"Odessa", + @"Olathe", + @"Ontario", + @"Orange", + @"Overland Park", + @"Oxnard", + @"Palm Bay", + @"Palmdale", + @"Pasadena", + @"Paterson", + @"Pearland", + @"Pembroke Pines", + @"Peoria", + @"Pittsburgh", + @"Plano", + @"Pomona", + @"Pompano Beach", + @"Providence", + @"Provo", + @"Pueblo", + @"Rancho Cucamonga", + @"Reno", + @"Rialto", + @"Richardson", + @"Riverside", + @"Rochester", + @"Rockford", + @"Roseville", + @"Round Rock", + @"Sacramento", + @"Saint Paul", + @"Salem", + @"Salinas", + @"Salt Lake City", + @"San Bernardino", + @"San Jose", + @"San Mateo", + @"Sandy Springs", + @"Santa Clara", + @"Santa Clarita", + @"Santa Maria", + @"Santa Rosa", + @"Savannah", + @"Scottsdale", + @"Shanghai", + @"Shenyang", + @"Shenzhen", + @"Shreveport", + @"Simi Valley", + @"Sioux Falls", + @"South Bend", + @"Springfield", + @"Stamford", + @"Sterling Heights", + @"Sunnyvale", + @"Surprise", + @"Suzhou", + @"Syracuse", + @"Tacoma", + @"Tallahassee", + @"Temecula", + @"Tempe", + @"Thornton", + @"Thousand Oaks", + @"Tianjing", + @"Toledo", + @"Topeka", + @"Torrance", + @"Tucson", + @"Tulsa", + @"Tyler", + @"Vallejo", + @"Ventura", + @"Victorville", + @"Visalia", + @"Waco", + @"Warren", + @"Waterbury", + @"West Covina", + @"West Jordan", + @"West Palm Beach", + @"West Valley City", + @"Westminster", + @"Wichita", + @"Wichita Falls", + 
@"Wilmington", + @"Winston-Salem", + @"Worcester", + @"Wuxi", + @"Xiamen", + @"Yonkers", + @"Bentonville", + @"Afghanistan", + @"AK", + @"AL", + @"Alabama", + @"Åland", + @"Åland Islands", + @"Alaska", + @"Albania", + @"Albansk", + @"Algeria", + @"American Samoa", + @"Andorra", + @"Angola", + @"Anguilla", + @"Antarctica", + @"Antigua and Barbuda", + @"AR", + @"Argentina", + @"Arizona", + @"Arkansas", + @"Armenia", + @"Aruba", + @"Australia", + @"Austria", + @"AZ", + @"Azerbaijan", + @"Bahamas", + @"Bahrain", + @"Bangladesh", + @"Barbados", + @"Belarus", + @"Belgium", + @"Belize", + @"Benin", + @"Bermuda", + @"Bhutan", + @"Bolivia", + @"Bonaire", + @"Bosnia", + @"Bosnia and Herzegovina", + @"Botswana", + @"Bouvet Island", + @"Brazil", + @"British Indian Ocean Territory", + @"British Virgin Islands", + @"Brunei", + @"Bulgaria", + @"Burkina Faso", + @"Burundi", + @"CA", + @"Cabo Verde", + @"California", + @"Cambodia", + @"Cameroon", + @"Canada", + @"Cayman Islands", + @"Central African Republic", + @"Chad", + @"Chile", + @"China", + @"Christmas Island", + @"CO", + @"Cocos Islands", + @"Colombia", + @"Colorado", + @"Comoros", + @"Congo", + @"Congo (DRC)", + @"Connecticut", + @"Cook Islands", + @"Costa Rica", + @"Côte d’Ivoire", + @"Croatia", + @"CT", + @"Cuba", + @"Curaçao", + @"Cyprus", + @"Czechia", + @"DE", + @"Delaware", + @"Denmark", + @"Djibouti", + @"Ecuador", + @"Egypt", + @"El Salvador", + @"Eritrea", + @"Estonia", + @"eSwatini", + @"Ethiopia", + @"Falkland Islands", + @"Falklands", + @"Faroe Islands", + @"Fiji", + @"Finland", + @"FL", + @"Florida", + @"France", + @"French Guiana", + @"French Polynesia", + @"French Southern Territories", + @"FYROM", + @"GA", + @"Gabon", + @"Gambia", + @"Georgia", + @"Georgia", + @"Germany", + @"Ghana", + @"Gibraltar", + @"Greece", + @"Greenland", + @"Grenada", + @"Guadeloupe", + @"Guam", + @"Guatemala", + @"Guernsey", + @"Guyana", + @"Haiti", + @"Hawaii", + @"Herzegovina", + @"HI", + @"Honduras", + @"Hong Kong", + @"Hungary", 
+ @"IA", + @"Iceland", + @"ID", + @"Idaho", + @"IL", + @"Illinois", + @"IN", + @"India", + @"Indiana", + @"Indonesia", + @"Iowa", + @"Iran", + @"Iraq", + @"Ireland", + @"Isle of Man", + @"Israel", + @"Italy", + @"Ivory Coast", + @"Jamaica", + @"Jan Mayen", + @"Japan", + @"Jersey", + @"Jordan", + @"Kansas", + @"Kazakhstan", + @"Keeling Islands", + @"Kentucky", + @"Kenya", + @"Kiribati", + @"Korea", + @"Kosovo", + @"KS", + @"Kuwait", + @"KY", + @"Kyrgyzstan", + @"LA", + @"Laos", + @"Latvia", + @"Lebanon", + @"Lesotho", + @"Liberia", + @"Libya", + @"Liechtenstein", + @"Lithuania", + @"Louisiana", + @"Luxembourg", + @"MA", + @"Macao", + @"Macedonia", + @"Madagascar", + @"Maine", + @"Malawi", + @"Malaysia", + @"Maldives", + @"Mali", + @"Malta", + @"Marshall Islands", + @"Martinique", + @"Maryland", + @"Massachusetts", + @"Mauritania", + @"Mauritius", + @"Mayotte", + @"MD", + @"ME", + @"MI", + @"Michigan", + @"Micronesia", + @"Minnesota", + @"Mississippi", + @"Missouri", + @"MN", + @"MO", + @"Moldova", + @"Monaco", + @"Mongolia", + @"Montana", + @"Montenegro", + @"Montserrat", + @"Morocco", + @"Mozambique", + @"MS", + @"MT", + @"Myanmar", + @"Namibia", + @"Nauru", + @"NC", + @"ND", + @"NE", + @"Nebraska", + @"Nepal", + @"Netherlands", + @"Nevada", + @"New Caledonia", + @"New Hampshire", + @"New Jersey", + @"New Zealand", + @"NH", + @"Nicaragua", + @"Niger", + @"Nigeria", + @"Niue", + @"NJ", + @"NM", + @"Norfolk Island", + @"North Carolina", + @"North Dakota", + @"North Korea", + @"Northern Mariana Islands", + @"Norway", + @"NV", + @"NY", + @"OH", + @"Ohio", + @"OK", + @"Oklahoma", + @"Oman", + @"OR", + @"Oregon", + @"PA", + @"Pakistan", + @"Palau", + @"Palestinian Authority", + @"Panama", + @"Paraguay", + @"Pennsylvania", + @"Peru", + @"Philippines", + @"Pitcairn Islands", + @"Poland", + @"Portugal", + @"Puerto Rico", + @"Qatar", + @"Réunion", + @"Rhode Island", + @"RI", + @"Romania", + @"Russia", + @"Rwanda", + @"Saba", + @"Saint Barthélemy", + @"Saint Kitts and Nevis", 
+ @"Saint Lucia", + @"Saint Martin", + @"Saint Pierre and Miquelon", + @"Saint Vincent and the Grenadines", + @"Samoa", + @"San Marino", + @"São Tomé and Príncipe", + @"Saudi Arabia", + @"SC", + @"SD", + @"Senegal", + @"Serbia", + @"Seychelles", + @"Sierra Leone", + @"Singapore", + @"Sint Eustatius", + @"Sint Maarten", + @"Slovakia", + @"Slovenia", + @"Solomon Islands", + @"Somalia", + @"South Africa", + @"South Carolina", + @"South Dakota", + @"South Sudan", + @"Spain", + @"Sri Lanka", + @"Sudan", + @"Suriname", + @"Svalbard", + @"Swaziland", + @"Sweden", + @"Switzerland", + @"Syria", + @"Taiwan", + @"Tajikistan", + @"Tanzania", + @"Tennessee", + @"Texas", + @"Thailand", + @"Timor-Leste", + @"TN", + @"Togo", + @"Tokelau", + @"Tonga", + @"Trinidad and Tobago", + @"Tunisia", + @"Turkey", + @"Turkmenistan", + @"Turks and Caicos Islands", + @"Tuvalu", + @"TX", + @"U.S. Outlying Islands", + @"US Outlying Islands", + @"U.S. Virgin Islands", + @"US Virgin Islands", + @"Uganda", + @"UK", + @"Ukraine", + @"United Arab Emirates", + @"United Kingdom", + @"United States", + @"Uruguay", + @"US", + @"USA", + @"UT", + @"Utah", + @"Uzbekistan", + @"VA", + @"Vanuatu", + @"Vatican City", + @"Venezuela", + @"Vermont", + @"Vietnam", + @"Virginia", + @"VT", + @"WA", + @"Wallis and Futuna", + @"West Virginia", + @"WI", + @"Wisconsin", + @"WV", + @"WY", + @"Wyoming", + @"Yemen", + @"Zambia", + @"Zimbabwe", + @"Paris", + @"Tokyo", + @"Shanghai", + @"Sao Paulo", + @"Rio de Janeiro", + @"Rio", + @"Brasília", + @"Brasilia", + @"Recife", + @"Milan", + @"Mumbai", + @"Moscow", + @"Frankfurt", + @"Munich", + @"Berlim", + @"Madrid", + @"Lisbon", + @"Warsaw", + @"Johannesburg", + @"Seoul", + @"Istanbul", + @"Kuala Kumpur", + @"Jakarta", + @"Amsterdam", + @"Brussels", + @"Valencia", + @"Seville", + @"Bilbao", + @"Malaga", + @"Las Palmas", + @"Zaragoza", + @"Alicante", + @"Elche", + @"Oviedo", + @"Gijón", + @"Avilés", + @"West Coast", + @"Central", + @"Pacific", + @"Eastern", + @"Mountain" + }; + } 
+} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/TimeZoneDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/TimeZoneDefinitions.tt new file mode 100644 index 0000000000..dac607d0c5 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Swedish/TimeZoneDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Swedish\Swedish-TimeZone.yaml"; + this.Language = "Swedish"; + this.ClassName = "TimeZoneDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/ChoiceDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/ChoiceDefinitions.cs new file mode 100644 index 0000000000..4c1797efa2 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/ChoiceDefinitions.cs @@ -0,0 +1,30 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Turkish\Turkish-Choice.yaml +// - Language: Turkish +// - ClassName: ChoiceDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Turkish +{ + using System; + using System.Collections.Generic; + + public static class ChoiceDefinitions + { + public const string LangMarker = @"Tr"; + public const string TokenizerRegex = @"[^\w\d\u00E0-\u00FC]"; + public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)"; + public static readonly string TrueRegex = $@"\b(do[gğ]ru|evet|d|e|tabii?|tamam|kat[ıi]l[ıi]yorum)\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?"; + public static readonly string FalseRegex = $@"\b(yanl[ıi][sş]|hay[ıi]r|h|y|(do[gğ]ru|tamam)\s+de[gğ]il|kat[ıi]lm[ıi]yorum)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/ChoiceDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/ChoiceDefinitions.tt new file mode 100644 index 0000000000..15ca163745 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/ChoiceDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Turkish\Turkish-Choice.yaml"; + this.Language = "Turkish"; + this.ClassName = "ChoiceDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/DateTimeDefinitions.cs index d08a8c5406..7f7eb3c4f5 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/DateTimeDefinitions.cs @@ -21,22 +21,25 @@ namespace Microsoft.Recognizers.Definitions.Turkish public static class DateTimeDefinitions { + public const string LangMarker = @"Tur"; public const bool CheckBothBeforeAfter = 
true; - public const string TillRegex = @"(?\b(kadar|dek|değin)\b|-|—|——|–)"; - public const string RangeConnectorRegex = @"(?(ile|ila|yle|'le|'la|'?tan|'?ten|(günün|'|n)?den|'?dan|ve)\b|(-|—|——|–))"; - public const string RelativeRegex = @"(?ertesi|(bir\s+)?sonraki|gelecek|bu|geçen|son|geçtiğimiz|(bir\s+)?önceki|evvelki|önümüzdeki|o(?=\s+gün))"; - public const string StrictRelativeRegex = @"(?ertesi|(bir\s+)?sonraki|gelecek|bu|geçen|son|geçtiğimiz|(bir\s+)?önceki|evvelki|önümüzdeki)"; + public static readonly string TillRegex = $@"(?\b(kadar|dek\b|değin)|{BaseDateTime.RangeConnectorSymbolRegex})"; + public static readonly string TillConnectorRegex = $@"(?('?tan|'?ten|'?den|'?dan|ile)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; + public static readonly string RangeConnectorRegex = $@"(?(ile|ila|yle|'l[ea]|(?ertesi|(bir\s+)?(sonraki|dahaki)|gelecek|bu|geçen|geçmiş|son|geçtiğimiz|(?ertesi|(bir\s+)?(sonraki|dahaki)|gelecek|bu|son|geçen|geçmiş|geçtiğimiz|(bir\s+)?önceki|evvelki|önümüzdeki)"; public const string UpcomingPrefixRegex = @"((bu\s+)?(yaklaşan))"; - public const string NextPrefixRegex = @"\b((bir\s+)sonraki|(?günden|haftadan|hafta\s+sonundan|aydan|yıldan|seneden)\s+sonra(ki\s+(gün|hafta(\s+sonu)?|ay|yıl|sene))?)\b"; - public const string PastPrefixRegex = @"(son|geçtiğimiz)\b"; - public static readonly string PreviousPrefixRegex = $@"(geçen|bir\s+önceki|önceki|evvelki|{PastPrefixRegex})\b"; + public static readonly string NextPrefixRegex = $@"\b(((bir\s+)?(sonraki|dahaki)|(?günden|haftadan|hafta\s+sonundan|aydan|yıldan|seneden)\s+sonra((ki\s+(?hafta(\s+sonu|ya\s+ait|nın)?|gün|ay|yıl|sene))((?=daki\b)|\b)|\b))"; + public const string PastPrefixRegex = @"(son|geçen|geçtiğimiz|geçmiş)\b"; + public static readonly string PreviousPrefixRegex = $@"(geçen|bir\s+önceki|(?(10|20|30|31|(1|2)[1-9]|0?[1-9])('i|'si|'sı|'ü|'u)?)"; + public static readonly string FutureSuffixRegex = $@"\b({NextPrefixRegex}|ileride|gelecekte)\b"; + public const string PastSuffixRegex = 
@"^\b$"; + public const string DayRegex = @"((?10|20|30|31|(1|2)[1-9]|0?[1-9])('i|'si|'sı|'ü|'u)?)"; public const string ImplicitDayRegex = @"(?(10|20|30|31|(1|2)[1-9])('i|'si|'sı|'ü|'u))(?=\b)"; public const string DayFromSuffixRegex = @"(?(1|5|8|11|15|18|21|25|28|31)'inden|(2|7|12|17|20|22|27)'sinden|(3|4|13|14|23|24)'ünden|(6|16|26)'sından|(9|10|19|29|30)'undan)"; public const string DayToSuffixRegex = @"(?(1|5|8|11|15|18|21|25|28|31)'ine|(2|7|12|17|20|22|27)'sine|(3|4|13|14|23|24)'üne|(6|16|26)'sına|(9|10|19|29|30)'una)"; @@ -52,28 +55,28 @@ public static class DateTimeDefinitions public static readonly string LastTwoYearNumRegex = $@"({WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s+{WrittenOneToNineRegex})?)"; public static readonly string FullTextYearRegex = $@"\b(((?{CenturyRegex})\s*)(?{LastTwoYearNumRegex})|(?{WrittenCenturyFullYearRegex}|(iki\s*)?bin))"; public static readonly string FullTextAtYearRegex = $@"\b((({WrittenCenturyFullYearRegex}|(iki\s+)?bin)\s+)?(((on|yirmi|otuz|kırk|elli|altmış|yetmiş|seksen|doksan)\s+)?(birde|ikide|üçte|dörtte|beşte|altıda|yedide|sekizde|dokuzda)|onda|yirmide|otuzda|kırkta|ellide|altmışta|yetmişte|seksende|doksanda)|{WrittenCenturyFullYearRegex}de|(iki\s)?binde)\b"; - public const string OclockRegex = @"(?saat|saat tam)"; + public const string OclockRegex = @"(?saat(\s+tam)?)"; public const string SpecialDescRegex = @"((?)p\b)"; public static readonly string AmDescRegex = $@"({BaseDateTime.BaseAmDescRegex})"; public static readonly string PmDescRegex = $@"({BaseDateTime.BasePmDescRegex})"; public static readonly string AmPmDescRegex = $@"({BaseDateTime.BaseAmPmDescRegex})"; public static readonly string DescRegex = $@"((({OclockRegex}\s+)?(?({AmPmDescRegex}|{AmDescRegex}|{PmDescRegex}|{SpecialDescRegex})))|{OclockRegex})"; - public const string TwoDigitYearRegex = @"\b(?([0-27-9]\d))\b"; + public static readonly string TwoDigitYearRegex = 
$@"\b(?([0-9]\d))(?!(\s*((\:\d)|{AmDescRegex}|{PmDescRegex}|\.\d)))\b"; public static readonly string YearRegex = $@"({BaseDateTime.FourDigitYearRegex}('a\b)?|{FullTextYearRegex})"; public const string WeekDayRegex = @"\b(?pazartesi(leri|si)?|salı(ları|sı)?|çarşamba(ları|sı)?|perşembe(leri|si)?|cuma(ları|sı)?|cumartesi(leri|si)?|pazar(ları|ı)?|pzt|sal|çrş|per|cum|cmt|paz)(\s+günü)?\b"; public const string SingleWeekDayRegex = @"\b(?pazartesi|salı|çarşamba|perşembe|cuma|cumartesi|pazar|pzt|sal|çrş|per|cum|cmt|paz)\b"; - public static readonly string MonthRegex = $@"(?ocak|şubat|mart|nisan|mayıs|haziran|temmuz|ağustos|eylül|ekim|kasım|(?ocak|şubat|mart|nisan|mayıs|haziran|temmuz|ağustos|eylül|ekim|kasım|(?{RelativeRegex}\s+(ayın|ay))\b"; - public static readonly string WrittenMonthRegex = $@"(?ocak|şubat|mart|nisan|mayıs|haziran|temmuz|ağustos|eylül|ekim|kasım|(?ocak|şubat|mart|nisan|mayıs|haziran|temmuz|ağustos|eylül|ekim|kasım|(?((?(ocak|şubat|mart|mayıs|ağustos|aralık)('ta)|(nisan|haziran|temmuz|kasım)('da)|(eylül|ekim)('de))"; - public static readonly string MonthPossessiveSuffixRegex = $@"((?(ocak|şubat|mart|nisan|mayıs|haziran|kasım|aralık)('ın)|(temmuz|ağustos)('un)|eylül'ün|ekim'in)|{MonthRegex}(\s+ayının)|{RelativeMonthRegex})"; + public static readonly string MonthPossessiveSuffixRegex = $@"((?(ocak|şubat|mart|nisan|mayıs|haziran|kasım|aralık)('ın)|(temmuz|ağustos)('un)|eylül'ün|ekim'in)|{MonthRegex}(\s+ayının)|(?{RelativeRegex}\s+(ayın))\b)"; public const string MonthToSuffixRegex = @"(?((ocak|şubat|mart|nisan|mayıs|haziran|temmuz|ağustos|kasım|aralık)'a|(eylül|ekim)'e))"; - public const string DateUnitRegex = @"\b(?(?yıl|sene)|(?ay)|(?hafta)|(?iş\s+)günü|(?gün))(?!(den|dan)(?!\s+(az|fazla)))"; + public const string DateUnitRegex = @"\b(?(?yıl|sene)|(?ayın\b|ay(?=lığına|ı|da|\b))|(?hafta)|(?iş\s+)günü|(?gün))(?!(den|dan)(?!\s+(az|fazla)))"; public const string DateTokenPrefix = @""; public const string TimeTokenPrefix = @""; public const string 
TokenBeforeDate = @""; - public const string TokenListBeforeDate = @"'de|'da|'te|'ta"; + public const string TokenListBeforeDate = @"'de|'da|'te|'ta|nda|nde"; public const string TokenBeforeTime = @""; public const string HalfTokenRegex = @"\b(buçuk|buçuğa)$"; public const string QuarterTokenRegex = @"\b(çeyrek|çeyreği)\b"; @@ -82,11 +85,13 @@ public static class DateTimeDefinitions public static readonly string MonthFrontSimpleCasesRegex = $@"({YearRegex}\s+)?({MonthPossessiveSuffixRegex}\s+)({DayFromSuffixRegex})\s*({DayToSuffixRegex})(\s+{TillRegex})?"; public static readonly string MonthFrontBetweenRegex = $@"\b({WrittenMonthRegex}\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?(\s*{RangePrefixRegex})?\b"; public static readonly string BetweenRegex = $@"\b(({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+({WrittenMonthRegex}|{RelativeMonthRegex})((\s+|\s*,\s*){YearRegex})?(\s*{RangePrefixRegex})?|({MonthPossessiveSuffixRegex},?\s+)({YearRegex}\s+)?)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})(\s*{RangePrefixRegex})?\b"; - public static readonly string MonthWithYear = $@"\b(({WrittenMonthRegex}(\s+|,?\s*){YearRegex}(?!den|dan))|({RelativeRegex}\s+(yıl|sene)|({RelativeRegex}\s+)?{YearRegex}(in|nin|ün|nın|un)?|seneye)\s+{WrittenMonthRegex}(\s+ayı(?=nda)|(?='(ta|da|de)\b)|\b))"; - public static readonly string OneWordPeriodRegex = $@"\b({AfterNextSuffixRegex}|({RelativeRegex}\s+)?{WrittenMonthRegex}(\s+ayı(?=nda)|\b)|({NextPrefixRegex}\s+)?{WrittenMonthRegex}|(ayın|sene) başından beri|((({RelativeRegex}\s+)(benim\s+)?|benim\s+|(?([1258]|10|11|12)('inci|\.)|[34]('üncü|\.)|6('ıncı|\.)|(7|12)('nci|\.)|(9|10)('uncu|\.))(\s+ayı|\b)))"; - public static readonly string WeekOfMonthRegex = 
$@"\b(?({MonthPossessiveSuffixRegex}|({BaseDateTime.FourDigitYearRegex}\s+yılı\s+|{RelativeRegex}\s+yılın\s+)?{WrittenMonthRegex}\s+ayının)\s+(?ilk|birinci|1(.|'inci)|ikinci|2(.|'inci)|üçüncü|3(.|'inci)|dördüncü|4(.|'inci)|beşinci|5(.|'inci)|son)\s+haftası)\b"; + public static readonly string WeekOfMonthRegex = $@"\b(?({MonthPossessiveSuffixRegex}|{WrittenMonthRegex}(\s+{BaseDateTime.FourDigitYearRegex}('(in|nin|ün|nın|un)))|({BaseDateTime.FourDigitYearRegex}\s+yılı\s+|{RelativeRegex}\s+yılın\s+)?{WrittenMonthRegex}\s+ayının)\s+(?ilk|birinci|1(.|'inci)|ikinci|2(.|'inci)|üçüncü|3(.|'üncü)|dördüncü|4(.|'üncü)|beşinci|5(.|'inci)|son)\s+haftası(na)?)\b"; public static readonly string WeekOfYearRegex = $@"\b(?({YearRegex}(\s+yılının|'?(nin|in|ün|nın|ın|un))|{RelativeRegex}\s+yıl(ın)?)\s+(?((on|yirmi|otuz|kırk|elli)\s+)?(birinci|ilk|ikinci|üçüncü|dördüncü|beşinci|altıncı|yedinci|sekizinci|dokuzuncu|onuncu|yirminci|otuzuncu|kırkıncı|ellinci)|elli birinci|elli ikinci|(1|2|3|4|5)?1(.|'inci)|(1|2|3|4|5)?2(.|'inci)|(1|2|3|4)?3(.|'üncü)|(1|2|3|4)?4(.|'üncü)|(1|2|3|4)?5(.|'inci)|(1|2|3|4)?6(.|'ıncı)|(1|2|3|4)?7(.|'nci)|(1|2|3|4)?8(.|'inci)|(1|2|3|4)?9(.|'uncu)|son)\s+hafta(sı)?)"; + public static readonly string OfYearRegex = $@"\b((of|in)\s+({YearRegex}|{StrictRelativeRegex}\s+year))\b"; + public const string FirstLastRegex = @"\b(the\s+)?((?first)|(?last))\b"; public static readonly string FollowedDateUnit = $@"^\s*{DateUnitRegex}"; public static readonly string NumberCombinedWithDateUnit = $@"\b(?\d+(\.)?){DateUnitRegex}"; public const string QuarterTermRegex = @"\b((?birinci|1'inci|1.|ilk|ikinci|2'nci|2.|üç|üçüncü|3'üncü|3.|dördüncü|4'üncü|4.|son)\s+(çeyreği|çeyrek))"; @@ -100,12 +105,12 @@ public static class DateTimeDefinitions public static readonly string HalfYearBackRegex = $@"({HalfYearTermRegex})(\s*,\s*)({YearRegex})"; public static readonly string HalfYearRelativeRegex = $@"((({RelativeRegex}\s+)?yılın|{YearRegex}\s+yılının)\s+{HalfYearTermRegex})(ı)"; public 
static readonly string AllHalfYearRegex = $@"({HalfYearFrontRegex})|({HalfYearBackRegex})|({HalfYearRelativeRegex})"; - public const string EarlyPrefixRegex = @"(?başına|(?başlarında|başında)|başı|başları|başlarına)"; - public const string MidPrefixRegex = @"(?ortasına|ortasında|ortası|ortaları|ortalarına|ortalarında)"; - public const string LaterPrefixRegex = @"(?sonuna(\s+doğru)?|(?sonunda)|sonu|sonları|sonlarına|sonlarında)"; + public const string EarlyPrefixRegex = @"(?erken|başından|başına|(?başlarında|başında|daha\s+erken|erkenden)|başı|başları|başlarına)"; + public const string MidPrefixRegex = @"(?(ortasına|ortasından?|ortası|ortaları|ortalarına|ortalarında))"; + public const string LaterPrefixRegex = @"(?sonundan|sonunda|sonuna(\s+doğru)?|(?daha\s+geç|(içinde\s+)?daha\s+sonra|içinde\s+sonra)|sonu|sonları|sonlarına|sonlarında)"; public static readonly string PrefixPeriodRegex = $@"({EarlyPrefixRegex}|{MidPrefixRegex}|{LaterPrefixRegex})"; - public const string PrefixDayRegex = @"\b(günün\s+({EarlyPrefixRegex}|{MidPrefixRegex}|{LaterPrefixRegex}))$"; - public const string SeasonDescRegex = @"(?(ilk)?baharı?|yaz(ın(?=(?!')\b)|ı(?=nda)?)?|sonbaharı?|kış(ın(?=(?!')\b)|ı(?=nda)?)?)"; + public static readonly string PrefixDayRegex = $@"\b((gün\s+)?{MidPrefixRegex}|(günün?\s+)?((?erken\s+saatler(in)?de)|(?(geç|ilerleyen)\s+saatler(in)?de)))"; + public const string SeasonDescRegex = @"(?(ilk)?baharı?|yaz(?!(dı|mış))(ın(?=(?!')\b)|ı(?=nda)?)?|(?<=((mevsim(lerden)?|bir)\s+))yaz(?=(dı|mış))|sonbaharı?|kış(ın(?=(?!')\b)|ı(?=nda)?)?)"; public static readonly string SeasonRegex = $@"\b(?({YearRegex}('nın)?\s+{SeasonDescRegex}(nın)?|({RelativeRegex}\s+)?{SeasonDescRegex}(ın)?)(\s+{PrefixPeriodRegex})?)"; public const string WhichWeekRegex = @"\b((?(?((on|yirmi|otuz|kırk|elli)\s+)?(birinci|ikinci|üçüncü|dördüncü|beşinci|altıncı|yedinci|sekizinci|dokuzuncu|onuncu|yirminci|otuzuncu|kırkıncı|ellinci)|elli birinci|elli 
ikinci))|((?(?(1|2|3|4|5)?(1|2|5|8)))(.|'inci)|(?(?(1|2|3|4|5)?(3|4)))(.|'üncü)|(?(?(1|2|3|4|5)?6))(.|'ıncı)|(?(?(1|2|3|4|5)?7))(.|'nci)|(?(?(1|2|3|4|5)?9))(.|'uncu)))\s+hafta(ya)?"; public const string WeekOfRegex = @"(haftası)"; @@ -117,38 +122,39 @@ public static class DateTimeDefinitions public const string PrefixWeekDayRegex = @"(\s*[-—–])"; public static readonly string ThisRegex = $@"\b(bu(\s+hafta)?\s+{WeekDayRegex})\b"; public static readonly string LastDateRegex = $@"\b({PreviousPrefixRegex}(\s+hafta)?\s+{WeekDayRegex})\b"; - public static readonly string NextDateRegex = $@"\b(({NextPrefixRegex}(\s+hafta)?|haftaya)\s+{WeekDayRegex})\b"; - public static readonly string SpecialDayRegex = $@"\b((dünden önceki|yarından\s+sonraki)\s+güne?|(önceki|sonraki)\s+gün|{RelativeRegex}\s+gün|(benim\s+)?günüm|dün(den)?|yarın(dan|a)?|bugün)"; - public static readonly string SpecialDayWithNumRegex = $@"\b((?dünden|yarından|bugünden|şu andan)\s+(itibaren\s+)?(?{WrittenNumRegex})\s+gün\s+(sonra|içinde))\b"; + public static readonly string NextDateRegex = $@"\b(({NextPrefixRegex}(\s+hafta(nın)?)?|haftaya)\s+{WeekDayRegex})\b"; + public static readonly string SpecialDayRegex = $@"\b((dünden\s+önceki|yarından\s+sonraki)\s+güne?|yarından\s+sonra(ki)?|(önceki|sonraki)\s+gün|{RelativeRegex}\s+gün|(benim\s+)?günüm|dün(den|kü)?|yarın(dan|a|ki)?|bugün(kü)?|^gün\b)"; + public static readonly string SpecialDayWithNumRegex = $@"\b((?dünden|yarından|bugünden|şu\s+andan)\s+(itibaren\s+)?(?{WrittenNumRegex}|\d+)\s+gün\s+(sonra|içinde))\b"; public static readonly string RelativeDayRegex = $@"\b({RelativeRegex}\s+gün)\b"; public const string SetWeekDayRegex = @"\b(?sabah|öğlen|akşam|gece|pazartesi|salı|çarşamba|perşembe|cuma|cumartesi|pazar)(?('|\s+gün)?l[ae]r)[ıi]\b"; public static readonly string WeekDayOfMonthRegex = 
$@"(?({MonthPossessiveSuffixRegex}|{WrittenMonthRegex}\s+ayının|{RelativeRegex}\s+ayın)\s+(?birinci|1'inci|1.|ilk|ikinci|2'nci|2.|üçüncü|3'üncü|3.|dördüncü|4'üncü|4.|beşinci|5'inci|5.|son)\s+{WeekDayRegex})"; public static readonly string RelativeWeekDayRegex = $@"\b((şu\s+andan\s+itibaren\s+)?{WrittenNumRegex}\s+{WeekDayRegex}\s+(sonra))\b"; public static readonly string SpecialDate = $@"\b{DayRegex}\s+günü\b"; - public const string DatePreposition = @"^[\*]"; - public static readonly string DateExtractorYearTermRegex = $@"(\s+|\s*,\s*){DateYearRegex}"; - public static readonly string DateExtractor1 = $@"\b(({DayRegex}\s+{MonthRegex}(\s+{DateYearRegex})?)((\s*\(\s*{SingleWeekDayRegex}\s*\))|\s*,?\s+{SingleWeekDayRegex}(\s+günü)?)?)"; + public const string DatePreposition = @"\b(tarihinde)"; + public static readonly string DateExtractorYearTermRegex = $@"(\s+|\s*[/\\.,\-]\s*){DateYearRegex}"; + public static readonly string DateExtractor1 = $@"\b(({DayRegex}\s*[/\\.,\-]?\s*{MonthRegex}(\s*[/\\.,\- ]\s*{DateYearRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[/\\.,\- ]\s*{DayRegex}\s*[/\\.,\- ]\s*{MonthRegex})((\s*\(\s*{SingleWeekDayRegex}\s*\))|\s*,?\s+{SingleWeekDayRegex}(\s+günü)?)?)"; public static readonly string DateExtractor3 = $@"\b(({RelativeRegex}\s+ayın\s+)?({OnRegex})(\s*\(\s*{SingleWeekDayRegex}\s*\))?)\b"; public static readonly string DateExtractor4 = $@"\b(({RelativeRegex}\s+)?({SingleWeekDayRegex}\s+(günü\s+)?)?(\({DayRegex}\s+{MonthRegex}(\s+{DateYearRegex})?\)|(\s*,\s*)?{DayRegex}\s+{MonthRegex}'(e|a)(?!\s+kadar)))\b"; - public static readonly string DateExtractor5 = $@"\b({DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex})((\s*\(\s*{SingleWeekDayRegex}\s*\))|\s*,?\s+{SingleWeekDayRegex}(\s+günü)?)?"; - public static readonly string DateExtractor6 = $@"({RelativeRegex}\s+)?({SingleWeekDayRegex}\s+)?(\({DayRegex}\s*[/\\.]\s*{MonthNumRegex}\s*[/\\.]\s*{DateYearRegex}\))\b"; + public static readonly string 
DateExtractor5 = $@"\b({DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex})((\s*\(\s*{SingleWeekDayRegex}\s*\))|\s*,?\s+{SingleWeekDayRegex}(\s+günü)?)?(?!\s*[/\\\-\.]\s*\d+)"; + public static readonly string DateExtractor6 = $@"({RelativeRegex}\s+)?({SingleWeekDayRegex}\s+)?(\({DayRegex}\s*[/\\.]\s*{MonthNumRegex}\s*[/\\.]\s*{DateYearRegex}\))\b(?!\s*[/\\\-\.]\s*\d+)"; public static readonly string DateExtractor7 = $@"\b(({DayRegex})\s+{ProperMonthSuffixRegex})\b"; - public const string DateExtractor8 = @"^[\*]"; + public static readonly string DateExtractor8 = $@"\b({WeekDayRegex}\s*[,-]?\s*)?(({MonthRegex}[\.]?\s*[/\\.,-]?\s*{DayRegex})|(\({MonthRegex}\s*[-./]\s*{DayRegex}\)))(\s*\(\s*{WeekDayRegex}\s*\))?({DateExtractorYearTermRegex})?\b"; public static readonly string DateExtractor9 = $@"\b({SingleWeekDayRegex}\s+{OnRegex})\b"; - public static readonly string DateExtractor7L = $@"\b({DayRegex}\s*[/]\s*{MonthNumRegex})"; - public const string DateExtractor7S = @"^[\*]"; + public static readonly string DateExtractor7L = $@"\b({WeekDayRegex}\s+)?({DayRegex}\s*[/]\s*{MonthNumRegex})(,?\s+{BaseDateTime.FourDigitYearRegex})?{BaseDateTime.CheckDecimalRegex}"; + public static readonly string DateExtractor7S = $@"\b(({DayRegex}\s*[/\\.,\-]?\s*{MonthRegex})((\s*\(\s*{SingleWeekDayRegex}\s*\))|\s*,?\s+{SingleWeekDayRegex}(\s+günü)?)?)"; public const string DateExtractor9L = @"^[\*]"; public const string DateExtractor9S = @"^[\*]"; - public const string DateExtractorA = @"^[\*]"; + public static readonly string DateExtractorA = $@"\b({WeekDayRegex}\s+)?(({BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DayRegex})|({MonthRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{DayRegex})|({DayRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{MonthRegex}))"; public static readonly string OfMonth = $@"^\s*{MonthRegex}"; public static readonly string MonthEnd = 
$@"{MonthPossessiveSuffixRegex}\s*$"; public static readonly string WeekDayEnd = $@"(bu\s+)?{WeekDayRegex}\s*,?\s*$"; public static readonly string WeekDayStart = $@"\s*,?\s*{WeekDayRegex}"; public const string RangeUnitRegex = @"\b(?hafta|ay|yıl)\b"; - public const string HourNumRegex = @"\b(saat\s*)?(?on\s+(bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz)|yirmi\s+(bir|iki|üç)|bir|iki|üç|dört|beş|altı|yedi|sekiz|dokuz|on|yirmi)\b"; - public const string AtHourNumRegexNoSuffix = @"\b(saat\s*)?(?on\s+(bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz)|yirmi\s+(bir|iki|üç)|bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz|on|yirmi)(?='?[dt]?[ae]?\b)"; - public const string AtHourNumRegex = @"\b(saat\s*)?(?on\s+(bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz)|yirmi\s+(bir|iki|üç)|bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz|on|yirmi)('?[dt][ae]\b)"; - public const string MinuteNumRegex = @"(?((on|yirmi|otuz|kırk|elli)\s)?(bir|iki|üç|dört|dörd|dörd|beş|altı|yedi|sekiz|dokuz)|(on|yirmi|otuz|kırk|elli))(\s*dakika\b)?"; + public const string HourNumRegex = @"\b(saat\s*)?(?on\s+(bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz)|yirmi\s+(bir|iki|üç)|sıfır|bir|iki|üç|dört|beş|altı|yedi|sekiz|dokuz|on|yirmi)\b"; + public const string HourNumRegexNoSuffix = @"\b(saat\s*)?(?on\s+(bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz)|yirmi\s+(bir|iki|üç)|sıfır|bir|iki|üç|dört|beş|altı|yedi|sekiz|dokuz|on|yirmi)(?=('?[dt]?[ae](ki)?|a|e|ya|ye|i|u|ü|yi|yı|ı)?\b)"; + public const string AtHourNumRegexNoSuffix = @"\b(saat\s*)?(?on\s+(bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz)|yirmi\s+(bir|iki|üç)|sıfır|bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz|on|yirmi)(?='?[dty]?[ae](ki)?\b)"; + public const string AtHourNumRegex = @"\b(saat\s*)?(?on\s+(bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz)|yirmi\s+(bir|iki|üç)|sıfır|bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz|on|yirmi)('?[dt][ae](ki)?\b)"; + public const string MinuteNumRegex = 
@"(?((on|yirmi|otuz|kırk|elli)\s+)?(bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz)|(on|yirmi|otuz|kırk|elli))(\s*dakika\b)?"; public const string DeltaMinuteNumRegex = @"\b(?((on|yirmi|otuz|kırk|elli)\s)?(bir|iki|üç|dört|dörd|dörd|beş|altı|yedi|sekiz|dokuz)|(on|yirmi|otuz|kırk|elli))"; public const string AtMinuteNumRegexNoSuffix = @"\b(?((bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz)|((on|yirmi|otuz|kırk|elli)(\s*(bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz))?))(?=[dt]?[ae]?\b))"; public const string AtMinuteNumRegex = @"\b(?((bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz)|((on|yirmi|otuz|kırk|elli)(\s*(bir|iki|üç|dört|dörd|beş|altı|yedi|sekiz|dokuz))?)))([dt][ae]\b)"; @@ -183,10 +189,10 @@ public static class DateTimeDefinitions public const string MiddayRegex = @"(?öğle yemeği vakti|öğlenleyin|öğle vakti|gün ortası|öğlen civarı|öğle civarı|öğlen)"; public static readonly string MidTimeRegex = $@"(?{MidnightRegex}|{MidmorningRegex}|{MidafternoonRegex}|{MiddayRegex})"; public static readonly string AtHourRegexNoSuffix = $@"\b({HourRegex}(?=('([dt][ae]|dan|den|ten|[ae]|y[ae])\b|\s*{RangeConnectorRegex}\s*{HourRegex}))|(?<={HourRegex}\s*{RangeConnectorRegex}\s*){HourRegex})"; - public static readonly string AtRegex = $@"\b(({TimePrefix}\s*|{TimePrefix}\s+(saat)\s*)?({AtWrittenTimeRegexNoSuffix}|{AtHourNumRegexNoSuffix}|{AtHourRegexNoSuffix}|{HourRegex}:{AtMinuteRegexNoSuffix}|{MidTimeRegex})|(({TimePrefix}\s*|{TimePrefix}\s+(saat)\s*|saat\s*){HourRegex}))(?!(\d|:))"; + public static readonly string AtRegex = $@"\b(({TimePrefix}\s*|{TimePrefix}\s+(saat)\s*)?({AtWrittenTimeRegexNoSuffix}|{AtHourNumRegexNoSuffix}|{AtHourRegexNoSuffix}|{HourRegex}:{AtMinuteRegexNoSuffix}|{MidTimeRegex})|(({TimePrefix}\s*|{TimePrefix}\s+(saat)\s*|saat\s*)({HourRegex}|{HourNumRegexNoSuffix})))(?!(\d|:|\s+saat))"; public const string IshRegex = @"^[\*]"; public const string AtHourWithZeroMinRegex = @"(?0?[1-9]|[0-5][0-9]|2[0-4])(:00)?('[dt][ae])"; - public const string TimeUnitRegex 
= @"([^A-Za-z]{1,}|\b)(?saat|sa.|dakika|dk.|saniye|sn.)"; + public const string TimeUnitRegex = @"([^A-Za-z]{1,}|\b)(?saat|sa\.|dakika|dk\.|saniye|sn\.)"; public const string RestrictedTimeUnitRegex = @"(?saat|dakika|saniye)\b"; public const string AtDateTimeUnitRegex = @"(?yıllarda|senelerde|aylarda|haftalarda|günlerde|saatlerde|dakikalarda|saniyelerde)\b"; public const string FivesRegex = @"(?(beş|(((on|yirmi|otuz|kırk|elli)\s)?beş?)))\b"; @@ -204,8 +210,8 @@ public static class DateTimeDefinitions public const string PeriodHourNumToRegex = @"\b((?iki|yedi|on\s+iki|on\s+yedi|yirmi|yirmi\s+iki)ye|(?bir|üç|dörd|beş|sekiz|on\s+bir|on\s+üç|on\s+dörd|on\s+beş|on\s+sekiz|yirmi\s+bir|yirmi\s+üç|yirmi\s+dörd)e|(?altı|on\s+altı)ya|(?sıfır|dokuz|on|on\s+dokuz)a)\b"; public static readonly string ConnectNumRegex = $@"\b({DescRegex}\s+){HourRegex}(?00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)(?!\d)"; public static readonly string TimeRegexWithDotConnector = $@"({HourRegex}(\s*\.\s*){BaseDateTime.MinuteRegex})"; - public static readonly string TimeRegex1 = $@"\b(({TimePrefix}\s+)(saat\s)?({WrittenTimeRegex}|{HourNumRegex}|{AtHourNumRegex}|{HourRegex}))|((saat\s)?({WrittenTimeRegex}|{HourNumRegex}|{AtHourNumRegex}|{HourRegex})\s+{TimePrefix})"; - public static readonly string TimeRegex2 = $@"\b({TimePrefix}\s+)?(saat\s)?({HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?)"; + public static readonly string TimeRegex1 = $@"\b(({TimePrefix}\s+)(saat\s)?({WrittenTimeRegex}|{HourNumRegex}|{AtHourNumRegex}|{HourRegex}))(?!(\s+saat|\d+))"; + public static readonly string TimeRegex2 = $@"\b({TimePrefix}\s+)?(saat\s)?(t)?({HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?)"; public static readonly string TimeRegex3 = $@"\b({TimePrefix}\s+)?(saat\s)?({HourRegex}:{BaseDateTime.MinuteRegex})"; public static readonly 
string TimeRegex4 = $@"\b({TimePrefix}\s+)?(saat\s)?{TimeSuffix}(?=(\b|dan))"; public static readonly string TimeRegex5 = $@"\b({TimePrefix}\s+)?(saat\s)?(?{TimePrefix})\s*)?(?saat\s+)?({PeriodHourNumFromRegex}|{HourNumFromRegex})\s*((?{TimePrefix})\s*)?({PeriodHourNumToRegex}|{HourNumToRegex})(\s+{TillRegex})?)"; - public static readonly string PureNumBetweenAnd = $@"((?{TimePrefix})\s*)?({HourRegex}|{PeriodHourNumRegex}|({HourRegex}{BaseDateTime.TwoDigitMinuteRegex}))\s*{RangeConnectorRegex}\s*((?{TimePrefix})\s*)?({HourRegex}|{PeriodHourNumRegex}|({HourRegex}{BaseDateTime.TwoDigitMinuteRegex}))(\s*{RangePrefixRegex}\b)?"; - public static readonly string SpecificTimeFromTo = $@"(?((?\b{TimePrefix}\s+)?({HourRegex}(\s*)?:(\s*)?{MinuteFromRegex})|{HourNumFromRegex}|{PeriodHourNumFromRegex}))\s*(?((?\b{TimePrefix}\s+)?({HourRegex}(\s*)?:(\s*)?{MinuteToRegex})|{HourNumToRegex}|{PeriodHourNumToRegex}))\s*{TillRegex}\b"; + public static readonly string PureNumBetweenAnd = $@"((?{TimePrefix})\s*)?({HourRegex}|{PeriodHourNumRegex}|({HourRegex}(\s*)?:(\s*)?{BaseDateTime.TwoDigitMinuteRegex}))\s*{RangeConnectorRegex}\s*((?{TimePrefix})\s*)?({HourRegex}|{PeriodHourNumRegex}|({HourRegex}(\s*)?:(\s*)?{BaseDateTime.TwoDigitMinuteRegex}))(\s*{RangePrefixRegex}\b)?"; + public static readonly string SpecificTimeFromTo = $@"(((?((?\b({AmRegex}|{PmRegex})\s+)({HourRegex}(\s*)?:(\s*)?{MinuteFromRegex}|{HourNumFromRegex}|{PeriodHourNumFromRegex})))\s*(?((?\b({AmRegex}|{PmRegex})\s+)({HourRegex}(\s*)?:(\s*)?{MinuteToRegex}|{HourNumToRegex}|{PeriodHourNumToRegex})))\s*{TillRegex})|((?((?\b{TimePrefix}\s+)?({HourRegex}(\s*)?:(\s*)?{MinuteFromRegex}|{HourNumFromRegex}|{PeriodHourNumFromRegex})))\s*(?((?\b{TimePrefix}\s+)?({HourRegex}(\s*)?:(\s*)?{MinuteToRegex}|{HourNumToRegex}|{PeriodHourNumToRegex})))\s*{TillRegex}))\b"; public static readonly string SpecificTimeBetweenAnd = 
$@"(?({TimeRegex2}|{HourRegex}|{PeriodHourNumRegex}))\s*{RangeConnectorRegex}\s*(?({TimeRegex2}|{HourRegex}|{PeriodHourNumRegex}))(\s*{RangePrefixRegex}\b)?"; - public const string SuffixAfterRegex = @"\b((veya|ve)\s+(sonrasında|sonra))\b"; - public const string PrepositionRegex = @"(?^(günü|'da|'ta)$)"; + public const string SuffixAfterRegex = @"\b((veya|ve|ya da)(\s+daha)?\s+(sonrasında|sonrası|sonra))\b"; + public const string PrepositionRegex = @"(?^(günü|'d[ae]|'t[ae]|,)$)"; public const string LaterEarlyRegex = @"((?(erkenden|erken(\s+saatte)?))|(?geç(\s+saatte)?))"; - public static readonly string TimeOfDayRegex = $@"\b(?((sabah|öğleden\s+sonra|öğlen|öğle|akşam|gece|gün|mesai)(ın?|in|nin|ün|leyin|ları|leri|de|si)?\b(\s+{LaterEarlyRegex})?(\s+(saat(inde|te|lerinde|leri)|vaktinde|vakti|vakitlerinde))?|iş\s+(saat(inde|te|lerinde|leri)|vaktinde|vakti|vakitlerinde))(\s+(içinde|içi|dışında|dışı))?)"; + public static readonly string TimeOfDayRegex = $@"\b(?((sabah|öğleden\s+sonra|öğle(?!\s+yemeği)|akşam|gece|gün|mesai)(ın?|in|nin|ün|leyin|ları|leri|lik|de|si)?\b(\s+{LaterEarlyRegex})?(\s+(saat(inde|te|lerinde|leri)|vaktinde|vakti|vakitlerinde))?|iş\s+(saat(inde|te|lerinde|leri)|vaktinde|vakti|vakitlerinde))(\s+(içinde|içi|dışında|dışı)\b)?)"; public static readonly string SpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{TimeOfDayRegex})\b|\bbu\s(sabah|akşam|gece))\b"; public static readonly string TimeFollowedUnit = $@"^\s*{TimeUnitRegex}"; public static readonly string TimeNumberCombinedWithUnit = $@"\b(?\d+(\.\d*)?){TimeUnitRegex}"; public static readonly string[] BusinessHourSplitStrings = { @"iş", @"saat" }; - public const string NowRegex = 
@"\b(?(hemen\s+)?şimdi|(mümkün\s+olan\s+)?en\s+kısa\s+(sürede|zamanda)|ilk\s+fırsatta|bir\s+an\s+(önce|evvel)|hemen|vakit\s+geçirmeden|(mümkün\s+olduğunca|olabildiğince)\s+çabuk|son\s+(dönemlerde|zamanlarda|günlerde)|geçenlerde|yakınlarda|(bu|şu)\s+sıralar|yakın\s+zamanda|(bu|şu)\s+günlerde|önceden|evvelce|bundan\+önce|daha\s+önce|şu\s+anda)\b"; + public const string NowRegex = @"\b(?(hemen\s+)?şimdi|(mümkün\s+olan\s+)?en\s+kısa\s+(sürede|zamanda)|ilk\s+fırsatta|bir\s+an\s+(önce|evvel)|hemen|vakit\s+geçirmeden|(mümkün\s+olduğunca|olabildiğince)\s+çabuk|son\s+(dönemlerde|zamanlarda|günlerde)|geçenlerde|yakınlarda|(bu|şu)\s+sıralar|yakın\s+zamanda|(bu|şu)\s+günlerde|önceden|evvelce|bundan\s+önce|şu\s+an(da(ki)?)?)\b"; public const string SuffixRegex = @"\b(sabahı?|sabahleyin|sabahtan|öğleden\s+sonra|akşamı?|akşamleyin|gece(si)?|geceleyin)\b"; - public const string ExcludeSuffixRegex = @"^'\p{L}*(?.*)$"; - public const string DateTimeTimeOfDayRegex = @"\b(?sabah|öğle|öğlen|öğleden\s+sonra|akşam|gece)\b"; + public const string ExcludeSuffixRegex = @"^'\p{L}*\s+(?.+)$"; + public const string ExcludeSuffixDateTime = @"^'\p{L}*(?.*)$"; + public const string DateTimeTimeOfDayRegex = @"\b(?sabah|öğleden\s+sonra|akşam|gece)\b"; public static readonly string DateTimeSpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{DateTimeTimeOfDayRegex})\b|\b(bu\s+akşam|bu\s+gece))\b"; public static readonly string TimeOfTodayAfterRegex = $@"^\s*(,\s*)?{DateTimeSpecificTimeOfDayRegex}"; public static readonly string TimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?\s*$"; public static readonly string SimpleTimeOfTodayAfterRegex = $@"({HourNumRegex}|{BaseDateTime.HourRegex})\s*(,\s*)?{DateTimeSpecificTimeOfDayRegex}"; public static readonly string SimpleTimeOfTodayBeforeRegex = $@"\b{DateTimeSpecificTimeOfDayRegex}(\s*,)?\s*({HourNumRegex}|{BaseDateTime.HourRegex})(\s+(civarı(nda)?|dolaylarında|sularında))?\b"; public const string SpecificEndOfRegex = 
@"(('?ın|gün)\s+)?sonu(nda)?\b"; - public const string UnspecificEndOfRegex = @"\b(günün\s+sonu(nda))\b"; + public const string UnspecificEndOfRegex = @"\b(günün\s+sonu(nd?a))\b"; public const string UnspecificEndOfRangeRegex = @"^[\*]"; - public static readonly string PeriodTimeOfDayRegex = $@"\b(((?sabahın|öğlenin|akşamın|gecenin)\s+(erken|geç)\s+saatlerinde)|((?sabahı?|öğlen|öğleden\s+sonra|akşamı?|gece(si)?)(\s+{LaterEarlyRegex})?)|(bu\s+)?{DateTimeTimeOfDayRegex})\b"; + public static readonly string PeriodTimeOfDayRegex = $@"\b(((?sabahın|öğlenin|akşamın|gecenin)\s+(erken|geç)\s+saatlerinde)\b|((?sabahı?(?=a?\b)|öğleden\s+sonra(?=(dan)?\b)|akşamı?\b|gece(si)?\b)(\s+{LaterEarlyRegex}\b)?)|(bu\s+)?{DateTimeTimeOfDayRegex}\b)"; public static readonly string PeriodSpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{PeriodTimeOfDayRegex})\b|\b(bu akşam|bu gece))\b"; - public static readonly string PeriodTimeOfDayWithDateRegex = $@"\b((\s+){PeriodTimeOfDayRegex})\b"; - public const string LessThanRegex = @"((den|dan|ten|tan|daha)\s+az|\baz)(\s+bir\s+süre\b)?"; - public const string MoreThanRegex = @"((den|dan|ten|tan|\bdaha)\s+fazla|\bfazla|daha\s+fazla\s+zaman)"; + public static readonly string PeriodTimeOfDayWithDateRegex = $@"(^|\b)((\s+){PeriodTimeOfDayRegex})"; + public const string LessThanRegex = @"((den|dan|ten|tan|daha)\s+az|\baz)(\s+bir\s+süre(de)?\b)?"; + public const string MoreThanRegex = @"((den|dan|ten|tan|\bdaha)\s+fazla|\bfazla|daha\s+fazla\s+zaman)(\s+bir\s+süre(de)?\b)?"; public static readonly string DurationUnitRegex = $@"(?{DateUnitRegex}|saat|saniye|dakika|sa\.?\b|dk\.?\b|sn\.?\b|s\.?\b)"; - public const string SuffixAndRegex = @"(?\s*(?buçuk|çeyrek))"; - public const string PeriodicRegex = @"\b(?(?günlük)|(?haftalık|her\s+hafta)|(?(iki\s+)?haftada\s+bir)|(?aylık|ayda\s+bir)|(?yıllık|senelik|yılda\s+bir|her\s+sene|senede\s+bir))\b"; + public const string SuffixAndRegex = @"(?\s*(ve\s+)?(?buçuk|çeyrek))"; + public const string PeriodicRegex = 
@"\b(?(?günlük)|(?haftalık)|(?(iki\s+)?haftada\s+bir)|(?aylık|ayda\s+bir)|(?yıllık|senelik|yılda\s+bir|her\s+sene|senede\s+bir))\b"; public static readonly string EachUnitRegex = $@"(?(her)\s*(bir)?\s*{DurationUnitRegex})"; public const string EachPrefixRegex = @"\b(?(her)\s*$)"; public const string SetEachRegex = @"\b(?(her)\s*)"; - public const string SetLastRegex = @"(?izleyen|bir sonraki|sonraki|ertesi|gelecek|bu|geçen|son|önceki|evvelsi|şimdiki)"; + public const string SetLastRegex = @"(?izleyen|(bir\s+)?sonraki|ertesi|gelecek|bu|geçen|geçmiş|geçtiğimiz|son|önceki|evvelsi|şimdiki)"; public const string EachDayRegex = @"^\s*her\s*gün\b"; - public static readonly string DurationFollowedUnit = $@"(^\s*{DurationUnitRegex}\s+{SuffixAndRegex})|(^\s*{SuffixAndRegex}?(\s+|-)?{DurationUnitRegex})"; + public static readonly string DurationFollowedUnit = $@"(^\s*{DurationUnitRegex}\s+{SuffixAndRegex}(\s+{DurationUnitRegex})?)|(^\s*{SuffixAndRegex}?(\s+|-)?{DurationUnitRegex})"; public static readonly string NumberCombinedWithDurationUnit = $@"\b(?\d+(\.\d*)?)(-)?{DurationUnitRegex}"; public static readonly string AnUnitRegex = $@"\b(bir((?\s+buçuk)|\s+diğeri?)?|diğer)\s+{DurationUnitRegex}"; public const string DuringRegex = @"\b(?gün|hafta|ay|yıl)\s+boyunca\b"; - public const string AllRegex = @"\b(?(tüm|bütün)\s+(?gün|hafta|ay|yıl)|gün boyu)\b"; + public const string AllRegex = @"\b(?(tüm|bütün)\s+(?gün(lüğüne)?|hafta(lığına)?|ay(lığına)?|yıl(lığına)?)|gün boyu)\b"; public const string HalfRegex = @"(?yarım\s+(?saat|gün|iş\s+günü|hafta|ay|yıl))"; public const string ConjunctionRegex = @"\b(ve|ile)\b"; - public static readonly string HolidayRegex1 = $@"\b(({YearRegex}(\s+yılının)?|{RelativeRegex}\s+yıl)\s+)?(?mardi gras|((washington'ın|mao'nun) doğum günü)|çin yeni yılı|yılbaşı gecesi|yılbaşı|yuan dan|noel arifesi|noel günü|noel|kara cuma|yuandan|paskalya|temiz pazartesi|kül çarşambası|palm sunday|(kutsal (perşembe|cuma))|(beyaz (pazartesi|pazar))|trinity sunday|hamsin 
yortusu|corpus christi|siber pazartesi|(ramazan|şeker|kurban|(29 ekim )?cumhuriyet|(30 ağustos )?zafer|(23 nisan )?ulusal egemenlik ve çocuk|(19 mayıs )?atatürk'ü anma(\s*,\s*)? gençlik ve spor)\sbayramı|(15 temmuz )?demokrasi ve mill(î|i) birlik günü)\b"; - public static readonly string HolidayRegex2 = $@"\b(({YearRegex}(\s+yılının)?|{RelativeRegex}\s+yıl)\s+)?(?(şükran|tüm azizler|aziz patrick|amerikan bağımsızlık|azizler|bütün ruhlar|guy fawkes|cinco de mayo|1 nisan şaka|tomb\s*sweeping)\sgünü|white lover day|aya yorgi yortusu|cadılar bayramı|((qingming|dragon)\sfestivali))\b"; - public static readonly string HolidayRegex3 = $@"(({YearRegex}(\s+yılının)?|{RelativeRegex}\s+yıl)\s+)?(?(bağımsızlık|başkanlar|mlk|martin luther king( jr)?|canberra|(isa'nın (göğe yükselişi|miracı))|columbus|ağaç( dikme)?|ağaçlandırma|(1 mayıs )?emek ve dayanışma|anneler|babalar|(dünya)?\s(kadınlar|bekarlar)|öğretmenler|gençlik|kızlar|sevgililer|dünya|dağ sıçanı|bastille|gaziler|anma|ay|bahar|fener)\s+günü|((işçi|quebec ulusal) bayramı)|abd başkanı yemin töreni|ay festivali)"; + public static readonly string HolidayRegex1 = $@"\b(({YearRegex}(\s+yılının)?|{RelativeRegex}\s+yıl(ın)?)\s+)?(?mardi gras|((washington'ın|mao'nun) doğum günü)|çin yeni yılı|yılbaşı gecesi|yılbaşı|yuan dan|noel arifesi|noel günü|noel|kara cuma|yuandan|paskalya|temiz pazartesi|kül çarşambası|palm sunday|(kutsal (perşembe|cuma))|(beyaz (pazartesi|pazar))|trinity sunday|hamsin yortusu|corpus christi|siber pazartesi|(ramazan|şeker|kurban|(29 ekim )?cumhuriyet|(30 ağustos )?zafer|(23 nisan )?ulusal egemenlik ve çocuk|(19 mayıs )?atatürk'ü anma(\s*,\s*)? 
gençlik ve spor)\sbayramı|(15 temmuz )?demokrasi ve mill(î|i) birlik günü)\b"; + public static readonly string HolidayRegex2 = $@"\b(({YearRegex}(\s+yılının)?|{RelativeRegex}\s+yıl(ın)?)\s+)?(?(şükran|tüm azizler|aziz patrick|amerikan bağımsızlık|azizler|bütün ruhlar|guy fawkes|cinco de mayo|1 nisan şaka|tomb\s*sweeping)\sgünü|white lover day|aya yorgi yortusu|cadılar bayramı|((qingming|dragon)\sfestivali))\b"; + public static readonly string HolidayRegex3 = $@"(({YearRegex}(\s+yılının)?|{RelativeRegex}\s+yıl(ın)?)\s+)?(?(bağımsızlık|başkanlar|mlk|martin luther king( jr)?|canberra|(isa'nın (göğe yükselişi|miracı))|columbus|ağaç( dikme)?|ağaçlandırma|(1 mayıs )?emek ve dayanışma|anneler|babalar|(dünya)?\s(kadınlar|bekarlar)|öğretmenler|gençlik|kızlar|sevgililer|dünya|dağ sıçanı|bastille|gaziler|anma|ay|bahar|fener)\s+günü|((işçi|quebec ulusal) bayramı)|abd başkanı yemin töreni|ay festivali)"; public const string AMTimeRegex = @"(?sabah)"; - public const string NowTimeRegex = @"(şimdi)"; + public const string NowTimeRegex = @"(şimdi|şu\s+an(da(ki)?)?)"; public const string RecentlyTimeRegex = @"(son\s+(zamanlarda|günlerde|dönemlerde)|yakın\s+zamanda|önceden)"; public const string AsapTimeRegex = @"((mümkün\s+olan\s+)?en\s+kısa\s+sürede)"; public const string PMTimeRegex = @"\b(?öğleden sonra|öğle vakti|öğlenleyin|öğlen|öğle|akşam|gece)\b"; - public const string InclusiveModPrepositions = @"^[\*]"; - public const string BeforeRegex = @"(((\b|'(dan|den|tan|ten|y?[ea])\s+)(önce|öncesinde|daha\s+önce|en\s+(geç|erken)|kadar))|(?)((?<=)|<))"; - public static readonly string AfterRegex = $@"(((\b|'(dan|den|tan|ten|y?[ea])\s+)((sonra|sonrasında|daha\s+sonra))(?!\s+veya\s+aynı))|(?>=)|>))"; - public const string SinceRegex = @"((\b(((den|dan)\s+)?beri|sonra veya aynı|((den|dan|ile)\s+)?(başlayarak|başlayan)|erkenden|herhangi bir zamanda)\b\s*)|(?=))"; - public const string AroundRegex = @"(?:\b((?:takriben|yaklaşık)\s*|\s*(?:civarı(nda)?|dolaylarında|sularında))\b)"; - public 
const string AgoRegex = @"\b((?bugünden|gün|dünden|dün)\s+)?(önce|evvel)\b"; - public const string LaterRegex = @"\b(sonra|(?yarından|yarın|bugünden|gün)\s+(itibaren|sonra)|şu andan itibaren)"; + public const string LastRegex = @"(son)"; + public const string InclusiveModPrepositions = @"(?(('(ü|u|te|ta|da|de|inde|sinde|ünde|ında|unda|sında)\s+)?(ya\s+da|veya)\b))"; + public static readonly string BeforeRegex = $@"((({InclusiveModPrepositions}|(?<=günü)nden|'?(den?|tan?|ten?|y?[ea])|('|(?kadar\s+geç)|kadar(?!\s+erken)|ile\s+biten|(?)((?<=)|<))"; + public static readonly string BeforeRegexWithAnchor = $@"((^({InclusiveModPrepositions}|nden|'?(den?|tan?|ten?|y?[ea])|('|(?kadar\s+geç)|^kadar(?!\s+erken)|ile\s+biten|(?)((?<=)|<))"; + public static readonly string AfterRegex = $@"((({InclusiveModPrepositions}|(?<=günü)nden|'(dan?|den?|tan?|ten?|y?[ea]))\s+(sonrasında|daha\s+(sonra|geç|büyük\s+(bir\s+)?yıl(ın)?)|sonra)(?!\s+veya\s+aynı)|(sonrasında|daha\s+sonra|sonra)\s+{InclusiveModPrepositions}|sonra\b)|(?>=)|>))"; + public static readonly string AfterRegexWithAnchor = $@"((^({InclusiveModPrepositions}|nden|'(dan?|den?|tan?|ten?|y?[ea]))\s+(sonrasında|daha\s+(sonra|geç|büyük\s+(bir\s+)?yıl(ın)?)|sonra)(?!\s+veya\s+aynı)|(sonrasında|daha\s+sonra|sonra)\s+{InclusiveModPrepositions}|^sonra\b)|(?>=)|>))"; + public const string SinceRegex = @"((('|\b)(([dt][ae]n\s+)?beri|sonra\s+veya\s+aynı|((den|dan|ile)\s+)?(başlayarak|başlayan|(itibaren\s+)?herhangi\s+bir\s+zamanda)|(y[ae]\s+)?eşit\s+veya\s+sonraki|bu\s+yana|erkenden|kadar\s+erken|herhangi\s+bir\s+zamanda|((nın\s+)?başından\s+)itibaren)\b)|(?=))"; + public const string SinceRegexWithAnchor = @"(((^'|\b)(([dt][ae]n\s+)?beri|sonra\s+veya\s+aynı|((den|dan|ile)\s+)?(başlayarak|başlayan|(itibaren\s+)?herhangi\s+bir\s+zamanda)|(y[ae]\s+)?eşit\s+veya\s+sonraki|bu\s+yana|erkenden|kadar\s+erken|herhangi\s+bir\s+zamanda|((nın\s+)?başından\s+)itibaren)\b)|(?=))"; + public const string AroundRegex = 
@"(?:\b((?:takriben|yaklaşık)\s*|\s*(?:civarı(nd?a)?|dolaylarında|sularında))\b)"; + public const string AgoRegex = @"\b((?bugünden|gün|dünden|dün)\s+)?(önce(ki)?|evvel)\b"; + public const string LaterRegex = @"\b((?yarından|yarın|bugünden|gün)\s+(itibaren|sonra(ki)?)|sonra|şu\s+andan\s+itibaren)(\s+gelecek)?"; + public const string BeforeAfterRegex = @"^[.]"; + public static readonly string ModPrefixRegex = $@"\b({RelativeRegex}|{AroundRegex}|{BeforeRegex}|{AfterRegex}|{SinceRegex})\b"; + public static readonly string ModSuffixRegex = $@"\b({AgoRegex}|{LaterRegex}|{BeforeAfterRegex}|{FutureSuffixRegex}|{PastSuffixRegex})\b"; public const string InConnectorRegex = @"\b(içinde)\b"; public const string SinceNumSuffixRegex = @"\b^(?!0)(\d{0,3}((1|2|7|8)'den|(3|4|5)'ten|(6|9)'dan)|\d{0,2}(10'dan|20'den|30'dan|40'tan|50'den|60'tan|70'ten|80'den|90'dan|00'den)|\d000'den)\b"; public static readonly string SinceYearSuffixRegex = $@"({YearSuffix}\s+(yılından beri)|{SinceNumSuffixRegex}\s+beri)"; public static readonly string WithinNextPrefixRegex = $@"\b((?{NextPrefixRegex}\s+)?(\d+\s+(saniye|dakika|saat|gün|hafta|ay|yıl)\s+)?içinde)\b"; + public const string TodayNowRegex = @"\b(bugün|şimdi)\b"; public const string MorningStartEndRegex = @"(^sabahı?$)"; public const string AfternoonStartEndRegex = @"(^öğle(den\s+sonra)?$)"; public const string EveningStartEndRegex = @"(^akşamı?$)"; public const string NightStartEndRegex = @"(^(geceleyin|bu gece|gece))|((geceleyin|bu gece|gece)$)"; - public const string InexactNumberRegex = @"\b(birkaç)\b"; + public const string InexactNumberRegex = @"\b(birkaç|(?iki))\b"; public static readonly string InexactNumberUnitRegex = $@"({InexactNumberRegex})\s+({DurationUnitRegex})"; public static readonly string RelativeTimeUnitRegex = $@"(({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+({TimeUnitRegex}))"; - public static readonly string RelativeDurationUnitRegex = 
$@"((?<=({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+)({DurationUnitRegex}))"; - public static readonly string ReferenceDatePeriodRegex = $@"\b{ReferencePrefixRegex}\s+(?hafta\s+sonu|hafta\s+sonları|(hafta|ay|yıl)(lar)?|sene(ler)?)\b"; + public static readonly string RelativeDurationUnitRegex = $@"((?<=({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+)({DurationUnitRegex}))(?!\s+\d+\s+{DurationUnitRegex})"; + public static readonly string ReferenceDatePeriodRegex = $@"\b{ReferencePrefixRegex}\s+(?hafta\s*sonu|hafta\s*sonları|(hafta|ay|yıl)(lar)?|sene(ler)?)\b"; public const string ConnectorRegex = @"^(-|,|civarı|@)$"; - public const string FromRegex = @"(\bitibaren|'den|'dan|'ten|'tan)$"; + public const string FromRegex = @"(\bitibaren|'d[ae]n|'t[ae]n)$"; public const string PluralTokenRegex = @"(ları|leri)$"; - public const string FromToRegex = @"\b.+('?(den|dan|ten|tan)).+('?(e|a))\b.+"; + public const string FromToRegex = @"\b.+('?(den|dan|ten|tan)).+('?y?(e|a))\b"; public static readonly string RelativeAtDateTimeUnitRegex = $@"(({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+({AtDateTimeUnitRegex}))"; public const string SingleAmbiguousMonthRegex = @"^(ocak|mayıs|ekim|aralık|tem|ara)$"; public const string SingleAmbiguousTermsRegex = @"^(gün|hafta|ay|yıl)$"; - public const string UnspecificDatePeriodRegex = @"^(hafta(\s+sonu)?|ay|yıl)$"; + public const string UnspecificDatePeriodRegex = @"^(hafta(\s*sonu)|ay)$"; public const string PrepositionSuffixRegex = @"^[\*]"; public const string WrittenDayRegex = @"(?bir|iki|üç|dört|beş|altı|yedi|sekiz|dokuz|on|on\s+bir|on\s+iki|on\s+üç|on\s+dört|on\s+beş|on\s+altı|on\s+yedi|on\s+sekiz|on\s+dokuz|yirmi|yirmi\s+bir|yirmi\s+iki|yirmi\s+üç|yirmi\s+dört|yirmi\s+beş|yirmi\s+altı|yirmi\s+yedi|yirmi\s+sekiz|yirmi\s+dokuz|otuz|otuz\s+bir)"; public static readonly string FlexibleDayRegex = $@"(?({WrittenDayRegex}|{DayRegex}))"; public static readonly string ForTheRegex = 
$@"\b(?{FlexibleDayRegex}((?='(ü|te|inde|sinde|ünde|ında|unda|sında))|(i|ü|si)(?=nde)|(?='(ı|u))|(ı|u|sı)(?=nda)))(?!'(inci|üncü|ıncı|nci|uncu)\s+hafta)"; - public static readonly string ForTheRegex1 = $@"\b(?({FlexibleDayRegex})((?=('))|(i|ü|si)(?=nde)|(ı|u|sı)(?=nda|na)|(?=(i|ü|ı|u|si|sı)\b)))"; - public static readonly string WeekDayAndDayOfMonthRegex = $@"\b({SingleWeekDayRegex}\s+{ForTheRegex1}|(?({FlexibleDayRegex}))\s+{SingleWeekDayRegex})"; - public static readonly string WeekDayAndDayRegex = $@"\b{SingleWeekDayRegex}\s+({DayRegex}|{FlexibleDayRegex})(?=(nde|nda|nden|ne)\b)"; + public static readonly string ForTheRegex1 = $@"\b(?({FlexibleDayRegex})((?=('))|(i|ü|si)(?=nde)|(ı|u|sı)(?=nda|na)|(?=n[ae])|(?=(i|ü|ı|u|si|sı)\b)))"; + public static readonly string WeekDayAndDayOfMonthRegex = $@"\b({SingleWeekDayRegex}(\s+günü)?\s+{ForTheRegex1}|(?({FlexibleDayRegex}))\s+{SingleWeekDayRegex})"; + public static readonly string WeekDayAndDayRegex = $@"\b{SingleWeekDayRegex}(\s+günü)?\s+({DayRegex}|{FlexibleDayRegex})(?=(nde|nda|nden|ne)\b)"; public const string RestOfDateRegex = @"\b((bu\s+)?(?haftanın|ayın|yılın|haftamın|ayımın|yılımın)\s+(geri\s+kalanı(?=nda)?))"; public const string RestOfDateTimeRegex = @"\b((?günün|günümün|bugünün)\s+(geri\s+kalanı(?=nda)?))"; public const string MealTimeRegex = @"\b(at\s+)?(?öğle yemeği zamanı|öğle yemeği vakti)\b"; @@ -316,7 +331,7 @@ public static class DateTimeDefinitions public const string MiddlePauseRegex = @"\s*(,)\s*"; public const string DurationConnectorRegex = @"^\s*(?\s+|ile|ila|ve|,)\s*$"; public const string PrefixArticleRegex = @"^[\*]"; - public const string OrRegex = @"\s*((\b|,\s*)(ve|veya)\b|,)\s*"; + public const string OrRegex = @"\s*((\b|,\s*)(ve|veya|ya da)\b|,)\s*"; public static readonly string SpecialYearTermsRegex = $@"\b(yılı)"; public static readonly string YearPlusNumberRegex = $@"\b(((?(\d{{3,4}}))|{FullTextYearRegex})\s+{SpecialYearTermsRegex})"; public static readonly string NumberAsTimeRegex = 
$@"\b({WrittenTimeRegex}|{PeriodHourNumRegex}|{HourRegex})\b"; @@ -325,12 +340,12 @@ public static class DateTimeDefinitions public const string DecadeRegex = @"(?yirmiler|otuzlar|kırklar|elliler|altmışlar|yetmişler|seksenler|doksanlar|iki binler|(yirmili|otuzlu|kırklı|ellili|altmışlı|yetmişli|seksenli|doksanlı|iki binli)\s+yıllar)"; public static readonly string DecadeWithCenturyRegex = $@"(((?\d|1\d|2\d)?(?\d0))((')?ler|(')?lar)|(({CenturyRegex}\s+)?{DecadeRegex})|({CenturyRegex}\s+(?onlar|yüzler)))"; public static readonly string RelativeDecadeRegex = $@"\b({RelativeRegex}\s+((?[\w,]+)\s+)?(yüzyıl?|on\s+yıl))"; - public static readonly string YearPeriodRegex = $@"(({YearRegex}\s+(yılından)\s+{YearRegex}\s+(yılına kadar))|({YearRegex}\s*({RangeConnectorRegex})\s*{YearRegex}(\s+((yılları\s+)?arası(?=nda)?|yıllarında|yılları sırasında))?))"; - public static readonly string StrictTillRegex = $@"(?\b((e|a)(\s+kadar)?)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s*,\s*))))"; + public static readonly string YearPeriodRegex = $@"(({YearRegex}\s+(yılından)\s+{YearRegex}\s+(yılına\s+kadar))|({YearRegex}\s*({RangeConnectorRegex})\s*{YearRegex}(\s+((yılları\s+)?arası(?=nda)?|yıllarında|yılları sırasında)))|(({YearRegex}\s*{TillConnectorRegex}\s*{YearRegex})(\s+{TillRegex})?))"; + public static readonly string StrictTillRegex = $@"(?((e|a)(\s+kadar)?)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s*,\s*))))"; public static readonly string StrictRangeConnectorRegex = $@"(?\b(ile|ila)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s*,\s*))))"; public const string FromNumSuffixRegex = @"((1|2|7|8)'den|(3|4|5)'ten|(6|9)'dan|10'dan|20'den|30'dan|40'tan|50'den|60'tan|70'ten|80'den|90'dan|00'den|000'den)\b"; public const string ToNumSuffixRegex = @"((1|2|7|20|50)'ye|(3|4|5|8|70|80|00|000)'e|6'ya|(9|10|30|40|60|90)'a)\b"; - public static readonly string ComplexDatePeriodRegex = 
$@"((?.*[^'])\s*({RangeConnectorRegex})\s*(?.+)\s*{RangePrefixRegex}|(?.+({FromNumSuffixRegex}|{DayFromSuffixRegex}))\s*(?.+({ToNumSuffixRegex}|{DayToSuffixRegex}))\s*({TillRegex}))"; + public static readonly string ComplexDatePeriodRegex = $@"((?.*[^'])\s*({RangeConnectorRegex})\s*(?.*[^'\s]{{2}})\s*{RangePrefixRegex}|(?.+({FromNumSuffixRegex}|{DayFromSuffixRegex}))\s*(?.+({ToNumSuffixRegex}|{DayToSuffixRegex}))\s*({TillRegex})|((?.*[^'])\s*{TillConnectorRegex}\s*(?.*[^'\s]{{2}})(\s*{TillRegex})?))"; public static readonly string FailFastRegex = $@"{BaseDateTime.DeltaMinuteRegex}|\b({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex})|{BaseDateTime.BaseAmPmDescRegex}|\b(zero|{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}|{WrittenMonthRegex}|{SeasonDescRegex}|{DecadeRegex}|centur(y|ies)|weekends?|quarters?|hal(f|ves)|yesterday|to(morrow|day|night)|tmr|noonish|\d(-|——)?ish|((the\s+\w*)|\d)(th|rd|nd|st)|(mid\s*(-\s*)?)?(night|morning|afternoon|day)s?|evenings?||noon|lunch(time)?|dinner(time)?|(day|night)time|overnight|dawn|dusk|sunset|hours?|hrs?|h|minutes?|mins?|seconds?|secs?|eo[dmy]|mardi[ -]?gras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|cinco de mayo|all (hallow|souls)|guy fawkes|(st )?patrick|hundreds?|noughties|aughts|thousands?)\b|{WeekDayRegex}|{SetWeekDayRegex}|{NowRegex}|{PeriodicRegex}|\b({DateUnitRegex}|{ImplicitDayRegex})"; public static readonly Dictionary UnitMap = new Dictionary { @@ -340,20 +355,37 @@ public static class DateTimeDefinitions { @"yılı", @"Y" }, { @"yıllar", @"Y" }, { @"yılın", @"Y" }, + { @"yıllığına", @"Y" }, + { @"yılki", @"Y" }, + { @"yıla ait", @"Y" }, + { @"sene", @"Y" }, + { @"seneki", @"Y" }, + { @"seneye ait", @"Y" }, + { @"senesi", @"Y" }, + { @"senenin", @"Y" }, { @"ay", @"MON" }, { @"aylar", @"MON" }, { @"ayın", @"MON" }, + { @"aylığına", @"MON" }, + { @"ayki", @"MON" }, + { @"aya ait", @"MON" }, { @"hafta", @"W" }, + { @"haftaki", @"W" }, { 
@"haftalar", @"W" }, { @"haftam", @"W" }, { @"haftanın", @"W" }, { @"haftamın", @"W" }, { @"haftası", @"W" }, + { @"haftalığına", @"W" }, + { @"haftaya ait", @"W" }, { @"gün", @"D" }, { @"günler", @"D" }, { @"günün", @"D" }, { @"bugünün", @"D" }, + { @"günkü", @"D" }, + { @"güne ait", @"D" }, { @"günümün", @"D" }, + { @"günlüğüne", @"D" }, { @"saat", @"H" }, { @"saatler", @"H" }, { @"dakika", @"M" }, @@ -372,18 +404,33 @@ public static class DateTimeDefinitions { @"yılı", 31536000 }, { @"yıllar", 31536000 }, { @"yılın", 31536000 }, + { @"yıllığına", 31536000 }, + { @"yıla ait", 31536000 }, + { @"sene", 31536000 }, + { @"seneki", 31536000 }, + { @"seneye ait", 31536000 }, + { @"senesi", 31536000 }, + { @"senenin", 31536000 }, { @"ay", 2592000 }, { @"aylar", 2592000 }, { @"ayın", 2592000 }, + { @"aylığına", 2592000 }, + { @"ayki", 2592000 }, + { @"aya ait", 2592000 }, { @"hafta", 604800 }, { @"haftalar", 604800 }, { @"haftam", 604800 }, { @"haftanın", 604800 }, { @"haftamın", 604800 }, { @"haftası", 604800 }, + { @"haftalığına", 604800 }, + { @"haftaki", 604800 }, + { @"haftaya ait", 604800 }, { @"gün", 86400 }, { @"günler", 86400 }, { @"günü", 86400 }, + { @"günlüğüne", 86400 }, + { @"güne ait", 86400 }, { @"saat", 3600 }, { @"saatler", 3600 }, { @"sa.", 3600 }, @@ -427,19 +474,19 @@ public static class DateTimeDefinitions { { @"ilk", 1 }, { @"birinci", 1 }, - { @"1''inci", 1 }, + { @"1'inci", 1 }, { @"1.", 1 }, { @"ikinci", 2 }, - { @"2''inci", 2 }, + { @"2'inci", 2 }, { @"2.", 2 }, { @"üçüncü", 3 }, - { @"3''üncü", 3 }, + { @"3'üncü", 3 }, { @"3.", 3 }, { @"dördüncü", 4 }, - { @"4''üncü", 4 }, + { @"4'üncü", 4 }, { @"4.", 4 }, { @"beşinci", 5 }, - { @"5''inci", 5 }, + { @"5'inci", 5 }, { @"5.", 5 }, { @"son", 5 } }; @@ -490,28 +537,90 @@ public static class DateTimeDefinitions { { @"ocak", 1 }, { @"ocak'ın", 1 }, + { @"ocak'tan", 1 }, + { @"ocaktan", 1 }, + { @"ocak'dan", 1 }, + { @"ocakdan", 1 }, + { @"ocak'a", 1 }, + { @"ocaka", 1 }, + { @"ocağa", 1 }, { 
@"şubat", 2 }, { @"şubat'ın", 2 }, + { @"şubat'tan", 2 }, + { @"şubattan", 2 }, + { @"şubat'dan", 2 }, + { @"şubatdan", 2 }, + { @"şubat'a", 2 }, + { @"şubata", 2 }, { @"mart", 3 }, { @"mart'ın", 3 }, + { @"mart'tan", 3 }, + { @"marttan", 3 }, + { @"mart'dan", 3 }, + { @"martdan", 3 }, + { @"mart'a", 3 }, + { @"marta", 3 }, { @"nisan", 4 }, { @"nisan'ın", 4 }, + { @"nisan'dan", 4 }, + { @"nisandan", 4 }, + { @"nisan'a", 4 }, + { @"nisana", 4 }, { @"mayıs", 5 }, { @"mayıs'ın", 5 }, + { @"mayıs'tan", 5 }, + { @"mayıstan", 5 }, + { @"mayıs'dan", 5 }, + { @"mayısdan", 5 }, + { @"mayıs'a", 5 }, + { @"mayısa", 5 }, { @"haziran", 6 }, { @"haziran'ın", 6 }, + { @"haziran'dan", 6 }, + { @"hazirandan", 6 }, + { @"haziran'a", 6 }, + { @"hazirana", 6 }, { @"temmuz", 7 }, { @"temmuz'un", 7 }, + { @"temmuz'dan", 7 }, + { @"temmuzdan", 7 }, + { @"temmuz'a", 7 }, + { @"temmuza", 7 }, { @"ağustos", 8 }, { @"ağustos'un", 8 }, + { @"ağustos'tan", 8 }, + { @"ağustostan", 8 }, + { @"ağustos'dan", 8 }, + { @"ağustosdan", 8 }, + { @"ağustos'a", 8 }, + { @"ağustosa", 8 }, { @"eylül", 9 }, { @"eylül'ün", 9 }, + { @"eylül'den", 9 }, + { @"eylülden", 9 }, + { @"eylül'e", 9 }, + { @"eylüle", 9 }, { @"ekim", 10 }, { @"ekim'in", 10 }, + { @"ekim'den", 10 }, + { @"ekimden", 10 }, + { @"ekim'e", 10 }, + { @"ekime", 10 }, { @"kasım", 11 }, { @"kasım'ın", 11 }, + { @"kasım'dan", 11 }, + { @"kasımdan", 11 }, + { @"kasım'a", 11 }, + { @"kasıma", 11 }, { @"aralık", 12 }, { @"aralık'ın", 12 }, + { @"aralık'tan", 12 }, + { @"aralıktan", 12 }, + { @"aralık'dan", 12 }, + { @"aralıkdan", 12 }, + { @"aralık'a", 12 }, + { @"aralıka", 12 }, + { @"aralığa", 12 }, { @"oca", 1 }, { @"şub", 2 }, { @"mar", 3 }, @@ -987,9 +1096,9 @@ public static class DateTimeDefinitions public static readonly string[] DurationDateRestrictions = { @"bugün", @"şimdi", @"şu an" }; public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { - { @"\bmorning|afternoon|evening|night|day\b", 
@"\b(good\s+(morning|afternoon|evening|night|day))|(nighty\s+night)\b" }, - { @"\bnow\b", @"\b(^now,)|\b((is|are)\s+now\s+for|for\s+now)\b" }, - { @"\bmay\b", @"\b((^may i)|(i|you|he|she|we|they)\s+may|(may\s+((((also|not|(also not)|well)\s+)?(be|ask|contain|constitute|email|e-mail|take|have|result|involve|get|work|reply|differ))|(or may not))))\b" } + { @"\bmart\b", @"\b(martin)\b" }, + { @"^\d{1,4}-\d{1,4}$", @"\d{1,4}-\d{1,4}-\d|\d-\d{1,4}-\d{1,4}" }, + { @"^\d{1,4}-\d{1,4}-\d{1,4}$", @"\d{1,4}-\d{1,4}-\d{1,4}-\d|\d-\d{1,4}-\d{1,4}-\d{1,4}" } }; public static readonly IList MorningTermList = new List { @@ -1033,13 +1142,16 @@ public static class DateTimeDefinitions public static readonly IList MinusOneDayTerms = new List { @"dün", + @"dünkü", @"dünden", @"önceki gün", @"son gün" }; public static readonly IList PlusTwoDayTerms = new List { - @"yarından sonraki gün" + @"yarından sonraki gün", + @"yarından sonraki", + @"yarından sonra" }; public static readonly IList MinusTwoDayTerms = new List { @@ -1060,18 +1172,24 @@ public static class DateTimeDefinitions { @"ay", @"ayı", - @"ayın" + @"ayın", + @"ayki", + @"aya ait" }; public static readonly IList MonthToDateTerms = new List { @"ay başından beri", - @"ayın başından beri" + @"ayın başından beri", + @"bu zamana kadarki ay" }; public static readonly IList WeekendTerms = new List { @"hafta sonu", @"hafta sonum", - @"hafta sonundan" + @"hafta sonundan", + @"haftasonu", + @"haftasonum", + @"haftasonundan" }; public static readonly IList WeekTerms = new List { @@ -1079,13 +1197,17 @@ public static class DateTimeDefinitions @"haftam", @"haftanın", @"haftamın", - @"haftası" + @"haftası", + @"haftaki", + @"haftaya ait" }; public static readonly IList YearTerms = new List { @"yıl", @"yılın", - @"sene" + @"sene", + @"yıla ait", + @"seneye ait" }; public static readonly IList GenericYearTerms = new List { @@ -1093,7 +1215,8 @@ public static class DateTimeDefinitions }; public static readonly IList YearToDateTerms = new List 
{ - @"sene başından beri" + @"sene başından beri", + @"bu zamana kadarki yıl" }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/NumbersDefinitions.cs index f1b6dd9fe8..8258c72063 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/NumbersDefinitions.cs @@ -21,9 +21,10 @@ namespace Microsoft.Recognizers.Definitions.Turkish public static class NumbersDefinitions { - public const string LangMarker = @"Tr"; + public const string LangMarker = @"Tur"; public const bool CompoundNumberLanguage = true; public const bool MultiDecimalSeparatorCulture = true; + public const string NumberMultiplierRegex = @"(K|k|M|G|T|B|b)"; public const string DigitsNumberRegex = @"\d+|\d{1,3}(\.\d{3})"; public const string RoundNumberIntegerRegex = @"(yüz|bin|milyon|milyar|trilyon)"; public const string ZeroToNineIntegerRegex = @"(sıfır|bir|[iİ]ki|üç|dört|beş|altı|yedi|sekiz|dokuz)"; @@ -44,19 +45,19 @@ public static class NumbersDefinitions public static readonly string TrillionsNumberIntegerRegex = $@"(({ThousandsNumberIntegerRegex}|{HundredToThousandRegex}|{TenToHundredRegex}|{HundredsNumberIntegerRegex}|{OneToNineIntegerRegex}|{TensNumberIntegerRegex})\s*trilyon)"; public static readonly string AboveTrillionRegex = $@"({TrillionsNumberIntegerRegex}(\s*({BillionToTrillionRegex}|{BillionsNumberIntegerRegex}|{MillionToBillionRegex}|{MillionsNumberIntegerRegex}|{ThousandToMillionRegex}|{ThousandsNumberIntegerRegex}|{HundredToThousandRegex}|{TenToHundredRegex}|{HundredsNumberIntegerRegex}|{OneToNineIntegerRegex}|{TensNumberIntegerRegex})))"; public static readonly string AllIntRegex = 
$@"({AboveTrillionRegex}|{BillionToTrillionRegex}|{MillionToBillionRegex}|{ThousandToMillionRegex}|{MillionsNumberIntegerRegex}|{BillionsNumberIntegerRegex}|{TrillionsNumberIntegerRegex}|{ThousandsNumberIntegerRegex}|{HundredToThousandRegex}|{HundredsNumberIntegerRegex}|{TenToHundredRegex}|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex})"; - public static readonly string NegativeAllIntRegex = $@"(eksi\s)({OneToNineIntegerRegex}|{TenToHundredRegex}|{HundredToThousandRegex}|{ThousandToMillionRegex}|{MillionToBillionRegex}|{BillionToTrillionRegex}|{AboveTrillionRegex})"; + public static readonly string NegativeAllIntRegex = $@"(eksi\s+)({OneToNineIntegerRegex}|{TenToHundredRegex}|{HundredToThousandRegex}|{ThousandToMillionRegex}|{MillionToBillionRegex}|{BillionToTrillionRegex}|{AboveTrillionRegex})"; public const string PlaceHolderPureNumber = @"(?!'(i|ı|u|ü|n))\b"; public const string PlaceHolderDefault = @"\D|\b"; public static readonly Func NumbersWithPlaceHolder = (placeholder) => $@"(((?(bir\s+)?(sonraki|önceki)|sondan\s+birinci|sondan\s+bir\s+önceki|sondan\s+ikinci|(en\s+)?son|[iİ]lki?|son(uncu(su)?)?|şimdiki)"; - public static readonly string AllOrdinalRegex = $@"({OneToHundredOrdinalRegex}|{HundredToThousandOrdinalRegex}|{ThousandToMillionOrdinalRegex}|{MillionToBillionOrdinalRegex}|{BillionToTrillionOrdinalRegex}|{AboveTrillionOrdinalRegex}|{RelativeOrdinalRegex})"; + public const string RelativeOrdinalRegex = @"(?(bir\s+)?((?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?!,))(?=\b)"; + public static readonly string FractionNotationRegex = $@"{BaseNumbers.FractionNotationRegex}"; + public static readonly string FractionNounWithArticleRegex = $@"(?<=\b)(({AllIntRegex}\s+)?(buçuk|çeyrek|yarım))(?=(t[ae]n|d[[ae]n|y?[ae])?\b)"; + public static readonly string FractionPrepositionRegex = $@"(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?!,))(?=\b)"; public static readonly string AllPointRegex = $@"((\s{ZeroToNineIntegerRegex})+|(\s{AllIntRegex}))"; public static readonly 
string FloatRegex1 = $@"(({NegativeAllIntRegex}|{AllIntRegex})(\s(nokta)){AllPointRegex})"; public static readonly string FloatRegex2 = $@"{AllIntRegex}(\s+(tam)\s+)((onda)\s+{OneToNineIntegerRegex}|(yüzde)\s+({OneToNineIntegerRegex}|{TenToHundredRegex})|(binde)\s+({OneToNineIntegerRegex}|{TenToHundredRegex}|{HundredToThousandRegex}))"; public static readonly string AllFloatRegex = $@"({FloatRegex1}|{FloatRegex2})"; - public static readonly string DoubleWithMultiplierRegex = $@"(((? DoubleDecimalPointRegex = (placeholder) => $@"(((?)"; - public const string LessRegex = @"(küçük(tür)?|(?|=)<)"; + public const string TillRegex = @"(('?y?[ae])|-|—|——|~|–)"; + public static readonly string NumberNoSuffixRegex = $@"(-?(\d+[\.,])?\d*(\.?000|000\.?000|000(\.?000){{2}}|000(\.?000){{2}}\.?000|10|20|30|40|50|60|70|80|90|00|1|2|3|4|5|6|7|8|9)|({NegativeAllIntRegex}|{AllIntRegex})(i?nci|ı?ncı|üncü|uncu)?|çeyrek|yarım)"; + public const string MoreRegex = @"((daha\s+)?(büyük(tür)?|yüksek(tir)?|(yukarıda|fazla)(dır)?)|(üzerinde|üstünde)(dir)?|(?)"; + public const string LessRegex = @"((daha\s+)?((küçük|düşük)(tür)?|azı|(aşağıda|az)(dır)?)|(altında)(dır)?|(?|=)<)"; public const string EqualRegex = @"(eşit(tir)?|(?)=)"; - public const string MoreOrEqualPrefix = @"((en\s+az))"; - public const string MoreOrEqual = @"((büyük(tür)?\s+veya\s+eşit(tir)?)|>\s*=)"; - public const string MoreOrEqualSuffix = @"(az\s+değil)"; + public const string MoreOrEqualPrefix = @"(en\s+az)"; + public static readonly string MoreOrEqual = $@"(({MoreRegex}\s+(ya\s+da|veya)\s+{EqualRegex})|({EqualRegex}\s+(ya\s+da|veya)\s+{MoreRegex})|{MoreOrEqualPrefix}|>\s*=)"; + public static readonly string MoreOrEqualSuffix = $@"((((ya\s+da|veya|ve)\s+)(daha\s+)?(fazla(sı)?(dır)?|yüksek(tir)?|büyük(tür)?|yukarıda(dır)?))|({LessRegex}\s+değil(dir)?))"; public const string LessOrEqualPrefix = @"(en\s+(fazla|çok))"; - public const string LessOrEqual = @"((küçük(tür)?\s+veya\s+eşit(tir)?)|<\s*=)"; - public const string 
LessOrEqualSuffix = @"(fazla\s+değil)"; + public static readonly string LessOrEqual = $@"(({LessRegex}\s+(ya\s+da|veya)\s+{EqualRegex})|({EqualRegex}\s+(ya\s+da|veya)\s+{LessRegex})|{LessOrEqualPrefix}|<\s*=)"; + public static readonly string LessOrEqualSuffix = $@"((((ya\s+da|veya|ve)\s+)(daha\s+)?(fazla\s+değil|azı?))|({MoreRegex}\s+değil(dir)?))"; public const string NumberSplitMark = @"(?![,.](?!\d+))"; - public const string MoreRegexNoNumberSucceed = @"((daha\s+fazla)(?!(\s*\d+)))"; - public const string LessRegexNoNumberSucceed = @"((daha\s+az)(?!(\s*\d+)))"; - public const string NumberFromSuffixRegex = @"(\d*(1'den|2'den|3'ten|4'ten|5'ten|6'dan|7'den|8'den|9'dan|10'dan|20'den|30'dan|40'tan|50'den|60'tan|70'ten|80'den|90'dan|00'den|\.?000'den|000\.?000'dan|000(\.?000){2}'dan|000(\.?000){2}\.?000'dan)|((on|yirmi|otuz|kırk|elli|altmış|yetmiş|seksen|doksan|yüz)\s)?(birden|[iİ]kiden|üçten|dörtten|beşten|altıdan|yediden|sekizden|dokuzdan)|ondan|yirmiden|otuzdan|kırktan|elliden|altmıştan|yetmişten|seksenden|doksandan|yüzden|binden|çeyrekten|yarımdan)"; - public const string NumberToSuffixRegex = @"(\d*(1'e|2'ye|3'e|4'e|5'e|6'ya|7'ye|8'e|9'a|10'a|20'ye|30'a|40'a|50'ye|60'a|70'e|80'e|90'a|00'e|\.?000'e|000\.?000'a|000(\.?000){2}'a|000(\.?000){2}\.?000'a))"; - public static readonly string OneNumberRangeMoreRegex1 = $@"((?{NumberFromSuffixRegex})\s+({MoreRegex}|{MoreOrEqual}))|((?({NumberSplitMark}.)+)\s(ve|veya|ya da)\s+daha\s+(fazla|fazlası|yüksek))|{MoreRegex}\s*(?({NumberSplitMark}.)+)"; - public static readonly string OneNumberRangeMoreRegex2 = $@"(({MoreOrEqual}|{MoreOrEqualPrefix})\s*(?({NumberSplitMark}.)+))"; - public static readonly string OneNumberRangeMoreSeparateRegex = $@"((?{NumberToSuffixRegex})\s{EqualRegex}(\s+(ve|veya|ya\sda)\s+){MoreRegexNoNumberSucceed})"; - public static readonly string OneNumberRangeLessRegex1 = $@"((?{NumberFromSuffixRegex})\s+({LessRegex}|{LessOrEqual})|{LessRegex}\s*(?({NumberSplitMark}.)+))"; - public static readonly string 
OneNumberRangeLessRegex2 = $@"(({LessOrEqual}|{LessOrEqualPrefix})\s*(?({NumberSplitMark}.)+))"; - public static readonly string OneNumberRangeLessSeparateRegex = $@"((?{NumberFromSuffixRegex})\s{EqualRegex}(\s+(ve|veya|ya da)\s+){LessRegexNoNumberSucceed})"; - public static readonly string OneNumberRangeEqualRegex = $@"{EqualRegex}\s*(?({NumberSplitMark}.)+)|{NumberToSuffixRegex}\s+{EqualRegex}"; - public static readonly string TwoNumberRangeRegex1 = $@"(?({NumberSplitMark}.)+)\s(ile|ila|ve)\s(?({NumberSplitMark}.)+)(\sarasında)"; + public const string MoreRegexNoNumberSucceed = @"((?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex})('?(([dt][ae]n)|(n?[iuüı]n)|(y?[ae]))\s+)({MoreOrEqual}|{MoreRegex}))|(({MoreOrEqual}|{MoreRegex})\s*(?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex})((''?y?[ae]\s+){EqualRegex})?))"; + public static readonly string OneNumberRangeMoreRegex2 = $@"((?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex})(('?[dt][ae]n)|('?n?[iuüı]n))?\s+{MoreOrEqualSuffix})"; + public static readonly string OneNumberRangeMoreSeparateRegex = $@"({EqualRegex}\s+(?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex})(\s+(ya\s+da|veya)\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\s+(?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex})(\s+(ya\s+da|veya)\s+){EqualRegexNoNumberSucceed})"; + public static readonly string OneNumberRangeLessRegex1 = $@"(((?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex})('?(([dt][ae]n)|(n?[iuüı]n)|(y?[ae]))\s+)({LessOrEqual}|{LessRegex}))|(({LessOrEqual}|{LessRegex})\s*(?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex})((''?y?[ae]\s+){EqualRegex})?))"; + public static readonly string OneNumberRangeLessRegex2 = $@"((?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex})(('?[dt][ae]n)|('?n?[iuüı]n))?\s+{LessOrEqualSuffix})"; + public static readonly string 
OneNumberRangeLessSeparateRegex = $@"({EqualRegex}\s+(?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex})(\s+(ya\s+da|veya)\s+){LessRegexNoNumberSucceed})|({LessRegex}\s+(?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex})(\s+(ya\s+da|veya)\s+){EqualRegexNoNumberSucceed})"; + public static readonly string OneNumberRangeEqualRegex = $@"(({EqualRegex}\s+(?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex}))|((?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex})('?y?[ae]\s+){EqualRegex}))"; + public static readonly string TwoNumberRangeRegex1 = $@"((?({NumberSplitMark}.)+)\s+(ile|ila|ve)\s+(?({NumberSplitMark}.)+)(\s+arasında))"; public static readonly string TwoNumberRangeRegex2 = $@"({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\s*(ve|ama|fakat|ancak|,)\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"; public static readonly string TwoNumberRangeRegex3 = $@"({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\s*(ve|ama|fakat|ancak|,)\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})"; - public static readonly string TwoNumberRangeRegex4 = $@"(?({NumberSplitMark}.)+)\s*{TillRegex}\s*(?({NumberSplitMark}.)+)|({NumberFromSuffixRegex}\s{NumberToSuffixRegex})(\skadar)"; + public static readonly string TwoNumberRangeRegex4 = $@"(?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex})\s*{TillRegex}\s*(?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex})|(?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex})('?[dt][ae]n)\s+(?{NumberNoSuffixRegex}|{FractionPrepositionRegex}|{FractionNotationRegex}){TillRegex}\s+(kadar)"; + public const string AmbiguousFractionConnectorsRegex = @"^[\*]"; public const char DecimalSeparatorChar = ','; public const string FractionMarkerToken = @"bölü"; public const char NonDecimalSeparatorChar = '.'; @@ -289,6 +291,7 @@ public static class NumbersDefinitions { 
@"sondan birinci", @"end" }, { @"en sondan bir önceki", @"end" }, { @"sondan bir önceki", @"end" }, + { @"sondan üçüncü", @"end" }, { @"sondan ikinci", @"end" }, { @"sonraki", @"current" }, { @"önceki", @"current" } diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/NumbersWithUnitDefinitions.cs index b9cd8cc085..4a8addf0a1 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/NumbersWithUnitDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/NumbersWithUnitDefinitions.cs @@ -70,7 +70,7 @@ public static class NumbersWithUnitDefinitions }; public static readonly Dictionary CurrencySuffixList = new Dictionary { - { @"Afganistan afganisi", @"afganistan afganisi|afgani|؋|afn|af|afs|afganistan afganisine|afganistan afganisinde|afganistan afganisinden|afganistan afganisini|afganistan afganisinin|afganistan afganisidir|afganistan afganisiydi|afganistan afganisiymiş" }, + { @"Afghan afghani", @"afganistan afganisi|afgani|؋|afn|af|afs|afganistan afganisine|afganistan afganisinde|afganistan afganisinden|afganistan afganisini|afganistan afganisinin|afganistan afganisidir|afganistan afganisiydi|afganistan afganisiymiş" }, { @"Pul", @"pul|pula|pulda|puldan|pulu|pulun|puldur|puldu|pulmuş|pulluk" }, { @"Euro", @"euro|€|eur|avro|avroya|avroda|avrodan|avroyu|avronun|avrodur|avroydu|avroymuş|avroluk" }, { @"Cent", @"sent|s|sentte|sentten|senti|sentin|senttir|sentti|sentmiş|sentlik" }, @@ -278,7 +278,7 @@ public static class NumbersWithUnitDefinitions { @"Pound", @"pound|sterlin|£|sterline|sterlinde|sterlinden|sterlini|sterlinin|sterlindir|sterlindi|sterlinmiş" }, { @"Shilling", @"şilin|şiline|şilinde|şilinden|şilini|şilinin|şilindir|şilindi|şilinmiş" }, { @"Penny", @"peni|peniye|penide|peniden|peniyi|peninin|penidir|peniydi|peniymiş" }, - { @"United States dollar", @"abd doları|a.b.d. 
doları|amerika birleşik devletleri doları|amerikan doları|usd|abd $|a.b.d. $|abd$|a.b.d.$|abd dolarına|abd dolarında|abd dolarından|abd dolarını|abd dolarının|abd dolarıdır|abd dolarıydı|abd dolarıymış|a.b.d. dolarına|a.b.d. dolarında|a.b.d. dolarından|a.b.d. dolarını|a.b.d. dolarının|a.b.d. dolarıdır|a.b.d. dolarıydı|a.b.d. dolarıymış|amerika birleşik devletleri dolarına|amerika birleşik devletleri dolarında|amerika birleşik devletleri dolarından|amerika birleşik devletleri dolarını|amerika birleşik devletleri dolarının|amerika birleşik devletleri dolarıdır|amerika birleşik devletleri dolarıydı|amerika birleşik devletleri dolarıymış|amerikan dolarına|amerikan dolarında|amerikan dolarından|amerikan dolarını|amerikan dolarının|amerikan dolarıdır|amerikan dolarıydı|amerikan dolarıymış" }, + { @"United States dollar", @"abd doları|abd dolar|a.b.d. doları|a.b.d. dolar|amerika birleşik devletleri doları|amerika birleşik devletleri dolar|amerikan doları|amerikan dolar|usd|abd $|a.b.d. $|abd$|a.b.d.$|abd dolarına|abd dolarında|abd dolarından|abd dolarını|abd dolarının|abd dolarıdır|abd dolarıydı|abd dolarıymış|a.b.d. dolarına|a.b.d. dolarında|a.b.d. dolarından|a.b.d. dolarını|a.b.d. dolarının|a.b.d. dolarıdır|a.b.d. dolarıydı|a.b.d. 
dolarıymış|amerika birleşik devletleri dolarına|amerika birleşik devletleri dolarında|amerika birleşik devletleri dolarından|amerika birleşik devletleri dolarını|amerika birleşik devletleri dolarının|amerika birleşik devletleri dolarıdır|amerika birleşik devletleri dolarıydı|amerika birleşik devletleri dolarıymış|amerikan dolarına|amerikan dolarında|amerikan dolarından|amerikan dolarını|amerikan dolarının|amerikan dolarıdır|amerikan dolarıydı|amerikan dolarıymış" }, { @"East Caribbean dollar", @"doğu karayip doları|xcd|doğu karayip $|doğu karayip dolarına|doğu karayip dolarında|doğu karayip dolarından|doğu karayip dolarını|doğu karayip dolarının|doğu karayip dolarıdır|doğu karayip dolarıydı|doğu karayip dolarıymış" }, { @"Australian dollar", @"avustralya doları|aud|avustralya $|avustralya$|avustralya dolarına|avustralya dolarında|avustralya dolarından|avustralya dolarını|avustralya dolarının|avustralya dolarıdır|avustralya dolarıydı|avustralya dolarıymış" }, { @"Bahamian dollar", @"bahama doları|bsd|bahama $|bahama$|bahama dolarına|bahama dolarında|bahama dolarından|bahama dolarını|bahama dolarının|bahama dolarıdır|bahama dolarıydı|bahama dolarıymış" }, @@ -308,7 +308,10 @@ public static class NumbersWithUnitDefinitions { @"Fen", @"fen" }, { @"Jiao", @"jiao|mao" }, { @"Finnish markka", @"fin markkası|fin mk|fim|markka|fin markı|fin markkasına|fin markkasında|fin markkasından|fin markkasını|fin markkasının|fin markkasıdır|fin markkasıydı|fin markkasıymış|fin markına|fin markında|fin markından|fin markını|fin markının|fin markıdır|fin markıydı|fin markıymış|markkaya|markkada|markkadan|markkayı|markkanın|markkadır|markkaydı|markkaymış" }, - { @"Penni", @"penni|penniye|pennide|penniden|penniyi|penninin|pennidir|penniydi|penniymiş" } + { @"Penni", @"penni|penniye|pennide|penniden|penniyi|penninin|pennidir|penniydi|penniymiş" }, + { @"Bitcoin", @"bitcoin|bitcoins|btc|xbt|₿" }, + { @"Millibitcoin", @"millibitcoin|millibitcoins|milibitcoin|milibitcoins" }, + { @"Satoshi", 
@"satoshi|satoshis" } }; public static readonly Dictionary CurrencyNameToIsoCodeMap = new Dictionary { @@ -414,7 +417,7 @@ public static class NumbersWithUnitDefinitions { @"Rwandan franc", @"RWF" }, { @"Russian ruble", @"RUB" }, { @"Transnistrian ruble", @"PRB" }, - { @"Belarusian ruble", @"BYN" }, + { @"New Belarusian ruble", @"BYN" }, { @"Algerian dinar", @"DZD" }, { @"Bahraini dinar", @"BHD" }, { @"Iraqi dinar", @"IQD" }, @@ -496,7 +499,8 @@ public static class NumbersWithUnitDefinitions { @"British Virgin Islands dollar", @"_BD" }, { @"Ascension pound", @"_AP" }, { @"Alderney pound", @"_ALP" }, - { @"Abkhazian apsar", @"_AA" } + { @"Abkhazian apsar", @"_AA" }, + { @"Bitcoin", @"_XBT" } }; public static readonly Dictionary FractionalUnitNameToCodeMap = new Dictionary { @@ -572,7 +576,9 @@ public static class NumbersWithUnitDefinitions { @"Kopiyka", @"KOPIYKA" }, { @"Tiyin", @"TIYIN" }, { @"Hào", @"HAO" }, - { @"Ngwee", @"NGWEE" } + { @"Ngwee", @"NGWEE" }, + { @"Millibitcoin", @"MILLIBITCOIN" }, + { @"Satoshi", @"SATOSHI" } }; public const string CompoundUnitConnectorRegex = @"(?\s)"; public static readonly Dictionary CurrencyPrefixList = new Dictionary @@ -835,6 +841,7 @@ public static class NumbersWithUnitDefinitions { @"Kilogram", @"kg|kilogram|kilo|kilograma|kilogramda||kilogramdan|kilogramı|kilogramın|kilogramdır|kilogramdı|kilogrammış|kilogramlık|kiloya|kiloda|kilodan|kiloyu|kilonun|kilodur|kiloydu|kiloymuş|kiloluk" }, { @"Gram", @"g|gram|grama|gramda||gramdan|gramı|gramın|gramdır|gramdı|grammış|gramlık" }, { @"Milligram", @"mg|miligram|miligrama|miligramda||miligramdan|miligramı|miligramın|miligramdır|miligramdı|miligrammış|miligramlık" }, + { @"Microgram", @"μg|mikrogram|mikrograma|mikrogramda|mikrogramdan|mikrogramı|mikrogramın|mikrogramdır|mikrogramdı|mikrogrammış|mikrogramlık" }, { @"Barrel", @"varil|varile|varilde|varilden|varili|varilin|varildir|varildi|varilmiş|varillik" }, { @"Gallon", 
@"galon|galona|galonda|galondan|galonu|galonun|galondur|galondu|galonmuş|galonluk" }, { @"Metric ton", @"metrik ton|metrik tona|metrik tonda|metrik tondan|metrik tonu|metrik tonun|metrik tondur|metrik tondu|metrik tonmuş|metrik tonluk" }, @@ -887,5 +894,9 @@ public static class NumbersWithUnitDefinitions @"drammış", @"dramlık" }; + public static readonly Dictionary AmbiguityFiltersDict = new Dictionary + { + { @"null", @"null" } + }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/QuotedTextDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/QuotedTextDefinitions.cs new file mode 100644 index 0000000000..7506300b60 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/QuotedTextDefinitions.cs @@ -0,0 +1,35 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Turkish\Turkish-QuotedText.yaml +// - Language: Turkish +// - ClassName: QuotedTextDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Turkish +{ + using System; + using System.Collections.Generic; + + public static class QuotedTextDefinitions + { + public const string LangMarker = @"Tur"; + public const string QuotedTextRegex1 = @"(“([^“”]+)”)"; + public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)"; + public const string QuotedTextRegex3 = @"(""([^""]+)"")"; + public const string QuotedTextRegex4 = @"(\\'([^\']+)\\')"; + public const string QuotedTextRegex5 = @"(`([^`]+)`)"; + public const string QuotedTextRegex6 = @"(«([^«»]+)»)"; + public const string QuotedTextRegex7 = @"(‹([^‹›]+)›)"; + public const string QuotedTextRegex8 = @"(123456)"; + public const string QuotedTextRegex9 = @"(123456)"; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/QuotedTextDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/QuotedTextDefinitions.tt new file mode 100644 index 0000000000..81856b9df1 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/QuotedTextDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Turkish\Turkish-QuotedText.yaml"; + this.Language = "Turkish"; + this.ClassName = "QuotedTextDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/TimeZoneDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/TimeZoneDefinitions.cs new file mode 100644 index 0000000000..a471551426 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/TimeZoneDefinitions.cs @@ -0,0 +1,1490 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. 
+// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +// Generation parameters: +// - DataFilename: Patterns\Turkish\Turkish-TimeZone.yaml +// - Language: Turkish +// - ClassName: TimeZoneDefinitions +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ------------------------------------------------------------------------------ + +namespace Microsoft.Recognizers.Definitions.Turkish +{ + using System; + using System.Collections.Generic; + + public static class TimeZoneDefinitions + { + public const string DirectUtcRegex = @"\b(utc|gmt)(\s*[+\-\u00B1]?\s*[\d]{1,2}h?(\s*:\s*[\d]{1,2})?)?\b"; + public static readonly IList AbbreviationsList = new List + { + @"ABST", + @"ACDT", + @"ACST", + @"ACT", + @"ADT", + @"AEDT", + @"AEST", + @"AET", + @"AFT", + @"AKDT", + @"AKST", + @"AMST", + @"AMT", + @"AOE", + @"AoE", + @"ARBST", + @"ARST", + @"ART", + @"AST", + @"AWDT", + @"AWST", + @"AZOST", + @"AZOT", + @"AZST", + @"AZT", + @"BIT", + @"BST", + @"BTT", + @"CADT", + @"CAST", + @"CBST", + @"CBT", + @"CCST", + @"CDT", + @"CDTM", + @"CEST", + @"CET", + @"COT", + @"CST", + @"CSTM", + @"CT", + @"CVT", + @"EAT", + @"ECT", + @"EDT", + @"EDTM", + @"EEST", + @"EET", + @"EGST", + @"ESAST", + @"ESAT", + @"EST", + @"ESTM", + @"ET", + @"FJST", + @"FJT", + @"GET", + @"GMT", + @"GNDT", + @"GNST", + @"GST", + @"GTBST", + @"HADT", + @"HAST", + @"HDT", + @"HKT", + @"HST", + @"IRDT", + @"IRKT", + @"IRST", + @"ISDT", + @"ISST", + @"IST", + @"JDT", + @"JST", + @"KRAT", + @"KST", + @"LINT", + @"MAGST", + @"MAGT", + @"MAT", + @"MDT", + @"MDTM", + @"MEST", + @"MOST", + @"MSK", + @"MSK+1", + @"MSK+2", + @"MSK+3", + @"MSK+4", + @"MSK+5", + @"MSK+6", + @"MSK+7", + @"MSK+8", + @"MSK+9", + @"MSK-1", + @"MST", + @"MSTM", + @"MUT", + @"MVST", + @"MYST", + @"NCAST", + @"NDT", + @"NMDT", + @"NMST", + @"NPT", + @"NST", + @"NZDT", + @"NZST", + @"NZT", + @"PDST", + @"PDT", + @"PDTM", + @"PETT", + 
@"PKT", + @"PSAST", + @"PSAT", + @"PST", + @"PSTM", + @"PT", + @"PYST", + @"PYT", + @"RST", + @"SAEST", + @"SAPST", + @"SAST", + @"SAWST", + @"SBT", + @"SGT", + @"SLT", + @"SMST", + @"SNST", + @"SST", + @"TADT", + @"TAST", + @"THA", + @"TIST", + @"TOST", + @"TOT", + @"TRT", + @"TST", + @"ULAT", + @"UTC", + @"VET", + @"VLAT", + @"WAST", + @"WAT", + @"WEST", + @"WET", + @"WPST", + @"YAKT", + @"YEKT" + }; + public static readonly IList FullNameList = new List + { + @"Acre Time", + @"Afghanistan Standard Time", + @"Alaskan Standard Time", + @"Anywhere on Earth", + @"Arab Standard Time", + @"Arabian Standard Time", + @"Arabic Standard Time", + @"Argentina Standard Time", + @"Atlantic Standard Time", + @"AUS Central Standard Time", + @"Australian Central Time", + @"AUS Eastern Standard Time", + @"Australian Eastern Time", + @"Australian Eastern Standard Time", + @"Australian Central Daylight Time", + @"Australian Eastern Daylight Time", + @"Azerbaijan Standard Time", + @"Azores Standard Time", + @"Bahia Standard Time", + @"Bangladesh Standard Time", + @"Belarus Standard Time", + @"Canada Central Standard Time", + @"Cape Verde Standard Time", + @"Caucasus Standard Time", + @"Cen. Australia Standard Time", + @"Central America Standard Time", + @"Central Asia Standard Time", + @"Central Brazilian Standard Time", + @"Central Daylight Time", + @"Europe Central Time", + @"European Central Time", + @"Central Europe Standard Time", + @"Central Europe Std Time", + @"Central European Std Time", + @"Central European Standard Time", + @"Central Pacific Standard Time", + @"Central Standard Time", + @"Central Standard Time (Mexico)", + @"China Standard Time", + @"Dateline Standard Time", + @"E. Africa Standard Time", + @"E. Australia Standard Time", + @"E. Europe Standard Time", + @"E. 
South America Standard Time", + @"Eastern Time", + @"Eastern Daylight Time", + @"Eastern Standard Time", + @"Eastern Standard Time (Mexico)", + @"Egypt Standard Time", + @"Ekaterinburg Standard Time", + @"Fiji Standard Time", + @"FLE Standard Time", + @"Georgian Standard Time", + @"GMT Standard Time", + @"Greenland Standard Time", + @"Greenwich Standard Time", + @"GTB Standard Time", + @"Hawaiian Standard Time", + @"India Standard Time", + @"Iran Standard Time", + @"Israel Standard Time", + @"Jordan Standard Time", + @"Kaliningrad Standard Time", + @"Kamchatka Standard Time", + @"Korea Standard Time", + @"Libya Standard Time", + @"Line Islands Standard Time", + @"Magadan Standard Time", + @"Mauritius Standard Time", + @"Mid-Atlantic Standard Time", + @"Middle East Standard Time", + @"Montevideo Standard Time", + @"Morocco Standard Time", + @"Mountain Standard Time", + @"Mountain Standard Time (Mexico)", + @"Myanmar Standard Time", + @"N. Central Asia Standard Time", + @"Namibia Standard Time", + @"Nepal Standard Time", + @"New Zealand Standard Time", + @"Newfoundland Standard Time", + @"North Asia East Standard Time", + @"North Asia Standard Time", + @"North Korea Standard Time", + @"Pacific SA Standard Time", + @"Pacific Standard Time", + @"Pacific Daylight Time", + @"Pacific Time", + @"Pacific Standard Time", + @"Pacific Standard Time (Mexico)", + @"Pakistan Standard Time", + @"Paraguay Standard Time", + @"Romance Standard Time", + @"Russia Time Zone 1", + @"Russia Time Zone 2", + @"Russia Time Zone 3", + @"Russia Time Zone 4", + @"Russia Time Zone 5", + @"Russia Time Zone 6", + @"Russia Time Zone 7", + @"Russia Time Zone 8", + @"Russia Time Zone 9", + @"Russia Time Zone 10", + @"Russia Time Zone 11", + @"Russian Standard Time", + @"SA Eastern Standard Time", + @"SA Pacific Standard Time", + @"SA Western Standard Time", + @"Samoa Standard Time", + @"SE Asia Standard Time", + @"Singapore Standard Time", + @"Singapore Time", + @"South Africa Standard Time", + @"Sri 
Lanka Standard Time", + @"Syria Standard Time", + @"Taipei Standard Time", + @"Tasmania Standard Time", + @"Tokyo Standard Time", + @"Tonga Standard Time", + @"Turkey Standard Time", + @"Ulaanbaatar Standard Time", + @"US Eastern Standard Time", + @"US Mountain Standard Time", + @"Mountain", + @"Venezuela Standard Time", + @"Vladivostok Standard Time", + @"W. Australia Standard Time", + @"W. Central Africa Standard Time", + @"W. Europe Standard Time", + @"West Asia Standard Time", + @"West Pacific Standard Time", + @"Yakutsk Standard Time", + @"Pacific Daylight Saving Time", + @"Austrialian Western Daylight Time", + @"Austrialian West Daylight Time", + @"Australian Western Daylight Time", + @"Australian West Daylight Time", + @"Colombia Time", + @"Hong Kong Time", + @"Central Europe Time", + @"Central European Time", + @"Central Europe Summer Time", + @"Central European Summer Time", + @"Central Europe Standard Time", + @"Central European Standard Time", + @"Central Europe Std Time", + @"Central European Std Time", + @"West Coast Time", + @"West Coast", + @"Central Time", + @"Central", + @"Pacific", + @"Eastern" + }; + public const string BaseTimeZoneSuffixRegex = @"((\s+|-)(friendly|compatible))?(\s+|-)time(zone)?"; + public static readonly string LocationTimeSuffixRegex = $@"({BaseTimeZoneSuffixRegex})\b"; + public static readonly string TimeZoneEndRegex = $@"({BaseTimeZoneSuffixRegex})$"; + public static readonly IList AmbiguousTimezoneList = new List + { + @"bit", + @"get", + @"art", + @"cast", + @"eat", + @"lint", + @"mat", + @"most", + @"west", + @"vet", + @"wet", + @"cot", + @"pt", + @"et", + @"eastern", + @"pacific", + @"central", + @"mountain", + @"west coast" + }; + public static readonly Dictionary AbbrToMinMapping = new Dictionary + { + { @"abst", 180 }, + { @"acdt", 630 }, + { @"acst", 570 }, + { @"act", -10000 }, + { @"adt", -10000 }, + { @"aedt", 660 }, + { @"aest", 600 }, + { @"aet", 600 }, + { @"aft", 270 }, + { @"akdt", -480 }, + { @"akst", -540 
}, + { @"amst", -10000 }, + { @"amt", -10000 }, + { @"aoe", -720 }, + { @"arbst", 180 }, + { @"arst", 180 }, + { @"art", -180 }, + { @"ast", -10000 }, + { @"awdt", 540 }, + { @"awst", 480 }, + { @"azost", 0 }, + { @"azot", -60 }, + { @"azst", 300 }, + { @"azt", 240 }, + { @"bit", -720 }, + { @"bst", -10000 }, + { @"btt", 360 }, + { @"cadt", -360 }, + { @"cast", 480 }, + { @"cbst", -240 }, + { @"cbt", -240 }, + { @"ccst", -360 }, + { @"cdt", -10000 }, + { @"cdtm", -360 }, + { @"cest", 120 }, + { @"cet", 60 }, + { @"cot", -300 }, + { @"cst", -10000 }, + { @"cstm", -360 }, + { @"ct", -360 }, + { @"cvt", -60 }, + { @"eat", 180 }, + { @"ect", -10000 }, + { @"edt", -240 }, + { @"edtm", -300 }, + { @"eest", 180 }, + { @"eet", 120 }, + { @"egst", 0 }, + { @"esast", -180 }, + { @"esat", -180 }, + { @"est", -300 }, + { @"estm", -300 }, + { @"et", -300 }, + { @"fjst", 780 }, + { @"fjt", 720 }, + { @"get", 240 }, + { @"gmt", 0 }, + { @"gndt", -180 }, + { @"gnst", -180 }, + { @"gst", -10000 }, + { @"gtbst", 120 }, + { @"hadt", -540 }, + { @"hast", -600 }, + { @"hdt", -540 }, + { @"hkt", 480 }, + { @"hst", -600 }, + { @"irdt", 270 }, + { @"irkt", 480 }, + { @"irst", 210 }, + { @"isdt", 120 }, + { @"isst", 120 }, + { @"ist", -10000 }, + { @"jdt", 120 }, + { @"jst", 540 }, + { @"krat", 420 }, + { @"kst", -10000 }, + { @"lint", 840 }, + { @"magst", 720 }, + { @"magt", 660 }, + { @"mat", -120 }, + { @"mdt", -360 }, + { @"mdtm", -420 }, + { @"mest", 120 }, + { @"most", 0 }, + { @"msk+1", 240 }, + { @"msk+2", 300 }, + { @"msk+3", 360 }, + { @"msk+4", 420 }, + { @"msk+5", 480 }, + { @"msk+6", 540 }, + { @"msk+7", 600 }, + { @"msk+8", 660 }, + { @"msk+9", 720 }, + { @"msk-1", 120 }, + { @"msk", 180 }, + { @"mst", -420 }, + { @"mstm", -420 }, + { @"mut", 240 }, + { @"mvst", -180 }, + { @"myst", 390 }, + { @"ncast", 420 }, + { @"ndt", -150 }, + { @"nmdt", 60 }, + { @"nmst", 60 }, + { @"npt", 345 }, + { @"nst", -210 }, + { @"nzdt", 780 }, + { @"nzst", 720 }, + { @"nzt", 720 }, + { @"pdst", 
-420 }, + { @"pdt", -420 }, + { @"pdtm", -480 }, + { @"pett", 720 }, + { @"pkt", 300 }, + { @"psast", -240 }, + { @"psat", -240 }, + { @"pst", -480 }, + { @"pstm", -480 }, + { @"pt", -480 }, + { @"pyst", -10000 }, + { @"pyt", -10000 }, + { @"rst", 60 }, + { @"saest", -180 }, + { @"sapst", -300 }, + { @"sast", 120 }, + { @"sawst", -240 }, + { @"sbt", 660 }, + { @"sgt", 480 }, + { @"slt", 330 }, + { @"smst", 780 }, + { @"snst", 480 }, + { @"sst", -10000 }, + { @"tadt", 600 }, + { @"tast", 600 }, + { @"tha", 420 }, + { @"tist", 480 }, + { @"tost", 840 }, + { @"tot", 780 }, + { @"trt", 180 }, + { @"tst", 540 }, + { @"ulat", 480 }, + { @"utc", 0 }, + { @"vet", -240 }, + { @"vlat", 600 }, + { @"wast", 120 }, + { @"wat", -10000 }, + { @"west", 60 }, + { @"wet", 0 }, + { @"wpst", 600 }, + { @"yakt", 540 }, + { @"yekt", 300 } + }; + public static readonly Dictionary FullToMinMapping = new Dictionary + { + { @"beijing", 480 }, + { @"shanghai", 480 }, + { @"shenzhen", 480 }, + { @"suzhou", 480 }, + { @"tianjian", 480 }, + { @"chengdu", 480 }, + { @"guangzhou", 480 }, + { @"wuxi", 480 }, + { @"xiamen", 480 }, + { @"chongqing", 480 }, + { @"shenyang", 480 }, + { @"china", 480 }, + { @"redmond", -480 }, + { @"seattle", -480 }, + { @"bellevue", -480 }, + { @"afghanistan standard", 270 }, + { @"alaskan standard", -540 }, + { @"anywhere on earth", -720 }, + { @"arab standard", 180 }, + { @"arabian standard", 180 }, + { @"arabic standard", 180 }, + { @"argentina standard", -180 }, + { @"atlantic standard", -240 }, + { @"aus central standard", 570 }, + { @"aus eastern standard", 600 }, + { @"australian eastern", 600 }, + { @"australian eastern standard", 600 }, + { @"australian central daylight", 630 }, + { @"australian eastern daylight", 660 }, + { @"azerbaijan standard", 240 }, + { @"azores standard", -60 }, + { @"bahia standard", -180 }, + { @"bangladesh standard", 360 }, + { @"belarus standard", 180 }, + { @"canada central standard", -360 }, + { @"cape verde standard", -60 }, + { 
@"caucasus standard", 240 }, + { @"cen. australia standard", 570 }, + { @"central australia standard", 570 }, + { @"central america standard", -360 }, + { @"central asia standard", 360 }, + { @"central brazilian standard", -240 }, + { @"central", -360 }, + { @"central daylight", -10000 }, + { @"central daylight saving", -10000 }, + { @"central europe", 60 }, + { @"central european", 60 }, + { @"central europe std", 60 }, + { @"central european std", 60 }, + { @"central europe standard", 60 }, + { @"central european standard", 60 }, + { @"central europe summer", 120 }, + { @"central european summer", 120 }, + { @"central pacific standard", 660 }, + { @"central standard time (mexico)", -360 }, + { @"central standard", -360 }, + { @"china standard", 480 }, + { @"dateline standard", -720 }, + { @"e. africa standard", 180 }, + { @"e. australia standard", 600 }, + { @"e. europe standard", 120 }, + { @"e. south america standard", -180 }, + { @"europe central", 60 }, + { @"european central", 60 }, + { @"eastern", -300 }, + { @"eastern daylight", -10000 }, + { @"eastern daylight saving", -10000 }, + { @"eastern standard time (mexico)", -300 }, + { @"eastern standard", -300 }, + { @"egypt standard", 120 }, + { @"ekaterinburg standard", 300 }, + { @"fiji standard", 720 }, + { @"fle standard", 120 }, + { @"georgian standard", 240 }, + { @"gmt standard", 0 }, + { @"greenland standard", -180 }, + { @"greenwich standard", 0 }, + { @"gtb standard", 120 }, + { @"hawaiian standard", -600 }, + { @"india standard", 330 }, + { @"iran standard", 210 }, + { @"israel standard", 120 }, + { @"jordan standard", 120 }, + { @"kaliningrad standard", 120 }, + { @"kamchatka standard", 720 }, + { @"korea standard", 540 }, + { @"libya standard", 120 }, + { @"line islands standard", 840 }, + { @"magadan standard", 660 }, + { @"mauritius standard", 240 }, + { @"mid-atlantic standard", -120 }, + { @"middle east standard", 120 }, + { @"montevideo standard", -180 }, + { @"morocco standard", 0 }, + { 
@"mountain", -420 }, + { @"mountain daylight", -360 }, + { @"mountain daylight saving", -360 }, + { @"mountain standard", -420 }, + { @"mountain standard time (mexico)", -420 }, + { @"myanmar standard", 390 }, + { @"n. central asia standard", 420 }, + { @"namibia standard", 60 }, + { @"nepal standard", 345 }, + { @"new zealand standard", 720 }, + { @"newfoundland standard", -210 }, + { @"north asia east standard", 480 }, + { @"north asia standard", 420 }, + { @"north korea standard", 510 }, + { @"west coast", -420 }, + { @"pacific", -480 }, + { @"pacific daylight", -420 }, + { @"pacific daylight saving", -420 }, + { @"pacific standard", -480 }, + { @"pacific standard time (mexico)", -480 }, + { @"pacific sa standard", -240 }, + { @"pakistan standard", 300 }, + { @"paraguay standard", -240 }, + { @"romance standard", 60 }, + { @"russia time zone 1", 120 }, + { @"russia time zone 2", 180 }, + { @"russia time zone 3", 240 }, + { @"russia time zone 4", 300 }, + { @"russia time zone 5", 360 }, + { @"russia time zone 6", 420 }, + { @"russia time zone 7", 480 }, + { @"russia time zone 8", 540 }, + { @"russia time zone 9", 600 }, + { @"russia time zone 10", 660 }, + { @"russia time zone 11", 720 }, + { @"russian standard", 180 }, + { @"sa eastern standard", -180 }, + { @"sa pacific standard", -300 }, + { @"sa western standard", -240 }, + { @"samoa standard", -660 }, + { @"se asia standard", 420 }, + { @"singapore standard", 480 }, + { @"singapore", 480 }, + { @"south africa standard", 120 }, + { @"sri lanka standard", 330 }, + { @"syria standard", 120 }, + { @"taipei standard", 480 }, + { @"tasmania standard", 600 }, + { @"tokyo standard", 540 }, + { @"tonga standard", 780 }, + { @"turkey standard", 180 }, + { @"ulaanbaatar standard", 480 }, + { @"us eastern standard", -300 }, + { @"us mountain standard", -420 }, + { @"venezuela standard", -240 }, + { @"vladivostok standard", 600 }, + { @"w. australia standard", 480 }, + { @"w. central africa standard", 60 }, + { @"w. 
europe standard", 0 }, + { @"western european", 0 }, + { @"west europe standard", 0 }, + { @"west europe std", 0 }, + { @"western europe standard", 0 }, + { @"western europe summer", 60 }, + { @"w. europe summer", 60 }, + { @"western european summer", 60 }, + { @"west europe summer", 60 }, + { @"west asia standard", 300 }, + { @"west pacific standard", 600 }, + { @"yakutsk standard", 540 }, + { @"australian western daylight", 540 }, + { @"australian west daylight", 540 }, + { @"austrialian western daylight", 540 }, + { @"austrialian west daylight", 540 }, + { @"australian western daylight saving", 540 }, + { @"australian west daylight saving", 540 }, + { @"austrialian western daylight saving", 540 }, + { @"austrialian west daylight saving", 540 }, + { @"colombia", -300 }, + { @"hong kong", 480 }, + { @"madrid", 60 }, + { @"bilbao", 60 }, + { @"seville", 60 }, + { @"valencia", 60 }, + { @"malaga", 60 }, + { @"las Palmas", 60 }, + { @"zaragoza", 60 }, + { @"alicante", 60 }, + { @"alche", 60 }, + { @"oviedo", 60 }, + { @"gijón", 60 }, + { @"avilés", 60 } + }; + public static readonly IList MajorLocations = new List + { + @"Dominican Republic", + @"Dominica", + @"Guinea Bissau", + @"Guinea-Bissau", + @"Guinea", + @"Equatorial Guinea", + @"Papua New Guinea", + @"New York City", + @"New York", + @"York", + @"Mexico City", + @"New Mexico", + @"Mexico", + @"Aberdeen", + @"Adelaide", + @"Anaheim", + @"Atlanta", + @"Auckland", + @"Austin", + @"Bangkok", + @"Baltimore", + @"Baton Rouge", + @"Beijing", + @"Belfast", + @"Birmingham", + @"Bolton", + @"Boston", + @"Bournemouth", + @"Bradford", + @"Brisbane", + @"Bristol", + @"Calgary", + @"Canberra", + @"Cardiff", + @"Charlotte", + @"Chicago", + @"Christchurch", + @"Colchester", + @"Colorado Springs", + @"Coventry", + @"Dallas", + @"Denver", + @"Derby", + @"Detroit", + @"Dubai", + @"Dublin", + @"Dudley", + @"Dunedin", + @"Edinburgh", + @"Edmonton", + @"El Paso", + @"Glasgow", + @"Gold Coast", + @"Hamilton", + @"Hialeah", + 
@"Houston", + @"Ipswich", + @"Jacksonville", + @"Jersey City", + @"Kansas City", + @"Kingston-upon-Hull", + @"Leeds", + @"Leicester", + @"Lexington", + @"Lincoln", + @"Liverpool", + @"London", + @"Long Beach", + @"Los Angeles", + @"Louisville", + @"Lubbock", + @"Luton", + @"Madison", + @"Manchester", + @"Mansfield", + @"Melbourne", + @"Memphis", + @"Mesa", + @"Miami", + @"Middlesbrough", + @"Milan", + @"Milton Keynes", + @"Minneapolis", + @"Montréal", + @"Montreal", + @"Nashville", + @"New Orleans", + @"Newark", + @"Newcastle-upon-Tyne", + @"Newcastle", + @"Northampton", + @"Norwich", + @"Nottingham", + @"Oklahoma City", + @"Oldham", + @"Omaha", + @"Orlando", + @"Ottawa", + @"Perth", + @"Peterborough", + @"Philadelphia", + @"Phoenix", + @"Plymouth", + @"Portland", + @"Portsmouth", + @"Preston", + @"Québec City", + @"Quebec City", + @"Québec", + @"Quebec", + @"Raleigh", + @"Reading", + @"Redmond", + @"Richmond", + @"Rome", + @"San Antonio", + @"San Diego", + @"San Francisco", + @"San José", + @"Santa Ana", + @"Seattle", + @"Sheffield", + @"Southampton", + @"Southend-on-Sea", + @"Spokane", + @"St Louis", + @"St Paul", + @"St Petersburg", + @"St. Louis", + @"St. Paul", + @"St. 
Petersburg", + @"Stockton-on-Tees", + @"Stockton", + @"Stoke-on-Trent", + @"Sunderland", + @"Swansea", + @"Swindon", + @"Sydney", + @"Tampa", + @"Tauranga", + @"Telford", + @"Toronto", + @"Vancouver", + @"Virginia Beach", + @"Walsall", + @"Warrington", + @"Washington", + @"Wellington", + @"Wolverhampton", + @"Abilene", + @"Akron", + @"Albuquerque", + @"Alexandria", + @"Allentown", + @"Amarillo", + @"Anchorage", + @"Ann Arbor", + @"Antioch", + @"Arlington", + @"Arvada", + @"Athens", + @"Augusta", + @"Aurora", + @"Bakersfield", + @"Beaumont", + @"Bellevue", + @"Berkeley", + @"Billings", + @"Boise", + @"Boulder", + @"Bridgeport", + @"Broken Arrow", + @"Brownsville", + @"Buffalo", + @"Burbank", + @"Cambridge", + @"Cape Coral", + @"Carlsbad", + @"Carrollton", + @"Cary", + @"Cedar Rapids", + @"Centennial", + @"Chandler", + @"Charleston", + @"Chattanooga", + @"Chengdu", + @"Chesapeake", + @"Chongqing", + @"Chula Vista", + @"Cincinnati", + @"Clarksville", + @"Clearwater", + @"Cleveland", + @"Clovis", + @"College Station", + @"Columbia", + @"Columbus", + @"Concord", + @"Coral Springs", + @"Corona", + @"Costa Mesa", + @"Daly City", + @"Davenport", + @"Dayton", + @"Denton", + @"Des Moines", + @"Downey", + @"Durham", + @"Edison", + @"El Cajon", + @"El Monte", + @"Elgin", + @"Elizabeth", + @"Elk Grove", + @"Erie", + @"Escondido", + @"Eugene", + @"Evansville", + @"Everett", + @"Fairfield", + @"Fargo", + @"Farmington Hills", + @"Fayetteville", + @"Fontana", + @"Fort Collins", + @"Fort Lauderdale", + @"Fort Wayne", + @"Fort Worth", + @"Fremont", + @"Fresno", + @"Frisco", + @"Fullerton", + @"Gainesville", + @"Garden Grove", + @"Garland", + @"Gilbert", + @"Glendale", + @"Grand Prairie", + @"Grand Rapids", + @"Green Bay", + @"Greensboro", + @"Gresham", + @"Guangzhou", + @"Hampton", + @"Hartford", + @"Hayward", + @"Henderson", + @"High Point", + @"Hollywood", + @"Honolulu", + @"Huntington Beach", + @"Huntsville", + @"Independence", + @"Indianapolis", + @"Inglewood", + @"Irvine", + 
@"Irving", + @"Jackson", + @"Joliet", + @"Kent", + @"Killeen", + @"Knoxville", + @"Lafayette", + @"Lakeland", + @"Lakewood", + @"Lancaster", + @"Lansing", + @"Laredo", + @"Las Cruces", + @"Las Vegas", + @"Lewisville", + @"Little Rock", + @"Lowell", + @"Macon", + @"McAllen", + @"McKinney", + @"Mesquite", + @"Miami Gardens", + @"Midland", + @"Milwaukee", + @"Miramar", + @"Mobile", + @"Modesto", + @"Montgomery", + @"Moreno Valley", + @"Murfreesboro", + @"Murrieta", + @"Naperville", + @"New Haven", + @"Newport News", + @"Norfolk", + @"Norman", + @"North Charleston", + @"North Las Vegas", + @"Norwalk", + @"Oakland", + @"Oceanside", + @"Odessa", + @"Olathe", + @"Ontario", + @"Orange", + @"Overland Park", + @"Oxnard", + @"Palm Bay", + @"Palmdale", + @"Pasadena", + @"Paterson", + @"Pearland", + @"Pembroke Pines", + @"Peoria", + @"Pittsburgh", + @"Plano", + @"Pomona", + @"Pompano Beach", + @"Providence", + @"Provo", + @"Pueblo", + @"Rancho Cucamonga", + @"Reno", + @"Rialto", + @"Richardson", + @"Riverside", + @"Rochester", + @"Rockford", + @"Roseville", + @"Round Rock", + @"Sacramento", + @"Saint Paul", + @"Salem", + @"Salinas", + @"Salt Lake City", + @"San Bernardino", + @"San Jose", + @"San Mateo", + @"Sandy Springs", + @"Santa Clara", + @"Santa Clarita", + @"Santa Maria", + @"Santa Rosa", + @"Savannah", + @"Scottsdale", + @"Shanghai", + @"Shenyang", + @"Shenzhen", + @"Shreveport", + @"Simi Valley", + @"Sioux Falls", + @"South Bend", + @"Springfield", + @"Stamford", + @"Sterling Heights", + @"Sunnyvale", + @"Surprise", + @"Suzhou", + @"Syracuse", + @"Tacoma", + @"Tallahassee", + @"Temecula", + @"Tempe", + @"Thornton", + @"Thousand Oaks", + @"Tianjing", + @"Toledo", + @"Topeka", + @"Torrance", + @"Tucson", + @"Tulsa", + @"Tyler", + @"Vallejo", + @"Ventura", + @"Victorville", + @"Visalia", + @"Waco", + @"Warren", + @"Waterbury", + @"West Covina", + @"West Jordan", + @"West Palm Beach", + @"West Valley City", + @"Westminster", + @"Wichita", + @"Wichita Falls", + 
@"Wilmington", + @"Winston-Salem", + @"Worcester", + @"Wuxi", + @"Xiamen", + @"Yonkers", + @"Bentonville", + @"Afghanistan", + @"AK", + @"AL", + @"Alabama", + @"Åland", + @"Åland Islands", + @"Alaska", + @"Albania", + @"Algeria", + @"American Samoa", + @"Andorra", + @"Angola", + @"Anguilla", + @"Antarctica", + @"Antigua and Barbuda", + @"AR", + @"Argentina", + @"Arizona", + @"Arkansas", + @"Armenia", + @"Aruba", + @"Australia", + @"Austria", + @"AZ", + @"Azerbaijan", + @"Bahamas", + @"Bahrain", + @"Bangladesh", + @"Barbados", + @"Belarus", + @"Belgium", + @"Belize", + @"Benin", + @"Bermuda", + @"Bhutan", + @"Bolivia", + @"Bonaire", + @"Bosnia", + @"Bosnia and Herzegovina", + @"Botswana", + @"Bouvet Island", + @"Brazil", + @"British Indian Ocean Territory", + @"British Virgin Islands", + @"Brunei", + @"Bulgaria", + @"Burkina Faso", + @"Burundi", + @"CA", + @"Cabo Verde", + @"California", + @"Cambodia", + @"Cameroon", + @"Canada", + @"Cayman Islands", + @"Central African Republic", + @"Chad", + @"Chile", + @"China", + @"Christmas Island", + @"CO", + @"Cocos Islands", + @"Colombia", + @"Colorado", + @"Comoros", + @"Congo", + @"Congo (DRC)", + @"Connecticut", + @"Cook Islands", + @"Costa Rica", + @"Côte d’Ivoire", + @"Croatia", + @"CT", + @"Cuba", + @"Curaçao", + @"Cyprus", + @"Czechia", + @"DE", + @"Delaware", + @"Denmark", + @"Djibouti", + @"Ecuador", + @"Egypt", + @"El Salvador", + @"Eritrea", + @"Estonia", + @"eSwatini", + @"Ethiopia", + @"Falkland Islands", + @"Falklands", + @"Faroe Islands", + @"Fiji", + @"Finland", + @"FL", + @"Florida", + @"France", + @"French Guiana", + @"French Polynesia", + @"French Southern Territories", + @"FYROM", + @"GA", + @"Gabon", + @"Gambia", + @"Georgia", + @"Georgia", + @"Germany", + @"Ghana", + @"Gibraltar", + @"Greece", + @"Greenland", + @"Grenada", + @"Guadeloupe", + @"Guam", + @"Guatemala", + @"Guernsey", + @"Guyana", + @"Haiti", + @"Hawaii", + @"Herzegovina", + @"HI", + @"Honduras", + @"Hong Kong", + @"Hungary", + @"IA", + 
@"Iceland", + @"ID", + @"Idaho", + @"IL", + @"Illinois", + @"IN", + @"India", + @"Indiana", + @"Indonesia", + @"Iowa", + @"Iran", + @"Iraq", + @"Ireland", + @"Isle of Man", + @"Israel", + @"Italy", + @"Ivory Coast", + @"Jamaica", + @"Jan Mayen", + @"Japan", + @"Jersey", + @"Jordan", + @"Kansas", + @"Kazakhstan", + @"Keeling Islands", + @"Kentucky", + @"Kenya", + @"Kiribati", + @"Korea", + @"Kosovo", + @"KS", + @"Kuwait", + @"KY", + @"Kyrgyzstan", + @"LA", + @"Laos", + @"Latvia", + @"Lebanon", + @"Lesotho", + @"Liberia", + @"Libya", + @"Liechtenstein", + @"Lithuania", + @"Louisiana", + @"Luxembourg", + @"MA", + @"Macao", + @"Macedonia", + @"Madagascar", + @"Maine", + @"Malawi", + @"Malaysia", + @"Maldives", + @"Mali", + @"Malta", + @"Marshall Islands", + @"Martinique", + @"Maryland", + @"Massachusetts", + @"Mauritania", + @"Mauritius", + @"Mayotte", + @"MD", + @"ME", + @"MI", + @"Michigan", + @"Micronesia", + @"Minnesota", + @"Mississippi", + @"Missouri", + @"MN", + @"MO", + @"Moldova", + @"Monaco", + @"Mongolia", + @"Montana", + @"Montenegro", + @"Montserrat", + @"Morocco", + @"Mozambique", + @"MS", + @"MT", + @"Myanmar", + @"Namibia", + @"Nauru", + @"NC", + @"ND", + @"NE", + @"Nebraska", + @"Nepal", + @"Netherlands", + @"Nevada", + @"New Caledonia", + @"New Hampshire", + @"New Jersey", + @"New Zealand", + @"NH", + @"Nicaragua", + @"Niger", + @"Nigeria", + @"Niue", + @"NJ", + @"NM", + @"Norfolk Island", + @"North Carolina", + @"North Dakota", + @"North Korea", + @"Northern Mariana Islands", + @"Norway", + @"NV", + @"NY", + @"OH", + @"Ohio", + @"OK", + @"Oklahoma", + @"Oman", + @"OR", + @"Oregon", + @"PA", + @"Pakistan", + @"Palau", + @"Palestinian Authority", + @"Panama", + @"Paraguay", + @"Pennsylvania", + @"Peru", + @"Philippines", + @"Pitcairn Islands", + @"Poland", + @"Portugal", + @"Puerto Rico", + @"Qatar", + @"Réunion", + @"Rhode Island", + @"RI", + @"Romania", + @"Russia", + @"Rwanda", + @"Saba", + @"Saint Barthélemy", + @"Saint Kitts and Nevis", + @"Saint 
Lucia", + @"Saint Martin", + @"Saint Pierre and Miquelon", + @"Saint Vincent and the Grenadines", + @"Samoa", + @"San Marino", + @"São Tomé and Príncipe", + @"Saudi Arabia", + @"SC", + @"SD", + @"Senegal", + @"Serbia", + @"Seychelles", + @"Sierra Leone", + @"Singapore", + @"Sint Eustatius", + @"Sint Maarten", + @"Slovakia", + @"Slovenia", + @"Solomon Islands", + @"Somalia", + @"South Africa", + @"South Carolina", + @"South Dakota", + @"South Sudan", + @"Spain", + @"Sri Lanka", + @"Sudan", + @"Suriname", + @"Svalbard", + @"Swaziland", + @"Sweden", + @"Switzerland", + @"Syria", + @"Taiwan", + @"Tajikistan", + @"Tanzania", + @"Tennessee", + @"Texas", + @"Thailand", + @"Timor-Leste", + @"TN", + @"Togo", + @"Tokelau", + @"Tonga", + @"Trinidad and Tobago", + @"Tunisia", + @"Turkey", + @"Turkmenistan", + @"Turks and Caicos Islands", + @"Tuvalu", + @"TX", + @"U.S. Outlying Islands", + @"US Outlying Islands", + @"U.S. Virgin Islands", + @"US Virgin Islands", + @"Uganda", + @"UK", + @"Ukraine", + @"United Arab Emirates", + @"United Kingdom", + @"United States", + @"Uruguay", + @"US", + @"USA", + @"UT", + @"Utah", + @"Uzbekistan", + @"VA", + @"Vanuatu", + @"Vatican City", + @"Venezuela", + @"Vermont", + @"Vietnam", + @"Virginia", + @"VT", + @"WA", + @"Wallis and Futuna", + @"West Virginia", + @"WI", + @"Wisconsin", + @"WV", + @"WY", + @"Wyoming", + @"Yemen", + @"Zambia", + @"Zimbabwe", + @"Paris", + @"Tokyo", + @"Shanghai", + @"Sao Paulo", + @"Rio de Janeiro", + @"Rio", + @"Brasília", + @"Brasilia", + @"Recife", + @"Milan", + @"Mumbai", + @"Moscow", + @"Frankfurt", + @"Munich", + @"Berlim", + @"Madrid", + @"Lisbon", + @"Warsaw", + @"Johannesburg", + @"Seoul", + @"Istanbul", + @"Kuala Kumpur", + @"Jakarta", + @"Amsterdam", + @"Brussels", + @"Valencia", + @"Seville", + @"Bilbao", + @"Malaga", + @"Las Palmas", + @"Zaragoza", + @"Alicante", + @"Elche", + @"Oviedo", + @"Gijón", + @"Avilés", + @"West Coast", + @"Central", + @"Pacific", + @"Eastern", + @"Mountain" + }; + } +} \ No 
newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/TimeZoneDefinitions.tt b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/TimeZoneDefinitions.tt new file mode 100644 index 0000000000..51b10ab78d --- /dev/null +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/TimeZoneDefinitions.tt @@ -0,0 +1,7 @@ +<#@ template debug="true" hostspecific="true" language="C#" #> +<# + this.DataFilename = @"Patterns\Turkish\Turkish-TimeZone.yaml"; + this.Language = "Turkish"; + this.ClassName = "TimeZoneDefinitions"; +#> +<#@ include file="..\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/YamlParser.cs b/.NET/Microsoft.Recognizers.Definitions.Common/YamlParser.cs index 0459afabac..4fbf1cf711 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/YamlParser.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/YamlParser.cs @@ -10,7 +10,7 @@ namespace Microsoft.Recognizers.Definitions.Common { public class YamlParser { - private readonly Deserializer yamlDeserializer; + private readonly IDeserializer yamlDeserializer; public YamlParser() { diff --git a/.NET/Microsoft.Recognizers.Definitions/Microsoft.Recognizers.Definitions.csproj b/.NET/Microsoft.Recognizers.Definitions/Microsoft.Recognizers.Definitions.csproj index f63ccccbbf..15faad277c 100644 --- a/.NET/Microsoft.Recognizers.Definitions/Microsoft.Recognizers.Definitions.csproj +++ b/.NET/Microsoft.Recognizers.Definitions/Microsoft.Recognizers.Definitions.csproj @@ -1,7 +1,8 @@  - netstandard2.0;net462;net452;net45 + netstandard2.1;netstandard2.0;net462;net6.0 + 9 false false @@ -11,18 +12,28 @@ Auto - - + $(OutputPath)$(AssemblyName).xml + $(NoWarn),1573,1591,1712 + Microsoft + nlp, entity-extraction, parser-library, recognizer, text, netstandard2.0 + Microsoft.Recognizers.Definitions provides base regex definitions for robust recognition and resolution of text entities. 
+ MIT + https://github.com/Microsoft/Recognizers-Text + images\icon.png + © Microsoft Corporation. All rights reserved. + - CS1573: Parameter 'parameter' has no matching param tag in the XML comment for 'parameter' (but other parameters do) - CS1591: Missing XML comment for publicly visible type or member 'Type_or_Member' - --> - $(OutputPath)$(AssemblyName).xml - $(NoWarn),1573,1591,1712 - - @@ -31,67 +42,96 @@ + + + + + - + + - + + + + + + + + + + + + + + + + + + + + + + + + @@ -109,17 +149,18 @@ + - + all runtime; build; native; contentfiles; analyzers - + all runtime; build; native; contentfiles; analyzers - + diff --git a/.NET/Microsoft.Recognizers.Definitions/Utilities/DefinitionLoader.cs b/.NET/Microsoft.Recognizers.Definitions/Utilities/DefinitionLoader.cs index 5e1aa7319b..1f9a53f40c 100644 --- a/.NET/Microsoft.Recognizers.Definitions/Utilities/DefinitionLoader.cs +++ b/.NET/Microsoft.Recognizers.Definitions/Utilities/DefinitionLoader.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Definitions.Utilities @@ -13,7 +17,7 @@ public static Dictionary LoadAmbiguityFilters(Dictionary MapRegexes = new Dictionary() - { - { TrueRegex, Constants.SYS_BOOLEAN_TRUE }, - { FalseRegex, Constants.SYS_BOOLEAN_FALSE }, - }; - public ArabicBooleanExtractorConfiguration(bool onlyTopMatch = true) + : base( + trueRegex: ChoiceDefinitions.TrueRegex, + falseRegex: ChoiceDefinitions.FalseRegex, + tokenRegex: ChoiceDefinitions.TokenizerRegex, + options: RegexOptions.Singleline | RegexOptions.RightToLeft, + allowPartialMatch: false, + maxDistance: 2, + onlyTopMatch) { - this.OnlyTopMatch = onlyTopMatch; } - - Regex IBooleanExtractorConfiguration.TrueRegex => TrueRegex; - - Regex IBooleanExtractorConfiguration.FalseRegex => FalseRegex; - - IDictionary IChoiceExtractorConfiguration.MapRegexes => MapRegexes; - - Regex IChoiceExtractorConfiguration.TokenRegex => TokenRegex; - - public bool AllowPartialMatch => false; - - public int MaxDistance => 2; - - public bool OnlyTopMatch { get; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Bulgarian/Extractors/BulgarianBooleanExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/Bulgarian/Extractors/BulgarianBooleanExtractorConfiguration.cs index 5d1ada5142..248f4d9f9a 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Bulgarian/Extractors/BulgarianBooleanExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Bulgarian/Extractors/BulgarianBooleanExtractorConfiguration.cs @@ -1,44 +1,24 @@ -using System.Collections.Generic; -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Bulgarian; +using Microsoft.Recognizers.Text.Choice.Extractors; namespace Microsoft.Recognizers.Text.Choice.Bulgarian { - public class BulgarianBooleanExtractorConfiguration : IBooleanExtractorConfiguration + public class BulgarianBooleanExtractorConfiguration : BaseBooleanExtractorConfiguration { - public static readonly Regex TrueRegex = - new Regex(ChoiceDefinitions.TrueRegex, RegexOptions.Singleline); - - public static readonly Regex FalseRegex = - new Regex(ChoiceDefinitions.FalseRegex, RegexOptions.Singleline); - - public static readonly Regex TokenRegex = - new Regex(ChoiceDefinitions.TokenizerRegex, RegexOptions.Singleline); - - public static readonly IDictionary MapRegexes = new Dictionary() - { - { TrueRegex, Constants.SYS_BOOLEAN_TRUE }, - { FalseRegex, Constants.SYS_BOOLEAN_FALSE }, - }; - public BulgarianBooleanExtractorConfiguration(bool onlyTopMatch = true) + : base( + trueRegex: ChoiceDefinitions.TrueRegex, + falseRegex: ChoiceDefinitions.FalseRegex, + tokenRegex: ChoiceDefinitions.TokenizerRegex, + options: RegexOptions.Singleline, + allowPartialMatch: false, + maxDistance: 2, + onlyTopMatch) { - this.OnlyTopMatch = onlyTopMatch; } - - Regex IBooleanExtractorConfiguration.TrueRegex => TrueRegex; - - Regex IBooleanExtractorConfiguration.FalseRegex => FalseRegex; - - IDictionary IChoiceExtractorConfiguration.MapRegexes => MapRegexes; - - Regex IChoiceExtractorConfiguration.TokenRegex => TokenRegex; - - public bool AllowPartialMatch => false; - - public int MaxDistance => 2; - - public bool OnlyTopMatch { get; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Chinese/Extractors/ChineseBooleanExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/Chinese/Extractors/ChineseBooleanExtractorConfiguration.cs index 09489fe063..7177de6754 100644 --- 
a/.NET/Microsoft.Recognizers.Text.Choice/Chinese/Extractors/ChineseBooleanExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Chinese/Extractors/ChineseBooleanExtractorConfiguration.cs @@ -1,44 +1,24 @@ -using System.Collections.Generic; -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; +using Microsoft.Recognizers.Text.Choice.Extractors; namespace Microsoft.Recognizers.Text.Choice.Chinese { - public class ChineseBooleanExtractorConfiguration : IBooleanExtractorConfiguration + public class ChineseBooleanExtractorConfiguration : BaseBooleanExtractorConfiguration { - public static readonly Regex TrueRegex = - new Regex(ChoiceDefinitions.TrueRegex, RegexOptions.Singleline); - - public static readonly Regex FalseRegex = - new Regex(ChoiceDefinitions.FalseRegex, RegexOptions.Singleline); - - public static readonly Regex TokenRegex = - new Regex(ChoiceDefinitions.TokenizerRegex, RegexOptions.Singleline); - - public static readonly IDictionary MapRegexes = new Dictionary() - { - { TrueRegex, Constants.SYS_BOOLEAN_TRUE }, - { FalseRegex, Constants.SYS_BOOLEAN_FALSE }, - }; - public ChineseBooleanExtractorConfiguration(bool onlyTopMatch = true) + : base( + trueRegex: ChoiceDefinitions.TrueRegex, + falseRegex: ChoiceDefinitions.FalseRegex, + tokenRegex: ChoiceDefinitions.TokenizerRegex, + options: RegexOptions.Singleline, + allowPartialMatch: false, + maxDistance: 2, + onlyTopMatch) { - this.OnlyTopMatch = onlyTopMatch; } - - Regex IBooleanExtractorConfiguration.TrueRegex => TrueRegex; - - Regex IBooleanExtractorConfiguration.FalseRegex => FalseRegex; - - IDictionary IChoiceExtractorConfiguration.MapRegexes => MapRegexes; - - Regex IChoiceExtractorConfiguration.TokenRegex => TokenRegex; - - public bool AllowPartialMatch => false; - - public int MaxDistance => 2; - - public bool 
OnlyTopMatch { get; } } } diff --git a/.NET/Microsoft.Recognizers.Text.Choice/ChoiceOptions.cs b/.NET/Microsoft.Recognizers.Text.Choice/ChoiceOptions.cs index b584f1bc2f..5e512f3820 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/ChoiceOptions.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/ChoiceOptions.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; namespace Microsoft.Recognizers.Text.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.Choice/ChoiceRecognizer.cs b/.NET/Microsoft.Recognizers.Text.Choice/ChoiceRecognizer.cs index 736461adb9..49ed88b942 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/ChoiceRecognizer.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/ChoiceRecognizer.cs @@ -1,23 +1,35 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; using Microsoft.Recognizers.Text.Choice.Arabic; using Microsoft.Recognizers.Text.Choice.Bulgarian; using Microsoft.Recognizers.Text.Choice.Chinese; using Microsoft.Recognizers.Text.Choice.Dutch; using Microsoft.Recognizers.Text.Choice.English; +using Microsoft.Recognizers.Text.Choice.Extractors; using Microsoft.Recognizers.Text.Choice.French; using Microsoft.Recognizers.Text.Choice.German; +using Microsoft.Recognizers.Text.Choice.Hindi; using Microsoft.Recognizers.Text.Choice.Italian; using Microsoft.Recognizers.Text.Choice.Japanese; using Microsoft.Recognizers.Text.Choice.Portuguese; using Microsoft.Recognizers.Text.Choice.Spanish; using Microsoft.Recognizers.Text.Choice.Swedish; +using Microsoft.Recognizers.Text.Choice.Turkish; namespace Microsoft.Recognizers.Text.Choice { public class ChoiceRecognizer : Recognizer { + public ChoiceRecognizer(string targetCulture, ChoiceOptions options, bool lazyInitialization, int timeoutInSeconds) + : base(targetCulture, options, 
lazyInitialization, timeoutInSeconds) + { + } + public ChoiceRecognizer(string targetCulture, ChoiceOptions options = ChoiceOptions.None, bool lazyInitialization = false) - : base(targetCulture, options, lazyInitialization) + : base(targetCulture, options, lazyInitialization, 0) { } @@ -27,12 +39,12 @@ public ChoiceRecognizer(string targetCulture, int options, bool lazyInitializati } public ChoiceRecognizer(ChoiceOptions options = ChoiceOptions.None, bool lazyInitialization = true) - : base(null, options, lazyInitialization) + : base(null, options, lazyInitialization, 0) { } - public ChoiceRecognizer(int options, bool lazyInitialization = true) - : this(null, GetOptions(options), lazyInitialization) + public ChoiceRecognizer(int options, bool lazyInitialization = true, int timeoutInSeconds = 0) + : this(null, GetOptions(options), lazyInitialization, timeoutInSeconds) { } @@ -48,6 +60,14 @@ public IModel GetBooleanModel(string culture = null, bool fallbackToDefaultCultu return GetModel(culture, fallbackToDefaultCulture); } + protected override List GetRelatedTypes() + { + return new List() + { + typeof(BaseBooleanExtractorConfiguration), + }; + } + protected override void InitializeConfiguration() { RegisterModel( @@ -70,6 +90,10 @@ protected override void InitializeConfiguration() Culture.German, (options) => new BooleanModel(new BooleanParser(), new BooleanExtractor(new GermanBooleanExtractorConfiguration()))); + RegisterModel( + Culture.Hindi, + (options) => new BooleanModel(new BooleanParser(), new BooleanExtractor(new HindiBooleanExtractorConfiguration()))); + RegisterModel( Culture.Italian, (options) => new BooleanModel(new BooleanParser(), new BooleanExtractor(new ItalianBooleanExtractorConfiguration()))); @@ -98,6 +122,9 @@ protected override void InitializeConfiguration() Culture.Arabic, (options) => new BooleanModel(new BooleanParser(), new BooleanExtractor(new ArabicBooleanExtractorConfiguration()))); + RegisterModel( + Culture.Turkish, + (options) => new 
BooleanModel(new BooleanParser(), new BooleanExtractor(new TurkishBooleanExtractorConfiguration()))); } } } diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Config/BooleanParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/Config/BooleanParserConfiguration.cs index d58f8c1b37..860cb23cad 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Config/BooleanParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Config/BooleanParserConfiguration.cs @@ -1,10 +1,13 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.Choice { public class BooleanParserConfiguration : IChoiceParserConfiguration { - public static IDictionary Resolutions { get; set; } = new Dictionary + public static IDictionary Resolutions { get; } = new Dictionary { { Constants.SYS_BOOLEAN_TRUE, true }, { Constants.SYS_BOOLEAN_FALSE, false }, diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Config/IChoiceParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/Config/IChoiceParserConfiguration.cs index 0dfb2cd526..4f9b0090e2 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Config/IChoiceParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Config/IChoiceParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Constants.cs b/.NET/Microsoft.Recognizers.Text.Choice/Constants.cs index a65c25061a..e537d5ba4d 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Constants.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Constants.cs @@ -1,4 +1,7 @@ -using System.Diagnostics.CodeAnalysis; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System.Diagnostics.CodeAnalysis; namespace Microsoft.Recognizers.Text.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Dutch/Extractors/DutchBooleanExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/Dutch/Extractors/DutchBooleanExtractorConfiguration.cs index a6e512b3f3..3fcc92d32d 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Dutch/Extractors/DutchBooleanExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Dutch/Extractors/DutchBooleanExtractorConfiguration.cs @@ -1,44 +1,24 @@ -using System.Collections.Generic; -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; +using Microsoft.Recognizers.Text.Choice.Extractors; namespace Microsoft.Recognizers.Text.Choice.Dutch { - public class DutchBooleanExtractorConfiguration : IBooleanExtractorConfiguration + public class DutchBooleanExtractorConfiguration : BaseBooleanExtractorConfiguration { - public static readonly Regex TrueRegex = - new Regex(ChoiceDefinitions.TrueRegex, RegexOptions.Singleline); - - public static readonly Regex FalseRegex = - new Regex(ChoiceDefinitions.FalseRegex, RegexOptions.Singleline); - - public static readonly Regex TokenRegex = - new Regex(ChoiceDefinitions.TokenizerRegex, RegexOptions.Singleline); - - public static readonly IDictionary MapRegexes = new Dictionary() - { - { TrueRegex, Constants.SYS_BOOLEAN_TRUE }, - { FalseRegex, Constants.SYS_BOOLEAN_FALSE }, - }; - public DutchBooleanExtractorConfiguration(bool onlyTopMatch = true) + : base( + trueRegex: ChoiceDefinitions.TrueRegex, + falseRegex: ChoiceDefinitions.FalseRegex, + tokenRegex: ChoiceDefinitions.TokenizerRegex, + options: RegexOptions.Singleline, + allowPartialMatch: false, + maxDistance: 2, + onlyTopMatch) { - this.OnlyTopMatch = onlyTopMatch; } - - Regex 
IBooleanExtractorConfiguration.TrueRegex => TrueRegex; - - Regex IBooleanExtractorConfiguration.FalseRegex => FalseRegex; - - IDictionary IChoiceExtractorConfiguration.MapRegexes => MapRegexes; - - Regex IChoiceExtractorConfiguration.TokenRegex => TokenRegex; - - public bool AllowPartialMatch => false; - - public int MaxDistance => 2; - - public bool OnlyTopMatch { get; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Choice/English/Extractors/EnglishBooleanExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/English/Extractors/EnglishBooleanExtractorConfiguration.cs index cd3fa44cc3..a26efb50ed 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/English/Extractors/EnglishBooleanExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/English/Extractors/EnglishBooleanExtractorConfiguration.cs @@ -1,44 +1,24 @@ -using System.Collections.Generic; -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; +using Microsoft.Recognizers.Text.Choice.Extractors; namespace Microsoft.Recognizers.Text.Choice.English { - public class EnglishBooleanExtractorConfiguration : IBooleanExtractorConfiguration + public class EnglishBooleanExtractorConfiguration : BaseBooleanExtractorConfiguration { - public static readonly Regex TrueRegex = - new Regex(ChoiceDefinitions.TrueRegex, RegexOptions.Singleline); - - public static readonly Regex FalseRegex = - new Regex(ChoiceDefinitions.FalseRegex, RegexOptions.Singleline); - - public static readonly Regex TokenRegex = - new Regex(ChoiceDefinitions.TokenizerRegex, RegexOptions.Singleline); - - public static readonly IDictionary MapRegexes = new Dictionary() - { - { TrueRegex, Constants.SYS_BOOLEAN_TRUE }, - { FalseRegex, Constants.SYS_BOOLEAN_FALSE }, - }; - public EnglishBooleanExtractorConfiguration(bool onlyTopMatch = true) + : base( + trueRegex: ChoiceDefinitions.TrueRegex, + falseRegex: ChoiceDefinitions.FalseRegex, + tokenRegex: ChoiceDefinitions.TokenizerRegex, + options: RegexOptions.Singleline, + allowPartialMatch: false, + maxDistance: 2, + onlyTopMatch) { - this.OnlyTopMatch = onlyTopMatch; } - - Regex IBooleanExtractorConfiguration.TrueRegex => TrueRegex; - - Regex IBooleanExtractorConfiguration.FalseRegex => FalseRegex; - - IDictionary IChoiceExtractorConfiguration.MapRegexes => MapRegexes; - - Regex IChoiceExtractorConfiguration.TokenRegex => TokenRegex; - - public bool AllowPartialMatch => false; - - public int MaxDistance => 2; - - public bool OnlyTopMatch { get; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Extractors/BaseBooleanExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/Extractors/BaseBooleanExtractorConfiguration.cs new file mode 100644 index 0000000000..5af2ac4c1a --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Choice/Extractors/BaseBooleanExtractorConfiguration.cs 
@@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Reflection; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.Choice.Extractors +{ + public abstract class BaseBooleanExtractorConfiguration : IBooleanExtractorConfiguration + { + public BaseBooleanExtractorConfiguration( + string trueRegex, + string falseRegex, + string tokenRegex, + RegexOptions options, + bool allowPartialMatch, + int maxDistance, + bool onlyTopMatch) + { + TrueRegex = new Regex(trueRegex, options, RegexTimeOut); + FalseRegex = new Regex(falseRegex, options, RegexTimeOut); + TokenRegex = new Regex(tokenRegex, options, RegexTimeOut); + MapRegexes = new Dictionary() + { + { TrueRegex, Constants.SYS_BOOLEAN_TRUE }, + { FalseRegex, Constants.SYS_BOOLEAN_FALSE }, + }; + AllowPartialMatch = allowPartialMatch; + MaxDistance = maxDistance; + OnlyTopMatch = onlyTopMatch; + } + + public static TimeSpan RegexTimeOut => ChoiceRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + + public Regex TrueRegex { get; set; } + + public Regex FalseRegex { get; set; } + + public IDictionary MapRegexes { get; set; } + + public Regex TokenRegex { get; set; } + + public bool AllowPartialMatch { get; set; } + + public int MaxDistance { get; set; } + + public bool OnlyTopMatch { get; set; } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Extractors/BooleanExtractor.cs b/.NET/Microsoft.Recognizers.Text.Choice/Extractors/BooleanExtractor.cs index 026637b6e4..c9334fe86f 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Extractors/BooleanExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Extractors/BooleanExtractor.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Choice +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Choice { public class BooleanExtractor : ChoiceExtractor { diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Extractors/ChoiceExtractDataResult.cs b/.NET/Microsoft.Recognizers.Text.Choice/Extractors/ChoiceExtractDataResult.cs index e32faacb94..bac6a3796b 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Extractors/ChoiceExtractDataResult.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Extractors/ChoiceExtractDataResult.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Extractors/ChoiceExtractor.cs b/.NET/Microsoft.Recognizers.Text.Choice/Extractors/ChoiceExtractor.cs index bd15fcd1e4..2cbd444138 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Extractors/ChoiceExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Extractors/ChoiceExtractor.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Extractors/IBooleanExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/Extractors/IBooleanExtractorConfiguration.cs index 49916de442..6f5e7072ba 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Extractors/IBooleanExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Extractors/IBooleanExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Extractors/IChoiceExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/Extractors/IChoiceExtractorConfiguration.cs index 3ad72699f6..f0985ed168 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Extractors/IChoiceExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Extractors/IChoiceExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Choice diff --git a/.NET/Microsoft.Recognizers.Text.Choice/French/Extractors/FrenchBooleanExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/French/Extractors/FrenchBooleanExtractorConfiguration.cs index b87e33ca7e..217d1fab0d 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/French/Extractors/FrenchBooleanExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/French/Extractors/FrenchBooleanExtractorConfiguration.cs @@ -1,44 +1,24 @@ -using System.Collections.Generic; -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.French; +using Microsoft.Recognizers.Text.Choice.Extractors; namespace Microsoft.Recognizers.Text.Choice.French { - public class FrenchBooleanExtractorConfiguration : IBooleanExtractorConfiguration + public class FrenchBooleanExtractorConfiguration : BaseBooleanExtractorConfiguration { - public static readonly Regex TrueRegex = - new Regex(ChoiceDefinitions.TrueRegex, RegexOptions.Singleline); - - public static readonly Regex FalseRegex = - new Regex(ChoiceDefinitions.FalseRegex, RegexOptions.Singleline); - - public static readonly Regex TokenRegex = - new Regex(ChoiceDefinitions.TokenizerRegex, RegexOptions.Singleline); - - public static readonly IDictionary MapRegexes = new Dictionary() - { - { TrueRegex, Constants.SYS_BOOLEAN_TRUE }, - { FalseRegex, Constants.SYS_BOOLEAN_FALSE }, - }; - public FrenchBooleanExtractorConfiguration(bool onlyTopMatch = true) + : base( + trueRegex: ChoiceDefinitions.TrueRegex, + falseRegex: ChoiceDefinitions.FalseRegex, + tokenRegex: ChoiceDefinitions.TokenizerRegex, + options: RegexOptions.Singleline, + allowPartialMatch: false, + maxDistance: 2, + onlyTopMatch) { - this.OnlyTopMatch = onlyTopMatch; } - - Regex IBooleanExtractorConfiguration.TrueRegex => TrueRegex; - - Regex IBooleanExtractorConfiguration.FalseRegex => FalseRegex; - - IDictionary IChoiceExtractorConfiguration.MapRegexes => MapRegexes; - - Regex IChoiceExtractorConfiguration.TokenRegex => TokenRegex; - - public bool AllowPartialMatch => false; - - public int MaxDistance => 2; - - public bool OnlyTopMatch { get; } } } diff --git a/.NET/Microsoft.Recognizers.Text.Choice/German/Extractors/GermanBooleanExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/German/Extractors/GermanBooleanExtractorConfiguration.cs index 87a3b15efd..2181f0f1f8 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/German/Extractors/GermanBooleanExtractorConfiguration.cs +++ 
b/.NET/Microsoft.Recognizers.Text.Choice/German/Extractors/GermanBooleanExtractorConfiguration.cs @@ -1,44 +1,24 @@ -using System.Collections.Generic; -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.German; +using Microsoft.Recognizers.Text.Choice.Extractors; namespace Microsoft.Recognizers.Text.Choice.German { - public class GermanBooleanExtractorConfiguration : IBooleanExtractorConfiguration + public class GermanBooleanExtractorConfiguration : BaseBooleanExtractorConfiguration { - public static readonly Regex TrueRegex = - new Regex(ChoiceDefinitions.TrueRegex, RegexOptions.Singleline); - - public static readonly Regex FalseRegex = - new Regex(ChoiceDefinitions.FalseRegex, RegexOptions.Singleline); - - public static readonly Regex TokenRegex = - new Regex(ChoiceDefinitions.TokenizerRegex, RegexOptions.Singleline); - - public static readonly IDictionary MapRegexes = new Dictionary() - { - { TrueRegex, Constants.SYS_BOOLEAN_TRUE }, - { FalseRegex, Constants.SYS_BOOLEAN_FALSE }, - }; - public GermanBooleanExtractorConfiguration(bool onlyTopMatch = true) + : base( + trueRegex: ChoiceDefinitions.TrueRegex, + falseRegex: ChoiceDefinitions.FalseRegex, + tokenRegex: ChoiceDefinitions.TokenizerRegex, + options: RegexOptions.Singleline, + allowPartialMatch: false, + maxDistance: 2, + onlyTopMatch) { - this.OnlyTopMatch = onlyTopMatch; } - - Regex IBooleanExtractorConfiguration.TrueRegex => TrueRegex; - - Regex IBooleanExtractorConfiguration.FalseRegex => FalseRegex; - - IDictionary IChoiceExtractorConfiguration.MapRegexes => MapRegexes; - - Regex IChoiceExtractorConfiguration.TokenRegex => TokenRegex; - - public bool AllowPartialMatch => false; - - public int MaxDistance => 2; - - public bool OnlyTopMatch { get; } } } \ No newline at end of file diff --git 
a/.NET/Microsoft.Recognizers.Text.Choice/Hindi/Extractors/HindiBooleanExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/Hindi/Extractors/HindiBooleanExtractorConfiguration.cs index 7f6365e286..b20751893a 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Hindi/Extractors/HindiBooleanExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Hindi/Extractors/HindiBooleanExtractorConfiguration.cs @@ -1,44 +1,24 @@ -using System.Collections.Generic; -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Hindi; +using Microsoft.Recognizers.Text.Choice.Extractors; namespace Microsoft.Recognizers.Text.Choice.Hindi { - public class HindiBooleanExtractorConfiguration : IBooleanExtractorConfiguration + public class HindiBooleanExtractorConfiguration : BaseBooleanExtractorConfiguration { - public static readonly Regex TrueRegex = - new Regex(ChoiceDefinitions.TrueRegex, RegexOptions.Singleline); - - public static readonly Regex FalseRegex = - new Regex(ChoiceDefinitions.FalseRegex, RegexOptions.Singleline); - - public static readonly Regex TokenRegex = - new Regex(ChoiceDefinitions.TokenizerRegex, RegexOptions.Singleline); - - public static readonly IDictionary MapRegexes = new Dictionary() - { - { TrueRegex, Constants.SYS_BOOLEAN_TRUE }, - { FalseRegex, Constants.SYS_BOOLEAN_FALSE }, - }; - public HindiBooleanExtractorConfiguration(bool onlyTopMatch = true) + : base( + trueRegex: ChoiceDefinitions.TrueRegex, + falseRegex: ChoiceDefinitions.FalseRegex, + tokenRegex: ChoiceDefinitions.TokenizerRegex, + options: RegexOptions.Singleline, + allowPartialMatch: false, + maxDistance: 2, + onlyTopMatch) { - this.OnlyTopMatch = onlyTopMatch; } - - Regex IBooleanExtractorConfiguration.TrueRegex => TrueRegex; - - Regex IBooleanExtractorConfiguration.FalseRegex => FalseRegex; - - IDictionary 
IChoiceExtractorConfiguration.MapRegexes => MapRegexes; - - Regex IChoiceExtractorConfiguration.TokenRegex => TokenRegex; - - public bool AllowPartialMatch => false; - - public int MaxDistance => 2; - - public bool OnlyTopMatch { get; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Italian/Extractors/ItalianBooleanExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/Italian/Extractors/ItalianBooleanExtractorConfiguration.cs index 8fb139db1c..5c26eaf960 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Italian/Extractors/ItalianBooleanExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Italian/Extractors/ItalianBooleanExtractorConfiguration.cs @@ -1,44 +1,24 @@ -using System.Collections.Generic; -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; +using Microsoft.Recognizers.Text.Choice.Extractors; namespace Microsoft.Recognizers.Text.Choice.Italian { - public class ItalianBooleanExtractorConfiguration : IBooleanExtractorConfiguration + public class ItalianBooleanExtractorConfiguration : BaseBooleanExtractorConfiguration { - public static readonly Regex TrueRegex = - new Regex(ChoiceDefinitions.TrueRegex, RegexOptions.Singleline); - - public static readonly Regex FalseRegex = - new Regex(ChoiceDefinitions.FalseRegex, RegexOptions.Singleline); - - public static readonly Regex TokenRegex = - new Regex(ChoiceDefinitions.TokenizerRegex, RegexOptions.Singleline); - - public static readonly IDictionary MapRegexes = new Dictionary() - { - { TrueRegex, Constants.SYS_BOOLEAN_TRUE }, - { FalseRegex, Constants.SYS_BOOLEAN_FALSE }, - }; - public ItalianBooleanExtractorConfiguration(bool onlyTopMatch = true) + : base( + trueRegex: ChoiceDefinitions.TrueRegex, + falseRegex: ChoiceDefinitions.FalseRegex, + tokenRegex: 
ChoiceDefinitions.TokenizerRegex, + options: RegexOptions.Singleline, + allowPartialMatch: false, + maxDistance: 2, + onlyTopMatch) { - this.OnlyTopMatch = onlyTopMatch; } - - Regex IBooleanExtractorConfiguration.TrueRegex => TrueRegex; - - Regex IBooleanExtractorConfiguration.FalseRegex => FalseRegex; - - IDictionary IChoiceExtractorConfiguration.MapRegexes => MapRegexes; - - Regex IChoiceExtractorConfiguration.TokenRegex => TokenRegex; - - public bool AllowPartialMatch => false; - - public int MaxDistance => 2; - - public bool OnlyTopMatch { get; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Japanese/Extractors/JapaneseBooleanExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/Japanese/Extractors/JapaneseBooleanExtractorConfiguration.cs index 6750939ea2..d73f3e1305 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Japanese/Extractors/JapaneseBooleanExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Japanese/Extractors/JapaneseBooleanExtractorConfiguration.cs @@ -1,44 +1,28 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Reflection; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Text.Choice.Extractors; namespace Microsoft.Recognizers.Text.Choice.Japanese { - public class JapaneseBooleanExtractorConfiguration : IBooleanExtractorConfiguration + public class JapaneseBooleanExtractorConfiguration : BaseBooleanExtractorConfiguration { - public static readonly Regex TrueRegex = - new Regex(ChoiceDefinitions.TrueRegex, RegexOptions.Singleline); - - public static readonly Regex FalseRegex = - new Regex(ChoiceDefinitions.FalseRegex, RegexOptions.Singleline); - - public static readonly Regex TokenRegex = - new Regex(ChoiceDefinitions.TokenizerRegex, RegexOptions.Singleline); - - public static readonly IDictionary MapRegexes = new Dictionary() - { - { TrueRegex, Constants.SYS_BOOLEAN_TRUE }, - { FalseRegex, Constants.SYS_BOOLEAN_FALSE }, - }; - public JapaneseBooleanExtractorConfiguration(bool onlyTopMatch = true) + : base( + trueRegex: ChoiceDefinitions.TrueRegex, + falseRegex: ChoiceDefinitions.FalseRegex, + tokenRegex: ChoiceDefinitions.TokenizerRegex, + options: RegexOptions.Singleline, + allowPartialMatch: false, + maxDistance: 2, + onlyTopMatch) { - this.OnlyTopMatch = onlyTopMatch; } - - Regex IBooleanExtractorConfiguration.TrueRegex => TrueRegex; - - Regex IBooleanExtractorConfiguration.FalseRegex => FalseRegex; - - IDictionary IChoiceExtractorConfiguration.MapRegexes => MapRegexes; - - Regex IChoiceExtractorConfiguration.TokenRegex => TokenRegex; - - public bool AllowPartialMatch => false; - - public int MaxDistance => 2; - - public bool OnlyTopMatch { get; } } } diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.csproj b/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.csproj index 13c54f1fc5..70b23094dd 100644 --- 
a/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.csproj +++ b/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.csproj @@ -1,35 +1,46 @@  - netstandard2.0;net462;net452;net45 + netstandard2.1;netstandard2.0;net462;net6.0 + 9 false false ../Recognizers-Text.ruleset - $(OutputPath)$(AssemblyName).xml - $(NoWarn),1573,1591,1712 - - - $(OutputPath)$(AssemblyName).xml - $(NoWarn),1573,1591,1712 - + + + true + ..\buildtools\35MSSharedLib1024.snk + true + + $(OutputPath)$(AssemblyName).xml + $(NoWarn),1573,1591,1712 + Microsoft + nlp, entity-extraction, parser-library, recognizer, boolean, alternatives, choices, netstandard2.0 + Microsoft.Recognizers.Text.Choice provides recognition of Boolean (yes/no) answers expressed in English, Portuguese, Spanish, Japanese, Chinese, + Dutch, French, German, Italian, Swedish, Bulgarian, Turkish, Hindi, and Arabic. As well as base classes to support lists of alternative choices. + MIT + https://github.com/Microsoft/Recognizers-Text + images\icon.png + © Microsoft Corporation. All rights reserved. + + - + all runtime; build; native; contentfiles; analyzers - + all runtime; build; native; contentfiles; analyzers - @@ -44,4 +55,5 @@ + diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.nuspec b/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.nuspec index 96bc9199e7..df8d950391 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.nuspec +++ b/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.nuspec @@ -6,22 +6,21 @@ $title$ Microsoft true - Microsoft.Recognizers.Text.Choice provides recognition of Boolean (yes/no) answers expressed in English, Portuguese, Spanish, and Japanese. As well as base classes to support lists of alternative choices. 
+ Microsoft.Recognizers.Text.Choice provides recognition of Boolean (yes/no) answers expressed in English, Portuguese, Spanish, Japanese, Chinese, + Dutch, French, German, Italian, Swedish, Bulgarian, Turkish, Hindi, and Arabic. As well as base classes to support lists of alternative choices. MIT https://github.com/Microsoft/Recognizers-Text - http://docs.botframework.com/images/bot_icon.png + images\icon.png © Microsoft Corporation. All rights reserved. nlp entity-extraction parser-library recognizer boolean alternatives choices netstandard2.0 - - - + diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Models/BooleanModel.cs b/.NET/Microsoft.Recognizers.Text.Choice/Models/BooleanModel.cs index 0a952e58cf..9733ed0e83 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Models/BooleanModel.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Models/BooleanModel.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Linq; namespace Microsoft.Recognizers.Text.Choice diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Models/ChoiceModel.cs b/.NET/Microsoft.Recognizers.Text.Choice/Models/ChoiceModel.cs index 450b4d59ed..a44f457a73 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Models/ChoiceModel.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Models/ChoiceModel.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Linq; @@ -6,6 +9,11 @@ namespace Microsoft.Recognizers.Text.Choice { public abstract class ChoiceModel : IModel { + + private string culture; + + private string requestedCulture; + protected ChoiceModel(IParser parser, IExtractor extractor) { this.Parser = parser; @@ -14,6 +22,10 @@ protected ChoiceModel(IParser parser, IExtractor extractor) public abstract string ModelTypeName { get; } + public string Culture => this.culture; + + public string RequestedCulture => this.requestedCulture; + protected IExtractor Extractor { get; private set; } protected IParser Parser { get; private set; } @@ -43,6 +55,12 @@ public List Parse(string query) }).ToList(); } + public void SetCultureInfo(string culture, string requestedCulture = null) + { + this.culture = culture; + this.requestedCulture = requestedCulture; + } + protected abstract SortedDictionary GetResolution(ParseResult parseResult); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Parsers/BooleanParser.cs b/.NET/Microsoft.Recognizers.Text.Choice/Parsers/BooleanParser.cs index 0d50f17ef8..a83a904b58 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Parsers/BooleanParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Parsers/BooleanParser.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Choice +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Choice { public class BooleanParser : ChoiceParser { diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Parsers/ChoiceParseDataResult.cs b/.NET/Microsoft.Recognizers.Text.Choice/Parsers/ChoiceParseDataResult.cs index a97fae0201..826f3d06a2 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Parsers/ChoiceParseDataResult.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Parsers/ChoiceParseDataResult.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Parsers/ChoiceParser.cs b/.NET/Microsoft.Recognizers.Text.Choice/Parsers/ChoiceParser.cs index 2ccfc01195..4604540861 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Parsers/ChoiceParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Parsers/ChoiceParser.cs @@ -1,4 +1,7 @@ -using System.Linq; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Linq; namespace Microsoft.Recognizers.Text.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Parsers/OtherMatchParseResult.cs b/.NET/Microsoft.Recognizers.Text.Choice/Parsers/OtherMatchParseResult.cs index ba16369401..a9bbce6b44 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Parsers/OtherMatchParseResult.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Parsers/OtherMatchParseResult.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Choice +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Choice { public class OtherMatchParseResult { diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Portuguese/Extractors/PortugueseBooleanExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/Portuguese/Extractors/PortugueseBooleanExtractorConfiguration.cs index bfa29dca34..1db7800662 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Portuguese/Extractors/PortugueseBooleanExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Portuguese/Extractors/PortugueseBooleanExtractorConfiguration.cs @@ -1,44 +1,24 @@ -using System.Collections.Generic; -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; +using Microsoft.Recognizers.Text.Choice.Extractors; namespace Microsoft.Recognizers.Text.Choice.Portuguese { - public class PortugueseBooleanExtractorConfiguration : IBooleanExtractorConfiguration + public class PortugueseBooleanExtractorConfiguration : BaseBooleanExtractorConfiguration { - public static readonly Regex TrueRegex = - new Regex(ChoiceDefinitions.TrueRegex, RegexOptions.Singleline); - - public static readonly Regex FalseRegex = - new Regex(ChoiceDefinitions.FalseRegex, RegexOptions.Singleline); - - public static readonly Regex TokenRegex = - new Regex(ChoiceDefinitions.TokenizerRegex, RegexOptions.Singleline); - - public static readonly IDictionary MapRegexes = new Dictionary() - { - { TrueRegex, Constants.SYS_BOOLEAN_TRUE }, - { FalseRegex, Constants.SYS_BOOLEAN_FALSE }, - }; - public PortugueseBooleanExtractorConfiguration(bool onlyTopMatch = true) + : base( + trueRegex: ChoiceDefinitions.TrueRegex, + falseRegex: ChoiceDefinitions.FalseRegex, + tokenRegex: ChoiceDefinitions.TokenizerRegex, + options: RegexOptions.Singleline, + allowPartialMatch: false, + maxDistance: 2, + onlyTopMatch) { - this.OnlyTopMatch = 
onlyTopMatch; } - - Regex IBooleanExtractorConfiguration.TrueRegex => TrueRegex; - - Regex IBooleanExtractorConfiguration.FalseRegex => FalseRegex; - - IDictionary IChoiceExtractorConfiguration.MapRegexes => MapRegexes; - - Regex IChoiceExtractorConfiguration.TokenRegex => TokenRegex; - - public bool AllowPartialMatch => false; - - public int MaxDistance => 2; - - public bool OnlyTopMatch { get; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Spanish/Extractors/SpanishBooleanExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/Spanish/Extractors/SpanishBooleanExtractorConfiguration.cs index d0744675ce..e11552ed65 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Spanish/Extractors/SpanishBooleanExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Spanish/Extractors/SpanishBooleanExtractorConfiguration.cs @@ -1,44 +1,24 @@ -using System.Collections.Generic; -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; +using Microsoft.Recognizers.Text.Choice.Extractors; namespace Microsoft.Recognizers.Text.Choice.Spanish { - public class SpanishBooleanExtractorConfiguration : IBooleanExtractorConfiguration + public class SpanishBooleanExtractorConfiguration : BaseBooleanExtractorConfiguration { - public static readonly Regex TrueRegex = - new Regex(ChoiceDefinitions.TrueRegex, RegexOptions.Singleline); - - public static readonly Regex FalseRegex = - new Regex(ChoiceDefinitions.FalseRegex, RegexOptions.Singleline); - - public static readonly Regex TokenRegex = - new Regex(ChoiceDefinitions.TokenizerRegex, RegexOptions.Singleline); - - public static readonly IDictionary MapRegexes = new Dictionary() - { - { TrueRegex, Constants.SYS_BOOLEAN_TRUE }, - { FalseRegex, Constants.SYS_BOOLEAN_FALSE }, - }; - public SpanishBooleanExtractorConfiguration(bool onlyTopMatch = true) + : base( + trueRegex: ChoiceDefinitions.TrueRegex, + falseRegex: ChoiceDefinitions.FalseRegex, + tokenRegex: ChoiceDefinitions.TokenizerRegex, + options: RegexOptions.Singleline, + allowPartialMatch: false, + maxDistance: 2, + onlyTopMatch) { - this.OnlyTopMatch = onlyTopMatch; } - - Regex IBooleanExtractorConfiguration.TrueRegex => TrueRegex; - - Regex IBooleanExtractorConfiguration.FalseRegex => FalseRegex; - - IDictionary IChoiceExtractorConfiguration.MapRegexes => MapRegexes; - - Regex IChoiceExtractorConfiguration.TokenRegex => TokenRegex; - - public bool AllowPartialMatch => false; - - public int MaxDistance => 2; - - public bool OnlyTopMatch { get; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Swedish/Extractors/SwedishBooleanExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/Swedish/Extractors/SwedishBooleanExtractorConfiguration.cs index 4d65ad0b28..363bae9421 100644 --- 
a/.NET/Microsoft.Recognizers.Text.Choice/Swedish/Extractors/SwedishBooleanExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Swedish/Extractors/SwedishBooleanExtractorConfiguration.cs @@ -1,44 +1,24 @@ -using System.Collections.Generic; -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Swedish; +using Microsoft.Recognizers.Text.Choice.Extractors; namespace Microsoft.Recognizers.Text.Choice.Swedish { - public class SwedishBooleanExtractorConfiguration : IBooleanExtractorConfiguration + public class SwedishBooleanExtractorConfiguration : BaseBooleanExtractorConfiguration { - public static readonly Regex TrueRegex = - new Regex(ChoiceDefinitions.TrueRegex, RegexOptions.Singleline); - - public static readonly Regex FalseRegex = - new Regex(ChoiceDefinitions.FalseRegex, RegexOptions.Singleline); - - public static readonly Regex TokenRegex = - new Regex(ChoiceDefinitions.TokenizerRegex, RegexOptions.Singleline); - - public static readonly IDictionary MapRegexes = new Dictionary() - { - { TrueRegex, Constants.SYS_BOOLEAN_TRUE }, - { FalseRegex, Constants.SYS_BOOLEAN_FALSE }, - }; - public SwedishBooleanExtractorConfiguration(bool onlyTopMatch = true) + : base( + trueRegex: ChoiceDefinitions.TrueRegex, + falseRegex: ChoiceDefinitions.FalseRegex, + tokenRegex: ChoiceDefinitions.TokenizerRegex, + options: RegexOptions.Singleline, + allowPartialMatch: false, + maxDistance: 2, + onlyTopMatch) { - this.OnlyTopMatch = onlyTopMatch; } - - Regex IBooleanExtractorConfiguration.TrueRegex => TrueRegex; - - Regex IBooleanExtractorConfiguration.FalseRegex => FalseRegex; - - IDictionary IChoiceExtractorConfiguration.MapRegexes => MapRegexes; - - Regex IChoiceExtractorConfiguration.TokenRegex => TokenRegex; - - public bool AllowPartialMatch => false; - - public int MaxDistance => 2; - - public bool 
OnlyTopMatch { get; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Turkish/Extractors/TurkishBooleanExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Choice/Turkish/Extractors/TurkishBooleanExtractorConfiguration.cs new file mode 100644 index 0000000000..0f5b31c4d1 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Choice/Turkish/Extractors/TurkishBooleanExtractorConfiguration.cs @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Turkish; +using Microsoft.Recognizers.Text.Choice.Extractors; + +namespace Microsoft.Recognizers.Text.Choice.Turkish +{ + public class TurkishBooleanExtractorConfiguration : BaseBooleanExtractorConfiguration + { + public TurkishBooleanExtractorConfiguration(bool onlyTopMatch = true) + : base( + trueRegex: ChoiceDefinitions.TrueRegex, + falseRegex: ChoiceDefinitions.FalseRegex, + tokenRegex: ChoiceDefinitions.TokenizerRegex, + options: RegexOptions.Singleline, + allowPartialMatch: false, + maxDistance: 2, + onlyTopMatch) + { + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Utilities/UnicodeUtils.cs b/.NET/Microsoft.Recognizers.Text.Choice/Utilities/UnicodeUtils.cs index 5372dc759e..caf1e7b288 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Utilities/UnicodeUtils.cs +++ b/.NET/Microsoft.Recognizers.Text.Choice/Utilities/UnicodeUtils.cs @@ -1,4 +1,9 @@ -namespace Microsoft.Recognizers.Text.Choice.Utilities +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; + +namespace Microsoft.Recognizers.Text.Choice.Utilities { using System.Collections.Generic; @@ -6,8 +11,8 @@ public static class UnicodeUtils { public static bool IsEmoji(string letter) { - const int WhereEmojiLive = 0xFFFF; // Supplementary Unicode Plane. 
This is where emoji live - return char.IsHighSurrogate(letter[0]) && char.ConvertToUtf32(letter, 0) > WhereEmojiLive; + const int whereEmojiLive = 0xFFFF; // Supplementary Unicode Plane. This is where emoji live + return char.IsHighSurrogate(letter[0]) && char.ConvertToUtf32(letter, 0) > whereEmojiLive; } public static IEnumerable Letters(string text) @@ -22,7 +27,7 @@ public static IEnumerable Letters(string text) } else if (!char.IsHighSurrogate(c)) { - yield return c.ToString(); + yield return c.ToString(CultureInfo.InvariantCulture); } else { diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoiceRecognizerCache.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoiceRecognizerCache.cs index 2675b0ef6b..54b7e9b5be 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoiceRecognizerCache.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoiceRecognizerCache.cs @@ -1,4 +1,7 @@ -using System.Linq; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Linq; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoiceRecognizerInitialization.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoiceRecognizerInitialization.cs index dde800280f..bb75cee6bd 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoiceRecognizerInitialization.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoiceRecognizerInitialization.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Linq; using Microsoft.VisualStudio.TestTools.UnitTesting; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Arabic.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Arabic.cs index 0a852c34f5..d5cbda7e3d 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Arabic.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Arabic.cs @@ -1,4 +1,7 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.DataDrivenTests.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Bulgarian.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Bulgarian.cs index c6c3cfeda2..8c1b7eb94b 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Bulgarian.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Bulgarian.cs @@ -1,4 +1,7 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.DataDrivenTests.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Chinese.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Chinese.cs index 5ad6daf679..42d675c0be 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Chinese.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Chinese.cs @@ -1,4 +1,7 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.DataDrivenTests.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Dutch.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Dutch.cs index b01297ae3b..1d2fa4cac5 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Dutch.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Dutch.cs @@ -1,4 +1,7 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.DataDrivenTests.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_English.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_English.cs index 7a8be01ea6..992f2be60a 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_English.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_English.cs @@ -1,4 +1,7 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.DataDrivenTests.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_French.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_French.cs index 51ac4fdae7..e17ee2e1e2 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_French.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_French.cs @@ -1,4 +1,7 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.DataDrivenTests.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_German.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_German.cs index 4d9356de5e..91732900c9 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_German.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_German.cs @@ -1,4 +1,7 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.DataDrivenTests.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Hindi.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Hindi.cs index 37a734dea4..6954d51799 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Hindi.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Hindi.cs @@ -1,4 +1,7 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.DataDrivenTests.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Italian.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Italian.cs index 29e1202ddf..fd53e0847f 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Italian.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Italian.cs @@ -1,4 +1,7 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.DataDrivenTests.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Japanese.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Japanese.cs index 8bb6229f74..6c43f42623 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Japanese.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Japanese.cs @@ -1,4 +1,7 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.DataDrivenTests.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Portuguese.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Portuguese.cs index e26d3acaf1..30d21e5197 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Portuguese.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Portuguese.cs @@ -1,4 +1,7 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.DataDrivenTests.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Spanish.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Spanish.cs index 9e86226f88..e32be8ffb6 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Spanish.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Spanish.cs @@ -1,4 +1,7 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.DataDrivenTests.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Swedish.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Swedish.cs index dc467cbdc4..4b8c2364ef 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Swedish.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Swedish.cs @@ -1,4 +1,7 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.DataDrivenTests.Choice { diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Turkish.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Turkish.cs new file mode 100644 index 0000000000..f319ffa72e --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Choice/TestChoice_Turkish.cs @@ -0,0 +1,18 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Microsoft.Recognizers.Text.DataDrivenTests.Choice +{ + [TestClass] + public class TestChoice_Turkish : TestBase + { + [NetCoreTestDataSource] + [TestMethod] + public void BooleanModel(TestModel testSpec) + { + TestChoice(testSpec); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTimeRecognizerCache.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTimeRecognizerCache.cs index a07db088a3..0f7d1437e0 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTimeRecognizerCache.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTimeRecognizerCache.cs @@ -1,4 +1,7 @@ -using System.Linq; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Linq; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTimeRecognizerInitialization.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTimeRecognizerInitialization.cs index a06e083939..6578cad489 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTimeRecognizerInitialization.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTimeRecognizerInitialization.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Linq; using Microsoft.Recognizers.Text.DateTime.English; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Arabic.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Arabic.cs new file mode 100644 index 0000000000..9c6eb007e0 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Arabic.cs @@ -0,0 +1,201 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using Microsoft.Recognizers.Text.DataDrivenTests; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Microsoft.Recognizers.Text.DateTime.Tests +{ + [TestClass] + public class TestDateTime_Arabic : TestBase + { + public static IDictionary Extractors { get; private set; } + + public static IDictionary Parsers { get; private set; } + + [ClassInitialize] + public static void ClassInitialize(TestContext context) + { + Extractors = new Dictionary(); + Parsers = new Dictionary(); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void TimeExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DatePeriodExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void TimePeriodExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + 
[TestMethod] + public void DateTimePeriodExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void HolidayExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DurationExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void SetExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void MergedExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void TimeParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DatePeriodParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void TimePeriodParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public new void DateTimeParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimePeriodParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + 
TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void HolidayParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DurationParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void SetParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void MergedParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeMergedParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeModel(TestModel testSpec) + { + TestDateTime(testSpec); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Chinese.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Chinese.cs index 78e1182b78..ab3995d8d5 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Chinese.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Chinese.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -90,6 +93,14 @@ public void SetExtractor(TestModel testSpec) TestDateTimeExtractor(testSpec); } + [NetCoreTestDataSource] + [TestMethod] + public void MergedExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + [NetCoreTestDataSource] [TestMethod] public void DateParser(TestModel testSpec) @@ -171,11 +182,27 @@ public void SetParser(TestModel testSpec) TestDateTimeParser(testSpec); } + [NetCoreTestDataSource] + [TestMethod] + public void MergedParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeMergedParser(testSpec); + } + [NetCoreTestDataSource] [TestMethod] public void DateTimeModel(TestModel testSpec) { TestDateTime(testSpec); } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeModelExperimentalMode(TestModel testSpec) + { + TestDateTimeAlt(testSpec); + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Dutch.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Dutch.cs index 5eebfa6f4e..3c9a2167b8 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Dutch.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Dutch.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -26,7 +29,6 @@ public void DateExtractor(TestModel testSpec) TestDateTimeExtractor(testSpec); } - /* [NetCoreTestDataSource] [TestMethod] public void TimeExtractor(TestModel testSpec) @@ -42,7 +44,6 @@ public void DatePeriodExtractor(TestModel testSpec) ExtractorInitialize(Extractors); TestDateTimeExtractor(testSpec); } - */ [NetCoreTestDataSource] [TestMethod] @@ -60,7 +61,6 @@ public void DateTimeExtractor(TestModel testSpec) TestDateTimeExtractor(testSpec); } - /* [NetCoreTestDataSource] [TestMethod] public void DateTimePeriodExtractor(TestModel testSpec) @@ -68,7 +68,6 @@ public void DateTimePeriodExtractor(TestModel testSpec) ExtractorInitialize(Extractors); TestDateTimeExtractor(testSpec); } - */ [NetCoreTestDataSource] [TestMethod] @@ -104,7 +103,6 @@ public void SetExtractor(TestModel testSpec) TestDateTimeExtractor(testSpec); } - /* [NetCoreTestDataSource] [TestMethod] public void MergedExtractor(TestModel testSpec) @@ -113,6 +111,7 @@ public void MergedExtractor(TestModel testSpec) TestDateTimeExtractor(testSpec); } + /* [NetCoreTestDataSource] [TestMethod] public void MergedExtractorSkipFromTo(TestModel testSpec) @@ -140,7 +139,6 @@ public void TimeParser(TestModel testSpec) TestDateTimeParser(testSpec); } - /* [NetCoreTestDataSource] [TestMethod] public void DatePeriodParser(TestModel testSpec) @@ -167,7 +165,6 @@ public void TimePeriodParser(TestModel testSpec) ParserInitialize(Parsers); TestDateTimeParser(testSpec); } - */ [NetCoreTestDataSource] [TestMethod] @@ -187,6 +184,7 @@ public void HolidayParser(TestModel testSpec) TestDateTimeParser(testSpec); } + /* [NetCoreTestDataSource] [TestMethod] public void TimeZoneParser(TestModel testSpec) @@ -195,6 +193,7 @@ public void TimeZoneParser(TestModel testSpec) ParserInitialize(Parsers); TestDateTimeParser(testSpec); } + */ [NetCoreTestDataSource] [TestMethod] @@ 
-205,30 +204,30 @@ public void DurationParser(TestModel testSpec) TestDateTimeParser(testSpec); } - // [NetCoreTestDataSource] - // [TestMethod] - // public void SetParser(TestModel testSpec) - // { - // ExtractorInitialize(Extractors); - // ParserInitialize(Parsers); - // TestDateTimeParser(testSpec); - // } + [NetCoreTestDataSource] + [TestMethod] + public void SetParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } - // [NetCoreTestDataSource] - // [TestMethod] - // public void MergedParser(TestModel testSpec) - // { - // ExtractorInitialize(Extractors); - // ParserInitialize(Parsers); - // TestDateTimeMergedParser(testSpec); - // } + [NetCoreTestDataSource] + [TestMethod] + public void MergedParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeMergedParser(testSpec); + } - // [NetCoreTestDataSource] - // [TestMethod] - // public void DateTimeModel(TestModel testSpec) - // { - // TestDateTime(testSpec); - // } + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeModel(TestModel testSpec) + { + TestDateTime(testSpec); + } // [NetCoreTestDataSource] // [TestMethod] diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_English.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_English.cs index 6dca415aea..504942bad7 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_English.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_English.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -254,5 +257,13 @@ public void DateTimeModelExperimentalMode(TestModel testSpec) { TestDateTimeAlt(testSpec); } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeModelTasksMode(TestModel testSpec) + { + TestDateTimeAlt(testSpec); + } + } } diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_EnglishOthers.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_EnglishOthers.cs index 2f9fbb1056..fef7ee0382 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_EnglishOthers.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_EnglishOthers.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_French.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_French.cs index 9b0ccdec93..324c0d326d 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_French.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_French.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_German.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_German.cs index 54a6452db2..74a261bb43 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_German.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_German.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Hindi.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Hindi.cs new file mode 100644 index 0000000000..1b7c2c92af --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Hindi.cs @@ -0,0 +1,202 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using Microsoft.Recognizers.Text.DataDrivenTests; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Microsoft.Recognizers.Text.DateTime.Tests +{ + [TestClass] + public class TestDateTime_Hindi : TestBase + { + public static IDictionary Extractors { get; private set; } + + public static IDictionary Parsers { get; private set; } + + [ClassInitialize] + public static void ClassInitialize(TestContext context) + { + Extractors = new Dictionary(); + Parsers = new Dictionary(); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void TimeExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DatePeriodExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void TimePeriodExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimePeriodExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void HolidayExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DurationExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void SetExtractor(TestModel 
testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void MergedExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void TimeParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DatePeriodParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void TimePeriodParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public new void DateTimeParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimePeriodParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void HolidayParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DurationParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void SetParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + 
TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void MergedParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeMergedParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeModel(TestModel testSpec) + { + TestDateTime(testSpec); + } + + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Italian.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Italian.cs index e39c4c44ea..84eb810022 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Italian.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Italian.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Japanese.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Japanese.cs index c93fdba59c..0cf494556b 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Japanese.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Japanese.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -26,6 +29,98 @@ public void DateExtractor(TestModel testSpec) TestDateTimeExtractor(testSpec); } + [NetCoreTestDataSource] + [TestMethod] + public void TimeExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DatePeriodExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void TimePeriodExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimePeriodExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void HolidayExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + /* + [NetCoreTestDataSource] + [TestMethod] + public void TimeZoneExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + */ + + [NetCoreTestDataSource] + [TestMethod] + public void DurationExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void SetExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void MergedExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + /* + 
[NetCoreTestDataSource] + [TestMethod] + public void MergedExtractorSkipFromTo(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + */ + [NetCoreTestDataSource] [TestMethod] public void DateParser(TestModel testSpec) @@ -35,6 +130,15 @@ public void DateParser(TestModel testSpec) TestDateTimeParser(testSpec); } + [NetCoreTestDataSource] + [TestMethod] + public void TimeParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + [NetCoreTestDataSource] [TestMethod] public void DatePeriodParser(TestModel testSpec) @@ -44,6 +148,15 @@ public void DatePeriodParser(TestModel testSpec) TestDateTimeParser(testSpec); } + [NetCoreTestDataSource] + [TestMethod] + public void TimePeriodParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + [NetCoreTestDataSource] [TestMethod] public new void DateTimeParser(TestModel testSpec) @@ -52,5 +165,69 @@ public void DatePeriodParser(TestModel testSpec) ParserInitialize(Parsers); TestDateTimeParser(testSpec); } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimePeriodParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void HolidayParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + /* + [NetCoreTestDataSource] + [TestMethod] + public void TimeZoneParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + */ + + [NetCoreTestDataSource] + [TestMethod] + public void DurationParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + 
public void SetParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void MergedParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeMergedParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeModel(TestModel testSpec) + { + TestDateTime(testSpec); + } + } } diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Korean.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Korean.cs new file mode 100644 index 0000000000..7dd0ded605 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Korean.cs @@ -0,0 +1,201 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using Microsoft.Recognizers.Text.DataDrivenTests; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Microsoft.Recognizers.Text.DateTime.Tests +{ + [TestClass] + public class TestDateTime_Korean : TestBase + { + public static IDictionary Extractors { get; private set; } + + public static IDictionary Parsers { get; private set; } + + [ClassInitialize] + public static void ClassInitialize(TestContext context) + { + Extractors = new Dictionary(); + Parsers = new Dictionary(); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void TimeExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DatePeriodExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + 
[TestMethod] + public void TimePeriodExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimePeriodExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void HolidayExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DurationExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void SetExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void MergedExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void TimeParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DatePeriodParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void TimePeriodParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + 
[TestMethod] + public new void DateTimeParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimePeriodParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void HolidayParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DurationParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void SetParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void MergedParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeMergedParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeModel(TestModel testSpec) + { + TestDateTime(testSpec); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Portuguese.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Portuguese.cs index 6c974e1ccc..b1e6586c74 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Portuguese.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Portuguese.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Spanish.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Spanish.cs index 410deee1ff..db2f3a483a 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Spanish.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Spanish.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -185,5 +188,12 @@ public void DateTimeModel(TestModel testSpec) { TestDateTime(testSpec); } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeModelExperimentalMode(TestModel testSpec) + { + TestDateTime(testSpec); + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Swedish.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Swedish.cs new file mode 100644 index 0000000000..aac74a31ff --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Swedish.cs @@ -0,0 +1,274 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using Microsoft.Recognizers.Text.DataDrivenTests; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Microsoft.Recognizers.Text.DateTime.Tests +{ + [TestClass] + public class TestDateTime_Swedish : TestBase + { + public static IDictionary Extractors { get; private set; } + + public static IDictionary Parsers { get; private set; } + + [ClassInitialize] + public static void ClassInitialize(TestContext context) + { + Extractors = new Dictionary(); + Parsers = new Dictionary(); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + /* + [NetCoreTestDataSource] + [TestMethod] + public void TimeExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DatePeriodExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void TimePeriodExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimePeriodExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void HolidayExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + */ + + [NetCoreTestDataSource] + [TestMethod] + public void TimeZoneExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + /* + + [NetCoreTestDataSource] + [TestMethod] + public void 
DurationExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void SetExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void MergedExtractor(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void MergedExtractorSkipFromTo(TestModel testSpec) + { + ExtractorInitialize(Extractors); + TestDateTimeExtractor(testSpec); + } + + */ + + [NetCoreTestDataSource] + [TestMethod] + public void DateParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + /* + + [NetCoreTestDataSource] + [TestMethod] + public void TimeParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DatePeriodParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void TimePeriodParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public new void DateTimeParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimePeriodParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void HolidayParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + 
TestDateTimeParser(testSpec); + } + + */ + + [NetCoreTestDataSource] + [TestMethod] + public void TimeZoneParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + /* + + [NetCoreTestDataSource] + [TestMethod] + public void DurationParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void SetParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void MergedParser(TestModel testSpec) + { + ExtractorInitialize(Extractors); + ParserInitialize(Parsers); + TestDateTimeMergedParser(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeModel(TestModel testSpec) + { + TestDateTime(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeModelSplitDateAndTime(TestModel testSpec) + { + TestDateTime(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeModelCalendarMode(TestModel testSpec) + { + TestDateTimeAlt(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeModelExtendedTypes(TestModel testSpec) + { + TestDateTimeAlt(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeModelComplexCalendar(TestModel testSpec) + { + TestDateTimeAlt(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeModelExperimentalMode(TestModel testSpec) + { + TestDateTimeAlt(testSpec); + } + */ + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Turkish.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Turkish.cs index 105076c422..48133785c5 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Turkish.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_Turkish.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Matcher/SimpleTokenizerTest.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Matcher/SimpleTokenizerTest.cs index 2908008d87..77e7f210e7 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Matcher/SimpleTokenizerTest.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Matcher/SimpleTokenizerTest.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.Matcher; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.Recognizers.Text.Matcher; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.DataDrivenTests.Matcher @@ -6,7 +9,7 @@ namespace Microsoft.Recognizers.Text.DataDrivenTests.Matcher [TestClass] public class SimpleTokenizerTest { - private ITokenizer tokenizer = new SimpleTokenizer(); + private readonly ITokenizer tokenizer = new SimpleTokenizer(); [TestMethod] public void EnglishTokenizedTest() diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Matcher/StringMatcherTest.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Matcher/StringMatcherTest.cs index 5da9415e95..5179515198 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Matcher/StringMatcherTest.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Matcher/StringMatcherTest.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Linq; using Microsoft.Recognizers.Text.Matcher; using Microsoft.VisualStudio.TestTools.UnitTesting; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Microsoft.Recognizers.Text.DataDrivenTests.csproj b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Microsoft.Recognizers.Text.DataDrivenTests.csproj index c2e048e92d..619fa84b69 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Microsoft.Recognizers.Text.DataDrivenTests.csproj +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Microsoft.Recognizers.Text.DataDrivenTests.csproj @@ -1,10 +1,12 @@  - netcoreapp2.1 + net6.0 + 9 false © Microsoft Corporation. All rights reserved. + true full @@ -15,6 +17,7 @@ 4 ../Recognizers-Text.ruleset + pdbonly true @@ -24,6 +27,7 @@ 4 ../Recognizers-Text.ruleset + $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + + - - - - - + + + + + all runtime; build; native; contentfiles; analyzers @@ -54,4 +59,5 @@ + \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NetCoreTestDataSourceAttribute.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NetCoreTestDataSourceAttribute.cs index d0fb3107c0..e0d2d8f9b1 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NetCoreTestDataSourceAttribute.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NetCoreTestDataSourceAttribute.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.IO; using System.Linq; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/LongFormTestConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/LongFormTestConfiguration.cs index 4562fd2d56..19459ca230 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/LongFormTestConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/LongFormTestConfiguration.cs @@ -1,7 +1,11 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Number.Tests @@ -40,13 +44,15 @@ public LongFormTestConfiguration(char decimalSep, char nonDecimalSep) public Regex FractionPrepositionRegex { get; } + public Regex RoundMultiplierRegex { get; } + public string FractionMarkerToken { get; } public Regex HalfADozenRegex { get; } public string HalfADozenText { get; } - public string LangMarker { get; } = "SelfDefined"; + public string LanguageMarker { get; } = "SelfDefined"; public char NonDecimalSeparatorChar { get; } @@ -69,6 +75,8 @@ public LongFormTestConfiguration(char decimalSep, char nonDecimalSep) public bool IsMultiDecimalSeparatorCulture { get; } + public IEnumerable NonStandardSeparatorVariants => Enumerable.Empty(); + public IEnumerable NormalizeTokenSet(IEnumerable tokens, ParseResult context) { throw new NotImplementedException(); @@ -78,5 +86,11 @@ public long ResolveCompositeNumber(string numberStr) { throw new NotImplementedException(); } + + public (bool isRelevant, double value) GetLangSpecificIntValue(List matchStrs) + { + return (false, double.MinValue); + } + } } diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestDecimalAndThousandsSeparators.cs 
b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestDecimalAndThousandsSeparators.cs index 00e1407424..b58e751afc 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestDecimalAndThousandsSeparators.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestDecimalAndThousandsSeparators.cs @@ -1,4 +1,7 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.Number.Tests { diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumberRecognizerCache.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumberRecognizerCache.cs index 28bbd4f527..1db3b43f1e 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumberRecognizerCache.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumberRecognizerCache.cs @@ -1,4 +1,7 @@ -using System.Linq; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Linq; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumberRecognizerInitialization.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumberRecognizerInitialization.cs index cd5ccaf122..e1f47f163e 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumberRecognizerInitialization.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumberRecognizerInitialization.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Linq; using Microsoft.Recognizers.Text.Number.English; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -18,10 +21,12 @@ public class TestNumberRecognizerInitialization public TestNumberRecognizerInitialization() { + var numConfig = new BaseNumberOptionsConfiguration(EnglishCulture, NumberOptions.None); + var pureNumConfig = new BaseNumberOptionsConfiguration(EnglishCulture, NumberOptions.None, NumberMode.PureNumber); + controlModel = new NumberModel( - AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, - new EnglishNumberParserConfiguration(new BaseNumberOptionsConfiguration(EnglishCulture))), - NumberExtractor.GetInstance(NumberMode.PureNumber)); + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new EnglishNumberParserConfiguration(numConfig)), + NumberExtractor.GetInstance(pureNumConfig)); } [TestMethod] diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Arabic.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Arabic.cs new file mode 100644 index 0000000000..ee2d89b32a --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Arabic.cs @@ -0,0 +1,48 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.Recognizers.Text.DataDrivenTests; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Microsoft.Recognizers.Text.Number.Tests +{ + [TestClass] + public class TestNumber_Arabic : TestBase + { + [NetCoreTestDataSource] + [TestMethod] + public void OrdinalModelSuppressExtendedTypes(TestModel testSpec) + { + TestNumber(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void NumberModel(TestModel testSpec) + { + TestNumber(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void OrdinalModel(TestModel testSpec) + { + TestNumber(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void PercentModel(TestModel testSpec) + { + TestNumber(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void NumberRangeModel(TestModel testSpec) + { + TestNumber(testSpec); + } + + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Chinese.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Chinese.cs index 1109658612..42c961d540 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Chinese.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Chinese.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.Number.Tests diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Dutch.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Dutch.cs index 5c462d724a..47a677172f 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Dutch.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Dutch.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.Number.Tests @@ -6,6 +9,13 @@ namespace Microsoft.Recognizers.Text.Number.Tests [TestClass] public class TestNumber_Dutch : TestBase { + [NetCoreTestDataSource] + [TestMethod] + public void OrdinalModelSuppressExtendedTypes(TestModel testSpec) + { + TestNumber(testSpec); + } + [NetCoreTestDataSource] [TestMethod] public void NumberModel(TestModel testSpec) @@ -50,6 +60,7 @@ public void PercentModelPercentMode(TestModel testSpec) { TestNumber(testSpec); } + */ [NetCoreTestDataSource] [TestMethod] @@ -58,6 +69,7 @@ public void NumberRangeModel(TestModel testSpec) TestNumber(testSpec); } + /* [NetCoreTestDataSource] [TestMethod] public void NumberRangeModelExperimentalMode(TestModel testSpec) diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_English.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_English.cs index 9639b50957..c652efabe1 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_English.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_English.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft 
Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.Number.Tests diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_French.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_French.cs index dda9c08760..9584b01edd 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_French.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_French.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.Number.Tests @@ -6,6 +9,13 @@ namespace Microsoft.Recognizers.Text.Number.Tests [TestClass] public class TestNumber_French : TestBase { + [NetCoreTestDataSource] + [TestMethod] + public void OrdinalModelSuppressExtendedTypes(TestModel testSpec) + { + TestNumber(testSpec); + } + [NetCoreTestDataSource] [TestMethod] public void NumberModel(TestModel testSpec) @@ -26,5 +36,30 @@ public void PercentModel(TestModel testSpec) { TestNumber(testSpec); } + + [NetCoreTestDataSource] + [TestMethod] + public void PercentModelPercentMode(TestModel testSpec) + { + TestNumber(testSpec); + } + + /* + [NetCoreTestDataSource] + [TestMethod] + public void NumberRangeModel(TestModel testSpec) + { + TestNumber(testSpec); + } + */ + + /* + [NetCoreTestDataSource] + [TestMethod] + public void NumberRangeModelExperimentalMode(TestModel testSpec) + { + TestNumber(testSpec); + } + */ } } diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_German.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_German.cs index ed9596ddbb..d527841f7b 100644 --- 
a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_German.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_German.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.Number.Tests @@ -6,6 +9,13 @@ namespace Microsoft.Recognizers.Text.Number.Tests [TestClass] public class TestNumber_German : TestBase { + [NetCoreTestDataSource] + [TestMethod] + public void OrdinalModelSuppressExtendedTypes(TestModel testSpec) + { + TestNumber(testSpec); + } + [NetCoreTestDataSource] [TestMethod] public void NumberModel(TestModel testSpec) @@ -33,5 +43,21 @@ public void PercentModel(TestModel testSpec) { TestNumber(testSpec); } + + [NetCoreTestDataSource] + [TestMethod] + public void NumberRangeModel(TestModel testSpec) + { + TestNumber(testSpec); + } + + /* + [NetCoreTestDataSource] + [TestMethod] + public void NumberRangeModelExperimentalMode(TestModel testSpec) + { + TestNumber(testSpec); + } + */ } } diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Hindi.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Hindi.cs index e9cb6541ed..bc0eca2bc8 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Hindi.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Hindi.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.Number.Tests @@ -6,7 +9,12 @@ namespace Microsoft.Recognizers.Text.Number.Tests [TestClass] public class TestNumber_Hindi : TestBase { - public static TestResources TestResources { get; protected set; } + [NetCoreTestDataSource] + [TestMethod] + public void OrdinalModelSuppressExtendedTypes(TestModel testSpec) + { + TestNumber(testSpec); + } [NetCoreTestDataSource] [TestMethod] diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Italian.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Italian.cs index 51068e140f..f9cc051d1f 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Italian.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Italian.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.Number.Tests @@ -6,6 +9,13 @@ namespace Microsoft.Recognizers.Text.Number.Tests [TestClass] public class TestNumber_Italian : TestBase { + [NetCoreTestDataSource] + [TestMethod] + public void OrdinalModelSuppressExtendedTypes(TestModel testSpec) + { + TestNumber(testSpec); + } + [NetCoreTestDataSource] [TestMethod] public void NumberModel(TestModel testSpec) diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Japanese.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Japanese.cs index 7775e13019..f3c1215a80 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Japanese.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Japanese.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.Number.Tests @@ -6,6 +9,13 @@ namespace Microsoft.Recognizers.Text.Number.Tests [TestClass] public class TestNumber_Japanese : TestBase { + [NetCoreTestDataSource] + [TestMethod] + public void OrdinalModelSuppressExtendedTypes(TestModel testSpec) + { + TestNumber(testSpec); + } + [NetCoreTestDataSource] [TestMethod] public void NumberModel(TestModel testSpec) @@ -27,7 +37,6 @@ public void PercentModel(TestModel testSpec) TestNumber(testSpec); } - /* [NetCoreTestDataSource] [TestMethod] public void NumberRangeModel(TestModel testSpec) @@ -35,6 +44,7 @@ public void NumberRangeModel(TestModel testSpec) TestNumber(testSpec); } + /* [NetCoreTestDataSource] [TestMethod] public void NumberRangeModelExperimentalMode(TestModel testSpec) diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Korean.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Korean.cs index 260256ac51..fb31ff8dd5 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Korean.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Korean.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.Number.Tests @@ -6,6 +9,13 @@ namespace Microsoft.Recognizers.Text.Number.Tests [TestClass] public class TestNumber_Korean : TestBase { + [NetCoreTestDataSource] + [TestMethod] + public void OrdinalModelSuppressExtendedTypes(TestModel testSpec) + { + TestNumber(testSpec); + } + [NetCoreTestDataSource] [TestMethod] public void NumberModel(TestModel testSpec) @@ -19,5 +29,19 @@ public void OrdinalModel(TestModel testSpec) { TestNumber(testSpec); } + + [NetCoreTestDataSource] + [TestMethod] + public void NumberRangeModel(TestModel testSpec) + { + TestNumber(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void PercentModel(TestModel testSpec) + { + TestNumber(testSpec); + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Portuguese.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Portuguese.cs index a01e1f06c5..95525034ce 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Portuguese.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Portuguese.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.Number.Tests @@ -6,6 +9,13 @@ namespace Microsoft.Recognizers.Text.Number.Tests [TestClass] public class TestNumber_Portuguese : TestBase { + [NetCoreTestDataSource] + [TestMethod] + public void OrdinalModelSuppressExtendedTypes(TestModel testSpec) + { + TestNumber(testSpec); + } + [NetCoreTestDataSource] [TestMethod] public void NumberModel(TestModel testSpec) @@ -26,5 +36,12 @@ public void PercentModel(TestModel testSpec) { TestNumber(testSpec); } + + [NetCoreTestDataSource] + [TestMethod] + public void NumberRangeModel(TestModel testSpec) + { + TestNumber(testSpec); + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Spanish.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Spanish.cs index 91c451ec6b..aa942d83a9 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Spanish.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Spanish.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.Number.Tests @@ -6,6 +9,13 @@ namespace Microsoft.Recognizers.Text.Number.Tests [TestClass] public class TestNumber_Spanish : TestBase { + [NetCoreTestDataSource] + [TestMethod] + public void OrdinalModelSuppressExtendedTypes(TestModel testSpec) + { + TestNumber(testSpec); + } + [NetCoreTestDataSource] [TestMethod] public void NumberModel(TestModel testSpec) diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_SpanishMexican.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_SpanishMexican.cs new file mode 100644 index 0000000000..9399c6182d --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_SpanishMexican.cs @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.Recognizers.Text.DataDrivenTests; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Microsoft.Recognizers.Text.Number.Tests +{ + [TestClass] + public class TestNumber_SpanishMexican : TestBase + { + [NetCoreTestDataSource] + [TestMethod] + public void NumberModel(TestModel testSpec) + { + TestNumber(testSpec); + } + + /* + [NetCoreTestDataSource] + [TestMethod] + public void OrdinalModel(TestModel testSpec) + { + TestNumber(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void PercentModel(TestModel testSpec) + { + TestNumber(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void NumberRangeModel(TestModel testSpec) + { + TestNumber(testSpec); + } + */ + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Swedish.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Swedish.cs new file mode 100644 index 0000000000..ee8429f6b4 --- /dev/null +++ 
b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Swedish.cs @@ -0,0 +1,85 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.Recognizers.Text.DataDrivenTests; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Microsoft.Recognizers.Text.Number.Tests +{ + [TestClass] + public class TestNumber_Swedish : TestBase + { + [NetCoreTestDataSource] + [TestMethod] + public void OrdinalModelSuppressExtendedTypes(TestModel testSpec) + { + TestNumber(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void NumberModel(TestModel testSpec) + { + TestNumber(testSpec); + } + + /* + [NetCoreTestDataSource] + [TestMethod] + public void NumberModelPercentMode(TestModel testSpec) + { + TestNumber(testSpec); + } + */ + + /* + [NetCoreTestDataSource] + [TestMethod] + public void NumberModelExperimentalMode(TestModel testSpec) + { + TestNumber(testSpec); + } + */ + + [NetCoreTestDataSource] + [TestMethod] + public void OrdinalModel(TestModel testSpec) + { + TestNumber(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void PercentModel(TestModel testSpec) + { + TestNumber(testSpec); + } + + /* + [NetCoreTestDataSource] + [TestMethod] + public void PercentModelPercentMode(TestModel testSpec) + { + TestNumber(testSpec); + } + */ + + /* + [NetCoreTestDataSource] + [TestMethod] + public void NumberRangeModel(TestModel testSpec) + { + TestNumber(testSpec); + } + */ + + /* + [NetCoreTestDataSource] + [TestMethod] + public void NumberRangeModelExperimentalMode(TestModel testSpec) + { + TestNumber(testSpec); + } + */ + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Turkish.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Turkish.cs index 36424ff0e9..5a56dde579 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Turkish.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumber_Turkish.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.Number.Tests @@ -6,7 +9,12 @@ namespace Microsoft.Recognizers.Text.Number.Tests [TestClass] public class TestNumber_Turkish : TestBase { - public static TestResources TestResources { get; protected set; } + [NetCoreTestDataSource] + [TestMethod] + public void OrdinalModelSuppressExtendedTypes(TestModel testSpec) + { + TestNumber(testSpec); + } [NetCoreTestDataSource] [TestMethod] diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestParserFactory.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestParserFactory.cs index c215fb6286..29553389f7 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestParserFactory.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestParserFactory.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.Number.Chinese; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.Recognizers.Text.Number.Chinese; using Microsoft.Recognizers.Text.Number.English; using Microsoft.Recognizers.Text.Number.French; using Microsoft.Recognizers.Text.Number.German; @@ -18,7 +21,7 @@ public class TestParserFactory [TestMethod] public void TestEnglishParser() { - var config = new EnglishNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.English)); + var config = new EnglishNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.English, NumberOptions.None)); IParser parserNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parserCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -32,7 +35,7 @@ public void TestEnglishParser() [TestMethod] public void TestSpanishParser() { - var config = new SpanishNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Spanish)); + var config = new SpanishNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Spanish, NumberOptions.None)); IParser parserNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parserCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -46,7 +49,7 @@ public void TestSpanishParser() [TestMethod] public void TestPortugueseParser() { - var config = new PortugueseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Portuguese)); + var config = new PortugueseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Portuguese, NumberOptions.None)); IParser parserNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parserCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -60,7 +63,7 @@ public void TestPortugueseParser() [TestMethod] public void TestChineseParser() { - var config = new ChineseNumberParserConfiguration(new 
BaseNumberOptionsConfiguration(Culture.Chinese)); + var config = new ChineseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Chinese, NumberOptions.None)); IParser parserNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parserCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -74,7 +77,7 @@ public void TestChineseParser() [TestMethod] public void TestJapaneseParser() { - var config = new JapaneseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Japanese)); + var config = new JapaneseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Japanese, NumberOptions.None)); IParser parserNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parserCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -88,7 +91,7 @@ public void TestJapaneseParser() [TestMethod] public void TestKoreanParser() { - var config = new KoreanNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Korean)); + var config = new KoreanNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Korean, NumberOptions.None)); IParser parserNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); @@ -98,7 +101,7 @@ public void TestKoreanParser() [TestMethod] public void TestFrenchParser() { - var config = new FrenchNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.French)); + var config = new FrenchNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.French, NumberOptions.None)); IParser parseNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parseCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -112,7 +115,7 @@ public void TestFrenchParser() [TestMethod] public void TestGermanParser() { - var config 
= new GermanNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.German)); + var config = new GermanNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.German, NumberOptions.None)); IParser parseNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parseCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -126,7 +129,7 @@ public void TestGermanParser() [TestMethod] public void TestItalianParser() { - var config = new ItalianNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Italian)); + var config = new ItalianNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Italian, NumberOptions.None)); IParser parseNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parseCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -140,7 +143,7 @@ public void TestItalianParser() [TestMethod] public void TestTurkishParser() { - var config = new TurkishNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Turkish)); + var config = new TurkishNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Turkish, NumberOptions.None)); IParser parseNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parseCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnitRecognizerCache.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnitRecognizerCache.cs index 1cffd4ea80..58df3d869b 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnitRecognizerCache.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnitRecognizerCache.cs @@ -1,4 +1,7 @@ -using System.Linq; 
+// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Linq; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnitRecognizerInitialization.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnitRecognizerInitialization.cs index 09f9a7f426..fcc08ca82a 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnitRecognizerInitialization.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnitRecognizerInitialization.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Linq; using Microsoft.VisualStudio.TestTools.UnitTesting; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Chinese.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Chinese.cs index 099f9d2b54..c377be9f2f 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Chinese.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Chinese.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.NumberWithUnit.Tests diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Dutch.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Dutch.cs index c9c57aefdb..cef65cb170 100755 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Dutch.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Dutch.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.NumberWithUnit.Tests diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_English.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_English.cs index a6990b02a7..394e8acecd 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_English.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_English.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.NumberWithUnit.Tests diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_French.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_French.cs index 43ae602e91..e1272ecbcd 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_French.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_French.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.NumberWithUnit.Tests diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_German.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_German.cs index 00da8c9847..98fe952919 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_German.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_German.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.NumberWithUnit.Tests diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Hindi.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Hindi.cs index c7ab855e2f..281021cd62 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Hindi.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Hindi.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.NumberWithUnit.Tests diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Italian.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Italian.cs index 7f7fbdb5f9..430a4e7e75 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Italian.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Italian.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.NumberWithUnit.Tests diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Japanese.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Japanese.cs index f54c302351..4d80a41171 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Japanese.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Japanese.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.NumberWithUnit.Tests @@ -6,6 +9,7 @@ namespace Microsoft.Recognizers.Text.NumberWithUnit.Tests [TestClass] public class TestNumberWithUnit_Japanese : TestBase { + [NetCoreTestDataSource] [TestMethod] public void AgeModel(TestModel testSpec) @@ -19,5 +23,19 @@ public void CurrencyModel(TestModel testSpec) { TestCurrency(testSpec); } + + [NetCoreTestDataSource] + [TestMethod] + public void DimensionModel(TestModel testSpec) + { + TestNumberWithUnit(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void TemperatureModel(TestModel testSpec) + { + TestNumberWithUnit(testSpec); + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Korean.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Korean.cs new file mode 100644 index 0000000000..9ec2021968 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Korean.cs @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
namespace Microsoft.Recognizers.Text.NumberWithUnit.Tests
{
    /// <summary>
    /// Data-driven NumberWithUnit tests for Korean. Currency specs go through
    /// TestCurrency; every other model goes through TestNumberWithUnit.
    /// </summary>
    [TestClass]
    public class TestNumberWithUnit_Korean : TestBase
    {
        /// <summary>Forwards <paramref name="testSpec"/> to TestNumberWithUnit.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void AgeModel(TestModel testSpec) => TestNumberWithUnit(testSpec);

        /// <summary>Forwards <paramref name="testSpec"/> to TestCurrency.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void CurrencyModel(TestModel testSpec) => TestCurrency(testSpec);

        /// <summary>Forwards <paramref name="testSpec"/> to TestNumberWithUnit.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void DimensionModel(TestModel testSpec) => TestNumberWithUnit(testSpec);

        /// <summary>Forwards <paramref name="testSpec"/> to TestNumberWithUnit.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void TemperatureModel(TestModel testSpec) => TestNumberWithUnit(testSpec);
    }
}
+ +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.NumberWithUnit.Tests diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Spanish.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Spanish.cs index 6924041ffe..7e800cf096 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Spanish.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Spanish.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.NumberWithUnit.Tests diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Swedish.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Swedish.cs new file mode 100644 index 0000000000..cac7eedddd --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/NumberWithUnit/TestNumberWithUnit_Swedish.cs @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
namespace Microsoft.Recognizers.Text.NumberWithUnit.Tests
{
    /// <summary>
    /// Data-driven NumberWithUnit tests for Swedish. Currency specs go through
    /// TestCurrency; every other model goes through TestNumberWithUnit.
    /// </summary>
    [TestClass]
    public class TestNumberWithUnit_Swedish : TestBase
    {
        /// <summary>Forwards <paramref name="testSpec"/> to TestNumberWithUnit.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void AgeModel(TestModel testSpec) => TestNumberWithUnit(testSpec);

        /// <summary>Forwards <paramref name="testSpec"/> to TestCurrency.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void CurrencyModel(TestModel testSpec) => TestCurrency(testSpec);

        /// <summary>Forwards <paramref name="testSpec"/> to TestNumberWithUnit.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void DimensionModel(TestModel testSpec) => TestNumberWithUnit(testSpec);

        /// <summary>Forwards <paramref name="testSpec"/> to TestNumberWithUnit.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void TemperatureModel(TestModel testSpec) => TestNumberWithUnit(testSpec);
    }
}
namespace Microsoft.Recognizers.Text.DataDrivenTests
{
    /// <summary>
    /// A <see cref="StringEnumConverter"/> that tolerates a missing enum value:
    /// when the reader's current value is null or an empty string it yields 0
    /// instead of handing the token to the base converter.
    /// </summary>
    public class PlatformEnumConverter : StringEnumConverter
    {
        /// <summary>
        /// Reads an enum value, mapping a null or empty value to 0.
        /// </summary>
        /// <param name="reader">Reader positioned on the value to convert.</param>
        /// <param name="objectType">Target type, passed through to the base converter.</param>
        /// <param name="existingValue">Existing value, passed through to the base converter.</param>
        /// <param name="serializer">Calling serializer, passed through to the base converter.</param>
        /// <returns>
        /// The integer 0 when the current value is null or empty; otherwise the
        /// result of <see cref="StringEnumConverter.ReadJson"/>.
        /// </returns>
        public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
        {
            // reader.Value is null when the current token carries no value (for
            // example a JSON null). Calling ToString() on it directly threw
            // NullReferenceException before the IsNullOrEmpty guard could run;
            // the null-conditional routes that case into the same "no value"
            // branch as an empty string.
            if (string.IsNullOrEmpty(reader.Value?.ToString()))
            {
                return 0;
            }

            return base.ReadJson(reader, objectType, existingValue, serializer);
        }
    }
}
+ +using System.Linq; using Microsoft.Recognizers.Text.DataDrivenTests; using Microsoft.VisualStudio.TestTools.UnitTesting; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Sequence/TestSequenceRecognizerInitialization.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Sequence/TestSequenceRecognizerInitialization.cs index cc2b5cb526..d815185b49 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Sequence/TestSequenceRecognizerInitialization.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Sequence/TestSequenceRecognizerInitialization.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Linq; using Microsoft.Recognizers.Text.Sequence.English; using Microsoft.VisualStudio.TestTools.UnitTesting; diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Sequence/TestSequence_Chinese.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Sequence/TestSequence_Chinese.cs index 82cbc67ea2..6f839cb88e 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Sequence/TestSequence_Chinese.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Sequence/TestSequence_Chinese.cs @@ -1,4 +1,7 @@ -using Microsoft.Recognizers.Text.DataDrivenTests; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
namespace Microsoft.Recognizers.Text.Sequence.Tests
{
    /// <summary>
    /// Data-driven Sequence tests for Dutch. Each test method passes its
    /// <see cref="TestModel"/> to the matching inherited helper.
    /// </summary>
    [TestClass]
    public class TestSequence_Dutch : TestBase
    {
        /// <summary>Forwards <paramref name="testSpec"/> to TestPhoneNumber.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void PhoneNumberModel(TestModel testSpec) => TestPhoneNumber(testSpec);

        /// <summary>Forwards <paramref name="testSpec"/> to TestQuotedText.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void QuotedTextModel(TestModel testSpec) => TestQuotedText(testSpec);
    }
}
namespace Microsoft.Recognizers.Text.Sequence.Tests
{
    /// <summary>
    /// Data-driven Sequence tests for French. Each test method passes its
    /// <see cref="TestModel"/> to the matching inherited helper.
    /// </summary>
    [TestClass]
    public class TestSequence_French : TestBase
    {
        /// <summary>Forwards <paramref name="testSpec"/> to TestPhoneNumber.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void PhoneNumberModel(TestModel testSpec) => TestPhoneNumber(testSpec);

        /// <summary>Forwards <paramref name="testSpec"/> to TestQuotedText.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void QuotedTextModel(TestModel testSpec) => TestQuotedText(testSpec);
    }
}
namespace Microsoft.Recognizers.Text.Sequence.Tests
{
    /// <summary>
    /// Data-driven Sequence tests for German. Each test method passes its
    /// <see cref="TestModel"/> to the matching inherited helper.
    /// </summary>
    [TestClass]
    public class TestSequence_German : TestBase
    {
        /// <summary>Forwards <paramref name="testSpec"/> to TestPhoneNumber.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void PhoneNumberModel(TestModel testSpec) => TestPhoneNumber(testSpec);

        /// <summary>Forwards <paramref name="testSpec"/> to TestQuotedText.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void QuotedTextModel(TestModel testSpec) => TestQuotedText(testSpec);
    }

    /// <summary>
    /// Data-driven Sequence tests for Hindi. Each test method passes its
    /// <see cref="TestModel"/> to the matching inherited helper.
    /// </summary>
    [TestClass]
    public class TestSequence_Hindi : TestBase
    {
        /// <summary>Forwards <paramref name="testSpec"/> to TestPhoneNumber.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void PhoneNumberModel(TestModel testSpec) => TestPhoneNumber(testSpec);

        /// <summary>Forwards <paramref name="testSpec"/> to TestQuotedText.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void QuotedTextModel(TestModel testSpec) => TestQuotedText(testSpec);
    }
}
namespace Microsoft.Recognizers.Text.Sequence.Tests
{
    /// <summary>
    /// Data-driven Sequence tests for Italian. Each test method passes its
    /// <see cref="TestModel"/> to the matching inherited helper.
    /// </summary>
    [TestClass]
    public class TestSequence_Italian : TestBase
    {
        /// <summary>Forwards <paramref name="testSpec"/> to TestPhoneNumber.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void PhoneNumberModel(TestModel testSpec) => TestPhoneNumber(testSpec);

        /// <summary>Forwards <paramref name="testSpec"/> to TestQuotedText.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void QuotedTextModel(TestModel testSpec) => TestQuotedText(testSpec);
    }
}
namespace Microsoft.Recognizers.Text.Sequence.Tests
{
    /// <summary>
    /// Data-driven Sequence tests for Korean. Each test method passes its
    /// <see cref="TestModel"/> to the matching inherited helper.
    /// </summary>
    [TestClass]
    public class TestSequence_Korean : TestBase
    {
        /// <summary>Forwards <paramref name="testSpec"/> to TestPhoneNumber.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void PhoneNumberModel(TestModel testSpec) => TestPhoneNumber(testSpec);

        /// <summary>Forwards <paramref name="testSpec"/> to TestQuotedText.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void QuotedTextModel(TestModel testSpec) => TestQuotedText(testSpec);
    }
}
namespace Microsoft.Recognizers.Text.Sequence.Tests
{
    /// <summary>
    /// Data-driven Sequence tests for Spanish. Each test method passes its
    /// <see cref="TestModel"/> to the matching inherited helper.
    /// </summary>
    [TestClass]
    public class TestSequence_Spanish : TestBase
    {
        /// <summary>Forwards <paramref name="testSpec"/> to TestPhoneNumber.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void PhoneNumberModel(TestModel testSpec) => TestPhoneNumber(testSpec);

        /// <summary>Forwards <paramref name="testSpec"/> to TestQuotedText.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void QuotedTextModel(TestModel testSpec) => TestQuotedText(testSpec);
    }

    /// <summary>
    /// Data-driven Sequence tests for Swedish. Only the quoted-text model has a
    /// test method in this class.
    /// </summary>
    [TestClass]
    public class TestSequence_Swedish : TestBase
    {
        /// <summary>Forwards <paramref name="testSpec"/> to TestQuotedText.</summary>
        [NetCoreTestDataSource]
        [TestMethod]
        public void QuotedTextModel(TestModel testSpec) => TestQuotedText(testSpec);
    }
}
+ +using Microsoft.Recognizers.Text.DataDrivenTests; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Microsoft.Recognizers.Text.Sequence.Tests +{ + [TestClass] + public class TestSequence_Turkish : TestBase + { + [NetCoreTestDataSource] + [TestMethod] + public void PhoneNumberModel(TestModel testSpec) + { + TestPhoneNumber(testSpec); + } + + [NetCoreTestDataSource] + [TestMethod] + public void QuotedTextModel(TestModel testSpec) + { + TestQuotedText(testSpec); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestBase.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestBase.cs index 79a3fa87c9..5410c887e9 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestBase.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestBase.cs @@ -1,11 +1,14 @@ -using System; -using System.Collections; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Collections.Immutable; using System.Diagnostics; using System.Linq; using Microsoft.Recognizers.Text.DateTime; +using Microsoft.Recognizers.Text.Utilities; using Microsoft.VisualStudio.TestTools.UnitTesting; using Newtonsoft.Json; @@ -60,7 +63,7 @@ public void TestNumber(TestModel testSpec) public void TestNumberWithUnit(TestModel testSpec) { TestPreValidation(testSpec); - ValidateResults(testSpec, new[] { ResolutionKey.Unit }); + ValidateResults(testSpec, new[] { ResolutionKey.Unit, ResolutionKey.SubType }); } public void TestCurrency(TestModel testSpec) @@ -78,32 +81,61 @@ public void TestDateTime(TestModel testSpec) Assert.AreEqual(expectedResults.Count(), actualResults.Count, GetMessage(testSpec)); - foreach (var tuple in Enumerable.Zip(expectedResults, actualResults, Tuple.Create)) + try { - var expected = tuple.Item1; - var actual = tuple.Item2; - Assert.AreEqual(expected.Text, actual.Text, GetMessage(testSpec)); - Assert.AreEqual(expected.TypeName, 
actual.TypeName, GetMessage(testSpec)); - Assert.AreEqual(expected.Start, actual.Start, GetMessage(testSpec)); - Assert.AreEqual(expected.End, actual.End, GetMessage(testSpec)); + foreach (var tuple in Enumerable.Zip(expectedResults, actualResults, Tuple.Create)) + { + var expected = tuple.Item1; + var actual = tuple.Item2; - var values = actual.Resolution as IDictionary; + Assert.AreEqual(expected.Text, actual.Text, GetMessage(testSpec)); + Assert.AreEqual(expected.TypeName, actual.TypeName, GetMessage(testSpec)); + Assert.AreEqual(expected.Start, actual.Start, GetMessage(testSpec)); + Assert.AreEqual(expected.End, actual.End, GetMessage(testSpec)); - // Actual ValueSet types should not be modified as that's considered a breaking API change - var actualValues = ((List>)values[ResolutionKey.ValueSet]).ToList(); - var expectedValues = - JsonConvert.DeserializeObject>>(expected - .Resolution[ResolutionKey.ValueSet].ToString()); + if (testSpec.IgnoreResolution) + { + Assert.Inconclusive(GetMessage(testSpec) + ". 
Resolution not validated."); + } + else + { + var values = actual.Resolution as IDictionary; - Assert.AreEqual(expectedValues.Count, actualValues.Count, GetMessage(testSpec)); + // Actual ValueSet types should not be modified as that's considered a breaking API change + var actualValues = ((List>)values[ResolutionKey.ValueSet]).ToList(); + var expectedValues = + JsonConvert.DeserializeObject>>(expected.Resolution[ResolutionKey.ValueSet].ToString()); - foreach (var value in expectedValues.Zip(actualValues, Tuple.Create)) - { - Assert.AreEqual(value.Item1.Count, value.Item2.Count, GetMessage(testSpec)); - CollectionAssert.AreEqual(value.Item1.OrderBy(o => o.Key).ToImmutableDictionary(), - value.Item2.OrderBy(o => o.Key).ToImmutableDictionary(), GetMessage(testSpec)); + Assert.AreEqual(expectedValues.Count, actualValues.Count, GetMessage(testSpec)); + + foreach (var resolutionValues in expectedValues.Zip(actualValues, Tuple.Create)) + { + Assert.AreEqual(resolutionValues.Item1.Count, resolutionValues.Item2.Count, + GetMessage(testSpec)); + + var expectedResolution = resolutionValues.Item1.OrderBy(o => o.Key).ToImmutableDictionary(); + var actualResolution = resolutionValues.Item2.OrderBy(o => o.Key).ToImmutableDictionary(); + + for (int i = 0; i < expectedResolution.Count; i++) + { + var expectedKey = expectedResolution.ElementAt(i).Key; + Assert.AreEqual(expectedKey, actualResolution.ElementAt(i).Key, GetMessage(testSpec)); + + var expectedValue = expectedResolution[expectedKey]; + var actualValue = actualResolution[expectedKey]; + + Assert.AreEqual(expectedValue, actualValue, GetMessage(testSpec)); + } + + } + } } + + } + catch (NullReferenceException nre) + { + throw new ApplicationException(GetMessage(testSpec), nre); } } @@ -131,22 +163,29 @@ public void TestDateTimeAlt(TestModel testSpec) Assert.AreEqual(expected.ParentText, ((ExtendedModelResult)actual).ParentText, GetMessage(testSpec)); } - // Actual ValueSet types should not be modified as that's considered a 
breaking API change - var actualValues = - ((IDictionary)actual.Resolution)[ResolutionKey.ValueSet] as - IList>; + if (testSpec.IgnoreResolution) + { + Assert.Inconclusive(GetMessage(testSpec) + ". Resolution not validated."); + } + else + { + // Actual ValueSet types should not be modified as that's considered a breaking API change + var actualValues = + ((IDictionary)actual.Resolution)[ResolutionKey.ValueSet] as + IList>; - var expectedValues = - JsonConvert.DeserializeObject>>(expected - .Resolution[ResolutionKey.ValueSet].ToString()); + var expectedValues = + JsonConvert.DeserializeObject>>(expected + .Resolution[ResolutionKey.ValueSet].ToString()); - Assert.AreEqual(expectedValues.Count, actualValues.Count, GetMessage(testSpec)); + Assert.AreEqual(expectedValues.Count, actualValues.Count, GetMessage(testSpec)); - foreach (var value in expectedValues.Zip(actualValues, Tuple.Create)) - { - Assert.AreEqual(value.Item1.Count, value.Item2.Count, GetMessage(testSpec)); - CollectionAssert.AreEqual(value.Item1.OrderBy(o => o.Key).ToImmutableDictionary(), - value.Item2.OrderBy(o => o.Key).ToImmutableDictionary(), GetMessage(testSpec)); + foreach (var value in expectedValues.Zip(actualValues, Tuple.Create)) + { + Assert.AreEqual(value.Item1.Count, value.Item2.Count, GetMessage(testSpec)); + CollectionAssert.AreEqual(value.Item1.OrderBy(o => o.Key).ToImmutableDictionary(), + value.Item2.OrderBy(o => o.Key).ToImmutableDictionary(), GetMessage(testSpec)); + } } } } @@ -157,7 +196,7 @@ public void TestDateTimeExtractor(TestModel testSpec) var referenceDateTime = testSpec.GetReferenceDateTime(); - var actualResults = Extractor.Extract(testSpec.Input.ToLowerInvariant(), referenceDateTime); + var actualResults = Extractor.Extract(QueryProcessor.Preprocess(testSpec.Input), referenceDateTime); var expectedResults = testSpec.CastResults(); Assert.AreEqual(expectedResults.Count(), actualResults.Count, GetMessage(testSpec)); @@ -181,7 +220,7 @@ public void 
TestDateTimeParser(TestModel testSpec) var referenceDateTime = testSpec.GetReferenceDateTime(); - var extractResults = Extractor.Extract(testSpec.Input.ToLowerInvariant(), referenceDateTime); + var extractResults = Extractor.Extract(QueryProcessor.Preprocess(testSpec.Input), referenceDateTime); var actualResults = extractResults.Select(o => DateTimeParser.Parse(o, referenceDateTime)).ToArray(); var expectedResults = testSpec.CastResults(); @@ -199,27 +238,34 @@ public void TestDateTimeParser(TestModel testSpec) Assert.AreEqual(expected.Start, actual.Start, GetMessage(testSpec)); Assert.AreEqual(expected.Length, actual.Length, GetMessage(testSpec)); - var actualValue = actual.Value as DateTimeResolutionResult; - var expectedValue = JsonConvert.DeserializeObject(expected.Value.ToString()); - - Assert.IsNotNull(actualValue, GetMessage(testSpec)); - Assert.AreEqual(expectedValue.Timex, actualValue.Timex, GetMessage(testSpec)); - if (expectedValue.Mod != null || actualValue.Mod != null) + if (testSpec.IgnoreResolution) { - Assert.IsNotNull(expectedValue.Mod, GetMessage(testSpec)); - Assert.IsNotNull(actualValue.Mod, GetMessage(testSpec)); - Assert.AreEqual(expectedValue.Mod, actualValue.Mod, GetMessage(testSpec)); + Assert.Inconclusive(GetMessage(testSpec) + ". 
Resolution not validated."); } + else + { + var actualValue = actual.Value as DateTimeResolutionResult; + var expectedValue = JsonConvert.DeserializeObject(expected.Value.ToString()); - CollectionAssert.AreEqual(expectedValue.FutureResolution, actualValue.FutureResolution, GetMessage(testSpec)); - CollectionAssert.AreEqual(expectedValue.PastResolution, actualValue.PastResolution, GetMessage(testSpec)); + Assert.IsNotNull(actualValue, GetMessage(testSpec)); + Assert.AreEqual(expectedValue.Timex, actualValue.Timex, GetMessage(testSpec)); + if (expectedValue.Mod != null || actualValue.Mod != null) + { + Assert.IsNotNull(expectedValue.Mod, GetMessage(testSpec)); + Assert.IsNotNull(actualValue.Mod, GetMessage(testSpec)); + Assert.AreEqual(expectedValue.Mod, actualValue.Mod, GetMessage(testSpec)); + } - if (expectedValue.TimeZoneResolution != null || actualValue.TimeZoneResolution != null) - { - Assert.IsNotNull(actualValue.TimeZoneResolution, GetMessage(testSpec)); - Assert.IsNotNull(expectedValue.TimeZoneResolution, GetMessage(testSpec)); - Assert.AreEqual(expectedValue.TimeZoneResolution.Value, actualValue.TimeZoneResolution.Value, GetMessage(testSpec)); - Assert.AreEqual(expectedValue.TimeZoneResolution.UtcOffsetMins, actualValue.TimeZoneResolution.UtcOffsetMins, GetMessage(testSpec)); + CollectionAssert.AreEqual(expectedValue.FutureResolution, actualValue.FutureResolution, GetMessage(testSpec)); + CollectionAssert.AreEqual(expectedValue.PastResolution, actualValue.PastResolution, GetMessage(testSpec)); + + if (expectedValue.TimeZoneResolution != null || actualValue.TimeZoneResolution != null) + { + Assert.IsNotNull(actualValue.TimeZoneResolution, GetMessage(testSpec)); + Assert.IsNotNull(expectedValue.TimeZoneResolution, GetMessage(testSpec)); + Assert.AreEqual(expectedValue.TimeZoneResolution.Value, actualValue.TimeZoneResolution.Value, GetMessage(testSpec)); + Assert.AreEqual(expectedValue.TimeZoneResolution.UtcOffsetMins, 
actualValue.TimeZoneResolution.UtcOffsetMins, GetMessage(testSpec)); + } } } } @@ -248,21 +294,28 @@ public void TestDateTimeMergedParser(TestModel testSpec) Assert.AreEqual(expected.Start, actual.Start, GetMessage(testSpec)); Assert.AreEqual(expected.Length, actual.Length, GetMessage(testSpec)); - if (actual.Value is IDictionary values) + if (testSpec.IgnoreResolution) { - // Actual ValueSet types should not be modified as that's considered a breaking API change - var actualValues = values[ResolutionKey.ValueSet] as IList>; + Assert.Inconclusive(GetMessage(testSpec) + ". Resolution not validated."); + } + else + { + if (actual.Value is IDictionary values) + { + // Actual ValueSet types should not be modified as that's considered a breaking API change + var actualValues = values[ResolutionKey.ValueSet] as IList>; - var expectedObj = - JsonConvert.DeserializeObject>>>( - expected.Value.ToString()); - var expectedValues = expectedObj[ResolutionKey.ValueSet]; + var expectedObj = JsonConvert.DeserializeObject>>>(expected.Value.ToString()); + var expectedValues = expectedObj[ResolutionKey.ValueSet]; - foreach (var (item1, item2) in expectedValues.Zip(actualValues, Tuple.Create)) - { - Assert.AreEqual(item1.Count, item2.Count, GetMessage(testSpec)); - CollectionAssert.AreEqual(item1.OrderBy(o => o.Key).ToImmutableDictionary(), - item2.OrderBy(o => o.Key).ToImmutableDictionary(), GetMessage(testSpec)); + Assert.AreEqual(expectedValues.Count, actualValues?.Count, GetMessage(testSpec)); + + foreach (var (item1, item2) in expectedValues.Zip(actualValues, Tuple.Create)) + { + Assert.AreEqual(item1.Count, item2.Count, GetMessage(testSpec)); + CollectionAssert.AreEqual(item1.OrderBy(o => o.Key).ToImmutableDictionary(), + item2.OrderBy(o => o.Key).ToImmutableDictionary(), GetMessage(testSpec)); + } } } } @@ -293,6 +346,12 @@ public void TestHashtag(TestModel testSpec) ValidateResults(testSpec); } + public void TestQuotedText(TestModel testSpec) + { + TestPreValidation(testSpec); 
+ ValidateResults(testSpec); + } + public void TestEmail(TestModel testSpec) { TestPreValidation(testSpec); @@ -343,10 +402,8 @@ private void ValidateResults(TestModel testSpec, IEnumerable testResolut Assert.AreEqual(expectedResults.Count(), actualResults.Count, GetMessage(testSpec)); - foreach (var tuple in Enumerable.Zip(expectedResults, actualResults, Tuple.Create)) + foreach (var (expected, actual) in Enumerable.Zip(expectedResults, actualResults, Tuple.Create)) { - var expected = tuple.Item1; - var actual = tuple.Item2; Assert.AreEqual(expected.TypeName, actual.TypeName, GetMessage(testSpec)); Assert.AreEqual(expected.Text, actual.Text, GetMessage(testSpec)); @@ -363,29 +420,43 @@ private void ValidateResults(TestModel testSpec, IEnumerable testResolut Assert.AreEqual(expected.End, actual.End, GetMessage(testSpec)); } - if (expected.TypeName.Contains(Number.Constants.MODEL_ORDINAL)) + if (testSpec.IgnoreResolution) { - if (!expected.TypeName.Equals(Number.Constants.MODEL_ORDINAL_RELATIVE)) - { - Assert.AreEqual(expected.Resolution[ResolutionKey.Value], actual.Resolution[ResolutionKey.Value], GetMessage(testSpec)); - } - - Assert.AreEqual(expected.Resolution[ResolutionKey.Offset], actual.Resolution[ResolutionKey.Offset], GetMessage(testSpec)); - Assert.AreEqual(expected.Resolution[ResolutionKey.RelativeTo], actual.Resolution[ResolutionKey.RelativeTo], GetMessage(testSpec)); + Assert.Inconclusive(GetMessage(testSpec) + ". Resolution not validated."); } else { - Assert.AreEqual(expected.Resolution[ResolutionKey.Value], actual.Resolution[ResolutionKey.Value], GetMessage(testSpec)); - } - foreach (var key in testResolutionKeys ?? 
Enumerable.Empty()) - { - if (!actual.Resolution.ContainsKey(key) && !expected.Resolution.ContainsKey(key)) + if (expected.TypeName.Contains(Number.Constants.MODEL_ORDINAL)) + { + if (!expected.TypeName.Equals(Number.Constants.MODEL_ORDINAL_RELATIVE)) + { + Assert.AreEqual(expected.Resolution[ResolutionKey.Value], actual.Resolution[ResolutionKey.Value], + GetMessage(testSpec)); + } + + Assert.AreEqual(expected.Resolution[ResolutionKey.Offset], actual.Resolution[ResolutionKey.Offset], + GetMessage(testSpec)); + + Assert.AreEqual(expected.Resolution[ResolutionKey.RelativeTo], actual.Resolution[ResolutionKey.RelativeTo], + GetMessage(testSpec)); + } + else { - continue; + Assert.AreEqual(expected.Resolution[ResolutionKey.Value], actual.Resolution[ResolutionKey.Value], + GetMessage(testSpec)); } - Assert.AreEqual(expected.Resolution[key].ToString(), actual.Resolution[key].ToString(), GetMessage(testSpec)); + foreach (var key in testResolutionKeys ?? Enumerable.Empty()) + { + if (!actual.Resolution.ContainsKey(key) && !expected.Resolution.ContainsKey(key)) + { + continue; + } + + Assert.AreEqual(expected.Resolution[key].ToString(), actual.Resolution[key].ToString(), + GetMessage(testSpec)); + } } } } diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestHelpers.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestHelpers.cs index dde6fb52d2..5d6380c715 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestHelpers.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestHelpers.cs @@ -1,17 +1,26 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.Linq; using Microsoft.Recognizers.Text.Choice; using Microsoft.Recognizers.Text.DateTime; +using Microsoft.Recognizers.Text.DateTime.Arabic; +using Microsoft.Recognizers.Text.DateTime.Chinese; using Microsoft.Recognizers.Text.DateTime.Dutch; using Microsoft.Recognizers.Text.DateTime.English; using Microsoft.Recognizers.Text.DateTime.French; using Microsoft.Recognizers.Text.DateTime.German; +using Microsoft.Recognizers.Text.DateTime.Hindi; using Microsoft.Recognizers.Text.DateTime.Italian; +using Microsoft.Recognizers.Text.DateTime.Japanese; +using Microsoft.Recognizers.Text.DateTime.Korean; using Microsoft.Recognizers.Text.DateTime.Portuguese; using Microsoft.Recognizers.Text.DateTime.Spanish; +using Microsoft.Recognizers.Text.DateTime.Swedish; using Microsoft.Recognizers.Text.DateTime.Turkish; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.NumberWithUnit; @@ -44,10 +53,12 @@ public enum Models DateTimeExtendedTypes, DateTimeComplexCalendar, DateTimeExperimentalMode, + DateTimeTasksMode, PhoneNumber, IpAddress, Mention, Hashtag, + QuotedText, Email, URL, GUID, @@ -111,10 +122,12 @@ public static class TestContextExtensions { Models.DateTimeExtendedTypes, (test, culture) => DateTimeRecognizer.RecognizeDateTime(test.Input, culture, DateTimeOptions.ExtendedTypes, refTime: test.GetReferenceDateTime(), fallbackToDefaultCulture: false) }, { Models.DateTimeComplexCalendar, (test, culture) => DateTimeRecognizer.RecognizeDateTime(test.Input, culture, DateTimeOptions.ExtendedTypes | DateTimeOptions.CalendarMode | DateTimeOptions.EnablePreview, refTime: test.GetReferenceDateTime(), fallbackToDefaultCulture: false) }, { Models.DateTimeExperimentalMode, (test, culture) => DateTimeRecognizer.RecognizeDateTime(test.Input, culture, DateTimeOptions.ExperimentalMode, refTime: test.GetReferenceDateTime(), 
fallbackToDefaultCulture: false) }, + { Models.DateTimeTasksMode, (test, culture) => DateTimeRecognizer.RecognizeDateTime(test.Input, culture, options: DateTimeOptions.TasksMode, refTime: test.GetReferenceDateTime(), fallbackToDefaultCulture: false) }, { Models.PhoneNumber, (test, culture) => SequenceRecognizer.RecognizePhoneNumber(test.Input, culture, fallbackToDefaultCulture: false) }, { Models.IpAddress, (test, culture) => SequenceRecognizer.RecognizeIpAddress(test.Input, culture, fallbackToDefaultCulture: false) }, { Models.Mention, (test, culture) => SequenceRecognizer.RecognizeMention(test.Input, culture, fallbackToDefaultCulture: false) }, { Models.Hashtag, (test, culture) => SequenceRecognizer.RecognizeHashtag(test.Input, culture, fallbackToDefaultCulture: false) }, + { Models.QuotedText, (test, culture) => SequenceRecognizer.RecognizeQuotedText(test.Input, culture, fallbackToDefaultCulture: false) }, { Models.Email, (test, culture) => SequenceRecognizer.RecognizeEmail(test.Input, culture, fallbackToDefaultCulture: false) }, { Models.URL, (test, culture) => SequenceRecognizer.RecognizeURL(test.Input, culture, fallbackToDefaultCulture: false) }, { Models.GUID, (test, culture) => SequenceRecognizer.RecognizeGUID(test.Input, culture, fallbackToDefaultCulture: false) }, @@ -160,6 +173,14 @@ public static IDateTimeExtractor GetExtractor(this TestContext context) return GetJapaneseExtractor(extractorName); case Culture.Turkish: return GetTurkishExtractor(extractorName); + case Culture.Hindi: + return GetHindiExtractor(extractorName); + case Culture.Arabic: + return GetArabicExtractor(extractorName); + case Culture.Korean: + return GetKoreanExtractor(extractorName); + case Culture.Swedish: + return GetSwedishExtractor(extractorName); } throw new Exception($"Extractor '{extractorName}' for '{culture}' not supported"); @@ -194,11 +215,87 @@ public static IDateTimeParser GetDateTimeParser(this TestContext context) return GetDutchParser(parserName); case 
Culture.Turkish: return GetTurkishParser(parserName); + case Culture.Hindi: + return GetHindiParser(parserName); + case Culture.Arabic: + return GetArabicParser(parserName); + case Culture.Korean: + return GetKoreanParser(parserName); + case Culture.Swedish: + return GetSwedishParser(parserName); } throw new Exception($"Parser '{parserName}' for '{culture}' not supported"); } + public static IDateTimeExtractor GetArabicExtractor(DateTimeExtractors extractorName) + { + var config = new BaseDateTimeOptionsConfiguration(Culture.Arabic); + var previewConfig = new BaseDateTimeOptionsConfiguration(Culture.Arabic, DateTimeOptions.EnablePreview); + var skipConfig = new BaseDateTimeOptionsConfiguration(Culture.Arabic, DateTimeOptions.SkipFromToMerge); + + switch (extractorName) + { + case DateTimeExtractors.Date: + return new BaseDateExtractor(new ArabicDateExtractorConfiguration(config)); + case DateTimeExtractors.Time: + return new BaseTimeExtractor(new ArabicTimeExtractorConfiguration(config)); + case DateTimeExtractors.DatePeriod: + return new BaseDatePeriodExtractor(new ArabicDatePeriodExtractorConfiguration(config)); + case DateTimeExtractors.TimePeriod: + return new BaseTimePeriodExtractor(new ArabicTimePeriodExtractorConfiguration(config)); + case DateTimeExtractors.DateTime: + return new BaseDateTimeExtractor(new ArabicDateTimeExtractorConfiguration(config)); + case DateTimeExtractors.DateTimePeriod: + return new BaseDateTimePeriodExtractor(new ArabicDateTimePeriodExtractorConfiguration(config)); + case DateTimeExtractors.Duration: + return new BaseDurationExtractor(new ArabicDurationExtractorConfiguration(config)); + case DateTimeExtractors.Holiday: + return new BaseHolidayExtractor(new ArabicHolidayExtractorConfiguration(config)); + case DateTimeExtractors.Set: + return new BaseSetExtractor(new ArabicSetExtractorConfiguration(config)); + case DateTimeExtractors.Merged: + return new BaseMergedDateTimeExtractor(new ArabicMergedExtractorConfiguration(config)); + 
case DateTimeExtractors.MergedSkipFromTo: + return new BaseMergedDateTimeExtractor(new ArabicMergedExtractorConfiguration(skipConfig)); + } + + throw new Exception($"Extractor '{extractorName}' for Arabic not supported"); + } + + public static IDateTimeParser GetArabicParser(DateTimeParsers parserName) + { + var commonConfiguration = new ArabicCommonDateTimeParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Arabic)); + + switch (parserName) + { + case DateTimeParsers.Date: + return new BaseDateParser(new ArabicDateParserConfiguration(commonConfiguration)); + case DateTimeParsers.Time: + return new DateTime.Arabic.TimeParser(new ArabicTimeParserConfiguration(commonConfiguration)); + case DateTimeParsers.DatePeriod: + return new BaseDatePeriodParser(new ArabicDatePeriodParserConfiguration(commonConfiguration)); + case DateTimeParsers.TimePeriod: + return new BaseTimePeriodParser(new ArabicTimePeriodParserConfiguration(commonConfiguration)); + case DateTimeParsers.DateTime: + return new BaseDateTimeParser(new ArabicDateTimeParserConfiguration(commonConfiguration)); + case DateTimeParsers.DateTimePeriod: + return new BaseDateTimePeriodParser(new ArabicDateTimePeriodParserConfiguration(commonConfiguration)); + case DateTimeParsers.Duration: + return new BaseDurationParser(new ArabicDurationParserConfiguration(commonConfiguration)); + case DateTimeParsers.Holiday: + return new BaseHolidayParser(new ArabicHolidayParserConfiguration(commonConfiguration)); + case DateTimeParsers.TimeZone: + return new BaseTimeZoneParser(new ArabicTimeZoneParserConfiguration(commonConfiguration)); + case DateTimeParsers.Set: + return new BaseSetParser(new ArabicSetParserConfiguration(commonConfiguration)); + case DateTimeParsers.Merged: + return new BaseMergedDateTimeParser(new ArabicMergedParserConfiguration(commonConfiguration)); + } + + throw new Exception($"Parser '{parserName}' for Arabic not supported"); + } + public static IDateTimeExtractor 
GetDutchExtractor(DateTimeExtractors extractorName) { var enableDmyConfig = new BaseDateTimeOptionsConfiguration(Culture.Dutch, DateTimeOptions.None, dmyDateFormat: true); @@ -259,7 +356,7 @@ public static IDateTimeParser GetDutchParser(DateTimeParsers parserName) case DateTimeParsers.Holiday: return new BaseHolidayParser(new DutchHolidayParserConfiguration(commonConfiguration)); case DateTimeParsers.TimeZone: - return new BaseTimeZoneParser(); + return new BaseTimeZoneParser(new DutchTimeZoneParserConfiguration(commonConfiguration)); case DateTimeParsers.Set: return new BaseSetParser(new DutchSetParserConfiguration(commonConfiguration)); case DateTimeParsers.Merged: @@ -329,7 +426,7 @@ public static IDateTimeParser GetEnglishParser(DateTimeParsers parserName) case DateTimeParsers.Holiday: return new BaseHolidayParser(new EnglishHolidayParserConfiguration(commonConfiguration)); case DateTimeParsers.TimeZone: - return new BaseTimeZoneParser(); + return new BaseTimeZoneParser(new EnglishTimeZoneParserConfiguration(commonConfiguration)); case DateTimeParsers.Set: return new BaseSetParser(new EnglishSetParserConfiguration(commonConfiguration)); case DateTimeParsers.Merged: @@ -400,7 +497,7 @@ public static IDateTimeParser GetEnglishOthersParser(DateTimeParsers parserName) case DateTimeParsers.Holiday: return new BaseHolidayParser(new EnglishHolidayParserConfiguration(commonConfiguration)); case DateTimeParsers.TimeZone: - return new BaseTimeZoneParser(); + return new BaseTimeZoneParser(new EnglishTimeZoneParserConfiguration(commonConfiguration)); case DateTimeParsers.Set: return new BaseSetParser(new EnglishSetParserConfiguration(commonConfiguration)); case DateTimeParsers.Merged: @@ -419,27 +516,27 @@ public static IDateTimeExtractor GetChineseExtractor(DateTimeExtractors extracto switch (extractorName) { case DateTimeExtractors.Date: - return new DateTime.Chinese.ChineseDateExtractorConfiguration(); + return new BaseCJKDateExtractor(new 
ChineseDateExtractorConfiguration(defaultConfig)); case DateTimeExtractors.Time: - return new DateTime.Chinese.ChineseTimeExtractorConfiguration(); + return new BaseCJKTimeExtractor(new ChineseTimeExtractorConfiguration(defaultConfig)); case DateTimeExtractors.DatePeriod: - return new DateTime.Chinese.ChineseDatePeriodExtractorConfiguration(); + return new BaseCJKDatePeriodExtractor(new ChineseDatePeriodExtractorConfiguration(defaultConfig)); case DateTimeExtractors.TimePeriod: - return new DateTime.Chinese.ChineseTimePeriodExtractorChsConfiguration(); + return new BaseCJKTimePeriodExtractor(new ChineseTimePeriodExtractorConfiguration(defaultConfig)); case DateTimeExtractors.DateTime: - return new DateTime.Chinese.ChineseDateTimeExtractorConfiguration(); + return new BaseCJKDateTimeExtractor(new ChineseDateTimeExtractorConfiguration(defaultConfig)); case DateTimeExtractors.DateTimePeriod: - return new DateTime.Chinese.ChineseDateTimePeriodExtractorConfiguration(); + return new BaseCJKDateTimePeriodExtractor(new ChineseDateTimePeriodExtractorConfiguration(defaultConfig)); case DateTimeExtractors.Duration: - return new DateTime.Chinese.ChineseDurationExtractorConfiguration(); + return new BaseCJKDurationExtractor(new ChineseDurationExtractorConfiguration(defaultConfig)); case DateTimeExtractors.Holiday: - return new BaseHolidayExtractor(new DateTime.Chinese.ChineseHolidayExtractorConfiguration(defaultConfig)); + return new BaseCJKHolidayExtractor(new ChineseHolidayExtractorConfiguration(defaultConfig)); case DateTimeExtractors.Set: - return new DateTime.Chinese.ChineseSetExtractorConfiguration(); + return new BaseCJKSetExtractor(new ChineseSetExtractorConfiguration(defaultConfig)); case DateTimeExtractors.Merged: - return new DateTime.Chinese.ChineseMergedExtractorConfiguration(defaultConfig); + return new BaseCJKMergedDateTimeExtractor(new ChineseMergedExtractorConfiguration(defaultConfig)); case DateTimeExtractors.MergedSkipFromTo: - return new 
DateTime.Chinese.ChineseMergedExtractorConfiguration(skipConfig); + return new BaseCJKMergedDateTimeExtractor(new ChineseMergedExtractorConfiguration(skipConfig)); } throw new Exception($"Extractor '{extractorName}' for Chinese not supported"); @@ -447,30 +544,30 @@ public static IDateTimeExtractor GetChineseExtractor(DateTimeExtractors extracto public static IDateTimeParser GetChineseParser(DateTimeParsers parserName) { - var config = new BaseDateTimeOptionsConfiguration(Culture.Chinese, DateTimeOptions.None); + var config = new ChineseCommonDateTimeParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Chinese, DateTimeOptions.None)); switch (parserName) { case DateTimeParsers.Date: - return new DateTime.Chinese.ChineseDateParserConfiguration(new DateTime.Chinese.ChineseDateTimeParserConfiguration(config)); + return new BaseCJKDateParser(new ChineseDateParserConfiguration(config)); case DateTimeParsers.Time: - return new DateTime.Chinese.ChineseTimeParserConfiguration(new DateTime.Chinese.ChineseDateTimeParserConfiguration(config)); + return new BaseCJKTimeParser(new ChineseTimeParserConfiguration(config)); case DateTimeParsers.DatePeriod: - return new DateTime.Chinese.ChineseDatePeriodParserConfiguration(new DateTime.Chinese.ChineseDateTimeParserConfiguration(config)); + return new BaseCJKDatePeriodParser(new ChineseDatePeriodParserConfiguration(config)); case DateTimeParsers.TimePeriod: - return new DateTime.Chinese.ChineseTimePeriodParserConfiguration(new DateTime.Chinese.ChineseDateTimeParserConfiguration(config)); + return new BaseCJKTimePeriodParser(new ChineseTimePeriodParserConfiguration(config)); case DateTimeParsers.DateTime: - return new DateTime.Chinese.ChineseDateTimeParser(new DateTime.Chinese.ChineseDateTimeParserConfiguration(config)); + return new BaseCJKDateTimeParser(new ChineseDateTimeParserConfiguration(config)); case DateTimeParsers.DateTimePeriod: - return new DateTime.Chinese.ChineseDateTimePeriodParserConfiguration(new 
DateTime.Chinese.ChineseDateTimeParserConfiguration(config)); + return new BaseCJKDateTimePeriodParser(new ChineseDateTimePeriodParserConfiguration(config)); case DateTimeParsers.Duration: - return new DateTime.Chinese.ChineseDurationParserConfiguration(new DateTime.Chinese.ChineseDateTimeParserConfiguration(config)); + return new BaseCJKDurationParser(new ChineseDurationParserConfiguration(config)); case DateTimeParsers.Holiday: - return new DateTime.Chinese.ChineseHolidayParserConfiguration(new DateTime.Chinese.ChineseDateTimeParserConfiguration(config)); + return new BaseCJKHolidayParser(new ChineseHolidayParserConfiguration(config)); case DateTimeParsers.Set: - return new DateTime.Chinese.ChineseSetParserConfiguration(new DateTime.Chinese.ChineseDateTimeParserConfiguration(config)); + return new BaseCJKSetParser(new ChineseSetParserConfiguration(config)); case DateTimeParsers.Merged: - return new FullDateTimeParser(new DateTime.Chinese.ChineseDateTimeParserConfiguration(config)); + return new BaseCJKMergedDateTimeParser(new ChineseMergedParserConfiguration(config)); } throw new Exception($"Parser '{parserName}' for Chinese not supported"); @@ -485,27 +582,27 @@ public static IDateTimeExtractor GetJapaneseExtractor(DateTimeExtractors extract switch (extractorName) { case DateTimeExtractors.Date: - return new DateTime.Japanese.JapaneseDateExtractorConfiguration(); + return new BaseCJKDateExtractor(new JapaneseDateExtractorConfiguration(defaultConfig)); case DateTimeExtractors.Time: - return new DateTime.Japanese.JapaneseTimeExtractorConfiguration(); + return new BaseCJKTimeExtractor(new JapaneseTimeExtractorConfiguration(defaultConfig)); case DateTimeExtractors.DatePeriod: - return new DateTime.Japanese.JapaneseDatePeriodExtractorConfiguration(); + return new BaseCJKDatePeriodExtractor(new JapaneseDatePeriodExtractorConfiguration(defaultConfig)); case DateTimeExtractors.TimePeriod: - return new DateTime.Japanese.JapaneseTimePeriodExtractorConfiguration(); + 
return new BaseCJKTimePeriodExtractor(new JapaneseTimePeriodExtractorConfiguration(defaultConfig)); case DateTimeExtractors.DateTime: - return new DateTime.Japanese.JapaneseDateTimeExtractorConfiguration(); + return new BaseCJKDateTimeExtractor(new JapaneseDateTimeExtractorConfiguration(defaultConfig)); case DateTimeExtractors.DateTimePeriod: - return new DateTime.Japanese.JapaneseDateTimePeriodExtractorConfiguration(); + return new BaseCJKDateTimePeriodExtractor(new JapaneseDateTimePeriodExtractorConfiguration(defaultConfig)); case DateTimeExtractors.Duration: - return new DateTime.Japanese.JapaneseDurationExtractorConfiguration(); + return new BaseCJKDurationExtractor(new JapaneseDurationExtractorConfiguration(defaultConfig)); case DateTimeExtractors.Holiday: - return new BaseHolidayExtractor(new DateTime.Japanese.JapaneseHolidayExtractorConfiguration(defaultConfig)); + return new BaseCJKHolidayExtractor(new JapaneseHolidayExtractorConfiguration(defaultConfig)); case DateTimeExtractors.Set: - return new DateTime.Japanese.JapaneseSetExtractorConfiguration(); + return new BaseCJKSetExtractor(new JapaneseSetExtractorConfiguration(defaultConfig)); case DateTimeExtractors.Merged: - return new DateTime.Japanese.JapaneseMergedExtractorConfiguration(defaultConfig); + return new BaseCJKMergedDateTimeExtractor(new JapaneseMergedExtractorConfiguration(defaultConfig)); case DateTimeExtractors.MergedSkipFromTo: - return new DateTime.Japanese.JapaneseMergedExtractorConfiguration(skipConfig); + return new BaseCJKMergedDateTimeExtractor(new JapaneseMergedExtractorConfiguration(skipConfig)); } throw new Exception($"Extractor '{extractorName}' for Japanese not supported"); @@ -513,31 +610,30 @@ public static IDateTimeExtractor GetJapaneseExtractor(DateTimeExtractors extract public static IDateTimeParser GetJapaneseParser(DateTimeParsers parserName) { - - var config = new BaseDateTimeOptionsConfiguration(Culture.Japanese, DateTimeOptions.None); + var config = new 
JapaneseCommonDateTimeParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Japanese, DateTimeOptions.None)); switch (parserName) { case DateTimeParsers.Date: - return new DateTime.Japanese.JapaneseDateParserConfiguration(new DateTime.Japanese.JapaneseDateTimeParserConfiguration(config)); + return new BaseCJKDateParser(new JapaneseDateParserConfiguration(config)); case DateTimeParsers.Time: - return new DateTime.Japanese.JapaneseTimeParserConfiguration(new DateTime.Japanese.JapaneseDateTimeParserConfiguration(config)); + return new BaseCJKTimeParser(new JapaneseTimeParserConfiguration(config)); case DateTimeParsers.DatePeriod: - return new DateTime.Japanese.JapaneseDatePeriodParserConfiguration(new DateTime.Japanese.JapaneseDateTimeParserConfiguration(config)); + return new BaseCJKDatePeriodParser(new JapaneseDatePeriodParserConfiguration(config)); case DateTimeParsers.TimePeriod: - return new DateTime.Japanese.JapaneseTimePeriodParserConfiguration(new DateTime.Japanese.JapaneseDateTimeParserConfiguration(config)); + return new BaseCJKTimePeriodParser(new JapaneseTimePeriodParserConfiguration(config)); case DateTimeParsers.DateTime: - return new DateTime.Japanese.JapaneseDateTimeParser(new DateTime.Japanese.JapaneseDateTimeParserConfiguration(config)); + return new BaseCJKDateTimeParser(new JapaneseDateTimeParserConfiguration(config)); case DateTimeParsers.DateTimePeriod: - return new DateTime.Japanese.JapaneseDateTimePeriodParserConfiguration(new DateTime.Japanese.JapaneseDateTimeParserConfiguration(config)); + return new BaseCJKDateTimePeriodParser(new JapaneseDateTimePeriodParserConfiguration(config)); case DateTimeParsers.Duration: - return new DateTime.Japanese.JapaneseDurationParserConfiguration(new DateTime.Japanese.JapaneseDateTimeParserConfiguration(config)); + return new BaseCJKDurationParser(new JapaneseDurationParserConfiguration(config)); case DateTimeParsers.Holiday: - return new DateTime.Japanese.JapaneseHolidayParserConfiguration(new 
DateTime.Japanese.JapaneseDateTimeParserConfiguration(config)); + return new BaseCJKHolidayParser(new JapaneseHolidayParserConfiguration(config)); case DateTimeParsers.Set: - return new DateTime.Japanese.JapaneseSetParserConfiguration(new DateTime.Japanese.JapaneseDateTimeParserConfiguration(config)); + return new BaseCJKSetParser(new JapaneseSetParserConfiguration(config)); case DateTimeParsers.Merged: - return new FullDateTimeParser(new DateTime.Japanese.JapaneseDateTimeParserConfiguration(config)); + return new BaseCJKMergedDateTimeParser(new JapaneseMergedParserConfiguration(config)); } throw new Exception($"Parser '{parserName}' for Japanese not supported"); @@ -927,6 +1023,205 @@ public static IDateTimeParser GetTurkishParser(DateTimeParsers parserName) throw new Exception($"Parser '{parserName}' for Turkish not supported"); } + + public static IDateTimeExtractor GetHindiExtractor(DateTimeExtractors extractorName) + { + var config = new BaseDateTimeOptionsConfiguration(Culture.Hindi); + var skipConfig = new BaseDateTimeOptionsConfiguration(Culture.Hindi, DateTimeOptions.SkipFromToMerge); + + switch (extractorName) + { + case DateTimeExtractors.Date: + return new BaseDateExtractor(new HindiDateExtractorConfiguration(config)); + case DateTimeExtractors.Time: + return new BaseTimeExtractor(new HindiTimeExtractorConfiguration(config)); + case DateTimeExtractors.DatePeriod: + return new BaseDatePeriodExtractor(new HindiDatePeriodExtractorConfiguration(config)); + case DateTimeExtractors.TimePeriod: + return new BaseTimePeriodExtractor(new HindiTimePeriodExtractorConfiguration(config)); + case DateTimeExtractors.DateTime: + return new BaseDateTimeExtractor(new HindiDateTimeExtractorConfiguration(config)); + case DateTimeExtractors.DateTimePeriod: + return new BaseDateTimePeriodExtractor(new HindiDateTimePeriodExtractorConfiguration(config)); + case DateTimeExtractors.Duration: + return new BaseDurationExtractor(new HindiDurationExtractorConfiguration(config)); + 
case DateTimeExtractors.Holiday: + return new BaseHolidayExtractor(new HindiHolidayExtractorConfiguration(config)); + case DateTimeExtractors.Set: + return new BaseSetExtractor(new HindiSetExtractorConfiguration(config)); + case DateTimeExtractors.Merged: + return new BaseMergedDateTimeExtractor(new HindiMergedExtractorConfiguration(config)); + } + + throw new Exception($"Extractor '{extractorName}' for Hindi not supported"); + } + + public static IDateTimeParser GetHindiParser(DateTimeParsers parserName) + { + var commonConfiguration = new HindiCommonDateTimeParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Hindi)); + + switch (parserName) + { + case DateTimeParsers.Date: + return new BaseDateParser(new HindiDateParserConfiguration(commonConfiguration)); + case DateTimeParsers.Time: + return new DateTime.Hindi.TimeParser(new HindiTimeParserConfiguration(commonConfiguration)); + case DateTimeParsers.DatePeriod: + return new BaseDatePeriodParser(new HindiDatePeriodParserConfiguration(commonConfiguration)); + case DateTimeParsers.TimePeriod: + return new BaseTimePeriodParser(new HindiTimePeriodParserConfiguration(commonConfiguration)); + case DateTimeParsers.DateTime: + return new BaseDateTimeParser(new HindiDateTimeParserConfiguration(commonConfiguration)); + case DateTimeParsers.DateTimePeriod: + return new BaseDateTimePeriodParser(new HindiDateTimePeriodParserConfiguration(commonConfiguration)); + case DateTimeParsers.Duration: + return new BaseDurationParser(new HindiDurationParserConfiguration(commonConfiguration)); + case DateTimeParsers.Holiday: + return new BaseHolidayParser(new HindiHolidayParserConfiguration(commonConfiguration)); + case DateTimeParsers.Set: + return new BaseSetParser(new HindiSetParserConfiguration(commonConfiguration)); + case DateTimeParsers.Merged: + return new BaseMergedDateTimeParser(new HindiMergedParserConfiguration(commonConfiguration)); + } + + throw new Exception($"Parser '{parserName}' for Hindi not supported"); + 
} + + public static IDateTimeExtractor GetKoreanExtractor(DateTimeExtractors extractorName) + { + + var defaultConfig = new BaseDateTimeOptionsConfiguration(Culture.Korean, DateTimeOptions.None); + var skipConfig = new BaseDateTimeOptionsConfiguration(Culture.Korean, DateTimeOptions.SkipFromToMerge); + + switch (extractorName) + { + case DateTimeExtractors.Date: + return new BaseCJKDateExtractor(new KoreanDateExtractorConfiguration(defaultConfig)); + case DateTimeExtractors.Time: + return new BaseCJKTimeExtractor(new KoreanTimeExtractorConfiguration(defaultConfig)); + case DateTimeExtractors.DatePeriod: + return new BaseCJKDatePeriodExtractor(new KoreanDatePeriodExtractorConfiguration(defaultConfig)); + case DateTimeExtractors.TimePeriod: + return new BaseCJKTimePeriodExtractor(new KoreanTimePeriodExtractorConfiguration(defaultConfig)); + case DateTimeExtractors.DateTime: + return new BaseCJKDateTimeExtractor(new KoreanDateTimeExtractorConfiguration(defaultConfig)); + case DateTimeExtractors.DateTimePeriod: + return new BaseCJKDateTimePeriodExtractor(new KoreanDateTimePeriodExtractorConfiguration(defaultConfig)); + case DateTimeExtractors.Duration: + return new BaseCJKDurationExtractor(new KoreanDurationExtractorConfiguration(defaultConfig)); + case DateTimeExtractors.Holiday: + return new BaseCJKHolidayExtractor(new KoreanHolidayExtractorConfiguration(defaultConfig)); + case DateTimeExtractors.Set: + return new BaseCJKSetExtractor(new KoreanSetExtractorConfiguration(defaultConfig)); + case DateTimeExtractors.Merged: + return new BaseCJKMergedDateTimeExtractor(new KoreanMergedExtractorConfiguration(defaultConfig)); + case DateTimeExtractors.MergedSkipFromTo: + return new BaseCJKMergedDateTimeExtractor(new KoreanMergedExtractorConfiguration(skipConfig)); + } + + throw new Exception($"Extractor '{extractorName}' for Korean not supported"); + } + + public static IDateTimeParser GetKoreanParser(DateTimeParsers parserName) + { + var config = new 
KoreanCommonDateTimeParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Korean, DateTimeOptions.None)); + + switch (parserName) + { + case DateTimeParsers.Date: + return new BaseCJKDateParser(new KoreanDateParserConfiguration(config)); + case DateTimeParsers.Time: + return new BaseCJKTimeParser(new KoreanTimeParserConfiguration(config)); + case DateTimeParsers.DatePeriod: + return new BaseCJKDatePeriodParser(new KoreanDatePeriodParserConfiguration(config)); + case DateTimeParsers.TimePeriod: + return new BaseCJKTimePeriodParser(new KoreanTimePeriodParserConfiguration(config)); + case DateTimeParsers.DateTime: + return new BaseCJKDateTimeParser(new KoreanDateTimeParserConfiguration(config)); + case DateTimeParsers.DateTimePeriod: + return new BaseCJKDateTimePeriodParser(new KoreanDateTimePeriodParserConfiguration(config)); + case DateTimeParsers.Duration: + return new BaseCJKDurationParser(new KoreanDurationParserConfiguration(config)); + case DateTimeParsers.Holiday: + return new BaseCJKHolidayParser(new KoreanHolidayParserConfiguration(config)); + case DateTimeParsers.Set: + return new BaseCJKSetParser(new KoreanSetParserConfiguration(config)); + case DateTimeParsers.Merged: + return new BaseCJKMergedDateTimeParser(new KoreanMergedParserConfiguration(config)); + } + + throw new Exception($"Parser '{parserName}' for Korean not supported"); + } + + public static IDateTimeExtractor GetSwedishExtractor(DateTimeExtractors extractorName) + { + var config = new BaseDateTimeOptionsConfiguration(Culture.Swedish); + var previewConfig = new BaseDateTimeOptionsConfiguration(Culture.Swedish, DateTimeOptions.EnablePreview); + var skipConfig = new BaseDateTimeOptionsConfiguration(Culture.Swedish, DateTimeOptions.SkipFromToMerge); + + switch (extractorName) + { + case DateTimeExtractors.Date: + return new BaseDateExtractor(new SwedishDateExtractorConfiguration(config)); + case DateTimeExtractors.Time: + return new BaseTimeExtractor(new 
SwedishTimeExtractorConfiguration(config)); + case DateTimeExtractors.DatePeriod: + return new BaseDatePeriodExtractor(new SwedishDatePeriodExtractorConfiguration(config)); + case DateTimeExtractors.TimePeriod: + return new BaseTimePeriodExtractor(new SwedishTimePeriodExtractorConfiguration(config)); + case DateTimeExtractors.DateTime: + return new BaseDateTimeExtractor(new SwedishDateTimeExtractorConfiguration(config)); + case DateTimeExtractors.DateTimePeriod: + return new BaseDateTimePeriodExtractor(new SwedishDateTimePeriodExtractorConfiguration(config)); + case DateTimeExtractors.Duration: + return new BaseDurationExtractor(new SwedishDurationExtractorConfiguration(config)); + case DateTimeExtractors.Holiday: + return new BaseHolidayExtractor(new SwedishHolidayExtractorConfiguration(config)); + case DateTimeExtractors.TimeZone: + return new BaseTimeZoneExtractor(new SwedishTimeZoneExtractorConfiguration(previewConfig)); + case DateTimeExtractors.Set: + return new BaseSetExtractor(new SwedishSetExtractorConfiguration(config)); + case DateTimeExtractors.Merged: + return new BaseMergedDateTimeExtractor(new SwedishMergedExtractorConfiguration(config)); + case DateTimeExtractors.MergedSkipFromTo: + return new BaseMergedDateTimeExtractor(new SwedishMergedExtractorConfiguration(skipConfig)); + } + + throw new Exception($"Extractor '{extractorName}' for Swedish not supported"); + } + + public static IDateTimeParser GetSwedishParser(DateTimeParsers parserName) + { + var commonConfiguration = new SwedishCommonDateTimeParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Swedish)); + + switch (parserName) + { + case DateTimeParsers.Date: + return new BaseDateParser(new SwedishDateParserConfiguration(commonConfiguration)); + case DateTimeParsers.Time: + return new DateTime.English.TimeParser(new SwedishTimeParserConfiguration(commonConfiguration)); + case DateTimeParsers.DatePeriod: + return new BaseDatePeriodParser(new 
SwedishDatePeriodParserConfiguration(commonConfiguration)); + case DateTimeParsers.TimePeriod: + return new BaseTimePeriodParser(new SwedishTimePeriodParserConfiguration(commonConfiguration)); + case DateTimeParsers.DateTime: + return new BaseDateTimeParser(new SwedishDateTimeParserConfiguration(commonConfiguration)); + case DateTimeParsers.DateTimePeriod: + return new BaseDateTimePeriodParser(new SwedishDateTimePeriodParserConfiguration(commonConfiguration)); + case DateTimeParsers.Duration: + return new BaseDurationParser(new SwedishDurationParserConfiguration(commonConfiguration)); + case DateTimeParsers.Holiday: + return new BaseHolidayParser(new SwedishHolidayParserConfiguration(commonConfiguration)); + case DateTimeParsers.TimeZone: + return new BaseTimeZoneParser(new SwedishTimeZoneParserConfiguration(commonConfiguration)); + case DateTimeParsers.Set: + return new BaseSetParser(new SwedishSetParserConfiguration(commonConfiguration)); + case DateTimeParsers.Merged: + return new BaseMergedDateTimeParser(new SwedishMergedParserConfiguration(commonConfiguration)); + } + + throw new Exception($"Parser '{parserName}' for Swedish not supported"); + } } [SuppressMessage("StyleCop.CSharp.NamingRules", "SA1402: CSharp.Naming : File may only contain a single type", Justification = "TODO")] diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestModel.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestModel.cs index 54a416e924..7f9e49fb43 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestModel.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestModel.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Linq; using Newtonsoft.Json; @@ -6,10 +9,11 @@ namespace Microsoft.Recognizers.Text.DataDrivenTests { [Flags] + [JsonConverter(typeof(PlatformEnumConverter))] public enum Platform { /// - /// dotNet flag + /// .NET flag /// DotNet = 1, @@ -36,17 +40,22 @@ public TestModel() Context = new Dictionary(); Results = Enumerable.Empty(); Debug = false; + IgnoreResolution = false; } public string TestType { get; set; } public string Input { get; set; } + public string Comment { get; set; } + public IDictionary Context { get; set; } public bool Debug { get; set; } - public Platform NotSupported { get; set; } + public bool IgnoreResolution { get; set; } + + public Platform NotSupported { get; set; } = 0; public Platform NotSupportedByDesign { get; set; } diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestResources.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestResources.cs index 2800800d3a..2d9e68ea20 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestResources.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestResources.cs @@ -1,5 +1,6 @@ -using System; -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + using System.Collections.Generic; using System.IO; using System.Linq; diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests.csproj b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests.csproj index 2425fedb66..0fafbdb71a 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests.csproj +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests.csproj @@ -1,7 +1,8 @@ - netcoreapp2.1 + net6.0 + 9 false © Microsoft Corporation. 
All rights reserved. @@ -16,7 +17,7 @@ --> $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + true @@ -28,6 +29,7 @@ 4 ../Recognizers-Text.ruleset + pdbonly true @@ -39,10 +41,10 @@ - - - - + + + + all runtime; build; native; contentfiles; analyzers @@ -51,4 +53,5 @@ + diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimex.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimex.cs index 63e30ba63c..0aacb5a9f6 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimex.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimex.cs @@ -127,6 +127,25 @@ public void DataTypes_Timex_FromTime() Assert.AreEqual("T23:59:30", TimexProperty.FromTime(new Time(23, 59, 30)).TimexValue); } + [TestMethod] + public void DataTypes_Timex_FromDateTime_ToString() + { + var timex = new TimexProperty("2022-03-11"); + Assert.AreEqual("11th March 2022", timex.ToString()); + timex = new TimexProperty("2022-03-12"); + Assert.AreEqual("12th March 2022", timex.ToString()); + timex = new TimexProperty("2022-03-13"); + Assert.AreEqual("13th March 2022", timex.ToString()); + } + + [TestMethod] + public void DataTypes_Timex_FromDateTimeRange_ToString() + { + // TODO: This test documents a workaround to avoid exceptions when calling TimexProperty.ToString(). Proper fix for date range representation is needed. 
+ var timex = new TimexProperty("(2022-03-15T16,2022-03-15T18,PT2H)"); + Assert.AreEqual("15th March 2022 4PM", timex.ToString()); + } + private static void Roundtrip(string timex) { Assert.AreEqual(timex, new TimexProperty(timex).TimexValue); diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexDateHelpers.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexDateHelpers.cs index a586f01867..f7570a97ce 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexDateHelpers.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexDateHelpers.cs @@ -56,12 +56,12 @@ public void DataTypes_DateHelpers_isLastWeek() [TestMethod] public void DataTypes_DateHelpers_weekOfyear() { - Assert.AreEqual(1, TimexDateHelpers.WeekOfYear(new System.DateTime(2017, 1, 1))); - Assert.AreEqual(2, TimexDateHelpers.WeekOfYear(new System.DateTime(2017, 1, 2))); - Assert.AreEqual(9, TimexDateHelpers.WeekOfYear(new System.DateTime(2017, 2, 23))); - Assert.AreEqual(12, TimexDateHelpers.WeekOfYear(new System.DateTime(2017, 3, 15))); - Assert.AreEqual(40, TimexDateHelpers.WeekOfYear(new System.DateTime(2017, 9, 25))); - Assert.AreEqual(53, TimexDateHelpers.WeekOfYear(new System.DateTime(2017, 12, 31))); + Assert.AreEqual(52, TimexDateHelpers.WeekOfYear(new System.DateTime(2017, 1, 1))); + Assert.AreEqual(1, TimexDateHelpers.WeekOfYear(new System.DateTime(2017, 1, 2))); + Assert.AreEqual(8, TimexDateHelpers.WeekOfYear(new System.DateTime(2017, 2, 23))); + Assert.AreEqual(11, TimexDateHelpers.WeekOfYear(new System.DateTime(2017, 3, 15))); + Assert.AreEqual(39, TimexDateHelpers.WeekOfYear(new System.DateTime(2017, 9, 25))); + Assert.AreEqual(52, TimexDateHelpers.WeekOfYear(new System.DateTime(2017, 12, 31))); Assert.AreEqual(1, TimexDateHelpers.WeekOfYear(new System.DateTime(2018, 1, 1))); Assert.AreEqual(1, TimexDateHelpers.WeekOfYear(new System.DateTime(2018, 1, 2))); Assert.AreEqual(1, 
TimexDateHelpers.WeekOfYear(new System.DateTime(2018, 1, 7))); diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexHelpers.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexHelpers.cs index 7c81fc8701..35d0f1c694 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexHelpers.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexHelpers.cs @@ -62,6 +62,15 @@ public void DataTypes_Helpers_ExpandTimeRange() Assert.AreEqual("T16", range.End.TimexValue); } + [TestMethod] + public void DataTypes_Helpers_DateRangeFromTimex_Week23() + { + var timex = new TimexProperty("2020-W23"); + var range = TimexHelpers.DateRangeFromTimex(timex); + Assert.AreEqual(new System.DateTime(2020, 6, 1), range.Start); + Assert.AreEqual(new System.DateTime(2020, 6, 8), range.End); + } + [TestMethod] public void DataTypes_Helpers_DateRangeFromTimex() { diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexParsing.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexParsing.cs index 9a15ad9de9..f4a10fe70a 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexParsing.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexParsing.cs @@ -881,15 +881,15 @@ public void DataTypes_Parsing_Last5Minutes() CollectionAssert.AreEquivalent( new[] { - Constants.TimexTypes.Date, - Constants.TimexTypes.TimeRange, - Constants.TimexTypes.DateTimeRange, - Constants.TimexTypes.Time, - Constants.TimexTypes.DateTime, - Constants.TimexTypes.Duration, - Constants.TimexTypes.DateRange, - Constants.TimexTypes.Definite, - }, timex.Types.ToList()); + Constants.TimexTypes.Date, + Constants.TimexTypes.TimeRange, + Constants.TimexTypes.DateTimeRange, + Constants.TimexTypes.Time, + Constants.TimexTypes.DateTime, + Constants.TimexTypes.Duration, + Constants.TimexTypes.DateRange, + Constants.TimexTypes.Definite, + }, timex.Types.ToList()); 
Assert.AreEqual(2017, timex.Year); Assert.AreEqual(9, timex.Month); @@ -920,14 +920,14 @@ public void DataTypes_Parsing_Wed4PMToSat3PM() CollectionAssert.AreEquivalent( new[] { - Constants.TimexTypes.Date, - Constants.TimexTypes.TimeRange, - Constants.TimexTypes.DateTimeRange, - Constants.TimexTypes.Time, - Constants.TimexTypes.DateTime, - Constants.TimexTypes.Duration, - Constants.TimexTypes.DateRange, - }, timex.Types.ToList()); + Constants.TimexTypes.Date, + Constants.TimexTypes.TimeRange, + Constants.TimexTypes.DateTimeRange, + Constants.TimexTypes.Time, + Constants.TimexTypes.DateTime, + Constants.TimexTypes.Duration, + Constants.TimexTypes.DateRange, + }, timex.Types.ToList()); Assert.IsNull(timex.Year); Assert.IsNull(timex.Month); diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexRangeResolve.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexRangeResolver.cs similarity index 94% rename from .NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexRangeResolve.cs rename to .NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexRangeResolver.cs index 48cc947cbb..1fabff1c1b 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexRangeResolve.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexRangeResolver.cs @@ -8,7 +8,7 @@ namespace Microsoft.Recognizers.Text.DataTypes.TimexExpression.Tests { [TestClass] - public class TestTimexRangeResolve + public class TestTimexRangeResolver { [TestMethod] public void DataTypes_RangeResolve_daterange_definite() @@ -592,5 +592,44 @@ public void DataTypes_RangeResolve_dateranges_Sunday_Evening() Assert.IsTrue(r.Contains("2018-06-17T16")); Assert.AreEqual(2, r.Count); } + + [TestMethod] + public void DataTypes_RangeResolve_time() + { + var resolutions = TimexRangeResolver.Evaluate( + new[] { "T09" }, + new[] { "(2020-01-01,2020-01-02,P1D)" }); + Assert.AreEqual(1, resolutions.Count); + } + + [TestMethod] + 
public void DataTypes_RangeResolve_time_with_daterange_constraint() + { + var candidates = new[] { "T09" }; + var constraints = new[] { "P3D" }; + var resolutions = TimexRangeResolver.Evaluate(candidates, constraints); + Assert.AreEqual(1, resolutions.Count); + } + + [TestMethod] + public void DataTypes_RangeResolve_time_with_datetimerange_constraint() + { + var resolutions = TimexRangeResolver.Evaluate( + new[] { "T09" }, + new[] { "(2020-01-01T00:00:00,2020-01-02T00:00:00,PT24H)" }); + + Assert.AreEqual(1, resolutions.Count); + } + + [TestMethod] + public void DataTypes_RangeResolve_datetime_with_daterange_constraint() + { + var resolutions = TimexRangeResolver.Evaluate( + new[] { "2020-01-01T09", "2020-01-02T09" }, + new[] { "(2020-01-01,2020-01-02,P1D)" }); + Assert.AreEqual(1, resolutions.Count); + Assert.AreEqual(1, resolutions.First().Month); + } + } } diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexRelativeConvert.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexRelativeConvert.cs index 348ffc082a..929d314457 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexRelativeConvert.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexRelativeConvert.cs @@ -147,7 +147,7 @@ public void DataTypes_RelativeConvert_DateTime_yesterday() [TestMethod] public void DataTypes_RelativeConvert_DateRange_this_week() { - var timex = new TimexProperty("2017-W40"); + var timex = new TimexProperty("2017-W39"); var today = new System.DateTime(2017, 9, 25); Assert.AreEqual("this week", TimexRelativeConvert.ConvertTimexToStringRelative(timex, today)); } @@ -155,7 +155,7 @@ public void DataTypes_RelativeConvert_DateRange_this_week() [TestMethod] public void DataTypes_RelativeConvert_DateRange_next_week() { - var timex = new TimexProperty("2017-W41"); + var timex = new TimexProperty("2017-W40"); var today = new System.DateTime(2017, 9, 25); Assert.AreEqual("next week", 
TimexRelativeConvert.ConvertTimexToStringRelative(timex, today)); } @@ -163,7 +163,7 @@ public void DataTypes_RelativeConvert_DateRange_next_week() [TestMethod] public void DataTypes_RelativeConvert_DateRange_last_week() { - var timex = new TimexProperty("2017-W39"); + var timex = new TimexProperty("2017-W38"); var today = new System.DateTime(2017, 9, 25); Assert.AreEqual("last week", TimexRelativeConvert.ConvertTimexToStringRelative(timex, today)); } @@ -171,7 +171,7 @@ public void DataTypes_RelativeConvert_DateRange_last_week() [TestMethod] public void DataTypes_RelativeConvert_DateRange_this_week_2() { - var timex = new TimexProperty("2017-W41"); + var timex = new TimexProperty("2017-W40"); var today = new System.DateTime(2017, 10, 4); Assert.AreEqual("this week", TimexRelativeConvert.ConvertTimexToStringRelative(timex, today)); } @@ -179,7 +179,7 @@ public void DataTypes_RelativeConvert_DateRange_this_week_2() [TestMethod] public void DataTypes_RelativeConvert_DateRange_next_week_2() { - var timex = new TimexProperty("2017-W42"); + var timex = new TimexProperty("2017-W41"); var today = new System.DateTime(2017, 10, 4); Assert.AreEqual("next week", TimexRelativeConvert.ConvertTimexToStringRelative(timex, today)); } @@ -187,7 +187,7 @@ public void DataTypes_RelativeConvert_DateRange_next_week_2() [TestMethod] public void DataTypes_RelativeConvert_DateRange_last_week_2() { - var timex = new TimexProperty("2017-W40"); + var timex = new TimexProperty("2017-W39"); var today = new System.DateTime(2017, 10, 4); Assert.AreEqual("last week", TimexRelativeConvert.ConvertTimexToStringRelative(timex, today)); } @@ -195,7 +195,7 @@ public void DataTypes_RelativeConvert_DateRange_last_week_2() [TestMethod] public void DataTypes_RelativeConvert_Weekend_this_weekend() { - var timex = new TimexProperty("2017-W40-WE"); + var timex = new TimexProperty("2017-W39-WE"); var today = new System.DateTime(2017, 9, 25); Assert.AreEqual("this weekend", 
TimexRelativeConvert.ConvertTimexToStringRelative(timex, today)); } @@ -203,7 +203,7 @@ public void DataTypes_RelativeConvert_Weekend_this_weekend() [TestMethod] public void DataTypes_RelativeConvert_Weekend_next_weekend() { - var timex = new TimexProperty("2017-W41-WE"); + var timex = new TimexProperty("2017-W40-WE"); var today = new System.DateTime(2017, 9, 25); Assert.AreEqual("next weekend", TimexRelativeConvert.ConvertTimexToStringRelative(timex, today)); } @@ -211,7 +211,7 @@ public void DataTypes_RelativeConvert_Weekend_next_weekend() [TestMethod] public void DataTypes_RelativeConvert_Weekend_last_weekend() { - var timex = new TimexProperty("2017-W39-WE"); + var timex = new TimexProperty("2017-W38-WE"); var today = new System.DateTime(2017, 9, 25); Assert.AreEqual("last weekend", TimexRelativeConvert.ConvertTimexToStringRelative(timex, today)); } diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexResolver.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexResolver.cs index 562eb85186..ecb8bb81c8 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexResolver.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/TestTimexResolver.cs @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+using System; +using System.Globalization; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace Microsoft.Recognizers.Text.DataTypes.TimexExpression.Tests @@ -62,6 +64,178 @@ public void DataTypes_Resolver_Date_Sunday() Assert.IsNull(resolution.Values[1].End); } + [TestMethod] + public void DataTypes_Resolver_Date_6th() + { + var today = new System.DateTime(2019, 4, 23, 15, 30, 0); + var resolution = TimexResolver.Resolve(new[] { "XXXX-XX-06" }, today); + Assert.AreEqual(2, resolution.Values.Count); + + Assert.AreEqual("XXXX-XX-06", resolution.Values[0].Timex); + Assert.AreEqual("date", resolution.Values[0].Type); + Assert.AreEqual("2019-04-06", resolution.Values[0].Value); + Assert.IsNull(resolution.Values[0].Start); + Assert.IsNull(resolution.Values[0].End); + + Assert.AreEqual("XXXX-XX-06", resolution.Values[1].Timex); + Assert.AreEqual("date", resolution.Values[1].Type); + Assert.AreEqual("2019-05-06", resolution.Values[1].Value); + Assert.IsNull(resolution.Values[1].Start); + Assert.IsNull(resolution.Values[1].End); + } + + [TestMethod] + public void DataTypes_Resolver_Date_Feb_2nd() + { + var today = new System.DateTime(2020, 10, 20); + var resolution = TimexResolver.Resolve(new[] { "XXXX-02-02 " }, today); + Assert.AreEqual(2, resolution.Values.Count); + + Assert.AreEqual("XXXX-02-02", resolution.Values[0].Timex); + Assert.AreEqual("date", resolution.Values[0].Type); + Assert.AreEqual("2020-02-02", resolution.Values[0].Value); + Assert.IsNull(resolution.Values[0].Start); + Assert.IsNull(resolution.Values[0].End); + + Assert.AreEqual("XXXX-02-02", resolution.Values[1].Timex); + Assert.AreEqual("date", resolution.Values[1].Type); + Assert.AreEqual("2021-02-02", resolution.Values[1].Value); + Assert.IsNull(resolution.Values[1].Start); + Assert.IsNull(resolution.Values[1].End); + } + + [TestMethod] + public void DataTypes_Resolver_DateTimeRange_Oct_25th_Afternoon() + { + var today = new System.DateTime(2020, 10, 20); + var resolution = 
TimexResolver.Resolve(new[] { "XXXX-10-25TAF" }, today); + Assert.AreEqual(2, resolution.Values.Count); + + Assert.AreEqual("XXXX-10-25TAF", resolution.Values[0].Timex); + Assert.AreEqual("datetimerange", resolution.Values[0].Type); + Assert.AreEqual("2019-10-25 12:00:00", resolution.Values[0].Start); + Assert.AreEqual("2019-10-25 16:00:00", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + + Assert.AreEqual("XXXX-10-25TAF", resolution.Values[1].Timex); + Assert.AreEqual("datetimerange", resolution.Values[1].Type); + Assert.AreEqual("2020-10-25 12:00:00", resolution.Values[1].Start); + Assert.AreEqual("2020-10-25 16:00:00", resolution.Values[1].End); + Assert.IsNull(resolution.Values[1].Value); + } + + [TestMethod] + public void DataTypes_Resolver_DateTimeRange_Week11_Monday() + { + var today = new System.DateTime(2020, 10, 20); + var resolution = TimexResolver.Resolve(new[] { "XXXX-W11-1" }, today); + Assert.AreEqual(2, resolution.Values.Count); + + Assert.AreEqual("XXXX-W11-1", resolution.Values[0].Timex); + Assert.AreEqual("date", resolution.Values[0].Type); + Assert.IsNull(resolution.Values[0].Start); + Assert.IsNull(resolution.Values[0].End); + Assert.AreEqual("2020-03-09", resolution.Values[0].Value); + + Assert.AreEqual("XXXX-W11-1", resolution.Values[1].Timex); + Assert.AreEqual("date", resolution.Values[1].Type); + Assert.IsNull(resolution.Values[1].Start); + Assert.IsNull(resolution.Values[1].End); + Assert.AreEqual("2021-03-15", resolution.Values[1].Value); + } + + [TestMethod] + public void DataTypes_Resolver_DateTimeRange_Thanksgiving() + { + // XXXX-11-WXX-4-4 -> 4th Thursday (4th ISO weekday) in unspecified week in November in unspecified year + var today = new System.DateTime(2020, 10, 20); + var resolution = TimexResolver.Resolve(new[] { "XXXX-11-WXX-4-4" }, today); + Assert.AreEqual(2, resolution.Values.Count); + + Assert.AreEqual("XXXX-11-WXX-4-4", resolution.Values[0].Timex); + Assert.AreEqual("date", 
resolution.Values[0].Type); + Assert.IsNull(resolution.Values[0].Start); + Assert.IsNull(resolution.Values[0].End); + Assert.AreEqual("2019-11-28", resolution.Values[0].Value); + + Assert.AreEqual("XXXX-11-WXX-4-4", resolution.Values[1].Timex); + Assert.AreEqual("date", resolution.Values[1].Type); + Assert.IsNull(resolution.Values[1].Start); + Assert.IsNull(resolution.Values[1].End); + Assert.AreEqual("2020-11-26", resolution.Values[1].Value); + } + + [TestMethod] + public void DataTypes_Resolver_DateTimeRange_Monday_Morning() + { + var today = new System.DateTime(2021, 1, 22, 15, 30, 0); + var resolution = TimexResolver.Resolve(new[] { "XXXX-WXX-1TMO" }, today); + Assert.AreEqual(2, resolution.Values.Count); + + Assert.AreEqual("XXXX-WXX-1TMO", resolution.Values[0].Timex); + Assert.AreEqual("datetimerange", resolution.Values[0].Type); + Assert.AreEqual("2021-01-18 08:00:00", resolution.Values[0].Start); + Assert.AreEqual("2021-01-18 12:00:00", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + + Assert.AreEqual("XXXX-WXX-1TMO", resolution.Values[1].Timex); + Assert.AreEqual("datetimerange", resolution.Values[1].Type); + Assert.AreEqual("2021-01-25 08:00:00", resolution.Values[1].Start); + Assert.AreEqual("2021-01-25 12:00:00", resolution.Values[1].End); + Assert.IsNull(resolution.Values[1].Value); + } + + [TestMethod] + public void DataTypes_Resolver_DateTimeRange_April_5th_from_10am_to_11am() + { + var today = new System.DateTime(2021, 1, 22, 15, 30, 0); + var resolution = TimexResolver.Resolve(new[] { "(XXXX-04-05T10,XXXX-04-05T11,PT1H)" }, today); + Assert.AreEqual(2, resolution.Values.Count); + + Assert.AreEqual("(XXXX-04-05T10,XXXX-04-05T11,PT1H)", resolution.Values[0].Timex); + Assert.AreEqual("datetimerange", resolution.Values[0].Type); + Assert.AreEqual("2020-04-05 10:00:00", resolution.Values[0].Start); + Assert.AreEqual("2020-04-05 11:00:00", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + + 
Assert.AreEqual("(XXXX-04-05T10,XXXX-04-05T11,PT1H)", resolution.Values[1].Timex); + Assert.AreEqual("datetimerange", resolution.Values[1].Type); + Assert.AreEqual("2021-04-05 10:00:00", resolution.Values[1].Start); + Assert.AreEqual("2021-04-05 11:00:00", resolution.Values[1].End); + Assert.IsNull(resolution.Values[1].Value); + } + + [TestMethod] + public void DataTypes_Resolver_DateRange_first_week_of_April_2019() + { + var today = new System.DateTime(2021, 1, 22, 15, 30, 0); + var resolution = TimexResolver.Resolve(new[] { "2019-04-W01" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("2019-04-W01", resolution.Values[0].Timex); + Assert.AreEqual("daterange", resolution.Values[0].Type); + Assert.AreEqual("2019-04-01", resolution.Values[0].Start); + Assert.AreEqual("2019-04-08", resolution.Values[0].End); + } + + [TestMethod] + public void DataTypes_Resolver_DateRange_first_week_of_April() + { + var today = new System.DateTime(2021, 1, 22); + var resolution = TimexResolver.Resolve(new[] { "XXXX-04-W01" }, today); + Assert.AreEqual(2, resolution.Values.Count); + + Assert.AreEqual("XXXX-04-W01", resolution.Values[0].Timex); + Assert.AreEqual("daterange", resolution.Values[0].Type); + Assert.AreEqual("2020-03-30", resolution.Values[0].Start); + Assert.AreEqual("2020-04-06", resolution.Values[0].End); + + Assert.AreEqual("XXXX-04-W01", resolution.Values[1].Timex); + Assert.AreEqual("daterange", resolution.Values[1].Type); + Assert.AreEqual("2021-03-29", resolution.Values[1].Start); + Assert.AreEqual("2021-04-05", resolution.Values[1].End); + } + [TestMethod] public void DataTypes_Resolver_DateTime_Wednesday_4() { @@ -219,6 +393,19 @@ public void DataTypes_Resolver_Duration_10seconds() Assert.IsNull(resolution.Values[0].End); } + [TestMethod] + public void DataTypes_Resolver_Duration_1hour30minutes() + { + var resolution = TimexResolver.Resolve(new[] { "PT1H30M" }); + Assert.AreEqual(1, resolution.Values.Count); + + 
Assert.AreEqual("PT1H30M", resolution.Values[0].Timex); + Assert.AreEqual("duration", resolution.Values[0].Type); + Assert.AreEqual("5400", resolution.Values[0].Value); + Assert.IsNull(resolution.Values[0].Start); + Assert.IsNull(resolution.Values[0].End); + } + [TestMethod] public void DataTypes_Resolver_DateRange_September() { @@ -252,6 +439,28 @@ public void DataTypes_Resolver_DateRange_Winter() Assert.IsNull(resolution.Values[0].End); } + [TestMethod] + public void DataTypes_Resolver_DateRange_First_Week() + { + var today = new System.DateTime(2021, 1, 1); + + var resolution = TimexResolver.Resolve(new[] { "2021-W01" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("2021-W01", resolution.Values[0].Timex); + Assert.AreEqual("daterange", resolution.Values[0].Type); + Assert.AreEqual("2021-01-04", resolution.Values[0].Start); + Assert.AreEqual("2021-01-11", resolution.Values[0].End); + + resolution = TimexResolver.Resolve(new[] { "2020-W01" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("2020-W01", resolution.Values[0].Timex); + Assert.AreEqual("daterange", resolution.Values[0].Type); + Assert.AreEqual("2019-12-30", resolution.Values[0].Start); + Assert.AreEqual("2020-01-06", resolution.Values[0].End); + } + [TestMethod] public void DataTypes_Resolver_DateRange_Last_Week() { @@ -304,6 +513,108 @@ public void DataTypes_Resolver_DateRange_Last_Three_Weeks() Assert.AreEqual("2019-05-01", resolution.Values[0].End); } + [TestMethod] + public void DataTypes_Resolver_DateRange_Decimal_Period_PT() + { + var sourceLanguage = CultureInfo.CurrentCulture; + var testLanguage = new CultureInfo("pt-PT", false); + CultureInfo.CurrentCulture = testLanguage; + var today = new System.DateTime(2019, 4, 30); + var resolution = TimexResolver.Resolve(new[] { "(2019-04-05,XXXX-04-11,P5.54701493625231D)" }, today); + Assert.AreEqual(1, resolution.Values.Count); + Assert.AreEqual("(2019-04-05,2019-04-10,P5.54701493625231D)", 
resolution.Values[0].Timex); + Assert.AreEqual("daterange", resolution.Values[0].Type); + Assert.AreEqual("2019-04-05", resolution.Values[0].Start); + Assert.AreEqual("2019-04-10", resolution.Values[0].End); + CultureInfo.CurrentCulture = sourceLanguage; + } + + [TestMethod] + public void DataTypes_Resolver_DateRange_Demaical_Period_EN() + { + var sourceLanguage = CultureInfo.CurrentCulture; + var testLanguage = new CultureInfo("en-US", false); + CultureInfo.CurrentCulture = testLanguage; + var today = new System.DateTime(2019, 4, 30); + var resolution = TimexResolver.Resolve(new[] { "(2019-04-05,XXXX-04-11,P5.54701493625231D)" }, today); + Assert.AreEqual(1, resolution.Values.Count); + Assert.AreEqual("(2019-04-05,2019-04-10,P5.54701493625231D)", resolution.Values[0].Timex); + Assert.AreEqual("daterange", resolution.Values[0].Type); + Assert.AreEqual("2019-04-05", resolution.Values[0].Start); + Assert.AreEqual("2019-04-10", resolution.Values[0].End); + CultureInfo.CurrentCulture = sourceLanguage; + } + + [TestMethod] + public void DataTypes_Resolver_TimeRange_11_30_to_12_00() + { + var today = System.DateTime.Now; + var resolution = TimexResolver.Resolve(new[] { "(T11:30,T12:00,PT30M)" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("(T11:30,T12,PT30M)", resolution.Values[0].Timex); + Assert.AreEqual("timerange", resolution.Values[0].Type); + Assert.AreEqual("11:30:00", resolution.Values[0].Start); + Assert.AreEqual("12:00:00", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + } + + [TestMethod] + public void DataTypes_Resolver_TimeRange_11_30_to_12() + { + var today = System.DateTime.Now; + var resolution = TimexResolver.Resolve(new[] { "(T11:30,T12,PT30M)" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("(T11:30,T12,PT30M)", resolution.Values[0].Timex); + Assert.AreEqual("timerange", resolution.Values[0].Type); + Assert.AreEqual("11:30:00", resolution.Values[0].Start); + 
Assert.AreEqual("12:00:00", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + } + + [TestMethod] + public void DataTypes_Resolver_TimeRange_11_to_11_30() + { + var today = System.DateTime.Now; + var resolution = TimexResolver.Resolve(new[] { "(T11:00,T11:30,PT30M)" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("(T11,T11:30,PT30M)", resolution.Values[0].Timex); + Assert.AreEqual("timerange", resolution.Values[0].Type); + Assert.AreEqual("11:00:00", resolution.Values[0].Start); + Assert.AreEqual("11:30:00", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + } + + [TestMethod] + public void DataTypes_Resolver_TimeRange_23_45_to_00_30() + { + var today = System.DateTime.Now; + var resolution = TimexResolver.Resolve(new[] { "(T23:45,T00:30,PT45M)" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("(T23:45,T00:30,PT45M)", resolution.Values[0].Timex); + Assert.AreEqual("timerange", resolution.Values[0].Type); + Assert.AreEqual("23:45:00", resolution.Values[0].Start); + Assert.AreEqual("00:30:00", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + } + + [TestMethod] + public void DataTypes_Resolver_DateTimeRange_20190401_09_30_to_20190401_11() + { + var today = System.DateTime.Now; + var resolution = TimexResolver.Resolve(new[] { "(2019-04-01T09:30,2019-04-01T11,PT1H30M)" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("(2019-04-01T09:30,2019-04-01T11,PT1H30M)", resolution.Values[0].Timex); + Assert.AreEqual("datetimerange", resolution.Values[0].Type); + Assert.AreEqual("2019-04-01 09:30:00", resolution.Values[0].Start); + Assert.AreEqual("2019-04-01 11:00:00", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + } + [TestMethod] public void DataTypes_Resolver_TimeRange_4am_to_8pm() { @@ -318,6 +629,34 @@ public void DataTypes_Resolver_TimeRange_4am_to_8pm() 
Assert.IsNull(resolution.Values[0].Value); } + [TestMethod] + public void DataTypes_Resolver_TimeRange_23_45_to_01_20() + { + var today = System.DateTime.Now; + var resolution = TimexResolver.Resolve(new[] { "(T23:45,T01:20,PT1H35M)" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("(T23:45,T01:20,PT1H35M)", resolution.Values[0].Timex); + Assert.AreEqual("timerange", resolution.Values[0].Type); + Assert.AreEqual("23:45:00", resolution.Values[0].Start); + Assert.AreEqual("01:20:00", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + } + + [TestMethod] + public void DataTypes_Resolver_TimeRange_15_15_to_16_20() + { + var today = System.DateTime.Now; + var resolution = TimexResolver.Resolve(new[] { "(T15:15,T16:20,PT1H5M)" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("(T15:15,T16:20,PT1H5M)", resolution.Values[0].Timex); + Assert.AreEqual("timerange", resolution.Values[0].Type); + Assert.AreEqual("15:15:00", resolution.Values[0].Start); + Assert.AreEqual("16:20:00", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + } + [TestMethod] public void DataTypes_Resolver_TimeRange_Morning() { @@ -400,6 +739,48 @@ public void DataTypes_Resolver_DateTimeRange_next_monday_4am_to_next_thursday_3p Assert.IsNull(resolution.Values[0].Value); } + [TestMethod] + public void DataTypes_Resolver_DateTimeRange_20200604_15_00_to_20200604_17_30() + { + var today = System.DateTime.Now; + var resolution = TimexResolver.Resolve(new[] { "(2020-06-04T15,2020-06-04T17:30,PT2H30M)" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("(2020-06-04T15,2020-06-04T17:30,PT2H30M)", resolution.Values[0].Timex); + Assert.AreEqual("datetimerange", resolution.Values[0].Type); + Assert.AreEqual("2020-06-04 15:00:00", resolution.Values[0].Start); + Assert.AreEqual("2020-06-04 17:30:00", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + } + + [TestMethod] 
+ public void DataTypes_Resolver_DateTimeRange_20190325_10_to_20190325_11() + { + var today = System.DateTime.Now; + var resolution = TimexResolver.Resolve(new[] { "(2019-03-25T10,2019-03-25T11,PT1H)" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("(2019-03-25T10,2019-03-25T11,PT1H)", resolution.Values[0].Timex); + Assert.AreEqual("datetimerange", resolution.Values[0].Type); + Assert.AreEqual("2019-03-25 10:00:00", resolution.Values[0].Start); + Assert.AreEqual("2019-03-25 11:00:00", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + } + + [TestMethod] + public void DataTypes_Resolver_DateRange_20190427_20190511_2weeks() + { + var today = System.DateTime.Now; + var resolution = TimexResolver.Resolve(new[] { "(2019-04-27,2019-05-11,P2W)" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("(2019-04-27,2019-05-11,P2W)", resolution.Values[0].Timex); + Assert.AreEqual("daterange", resolution.Values[0].Type); + Assert.AreEqual("2019-04-27", resolution.Values[0].Start); + Assert.AreEqual("2019-05-11", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + } + [TestMethod] public void DataTypes_Resolver_Time_4am() { @@ -431,5 +812,185 @@ public void DataTypes_Resolver_Time_4_oclock() Assert.IsNull(resolution.Values[1].Start); Assert.IsNull(resolution.Values[1].End); } + + [TestMethod] + public void DataTypes_Resolver_Date_SecondWeekInAugust() + { + var today = new System.DateTime(2019, 11, 06); + var resolution = TimexResolver.Resolve(new[] { "XXXX-08-W02" }, today); + Assert.AreEqual(2, resolution.Values.Count); + + Assert.AreEqual("XXXX-08-W02", resolution.Values[0].Timex); + Assert.AreEqual("daterange", resolution.Values[0].Type); + Assert.AreEqual("2018-08-06", resolution.Values[0].Start); + Assert.AreEqual("2018-08-13", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + + Assert.AreEqual("XXXX-08-W02", resolution.Values[1].Timex); + 
Assert.AreEqual("daterange", resolution.Values[1].Type); + Assert.AreEqual("2019-08-05", resolution.Values[1].Start); + Assert.AreEqual("2019-08-12", resolution.Values[1].End); + Assert.IsNull(resolution.Values[1].Value); + } + + [TestMethod] + public void DataTypes_Resolver_DateTime_Nov_6_at_11_45_25() + { + var today = new System.DateTime(2017, 9, 28, 15, 30, 0); + var resolution = TimexResolver.Resolve(new[] { "2019-11-06T11:45:25" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("2019-11-06T11:45:25", resolution.Values[0].Timex); + Assert.AreEqual("datetime", resolution.Values[0].Type); + Assert.AreEqual("2019-11-06 11:45:25", resolution.Values[0].Value); + Assert.IsNull(resolution.Values[0].Start); + Assert.IsNull(resolution.Values[0].End); + } + + [TestMethod] + public void DataTypes_Resolver_DateTime_Nov_6_at_11_45_25_UTC() + { + var today = new System.DateTime(2017, 9, 28, 15, 30, 0); + var resolution = TimexResolver.Resolve(new[] { "2019-11-06T11:45:25Z" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("2019-11-06T11:45:25", resolution.Values[0].Timex); + Assert.AreEqual("datetime", resolution.Values[0].Type); + Assert.AreEqual("2019-11-06 11:45:25", resolution.Values[0].Value); + Assert.IsNull(resolution.Values[0].Start); + Assert.IsNull(resolution.Values[0].End); + } + + [TestMethod] + public void DataTypes_Resolver_DateTime_TuesAt12PM() + { + var today = new System.DateTime(2019, 12, 05); + var resolution = TimexResolver.Resolve(new[] { "XXXX-WXX-2T12" }, today); + Assert.AreEqual(2, resolution.Values.Count); + + Assert.AreEqual("XXXX-WXX-2T12", resolution.Values[0].Timex); + Assert.AreEqual("datetime", resolution.Values[0].Type); + Assert.AreEqual("2019-12-03 12:00:00", resolution.Values[0].Value); + Assert.IsNull(resolution.Values[0].Start); + Assert.IsNull(resolution.Values[0].End); + + Assert.AreEqual("XXXX-WXX-2T12", resolution.Values[1].Timex); + Assert.AreEqual("datetime", 
resolution.Values[1].Type); + Assert.AreEqual("2019-12-10 12:00:00", resolution.Values[1].Value); + Assert.IsNull(resolution.Values[1].Start); + Assert.IsNull(resolution.Values[1].End); + } + + [TestMethod] + public void DataTypes_Resolver_DateTime_TuesAt12PM_UtcInput() + { + var today = new System.DateTime(2019, 12, 05); + var resolution = TimexResolver.Resolve(new[] { "XXXX-WXX-2T12" }, today.ToUniversalTime()); + Assert.AreEqual(2, resolution.Values.Count); + + var previousWeekLocal = new System.DateTime(2019, 12, 03, 12, 0, 0, System.DateTimeKind.Local); + var previousWeekUtc = previousWeekLocal.ToUniversalTime(); + + Assert.AreEqual("XXXX-WXX-2T12", resolution.Values[0].Timex); + Assert.AreEqual("datetime", resolution.Values[0].Type); + Assert.AreEqual(previousWeekUtc.ToString("yyyy-MM-dd HH:mm:ss"), resolution.Values[0].Value); + Assert.IsNull(resolution.Values[0].Start); + Assert.IsNull(resolution.Values[0].End); + + var nextWeekLocal = new System.DateTime(2019, 12, 10, 12, 0, 0, System.DateTimeKind.Local); + var nextWeekUtc = nextWeekLocal.ToUniversalTime(); + + Assert.AreEqual("XXXX-WXX-2T12", resolution.Values[1].Timex); + Assert.AreEqual("datetime", resolution.Values[1].Type); + Assert.AreEqual(nextWeekUtc.ToString("yyyy-MM-dd HH:mm:ss"), resolution.Values[1].Value); + Assert.IsNull(resolution.Values[1].Start); + Assert.IsNull(resolution.Values[1].End); + } + + [TestMethod] + public void DataTypes_Resolver_DateTime_2021W01() // first day of the year is a Friday - week 1 + { + var today = new System.DateTime(2021, 01, 05); + var resolution = TimexResolver.Resolve(new[] { "2021-W01" }, today.ToUniversalTime()); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("2021-W01", resolution.Values[0].Timex); + Assert.AreEqual("daterange", resolution.Values[0].Type); + Assert.AreEqual("2021-01-04", resolution.Values[0].Start); + Assert.AreEqual("2021-01-11", resolution.Values[0].End); + } + + [TestMethod] + public void 
DataTypes_Resolver_DateTime_2021W02() // first day of the year is a Friday - week 2 + { + var today = new System.DateTime(2021, 01, 05); + var resolution = TimexResolver.Resolve(new[] { "2021-W02" }, today.ToUniversalTime()); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("2021-W02", resolution.Values[0].Timex); + Assert.AreEqual("daterange", resolution.Values[0].Type); + Assert.AreEqual("2021-01-11", resolution.Values[0].Start); + Assert.AreEqual("2021-01-18", resolution.Values[0].End); + } + + [TestMethod] + public void DataTypes_Resolver_DateTime_2020W53() // has a 53-week year + { + var today = new System.DateTime(2020, 12, 30); + var resolution = TimexResolver.Resolve(new[] { "2020-W53" }, today.ToUniversalTime()); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("2020-W53", resolution.Values[0].Timex); + Assert.AreEqual("daterange", resolution.Values[0].Type); + Assert.AreEqual("2020-12-28", resolution.Values[0].Start); + Assert.AreEqual("2021-01-04", resolution.Values[0].End); + } + + [TestMethod] + public void DataTypes_Resolver_DateTime_2024W01() // first day of the year is a Monday + { + var today = new System.DateTime(2024, 01, 01); + var resolution = TimexResolver.Resolve(new[] { "2024-W01" }, today.ToUniversalTime()); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("2024-W01", resolution.Values[0].Timex); + Assert.AreEqual("daterange", resolution.Values[0].Type); + Assert.AreEqual("2024-01-01", resolution.Values[0].Start); + Assert.AreEqual("2024-01-08", resolution.Values[0].End); + } + + [TestMethod] + public void DataTypes_Resolver_DateTime_Weekend() + { + var today = new System.DateTime(2020, 1, 7); + var resolution = TimexResolver.Resolve(new[] { "2020-W02-WE" }, today); + Assert.AreEqual(1, resolution.Values.Count); + + Assert.AreEqual("2020-W02-WE", resolution.Values[0].Timex); + Assert.AreEqual("daterange", resolution.Values[0].Type); + Assert.AreEqual("2020-01-11", 
resolution.Values[0].Start); + Assert.AreEqual("2020-01-13", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + } + + [TestMethod] + public void DataTypes_Resolver_MonthRange_December() + { + var today = new System.DateTime(2020, 3, 25); + var resolution = TimexResolver.Resolve(new[] { "XXXX-12" }, today); + Assert.AreEqual(2, resolution.Values.Count); + + Assert.AreEqual("XXXX-12", resolution.Values[0].Timex); + Assert.AreEqual("daterange", resolution.Values[0].Type); + Assert.AreEqual("2019-12-01", resolution.Values[0].Start); + Assert.AreEqual("2020-01-01", resolution.Values[0].End); + Assert.IsNull(resolution.Values[0].Value); + + Assert.AreEqual("XXXX-12", resolution.Values[1].Timex); + Assert.AreEqual("daterange", resolution.Values[1].Type); + Assert.AreEqual("2020-12-01", resolution.Values[1].Start); + Assert.AreEqual("2021-01-01", resolution.Values[1].End); + Assert.IsNull(resolution.Values[1].Value); + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Constants.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Constants.cs index 70e79d1b42..08cab55c26 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Constants.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Constants.cs @@ -5,6 +5,42 @@ namespace Microsoft.Recognizers.Text.DataTypes.TimexExpression { public static class Constants { + // Timex + public const string TimexYear = "Y"; + public const string TimexMonth = "M"; + public const string TimexMonthFull = "MON"; + public const string TimexWeek = "W"; + public const string TimexDay = "D"; + public const string TimexBusinessDay = "BD"; + public const string TimexWeekend = "WE"; + public const string TimexHour = "H"; + public const string TimexMinute = "M"; + public const string TimexSecond = "S"; + public const string TimexNight = "NI"; + public const char TimexFuzzy = 'X'; + public const string TimexFuzzyYear = "XXXX"; + public const string 
TimexFuzzyMonth = "XX"; + public const string TimexFuzzyWeek = "WXX"; + public const string TimexFuzzyDay = "XX"; + public const string DateTimexConnector = "-"; + public const string TimeTimexConnector = ":"; + public const string GeneralPeriodPrefix = "P"; + public const string TimeTimexPrefix = "T"; + + public const string YearUnit = "year"; + public const string MonthUnit = "month"; + public const string WeekUnit = "week"; + public const string DayUnit = "day"; + public const string HourUnit = "hour"; + public const string MinuteUnit = "minute"; + public const string SecondUnit = "second"; + public const string TimeDurationUnit = "s"; + + public const string AM = "AM"; + public const string PM = "PM"; + + public const int InvalidValue = -1; + public static class TimexTypes { public static readonly string Present = "present"; diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/DateRange.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/DateRange.cs index 20261f166f..f98d0f675e 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/DateRange.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/DateRange.cs @@ -1,5 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+ using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DataTypes.TimexExpression diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/English/TimexConstantsEnglish.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/English/TimexConstantsEnglish.cs index 860aac89c4..9ca671625f 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/English/TimexConstantsEnglish.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/English/TimexConstantsEnglish.cs @@ -7,6 +7,18 @@ namespace Microsoft.Recognizers.Text.DataTypes.TimexExpression { internal static class TimexConstantsEnglish { + public const string Every = "every"; + public const string Now = "now"; + public const string Midnight = "midnight"; + public const string Midday = "midday"; + public const string Today = "today"; + public const string Tomorrow = "tomorrow"; + public const string Yesterday = "yesterday"; + public const string Weekend = "weekend"; + public const string Tonight = "tonight"; + public const string This = "this"; + public const string Last = "last"; + public const string Next = "next"; public static readonly string[] Days = { "Monday", diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/English/TimexConvertEnglish.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/English/TimexConvertEnglish.cs index e94c726026..cf75fc76c4 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/English/TimexConvertEnglish.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/English/TimexConvertEnglish.cs @@ -2,6 +2,8 @@ // Licensed under the MIT License. 
using System; +using System.Collections.Generic; +using System.Globalization; namespace Microsoft.Recognizers.Text.DataTypes.TimexExpression { @@ -13,7 +15,7 @@ public static string ConvertTimexToString(TimexProperty timex) if (types.Contains(Constants.TimexTypes.Present)) { - return "now"; + return TimexConstantsEnglish.Now; } if (types.Contains(Constants.TimexTypes.DateTimeRange)) @@ -60,11 +62,11 @@ public static string ConvertTimexSetToString(TimexSet timexSet) var timex = timexSet.Timex; if (timex.Types.Contains(Constants.TimexTypes.Duration)) { - return $"every {ConvertTimexDurationToString(timex, false)}"; + return $"{TimexConstantsEnglish.Every} {ConvertTimexDurationToString(timex, false)}"; } else { - return $"every {ConvertTimexToString(timex)}"; + return $"{TimexConstantsEnglish.Every} {ConvertTimexToString(timex)}"; } } @@ -72,18 +74,18 @@ public static string ConvertTime(TimexProperty timex) { if (timex.Hour == 0 && timex.Minute == 0 && timex.Second == 0) { - return "midnight"; + return TimexConstantsEnglish.Midnight; } if (timex.Hour == 12 && timex.Minute == 0 && timex.Second == 0) { - return "midday"; + return TimexConstantsEnglish.Midday; } - var hour = (timex.Hour == 0) ? "12" : (timex.Hour > 12) ? (timex.Hour - 12).ToString() : timex.Hour.ToString(); - var minute = (timex.Minute == 0 && timex.Second == 0) ? string.Empty : ":" + timex.Minute.ToString().PadLeft(2, '0'); - var second = (timex.Second == 0) ? string.Empty : ":" + timex.Second.ToString().PadLeft(2, '0'); - var period = timex.Hour < 12 ? "AM" : "PM"; + var hour = (timex.Hour == 0) ? "12" : (timex.Hour > 12) ? (timex.Hour - 12).Value.ToString(CultureInfo.InvariantCulture) : timex.Hour.Value.ToString(CultureInfo.InvariantCulture); + var minute = (timex.Minute == 0 && timex.Second == 0) ? string.Empty : Constants.TimeTimexConnector + timex.Minute.Value.ToString(CultureInfo.InvariantCulture).PadLeft(2, '0'); + var second = (timex.Second == 0) ? 
string.Empty : Constants.TimeTimexConnector + timex.Second.Value.ToString(CultureInfo.InvariantCulture).PadLeft(2, '0'); + var period = timex.Hour < 12 ? Constants.AM : Constants.PM; return $"{hour}{minute}{second}{period}"; } @@ -95,16 +97,24 @@ public static string ConvertDate(TimexProperty timex) return TimexConstantsEnglish.Days[timex.DayOfWeek.Value - 1]; } - var month = TimexConstantsEnglish.Months[timex.Month.Value - 1]; - var date = timex.DayOfMonth.ToString(); - var abbreviation = TimexConstantsEnglish.DateAbbreviation[int.Parse(date[date.Length - 1].ToString())]; + var date = timex.DayOfMonth.Value.ToString(CultureInfo.InvariantCulture); + var dayOfMonth = int.Parse(date, CultureInfo.InvariantCulture); - if (timex.Year != null) + // Ordinals 11 to 13 are special in english as they end in th + var abbreviation = TimexConstantsEnglish.DateAbbreviation[(dayOfMonth is > 9 and < 14 ? 9 : dayOfMonth) % 10]; + + if (timex.Month != null) { - return $"{date}{abbreviation} {month} {timex.Year}".Trim(); + var month = TimexConstantsEnglish.Months[timex.Month.Value - 1]; + if (timex.Year != null) + { + return $"{date}{abbreviation} {month} {timex.Year}".Trim(); + } + + return $"{date}{abbreviation} {month}"; } - return $"{date}{abbreviation} {month}"; + return $"{date}{abbreviation}"; } private static string ConvertDurationPropertyToString(decimal value, string property, bool includeSingleCount) @@ -115,48 +125,49 @@ private static string ConvertDurationPropertyToString(decimal value, string prop } else { - return $"{value} {property}s"; + return $"{value} {property}{Constants.TimeDurationUnit}"; } } private static string ConvertTimexDurationToString(TimexProperty timex, bool includeSingleCount) { + string result = string.Empty; if (timex.Years != null) { - return ConvertDurationPropertyToString(timex.Years.Value, "year", includeSingleCount); + result += ConvertDurationPropertyToString(timex.Years.Value, Constants.YearUnit, includeSingleCount); } if (timex.Months != 
null) { - return ConvertDurationPropertyToString(timex.Months.Value, "month", includeSingleCount); + result += ConvertDurationPropertyToString(timex.Months.Value, Constants.MonthUnit, includeSingleCount); } if (timex.Weeks != null) { - return ConvertDurationPropertyToString(timex.Weeks.Value, "week", includeSingleCount); + result += ConvertDurationPropertyToString(timex.Weeks.Value, Constants.WeekUnit, includeSingleCount); } if (timex.Days != null) { - return ConvertDurationPropertyToString(timex.Days.Value, "day", includeSingleCount); + result += ConvertDurationPropertyToString(timex.Days.Value, Constants.DayUnit, includeSingleCount); } if (timex.Hours != null) { - return ConvertDurationPropertyToString(timex.Hours.Value, "hour", includeSingleCount); + result += ConvertDurationPropertyToString(timex.Hours.Value, Constants.HourUnit, includeSingleCount); } if (timex.Minutes != null) { - return ConvertDurationPropertyToString(timex.Minutes.Value, "minute", includeSingleCount); + result += ConvertDurationPropertyToString(timex.Minutes.Value, Constants.MinuteUnit, includeSingleCount); } if (timex.Seconds != null) { - return ConvertDurationPropertyToString(timex.Seconds.Value, "second", includeSingleCount); + result += ConvertDurationPropertyToString(timex.Seconds.Value, Constants.SecondUnit, includeSingleCount); } - return string.Empty; + return result; } private static string ConvertDuration(TimexProperty timex) @@ -168,7 +179,7 @@ private static string ConvertDateRange(TimexProperty timex) { var season = (timex.Season != null) ? TimexConstantsEnglish.Seasons[timex.Season] : string.Empty; - var year = (timex.Year != null) ? timex.Year.ToString() : string.Empty; + var year = (timex.Year != null) ? 
timex.Year.Value.ToString(CultureInfo.InvariantCulture) : string.Empty; if (timex.WeekOfYear != null) { @@ -206,9 +217,27 @@ private static string ConvertDateTime(TimexProperty timex) private static string ConvertDateTimeRange(TimexProperty timex) { - if (timex.Types.Contains(Constants.TimexTypes.TimeRange)) + var parts = new List(); + + var types = timex.Types; + if (types.Contains(Constants.TimexTypes.Date)) + { + parts.Add(ConvertDate(timex)); + } + + if (types.Contains(Constants.TimexTypes.Time)) + { + parts.Add(ConvertTime(timex)); + } + + if (timex.PartOfDay is not null) + { + parts.Add(ConvertTimeRange(timex)); + } + + if (parts.Count > 0) { - return $"{ConvertDate(timex)} {ConvertTimeRange(timex)}"; + return string.Join(" ", parts); } // date + time + duration diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/English/TimexRelativeConvertEnglish.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/English/TimexRelativeConvertEnglish.cs index 5075c3311d..e763ccb748 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/English/TimexRelativeConvertEnglish.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/English/TimexRelativeConvertEnglish.cs @@ -49,34 +49,34 @@ private static string ConvertDate(TimexProperty timex, DateObject date) if (TimexDateHelpers.DatePartEquals(timexDate, date)) { - return "today"; + return TimexConstantsEnglish.Today; } var tomorrow = TimexDateHelpers.Tomorrow(date); if (TimexDateHelpers.DatePartEquals(timexDate, tomorrow)) { - return "tomorrow"; + return TimexConstantsEnglish.Tomorrow; } var yesterday = TimexDateHelpers.Yesterday(date); if (TimexDateHelpers.DatePartEquals(timexDate, yesterday)) { - return "yesterday"; + return TimexConstantsEnglish.Yesterday; } if (TimexDateHelpers.IsThisWeek(timexDate, date)) { - return $"this {GetDateDay(timexDate.DayOfWeek)}"; + return $"{TimexConstantsEnglish.This} {GetDateDay(timexDate.DayOfWeek)}"; } if 
(TimexDateHelpers.IsNextWeek(timexDate, date)) { - return $"next {GetDateDay(timexDate.DayOfWeek)}"; + return $"{TimexConstantsEnglish.Next} {GetDateDay(timexDate.DayOfWeek)}"; } if (TimexDateHelpers.IsLastWeek(timexDate, date)) { - return $"last {GetDateDay(timexDate.DayOfWeek)}"; + return $"{TimexConstantsEnglish.Last} {GetDateDay(timexDate.DayOfWeek)}"; } } @@ -100,17 +100,17 @@ private static string ConvertDateRange(TimexProperty timex, DateObject date) var thisWeek = TimexDateHelpers.WeekOfYear(date); if (thisWeek == timex.WeekOfYear) { - return timex.Weekend != null ? "this weekend" : "this week"; + return timex.Weekend != null ? $"{TimexConstantsEnglish.This} {TimexConstantsEnglish.Weekend}" : $"{TimexConstantsEnglish.This} {Constants.WeekUnit}"; } if (thisWeek == timex.WeekOfYear + 1) { - return timex.Weekend != null ? "last weekend" : "last week"; + return timex.Weekend != null ? $"{TimexConstantsEnglish.Last} {TimexConstantsEnglish.Weekend}" : $"{TimexConstantsEnglish.Last} {Constants.WeekUnit}"; } if (thisWeek == timex.WeekOfYear - 1) { - return timex.Weekend != null ? "next weekend" : "next week"; + return timex.Weekend != null ? $"{TimexConstantsEnglish.Next} {TimexConstantsEnglish.Weekend}" : $"{TimexConstantsEnglish.Next} {Constants.WeekUnit}"; } } @@ -118,31 +118,31 @@ private static string ConvertDateRange(TimexProperty timex, DateObject date) { if (timex.Month == date.Month) { - return "this month"; + return $"{TimexConstantsEnglish.This} {Constants.MonthUnit}"; } if (timex.Month == date.Month + 1) { - return "next month"; + return $"{TimexConstantsEnglish.Next} {Constants.MonthUnit}"; } if (timex.Month == date.Month - 1) { - return "last month"; + return $"{TimexConstantsEnglish.Last} {Constants.MonthUnit}"; } } - return (timex.Season != null) ? $"this {TimexConstantsEnglish.Seasons[timex.Season]}" : "this year"; + return (timex.Season != null) ? 
$"{TimexConstantsEnglish.This} {TimexConstantsEnglish.Seasons[timex.Season]}" : $"{TimexConstantsEnglish.This} {Constants.YearUnit}"; } if (timex.Year == year + 1) { - return (timex.Season != null) ? $"next {TimexConstantsEnglish.Seasons[timex.Season]}" : "next year"; + return (timex.Season != null) ? $"{TimexConstantsEnglish.Next} {TimexConstantsEnglish.Seasons[timex.Season]}" : $"{TimexConstantsEnglish.Next} {Constants.YearUnit}"; } if (timex.Year == year - 1) { - return (timex.Season != null) ? $"last {TimexConstantsEnglish.Seasons[timex.Season]}" : "last year"; + return (timex.Season != null) ? $"{TimexConstantsEnglish.Last} {TimexConstantsEnglish.Seasons[timex.Season]}" : $"{TimexConstantsEnglish.Last} {Constants.YearUnit}"; } } @@ -159,36 +159,36 @@ private static string ConvertDateTimeRange(TimexProperty timex, DateObject date) { if (TimexDateHelpers.DatePartEquals(timexDate, date)) { - if (timex.PartOfDay == "NI") + if (timex.PartOfDay == Constants.TimexNight) { - return "tonight"; + return TimexConstantsEnglish.Tonight; } else { - return $"this {TimexConstantsEnglish.DayParts[timex.PartOfDay]}"; + return $"{TimexConstantsEnglish.This} {TimexConstantsEnglish.DayParts[timex.PartOfDay]}"; } } var tomorrow = TimexDateHelpers.Tomorrow(date); if (TimexDateHelpers.DatePartEquals(timexDate, tomorrow)) { - return $"tomorrow {TimexConstantsEnglish.DayParts[timex.PartOfDay]}"; + return $"{TimexConstantsEnglish.Tomorrow} {TimexConstantsEnglish.DayParts[timex.PartOfDay]}"; } var yesterday = TimexDateHelpers.Yesterday(date); if (TimexDateHelpers.DatePartEquals(timexDate, yesterday)) { - return $"yesterday {TimexConstantsEnglish.DayParts[timex.PartOfDay]}"; + return $"{TimexConstantsEnglish.Yesterday} {TimexConstantsEnglish.DayParts[timex.PartOfDay]}"; } if (TimexDateHelpers.IsNextWeek(timexDate, date)) { - return $"next {GetDateDay(timexDate.DayOfWeek)} {TimexConstantsEnglish.DayParts[timex.PartOfDay]}"; + return $"{TimexConstantsEnglish.Next} 
{GetDateDay(timexDate.DayOfWeek)} {TimexConstantsEnglish.DayParts[timex.PartOfDay]}"; } if (TimexDateHelpers.IsLastWeek(timexDate, date)) { - return $"last {GetDateDay(timexDate.DayOfWeek)} {TimexConstantsEnglish.DayParts[timex.PartOfDay]}"; + return $"{TimexConstantsEnglish.Last} {GetDateDay(timexDate.DayOfWeek)} {TimexConstantsEnglish.DayParts[timex.PartOfDay]}"; } } } diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.csproj b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.csproj index bea2935f68..6bc458c756 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.csproj +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.csproj @@ -1,12 +1,18 @@  - netstandard2.0;net462;net452;net45 + netstandard2.1;netstandard2.0;net462;net6.0 + 9 + false false ../Recognizers-Text.ruleset - + + + true + ..\buildtools\35MSSharedLib1024.snk + true $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + Microsoft + nlp, entity-extraction, parser-library, recognizer, timex, datatime, netstandard2.0 + Microsoft.Recognizers.Text.DataTypes.TimexExpression provides parsing and evaluation of TIMEX expressions. + MIT + https://github.com/Microsoft/Recognizers-Text + images\icon.png + © Microsoft Corporation. All rights reserved. 
+ + @@ -26,11 +40,11 @@ - + all runtime; build; native; contentfiles; analyzers - + all runtime; build; native; contentfiles; analyzers diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.nuspec b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.nuspec index e1d1ad346d..c3f332dfcb 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.nuspec +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.nuspec @@ -9,15 +9,14 @@ Microsoft.Recognizers.Text.DataTypes.TimexExpression provides parsing and evaluation of TIMEX expressions. MIT https://github.com/Microsoft/Recognizers-Text - http://docs.botframework.com/images/bot_icon.png + images\icon.png © Microsoft Corporation. All rights reserved. 
nlp entity-extraction parser-library recognizer timex datatime netstandard2.0 - - + diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.xml b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.xml index a170cfd39d..39516c33f5 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.xml +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.xml @@ -4,5 +4,40 @@ Microsoft.Recognizers.Text.DataTypes.TimexExpression + + + Year + + + + + Month + + + + + Week + + + + + Day + + + + + Hour + + + + + Minute + + + + + Second + + diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexDateHelpers.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexDateHelpers.cs index 4e408ba6cf..b050fce540 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexDateHelpers.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexDateHelpers.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.Globalization; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DataTypes.TimexExpression @@ -66,29 +67,24 @@ public static bool IsLastWeek(DateObject date, DateObject referenceDate) public static int WeekOfYear(DateObject date) { - var ds = new DateObject(date.Year, 1, 1); - var de = new DateObject(date.Year, date.Month, date.Day); - int weeks = 1; + CultureInfo culture = CultureInfo.InvariantCulture; - while (ds < de) + // Workaround to get ISO 8601 week number. 
+ // (A better solution would be to use ISOWeek.GetWeekOfYear but it seems currently unsupported) + DayOfWeek day = culture.Calendar.GetDayOfWeek(date); + if (day >= DayOfWeek.Monday && day <= DayOfWeek.Wednesday) { - var dayOfWeek = ds.DayOfWeek; - - var isoDayOfWeek = (dayOfWeek == 0) ? 7 : (int)dayOfWeek; - if (isoDayOfWeek == 7) - { - weeks++; - } - - ds = ds.AddDays(1); + date = date.AddDays(3); } + int weeks = culture.Calendar.GetWeekOfYear(date, CalendarWeekRule.FirstFourDayWeek, DayOfWeek.Monday); + return weeks; } public static string FixedFormatNumber(int? n, int size) { - return n.Value.ToString().PadLeft(size, '0'); + return n.Value.ToString(CultureInfo.InvariantCulture).PadLeft(size, '0'); } public static DateObject DateOfLastDay(DayOfWeek day, DateObject referenceDate) diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexFormat.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexFormat.cs index f036cfb4d7..9cebb7d8e0 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexFormat.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexFormat.cs @@ -1,6 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +using System.Collections.Generic; +using System.Globalization; +using System.Text; + namespace Microsoft.Recognizers.Text.DataTypes.TimexExpression { public static class TimexFormat @@ -61,42 +65,43 @@ public static string Format(TimexProperty timex) private static string FormatDuration(TimexProperty timex) { + var timexList = new List { }; if (timex.Years != null) { - return $"P{timex.Years}Y"; + timexList.Add(TimexHelpers.GenerateDurationTimex(TimexUnit.Year, timex.Years ?? Constants.InvalidValue)); } if (timex.Months != null) { - return $"P{timex.Months}M"; + timexList.Add(TimexHelpers.GenerateDurationTimex(TimexUnit.Month, timex.Months ?? 
Constants.InvalidValue)); } if (timex.Weeks != null) { - return $"P{timex.Weeks}W"; + timexList.Add(TimexHelpers.GenerateDurationTimex(TimexUnit.Week, timex.Weeks ?? Constants.InvalidValue)); } if (timex.Days != null) { - return $"P{timex.Days}D"; + timexList.Add(TimexHelpers.GenerateDurationTimex(TimexUnit.Day, timex.Days ?? Constants.InvalidValue)); } if (timex.Hours != null) { - return $"PT{timex.Hours}H"; + timexList.Add(TimexHelpers.GenerateDurationTimex(TimexUnit.Hour, timex.Hours ?? Constants.InvalidValue)); } if (timex.Minutes != null) { - return $"PT{timex.Minutes}M"; + timexList.Add(TimexHelpers.GenerateDurationTimex(TimexUnit.Minute, timex.Minutes ?? Constants.InvalidValue)); } if (timex.Seconds != null) { - return $"PT{timex.Seconds}S"; + timexList.Add(TimexHelpers.GenerateDurationTimex(TimexUnit.Second, timex.Seconds ?? Constants.InvalidValue)); } - return string.Empty; + return TimexHelpers.GenerateCompoundDurationTimex(timexList); } private static string FormatTime(TimexProperty timex) @@ -116,22 +121,7 @@ private static string FormatTime(TimexProperty timex) private static string FormatDate(TimexProperty timex) { - if (timex.Year != null && timex.Month != null && timex.DayOfMonth != null) - { - return $"{TimexDateHelpers.FixedFormatNumber(timex.Year, 4)}-{TimexDateHelpers.FixedFormatNumber(timex.Month, 2)}-{TimexDateHelpers.FixedFormatNumber(timex.DayOfMonth, 2)}"; - } - - if (timex.Month != null && timex.DayOfMonth != null) - { - return $"XXXX-{TimexDateHelpers.FixedFormatNumber(timex.Month, 2)}-{TimexDateHelpers.FixedFormatNumber(timex.DayOfMonth, 2)}"; - } - - if (timex.DayOfWeek != null) - { - return $"XXXX-WXX-{timex.DayOfWeek}"; - } - - return string.Empty; + return TimexHelpers.GenerateDateTimex(timex.Year ?? Constants.InvalidValue, timex.WeekOfYear ?? (timex.Month ?? Constants.InvalidValue), timex.DayOfWeek != null ? timex.DayOfWeek.Value : timex.DayOfMonth ?? Constants.InvalidValue, timex.WeekOfMonth ?? 
Constants.InvalidValue, timex.DayOfWeek != null); } private static string FormatDateRange(TimexProperty timex) @@ -146,6 +136,11 @@ private static string FormatDateRange(TimexProperty timex) return $"{TimexDateHelpers.FixedFormatNumber(timex.Year, 4)}-W{TimexDateHelpers.FixedFormatNumber(timex.WeekOfYear, 2)}"; } + if (timex.Year != null && timex.Month != null && timex.WeekOfMonth != null) + { + return $"{TimexDateHelpers.FixedFormatNumber(timex.Year, 4)}-{TimexDateHelpers.FixedFormatNumber(timex.Month, 2)}-W{TimexDateHelpers.FixedFormatNumber(timex.WeekOfMonth, 2)}"; + } + if (timex.Year != null && timex.Season != null) { return $"{TimexDateHelpers.FixedFormatNumber(timex.Year, 4)}-{timex.Season}"; @@ -168,17 +163,17 @@ private static string FormatDateRange(TimexProperty timex) if (timex.Month != null && timex.WeekOfMonth != null && timex.DayOfWeek != null) { - return $"XXXX-{TimexDateHelpers.FixedFormatNumber(timex.Month, 2)}-WXX-{timex.WeekOfMonth}-{timex.DayOfWeek}"; + return $"{Constants.TimexFuzzyYear}-{TimexDateHelpers.FixedFormatNumber(timex.Month, 2)}-{Constants.TimexFuzzyWeek}-{timex.WeekOfMonth}-{timex.DayOfWeek}"; } if (timex.Month != null && timex.WeekOfMonth != null) { - return $"XXXX-{TimexDateHelpers.FixedFormatNumber(timex.Month, 2)}-WXX-{timex.WeekOfMonth}"; + return $"{Constants.TimexFuzzyYear}-{TimexDateHelpers.FixedFormatNumber(timex.Month, 2)}-W{timex.WeekOfMonth?.ToString("D2", CultureInfo.InvariantCulture)}"; } if (timex.Month != null) { - return $"XXXX-{TimexDateHelpers.FixedFormatNumber(timex.Month, 2)}"; + return $"{Constants.TimexFuzzyYear}-{TimexDateHelpers.FixedFormatNumber(timex.Month, 2)}"; } return string.Empty; diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexHelpers.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexHelpers.cs index 54dad77cac..230727bf2b 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexHelpers.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexHelpers.cs @@ -2,12 +2,66 @@ // Licensed under the MIT License. using System; +using System.Collections.Generic; +using System.Globalization; +using System.Text; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DataTypes.TimexExpression { + public enum TimexUnit + { + /// + /// Year + /// + Year, + + /// + /// Month + /// + Month, + + /// + /// Week + /// + Week, + + /// + /// Day + /// + Day, + + /// + /// Hour + /// + Hour, + + /// + /// Minute + /// + Minute, + + /// + /// Second + /// + Second, + } + public static class TimexHelpers { + public static readonly Dictionary TimexUnitToStringMap = new Dictionary + { + { TimexUnit.Year, Constants.TimexYear }, + { TimexUnit.Month, Constants.TimexMonth }, + { TimexUnit.Week, Constants.TimexWeek }, + { TimexUnit.Day, Constants.TimexDay }, + { TimexUnit.Hour, Constants.TimexHour }, + { TimexUnit.Minute, Constants.TimexMinute }, + { TimexUnit.Second, Constants.TimexSecond }, + }; + + public static readonly List TimeTimexUnitList = new List { TimexUnit.Hour, TimexUnit.Minute, TimexUnit.Second }; + public static TimexRange ExpandDateTimeRange(TimexProperty timex) { var types = timex.Types.Count != 0 ? 
timex.Types : TimexInference.Infer(timex); @@ -27,25 +81,25 @@ public static TimexRange ExpandDateTimeRange(TimexProperty timex) { if (timex.Year != null) { - var range = new TimexRange { Start = new TimexProperty { Year = timex.Year }, End = new TimexProperty() }; - if (timex.Month != null) + Tuple dateRange; + if (timex.Month != null && timex.WeekOfMonth != null) + { + dateRange = MonthWeekDateRange(timex.Year.Value, timex.Month.Value, timex.WeekOfMonth.Value); + } + else if (timex.Month != null) { - range.Start.Month = timex.Month; - range.Start.DayOfMonth = 1; - range.End.Year = timex.Year; - range.End.Month = timex.Month + 1; - range.End.DayOfMonth = 1; + dateRange = MonthDateRange(timex.Year.Value, timex.Month.Value); + } + else if (timex.WeekOfYear != null) + { + dateRange = YearWeekDateRange(timex.Year.Value, timex.WeekOfYear.Value, timex.Weekend); } else { - range.Start.Month = 1; - range.Start.DayOfMonth = 1; - range.End.Year = timex.Year + 1; - range.End.Month = 1; - range.End.DayOfMonth = 1; + dateRange = YearDateRange(timex.Year.Value); } - return range; + return new TimexRange { Start = dateRange.Item1, End = dateRange.Item2 }; } } @@ -173,55 +227,75 @@ public static TimexProperty TimexDateAdd(TimexProperty start, TimexProperty dura return start; } - public static TimexProperty TimexTimeAdd(TimexProperty start, TimexProperty duration) + public static string GenerateDateTimex(int year, int monthOrWeekOfYear, int day, int weekOfMonth, bool byWeek) { - if (duration.Hours != null) + var yearString = year == Constants.InvalidValue ? Constants.TimexFuzzyYear : TimexDateHelpers.FixedFormatNumber(year, 4); + var monthWeekString = monthOrWeekOfYear == Constants.InvalidValue ? 
Constants.TimexFuzzyMonth : TimexDateHelpers.FixedFormatNumber(monthOrWeekOfYear, 2); + string dayString; + if (byWeek) { - var result = start.Clone(); - result.Hour += (int)duration.Hours.Value; - if (result.Hour.Value > 23) + dayString = day.ToString(CultureInfo.InvariantCulture); + if (weekOfMonth != Constants.InvalidValue) { - var days = Math.Floor(result.Hour.Value / 24m); - var hour = result.Hour.Value % 24; - result.Hour = hour; - - if (result.Year != null && result.Month != null && result.DayOfMonth != null) - { - var d = new DateObject(result.Year.Value, result.Month.Value, result.DayOfMonth.Value, 0, 0, 0); - d = d.AddDays((double)days); - - result.Year = d.Year; - result.Month = d.Month; - result.DayOfMonth = d.Day; + monthWeekString = monthWeekString + $"-{Constants.TimexFuzzyWeek}-" + weekOfMonth.ToString(CultureInfo.InvariantCulture); + } + else + { + monthWeekString = Constants.TimexWeek + monthWeekString; + } + } + else + { + dayString = day == Constants.InvalidValue ? Constants.TimexFuzzyDay : TimexDateHelpers.FixedFormatNumber(day, 2); + } - return result; - } + return $"{yearString}-{monthWeekString}-{dayString}"; + } - if (result.DayOfWeek != null) - { - result.DayOfWeek += (int)days; - return result; - } + public static TimexProperty TimexTimeAdd(TimexProperty start, TimexProperty duration) + { + var result = start.Clone(); + if (duration.Minutes != null) + { + result.Minute += (int)duration.Minutes.Value; + if (result.Minute.Value > 59) + { + result.Hour = (result.Hour ?? 
0) + 1; + result.Minute = result.Minute.Value % 60; } + } - return result; + if (duration.Hours != null) + { + result.Hour += (int)duration.Hours.Value; } - if (duration.Minutes != null) + if (result.Hour != null && result.Hour.Value > 23) { - var result = start.Clone(); - result.Minute += (int)duration.Minutes.Value; + var days = Math.Floor(result.Hour.Value / 24m); + var hour = result.Hour.Value % 24; + result.Hour = hour; - if (result.Minute.Value > 59) + if (result.Year != null && result.Month != null && result.DayOfMonth != null) { - result.Hour++; - result.Minute = 0; + var d = new DateObject(result.Year.Value, result.Month.Value, result.DayOfMonth.Value, 0, 0, 0); + d = d.AddDays((double)days); + + result.Year = d.Year; + result.Month = d.Month; + result.DayOfMonth = d.Day; + + return result; } - return result; + if (result.DayOfWeek != null) + { + result.DayOfWeek += (int)days; + return result; + } } - return start; + return result; } public static TimexProperty TimexDateTimeAdd(TimexProperty start, TimexProperty duration) @@ -259,13 +333,152 @@ public static TimeRange TimeRangeFromTimex(TimexProperty timex) }; } + public static string GenerateCompoundDurationTimex(List timexList) + { + var isTimeDurationAlreadyExist = false; + var timexBuilder = new StringBuilder(Constants.GeneralPeriodPrefix); + + foreach (string timexComponent in timexList) + { + // The Time Duration component occurs first time + if (!isTimeDurationAlreadyExist && IsTimeDurationTimex(timexComponent)) + { + timexBuilder.AppendFormat(CultureInfo.InvariantCulture, $"{Constants.TimeTimexPrefix}{GetDurationTimexWithoutPrefix(timexComponent)}"); + isTimeDurationAlreadyExist = true; + } + else + { + timexBuilder.AppendFormat(CultureInfo.InvariantCulture, $"{GetDurationTimexWithoutPrefix(timexComponent)}"); + } + } + + return timexBuilder.ToString(); + } + + public static string GenerateDateTimex(int year, int month, int day, bool byWeek) + { + var yearString = year == Constants.InvalidValue ? 
Constants.TimexFuzzyYear : TimexDateHelpers.FixedFormatNumber(year, 4); + var monthString = month == Constants.InvalidValue ? Constants.TimexFuzzyMonth : TimexDateHelpers.FixedFormatNumber(month, 2); + string dayString; + if (byWeek) + { + dayString = day.ToString(CultureInfo.InvariantCulture); + monthString = Constants.TimexWeek + monthString; + } + else + { + dayString = day == Constants.InvalidValue ? Constants.TimexDay : TimexDateHelpers.FixedFormatNumber(day, 2); + } + + return $"{yearString}-{monthString}-{dayString}"; + } + + public static string GenerateDurationTimex(TimexUnit unit, decimal value) + { + if (value == Constants.InvalidValue) + { + return string.Empty; + } + + var timexBuilder = new StringBuilder(Constants.GeneralPeriodPrefix); + if (TimeTimexUnitList.Contains(unit)) + { + timexBuilder.AppendFormat(CultureInfo.InvariantCulture, Constants.TimeTimexPrefix); + } + + timexBuilder.AppendFormat(CultureInfo.InvariantCulture, value.ToString(CultureInfo.InvariantCulture)); + timexBuilder.AppendFormat(CultureInfo.InvariantCulture, TimexUnitToStringMap[unit]); + return timexBuilder.ToString(); + } + + public static string FormatResolvedDateValue(string dateValue, string timeValue) + { + return $"{dateValue} {timeValue}"; + } + + public static Tuple MonthWeekDateRange(int year, int month, int weekOfMonth) + { + var start = GenerateMonthWeekDateStart(year, month, weekOfMonth); + var end = start.AddDays(7); + + return new Tuple( + new TimexProperty { Year = start.Year, Month = start.Month, DayOfMonth = start.Day }, + new TimexProperty { Year = end.Year, Month = end.Month, DayOfMonth = end.Day }); + } + + public static Tuple MonthDateRange(int year, int month) + { + return new Tuple( + new TimexProperty { Year = year, Month = month, DayOfMonth = 1 }, + new TimexProperty { Year = month == 12 ? year + 1 : year, Month = month == 12 ? 
1 : month + 1, DayOfMonth = 1 }); + } + + public static Tuple YearDateRange(int year) + { + return new Tuple( + new TimexProperty { Year = year, Month = 1, DayOfMonth = 1 }, + new TimexProperty { Year = year + 1, Month = 1, DayOfMonth = 1 }); + } + + public static Tuple YearWeekDateRange(int year, int weekOfYear, bool? isWeekend) + { + var firstMondayInWeek = FirstDateOfWeek(year, weekOfYear, System.Globalization.CultureInfo.InvariantCulture); + + var start = (isWeekend == null || isWeekend.Value == false) ? + firstMondayInWeek : + TimexDateHelpers.DateOfNextDay(DayOfWeek.Saturday, firstMondayInWeek); + var end = firstMondayInWeek + TimeSpan.FromDays(7); + + return new Tuple( + new TimexProperty { Year = start.Year, Month = start.Month, DayOfMonth = start.Day }, + new TimexProperty { Year = end.Year, Month = end.Month, DayOfMonth = end.Day }); + } + + // this is based on https://stackoverflow.com/questions/19901666/get-date-of-first-and-last-day-of-week-knowing-week-number/34727270 + public static DateObject FirstDateOfWeek(int year, int weekOfYear, System.Globalization.CultureInfo cultureInfo) + { + // ISO uses FirstFourDayWeek, and Monday as first day of week, according to https://en.wikipedia.org/wiki/ISO_8601 + var jan1 = new DateObject(year, 1, 1); + int daysOffset = (int)DayOfWeek.Monday - (int)jan1.DayOfWeek; + var firstWeekDay = jan1.AddDays(daysOffset); + + int firstWeek = cultureInfo.Calendar.GetWeekOfYear(jan1, System.Globalization.CalendarWeekRule.FirstFourDayWeek, DayOfWeek.Monday); + if ((firstWeek <= 1 || firstWeek >= 52) && daysOffset >= -3) + { + weekOfYear -= 1; + } + + return firstWeekDay.AddDays(weekOfYear * 7); + } + + public static DateObject GenerateMonthWeekDateStart(int year, int month, int weekOfMonth) + { + var dateInWeek = new DateObject(year, month, 1 + ((weekOfMonth - 1) * 7)); + + // Align the date of the week according to Thursday, base on ISO 8601, 
https://en.wikipedia.org/wiki/ISO_8601 + if (dateInWeek.DayOfWeek > DayOfWeek.Thursday) + { + dateInWeek = dateInWeek.AddDays(7 - (int)dateInWeek.DayOfWeek + 1); + } + else + { + dateInWeek = dateInWeek.AddDays(1 - (int)dateInWeek.DayOfWeek); + } + + return dateInWeek; + } + private static TimexProperty TimeAdd(TimexProperty start, TimexProperty duration) { + int second = (int)(start.Second + (duration.Seconds ?? 0)); + int minute = (int)(start.Minute + (second / 60) + (duration.Minutes ?? 0)); + int hour = (int)(start.Hour + (minute / 60) + (duration.Hours ?? 0)); + return new TimexProperty { - Hour = (int)(start.Hour.Value + duration.Hours ?? 0), - Minute = (int)(start.Minute + duration.Minutes ?? 0), - Second = (int)(start.Second + duration.Seconds ?? 0), + Hour = (hour == 24 && minute % 60 == 0 && second % 60 == 0) ? hour : hour % 24, + Minute = minute % 60, + Second = second % 60, }; } @@ -299,5 +512,16 @@ private static TimexProperty CloneDuration(TimexProperty timex) result.PartOfDay = null; return result; } + + private static bool IsTimeDurationTimex(string timex) + { + return timex.StartsWith($"{Constants.GeneralPeriodPrefix}{Constants.TimeTimexPrefix}", StringComparison.Ordinal); + } + + private static string GetDurationTimexWithoutPrefix(string timex) + { + // Remove "PT" prefix for TimeDuration, Remove "P" prefix for DateDuration + return timex.Substring(IsTimeDurationTimex(timex) ? 
2 : 1); + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexInference.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexInference.cs index 739f5d6bc9..6aca0a91b6 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexInference.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexInference.cs @@ -98,7 +98,7 @@ private static bool IsTime(TimexProperty timexProperty) private static bool IsDate(TimexProperty timexProperty) { - return (timexProperty.Month != null && timexProperty.DayOfMonth != null) || timexProperty.DayOfWeek != null; + return timexProperty.DayOfMonth != null || timexProperty.DayOfWeek != null; } private static bool IsTimeRange(TimexProperty timexProperty) @@ -108,10 +108,10 @@ private static bool IsTimeRange(TimexProperty timexProperty) private static bool IsDateRange(TimexProperty timexProperty) { - return (timexProperty.Year != null && timexProperty.DayOfMonth == null) || - (timexProperty.Year != null && timexProperty.Month != null && timexProperty.DayOfMonth == null) || - (timexProperty.Month != null && timexProperty.DayOfMonth == null) || - timexProperty.Season != null || timexProperty.WeekOfYear != null || timexProperty.WeekOfMonth != null; + return (timexProperty.DayOfMonth == null && timexProperty.DayOfWeek == null) && + (timexProperty.Year != null || timexProperty.Month != null || + timexProperty.Season != null || timexProperty.WeekOfYear != null || + timexProperty.WeekOfMonth != null); } private static bool IsDefinite(TimexProperty timexProperty) diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexParsing.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexParsing.cs index 9a2b765b6d..bc2dc0ffb9 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexParsing.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexParsing.cs @@ -14,11 +14,11 @@ public static void ParseString(string 
timex, TimexProperty timexProperty) { timexProperty.Now = true; } - else if (timex.StartsWith("P")) + else if (timex.StartsWith("P", System.StringComparison.Ordinal)) { // duration ExtractDuration(timex, timexProperty); } - else if (timex.StartsWith("(") && timex.EndsWith(")")) + else if (timex.StartsWith("(", System.StringComparison.Ordinal) && timex.EndsWith(")", System.StringComparison.Ordinal)) { // range indicated with start and end dates and a duration ExtractStartEndRange(timex, timexProperty); } @@ -49,18 +49,39 @@ private static void ExtractStartEndRange(string s, TimexProperty timexProperty) private static void ExtractDateTime(string s, TimexProperty timexProperty) { var indexOfT = s.IndexOf('T'); + var indexOfP = s.IndexOf('P'); + + // Spring timex value has a P in it, but should not be mixed up with + // the "period" types that have P in them + var indexOfSP = s.IndexOf("SP"); if (indexOfT == -1) { var extracted = new Dictionary(); - TimexRegex.Extract("date", s, extracted); + if (indexOfSP > -1 || indexOfP == -1) + { + TimexRegex.Extract("date", s, extracted); + } + else + { + TimexRegex.Extract("date", s.Substring(0, indexOfP), extracted); + } + timexProperty.AssignProperties(extracted); } else { var extracted = new Dictionary(); TimexRegex.Extract("date", s.Substring(0, indexOfT), extracted); - TimexRegex.Extract("time", s.Substring(indexOfT), extracted); + if (indexOfP == -1) + { + TimexRegex.Extract("time", s.Substring(indexOfT), extracted); + } + else + { + TimexRegex.Extract("time", s.Substring(indexOfT, indexOfP - indexOfT), extracted); + } + timexProperty.AssignProperties(extracted); } } diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexProperty.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexProperty.cs index f135a5af45..266d7a0189 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexProperty.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexProperty.cs 
@@ -2,6 +2,7 @@ // Licensed under the MIT License. using System.Collections.Generic; +using System.Globalization; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DataTypes.TimexExpression @@ -199,22 +200,27 @@ public void AssignProperties(IDictionary source) { foreach (var item in source) { + if (string.IsNullOrEmpty(item.Value)) + { + continue; + } + switch (item.Key) { case "year": - Year = int.Parse(item.Value); + Year = int.Parse(item.Value, CultureInfo.InvariantCulture); break; case "month": - Month = int.Parse(item.Value); + Month = int.Parse(item.Value, CultureInfo.InvariantCulture); break; case "dayOfMonth": - DayOfMonth = int.Parse(item.Value); + DayOfMonth = int.Parse(item.Value, CultureInfo.InvariantCulture); break; case "dayOfWeek": - DayOfWeek = int.Parse(item.Value); + DayOfWeek = int.Parse(item.Value, CultureInfo.InvariantCulture); break; case "season": @@ -222,7 +228,7 @@ public void AssignProperties(IDictionary source) break; case "weekOfYear": - WeekOfYear = int.Parse(item.Value); + WeekOfYear = int.Parse(item.Value, CultureInfo.InvariantCulture); break; case "weekend": @@ -230,18 +236,18 @@ public void AssignProperties(IDictionary source) break; case "weekOfMonth": - WeekOfMonth = int.Parse(item.Value); + WeekOfMonth = int.Parse(item.Value, CultureInfo.InvariantCulture); break; case "hour": - Hour = int.Parse(item.Value); + Hour = int.Parse(item.Value, CultureInfo.InvariantCulture); break; case "minute": - Minute = int.Parse(item.Value); + Minute = int.Parse(item.Value, CultureInfo.InvariantCulture); break; case "second": - Second = int.Parse(item.Value); + Second = int.Parse(item.Value, CultureInfo.InvariantCulture); break; case "partOfDay": @@ -252,8 +258,16 @@ public void AssignProperties(IDictionary source) AssignDateDuration(source); break; - case "timeUnit": - AssignTimeDuration(source); + case "hourAmount": + Hours = int.Parse(item.Value, CultureInfo.InvariantCulture); + break; + + case "minuteAmount": + Minutes = 
int.Parse(item.Value, CultureInfo.InvariantCulture); + break; + + case "secondAmount": + Seconds = int.Parse(item.Value, CultureInfo.InvariantCulture); break; } } @@ -264,37 +278,19 @@ private void AssignDateDuration(IDictionary source) switch (source["dateUnit"]) { case "Y": - Years = decimal.Parse(source["amount"]); + Years = decimal.Parse(source["amount"], CultureInfo.InvariantCulture); break; case "M": - Months = decimal.Parse(source["amount"]); + Months = decimal.Parse(source["amount"], CultureInfo.InvariantCulture); break; case "W": - Weeks = decimal.Parse(source["amount"]); + Weeks = decimal.Parse(source["amount"], CultureInfo.InvariantCulture); break; case "D": - Days = decimal.Parse(source["amount"]); - break; - } - } - - private void AssignTimeDuration(IDictionary source) - { - switch (source["timeUnit"]) - { - case "H": - Hours = decimal.Parse(source["amount"]); - break; - - case "M": - Minutes = decimal.Parse(source["amount"]); - break; - - case "S": - Seconds = decimal.Parse(source["amount"]); + Days = decimal.Parse(source["amount"], CultureInfo.InvariantCulture); break; } } diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexRangeResolver.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexRangeResolver.cs index 4f786d7192..afdb9753e8 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexRangeResolver.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexRangeResolver.cs @@ -212,6 +212,7 @@ private static IEnumerable ResolveDefiniteAgainstConstraint(TimexPropert private static IEnumerable ResolveDateAgainstConstraint(TimexProperty timex, DateRange constraint) { + if (timex.Month != null && timex.DayOfMonth != null) { var result = new List(); @@ -245,6 +246,23 @@ private static IEnumerable ResolveDateAgainstConstraint(TimexProperty ti return result; } + if (timex.Hour != null) + { + var result = new List(); + DateTime day = constraint.Start; + while (day <= constraint.End) 
+ { + var t = timex.Clone(); + t.Year = day.Year; + t.Month = day.Month; + t.DayOfMonth = day.Day; + result.AddRange(ResolveDefiniteAgainstConstraint(t, constraint)); + day = day.AddDays(1); + } + + return result; + } + return Enumerable.Empty(); } diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexRegex.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexRegex.cs index 5d57b3d934..dfcfb72b68 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexRegex.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexRegex.cs @@ -2,66 +2,88 @@ // Licensed under the MIT License. using System.Collections.Generic; +using System.Globalization; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DataTypes.TimexExpression { public static class TimexRegex { + private const string DateTimeCollectionName = "datetime"; + private const string DateCollectionName = "date"; + private const string TimeCollectionName = "time"; + private const string PeriodCollectionName = "period"; + private static IDictionary timexRegex = new Dictionary { { - "date", new Regex[] + DateCollectionName, new Regex[] { - // date - new Regex(@"^(?\d\d\d\d)-(?\d\d)-(?\d\d)$"), - new Regex(@"^XXXX-WXX-(?\d)$"), - new Regex(@"^XXXX-(?\d\d)-(?\d\d)$"), + // date + new Regex(@"^(XXXX|(?\d\d\d\d))-(?\d\d)(-(?\d\d))?"), + new Regex(@"^XXXX-WXX-(?\d)"), + new Regex(@"^XXXX-XX-(?\d\d)"), // daterange - new Regex(@"^(?\d\d\d\d)$"), - new Regex(@"^(?\d\d\d\d)-(?\d\d)$"), - new Regex(@"^(?SP|SU|FA|WI)$"), - new Regex(@"^(?\d\d\d\d)-(?SP|SU|FA|WI)$"), - new Regex(@"^(?\d\d\d\d)-W(?\d\d)$"), - new Regex(@"^(?\d\d\d\d)-W(?\d\d)-(?WE)$"), - new Regex(@"^XXXX-(?\d\d)$"), - new Regex(@"^XXXX-(?\d\d)-W(?\d\d)$"), - new Regex(@"^XXXX-(?\d\d)-WXX-(?\d)-(?\d)$"), + new Regex(@"^(?\d\d\d\d)"), + new Regex(@"^(XXXX|(?\d\d\d\d))-(?\d\d)-W(?\d\d)"), + new Regex(@"^(XXXX|(?\d\d\d\d))-(?\d\d)-WXX-(?\d{1,2})(-(?\d))?"), + new 
Regex(@"^(?SP|SU|FA|WI)"), + new Regex(@"^(XXXX|(?\d\d\d\d))-(?SP|SU|FA|WI)"), + new Regex(@"^(XXXX|(?\d\d\d\d))-W(?\d\d)(-(?\d)|-(?WE))?"), } }, { - "time", new Regex[] + TimeCollectionName, new Regex[] { // time - new Regex(@"^T(?\d\d)$"), - new Regex(@"^T(?\d\d):(?\d\d)$"), - new Regex(@"^T(?\d\d):(?\d\d):(?\d\d)$"), + new Regex(@"T(?\d\d)Z?$"), + new Regex(@"T(?\d\d):(?\d\d)Z?$"), + new Regex(@"T(?\d\d):(?\d\d):(?\d\d)Z?$"), // timerange new Regex(@"^T(?DT|NI|MO|AF|EV)$"), } }, { - "period", new Regex[] + PeriodCollectionName, new Regex[] { - new Regex(@"^P(?\d*\.?\d+)(?Y|M|W|D)$"), - new Regex(@"^PT(?\d*\.?\d+)(?H|M|S)$"), + new Regex(@"P(?\d*\.?\d+)(?Y|M|W|D|WE|WD)$"), + new Regex(@"^PT(?\d*\.?\d+)H(\d*\.?\d+(M|S)){0,2}$"), + new Regex(@"^PT(\d*\.?\d+H)?(?\d*\.?\d+)M(\d*\.?\d+S)?$"), + new Regex(@"^PT(\d*\.?\d+(H|M)){0,2}(?\d*\.?\d+)S$"), } }, }; public static bool Extract(string name, string timex, IDictionary result) { - foreach (var entry in timexRegex[name]) + var lowerName = name.ToLower(CultureInfo.InvariantCulture); + var nameGroup = new string[lowerName == DateTimeCollectionName ? 
2 : 1]; + + if (lowerName == DateTimeCollectionName) + { + nameGroup[0] = DateCollectionName; + nameGroup[1] = TimeCollectionName; + } + else + { + nameGroup[0] = lowerName; + } + + var anyTrue = false; + foreach (var nameItem in nameGroup) { - if (TryExtract(entry, timex, result)) + foreach (var entry in timexRegex[nameItem]) { - return true; + if (TryExtract(entry, timex, result)) + { + anyTrue = true; + } } } - return false; + return anyTrue; } private static bool TryExtract(Regex regex, string timex, IDictionary result) diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexResolver.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexResolver.cs index 03b30f8a57..86637c4280 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexResolver.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexResolver.cs @@ -3,6 +3,8 @@ using System; using System.Collections.Generic; +using System.Globalization; +using System.Linq; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DataTypes.TimexExpression @@ -28,12 +30,12 @@ public static class TimexResolver if (types.Contains(Constants.TimexTypes.DateTimeRange)) { - return ResolveDateTimeRange(timex); + return ResolveDateTimeRange(timex, date); } if (types.Contains(Constants.TimexTypes.Definite) && types.Contains(Constants.TimexTypes.Time)) { - return ResolveDefiniteTime(timex); + return ResolveDefiniteTime(timex, date); } if (types.Contains(Constants.TimexTypes.Definite) && types.Contains(Constants.TimexTypes.DateRange)) @@ -53,7 +55,7 @@ public static class TimexResolver if (types.Contains(Constants.TimexTypes.TimeRange)) { - return ResolveTimeRange(timex); + return ResolveTimeRange(timex, date); } if (types.Contains(Constants.TimexTypes.DateTime)) @@ -73,13 +75,13 @@ public static class TimexResolver if (types.Contains(Constants.TimexTypes.Time)) { - return ResolveTime(timex); + return ResolveTime(timex, date); } return new 
List(); } - private static List ResolveDefiniteTime(TimexProperty timex) + private static List ResolveDefiniteTime(TimexProperty timex, DateObject date) { return new List { @@ -87,7 +89,7 @@ public static class TimexResolver { Timex = timex.TimexValue, Type = "datetime", - Value = $"{TimexValue.DateValue(timex)} {TimexValue.TimeValue(timex)}", + Value = $"{TimexValue.DateValue(timex)} {TimexValue.TimeValue(timex, date)}", }, }; } @@ -123,44 +125,64 @@ public static class TimexResolver private static List ResolveDate(TimexProperty timex, DateObject date) { - return new List + var dateValueList = GetDateValues(timex, date); + var result = new List { }; + foreach (string dateValue in dateValueList) { - new Resolution.Entry - { - Timex = timex.TimexValue, - Type = "date", - Value = LastDateValue(timex, date), - }, - new Resolution.Entry + result.Add(new Resolution.Entry { Timex = timex.TimexValue, Type = "date", - Value = NextDateValue(timex, date), - }, - }; + Value = dateValue, + }); + } + + return result; } private static string LastDateValue(TimexProperty timex, DateObject date) { - if (timex.Month != null && timex.DayOfMonth != null) + if (timex.DayOfMonth != null) { + var year = date.Year; + var month = date.Month; + if (timex.Month != null) + { + month = timex.Month.Value; + if (date.Month < month || (date.Month == month && date.Day <= timex.DayOfMonth)) /* only step back a year when the month/day has not yet passed in the reference year */ + { + year--; + } + } + else + { + if (date.Day <= timex.DayOfMonth) + { + month--; + if (month < 1) + { + month = 12; /* stepping back from January lands in December of the previous year */ + year--; + } + } + } + return TimexValue.DateValue(new TimexProperty { - Year = date.Year - 1, - Month = timex.Month, + Year = year, + Month = month, DayOfMonth = timex.DayOfMonth, }); } if (timex.DayOfWeek != null) { - var day = timex.DayOfWeek == 7 ? 
DayOfWeek.Sunday : (DayOfWeek)timex.DayOfWeek; - var result = TimexDateHelpers.DateOfLastDay(day, date); + var start = GenerateWeekDate(timex, date, true); return TimexValue.DateValue(new TimexProperty { - Year = result.Year, - Month = result.Month, - DayOfMonth = result.Day, + Year = start.Year, + Month = start.Month, + DayOfMonth = start.Day, }); } @@ -169,32 +191,54 @@ private static string LastDateValue(TimexProperty timex, DateObject date) private static string NextDateValue(TimexProperty timex, DateObject date) { - if (timex.Month != null && timex.DayOfMonth != null) + if (timex.DayOfMonth != null) { + var year = date.Year; + var month = date.Month; + if (timex.Month != null) + { + month = timex.Month.Value; + if (date.Month > month || (date.Month == month && date.Day > timex.DayOfMonth)) + { + year++; + } + } + else + { + if (date.Day > timex.DayOfMonth) + { + month++; + if (month > 12) + { + month = month % 12; + year++; /* rolling forward past December lands in January of the following year */ + } + } + } + return TimexValue.DateValue(new TimexProperty { - Year = date.Year, - Month = timex.Month, + Year = year, + Month = month, DayOfMonth = timex.DayOfMonth, }); } if (timex.DayOfWeek != null) { - var day = timex.DayOfWeek == 7 ? 
DayOfWeek.Sunday : (DayOfWeek)timex.DayOfWeek; - var result = TimexDateHelpers.DateOfNextDay(day, date); + var start = GenerateWeekDate(timex, date, false); return TimexValue.DateValue(new TimexProperty { - Year = result.Year, - Month = result.Month, - DayOfMonth = result.Day, + Year = start.Year, + Month = start.Month, + DayOfMonth = start.Day, }); } return string.Empty; } - private static List ResolveTime(TimexProperty timex) + private static List ResolveTime(TimexProperty timex, DateObject date) { return new List { @@ -202,7 +246,7 @@ private static string NextDateValue(TimexProperty timex, DateObject date) { Timex = timex.TimexValue, Type = "time", - Value = TimexValue.TimeValue(timex), + Value = TimexValue.TimeValue(timex, date), }, }; } @@ -222,28 +266,95 @@ private static string NextDateValue(TimexProperty timex, DateObject date) private static Tuple YearDateRange(int year) { + var yearDateRange = TimexHelpers.YearDateRange(year); + return new Tuple( - TimexValue.DateValue(new TimexProperty { Year = year, Month = 1, DayOfMonth = 1 }), - TimexValue.DateValue(new TimexProperty { Year = year + 1, Month = 1, DayOfMonth = 1 })); + TimexValue.DateValue(yearDateRange.Item1), + TimexValue.DateValue(yearDateRange.Item2)); } private static Tuple MonthDateRange(int year, int month) { + var monthDateRange = TimexHelpers.MonthDateRange(year, month); + return new Tuple( - TimexValue.DateValue(new TimexProperty { Year = year, Month = month, DayOfMonth = 1 }), - TimexValue.DateValue(new TimexProperty { Year = year, Month = month + 1, DayOfMonth = 1 })); + TimexValue.DateValue(monthDateRange.Item1), + TimexValue.DateValue(monthDateRange.Item2)); } - private static Tuple WeekDateRange(int year, int weekOfYear) + private static Tuple YearWeekDateRange(int year, int weekOfYear, bool? 
isWeekend) { - var dateInWeek = new DateObject(year, 1, 1) + TimeSpan.FromDays((weekOfYear - 1) * 7); + var yearWeekDateRange = TimexHelpers.YearWeekDateRange(year, weekOfYear, isWeekend); + + return new Tuple( + TimexValue.DateValue(yearWeekDateRange.Item1), + TimexValue.DateValue(yearWeekDateRange.Item2)); + } - var start = TimexDateHelpers.DateOfLastDay(DayOfWeek.Monday, dateInWeek); - var end = TimexDateHelpers.DateOfLastDay(DayOfWeek.Monday, dateInWeek + TimeSpan.FromDays(7)); + private static Tuple MonthWeekDateRange(int year, int month, int weekOfMonth) + { + var monthWeekDateRange = TimexHelpers.MonthWeekDateRange(year, month, weekOfMonth); return new Tuple( - TimexValue.DateValue(new TimexProperty { Year = start.Year, Month = start.Month, DayOfMonth = start.Day }), - TimexValue.DateValue(new TimexProperty { Year = end.Year, Month = end.Month, DayOfMonth = end.Day })); + TimexValue.DateValue(monthWeekDateRange.Item1), + TimexValue.DateValue(monthWeekDateRange.Item2)); + } + + private static DateObject GenerateWeekDate(TimexProperty timex, DateObject date, bool isBefore) + { + DateObject start; + if (timex.WeekOfMonth == null && timex.WeekOfYear == null) + { + var day = timex.DayOfWeek == 7 ? DayOfWeek.Sunday : (DayOfWeek)timex.DayOfWeek; + if (isBefore) + { + start = TimexDateHelpers.DateOfLastDay(day, date); + } + else + { + start = TimexDateHelpers.DateOfNextDay(day, date); + } + } + else + { + int dayOfWeek = timex.DayOfWeek.Value - 1; + int year = timex.Year ?? 
date.Year; + if (timex.WeekOfYear != null) + { + int weekOfYear = timex.WeekOfYear.Value; + start = TimexHelpers.FirstDateOfWeek(year, weekOfYear, CultureInfo.InvariantCulture).AddDays(dayOfWeek); + if (timex.Year == null) + { + if (isBefore && start > date) + { + start = TimexHelpers.FirstDateOfWeek(year - 1, weekOfYear, CultureInfo.InvariantCulture).AddDays(dayOfWeek); + } + else if (!isBefore && start < date) + { + start = TimexHelpers.FirstDateOfWeek(year + 1, weekOfYear, CultureInfo.InvariantCulture).AddDays(dayOfWeek); + } + } + } + else + { + int month = timex.Month ?? date.Month; + int weekOfMonth = timex.WeekOfMonth.Value; + start = TimexHelpers.GenerateMonthWeekDateStart(year, month, weekOfMonth).AddDays(dayOfWeek); + if (timex.Year == null || timex.Month == null) + { + if (isBefore && start > date) + { + start = TimexHelpers.GenerateMonthWeekDateStart(timex.Month != null ? year - 1 : year, timex.Month == null ? month - 1 : month, weekOfMonth).AddDays(dayOfWeek); + } + else if (!isBefore && start < date) + { + start = TimexHelpers.GenerateMonthWeekDateStart(timex.Month != null ? year + 1 : year, timex.Month == null ? month + 1 : month, weekOfMonth).AddDays(dayOfWeek); + } + } + } + } + + return start; } private static List ResolveDateRange(TimexProperty timex, DateObject date) @@ -262,6 +373,24 @@ private static Tuple WeekDateRange(int year, int weekOfYear) } else { + if (timex.Month != null && timex.WeekOfMonth != null) + { + var yearDateRangeList = GetMonthWeekDateRange(timex.Year ?? 
Constants.InvalidValue, timex.Month.Value, timex.WeekOfMonth.Value, date.Year); + var result = new List { }; + foreach (Tuple yearDateRange in yearDateRangeList) + { + result.Add(new Resolution.Entry + { + Timex = timex.TimexValue, + Type = "daterange", + Start = yearDateRange.Item1, + End = yearDateRange.Item2, + }); + } + + return result; + } + if (timex.Year != null && timex.Month != null) { var dateRange = MonthDateRange(timex.Year.Value, timex.Month.Value); @@ -279,7 +408,7 @@ private static Tuple WeekDateRange(int year, int weekOfYear) if (timex.Year != null && timex.WeekOfYear != null) { - var dateRange = WeekDateRange(timex.Year.Value, timex.WeekOfYear.Value); + var dateRange = YearWeekDateRange(timex.Year.Value, timex.WeekOfYear.Value, timex.Weekend); return new List { @@ -351,7 +480,7 @@ private static Tuple PartOfDayTimeRange(TimexProperty timex) return new Tuple("not resolved", "not resolved"); } - private static List ResolveTimeRange(TimexProperty timex) + private static List ResolveTimeRange(TimexProperty timex, DateObject date) { if (timex.PartOfDay != null) { @@ -376,8 +505,8 @@ private static Tuple PartOfDayTimeRange(TimexProperty timex) { Timex = timex.TimexValue, Type = "timerange", - Start = TimexValue.TimeValue(range.Start), - End = TimexValue.TimeValue(range.End), + Start = TimexValue.TimeValue(range.Start, date), + End = TimexValue.TimeValue(range.End, date), }, }; } @@ -389,42 +518,87 @@ private static Tuple PartOfDayTimeRange(TimexProperty timex) foreach (var resolved in resolvedDates) { resolved.Type = "datetime"; - resolved.Value = $"{resolved.Value} {TimexValue.TimeValue(timex)}"; + resolved.Value = $"{resolved.Value} {TimexValue.TimeValue(timex, date)}"; } return resolvedDates; } - private static List ResolveDateTimeRange(TimexProperty timex) + private static List GetDateValues(TimexProperty timex, DateObject date) + { + List result = new List { }; + if (timex.Year != null && timex.Month != null && timex.DayOfMonth != null) + { + 
result.Add(TimexValue.DateValue(timex)); + } + else + { + result.Add(LastDateValue(timex, date)); + if (timex.Year == null) + { + result.Add(NextDateValue(timex, date)); + } + } + + return result; + } + + private static List> GetMonthWeekDateRange(int year, int month, int weekOfMonth, int referYear) + { + var result = new List> { }; + if (year == Constants.InvalidValue) + { + result.Add(MonthWeekDateRange(referYear - 1, month, weekOfMonth)); + result.Add(MonthWeekDateRange(referYear, month, weekOfMonth)); + } + else + { + result.Add(MonthWeekDateRange(year, month, weekOfMonth)); + } + + return result; + } + + private static List ResolveDateTimeRange(TimexProperty timex, DateObject date) { if (timex.PartOfDay != null) { - var date = TimexValue.DateValue(timex); + var dateValues = GetDateValues(timex, date); var timeRange = PartOfDayTimeRange(timex); - return new List + var result = new List { }; + foreach (string dateValue in dateValues) { - new Resolution.Entry - { - Timex = timex.TimexValue, - Type = "datetimerange", - Start = $"{date} {timeRange.Item1}", - End = $"{date} {timeRange.Item2}", - }, - }; + result.Add( + new Resolution.Entry + { + Timex = timex.TimexValue, + Type = "datetimerange", + Start = TimexHelpers.FormatResolvedDateValue(dateValue, timeRange.Item1), + End = TimexHelpers.FormatResolvedDateValue(dateValue, timeRange.Item2), + }); + } + + return result; } else { var range = TimexHelpers.ExpandDateTimeRange(timex); - return new List + var startDateValues = GetDateValues(range.Start, date); + var endDateValues = GetDateValues(range.End, date); + var result = new List { }; + foreach (var dateRange in startDateValues.Zip(endDateValues, (n, w) => new { start = n, end = w })) { - new Resolution.Entry - { - Timex = timex.TimexValue, - Type = "datetimerange", - Start = $"{TimexValue.DateValue(range.Start)} {TimexValue.TimeValue(range.Start)}", - End = $"{TimexValue.DateValue(range.End)} {TimexValue.TimeValue(range.End)}", - }, - }; + result.Add( + new 
Resolution.Entry + { + Timex = timex.TimexValue, + Type = "datetimerange", + Start = TimexHelpers.FormatResolvedDateValue(dateRange.start, TimexValue.TimeValue(range.Start, date)), + End = TimexHelpers.FormatResolvedDateValue(dateRange.end, TimexValue.TimeValue(range.End, date)), + }); + } + + return result; } } } diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexValue.cs b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexValue.cs index 2177a70810..ba7a6f16f6 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexValue.cs +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/TimexValue.cs @@ -1,6 +1,9 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +using System; +using System.Globalization; + namespace Microsoft.Recognizers.Text.DataTypes.TimexExpression { public static class TimexValue @@ -15,59 +18,69 @@ public static string DateValue(TimexProperty timexProperty) return string.Empty; } - public static string TimeValue(TimexProperty timexProperty) + public static string TimeValue(TimexProperty timexProperty, DateTime date) { if (timexProperty.Hour != null && timexProperty.Minute != null && timexProperty.Second != null) { - return $"{TimexDateHelpers.FixedFormatNumber(timexProperty.Hour, 2)}:{TimexDateHelpers.FixedFormatNumber(timexProperty.Minute, 2)}:{TimexDateHelpers.FixedFormatNumber(timexProperty.Second, 2)}"; + if (date.Kind == DateTimeKind.Utc) + { + var timeString = $"{TimexDateHelpers.FixedFormatNumber(timexProperty.Hour, 2)}:{TimexDateHelpers.FixedFormatNumber(timexProperty.Minute, 2)}:{TimexDateHelpers.FixedFormatNumber(timexProperty.Second, 2)}"; + var tempDateTime = DateTime.Parse(timeString, CultureInfo.InvariantCulture); + return tempDateTime.ToUniversalTime().ToString("HH:mm:ss", CultureInfo.InvariantCulture); + } + else + { + return $"{TimexDateHelpers.FixedFormatNumber(timexProperty.Hour, 
2)}:{TimexDateHelpers.FixedFormatNumber(timexProperty.Minute, 2)}:{TimexDateHelpers.FixedFormatNumber(timexProperty.Second, 2)}"; + } } return string.Empty; } - public static string DatetimeValue(TimexProperty timexProperty) + public static string DatetimeValue(TimexProperty timexProperty, DateTime date) { - return $"{DateValue(timexProperty)} {TimeValue(timexProperty)}"; + return $"{DateValue(timexProperty)} {TimeValue(timexProperty, date)}"; } public static string DurationValue(TimexProperty timexProperty) { + decimal duration = 0; if (timexProperty.Years != null) { - return (31536000 * timexProperty.Years).ToString(); + duration += 31536000 * timexProperty.Years ?? 0; } if (timexProperty.Months != null) { - return (2592000 * timexProperty.Months).ToString(); + duration += 2592000 * timexProperty.Months ?? 0; } if (timexProperty.Weeks != null) { - return (604800 * timexProperty.Weeks).ToString(); + duration += 604800 * timexProperty.Weeks ?? 0; } if (timexProperty.Days != null) { - return (86400 * timexProperty.Days).ToString(); + duration += 86400 * timexProperty.Days ?? 0; } if (timexProperty.Hours != null) { - return (3600 * timexProperty.Hours).ToString(); + duration += 3600 * timexProperty.Hours ?? 0; } if (timexProperty.Minutes != null) { - return (60 * timexProperty.Minutes).ToString(); + duration += 60 * timexProperty.Minutes ?? 0; } if (timexProperty.Seconds != null) { - return timexProperty.Seconds.ToString(); + duration += timexProperty.Seconds ?? 
0; } - return string.Empty; + return duration.ToString(CultureInfo.InvariantCulture); } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDateExtractorConfiguration.cs new file mode 100644 index 0000000000..c8c12b21c7 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDateExtractorConfiguration.cs @@ -0,0 +1,299 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Arabic; +using Microsoft.Recognizers.Text.DateTime.Arabic.Utilities; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Arabic; + +namespace Microsoft.Recognizers.Text.DateTime.Arabic +{ + public class ArabicDateExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateExtractorConfiguration + { + + public static readonly Regex MonthRegex = + new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthNumRegex = + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayRegex = + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SingleWeekDayRegex = + new Regex(DateTimeDefinitions.SingleWeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex OnRegex = + new Regex(DateTimeDefinitions.OnRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelaxedOnRegex = + new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags, RegexTimeOut); + + public 
static readonly Regex ThisRegex = + new Regex(DateTimeDefinitions.ThisRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LastDateRegex = + new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NextDateRegex = + new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DateUnitRegex = + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialDayRegex = + new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayOfMonthRegex = + new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeWeekDayRegex = + new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialDate = + new Regex(DateTimeDefinitions.SpecialDate, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialDayWithNumRegex = + new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ForTheRegex = + new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayAndDayOfMothRegex = + new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayAndDayRegex = + new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeMonthRegex = + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex StrictRelativeRegex = + new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PrefixArticleRegex = + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex OfMonth = + new 
Regex(DateTimeDefinitions.OfMonth, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthEnd = + new Regex(DateTimeDefinitions.MonthEnd, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayEnd = + new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayStart = + new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags, RegexTimeOut); + + public static readonly Regex YearSuffix = + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); + + public static readonly Regex LessThanRegex = + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MoreThanRegex = + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex InConnectorRegex = + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SinceYearSuffixRegex = + new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangeUnitRegex = + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangeConnectorSymbolRegex = + new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex BeforeAfterRegex = + new Regex(DateTimeDefinitions.BeforeAfterRegex, RegexFlags, RegexTimeOut); + + public static readonly ImmutableDictionary DayOfWeek = + DateTimeDefinitions.DayOfWeek.ToImmutableDictionary(); + + public static readonly ImmutableDictionary MonthOfYear = + DateTimeDefinitions.MonthOfYear.ToImmutableDictionary(); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft; + + private static readonly Regex DayRegex = + new Regex(DateTimeDefinitions.ImplicitDayRegex, RegexFlags, RegexTimeOut); + + public 
ArabicDateExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Arabic.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Arabic.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new ArabicNumberParserConfiguration(numConfig)); + DurationExtractor = new BaseDurationExtractor(new ArabicDurationExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new ArabicHolidayExtractorConfiguration(this)); + UtilityConfiguration = new ArabicDatetimeUtilityConfiguration(); + + ImplicitDateList = new List + { + // extract "12" from "on 12" + OnRegex, + + // extract "12th" from "on/at/in 12th" + RelaxedOnRegex, + + // "the day before yesterday", "previous day", "today", "yesterday", "tomorrow" + SpecialDayRegex, + + // "this Monday", "Tuesday of this week" + ThisRegex, + + // "last/previous Monday", "Monday of last week" + LastDateRegex, + + // "next/following Monday", "Monday of next week" + NextDateRegex, + + // "Sunday", "Weds" + SingleWeekDayRegex, + + // "2nd Monday of April" + WeekDayOfMonthRegex, + + // "on the 12th" + SpecialDate, + + // "two days from today", "five days from tomorrow" + SpecialDayWithNumRegex, + + // "three Monday from now" + RelativeWeekDayRegex, + }; + + if ((Options & DateTimeOptions.CalendarMode) != 0) + { + ImplicitDateList = ImplicitDateList.Concat(new[] { DayRegex }); + } + + // 3-23-2017 + var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags, RegexTimeOut); + + // 23-3-2015 + var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags, RegexTimeOut); + + // on (Sunday,)? 
1.3 + var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags, RegexTimeOut); + + // on (Sunday,)? 24-12 + var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags, RegexTimeOut); + + // "(Sunday,)? 7/23, 2018", year part is required + var dateRegex7L = new Regex(DateTimeDefinitions.DateExtractor7L, RegexFlags, RegexTimeOut); + + // "(Sunday,)? 7/23", year part is not required + var dateRegex7S = new Regex(DateTimeDefinitions.DateExtractor7S, RegexFlags, RegexTimeOut); + + // "(Sunday,)? 23/7, 2018", year part is required + var dateRegex9L = new Regex(DateTimeDefinitions.DateExtractor9L, RegexFlags, RegexTimeOut); + + // "(Sunday,)? 23/7", year part is not required + var dateRegex9S = new Regex(DateTimeDefinitions.DateExtractor9S, RegexFlags, RegexTimeOut); + + // (Sunday,)? 2015-12-23 + var dateRegexA = new Regex(DateTimeDefinitions.DateExtractorA, RegexFlags, RegexTimeOut); + + DateRegexList = new List + { + // (Sunday,)? April 5 or (Sunday,)? April 5, 2016 + new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags, RegexTimeOut), + + // (Sunday,)? 6th of April + new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags, RegexTimeOut), + }; + + var enableDmy = DmyDateFormat || + DateTimeDefinitions.DefaultLanguageFallback == Constants.DefaultLanguageFallback_DMY; + + DateRegexList = DateRegexList.Concat(enableDmy ? 
+ new[] { dateRegex5, dateRegex8, dateRegex9L, dateRegex9S, dateRegex4, dateRegex6, dateRegex7L, dateRegex7S, dateRegexA } : + new[] { dateRegex4, dateRegex6, dateRegex7L, dateRegex7S, dateRegex5, dateRegex8, dateRegex9L, dateRegex9S, dateRegexA }); + } + + public IEnumerable DateRegexList { get; } + + public IExtractor IntegerExtractor { get; } + + public IExtractor OrdinalExtractor { get; } + + public IParser NumberParser { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + + public IEnumerable ImplicitDateList { get; } + + IImmutableDictionary IDateExtractorConfiguration.DayOfWeek => DayOfWeek; + + IImmutableDictionary IDateExtractorConfiguration.MonthOfYear => MonthOfYear; + + bool IDateExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + Regex IDateExtractorConfiguration.OfMonth => OfMonth; + + Regex IDateExtractorConfiguration.MonthEnd => MonthEnd; + + Regex IDateExtractorConfiguration.WeekDayEnd => WeekDayEnd; + + Regex IDateExtractorConfiguration.WeekDayStart => WeekDayStart; + + Regex IDateExtractorConfiguration.DateUnitRegex => DateUnitRegex; + + Regex IDateExtractorConfiguration.ForTheRegex => ForTheRegex; + + Regex IDateExtractorConfiguration.WeekDayAndDayOfMonthRegex => WeekDayAndDayOfMothRegex; + + Regex IDateExtractorConfiguration.WeekDayAndDayRegex => WeekDayAndDayRegex; + + Regex IDateExtractorConfiguration.RelativeMonthRegex => RelativeMonthRegex; + + Regex IDateExtractorConfiguration.StrictRelativeRegex => StrictRelativeRegex; + + Regex IDateExtractorConfiguration.WeekDayRegex => WeekDayRegex; + + Regex IDateExtractorConfiguration.PrefixArticleRegex => PrefixArticleRegex; + + Regex IDateExtractorConfiguration.YearSuffix => YearSuffix; + + Regex IDateExtractorConfiguration.LessThanRegex => LessThanRegex; + + Regex IDateExtractorConfiguration.MoreThanRegex => 
MoreThanRegex; + + Regex IDateExtractorConfiguration.InConnectorRegex => InConnectorRegex; + + Regex IDateExtractorConfiguration.SinceYearSuffixRegex => SinceYearSuffixRegex; + + Regex IDateExtractorConfiguration.RangeUnitRegex => RangeUnitRegex; + + Regex IDateExtractorConfiguration.RangeConnectorSymbolRegex => RangeConnectorSymbolRegex; + + Regex IDateExtractorConfiguration.BeforeAfterRegex => BeforeAfterRegex; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDatePeriodExtractorConfiguration.cs new file mode 100644 index 0000000000..bd03124059 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDatePeriodExtractorConfiguration.cs @@ -0,0 +1,376 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.Arabic; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Arabic; +using Microsoft.Recognizers.Text.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Arabic +{ + public class ArabicDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDatePeriodExtractorConfiguration + { + // Base regexes + public static readonly Regex TillRegex = + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangeConnectorRegex = + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DayRegex = + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthNumRegex = + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex IllegalYearRegex = 
+ new Regex(BaseDateTime.IllegalYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayRegex = + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeMonthRegex = + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WrittenMonthRegex = + new Regex(DateTimeDefinitions.WrittenMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthSuffixRegex = + new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DateUnitRegex = + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeUnitRegex = + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PreviousPrefixRegex = + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NextPrefixRegex = + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FutureSuffixRegex = + new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NowRegex = + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); + + // composite regexes + public static readonly Regex SimpleCasesRegex = + new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthFrontSimpleCasesRegex = + new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthFrontBetweenRegex = + new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex BetweenRegex = + new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags, 
RegexTimeOut); + + public static readonly Regex MonthWithYear = + new Regex(DateTimeDefinitions.MonthWithYear, RegexFlags, RegexTimeOut); + + public static readonly Regex OneWordPeriodRegex = + new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthNumWithYear = + new Regex(DateTimeDefinitions.MonthNumWithYear, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekOfMonthRegex = + new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekOfYearRegex = + new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FollowedDateUnit = + new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex NumberCombinedWithDateUnit = + new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex QuarterRegex = + new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex QuarterRegexYearFront = + new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags, RegexTimeOut); + + public static readonly Regex AllHalfYearRegex = + new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SeasonRegex = + new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WhichWeekRegex = + new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekOfRegex = + new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthOfRegex = + new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangeUnitRegex = + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex InConnectorRegex = + new 
Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WithinNextPrefixRegex = + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RestOfDateRegex = + new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LaterEarlyPeriodRegex = + new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekWithWeekDayRangeRegex = + new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearPlusNumberRegex = + new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DecadeWithCenturyRegex = + new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearPeriodRegex = + new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ComplexDatePeriodRegex = + new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeDecadeRegex = + new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ReferenceDatePeriodRegex = + new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AgoRegex = + new Regex(DateTimeDefinitions.AgoRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LaterRegex = + new Regex(DateTimeDefinitions.LaterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LessThanRegex = + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MoreThanRegex = + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex CenturySuffixRegex = + new 
Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FirstLastRegex = + new Regex(DateTimeDefinitions.FirstLastRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex OfYearRegex = + new Regex(DateTimeDefinitions.OfYearRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft; + + private static readonly Regex FromTokenRegex = + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex BetweenTokenRegex = + new Regex(DateTimeDefinitions.BetweenTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex[] SimpleCasesRegexes = + { + // "3-5 Jan, 2018", + SimpleCasesRegex, + + // "between 3 and 5 Jan, 2018" + BetweenRegex, + + // "next april", "year to date", "previous year" + OneWordPeriodRegex, + + // "January, 2018", "this year Feb" + MonthWithYear, + + // "2018-3", "2018.3", "5-2015", only FourDigitYear is allow in this Regex + MonthNumWithYear, + + // "2018", "two thousand and ten" + YearRegex, + + // "4th week of Feb" + WeekOfMonthRegex, + + // "3rd week of 2018", "4th week last year" + WeekOfYearRegex, + + // "Jan between 8-10" + MonthFrontBetweenRegex, + + // "from Jan 5th-10th", "Feb from 5-10" + MonthFrontSimpleCasesRegex, + + // "Q1 2018", "2nd quarter" + QuarterRegex, + + // "2016 Q1", "last year the 4th quarter" + QuarterRegexYearFront, + + // "2015 the H1", "H2 of 2016", "1st half 2018", "2nd half this year" + AllHalfYearRegex, + + // "last summer", "fall of 2018", "early this summer" + SeasonRegex, + + // "week 25", "week 06" + WhichWeekRegex, + + // "rest of this week", "rest of current year" + RestOfDateRegex, + + // "early this year", "late next April" + LaterEarlyPeriodRegex, + + // "this week between Mon and Wed", "next week from Tuesday to Wednesday" + WeekWithWeekDayRangeRegex, + + // "year 834", "two thousand and nine" + 
YearPlusNumberRegex, + + // "21st century 30's" + DecadeWithCenturyRegex, + + // "next five decades", "previous 2 decades" + RelativeDecadeRegex, + + // "this week", "same year" + ReferenceDatePeriodRegex, + }; + + public ArabicDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + DatePointExtractor = new BaseDateExtractor(new ArabicDateExtractorConfiguration(this)); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Arabic.CardinalExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Arabic.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new ArabicNumberParserConfiguration(numConfig)); + + DurationExtractor = new BaseDurationExtractor(new ArabicDurationExtractorConfiguration(this)); + } + + public IDateExtractor DatePointExtractor { get; } + + public IExtractor CardinalExtractor { get; } + + public IExtractor OrdinalExtractor { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IParser NumberParser { get; } + + IEnumerable IDatePeriodExtractorConfiguration.SimpleCasesRegexes => SimpleCasesRegexes; + + Regex IDatePeriodExtractorConfiguration.IllegalYearRegex => IllegalYearRegex; + + Regex IDatePeriodExtractorConfiguration.YearRegex => YearRegex; + + Regex IDatePeriodExtractorConfiguration.TillRegex => TillRegex; + + Regex IDatePeriodExtractorConfiguration.FollowedDateUnit => FollowedDateUnit; + + Regex IDatePeriodExtractorConfiguration.DateUnitRegex => DateUnitRegex; + + Regex IDatePeriodExtractorConfiguration.TimeUnitRegex => TimeUnitRegex; + + Regex IDatePeriodExtractorConfiguration.NumberCombinedWithDateUnit => NumberCombinedWithDateUnit; + + Regex IDatePeriodExtractorConfiguration.PreviousPrefixRegex => PreviousPrefixRegex; + + Regex 
IDatePeriodExtractorConfiguration.FutureRegex => NextPrefixRegex; + + Regex IDatePeriodExtractorConfiguration.FutureSuffixRegex => FutureSuffixRegex; + + Regex IDatePeriodExtractorConfiguration.WeekOfRegex => WeekOfRegex; + + Regex IDatePeriodExtractorConfiguration.MonthOfRegex => MonthOfRegex; + + Regex IDatePeriodExtractorConfiguration.RangeUnitRegex => RangeUnitRegex; + + Regex IDatePeriodExtractorConfiguration.InConnectorRegex => InConnectorRegex; + + Regex IDatePeriodExtractorConfiguration.WithinNextPrefixRegex => WithinNextPrefixRegex; + + Regex IDatePeriodExtractorConfiguration.YearPeriodRegex => YearPeriodRegex; + + Regex IDatePeriodExtractorConfiguration.ComplexDatePeriodRegex => ComplexDatePeriodRegex; + + Regex IDatePeriodExtractorConfiguration.RelativeDecadeRegex => RelativeDecadeRegex; + + Regex IDatePeriodExtractorConfiguration.ReferenceDatePeriodRegex => ReferenceDatePeriodRegex; + + Regex IDatePeriodExtractorConfiguration.AgoRegex => AgoRegex; + + Regex IDatePeriodExtractorConfiguration.LaterRegex => LaterRegex; + + Regex IDatePeriodExtractorConfiguration.LessThanRegex => LessThanRegex; + + Regex IDatePeriodExtractorConfiguration.MoreThanRegex => MoreThanRegex; + + Regex IDatePeriodExtractorConfiguration.CenturySuffixRegex => CenturySuffixRegex; + + Regex IDatePeriodExtractorConfiguration.MonthNumRegex => MonthNumRegex; + + Regex IDatePeriodExtractorConfiguration.NowRegex => NowRegex; + + Regex IDatePeriodExtractorConfiguration.FirstLastRegex => FirstLastRegex; + + Regex IDatePeriodExtractorConfiguration.OfYearRegex => OfYearRegex; + + bool IDatePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + string[] IDatePeriodExtractorConfiguration.DurationDateRestrictions => DateTimeDefinitions.DurationDateRestrictions; + + public bool GetFromTokenIndex(string text, out int index) + { + index = -1; + var fromMatch = FromTokenRegex.Match(text); + if (fromMatch.Success) + { + index = fromMatch.Index; + } + + 
return fromMatch.Success; + } + + public bool GetBetweenTokenIndex(string text, out int index) + { + index = -1; + var betweenMatch = BetweenTokenRegex.Match(text); + if (betweenMatch.Success) + { + index = betweenMatch.Index; + } + + return betweenMatch.Success; + } + + public bool HasConnectorToken(string text) + { + return RangeConnectorRegex.IsExactMatch(text, trim: true); + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDateTimeAltExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDateTimeAltExtractorConfiguration.cs new file mode 100644 index 0000000000..2e6312342c --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDateTimeAltExtractorConfiguration.cs @@ -0,0 +1,71 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Arabic; + +namespace Microsoft.Recognizers.Text.DateTime.Arabic +{ + public class ArabicDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeAltExtractorConfiguration + { + public static readonly Regex ThisPrefixRegex = + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PreviousPrefixRegex = + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NextPrefixRegex = + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AmRegex = + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PmRegex = + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangePrefixRegex = + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); + + public static 
readonly Regex[] RelativePrefixList = + { + ThisPrefixRegex, PreviousPrefixRegex, NextPrefixRegex, + }; + + public static readonly Regex[] AmPmRegexList = + { + AmRegex, PmRegex, + }; + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft; + + private static readonly Regex OrRegex = + new Regex(DateTimeDefinitions.OrRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex DayRegex = + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); + + public ArabicDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + DateExtractor = new BaseDateExtractor(new ArabicDateExtractorConfiguration(this)); + DatePeriodExtractor = new BaseDatePeriodExtractor(new ArabicDatePeriodExtractorConfiguration(this)); + } + + IEnumerable IDateTimeAltExtractorConfiguration.RelativePrefixList => RelativePrefixList; + + IEnumerable IDateTimeAltExtractorConfiguration.AmPmRegexList => AmPmRegexList; + + Regex IDateTimeAltExtractorConfiguration.OrRegex => OrRegex; + + Regex IDateTimeAltExtractorConfiguration.ThisPrefixRegex => ThisPrefixRegex; + + Regex IDateTimeAltExtractorConfiguration.DayRegex => DayRegex; + + Regex IDateTimeAltExtractorConfiguration.RangePrefixRegex => RangePrefixRegex; + + public IDateExtractor DateExtractor { get; } + + public IDateTimeExtractor DatePeriodExtractor { get; } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDateTimeExtractorConfiguration.cs new file mode 100644 index 0000000000..2edcfb31cc --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDateTimeExtractorConfiguration.cs @@ -0,0 +1,139 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Arabic; +using Microsoft.Recognizers.Text.DateTime.Arabic.Utilities; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; + +namespace Microsoft.Recognizers.Text.DateTime.Arabic +{ + public class ArabicDateTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeExtractorConfiguration + { + public static readonly Regex PrepositionRegex = + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NowRegex = + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SuffixRegex = + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeOfDayRegex = + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecificTimeOfDayRegex = + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeOfTodayAfterRegex = + new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeOfTodayBeforeRegex = + new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SimpleTimeOfTodayAfterRegex = + new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SimpleTimeOfTodayBeforeRegex = + new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecificEndOfRegex = + new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex UnspecificEndOfRegex = + new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex UnitRegex = + new 
Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ConnectorRegex = + new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NumberAsTimeRegex = + new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DateNumberConnectorRegex = + new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearSuffix = + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); + + public static readonly Regex SuffixAfterRegex = + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft; + + public ArabicDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Arabic.IntegerExtractor.GetInstance(numConfig); + + DatePointExtractor = new BaseDateExtractor(new ArabicDateExtractorConfiguration(this)); + TimePointExtractor = new BaseTimeExtractor(new ArabicTimeExtractorConfiguration(this)); + DurationExtractor = new BaseDurationExtractor(new ArabicDurationExtractorConfiguration(this)); + UtilityConfiguration = new ArabicDatetimeUtilityConfiguration(); + HolidayExtractor = new BaseHolidayExtractor(new ArabicHolidayExtractorConfiguration(this)); + } + + public IExtractor IntegerExtractor { get; } + + public IDateExtractor DatePointExtractor { get; } + + public IDateTimeExtractor TimePointExtractor { get; } + + public 
IDateTimeUtilityConfiguration UtilityConfiguration { get; } + + public IDateTimeExtractor HolidayExtractor { get; } + + Regex IDateTimeExtractorConfiguration.NowRegex => NowRegex; + + Regex IDateTimeExtractorConfiguration.SuffixRegex => SuffixRegex; + + Regex IDateTimeExtractorConfiguration.TimeOfTodayAfterRegex => TimeOfTodayAfterRegex; + + Regex IDateTimeExtractorConfiguration.SimpleTimeOfTodayAfterRegex => SimpleTimeOfTodayAfterRegex; + + Regex IDateTimeExtractorConfiguration.TimeOfTodayBeforeRegex => TimeOfTodayBeforeRegex; + + Regex IDateTimeExtractorConfiguration.SimpleTimeOfTodayBeforeRegex => SimpleTimeOfTodayBeforeRegex; + + Regex IDateTimeExtractorConfiguration.TimeOfDayRegex => TimeOfDayRegex; + + Regex IDateTimeExtractorConfiguration.SpecificEndOfRegex => SpecificEndOfRegex; + + Regex IDateTimeExtractorConfiguration.UnspecificEndOfRegex => UnspecificEndOfRegex; + + Regex IDateTimeExtractorConfiguration.UnitRegex => UnitRegex; + + Regex IDateTimeExtractorConfiguration.NumberAsTimeRegex => NumberAsTimeRegex; + + Regex IDateTimeExtractorConfiguration.DateNumberConnectorRegex => DateNumberConnectorRegex; + + Regex IDateTimeExtractorConfiguration.YearRegex => YearRegex; + + Regex IDateTimeExtractorConfiguration.YearSuffix => YearSuffix; + + Regex IDateTimeExtractorConfiguration.SuffixAfterRegex => SuffixAfterRegex; + + public IDateTimeExtractor DurationExtractor { get; } + + public bool IsConnector(string text) + { + text = text.Trim(); + return string.IsNullOrEmpty(text) || PrepositionRegex.IsMatch(text) || ConnectorRegex.IsMatch(text); + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDateTimePeriodExtractorConfiguration.cs new file mode 100644 index 0000000000..9824056729 --- /dev/null +++ 
// ---- .NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDateTimePeriodExtractorConfiguration.cs ----
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions;
using Microsoft.Recognizers.Definitions.Arabic;
using Microsoft.Recognizers.Text.Number;
using Microsoft.Recognizers.Text.Utilities;

namespace Microsoft.Recognizers.Text.DateTime.Arabic
{
    // Extractor configuration for Arabic date-time periods. It exposes regexes compiled from
    // DateTimeDefinitions / BaseDateTime and the sub-extractors built in the constructor.
    public class ArabicDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration,
        IDateTimePeriodExtractorConfiguration
    {
        public static readonly Regex TimeNumberCombinedWithUnit =
            new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut);

        public static readonly Regex HyphenDateRegex =
            new Regex(BaseDateTime.HyphenDateRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex PeriodTimeOfDayWithDateRegex =
            new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex RelativeTimeUnitRegex =
            new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex RestOfDateTimeRegex =
            new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex AmDescRegex =
            new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex PmDescRegex =
            new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex WithinNextPrefixRegex =
            new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex DateUnitRegex =
            new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut);

        // RegexFlags already contains RegexOptions.RightToLeft, so it is not OR-ed in again;
        // the match timeout is passed like for every other regex in this class.
        public static readonly Regex PrefixDayRegex =
            new Regex(DateTimeDefinitions.PrefixDayRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex SuffixRegex =
            new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex BeforeRegex =
            new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex AfterRegex =
            new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex WeekDaysRegex =
            new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex PeriodSpecificTimeOfDayRegex =
            new Regex(DateTimeDefinitions.PeriodSpecificTimeOfDayRegex, RegexFlags, RegexTimeOut);

        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft;

        private static readonly Regex[] SimpleCases =
        {
            ArabicTimePeriodExtractorConfiguration.PureNumFromTo,
            ArabicTimePeriodExtractorConfiguration.PureNumBetweenAnd,
        };

        private static readonly Regex PeriodTimeOfDayRegex =
            new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex TimeUnitRegex =
            new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex TimeFollowedUnit =
            new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut);

        private static readonly Regex GeneralEndingRegex =
            new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex MiddlePauseRegex =
            new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex RangeConnectorRegex =
            new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut);

        // Builds the number extractor (honouring the NoProtoCache option) and every
        // sub-extractor, each configured from this instance.
        public ArabicDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config)
            : base(config)
        {
            TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate;

            var numOptions = NumberOptions.None;
            if ((config.Options & DateTimeOptions.NoProtoCache) != 0)
            {
                numOptions = NumberOptions.NoProtoCache;
            }

            var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions);

            CardinalExtractor = Number.Arabic.CardinalExtractor.GetInstance(numConfig);

            SingleDateExtractor = new BaseDateExtractor(new ArabicDateExtractorConfiguration(this));
            SingleTimeExtractor = new BaseTimeExtractor(new ArabicTimeExtractorConfiguration(this));
            SingleDateTimeExtractor = new BaseDateTimeExtractor(new ArabicDateTimeExtractorConfiguration(this));
            DurationExtractor = new BaseDurationExtractor(new ArabicDurationExtractorConfiguration(this));
            TimePeriodExtractor = new BaseTimePeriodExtractor(new ArabicTimePeriodExtractorConfiguration(this));
            HolidayExtractor = new BaseHolidayExtractor(new ArabicHolidayExtractorConfiguration(this));
        }

        // Element type restored: the backing field is a Regex[].
        public IEnumerable<Regex> SimpleCasesRegex => SimpleCases;

        public Regex PrepositionRegex => ArabicTimePeriodExtractorConfiguration.PrepositionRegex;

        public Regex TillRegex => ArabicTimePeriodExtractorConfiguration.TillRegex;

        public Regex TimeOfDayRegex => PeriodTimeOfDayRegex;

        public Regex SpecificTimeOfDayRegex => PeriodSpecificTimeOfDayRegex;

        public Regex PreviousPrefixRegex => ArabicDatePeriodExtractorConfiguration.PreviousPrefixRegex;

        public Regex NextPrefixRegex => ArabicDatePeriodExtractorConfiguration.NextPrefixRegex;

        public Regex FutureSuffixRegex => ArabicDatePeriodExtractorConfiguration.FutureSuffixRegex;

        public Regex WeekDayRegex => WeekDaysRegex;

        public Regex FollowedUnit => TimeFollowedUnit;

        bool IDateTimePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;

        Regex IDateTimePeriodExtractorConfiguration.PrefixDayRegex => PrefixDayRegex;

        Regex IDateTimePeriodExtractorConfiguration.DateUnitRegex => DateUnitRegex;

        Regex IDateTimePeriodExtractorConfiguration.NumberCombinedWithUnit => TimeNumberCombinedWithUnit;

        Regex IDateTimePeriodExtractorConfiguration.TimeUnitRegex => TimeUnitRegex;

        Regex IDateTimePeriodExtractorConfiguration.RelativeTimeUnitRegex => RelativeTimeUnitRegex;

        Regex IDateTimePeriodExtractorConfiguration.RestOfDateTimeRegex => RestOfDateTimeRegex;

        Regex IDateTimePeriodExtractorConfiguration.GeneralEndingRegex => GeneralEndingRegex;

        Regex IDateTimePeriodExtractorConfiguration.MiddlePauseRegex => MiddlePauseRegex;

        Regex IDateTimePeriodExtractorConfiguration.PeriodTimeOfDayWithDateRegex => PeriodTimeOfDayWithDateRegex;

        Regex IDateTimePeriodExtractorConfiguration.AmDescRegex => AmDescRegex;

        Regex IDateTimePeriodExtractorConfiguration.PmDescRegex => PmDescRegex;

        Regex IDateTimePeriodExtractorConfiguration.WithinNextPrefixRegex => WithinNextPrefixRegex;

        Regex IDateTimePeriodExtractorConfiguration.SuffixRegex => SuffixRegex;

        Regex IDateTimePeriodExtractorConfiguration.BeforeRegex => BeforeRegex;

        Regex IDateTimePeriodExtractorConfiguration.AfterRegex => AfterRegex;

        // No tasks-mode meal-time regex is provided for Arabic.
        Regex IDateTimePeriodExtractorConfiguration.TasksmodeMealTimeofDayRegex => null;

        public string TokenBeforeDate { get; }

        public IExtractor CardinalExtractor { get; }

        public IDateTimeExtractor SingleDateExtractor { get; }

        public IDateTimeExtractor SingleTimeExtractor { get; }

        public IDateTimeExtractor SingleDateTimeExtractor { get; }

        public IDateTimeExtractor DurationExtractor { get; }

        public IDateTimeExtractor TimePeriodExtractor { get; }

        // Never assigned by the constructor, so this is always null.
        public IDateTimeExtractor TimeZoneExtractor { get; }

        public IDateTimeExtractor HolidayExtractor { get; }

        // TODO: these three methods are the same in DatePeriod, should be abstracted

        // Reports whether text ends with the "from" token; on success index is the position
        // of its last occurrence, otherwise -1.
        // NOTE(review): the token is the English word although this is the Arabic
        // configuration - confirm which Arabic token/regex from the definitions should be used.
        public bool GetFromTokenIndex(string text, out int index)
        {
            index = -1;

            // @TODO move hardcoded values to resources file
            if (text.EndsWith("from", StringComparison.Ordinal))
            {
                index = text.LastIndexOf("from", StringComparison.Ordinal);
                return true;
            }

            return false;
        }

        // Reports whether text ends with the "between" token; on success index is the position
        // of its last occurrence, otherwise -1.
        // NOTE(review): same English-token concern as GetFromTokenIndex.
        public bool GetBetweenTokenIndex(string text, out int index)
        {
            index = -1;

            // @TODO move hardcoded values to resources file
            if (text.EndsWith("between", StringComparison.Ordinal))
            {
                index = text.LastIndexOf("between", StringComparison.Ordinal);
                return true;
            }

            return false;
        }

        // True when the trimmed text is exactly matched by RangeConnectorRegex.
        public bool HasConnectorToken(string text)
        {
            return RangeConnectorRegex.IsExactMatch(text, trim: true);
        }
    }
}

// ---- .NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicDurationExtractorConfiguration.cs ----
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.Arabic;
using Microsoft.Recognizers.Text.Number;

namespace Microsoft.Recognizers.Text.DateTime.Arabic
{
    // Extractor configuration for Arabic durations: regexes compiled from DateTimeDefinitions
    // plus the unit maps and the cardinal-number extractor.
    public class ArabicDurationExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDurationExtractorConfiguration
    {
        public static readonly Regex DurationUnitRegex =
            new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex SuffixAndRegex =
            new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex DurationFollowedUnit =
            new Regex(DateTimeDefinitions.DurationFollowedUnit, RegexFlags, RegexTimeOut);

        public static readonly Regex NumberCombinedWithDurationUnit =
            new Regex(DateTimeDefinitions.NumberCombinedWithDurationUnit, RegexFlags, RegexTimeOut);

        public static readonly Regex AnUnitRegex =
            new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex DuringRegex =
            new Regex(DateTimeDefinitions.DuringRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex AllRegex =
            new Regex(DateTimeDefinitions.AllRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex HalfRegex =
            new Regex(DateTimeDefinitions.HalfRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex ConjunctionRegex =
            new Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex InexactNumberRegex =
            new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex InexactNumberUnitRegex =
            new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex RelativeDurationUnitRegex =
            new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex DurationConnectorRegex =
            new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex ModPrefixRegex =
            new Regex(DateTimeDefinitions.ModPrefixRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex ModSuffixRegex =
            new Regex(DateTimeDefinitions.ModSuffixRegex, RegexFlags, RegexTimeOut);

        // No special number-unit pattern is defined for Arabic.
        public static readonly Regex SpecialNumberUnitRegex = null;

        // RegexFlags already contains RegexOptions.RightToLeft; the match timeout is passed
        // like for every other regex in this class.
        public static readonly Regex MoreThanRegex =
            new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex LessThanRegex =
            new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut);

        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft;

        // Builds the cardinal extractor (honouring NoProtoCache) and snapshots the unit maps.
        public ArabicDurationExtractorConfiguration(IDateTimeOptionsConfiguration config)
            : base(config)
        {
            var numOptions = NumberOptions.None;
            if ((config.Options & DateTimeOptions.NoProtoCache) != 0)
            {
                numOptions = NumberOptions.NoProtoCache;
            }

            var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions);

            CardinalExtractor = Number.Arabic.CardinalExtractor.GetInstance(numConfig);

            UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary();
            UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary();
        }

        public IExtractor CardinalExtractor { get; }

        // NOTE(review): type arguments were lost in the source text and are restored to what
        // IDurationExtractorConfiguration is expected to declare - confirm against the interface.
        public IImmutableDictionary<string, string> UnitMap { get; }

        public IImmutableDictionary<string, long> UnitValueMap { get; }

        bool IDurationExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;

        Regex IDurationExtractorConfiguration.FollowedUnit => DurationFollowedUnit;

        Regex IDurationExtractorConfiguration.NumberCombinedWithUnit => NumberCombinedWithDurationUnit;

        Regex IDurationExtractorConfiguration.AnUnitRegex => AnUnitRegex;

        Regex IDurationExtractorConfiguration.DuringRegex => DuringRegex;

        Regex IDurationExtractorConfiguration.AllRegex => AllRegex;

        Regex IDurationExtractorConfiguration.HalfRegex => HalfRegex;

        Regex IDurationExtractorConfiguration.SuffixAndRegex => SuffixAndRegex;

        Regex IDurationExtractorConfiguration.ConjunctionRegex => ConjunctionRegex;

        Regex IDurationExtractorConfiguration.InexactNumberRegex => InexactNumberRegex;

        Regex IDurationExtractorConfiguration.InexactNumberUnitRegex => InexactNumberUnitRegex;

        Regex IDurationExtractorConfiguration.RelativeDurationUnitRegex => RelativeDurationUnitRegex;

        Regex IDurationExtractorConfiguration.DurationUnitRegex => DurationUnitRegex;

        Regex IDurationExtractorConfiguration.DurationConnectorRegex => DurationConnectorRegex;

        Regex IDurationExtractorConfiguration.SpecialNumberUnitRegex => SpecialNumberUnitRegex;

        Regex IDurationExtractorConfiguration.MoreThanRegex => MoreThanRegex;

        Regex IDurationExtractorConfiguration.LessThanRegex => LessThanRegex;

        Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex;

        Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex;

        // No ambiguity filters for Arabic durations.
        // NOTE(review): <Regex, Regex> restored from stripped source - confirm against the interface.
        public Dictionary<Regex, Regex> AmbiguityFiltersDict => null;
    }
}

// ---- .NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicHolidayExtractorConfiguration.cs ----
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.Arabic;

namespace Microsoft.Recognizers.Text.DateTime.Arabic
{
    // Extractor configuration for Arabic holidays: a single holiday regex plus the year regex.
    public class ArabicHolidayExtractorConfiguration : BaseDateTimeOptionsConfiguration, IHolidayExtractorConfiguration
    {
        public static readonly Regex YearRegex =
            new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex H =
            new Regex(DateTimeDefinitions.HolidayRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex[] HolidayRegexList =
        {
            H,
        };

        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft;

        public ArabicHolidayExtractorConfiguration(IDateTimeOptionsConfiguration config)
            : base(config)
        {
        }

        // Element type restored: the backing field is a Regex[].
        public IEnumerable<Regex> HolidayRegexes => HolidayRegexList;
    }
}
// ---- .NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicMergedExtractorConfiguration.cs ----
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions;
using Microsoft.Recognizers.Definitions.Arabic;
using Microsoft.Recognizers.Definitions.Utilities;
using Microsoft.Recognizers.Text.Matcher;
using Microsoft.Recognizers.Text.Number;

namespace Microsoft.Recognizers.Text.DateTime.Arabic
{
    // Configuration for the merged Arabic date-time extractor: it aggregates every individual
    // Arabic extractor and the regexes used to merge and filter their results.
    public class ArabicMergedExtractorConfiguration : BaseDateTimeOptionsConfiguration, IMergedExtractorConfiguration
    {
        public static readonly Regex BeforeRegex =
            new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex AfterRegex =
            new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex AroundRegex =
            new Regex(DateTimeDefinitions.AroundRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex EqualRegex =
            new Regex(BaseDateTime.EqualRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex FromToRegex =
            new Regex(DateTimeDefinitions.FromToRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex SingleAmbiguousMonthRegex =
            new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex PrepositionSuffixRegex =
            new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex AmbiguousRangeModifierPrefix =
            new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags, RegexTimeOut);

        public static readonly Regex NumberEndingPattern =
            new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags, RegexTimeOut);

        public static readonly Regex SuffixAfterRegex =
            new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex UnspecificDatePeriodRegex =
            new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex UnspecificTimePeriodRegex =
            new Regex(DateTimeDefinitions.UnspecificTimePeriodRegex, RegexFlags, RegexTimeOut);

        // Compiled, and bounded by the same match timeout as every other regex in this class.
        public static readonly Regex FailFastRegex =
            new Regex(DateTimeDefinitions.FailFastRegex, RegexFlags | RegexOptions.Compiled, RegexTimeOut);

        public static readonly Regex YearRegex =
            new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex[] TermFilterRegexes =
        {
            // one on one
            new Regex(DateTimeDefinitions.OneOnOneRegex, RegexFlags, RegexTimeOut),

            // (the)? (day|week|month|year)
            new Regex(DateTimeDefinitions.SingleAmbiguousTermsRegex, RegexFlags, RegexTimeOut),
        };

        public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher();

        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft;

        // Builds all sub-extractors, the integer extractor (honouring NoProtoCache) and the
        // ambiguity filters.
        public ArabicMergedExtractorConfiguration(IDateTimeOptionsConfiguration config)
            : base(config)
        {
            DateExtractor = new BaseDateExtractor(new ArabicDateExtractorConfiguration(this));
            TimeExtractor = new BaseTimeExtractor(new ArabicTimeExtractorConfiguration(this));
            DateTimeExtractor = new BaseDateTimeExtractor(new ArabicDateTimeExtractorConfiguration(this));
            DatePeriodExtractor = new BaseDatePeriodExtractor(new ArabicDatePeriodExtractorConfiguration(this));
            TimePeriodExtractor = new BaseTimePeriodExtractor(new ArabicTimePeriodExtractorConfiguration(this));
            DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(new ArabicDateTimePeriodExtractorConfiguration(this));
            DurationExtractor = new BaseDurationExtractor(new ArabicDurationExtractorConfiguration(this));
            SetExtractor = new BaseSetExtractor(new ArabicSetExtractorConfiguration(this));
            HolidayExtractor = new BaseHolidayExtractor(new ArabicHolidayExtractorConfiguration(this));
            DateTimeAltExtractor = new BaseDateTimeAltExtractor(new ArabicDateTimeAltExtractorConfiguration(this));

            var numOptions = NumberOptions.None;
            if ((config.Options & DateTimeOptions.NoProtoCache) != 0)
            {
                numOptions = NumberOptions.NoProtoCache;
            }

            // NOTE(review): SinceRegex is static, so constructing one instance in
            // ExperimentalMode switches the regex for every instance in the process and it is
            // never switched back. Kept as-is because other code may read the static; consider
            // a per-instance field instead.
            if ((config.Options & DateTimeOptions.ExperimentalMode) != 0)
            {
                SinceRegex = SinceRegexExp;
            }

            var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions);

            IntegerExtractor = Number.Arabic.IntegerExtractor.GetInstance(numConfig);

            AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict);

            // NOTE(review): SuperfluousWordMatcher is a shared static that is re-initialised by
            // every instance created with EnablePreview - confirm Init is safe to call repeatedly.
            if ((Options & DateTimeOptions.EnablePreview) != 0)
            {
                SuperfluousWordMatcher.Init(DateTimeDefinitions.SuperfluousWordList);
            }
        }

        // Used in Standard mode
        public static Regex SinceRegex { get; set; } = new Regex(DateTimeDefinitions.SinceRegex, RegexFlags, RegexTimeOut);

        // used in Experimental mode
        public static Regex SinceRegexExp { get; } = new Regex(DateTimeDefinitions.SinceRegexExp, RegexFlags, RegexTimeOut);

        public IDateExtractor DateExtractor { get; }

        public IDateTimeExtractor TimeExtractor { get; }

        public IDateTimeExtractor DateTimeExtractor { get; }

        public IDateTimeExtractor DatePeriodExtractor { get; }

        public IDateTimeExtractor TimePeriodExtractor { get; }

        public IDateTimeExtractor DateTimePeriodExtractor { get; }

        public IDateTimeExtractor DurationExtractor { get; }

        public IDateTimeExtractor SetExtractor { get; }

        public IDateTimeExtractor HolidayExtractor { get; }

        // Never assigned by the constructor, so this is always null.
        public IDateTimeZoneExtractor TimeZoneExtractor { get; }

        public IDateTimeListExtractor DateTimeAltExtractor { get; }

        public IExtractor IntegerExtractor { get; }

        // NOTE(review): <Regex, Regex> restored from stripped source - confirm it matches the
        // return type of DefinitionLoader.LoadAmbiguityFilters.
        public Dictionary<Regex, Regex> AmbiguityFiltersDict { get; }

        Regex IMergedExtractorConfiguration.AfterRegex => AfterRegex;

        Regex IMergedExtractorConfiguration.BeforeRegex => BeforeRegex;

        Regex IMergedExtractorConfiguration.SinceRegex => SinceRegex;

        Regex IMergedExtractorConfiguration.AroundRegex => AroundRegex;

        Regex IMergedExtractorConfiguration.EqualRegex => EqualRegex;

        Regex IMergedExtractorConfiguration.FromToRegex => FromToRegex;

        Regex IMergedExtractorConfiguration.SingleAmbiguousMonthRegex => SingleAmbiguousMonthRegex;

        Regex IMergedExtractorConfiguration.PrepositionSuffixRegex => PrepositionSuffixRegex;

        Regex IMergedExtractorConfiguration.AmbiguousRangeModifierPrefix => AmbiguousRangeModifierPrefix;

        // The from-to regex doubles as the potential-ambiguous-range regex.
        Regex IMergedExtractorConfiguration.PotentialAmbiguousRangeRegex => FromToRegex;

        Regex IMergedExtractorConfiguration.NumberEndingPattern => NumberEndingPattern;

        Regex IMergedExtractorConfiguration.SuffixAfterRegex => SuffixAfterRegex;

        Regex IMergedExtractorConfiguration.UnspecificDatePeriodRegex => UnspecificDatePeriodRegex;

        Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => UnspecificTimePeriodRegex;

        Regex IMergedExtractorConfiguration.YearRegex => YearRegex;

        Regex IMergedExtractorConfiguration.FailFastRegex => FailFastRegex;

        // Element type restored: the backing field is a Regex[].
        IEnumerable<Regex> IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes;

        StringMatcher IMergedExtractorConfiguration.SuperfluousWordMatcher => SuperfluousWordMatcher;

        bool IMergedExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;

        // Never assigned by the constructor, so this is always null.
        public Regex TasksModeMentionFilters { get; }
    }
}
// ---- .NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicSetExtractorConfiguration.cs ----
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.Arabic;
using Microsoft.Recognizers.Text.DateTime.Utilities;

namespace Microsoft.Recognizers.Text.DateTime.Arabic
{
    // Extractor configuration for Arabic recurring ("set") expressions: regexes compiled from
    // DateTimeDefinitions plus the sub-extractors built in the constructor.
    public class ArabicSetExtractorConfiguration : BaseDateTimeOptionsConfiguration, ISetExtractorConfiguration
    {
        // Built from the duration unit pattern; there is no separate set-unit definition here.
        public static readonly Regex SetUnitRegex =
            new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex PeriodicRegex =
            new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex EachUnitRegex =
            new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex EachPrefixRegex =
            new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex SetLastRegex =
            new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex EachDayRegex =
            new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex SetWeekDayRegex =
            new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex SetEachRegex =
            new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags, RegexTimeOut);

        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft;

        // Builds every sub-extractor, each configured from this instance.
        public ArabicSetExtractorConfiguration(IDateTimeOptionsConfiguration config)
            : base(config)
        {
            DurationExtractor = new BaseDurationExtractor(new ArabicDurationExtractorConfiguration(this));
            TimeExtractor = new BaseTimeExtractor(new ArabicTimeExtractorConfiguration(this));
            DateExtractor = new BaseDateExtractor(new ArabicDateExtractorConfiguration(this));
            DateTimeExtractor = new BaseDateTimeExtractor(new ArabicDateTimeExtractorConfiguration(this));
            DatePeriodExtractor = new BaseDatePeriodExtractor(new ArabicDatePeriodExtractorConfiguration(this));
            TimePeriodExtractor = new BaseTimePeriodExtractor(new ArabicTimePeriodExtractorConfiguration(this));
            DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(new ArabicDateTimePeriodExtractorConfiguration(this));
        }

        public IDateTimeExtractor DurationExtractor { get; }

        public IDateTimeExtractor TimeExtractor { get; }

        public IDateExtractor DateExtractor { get; }

        public IDateTimeExtractor DateTimeExtractor { get; }

        public IDateTimeExtractor DatePeriodExtractor { get; }

        public IDateTimeExtractor TimePeriodExtractor { get; }

        public IDateTimeExtractor DateTimePeriodExtractor { get; }

        bool ISetExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;

        Regex ISetExtractorConfiguration.LastRegex => SetLastRegex;

        Regex ISetExtractorConfiguration.EachPrefixRegex => EachPrefixRegex;

        Regex ISetExtractorConfiguration.PeriodicRegex => PeriodicRegex;

        Regex ISetExtractorConfiguration.EachUnitRegex => EachUnitRegex;

        Regex ISetExtractorConfiguration.EachDayRegex => EachDayRegex;

        // No "before each day" pattern is provided for Arabic.
        Regex ISetExtractorConfiguration.BeforeEachDayRegex => null;

        Regex ISetExtractorConfiguration.SetWeekDayRegex => SetWeekDayRegex;

        Regex ISetExtractorConfiguration.SetEachRegex => SetEachRegex;

        // Delegates to the shared SetHandler helper.
        // NOTE(review): <string, int> restored from stripped source - confirm it matches
        // SetHandler.WeekDayGroupMatchTuple and ISetExtractorConfiguration.
        public Tuple<string, int> WeekDayGroupMatchTuple(Match match) => SetHandler.WeekDayGroupMatchTuple(match);
    }
}
// ---- .NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicTimeExtractorConfiguration.cs ----
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.Arabic;

namespace Microsoft.Recognizers.Text.DateTime.Arabic
{
    // Extractor configuration for Arabic time expressions: regexes compiled from
    // DateTimeDefinitions, the ordered list of time patterns, and a duration extractor.
    public class ArabicTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, ITimeExtractorConfiguration
    {
        // part 1: smallest component
        // --------------------------------------
        public static readonly Regex DescRegex =
            new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex HourNumRegex =
            new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex MinuteNumRegex =
            new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags, RegexTimeOut);

        // part 2: middle level component
        // --------------------------------------
        // handle "... o'clock"
        public static readonly Regex OclockRegex =
            new Regex(DateTimeDefinitions.OclockRegex, RegexFlags, RegexTimeOut);

        // handle "... afternoon"
        public static readonly Regex PmRegex =
            new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut);

        // handle "... in the morning"
        public static readonly Regex AmRegex =
            new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut);

        // handle "half past ..." "a quarter to ..."
        // rename 'min' group to 'deltamin'
        public static readonly Regex LessThanOneHour =
            new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags, RegexTimeOut);

        // handle "six thirty", "six twenty one"
        public static readonly Regex WrittenTimeRegex =
            new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex TimePrefix =
            new Regex(DateTimeDefinitions.TimePrefix, RegexFlags, RegexTimeOut);

        public static readonly Regex TimeSuffix =
            new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags, RegexTimeOut);

        public static readonly Regex BasicTime =
            new Regex(DateTimeDefinitions.BasicTime, RegexFlags, RegexTimeOut);

        // handle special time such as 'at midnight', 'midnight', 'midday'
        public static readonly Regex MidnightRegex =
            new Regex(DateTimeDefinitions.MidnightRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex MidmorningRegex =
            new Regex(DateTimeDefinitions.MidmorningRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex MidafternoonRegex =
            new Regex(DateTimeDefinitions.MidafternoonRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex MiddayRegex =
            new Regex(DateTimeDefinitions.MiddayRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex MidTimeRegex =
            new Regex(DateTimeDefinitions.MidTimeRegex, RegexFlags, RegexTimeOut);

        // part 3: regex for time
        // --------------------------------------
        // handle "at four" "at 3"
        public static readonly Regex AtRegex =
            new Regex(DateTimeDefinitions.AtRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex IshRegex =
            new Regex(DateTimeDefinitions.IshRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex TimeUnitRegex =
            new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex ConnectNumRegex =
            new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex TimeBeforeAfterRegex =
            new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags, RegexTimeOut);

        // Must stay declared after ConnectNumRegex: static initializers run in textual order
        // and the last entry reuses that field. TimeRegex8 is not part of this list.
        public static readonly Regex[] TimeRegexList =
        {
            // (three min past)? seven|7|(seven thirty) pm
            new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags, RegexTimeOut),

            // (three min past)? 3:00(:00)? (pm)?
            new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags, RegexTimeOut),

            // (three min past)? 3.00 (pm)
            new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags, RegexTimeOut),

            // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? (in the night)
            new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags, RegexTimeOut),

            // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)?
            new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags, RegexTimeOut),

            // (five thirty|seven|7|7:00(:00)?) (pm)? (in the night)
            new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags, RegexTimeOut),

            // (in the night) at? (five thirty|seven|7|7:00(:00)?) (pm)?
            new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags, RegexTimeOut),

            new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags, RegexTimeOut),

            // (three min past)? 3h00 (pm)?
            new Regex(DateTimeDefinitions.TimeRegex10, RegexFlags, RegexTimeOut),

            // at 2.30, "at" prefix is required here
            // 3.30pm, "am/pm" suffix is required here
            new Regex(DateTimeDefinitions.TimeRegex11, RegexFlags, RegexTimeOut),

            // 340pm
            ConnectNumRegex,
        };

        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft;

        public ArabicTimeExtractorConfiguration(IDateTimeOptionsConfiguration config)
            : base(config)
        {
            DurationExtractor = new BaseDurationExtractor(new ArabicDurationExtractorConfiguration(this));
        }

        // Element type restored: the backing field is a Regex[].
        IEnumerable<Regex> ITimeExtractorConfiguration.TimeRegexList => TimeRegexList;

        Regex ITimeExtractorConfiguration.AtRegex => AtRegex;

        Regex ITimeExtractorConfiguration.IshRegex => IshRegex;

        Regex ITimeExtractorConfiguration.TimeBeforeAfterRegex => TimeBeforeAfterRegex;

        public IDateTimeExtractor DurationExtractor { get; }

        // Never assigned by the constructor, so this is always null.
        public IDateTimeExtractor TimeZoneExtractor { get; }

        public string TimeTokenPrefix => DateTimeDefinitions.TimeTokenPrefix;

        // No ambiguity filters for Arabic times.
        // NOTE(review): <Regex, Regex> restored from stripped source - confirm against the interface.
        public Dictionary<Regex, Regex> AmbiguityFiltersDict => null;
    }
}
+
+using System;
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Definitions.Arabic;
+using Microsoft.Recognizers.Text.DateTime.Arabic.Utilities;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+using Microsoft.Recognizers.Text.Number;
+
+namespace Microsoft.Recognizers.Text.DateTime.Arabic
+{
+    /// <summary>
+    /// Time-period extractor configuration for Arabic. Compiles the patterns declared in
+    /// <see cref="DateTimeDefinitions"/> and builds the helper extractors exposed by the properties below.
+    /// </summary>
+    public class ArabicTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, ITimePeriodExtractorConfiguration
+    {
+        public static readonly Regex TillRegex = new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex HourRegex = new Regex(DateTimeDefinitions.HourRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PeriodHourNumRegex = new Regex(DateTimeDefinitions.PeriodHourNumRegex, RegexFlags, RegexTimeOut);
+
+        // Note: built from DescRegex, not from a pattern named PeriodDescRegex.
+        public static readonly Regex PeriodDescRegex = new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PmRegex = new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex AmRegex = new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PureNumFromTo = new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PureNumBetweenAnd = new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SpecificTimeFromTo = new Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SpecificTimeBetweenAnd = new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PrepositionRegex = new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex TimeOfDayRegex = new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SpecificTimeOfDayRegex = new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex TimeUnitRegex = new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex TimeFollowedUnit = new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex TimeNumberCombinedWithUnit = new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex GeneralEndingRegex = new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut);
+
+        // Options shared by every regex in this class; matching scans right-to-left.
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft;
+
+        /// <summary>
+        /// Builds the single-time extractor, the utility configuration and the Arabic integer extractor.
+        /// </summary>
+        /// <param name="config">Options source; its Options and Culture values configure the number extractor.</param>
+        public ArabicTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+            TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate;
+            SingleTimeExtractor = new BaseTimeExtractor(new ArabicTimeExtractorConfiguration(this));
+            UtilityConfiguration = new ArabicDatetimeUtilityConfiguration();
+
+            // Propagate the NoProtoCache flag from the date-time options to the number options.
+            var numOptions = (config.Options & DateTimeOptions.NoProtoCache) != 0
+                ? NumberOptions.NoProtoCache
+                : NumberOptions.None;
+
+            var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions);
+
+            IntegerExtractor = Number.Arabic.IntegerExtractor.GetInstance(numConfig);
+        }
+
+        public string TokenBeforeDate { get; }
+
+        public IDateTimeUtilityConfiguration UtilityConfiguration { get; }
+
+        public IDateTimeExtractor SingleTimeExtractor { get; }
+
+        // Not assigned by the constructor, so this get-only property stays null.
+        public IDateTimeExtractor TimeZoneExtractor { get; }
+
+        public IExtractor IntegerExtractor { get; }
+
+        // A new array is allocated on every access.
+        public IEnumerable SimpleCasesRegex => new[]
+        {
+            PureNumFromTo, PureNumBetweenAnd, SpecificTimeFromTo, SpecificTimeBetweenAnd,
+        };
+
+        // A new array is allocated on every access.
+        public IEnumerable PureNumberRegex => new[]
+        {
+            PureNumFromTo, PureNumBetweenAnd,
+        };
+
+        bool ITimePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;
+
+        Regex ITimePeriodExtractorConfiguration.TillRegex => TillRegex;
+
+        Regex ITimePeriodExtractorConfiguration.TimeOfDayRegex => TimeOfDayRegex;
+
+        Regex ITimePeriodExtractorConfiguration.GeneralEndingRegex => GeneralEndingRegex;
+
+        // @TODO move hardcoded strings to YAML file
+
+        /// <summary>
+        /// Reports whether <paramref name="text"/> ends with the literal token "from" (ordinal comparison).
+        /// </summary>
+        /// <param name="text">Text to inspect.</param>
+        /// <param name="index">Start index of the last "from" occurrence, or -1 when the text does not end with it.</param>
+        /// <returns>True when the token was found at the end of the text.</returns>
+        public bool GetFromTokenIndex(string text, out int index)
+        {
+            const string token = "from";
+
+            if (!text.EndsWith(token, StringComparison.Ordinal))
+            {
+                index = -1;
+                return false;
+            }
+
+            index = text.LastIndexOf(token, StringComparison.Ordinal);
+            return true;
+        }
+
+        /// <summary>
+        /// Reports whether <paramref name="text"/> ends with the literal token "between" (ordinal comparison).
+        /// </summary>
+        /// <param name="text">Text to inspect.</param>
+        /// <param name="index">Start index of the last "between" occurrence, or -1 when the text does not end with it.</param>
+        /// <returns>True when the token was found at the end of the text.</returns>
+        public bool GetBetweenTokenIndex(string text, out int index)
+        {
+            const string token = "between";
+
+            if (!text.EndsWith(token, StringComparison.Ordinal))
+            {
+                index = -1;
+                return false;
+            }
+
+            index = text.LastIndexOf(token, StringComparison.Ordinal);
+            return true;
+        }
+
+        /// <summary>
+        /// Reports whether <paramref name="text"/> is exactly the literal token "and" (ordinal comparison).
+        /// </summary>
+        public bool IsConnectorToken(string text) => text.Equals("and", StringComparison.Ordinal);
+
+        /// <summary>
+        /// Forwards to <see cref="TimePeriodFunctions.ApplyPotentialPeriodAmbiguityHotfix"/> unchanged.
+        /// </summary>
+        public List ApplyPotentialPeriodAmbiguityHotfix(string text, List timePeriodErs)
+        {
+            return TimePeriodFunctions.ApplyPotentialPeriodAmbiguityHotfix(text, timePeriodErs);
+        }
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicCommonDateTimeParserConfiguration.cs
new file mode 100644
index 0000000000..158ca74b11
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicCommonDateTimeParserConfiguration.cs
@@ -0,0 +1,72 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Immutable;
+using Microsoft.Recognizers.Definitions;
+using Microsoft.Recognizers.Definitions.Arabic;
+using Microsoft.Recognizers.Text.DateTime.Arabic.Utilities;
+using Microsoft.Recognizers.Text.Number;
+using Microsoft.Recognizers.Text.Number.Arabic;
+
+namespace Microsoft.Recognizers.Text.DateTime.Arabic
+{
+    /// <summary>
+    /// Shared Arabic parser configuration: copies the lookup tables from
+    /// <see cref="DateTimeDefinitions"/> and constructs the number, extractor and parser
+    /// instances that the individual Arabic parser configurations read from it.
+    /// </summary>
+    public class ArabicCommonDateTimeParserConfiguration : BaseDateParserConfiguration, ICommonDateTimeParserConfiguration
+    {
+        /// <summary>
+        /// Populates every shared map, extractor and parser. Each extractor/parser configuration
+        /// below receives <c>this</c>, so statement order matters (see the inline warning).
+        /// </summary>
+        /// <param name="config">Options source; its Options and Culture values configure the number components.</param>
+        public ArabicCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+            UtilityConfiguration = new ArabicDatetimeUtilityConfiguration();
+
+            // Immutable copies of the definition tables.
+            UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary();
+            UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary();
+            SeasonMap = DateTimeDefinitions.SeasonMap.ToImmutableDictionary();
+            SpecialYearPrefixesMap = DateTimeDefinitions.SpecialYearPrefixesMap.ToImmutableDictionary();
+            CardinalMap = DateTimeDefinitions.CardinalMap.ToImmutableDictionary();
+            DayOfWeek = DateTimeDefinitions.DayOfWeek.ToImmutableDictionary();
+            MonthOfYear = DateTimeDefinitions.MonthOfYear.ToImmutableDictionary();
+            Numbers = DateTimeDefinitions.Numbers.ToImmutableDictionary();
+            DoubleNumbers = DateTimeDefinitions.DoubleNumbers.ToImmutableDictionary();
+            WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary();
+            SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary();
+
+            // Propagate the NoProtoCache flag from the date-time options to the number options.
+            var numOptions = NumberOptions.None;
+            if ((config.Options & DateTimeOptions.NoProtoCache) != 0)
+            {
+                numOptions = NumberOptions.NoProtoCache;
+            }
+
+            var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions);
+
+            CardinalExtractor = Number.Arabic.CardinalExtractor.GetInstance(numConfig);
+            IntegerExtractor = Number.Arabic.IntegerExtractor.GetInstance(numConfig);
+            OrdinalExtractor = Number.Arabic.OrdinalExtractor.GetInstance(numConfig);
+
+            NumberParser = new BaseNumberParser(new ArabicNumberParserConfiguration(numConfig));
+
+            TimeZoneParser = new BaseTimeZoneParser(new ArabicTimeZoneParserConfiguration(this));
+
+            // Do not change order. The order of initialization can lead to side-effects
+            DateExtractor = new BaseDateExtractor(new ArabicDateExtractorConfiguration(this));
+            HolidayExtractor = new BaseHolidayExtractor(new ArabicHolidayExtractorConfiguration(this));
+            TimeExtractor = new BaseTimeExtractor(new ArabicTimeExtractorConfiguration(this));
+            DateTimeExtractor = new BaseDateTimeExtractor(new ArabicDateTimeExtractorConfiguration(this));
+            DurationExtractor = new BaseDurationExtractor(new ArabicDurationExtractorConfiguration(this));
+            DatePeriodExtractor = new BaseDatePeriodExtractor(new ArabicDatePeriodExtractorConfiguration(this));
+            TimePeriodExtractor = new BaseTimePeriodExtractor(new ArabicTimePeriodExtractorConfiguration(this));
+            DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(new ArabicDateTimePeriodExtractorConfiguration(this));
+
+            // Parsers are built after all extractors; several parser configurations copy
+            // extractor/parser references from this instance at construction time.
+            DurationParser = new BaseDurationParser(new ArabicDurationParserConfiguration(this));
+            DateParser = new BaseDateParser(new ArabicDateParserConfiguration(this));
+            HolidayTimeParser = new BaseHolidayParser(new ArabicHolidayParserConfiguration(this));
+            TimeParser = new TimeParser(new ArabicTimeParserConfiguration(this));
+            DateTimeParser = new BaseDateTimeParser(new ArabicDateTimeParserConfiguration(this));
+            DatePeriodParser = new BaseDatePeriodParser(new ArabicDatePeriodParserConfiguration(this));
+            TimePeriodParser = new BaseTimePeriodParser(new ArabicTimePeriodParserConfiguration(this));
+            DateTimePeriodParser = new BaseDateTimePeriodParser(new ArabicDateTimePeriodParserConfiguration(this));
+
+            // Built last: its configuration copies the parsers assigned above.
+            DateTimeAltParser = new BaseDateTimeAltParser(new ArabicDateTimeAltParserConfiguration(this));
+        }
+
+        // Base day-of-month table extended with the Arabic entries; rebuilt on every access.
+        public override IImmutableDictionary DayOfMonth => BaseDateTime.DayOfMonthDictionary.ToImmutableDictionary().AddRange(DateTimeDefinitions.DayOfMonth);
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDateParserConfiguration.cs
new file mode 100644
index 0000000000..2cbdbb5f84
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDateParserConfiguration.cs
@@ -0,0 +1,196 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Definitions.Arabic;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+
+namespace Microsoft.Recognizers.Text.DateTime.Arabic
+{
+    /// <summary>
+    /// Date parser configuration for Arabic. Reuses the components of the shared parser
+    /// configuration and the regexes of <see cref="ArabicDateExtractorConfiguration"/>,
+    /// and compiles a few additional patterns from <see cref="DateTimeDefinitions"/>.
+    /// </summary>
+    public class ArabicDateParserConfiguration : BaseDateTimeOptionsConfiguration, IDateParserConfiguration
+    {
+        // Options used for the regexes compiled in the constructor; matching scans right-to-left.
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft;
+
+        /// <summary>
+        /// Copies extractors, parsers and lookup tables from <paramref name="config"/> and
+        /// takes the regexes from the Arabic date extractor configuration.
+        /// </summary>
+        /// <param name="config">Shared parser configuration supplying the common components.</param>
+        public ArabicDateParserConfiguration(ICommonDateTimeParserConfiguration config)
+            : base(config)
+        {
+            DateTokenPrefix = DateTimeDefinitions.DateTokenPrefix;
+
+            IntegerExtractor = config.IntegerExtractor;
+            OrdinalExtractor = config.OrdinalExtractor;
+            CardinalExtractor = config.CardinalExtractor;
+            NumberParser = config.NumberParser;
+            DateExtractor = config.DateExtractor;
+            DurationExtractor = config.DurationExtractor;
+            DurationParser = config.DurationParser;
+
+            // A dedicated holiday parser is created here instead of reusing one from config.
+            HolidayParser = new BaseHolidayParser(new ArabicHolidayParserConfiguration(this));
+
+            // Instance member of the extractor configuration; the regexes below are its static fields.
+            DateRegexes = new ArabicDateExtractorConfiguration(this).DateRegexList;
+            OnRegex = ArabicDateExtractorConfiguration.OnRegex;
+            SpecialDayRegex = ArabicDateExtractorConfiguration.SpecialDayRegex;
+            SpecialDayWithNumRegex = ArabicDateExtractorConfiguration.SpecialDayWithNumRegex;
+            NextRegex = ArabicDateExtractorConfiguration.NextDateRegex;
+            ThisRegex = ArabicDateExtractorConfiguration.ThisRegex;
+            LastRegex = ArabicDateExtractorConfiguration.LastDateRegex;
+            UnitRegex = ArabicDateExtractorConfiguration.DateUnitRegex;
+            WeekDayRegex = ArabicDateExtractorConfiguration.WeekDayRegex;
+            MonthRegex = ArabicDateExtractorConfiguration.MonthRegex;
+            WeekDayOfMonthRegex = ArabicDateExtractorConfiguration.WeekDayOfMonthRegex;
+            ForTheRegex = ArabicDateExtractorConfiguration.ForTheRegex;
+            WeekDayAndDayOfMothRegex = ArabicDateExtractorConfiguration.WeekDayAndDayOfMothRegex;
+            WeekDayAndDayRegex = ArabicDateExtractorConfiguration.WeekDayAndDayRegex;
+            RelativeMonthRegex = ArabicDateExtractorConfiguration.RelativeMonthRegex;
+            StrictRelativeRegex = ArabicDateExtractorConfiguration.StrictRelativeRegex;
+            YearSuffix = ArabicDateExtractorConfiguration.YearSuffix;
+            RelativeWeekDayRegex = ArabicDateExtractorConfiguration.RelativeWeekDayRegex;
+            BeforeAfterRegex = ArabicDateExtractorConfiguration.BeforeAfterRegex;
+
+            // These five are compiled per instance rather than shared statically.
+            RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexFlags, RegexTimeOut);
+            NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut);
+            PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut);
+            UpcomingPrefixRegex = new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags, RegexTimeOut);
+            PastPrefixRegex = new Regex(DateTimeDefinitions.PastPrefixRegex, RegexFlags, RegexTimeOut);
+
+            DayOfMonth = config.DayOfMonth;
+            DayOfWeek = config.DayOfWeek;
+            MonthOfYear = config.MonthOfYear;
+            CardinalMap = config.CardinalMap;
+            UnitMap = config.UnitMap;
+            UtilityConfiguration = config.UtilityConfiguration;
+
+            SameDayTerms = DateTimeDefinitions.SameDayTerms.ToImmutableList();
+            PlusOneDayTerms = DateTimeDefinitions.PlusOneDayTerms.ToImmutableList();
+            PlusTwoDayTerms = DateTimeDefinitions.PlusTwoDayTerms.ToImmutableList();
+            MinusOneDayTerms = DateTimeDefinitions.MinusOneDayTerms.ToImmutableList();
+            MinusTwoDayTerms = DateTimeDefinitions.MinusTwoDayTerms.ToImmutableList();
+        }
+
+        public string DateTokenPrefix { get; }
+
+        public IExtractor IntegerExtractor { get; }
+
+        public IExtractor OrdinalExtractor { get; }
+
+        public IExtractor CardinalExtractor { get; }
+
+        public IParser NumberParser { get; }
+
+        public IDateTimeExtractor DurationExtractor { get; }
+
+        public IDateExtractor DateExtractor { get; }
+
+        public IDateTimeParser DurationParser { get; }
+
+        public IDateTimeParser HolidayParser { get; }
+
+        public IEnumerable DateRegexes { get; }
+
+        public IImmutableDictionary UnitMap { get; }
+
+        public Regex OnRegex { get; }
+
+        public Regex SpecialDayRegex { get; }
+
+        public Regex SpecialDayWithNumRegex { get; }
+
+        public Regex NextRegex { get; }
+
+        public Regex ThisRegex { get; }
+
+        public Regex LastRegex { get; }
+
+        public Regex UnitRegex { get; }
+
+        public Regex WeekDayRegex { get; }
+
+        public Regex MonthRegex { get; }
+
+        public Regex WeekDayOfMonthRegex { get; }
+
+        public Regex ForTheRegex { get; }
+
+        public Regex WeekDayAndDayOfMothRegex { get; }
+
+        public Regex WeekDayAndDayRegex { get; }
+
+        public Regex RelativeMonthRegex { get; }
+
+        public Regex StrictRelativeRegex { get; }
+
+        public Regex YearSuffix { get; }
+
+        public Regex RelativeWeekDayRegex { get; }
+
+        public Regex RelativeDayRegex { get; }
+
+        public Regex NextPrefixRegex { get; }
+
+        public Regex PreviousPrefixRegex { get; }
+
+        public Regex UpcomingPrefixRegex { get; }
+
+        public Regex PastPrefixRegex { get; }
+
+        public Regex BeforeAfterRegex { get; }
+
+        // Not assigned by the constructor, so this get-only property stays null.
+        public Regex TasksModeDurationToDatePatterns { get; }
+
+        public IImmutableDictionary DayOfMonth { get; }
+
+        public IImmutableDictionary DayOfWeek { get; }
+
+        public IImmutableDictionary MonthOfYear { get; }
+
+        public IImmutableDictionary CardinalMap { get; }
+
+        public IImmutableList SameDayTerms { get; }
+
+        public IImmutableList PlusOneDayTerms { get; }
+
+        public IImmutableList MinusOneDayTerms { get; }
+
+        public IImmutableList PlusTwoDayTerms { get; }
+
+        public IImmutableList MinusTwoDayTerms { get; }
+
+        bool IDateParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;
+
+        public IDateTimeUtilityConfiguration UtilityConfiguration { get; }
+
+        /// <summary>
+        /// Maps a relative prefix to an offset: -1 when the previous-prefix pattern matches,
+        /// otherwise 1 when the next-prefix pattern matches, otherwise 0.
+        /// </summary>
+        /// <param name="text">Text to test; surrounding whitespace is trimmed first.</param>
+        /// <returns>-1, 0 or 1.</returns>
+        public int GetSwiftMonthOrYear(string text)
+        {
+            var candidate = text.Trim();
+
+            // Both patterns are always evaluated, next first; a previous match takes precedence.
+            var matchesNext = NextPrefixRegex.IsMatch(candidate);
+            var matchesPrevious = PreviousPrefixRegex.IsMatch(candidate);
+
+            if (matchesPrevious)
+            {
+                return -1;
+            }
+
+            return matchesNext ? 1 : 0;
+        }
+
+        /// <summary>
+        /// Reports whether the trimmed <paramref name="text"/> is exactly the literal "last" (ordinal comparison).
+        /// </summary>
+        // @TODO move hardcoded values to resources file
+        public bool IsCardinalLast(string text) => text.Trim().Equals("last", StringComparison.Ordinal);
+
+        /// <summary>
+        /// Returns <paramref name="text"/> unchanged; no normalization is applied here.
+        /// </summary>
+        public string Normalize(string text) => text;
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDatePeriodParserConfiguration.cs
new file mode 100644
index 0000000000..a80a459b48
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDatePeriodParserConfiguration.cs
@@ -0,0 +1,365 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Linq;
+using System.Text.RegularExpressions;
+
+using Microsoft.Recognizers.Definitions.Arabic;
+
+namespace Microsoft.Recognizers.Text.DateTime.Arabic
+{
+    /// <summary>
+    /// Date-period parser configuration for Arabic. Reuses the components of the shared
+    /// parser configuration and the regexes of the Arabic date-period extractor, and
+    /// classifies period text (month/week/weekend/year, relative offsets) via
+    /// <see cref="DateTimeDefinitions"/> term lists.
+    /// </summary>
+    public class ArabicDatePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDatePeriodParserConfiguration
+    {
+        public static readonly Regex PreviousPrefixRegex =
+            new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex ThisPrefixRegex =
+            new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex AfterNextSuffixRegex =
+            new Regex(DateTimeDefinitions.AfterNextSuffixRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex RelativeRegex =
+            new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex UnspecificEndOfRangeRegex =
+            new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex NowParseRegex =
+            new Regex(DateTimeDefinitions.NowParseRegex, RegexFlags, RegexTimeOut);
+
+        // Options shared by the regexes compiled in this class; matching scans right-to-left.
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft;
+
+        // Term lists with a space on each side, used for whole-word "contains" checks.
+        // They are never reassigned, hence readonly.
+        private static readonly IList<string> monthTermsPadded =
+            DateTimeDefinitions.MonthTerms.Select(str => $" {str} ").ToList();
+
+        private static readonly IList<string> weekendTermsPadded =
+            DateTimeDefinitions.WeekendTerms.Select(str => $" {str} ").ToList();
+
+        private static readonly IList<string> weekTermsPadded =
+            DateTimeDefinitions.WeekTerms.Select(str => $" {str} ").ToList();
+
+        private static readonly IList<string> yearTermsPadded =
+            DateTimeDefinitions.YearTerms.Select(str => $" {str} ").ToList();
+
+        private static readonly Regex NextPrefixRegex =
+            new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut);
+
+        /// <summary>
+        /// Copies extractors, parsers and lookup tables from <paramref name="config"/> and
+        /// takes the regexes from the Arabic extractor configurations.
+        /// </summary>
+        /// <param name="config">Shared parser configuration supplying the common components.</param>
+        public ArabicDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration config)
+            : base(config)
+        {
+            TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate;
+            CardinalExtractor = config.CardinalExtractor;
+            OrdinalExtractor = config.OrdinalExtractor;
+            IntegerExtractor = config.IntegerExtractor;
+            NumberParser = config.NumberParser;
+            DateExtractor = config.DateExtractor;
+            DurationExtractor = config.DurationExtractor;
+            DurationParser = config.DurationParser;
+            DateParser = config.DateParser;
+
+            MonthFrontBetweenRegex = ArabicDatePeriodExtractorConfiguration.MonthFrontBetweenRegex;
+            BetweenRegex = ArabicDatePeriodExtractorConfiguration.BetweenRegex;
+            MonthFrontSimpleCasesRegex = ArabicDatePeriodExtractorConfiguration.MonthFrontSimpleCasesRegex;
+            SimpleCasesRegex = ArabicDatePeriodExtractorConfiguration.SimpleCasesRegex;
+            OneWordPeriodRegex = ArabicDatePeriodExtractorConfiguration.OneWordPeriodRegex;
+            MonthWithYear = ArabicDatePeriodExtractorConfiguration.MonthWithYear;
+            MonthNumWithYear = ArabicDatePeriodExtractorConfiguration.MonthNumWithYear;
+            YearRegex = ArabicDatePeriodExtractorConfiguration.YearRegex;
+            PastRegex = ArabicDatePeriodExtractorConfiguration.PreviousPrefixRegex;
+            FutureRegex = ArabicDatePeriodExtractorConfiguration.NextPrefixRegex;
+            FutureSuffixRegex = ArabicDatePeriodExtractorConfiguration.FutureSuffixRegex;
+            NumberCombinedWithUnit = ArabicDurationExtractorConfiguration.NumberCombinedWithDurationUnit;
+            WeekOfMonthRegex = ArabicDatePeriodExtractorConfiguration.WeekOfMonthRegex;
+            WeekOfYearRegex = ArabicDatePeriodExtractorConfiguration.WeekOfYearRegex;
+            QuarterRegex = ArabicDatePeriodExtractorConfiguration.QuarterRegex;
+            QuarterRegexYearFront = ArabicDatePeriodExtractorConfiguration.QuarterRegexYearFront;
+            AllHalfYearRegex = ArabicDatePeriodExtractorConfiguration.AllHalfYearRegex;
+            SeasonRegex = ArabicDatePeriodExtractorConfiguration.SeasonRegex;
+            WhichWeekRegex = ArabicDatePeriodExtractorConfiguration.WhichWeekRegex;
+            WeekOfRegex = ArabicDatePeriodExtractorConfiguration.WeekOfRegex;
+            MonthOfRegex = ArabicDatePeriodExtractorConfiguration.MonthOfRegex;
+            RestOfDateRegex = ArabicDatePeriodExtractorConfiguration.RestOfDateRegex;
+            LaterEarlyPeriodRegex = ArabicDatePeriodExtractorConfiguration.LaterEarlyPeriodRegex;
+            WeekWithWeekDayRangeRegex = ArabicDatePeriodExtractorConfiguration.WeekWithWeekDayRangeRegex;
+            YearPlusNumberRegex = ArabicDatePeriodExtractorConfiguration.YearPlusNumberRegex;
+            DecadeWithCenturyRegex = ArabicDatePeriodExtractorConfiguration.DecadeWithCenturyRegex;
+            YearPeriodRegex = ArabicDatePeriodExtractorConfiguration.YearPeriodRegex;
+            ComplexDatePeriodRegex = ArabicDatePeriodExtractorConfiguration.ComplexDatePeriodRegex;
+            RelativeDecadeRegex = ArabicDatePeriodExtractorConfiguration.RelativeDecadeRegex;
+            InConnectorRegex = config.UtilityConfiguration.InConnectorRegex;
+            WithinNextPrefixRegex = ArabicDatePeriodExtractorConfiguration.WithinNextPrefixRegex;
+            ReferenceDatePeriodRegex = ArabicDatePeriodExtractorConfiguration.ReferenceDatePeriodRegex;
+            AgoRegex = ArabicDatePeriodExtractorConfiguration.AgoRegex;
+            LaterRegex = ArabicDatePeriodExtractorConfiguration.LaterRegex;
+            LessThanRegex = ArabicDatePeriodExtractorConfiguration.LessThanRegex;
+            MoreThanRegex = ArabicDatePeriodExtractorConfiguration.MoreThanRegex;
+            CenturySuffixRegex = ArabicDatePeriodExtractorConfiguration.CenturySuffixRegex;
+            FirstLastRegex = ArabicDatePeriodExtractorConfiguration.FirstLastRegex;
+            OfYearRegex = ArabicDatePeriodExtractorConfiguration.OfYearRegex;
+            NowRegex = NowParseRegex;
+            SpecialDayRegex = ArabicDateExtractorConfiguration.SpecialDayRegex;
+
+            // Options deliberately left as Singleline only (unlike RegexFlags); the match
+            // timeout is supplied so this pattern is bounded like every other regex here.
+            TodayNowRegex = new Regex(DateTimeDefinitions.TodayNowRegex, RegexOptions.Singleline, RegexTimeOut);
+
+            UnitMap = config.UnitMap;
+            CardinalMap = config.CardinalMap;
+            DayOfMonth = config.DayOfMonth;
+            MonthOfYear = config.MonthOfYear;
+            SeasonMap = config.SeasonMap;
+            SpecialYearPrefixesMap = config.SpecialYearPrefixesMap;
+            WrittenDecades = config.WrittenDecades;
+            Numbers = config.Numbers;
+            SpecialDecadeCases = config.SpecialDecadeCases;
+        }
+
+        // Not assigned by the constructor, so this get-only property stays 0.
+        public int MinYearNum { get; }
+
+        // Not assigned by the constructor, so this get-only property stays 0.
+        public int MaxYearNum { get; }
+
+        public string TokenBeforeDate { get; }
+
+        public IDateExtractor DateExtractor { get; }
+
+        public IExtractor CardinalExtractor { get; }
+
+        public IExtractor OrdinalExtractor { get; }
+
+        public IDateTimeExtractor DurationExtractor { get; }
+
+        public IExtractor IntegerExtractor { get; }
+
+        public IParser NumberParser { get; }
+
+        public IDateTimeParser DateParser { get; }
+
+        public IDateTimeParser DurationParser { get; }
+
+        public Regex MonthFrontBetweenRegex { get; }
+
+        public Regex BetweenRegex { get; }
+
+        public Regex MonthFrontSimpleCasesRegex { get; }
+
+        public Regex SimpleCasesRegex { get; }
+
+        public Regex OneWordPeriodRegex { get; }
+
+        public Regex MonthWithYear { get; }
+
+        public Regex MonthNumWithYear { get; }
+
+        public Regex YearRegex { get; }
+
+        public Regex PastRegex { get; }
+
+        public Regex FutureRegex { get; }
+
+        public Regex FutureSuffixRegex { get; }
+
+        public Regex NumberCombinedWithUnit { get; }
+
+        public Regex WeekOfMonthRegex { get; }
+
+        public Regex WeekOfYearRegex { get; }
+
+        public Regex QuarterRegex { get; }
+
+        public Regex QuarterRegexYearFront { get; }
+
+        public Regex AllHalfYearRegex { get; }
+
+        public Regex SeasonRegex { get; }
+
+        public Regex WhichWeekRegex { get; }
+
+        public Regex WeekOfRegex { get; }
+
+        public Regex MonthOfRegex { get; }
+
+        public Regex InConnectorRegex { get; }
+
+        public Regex WithinNextPrefixRegex { get; }
+
+        public Regex RestOfDateRegex { get; }
+
+        public Regex LaterEarlyPeriodRegex { get; }
+
+        public Regex WeekWithWeekDayRangeRegex { get; }
+
+        public Regex YearPlusNumberRegex { get; }
+
+        public Regex DecadeWithCenturyRegex { get; }
+
+        public Regex YearPeriodRegex { get; }
+
+        public Regex ComplexDatePeriodRegex { get; }
+
+        public Regex RelativeDecadeRegex { get; }
+
+        public Regex ReferenceDatePeriodRegex { get; }
+
+        public Regex AgoRegex { get; }
+
+        public Regex LaterRegex { get; }
+
+        public Regex LessThanRegex { get; }
+
+        public Regex MoreThanRegex { get; }
+
+        public Regex CenturySuffixRegex { get; }
+
+        public Regex NowRegex { get; }
+
+        public Regex SpecialDayRegex { get; }
+
+        public Regex TodayNowRegex { get; }
+
+        public Regex FirstLastRegex { get; }
+
+        public Regex OfYearRegex { get; }
+
+        Regex ISimpleDatePeriodParserConfiguration.RelativeRegex => RelativeRegex;
+
+        Regex IDatePeriodParserConfiguration.NextPrefixRegex => NextPrefixRegex;
+
+        Regex IDatePeriodParserConfiguration.PreviousPrefixRegex => PreviousPrefixRegex;
+
+        Regex IDatePeriodParserConfiguration.ThisPrefixRegex => ThisPrefixRegex;
+
+        Regex IDatePeriodParserConfiguration.UnspecificEndOfRangeRegex => UnspecificEndOfRangeRegex;
+
+        // No ambiguous point/range pattern is provided for Arabic.
+        Regex IDatePeriodParserConfiguration.AmbiguousPointRangeRegex => null;
+
+        bool IDatePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;
+
+        public IImmutableDictionary UnitMap { get; }
+
+        public IImmutableDictionary CardinalMap { get; }
+
+        public IImmutableDictionary DayOfMonth { get; }
+
+        public IImmutableDictionary MonthOfYear { get; }
+
+        public IImmutableDictionary SeasonMap { get; }
+
+        public IImmutableDictionary SpecialYearPrefixesMap { get; }
+
+        public IImmutableDictionary WrittenDecades { get; }
+
+        public IImmutableDictionary Numbers { get; }
+
+        public IImmutableDictionary SpecialDecadeCases { get; }
+
+        // Not assigned by the constructor, so this get-only property stays null.
+        public IImmutableList InStringList { get; }
+
+        /// <summary>
+        /// Maps a relative modifier to a day/month offset: 2 for the after-next suffix,
+        /// 1 for a next prefix, -1 for a previous prefix, otherwise 0. The first matching
+        /// pattern in that order wins.
+        /// </summary>
+        /// <param name="text">Text to test; surrounding whitespace is trimmed first.</param>
+        public int GetSwiftDayOrMonth(string text)
+        {
+            var swift = 0;
+
+            var trimmedText = text.Trim();
+
+            if (AfterNextSuffixRegex.IsMatch(trimmedText))
+            {
+                swift = 2;
+            }
+            else if (NextPrefixRegex.IsMatch(trimmedText))
+            {
+                swift = 1;
+            }
+            else if (PreviousPrefixRegex.IsMatch(trimmedText))
+            {
+                swift = -1;
+            }
+
+            return swift;
+        }
+
+        /// <summary>
+        /// Maps a relative modifier to a year offset: 2 for the after-next suffix, 1 for a
+        /// next prefix, -1 for a previous prefix, 0 for a this prefix. Returns -10 when
+        /// none of the patterns match.
+        /// </summary>
+        /// <param name="text">Text to test; surrounding whitespace is trimmed first.</param>
+        public int GetSwiftYear(string text)
+        {
+            // -10 is the "no modifier matched" value handed back to the caller.
+            var swift = -10;
+
+            var trimmedText = text.Trim();
+
+            if (AfterNextSuffixRegex.IsMatch(trimmedText))
+            {
+                swift = 2;
+            }
+            else if (NextPrefixRegex.IsMatch(trimmedText))
+            {
+                swift = 1;
+            }
+            else if (PreviousPrefixRegex.IsMatch(trimmedText))
+            {
+                swift = -1;
+            }
+            else if (ThisPrefixRegex.IsMatch(trimmedText))
+            {
+                swift = 0;
+            }
+
+            return swift;
+        }
+
+        /// <summary>True when the trimmed text starts with any of the FutureTerms entries.</summary>
+        public bool IsFuture(string text)
+        {
+            var trimmedText = text.Trim();
+            return DateTimeDefinitions.FutureTerms.Any(o => trimmedText.StartsWith(o, StringComparison.Ordinal));
+        }
+
+        /// <summary>True when the trimmed text equals any of the LastCardinalTerms entries.</summary>
+        public bool IsLastCardinal(string text)
+        {
+            var trimmedText = text.Trim();
+            return DateTimeDefinitions.LastCardinalTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal));
+        }
+
+        /// <summary>
+        /// True when the trimmed text ends with a month term, or contains a space-delimited
+        /// month term together with an after-next suffix match.
+        /// </summary>
+        public bool IsMonthOnly(string text)
+        {
+            var trimmedText = text.Trim();
+            return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) ||
+                   (monthTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText));
+        }
+
+        /// <summary>True when the trimmed text equals any of the MonthToDateTerms entries.</summary>
+        public bool IsMonthToDate(string text)
+        {
+            var trimmedText = text.Trim();
+            return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal));
+        }
+
+        /// <summary>
+        /// True when the trimmed text ends with a weekend term, or contains a space-delimited
+        /// weekend term together with an after-next suffix match.
+        /// </summary>
+        public bool IsWeekend(string text)
+        {
+            var trimmedText = text.Trim();
+            return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) ||
+                   (weekendTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText));
+        }
+
+        /// <summary>
+        /// True when the trimmed text ends with a week term, or contains a space-delimited
+        /// week term together with an after-next suffix match.
+        /// </summary>
+        public bool IsWeekOnly(string text)
+        {
+            var trimmedText = text.Trim();
+            return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) ||
+                   (weekTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText));
+        }
+
+        /// <summary>Always false: fortnight detection is not implemented here.</summary>
+        public bool IsFortnight(string text)
+        {
+            return false;
+        }
+
+        /// <summary>
+        /// True when the trimmed text ends with a year term, contains a space-delimited year
+        /// term together with an after-next suffix match, or ends with a generic year term
+        /// while matching the unspecific end-of-range pattern.
+        /// </summary>
+        public bool IsYearOnly(string text)
+        {
+            var trimmedText = text.Trim();
+            return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) ||
+                   (yearTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)) ||
+                   (DateTimeDefinitions.GenericYearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) &&
+                    UnspecificEndOfRangeRegex.IsMatch(trimmedText));
+        }
+
+        /// <summary>True when the trimmed text equals any of the YearToDateTerms entries.</summary>
+        public bool IsYearToDate(string text)
+        {
+            var trimmedText = text.Trim();
+            return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal));
+        }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDateTimeAltParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDateTimeAltParserConfiguration.cs
new file mode 100644
index 0000000000..7f9f42e058
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDateTimeAltParserConfiguration.cs
@@ -0,0 +1,30 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+namespace Microsoft.Recognizers.Text.DateTime.Arabic
+{
+    /// <summary>
+    /// Alternative date-time parser configuration for Arabic; it only holds the six
+    /// parsers copied from the shared parser configuration.
+    /// </summary>
+    public class ArabicDateTimeAltParserConfiguration : IDateTimeAltParserConfiguration
+    {
+        /// <summary>Copies the parser references from <paramref name="config"/>.</summary>
+        /// <param name="config">Shared parser configuration whose parsers are reused.</param>
+        public ArabicDateTimeAltParserConfiguration(ICommonDateTimeParserConfiguration config)
+        {
+            DateTimeParser = config.DateTimeParser;
+            DateParser = config.DateParser;
+            TimeParser = config.TimeParser;
+            DateTimePeriodParser = config.DateTimePeriodParser;
+            TimePeriodParser = config.TimePeriodParser;
+            DatePeriodParser = config.DatePeriodParser;
+        }
+
+        public IDateTimeParser DateTimeParser { get; }
+
+        public IDateTimeParser DateParser { get; }
+
+        public IDateTimeParser TimeParser { get; }
+
+        public IDateTimeParser DateTimePeriodParser { get; }
+
+        public IDateTimeParser TimePeriodParser { get; }
+
+        public IDateTimeParser DatePeriodParser { get; }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDateTimeParserConfiguration.cs
new file mode 100644
index 0000000000..0a3e39b64c
--- /dev/null
+++ 
// b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDateTimeParserConfiguration.cs
// @@ -0,0 +1,197 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.Arabic;
using Microsoft.Recognizers.Text.DateTime.Utilities;
using Microsoft.Recognizers.Text.Utilities;

namespace Microsoft.Recognizers.Text.DateTime.Arabic
{
    /// <summary>
    /// Arabic configuration for the date-time parser. Sub-extractors, sub-parsers and lookup
    /// tables are copied from the shared common configuration; the regexes are either compiled
    /// here from <see cref="DateTimeDefinitions"/> or reused from the Arabic extractor configurations.
    /// </summary>
    public class ArabicDateTimeParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeParserConfiguration
    {
        public static readonly Regex AmTimeRegex =
            new Regex(DateTimeDefinitions.AMTimeRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex PmTimeRegex =
            new Regex(DateTimeDefinitions.PMTimeRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex NightTimeRegex =
            new Regex(DateTimeDefinitions.NightTimeRegex, RegexFlags, RegexTimeOut);

        // All patterns in this class are matched right-to-left.
        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft;

        private static readonly Regex NowTimeRegex =
            new Regex(DateTimeDefinitions.NowTimeRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex RecentlyTimeRegex =
            new Regex(DateTimeDefinitions.RecentlyTimeRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex AsapTimeRegex =
            new Regex(DateTimeDefinitions.AsapTimeRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex NextPrefixRegex =
            new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex PreviousPrefixRegex =
            new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut);

        /// <summary>
        /// Builds the configuration from the shared common parser configuration.
        /// </summary>
        /// <param name="config">Common configuration supplying extractors, parsers and maps.</param>
        public ArabicDateTimeParserConfiguration(ICommonDateTimeParserConfiguration config)
            : base(config)
        {
            TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate;
            TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime;

            DateExtractor = config.DateExtractor;
            TimeExtractor = config.TimeExtractor;
            DateParser = config.DateParser;
            TimeParser = config.TimeParser;
            HolidayExtractor = config.HolidayExtractor;
            HolidayTimeParser = config.HolidayTimeParser;

            NowRegex = ArabicDateTimeExtractorConfiguration.NowRegex;

            SimpleTimeOfTodayAfterRegex = ArabicDateTimeExtractorConfiguration.SimpleTimeOfTodayAfterRegex;
            SimpleTimeOfTodayBeforeRegex = ArabicDateTimeExtractorConfiguration.SimpleTimeOfTodayBeforeRegex;
            SpecificTimeOfDayRegex = ArabicDateTimeExtractorConfiguration.SpecificTimeOfDayRegex;
            SpecificEndOfRegex = ArabicDateTimeExtractorConfiguration.SpecificEndOfRegex;
            UnspecificEndOfRegex = ArabicDateTimeExtractorConfiguration.UnspecificEndOfRegex;
            UnitRegex = ArabicTimeExtractorConfiguration.TimeUnitRegex;
            DateNumberConnectorRegex = ArabicDateTimeExtractorConfiguration.DateNumberConnectorRegex;
            YearRegex = ArabicDateTimeExtractorConfiguration.YearRegex;

            Numbers = config.Numbers;
            CardinalExtractor = config.CardinalExtractor;
            IntegerExtractor = config.IntegerExtractor;
            NumberParser = config.NumberParser;
            DurationExtractor = config.DurationExtractor;
            DurationParser = config.DurationParser;
            UnitMap = config.UnitMap;
            UtilityConfiguration = config.UtilityConfiguration;
        }

        public string TokenBeforeDate { get; }

        public string TokenBeforeTime { get; }

        public IDateExtractor DateExtractor { get; }

        public IDateTimeExtractor TimeExtractor { get; }

        public IDateTimeParser DateParser { get; }

        public IDateTimeParser TimeParser { get; }

        public IExtractor CardinalExtractor { get; }

        public IExtractor IntegerExtractor { get; }

        public IParser NumberParser { get; }

        public IDateTimeExtractor DurationExtractor { get; }

        public IDateTimeParser DurationParser { get; }

        // NOTE(review): type arguments restored as <string, string>; confirm against
        // ICommonDateTimeParserConfiguration.UnitMap.
        public IImmutableDictionary<string, string> UnitMap { get; }

        public Regex NowRegex { get; }

        public Regex AMTimeRegex => AmTimeRegex;

        public Regex PMTimeRegex => PmTimeRegex;

        public Regex SimpleTimeOfTodayAfterRegex { get; }

        public Regex SimpleTimeOfTodayBeforeRegex { get; }

        public Regex SpecificTimeOfDayRegex { get; }

        public Regex SpecificEndOfRegex { get; }

        public Regex UnspecificEndOfRegex { get; }

        public Regex UnitRegex { get; }

        public Regex DateNumberConnectorRegex { get; }

        // Never assigned by the constructor, so this is always null.
        public Regex PrepositionRegex { get; }

        // Never assigned by the constructor, so this is always null.
        public Regex ConnectorRegex { get; }

        public Regex YearRegex { get; }

        // NOTE(review): type arguments restored as <string, int>; confirm against
        // ICommonDateTimeParserConfiguration.Numbers.
        public IImmutableDictionary<string, int> Numbers { get; }

        public IDateTimeUtilityConfiguration UtilityConfiguration { get; }

        public IDateTimeExtractor HolidayExtractor { get; }

        public IDateTimeParser HolidayTimeParser { get; }

        /// <summary>
        /// Adjusts an hour value according to the AM / night markers found at the end of <paramref name="text"/>.
        /// </summary>
        /// <param name="text">Text that accompanies the hour.</param>
        /// <param name="hour">Hour value as parsed.</param>
        /// <returns>
        /// <paramref name="hour"/> minus <c>Constants.HalfDayHourCount</c> when an AM marker is present and the
        /// hour is at or above that count; <paramref name="hour"/> plus that count when no AM marker is present,
        /// the hour is below it, and the text is not a night marker with an hour below
        /// <c>Constants.QuarterDayHourCount</c>; otherwise <paramref name="hour"/> unchanged.
        /// </returns>
        public int GetHour(string text, int hour)
        {
            int result = hour;

            var trimmedText = text.Trim();

            // Evaluate the AM pattern once; both branches depend on it.
            var hasAmMarker = AMTimeRegex.MatchEnd(trimmedText, trim: true).Success;

            if (hasAmMarker)
            {
                if (hour >= Constants.HalfDayHourCount)
                {
                    result -= Constants.HalfDayHourCount;
                }
            }
            else if (hour < Constants.HalfDayHourCount &&
                     !(NightTimeRegex.MatchEnd(trimmedText, trim: true).Success && hour < Constants.QuarterDayHourCount))
            {
                result += Constants.HalfDayHourCount;
            }

            return result;
        }

        /// <summary>
        /// Maps a "now"-like expression to its reference timex.
        /// </summary>
        /// <param name="text">Candidate expression.</param>
        /// <param name="timex">
        /// "PRESENT_REF", "PAST_REF" or "FUTURE_REF" on success; <c>null</c> otherwise.
        /// </param>
        /// <returns><c>true</c> when one of the three patterns matched.</returns>
        public bool GetMatchedNowTimex(string text, out string timex)
        {
            var trimmedText = text.Trim();

            if (NowTimeRegex.MatchEnd(trimmedText, trim: true).Success)
            {
                timex = "PRESENT_REF";
            }
            else if (RecentlyTimeRegex.IsExactMatch(trimmedText, trim: true))
            {
                timex = "PAST_REF";
            }
            else if (AsapTimeRegex.IsExactMatch(trimmedText, trim: true))
            {
                timex = "FUTURE_REF";
            }
            else
            {
                timex = null;
                return false;
            }

            return true;
        }

        /// <summary>
        /// Returns 1 when <paramref name="text"/> begins with a "next" term, -1 when it begins with a
        /// "previous" term, and 0 otherwise.
        /// </summary>
        public int GetSwiftDay(string text)
        {
            var trimmedText = text.Trim();

            var swift = 0;
            if (NextPrefixRegex.MatchBegin(trimmedText, trim: true).Success)
            {
                swift = 1;
            }
            else if (PreviousPrefixRegex.MatchBegin(trimmedText, trim: true).Success)
            {
                swift = -1;
            }

            return swift;
        }

        /// <summary>Always <c>false</c>: no ambiguous tokens are defined here.</summary>
        public bool ContainsAmbiguousToken(string text, string matchedText) => false;
    }
}

// diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDateTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDateTimePeriodParserConfiguration.cs
// new file mode 100644
// index 0000000000..f2d715506c
// --- /dev/null
// +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDateTimePeriodParserConfiguration.cs
// @@ -0,0 +1,217 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.Arabic;

namespace Microsoft.Recognizers.Text.DateTime.Arabic
{
    /// <summary>
    /// Arabic configuration for the date-time-period parser. Extractors, parsers and maps come from the
    /// shared common configuration; regexes are reused from the Arabic extractor configurations, except the
    /// four time-of-day range patterns compiled here.
    /// </summary>
    public class ArabicDateTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimePeriodParserConfiguration
    {
        public static readonly Regex MorningStartEndRegex =
            new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex AfternoonStartEndRegex =
            new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex EveningStartEndRegex =
            new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags, RegexTimeOut);

        public static readonly Regex NightStartEndRegex =
            new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags, RegexTimeOut);

        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft;

        /// <summary>
        /// Builds the configuration from the shared common parser configuration.
        /// </summary>
        /// <param name="config">Common configuration supplying extractors, parsers and maps.</param>
        public ArabicDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration config)
            : base(config)
        {
            TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate;
            TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime;

            DateExtractor = config.DateExtractor;
            TimeExtractor = config.TimeExtractor;
            DateTimeExtractor = config.DateTimeExtractor;
            TimePeriodExtractor = config.TimePeriodExtractor;
            CardinalExtractor = config.CardinalExtractor;
            DurationExtractor = config.DurationExtractor;
            NumberParser = config.NumberParser;
            DateParser = config.DateParser;
            TimeParser = config.TimeParser;
            TimePeriodParser = config.TimePeriodParser;
            DurationParser = config.DurationParser;
            DateTimeParser = config.DateTimeParser;
            TimeZoneParser = config.TimeZoneParser;

            HolidayExtractor = config.HolidayExtractor;
            HolidayTimeParser = config.HolidayTimeParser;

            PureNumberFromToRegex = ArabicTimePeriodExtractorConfiguration.PureNumFromTo;
            HyphenDateRegex = ArabicDateTimePeriodExtractorConfiguration.HyphenDateRegex;
            PureNumberBetweenAndRegex = ArabicTimePeriodExtractorConfiguration.PureNumBetweenAnd;
            SpecificTimeOfDayRegex = ArabicDateTimePeriodExtractorConfiguration.PeriodSpecificTimeOfDayRegex;
            TimeOfDayRegex = ArabicDateTimeExtractorConfiguration.TimeOfDayRegex;
            PreviousPrefixRegex = ArabicDatePeriodExtractorConfiguration.PreviousPrefixRegex;
            FutureRegex = ArabicDatePeriodExtractorConfiguration.NextPrefixRegex;
            FutureSuffixRegex = ArabicDatePeriodExtractorConfiguration.FutureSuffixRegex;
            NumberCombinedWithUnitRegex = ArabicDateTimePeriodExtractorConfiguration.TimeNumberCombinedWithUnit;
            UnitRegex = ArabicTimePeriodExtractorConfiguration.TimeUnitRegex;
            PeriodTimeOfDayWithDateRegex = ArabicDateTimePeriodExtractorConfiguration.PeriodTimeOfDayWithDateRegex;
            RelativeTimeUnitRegex = ArabicDateTimePeriodExtractorConfiguration.RelativeTimeUnitRegex;
            RestOfDateTimeRegex = ArabicDateTimePeriodExtractorConfiguration.RestOfDateTimeRegex;
            AmDescRegex = ArabicDateTimePeriodExtractorConfiguration.AmDescRegex;
            PmDescRegex = ArabicDateTimePeriodExtractorConfiguration.PmDescRegex;
            WithinNextPrefixRegex = ArabicDateTimePeriodExtractorConfiguration.WithinNextPrefixRegex;
            PrefixDayRegex = ArabicDateTimePeriodExtractorConfiguration.PrefixDayRegex;
            BeforeRegex = ArabicDateTimePeriodExtractorConfiguration.BeforeRegex;
            AfterRegex = ArabicDateTimePeriodExtractorConfiguration.AfterRegex;

            UnitMap = config.UnitMap;
            Numbers = config.Numbers;
        }

        public string TokenBeforeDate { get; }

        public string TokenBeforeTime { get; }

        public IDateExtractor DateExtractor { get; }

        public IDateTimeExtractor TimeExtractor { get; }

        public IDateTimeExtractor DateTimeExtractor { get; }

        public IDateTimeExtractor TimePeriodExtractor { get; }

        public IExtractor CardinalExtractor { get; }

        public IDateTimeExtractor DurationExtractor { get; }

        public IParser NumberParser { get; }

        public IDateTimeParser DateParser { get; }

        public IDateTimeParser TimeParser { get; }

        public IDateTimeParser DateTimeParser { get; }

        public IDateTimeParser TimePeriodParser { get; }

        public IDateTimeParser DurationParser { get; }

        public IDateTimeParser TimeZoneParser { get; }

        public Regex PureNumberFromToRegex { get; }

        public Regex HyphenDateRegex { get; }

        public Regex PureNumberBetweenAndRegex { get; }

        public Regex SpecificTimeOfDayRegex { get; }

        public Regex TimeOfDayRegex { get; }

        public Regex PreviousPrefixRegex { get; }

        public Regex FutureRegex { get; }

        public Regex FutureSuffixRegex { get; }

        public Regex NumberCombinedWithUnitRegex { get; }

        public Regex UnitRegex { get; }

        public Regex PeriodTimeOfDayWithDateRegex { get; }

        public Regex RelativeTimeUnitRegex { get; }

        public Regex RestOfDateTimeRegex { get; }

        public Regex AmDescRegex { get; }

        public Regex PmDescRegex { get; }

        public Regex WithinNextPrefixRegex { get; }

        public Regex PrefixDayRegex { get; }

        public Regex BeforeRegex { get; }

        public Regex AfterRegex { get; }

        // No tasks-mode meal-time pattern is provided by this configuration.
        Regex IDateTimePeriodParserConfiguration.TasksmodeMealTimeofDayRegex => null;

        bool IDateTimePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;

        // NOTE(review): type arguments restored as <string, string>; confirm against
        // ICommonDateTimeParserConfiguration.UnitMap.
        public IImmutableDictionary<string, string> UnitMap { get; }

        // NOTE(review): type arguments restored as <string, int>; confirm against
        // ICommonDateTimeParserConfiguration.Numbers.
        public IImmutableDictionary<string, int> Numbers { get; }

        public IDateTimeExtractor HolidayExtractor { get; }

        public IDateTimeParser HolidayTimeParser { get; }

        // @TODO Move time range resolution to common policy

        /// <summary>
        /// Resolves a time-of-day expression to its timex symbol and hour range.
        /// </summary>
        /// <param name="text">Candidate time-of-day expression.</param>
        /// <param name="todSymbol">"TMO", "TAF", "TEV" or "TNI" on success; <c>null</c> otherwise.</param>
        /// <param name="beginHour">First hour of the range (0 when nothing matched).</param>
        /// <param name="endHour">Last hour of the range (0 when nothing matched).</param>
        /// <param name="endMin">End minute; 59 for the night range, 0 otherwise.</param>
        /// <returns><c>true</c> when one of the four range patterns matched.</returns>
        public bool GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMin)
        {
            var trimmedText = text.Trim();

            beginHour = 0;
            endHour = 0;
            endMin = 0;
            if (MorningStartEndRegex.IsMatch(trimmedText))
            {
                todSymbol = "TMO";
                beginHour = 8;
                endHour = Constants.HalfDayHourCount;
            }
            else if (AfternoonStartEndRegex.IsMatch(trimmedText))
            {
                todSymbol = "TAF";
                beginHour = Constants.HalfDayHourCount;
                endHour = 16;
            }
            else if (EveningStartEndRegex.IsMatch(trimmedText))
            {
                todSymbol = "TEV";
                beginHour = 16;
                endHour = 20;
            }
            else if (NightStartEndRegex.IsMatch(trimmedText))
            {
                todSymbol = "TNI";
                beginHour = 20;
                endHour = 23;
                endMin = 59;
            }
            else
            {
                todSymbol = null;
                return false;
            }

            return true;
        }

        /// <summary>
        /// Returns 1 for a "next" expression, -1 for a "previous" expression, and 0 otherwise.
        /// </summary>
        /// <remarks>
        /// The English literals "next"/"last" are kept for backward compatibility, but they cannot match
        /// Arabic input, so the Arabic <see cref="FutureRegex"/> and <see cref="PreviousPrefixRegex"/>
        /// are consulted as well.
        /// NOTE(review): the regexes match anywhere in the text rather than only at its start - confirm this
        /// is the desired scope for Arabic word order.
        /// </remarks>
        public int GetSwiftPrefix(string text)
        {
            var trimmedText = text.Trim();

            // @TODO Move hardcoded terms to resource file

            var swift = 0;
            if (trimmedText.StartsWith("next", StringComparison.Ordinal) || FutureRegex.IsMatch(trimmedText))
            {
                swift = 1;
            }
            else if (trimmedText.StartsWith("last", StringComparison.Ordinal) || PreviousPrefixRegex.IsMatch(trimmedText))
            {
                swift = -1;
            }

            return swift;
        }
    }
}

// diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDurationParserConfiguration.cs
// new file mode 100644
// index 0000000000..5e68d49e17
// --- /dev/null
// +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicDurationParserConfiguration.cs
// @@ -0,0 +1,86 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Immutable;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.Arabic;

namespace Microsoft.Recognizers.Text.DateTime.Arabic
{
    /// <summary>
    /// Arabic configuration for the duration parser. Number helpers and unit maps come from the shared
    /// common configuration; regexes are reused from <see cref="ArabicDurationExtractorConfiguration"/>.
    /// </summary>
    public class ArabicDurationParserConfiguration : BaseDateTimeOptionsConfiguration, IDurationParserConfiguration
    {

        public static readonly Regex PrefixArticleRegex =
            new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut);

        // Unlike the other Arabic parser configurations in this change, RightToLeft is not set here.
        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

        /// <summary>
        /// Builds the configuration from the shared common parser configuration.
        /// </summary>
        /// <param name="config">Common configuration supplying number helpers and unit maps.</param>
        public ArabicDurationParserConfiguration(ICommonDateTimeParserConfiguration config)
            : base(config)
        {

            CardinalExtractor = config.CardinalExtractor;
            NumberParser = config.NumberParser;

            // A dedicated extractor is created here instead of reusing config.DurationExtractor.
            // NOTE(review): the meaning of the trailing 'false' argument is defined by BaseDurationExtractor - confirm.
            DurationExtractor = new BaseDurationExtractor(new ArabicDurationExtractorConfiguration(this), false);

            NumberCombinedWithUnit = ArabicDurationExtractorConfiguration.NumberCombinedWithDurationUnit;

            AnUnitRegex = ArabicDurationExtractorConfiguration.AnUnitRegex;
            DuringRegex = ArabicDurationExtractorConfiguration.DuringRegex;
            AllDateUnitRegex = ArabicDurationExtractorConfiguration.AllRegex;
            HalfDateUnitRegex = ArabicDurationExtractorConfiguration.HalfRegex;
            SuffixAndRegex = ArabicDurationExtractorConfiguration.SuffixAndRegex;
            FollowedUnit = ArabicDurationExtractorConfiguration.DurationFollowedUnit;
            ConjunctionRegex = ArabicDurationExtractorConfiguration.ConjunctionRegex;
            InexactNumberRegex = ArabicDurationExtractorConfiguration.InexactNumberRegex;
            InexactNumberUnitRegex = ArabicDurationExtractorConfiguration.InexactNumberUnitRegex;
            DurationUnitRegex = ArabicDurationExtractorConfiguration.DurationUnitRegex;
            SpecialNumberUnitRegex = ArabicDurationExtractorConfiguration.SpecialNumberUnitRegex;

            UnitMap = config.UnitMap;
            UnitValueMap = config.UnitValueMap;
            DoubleNumbers = config.DoubleNumbers;
        }

        public IExtractor CardinalExtractor { get; }

        public IDateTimeExtractor DurationExtractor { get; }

        public IParser NumberParser { get; }

        public Regex NumberCombinedWithUnit { get; }

        public Regex AnUnitRegex { get; }

        // Explicit implementation: the public static field of the same name backs the interface member.
        Regex IDurationParserConfiguration.PrefixArticleRegex => PrefixArticleRegex;

        public Regex DuringRegex { get; }

        public Regex AllDateUnitRegex { get; }

        public Regex HalfDateUnitRegex { get; }

        public Regex SuffixAndRegex { get; }

        public Regex FollowedUnit { get; }

        public Regex ConjunctionRegex { get; }

        public Regex InexactNumberRegex { get; }

        public Regex InexactNumberUnitRegex { get; }

        public Regex DurationUnitRegex { get; }

        public Regex SpecialNumberUnitRegex { get; }

        bool IDurationParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;

        // NOTE(review): the type arguments on the three maps below were restored as
        // <string, string>, <string, long> and <string, double>; confirm against
        // ICommonDateTimeParserConfiguration.
        public IImmutableDictionary<string, string> UnitMap { get; }

        public IImmutableDictionary<string, long> UnitValueMap { get; }

        public IImmutableDictionary<string, double> DoubleNumbers { get; }
    }
}

// diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicHolidayParserConfiguration.cs
// new file mode 100644
// index 0000000000..18bbeac894
// --- /dev/null
// +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicHolidayParserConfiguration.cs
// @@ -0,0 +1,211 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.Arabic;

using DateObject = System.DateTime;

namespace Microsoft.Recognizers.Text.DateTime.Arabic
{
    /// <summary>
    /// Arabic configuration for the holiday parser: prefix regexes for year shifting, the holiday
    /// regex list and name table, and the fixed-date / Easter-relative holiday resolvers.
    /// </summary>
    public class ArabicHolidayParserConfiguration : BaseHolidayParserConfiguration
    {
        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft;

        /// <summary>
        /// Compiles the this/next/previous prefix regexes and loads the holiday tables.
        /// </summary>
        /// <param name="config">Options configuration forwarded to the base class.</param>
        public ArabicHolidayParserConfiguration(IDateTimeOptionsConfiguration config)
            : base(config)
        {
            ThisPrefixRegex = new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut);
            NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut);
            PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut);
            this.HolidayRegexList = ArabicHolidayExtractorConfiguration.HolidayRegexList;
            this.HolidayNames = DateTimeDefinitions.HolidayNames.ToImmutableDictionary();
        }

        public Regex ThisPrefixRegex { get; }

        public Regex NextPrefixRegex { get; }

        public Regex PreviousPrefixRegex { get; }

        /// <summary>
        /// Returns the year offset expressed by <paramref name="text"/>: 1 for "next", -1 for "previous",
        /// 0 for "this", and -10 when none of the prefix patterns matches.
        /// </summary>
        public override int GetSwiftYear(string text)
        {
            var trimmedText = text.Trim();

            // -10 is the "no prefix found" value returned when nothing below matches.
            var swift = -10;

            if (NextPrefixRegex.IsMatch(trimmedText))
            {
                swift = 1;
            }
            else if (PreviousPrefixRegex.IsMatch(trimmedText))
            {
                swift = -1;
            }
            else if (ThisPrefixRegex.IsMatch(trimmedText))
            {
                swift = 0;
            }

            return swift;
        }

        /// <summary>
        /// Normalizes a holiday token: "saint " becomes "st ", then spaces, apostrophes and periods are removed.
        /// </summary>
        public override string SanitizeHolidayToken(string holiday)
        {
            return holiday
                .Replace("saint ", "st ")
                .Replace(" ", string.Empty)
                .Replace("'", string.Empty)
                .Replace(".", string.Empty);
        }

        // @TODO Change to auto-generate.

        /// <summary>
        /// Extends the base holiday table with the resolvers defined in this class. Keys are holiday
        /// identifiers; values compute the holiday's date for a given year.
        /// </summary>
        protected override IDictionary<string, Func<int, DateObject>> InitHolidayFuncs()
        {
            return new Dictionary<string, Func<int, DateObject>>(base.InitHolidayFuncs())
            {
                { "maosbirthday", MaoBirthday },
                { "yuandan", NewYear },
                { "teachersday", TeacherDay },
                { "singleday", SinglesDay },
                { "allsaintsday", HalloweenDay },
                { "youthday", YouthDay },
                { "childrenday", ChildrenDay },
                { "femaleday", FemaleDay },
                { "treeplantingday", TreePlantDay },
                { "arborday", TreePlantDay },
                { "girlsday", GirlsDay },
                { "whiteloverday", WhiteLoverDay },
                { "loverday", ValentinesDay },
                { "christmas", ChristmasDay },
                { "xmas", ChristmasDay },
                { "newyear", NewYear },
                { "newyearday", NewYear },
                { "newyearsday", NewYear },
                { "inaugurationday", InaugurationDay },
                { "groundhougday", GroundhogDay },
                { "valentinesday", ValentinesDay },
                { "stpatrickday", StPatrickDay },
                { "aprilfools", FoolDay },
                { "earthday", EarthDay },
                { "stgeorgeday", StGeorgeDay },
                { "mayday", Mayday },
                { "cincodemayoday", CincoDeMayoDay },
                { "baptisteday", BaptisteDay },
                { "usindependenceday", UsaIndependenceDay },
                { "independenceday", UsaIndependenceDay },
                { "bastilleday", BastilleDay },
                { "halloweenday", HalloweenDay },
                { "allhallowday", AllHallowDay },
                { "allsoulsday", AllSoulsDay },
                { "guyfawkesday", GuyFawkesDay },
                { "veteransday", VeteransDay },
                { "christmaseve", ChristmasEve },
                { "newyeareve", NewYearEve },
                { "easterday", EasterDay },
                { "ashwednesday", AshWednesday },
                { "palmsunday", PalmSunday },
                { "maundythursday", MaundyThursday },
                { "goodfriday", GoodFriday },
                { "eastersaturday", EasterSaturday },
                { "eastermonday", EasterMonday },
                { "ascensionday", AscensionDay },
                { "whitesunday", WhiteSunday },
                { "whitemonday", WhiteMonday },
                { "trinitysunday", TrinitySunday },
                { "corpuschristi", CorpusChristi },
                { "juneteenth", Juneteenth },
            };
        }

        // Fixed-date holidays: each resolver returns the same month/day in the requested year.

        private static DateObject NewYear(int year) => new DateObject(year, 1, 1);

        private static DateObject NewYearEve(int year) => new DateObject(year, 12, 31);

        private static DateObject ChristmasDay(int year) => new DateObject(year, 12, 25);

        private static DateObject ChristmasEve(int year) => new DateObject(year, 12, 24);

        private static DateObject ValentinesDay(int year) => new DateObject(year, 2, 14);

        private static DateObject WhiteLoverDay(int year) => new DateObject(year, 3, 14);

        private static DateObject FoolDay(int year) => new DateObject(year, 4, 1);

        private static DateObject EarthDay(int year) => new DateObject(year, 4, 22);

        private static DateObject GirlsDay(int year) => new DateObject(year, 3, 7);

        private static DateObject TreePlantDay(int year) => new DateObject(year, 3, 12);

        private static DateObject FemaleDay(int year) => new DateObject(year, 3, 8);

        private static DateObject ChildrenDay(int year) => new DateObject(year, 6, 1);

        private static DateObject YouthDay(int year) => new DateObject(year, 5, 4);

        private static DateObject TeacherDay(int year) => new DateObject(year, 9, 10);

        private static DateObject SinglesDay(int year) => new DateObject(year, 11, 11);

        private static DateObject MaoBirthday(int year) => new DateObject(year, 12, 26);

        private static DateObject InaugurationDay(int year) => new DateObject(year, 1, 20);

        private static DateObject GroundhogDay(int year) => new DateObject(year, 2, 2);

        private static DateObject StPatrickDay(int year) => new DateObject(year, 3, 17);

        private static DateObject StGeorgeDay(int year) => new DateObject(year, 4, 23);

        private static DateObject Mayday(int year) => new DateObject(year, 5, 1);

        private static DateObject CincoDeMayoDay(int year) => new DateObject(year, 5, 5);

        private static DateObject BaptisteDay(int year) => new DateObject(year, 6, 24);

        private static DateObject UsaIndependenceDay(int year) => new DateObject(year, 7, 4);

        private static DateObject BastilleDay(int year) => new DateObject(year, 7, 14);

        private static DateObject HalloweenDay(int year) => new DateObject(year, 10, 31);

        private static DateObject AllHallowDay(int year) => new DateObject(year, 11, 1);

        private static DateObject AllSoulsDay(int year) => new DateObject(year, 11, 2);

        private static DateObject GuyFawkesDay(int year) => new DateObject(year, 11, 5);

        private static DateObject VeteransDay(int year) => new DateObject(year, 11, 11);

        private static DateObject Juneteenth(int year) => new DateObject(year, 6, 19);

        // Easter-relative holidays: a day offset applied to the date computed by HolidayFunctions.

        private static DateObject EasterDay(int year) => HolidayFunctions.CalculateHolidayByEaster(year);

        private static DateObject AshWednesday(int year) => EasterDay(year).AddDays(-46);

        private static DateObject PalmSunday(int year) => EasterDay(year).AddDays(-7);

        private static DateObject MaundyThursday(int year) => EasterDay(year).AddDays(-3);

        private static DateObject GoodFriday(int year) => EasterDay(year).AddDays(-2);

        private static DateObject EasterSaturday(int year) => EasterDay(year).AddDays(-1);

        private static DateObject EasterMonday(int year) => EasterDay(year).AddDays(1);

        private static DateObject AscensionDay(int year) => EasterDay(year).AddDays(39);

        private static DateObject WhiteSunday(int year) => EasterDay(year).AddDays(49);

        private static DateObject WhiteMonday(int year) => EasterDay(year).AddDays(50);

        private static DateObject TrinitySunday(int year) => EasterDay(year).AddDays(56);

        private static DateObject CorpusChristi(int year) => EasterDay(year).AddDays(60);

    }
}

// diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicMergedParserConfiguration.cs
// new file mode 100644
// index 0000000000..9745235c60
// --- /dev/null
// +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicMergedParserConfiguration.cs
// @@ -0,0 +1,56 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.Arabic;
using Microsoft.Recognizers.Text.Matcher;

namespace Microsoft.Recognizers.Text.DateTime.Arabic
{
    /// <summary>
    /// Arabic configuration for the merged parser. Modifier regexes are reused from the Arabic
    /// extractor configurations, and the period, set, holiday and time-zone parsers are created here.
    /// </summary>
    public sealed class ArabicMergedParserConfiguration : ArabicCommonDateTimeParserConfiguration, IMergedParserConfiguration
    {
        /// <summary>
        /// Wires the modifier regexes and constructs the dependent parsers.
        /// </summary>
        /// <param name="config">Options configuration; its Options select the "since" regex variant.</param>
        public ArabicMergedParserConfiguration(IDateTimeOptionsConfiguration config)
            : base(config)
        {
            BeforeRegex = ArabicMergedExtractorConfiguration.BeforeRegex;
            AfterRegex = ArabicMergedExtractorConfiguration.AfterRegex;

            // Experimental mode selects the alternative "since" pattern.
            SinceRegex = (config.Options & DateTimeOptions.ExperimentalMode) != 0 ? ArabicMergedExtractorConfiguration.SinceRegexExp :
                ArabicMergedExtractorConfiguration.SinceRegex;
            AroundRegex = ArabicMergedExtractorConfiguration.AroundRegex;
            EqualRegex = ArabicMergedExtractorConfiguration.EqualRegex;
            SuffixAfter = ArabicMergedExtractorConfiguration.SuffixAfterRegex;
            YearRegex = ArabicDatePeriodExtractorConfiguration.YearRegex;

            SuperfluousWordMatcher = ArabicMergedExtractorConfiguration.SuperfluousWordMatcher;

            // Each configuration below receives 'this' while construction is still in progress.
            // NOTE(review): statement order may therefore matter - do not reorder without checking
            // what each configuration reads from this instance.
            DatePeriodParser = new BaseDatePeriodParser(new ArabicDatePeriodParserConfiguration(this));
            TimePeriodParser = new BaseTimePeriodParser(new ArabicTimePeriodParserConfiguration(this));
            DateTimePeriodParser = new BaseDateTimePeriodParser(new ArabicDateTimePeriodParserConfiguration(this));
            SetParser = new BaseSetParser(new ArabicSetParserConfiguration(this));
            HolidayParser = new BaseHolidayParser(new ArabicHolidayParserConfiguration(this));
            TimeZoneParser = new BaseTimeZoneParser(new ArabicTimeZoneParserConfiguration(this));
        }

        public Regex BeforeRegex { get; }

        public Regex AfterRegex { get; }

        public Regex SinceRegex { get; }

        public Regex AroundRegex { get; }

        public Regex EqualRegex { get; }

        public Regex SuffixAfter { get; }

        public Regex YearRegex { get; }

        public IDateTimeParser SetParser { get; }

        public IDateTimeParser HolidayParser { get; }

        public StringMatcher SuperfluousWordMatcher { get; }

        bool IMergedParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;
    }
}

// \ No newline at end of file
// diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicSetParserConfiguration.cs
// new file mode 100644
// index 0000000000..7cd0383bea
// --- /dev/null
// +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicSetParserConfiguration.cs
// @@ -0,0 +1,176 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.Arabic;
using Microsoft.Recognizers.Text.DateTime.Utilities;

namespace Microsoft.Recognizers.Text.DateTime.Arabic
{
    /// <summary>
    /// Arabic configuration for the set (recurrence) parser. Extractors and parsers come from the
    /// shared common configuration; "each"-style regexes are reused from
    /// <see cref="ArabicSetExtractorConfiguration"/>; unit-type regexes are compiled here.
    /// </summary>
    public class ArabicSetParserConfiguration : BaseDateTimeOptionsConfiguration, ISetParserConfiguration
    {

        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft;

        private static readonly Regex DoubleMultiplierRegex =
            new Regex(DateTimeDefinitions.DoubleMultiplierRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex HalfMultiplierRegex =
            new Regex(DateTimeDefinitions.HalfMultiplierRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex DayTypeRegex =
            new Regex(DateTimeDefinitions.DayTypeRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex WeekTypeRegex =
            new Regex(DateTimeDefinitions.WeekTypeRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex WeekendTypeRegex =
            new Regex(DateTimeDefinitions.WeekendTypeRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex MonthTypeRegex =
            new Regex(DateTimeDefinitions.MonthTypeRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex QuarterTypeRegex =
            new Regex(DateTimeDefinitions.QuarterTypeRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex YearTypeRegex =
            new Regex(DateTimeDefinitions.YearTypeRegex, RegexFlags, RegexTimeOut);

        // NOTE(review): element type restored as string; the hard cast requires FutureTerms to be
        // a List<string> at runtime - confirm against DateTimeDefinitions.
        private static readonly List<string> ThisTerms = (List<string>)DateTimeDefinitions.FutureTerms;

        /// <summary>
        /// Builds the configuration from the shared common parser configuration.
        /// </summary>
        /// <param name="config">Common configuration supplying extractors, parsers and the unit map.</param>
        public ArabicSetParserConfiguration(ICommonDateTimeParserConfiguration config)
            : base(config)
        {
            DurationExtractor = config.DurationExtractor;
            TimeExtractor = config.TimeExtractor;
            DateExtractor = config.DateExtractor;
            DateTimeExtractor = config.DateTimeExtractor;
            DatePeriodExtractor = config.DatePeriodExtractor;
            TimePeriodExtractor = config.TimePeriodExtractor;
            DateTimePeriodExtractor = config.DateTimePeriodExtractor;

            DurationParser = config.DurationParser;
            TimeParser = config.TimeParser;
            DateParser = config.DateParser;
            DateTimeParser = config.DateTimeParser;
            DatePeriodParser = config.DatePeriodParser;
            TimePeriodParser = config.TimePeriodParser;
            DateTimePeriodParser = config.DateTimePeriodParser;
            UnitMap = config.UnitMap;

            EachPrefixRegex = ArabicSetExtractorConfiguration.EachPrefixRegex;
            PeriodicRegex = ArabicSetExtractorConfiguration.PeriodicRegex;
            EachUnitRegex = ArabicSetExtractorConfiguration.EachUnitRegex;
            EachDayRegex = ArabicSetExtractorConfiguration.EachDayRegex;
            SetWeekDayRegex = ArabicSetExtractorConfiguration.SetWeekDayRegex;
            SetEachRegex = ArabicSetExtractorConfiguration.SetEachRegex;
        }

        public IDateTimeExtractor DurationExtractor { get; }

        public IDateTimeParser DurationParser { get; }

        public IDateTimeExtractor TimeExtractor { get; }

        public IDateTimeParser TimeParser { get; }

        public IDateExtractor DateExtractor { get; }

        public IDateTimeParser DateParser { get; }

        public IDateTimeExtractor DateTimeExtractor { get; }

        public IDateTimeParser DateTimeParser { get; }

        public IDateTimeExtractor DatePeriodExtractor { get; }

        public IDateTimeParser DatePeriodParser { get; }

        public IDateTimeExtractor TimePeriodExtractor { get; }

        public IDateTimeParser TimePeriodParser { get; }

        public IDateTimeExtractor DateTimePeriodExtractor { get; }

        public IDateTimeParser DateTimePeriodParser { get; }

        // NOTE(review): type arguments restored as <string, string>; confirm against
        // ICommonDateTimeParserConfiguration.UnitMap.
        public IImmutableDictionary<string, string> UnitMap { get; }

        public Regex EachPrefixRegex { get; }

        public Regex PeriodicRegex { get; }

        public Regex EachUnitRegex { get; }

        public Regex EachDayRegex { get; }

        public Regex SetWeekDayRegex { get; }

        public Regex SetEachRegex { get; }

        /// <summary>
        /// Builds a set timex for a recurrence expression such as a daily/weekly/monthly term.
        /// </summary>
        /// <param name="text">Candidate recurrence expression.</param>
        /// <param name="timex">Generated timex on success; <c>null</c> when no unit type matched.</param>
        /// <returns><c>true</c> when a unit type (day, week, weekend, month, quarter, year) matched.</returns>
        public bool GetMatchedDailyTimex(string text, out string timex)
        {
            var trimmedText = text.Trim();

            float durationLength = 1; // Default value
            float multiplier = 1;
            string durationType;

            // Optional multiplier: doubled or halved.
            if (DoubleMultiplierRegex.IsMatch(trimmedText))
            {
                multiplier = 2;
            }
            else if (HalfMultiplierRegex.IsMatch(trimmedText))
            {
                multiplier = 0.5f;
            }

            if (DayTypeRegex.IsMatch(trimmedText))
            {
                durationType = "D";
            }
            else if (WeekTypeRegex.IsMatch(trimmedText))
            {
                durationType = "W";
            }
            else if (WeekendTypeRegex.IsMatch(trimmedText))
            {
                durationType = "WE";
            }
            else if (MonthTypeRegex.IsMatch(trimmedText))
            {
                durationType = "M";
            }
            else if (QuarterTypeRegex.IsMatch(trimmedText))
            {
                // A quarter is expressed as three months.
                durationLength = 3;
                durationType = "M";
            }
            else if (YearTypeRegex.IsMatch(trimmedText))
            {
                durationType = "Y";
            }
            else
            {
                timex = null;
                return false;
            }

            timex = TimexUtility.GenerateSetTimex(durationType, durationLength, multiplier);

            return true;
        }

        /// <summary>Delegates to <see cref="GetMatchedDailyTimex"/>.</summary>
        public bool GetMatchedUnitTimex(string text, out string timex)
        {
            return GetMatchedDailyTimex(text, out timex);
        }

        public string WeekDayGroupMatchString(Match match) => SetHandler.WeekDayGroupMatchString(match);

        public string ReplaceValueInTextWithFutTerm(string text, string value) => TasksModeSetHandler.ReplaceValueInTextWithFutTerm(text, value, ThisTerms);
    }
}

// \ No newline at end of file
// diff --git
a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicTimeParserConfiguration.cs new file mode 100644 index 0000000000..6434490ed5 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicTimeParserConfiguration.cs @@ -0,0 +1,205 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Globalization; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Arabic; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Arabic +{ + public class ArabicTimeParserConfiguration : BaseDateTimeOptionsConfiguration, ITimeParserConfiguration + { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft; + + private static readonly Regex TimeSuffixFull = + new Regex(DateTimeDefinitions.TimeSuffixFull, RegexFlags, RegexTimeOut); + + private static readonly Regex LunchRegex = + new Regex(DateTimeDefinitions.LunchRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex NightRegex = + new Regex(DateTimeDefinitions.NightRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex HalfTokenRegex = + new Regex(DateTimeDefinitions.HalfTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex QuarterTokenRegex = + new Regex(DateTimeDefinitions.QuarterTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex ThreeQuarterTokenRegex = + new Regex(DateTimeDefinitions.ThreeQuarterTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex ToTokenRegex = + new Regex(DateTimeDefinitions.ToTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex ToHalfTokenRegex = + new 
Regex(DateTimeDefinitions.ToHalfTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex ForHalfTokenRegex = + new Regex(DateTimeDefinitions.ForHalfTokenRegex, RegexFlags, RegexTimeOut); + + public ArabicTimeParserConfiguration(ICommonDateTimeParserConfiguration config) + : base(config) + { + TimeTokenPrefix = DateTimeDefinitions.TimeTokenPrefix; + AtRegex = ArabicTimeExtractorConfiguration.AtRegex; + TimeRegexes = ArabicTimeExtractorConfiguration.TimeRegexList; + UtilityConfiguration = config.UtilityConfiguration; + Numbers = config.Numbers; + TimeZoneParser = config.TimeZoneParser; + } + + public string TimeTokenPrefix { get; } + + public Regex AtRegex { get; } + + public Regex MealTimeRegex { get; } + + public IEnumerable TimeRegexes { get; } + + public IImmutableDictionary Numbers { get; } + + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + + public IDateTimeParser TimeZoneParser { get; } + + public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool hasMin) + { + int deltaMin; + + var trimmedPrefix = prefix.Trim(); + + if (HalfTokenRegex.IsMatch(trimmedPrefix)) + { + deltaMin = -30; + } + else if (QuarterTokenRegex.IsMatch(trimmedPrefix)) + { + deltaMin = 15; + } + else if (ThreeQuarterTokenRegex.IsMatch(trimmedPrefix)) + { + deltaMin = 45; + } + else + { + var match = ArabicTimeExtractorConfiguration.LessThanOneHour.Match(trimmedPrefix); + var minStr = match.Groups["deltamin"].Value; + if (!string.IsNullOrWhiteSpace(minStr)) + { + deltaMin = int.Parse(minStr, CultureInfo.InvariantCulture); + } + else + { + minStr = match.Groups["deltaminnum"].Value; + deltaMin = Numbers[minStr]; + } + } + + if (ToHalfTokenRegex.IsMatch(trimmedPrefix)) + { + deltaMin = deltaMin - 30; + } + else if (ForHalfTokenRegex.IsMatch(trimmedPrefix)) + { + deltaMin = -deltaMin - 30; + } + else if (ToTokenRegex.IsMatch(trimmedPrefix)) + { + deltaMin = -deltaMin; + } + + min += deltaMin; + if (min < 0) + { + min += 60; + hour -= 1; + 
} + + hasMin = true; + } + + public void AdjustBySuffix(string suffix, ref int hour, ref int min, ref bool hasMin, ref bool hasAm, ref bool hasPm) + { + var deltaHour = 0; + var match = TimeSuffixFull.MatchExact(suffix, trim: true); + + if (match.Success) + { + var oclockStr = match.Groups["oclock"].Value; + if (string.IsNullOrEmpty(oclockStr)) + { + var matchAmStr = match.Groups[Constants.AmGroupName].Value; + if (!string.IsNullOrEmpty(matchAmStr)) + { + if (hour >= Constants.HalfDayHourCount) + { + deltaHour = -Constants.HalfDayHourCount; + } + else + { + hasAm = true; + } + } + + var matchPmStr = match.Groups[Constants.PmGroupName].Value; + if (!string.IsNullOrEmpty(matchPmStr)) + { + if (hour < Constants.HalfDayHourCount) + { + deltaHour = Constants.HalfDayHourCount; + } + + if (LunchRegex.IsMatch(matchPmStr)) + { + if (hour >= 10 && hour <= Constants.HalfDayHourCount) + { + deltaHour = 0; + if (hour == Constants.HalfDayHourCount) + { + hasPm = true; + } + else + { + hasAm = true; + } + } + else + { + hasPm = true; + } + } + else if (NightRegex.IsMatch(matchPmStr)) + { + if (hour <= 3 || hour == Constants.HalfDayHourCount) + { + if (hour == Constants.HalfDayHourCount) + { + hour = 0; + } + + deltaHour = 0; + hasAm = true; + } + else + { + hasPm = true; + } + } + else + { + hasPm = true; + } + } + } + } + + hour = (hour + deltaHour) % 24; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicTimePeriodParserConfiguration.cs new file mode 100644 index 0000000000..0b05ea679f --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicTimePeriodParserConfiguration.cs @@ -0,0 +1,130 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Arabic; +using Microsoft.Recognizers.Text.DateTime.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Arabic +{ + public class ArabicTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, ITimePeriodParserConfiguration + { + public ArabicTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration config) + : base(config) + { + TimeExtractor = config.TimeExtractor; + IntegerExtractor = config.IntegerExtractor; + TimeParser = config.TimeParser; + TimeZoneParser = config.TimeZoneParser; + + PureNumberFromToRegex = ArabicTimePeriodExtractorConfiguration.PureNumFromTo; + PureNumberBetweenAndRegex = ArabicTimePeriodExtractorConfiguration.PureNumBetweenAnd; + SpecificTimeFromToRegex = ArabicTimePeriodExtractorConfiguration.SpecificTimeFromTo; + SpecificTimeBetweenAndRegex = ArabicTimePeriodExtractorConfiguration.SpecificTimeBetweenAnd; + TimeOfDayRegex = ArabicTimePeriodExtractorConfiguration.TimeOfDayRegex; + GeneralEndingRegex = ArabicTimePeriodExtractorConfiguration.GeneralEndingRegex; + TillRegex = ArabicTimePeriodExtractorConfiguration.TillRegex; + + Numbers = config.Numbers; + UtilityConfiguration = config.UtilityConfiguration; + } + + public IDateTimeExtractor TimeExtractor { get; } + + public IDateTimeParser TimeParser { get; } + + public IExtractor IntegerExtractor { get; } + + public IDateTimeParser TimeZoneParser { get; } + + public Regex SpecificTimeFromToRegex { get; } + + public Regex SpecificTimeBetweenAndRegex { get; } + + public Regex PureNumberFromToRegex { get; } + + public Regex PureNumberBetweenAndRegex { get; } + + public Regex TimeOfDayRegex { get; } + + public Regex GeneralEndingRegex { get; } + + public Regex TillRegex { get; } + + public IImmutableDictionary Numbers { get; } + + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + + public bool 
GetMatchedTimeRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) + { + var trimmedText = text.Trim(); + if (trimmedText.EndsWith("s", StringComparison.Ordinal)) + { + trimmedText = trimmedText.Substring(0, trimmedText.Length - 1); + } + + beginHour = 0; + endHour = 0; + endMin = 0; + + var timeOfDay = string.Empty; + + if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) + { + timeOfDay = Constants.Morning; + } + else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) + { + timeOfDay = Constants.Afternoon; + } + else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) + { + timeOfDay = Constants.Evening; + } + else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o, StringComparison.Ordinal))) + { + timeOfDay = Constants.Daytime; + } + else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) + { + timeOfDay = Constants.Night; + } + else if (DateTimeDefinitions.BusinessHourSplitStrings.All(o => trimmedText.Contains(o))) + { + timeOfDay = Constants.BusinessHour; + } + else if (DateTimeDefinitions.MealtimeBreakfastTermList.Any(o => trimmedText.Contains(o))) + { + timeOfDay = Constants.MealtimeBreakfast; + } + else if (DateTimeDefinitions.MealtimeBrunchTermList.Any(o => trimmedText.Contains(o))) + { + timeOfDay = Constants.MealtimeBrunch; + } + else if (DateTimeDefinitions.MealtimeLunchTermList.Any(o => trimmedText.Contains(o))) + { + timeOfDay = Constants.MealtimeLunch; + } + else if (DateTimeDefinitions.MealtimeDinnerTermList.Any(o => trimmedText.Contains(o))) + { + timeOfDay = Constants.MealtimeDinner; + } + else + { + timex = null; + return false; + } + + var parseResult = TimexUtility.ResolveTimeOfDay(timeOfDay); + timex = parseResult.Timex; + beginHour = parseResult.BeginHour; + endHour = parseResult.EndHour; + 
endMin = parseResult.EndMin; + + return true; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicTimeZoneParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicTimeZoneParserConfiguration.cs new file mode 100644 index 0000000000..aaace4015c --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/ArabicTimeZoneParserConfiguration.cs @@ -0,0 +1,32 @@ +using System.Collections.Generic; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Arabic; + +namespace Microsoft.Recognizers.Text.DateTime.Arabic +{ + public class ArabicTimeZoneParserConfiguration : BaseDateTimeOptionsConfiguration, ITimeZoneParserConfiguration + { + public static readonly string TimeZoneEndRegex = TimeZoneDefinitions.TimeZoneEndRegex; + + public static readonly Dictionary FullToMinMapping = TimeZoneDefinitions.FullToMinMapping; + + public static readonly Regex DirectUtcRegex = + new Regex(TimeZoneDefinitions.DirectUtcRegex, RegexOptions.IgnoreCase | RegexOptions.Singleline); + + public static readonly Dictionary AbbrToMinMapping = TimeZoneDefinitions.AbbrToMinMapping; + + public ArabicTimeZoneParserConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + } + + string ITimeZoneParserConfiguration.TimeZoneEndRegex => TimeZoneEndRegex; + + Dictionary ITimeZoneParserConfiguration.FullToMinMapping => FullToMinMapping; + + Regex ITimeZoneParserConfiguration.DirectUtcRegex => DirectUtcRegex; + + Dictionary ITimeZoneParserConfiguration.AbbrToMinMapping => AbbrToMinMapping; + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/TimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/TimeParser.cs new file mode 100644 index 0000000000..2023c81afd --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Parsers/TimeParser.cs @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System.Globalization; + +using Microsoft.Recognizers.Text.Utilities; + +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime.Arabic +{ + public class TimeParser : BaseTimeParser + { + public TimeParser(ITimeParserConfiguration configuration) + : base(configuration) + { + } + + protected override DateTimeResolutionResult InternalParse(string text, DateObject referenceTime) + { + var innerResult = base.InternalParse(text, referenceTime); + if (!innerResult.Success) + { + innerResult = ParseIsh(text, referenceTime); + } + + return innerResult; + } + + // parse "noonish", "11-ish" + private DateTimeResolutionResult ParseIsh(string text, DateObject referenceTime) + { + var ret = new DateTimeResolutionResult(); + var lowerText = text; + + var match = ArabicTimeExtractorConfiguration.IshRegex.MatchExact(lowerText, trim: true); + + if (match.Success) + { + var hourStr = match.Groups[Constants.HourGroupName].Value; + var hour = Constants.HalfDayHourCount; + if (!string.IsNullOrEmpty(hourStr)) + { + hour = int.Parse(hourStr, CultureInfo.InvariantCulture); + } + + ret.Timex = "T" + hour.ToString("D2", CultureInfo.InvariantCulture); + ret.FutureValue = + ret.PastValue = + DateObject.MinValue.SafeCreateFromValue(referenceTime.Year, referenceTime.Month, referenceTime.Day, hour, 0, 0); + ret.Success = true; + return ret; + } + + return ret; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Utilities/ArabicDatetimeUtilityConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Utilities/ArabicDatetimeUtilityConfiguration.cs new file mode 100644 index 0000000000..c1092f2e78 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Utilities/ArabicDatetimeUtilityConfiguration.cs @@ -0,0 +1,32 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Arabic; +using Microsoft.Recognizers.Text.DateTime.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Arabic.Utilities +{ + public class ArabicDatetimeUtilityConfiguration : BaseDatetimeUtilityConfiguration + { + public ArabicDatetimeUtilityConfiguration() + : base( + DateTimeDefinitions.AgoRegex, + DateTimeDefinitions.LaterRegex, + DateTimeDefinitions.InConnectorRegex, + DateTimeDefinitions.SinceYearSuffixRegex, + DateTimeDefinitions.WithinNextPrefixRegex, + DateTimeDefinitions.AmDescRegex, + DateTimeDefinitions.PmDescRegex, + DateTimeDefinitions.AmPmDescRegex, + DateTimeDefinitions.RangeUnitRegex, + DateTimeDefinitions.TimeUnitRegex, + DateTimeDefinitions.DateUnitRegex, + DateTimeDefinitions.CommonDatePrefixRegex, + DateTimeDefinitions.RangePrefixRegex, + RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft, + DateTimeDefinitions.CheckBothBeforeAfter) + { + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateExtractorConfiguration.cs index 3570344924..8fb9c11f2b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateExtractorConfiguration.cs @@ -1,196 +1,146 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Chinese; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseDateExtractorConfiguration : AbstractYearExtractor, IDateTimeExtractor + public class ChineseDateExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKDateExtractorConfiguration { - public static readonly string ExtractorName = Constants.SYS_DATETIME_DATE; // "Date"; - public static readonly Regex MonthRegex = new Regex(DateTimeDefinitions.MonthRegex, RegexFlags); + public static readonly Regex WeekDayRegex = new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LunarRegex = new Regex(DateTimeDefinitions.LunarRegex, RegexFlags, RegexTimeOut); - public static readonly Regex DayRegex = new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + public static readonly Regex ThisRegex = new Regex(DateTimeDefinitions.DateThisRegex, RegexFlags, RegexTimeOut); - public static readonly Regex DayRegexInChinese = new Regex(DateTimeDefinitions.DateDayRegexInChinese, RegexFlags); + public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DateLastRegex, RegexFlags, RegexTimeOut); - public static readonly Regex DayRegexNumInChinese = new Regex(DateTimeDefinitions.DayRegexNumInChinese, RegexFlags); + public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DateNextRegex, RegexFlags, RegexTimeOut); - public static readonly Regex MonthNumRegex = new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + public static readonly Regex NextNextRegex = new Regex(DateTimeDefinitions.DateNextNextRegex, RegexFlags, RegexTimeOut); - public static readonly Regex 
YearRegex = new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + public static readonly Regex LastLastRegex = new Regex(DateTimeDefinitions.DateLastLastRegex, RegexFlags, RegexTimeOut); - public static readonly Regex RelativeRegex = new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags); + public static readonly Regex SpecialDayRegex = new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, RegexTimeOut); - public static readonly Regex ZeroToNineIntegerRegexChs = new Regex(DateTimeDefinitions.ZeroToNineIntegerRegexChs, RegexFlags); + public static readonly Regex WeekDayOfMonthRegex = new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut); - public static readonly Regex YearInChineseRegex = new Regex(DateTimeDefinitions.DateYearInChineseRegex, RegexFlags); + public static readonly Regex WeekDayAndDayRegex = new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags, RegexTimeOut); - public static readonly Regex WeekDayRegex = new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + public static readonly Regex DurationRelativeDurationUnitRegex = new Regex(DateTimeDefinitions.DurationRelativeDurationUnitRegex, RegexFlags, RegexTimeOut); - public static readonly Regex LunarRegex = new Regex(DateTimeDefinitions.LunarRegex, RegexFlags); + public static readonly Regex SpecialDayWithNumRegex = new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags, RegexTimeOut); - public static readonly Regex ThisRegex = new Regex(DateTimeDefinitions.DateThisRegex, RegexFlags); + public static readonly Regex SpecialDate = new Regex(DateTimeDefinitions.SpecialDate, RegexFlags, RegexTimeOut); - public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DateLastRegex, RegexFlags); + public static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); - public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DateNextRegex, RegexFlags); + public static readonly Regex AfterRegex = 
new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); - public static readonly Regex SpecialDayRegex = new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags); + public static readonly Regex WeekDayStartEnd = new Regex(DateTimeDefinitions.WeekDayStartEnd, RegexFlags, RegexTimeOut); - public static readonly Regex WeekDayOfMonthRegex = new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags); + public static readonly Regex DateTimePeriodUnitRegex = new Regex(DateTimeDefinitions.DateTimePeriodUnitRegex, RegexFlags, RegexTimeOut); - public static readonly Regex ThisRe = new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + public static readonly Regex RangeConnectorSymbolRegex = new Regex(DateTimeDefinitions.DatePeriodTillRegex, RegexFlags, RegexTimeOut); - public static readonly Regex LastRe = new Regex(DateTimeDefinitions.LastPrefixRegex, RegexFlags); + public static readonly Regex MonthRegex = new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DayRegex = new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DayRegexInCJK = new Regex(DateTimeDefinitions.DateDayRegexInCJK, RegexFlags, RegexTimeOut); + public static readonly Regex DayRegexNumInCJK = new Regex(DateTimeDefinitions.DayRegexNumInCJK, RegexFlags, RegexTimeOut); + public static readonly Regex MonthNumRegex = new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); + public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + public static readonly Regex RelativeRegex = new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut); + public static readonly Regex RelativeMonthRegex = new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex ZeroToNineIntegerRegexCJK = new Regex(DateTimeDefinitions.ZeroToNineIntegerRegexCJK, RegexFlags, RegexTimeOut); + public 
static readonly Regex YearInCJKRegex = new Regex(DateTimeDefinitions.DateYearInCJKRegex, RegexFlags, RegexTimeOut); + public static readonly Regex ThisRe = new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastRe = new Regex(DateTimeDefinitions.LastPrefixRegex, RegexFlags, RegexTimeOut); + public static readonly Regex NextRe = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DynastyYearRegex = new Regex(DateTimeDefinitions.DynastyYearRegex, RegexFlags, RegexTimeOut); + public static readonly string DynastyStartYear = DateTimeDefinitions.DynastyStartYear; + public static readonly ImmutableDictionary DynastyYearMap = DateTimeDefinitions.DynastyYearMap.ToImmutableDictionary(); - public static readonly Regex NextRe = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public static readonly Regex SpecialDate = new Regex(DateTimeDefinitions.SpecialDate, RegexFlags); + public ChineseDateExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + var durationConfig = new BaseDateTimeOptionsConfiguration(config.Culture, DateTimeOptions.None); - public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + DurationExtractor = new BaseCJKDurationExtractor(new ChineseDurationExtractorConfiguration(durationConfig)); - public static readonly IParser NumberParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Chinese))); + ImplicitDateList = new List + { + LunarRegex, SpecialDayRegex, ThisRegex, LastLastRegex, LastRegex, NextNextRegex, NextRegex, + WeekDayRegex, WeekDayOfMonthRegex, SpecialDate, + }; - public static readonly 
Regex[] DateRegexList = - { // (农历)?(2016年)?一月三日(星期三)? - new Regex(DateTimeDefinitions.DateRegexList1, RegexFlags), + var dateRegex1 = new Regex(DateTimeDefinitions.DateRegexList1, RegexFlags, RegexTimeOut); // (2015年)?(农历)?十月初一(星期三)? - new Regex(DateTimeDefinitions.DateRegexList2, RegexFlags), + var dateRegex2 = new Regex(DateTimeDefinitions.DateRegexList2, RegexFlags, RegexTimeOut); // (2015年)?(农历)?十月二十(星期三)? - new Regex(DateTimeDefinitions.DateRegexList3, RegexFlags), + var dateRegex3 = new Regex(DateTimeDefinitions.DateRegexList3, RegexFlags, RegexTimeOut); - // 7/23 - new Regex(DateTimeDefinitions.DateRegexList4, RegexFlags), + // 2015-12-23 - This regex represents the standard format in Chinese dates (YMD) and has precedence over other orderings + var dateRegex8 = new Regex(DateTimeDefinitions.DateRegexList8, RegexFlags, RegexTimeOut); // 23/7 - new Regex(DateTimeDefinitions.DateRegexList5, RegexFlags), + var dateRegex5 = new Regex(DateTimeDefinitions.DateRegexList5, RegexFlags, RegexTimeOut); - DateTimeDefinitions.DefaultLanguageFallback == Constants.DefaultLanguageFallback_DMY ? - - // 23-3-2015 - new Regex(DateTimeDefinitions.DateRegexList7, RegexFlags) : - - // 3-23-2017 - new Regex(DateTimeDefinitions.DateRegexList6, RegexFlags), - - DateTimeDefinitions.DefaultLanguageFallback == Constants.DefaultLanguageFallback_DMY ? 
- - // 3-23-2017 - new Regex(DateTimeDefinitions.DateRegexList6, RegexFlags) : + // 7/23 + var dateRegex4 = new Regex(DateTimeDefinitions.DateRegexList4, RegexFlags, RegexTimeOut); - // 23-3-2015 - new Regex(DateTimeDefinitions.DateRegexList7, RegexFlags), + // 23-3-2017 + var dateRegex7 = new Regex(DateTimeDefinitions.DateRegexList7, RegexFlags, RegexTimeOut); - // 2015-12-23 - new Regex(DateTimeDefinitions.DateRegexList8, RegexFlags), - }; + // 3-23-2015 + var dateRegex6 = new Regex(DateTimeDefinitions.DateRegexList6, RegexFlags, RegexTimeOut); - public static readonly Regex[] ImplicitDateList = - { - LunarRegex, SpecialDayRegex, ThisRegex, LastRegex, NextRegex, - WeekDayRegex, WeekDayOfMonthRegex, SpecialDate, - }; + // Regex precedence where the order between D and M varies is controlled by DefaultLanguageFallback + var enableDmy = DateTimeDefinitions.DefaultLanguageFallback == Constants.DefaultLanguageFallback_DMY; + var enableYmd = DateTimeDefinitions.DefaultLanguageFallback == Constants.DefaultLanguageFallback_YMD; - public static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + DateRegexList = new List { dateRegex1, dateRegex2, dateRegex3, dateRegex8 }; + DateRegexList = DateRegexList.Concat( + enableDmy ? + new[] { dateRegex5, dateRegex4, dateRegex7, dateRegex6 } : + enableYmd ? 
+ new[] { dateRegex4, dateRegex5, dateRegex7, dateRegex6 } : + new[] { dateRegex4, dateRegex5, dateRegex6, dateRegex7 }); - public static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + } - public static readonly Regex DateTimePeriodUnitRegex = new Regex(DateTimeDefinitions.DateTimePeriodUnitRegex, RegexFlags); + public IEnumerable DateRegexList { get; } - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public IEnumerable ImplicitDateList { get; } - private static readonly ChineseDurationExtractorConfiguration DurationExtractor = new ChineseDurationExtractorConfiguration(); + public IDateTimeExtractor DurationExtractor { get; } - public ChineseDateExtractorConfiguration(IDateExtractorConfiguration config = null) - : base(config) - { - } + Regex ICJKDateExtractorConfiguration.DateTimePeriodUnitRegex => DateTimePeriodUnitRegex; - public override List Extract(string text) - { - return Extract(text, DateObject.Now); - } + Regex ICJKDateExtractorConfiguration.BeforeRegex => BeforeRegex; - public override List Extract(string text, DateObject referenceTime) - { - var tokens = new List(); - tokens.AddRange(BasicRegexMatch(text)); - tokens.AddRange(ImplicitDate(text)); - tokens.AddRange(DurationWithBeforeAndAfter(text, referenceTime)); + Regex ICJKDateExtractorConfiguration.AfterRegex => AfterRegex; - return Token.MergeAllTokens(tokens, text, ExtractorName); - } + Regex ICJKDateExtractorConfiguration.WeekDayStartEnd => WeekDayStartEnd; - // Match basic patterns in DateRegexList - private static List BasicRegexMatch(string text) - { - var ret = new List(); - foreach (var regex in DateRegexList) - { - var matches = regex.Matches(text); - foreach (Match match in matches) - { - ret.Add(new Token(match.Index, match.Index + match.Length)); - } - } - - return ret; - } + Regex ICJKDateExtractorConfiguration.RangeConnectorSymbolRegex => RangeConnectorSymbolRegex; - // Match several other implicit 
cases - private static List ImplicitDate(string text) - { - var ret = new List(); - foreach (var regex in ImplicitDateList) - { - var matches = regex.Matches(text); - foreach (Match match in matches) - { - ret.Add(new Token(match.Index, match.Index + match.Length)); - } - } - - return ret; - } + public Dictionary AmbiguityDateFiltersDict => DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityDateFiltersDict); - // process case like "三天前" "两个月前" - private static List DurationWithBeforeAndAfter(string text, DateObject referenceTime) - { - var ret = new List(); - var durationEr = DurationExtractor.Extract(text, referenceTime); - foreach (var er in durationEr) - { - // Only handles date durations here - // Cases with dateTime durations will be handled in DateTime Extractor - if (DateTimePeriodUnitRegex.Match(er.Text).Success) - { - continue; - } - - var pos = (int)er.Start + (int)er.Length; - if (pos < text.Length) - { - var suffix = text.Substring(pos); - var beforeMatch = BeforeRegex.Match(suffix); - var afterMatch = AfterRegex.Match(suffix); - - if ((beforeMatch.Success && suffix.StartsWith(beforeMatch.Value)) || (afterMatch.Success && suffix.StartsWith(afterMatch.Value))) - { - var metadata = new Metadata() { IsDurationWithBeforeAndAfter = true }; - ret.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 0) + 1, metadata)); - } - } - } - - return ret; - } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDatePeriodExtractorConfiguration.cs index 2a2d4fc06a..1cb6d0df23 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDatePeriodExtractorConfiguration.cs @@ -1,91 +1,114 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; -using Microsoft.Recognizers.Text; +using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Chinese; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseDatePeriodExtractorConfiguration : IDateTimeExtractor + public class ChineseDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKDatePeriodExtractorConfiguration { - public static readonly string ExtractorName = Constants.SYS_DATETIME_DATEPERIOD; // "DatePeriod"; + public static readonly Regex TillRegex = new Regex(DateTimeDefinitions.DatePeriodTillRegex, RegexFlags, RegexTimeOut); - public static readonly Regex TillRegex = new Regex(DateTimeDefinitions.DatePeriodTillRegex, RegexFlags); + public static readonly Regex RangePrefixRegex = new Regex(DateTimeDefinitions.DatePeriodRangePrefixRegex, RegexFlags, RegexTimeOut); - public static readonly Regex DayRegex = new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + public static readonly Regex RangeSuffixRegex = new Regex(DateTimeDefinitions.DatePeriodRangeSuffixRegex, RegexFlags, RegexTimeOut); - public static readonly Regex DayRegexInChinese = new Regex(DateTimeDefinitions.DatePeriodDayRegexInChinese, RegexFlags); + public static readonly Regex StrictYearRegex = new Regex(DateTimeDefinitions.StrictYearRegex, RegexFlags, RegexTimeOut); - public static readonly Regex MonthNumRegex = new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + public static readonly Regex YearInCJKRegex = new Regex(DateTimeDefinitions.DatePeriodYearInCJKRegex, RegexFlags, RegexTimeOut); - public static readonly Regex ThisRegex = new Regex(DateTimeDefinitions.DatePeriodThisRegex, RegexFlags); + // for case "(从)?(2017年)?一月十日到十二日" + public static readonly Regex 
SimpleCasesRegex = new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags, RegexTimeOut); - public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DatePeriodLastRegex, RegexFlags); + public static readonly Regex YearAndMonth = new Regex(DateTimeDefinitions.YearAndMonth, RegexFlags, RegexTimeOut); - public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DatePeriodNextRegex, RegexFlags); + // 2017.12, 2017-12, 2017/12, 12/2017 + public static readonly Regex PureNumYearAndMonth = new Regex(DateTimeDefinitions.PureNumYearAndMonth, RegexFlags, RegexTimeOut); - public static readonly Regex RelativeMonthRegex = new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + public static readonly Regex SimpleYearAndMonth = new Regex(DateTimeDefinitions.SimpleYearAndMonth, RegexFlags, RegexTimeOut); - public static readonly Regex MonthRegex = new Regex(DateTimeDefinitions.MonthRegex, RegexFlags); + public static readonly Regex OneWordPeriodRegex = new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags, RegexTimeOut); - public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + public static readonly Regex WeekOfMonthRegex = new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags, RegexTimeOut); - public static readonly Regex StrictYearRegex = new Regex(DateTimeDefinitions.StrictYearRegex, RegexFlags); + public static readonly Regex WeekOfYearRegex = new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags, RegexTimeOut); - public static readonly Regex YearRegexInNumber = new Regex(DateTimeDefinitions.YearRegexInNumber, RegexFlags); + public static readonly Regex WeekOfDateRegex = new Regex(DateTimeDefinitions.WeekOfDateRegex, RegexFlags, RegexTimeOut); - public static readonly Regex ZeroToNineIntegerRegexChs = new Regex(DateTimeDefinitions.ZeroToNineIntegerRegexChs, RegexFlags); + public static readonly Regex MonthOfDateRegex = new Regex(DateTimeDefinitions.MonthOfDateRegex, RegexFlags, 
RegexTimeOut); - public static readonly Regex YearInChineseRegex = new Regex(DateTimeDefinitions.DatePeriodYearInChineseRegex, RegexFlags); + public static readonly Regex WhichWeekRegex = new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags, RegexTimeOut); - public static readonly Regex MonthSuffixRegex = new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags); + public static readonly Regex FollowedUnit = new Regex(DateTimeDefinitions.FollowedUnit, RegexFlags, RegexTimeOut); - // for case "(从)?(2017年)?一月十日到十二日" - public static readonly Regex SimpleCasesRegex = new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags); + public static readonly Regex NumberCombinedWithUnit = new Regex(DateTimeDefinitions.NumberCombinedWithUnit, RegexFlags, RegexTimeOut); - public static readonly Regex YearAndMonth = new Regex(DateTimeDefinitions.YearAndMonth, RegexFlags); + public static readonly Regex YearToYear = new Regex(DateTimeDefinitions.YearToYear, RegexFlags, RegexTimeOut); - // 2017.12, 2017-12, 2017/12, 12/2017 - public static readonly Regex PureNumYearAndMonth = new Regex(DateTimeDefinitions.PureNumYearAndMonth, RegexFlags); + public static readonly Regex YearToYearSuffixRequired = new Regex(DateTimeDefinitions.YearToYearSuffixRequired, RegexFlags, RegexTimeOut); - public static readonly Regex OneWordPeriodRegex = new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags); + public static readonly Regex MonthToMonth = new Regex(DateTimeDefinitions.MonthToMonth, RegexFlags, RegexTimeOut); - public static readonly Regex WeekOfMonthRegex = new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags); + public static readonly Regex MonthToMonthSuffixRequired = new Regex(DateTimeDefinitions.MonthToMonthSuffixRequired, RegexFlags, RegexTimeOut); - public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.UnitRegex, RegexFlags); + public static readonly Regex DayToDay = new Regex(DateTimeDefinitions.DayToDay, RegexFlags, RegexTimeOut); - public static 
readonly Regex FollowedUnit = new Regex(DateTimeDefinitions.FollowedUnit, RegexFlags); + public static readonly Regex MonthDayRange = new Regex(DateTimeDefinitions.MonthDayRange, RegexFlags, RegexTimeOut); - public static readonly Regex NumberCombinedWithUnit = new Regex(DateTimeDefinitions.NumberCombinedWithUnit, RegexFlags); + public static readonly Regex DayRegexForPeriod = new Regex(DateTimeDefinitions.DayRegexForPeriod, RegexFlags, RegexTimeOut); - public static readonly Regex YearToYear = new Regex(DateTimeDefinitions.YearToYear, RegexFlags); + public static readonly Regex PastRegex = new Regex(DateTimeDefinitions.PastRegex, RegexFlags, RegexTimeOut); - public static readonly Regex YearToYearSuffixRequired = new Regex(DateTimeDefinitions.YearToYearSuffixRequired, RegexFlags); + public static readonly Regex FutureRegex = new Regex(DateTimeDefinitions.FutureRegex, RegexFlags, RegexTimeOut); - public static readonly Regex MonthToMonth = new Regex(DateTimeDefinitions.MonthToMonth, RegexFlags); + public static readonly Regex WeekWithWeekDayRangeRegex = new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags, RegexTimeOut); - public static readonly Regex MonthToMonthSuffixRequired = new Regex(DateTimeDefinitions.MonthToMonthSuffixRequired, RegexFlags); + public static readonly Regex FirstLastOfYearRegex = new Regex(DateTimeDefinitions.FirstLastOfYearRegex, RegexFlags, RegexTimeOut); - public static readonly Regex PastRegex = new Regex(DateTimeDefinitions.PastRegex, RegexFlags); + public static readonly Regex SeasonWithYear = new Regex(DateTimeDefinitions.SeasonWithYear, RegexFlags, RegexTimeOut); - public static readonly Regex FutureRegex = new Regex(DateTimeDefinitions.FutureRegex, RegexFlags); + public static readonly Regex QuarterRegex = new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags, RegexTimeOut); - public static readonly Regex SeasonRegex = new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags); + public static readonly Regex DecadeRegex = 
new Regex(DateTimeDefinitions.DecadeRegex, RegexFlags, RegexTimeOut); - public static readonly Regex SeasonWithYear = new Regex(DateTimeDefinitions.SeasonWithYear, RegexFlags); + public static readonly Regex CenturyRegex = new Regex(DateTimeDefinitions.CenturyRegex, RegexFlags, RegexTimeOut); - public static readonly Regex QuarterRegex = new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags); + public static readonly Regex ComplexDatePeriodRegex = new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags, RegexTimeOut); - public static readonly Regex DecadeRegex = new Regex(DateTimeDefinitions.DecadeRegex, RegexFlags); + public static readonly Regex SpecialMonthRegex = new Regex(DateTimeDefinitions.SpecialMonthRegex, RegexFlags, RegexTimeOut); - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public static readonly Regex SpecialYearRegex = new Regex(DateTimeDefinitions.SpecialYearRegex, RegexFlags, RegexTimeOut); - private static readonly ChineseDateExtractorConfiguration DatePointExtractor = new ChineseDateExtractorConfiguration(); + public static readonly Regex DayRegex = new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DayRegexInCJK = new Regex(DateTimeDefinitions.DatePeriodDayRegexInCJK, RegexFlags, RegexTimeOut); + public static readonly Regex MonthNumRegex = new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); + public static readonly Regex ThisRegex = new Regex(DateTimeDefinitions.DatePeriodThisRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DateUnitRegex = new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DatePeriodLastRegex, RegexFlags, RegexTimeOut); + public static readonly Regex NextNextRegex = new Regex(DateTimeDefinitions.DatePeriodNextNextRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastLastRegex 
= new Regex(DateTimeDefinitions.DatePeriodLastLastRegex, RegexFlags, RegexTimeOut); + public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DatePeriodNextRegex, RegexFlags, RegexTimeOut); + public static readonly Regex RelativeMonthRegex = new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LaterEarlyPeriodRegex = new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DatePointWithAgoAndLater = new Regex(DateTimeDefinitions.DatePointWithAgoAndLater, RegexFlags, RegexTimeOut); + public static readonly Regex ReferenceDatePeriodRegex = new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags, RegexTimeOut); + public static readonly Regex MonthRegex = new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + public static readonly Regex YearRegexInNumber = new Regex(DateTimeDefinitions.YearRegexInNumber, RegexFlags, RegexTimeOut); + public static readonly Regex ZeroToNineIntegerRegexCJK = new Regex(DateTimeDefinitions.ZeroToNineIntegerRegexCJK, RegexFlags, RegexTimeOut); + public static readonly Regex MonthSuffixRegex = new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags, RegexTimeOut); + public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.UnitRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DurationUnitRegex = new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); + public static readonly Regex SeasonRegex = new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags, RegexTimeOut); - private static readonly IntegerExtractor IntegerExtractor = new IntegerExtractor(); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex[] SimpleCasesRegexes = { @@ -98,144 +121,66 @@ public class 
ChineseDatePeriodExtractorConfiguration : IDateTimeExtractor MonthToMonthSuffixRequired, YearAndMonth, PureNumYearAndMonth, - YearInChineseRegex, + YearInCJKRegex, + SpecialMonthRegex, + SpecialYearRegex, WeekOfMonthRegex, + WeekOfYearRegex, + WeekOfDateRegex, + MonthOfDateRegex, + WhichWeekRegex, SeasonWithYear, QuarterRegex, DecadeRegex, + CenturyRegex, + ComplexDatePeriodRegex, }; - public List Extract(string text) + public ChineseDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) { - return Extract(text, DateObject.Now); - } + DatePointExtractor = new BaseCJKDateExtractor(new ChineseDateExtractorConfiguration(this)); + DurationExtractor = new BaseCJKDurationExtractor(new ChineseDurationExtractorConfiguration(this)); - public List Extract(string text, DateObject referenceTime) - { - var tokens = new List(); - tokens.AddRange(MatchSimpleCases(text)); - tokens.AddRange(MergeTwoTimePoints(text, referenceTime)); - tokens.AddRange(MatchNumberWithUnit(text)); - - return Token.MergeAllTokens(tokens, text, ExtractorName); - } - - // match pattern in simple case - private static List MatchSimpleCases(string text) - { - var ret = new List(); - foreach (var regex in SimpleCasesRegexes) + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) { - var matches = regex.Matches(text); - foreach (Match match in matches) - { - ret.Add(new Token(match.Index, match.Index + match.Length)); - } + numOptions = NumberOptions.NoProtoCache; } - return ret; + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = new IntegerExtractor(numConfig); } - // merge two date - private static List MergeTwoTimePoints(string text, DateObject referenceTime) - { - var ret = new List(); - var er = DatePointExtractor.Extract(text, referenceTime); - if (er.Count <= 1) - { - return ret; - } + public IDateTimeExtractor DatePointExtractor { get; } - // merge '{TimePoint} 到 
{TimePoint}' - var idx = 0; - while (idx < er.Count - 1) - { - var middleBegin = er[idx].Start + er[idx].Length ?? 0; - var middleEnd = er[idx + 1].Start ?? 0; - if (middleBegin >= middleEnd) - { - idx++; - continue; - } - - var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim(); - - if (TillRegex.IsExactMatch(middleStr, trim: true)) - { - var periodBegin = er[idx].Start ?? 0; - var periodEnd = (er[idx + 1].Start ?? 0) + (er[idx + 1].Length ?? 0); - - // handle "从" - var beforeStr = text.Substring(0, periodBegin); - if (beforeStr.Trim().EndsWith("从")) - { - periodBegin = beforeStr.LastIndexOf("从", StringComparison.Ordinal); - } - - ret.Add(new Token(periodBegin, periodEnd)); - idx += 2; - continue; - } - - idx++; - } + public IDateTimeExtractor DurationExtractor { get; } - return ret; - } + public IExtractor IntegerExtractor { get; } - // extract case like "前两年" "前三个月" - private static List MatchNumberWithUnit(string text) - { - var ret = new List(); + IEnumerable ICJKDatePeriodExtractorConfiguration.SimpleCasesRegexes => SimpleCasesRegexes; - var durations = new List(); - var ers = IntegerExtractor.Extract(text); + Regex ICJKDatePeriodExtractorConfiguration.TillRegex => TillRegex; - foreach (var er in ers) - { - var afterStr = text.Substring(er.Start + er.Length ?? 0); - var match = FollowedUnit.MatchBegin(afterStr, trim: true); + Regex ICJKDatePeriodExtractorConfiguration.FutureRegex => FutureRegex; - if (match.Success) - { - durations.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 
0) + match.Length)); - } - } + Regex ICJKDatePeriodExtractorConfiguration.PastRegex => PastRegex; - if (NumberCombinedWithUnit.IsMatch(text)) - { - var matches = NumberCombinedWithUnit.Matches(text); - foreach (Match match in matches) - { - durations.Add(new Token(match.Index, match.Index + match.Length)); - } - } + Regex ICJKDatePeriodExtractorConfiguration.DateUnitRegex => DateUnitRegex; - foreach (var duration in durations) - { - var beforeStr = text.Substring(0, duration.Start); - if (string.IsNullOrWhiteSpace(beforeStr)) - { - continue; - } - - var match = PastRegex.MatchEnd(beforeStr, trim: true); - - if (match.Success) - { - ret.Add(new Token(match.Index, duration.End)); - continue; - } - - match = FutureRegex.MatchEnd(beforeStr, trim: true); - - if (match.Success) - { - ret.Add(new Token(match.Index, duration.End)); - } - } + Regex ICJKDatePeriodExtractorConfiguration.FirstLastOfYearRegex => FirstLastOfYearRegex; - return ret; - } + Regex ICJKDatePeriodExtractorConfiguration.UnitRegex => UnitRegex; + + Regex ICJKDatePeriodExtractorConfiguration.NumberCombinedWithUnit => NumberCombinedWithUnit; + + Regex ICJKDatePeriodExtractorConfiguration.FollowedUnit => FollowedUnit; + + Regex ICJKDatePeriodExtractorConfiguration.RangePrefixRegex => RangePrefixRegex; + + Regex ICJKDatePeriodExtractorConfiguration.RangeSuffixRegex => RangeSuffixRegex; + + public Dictionary AmbiguityFiltersDict => null; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateTimeExtractorConfiguration.cs index 690745483a..580837a94d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateTimeExtractorConfiguration.cs @@ -1,189 +1,77 @@ -using System; +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseDateTimeExtractorConfiguration : IDateTimeExtractor + public class ChineseDateTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKDateTimeExtractorConfiguration { public static readonly string ExtractorName = Constants.SYS_DATETIME_DATETIME; // "DateTime"; - public static readonly Regex PrepositionRegex = new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + public static readonly Regex PrepositionRegex = new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); - public static readonly Regex NowRegex = new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + public static readonly Regex NowRegex = new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); - public static readonly Regex NightRegex = new Regex(DateTimeDefinitions.NightRegex, RegexFlags); + public static readonly Regex NightRegex = new Regex(DateTimeDefinitions.NightRegex, RegexFlags, RegexTimeOut); - public static readonly Regex TimeOfTodayRegex = new Regex(DateTimeDefinitions.TimeOfTodayRegex, RegexFlags); + public static readonly Regex TimeOfSpecialDayRegex = new Regex(DateTimeDefinitions.TimeOfSpecialDayRegex, RegexFlags, RegexTimeOut); - public static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + public static readonly Regex TimeOfDayRegex = new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); - public static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + public static readonly Regex BeforeRegex = new 
Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); - public static readonly Regex DateTimePeriodUnitRegex = new Regex(DateTimeDefinitions.DateTimePeriodUnitRegex, RegexFlags); + public static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public static readonly Regex DurationRelativeDurationUnitRegex = new Regex(DateTimeDefinitions.DurationRelativeDurationUnitRegex, RegexFlags, RegexTimeOut); - private static readonly ChineseDateExtractorConfiguration DatePointExtractor = new ChineseDateExtractorConfiguration(); + public static readonly Regex AgoLaterRegex = new Regex(DateTimeDefinitions.AgoLaterRegex, RegexFlags, RegexTimeOut); - private static readonly ChineseTimeExtractorConfiguration TimePointExtractor = new ChineseTimeExtractorConfiguration(); + public static readonly Regex DateTimePeriodUnitRegex = new Regex(DateTimeDefinitions.DateTimePeriodUnitRegex, RegexFlags, RegexTimeOut); - private static readonly ChineseDurationExtractorConfiguration DurationExtractor = new ChineseDurationExtractorConfiguration(); + public static readonly Regex ConnectorRegex = new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); - // Match now - public static List BasicRegexMatch(string text) - { - var ret = new List(); - text = text.Trim(); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - // handle "now" - var matches = NowRegex.Matches(text); - foreach (Match match in matches) - { - ret.Add(new Token(match.Index, match.Index + match.Length)); - } + public ChineseDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { - return ret; + DatePointExtractor = new BaseCJKDateExtractor(new ChineseDateExtractorConfiguration(this)); + TimePointExtractor = new BaseCJKTimeExtractor(new ChineseTimeExtractorConfiguration(this)); + 
DurationExtractor = new BaseCJKDurationExtractor(new ChineseDurationExtractorConfiguration(this)); } - // Merge a Date entity and a Time entity, like "明天早上七点" - public static List MergeDateAndTime(string text, DateObject referenceTime) - { - var ret = new List(); - var ers = DatePointExtractor.Extract(text, referenceTime); - if (ers.Count == 0) - { - return ret; - } - - ers.AddRange(TimePointExtractor.Extract(text, referenceTime)); - if (ers.Count < 2) - { - return ret; - } - - ers = ers.OrderBy(o => o.Start).ToList(); - - var i = 0; - while (i < ers.Count - 1) - { - var j = i + 1; - while (j < ers.Count && ers[i].IsOverlap(ers[j])) - { - j++; - } - - if (j >= ers.Count) - { - break; - } - - if (ers[i].Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) && - ers[j].Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) - { - var middleBegin = ers[i].Start + ers[i].Length ?? 0; - var middleEnd = ers[j].Start ?? 0; - if (middleBegin > middleEnd) - { - break; - } - - var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim(); - if (string.IsNullOrEmpty(middleStr) || middleStr.Equals(",") || PrepositionRegex.IsMatch(middleStr)) - { - var begin = ers[i].Start ?? 0; - var end = (ers[j].Start ?? 0) + (ers[j].Length ?? 0); - ret.Add(new Token(begin, end)); - } - - i = j + 1; - continue; - } - - i = j; - } - - return ret; - } + public IDateTimeExtractor DatePointExtractor { get; } - // Parse a specific time of today, tonight, this afternoon, "今天下午七点" - public static List TimeOfToday(string text, DateObject referenceTime) - { - var ret = new List(); - var ers = TimePointExtractor.Extract(text, referenceTime); - foreach (var er in ers) - { - var beforeStr = text.Substring(0, er.Start ?? 0); - - // handle "今晚7点" - var innerMatch = NightRegex.MatchBegin(er.Text, trim: true); - - if (innerMatch.Success) - { - beforeStr = text.Substring(0, (er.Start ?? 
0) + innerMatch.Length); - } - - if (string.IsNullOrEmpty(beforeStr)) - { - continue; - } - - var match = TimeOfTodayRegex.MatchEnd(beforeStr, trim: true); - - if (match.Success) - { - var begin = match.Index; - var end = er.Start + er.Length ?? 0; - ret.Add(new Token(begin, end)); - } - } - - return ret; - } + public IDateTimeExtractor TimePointExtractor { get; } - public List Extract(string text) - { - return Extract(text, DateObject.Now); - } + public IDateTimeExtractor DurationExtractor { get; } - public List Extract(string text, DateObject referenceTime) - { - var tokens = new List(); - tokens.AddRange(MergeDateAndTime(text, referenceTime)); - tokens.AddRange(BasicRegexMatch(text)); - tokens.AddRange(TimeOfToday(text, referenceTime)); - tokens.AddRange(DurationWithBeforeAndAfter(text, referenceTime)); + Regex ICJKDateTimeExtractorConfiguration.NowRegex => NowRegex; - return Token.MergeAllTokens(tokens, text, ExtractorName); - } + Regex ICJKDateTimeExtractorConfiguration.PrepositionRegex => PrepositionRegex; + + Regex ICJKDateTimeExtractorConfiguration.NightRegex => NightRegex; + + Regex ICJKDateTimeExtractorConfiguration.TimeOfSpecialDayRegex => TimeOfSpecialDayRegex; + + Regex ICJKDateTimeExtractorConfiguration.TimeOfDayRegex => TimeOfDayRegex; + + Regex ICJKDateTimeExtractorConfiguration.BeforeRegex => BeforeRegex; + + Regex ICJKDateTimeExtractorConfiguration.AfterRegex => AfterRegex; + + Regex ICJKDateTimeExtractorConfiguration.ConnectorRegex => ConnectorRegex; + + public Dictionary AmbiguityDateTimeFiltersDict => DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityDateTimeFiltersDict); - // Process case like "5分钟前" "二小时后" - private List DurationWithBeforeAndAfter(string text, DateObject referenceTime) - { - var ret = new List(); - var durationEr = DurationExtractor.Extract(text, referenceTime); - foreach (var er in durationEr) - { - var pos = (int)er.Start + (int)er.Length; - if (pos < text.Length) - { - var suffix = text.Substring(pos); - 
var beforeMatch = BeforeRegex.Match(suffix); - var afterMatch = AfterRegex.Match(suffix); - - if ((beforeMatch.Success && suffix.StartsWith(beforeMatch.Value)) || (afterMatch.Success && suffix.StartsWith(afterMatch.Value))) - { - var metadata = new Metadata() { IsDurationWithBeforeAndAfter = true }; - ret.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 0) + 1, metadata)); - } - } - } - - return ret; - } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateTimePeriodExtractorConfiguration.cs index 56dbdd9d47..7e90654cf0 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateTimePeriodExtractorConfiguration.cs @@ -1,308 +1,154 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; +using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Chinese; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseDateTimePeriodExtractorConfiguration : IDateTimeExtractor + public class ChineseDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, + ICJKDateTimePeriodExtractorConfiguration { - public static readonly string ExtractorName = Constants.SYS_DATETIME_DATETIMEPERIOD; - - public static readonly Regex TillRegex = new Regex(DateTimeDefinitions.DateTimePeriodTillRegex, RegexFlags); - public static readonly Regex PrepositionRegex = new Regex(DateTimeDefinitions.DateTimePeriodPrepositionRegex, RegexFlags); + public static readonly Regex TillRegex = new Regex(DateTimeDefinitions.DateTimePeriodTillRegex, RegexFlags, RegexTimeOut); - public static readonly Regex HourRegex = new Regex(DateTimeDefinitions.HourRegex, RegexFlags); + public static readonly Regex PrepositionRegex = new Regex(DateTimeDefinitions.DateTimePeriodPrepositionRegex, RegexFlags, RegexTimeOut); - public static readonly Regex HourNumRegex = new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags); + public static readonly Regex ZhijianRegex = new Regex(DateTimeDefinitions.ZhijianRegex, RegexFlags, RegexTimeOut); - public static readonly Regex ZhijianRegex = new Regex(DateTimeDefinitions.ZhijianRegex, RegexFlags); + public static readonly Regex TimeOfDayRegex = new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); - public static readonly Regex ThisRegex = new Regex(DateTimeDefinitions.DateTimePeriodThisRegex, RegexFlags); + public static readonly Regex SpecificTimeOfDayRegex = new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); - public static 
readonly Regex LastRegex = new Regex(DateTimeDefinitions.DateTimePeriodLastRegex, RegexFlags); + public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.DateTimePeriodUnitRegex, RegexFlags, RegexTimeOut); - public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DateTimePeriodNextRegex, RegexFlags); + public static readonly Regex FollowedUnit = new Regex(DateTimeDefinitions.DateTimePeriodFollowedUnit, RegexFlags, RegexTimeOut); - public static readonly Regex TimeOfDayRegex = new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + public static readonly Regex PastRegex = new Regex(DateTimeDefinitions.PastRegex, RegexFlags, RegexTimeOut); - public static readonly Regex SpecificTimeOfDayRegex = new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + public static readonly Regex FutureRegex = new Regex(DateTimeDefinitions.FutureRegex, RegexFlags, RegexTimeOut); - public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.DateTimePeriodUnitRegex, RegexFlags); + public static readonly Regex WeekDayRegex = new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); - public static readonly Regex FollowedUnit = new Regex(DateTimeDefinitions.DateTimePeriodFollowedUnit, RegexFlags); + public static readonly Regex TimePeriodLeftRegex = new Regex(DateTimeDefinitions.TimePeriodLeftRegex, RegexFlags, RegexTimeOut); - public static readonly Regex NumberCombinedWithUnit = new Regex(DateTimeDefinitions.DateTimePeriodNumberCombinedWithUnit, RegexFlags); + public static readonly Regex RelativeRegex = new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut); - public static readonly Regex PastRegex = new Regex(DateTimeDefinitions.PastRegex, RegexFlags); + public static readonly Regex RestOfDateRegex = new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags, RegexTimeOut); - public static readonly Regex FutureRegex = new Regex(DateTimeDefinitions.FutureRegex, RegexFlags); + public static readonly 
Regex AmPmDescRegex = new Regex(DateTimeDefinitions.AmPmDescRegex, RegexFlags, RegexTimeOut); - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public static readonly Regex BeforeAfterRegex = new Regex(DateTimeDefinitions.BeforeAfterRegex, RegexFlags, RegexTimeOut); - private static readonly ChineseTimeExtractorConfiguration SingleTimeExtractor = new ChineseTimeExtractorConfiguration(); + public static readonly Regex HourRegex = new Regex(DateTimeDefinitions.HourRegex, RegexFlags, RegexTimeOut); + public static readonly Regex HourNumRegex = new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags, RegexTimeOut); + public static readonly Regex ThisRegex = new Regex(DateTimeDefinitions.DateTimePeriodThisRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DateTimePeriodLastRegex, RegexFlags, RegexTimeOut); + public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DateTimePeriodNextRegex, RegexFlags, RegexTimeOut); + public static readonly Regex NumberCombinedWithUnit = new Regex(DateTimeDefinitions.DateTimePeriodNumberCombinedWithUnit, RegexFlags, RegexTimeOut); - private static readonly ChineseDateTimeExtractorConfiguration TimeWithDateExtractor = new ChineseDateTimeExtractorConfiguration(); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly ChineseDateExtractorConfiguration SingleDateExtractor = new ChineseDateExtractorConfiguration(); + public ChineseDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } - private static readonly CardinalExtractor CardinalExtractor = new CardinalExtractor(); + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); - private static 
readonly ChineseTimePeriodExtractorChsConfiguration TimePeriodExtractor = new ChineseTimePeriodExtractorChsConfiguration(); + CardinalExtractor = new CardinalExtractor(numConfig); - public List Extract(string text) - { - return Extract(text, DateObject.Now); + SingleDateExtractor = new BaseCJKDateExtractor(new ChineseDateExtractorConfiguration(this)); + SingleTimeExtractor = new BaseCJKTimeExtractor(new ChineseTimeExtractorConfiguration(this)); + SingleDateTimeExtractor = new BaseCJKDateTimeExtractor(new ChineseDateTimeExtractorConfiguration(this)); + DurationExtractor = new BaseCJKDurationExtractor(new ChineseDurationExtractorConfiguration(this)); + TimePeriodExtractor = new BaseCJKTimePeriodExtractor(new ChineseTimePeriodExtractorConfiguration(this)); } - public List Extract(string text, DateObject referenceTime) - { - var tokens = new List(); - tokens.AddRange(MergeDateAndTimePeriod(text, referenceTime)); - tokens.AddRange(MergeTwoTimePoints(text, referenceTime)); - tokens.AddRange(MatchNumberWithUnit(text)); - tokens.AddRange(MatchNight(text, referenceTime)); + public IExtractor CardinalExtractor { get; } - return Token.MergeAllTokens(tokens, text, ExtractorName); - } + public IDateTimeExtractor SingleDateExtractor { get; } - // merge Date and Time period - private List MergeDateAndTimePeriod(string text, DateObject referenceTime) - { - var ret = new List(); - var er1 = SingleDateExtractor.Extract(text, referenceTime); - var er2 = TimePeriodExtractor.Extract(text, referenceTime); - var timePoints = new List(); - - // handle the overlap problem - var j = 0; - for (var i = 0; i < er1.Count; i++) - { - timePoints.Add(er1[i]); - while (j < er2.Count && er2[j].Start + er2[j].Length <= er1[i].Start) - { - timePoints.Add(er2[j]); - j++; - } - - while (j < er2.Count && er2[j].IsOverlap(er1[i])) - { - j++; - } - } + public IDateTimeExtractor SingleTimeExtractor { get; } - for (; j < er2.Count; j++) - { - timePoints.Add(er2[j]); - } + public IDateTimeExtractor 
SingleDateTimeExtractor { get; } - timePoints = timePoints.OrderBy(o => o.Start).ToList(); + public IDateTimeExtractor DurationExtractor { get; } - // merge {Date} {TimePeriod} - var idx = 0; - while (idx < timePoints.Count - 1) - { - if (timePoints[idx].Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) && - timePoints[idx + 1].Type.Equals(Constants.SYS_DATETIME_TIMEPERIOD, StringComparison.Ordinal)) - { - var middleBegin = timePoints[idx].Start + timePoints[idx].Length ?? 0; - var middleEnd = timePoints[idx + 1].Start ?? 0; - - var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim(); - if (string.IsNullOrWhiteSpace(middleStr) || PrepositionRegex.IsMatch(middleStr)) - { - var periodBegin = timePoints[idx].Start ?? 0; - var periodEnd = (timePoints[idx + 1].Start ?? 0) + (timePoints[idx + 1].Length ?? 0); - ret.Add(new Token(periodBegin, periodEnd)); - idx += 2; - continue; - } - - idx++; - } - - idx++; - } + public IDateTimeExtractor TimePeriodExtractor { get; } - return ret; - } + Regex ICJKDateTimePeriodExtractorConfiguration.PrepositionRegex => PrepositionRegex; - private List MergeTwoTimePoints(string text, DateObject referenceTime) - { - var ret = new List(); - var er1 = TimeWithDateExtractor.Extract(text, referenceTime); - var er2 = SingleTimeExtractor.Extract(text, referenceTime); - var timePoints = new List(); - - // handle the overlap problem - var j = 0; - for (var i = 0; i < er1.Count; i++) - { - timePoints.Add(er1[i]); - while (j < er2.Count && er2[j].Start + er2[j].Length <= er1[i].Start) - { - timePoints.Add(er2[j]); - j++; - } - - while (j < er2.Count && er2[j].IsOverlap(er1[i])) - { - j++; - } - } + Regex ICJKDateTimePeriodExtractorConfiguration.TillRegex => TillRegex; - for (; j < er2.Count; j++) - { - timePoints.Add(er2[j]); - } + Regex ICJKDateTimePeriodExtractorConfiguration.SpecificTimeOfDayRegex => SpecificTimeOfDayRegex; - timePoints = timePoints.OrderBy(o => o.Start).ToList(); + Regex 
ICJKDateTimePeriodExtractorConfiguration.TimeOfDayRegex => TimeOfDayRegex; - // merge "{TimePoint} to {TimePoint}", "between {TimePoint} and {TimePoint}" - var idx = 0; - while (idx < timePoints.Count - 1) - { - // if both ends are Time. then this is a TimePeriod, not a DateTimePeriod - if (timePoints[idx].Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal) && - timePoints[idx + 1].Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) - { - idx++; - continue; - } - - var middleBegin = timePoints[idx].Start + timePoints[idx].Length ?? 0; - var middleEnd = timePoints[idx + 1].Start ?? 0; - - var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim(); - - // handle "{TimePoint} to {TimePoint}" - if (TillRegex.IsExactMatch(middleStr, trim: true)) - { - var periodBegin = timePoints[idx].Start ?? 0; - var periodEnd = (timePoints[idx + 1].Start ?? 0) + (timePoints[idx + 1].Length ?? 0); - - // handle "from" - var beforeStr = text.Substring(0, periodBegin); - if (beforeStr.Trim().EndsWith("从")) - { - periodBegin = beforeStr.LastIndexOf("从", StringComparison.Ordinal); - } - - ret.Add(new Token(periodBegin, periodEnd)); - idx += 2; - continue; - } - - // handle "between {TimePoint} and {TimePoint}" - if (middleStr.Equals("和") || middleStr.Equals("与") || middleStr.Equals("到")) - { - var periodBegin = timePoints[idx].Start ?? 0; - var periodEnd = (timePoints[idx + 1].Start ?? 0) + (timePoints[idx + 1].Length ?? 
0); - - // handle "between" - var afterStr = text.Substring(periodEnd); - var match = ZhijianRegex.Match(afterStr); - if (match.Success) - { - ret.Add(new Token(periodBegin, periodEnd + match.Length)); - idx += 2; - continue; - } - } - - idx++; - } + Regex ICJKDateTimePeriodExtractorConfiguration.FollowedUnit => FollowedUnit; - return ret; - } + Regex ICJKDateTimePeriodExtractorConfiguration.UnitRegex => UnitRegex; - private List MatchNight(string text, DateObject referenceTime) - { - var ret = new List(); - var matches = SpecificTimeOfDayRegex.Matches(text); - foreach (Match match in matches) - { - ret.Add(new Token(match.Index, match.Index + match.Length)); - } + Regex ICJKDateTimePeriodExtractorConfiguration.PastRegex => PastRegex; - // Date followed by morning, afternoon - var ers = SingleDateExtractor.Extract(text, referenceTime); - if (ers.Count == 0) - { - return ret; - } + Regex ICJKDateTimePeriodExtractorConfiguration.FutureRegex => FutureRegex; - foreach (var er in ers) - { - var afterStr = text.Substring(er.Start + er.Length ?? 0); - var match = TimeOfDayRegex.Match(afterStr); - if (match.Success) - { - var middleStr = afterStr.Substring(0, match.Index); - if (string.IsNullOrWhiteSpace(middleStr) || PrepositionRegex.IsMatch(middleStr)) - { - ret.Add(new Token(er.Start ?? 0, er.Start + er.Length + match.Index + match.Length ?? 0)); - } - } - } + Regex ICJKDateTimePeriodExtractorConfiguration.TimePeriodLeftRegex => TimePeriodLeftRegex; - return ret; - } + Regex ICJKDateTimePeriodExtractorConfiguration.RelativeRegex => RelativeRegex; - private List MatchNumberWithUnit(string text) - { - var ret = new List(); + Regex ICJKDateTimePeriodExtractorConfiguration.RestOfDateRegex => RestOfDateRegex; - var durations = new List(); - var ers = CardinalExtractor.Extract(text); + Regex ICJKDateTimePeriodExtractorConfiguration.AmPmDescRegex => AmPmDescRegex; - foreach (var er in ers) - { - var afterStr = text.Substring(er.Start + er.Length ?? 
0); - var match = FollowedUnit.MatchBegin(afterStr, trim: true); + Regex ICJKDateTimePeriodExtractorConfiguration.ThisRegex => ThisRegex; - if (match.Success) - { - durations.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 0) + match.Length)); - } - } + Regex ICJKDateTimePeriodExtractorConfiguration.BeforeAfterRegex => BeforeAfterRegex; - var matches = UnitRegex.Matches(text); - foreach (Match match in matches) + public bool GetFromTokenIndex(string text, out int index) + { + index = -1; + + // @TODO move hardcoded values to resources file + if (text.Trim().EndsWith("从", StringComparison.Ordinal)) { - durations.Add(new Token(match.Index, match.Index + match.Length)); + index = text.LastIndexOf("从", StringComparison.Ordinal); + return true; } - foreach (var duration in durations) + return false; + } + + public bool GetBetweenTokenIndex(string text, out int index) + { + index = -1; + var match = ZhijianRegex.Match(text); + if (match.Success) { - var beforeStr = text.Substring(0, duration.Start); - if (string.IsNullOrWhiteSpace(beforeStr)) - { - continue; - } - - var match = PastRegex.MatchEnd(beforeStr, trim: true); - - if (match.Success) - { - ret.Add(new Token(match.Index, duration.End)); - continue; - } - - match = FutureRegex.MatchEnd(beforeStr, trim: true); - - if (match.Success) - { - ret.Add(new Token(match.Index, duration.End)); - } + index = match.Length; + return true; } - return ret; + return false; + } + + public bool HasConnectorToken(string text) + { + // @TODO move hardcoded values to resources file + return text.Equals("和", StringComparison.Ordinal) || + text.Equals("与", StringComparison.Ordinal) || + text.Equals("到", StringComparison.Ordinal); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDurationExtractorConfiguration.cs index 3d19208440..5768b8d3aa 100644 --- 
a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDurationExtractorConfiguration.cs @@ -1,77 +1,89 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; using Microsoft.Recognizers.Text.NumberWithUnit; using Microsoft.Recognizers.Text.NumberWithUnit.Chinese; -using Microsoft.Recognizers.Text.Utilities; -using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public enum DurationType - { - /// - /// Types of DurationType. - /// - WithNumber, - } - public class ChineseDurationExtractorConfiguration : ChineseBaseDateTimeExtractorConfiguration + public class ChineseDurationExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKDurationExtractorConfiguration { - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.DurationYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DurationUnitRegex = new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AnUnitRegex = new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DurationConnectorRegex = new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags, RegexTimeOut); - private static readonly IExtractor InternalExtractor = new NumberWithUnitExtractor(new DurationExtractorConfiguration()); + public static readonly Regex AllRegex = new Regex(DateTimeDefinitions.DurationAllRegex, RegexFlags, RegexTimeOut); - private static readonly 
Regex YearRegex = new Regex(DateTimeDefinitions.DurationYearRegex, RegexFlags); + public static readonly Regex HalfRegex = new Regex(DateTimeDefinitions.DurationHalfRegex, RegexFlags, RegexTimeOut); - private static readonly Regex HalfSuffixRegex = new Regex(DateTimeDefinitions.DurationHalfSuffixRegex, RegexFlags); + public static readonly Regex RelativeDurationUnitRegex = new Regex(DateTimeDefinitions.DurationRelativeDurationUnitRegex, RegexFlags, RegexTimeOut); - internal override ImmutableDictionary Regexes { get; } + public static readonly Regex DuringRegex = new Regex(DateTimeDefinitions.DurationDuringRegex, RegexFlags, RegexTimeOut); - protected sealed override string ExtractType { get; } = Constants.SYS_DATETIME_DURATION; // "Duration"; + public static readonly Regex SomeRegex = new Regex(DateTimeDefinitions.DurationSomeRegex, RegexFlags, RegexTimeOut); - // extract by number with unit - public override List Extract(string source, DateObject referenceTime) + public static readonly Regex MoreOrLessRegex = new Regex(DateTimeDefinitions.DurationMoreOrLessRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private readonly bool merge; + + public ChineseDurationExtractorConfiguration(IDateTimeOptionsConfiguration config, bool merge = true) + : base(config) { - // Use Unit to extract - var retList = InternalExtractor.Extract(source); - var res = new List(); - foreach (var ret in retList) - { - // filter - var match = YearRegex.Match(ret.Text); - if (match.Success) - { - continue; - } - - // match suffix "半" - var suffix = source.Substring((int)(ret.Start + ret.Length)); - var beginMatch = HalfSuffixRegex.MatchBegin(suffix, trim: true); - - if (beginMatch.Success) - { - var matchString = suffix.Substring(beginMatch.Index, beginMatch.Length); - ret.Text = ret.Text + matchString; - ret.Length = ret.Length + beginMatch.Length; - } - - res.Add(ret); - } + this.merge = merge; + + 
InternalExtractor = new NumberWithUnitExtractor(new DurationExtractorConfiguration()); - return res; + UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToDictionary(k => k.Key, k => k.Value.Substring(0, 1) + k.Value.Substring(1).ToLower()); + UnitValueMap = DateTimeDefinitions.DurationUnitValueMap; } + public IExtractor InternalExtractor { get; } + + public Dictionary UnitMap { get; } + + public Dictionary UnitValueMap { get; } + + public Dictionary AmbiguityDurationFiltersDict => null; + + Regex ICJKDurationExtractorConfiguration.DurationUnitRegex => DurationUnitRegex; + + Regex ICJKDurationExtractorConfiguration.DurationConnectorRegex => DurationConnectorRegex; + + Regex ICJKDurationExtractorConfiguration.YearRegex => YearRegex; + + Regex ICJKDurationExtractorConfiguration.AllRegex => AllRegex; + + Regex ICJKDurationExtractorConfiguration.HalfRegex => HalfRegex; + + Regex ICJKDurationExtractorConfiguration.RelativeDurationUnitRegex => RelativeDurationUnitRegex; + + Regex ICJKDurationExtractorConfiguration.DuringRegex => DuringRegex; + + Regex ICJKDurationExtractorConfiguration.SomeRegex => SomeRegex; + + Regex ICJKDurationExtractorConfiguration.MoreOrLessRegex => MoreOrLessRegex; + internal class DurationExtractorConfiguration : ChineseNumberWithUnitExtractorConfiguration { public static readonly ImmutableDictionary DurationSuffixList = DateTimeDefinitions.DurationSuffixList.ToImmutableDictionary(); public DurationExtractorConfiguration() - : base(new CultureInfo("zh-CN")) + : base(new CultureInfo(Text.Culture.Chinese)) { } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseHolidayExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseHolidayExtractorConfiguration.cs index 636335c2c6..d813eb1fdc 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseHolidayExtractorConfiguration.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseHolidayExtractorConfiguration.cs @@ -1,18 +1,21 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseHolidayExtractorConfiguration : BaseDateTimeOptionsConfiguration, IHolidayExtractorConfiguration + public class ChineseHolidayExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKHolidayExtractorConfiguration { - public static readonly Regex LunarHolidayRegex = new Regex(DateTimeDefinitions.LunarHolidayRegex, RegexFlags); + public static readonly Regex LunarHolidayRegex = new Regex(DateTimeDefinitions.LunarHolidayRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] HolidayRegexList = { - new Regex(DateTimeDefinitions.HolidayRegexList1, RegexFlags), - new Regex(DateTimeDefinitions.HolidayRegexList2, RegexFlags), + new Regex(DateTimeDefinitions.HolidayRegexList1, RegexFlags, RegexTimeOut), + new Regex(DateTimeDefinitions.HolidayRegexList2, RegexFlags, RegexTimeOut), LunarHolidayRegex, }; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseMergedExtractorConfiguration.cs index cdeb004f26..8472a3c8b3 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseMergedExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; @@ -9,211 +12,78 @@ namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseMergedExtractorConfiguration : IDateTimeExtractor + public class ChineseMergedExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKMergedExtractorConfiguration { - public static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.ParserConfigurationBefore, RegexFlags); - public static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.ParserConfigurationAfter, RegexFlags); - public static readonly Regex UntilRegex = new Regex(DateTimeDefinitions.ParserConfigurationUntil, RegexFlags); - public static readonly Regex SincePrefixRegex = new Regex(DateTimeDefinitions.ParserConfigurationSincePrefix, RegexFlags); - public static readonly Regex SinceSuffixRegex = new Regex(DateTimeDefinitions.ParserConfigurationSinceSuffix, RegexFlags); - public static readonly Regex EqualRegex = new Regex(BaseDateTime.EqualRegex, RegexFlags); + public static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.ParserConfigurationBefore, RegexFlags, RegexTimeOut); + public static readonly Regex UnspecificDatePeriodRegex = new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags, RegexTimeOut); + public static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.ParserConfigurationAfter, RegexFlags, RegexTimeOut); + public static readonly Regex UntilRegex = new Regex(DateTimeDefinitions.ParserConfigurationUntil, RegexFlags, RegexTimeOut); + public static readonly Regex SincePrefixRegex = new Regex(DateTimeDefinitions.ParserConfigurationSincePrefix, RegexFlags, RegexTimeOut); + public static readonly Regex SinceSuffixRegex = new Regex(DateTimeDefinitions.ParserConfigurationSinceSuffix, RegexFlags, RegexTimeOut); + public static readonly Regex AroundPrefixRegex = new 
Regex(DateTimeDefinitions.ParserConfigurationAroundPrefix, RegexFlags, RegexTimeOut); + public static readonly Regex AroundSuffixRegex = new Regex(DateTimeDefinitions.ParserConfigurationAroundSuffix, RegexFlags, RegexTimeOut); + public static readonly Regex EqualRegex = new Regex(BaseDateTime.EqualRegex, RegexFlags, RegexTimeOut); + public static readonly Regex PotentialAmbiguousRangeRegex = new Regex(DateTimeDefinitions.FromToRegex, RegexFlags, RegexTimeOut); + public static readonly Regex AmbiguousRangeModifierPrefix = new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly ChineseDateExtractorConfiguration DateExtractor = new ChineseDateExtractorConfiguration(); - private static readonly ChineseTimeExtractorConfiguration TimeExtractor = new ChineseTimeExtractorConfiguration(); - private static readonly ChineseDateTimeExtractorConfiguration DateTimeExtractor = new ChineseDateTimeExtractorConfiguration(); - private static readonly ChineseDatePeriodExtractorConfiguration DatePeriodExtractor = new ChineseDatePeriodExtractorConfiguration(); - private static readonly ChineseTimePeriodExtractorChsConfiguration TimePeriodExtractor = new ChineseTimePeriodExtractorChsConfiguration(); - private static readonly ChineseDateTimePeriodExtractorConfiguration DateTimePeriodExtractor = new ChineseDateTimePeriodExtractorConfiguration(); - private static readonly ChineseDurationExtractorConfiguration DurationExtractor = new ChineseDurationExtractorConfiguration(); - private static readonly ChineseSetExtractorConfiguration SetExtractor = new ChineseSetExtractorConfiguration(); - - private readonly IDateTimeOptionsConfiguration config; - public ChineseMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) { - this.config = config; - AmbiguityFiltersDict = 
DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict); - HolidayExtractor = new BaseHolidayExtractor(new ChineseHolidayExtractorConfiguration(config)); + + DateExtractor = new BaseCJKDateExtractor(new ChineseDateExtractorConfiguration(this)); + TimeExtractor = new BaseCJKTimeExtractor(new ChineseTimeExtractorConfiguration(this)); + DateTimeExtractor = new BaseCJKDateTimeExtractor(new ChineseDateTimeExtractorConfiguration(this)); + DatePeriodExtractor = new BaseCJKDatePeriodExtractor(new ChineseDatePeriodExtractorConfiguration(this)); + TimePeriodExtractor = new BaseCJKTimePeriodExtractor(new ChineseTimePeriodExtractorConfiguration(this)); + DateTimePeriodExtractor = new BaseCJKDateTimePeriodExtractor(new ChineseDateTimePeriodExtractorConfiguration(this)); + DurationExtractor = new BaseCJKDurationExtractor(new ChineseDurationExtractorConfiguration(this)); + SetExtractor = new BaseCJKSetExtractor(new ChineseSetExtractorConfiguration(this)); + HolidayExtractor = new BaseCJKHolidayExtractor(new ChineseHolidayExtractorConfiguration(this)); } - public Dictionary AmbiguityFiltersDict { get; } + public IDateTimeExtractor DateExtractor { get; } - private BaseHolidayExtractor HolidayExtractor { get; } + public IDateTimeExtractor TimeExtractor { get; } - public List Extract(string text) - { - return Extract(text, DateObject.Now); - } + public IDateTimeExtractor DateTimeExtractor { get; } - public List Extract(string text, DateObject referenceTime) - { - var ret = DateExtractor.Extract(text, referenceTime); + public IDateTimeExtractor DatePeriodExtractor { get; } - // the order is important, since there is a problem in merging - AddTo(ret, TimeExtractor.Extract(text, referenceTime)); - AddTo(ret, DurationExtractor.Extract(text, referenceTime)); - AddTo(ret, DatePeriodExtractor.Extract(text, referenceTime)); - AddTo(ret, DateTimeExtractor.Extract(text, referenceTime)); - AddTo(ret, TimePeriodExtractor.Extract(text, referenceTime)); - AddTo(ret, 
DateTimePeriodExtractor.Extract(text, referenceTime)); - AddTo(ret, SetExtractor.Extract(text, referenceTime)); - AddTo(ret, HolidayExtractor.Extract(text, referenceTime)); + public IDateTimeExtractor TimePeriodExtractor { get; } - ret = FilterAmbiguity(ret, text); + public IDateTimeExtractor DateTimePeriodExtractor { get; } - AddMod(ret, text); + public IDateTimeExtractor DurationExtractor { get; } - ret = ret.OrderBy(p => p.Start).ToList(); + public IDateTimeExtractor SetExtractor { get; } - return ret; - } + public IDateTimeExtractor HolidayExtractor { get; } - private static List MoveOverlap(List dst, ExtractResult result) - { - var duplicate = new List(); - for (var i = 0; i < dst.Count; ++i) - { - if (result.Text.Contains(dst[i].Text) && - (result.Start == dst[i].Start || result.Start + result.Length == dst[i].Start + dst[i].Length)) - { - duplicate.Add(i); - } - } - - var tempDst = dst.Where((_, i) => !duplicate.Contains(i)).ToList(); - - return tempDst; - } + Regex ICJKMergedExtractorConfiguration.AfterRegex => AfterRegex; - // Filter some bad cases like "十二周岁" and "12号", etc. - private List FilterAmbiguity(List extractResults, string text) - { - if (this.AmbiguityFiltersDict != null) - { - foreach (var regex in this.AmbiguityFiltersDict) - { - foreach (var extractResult in extractResults) - { - if (regex.Key.IsMatch(extractResult.Text)) - { - var matches = regex.Value.Matches(text).Cast(); - extractResults = extractResults.Where(er => !matches.Any(m => m.Index < er.Start + er.Length && m.Index + m.Length > er.Start)) - .ToList(); - } - } - } - } - - return extractResults; - } + Regex ICJKMergedExtractorConfiguration.BeforeRegex => BeforeRegex; - private void AddMod(List ers, string text) - { - var lastEnd = 0; - foreach (var er in ers) - { - var beforeStr = text.Substring(lastEnd, er.Start ?? 0); - var afterStr = text.Substring((er.Start ?? 0) + (er.Length ?? 
0)); - - var match = BeforeRegex.MatchBegin(afterStr, trim: true); - - if (match.Success) - { - var modLength = match.Index + match.Length; - er.Length += modLength; - er.Text = text.Substring(er.Start ?? 0, er.Length ?? 0); - } - - match = AfterRegex.MatchBegin(afterStr, trim: true); - - if (match.Success) - { - var modLength = match.Index + match.Length; - er.Length += modLength; - er.Text = text.Substring(er.Start ?? 0, er.Length ?? 0); - } - - match = UntilRegex.MatchEnd(beforeStr, trim: true); - - if (match.Success) - { - var modLength = beforeStr.Length - match.Index; - er.Length += modLength; - er.Start -= modLength; - er.Text = text.Substring(er.Start ?? 0, er.Length ?? 0); - } - - match = SincePrefixRegex.MatchEnd(beforeStr, trim: true); - - if (match.Success) - { - var modLength = beforeStr.Length - match.Index; - er.Length += modLength; - er.Start -= modLength; - er.Text = text.Substring(er.Start ?? 0, er.Length ?? 0); - } - - match = SinceSuffixRegex.MatchBegin(afterStr, trim: true); - if (match.Success) - { - var modLength = match.Index + match.Length; - er.Length += modLength; - er.Text = text.Substring(er.Start ?? 0, er.Length ?? 0); - } - - match = EqualRegex.MatchBegin(beforeStr, trim: true); - if (match.Success) - { - var modLength = beforeStr.Length - match.Index; - er.Length += modLength; - er.Start -= modLength; - er.Text = text.Substring(er.Start ?? 0, er.Length ?? 
0); - } - } - } + Regex ICJKMergedExtractorConfiguration.UnspecificDatePeriodRegex => UnspecificDatePeriodRegex; - private void AddTo(List dst, List src) - { - foreach (var result in src) - { - var isFound = false; - int indexRm = -1, lengthRm = 1; - for (var i = 0; i < dst.Count; i++) - { - if (dst[i].IsOverlap(result)) - { - isFound = true; - if (result.Length > dst[i].Length) - { - indexRm = i; - var j = i + 1; - while (j < dst.Count && dst[j].IsOverlap(result)) - { - lengthRm++; - j++; - } - } - - break; - } - } - - if (!isFound) - { - dst.Add(result); - } - else if (indexRm >= 0) - { - dst.RemoveRange(indexRm, lengthRm); - var tmpDst = MoveOverlap(dst, result); - dst.Clear(); - dst.AddRange(tmpDst); - dst.Insert(indexRm, result); - } - } - } + Regex ICJKMergedExtractorConfiguration.SincePrefixRegex => SincePrefixRegex; + + Regex ICJKMergedExtractorConfiguration.SinceSuffixRegex => SinceSuffixRegex; + + Regex ICJKMergedExtractorConfiguration.AroundPrefixRegex => AroundPrefixRegex; + + Regex ICJKMergedExtractorConfiguration.AroundSuffixRegex => AroundSuffixRegex; + + Regex ICJKMergedExtractorConfiguration.UntilRegex => UntilRegex; + + Regex ICJKMergedExtractorConfiguration.EqualRegex => EqualRegex; + + Regex ICJKMergedExtractorConfiguration.PotentialAmbiguousRangeRegex => PotentialAmbiguousRangeRegex; + + Regex ICJKMergedExtractorConfiguration.AmbiguousRangeModifierPrefix => AmbiguousRangeModifierPrefix; + + public Dictionary AmbiguityFiltersDict { get; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseSetExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseSetExtractorConfiguration.cs index bf8a964b21..9cedaed012 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseSetExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseSetExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using 
System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; @@ -6,134 +9,62 @@ namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseSetExtractorConfiguration : IDateTimeExtractor + public class ChineseSetExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKSetExtractorConfiguration { - public static readonly string ExtractorName = Constants.SYS_DATETIME_SET; + public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.SetUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex EachUnitRegex = new Regex(DateTimeDefinitions.SetEachUnitRegex, RegexFlags, RegexTimeOut); - public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.SetUnitRegex, RegexFlags); + public static readonly Regex EachPrefixRegex = new Regex(DateTimeDefinitions.SetEachPrefixRegex, RegexFlags, RegexTimeOut); - public static readonly Regex EachUnitRegex = new Regex(DateTimeDefinitions.SetEachUnitRegex, RegexFlags); + public static readonly Regex EachSuffixRegex = new Regex(DateTimeDefinitions.SetEachSuffixRegex, RegexFlags, RegexTimeOut); - public static readonly Regex EachPrefixRegex = new Regex(DateTimeDefinitions.SetEachPrefixRegex, RegexFlags); + public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags, RegexTimeOut); - public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags); + public static readonly Regex EachDayRegex = new Regex(DateTimeDefinitions.SetEachDayRegex, RegexFlags, RegexTimeOut); - public static readonly Regex EachDayRegex = new Regex(DateTimeDefinitions.SetEachDayRegex, RegexFlags); + public static readonly Regex EachDateUnitRegex = new Regex(DateTimeDefinitions.SetEachDateUnitRegex, RegexFlags, RegexTimeOut); private const RegexOptions 
RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly ChineseDurationExtractorConfiguration DurationExtractor = new ChineseDurationExtractorConfiguration(); + public ChineseSetExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + DurationExtractor = new BaseCJKDurationExtractor(new ChineseDurationExtractorConfiguration(this)); + TimeExtractor = new BaseCJKTimeExtractor(new ChineseTimeExtractorConfiguration(this)); + DateExtractor = new BaseCJKDateExtractor(new ChineseDateExtractorConfiguration(this)); + DateTimeExtractor = new BaseCJKDateTimeExtractor(new ChineseDateTimeExtractorConfiguration(this)); + DatePeriodExtractor = new BaseCJKDatePeriodExtractor(new ChineseDatePeriodExtractorConfiguration(this)); + TimePeriodExtractor = new BaseCJKTimePeriodExtractor(new ChineseTimePeriodExtractorConfiguration(this)); + DateTimePeriodExtractor = new BaseCJKDateTimePeriodExtractor(new ChineseDateTimePeriodExtractorConfiguration(this)); + } - private static readonly ChineseTimeExtractorConfiguration TimeExtractor = new ChineseTimeExtractorConfiguration(); + public IDateTimeExtractor DurationExtractor { get; } - private static readonly ChineseDateExtractorConfiguration DateExtractor = new ChineseDateExtractorConfiguration(); + public IDateTimeExtractor TimeExtractor { get; } - private static readonly ChineseDateTimeExtractorConfiguration DateTimeExtractor = new ChineseDateTimeExtractorConfiguration(); + public IDateTimeExtractor DateExtractor { get; } - public static List MatchEachDuration(string text, DateObject referenceTime) - { - var ret = new List(); - - var ers = DurationExtractor.Extract(text, referenceTime); - foreach (var er in ers) - { - // "each last summer" doesn't make sense - if (LastRegex.IsMatch(er.Text)) - { - continue; - } - - var beforeStr = text.Substring(0, er.Start ?? 
0); - var match = EachPrefixRegex.Match(beforeStr); - if (match.Success) - { - ret.Add(new Token(match.Index, er.Start + er.Length ?? 0)); - } - } - - return ret; - } + public IDateTimeExtractor DateTimeExtractor { get; } - public static List MatchEachUnit(string text) - { - var ret = new List(); + public IDateTimeExtractor DatePeriodExtractor { get; } - // handle "each month" - var matches = EachUnitRegex.Matches(text); - foreach (Match match in matches) - { - ret.Add(new Token(match.Index, match.Index + match.Length)); - } + public IDateTimeExtractor TimePeriodExtractor { get; } - return ret; - } + public IDateTimeExtractor DateTimePeriodExtractor { get; } - public static List TimeEveryday(string text, DateObject referenceTime) - { - var ret = new List(); - var ers = TimeExtractor.Extract(text, referenceTime); - foreach (var er in ers) - { - var beforeStr = text.Substring(0, er.Start ?? 0); - var match = EachDayRegex.Match(beforeStr); - if (match.Success) - { - ret.Add(new Token(match.Index, match.Index + match.Length + (er.Length ?? 0))); - } - } - - return ret; - } + Regex ICJKSetExtractorConfiguration.LastRegex => LastRegex; - public static List MatchEachDate(string text, DateObject referenceTime) - { - var ret = new List(); - var ers = DateExtractor.Extract(text, referenceTime); - foreach (var er in ers) - { - var beforeStr = text.Substring(0, er.Start ?? 0); - var match = EachPrefixRegex.Match(beforeStr); - if (match.Success) - { - ret.Add(new Token(match.Index, match.Index + match.Length + (er.Length ?? 0))); - } - } - - return ret; - } + Regex ICJKSetExtractorConfiguration.EachPrefixRegex => EachPrefixRegex; - public static List MatchEachDateTime(string text, DateObject referenceTime) - { - var ret = new List(); - var ers = DateTimeExtractor.Extract(text, referenceTime); - foreach (var er in ers) - { - var beforeStr = text.Substring(0, er.Start ?? 
0); - var match = EachPrefixRegex.Match(beforeStr); - if (match.Success) - { - ret.Add(new Token(match.Index, match.Index + match.Length + (er.Length ?? 0))); - } - } - - return ret; - } + Regex ICJKSetExtractorConfiguration.EachSuffixRegex => EachSuffixRegex; - public List Extract(string text) - { - return Extract(text, DateObject.Now); - } + Regex ICJKSetExtractorConfiguration.EachUnitRegex => EachUnitRegex; - public List Extract(string text, DateObject referenceTime) - { - var tokens = new List(); - tokens.AddRange(MatchEachUnit(text)); - tokens.AddRange(MatchEachDuration(text, referenceTime)); - tokens.AddRange(TimeEveryday(text, referenceTime)); - tokens.AddRange(MatchEachDate(text, referenceTime)); - tokens.AddRange(MatchEachDateTime(text, referenceTime)); - - return Token.MergeAllTokens(tokens, text, ExtractorName); - } + Regex ICJKSetExtractorConfiguration.UnitRegex => UnitRegex; + + Regex ICJKSetExtractorConfiguration.EachDayRegex => EachDayRegex; + + Regex ICJKSetExtractorConfiguration.EachDateUnitRegex => EachDateUnitRegex; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseTimeExtractorConfiguration.cs index 00489ef039..7a0ba2bc5f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseTimeExtractorConfiguration.cs @@ -1,12 +1,16 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; +using Microsoft.Recognizers.Definitions.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseTimeExtractorConfiguration : ChineseBaseDateTimeExtractorConfiguration + public class ChineseTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKTimeExtractorConfiguration { public static readonly string HourNumRegex = DateTimeDefinitions.TimeHourNumRegex; @@ -14,11 +18,11 @@ public class ChineseTimeExtractorConfiguration : ChineseBaseDateTimeExtractorCon public static readonly string SecondNumRegex = DateTimeDefinitions.TimeSecondNumRegex; - public static readonly string HourChsRegex = DateTimeDefinitions.TimeHourChsRegex; + public static readonly string HourCJKRegex = DateTimeDefinitions.TimeHourCJKRegex; - public static readonly string MinuteChsRegex = DateTimeDefinitions.TimeMinuteChsRegex; + public static readonly string MinuteCJKRegex = DateTimeDefinitions.TimeMinuteCJKRegex; - public static readonly string SecondChsRegex = DateTimeDefinitions.TimeSecondChsRegex; + public static readonly string SecondCJKRegex = DateTimeDefinitions.TimeSecondCJKRegex; public static readonly string ClockDescRegex = DateTimeDefinitions.TimeClockDescRegex; @@ -40,7 +44,7 @@ public class ChineseTimeExtractorConfiguration : ChineseBaseDateTimeExtractorCon public static readonly string QuarterRegex = DateTimeDefinitions.TimeQuarterRegex; // e.g: 十二点五十八分|半|一刻 - public static readonly string ChineseTimeRegex = DateTimeDefinitions.TimeChineseTimeRegex; + public static readonly string CJKTimeRegex = DateTimeDefinitions.TimeCJKTimeRegex; // e.g: 12:23 public static readonly string DigitTimeRegex = DateTimeDefinitions.TimeDigitTimeRegex; @@ -54,28 +58,31 @@ public class ChineseTimeExtractorConfiguration : ChineseBaseDateTimeExtractorCon private const RegexOptions RegexFlags = 
RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public ChineseTimeExtractorConfiguration() + public ChineseTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) { var regexes = new Dictionary { { - new Regex(DateTimeDefinitions.TimeRegexes1, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegexes1, RegexFlags, RegexTimeOut), TimeType.CjkTime }, { - new Regex(DateTimeDefinitions.TimeRegexes2, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegexes2, RegexFlags, RegexTimeOut), TimeType.DigitTime }, { - new Regex(DateTimeDefinitions.TimeRegexes3, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegexes3, RegexFlags, RegexTimeOut), TimeType.LessTime }, }; Regexes = regexes.ToImmutableDictionary(); + AmbiguityTimeFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityTimeFiltersDict); } - internal sealed override ImmutableDictionary Regexes { get; } + public ImmutableDictionary Regexes { get; } + + public Dictionary AmbiguityTimeFiltersDict { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_DATETIME_TIME; // "Fraction"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseTimePeriodExtractorChsConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseTimePeriodExtractorConfiguration.cs similarity index 56% rename from .NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseTimePeriodExtractorChsConfiguration.cs rename to .NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseTimePeriodExtractorConfiguration.cs index a1a5e09606..8f6fc7d520 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseTimePeriodExtractorChsConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseTimePeriodExtractorConfiguration.cs @@ -1,59 +1,65 @@ -using System.Collections.Generic; -using System.Collections.Immutable; -using 
System.Text.RegularExpressions; - -using Microsoft.Recognizers.Definitions.Chinese; - -namespace Microsoft.Recognizers.Text.DateTime.Chinese -{ - public class ChineseTimePeriodExtractorChsConfiguration : ChineseBaseDateTimeExtractorConfiguration - { - public const string TimePeriodConnectWords = DateTimeDefinitions.TimePeriodTimePeriodConnectWords; - - // 五点十分四十八秒 - public static readonly string ChineseTimeRegex = ChineseTimeExtractorConfiguration.ChineseTimeRegex; - - // 六点 到 九点 | 六 到 九点 - public static readonly string LeftChsTimeRegex = DateTimeDefinitions.TimePeriodLeftChsTimeRegex; - - public static readonly string RightChsTimeRegex = DateTimeDefinitions.TimePeriodRightChsTimeRegex; - - // 2:45 - public static readonly string DigitTimeRegex = ChineseTimeExtractorConfiguration.DigitTimeRegex; - - public static readonly string LeftDigitTimeRegex = DateTimeDefinitions.TimePeriodLeftDigitTimeRegex; - - public static readonly string RightDigitTimeRegex = DateTimeDefinitions.TimePeriodRightDigitTimeRegex; - - public static readonly string ShortLeftChsTimeRegex = DateTimeDefinitions.TimePeriodShortLeftChsTimeRegex; - - public static readonly string ShortLeftDigitTimeRegex = DateTimeDefinitions.TimePeriodShortLeftDigitTimeRegex; - - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - - public ChineseTimePeriodExtractorChsConfiguration() - { - var regexes = new Dictionary - { - { - new Regex(DateTimeDefinitions.TimePeriodRegexes1, RegexFlags), - PeriodType.FullTime - }, - { - new Regex(DateTimeDefinitions.TimePeriodRegexes2, RegexFlags), - PeriodType.ShortTime - }, - { - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags), - PeriodType.ShortTime - }, - }; - - Regexes = regexes.ToImmutableDictionary(); - } - - internal sealed override ImmutableDictionary Regexes { get; } - - protected sealed override string ExtractType { get; } = Constants.SYS_DATETIME_TIMEPERIOD; - } +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Chinese; +using Microsoft.Recognizers.Definitions.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Chinese +{ + public class ChineseTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKTimePeriodExtractorConfiguration + { + public const string TimePeriodConnectWords = DateTimeDefinitions.TimePeriodTimePeriodConnectWords; + + // 五点十分四十八秒 + public static readonly string CJKTimeRegex = ChineseTimeExtractorConfiguration.CJKTimeRegex; + + // 六点 到 九点 | 六 到 九点 + public static readonly string LeftCJKTimeRegex = DateTimeDefinitions.TimePeriodLeftCJKTimeRegex; + + public static readonly string RightCJKTimeRegex = DateTimeDefinitions.TimePeriodRightCJKTimeRegex; + + // 2:45 + public static readonly string DigitTimeRegex = ChineseTimeExtractorConfiguration.DigitTimeRegex; + + public static readonly string LeftDigitTimeRegex = DateTimeDefinitions.TimePeriodLeftDigitTimeRegex; + + public static readonly string RightDigitTimeRegex = DateTimeDefinitions.TimePeriodRightDigitTimeRegex; + + public static readonly string ShortLeftCJKTimeRegex = DateTimeDefinitions.TimePeriodShortLeftCJKTimeRegex; + + public static readonly string ShortLeftDigitTimeRegex = DateTimeDefinitions.TimePeriodShortLeftDigitTimeRegex; + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public ChineseTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + var regexes = new Dictionary + { + { + new Regex(DateTimeDefinitions.TimePeriodRegexes1, RegexFlags, RegexTimeOut), + PeriodType.FullTime + }, + { + new Regex(DateTimeDefinitions.TimePeriodRegexes2, RegexFlags, RegexTimeOut), + PeriodType.ShortTime + }, + { + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut), + 
PeriodType.ShortTime + }, + }; + + Regexes = regexes.ToImmutableDictionary(); + } + + public ImmutableDictionary Regexes { get; } + + public Dictionary AmbiguityTimePeriodFiltersDict => DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityTimePeriodFiltersDict); + + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseCommonDateTimeParserConfiguration.cs new file mode 100644 index 0000000000..e54f4464bc --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseCommonDateTimeParserConfiguration.cs @@ -0,0 +1,63 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.Chinese; +using Microsoft.Recognizers.Text.DateTime.Chinese; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Chinese; + +namespace Microsoft.Recognizers.Text.DateTime.Chinese +{ + public class ChineseCommonDateTimeParserConfiguration : BaseCJKDateParserConfiguration, ICJKCommonDateTimeParserConfiguration + { + public ChineseCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToImmutableDictionary(k => k.Key, k => k.Value.Substring(0, 1) + k.Value.Substring(1).ToLower()); + UnitValueMap = DateTimeDefinitions.DurationUnitValueMap.ToImmutableDictionary(); + CardinalMap = DateTimeDefinitions.ParserConfigurationCardinalMap.ToImmutableDictionary(); + DayOfMonth = DateTimeDefinitions.ParserConfigurationDayOfMonth.ToImmutableDictionary(); + DayOfWeek = DateTimeDefinitions.ParserConfigurationDayOfWeek.ToImmutableDictionary(); + MonthOfYear = 
DateTimeDefinitions.ParserConfigurationMonthOfYear.ToImmutableDictionary(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = new IntegerExtractor(numConfig); + CardinalExtractor = new CardinalExtractor(numConfig); + OrdinalExtractor = new OrdinalExtractor(numConfig); + + NumberParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration(numConfig)); + + // Do not change order. The order of initialization can lead to side-effects + DateExtractor = new BaseCJKDateExtractor(new ChineseDateExtractorConfiguration(this)); + TimeExtractor = new BaseCJKTimeExtractor(new ChineseTimeExtractorConfiguration(this)); + DateTimeExtractor = new BaseCJKDateTimeExtractor(new ChineseDateTimeExtractorConfiguration(this)); + DurationExtractor = new BaseCJKDurationExtractor(new ChineseDurationExtractorConfiguration(this)); + DatePeriodExtractor = new BaseCJKDatePeriodExtractor(new ChineseDatePeriodExtractorConfiguration(this)); + TimePeriodExtractor = new BaseCJKTimePeriodExtractor(new ChineseTimePeriodExtractorConfiguration(this)); + DateTimePeriodExtractor = new BaseCJKDateTimePeriodExtractor(new ChineseDateTimePeriodExtractorConfiguration(this)); + HolidayExtractor = new BaseCJKDurationExtractor(new ChineseDurationExtractorConfiguration(this)); + SetExtractor = new BaseCJKDurationExtractor(new ChineseDurationExtractorConfiguration(this)); + + DurationParser = new BaseCJKDurationParser(new ChineseDurationParserConfiguration(this)); + DateParser = new BaseCJKDateParser(new ChineseDateParserConfiguration(this)); + TimeParser = new BaseCJKTimeParser(new ChineseTimeParserConfiguration(this)); + DateTimeParser = new BaseCJKDateTimeParser(new ChineseDateTimeParserConfiguration(this)); + DatePeriodParser = new BaseCJKDatePeriodParser(new 
ChineseDatePeriodParserConfiguration(this)); + TimePeriodParser = new BaseCJKTimePeriodParser(new ChineseTimePeriodParserConfiguration(this)); + DateTimePeriodParser = new BaseCJKDateTimePeriodParser(new ChineseDateTimePeriodParserConfiguration(this)); + HolidayParser = new BaseCJKHolidayParser(new ChineseHolidayParserConfiguration(this)); + SetParser = new BaseCJKSetParser(new ChineseSetParserConfiguration(this)); + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateParserConfiguration.cs index bb1f5e73a0..534573f58a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateParserConfiguration.cs @@ -1,759 +1,178 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Text.Number; -using Microsoft.Recognizers.Text.Number.Chinese; -using Microsoft.Recognizers.Text.Utilities; -using DateObject = System.DateTime; +using Microsoft.Recognizers.Definitions.Chinese; namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseDateParserConfiguration : IDateTimeParser + public class ChineseDateParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKDateParserConfiguration { - public static readonly string ParserName = Constants.SYS_DATETIME_DATE; // "Date"; + public static readonly Regex NextMonthRegex = new Regex(DateTimeDefinitions.ParserConfigurationNextMonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastMonthRegex = new Regex(DateTimeDefinitions.ParserConfigurationLastMonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastWeekDayRegex = new Regex(DateTimeDefinitions.ParserConfigurationLastWeekDayRegex, RegexFlags, RegexTimeOut); - private static readonly int[] MonthMaxDays = { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; - private readonly ChineseDateTimeParserConfiguration config; + public static readonly string ParserName = Constants.SYS_DATETIME_DATE; // "Date"; - private readonly IExtractor integerExtractor; - private readonly IExtractor ordinalExtractor; - private readonly IParser numberParser; - private readonly IDateTimeExtractor durationExtractor; + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public ChineseDateParserConfiguration(ChineseDateTimeParserConfiguration configuration) + public ChineseDateParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) { - config = configuration; - integerExtractor = new IntegerExtractor(); - ordinalExtractor = new OrdinalExtractor(); - durationExtractor = new 
ChineseDurationExtractorConfiguration(); - numberParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration(new BaseNumberOptionsConfiguration(configuration.Culture))); - } + IntegerExtractor = config.IntegerExtractor; + OrdinalExtractor = config.OrdinalExtractor; - public ParseResult Parse(ExtractResult extResult) - { - return this.Parse(extResult, DateObject.Now); - } + NumberParser = config.NumberParser; - public virtual DateTimeParseResult Parse(ExtractResult er, DateObject referenceDate) - { - object value = null; + DateExtractor = config.DateExtractor; + DurationExtractor = config.DurationExtractor; + DurationParser = config.DurationParser; - if (er.Type.Equals(ParserName, StringComparison.Ordinal)) - { - value = InnerParser(er.Text, referenceDate); - } + DateRegexList = new ChineseDateExtractorConfiguration(this).DateRegexList; + SpecialDate = ChineseDateExtractorConfiguration.SpecialDate; + NextRe = ChineseDateExtractorConfiguration.NextRe; + LastRe = ChineseDateExtractorConfiguration.LastRe; + SpecialDayRegex = ChineseDateExtractorConfiguration.SpecialDayRegex; + StrictWeekDayRegex = ChineseDateExtractorConfiguration.WeekDayRegex; + LunarRegex = ChineseDateExtractorConfiguration.LunarRegex; + UnitRegex = ChineseDateExtractorConfiguration.UnitRegex; + BeforeRegex = ChineseDateExtractorConfiguration.BeforeRegex; + AfterRegex = ChineseDateExtractorConfiguration.AfterRegex; + DynastyYearRegex = ChineseDateExtractorConfiguration.DynastyYearRegex; + DynastyStartYear = ChineseDateExtractorConfiguration.DynastyStartYear; + DynastyYearMap = ChineseDateExtractorConfiguration.DynastyYearMap; + NextRegex = ChineseDateExtractorConfiguration.NextRegex; + ThisRegex = ChineseDateExtractorConfiguration.ThisRegex; + LastRegex = ChineseDateExtractorConfiguration.LastRegex; + WeekDayOfMonthRegex = ChineseDateExtractorConfiguration.WeekDayOfMonthRegex; + WeekDayAndDayRegex = ChineseDateExtractorConfiguration.WeekDayAndDayRegex; + DurationRelativeDurationUnitRegex = 
ChineseDateExtractorConfiguration.DurationRelativeDurationUnitRegex; + SpecialDayWithNumRegex = ChineseDateExtractorConfiguration.SpecialDayWithNumRegex; + NextNextRegex = ChineseDateExtractorConfiguration.NextNextRegex; + LastLastRegex = ChineseDateExtractorConfiguration.LastLastRegex; - var ret = new DateTimeParseResult - { - Text = er.Text, - Start = er.Start, - Length = er.Length, - Type = er.Type, - Data = er.Data, - Value = value, - TimexStr = value == null ? string.Empty : ((DateTimeResolutionResult)value).Timex, - ResolutionStr = string.Empty, - }; - - return ret; - } + CardinalMap = config.CardinalMap; + UnitMap = config.UnitMap; + DayOfMonth = config.DayOfMonth; + DayOfWeek = config.DayOfWeek; + MonthOfYear = config.MonthOfYear; - public List FilterResults(string query, List candidateResults) - { - return candidateResults; } - protected DateTimeResolutionResult InnerParser(string text, DateObject reference) - { - var innerResult = ParseBasicRegexMatch(text, reference); + public IExtractor IntegerExtractor { get; } - if (!innerResult.Success) - { - innerResult = ParseImplicitDate(text, reference); - } + public IExtractor OrdinalExtractor { get; } - if (!innerResult.Success) - { - innerResult = ParseWeekdayOfMonth(text, reference); - } + public IParser NumberParser { get; } - if (!innerResult.Success) - { - innerResult = ParserDurationWithBeforeAndAfter(text, reference); - } + public IDateTimeExtractor DateExtractor { get; } - if (innerResult.Success) - { - innerResult.FutureResolution = new Dictionary - { - { TimeTypeConstants.DATE, DateTimeFormatUtil.FormatDate((DateObject)innerResult.FutureValue) }, - }; + public IDateTimeExtractor DurationExtractor { get; } - innerResult.PastResolution = new Dictionary - { - { TimeTypeConstants.DATE, DateTimeFormatUtil.FormatDate((DateObject)innerResult.PastValue) }, - }; + public IDateTimeParser DurationParser { get; } - innerResult.IsLunar = IsLunarCalendar(text); + public IEnumerable DateRegexList { get; } - return 
innerResult; - } + public Regex SpecialDate { get; } - return null; - } + public Regex NextRe { get; } - // parse basic patterns in DateRegexList - protected DateTimeResolutionResult ParseBasicRegexMatch(string text, DateObject referenceDate) - { - foreach (var regex in ChineseDateExtractorConfiguration.DateRegexList) - { - var match = regex.MatchExact(text, trim: true); - - if (match.Success) - { - // Value string will be set in Match2Date method - var ret = Match2Date(match.Match, referenceDate); - return ret; - } - } + public Regex LastRe { get; } - return new DateTimeResolutionResult(); - } + public Regex SpecialDayRegex { get; } - // match several other cases - // including '今天', '后天', '十三日' - protected DateTimeResolutionResult ParseImplicitDate(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); + public Regex StrictWeekDayRegex { get; } - // handle "十二日" "明年这个月三日" "本月十一日" - var match = ChineseDateExtractorConfiguration.SpecialDate.MatchExact(text, trim: true); - if (match.Success) - { - var yearStr = match.Groups["thisyear"].Value; - var monthStr = match.Groups["thismonth"].Value; - var dayStr = match.Groups["day"].Value; - - int month = referenceDate.Month, year = referenceDate.Year; - var day = this.config.DayOfMonth[dayStr]; - - bool hasYear = false, hasMonth = false; - - if (!string.IsNullOrEmpty(monthStr)) - { - hasMonth = true; - if (ChineseDateExtractorConfiguration.NextRe.Match(monthStr).Success) - { - month++; - if (month == Constants.MaxMonth + 1) - { - month = Constants.MinMonth; - year++; - } - } - else if (ChineseDateExtractorConfiguration.LastRe.Match(monthStr).Success) - { - month--; - if (month == Constants.MinMonth - 1) - { - month = Constants.MaxMonth; - year--; - } - } - - if (!string.IsNullOrEmpty(yearStr)) - { - hasYear = true; - if (ChineseDateExtractorConfiguration.NextRe.Match(yearStr).Success) - { - ++year; - } - else if (ChineseDateExtractorConfiguration.LastRe.Match(yearStr).Success) - { - --year; 
- } - } - } - - ret.Timex = DateTimeFormatUtil.LuisDate(hasYear ? year : -1, hasMonth ? month : -1, day); - - DateObject futureDate, pastDate; - - if (day > GetMonthMaxDay(year, month)) - { - var futureMonth = month + 1; - var pastMonth = month - 1; - var futureYear = year; - var pastYear = year; - - if (futureMonth == Constants.MaxMonth + 1) - { - futureMonth = Constants.MinMonth; - futureYear = year++; - } - - if (pastMonth == Constants.MinMonth - 1) - { - pastMonth = Constants.MaxMonth; - pastYear = year--; - } - - var isFutureValid = DateObjectExtension.IsValidDate(futureYear, futureMonth, day); - var isPastValid = DateObjectExtension.IsValidDate(pastYear, pastMonth, day); - - if (isFutureValid && isPastValid) - { - futureDate = DateObject.MinValue.SafeCreateFromValue(futureYear, futureMonth, day); - pastDate = DateObject.MinValue.SafeCreateFromValue(pastYear, pastMonth, day); - } - else if (isFutureValid && !isPastValid) - { - futureDate = pastDate = DateObject.MinValue.SafeCreateFromValue(futureYear, futureMonth, day); - } - else if (!isFutureValid && !isPastValid) - { - futureDate = pastDate = DateObject.MinValue.SafeCreateFromValue(pastYear, pastMonth, day); - } - else - { - // Fall back to normal cases, might lead to resolution failure - // TODO: Ideally, this failure should be filtered out in extract phase - futureDate = pastDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); - } - } - else - { - futureDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); - pastDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); - - if (!hasMonth) - { - if (futureDate < referenceDate) - { - if (IsValidDate(year, month + 1, day)) - { - futureDate = futureDate.AddMonths(1); - } - } - - if (pastDate >= referenceDate) - { - if (IsValidDate(year, month - 1, day)) - { - pastDate = pastDate.AddMonths(-1); - } - else if (IsNonleapYearFeb29th(year, month - 1, day)) - { - pastDate = pastDate.AddMonths(-2); - } - } - } - else if (!hasYear) - { 
- if (futureDate < referenceDate) - { - if (IsValidDate(year + 1, month, day)) - { - futureDate = futureDate.AddYears(1); - } - } - - if (pastDate >= referenceDate) - { - if (IsValidDate(year - 1, month, day)) - { - pastDate = pastDate.AddYears(-1); - } - } - } - } - - ret.FutureValue = futureDate; - ret.PastValue = pastDate; - ret.Success = true; - - return ret; - } + public Regex LunarRegex { get; } - // handle cases like "昨日", "明日", "大后天" - match = ChineseDateExtractorConfiguration.SpecialDayRegex.MatchExact(text, trim: true); + public Regex UnitRegex { get; } - if (match.Success) - { - var value = referenceDate.AddDays(ChineseDateTimeParserConfiguration.GetSwiftDay(match.Value)); - ret.Timex = DateTimeFormatUtil.LuisDate(value); - ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(value.Year, value.Month, value.Day); - ret.Success = true; + public Regex BeforeRegex { get; } - return ret; - } + public Regex AfterRegex { get; } - if (!ret.Success) - { - ret = MatchThisWeekday(text, referenceDate); - } + public Regex NextRegex { get; } - if (!ret.Success) - { - ret = MatchNextWeekday(text, referenceDate); - } + public Regex NextNextRegex { get; } - if (!ret.Success) - { - ret = MatchLastWeekday(text, referenceDate); - } + public Regex LastLastRegex { get; } - if (!ret.Success) - { - ret = MatchWeekdayAlone(text, referenceDate); - } + public Regex ThisRegex { get; } - return ret; - } + public Regex LastRegex { get; } - protected DateTimeResolutionResult MatchNextWeekday(string text, DateObject reference) - { - var result = new DateTimeResolutionResult(); - var match = this.config.NextRegex.MatchExact(text, trim: true); + public Regex WeekDayOfMonthRegex { get; } - if (match.Success) - { - var weekdayKey = match.Groups["weekday"].Value; - var value = reference.Next((DayOfWeek)this.config.DayOfWeek[weekdayKey]); + public Regex WeekDayAndDayRegex { get; } - result.Timex = DateTimeFormatUtil.LuisDate(value); - result.FutureValue = 
result.PastValue = DateObject.MinValue.SafeCreateFromValue(value.Year, value.Month, value.Day); - result.Success = true; - } + public Regex DurationRelativeDurationUnitRegex { get; } - return result; - } + public Regex SpecialDayWithNumRegex { get; } - protected DateTimeResolutionResult MatchThisWeekday(string text, DateObject reference) - { - var result = new DateTimeResolutionResult(); - var match = this.config.ThisRegex.MatchExact(text, trim: true); + public Regex DynastyYearRegex { get; } - if (match.Success) - { - var weekdayKey = match.Groups["weekday"].Value; - var value = reference.This((DayOfWeek)this.config.DayOfWeek[weekdayKey]); + public ImmutableDictionary DynastyYearMap { get; } - result.Timex = DateTimeFormatUtil.LuisDate(value); - result.FutureValue = result.PastValue = DateObject.MinValue.SafeCreateFromValue(value.Year, value.Month, value.Day); - result.Success = true; - } + public IImmutableDictionary CardinalMap { get; } - return result; - } + public IImmutableDictionary UnitMap { get; } - protected DateTimeResolutionResult MatchLastWeekday(string text, DateObject reference) - { - var result = new DateTimeResolutionResult(); - var match = this.config.LastRegex.MatchExact(text, trim: true); + public IImmutableDictionary DayOfMonth { get; } - if (match.Success) - { - var weekdayKey = match.Groups["weekday"].Value; - var value = reference.Last((DayOfWeek)this.config.DayOfWeek[weekdayKey]); + public IImmutableDictionary DayOfWeek { get; } - result.Timex = DateTimeFormatUtil.LuisDate(value); - result.FutureValue = result.PastValue = DateObject.MinValue.SafeCreateFromValue(value.Year, value.Month, value.Day); - result.Success = true; - } + public IImmutableDictionary MonthOfYear { get; } - return result; - } + public string DynastyStartYear { get; } - protected DateTimeResolutionResult MatchWeekdayAlone(string text, DateObject reference) - { - var result = new DateTimeResolutionResult(); - var match = this.config.StrictWeekDayRegex.MatchExact(text, 
trim: true); + Regex ICJKDateParserConfiguration.LastWeekDayRegex => LastWeekDayRegex; - if (match.Success) - { - var weekdayKey = match.Groups["weekday"].Value; - var weekday = this.config.DayOfWeek[weekdayKey]; - var value = reference.This((DayOfWeek)weekday); - - if (weekday == 0) - { - weekday = 7; - } - - if (weekday < (int)reference.DayOfWeek) - { - value = reference.Next((DayOfWeek)weekday); - } - - result.Timex = "XXXX-WXX-" + weekday; - var futureDate = value; - var pastDate = value; - if (futureDate < reference) - { - futureDate = futureDate.AddDays(7); - } - - if (pastDate >= reference) - { - pastDate = pastDate.AddDays(-7); - } - - result.FutureValue = futureDate; - result.PastValue = pastDate; - result.Success = true; - } + Regex ICJKDateParserConfiguration.NextMonthRegex => NextMonthRegex; - return result; - } + Regex ICJKDateParserConfiguration.LastMonthRegex => LastMonthRegex; - protected virtual DateTimeResolutionResult ParseWeekdayOfMonth(string text, DateObject referenceDate) + public int GetSwiftDay(string text) { - var ret = new DateTimeResolutionResult(); + var value = 0; - var trimmedText = text.Trim(); - var match = this.config.WeekDayOfMonthRegex.Match(trimmedText); - if (!match.Success) + // @TODO move hardcoded values to resources file + if (text.Equals("今天", StringComparison.Ordinal) || + text.Equals("今日", StringComparison.Ordinal) || + text.Equals("最近", StringComparison.Ordinal)) { - return ret; + value = 0; } - - var cardinalStr = match.Groups["cardinal"].Value; - var weekdayStr = match.Groups["weekday"].Value; - var monthStr = match.Groups["month"].Value; - var noYear = false; - int year; - - int cardinal; - if (cardinalStr.Equals(this.config.LastWeekDayToken, StringComparison.Ordinal)) + else if (text.StartsWith("明", StringComparison.Ordinal)) { - cardinal = 5; + value = 1; } - else + else if (text.StartsWith("昨", StringComparison.Ordinal)) { - cardinal = this.config.CardinalMap[cardinalStr]; + value = -1; } - - var weekday = 
this.config.DayOfWeek[weekdayStr]; - int month; - if (string.IsNullOrEmpty(monthStr)) + else if (text.Equals("大后天", StringComparison.Ordinal) || + text.Equals("大後天", StringComparison.Ordinal)) { - var swift = 0; - if (trimmedText.StartsWith(this.config.NextMonthToken, StringComparison.Ordinal)) - { - swift = 1; - } - else if (trimmedText.StartsWith(this.config.LastMonthToken, StringComparison.Ordinal)) - { - swift = -1; - } - - month = referenceDate.AddMonths(swift).Month; - year = referenceDate.AddMonths(swift).Year; + value = 3; } - else + else if (text.Equals("大前天", StringComparison.Ordinal)) { - month = this.config.MonthOfYear[monthStr]; - year = referenceDate.Year; - noYear = true; + value = -3; } - - var value = ComputeDate(cardinal, weekday, month, year); - if (value.Month != month) + else if (text.Equals("后天", StringComparison.Ordinal) || + text.Equals("後天", StringComparison.Ordinal)) { - cardinal -= 1; - value = value.AddDays(-7); + value = 2; } - - var futureDate = value; - var pastDate = value; - if (noYear && futureDate < referenceDate) + else if (text.Equals("前天", StringComparison.Ordinal)) { - futureDate = ComputeDate(cardinal, weekday, month, year + 1); - if (futureDate.Month != month) - { - futureDate = futureDate.AddDays(-7); - } + value = -2; } - if (noYear && pastDate >= referenceDate) - { - pastDate = ComputeDate(cardinal, weekday, month, year - 1); - if (pastDate.Month != month) - { - pastDate = pastDate.AddDays(-7); - } - } - - // here is a very special case, timeX follows future date - ret.Timex = $@"XXXX-{month:D2}-WXX-{weekday}-#{cardinal}"; - ret.FutureValue = futureDate; - ret.PastValue = pastDate; - ret.Success = true; - - return ret; - } - - // parse a regex match which includes 'day', 'month' and 'year' (optional) group - protected DateTimeResolutionResult Match2Date(Match match, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - - var monthStr = match.Groups["month"].Value; - var dayStr = 
match.Groups["day"].Value; - var yearStr = match.Groups["year"].Value; - var yearChsStr = match.Groups["yearchs"].Value; - int month = 0, day = 0, year = 0; - - var tmp = ConvertChineseYearToInteger(yearChsStr); - year = tmp == -1 ? 0 : tmp; - - if (this.config.MonthOfYear.ContainsKey(monthStr) && this.config.DayOfMonth.ContainsKey(dayStr)) - { - month = this.config.MonthOfYear[monthStr] > 12 ? this.config.MonthOfYear[monthStr] % 12 : this.config.MonthOfYear[monthStr]; - day = this.config.DayOfMonth[dayStr] > 31 ? this.config.DayOfMonth[dayStr] % 31 : this.config.DayOfMonth[dayStr]; - if (!string.IsNullOrEmpty(yearStr)) - { - year = int.Parse(yearStr); - if (year < 100 && year >= Constants.MinTwoDigitYearPastNum) - { - year += 1900; - } - else if (year >= 0 && year < Constants.MaxTwoDigitYearFutureNum) - { - year += 2000; - } - } - } - - var noYear = false; - if (year == 0) - { - year = referenceDate.Year; - ret.Timex = DateTimeFormatUtil.LuisDate(-1, month, day); - noYear = true; - } - else - { - ret.Timex = DateTimeFormatUtil.LuisDate(year, month, day); - } - - var futureDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); - var pastDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); - if (noYear && futureDate < referenceDate) - { - futureDate = futureDate.AddYears(+1); - } - - if (noYear && pastDate >= referenceDate) - { - pastDate = pastDate.AddYears(-1); - } - - ret.FutureValue = futureDate; - ret.PastValue = pastDate; - ret.Success = true; - - return ret; - } - - // parse if lunar contains - private static bool IsLunarCalendar(string text) - { - var trimmedText = text.Trim(); - var match = ChineseDateExtractorConfiguration.LunarRegex.Match(trimmedText); - - return match.Success; - } - - private static DateObject ComputeDate(int cardinal, int weekday, int month, int year) - { - var firstDay = DateObject.MinValue.SafeCreateFromValue(year, month, 1); - var firstWeekday = firstDay.This((DayOfWeek)weekday); - if (weekday == 0) - { - 
weekday = 7; - } - - if (weekday < (int)firstDay.DayOfWeek) - { - firstWeekday = firstDay.Next((DayOfWeek)weekday); - } - - return firstWeekday.AddDays(7 * (cardinal - 1)); - } - - private static int GetMonthMaxDay(int year, int month) - { - var maxDay = MonthMaxDays[month - 1]; - - if (!DateObject.IsLeapYear(year) && month == 2) - { - maxDay -= 1; - } - - return maxDay; - } - - // Judge if a date is valid - private static bool IsValidDate(int year, int month, int day) - { - if (month < Constants.MinMonth) - { - year--; - month = Constants.MaxMonth; - } - - if (month > Constants.MaxMonth) - { - year++; - month = Constants.MinMonth; - } - - return DateObjectExtension.IsValidDate(year, month, day); - } - - // Judge the date is non-leap year Feb 29th - private static bool IsNonleapYearFeb29th(int year, int month, int day) - { - return !DateObject.IsLeapYear(year) && month == 2 && day == 29; - } - - // Handle cases like "三天前" - private DateTimeResolutionResult ParserDurationWithBeforeAndAfter(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - var durationRes = durationExtractor.Extract(text, referenceDate); - var numStr = string.Empty; - var unitStr = string.Empty; - if (durationRes.Count > 0) - { - var match = ChineseDateExtractorConfiguration.UnitRegex.Match(text); - if (match.Success) - { - var suffix = text.Substring((int)durationRes[0].Start + (int)durationRes[0].Length).Trim(); - var srcUnit = match.Groups["unit"].Value; - - var numberStr = text.Substring((int)durationRes[0].Start, match.Index - (int)durationRes[0].Start).Trim(); - var number = ConvertChineseToNum(numberStr); - - if (this.config.UnitMap.ContainsKey(srcUnit)) - { - unitStr = this.config.UnitMap[srcUnit]; - - var beforeMatch = ChineseDateExtractorConfiguration.BeforeRegex.Match(suffix); - if (beforeMatch.Success && suffix.StartsWith(beforeMatch.Value)) - { - DateObject date; - switch (unitStr) - { - case Constants.TimexDay: - date = 
referenceDate.AddDays(-number); - break; - case Constants.TimexWeek: - date = referenceDate.AddDays(-7 * number); - break; - case Constants.TimexMonthFull: - date = referenceDate.AddMonths(-number); - break; - case Constants.TimexYear: - date = referenceDate.AddYears(-number); - break; - default: - return ret; - } - - ret.Timex = $"{DateTimeFormatUtil.LuisDate(date)}"; - ret.FutureValue = ret.PastValue = date; - ret.Success = true; - return ret; - } - - var afterMatch = ChineseDateExtractorConfiguration.AfterRegex.Match(suffix); - if (afterMatch.Success && suffix.StartsWith(afterMatch.Value)) - { - DateObject date; - switch (unitStr) - { - case Constants.TimexDay: - date = referenceDate.AddDays(number); - break; - case Constants.TimexWeek: - date = referenceDate.AddDays(7 * number); - break; - case Constants.TimexMonthFull: - date = referenceDate.AddMonths(number); - break; - case Constants.TimexYear: - date = referenceDate.AddYears(number); - break; - default: - return ret; - } - - ret.Timex = $"{DateTimeFormatUtil.LuisDate(date)}"; - ret.FutureValue = ret.PastValue = date; - ret.Success = true; - return ret; - } - } - } - } - - return ret; - } - - // Convert Chinese Number to Integer - private int ConvertChineseToNum(string numStr) - { - var num = -1; - var er = integerExtractor.Extract(numStr); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) - { - num = Convert.ToInt32((double)(numberParser.Parse(er[0]).Value ?? 0)); - } - } - - return num; - } - - // convert Chinese Year to Integer - private int ConvertChineseYearToInteger(string yearChsStr) - { - var year = 0; - var num = 0; - - var er = integerExtractor.Extract(yearChsStr); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) - { - num = Convert.ToInt32((double)(numberParser.Parse(er[0]).Value ?? 
0)); - } - } - - if (num < 10) - { - num = 0; - foreach (var ch in yearChsStr) - { - num *= 10; - er = integerExtractor.Extract(ch.ToString()); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) - { - num += Convert.ToInt32((double)(numberParser.Parse(er[0]).Value ?? 0)); - } - } - } - } - - year = num; - - return year < 10 ? -1 : year; + return value; } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDatePeriodParserConfiguration.cs index 9085c04eef..7eba76ad55 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDatePeriodParserConfiguration.cs @@ -1,1535 +1,332 @@ -using System; -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; using System.Globalization; using System.Linq; +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; -using Microsoft.Recognizers.Text.Number; -using Microsoft.Recognizers.Text.Number.Chinese; -using Microsoft.Recognizers.Text.Utilities; -using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseDatePeriodParserConfiguration : IDateTimeParser + public class ChineseDatePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKDatePeriodParserConfiguration { - public static readonly string ParserName = Constants.SYS_DATETIME_DATEPERIOD; // "DatePeriod"; - - private static readonly IDateTimeExtractor SingleDateExtractor = new ChineseDateExtractorConfiguration(); - - private static readonly IExtractor IntegerExtractor = new IntegerExtractor(); - - private static readonly IParser IntegerParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Chinese))); - private static readonly IDateTimeExtractor DurationExtractor = new ChineseDurationExtractorConfiguration(); + public static readonly Regex WoMLastRegex = new Regex(DateTimeDefinitions.WoMLastRegex, RegexFlags, RegexTimeOut); + public static readonly Regex WoMPreviousRegex = new Regex(DateTimeDefinitions.WoMPreviousRegex, RegexFlags, RegexTimeOut); + public static readonly Regex WoMNextRegex = new Regex(DateTimeDefinitions.WoMNextRegex, RegexFlags, RegexTimeOut); + + public static readonly ImmutableDictionary MonthOfYear = DateTimeDefinitions.ParserConfigurationMonthOfYear.ToImmutableDictionary(); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public ChineseDatePeriodParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) + { + IntegerExtractor = config.IntegerExtractor; + NumberParser = config.NumberParser; + DateExtractor = 
config.DateExtractor; + DurationExtractor = config.DurationExtractor; + CardinalExtractor = config.CardinalExtractor; + DurationParser = config.DurationParser; + DateParser = config.DateParser; - private static readonly Calendar Cal = DateTimeFormatInfo.InvariantInfo.Calendar; + DynastyYearRegex = ChineseDateExtractorConfiguration.DynastyYearRegex; + DynastyStartYear = ChineseDateExtractorConfiguration.DynastyStartYear; + DynastyYearMap = ChineseDateExtractorConfiguration.DynastyYearMap; + SimpleCasesRegex = ChineseDatePeriodExtractorConfiguration.SimpleCasesRegex; + ThisRegex = ChineseDatePeriodExtractorConfiguration.ThisRegex; + NextNextRegex = ChineseDatePeriodExtractorConfiguration.NextNextRegex; + LastLastRegex = ChineseDatePeriodExtractorConfiguration.LastLastRegex; + NextRegex = ChineseDatePeriodExtractorConfiguration.NextRegex; + LastRegex = ChineseDatePeriodExtractorConfiguration.LastRegex; + YearToYear = ChineseDatePeriodExtractorConfiguration.YearToYear; + YearToYearSuffixRequired = ChineseDatePeriodExtractorConfiguration.YearToYearSuffixRequired; + YearRegex = ChineseDatePeriodExtractorConfiguration.YearRegex; + YearInCJKRegex = ChineseDatePeriodExtractorConfiguration.YearInCJKRegex; + MonthToMonth = ChineseDatePeriodExtractorConfiguration.MonthToMonth; + MonthToMonthSuffixRequired = ChineseDatePeriodExtractorConfiguration.MonthToMonthSuffixRequired; + DayToDay = ChineseDatePeriodExtractorConfiguration.DayToDay; + MonthDayRange = ChineseDatePeriodExtractorConfiguration.MonthDayRange; + DayRegexForPeriod = ChineseDatePeriodExtractorConfiguration.DayRegexForPeriod; + MonthRegex = ChineseDatePeriodExtractorConfiguration.MonthRegex; + SpecialMonthRegex = ChineseDatePeriodExtractorConfiguration.SpecialMonthRegex; + SpecialYearRegex = ChineseDatePeriodExtractorConfiguration.SpecialYearRegex; + YearAndMonth = ChineseDatePeriodExtractorConfiguration.YearAndMonth; + PureNumYearAndMonth = ChineseDatePeriodExtractorConfiguration.PureNumYearAndMonth; + 
SimpleYearAndMonth = ChineseDatePeriodExtractorConfiguration.SimpleYearAndMonth; + OneWordPeriodRegex = ChineseDatePeriodExtractorConfiguration.OneWordPeriodRegex; + NumberCombinedWithUnit = ChineseDatePeriodExtractorConfiguration.NumberCombinedWithUnit; + PastRegex = ChineseDatePeriodExtractorConfiguration.PastRegex; + FutureRegex = ChineseDatePeriodExtractorConfiguration.FutureRegex; + WeekWithWeekDayRangeRegex = ChineseDatePeriodExtractorConfiguration.WeekWithWeekDayRangeRegex; + UnitRegex = ChineseDatePeriodExtractorConfiguration.UnitRegex; + DurationUnitRegex = ChineseDatePeriodExtractorConfiguration.DurationUnitRegex; + WeekOfMonthRegex = ChineseDatePeriodExtractorConfiguration.WeekOfMonthRegex; + WeekOfYearRegex = ChineseDatePeriodExtractorConfiguration.WeekOfYearRegex; + WeekOfDateRegex = ChineseDatePeriodExtractorConfiguration.WeekOfDateRegex; + MonthOfDateRegex = ChineseDatePeriodExtractorConfiguration.MonthOfDateRegex; + WhichWeekRegex = ChineseDatePeriodExtractorConfiguration.WhichWeekRegex; + FirstLastOfYearRegex = ChineseDatePeriodExtractorConfiguration.FirstLastOfYearRegex; + SeasonWithYear = ChineseDatePeriodExtractorConfiguration.SeasonWithYear; + QuarterRegex = ChineseDatePeriodExtractorConfiguration.QuarterRegex; + DecadeRegex = ChineseDatePeriodExtractorConfiguration.DecadeRegex; + CenturyRegex = ChineseDatePeriodExtractorConfiguration.CenturyRegex; + ComplexDatePeriodRegex = ChineseDatePeriodExtractorConfiguration.ComplexDatePeriodRegex; + RelativeRegex = ChineseDateExtractorConfiguration.RelativeRegex; + RelativeMonthRegex = ChineseDateExtractorConfiguration.RelativeMonthRegex; + LaterEarlyPeriodRegex = ChineseDatePeriodExtractorConfiguration.LaterEarlyPeriodRegex; + DatePointWithAgoAndLater = ChineseDatePeriodExtractorConfiguration.DatePointWithAgoAndLater; + ReferenceDatePeriodRegex = ChineseDatePeriodExtractorConfiguration.ReferenceDatePeriodRegex; + DurationRelativeDurationUnitRegex = 
ChineseDateExtractorConfiguration.DurationRelativeDurationUnitRegex; + UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToImmutableDictionary(); + CardinalMap = DateTimeDefinitions.ParserConfigurationCardinalMap.ToImmutableDictionary(); + DayOfMonth = DateTimeDefinitions.ParserConfigurationDayOfMonth.ToImmutableDictionary(); + SeasonMap = DateTimeDefinitions.ParserConfigurationSeasonMap.ToImmutableDictionary(); - private readonly IFullDateTimeParserConfiguration config; - - public ChineseDatePeriodParserConfiguration(IFullDateTimeParserConfiguration configuration) - { - config = configuration; } - public ParseResult Parse(ExtractResult extResult) - { - return this.Parse(extResult, DateObject.Now); - } + public IDateTimeExtractor DateExtractor { get; } - public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) - { - var referenceDate = refDate; + public IDateTimeExtractor DurationExtractor { get; } - object value = null; + public IDateTimeParser DurationParser { get; } - if (er.Type.Equals(ParserName, StringComparison.Ordinal)) - { - var innerResult = ParseSimpleCases(er.Text, referenceDate); - if (!innerResult.Success) - { - innerResult = ParseOneWordPeriod(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = MergeTwoTimePoints(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseNumberWithUnit(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseYearAndMonth(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseYearToYear(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseMonthToMonth(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseYear(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseWeekOfMonth(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseSeason(er.Text, referenceDate); - } - - if 
(!innerResult.Success) - { - innerResult = ParseQuarter(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseDecade(er.Text, referenceDate); - } - - if (innerResult.Success) - { - if (innerResult.FutureValue != null && innerResult.PastValue != null) - { - innerResult.FutureResolution = new Dictionary - { - { - TimeTypeConstants.START_DATE, - DateTimeFormatUtil.FormatDate(((Tuple)innerResult.FutureValue).Item1) - }, - { - TimeTypeConstants.END_DATE, - DateTimeFormatUtil.FormatDate(((Tuple)innerResult.FutureValue).Item2) - }, - }; - - innerResult.PastResolution = new Dictionary - { - { - TimeTypeConstants.START_DATE, - DateTimeFormatUtil.FormatDate(((Tuple)innerResult.PastValue).Item1) - }, - { - TimeTypeConstants.END_DATE, - DateTimeFormatUtil.FormatDate(((Tuple)innerResult.PastValue).Item2) - }, - }; - } - else - { - innerResult.PastResolution = innerResult.FutureResolution = new Dictionary(); - } - - value = innerResult; - } - } + public IDateTimeParser DateParser { get; } - var ret = new DateTimeParseResult - { - Text = er.Text, - Start = er.Start, - Length = er.Length, - Type = er.Type, - Data = er.Data, - Value = value, - TimexStr = value == null ? string.Empty : ((DateTimeResolutionResult)value).Timex, - ResolutionStr = string.Empty, - }; - - return ret; - } + public IExtractor IntegerExtractor { get; } - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } + public IExtractor CardinalExtractor { get; } - // convert Chinese Number to Integer - private static int ConvertChineseToNum(string numStr) - { - var num = -1; - var er = IntegerExtractor.Extract(numStr); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) - { - num = Convert.ToInt32((double)(IntegerParser.Parse(er[0]).Value ?? 
0)); - } - } + public IParser NumberParser { get; } - return num; - } + public ImmutableDictionary DynastyYearMap { get; } - // convert Chinese Year to Integer - private static int ConvertChineseToInteger(string yearChsStr) - { - var year = 0; - var num = 0; + public IImmutableDictionary UnitMap { get; } - var er = IntegerExtractor.Extract(yearChsStr); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) - { - num = Convert.ToInt32((double)(IntegerParser.Parse(er[0]).Value ?? 0)); - } - } + public IImmutableDictionary CardinalMap { get; } - if (num < 10) - { - num = 0; - foreach (var ch in yearChsStr) - { - num *= 10; - er = IntegerExtractor.Extract(ch.ToString()); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) - { - num += Convert.ToInt32((double)(IntegerParser.Parse(er[0]).Value ?? 0)); - } - } - } - - year = num; - } - else - { - year = num; - } + public IImmutableDictionary DayOfMonth { get; } - return year == 0 ? 
-1 : year; - } + IImmutableDictionary ICJKDatePeriodParserConfiguration.MonthOfYear => MonthOfYear; - private static DateObject ComputeDate(int cardinal, int weekday, int month, int year) - { - var firstDay = DateObject.MinValue.SafeCreateFromValue(year, month, 1); - var firstWeekday = firstDay.This((DayOfWeek)weekday); - if (weekday == 0) - { - weekday = 7; - } + public IImmutableDictionary SeasonMap { get; } - if (weekday < (int)firstDay.DayOfWeek) - { - firstWeekday = firstDay.Next((DayOfWeek)weekday); - } + public string DynastyStartYear { get; } - return firstWeekday.AddDays(7 * (cardinal - 1)); - } + public string TokenBeforeDate => string.Empty; - private DateTimeResolutionResult ParseSimpleCases(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - int year = referenceDate.Year, month = referenceDate.Month; - int beginDay, endDay; - var noYear = false; - var inputYear = false; + public Regex DynastyYearRegex { get; } - var match = ChineseDatePeriodExtractorConfiguration.SimpleCasesRegex.MatchExact(text, trim: true); - string beginLuisStr, endLuisStr; + public Regex SimpleCasesRegex { get; } - if (match.Success) - { - var days = match.Groups["day"]; - beginDay = this.config.DayOfMonth[days.Captures[0].Value]; - endDay = this.config.DayOfMonth[days.Captures[1].Value]; - - var monthStr = match.Groups["month"].Value; - var yearStr = match.Groups["year"].Value; - if (!string.IsNullOrEmpty(yearStr)) - { - year = int.Parse(yearStr); - if (year < 100 && year >= this.config.TwoNumYear) - { - year += 1900; - } - else if (year < 100 && year < this.config.TwoNumYear) - { - year += 2000; - } - - inputYear = true; - } - else - { - noYear = true; - } - - if (!string.IsNullOrEmpty(monthStr)) - { - month = ToMonthNumber(monthStr); - } - else - { - monthStr = match.Groups["relmonth"].Value.Trim(); - var thisMatch = ChineseDatePeriodExtractorConfiguration.ThisRegex.Match(monthStr); - var nextMatch = 
ChineseDatePeriodExtractorConfiguration.NextRegex.Match(monthStr); - var lastMatch = ChineseDatePeriodExtractorConfiguration.LastRegex.Match(monthStr); - - if (thisMatch.Success) - { - // do nothing - } - else if (nextMatch.Success) - { - if (month != 12) - { - month += 1; - } - else - { - month = 1; - year += 1; - } - } - else - { - if (month != 1) - { - month -= 1; - } - else - { - month = 12; - year -= 1; - } - } - } - - if (inputYear || ChineseDatePeriodExtractorConfiguration.ThisRegex.Match(monthStr).Success || - ChineseDatePeriodExtractorConfiguration.NextRegex.Match(monthStr).Success) - { - beginLuisStr = DateTimeFormatUtil.LuisDate(year, month, beginDay); - endLuisStr = DateTimeFormatUtil.LuisDate(year, month, endDay); - } - else - { - beginLuisStr = DateTimeFormatUtil.LuisDate(-1, month, beginDay); - endLuisStr = DateTimeFormatUtil.LuisDate(-1, month, endDay); - } - } - else - { - return ret; - } + public Regex ThisRegex { get; } - int futureYear = year, pastYear = year; - var startDate = DateObject.MinValue.SafeCreateFromValue(year, month, beginDay); - if (noYear && startDate < referenceDate) - { - futureYear++; - } + public Regex NextNextRegex { get; } - if (noYear && startDate >= referenceDate) - { - pastYear--; - } + public Regex LastLastRegex { get; } - ret.Timex = $"({beginLuisStr},{endLuisStr},P{endDay - beginDay}D)"; + public Regex NextRegex { get; } - ret.FutureValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(futureYear, month, beginDay), - DateObject.MinValue.SafeCreateFromValue(futureYear, month, endDay)); + public Regex LastRegex { get; } - ret.PastValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(pastYear, month, beginDay), - DateObject.MinValue.SafeCreateFromValue(pastYear, month, endDay)); + public Regex YearToYear { get; } - ret.Success = true; + public Regex YearToYearSuffixRequired { get; } - return ret; - } + public Regex YearRegex { get; } - // handle like "2016年到2017年", "2016年和2017年之间" - private 
DateTimeResolutionResult ParseYearToYear(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - var match = ChineseDatePeriodExtractorConfiguration.YearToYear.Match(text); + public Regex RelativeRegex { get; } - if (!match.Success) - { - match = ChineseDatePeriodExtractorConfiguration.YearToYearSuffixRequired.Match(text); - } + public Regex RelativeMonthRegex { get; } - if (match.Success) - { - var yearMatch = ChineseDatePeriodExtractorConfiguration.YearRegex.Matches(text); - var yearInChineseMatch = ChineseDatePeriodExtractorConfiguration.YearInChineseRegex.Matches(text); - var beginYear = 0; - var endYear = 0; - - if (yearMatch.Count == 2) - { - var yearFrom = yearMatch[0].Groups["year"].Value; - var yearTo = yearMatch[1].Groups["year"].Value; - beginYear = int.Parse(yearFrom); - endYear = int.Parse(yearTo); - } - else if (yearInChineseMatch.Count == 2) - { - var yearFrom = yearInChineseMatch[0].Groups["yearchs"].Value; - var yearTo = yearInChineseMatch[1].Groups["yearchs"].Value; - beginYear = ConvertChineseToInteger(yearFrom); - endYear = ConvertChineseToInteger(yearTo); - } - else if (yearInChineseMatch.Count == 1 && yearMatch.Count == 1) - { - if (yearMatch[0].Index < yearInChineseMatch[0].Index) - { - var yearFrom = yearMatch[0].Groups["year"].Value; - var yearTo = yearInChineseMatch[0].Groups["yearch"].Value; - beginYear = int.Parse(yearFrom); - endYear = ConvertChineseToInteger(yearTo); - } - else - { - var yearFrom = yearInChineseMatch[0].Groups["yearch"].Value; - var yearTo = yearMatch[0].Groups["year"].Value; - beginYear = ConvertChineseToInteger(yearFrom); - endYear = int.Parse(yearTo); - } - } - - if (beginYear < 100 && beginYear >= this.config.TwoNumYear) - { - beginYear += 1900; - } - else if (beginYear < 100 && beginYear < this.config.TwoNumYear) - { - beginYear += 2000; - } - - if (endYear < 100 && endYear >= this.config.TwoNumYear) - { - endYear += 1900; - } - else if (endYear < 100 && endYear < 
this.config.TwoNumYear) - { - endYear += 2000; - } - - var beginDay = DateObject.MinValue.SafeCreateFromValue(beginYear, 1, 1); - var endDay = DateObject.MinValue.SafeCreateFromValue(endYear, 1, 1); - ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDay, endDay, DatePeriodTimexType.ByYear); - ret.FutureValue = ret.PastValue = new Tuple(beginDay, endDay); - ret.Success = true; - return ret; - } + public Regex LaterEarlyPeriodRegex { get; } - return ret; - } + public Regex DatePointWithAgoAndLater { get; } - // handle like "3月到5月", "3月和5月之间" - private DateTimeResolutionResult ParseMonthToMonth(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - var match = ChineseDatePeriodExtractorConfiguration.MonthToMonth.Match(text); + public Regex ReferenceDatePeriodRegex { get; } - if (!match.Success) - { - match = ChineseDatePeriodExtractorConfiguration.MonthToMonthSuffixRequired.Match(text); - } + public Regex DurationRelativeDurationUnitRegex { get; } - if (match.Success) - { - var monthMatch = ChineseDatePeriodExtractorConfiguration.MonthRegex.Matches(text); - var beginMonth = 0; - var endMonth = 0; - - if (monthMatch.Count == 2) - { - var monthFrom = monthMatch[0].Groups["month"].Value; - var monthTo = monthMatch[1].Groups["month"].Value; - beginMonth = ToMonthNumber(monthFrom); - endMonth = ToMonthNumber(monthTo); - } - - var currentYear = referenceDate.Year; - var currentMonth = referenceDate.Month; - var beginYearForPastResolution = currentYear; - var endYearForPastResolution = currentYear; - var beginYearForFutureResolution = currentYear; - var endYearForFutureResolution = currentYear; - var durationMonths = 0; - - if (beginMonth < endMonth) - { - // For this case, FutureValue and PastValue share the same resolution - if (beginMonth < currentMonth && endMonth >= currentMonth) - { - // Keep the beginYear and endYear equal to currentYear - } - else if (beginMonth >= currentMonth) - { - beginYearForPastResolution = 
endYearForPastResolution = currentYear - 1; - } - else if (endMonth < currentMonth) - { - beginYearForFutureResolution = endYearForFutureResolution = currentYear + 1; - } - - durationMonths = endMonth - beginMonth; - } - else if (beginMonth > endMonth) - { - // For this case, FutureValue and PastValue share the same resolution - if (beginMonth < currentMonth) - { - endYearForPastResolution = endYearForFutureResolution = currentYear + 1; - } - else - { - beginYearForPastResolution = currentYear - 1; - endYearForFutureResolution = currentYear + 1; - } - - durationMonths = beginMonth - endMonth; - } - - if (durationMonths != 0) - { - var beginDateForPastResolution = DateObject.MinValue.SafeCreateFromValue(beginYearForPastResolution, beginMonth, 1); - var endDateForPastResolution = DateObject.MinValue.SafeCreateFromValue(endYearForPastResolution, endMonth, 1); - var beginDateForFutureResolution = DateObject.MinValue.SafeCreateFromValue(beginYearForFutureResolution, beginMonth, 1); - var endDateForFutureResolution = DateObject.MinValue.SafeCreateFromValue(endYearForFutureResolution, endMonth, 1); - - var beginTimex = DateTimeFormatUtil.LuisDate(beginDateForPastResolution, beginDateForFutureResolution); - var endTimex = DateTimeFormatUtil.LuisDate(endDateForPastResolution, endDateForFutureResolution); - ret.Timex = $"({beginTimex},{endTimex},P{durationMonths}M)"; - ret.PastValue = new Tuple(beginDateForPastResolution, endDateForPastResolution); - ret.FutureValue = new Tuple(beginDateForFutureResolution, endDateForFutureResolution); - ret.Success = true; - } - } + public Regex YearInCJKRegex { get; } - return ret; - } + public Regex MonthToMonth { get; } - // for case "2016年5月" - private DateTimeResolutionResult ParseYearAndMonth(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - var match = ChineseDatePeriodExtractorConfiguration.YearAndMonth.MatchExact(text, trim: true); + public Regex MonthToMonthSuffixRequired { get; } - if 
(!match.Success) - { - match = ChineseDatePeriodExtractorConfiguration.PureNumYearAndMonth.MatchExact(text, trim: true); - } + public Regex MonthRegex { get; } - if (!match.Success) - { - return ret; - } + public Regex YearAndMonth { get; } - // parse year - var year = referenceDate.Year; - var yearNum = match.Groups["year"].Value; - var yearChs = match.Groups["yearchs"].Value; - var yearRel = match.Groups["yearrel"].Value; - if (!string.IsNullOrEmpty(yearNum)) - { - if (IsYearOnly(yearNum)) - { - yearNum = yearNum.Substring(0, yearNum.Length - 1); - } + public Regex PureNumYearAndMonth { get; } - year = int.Parse(yearNum); - } - else if (!string.IsNullOrEmpty(yearChs)) - { - if (IsYearOnly(yearChs)) - { - yearChs = yearChs.Substring(0, yearChs.Length - 1); - } + public Regex OneWordPeriodRegex { get; } - year = ConvertChineseToInteger(yearChs); - } - else if (!string.IsNullOrEmpty(yearRel)) - { - if (IsLastYear(yearRel)) - { - year--; - } - else if (IsNextYear(yearRel)) - { - year++; - } - } + public Regex NumberCombinedWithUnit { get; } - if (year < 100 && year >= this.config.TwoNumYear) - { - year += 1900; - } - else if (year < this.config.TwoNumYear) - { - year += 2000; - } + public Regex PastRegex { get; } - var monthStr = match.Groups["month"].Value; - var month = ToMonthNumber(monthStr); - var beginDay = DateObject.MinValue.SafeCreateFromValue(year, month, 1); - DateObject endDay; + public Regex FutureRegex { get; } - if (month == 12) - { - endDay = DateObject.MinValue.SafeCreateFromValue(year + 1, 1, 1); - } - else - { - endDay = DateObject.MinValue.SafeCreateFromValue(year, month + 1, 1); - } + public Regex WeekWithWeekDayRangeRegex { get; } - ret.Timex = DateTimeFormatUtil.LuisDate(year, month); - ret.FutureValue = ret.PastValue = new Tuple(beginDay, endDay); - ret.Success = true; - return ret; - } + public Regex UnitRegex { get; } - // case like "今年三月" "这个周末" "五月" - private DateTimeResolutionResult ParseOneWordPeriod(string text, DateObject 
referenceDate) - { - var ret = new DateTimeResolutionResult(); - int year = referenceDate.Year, month = referenceDate.Month; - int futureYear = year, pastYear = year; + public Regex DurationUnitRegex { get; } - var trimmedText = text.Trim(); - var match = ChineseDatePeriodExtractorConfiguration.OneWordPeriodRegex.MatchExact(trimmedText, trim: true); + public Regex WeekOfMonthRegex { get; } - if (match.Success) - { - var monthStr = match.Groups["month"].Value; - if (IsThisYear(trimmedText)) - { - ret.Timex = TimexUtility.GenerateYearTimex(referenceDate); - ret.FutureValue = - ret.PastValue = - new Tuple(DateObject.MinValue.SafeCreateFromValue(referenceDate.Year, 1, 1), referenceDate); - ret.Success = true; - return ret; - } - - var thisMatch = ChineseDatePeriodExtractorConfiguration.ThisRegex.Match(trimmedText); - var nextMatch = ChineseDatePeriodExtractorConfiguration.NextRegex.Match(trimmedText); - var lastMatch = ChineseDatePeriodExtractorConfiguration.LastRegex.Match(trimmedText); - - if (!string.IsNullOrEmpty(monthStr)) - { - var swift = -10; - var yearRel = match.Groups["yearrel"].Value; - if (!string.IsNullOrEmpty(yearRel)) - { - if (IsNextYear(yearRel)) - { - swift = 1; - } - else if (IsLastYear(yearRel)) - { - swift = -1; - } - else if (IsThisYear(yearRel)) - { - swift = 0; - } - } - - month = ToMonthNumber(monthStr); - - if (swift >= -1) - { - year = year + swift; - ret.Timex = DateTimeFormatUtil.LuisDate(year, month); - futureYear = pastYear = year; - } - else - { - ret.Timex = DateTimeFormatUtil.LuisDate(Constants.InvalidYear, month); - if (month < referenceDate.Month) - { - futureYear++; - } - - if (month >= referenceDate.Month) - { - pastYear--; - } - } - } - else - { - var swift = 0; - if (nextMatch.Success) - { - swift = 1; - } - else if (lastMatch.Success) - { - swift = -1; - } - - // Handle cases with "(上|下)半" like "上半月"、 "下半年" - if (!string.IsNullOrEmpty(match.Groups["halfTag"].Value)) - { - return HandleWithHalfTag(trimmedText, referenceDate, 
ret, swift); - } - - if (IsWeekOnly(trimmedText)) - { - var monday = referenceDate.This(DayOfWeek.Monday).AddDays(7 * swift); - ret.Timex = DateTimeFormatUtil.ToIsoWeekTimex(monday); - ret.FutureValue = - ret.PastValue = - new Tuple( - referenceDate.This(DayOfWeek.Monday).AddDays(7 * swift), - referenceDate.This(DayOfWeek.Sunday).AddDays(7 * swift).AddDays(1)); - ret.Success = true; - return ret; - } - - if (IsWeekend(trimmedText)) - { - var beginDate = referenceDate.This(DayOfWeek.Saturday).AddDays(7 * swift); - var endDate = referenceDate.This(DayOfWeek.Sunday).AddDays(7 * swift); - - ret.Timex = TimexUtility.GenerateWeekendTimex(beginDate); - - ret.FutureValue = - ret.PastValue = new Tuple(beginDate, endDate.AddDays(1)); - - ret.Success = true; - - return ret; - } - - if (IsMonthOnly(trimmedText)) - { - month = referenceDate.AddMonths(swift).Month; - year = referenceDate.AddMonths(swift).Year; - ret.Timex = DateTimeFormatUtil.LuisDate(year, month); - futureYear = pastYear = year; - } - else if (IsYearOnly(trimmedText)) - { - // Handle like "今年上半年","明年下半年" - trimmedText = HandleWithHalfYear(match, trimmedText, out bool hasHalf, out bool isFirstHalf); - swift = hasHalf ? 
0 : swift; - - year = referenceDate.AddYears(swift).Year; - if (IsLastYear(trimmedText)) - { - year--; - } - else if (IsNextYear(trimmedText)) - { - year++; - } - else if (IsYearBeforeLast(trimmedText)) - { - year -= 2; - } - else if (IsYearAfterNext(trimmedText)) - { - year += 2; - } - - return HandleYearResult(ret, hasHalf, isFirstHalf, year); - } - } - } - else - { - return ret; - } - - // only "month" will come to here - ret.FutureValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(futureYear, month, 1), - DateObject.MinValue.SafeCreateFromValue(futureYear, month, 1).AddMonths(1)); + public Regex WeekOfYearRegex { get; } - ret.PastValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(pastYear, month, 1), - DateObject.MinValue.SafeCreateFromValue(pastYear, month, 1).AddMonths(1)); + public Regex WeekOfDateRegex { get; } - ret.Success = true; + public Regex MonthOfDateRegex { get; } - return ret; - } - - private DateTimeResolutionResult HandleWithHalfTag(string text, DateObject referenceDate, DateTimeResolutionResult ret, int swift) - { - DateObject beginDay, endDay; - int year = referenceDate.Year, month = referenceDate.Month; + public Regex WhichWeekRegex { get; } - if (IsWeekOnly(text)) - { - // Handle like "上半周","下半周" - beginDay = swift == -1 ? referenceDate.This(DayOfWeek.Monday) : referenceDate.This(DayOfWeek.Thursday); - endDay = swift == -1 ? referenceDate.This(DayOfWeek.Thursday) : referenceDate.This(DayOfWeek.Sunday).AddDays(1); - ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDay, endDay, DatePeriodTimexType.ByDay); - } - else if (IsMonthOnly(text)) - { - // Handle like "上半月","下半月" - var monthStartDay = DateObject.MinValue.SafeCreateFromValue(year, month, 1); - var monthEndDay = DateObject.MinValue.SafeCreateFromValue(year, month + 1, 1); - var halfMonthDay = (int)((monthEndDay - monthStartDay).TotalDays / 2); - - beginDay = swift == -1 ? monthStartDay : monthStartDay.AddDays(halfMonthDay); - endDay = swift == -1 ? 
monthStartDay.AddDays(halfMonthDay) : monthEndDay; - ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDay, endDay, DatePeriodTimexType.ByDay); - } - else - { - // Handle like "上(个)半年","下(个)半年" - beginDay = swift == -1 ? DateObject.MinValue.SafeCreateFromValue(year, 1, 1) : DateObject.MinValue.SafeCreateFromValue(year, 7, 1); - endDay = swift == -1 ? DateObject.MinValue.SafeCreateFromValue(year, 7, 1) : DateObject.MinValue.SafeCreateFromValue(year + 1, 1, 1); - ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDay, endDay, DatePeriodTimexType.ByMonth); - } - - ret.FutureValue = ret.PastValue = new Tuple(beginDay, endDay); - ret.Success = true; - return ret; - } - - // only contains year like "2016年" or "2016年上半年" - private DateTimeResolutionResult ParseYear(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - var match = ChineseDatePeriodExtractorConfiguration.YearRegex.MatchExact(text, trim: true); - - if (match.Success) - { - var yearStr = match.Value; - - // Handle like "2016年上半年","2017年下半年" - yearStr = HandleWithHalfYear(match, yearStr, out bool hasHalf, out bool isFirstHalf); - - // Trim() to handle extra whitespaces like '07 年' - if (IsYearOnly(yearStr)) - { - yearStr = yearStr.Substring(0, yearStr.Length - 1).Trim(); - } - - var year = int.Parse(yearStr); - - return HandleYearResult(ret, hasHalf, isFirstHalf, year); - } - - match = ChineseDatePeriodExtractorConfiguration.YearInChineseRegex.MatchExact(text, trim: true); - - if (match.Success) - { - var yearStr = match.Value; - - // Handle like "二零一七年上半年","二零一七年下半年" - yearStr = HandleWithHalfYear(match, yearStr, out bool hasHalf, out bool isFirstHalf); - - if (IsYearOnly(yearStr)) - { - yearStr = yearStr.Substring(0, yearStr.Length - 1); - } - - if (yearStr.Length == 1) - { - return ret; - } - - var year = ConvertChineseToInteger(yearStr); - - return HandleYearResult(ret, hasHalf, isFirstHalf, year); - } - - return ret; - } - - private string 
HandleWithHalfYear(ConditionalMatch match, string text, out bool hasHalf, out bool isFirstHalf) - { - var firstHalf = match.Groups["firstHalf"].Value; - var secondHalf = match.Groups["secondHalf"].Value; - hasHalf = false; - isFirstHalf = !string.IsNullOrEmpty(firstHalf) ? true : false; - - if (isFirstHalf || !string.IsNullOrEmpty(secondHalf)) - { - var halfText = isFirstHalf ? firstHalf : secondHalf; - text = text.Substring(0, text.Length - halfText.Length); - hasHalf = true; - } - - return text; - } - - private DateTimeResolutionResult HandleYearResult(DateTimeResolutionResult ret, bool hasHalf, bool isFirstHalf, int year) - { - if (year < 100 && year >= this.config.TwoNumYear) - { - year += 1900; - } - else if (year < 100 && year < this.config.TwoNumYear) - { - year += 2000; - } + public Regex FirstLastOfYearRegex { get; } - var beginDay = DateObject.MinValue.SafeCreateFromValue(year, 1, 1); - var endDay = DateObject.MinValue.SafeCreateFromValue(year + 1, 1, 1); + public Regex SeasonWithYear { get; } - ret.Timex = DateTimeFormatUtil.LuisDate(year); + public Regex QuarterRegex { get; } - if (hasHalf) - { - if (isFirstHalf) - { - endDay = DateObject.MinValue.SafeCreateFromValue(year, 7, 1); - } - else - { - beginDay = DateObject.MinValue.SafeCreateFromValue(year, 7, 1); - } - - ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDay, endDay, DatePeriodTimexType.ByMonth); - } + public Regex DecadeRegex { get; } - ret.FutureValue = ret.PastValue = new Tuple(beginDay, endDay); - ret.Success = true; + public Regex CenturyRegex { get; } - return ret; - } + public Regex ComplexDatePeriodRegex { get; } - // parse entities that made up by two time points - private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - var er = SingleDateExtractor.Extract(text, referenceDate); - if (er.Count < 2) - { - er = SingleDateExtractor.Extract("on " + text, referenceDate); - if (er.Count < 2) - { - 
return ret; - } - - er[0].Start -= 3; - er[1].Start -= 3; - } + public Regex DayToDay { get; } - var pr1 = this.config.DateParser.Parse(er[0], referenceDate); - var pr2 = this.config.DateParser.Parse(er[1], referenceDate); + public Regex MonthDayRange { get; } - if (er.Count >= 2) - { - // @TODO Refactor code to remove the cycle between BaseDatePeriodParser and its config. - var dateContext = BaseDatePeriodParser.GetYearContext(this.config, er[0].Text, er[1].Text, text); + public Regex DayRegexForPeriod { get; } - if (pr1.Value == null || pr2.Value == null) - { - return ret; - } + public Regex SimpleYearAndMonth { get; } - pr1 = dateContext.ProcessDateEntityParsingResult(pr1); - pr2 = dateContext.ProcessDateEntityParsingResult(pr2); - } + public Regex SpecialMonthRegex { get; } - DateObject futureBegin = (DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue, - futureEnd = (DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue; - DateObject pastBegin = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue, - pastEnd = (DateObject)((DateTimeResolutionResult)pr2.Value).PastValue; + public Regex SpecialYearRegex { get; } - if (futureBegin > futureEnd) - { - futureBegin = pastBegin; - } + Regex ICJKDatePeriodParserConfiguration.WoMLastRegex => WoMLastRegex; - if (pastEnd < pastBegin) - { - pastEnd = futureEnd; - } + Regex ICJKDatePeriodParserConfiguration.WoMPreviousRegex => WoMPreviousRegex; - ret.Timex = $"({pr1.TimexStr},{pr2.TimexStr},P{(futureEnd - futureBegin).TotalDays}D)"; - ret.FutureValue = new Tuple(futureBegin, futureEnd); - ret.PastValue = new Tuple(pastBegin, pastEnd); - ret.Success = true; + Regex ICJKDatePeriodParserConfiguration.WoMNextRegex => WoMNextRegex; - return ret; - } + public int TwoNumYear => int.Parse(DateTimeDefinitions.TwoNumYear, CultureInfo.InvariantCulture); - // handle like "前两年" "前三个月" - private DateTimeResolutionResult ParseNumberWithUnit(string text, DateObject referenceDate) + public int ToMonthNumber(string 
monthStr) { - var ret = new DateTimeResolutionResult(); - - string numStr, unitStr; - - // if there are NO spaces between number and unit - var match = ChineseDatePeriodExtractorConfiguration.NumberCombinedWithUnit.Match(text); - - if (match.Success) - { - var srcUnit = match.Groups["unit"].Value; - var beforeStr = text.Substring(0, match.Index); - - if (this.config.UnitMap.ContainsKey(srcUnit)) - { - unitStr = this.config.UnitMap[srcUnit]; - numStr = match.Groups["num"].Value; - - if (ChineseDatePeriodExtractorConfiguration.PastRegex.IsExactMatch(beforeStr, trim: true)) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case Constants.TimexDay: - beginDate = referenceDate.AddDays(-double.Parse(numStr)); - endDate = referenceDate; - break; - case Constants.TimexWeek: - beginDate = referenceDate.AddDays(-7 * double.Parse(numStr)); - endDate = referenceDate; - break; - case Constants.TimexMonthFull: - beginDate = referenceDate.AddMonths(-Convert.ToInt32(double.Parse(numStr))); - endDate = referenceDate; - break; - case Constants.TimexYear: - beginDate = referenceDate.AddYears(-Convert.ToInt32(double.Parse(numStr))); - endDate = referenceDate; - break; - default: - return ret; - } - - ret.Timex = $"({DateTimeFormatUtil.LuisDate(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)},P{numStr}{unitStr[0]})"; - ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); - ret.Success = true; - return ret; - } - - if (ChineseDatePeriodExtractorConfiguration.FutureRegex.IsExactMatch(beforeStr, trim: true)) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case Constants.TimexDay: - beginDate = referenceDate; - endDate = referenceDate.AddDays(double.Parse(numStr)); - break; - case Constants.TimexWeek: - beginDate = referenceDate; - endDate = referenceDate.AddDays(7 * double.Parse(numStr)); - break; - case Constants.TimexMonthFull: - beginDate = referenceDate; - endDate = referenceDate.AddMonths(Convert.ToInt32(double.Parse(numStr))); - break; - case 
Constants.TimexYear: - beginDate = referenceDate; - endDate = referenceDate.AddYears(Convert.ToInt32(double.Parse(numStr))); - break; - default: - return ret; - } - - ret.Timex = - $"({DateTimeFormatUtil.LuisDate(beginDate.AddDays(1))},{DateTimeFormatUtil.LuisDate(endDate.AddDays(1))},P{numStr}{unitStr[0]})"; - ret.FutureValue = - ret.PastValue = new Tuple(beginDate.AddDays(1), endDate.AddDays(1)); - ret.Success = true; - return ret; - } - } - } - - // for case "前两年" "后三年" - var durationRes = DurationExtractor.Extract(text, referenceDate); - if (durationRes.Count > 0) - { - var beforeStr = text.Substring(0, (int)durationRes[0].Start); - match = ChineseDatePeriodExtractorConfiguration.UnitRegex.Match(durationRes[0].Text); - if (match.Success) - { - var srcUnit = match.Groups["unit"].Value; - - var numberStr = durationRes[0].Text.Substring(0, match.Index).Trim(); - var number = ConvertChineseToNum(numberStr); - - if (this.config.UnitMap.ContainsKey(srcUnit)) - { - unitStr = this.config.UnitMap[srcUnit]; - - var prefixMatch = ChineseDatePeriodExtractorConfiguration.PastRegex.MatchExact(beforeStr, trim: true); - - if (prefixMatch.Success) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case Constants.TimexDay: - beginDate = referenceDate.AddDays(-number); - endDate = referenceDate; - break; - case Constants.TimexWeek: - beginDate = referenceDate.AddDays(-7 * number); - endDate = referenceDate; - break; - case Constants.TimexMonthFull: - beginDate = referenceDate.AddMonths(-number); - endDate = referenceDate; - break; - case Constants.TimexYear: - beginDate = referenceDate.AddYears(-number); - endDate = referenceDate; - break; - default: - return ret; - } - - ret.Timex = $"({DateTimeFormatUtil.LuisDate(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)},P{number}{unitStr[0]})"; - ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); - ret.Success = true; - return ret; - } - - prefixMatch = 
ChineseDatePeriodExtractorConfiguration.FutureRegex.MatchExact(beforeStr, trim: true); - - if (prefixMatch.Success) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case Constants.TimexDay: - beginDate = referenceDate; - endDate = referenceDate.AddDays(number); - break; - case Constants.TimexWeek: - beginDate = referenceDate; - endDate = referenceDate.AddDays(7 * number); - break; - case Constants.TimexMonthFull: - beginDate = referenceDate; - endDate = referenceDate.AddMonths(number); - break; - case Constants.TimexYear: - beginDate = referenceDate; - endDate = referenceDate.AddYears(number); - break; - default: - return ret; - } - - ret.Timex = - $"({DateTimeFormatUtil.LuisDate(beginDate.AddDays(1))},{DateTimeFormatUtil.LuisDate(endDate.AddDays(1))},P{number}{unitStr[0]})"; - ret.FutureValue = - ret.PastValue = - new Tuple(beginDate.AddDays(1), endDate.AddDays(1)); - ret.Success = true; - return ret; - } - } - } - } - - return ret; + return MonthOfYear[monthStr] > 12 ? MonthOfYear[monthStr] % 12 : MonthOfYear[monthStr]; } - // case like "三月的第一周" - private DateTimeResolutionResult ParseWeekOfMonth(string text, DateObject referenceDate) + public bool IsMonthOnly(string text) { - var ret = new DateTimeResolutionResult(); var trimmedText = text.Trim(); - - var match = ChineseDatePeriodExtractorConfiguration.WeekOfMonthRegex.Match(text); - if (!match.Success) - { - return ret; - } - - var cardinalStr = match.Groups["cardinal"].Value; - var monthStr = match.Groups["month"].Value; - var noYear = false; - int year; - - int cardinal; - if (cardinalStr.Equals("最后一")) - { - cardinal = 5; - } - else - { - cardinal = this.config.CardinalMap[cardinalStr]; - } - - int month; - if (string.IsNullOrEmpty(monthStr)) - { - var swift = 0; - if (trimmedText.StartsWith("下个")) - { - swift = 1; - } - else if (trimmedText.StartsWith("上个")) - { - swift = -1; - } - - month = referenceDate.AddMonths(swift).Month; - year = referenceDate.AddMonths(swift).Year; - ret.Timex = 
DateTimeFormatUtil.LuisDate(referenceDate.Year, month); - } - else - { - month = ToMonthNumber(monthStr); - ret.Timex = DateTimeFormatUtil.LuisDate(Constants.InvalidYear, month); - year = referenceDate.Year; - noYear = true; - } - - var value = ComputeDate(cardinal, 1, month, year); - - var futureDate = value; - var pastDate = value; - if (noYear && futureDate < referenceDate) - { - futureDate = ComputeDate(cardinal, 1, month, year + 1); - if (futureDate.Month != month) - { - futureDate = futureDate.AddDays(-7); - } - } - - if (noYear && pastDate >= referenceDate) - { - pastDate = ComputeDate(cardinal, 1, month, year - 1); - if (pastDate.Month != month) - { - pastDate = pastDate.AddDays(-7); - } - } - - ret.Timex += "-W" + cardinal.ToString("D2"); - ret.FutureValue = new Tuple(futureDate, futureDate.AddDays(7)); - ret.PastValue = new Tuple(pastDate, pastDate.AddDays(7)); - ret.Success = true; - - return ret; - } - - // parse "今年夏天" - private DateTimeResolutionResult ParseSeason(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - var match = ChineseDatePeriodExtractorConfiguration.SeasonWithYear.MatchExact(text, trim: true); - - if (match.Success) - { - // parse year - var year = referenceDate.Year; - var hasYear = false; - var yearNum = match.Groups["year"].Value; - var yearChs = match.Groups["yearchs"].Value; - var yearRel = match.Groups["yearrel"].Value; - - if (!string.IsNullOrEmpty(yearNum)) - { - hasYear = true; - if (IsYearOnly(yearNum)) - { - yearNum = yearNum.Substring(0, yearNum.Length - 1); - } - - year = int.Parse(yearNum); - } - else if (!string.IsNullOrEmpty(yearChs)) - { - hasYear = true; - if (IsYearOnly(yearChs)) - { - yearChs = yearChs.Substring(0, yearChs.Length - 1); - } - - year = ConvertChineseToInteger(yearChs); - } - else if (!string.IsNullOrEmpty(yearRel)) - { - hasYear = true; - if (IsLastYear(yearRel)) - { - year--; - } - else if (IsNextYear(yearRel)) - { - year++; - } - } - - if (year < 100 && year >= 
this.config.TwoNumYear) - { - year += 1900; - } - else if (year < 100 && year < this.config.TwoNumYear) - { - year += 2000; - } - - // parse season - var seasonStr = match.Groups["season"].Value; - ret.Timex = this.config.SeasonMap[seasonStr]; - if (hasYear) - { - ret.Timex = year.ToString("D4") + "-" + ret.Timex; - } - - ret.Success = true; - return ret; - } - - return ret; + return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); } - private DateTimeResolutionResult ParseQuarter(string text, DateObject referenceDate) + public bool IsWeekend(string text) { - var ret = new DateTimeResolutionResult(); - var match = ChineseDatePeriodExtractorConfiguration.QuarterRegex.MatchExact(text, trim: true); - - if (!match.Success) - { - return ret; - } - - // parse year - var year = referenceDate.Year; - var yearNum = match.Groups["year"].Value; - var yearChs = match.Groups["yearchs"].Value; - var yearRel = match.Groups["yearrel"].Value; - if (!string.IsNullOrEmpty(yearNum)) - { - if (IsYearOnly(yearNum)) - { - yearNum = yearNum.Substring(0, yearNum.Length - 1); - } - - year = int.Parse(yearNum); - } - else if (!string.IsNullOrEmpty(yearChs)) - { - if (IsYearOnly(yearChs)) - { - yearChs = yearChs.Substring(0, yearChs.Length - 1); - } - - year = ConvertChineseToInteger(yearChs); - } - else if (!string.IsNullOrEmpty(yearRel)) - { - if (IsLastYear(yearRel)) - { - year--; - } - else if (IsNextYear(yearRel)) - { - year++; - } - } - - if (year < 100 && year >= this.config.TwoNumYear) - { - year += 1900; - } - else if (year < 100 && year < this.config.TwoNumYear) - { - year += 2000; - } - - // parse quarterNum - var cardinalStr = match.Groups["cardinal"].Value; - var quarterNum = this.config.CardinalMap[cardinalStr]; - - var beginDate = DateObject.MinValue.SafeCreateFromValue(year, (quarterNum * 3) - 2, 1); - var endDate = DateObject.MinValue.SafeCreateFromValue(year, (quarterNum * 3) + 1, 1); - ret.FutureValue = ret.PastValue = new 
Tuple(beginDate, endDate); - ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDate, endDate, DatePeriodTimexType.ByMonth); - ret.Success = true; - - return ret; - } - - private DateTimeResolutionResult ParseDecade(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - int century = (referenceDate.Year / 100) + 1; - int decade; - int decadeLastYear = 10; - var inputCentury = false; - - var match = ChineseDatePeriodExtractorConfiguration.DecadeRegex.MatchExact(text, trim: true); - - string beginLuisStr, endLuisStr; - - if (match.Success) - { - var decadeStr = match.Groups["decade"].Value; - if (!int.TryParse(decadeStr, out decade)) - { - decade = ConvertChineseToNum(decadeStr); - } - - var centuryStr = match.Groups["century"].Value; - if (!string.IsNullOrEmpty(centuryStr)) - { - if (!int.TryParse(centuryStr, out century)) - { - century = ConvertChineseToNum(centuryStr); - } - - inputCentury = true; - } - else - { - centuryStr = match.Groups["relcentury"].Value; - - if (!string.IsNullOrEmpty(centuryStr)) - { - centuryStr = centuryStr.Trim(); - var thisMatch = ChineseDatePeriodExtractorConfiguration.ThisRegex.Match(centuryStr); - var nextMatch = ChineseDatePeriodExtractorConfiguration.NextRegex.Match(centuryStr); - var lastMatch = ChineseDatePeriodExtractorConfiguration.LastRegex.Match(centuryStr); - - if (thisMatch.Success) - { - // do nothing - } - else if (nextMatch.Success) - { - century++; - } - else - { - century--; - } - - inputCentury = true; - } - } - } - else - { - return ret; - } - - var beginYear = ((century - 1) * 100) + decade; - var endYear = beginYear + decadeLastYear; - - if (inputCentury) - { - beginLuisStr = DateTimeFormatUtil.LuisDate(beginYear, 1, 1); - endLuisStr = DateTimeFormatUtil.LuisDate(endYear, 1, 1); - } - else - { - var beginYearStr = "XX" + decade; - beginLuisStr = DateTimeFormatUtil.LuisDate(-1, 1, 1); - beginLuisStr = beginLuisStr.Replace("XXXX", beginYearStr); - - var endYearStr = "XX" + (endYear 
% 100).ToString("D2"); - endLuisStr = DateTimeFormatUtil.LuisDate(-1, 1, 1); - endLuisStr = endLuisStr.Replace("XXXX", endYearStr); - } - - ret.Timex = $"({beginLuisStr},{endLuisStr},P10Y)"; - - int futureYear = beginYear, pastYear = beginYear; - var startDate = DateObject.MinValue.SafeCreateFromValue(beginYear, 1, 1); - if (!inputCentury && startDate < referenceDate) - { - futureYear += 100; - } - - if (!inputCentury && startDate >= referenceDate) - { - pastYear -= 100; - } - - ret.FutureValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(futureYear, 1, 1), - DateObject.MinValue.SafeCreateFromValue(futureYear + decadeLastYear, 1, 1)); - - ret.PastValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(pastYear, 1, 1), - DateObject.MinValue.SafeCreateFromValue(pastYear + decadeLastYear, 1, 1)); - - ret.Success = true; - - return ret; + var trimmedText = text.Trim(); + return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); } - private int ToMonthNumber(string monthStr) + public bool IsWeekOnly(string text) { - return this.config.MonthOfYear[monthStr] > 12 ? 
this.config.MonthOfYear[monthStr] % 12 : this.config.MonthOfYear[monthStr]; + var trimmedText = text.Trim(); + return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); } - private bool IsMonthOnly(string text) + public bool IsYearOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); } - private bool IsWeekend(string text) + public bool IsThisYear(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.ThisYearTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } - private bool IsWeekOnly(string text) + public bool IsYearToDate(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } - private bool IsYearOnly(string text) + public bool IsLastYear(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.LastYearTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } - private bool IsThisYear(string text) + public bool IsNextYear(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.ThisYearTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.NextYearTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } - private bool IsLastYear(string text) + public bool IsYearAfterNext(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.LastYearTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.YearAfterNextTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } - private bool IsNextYear(string text) + 
public bool IsYearBeforeLast(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.NextYearTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.YearBeforeLastTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } - private bool IsYearAfterNext(string text) + public int GetSwiftMonth(string text) { - var trimmedText = text.Trim(); - return DateTimeDefinitions.YearAfterNextTerms.Any(o => trimmedText.Equals(o)); + // Current month: 今月 + var value = 0; + + // @TODO move hardcoded values to resources file + + if (text.Equals("来月", StringComparison.Ordinal)) + { + value = 1; + } + else if (text.Equals("前月", StringComparison.Ordinal) || + text.Equals("先月", StringComparison.Ordinal) || + text.Equals("昨月", StringComparison.Ordinal) || + text.Equals("先々月", StringComparison.Ordinal)) + { + value = -1; + } + else if (text.Equals("再来月", StringComparison.Ordinal)) + { + value = 2; + } + + return value; } - private bool IsYearBeforeLast(string text) + public int GetSwiftYear(string text) { - var trimmedText = text.Trim(); - return DateTimeDefinitions.YearBeforeLastTerms.Any(o => trimmedText.Equals(o)); + // Current year: 今年 + var value = 0; + + // @TODO move hardcoded values to resources file + + if (text.Equals("来年", StringComparison.Ordinal) || + text.Equals("らいねん", StringComparison.Ordinal)) + { + value = 1; + } + else if (text.Equals("昨年", StringComparison.Ordinal) || + text.Equals("前年", StringComparison.Ordinal)) + { + value = -1; + } + + return value; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateTimeParserConfiguration.cs index 1cc5b76dc6..b60c6f9e05 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateTimeParserConfiguration.cs @@ -1,4 
+1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -6,155 +9,143 @@ namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseDateTimeParserConfiguration : BaseDateTimeOptionsConfiguration, IFullDateTimeParserConfiguration + public class ChineseDateTimeParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKDateTimeParserConfiguration { - public ChineseDateTimeParserConfiguration(IDateTimeOptionsConfiguration config) - : base(config) - { - DateExtractor = new ChineseDateExtractorConfiguration(); - - DateParser = new ChineseDateParserConfiguration(this); - TimeParser = new ChineseTimeParserConfiguration(this); - DateTimeParser = new ChineseDateTimeParser(this); - DatePeriodParser = new ChineseDatePeriodParserConfiguration(this); - TimePeriodParser = new ChineseTimePeriodParserConfiguration(this); - DateTimePeriodParser = new ChineseDateTimePeriodParserConfiguration(this); - DurationParser = new ChineseDurationParserConfiguration(this); - GetParser = new ChineseSetParserConfiguration(this); - HolidayParser = new ChineseHolidayParserConfiguration(this); - - UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToImmutableDictionary(); - UnitValueMap = DateTimeDefinitions.ParserConfigurationUnitValueMap.ToImmutableDictionary(); - SeasonMap = DateTimeDefinitions.ParserConfigurationSeasonMap.ToImmutableDictionary(); - SeasonValueMap = DateTimeDefinitions.ParserConfigurationSeasonValueMap.ToImmutableDictionary(); - CardinalMap = DateTimeDefinitions.ParserConfigurationCardinalMap.ToImmutableDictionary(); - DayOfMonth = DateTimeDefinitions.ParserConfigurationDayOfMonth.ToImmutableDictionary(); - DayOfWeek = DateTimeDefinitions.ParserConfigurationDayOfWeek.ToImmutableDictionary(); - MonthOfYear = 
DateTimeDefinitions.ParserConfigurationMonthOfYear.ToImmutableDictionary(); - Numbers = InitNumbers(); - - DateRegexList = ChineseDateExtractorConfiguration.DateRegexList; - - NextRegex = ChineseDateExtractorConfiguration.NextRegex; - ThisRegex = ChineseDateExtractorConfiguration.ThisRegex; - LastRegex = ChineseDateExtractorConfiguration.LastRegex; - YearRegex = ChineseDateExtractorConfiguration.YearRegex; - RelativeRegex = ChineseDateExtractorConfiguration.RelativeRegex; - StrictWeekDayRegex = ChineseDateExtractorConfiguration.WeekDayRegex; - WeekDayOfMonthRegex = ChineseDateExtractorConfiguration.WeekDayOfMonthRegex; - BeforeRegex = ChineseMergedExtractorConfiguration.BeforeRegex; - AfterRegex = ChineseMergedExtractorConfiguration.AfterRegex; - UntilRegex = ChineseMergedExtractorConfiguration.UntilRegex; - SincePrefixRegex = ChineseMergedExtractorConfiguration.SincePrefixRegex; - SinceSuffixRegex = ChineseMergedExtractorConfiguration.SinceSuffixRegex; - EqualRegex = ChineseMergedExtractorConfiguration.EqualRegex; - } - - public int TwoNumYear => int.Parse(DateTimeDefinitions.TwoNumYear); + public static readonly Regex LunarRegex = new Regex(DateTimeDefinitions.LunarRegex, RegexFlags, RegexTimeOut); - public string LastWeekDayToken => DateTimeDefinitions.ParserConfigurationLastWeekDayToken; + public static readonly Regex LunarHolidayRegex = new Regex(DateTimeDefinitions.LunarHolidayRegex, RegexFlags, RegexTimeOut); - public string NextMonthToken => DateTimeDefinitions.ParserConfigurationNextMonthToken; + public static readonly Regex SimpleAmRegex = new Regex(DateTimeDefinitions.DateTimeSimpleAmRegex, RegexFlags, RegexTimeOut); - public string LastMonthToken => DateTimeDefinitions.ParserConfigurationLastMonthToken; + public static readonly Regex SimplePmRegex = new Regex(DateTimeDefinitions.DateTimeSimplePmRegex, RegexFlags, RegexTimeOut); - public string DatePrefix => DateTimeDefinitions.ParserConfigurationDatePrefix; + private const RegexOptions RegexFlags = 
RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public IDateExtractor DateExtractor { get; } + public ChineseDateTimeParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) + { + IntegerExtractor = config.IntegerExtractor; + DateExtractor = config.DateExtractor; + TimeExtractor = config.TimeExtractor; + DurationExtractor = config.DurationExtractor; - public IDateTimeParser DateParser { get; } + DateParser = config.DateParser; + TimeParser = config.TimeParser; + NumberParser = config.NumberParser; - public IDateTimeParser TimeParser { get; } + UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToImmutableDictionary(); + NowRegex = ChineseDateTimeExtractorConfiguration.NowRegex; + TimeOfSpecialDayRegex = ChineseDateTimeExtractorConfiguration.TimeOfSpecialDayRegex; + DateTimePeriodUnitRegex = ChineseDateTimeExtractorConfiguration.DateTimePeriodUnitRegex; + BeforeRegex = ChineseDateTimeExtractorConfiguration.BeforeRegex; + AfterRegex = ChineseDateTimeExtractorConfiguration.AfterRegex; + DurationRelativeDurationUnitRegex = ChineseDateTimeExtractorConfiguration.DurationRelativeDurationUnitRegex; + AgoLaterRegex = ChineseDateTimeExtractorConfiguration.AgoLaterRegex; + } - public IDateTimeParser DateTimeParser { get; } + public IDateTimeExtractor DateExtractor { get; } - public IDateTimeParser DatePeriodParser { get; } + public IDateTimeExtractor TimeExtractor { get; } - public IDateTimeParser TimePeriodParser { get; } + public IDateTimeExtractor DurationExtractor { get; } - public IDateTimeParser DateTimePeriodParser { get; } + public IDateTimeParser DateParser { get; } public IDateTimeParser DurationParser { get; } - public IDateTimeParser GetParser { get; } - - public IDateTimeParser HolidayParser { get; } - - public ImmutableDictionary UnitMap { get; } - - public ImmutableDictionary UnitValueMap { get; } - - public ImmutableDictionary SeasonMap { get; } - - public ImmutableDictionary SeasonValueMap { get; } - - public 
ImmutableDictionary CardinalMap { get; } - - public ImmutableDictionary DayOfMonth { get; } + public IDateTimeParser TimeParser { get; } - public ImmutableDictionary DayOfWeek { get; } + public IExtractor IntegerExtractor { get; } - public ImmutableDictionary MonthOfYear { get; } + public IParser NumberParser { get; } - public ImmutableDictionary Numbers { get; } + public ImmutableDictionary UnitMap { get; } - public IEnumerable DateRegexList { get; } + public Regex NowRegex { get; } - public Regex NextRegex { get; } + public Regex TimeOfSpecialDayRegex { get; } - public Regex ThisRegex { get; } + public Regex DateTimePeriodUnitRegex { get; } - public Regex LastRegex { get; } + public Regex BeforeRegex { get; } - public Regex YearRegex { get; } + public Regex AfterRegex { get; } - public Regex RelativeRegex { get; } + public Regex DurationRelativeDurationUnitRegex { get; } - public Regex StrictWeekDayRegex { get; } + public Regex AgoLaterRegex { get; } - public Regex WeekDayOfMonthRegex { get; } + Regex ICJKDateTimeParserConfiguration.LunarRegex => LunarRegex; - public Regex BeforeRegex { get; } + Regex ICJKDateTimeParserConfiguration.LunarHolidayRegex => LunarHolidayRegex; - public Regex AfterRegex { get; } + Regex ICJKDateTimeParserConfiguration.SimpleAmRegex => SimpleAmRegex; - public Regex UntilRegex { get; } + Regex ICJKDateTimeParserConfiguration.SimplePmRegex => SimplePmRegex; - public Regex SincePrefixRegex { get; } + public bool GetMatchedNowTimex(string text, out string timex) + { + var trimmedText = text.Trim(); - public Regex SinceSuffixRegex { get; } + // @TODO move hardcoded values to resources file + if (trimmedText.EndsWith("现在", StringComparison.Ordinal)) + { + timex = "PRESENT_REF"; + } + else if (trimmedText.Equals("刚刚才", StringComparison.Ordinal) || + trimmedText.Equals("刚刚", StringComparison.Ordinal) || + trimmedText.Equals("刚才", StringComparison.Ordinal)) + { + timex = "PAST_REF"; + } + else if (trimmedText.Equals("立刻", 
StringComparison.Ordinal) || + trimmedText.Equals("马上", StringComparison.Ordinal)) + { + timex = "FUTURE_REF"; + } + else + { + timex = null; + return false; + } - public Regex EqualRegex { get; } + return true; + } - public static int GetSwiftDay(string text) + public int GetSwiftDay(string text) { var value = 0; - if (text.Equals("今天") || text.Equals("今日") || text.Equals("最近")) + // @TODO move hardcoded values to resources file + if (text.Equals("今天", StringComparison.Ordinal) || + text.Equals("今日", StringComparison.Ordinal) || + text.Equals("最近", StringComparison.Ordinal)) { value = 0; } - else if (text.StartsWith("明")) + else if (text.StartsWith("明", StringComparison.Ordinal)) { value = 1; } - else if (text.StartsWith("昨")) + else if (text.StartsWith("昨", StringComparison.Ordinal)) { value = -1; } - else if (text.Equals("大后天") || text.Equals("大後天")) + else if (text.Equals("大后天", StringComparison.Ordinal) || + text.Equals("大後天", StringComparison.Ordinal)) { value = 3; } - else if (text.Equals("大前天")) + else if (text.Equals("大前天", StringComparison.Ordinal)) { value = -3; } - else if (text.Equals("后天") || text.Equals("後天")) + else if (text.Equals("后天", StringComparison.Ordinal) || + text.Equals("後天", StringComparison.Ordinal)) { value = 2; } - else if (text.Equals("前天")) + else if (text.Equals("前天", StringComparison.Ordinal)) { value = -2; } @@ -162,11 +153,54 @@ public static int GetSwiftDay(string text) return value; } - private static ImmutableDictionary InitNumbers() + public void AdjustByTimeOfDay(string matchStr, ref int hour, ref int swift) { - return new Dictionary + // @TODO move hardcoded values to resources file + switch (matchStr) { - }.ToImmutableDictionary(); + case "今晚": + if (hour < Constants.HalfDayHourCount) + { + hour += Constants.HalfDayHourCount; + } + + break; + case "今早": + case "今晨": + if (hour >= Constants.HalfDayHourCount) + { + hour -= Constants.HalfDayHourCount; + } + + break; + case "明晚": + swift = 1; + if (hour < 
Constants.HalfDayHourCount) + { + hour += Constants.HalfDayHourCount; + } + + break; + case "明早": + case "明晨": + swift = 1; + if (hour >= Constants.HalfDayHourCount) + { + hour -= Constants.HalfDayHourCount; + } + + break; + case "昨晚": + swift = -1; + if (hour < Constants.HalfDayHourCount) + { + hour += Constants.HalfDayHourCount; + } + + break; + default: + break; + } } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateTimePeriodParserConfiguration.cs index 0725345bce..53c44b6744 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateTimePeriodParserConfiguration.cs @@ -1,5 +1,10 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; +using System.Collections.Immutable; +using System.Globalization; using System.Text; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; @@ -11,679 +16,186 @@ namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseDateTimePeriodParserConfiguration : IDateTimeParser + public class ChineseDateTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKDateTimePeriodParserConfiguration { - public static readonly string ParserName = Constants.SYS_DATETIME_DATETIMEPERIOD; - public static readonly Regex MORegex = new Regex(DateTimeDefinitions.DateTimePeriodMORegex, RegexFlags); + public static readonly Regex MORegex = new Regex(DateTimeDefinitions.DateTimePeriodMORegex, RegexFlags, RegexTimeOut); - public static readonly Regex MIRegex = new Regex(DateTimeDefinitions.DateTimePeriodMIRegex, RegexFlags); + public static readonly Regex MIRegex = new Regex(DateTimeDefinitions.DateTimePeriodMIRegex, 
RegexFlags, RegexTimeOut); - public static readonly Regex AFRegex = new Regex(DateTimeDefinitions.DateTimePeriodAFRegex, RegexFlags); + public static readonly Regex AFRegex = new Regex(DateTimeDefinitions.DateTimePeriodAFRegex, RegexFlags, RegexTimeOut); - public static readonly Regex EVRegex = new Regex(DateTimeDefinitions.DateTimePeriodEVRegex, RegexFlags); + public static readonly Regex EVRegex = new Regex(DateTimeDefinitions.DateTimePeriodEVRegex, RegexFlags, RegexTimeOut); - public static readonly Regex NIRegex = new Regex(DateTimeDefinitions.DateTimePeriodNIRegex, RegexFlags); + public static readonly Regex NIRegex = new Regex(DateTimeDefinitions.DateTimePeriodNIRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly IDateTimeExtractor SingleDateExtractor = new ChineseDateExtractorConfiguration(); - - private static readonly IDateTimeExtractor SingleTimeExtractor = new ChineseTimeExtractorConfiguration(); - - private static readonly IDateTimeExtractor TimeWithDateExtractor = new ChineseDateTimeExtractorConfiguration(); - - private static readonly IDateTimeExtractor TimePeriodExtractor = new ChineseTimePeriodExtractorChsConfiguration(); - - private static readonly IExtractor CardinalExtractor = new CardinalExtractor(); - - private static readonly IParser CardinalParser = AgnosticNumberParserFactory.GetParser( - AgnosticNumberParserType.Cardinal, new ChineseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Chinese))); - - private readonly IFullDateTimeParserConfiguration config; - - public ChineseDateTimePeriodParserConfiguration(IFullDateTimeParserConfiguration configuration) - { - config = configuration; - } - - public static string BuildTimex(TimeResult timeResult) + public ChineseDateTimePeriodParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) { - var build = new StringBuilder("T"); - if (timeResult.Hour >= 
0) - { - build.Append(timeResult.Hour.ToString("D2")); - } - - if (timeResult.Minute >= 0) - { - build.Append(":" + timeResult.Minute.ToString("D2")); - } - if (timeResult.Second >= 0) - { - build.Append(":" + timeResult.Second.ToString("D2")); - } - - return build.ToString(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = config.CardinalExtractor; + CardinalParser = AgnosticNumberParserFactory.GetParser( + AgnosticNumberParserType.Cardinal, new ChineseNumberParserConfiguration(numConfig)); + + DateExtractor = config.DateExtractor; + DurationExtractor = config.DurationExtractor; + DateTimeExtractor = config.DateTimeExtractor; + TimeExtractor = config.TimeExtractor; + TimePeriodExtractor = config.TimePeriodExtractor; + DateParser = config.DateParser; + TimeParser = config.TimeParser; + DateTimeParser = config.DateTimeParser; + TimePeriodParser = config.TimePeriodParser; + DurationParser = config.DurationParser; + + SpecificTimeOfDayRegex = ChineseDateTimePeriodExtractorConfiguration.SpecificTimeOfDayRegex; + TimeOfDayRegex = ChineseDateTimePeriodExtractorConfiguration.TimeOfDayRegex; + NextRegex = ChineseDateTimePeriodExtractorConfiguration.NextRegex; + LastRegex = ChineseDateTimePeriodExtractorConfiguration.LastRegex; + PastRegex = ChineseDateTimePeriodExtractorConfiguration.PastRegex; + FutureRegex = ChineseDateTimePeriodExtractorConfiguration.FutureRegex; + WeekDayRegex = ChineseDateTimePeriodExtractorConfiguration.WeekDayRegex; + TimePeriodLeftRegex = ChineseDateTimePeriodExtractorConfiguration.TimePeriodLeftRegex; + UnitRegex = ChineseDateTimePeriodExtractorConfiguration.UnitRegex; + RestOfDateRegex = ChineseDateTimePeriodExtractorConfiguration.RestOfDateRegex; + AmPmDescRegex = ChineseDateTimePeriodExtractorConfiguration.AmPmDescRegex; + UnitMap = 
config.UnitMap; } - public static TimeResult DateObject2TimeResult(DateObject dateTime) - { - var timeResult = new TimeResult - { - Hour = dateTime.Hour, - Minute = dateTime.Minute, - Second = dateTime.Second, - }; + public IDateTimeExtractor DateExtractor { get; } - return timeResult; - } + public IDateTimeExtractor TimeExtractor { get; } - public ParseResult Parse(ExtractResult extResult) - { - return this.Parse(extResult, DateObject.Now); - } + public IDateTimeExtractor DateTimeExtractor { get; } - public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) - { - var referenceTime = refDate; + public IDateTimeExtractor TimePeriodExtractor { get; } - object value = null; - if (er.Type.Equals(ParserName)) - { - var innerResult = MergeDateAndTimePeriod(er.Text, referenceTime); - if (!innerResult.Success) - { - innerResult = MergeTwoTimePoints(er.Text, referenceTime); - } - - if (!innerResult.Success) - { - innerResult = ParseSpecificNight(er.Text, referenceTime); - } - - if (!innerResult.Success) - { - innerResult = ParseNumberWithUnit(er.Text, referenceTime); - } - - if (innerResult.Success) - { - innerResult.FutureResolution = new Dictionary - { - { - TimeTypeConstants.START_DATETIME, - DateTimeFormatUtil.FormatDateTime(((Tuple)innerResult.FutureValue).Item1) - }, - { - TimeTypeConstants.END_DATETIME, - DateTimeFormatUtil.FormatDateTime(((Tuple)innerResult.FutureValue).Item2) - }, - }; - - innerResult.PastResolution = new Dictionary - { - { - TimeTypeConstants.START_DATETIME, - DateTimeFormatUtil.FormatDateTime(((Tuple)innerResult.PastValue).Item1) - }, - { - TimeTypeConstants.END_DATETIME, - DateTimeFormatUtil.FormatDateTime(((Tuple)innerResult.PastValue).Item2) - }, - }; - - value = innerResult; - } - } + public IExtractor CardinalExtractor { get; } - var ret = new DateTimeParseResult - { - Text = er.Text, - Start = er.Start, - Length = er.Length, - Type = er.Type, - Data = er.Data, - Value = value, - TimexStr = value == null ? 
string.Empty : ((DateTimeResolutionResult)value).Timex, - ResolutionStr = string.Empty, - }; - - return ret; - } + public IDateTimeExtractor DurationExtractor { get; } - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } + public IParser CardinalParser { get; } - private DateTimeResolutionResult MergeDateAndTimePeriod(string text, DateObject referenceTime) - { - var ret = new DateTimeResolutionResult(); + public IDateTimeParser DateParser { get; } - var er1 = SingleDateExtractor.Extract(text, referenceTime); - var er2 = TimePeriodExtractor.Extract(text, referenceTime); - if (er1.Count != 1 || er2.Count != 1) - { - return ret; - } + public IDateTimeParser TimeParser { get; } - var pr1 = this.config.DateParser.Parse(er1[0], referenceTime); - var pr2 = this.config.TimePeriodParser.Parse(er2[0], referenceTime); - var timeRange = (Tuple)((DateTimeResolutionResult)pr2.Value).FutureValue; - var beginTime = timeRange.Item1; - var endTime = timeRange.Item2; - var futureDate = (DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue; - var pastDate = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue; - - ret.FutureValue = - new Tuple( - DateObject.MinValue.SafeCreateFromValue(futureDate.Year, futureDate.Month, futureDate.Day, beginTime.Hour, beginTime.Minute, beginTime.Second), - DateObject.MinValue.SafeCreateFromValue(futureDate.Year, futureDate.Month, futureDate.Day, endTime.Hour, endTime.Minute, endTime.Second)); - - ret.PastValue = - new Tuple( - DateObject.MinValue.SafeCreateFromValue(pastDate.Year, pastDate.Month, pastDate.Day, beginTime.Hour, beginTime.Minute, beginTime.Second), - DateObject.MinValue.SafeCreateFromValue(pastDate.Year, pastDate.Month, pastDate.Day, endTime.Hour, endTime.Minute, endTime.Second)); - - var split = pr2.TimexStr.Split('T'); - if (split.Length != 4) - { - return ret; - } + public IDateTimeParser DateTimeParser { get; } - var dateStr = pr1.TimexStr; + public IDateTimeParser 
TimePeriodParser { get; } - ret.Timex = split[0] + dateStr + "T" + split[1] + dateStr + "T" + split[2] + "T" + split[3]; + public IDateTimeParser DurationParser { get; } - ret.Success = true; - return ret; - } + public Regex SpecificTimeOfDayRegex { get; } - private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject referenceTime) - { - var ret = new DateTimeResolutionResult(); - DateTimeParseResult pr1 = null, pr2 = null; - bool bothHaveDates = false, beginHasDate = false, endHasDate = false; + public Regex TimeOfDayRegex { get; } - var er1 = SingleTimeExtractor.Extract(text, referenceTime); - var er2 = TimeWithDateExtractor.Extract(text, referenceTime); + public Regex NextRegex { get; } - var rightTime = DateObject.MinValue.SafeCreateFromValue(referenceTime.Year, referenceTime.Month, referenceTime.Day); - var leftTime = DateObject.MinValue.SafeCreateFromValue(referenceTime.Year, referenceTime.Month, referenceTime.Day); + public Regex LastRegex { get; } - if (er2.Count == 2) - { - pr1 = this.config.DateTimeParser.Parse(er2[0], referenceTime); - pr2 = this.config.DateTimeParser.Parse(er2[1], referenceTime); - bothHaveDates = true; - } - else if (er2.Count == 1 && er1.Count == 2) - { - if (!er2[0].IsOverlap(er1[0])) - { - pr1 = this.config.TimeParser.Parse(er1[0], referenceTime); - pr2 = this.config.DateTimeParser.Parse(er2[0], referenceTime); - endHasDate = true; - } - else - { - pr1 = this.config.DateTimeParser.Parse(er2[0], referenceTime); - pr2 = this.config.TimeParser.Parse(er1[1], referenceTime); - beginHasDate = true; - } - } - else if (er2.Count == 1 && er1.Count == 1) - { - if (er1[0].Start < er2[0].Start) - { - pr1 = this.config.TimeParser.Parse(er1[0], referenceTime); - pr2 = this.config.DateTimeParser.Parse(er2[0], referenceTime); - endHasDate = true; - } - else - { - pr1 = this.config.DateTimeParser.Parse(er2[0], referenceTime); - pr2 = this.config.TimeParser.Parse(er1[0], referenceTime); - beginHasDate = true; - } - } - else if 
(er1.Count == 2) - { - // if both ends are Time. then this is a TimePeriod, not a DateTimePeriod - return ret; - } - else - { - return ret; - } + public Regex PastRegex { get; } - if (pr1.Value == null || pr2.Value == null) - { - return ret; - } + public Regex FutureRegex { get; } - DateObject futureBegin = (DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue, - futureEnd = (DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue; + public Regex WeekDayRegex { get; } - DateObject pastBegin = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue, - pastEnd = (DateObject)((DateTimeResolutionResult)pr2.Value).PastValue; + public Regex TimePeriodLeftRegex { get; } - if (futureBegin > futureEnd) - { - futureBegin = pastBegin; - } + public Regex UnitRegex { get; } - if (pastEnd < pastBegin) - { - pastEnd = futureEnd; - } + public Regex RestOfDateRegex { get; } - if (bothHaveDates) - { - rightTime = DateObject.MinValue.SafeCreateFromValue(futureEnd.Year, futureEnd.Month, futureEnd.Day); - leftTime = DateObject.MinValue.SafeCreateFromValue(futureBegin.Year, futureBegin.Month, futureBegin.Day); - } - else if (beginHasDate) - { - // TODO: Handle "明天下午两点到五点" - futureEnd = DateObject.MinValue.SafeCreateFromValue( - futureBegin.Year, futureBegin.Month, futureBegin.Day, futureEnd.Hour, futureEnd.Minute, futureEnd.Second); - pastEnd = DateObject.MinValue.SafeCreateFromValue( - pastBegin.Year, pastBegin.Month, pastBegin.Day, pastEnd.Hour, pastEnd.Minute, pastEnd.Second); - - leftTime = DateObject.MinValue.SafeCreateFromValue(futureBegin.Year, futureBegin.Month, futureBegin.Day); - } - else if (endHasDate) - { - // TODO: Handle "明天下午两点到五点" - futureBegin = DateObject.MinValue.SafeCreateFromValue( - futureEnd.Year, futureEnd.Month, futureEnd.Day, futureBegin.Hour, futureBegin.Minute, futureBegin.Second); - pastBegin = DateObject.MinValue.SafeCreateFromValue( - pastEnd.Year, pastEnd.Month, pastEnd.Day, pastBegin.Hour, pastBegin.Minute, pastBegin.Second); - - 
rightTime = DateObject.MinValue.SafeCreateFromValue(futureEnd.Year, futureEnd.Month, futureEnd.Day); - } - - var leftResult = (DateTimeResolutionResult)pr1.Value; - var rightResult = (DateTimeResolutionResult)pr2.Value; - var leftResultTime = (DateObject)leftResult.FutureValue; - var rightResultTime = (DateObject)rightResult.FutureValue; - - int day = referenceTime.Day, - month = referenceTime.Month, - year = referenceTime.Year; - - // check if the right time is smaller than the left time, if yes, add one day - int hour = leftResultTime.Hour > 0 ? leftResultTime.Hour : 0, - min = leftResultTime.Minute > 0 ? leftResultTime.Minute : 0, - second = leftResultTime.Second > 0 ? leftResultTime.Second : 0; - - leftTime = leftTime.AddHours(hour); - leftTime = leftTime.AddMinutes(min); - leftTime = leftTime.AddSeconds(second); - DateObject.MinValue.SafeCreateFromValue(year, month, day, hour, min, second); - - hour = rightResultTime.Hour > 0 ? rightResultTime.Hour : 0; - min = rightResultTime.Minute > 0 ? rightResultTime.Minute : 0; - second = rightResultTime.Second > 0 ? 
rightResultTime.Second : 0; - - rightTime = rightTime.AddHours(hour); - rightTime = rightTime.AddMinutes(min); - rightTime = rightTime.AddSeconds(second); - - // the right side time contains "ampm", while the left side doesn't - if (rightResult.Comment != null && rightResult.Comment.Equals(Constants.Comment_AmPm, StringComparison.Ordinal) && - leftResult.Comment == null && rightTime < leftTime) - { - rightTime = rightTime.AddHours(Constants.HalfDayHourCount); - } - - if (rightTime < leftTime) - { - rightTime = rightTime.AddDays(1); - } - - ret.FutureValue = ret.PastValue = new Tuple(leftTime, rightTime); - - var leftTimex = string.Empty; - var rightTimex = string.Empty; - - // "X" is timex token for not determined time - if (!pr1.TimexStr.Contains("X") && !pr2.TimexStr.Contains("X")) - { - leftTimex = DateTimeFormatUtil.LuisDateTime(leftTime); - rightTimex = DateTimeFormatUtil.LuisDateTime(rightTime); - } - else - { - leftTimex = pr1.TimexStr; - rightTimex = pr2.TimexStr; - } - - ret.Timex = $"({leftTimex},{rightTimex},PT{Convert.ToInt32((rightTime - leftTime).TotalHours)}H)"; + public Regex AmPmDescRegex { get; } - ret.Success = true; - return ret; - } + public IImmutableDictionary UnitMap { get; } - // parse "this night" - private DateTimeResolutionResult ParseSpecificNight(string text, DateObject referenceTime) + public bool GetMatchedTimeRangeAndSwift(string text, out string todSymbol, out int beginHour, out int endHour, out int endMinute, out int swift) { - var ret = new DateTimeResolutionResult(); var trimmedText = text.Trim(); - int beginHour, endHour, endMin = 0; - string timeStr; - // handle 昨晚,今晨 - if (ChineseDateTimePeriodExtractorConfiguration.SpecificTimeOfDayRegex.IsExactMatch(trimmedText, trim: true)) - { - var swift = 0; - switch (trimmedText) - { - case "今晚": - swift = 0; - timeStr = "TEV"; - beginHour = 16; - endHour = 20; - break; - case "今早": - case "今晨": - swift = 0; - timeStr = "TMO"; - beginHour = 8; - endHour = Constants.HalfDayHourCount; - 
break; - case "明晚": - swift = 1; - timeStr = "TEV"; - beginHour = 16; - endHour = 20; - break; - case "明早": - case "明晨": - swift = 1; - timeStr = "TMO"; - beginHour = 8; - endHour = Constants.HalfDayHourCount; - break; - case "昨晚": - swift = -1; - timeStr = "TEV"; - beginHour = 16; - endHour = 20; - break; - default: - return ret; - } - - var date = referenceTime.AddDays(swift).Date; - int day = date.Day, month = date.Month, year = date.Year; - - ret.Timex = DateTimeFormatUtil.FormatDate(date) + timeStr; - ret.FutureValue = - ret.PastValue = - new Tuple( - DateObject.MinValue.SafeCreateFromValue(year, month, day, beginHour, 0, 0), - DateObject.MinValue.SafeCreateFromValue(year, month, day, endHour, endMin, endMin)); - ret.Success = true; - return ret; + // @TODO move hardcoded values to resources file + beginHour = 0; + endHour = 0; + endMinute = 0; + swift = 0; + + var tod = string.Empty; + + switch (trimmedText) + { + case "今晚": + swift = 0; + tod = Constants.Evening; + break; + case "今早": + case "今晨": + swift = 0; + tod = Constants.Morning; + break; + case "明晚": + swift = 1; + tod = Constants.Evening; + break; + case "明早": + case "明晨": + swift = 1; + tod = Constants.Morning; + break; + case "昨晚": + swift = -1; + tod = Constants.Evening; + break; } - // handle morning, afternoon.. 
if (MORegex.IsMatch(trimmedText)) { - timeStr = "TMO"; - beginHour = 8; - endHour = Constants.HalfDayHourCount; + tod = Constants.Morning; } else if (MIRegex.IsMatch(trimmedText)) { - timeStr = "TMI"; - beginHour = 11; - endHour = 13; + tod = Constants.MidDay; } else if (AFRegex.IsMatch(trimmedText)) { - timeStr = "TAF"; - beginHour = Constants.HalfDayHourCount; - endHour = 16; + tod = Constants.Afternoon; } else if (EVRegex.IsMatch(trimmedText)) { - timeStr = "TEV"; - beginHour = 16; - endHour = 20; + tod = Constants.Evening; } else if (NIRegex.IsMatch(trimmedText)) { - timeStr = "TNI"; - beginHour = 20; - endHour = 23; - endMin = 59; - } - else - { - return ret; + tod = Constants.Night; } - - if (ChineseDateTimePeriodExtractorConfiguration.SpecificTimeOfDayRegex.IsExactMatch(trimmedText, trim: true)) + else if (string.IsNullOrEmpty(tod)) { - var swift = 0; - if (ChineseDateTimePeriodExtractorConfiguration.NextRegex.IsMatch(trimmedText)) - { - swift = 1; - } - else if (ChineseDateTimePeriodExtractorConfiguration.LastRegex.IsMatch(trimmedText)) - { - swift = -1; - } - - var date = referenceTime.AddDays(swift).Date; - int day = date.Day, month = date.Month, year = date.Year; - - ret.Timex = DateTimeFormatUtil.FormatDate(date) + timeStr; - ret.FutureValue = - ret.PastValue = - new Tuple( - DateObject.MinValue.SafeCreateFromValue(year, month, day, beginHour, 0, 0), - DateObject.MinValue.SafeCreateFromValue(year, month, day, endHour, endMin, endMin)); - ret.Success = true; - return ret; + todSymbol = null; + return false; } - // handle Date followed by morning, afternoon - var match = ChineseDateTimePeriodExtractorConfiguration.TimeOfDayRegex.Match(trimmedText); - - if (match.Success) - { - var beforeStr = trimmedText.Substring(0, match.Index).Trim(); - var ers = SingleDateExtractor.Extract(beforeStr, referenceTime); - - if (ers.Count == 0 || ers[0].Length != beforeStr.Length) - { - return ret; - } - - var pr = this.config.DateParser.Parse(ers[0], referenceTime); - var 
futureDate = (DateObject)((DateTimeResolutionResult)pr.Value).FutureValue; - var pastDate = (DateObject)((DateTimeResolutionResult)pr.Value).PastValue; + var parseResult = TimexUtility.ResolveTimeOfDay(tod); + todSymbol = parseResult.Timex; + beginHour = parseResult.BeginHour; + endHour = parseResult.EndHour; + endMinute = parseResult.EndMin; - ret.Timex = pr.TimexStr + timeStr; - - ret.FutureValue = - new Tuple( - DateObject.MinValue.SafeCreateFromValue(futureDate.Year, futureDate.Month, futureDate.Day, beginHour, 0, 0), - DateObject.MinValue.SafeCreateFromValue(futureDate.Year, futureDate.Month, futureDate.Day, endHour, endMin, endMin)); - - ret.PastValue = - new Tuple( - DateObject.MinValue.SafeCreateFromValue(pastDate.Year, pastDate.Month, pastDate.Day, beginHour, 0, 0), - DateObject.MinValue.SafeCreateFromValue(pastDate.Year, pastDate.Month, pastDate.Day, endHour, endMin, endMin)); - - ret.Success = true; - - return ret; - } - - return ret; + return true; } - // parse "in 20 minutes" - private DateTimeResolutionResult ParseNumberWithUnit(string text, DateObject referenceTime) + public bool GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMin) { - var ret = new DateTimeResolutionResult(); - string unitStr; - - // if there are spaces between number and unit - var ers = CardinalExtractor.Extract(text); - if (ers.Count == 1) - { - var pr = CardinalParser.Parse(ers[0]); - var srcUnit = text.Substring(ers[0].Start + ers[0].Length ?? 0).Trim(); - - if (srcUnit.StartsWith("个")) - { - srcUnit = srcUnit.Substring(1); - } - - var beforeStr = text.Substring(0, ers[0].Start ?? 
0); - if (this.config.UnitMap.ContainsKey(srcUnit)) - { - var numStr = pr.ResolutionStr; - unitStr = this.config.UnitMap[srcUnit]; - var prefixMatch = ChineseDateTimePeriodExtractorConfiguration.PastRegex.MatchExact(beforeStr, trim: true); - - if (prefixMatch.Success) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case "H": - beginDate = referenceTime.AddHours(-(double)pr.Value); - endDate = referenceTime; - break; - case "M": - beginDate = referenceTime.AddMinutes(-(double)pr.Value); - endDate = referenceTime; - break; - case "S": - beginDate = referenceTime.AddSeconds(-(double)pr.Value); - endDate = referenceTime; - break; - default: - return ret; - } - - ret.Timex = - $"({DateTimeFormatUtil.LuisDate(beginDate)}T{DateTimeFormatUtil.LuisTime(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)}T{DateTimeFormatUtil.LuisTime(endDate)},PT{numStr}{unitStr[0]})"; - ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); - ret.Success = true; - return ret; - } - - prefixMatch = ChineseDateTimePeriodExtractorConfiguration.FutureRegex.MatchExact(beforeStr, trim: true); - - if (prefixMatch.Success) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case "H": - beginDate = referenceTime; - endDate = referenceTime.AddHours((double)pr.Value); - break; - case "M": - beginDate = referenceTime; - endDate = referenceTime.AddMinutes((double)pr.Value); - break; - case "S": - beginDate = referenceTime; - endDate = referenceTime.AddSeconds((double)pr.Value); - break; - default: - return ret; - } - - ret.Timex = - $"({DateTimeFormatUtil.LuisDate(beginDate)}T{DateTimeFormatUtil.LuisTime(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)}T{DateTimeFormatUtil.LuisTime(endDate)},PT{numStr}{unitStr[0]})"; - ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); - ret.Success = true; - return ret; - } - } - } - - // handle "last hour" - var match = ChineseDateTimePeriodExtractorConfiguration.UnitRegex.Match(text); - if (match.Success) - { - var 
srcUnit = match.Groups["unit"].Value; - var beforeStr = text.Substring(0, match.Index).Trim(); - if (this.config.UnitMap.ContainsKey(srcUnit)) - { - unitStr = this.config.UnitMap[srcUnit]; - - if (ChineseDateTimePeriodExtractorConfiguration.PastRegex.IsExactMatch(beforeStr, trim: true)) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case "H": - beginDate = referenceTime.AddHours(-1); - endDate = referenceTime; - break; - case "M": - beginDate = referenceTime.AddMinutes(-1); - endDate = referenceTime; - break; - case "S": - beginDate = referenceTime.AddSeconds(-1); - endDate = referenceTime; - break; - default: - return ret; - } - - ret.Timex = - $"({DateTimeFormatUtil.LuisDate(beginDate)}T{DateTimeFormatUtil.LuisTime(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)}T{DateTimeFormatUtil.LuisTime(endDate)},PT1{unitStr[0]})"; - ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); - ret.Success = true; - return ret; - } - - if (ChineseDateTimePeriodExtractorConfiguration.FutureRegex.IsExactMatch(beforeStr, trim: true)) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case "H": - beginDate = referenceTime; - endDate = referenceTime.AddHours(1); - break; - case "M": - beginDate = referenceTime; - endDate = referenceTime.AddMinutes(1); - break; - case "S": - beginDate = referenceTime; - endDate = referenceTime.AddSeconds(1); - break; - default: - return ret; - } - - ret.Timex = - $"({DateTimeFormatUtil.LuisDate(beginDate)}T{DateTimeFormatUtil.LuisTime(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)}T{DateTimeFormatUtil.LuisTime(endDate)},PT1{unitStr[0]})"; - ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); - ret.Success = true; - return ret; - } - } - } - - return ret; + return GetMatchedTimeRangeAndSwift(text, out todSymbol, out beginHour, out endHour, out endMin, out int swift); } } } \ No newline at end of file diff --git 
a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDurationParserConfiguration.cs index 6fcb856a81..62b26cf14c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDurationParserConfiguration.cs @@ -1,104 +1,61 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; -using Microsoft.Recognizers.Definitions.Chinese; +using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.NumberWithUnit; using Microsoft.Recognizers.Text.NumberWithUnit.Chinese; using static Microsoft.Recognizers.Text.DateTime.Chinese.ChineseDurationExtractorConfiguration; -using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseDurationParserConfiguration : IDateTimeParser + public class ChineseDurationParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKDurationParserConfiguration { - public static readonly string ParserName = Constants.SYS_DATETIME_DURATION; // "Duration"; - public static readonly Dictionary UnitValueMap = DateTimeDefinitions.DurationUnitValueMap; + public ChineseDurationParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) + { + InternalParser = new NumberWithUnitParser(new DurationParserConfiguration()); - private static readonly IParser InternalParser = new NumberWithUnitParser(new DurationParserConfiguration()); + var durationConfig = new BaseDateTimeOptionsConfiguration(config.Culture, DateTimeOptions.None); + DurationExtractor = new BaseCJKDurationExtractor(new ChineseDurationExtractorConfiguration(durationConfig), false); - private readonly IFullDateTimeParserConfiguration config; + 
YearRegex = ChineseDurationExtractorConfiguration.YearRegex; + SomeRegex = ChineseDurationExtractorConfiguration.SomeRegex; + MoreOrLessRegex = ChineseDurationExtractorConfiguration.MoreOrLessRegex; + DurationUnitRegex = ChineseDurationExtractorConfiguration.DurationUnitRegex; + AnUnitRegex = ChineseDurationExtractorConfiguration.AnUnitRegex; + DurationConnectorRegex = ChineseDurationExtractorConfiguration.DurationConnectorRegex; - public ChineseDurationParserConfiguration(IFullDateTimeParserConfiguration configuration) - { - config = configuration; + UnitMap = config.UnitMap; + UnitValueMap = config.UnitValueMap; } - public ParseResult Parse(ExtractResult extResult) - { - return this.Parse(extResult, DateObject.Now); - } + public IDateTimeExtractor DurationExtractor { get; } - public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) - { - var referenceTime = refDate; + public IParser InternalParser { get; } - // handle cases like "三年半" - var hasHalfSuffix = false; - if (er.Text.EndsWith("半")) - { - er.Length -= 1; - er.Text = er.Text.Substring(0, er.Text.Length - 1); - hasHalfSuffix = true; - } + public Regex YearRegex { get; } - var parseResult = InternalParser.Parse(er); - var unitResult = parseResult.Value as UnitValue; + public Regex SomeRegex { get; } - if (unitResult == null) - { - return null; - } + public Regex MoreOrLessRegex { get; } - var dateTimeParseResult = new DateTimeResolutionResult(); - var unitStr = unitResult.Unit; - var numStr = unitResult.Number; + public Regex DurationUnitRegex { get; } - if (hasHalfSuffix) - { - numStr = (double.Parse(numStr) + 0.5).ToString(CultureInfo.InvariantCulture); - } + public Regex AnUnitRegex { get; } - dateTimeParseResult.Timex = "P" + (BaseDurationParser.IsLessThanDay(unitStr) ? 
"T" : string.Empty) + numStr + unitStr[0]; - dateTimeParseResult.FutureValue = dateTimeParseResult.PastValue = double.Parse(numStr) * UnitValueMap[unitStr]; - dateTimeParseResult.Success = true; + public Regex DurationConnectorRegex { get; } - if (dateTimeParseResult.Success) - { - dateTimeParseResult.FutureResolution = new Dictionary - { - { TimeTypeConstants.DURATION, dateTimeParseResult.FutureValue.ToString() }, - }; - - dateTimeParseResult.PastResolution = new Dictionary - { - { TimeTypeConstants.DURATION, dateTimeParseResult.PastValue.ToString() }, - }; - } + public IImmutableDictionary UnitMap { get; } - var ret = new DateTimeParseResult - { - Text = er.Text, - Start = er.Start, - Length = er.Length, - Type = er.Type, - Data = er.Data, - Value = dateTimeParseResult, - TimexStr = dateTimeParseResult.Timex, - ResolutionStr = string.Empty, - }; - - return ret; - } - - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } + public IImmutableDictionary UnitValueMap { get; } internal class DurationParserConfiguration : ChineseNumberWithUnitParserConfiguration { public DurationParserConfiguration() - : base(new CultureInfo(Culture.Chinese)) + : base(new CultureInfo(Text.Culture.Chinese)) { this.BindDictionary(DurationExtractorConfiguration.DurationSuffixList); } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseHolidayParserConfiguration.cs index 23d8c0c5b0..7a935ea954 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseHolidayParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseHolidayParserConfiguration.cs @@ -1,5 +1,9 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; +using System.Globalization; using System.Linq; using System.Text.RegularExpressions; @@ -11,10 +15,9 @@ namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseHolidayParserConfiguration : IDateTimeParser + public class ChineseHolidayParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKHolidayParserConfiguration { - public static readonly string ParserName = Constants.SYS_DATETIME_DATE; // "Date"; - + // @TODO Move dictionaries and hardcoded terms to resource file public static readonly Dictionary> FixedHolidaysDict = new Dictionary> { { "元旦", NewYear }, @@ -59,211 +62,57 @@ public class ChineseHolidayParserConfiguration : IDateTimeParser public static readonly Dictionary NoFixedTimex = DateTimeDefinitions.HolidayNoFixedTimex; - private static readonly IExtractor IntegerExtractor = new IntegerExtractor(); - - private static readonly IParser IntegerParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Chinese))); - - private readonly IFullDateTimeParserConfiguration config; - - public ChineseHolidayParserConfiguration(IFullDateTimeParserConfiguration configuration) + public ChineseHolidayParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) { - config = configuration; - } + IntegerExtractor = config.IntegerExtractor; + NumberParser = config.NumberParser; - public ParseResult Parse(ExtractResult extResult) - { - return this.Parse(extResult, DateObject.Now); + HolidayRegexList = ChineseHolidayExtractorConfiguration.HolidayRegexList; + LunarHolidayRegex = ChineseHolidayExtractorConfiguration.LunarHolidayRegex; } - public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) - { - var referenceDate = refDate; - object value = null; + public IExtractor IntegerExtractor { get; } - if (er.Type.Equals(ParserName)) - { - var innerResult = ParseHolidayRegexMatch(er.Text, referenceDate); - - if 
(innerResult.Success) - { - innerResult.FutureResolution = new Dictionary - { - { TimeTypeConstants.DATE, DateTimeFormatUtil.FormatDate((DateObject)innerResult.FutureValue) }, - }; - - innerResult.PastResolution = new Dictionary - { - { TimeTypeConstants.DATE, DateTimeFormatUtil.FormatDate((DateObject)innerResult.PastValue) }, - }; - - innerResult.IsLunar = IsLunarCalendar(er.Text); - value = innerResult; - } - } + public IParser NumberParser { get; } - var ret = new DateTimeParseResult - { - Text = er.Text, - Start = er.Start, - Length = er.Length, - Type = er.Type, - Data = er.Data, - Value = value, - TimexStr = value == null ? string.Empty : ((DateTimeResolutionResult)value).Timex, - ResolutionStr = string.Empty, - }; - - return ret; - } + Dictionary> ICJKHolidayParserConfiguration.FixedHolidaysDict => FixedHolidaysDict; - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } + Dictionary> ICJKHolidayParserConfiguration.HolidayFuncDict => HolidayFuncDict; - private static DateTimeResolutionResult ParseHolidayRegexMatch(string text, DateObject referenceDate) - { - foreach (var regex in ChineseHolidayExtractorConfiguration.HolidayRegexList) - { - var match = regex.Match(text); - - if (match.Success) - { - // Value string will be set in Match2Date method - var ret = Match2Date(match, referenceDate); - return ret; - } - } + Dictionary ICJKHolidayParserConfiguration.NoFixedTimex => NoFixedTimex; - return new DateTimeResolutionResult(); - } - - private static DateTimeResolutionResult Match2Date(Match match, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - var holidayStr = match.Groups["holiday"].Value; - - var year = referenceDate.Year; - var hasYear = false; - var yearNum = match.Groups["year"].Value; - var yearChs = match.Groups["yearchs"].Value; - var yearRel = match.Groups["yearrel"].Value; - if (!string.IsNullOrEmpty(yearNum)) - { - hasYear = true; - if (yearNum.EndsWith("年")) - { - yearNum = 
yearNum.Substring(0, yearNum.Length - 1); - } + public IEnumerable HolidayRegexList { get; } - year = int.Parse(yearNum); - } - else if (!string.IsNullOrEmpty(yearChs)) - { - hasYear = true; - if (yearChs.EndsWith("年")) - { - yearChs = yearChs.Substring(0, yearChs.Length - 1); - } - - year = ConvertChineseToInteger(yearChs); - } - else if (!string.IsNullOrEmpty(yearRel)) - { - hasYear = true; - if (yearRel.EndsWith("去年")) - { - year--; - } - else if (yearRel.EndsWith("明年")) - { - year++; - } - } + public Regex LunarHolidayRegex { get; } - if (year < 100 && year >= 90) - { - year += 1900; - } - else if (year < 20) - { - year += 2000; - } + public int GetSwiftYear(string text) + { + // @TODO move hardcoded values to resource file + var trimmedText = text.Trim(); + var swift = -10; - if (!string.IsNullOrEmpty(holidayStr)) + if (text.EndsWith("去年", StringComparison.Ordinal)) { - DateObject value; - string timexStr; - if (FixedHolidaysDict.ContainsKey(holidayStr)) - { - value = FixedHolidaysDict[holidayStr](year); - timexStr = $"-{value.Month:D2}-{value.Day:D2}"; - } - else - { - if (HolidayFuncDict.ContainsKey(holidayStr)) - { - value = HolidayFuncDict[holidayStr](year); - timexStr = NoFixedTimex[holidayStr]; - } - else - { - return ret; - } - } - - if (hasYear) - { - ret.Timex = year.ToString("D4") + timexStr; - ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(year, value.Month, value.Day); - ret.Success = true; - return ret; - } - - ret.Timex = "XXXX" + timexStr; - ret.FutureValue = GetFutureValue(value, referenceDate, holidayStr); - ret.PastValue = GetPastValue(value, referenceDate, holidayStr); - ret.Success = true; - return ret; + swift = -1; } - - return ret; - } - - private static DateObject GetFutureValue(DateObject value, DateObject referenceDate, string holiday) - { - if (value < referenceDate) + else if (text.EndsWith("明年", StringComparison.Ordinal)) { - if (FixedHolidaysDict.ContainsKey(holiday)) - { - return value.AddYears(1); - } 
- - if (HolidayFuncDict.ContainsKey(holiday)) - { - value = HolidayFuncDict[holiday](referenceDate.Year + 1); - } + swift = +1; } - return value; + return swift; } - private static DateObject GetPastValue(DateObject value, DateObject referenceDate, string holiday) + public string SanitizeYearToken(string yearStr) { - if (value >= referenceDate) + // @TODO move hardcoded values to resource file + if (yearStr.EndsWith("年", StringComparison.Ordinal)) { - if (FixedHolidaysDict.ContainsKey(holiday)) - { - return value.AddYears(-1); - } - - if (HolidayFuncDict.ContainsKey(holiday)) - { - value = HolidayFuncDict[holiday](referenceDate.Year - 1); - } + yearStr = yearStr.Substring(0, yearStr.Length - 1); } - return value; + return yearStr; } private static DateObject NewYear(int year) => new DateObject(year, 1, 1); @@ -374,53 +223,5 @@ private static DateObject GetThanksgivingDayOfYear(int year) where DateObject.MinValue.SafeCreateFromValue(year, 11, day).DayOfWeek == DayOfWeek.Thursday select day).ElementAt(3)); } - - private static int ConvertChineseToInteger(string yearChsStr) - { - var year = 0; - var num = 0; - - var er = IntegerExtractor.Extract(yearChsStr); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER)) - { - num = Convert.ToInt32((double)(IntegerParser.Parse(er[0]).Value ?? 0)); - } - } - - if (num < 10) - { - num = 0; - foreach (var ch in yearChsStr) - { - num *= 10; - er = IntegerExtractor.Extract(ch.ToString()); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER)) - { - num += Convert.ToInt32((double)(IntegerParser.Parse(er[0]).Value ?? 0)); - } - } - } - - year = num; - } - else - { - year = num; - } - - return year == 0 ? 
-1 : year; - } - - // parse if lunar contains - private bool IsLunarCalendar(string text) - { - var trimmedText = text.Trim(); - var match = ChineseHolidayExtractorConfiguration.LunarHolidayRegex.Match(trimmedText); - return match.Success; - } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseMergedDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseMergedDateTimeParserConfiguration.cs deleted file mode 100644 index 45cf235efd..0000000000 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseMergedDateTimeParserConfiguration.cs +++ /dev/null @@ -1,135 +0,0 @@ -using System; -using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions.Chinese; - -using DateObject = System.DateTime; - -namespace Microsoft.Recognizers.Text.DateTime.Chinese -{ - public class ChineseMergedDateTimeParserConfiguration : BaseMergedDateTimeParser - { - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - - private static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.MergedBeforeRegex, RegexFlags); - - private static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.MergedAfterRegex, RegexFlags); - - // TODO implement SinceRegex - private static readonly Regex SinceRegex = new Regex(DateTimeDefinitions.MergedAfterRegex, RegexFlags); - - public ChineseMergedDateTimeParserConfiguration(IMergedParserConfiguration configuration) - : base(configuration) - { - } - - public new ParseResult Parse(ExtractResult er) - { - return Parse(er, DateObject.Now); - } - - public new ParseResult Parse(ExtractResult er, DateObject refTime) - { - var referenceTime = refTime; - DateTimeParseResult pr; - - // push, save teh MOD string - bool hasBefore = false, hasAfter = false, hasSince = false; - if (BeforeRegex.IsMatch(er.Text)) - { - hasBefore = true; - } - else if (AfterRegex.IsMatch(er.Text)) - { - hasAfter = 
true; - } - else if (SinceRegex.IsMatch(er.Text)) - { - hasSince = true; - } - - if (er.Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal)) - { - pr = this.Config.DateParser.Parse(er, referenceTime); - if (pr.Value == null) - { - // pr = this.config.HolidayParser.Parse(er, referenceTime); - } - } - else if (er.Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) - { - pr = this.Config.TimeParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_DATETIME, StringComparison.Ordinal)) - { - pr = this.Config.DateTimeParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal)) - { - pr = this.Config.DatePeriodParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_TIMEPERIOD, StringComparison.Ordinal)) - { - pr = this.Config.TimePeriodParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_DATETIMEPERIOD, StringComparison.Ordinal)) - { - pr = this.Config.DateTimePeriodParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_DURATION, StringComparison.Ordinal)) - { - pr = this.Config.DurationParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_SET, StringComparison.Ordinal)) - { - pr = this.Config.SetParser.Parse(er, referenceTime); - } - else - { - return null; - } - - // pop, restore the MOD string - if (hasBefore) - { - var val = (DateTimeResolutionResult)pr.Value; - if (val != null) - { - val.Mod = Constants.BEFORE_MOD; - } - - pr.Value = val; - } - - if (hasAfter) - { - var val = (DateTimeResolutionResult)pr.Value; - if (val != null) - { - val.Mod = Constants.AFTER_MOD; - } - - pr.Value = val; - } - - if (hasSince) - { - var val = (DateTimeResolutionResult)pr.Value; - if (val != null) - { - val.Mod = Constants.SINCE_MOD; - } - - pr.Value = val; - } - - pr.Value = DateTimeResolution(pr); - - var hasModifier = hasBefore || 
hasAfter || hasSince; - - // change the type at last for the after or before mode - pr.Type = $"{ParserTypeName}.{DetermineDateTimeType(er.Type, hasModifier)}"; - - return pr; - } - } -} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseMergedParserConfiguration.cs new file mode 100644 index 0000000000..0c2de4f482 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseMergedParserConfiguration.cs @@ -0,0 +1,43 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Chinese; + +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime.Chinese +{ + public class ChineseMergedParserConfiguration : ChineseCommonDateTimeParserConfiguration, ICJKMergedParserConfiguration + { + public ChineseMergedParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) + { + BeforeRegex = ChineseMergedExtractorConfiguration.BeforeRegex; + AfterRegex = ChineseMergedExtractorConfiguration.AfterRegex; + SincePrefixRegex = ChineseMergedExtractorConfiguration.SincePrefixRegex; + SinceSuffixRegex = ChineseMergedExtractorConfiguration.SinceSuffixRegex; + AroundPrefixRegex = ChineseMergedExtractorConfiguration.AroundPrefixRegex; + AroundSuffixRegex = ChineseMergedExtractorConfiguration.AroundSuffixRegex; + EqualRegex = ChineseMergedExtractorConfiguration.EqualRegex; + UntilRegex = ChineseMergedExtractorConfiguration.UntilRegex; + } + + public Regex BeforeRegex { get; } + + public Regex AfterRegex { get; } + + public Regex SincePrefixRegex { get; } + + public Regex SinceSuffixRegex { get; } + + public Regex AroundPrefixRegex { get; } + + public Regex AroundSuffixRegex { get; } + + public Regex UntilRegex { get; } + + public Regex 
EqualRegex { get; } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseSetParserConfiguration.cs index be27d8157d..0e19080937 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseSetParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseSetParserConfiguration.cs @@ -1,230 +1,99 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseSetParserConfiguration : IDateTimeParser + public class ChineseSetParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKSetParserConfiguration { - public static readonly string ParserName = Constants.SYS_DATETIME_SET; + public ChineseSetParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) + { + DurationExtractor = config.DurationExtractor; + TimeExtractor = config.TimeExtractor; + TimePeriodExtractor = config.TimePeriodExtractor; + DateExtractor = config.DateExtractor; + DateTimeExtractor = config.DateTimeExtractor; + + DurationParser = config.DurationParser; + TimeParser = config.TimeParser; + TimePeriodParser = config.TimePeriodParser; + DateParser = config.DateParser; + DateTimeParser = config.DateTimeParser; + + EachPrefixRegex = ChineseSetExtractorConfiguration.EachPrefixRegex; + EachUnitRegex = ChineseSetExtractorConfiguration.EachUnitRegex; + EachDayRegex = ChineseSetExtractorConfiguration.EachDayRegex; + EachDateUnitRegex = ChineseSetExtractorConfiguration.EachDateUnitRegex; + UnitMap = config.UnitMap; + } - private static readonly 
IDateTimeExtractor DurationExtractor = new ChineseDurationExtractorConfiguration(); - private static readonly IDateTimeExtractor TimeExtractor = new ChineseTimeExtractorConfiguration(); - private static readonly IDateTimeExtractor DateExtractor = new ChineseDateExtractorConfiguration(); - private static readonly IDateTimeExtractor DateTimeExtractor = new ChineseDateTimeExtractorConfiguration(); + public IDateTimeExtractor DurationExtractor { get; } - private readonly IFullDateTimeParserConfiguration config; + public IDateTimeExtractor TimeExtractor { get; } - public ChineseSetParserConfiguration(IFullDateTimeParserConfiguration configuration) - { - config = configuration; - } + public IDateTimeExtractor TimePeriodExtractor { get; } - public ParseResult Parse(ExtractResult extResult) - { - return this.Parse(extResult, DateObject.Now); - } + public IDateTimeExtractor DateExtractor { get; } - public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) - { - object value = null; - if (er.Type.Equals(ParserName, StringComparison.Ordinal)) - { - var innerResult = ParseEachUnit(er.Text); - if (!innerResult.Success) - { - innerResult = ParseEachDuration(er.Text, refDate); - } - - if (!innerResult.Success) - { - innerResult = ParserTimeEveryday(er.Text, refDate); - } - - // NOTE: Please do not change the order of following function - // we must consider datetime before date - if (!innerResult.Success) - { - innerResult = ParseEachDateTime(er.Text, refDate); - } - - if (!innerResult.Success) - { - innerResult = ParseEachDate(er.Text, refDate); - } - - if (innerResult.Success) - { - innerResult.FutureResolution = new Dictionary - { - { TimeTypeConstants.SET, (string)innerResult.FutureValue }, - }; - - innerResult.PastResolution = new Dictionary - { - { TimeTypeConstants.SET, (string)innerResult.PastValue }, - }; - - value = innerResult; - } - } + public IDateTimeExtractor DateTimeExtractor { get; } - var ret = new DateTimeParseResult - { - Text = er.Text, - Start = 
er.Start, - Length = er.Length, - Type = er.Type, - Data = er.Data, - Value = value, - TimexStr = value == null ? string.Empty : ((DateTimeResolutionResult)value).Timex, - ResolutionStr = string.Empty, - }; - return ret; - } + public IDateTimeParser DurationParser { get; } - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } + public IDateTimeParser TimeParser { get; } - private static bool IsLessThanDay(string unit) - { - return unit.Equals("S") || unit.Equals("M") || unit.Equals("H"); - } + public IDateTimeParser TimePeriodParser { get; } - private DateTimeResolutionResult ParseEachDuration(string text, DateObject refDate) - { - var ret = new DateTimeResolutionResult(); - var ers = DurationExtractor.Extract(text, refDate); - if (ers.Count != 1 || !string.IsNullOrWhiteSpace(text.Substring(ers[0].Start + ers[0].Length ?? 0))) - { - return ret; - } + public IDateTimeParser DateParser { get; } - var beforeStr = text.Substring(0, ers[0].Start ?? 
0); - if (ChineseSetExtractorConfiguration.EachPrefixRegex.IsMatch(beforeStr)) - { - var pr = this.config.DurationParser.Parse(ers[0], DateObject.Now); - ret.Timex = pr.TimexStr; - ret.FutureValue = ret.PastValue = "Set: " + pr.TimexStr; - ret.Success = true; - return ret; - } + public IDateTimeParser DateTimeParser { get; } - return ret; - } + public Regex EachPrefixRegex { get; } - private DateTimeResolutionResult ParseEachUnit(string text) - { - var ret = new DateTimeResolutionResult(); + public Regex EachUnitRegex { get; } - // handle "each month" - var match = ChineseSetExtractorConfiguration.EachUnitRegex.MatchExact(text, trim: true); + public Regex EachDayRegex { get; } - if (match.Success) - { - var sourceUnit = match.Groups["unit"].Value; - if (!string.IsNullOrEmpty(sourceUnit) && this.config.UnitMap.ContainsKey(sourceUnit)) - { - if (sourceUnit.Equals("天") || sourceUnit.Equals("日")) - { - ret.Timex = "P1D"; - } - else if (sourceUnit.Equals("周") || sourceUnit.Equals("星期")) - { - ret.Timex = "P1W"; - } - else if (sourceUnit.Equals("月")) - { - ret.Timex = "P1M"; - } - else if (sourceUnit.Equals("年")) - { - ret.Timex = "P1Y"; - } - else - { - return ret; - } - - ret.FutureValue = ret.PastValue = "Set: " + ret.Timex; - ret.Success = true; - return ret; - } - } + public Regex EachDateUnitRegex { get; } - return ret; - } + public IImmutableDictionary UnitMap { get; } - private DateTimeResolutionResult ParserTimeEveryday(string text, DateObject refDate) + public bool GetMatchedUnitTimex(string text, out string timex) { - var ret = new DateTimeResolutionResult(); - var ers = TimeExtractor.Extract(text, refDate); - if (ers.Count != 1) - { - return ret; - } + var trimmedText = text.Trim(); - var beforeStr = text.Substring(0, ers[0].Start ?? 
0); - var match = ChineseSetExtractorConfiguration.EachDayRegex.Match(beforeStr); - if (match.Success) + // @TODO move hardcoded values to resources file + if (trimmedText.Equals("天", StringComparison.Ordinal) || + trimmedText.Equals("日", StringComparison.Ordinal)) { - var pr = this.config.TimeParser.Parse(ers[0], DateObject.Now); - ret.Timex = pr.TimexStr; - ret.FutureValue = ret.PastValue = "Set: " + ret.Timex; - ret.Success = true; - return ret; + timex = "P1D"; } - - return ret; - } - - private DateTimeResolutionResult ParseEachDate(string text, DateObject refDate) - { - var ret = new DateTimeResolutionResult(); - var ers = DateExtractor.Extract(text, refDate); - if (ers.Count != 1) + else if (trimmedText.Equals("周", StringComparison.Ordinal) || + trimmedText.Equals("星期", StringComparison.Ordinal)) { - return ret; + timex = "P1W"; } - - var beforeStr = text.Substring(0, ers[0].Start ?? 0); - var match = ChineseSetExtractorConfiguration.EachPrefixRegex.Match(beforeStr); - if (match.Success) + else if (trimmedText.Equals("月", StringComparison.Ordinal)) { - var pr = this.config.DateParser.Parse(ers[0], DateObject.Now); - ret.Timex = pr.TimexStr; - ret.FutureValue = ret.PastValue = "Set: " + ret.Timex; - ret.Success = true; - return ret; + timex = "P1M"; } - - return ret; - } - - private DateTimeResolutionResult ParseEachDateTime(string text, DateObject refDate) - { - var ret = new DateTimeResolutionResult(); - var ers = DateTimeExtractor.Extract(text, refDate); - if (ers.Count != 1) + else if (trimmedText.Equals("年", StringComparison.Ordinal)) { - return ret; + timex = "P1Y"; } - - var beforeStr = text.Substring(0, ers[0].Start ?? 
0); - var match = ChineseSetExtractorConfiguration.EachPrefixRegex.Match(beforeStr); - if (match.Success) + else { - var pr = this.config.DateTimeParser.Parse(ers[0], DateObject.Now); - ret.Timex = pr.TimexStr; - ret.FutureValue = ret.PastValue = "Set: " + ret.Timex; - ret.Success = true; - return ret; + timex = null; + return false; } - return ret; + return true; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseTimeParserConfiguration.cs index e60d36ff21..f89bdd681a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseTimeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using Microsoft.Recognizers.Definitions.Chinese; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -6,10 +9,8 @@ namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseTimeParserConfiguration : IDateTimeParser + public class ChineseTimeParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKTimeParserConfiguration { - public static readonly IDateTimeExtractor TimeExtractor = new ChineseTimeExtractorConfiguration(); - private static TimeFunctions timeFunc = new TimeFunctions { NumberDictionary = DateTimeDefinitions.TimeNumberDictionary, @@ -25,68 +26,18 @@ public class ChineseTimeParserConfiguration : IDateTimeParser { TimeType.LessTime, timeFunc.HandleLess }, }; - private readonly IFullDateTimeParserConfiguration config; - - public ChineseTimeParserConfiguration(IFullDateTimeParserConfiguration configuration) - { - config = configuration; - } - - private delegate TimeResult TimeFunction(DateTimeExtra extra); - - public ParseResult 
Parse(ExtractResult extResult) + public ChineseTimeParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) { - return this.Parse(extResult, DateObject.Now); + TimeExtractor = config.TimeExtractor; } - public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) - { - var referenceTime = refDate; - var extra = er.Data as DateTimeExtra; - if (extra == null) - { - var result = TimeExtractor.Extract(er.Text, refDate); - extra = result[0]?.Data as DateTimeExtra; - } - - if (extra != null) - { - var timeResult = FunctionMap[extra.Type](extra); - var parseResult = timeFunc.PackTimeResult(extra, timeResult, referenceTime); - if (parseResult.Success) - { - parseResult.FutureResolution = new Dictionary - { - { TimeTypeConstants.TIME, DateTimeFormatUtil.FormatTime((DateObject)parseResult.FutureValue) }, - }; - - parseResult.PastResolution = new Dictionary - { - { TimeTypeConstants.TIME, DateTimeFormatUtil.FormatTime((DateObject)parseResult.PastValue) }, - }; - } + // public delegate TimeResult TimeFunction(DateTimeExtra extra); - var ret = new DateTimeParseResult - { - Start = er.Start, - Text = er.Text, - Type = er.Type, - Length = er.Length, - Value = parseResult, - Data = timeResult, - ResolutionStr = string.Empty, - TimexStr = parseResult.Timex, - }; + public IDateTimeExtractor TimeExtractor { get; } - return ret; - } + TimeFunctions ICJKTimeParserConfiguration.TimeFunc => timeFunc; - return null; - } - - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } + Dictionary ICJKTimeParserConfiguration.FunctionMap => FunctionMap; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseTimePeriodParserConfiguration.cs index 8ff6f3cc35..ff54719c61 100644 --- 
a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseTimePeriodParserConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Linq; @@ -8,7 +11,7 @@ namespace Microsoft.Recognizers.Text.DateTime.Chinese { - public class ChineseTimePeriodParserConfiguration : IDateTimeParser + public class ChineseTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKTimePeriodParserConfiguration { private static TimeFunctions timeFunc = new TimeFunctions { @@ -17,88 +20,20 @@ public class ChineseTimePeriodParserConfiguration : IDateTimeParser DayDescRegex = ChineseTimeExtractorConfiguration.DayDescRegex, }; - private readonly IFullDateTimeParserConfiguration config; - - public ChineseTimePeriodParserConfiguration(IFullDateTimeParserConfiguration configuration) + public ChineseTimePeriodParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) { - config = configuration; + TimeExtractor = config.TimeExtractor; + TimeParser = config.TimeParser; } - public ParseResult Parse(ExtractResult extResult) - { - return this.Parse(extResult, DateObject.Now); - } - - public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) - { - var referenceTime = refDate; - var extra = er.Data as DateTimeExtra; - if (extra == null) - { - var result = new ChineseTimeExtractorConfiguration().Extract(er.Text, refDate); - extra = result[0]?.Data as DateTimeExtra; - } - - if (extra != null) - { - // Handle special case like '上午', '下午' - var parseResult = ParseChineseTimeOfDay(er.Text, referenceTime); - - if (!parseResult.Success) - { - parseResult = TimePeriodFunctions.Handle(this.config.TimeParser, extra, referenceTime, timeFunc); - } + public IDateTimeExtractor TimeExtractor { get; } - if 
(parseResult.Success) - { - parseResult.FutureResolution = new Dictionary - { - { - TimeTypeConstants.START_TIME, - DateTimeFormatUtil.FormatTime(((Tuple)parseResult.FutureValue).Item1) - }, - { - TimeTypeConstants.END_TIME, - DateTimeFormatUtil.FormatTime(((Tuple)parseResult.FutureValue).Item2) - }, - }; + public IDateTimeParser TimeParser { get; } - parseResult.PastResolution = new Dictionary - { - { - TimeTypeConstants.START_TIME, - DateTimeFormatUtil.FormatTime(((Tuple)parseResult.PastValue).Item1) - }, - { - TimeTypeConstants.END_TIME, - DateTimeFormatUtil.FormatTime(((Tuple)parseResult.PastValue).Item2) - }, - }; - } - - var ret = new DateTimeParseResult - { - Start = er.Start, - Text = er.Text, - Type = er.Type, - Length = er.Length, - Value = parseResult, - ResolutionStr = string.Empty, - TimexStr = parseResult.Timex, - }; - - return ret; - } + TimeFunctions ICJKTimePeriodParserConfiguration.TimeFunc => timeFunc; - return null; - } - - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } - - private static bool GetMatchedTimexRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) + public bool GetMatchedTimexRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) { var trimmedText = text.Trim(); beginHour = 0; @@ -107,27 +42,27 @@ private static bool GetMatchedTimexRange(string text, out string timex, out int var timeOfDay = string.Empty; - if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o))) + if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Morning; } - else if (DateTimeDefinitions.MidDayTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.MidDayTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.MidDay; } - else if (DateTimeDefinitions.AfternoonTermList.Any(o => 
trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Afternoon; } - else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Evening; } - else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o))) + else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o, StringComparison.Ordinal))) { timeOfDay = Constants.Daytime; } - else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Night; } @@ -137,7 +72,7 @@ private static bool GetMatchedTimexRange(string text, out string timex, out int return false; } - var parseResult = TimexUtility.ParseTimeOfDay(timeOfDay); + var parseResult = TimexUtility.ResolveTimeOfDay(timeOfDay); timex = parseResult.Timex; beginHour = parseResult.BeginHour; endHour = parseResult.EndHour; @@ -145,26 +80,5 @@ private static bool GetMatchedTimexRange(string text, out string timex, out int return true; } - - private DateTimeResolutionResult ParseChineseTimeOfDay(string text, DateObject referenceTime) - { - int day = referenceTime.Day, - month = referenceTime.Month, - year = referenceTime.Year; - var ret = new DateTimeResolutionResult(); - - if (!GetMatchedTimexRange(text, out string timex, out int beginHour, out int endHour, out int endMinSeg)) - { - return new DateTimeResolutionResult(); - } - - ret.Timex = timex; - ret.FutureValue = ret.PastValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(year, month, day, beginHour, 0, 0), - DateObject.MinValue.SafeCreateFromValue(year, month, day, endHour, endMinSeg, 0)); - ret.Success = true; - - return ret; - } } } diff --git 
a/.NET/Microsoft.Recognizers.Text.DateTime/Config/BaseDateTimeOptionsConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Config/BaseDateTimeOptionsConfiguration.cs index 4e8df3fa67..8ce3552cde 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Config/BaseDateTimeOptionsConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Config/BaseDateTimeOptionsConfiguration.cs @@ -1,4 +1,10 @@ -namespace Microsoft.Recognizers.Text.DateTime +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Reflection; + +namespace Microsoft.Recognizers.Text.DateTime { public class BaseDateTimeOptionsConfiguration : IDateTimeOptionsConfiguration { @@ -20,7 +26,10 @@ public BaseDateTimeOptionsConfiguration(IDateTimeOptionsConfiguration config) public bool DmyDateFormat { get; } + public string LanguageMarker { get; set; } + public string Culture { get; } + protected static TimeSpan RegexTimeOut => DateTimeRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Config/IDateTimeOptionsConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Config/IDateTimeOptionsConfiguration.cs index 3227e463e8..5415326b4d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Config/IDateTimeOptionsConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Config/IDateTimeOptionsConfiguration.cs @@ -1,4 +1,5 @@ -using Microsoft.Recognizers.Text.Config; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
namespace Microsoft.Recognizers.Text.DateTime { @@ -7,5 +8,7 @@ public interface IDateTimeOptionsConfiguration : IConfiguration DateTimeOptions Options { get; } bool DmyDateFormat { get; } + + string LanguageMarker { get; } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs index f860745ccf..d8a6e0ba6e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs @@ -1,4 +1,9 @@ -using System.Diagnostics.CodeAnalysis; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Diagnostics.CodeAnalysis; +using System.Globalization; + using Microsoft.Recognizers.Definitions; namespace Microsoft.Recognizers.Text.DateTime @@ -20,6 +25,9 @@ public static class Constants // SourceEntity Types public const string SYS_DATETIME_DATETIMEPOINT = "datetimepoint"; + // Number Types + public const string SYS_NUMBER_ORDINAL = "builtin.num.ordinal"; + // Model Name public const string MODEL_DATETIME = "datetime"; @@ -48,6 +56,7 @@ public static class Constants // AmPm time representation for time parser public const string Comment_AmPm = "ampm"; + public const string Comment_Am = "am"; // Prefix early/late for time parser public const string Comment_Early = "early"; @@ -57,6 +66,9 @@ public static class Constants public const string Comment_WeekOf = "WeekOf"; public const string Comment_MonthOf = "MonthOf"; + // Tag to mark cases where the specifc resolution timex depends on future or past values. + public const string Comment_DoubleTimex = "DoubleTimex"; + // MOD Value // "before" -> To mean "preceding in time". I.e. Does not include the extracted datetime entity in the resolution's ending point. 
Equivalent to "<" public const string BEFORE_MOD = "before"; @@ -81,6 +93,12 @@ public static class Constants public const string APPROX_MOD = "approx"; + public const string HAS_MOD = "mod"; + + // labels associated to AgoRegex and LaterRegex + public const string AGO_LABEL = "ago"; + public const string LATER_LABEL = "later"; + // These are some particular values for timezone recognition public const int InvalidOffsetValue = -10000; public const string UtcOffsetMinsKey = "utcOffsetMins"; @@ -98,18 +116,65 @@ public static class Constants public const int MaxWeekOfMonth = 5; public const int MaxMonth = 12; public const int MinMonth = 1; + public const int MaxDayMonth = 31; - // hours of one half day + // Day start hour + public const int DayHourStart = 0; + + // Hours in a day + public const int DayHourCount = 24; + + // Hours in a half day public const int HalfDayHourCount = 12; - // hours of a half mid-day-duration + // Hours in a quarter of a day + public const int QuarterDayHourCount = 6; + + // Hours is a half mid-day-duration public const int HalfMidDayDurationHourCount = 2; - // the length of four digits year, e.g., 2018 + // Minutes in an hour + public const int HourMinuteCount = 60; + + // Char length of four digits year, e.g., 2018 public const int FourDigitsYearLength = 4; + // Default boundaries for time of day resolution + public const int EarlyMorningBeginHour = 4; + public const int EarlyMorningEndHour = 8; + public const int MorningBeginHour = 8; + public const int MorningEndHour = 12; + public const int MidDayBeginHour = 11; + public const int MidDayEndHour = 13; + public const int AfternoonBeginHour = 12; + public const int AfternoonEndHour = 16; + public const int EveningBeginHour = 16; + public const int EveningEndHour = 20; + public const int DaytimeBeginHour = 8; + public const int DaytimeEndHour = 18; + public const int NighttimeBeginHour = 0; + public const int NighttimeEndHour = 8; + public const int BusinessBeginHour = 8; + public const 
int BusinessEndHour = 18; + public const int NightBeginHour = 20; + public const int NightEndHour = 23; + public const int NightEndMin = 59; + public const int MealtimeBreakfastBeginHour = 8; + public const int MealtimeBreakfastEndHour = 12; + public const int MealtimeBrunchBeginHour = 8; + public const int MealtimeBrunchEndHour = 12; + public const int MealtimeLunchBeginHour = 11; + public const int MealtimeLunchEndHour = 13; + public const int MealtimeDinnerBeginHour = 16; + public const int MealtimeDinnerEndHour = 20; + + // Default period range modifier deltas + public const int EARLY_LATE_TIME_DELTA = 2; + + // Constants specifying the priority of interpreting month and day order public const string DefaultLanguageFallback_MDY = "MDY"; public const string DefaultLanguageFallback_DMY = "DMY"; + public const string DefaultLanguageFallback_YMD = "YMD"; // ZH // Groups' names for named groups in regexes public const string NextGroupName = "next"; @@ -124,33 +189,103 @@ public static class Constants public const string MinuteGroupName = "min"; public const string HourGroupName = "hour"; public const string YearGroupName = "year"; + public const string YearRelGroupName = "yearrel"; + public const string FullYearGroupName = "fullyear"; + public const string FourDigitYearGroupName = "FourDigitYear"; + public const string FirstTwoYearGroupName = "firsttwoyearnum"; + public const string LastTwoYearGroupName = "lasttwoyearnum"; + public const string DayGroupName = "day"; + public const string WeekdayGroupName = "weekday"; + public const string WeekGroupName = "week"; + public const string MonthGroupName = "month"; + public const string RelMonthGroupName = "relmonth"; + public const string MonthFromGroupName = "monthFrom"; + public const string MonthToGroupName = "monthTo"; + public const string SeasonGroupName = "season"; + public const string DecadeGroupName = "decade"; + public const string CenturyGroupName = "century"; + public const string RelCenturyGroupName = 
"relcentury"; + public const string AnotherGroupName = "another"; + public const string HalfGroupName = "half"; + public const string HalfTagGroupName = "halfTag"; + public const string FirstHalfGroupName = "firstHalf"; + public const string SecondHalfGroupName = "secondHalf"; + public const string QuarterGroupName = "quarter"; + public const string ThreeQuarterGroupName = "threequarter"; + public const string CardinalGroupName = "cardinal"; public const string TimeOfDayGroupName = "timeOfDay"; public const string BusinessDayGroupName = "business"; public const string LeftAmPmGroupName = "leftDesc"; public const string RightAmPmGroupName = "rightDesc"; public const string MealTimeGroupName = "mealTime"; + public const string NegativeGroupName = "neg"; + public const string YearCJKGroupName = "yearCJK"; + public const string UnitOfYearGroupName = "uoy"; + public const string UnitGroupName = "unit"; + public const string NumGroupName = "num"; + public const string FirstGroupName = "first"; + public const string LastGroupName = "last"; + public const string LatestGroupName = "latest"; + public const string AfterGroupName = "after"; + public const string RelEarlyGroupName = "RelEarly"; + public const string RelLateGroupName = "RelLate"; + public const string EarlyPrefixGroupName = "EarlyPrefix"; + public const string LessGroupName = "less"; + public const string MoreGroupName = "more"; + public const string FewGroupName = "few"; + public const string LaterGroupName = "later"; + public const string SpecificEndOfGroupName = "SpecificEndOf"; + public const string TomorrowGroupName = "tomorrow"; + public const string LatePrefixGroupName = "LatePrefix"; + public const string MidPrefixGroupName = "MidPrefix"; + public const string RestOfGroupName = "restof"; + public const string DurationGroupName = "duration"; + public const string ToDateGroupName = "toDate"; + public const string SpecialGroupName = "special"; + public const string StartGroupName = "start"; + public const 
string EndGroupName = "end"; + public const string WithinGroupName = "within"; + public const string ForGroupName = "for"; + public const string FromGroupName = "from"; + public const string NumberGroupName = "number"; + public const string OrdinalGroupName = "ordinal"; + public const string OrderGroupName = "order"; + public const string AgoGroupName = "ago"; + public const string YesterdayGroupName = "yesterday"; + public const string PluralUnit = "plural"; + public const string AmbiguousPattern = "ambiguous"; + public const string HolidayWeekend = "holidayWeekend"; + + // Include the date mentioned, to make "before" -> "until" or "after" -> "since". Such as "on or earlier than 1/1/2016". + public const string IncludeGroupName = "include"; public const string DECADE_UNIT = "10Y"; public const string FORTNIGHT_UNIT = "2W"; + public const string QUARTER_UNIT = "3MON"; + public const string WEEKEND_UNIT = "WE"; // Timex public const string TimexYear = "Y"; public const string TimexMonth = "M"; public const string TimexMonthFull = "MON"; public const string TimexWeek = "W"; + public const string TimexFortnight = "W"; // Unit calculation comes from code public const string TimexDay = "D"; public const string TimexBusinessDay = "BD"; public const string TimexWeekend = "WE"; public const string TimexHour = "H"; public const string TimexMinute = "M"; public const string TimexSecond = "S"; + public const string TimexNow = "PRESENT_REF"; public const char TimexFuzzy = 'X'; public const string TimexFuzzyYear = "XXXX"; + public const string TimexFuzzyTwoDigitYear = "XX"; public const string TimexFuzzyMonth = "XX"; public const string TimexFuzzyWeek = "WXX"; public const string TimexFuzzyDay = "XX"; public const string DateTimexConnector = "-"; public const string TimeTimexConnector = ":"; + public const string TimexSeparator = ","; public const string GeneralPeriodPrefix = "P"; public const string TimeTimexPrefix = "T"; @@ -161,6 +296,7 @@ public static class Constants 
public const string Afternoon = "TAF"; public const string Evening = "TEV"; public const string Daytime = "TDT"; + public const string Nighttime = "TNT"; public const string Night = "TNI"; public const string BusinessHour = "TBH"; public const string MealtimeBreakfast = "TMEB"; @@ -168,6 +304,10 @@ public static class Constants public const string MealtimeLunch = "TMEL"; public const string MealtimeDinner = "TMED"; + public const string InvalidDateString = "0001-01-01"; + + public const char CompositeTimexDelimiter = '|'; + // Invalid year public const int InvalidYear = int.MinValue; public const int InvalidMonth = int.MinValue; @@ -180,14 +320,17 @@ public static class Constants public const int INVALID_CONNECTOR_CODE = -1; // Invalid year non-constant - public static readonly int MinYearNum = int.Parse(BaseDateTime.MinYearNum); - public static readonly int MaxYearNum = int.Parse(BaseDateTime.MaxYearNum); + public static readonly int MinYearNum = int.Parse(BaseDateTime.MinYearNum, CultureInfo.InvariantCulture); + public static readonly int MaxYearNum = int.Parse(BaseDateTime.MaxYearNum, CultureInfo.InvariantCulture); - public static readonly int MaxTwoDigitYearFutureNum = int.Parse(BaseDateTime.MaxTwoDigitYearFutureNum); - public static readonly int MinTwoDigitYearPastNum = int.Parse(BaseDateTime.MinTwoDigitYearPastNum); + public static readonly int MaxTwoDigitYearFutureNum = int.Parse(BaseDateTime.MaxTwoDigitYearFutureNum, CultureInfo.InvariantCulture); + public static readonly int MinTwoDigitYearPastNum = int.Parse(BaseDateTime.MinTwoDigitYearPastNum, CultureInfo.InvariantCulture); public static readonly System.DateTime InvalidDate = default(System.DateTime); + public static readonly int BASE_YEAR_PAST_CENTURY = 1900; + public static readonly int BASE_YEAR_CURRENT_CENTURY = 2000; // Timex non-constant public static readonly string[] DatePeriodTimexSplitter = { ",", "(", ")" }; + public static readonly char[] DurationUnitChar = { 'D', 'W', 'M', 'Y', 'B' }; } } \ 
No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/DataStructure.cs b/.NET/Microsoft.Recognizers.Text.DateTime/DataStructure.cs index 87d5d3d847..db141a042c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/DataStructure.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/DataStructure.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.DateTime +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.DateTime { public enum DatePeriodTimexType { @@ -12,6 +15,11 @@ public enum DatePeriodTimexType /// ByWeek, + /// + /// Represents a fortnight Period + /// + ByFortnight, + /// /// Represents a month Period /// diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeOptions.cs b/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeOptions.cs index ed9f3acb77..0e068a7628 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeOptions.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeOptions.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; namespace Microsoft.Recognizers.Text.DateTime { @@ -30,6 +33,16 @@ public enum DateTimeOptions /// ExtendedTypes = 8, + /// + /// NoProtoCache + /// + NoProtoCache = 16, + + /// + /// TasksMode, specific functionality that changes default behaviour for business reasons. + /// + TasksMode = 1048576, // 2 ^20 + /// /// FailFast, mode that aborts extraction/tagging quickly for non-entity cases. May be removed later. 
/// diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeRecognizer.cs b/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeRecognizer.cs index 6f956641b7..b2dd6656ea 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeRecognizer.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeRecognizer.cs @@ -1,20 +1,32 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +using System; +using System.Collections.Generic; using Microsoft.Recognizers.Text.DateTime.Chinese; using Microsoft.Recognizers.Text.DateTime.Dutch; using Microsoft.Recognizers.Text.DateTime.English; using Microsoft.Recognizers.Text.DateTime.French; using Microsoft.Recognizers.Text.DateTime.German; +using Microsoft.Recognizers.Text.DateTime.Hindi; using Microsoft.Recognizers.Text.DateTime.Italian; +using Microsoft.Recognizers.Text.DateTime.Japanese; using Microsoft.Recognizers.Text.DateTime.Portuguese; using Microsoft.Recognizers.Text.DateTime.Spanish; +using Microsoft.Recognizers.Text.DateTime.Turkish; +using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime { public class DateTimeRecognizer : Recognizer { + public DateTimeRecognizer(string targetCulture, DateTimeOptions options, bool lazyInitialization, int timeoutInSeconds) + : base(targetCulture, options, lazyInitialization, timeoutInSeconds) + { + } + public DateTimeRecognizer(string targetCulture, DateTimeOptions options = DateTimeOptions.None, bool lazyInitialization = false) - : base(targetCulture, options, lazyInitialization) + : base(targetCulture, options, lazyInitialization, 0) { } @@ -24,7 +36,7 @@ public DateTimeRecognizer(string targetCulture, int options, bool lazyInitializa } public DateTimeRecognizer(DateTimeOptions options = DateTimeOptions.None, bool lazyInitialization = true) - : this(null, options, lazyInitialization) + : this(null, options, lazyInitialization, 0) { } @@ -66,9 +78,10 @@ 
protected override void InitializeConfiguration() RegisterModel( Culture.Chinese, options => new DateTimeModel( - new FullDateTimeParser( - new ChineseDateTimeParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Chinese, options))), - new ChineseMergedExtractorConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Chinese, options)))); + new BaseCJKMergedDateTimeParser( + new ChineseMergedParserConfiguration(new ChineseCommonDateTimeParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Chinese, options)))), + new BaseCJKMergedDateTimeExtractor( + new ChineseMergedExtractorConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Chinese, options))))); RegisterModel( Culture.Spanish, @@ -78,6 +91,14 @@ protected override void InitializeConfiguration() new BaseMergedDateTimeExtractor( new SpanishMergedExtractorConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Spanish, options))))); + RegisterModel( + Culture.SpanishMexican, + options => new DateTimeModel( + new BaseMergedDateTimeParser( + new SpanishMergedParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.SpanishMexican, options))), + new BaseMergedDateTimeExtractor( + new SpanishMergedExtractorConfiguration(new BaseDateTimeOptionsConfiguration(Culture.SpanishMexican, options))))); + RegisterModel( Culture.French, options => new DateTimeModel( @@ -110,21 +131,80 @@ protected override void InitializeConfiguration() new BaseMergedDateTimeExtractor( new ItalianMergedExtractorConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Italian, options))))); - // TODO to be uncommented when all tests for Dutch are green. 
- // RegisterModel( - // Culture.Dutch, - // options => new DateTimeModel( - // new BaseMergedDateTimeParser( - // new DutchMergedParserConfiguration(new BaseOptionsConfiguration(options, dmyDateFormat: true))), - // new BaseMergedDateTimeExtractor( - // new DutchMergedExtractorConfiguration(new BaseOptionsConfiguration(options, dmyDateFormat: true))))); - - // TODO to be uncommented when all tests for Japanese are green. + RegisterModel( + Culture.Turkish, + options => new DateTimeModel( + new BaseMergedDateTimeParser( + new TurkishMergedParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Turkish, options))), + new BaseMergedDateTimeExtractor( + new TurkishMergedExtractorConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Turkish, options))))); + + RegisterModel( + Culture.Hindi, + options => new DateTimeModel( + new BaseMergedDateTimeParser( + new HindiMergedParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Hindi, options))), + new BaseMergedDateTimeExtractor( + new HindiMergedExtractorConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Hindi, options))))); + + RegisterModel( + Culture.Dutch, + options => new DateTimeModel( + new BaseMergedDateTimeParser( + new DutchMergedParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Dutch, options))), + new BaseMergedDateTimeExtractor( + new DutchMergedExtractorConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Dutch, options))))); + + // TODO: to be uncommented when all tests for Swedish are green. 
// RegisterModel( - // Culture.Japanese, + // Culture.Swedish, // options => new DateTimeModel( - // new FullDateTimeParser(new JapaneseDateTimeParserConfiguration(options)), - // new JapaneseMergedExtractor(options))); + // new BaseMergedDateTimeParser( + // new SwedishMergedParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Swedish, options))), + // new BaseMergedDateTimeExtractor( + // new SwedishMergedExtractorConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Swedish, options))))); + + RegisterModel( + Culture.Japanese, + options => new DateTimeModel( + new BaseCJKMergedDateTimeParser( + new JapaneseMergedParserConfiguration(new JapaneseCommonDateTimeParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Japanese, options)))), + new BaseCJKMergedDateTimeExtractor( + new JapaneseMergedExtractorConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Japanese, options))))); + + // TODO to be uncommented when all tests for Arabic are green. + /*RegisterModel( + Culture.Arabic, + options => new DateTimeModel( + new BaseMergedDateTimeParser( + new ArabicMergedParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Arabic, options, dmyDateFormat: false))), + new BaseMergedDateTimeExtractor( + new ArabicMergedExtractorConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Arabic, options, dmyDateFormat: false))))); + */ + + // TODO to be uncommented when all tests for Korean are green. 
+ /*RegisterModel( + Culture.Korean, + options => new DateTimeModel( + new BaseCJKMergedDateTimeParser( + new KoreanMergedParserConfiguration(new KoreanCommonDateTimeParserConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Korean, options)))), + new BaseCJKMergedDateTimeExtractor( + new KoreanMergedExtractorConfiguration(new BaseDateTimeOptionsConfiguration(Culture.Korean, options))))); + */ + } + + protected override List GetRelatedTypes() + { + return new List() + { + typeof(BaseDateTimeOptionsConfiguration), + typeof(BaseTimeExtractor), + typeof(BaseCJKTimeExtractor), + typeof(BaseDateTimePeriodParser), + typeof(MatchingUtil), + typeof(TimeZoneUtility), + typeof(BaseDatetimeUtilityConfiguration), + }; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeResolutionKey.cs b/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeResolutionKey.cs index f8e1bc2b57..b16f99e016 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeResolutionKey.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeResolutionKey.cs @@ -1,10 +1,14 @@ -namespace Microsoft.Recognizers.Text.DateTime +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.DateTime { public static class DateTimeResolutionKey { public const string Timex = "timex"; public const string Mod = "Mod"; public const string IsLunar = "isLunar"; + public const string Value = "value"; public const string Start = "start"; public const string End = "end"; public const string List = "list"; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateExtractorConfiguration.cs index f2d8410cf3..bc6a886cd6 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; @@ -16,100 +19,100 @@ public class DutchDateExtractorConfiguration : BaseDateTimeOptionsConfiguration, new Regex(DateTimeDefinitions.MonthRegex, RegexOptions.Singleline); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SingleWeekDayRegex = - new Regex(DateTimeDefinitions.SingleWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SingleWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex OnRegex = - new 
Regex(DateTimeDefinitions.OnRegex, RegexFlags); + new Regex(DateTimeDefinitions.OnRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelaxedOnRegex = - new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisRegex = - new Regex(DateTimeDefinitions.ThisRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisRegex, RegexFlags, RegexTimeOut); public static readonly Regex LastDateRegex = - new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextDateRegex = - new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDayRegex = - new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayOfMonthRegex = - new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeWeekDayRegex = - new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDate = - new Regex(DateTimeDefinitions.SpecialDate, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDate, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDayWithNumRegex = - new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex 
ForTheRegex = - new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags); + new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayAndDayOfMothRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayAndDayRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex StrictRelativeRegex = - new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixArticleRegex = - new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); public static readonly Regex OfMonth = - new Regex(DateTimeDefinitions.OfMonth, RegexFlags); + new Regex(DateTimeDefinitions.OfMonth, RegexFlags, RegexTimeOut); public static readonly Regex MonthEnd = - new Regex(DateTimeDefinitions.MonthEnd, RegexFlags); + new Regex(DateTimeDefinitions.MonthEnd, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayEnd = - new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayStart = - new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags, RegexTimeOut); public static readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); public static 
readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeConnectorSymbolRegex = - new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags); + new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags, RegexTimeOut); public static readonly ImmutableDictionary DayOfWeek = DateTimeDefinitions.DayOfWeek.ToImmutableDictionary(); @@ -117,19 +120,33 @@ public class DutchDateExtractorConfiguration : BaseDateTimeOptionsConfiguration, public static readonly ImmutableDictionary MonthOfYear = DateTimeDefinitions.MonthOfYear.ToImmutableDictionary(); + public static readonly Regex BeforeAfterRegex = + new Regex(DateTimeDefinitions.BeforeAfterRegex, RegexFlags, RegexTimeOut); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.ImplicitDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.ImplicitDayRegex, RegexFlags, RegexTimeOut); public DutchDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = 
Number.Dutch.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.Dutch.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new DutchNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Dutch.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new DutchNumberParserConfiguration(numConfig)); + DurationExtractor = new BaseDurationExtractor(new DutchDurationExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new DutchHolidayExtractorConfiguration(this)); UtilityConfiguration = new DutchDatetimeUtilityConfiguration(); ImplicitDateList = new List @@ -174,39 +191,39 @@ public DutchDateExtractorConfiguration(IDateTimeOptionsConfiguration config) } // 3-23-2017 - var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags); + var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags, RegexTimeOut); // 23-3-2015 - var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags); + var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags, RegexTimeOut); // on (Sunday,)? 1.3 - var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags); + var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags, RegexTimeOut); // on (Sunday,)? 24-12 - var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags); + var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags, RegexTimeOut); // "(Sunday,)? 
7/23, 2018", year part is required - var dateRegex7L = new Regex(DateTimeDefinitions.DateExtractor7L, RegexFlags); + var dateRegex7L = new Regex(DateTimeDefinitions.DateExtractor7L, RegexFlags, RegexTimeOut); // "(Sunday,)? 7/23", year part is not required - var dateRegex7S = new Regex(DateTimeDefinitions.DateExtractor7S, RegexFlags); + var dateRegex7S = new Regex(DateTimeDefinitions.DateExtractor7S, RegexFlags, RegexTimeOut); // "(Sunday,)? 23/7, 2018", year part is required - var dateRegex9L = new Regex(DateTimeDefinitions.DateExtractor9L, RegexFlags); + var dateRegex9L = new Regex(DateTimeDefinitions.DateExtractor9L, RegexFlags, RegexTimeOut); // "(Sunday,)? 23/7", year part is not required - var dateRegex9S = new Regex(DateTimeDefinitions.DateExtractor9S, RegexFlags); + var dateRegex9S = new Regex(DateTimeDefinitions.DateExtractor9S, RegexFlags, RegexTimeOut); // (Sunday,)? 2015-12-23 - var dateRegexA = new Regex(DateTimeDefinitions.DateExtractorA, RegexFlags); + var dateRegexA = new Regex(DateTimeDefinitions.DateExtractorA, RegexFlags, RegexTimeOut); DateRegexList = new List { // (Sunday,)? April 5 or (Sunday,)? April 5, 2016 - new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags, RegexTimeOut), // (Sunday,)? 
6th of April - new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags, RegexTimeOut), }; var enableDmy = DmyDateFormat || @@ -227,6 +244,8 @@ public DutchDateExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DurationExtractor { get; } + public IDateTimeExtractor HolidayExtractor { get; } + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } public IEnumerable ImplicitDateList { get; } @@ -274,5 +293,7 @@ public DutchDateExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IDateExtractorConfiguration.RangeUnitRegex => RangeUnitRegex; Regex IDateExtractorConfiguration.RangeConnectorSymbolRegex => RangeConnectorSymbolRegex; + + Regex IDateExtractorConfiguration.BeforeAfterRegex => BeforeAfterRegex; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDatePeriodExtractorConfiguration.cs index 9f87bebfea..bd5dd2aa45 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDatePeriodExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; @@ -13,161 +16,173 @@ public class DutchDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfigur { // Base regexes public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); + new Regex(DateTimeDefinitions.ComplexTillRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeConnectorRegex = - new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex IllegalYearRegex = - new Regex(BaseDateTime.IllegalYearRegex, RegexFlags); + new Regex(BaseDateTime.IllegalYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WrittenMonthRegex = - new Regex(DateTimeDefinitions.WrittenMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WrittenMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthSuffixRegex = - new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex FutureSuffixRegex = - new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags, RegexTimeOut); // composite regexes public static readonly Regex SimpleCasesRegex = - new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthFrontSimpleCasesRegex = - new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthFrontBetweenRegex = - new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags, RegexTimeOut); public static readonly Regex BetweenRegex = - new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthWithYear = - new Regex(DateTimeDefinitions.MonthWithYear, RegexFlags); + new Regex(DateTimeDefinitions.MonthWithYear, RegexFlags, RegexTimeOut); public 
static readonly Regex OneWordPeriodRegex = - new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumWithYear = - new Regex(DateTimeDefinitions.MonthNumWithYear, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumWithYear, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfMonthRegex = - new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfYearRegex = - new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex FollowedDateUnit = - new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithDateUnit = - new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags, RegexTimeOut); public static readonly Regex QuarterRegex = - new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags); + new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags, RegexTimeOut); public static readonly Regex QuarterRegexYearFront = - new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags); + new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags, RegexTimeOut); public static readonly Regex AllHalfYearRegex = - new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex SeasonRegex = - new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags); + new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags, RegexTimeOut); public static readonly Regex WhichWeekRegex = - new Regex(DateTimeDefinitions.WhichWeekRegex, 
RegexFlags); + new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfRegex = - new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthOfRegex = - new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RestOfDateRegex = - new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterEarlyPeriodRegex = - new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekWithWeekDayRangeRegex = - new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearPlusNumberRegex = - new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex DecadeWithCenturyRegex = - new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags); + new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, 
RegexFlags, RegexTimeOut); public static readonly Regex YearPeriodRegex = - new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex ComplexDatePeriodRegex = - new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDecadeRegex = - new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags, RegexTimeOut); public static readonly Regex ReferenceDatePeriodRegex = - new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoRegex, RegexFlags); + new Regex(DateTimeDefinitions.AgoRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterRegex, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex CenturySuffixRegex = - new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FirstLastRegex = + new Regex(DateTimeDefinitions.FirstLastRegex, RegexFlags, 
RegexTimeOut); + + public static readonly Regex OfYearRegex = + new Regex(DateTimeDefinitions.OfYearRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + private static readonly Regex FromTokenRegex = + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex BetweenTokenRegex = + new Regex(DateTimeDefinitions.BetweenTokenRegex, RegexFlags, RegexTimeOut); + private static readonly Regex[] SimpleCasesRegexes = { // "3-5 Jan, 2018", @@ -241,10 +256,20 @@ public DutchDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration confi : base(config) { DatePointExtractor = new BaseDateExtractor(new DutchDateExtractorConfiguration(this)); - CardinalExtractor = Number.Dutch.CardinalExtractor.GetInstance(); - OrdinalExtractor = Number.Dutch.OrdinalExtractor.GetInstance(); DurationExtractor = new BaseDurationExtractor(new DutchDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new DutchNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Dutch.CardinalExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Dutch.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new DutchNumberParserConfiguration(numConfig)); } public IDateExtractor DatePointExtractor { get; } @@ -311,32 +336,36 @@ public DutchDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration confi Regex IDatePeriodExtractorConfiguration.NowRegex => NowRegex; + Regex IDatePeriodExtractorConfiguration.FirstLastRegex => FirstLastRegex; + + Regex IDatePeriodExtractorConfiguration.OfYearRegex => OfYearRegex; + bool 
IDatePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; string[] IDatePeriodExtractorConfiguration.DurationDateRestrictions => DateTimeDefinitions.DurationDateRestrictions; public bool GetFromTokenIndex(string text, out int index) { - index = -1; - if (text.EndsWith("from")) - { - index = text.LastIndexOf("from", StringComparison.Ordinal); - return true; - } - - return false; + index = -1; + var fromMatch = FromTokenRegex.Match(text); + if (fromMatch.Success) + { + index = fromMatch.Index; + } + + return fromMatch.Success; } public bool GetBetweenTokenIndex(string text, out int index) { - index = -1; - if (text.EndsWith("between")) - { - index = text.LastIndexOf("between", StringComparison.Ordinal); - return true; - } - - return false; + index = -1; + var betweenMatch = BetweenTokenRegex.Match(text); + if (betweenMatch.Success) + { + index = betweenMatch.Index; + } + + return betweenMatch.Success; } public bool HasConnectorToken(string text) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimeAltExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimeAltExtractorConfiguration.cs index b66d3669b8..00c816c104 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimeAltExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimeAltExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; @@ -7,22 +10,22 @@ namespace Microsoft.Recognizers.Text.DateTime.Dutch public class DutchDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeAltExtractorConfiguration { public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangePrefixRegex = - new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] RelativePrefixList = { @@ -37,10 +40,10 @@ public class DutchDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfigu private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex OrRegex = - new Regex(DateTimeDefinitions.OrRegex, RegexFlags); + new Regex(DateTimeDefinitions.OrRegex, RegexFlags, RegexTimeOut); private static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public 
DutchDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimeExtractorConfiguration.cs index cf208ddf9e..ded41fab7e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimeExtractorConfiguration.cs @@ -1,76 +1,91 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; using Microsoft.Recognizers.Text.DateTime.Dutch.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Dutch { public class DutchDateTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeExtractorConfiguration { public static readonly Regex PrepositionRegex = - new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixRegex = - new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfDayRegex = - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, 
RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificEndOfRegex = - new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificEndOfRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConnectorRegex = - new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex NumberAsTimeRegex = - new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateNumberConnectorRegex = - new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public DutchDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(numConfig); + DatePointExtractor = new BaseDateExtractor(new DutchDateExtractorConfiguration(this)); TimePointExtractor = new BaseTimeExtractor(new DutchTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new DutchDurationExtractorConfiguration(this)); UtilityConfiguration = new DutchDatetimeUtilityConfiguration(); + HolidayExtractor = new BaseHolidayExtractor(new DutchHolidayExtractorConfiguration(this)); } public IExtractor IntegerExtractor { get; } @@ -81,6 +96,8 @@ public DutchDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + public IDateTimeExtractor HolidayExtractor { get; } + Regex IDateTimeExtractorConfiguration.NowRegex => NowRegex; Regex IDateTimeExtractorConfiguration.SuffixRegex => SuffixRegex; 
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimePeriodExtractorConfiguration.cs index cee7b1d6d2..ca8a8f9dc9 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimePeriodExtractorConfiguration.cs @@ -1,7 +1,12 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Dutch; +using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Dutch @@ -10,68 +15,83 @@ public class DutchDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsConf IDateTimePeriodExtractorConfiguration { public static readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixDayRegex = new Regex(DateTimeDefinitions.PrefixDayRegex, RegexFlags | RegexOptions.RightToLeft); public static readonly Regex SuffixRegex = - new 
Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDaysRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeNumberCombinedWithUnit = - new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex HyphenDateRegex = + new Regex(BaseDateTime.HyphenDateRegex, RegexFlags, RegexTimeOut); + // Anchors needed to correctly handle patterns when multiple TimeOfDay entities are present. 
public static readonly Regex PeriodTimeOfDayWithDateRegex = - new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegexWithAnchors, RegexFlags, RegexTimeOut); public static readonly Regex RelativeTimeUnitRegex = - new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RestOfDateTimeRegex = - new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); private static readonly Regex MiddlePauseRegex = - new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags); + new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags, RegexTimeOut); private static readonly Regex PeriodTimeOfDayRegex = - new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags, RegexTimeOut); private static readonly Regex PeriodSpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.PeriodSpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodSpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); private static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); private static readonly Regex TimeFollowedUnit = - new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); + + private static readonly Regex FromTokenRegex = + new 
Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex BetweenTokenRegex = + new Regex(DateTimeDefinitions.BetweenTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex RangeConnectorRegex = + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); private static readonly Regex[] SimpleCases = -{ + { DutchTimePeriodExtractorConfiguration.PureNumFromTo, + DutchTimePeriodExtractorConfiguration.TimeDateFromTo, DutchTimePeriodExtractorConfiguration.PureNumBetweenAnd, + DutchTimePeriodExtractorConfiguration.SpecificTimeFromTo, }; public DutchDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) @@ -79,13 +99,23 @@ public DutchDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration c { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; - CardinalExtractor = Number.Dutch.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Dutch.CardinalExtractor.GetInstance(numConfig); + SingleDateExtractor = new BaseDateExtractor(new DutchDateExtractorConfiguration(this)); SingleTimeExtractor = new BaseTimeExtractor(new DutchTimeExtractorConfiguration(this)); SingleDateTimeExtractor = new BaseDateTimeExtractor(new DutchDateTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new DutchDurationExtractorConfiguration(this)); TimePeriodExtractor = new BaseTimePeriodExtractor(new DutchTimePeriodExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new DutchTimeZoneExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new DutchHolidayExtractorConfiguration(this)); } public string TokenBeforeDate { get; } @@ -110,7 +140,9 @@ public 
DutchDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration c public Regex FollowedUnit => TimeFollowedUnit; - bool IDateTimePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + // CheckBothBeforeAfter normally gets its value from DateTimeDefinitions.CheckBothBeforeAfter which however for Dutch is false. + // It only needs to be true in DateTimePeriod. + bool IDateTimePeriodExtractorConfiguration.CheckBothBeforeAfter => true; Regex IDateTimePeriodExtractorConfiguration.PrefixDayRegex => PrefixDayRegex; @@ -142,6 +174,8 @@ public DutchDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration c Regex IDateTimePeriodExtractorConfiguration.AfterRegex => AfterRegex; + Regex IDateTimePeriodExtractorConfiguration.TasksmodeMealTimeofDayRegex => null; + public IExtractor CardinalExtractor { get; } public IDateTimeExtractor SingleDateExtractor { get; } @@ -154,38 +188,38 @@ public DutchDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration c public IDateTimeExtractor TimePeriodExtractor { get; } - public IDateTimeExtractor TimeZoneExtractor { get; } - + public IDateTimeExtractor TimeZoneExtractor { get; } + + public IDateTimeExtractor HolidayExtractor { get; } + // TODO: these three methods are the same in DatePeriod, should be abstracted public bool GetFromTokenIndex(string text, out int index) { - index = -1; - if (text.EndsWith("from")) - { - index = text.LastIndexOf("from", StringComparison.Ordinal); - return true; - } - - return false; + index = -1; + var fromMatch = FromTokenRegex.Match(text); + if (fromMatch.Success) + { + index = fromMatch.Index; + } + + return fromMatch.Success; } public bool GetBetweenTokenIndex(string text, out int index) { - index = -1; - if (text.EndsWith("between")) - { - index = text.LastIndexOf("between", StringComparison.Ordinal); - return true; - } - - return false; + index = -1; + var betweenMatch = BetweenTokenRegex.Match(text); + if (betweenMatch.Success) + { + 
index = betweenMatch.Index; + } + + return betweenMatch.Success; } public bool HasConnectorToken(string text) { - var rangeConnetorRegex = new Regex(DateTimeDefinitions.RangeConnectorRegex); - - return rangeConnetorRegex.IsExactMatch(text, trim: true); + return RangeConnectorRegex.IsExactMatch(text, trim: true); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDurationExtractorConfiguration.cs index a40b6a37e4..dc61201ef4 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDurationExtractorConfiguration.cs @@ -1,50 +1,61 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Dutch { public class DutchDurationExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDurationExtractorConfiguration { public static readonly Regex DurationUnitRegex = - new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAndRegex = - new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationFollowedUnit = - new Regex(DateTimeDefinitions.DurationFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.DurationFollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithDurationUnit = - new 
Regex(DateTimeDefinitions.NumberCombinedWithDurationUnit, RegexFlags); + new Regex(DateTimeDefinitions.NumberCombinedWithDurationUnit, RegexFlags, RegexTimeOut); public static readonly Regex AnUnitRegex = - new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DuringRegex = - new Regex(DateTimeDefinitions.DuringRegex, RegexFlags); + new Regex(DateTimeDefinitions.DuringRegex, RegexFlags, RegexTimeOut); public static readonly Regex AllRegex = - new Regex(DateTimeDefinitions.AllRegex, RegexFlags); + new Regex(DateTimeDefinitions.AllRegex, RegexFlags, RegexTimeOut); public static readonly Regex HalfRegex = - new Regex(DateTimeDefinitions.HalfRegex, RegexFlags); + new Regex(DateTimeDefinitions.HalfRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConjunctionRegex = - new Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags, RegexTimeOut); public static readonly Regex InexactNumberRegex = - new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex InexactNumberUnitRegex = - new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDurationUnitRegex = - new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationConnectorRegex = - new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModPrefixRegex = + new Regex(DateTimeDefinitions.ModPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly 
Regex ModSuffixRegex = + new Regex(DateTimeDefinitions.ModSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialNumberUnitRegex = null; @@ -59,7 +70,16 @@ public class DutchDurationExtractorConfiguration : BaseDateTimeOptionsConfigurat public DutchDurationExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - CardinalExtractor = Number.Dutch.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Dutch.NumberExtractor.GetInstance(numConfig); + UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary(); UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary(); } @@ -103,5 +123,11 @@ public DutchDurationExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IDurationExtractorConfiguration.MoreThanRegex => MoreThanRegex; Regex IDurationExtractorConfiguration.LessThanRegex => LessThanRegex; + + Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex; + + Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex; + + public Dictionary<Regex, Regex> AmbiguityFiltersDict => null; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchHolidayExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchHolidayExtractorConfiguration.cs index a70c0a166a..6b587d0976 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchHolidayExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchHolidayExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; @@ -8,22 +11,14 @@ namespace Microsoft.Recognizers.Text.DateTime.Dutch public class DutchHolidayExtractorConfiguration : BaseDateTimeOptionsConfiguration, IHolidayExtractorConfiguration { public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); - - public static readonly Regex H1 = - new Regex(DateTimeDefinitions.HolidayRegex1, RegexFlags); - - public static readonly Regex H2 = - new Regex(DateTimeDefinitions.HolidayRegex2, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); - public static readonly Regex H3 = - new Regex(DateTimeDefinitions.HolidayRegex3, RegexFlags); + public static readonly Regex H = + new Regex(DateTimeDefinitions.HolidayRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] HolidayRegexList = { - H1, - H2, - H3, + H, }; private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchMergedExtractorConfiguration.cs index 221cc030ec..e232b703cb 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchMergedExtractorConfiguration.cs @@ -1,57 +1,67 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Dutch; using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Matcher; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Dutch { public class DutchMergedExtractorConfiguration : BaseDateTimeOptionsConfiguration, IMergedExtractorConfiguration { public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex SinceRegex = - new Regex(DateTimeDefinitions.SinceRegex, RegexFlags); + new Regex(DateTimeDefinitions.SinceRegex, RegexFlags, RegexTimeOut); public static readonly Regex AroundRegex = - new Regex(DateTimeDefinitions.AroundRegex, RegexFlags); + new Regex(DateTimeDefinitions.AroundRegex, RegexFlags, RegexTimeOut); public static readonly Regex EqualRegex = - new Regex(BaseDateTime.EqualRegex, RegexFlags); + new Regex(BaseDateTime.EqualRegex, RegexFlags, RegexTimeOut); public static readonly Regex FromToRegex = - new Regex(DateTimeDefinitions.FromToRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromToRegex, RegexFlags, RegexTimeOut); public static readonly Regex SingleAmbiguousMonthRegex = - new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrepositionSuffixRegex = - new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmbiguousRangeModifierPrefix = - new 
Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags); + new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags, RegexTimeOut); public static readonly Regex NumberEndingPattern = - new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags); + new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificDatePeriodRegex = - new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PotentialAmbiguousRangeRegex = + new Regex(DateTimeDefinitions.PotentialAmbiguousRangeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] TermFilterRegexes = { // one on one - new Regex(DateTimeDefinitions.OneOnOneRegex, RegexFlags), + new Regex(DateTimeDefinitions.OneOnOneRegex, RegexFlags, RegexTimeOut), // (the)? 
(day|week|month|year) - new Regex(DateTimeDefinitions.SingleAmbiguousTermsRegex, RegexFlags), + new Regex(DateTimeDefinitions.SingleAmbiguousTermsRegex, RegexFlags, RegexTimeOut), }; public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher(); @@ -72,7 +82,16 @@ public DutchMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) HolidayExtractor = new BaseHolidayExtractor(new DutchHolidayExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new DutchTimeZoneExtractorConfiguration(this)); DateTimeAltExtractor = new BaseDateTimeAltExtractor(new DutchDateTimeAltExtractorConfiguration(this)); - IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(numConfig); AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict); @@ -124,9 +143,9 @@ public DutchMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.PrepositionSuffixRegex => PrepositionSuffixRegex; - Regex IMergedExtractorConfiguration.AmbiguousRangeModifierPrefix => null; + Regex IMergedExtractorConfiguration.AmbiguousRangeModifierPrefix => AmbiguousRangeModifierPrefix; - Regex IMergedExtractorConfiguration.PotentialAmbiguousRangeRegex => null; + Regex IMergedExtractorConfiguration.PotentialAmbiguousRangeRegex => PotentialAmbiguousRangeRegex; Regex IMergedExtractorConfiguration.NumberEndingPattern => NumberEndingPattern; @@ -136,10 +155,17 @@ public DutchMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + public 
Regex FailFastRegex { get; } = null; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; StringMatcher IMergedExtractorConfiguration.SuperfluousWordMatcher => SuperfluousWordMatcher; + + bool IMergedExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + public Regex TasksModeMentionFilters { get; } + } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchSetExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchSetExtractorConfiguration.cs index 06743ae586..ff9399bfc6 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchSetExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchSetExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; @@ -9,28 +12,31 @@ namespace Microsoft.Recognizers.Text.DateTime.Dutch public class DutchSetExtractorConfiguration : BaseDateTimeOptionsConfiguration, ISetExtractorConfiguration { public static readonly Regex SetUnitRegex = - new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodicRegex = - new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachUnitRegex = - new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachPrefixRegex = - new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetLastRegex = - new 
Regex(DateTimeDefinitions.SetLastRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachDayRegex = - new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetWeekDayRegex = - new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetEachRegex = - new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex BeforeEachDayRegex = + new Regex(DateTimeDefinitions.BeforeEachDayRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -60,6 +66,10 @@ public DutchSetExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DateTimePeriodExtractor { get; } + // CheckBothBeforeAfter normally gets its value from DateTimeDefinitions.CheckBothBeforeAfter which however for Dutch is false. + // It only needs to be true in SetExtractor. 
+ bool ISetExtractorConfiguration.CheckBothBeforeAfter => true; + Regex ISetExtractorConfiguration.LastRegex => SetLastRegex; Regex ISetExtractorConfiguration.EachPrefixRegex => EachPrefixRegex; @@ -70,7 +80,7 @@ public DutchSetExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex ISetExtractorConfiguration.EachDayRegex => EachDayRegex; - Regex ISetExtractorConfiguration.BeforeEachDayRegex => null; + Regex ISetExtractorConfiguration.BeforeEachDayRegex => BeforeEachDayRegex; Regex ISetExtractorConfiguration.SetWeekDayRegex => SetWeekDayRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimeExtractorConfiguration.cs index 04c609093b..9b9e48a53a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimeExtractorConfiguration.cs @@ -1,7 +1,11 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; +using Microsoft.Recognizers.Definitions.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Dutch { @@ -10,114 +14,117 @@ public class DutchTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, // part 1: smallest component // -------------------------------------- public static readonly Regex DescRegex = - new Regex(DateTimeDefinitions.DescRegex, RegexFlags); + new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut); public static readonly Regex HourNumRegex = - new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex MinuteNumRegex = - new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags, RegexTimeOut); // part 2: middle level component // -------------------------------------- // handle "... o'clock" public static readonly Regex OclockRegex = - new Regex(DateTimeDefinitions.OclockRegex, RegexFlags); + new Regex(DateTimeDefinitions.OclockRegex, RegexFlags, RegexTimeOut); // handle "... afternoon" public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); // handle "... in the morning" public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); // handle "half past ..." "a quarter to ..." 
// rename 'min' group to 'deltamin' public static readonly Regex LessThanOneHour = - new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags); + new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags, RegexTimeOut); // handle "six thirty", "six twenty one" public static readonly Regex WrittenTimeRegex = - new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimePrefix = - new Regex(DateTimeDefinitions.TimePrefix, RegexFlags); + new Regex(DateTimeDefinitions.TimePrefix, RegexFlags, RegexTimeOut); public static readonly Regex TimeSuffix = - new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags); + new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags, RegexTimeOut); public static readonly Regex BasicTime = - new Regex(DateTimeDefinitions.BasicTime, RegexFlags); + new Regex(DateTimeDefinitions.BasicTime, RegexFlags, RegexTimeOut); // handle special time such as 'at midnight', 'midnight', 'midday' public static readonly Regex MidnightRegex = - new Regex(DateTimeDefinitions.MidnightRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidnightRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidmorningRegex = - new Regex(DateTimeDefinitions.MidmorningRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidmorningRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidafternoonRegex = - new Regex(DateTimeDefinitions.MidafternoonRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidafternoonRegex, RegexFlags, RegexTimeOut); public static readonly Regex MiddayRegex = - new Regex(DateTimeDefinitions.MiddayRegex, RegexFlags); + new Regex(DateTimeDefinitions.MiddayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidTimeRegex = - new Regex(DateTimeDefinitions.MidTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidTimeRegex, RegexFlags, RegexTimeOut); // part 3: regex for time // -------------------------------------- // 
handle "at four" "at 3" public static readonly Regex AtRegex = - new Regex(DateTimeDefinitions.AtRegex, RegexFlags); + new Regex(DateTimeDefinitions.AtRegex, RegexFlags, RegexTimeOut); public static readonly Regex IshRegex = - new Regex(DateTimeDefinitions.IshRegex, RegexFlags); + new Regex(DateTimeDefinitions.IshRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConnectNumRegex = - new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeBeforeAfterRegex = - new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] TimeRegexList = { // (three min past)? seven|7|(seven thirty) pm - new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags, RegexTimeOut), // (three min past)? 3:00(:00)? (pm)? - new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags, RegexTimeOut), // (three min past)? 3.00 (pm) - new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags, RegexTimeOut), // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? (in the night) - new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags, RegexTimeOut), // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags, RegexTimeOut), // (five thirty|seven|7|7:00(:00)?) (pm)? 
(in the night) - new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags, RegexTimeOut), // (in the night) at (five thirty|seven|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags, RegexTimeOut), // (in the night) (five thirty|seven|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex8, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex8, RegexFlags, RegexTimeOut), - new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags, RegexTimeOut), // (three min past)? 3h00 (pm)? - new Regex(DateTimeDefinitions.TimeRegex10, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex10, RegexFlags, RegexTimeOut), // at 2.30, "at" prefix is required here // 3.30pm, "am/pm" suffix is required here - new Regex(DateTimeDefinitions.TimeRegex11, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex11, RegexFlags, RegexTimeOut), + + // 16 from "16 vandaag" + new Regex(DateTimeDefinitions.TimeRegex12, RegexFlags, RegexTimeOut), // 340pm ConnectNumRegex, @@ -143,5 +150,9 @@ public DutchTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DurationExtractor { get; } public IDateTimeExtractor TimeZoneExtractor { get; } + + public string TimeTokenPrefix => DateTimeDefinitions.TimeTokenPrefix; + + public Dictionary AmbiguityFiltersDict => DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityTimeFiltersDict); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimePeriodExtractorConfiguration.cs index 52dd13e689..da917dc8d3 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimePeriodExtractorConfiguration.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimePeriodExtractorConfiguration.cs @@ -1,74 +1,101 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; using Microsoft.Recognizers.Text.DateTime.Dutch.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Dutch { public class DutchTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, ITimePeriodExtractorConfiguration { public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); public static readonly Regex HourRegex = - new Regex(DateTimeDefinitions.HourRegex, RegexFlags); + new Regex(DateTimeDefinitions.HourRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodHourNumRegex = - new Regex(DateTimeDefinitions.PeriodHourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodHourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodDescRegex = - new Regex(DateTimeDefinitions.DescRegex, RegexFlags); + new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); public static readonly Regex PureNumFromTo = - new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags); + new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags, RegexTimeOut); + public static readonly Regex TimeDateFromTo = + 
new Regex(DateTimeDefinitions.TimeDateFromTo, RegexFlags, RegexTimeOut); + public static readonly Regex PureNumBetweenAnd = - new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeFromTo = - new Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeBetweenAnd = - new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex PrepositionRegex = - new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfDayRegex = - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeFollowedUnit = - new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex TimeNumberCombinedWithUnit = - new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); public static readonly Regex GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, 
RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + private static readonly Regex FromRegex = + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex BetweenRegex = + new Regex(DateTimeDefinitions.BetweenTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex RangeConnectorRegex = + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); + public DutchTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; SingleTimeExtractor = new BaseTimeExtractor(new DutchTimeExtractorConfiguration(this)); UtilityConfiguration = new DutchDatetimeUtilityConfiguration(); - IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(numConfig); + TimeZoneExtractor = new BaseTimeZoneExtractor(new DutchTimeZoneExtractorConfiguration(this)); } @@ -100,33 +127,53 @@ public DutchTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration confi Regex ITimePeriodExtractorConfiguration.GeneralEndingRegex => GeneralEndingRegex; - public bool GetFromTokenIndex(string text, out int index) - { - index = -1; - if (text.EndsWith("from")) - { - index = text.LastIndexOf("from", StringComparison.Ordinal); - return true; - } - - return false; + public bool GetFromTokenIndex(string text, out int index) + { + index = -1; + var fromMatch = FromRegex.Match(text); + if (fromMatch.Success) + { + index = fromMatch.Index; + } + + return fromMatch.Success; } - public bool GetBetweenTokenIndex(string text, out int index) - { - index = -1; - if 
(text.EndsWith("between")) - { - index = text.LastIndexOf("between", StringComparison.Ordinal); - return true; - } - - return false; + public bool GetBetweenTokenIndex(string text, out int index) + { + index = -1; + var betweenMatch = BetweenRegex.Match(text); + if (betweenMatch.Success) + { + index = betweenMatch.Index; + } + + return betweenMatch.Success; } - public bool IsConnectorToken(string text) - { - return text.Equals("and"); + public bool IsConnectorToken(string text) + { + return RangeConnectorRegex.IsExactMatch(text, trim: true); + } + + // This method is used to disambiguate extractions containing 'morgen' (that can mean both 'tomorrow' and 'morning'). + // It discards isolated occurrences of 'morgen', keeping as valid extractions only those cases + // where it is part of a bigger match (e.g. 'diensdag morgen') + public List ApplyPotentialPeriodAmbiguityHotfix(string text, List timePeriodErs) + { + { + var morgenStr = DateTimeDefinitions.MorningTermList[0]; + List timePeriodErsResult = new List(); + foreach (var timePeriodEr in timePeriodErs) + { + if (!timePeriodEr.Text.Equals(morgenStr, StringComparison.Ordinal)) + { + timePeriodErsResult.Add(timePeriodEr); + } + } + + return timePeriodErsResult; + } } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimeZoneExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimeZoneExtractorConfiguration.cs index 8d62a41b11..d98e19d4a1 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimeZoneExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimeZoneExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchCommonDateTimeParserConfiguration.cs index 5e464d0c6b..b9904697f0 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchCommonDateTimeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Dutch; using Microsoft.Recognizers.Text.DateTime.Dutch.Utilities; @@ -26,13 +29,23 @@ public DutchCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration conf WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); - CardinalExtractor = Number.Dutch.CardinalExtractor.GetInstance(); - IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.Dutch.OrdinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Dutch.CardinalExtractor.GetInstance(numConfig); + IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Dutch.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new DutchNumberParserConfiguration(numConfig)); - TimeZoneParser = new BaseTimeZoneParser(); - NumberParser = new BaseNumberParser(new 
DutchNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + TimeZoneParser = new BaseTimeZoneParser(new DutchTimeZoneParserConfiguration(this)); DateExtractor = new BaseDateExtractor(new DutchDateExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new DutchHolidayExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new DutchTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new DutchDateTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new DutchDurationExtractorConfiguration(this)); @@ -41,6 +54,7 @@ public DutchCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration conf DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(new DutchDateTimePeriodExtractorConfiguration(this)); DurationParser = new BaseDurationParser(new DutchDurationParserConfiguration(this)); DateParser = new BaseDateParser(new DutchDateParserConfiguration(this)); + HolidayTimeParser = new BaseHolidayParser(new DutchHolidayParserConfiguration(this)); TimeParser = new TimeParser(new DutchTimeParserConfiguration(this)); DateTimeParser = new BaseDateTimeParser(new DutchDateTimeParserConfiguration(this)); DatePeriodParser = new BaseDatePeriodParser(new DutchDatePeriodParserConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateParserConfiguration.cs index 40c935638b..c1f5fa64b4 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateParserConfiguration.cs @@ -1,5 +1,9 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -8,6 +12,8 @@ namespace Microsoft.Recognizers.Text.DateTime.Dutch { public class DutchDateParserConfiguration : BaseDateTimeOptionsConfiguration, IDateParserConfiguration { + private IImmutableList lastCardinalTerms = DateTimeDefinitions.LastCardinalTerms.ToImmutableList(); + public DutchDateParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -19,6 +25,7 @@ public DutchDateParserConfiguration(ICommonDateTimeParserConfiguration config) DurationExtractor = config.DurationExtractor; DateExtractor = config.DateExtractor; DurationParser = config.DurationParser; + HolidayParser = new BaseHolidayParser(new DutchHolidayParserConfiguration(this)); DateRegexes = new DutchDateExtractorConfiguration(this).DateRegexList; OnRegex = DutchDateExtractorConfiguration.OnRegex; SpecialDayRegex = DutchDateExtractorConfiguration.SpecialDayRegex; @@ -37,6 +44,7 @@ public DutchDateParserConfiguration(ICommonDateTimeParserConfiguration config) StrictRelativeRegex = DutchDateExtractorConfiguration.StrictRelativeRegex; YearSuffix = DutchDateExtractorConfiguration.YearSuffix; RelativeWeekDayRegex = DutchDateExtractorConfiguration.RelativeWeekDayRegex; + BeforeAfterRegex = DutchDateExtractorConfiguration.BeforeAfterRegex; RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexOptions.Singleline); NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexOptions.Singleline); PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexOptions.Singleline); @@ -71,6 +79,8 @@ public DutchDateParserConfiguration(ICommonDateTimeParserConfiguration config) public IDateTimeParser DurationParser { get; } + public IDateTimeParser HolidayParser { get; } + public IEnumerable DateRegexes { get; } 
public IImmutableDictionary UnitMap { get; } @@ -119,6 +129,10 @@ public DutchDateParserConfiguration(ICommonDateTimeParserConfiguration config) public Regex PastPrefixRegex { get; } + public Regex BeforeAfterRegex { get; } + + public Regex TasksModeDurationToDatePatterns { get; } + public IImmutableDictionary DayOfMonth { get; } public IImmutableDictionary DayOfWeek { get; } @@ -162,7 +176,7 @@ public int GetSwiftMonthOrYear(string text) public bool IsCardinalLast(string text) { var trimmedText = text.Trim(); - return trimmedText.Equals("last"); + return lastCardinalTerms.Contains(trimmedText); } public string Normalize(string text) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDatePeriodParserConfiguration.cs index 0807d24460..23a44dc43c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDatePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; @@ -10,22 +14,22 @@ namespace Microsoft.Recognizers.Text.DateTime.Dutch public class DutchDatePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDatePeriodParserConfiguration { public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterNextSuffixRegex = - new Regex(DateTimeDefinitions.AfterNextSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterNextSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeRegex = - new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificEndOfRangeRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -79,6 +83,10 @@ public DutchDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration con MoreThanRegex = DutchDatePeriodExtractorConfiguration.MoreThanRegex; CenturySuffixRegex = DutchDatePeriodExtractorConfiguration.CenturySuffixRegex; NowRegex = DutchDatePeriodExtractorConfiguration.NowRegex; + FirstLastRegex = DutchDatePeriodExtractorConfiguration.FirstLastRegex; + OfYearRegex = 
DutchDatePeriodExtractorConfiguration.OfYearRegex; + TodayNowRegex = new Regex(DateTimeDefinitions.TodayNowRegex, RegexOptions.Singleline); + SpecialDayRegex = DutchDateExtractorConfiguration.SpecialDayRegex; UnitMap = config.UnitMap; CardinalMap = config.CardinalMap; DayOfMonth = config.DayOfMonth; @@ -200,6 +208,14 @@ public DutchDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration con public Regex NowRegex { get; } + public Regex TodayNowRegex { get; } + + public Regex SpecialDayRegex { get; } + + public Regex FirstLastRegex { get; } + + public Regex OfYearRegex { get; } + Regex ISimpleDatePeriodParserConfiguration.RelativeRegex => RelativeRegex; Regex IDatePeriodParserConfiguration.NextPrefixRegex => NextPrefixRegex; @@ -210,6 +226,8 @@ public DutchDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration con Regex IDatePeriodParserConfiguration.UnspecificEndOfRangeRegex => UnspecificEndOfRangeRegex; + Regex IDatePeriodParserConfiguration.AmbiguousPointRangeRegex => null; + bool IDatePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; public IImmutableDictionary UnitMap { get; } @@ -283,54 +301,60 @@ public int GetSwiftYear(string text) public bool IsFuture(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.FutureTerms.Any(o => trimmedText.StartsWith(o)); + return DateTimeDefinitions.FutureTerms.Any(o => trimmedText.StartsWith(o, StringComparison.Ordinal)); } public bool IsLastCardinal(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.LastCardinalTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.LastCardinalTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } public bool IsMonthOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || 
(MonthTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); } public bool IsMonthToDate(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } public bool IsWeekend(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || (WeekendTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); } public bool IsWeekOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || (WeekTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); } public bool IsYearOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || (YearTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)) || - (DateTimeDefinitions.GenericYearTerms.Any(o => trimmedText.EndsWith(o)) && UnspecificEndOfRangeRegex.IsMatch(trimmedText)); + (DateTimeDefinitions.GenericYearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) && + UnspecificEndOfRangeRegex.IsMatch(trimmedText)); + } + + public bool IsFortnight(string text) + { + return false; } public bool IsYearToDate(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o, 
StringComparison.Ordinal)); } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateTimeAltParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateTimeAltParserConfiguration.cs index 953caa98cb..438554887c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateTimeAltParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateTimeAltParserConfiguration.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.DateTime.Dutch +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.DateTime.Dutch { public class DutchDateTimeAltParserConfiguration : IDateTimeAltParserConfiguration { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateTimeParserConfiguration.cs index 1dbb8149f1..6e4f506981 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateTimeParserConfiguration.cs @@ -1,20 +1,46 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; using Microsoft.Recognizers.Text.DateTime.Utilities; - +using Microsoft.Recognizers.Text.Utilities; + namespace Microsoft.Recognizers.Text.DateTime.Dutch { public class DutchDateTimeParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeParserConfiguration { public static readonly Regex AmTimeRegex = - new Regex(DateTimeDefinitions.AMTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.AMTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmTimeRegex = - new Regex(DateTimeDefinitions.PMTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.PMTimeRegex, RegexFlags, RegexTimeOut); + public static readonly Regex NightTimeRegex = + new Regex(DateTimeDefinitions.NightTimeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MorningTimeRegex = + new Regex(DateTimeDefinitions.MorningTimeRegex, RegexFlags, RegexTimeOut); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + private static readonly Regex NowTimeRegex = + new Regex(DateTimeDefinitions.NowTimeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex RecentlyTimeRegex = + new Regex(DateTimeDefinitions.RecentlyTimeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex AsapTimeRegex = + new Regex(DateTimeDefinitions.AsapTimeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex NextPrefixRegex = + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex PreviousPrefixRegex = + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + public DutchDateTimeParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -24,7 +50,9 @@ public DutchDateTimeParserConfiguration(ICommonDateTimeParserConfiguration confi DateExtractor = config.DateExtractor; TimeExtractor = 
config.TimeExtractor; DateParser = config.DateParser; - TimeParser = config.TimeParser; + TimeParser = config.TimeParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; NowRegex = DutchDateTimeExtractorConfiguration.NowRegex; @@ -99,66 +127,71 @@ public DutchDateTimeParserConfiguration(ICommonDateTimeParserConfiguration confi public IImmutableDictionary Numbers { get; } - public IDateTimeUtilityConfiguration UtilityConfiguration { get; } - - public int GetHour(string text, int hour) - { - int result = hour; - - var trimmedText = text.Trim().ToLowerInvariant(); - - if (trimmedText.EndsWith("ochtend") && hour >= Constants.HalfDayHourCount) - { - result -= Constants.HalfDayHourCount; - } - else if (!trimmedText.EndsWith("ochtend") && hour < Constants.HalfDayHourCount) - { - result += Constants.HalfDayHourCount; - } + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } - return result; + public int GetHour(string text, int hour) + { + int result = hour; + + var trimmedText = text.Trim(); + + if (MorningTimeRegex.MatchEnd(trimmedText, trim: true).Success && hour >= Constants.HalfDayHourCount) + { + result -= Constants.HalfDayHourCount; + } + else if (!MorningTimeRegex.MatchEnd(trimmedText, trim: true).Success && hour < Constants.HalfDayHourCount && + !(NightTimeRegex.MatchEnd(trimmedText, trim: true).Success && hour < Constants.QuarterDayHourCount)) + { + result += Constants.HalfDayHourCount; + } + + return result; } - public bool GetMatchedNowTimex(string text, out string timex) - { - var trimmedText = text.Trim().ToLowerInvariant(); - - if (trimmedText.EndsWith("nu")) - { - timex = "PRESENT_REF"; - } - else if (trimmedText.Equals("kort geleden") || trimmedText.Equals("eerder")) - { - timex = "PAST_REF"; - } - else if (trimmedText.Equals("zo snel mogelijk") || trimmedText.Equals("zsm")) - { - timex = 
"FUTURE_REF"; - } - else - { - timex = null; - return false; - } - - return true; + public bool GetMatchedNowTimex(string text, out string timex) + { + var trimmedText = text.Trim(); + + if (NowTimeRegex.MatchEnd(trimmedText, trim: true).Success) + { + timex = "PRESENT_REF"; + } + else if (RecentlyTimeRegex.IsExactMatch(trimmedText, trim: true)) + { + timex = "PAST_REF"; + } + else if (AsapTimeRegex.IsExactMatch(trimmedText, trim: true)) + { + timex = "FUTURE_REF"; + } + else + { + timex = null; + return false; + } + + return true; } - public int GetSwiftDay(string text) - { - var trimmedText = text.Trim().ToLowerInvariant(); - - var swift = 0; - if (trimmedText.StartsWith("volgende")) - { - swift = 1; - } - else if (trimmedText.StartsWith("vorige") || trimmedText.StartsWith("laatste")) - { - swift = -1; - } - - return swift; + public int GetSwiftDay(string text) + { + var trimmedText = text.Trim(); + + var swift = 0; + if (NextPrefixRegex.MatchBegin(trimmedText, trim: true).Success) + { + swift = 1; + } + else if (PreviousPrefixRegex.MatchBegin(trimmedText, trim: true).Success) + { + swift = -1; + } + + return swift; } public bool ContainsAmbiguousToken(string text, string matchedText) => false; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateTimePeriodParserConfiguration.cs index 2d1307e054..269604db3c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDateTimePeriodParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; @@ -8,16 +11,19 @@ namespace Microsoft.Recognizers.Text.DateTime.Dutch public class DutchDateTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimePeriodParserConfiguration { public static readonly Regex MorningStartEndRegex = - new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfternoonStartEndRegex = - new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex EveningStartEndRegex = - new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex NightStartEndRegex = - new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PeriodTimeOfDayWithDateRegex = + new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -25,6 +31,7 @@ public DutchDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration : base(config) { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime; DateExtractor = config.DateExtractor; TimeExtractor = config.TimeExtractor; @@ -38,9 +45,12 @@ public DutchDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration TimePeriodParser = config.TimePeriodParser; DurationParser = config.DurationParser; DateTimeParser = config.DateTimeParser; - TimeZoneParser = config.TimeZoneParser; + TimeZoneParser = config.TimeZoneParser; + HolidayExtractor 
= config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; PureNumberFromToRegex = DutchTimePeriodExtractorConfiguration.PureNumFromTo; + HyphenDateRegex = DutchDateTimePeriodExtractorConfiguration.HyphenDateRegex; PureNumberBetweenAndRegex = DutchTimePeriodExtractorConfiguration.PureNumBetweenAnd; SpecificTimeOfDayRegex = DutchDateTimeExtractorConfiguration.SpecificTimeOfDayRegex; TimeOfDayRegex = DutchDateTimeExtractorConfiguration.TimeOfDayRegex; @@ -49,7 +59,6 @@ public DutchDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration FutureSuffixRegex = DutchDatePeriodExtractorConfiguration.FutureSuffixRegex; NumberCombinedWithUnitRegex = DutchDateTimePeriodExtractorConfiguration.TimeNumberCombinedWithUnit; UnitRegex = DutchTimePeriodExtractorConfiguration.TimeUnitRegex; - PeriodTimeOfDayWithDateRegex = DutchDateTimePeriodExtractorConfiguration.PeriodTimeOfDayWithDateRegex; RelativeTimeUnitRegex = DutchDateTimePeriodExtractorConfiguration.RelativeTimeUnitRegex; RestOfDateTimeRegex = DutchDateTimePeriodExtractorConfiguration.RestOfDateTimeRegex; AmDescRegex = DutchDateTimePeriodExtractorConfiguration.AmDescRegex; @@ -64,6 +73,8 @@ public DutchDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration public string TokenBeforeDate { get; } + public string TokenBeforeTime { get; } + public IDateExtractor DateExtractor { get; } public IDateTimeExtractor TimeExtractor { get; } @@ -90,8 +101,12 @@ public DutchDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration public IDateTimeParser TimeZoneParser { get; } + Regex IDateTimePeriodParserConfiguration.TasksmodeMealTimeofDayRegex => null; + public Regex PureNumberFromToRegex { get; } + public Regex HyphenDateRegex { get; } + public Regex PureNumberBetweenAndRegex { get; } public Regex SpecificTimeOfDayRegex { get; } @@ -108,7 +123,7 @@ public DutchDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration public Regex UnitRegex { get; } - public Regex 
PeriodTimeOfDayWithDateRegex { get; } + Regex IDateTimePeriodParserConfiguration.PeriodTimeOfDayWithDateRegex => PeriodTimeOfDayWithDateRegex; public Regex RelativeTimeUnitRegex { get; } @@ -126,68 +141,72 @@ public DutchDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration public Regex AfterRegex { get; } - bool IDateTimePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + // CheckBothBeforeAfter normally gets its value from DateTimeDefinitions.CheckBothBeforeAfter which however for Dutch is false. + // It only needs to be true in DateTimePeriod. + bool IDateTimePeriodParserConfiguration.CheckBothBeforeAfter => true; public IImmutableDictionary UnitMap { get; } public IImmutableDictionary Numbers { get; } - public bool GetMatchedTimeRange(string text, out string timeStr, out int beginHour, out int endHour, out int endMin) + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + + public bool GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMin) { - var trimmedText = text.Trim().ToLowerInvariant(); + var trimmedText = text.Trim(); beginHour = 0; endHour = 0; endMin = 0; + if (MorningStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TMO"; - beginHour = 8; - endHour = Constants.HalfDayHourCount; + todSymbol = Constants.Morning; } else if (AfternoonStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TAF"; - beginHour = Constants.HalfDayHourCount; - endHour = 16; + todSymbol = Constants.Afternoon; } else if (EveningStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TEV"; - beginHour = 16; - endHour = 20; + todSymbol = Constants.Evening; } else if (NightStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TNI"; - beginHour = 20; - endHour = 23; - endMin = 59; + todSymbol = Constants.Night; } else { - timeStr = null; + todSymbol = null; return false; } + var parseResult = TimexUtility.ResolveTimeOfDay(todSymbol); 
+ todSymbol = parseResult.Timex; + beginHour = parseResult.BeginHour; + endHour = parseResult.EndHour; + endMin = parseResult.EndMin; + return true; } - public int GetSwiftPrefix(string text) - { - var trimmedText = text.Trim().ToLowerInvariant(); - - var swift = 0; - if (trimmedText.StartsWith("volgende")) - { - swift = 1; - } - else if (trimmedText.StartsWith("vorige") || trimmedText.StartsWith("laatste")) - { - swift = -1; - } - - return swift; + public int GetSwiftPrefix(string text) + { + var trimmedText = text.Trim(); + + var swift = 0; + if (FutureRegex.IsMatch(trimmedText)) + { + swift = 1; + } + else if (PreviousPrefixRegex.IsMatch(trimmedText)) + { + swift = -1; + } + + return swift; } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDurationParserConfiguration.cs index 9867eb3a24..e93181d507 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDurationParserConfiguration.cs @@ -1,5 +1,9 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Dutch; using Microsoft.Recognizers.Text.Number; @@ -7,10 +11,24 @@ namespace Microsoft.Recognizers.Text.DateTime.Dutch { public class DutchDurationParserConfiguration : BaseDateTimeOptionsConfiguration, IDurationParserConfiguration { + + public static readonly Regex PrefixArticleRegex = + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public DutchDurationParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { - CardinalExtractor = config.CardinalExtractor; + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Dutch.NumberExtractor.GetInstance(numConfig); NumberParser = config.NumberParser; DurationExtractor = new BaseDurationExtractor(new DutchDurationExtractorConfiguration(this), false); NumberCombinedWithUnit = DutchDurationExtractorConfiguration.NumberCombinedWithDurationUnit; @@ -34,7 +52,7 @@ public DutchDurationParserConfiguration(ICommonDateTimeParserConfiguration confi public IExtractor CardinalExtractor { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public IParser NumberParser { get; } @@ -42,6 +60,8 @@ public DutchDurationParserConfiguration(ICommonDateTimeParserConfiguration confi public Regex AnUnitRegex { get; } + Regex IDurationParserConfiguration.PrefixArticleRegex => PrefixArticleRegex; + public Regex DuringRegex { get; } public Regex AllDateUnitRegex { get; } @@ -62,6 +82,8 @@ public DutchDurationParserConfiguration(ICommonDateTimeParserConfiguration confi public Regex SpecialNumberUnitRegex { get; } + bool 
IDurationParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + public IImmutableDictionary UnitMap { get; } public IImmutableDictionary UnitValueMap { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchHolidayParserConfiguration.cs index 8bda11315a..ce1f5188ba 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchHolidayParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchHolidayParserConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -14,9 +17,9 @@ public class DutchHolidayParserConfiguration : BaseHolidayParserConfiguration public DutchHolidayParserConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - ThisPrefixRegex = new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); - NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); - PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + ThisPrefixRegex = new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); + NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); this.HolidayRegexList = DutchHolidayExtractorConfiguration.HolidayRegexList; this.HolidayNames = DateTimeDefinitions.HolidayNames.ToImmutableDictionary(); } @@ -51,7 +54,8 @@ public override string SanitizeHolidayToken(string holiday) { return holiday .Replace(" ", string.Empty) - .Replace("'", string.Empty); + .Replace("'", string.Empty) + .Replace("-", string.Empty); } protected override 
IDictionary> InitHolidayFuncs() @@ -66,8 +70,7 @@ protected override IDictionary> InitHolidayFuncs() { "youthday", YouthDay }, { "childrenday", ChildrenDay }, { "femaleday", FemaleDay }, - { "treeplantingday", TreePlantDay }, - { "arborday", TreePlantDay }, + { "treeplantingday", DutchTreePlantDay }, { "girlsday", GirlsDay }, { "whiteloverday", WhiteLoverDay }, { "loverday", ValentinesDay }, @@ -97,6 +100,8 @@ protected override IDictionary> InitHolidayFuncs() { "newyeareve", NewYearEve }, { "oudejaarsavond", NewYearEve }, { "easterday", EasterDay }, + { "eastermonday", EasterMonday }, + { "goodfriday", GoodFriday }, { "kingsday", KingsDay }, { "queensday", QueensDay }, { "prinsjesdag", Prinsjesdag }, @@ -105,6 +110,18 @@ protected override IDictionary> InitHolidayFuncs() { "bevrijdingsdag", Bevrijdingsdag }, { "dodenherdenking", Dodenherdenking }, { "dagvandearbeid", Dagvandearbeid }, + { "ascensionday", AscensionDay }, + { "whitesunday", WhiteSunday }, + { "sinterklaas", Sinterklaas }, + { "stmartinsday", StMartinsDay }, + { "driekoningen", Driekoningen }, + { "ketikoti", KetiKoti }, + { "ramadan", Ramadan }, + { "sacrifice", Sacrifice }, + { "eidalfitr", EidAlFitr }, + { "islamicnewyear", IslamicNewYear }, + { "earthday", EarthDay }, + { "juneteenth", Juneteenth }, }; } @@ -116,6 +133,8 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject SecondChristmasDay(int year) => new DateObject(year, 12, 26); + private static DateObject StMartinsDay(int year) => new DateObject(year, 11, 11); + private static DateObject ChristmasEve(int year) => new DateObject(year, 12, 24); private static DateObject ValentinesDay(int year) => new DateObject(year, 2, 14); @@ -126,7 +145,7 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject GirlsDay(int year) => new DateObject(year, 3, 7); - private static DateObject TreePlantDay(int year) => new DateObject(year, 3, 12); + private static DateObject DutchTreePlantDay(int year) => 
DateObject.MinValue.SafeCreateFromValue(year, 3, GetDay(year, 3, 2, DayOfWeek.Wednesday)); private static DateObject FemaleDay(int year) => new DateObject(year, 3, 8); @@ -166,9 +185,11 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject GuyFawkesDay(int year) => new DateObject(year, 11, 5); - private static DateObject Veteransday(int year) => new DateObject(year, 11, 11); + private static DateObject GoodFriday(int year) => EasterDay(year).AddDays(-2); - private static DateObject EasterDay(int year) => DateObject.MinValue; + private static DateObject EasterDay(int year) => HolidayFunctions.CalculateHolidayByEaster(year); + + private static DateObject EasterMonday(int year) => EasterDay(year).AddDays(1); private static DateObject KingsDay(int year) => new DateObject(year, 4, 27); @@ -180,10 +201,32 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject Bevrijdingsdag(int year) => new DateObject(year, 5, 5); + private static DateObject Sinterklaas(int year) => new DateObject(year, 12, 5); + private static DateObject DutchTeachersDay(int year) => new DateObject(year, 10, 5); private static DateObject DutchVeteransday(int year) => DateObject.MinValue.SafeCreateFromValue(year, 6, GetLastDay(year, 6, DayOfWeek.Saturday)); private static DateObject Dagvandearbeid(int year) => new DateObject(year, 5, 1); + + private static DateObject AscensionDay(int year) => EasterDay(year).AddDays(39); + + private static DateObject WhiteSunday(int year) => EasterDay(year).AddDays(49); + + private static DateObject Driekoningen(int year) => new DateObject(year, 1, 6); + + private static DateObject KetiKoti(int year) => new DateObject(year, 7, 1); + + private static DateObject EarthDay(int year) => new DateObject(year, 4, 22); + + private static DateObject Juneteenth(int year) => new DateObject(year, 6, 19); + + private static DateObject Ramadan(int year) => HolidayFunctions.IslamicHoliday(year, 
HolidayFunctions.IslamicHolidayType.Ramadan); + + private static DateObject Sacrifice(int year) => HolidayFunctions.IslamicHoliday(year, HolidayFunctions.IslamicHolidayType.Sacrifice); + + private static DateObject EidAlFitr(int year) => HolidayFunctions.IslamicHoliday(year, HolidayFunctions.IslamicHolidayType.EidAlFitr); + + private static DateObject IslamicNewYear(int year) => HolidayFunctions.IslamicHoliday(year, HolidayFunctions.IslamicHolidayType.NewYear); } -} +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchMergedParserConfiguration.cs index 1a75cac3ef..7e966d2681 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchMergedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchMergedParserConfiguration.cs @@ -1,5 +1,8 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Dutch; using Microsoft.Recognizers.Text.Matcher; namespace Microsoft.Recognizers.Text.DateTime.Dutch @@ -23,7 +26,7 @@ public DutchMergedParserConfiguration(IDateTimeOptionsConfiguration config) DateTimePeriodParser = new BaseDateTimePeriodParser(new DutchDateTimePeriodParserConfiguration(this)); SetParser = new BaseSetParser(new DutchSetParserConfiguration(this)); HolidayParser = new BaseHolidayParser(new DutchHolidayParserConfiguration(this)); - TimeZoneParser = new BaseTimeZoneParser(); + TimeZoneParser = new BaseTimeZoneParser(new DutchTimeZoneParserConfiguration(this)); } public Regex BeforeRegex { get; } @@ -45,5 +48,7 @@ public DutchMergedParserConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeParser HolidayParser { get; } public StringMatcher SuperfluousWordMatcher { get; } + + bool IMergedParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchSetParserConfiguration.cs index c30d653f16..ae99f343f0 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchSetParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchSetParserConfiguration.cs @@ -1,11 +1,46 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Dutch; using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Dutch { public class DutchSetParserConfiguration : BaseDateTimeOptionsConfiguration, ISetParserConfiguration { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex DayTypeRegex = + new Regex(DateTimeDefinitions.DayTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex WeekTypeRegex = + new Regex(DateTimeDefinitions.WeekTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex BiWeekTypeRegex = + new Regex(DateTimeDefinitions.BiWeekTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex MonthTypeRegex = + new Regex(DateTimeDefinitions.MonthTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex QuarterTypeRegex = + new Regex(DateTimeDefinitions.QuarterTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex YearTypeRegex = + new Regex(DateTimeDefinitions.YearTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex SemiYearTypeRegex = + new Regex(DateTimeDefinitions.SemiYearTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex WeekendTypeRegex = + new Regex(DateTimeDefinitions.WeekendTypeRegex, RegexFlags, RegexTimeOut); + + // pass FutureTerms as List to ReplaceValueInTextWithFutTerm function + private static readonly List ThisTerms = (List)DateTimeDefinitions.FutureTerms; + public DutchSetParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -78,54 +113,39 @@ public DutchSetParserConfiguration(ICommonDateTimeParserConfiguration config) public bool GetMatchedDailyTimex(string text, out string timex) { - var trimmedText = text.Trim().ToLowerInvariant(); - if 
(trimmedText.Equals("dagelijks")) + var trimmedText = text.Trim(); + + if (DayTypeRegex.IsMatch(trimmedText)) { timex = "P1D"; } - else if (trimmedText.Equals("wekelijks")) + else if (WeekTypeRegex.IsMatch(trimmedText)) { timex = "P1W"; } - else if (trimmedText.Equals("tweewekelijks")) + else if (BiWeekTypeRegex.IsMatch(trimmedText)) { timex = "P2W"; } - else if (trimmedText.Equals("maandelijks")) + else if (MonthTypeRegex.IsMatch(trimmedText)) { timex = "P1M"; } - else if (trimmedText.Equals("elk jaar") || trimmedText.Equals("jaarlijks")) + else if (YearTypeRegex.IsMatch(trimmedText)) { timex = "P1Y"; } - else + else if (SemiYearTypeRegex.IsMatch(trimmedText)) { - timex = null; - return false; + timex = "P0.5Y"; } - - return true; - } - - public bool GetMatchedUnitTimex(string text, out string timex) - { - var trimmedText = text.Trim().ToLowerInvariant(); - if (trimmedText.Equals("dag")) + else if (QuarterTypeRegex.IsMatch(trimmedText)) { - timex = "P1D"; - } - else if (trimmedText.Equals("week")) + timex = "P3M"; + } + else if (WeekendTypeRegex.IsMatch(trimmedText)) { - timex = "P1W"; - } - else if (trimmedText.Equals("maand")) - { - timex = "P1M"; - } - else if (trimmedText.Equals("jaar")) - { - timex = "P1Y"; + timex = "XXXX-WXX-WE"; } else { @@ -134,8 +154,15 @@ public bool GetMatchedUnitTimex(string text, out string timex) } return true; + } + + public bool GetMatchedUnitTimex(string text, out string timex) + { + return GetMatchedDailyTimex(text, out timex); } public string WeekDayGroupMatchString(Match match) => SetHandler.WeekDayGroupMatchString(match); + + public string ReplaceValueInTextWithFutTerm(string text, string value) => TasksModeSetHandler.ReplaceValueInTextWithFutTerm(text, value, ThisTerms); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchTimeParserConfiguration.cs index b301eb3bfc..a939544051 100644 
--- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchTimeParserConfiguration.cs @@ -1,5 +1,9 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; @@ -14,13 +18,31 @@ public class DutchTimeParserConfiguration : BaseDateTimeOptionsConfiguration, IT private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex TimeSuffixFull = - new Regex(DateTimeDefinitions.TimeSuffixFull, RegexFlags); + new Regex(DateTimeDefinitions.TimeSuffixFull, RegexFlags, RegexTimeOut); private static readonly Regex LunchRegex = - new Regex(DateTimeDefinitions.LunchRegex, RegexFlags); + new Regex(DateTimeDefinitions.LunchRegex, RegexFlags, RegexTimeOut); private static readonly Regex NightRegex = - new Regex(DateTimeDefinitions.NightRegex, RegexFlags); + new Regex(DateTimeDefinitions.NightRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex HalfTokenRegex = + new Regex(DateTimeDefinitions.HalfTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex QuarterTokenRegex = + new Regex(DateTimeDefinitions.QuarterTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex ThreeQuarterTokenRegex = + new Regex(DateTimeDefinitions.ThreeQuarterTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex ToTokenRegex = + new Regex(DateTimeDefinitions.ToTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex ToHalfTokenRegex = + new Regex(DateTimeDefinitions.ToHalfTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex ForHalfTokenRegex = + new 
Regex(DateTimeDefinitions.ForHalfTokenRegex, RegexFlags, RegexTimeOut); public DutchTimeParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) @@ -51,27 +73,27 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha { int deltaMin; - var trimedPrefix = prefix.Trim().ToLowerInvariant(); + var trimmedPrefix = prefix.Trim(); - if (trimedPrefix.StartsWith("half")) + if (HalfTokenRegex.IsMatch(trimmedPrefix)) { - deltaMin = 30; + deltaMin = -30; } - else if (trimedPrefix.StartsWith("a quarter") || trimedPrefix.StartsWith("quarter")) + else if (QuarterTokenRegex.IsMatch(trimmedPrefix)) { deltaMin = 15; } - else if (trimedPrefix.StartsWith("three quarter")) + else if (ThreeQuarterTokenRegex.IsMatch(trimmedPrefix)) { deltaMin = 45; } else { - var match = DutchTimeExtractorConfiguration.LessThanOneHour.Match(trimedPrefix); + var match = DutchTimeExtractorConfiguration.LessThanOneHour.Match(trimmedPrefix); var minStr = match.Groups["deltamin"].Value; if (!string.IsNullOrWhiteSpace(minStr)) { - deltaMin = int.Parse(minStr); + deltaMin = int.Parse(minStr, CultureInfo.InvariantCulture); } else { @@ -80,9 +102,17 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha } } - if (trimedPrefix.EndsWith("to")) - { - deltaMin = -deltaMin; + if (ToHalfTokenRegex.IsMatch(trimmedPrefix)) + { + deltaMin = deltaMin - 30; + } + else if (ForHalfTokenRegex.IsMatch(trimmedPrefix)) + { + deltaMin = -deltaMin - 30; + } + else if (ToTokenRegex.IsMatch(trimmedPrefix)) + { + deltaMin = -deltaMin; } min += deltaMin; @@ -97,13 +127,14 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha public void AdjustBySuffix(string suffix, ref int hour, ref int min, ref bool hasMin, ref bool hasAm, ref bool hasPm) { - var lowerSuffix = suffix.ToLowerInvariant(); + var deltaHour = 0; - var match = TimeSuffixFull.MatchExact(lowerSuffix, trim: true); + var match = TimeSuffixFull.MatchExact(suffix, trim: 
true); if (match.Success) { var oclockStr = match.Groups["oclock"].Value; + if (string.IsNullOrEmpty(oclockStr)) { var stringAm = match.Groups[Constants.AmGroupName].Value; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchTimePeriodParserConfiguration.cs index c86430ec9e..6cb18a17e4 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchTimePeriodParserConfiguration.cs @@ -1,10 +1,13 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; using Microsoft.Recognizers.Text.DateTime.Utilities; -using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Dutch { @@ -54,10 +57,10 @@ public DutchTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration con public IDateTimeUtilityConfiguration UtilityConfiguration { get; } - public bool GetMatchedTimexRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) + public bool GetMatchedTimeRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) { - var trimmedText = text.Trim().ToLowerInvariant(); - if (trimmedText.EndsWith("s")) + var trimmedText = text.Trim(); + if (trimmedText.EndsWith("s", StringComparison.Ordinal)) { trimmedText = trimmedText.Substring(0, trimmedText.Length - 1); } @@ -67,23 +70,24 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou endMin = 0; var timeOfDay = string.Empty; - if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o))) + + if (DateTimeDefinitions.MorningTermList.Any(o => 
trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Morning; } - else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Afternoon; } - else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Evening; } - else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o))) + else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o, StringComparison.Ordinal))) { timeOfDay = Constants.Daytime; } - else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Night; } @@ -91,13 +95,29 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou { timeOfDay = Constants.BusinessHour; } + else if (DateTimeDefinitions.BusinessHourSplitStrings2.All(o => trimmedText.Contains(o))) + { + timeOfDay = Constants.BusinessHour; + } + else if (DateTimeDefinitions.MealtimeBreakfastTermList.Any(o => trimmedText.Contains(o))) + { + timeOfDay = Constants.MealtimeBreakfast; + } + else if (DateTimeDefinitions.MealtimeLunchTermList.Any(o => trimmedText.Contains(o))) + { + timeOfDay = Constants.MealtimeLunch; + } + else if (DateTimeDefinitions.MealtimeDinnerTermList.Any(o => trimmedText.Contains(o))) + { + timeOfDay = Constants.MealtimeDinner; + } else { timex = null; return false; } - var parseResult = TimexUtility.ParseTimeOfDay(timeOfDay); + var parseResult = TimexUtility.ResolveTimeOfDay(timeOfDay); timex = parseResult.Timex; beginHour = parseResult.BeginHour; endHour = parseResult.EndHour; diff --git 
a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchTimeZoneParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchTimeZoneParserConfiguration.cs new file mode 100644 index 0000000000..89251c94ca --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchTimeZoneParserConfiguration.cs @@ -0,0 +1,33 @@ +using System; +using System.Collections.Generic; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Dutch; + +namespace Microsoft.Recognizers.Text.DateTime.Dutch +{ + public class DutchTimeZoneParserConfiguration : BaseDateTimeOptionsConfiguration, ITimeZoneParserConfiguration + { + public static readonly string TimeZoneEndRegex = TimeZoneDefinitions.TimeZoneEndRegex; + + public static readonly Dictionary FullToMinMapping = TimeZoneDefinitions.FullToMinMapping; + + public static readonly Regex DirectUtcRegex = + new Regex(TimeZoneDefinitions.DirectUtcRegex, RegexOptions.IgnoreCase | RegexOptions.Singleline); + + public static readonly Dictionary AbbrToMinMapping = TimeZoneDefinitions.AbbrToMinMapping; + + public DutchTimeZoneParserConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + } + + string ITimeZoneParserConfiguration.TimeZoneEndRegex => TimeZoneEndRegex; + + Dictionary ITimeZoneParserConfiguration.FullToMinMapping => FullToMinMapping; + + Regex ITimeZoneParserConfiguration.DirectUtcRegex => DirectUtcRegex; + + Dictionary ITimeZoneParserConfiguration.AbbrToMinMapping => AbbrToMinMapping; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/TimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/TimeParser.cs index c1474decfb..4b6f8966e7 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/TimeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/TimeParser.cs @@ -1,4 +1,10 @@ -using Microsoft.Recognizers.Text.Utilities; +// Copyright (c) Microsoft 
Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; + +using Microsoft.Recognizers.Text.Utilities; + using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Dutch @@ -35,10 +41,10 @@ private DateTimeResolutionResult ParseIsh(string text, DateObject referenceTime) var hour = Constants.HalfDayHourCount; if (!string.IsNullOrEmpty(hourStr)) { - hour = int.Parse(hourStr); + hour = int.Parse(hourStr, CultureInfo.InvariantCulture); } - ret.Timex = "T" + hour.ToString("D2"); + ret.Timex = "T" + hour.ToString("D2", CultureInfo.InvariantCulture); ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(referenceTime.Year, referenceTime.Month, referenceTime.Day, hour, 0, 0); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Utilities/DutchDatetimeUtilityConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Utilities/DutchDatetimeUtilityConfiguration.cs index c58cf20504..afe4957bda 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Utilities/DutchDatetimeUtilityConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Utilities/DutchDatetimeUtilityConfiguration.cs @@ -1,73 +1,32 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Dutch.Utilities { - public class DutchDatetimeUtilityConfiguration : IDateTimeUtilityConfiguration + public class DutchDatetimeUtilityConfiguration : BaseDatetimeUtilityConfiguration { - public static readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoRegex, RegexFlags); - - public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); - - public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); - - public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); - - public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); - - public static readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); - - public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); - - public static readonly Regex AmPmDescRegex = - new Regex(DateTimeDefinitions.AmPmDescRegex, RegexFlags); - - public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); - - public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); - - public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); - - public static readonly Regex CommonDatePrefixRegex = - new Regex(DateTimeDefinitions.CommonDatePrefixRegex, RegexFlags); - - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - - Regex IDateTimeUtilityConfiguration.LaterRegex => LaterRegex; - - Regex IDateTimeUtilityConfiguration.AgoRegex => AgoRegex; - - Regex IDateTimeUtilityConfiguration.InConnectorRegex => 
InConnectorRegex; - - Regex IDateTimeUtilityConfiguration.SinceYearSuffixRegex => SinceYearSuffixRegex; - - Regex IDateTimeUtilityConfiguration.WithinNextPrefixRegex => WithinNextPrefixRegex; - - Regex IDateTimeUtilityConfiguration.AmDescRegex => AmDescRegex; - - Regex IDateTimeUtilityConfiguration.PmDescRegex => PmDescRegex; - - Regex IDateTimeUtilityConfiguration.AmPmDescRegex => AmPmDescRegex; - - Regex IDateTimeUtilityConfiguration.RangeUnitRegex => RangeUnitRegex; - - Regex IDateTimeUtilityConfiguration.TimeUnitRegex => TimeUnitRegex; - - Regex IDateTimeUtilityConfiguration.DateUnitRegex => DateUnitRegex; - - Regex IDateTimeUtilityConfiguration.CommonDatePrefixRegex => CommonDatePrefixRegex; - - bool IDateTimeUtilityConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + public DutchDatetimeUtilityConfiguration() + : base( + DateTimeDefinitions.AgoRegex, + DateTimeDefinitions.LaterRegex, + DateTimeDefinitions.InConnectorRegex, + DateTimeDefinitions.SinceYearSuffixRegex, + DateTimeDefinitions.WithinNextPrefixRegex, + DateTimeDefinitions.AmDescRegex, + DateTimeDefinitions.PmDescRegex, + DateTimeDefinitions.AmPmDescRegex, + DateTimeDefinitions.RangeUnitRegex, + DateTimeDefinitions.TimeUnitRegex, + DateTimeDefinitions.DateUnitRegex, + DateTimeDefinitions.CommonDatePrefixRegex, + DateTimeDefinitions.RangePrefixRegex, + RegexOptions.Singleline | RegexOptions.ExplicitCapture, + DateTimeDefinitions.CheckBothBeforeAfter) + { + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateExtractorConfiguration.cs index 5bb3865bb9..15a786daf7 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateExtractorConfiguration.cs @@ -14,103 +14,106 @@ public class 
EnglishDateExtractorConfiguration : BaseDateTimeOptionsConfiguratio { public static readonly Regex MonthRegex = - new Regex(DateTimeDefinitions.MonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SingleWeekDayRegex = - new Regex(DateTimeDefinitions.SingleWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SingleWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex OnRegex = - new Regex(DateTimeDefinitions.OnRegex, RegexFlags); + new Regex(DateTimeDefinitions.OnRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelaxedOnRegex = - new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisRegex = - new Regex(DateTimeDefinitions.ThisRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisRegex, RegexFlags, RegexTimeOut); public static readonly Regex LastDateRegex = - new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextDateRegex = - new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, 
RegexTimeOut); public static readonly Regex SpecialDayRegex = - new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayOfMonthRegex = - new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeWeekDayRegex = - new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDate = - new Regex(DateTimeDefinitions.SpecialDate, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDate, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDayWithNumRegex = - new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex ForTheRegex = - new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags); + new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayAndDayOfMothRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayAndDayRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex StrictRelativeRegex = - new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixArticleRegex = - 
new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); public static readonly Regex OfMonth = - new Regex(DateTimeDefinitions.OfMonth, RegexFlags); + new Regex(DateTimeDefinitions.OfMonth, RegexFlags, RegexTimeOut); public static readonly Regex MonthEnd = - new Regex(DateTimeDefinitions.MonthEnd, RegexFlags); + new Regex(DateTimeDefinitions.MonthEnd, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayEnd = - new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayStart = - new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags, RegexTimeOut); public static readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeConnectorSymbolRegex = - new 
Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags); + new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex BeforeAfterRegex = + new Regex(DateTimeDefinitions.BeforeAfterRegex, RegexFlags, RegexTimeOut); public static readonly ImmutableDictionary DayOfWeek = DateTimeDefinitions.DayOfWeek.ToImmutableDictionary(); @@ -121,16 +124,30 @@ public class EnglishDateExtractorConfiguration : BaseDateTimeOptionsConfiguratio private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.ImplicitDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.ImplicitDayRegex, RegexFlags, RegexTimeOut); + + // convert duration to date under tasksmode: next year -->date; + private static readonly Regex TasksModeDurationToDatePatterns = + new Regex(DateTimeDefinitions.TasksModeDurationToDatePatterns, RegexFlags, RegexTimeOut); public EnglishDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.English.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new EnglishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.English.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.English.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new EnglishNumberParserConfiguration(numConfig)); DurationExtractor = new BaseDurationExtractor(new EnglishDurationExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new 
EnglishHolidayExtractorConfiguration(this)); UtilityConfiguration = new EnglishDatetimeUtilityConfiguration(); ImplicitDateList = new List @@ -174,40 +191,45 @@ public EnglishDateExtractorConfiguration(IDateTimeOptionsConfiguration config) ImplicitDateList = ImplicitDateList.Concat(new[] { DayRegex }); } + if ((Options & DateTimeOptions.TasksMode) != 0) + { + ImplicitDateList = ImplicitDateList.Concat(new[] { TasksModeDurationToDatePatterns }); + } + // 3-23-2017 - var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags); + var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags, RegexTimeOut); // 23-3-2015 - var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags); + var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags, RegexTimeOut); // on (Sunday,)? 1.3 - var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags); + var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags, RegexTimeOut); // on (Sunday,)? 24-12 - var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags); + var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags, RegexTimeOut); // "(Sunday,)? 7/23, 2018", year part is required - var dateRegex7L = new Regex(DateTimeDefinitions.DateExtractor7L, RegexFlags); + var dateRegex7L = new Regex(DateTimeDefinitions.DateExtractor7L, RegexFlags, RegexTimeOut); // "(Sunday,)? 7/23", year part is not required - var dateRegex7S = new Regex(DateTimeDefinitions.DateExtractor7S, RegexFlags); + var dateRegex7S = new Regex(DateTimeDefinitions.DateExtractor7S, RegexFlags, RegexTimeOut); // "(Sunday,)? 23/7, 2018", year part is required - var dateRegex9L = new Regex(DateTimeDefinitions.DateExtractor9L, RegexFlags); + var dateRegex9L = new Regex(DateTimeDefinitions.DateExtractor9L, RegexFlags, RegexTimeOut); // "(Sunday,)? 
23/7", year part is not required - var dateRegex9S = new Regex(DateTimeDefinitions.DateExtractor9S, RegexFlags); + var dateRegex9S = new Regex(DateTimeDefinitions.DateExtractor9S, RegexFlags, RegexTimeOut); // (Sunday,)? 2015-12-23 - var dateRegexA = new Regex(DateTimeDefinitions.DateExtractorA, RegexFlags); + var dateRegexA = new Regex(DateTimeDefinitions.DateExtractorA, RegexFlags, RegexTimeOut); DateRegexList = new List { // (Sunday,)? April 5 or (Sunday,)? April 5, 2016 - new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags, RegexTimeOut), // (Sunday,)? 6th of April - new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags, RegexTimeOut), }; var enableDmy = DmyDateFormat || @@ -228,6 +250,8 @@ public EnglishDateExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DurationExtractor { get; } + public IDateTimeExtractor HolidayExtractor { get; } + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } public IEnumerable ImplicitDateList { get; } @@ -275,5 +299,7 @@ public EnglishDateExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IDateExtractorConfiguration.RangeUnitRegex => RangeUnitRegex; Regex IDateExtractorConfiguration.RangeConnectorSymbolRegex => RangeConnectorSymbolRegex; + + Regex IDateExtractorConfiguration.BeforeAfterRegex => BeforeAfterRegex; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDatePeriodExtractorConfiguration.cs index f481beaa2b..7816834d7b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDatePeriodExtractorConfiguration.cs @@ -13,161 +13,179 @@ 
public class EnglishDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfig { // Base regexes public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeConnectorRegex = - new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex IllegalYearRegex = - new Regex(BaseDateTime.IllegalYearRegex, RegexFlags); + new Regex(BaseDateTime.IllegalYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WrittenMonthRegex = - new Regex(DateTimeDefinitions.WrittenMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WrittenMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthSuffixRegex = - new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, 
RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex FutureSuffixRegex = - new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); // composite regexes public static readonly Regex SimpleCasesRegex = - new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthFrontSimpleCasesRegex = - new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthFrontBetweenRegex = - new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags, RegexTimeOut); public static readonly Regex BetweenRegex = - new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthWithYear = - new Regex(DateTimeDefinitions.MonthWithYear, RegexFlags); + new Regex(DateTimeDefinitions.MonthWithYear, RegexFlags, RegexTimeOut); public 
static readonly Regex OneWordPeriodRegex = - new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumWithYear = - new Regex(DateTimeDefinitions.MonthNumWithYear, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumWithYear, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfMonthRegex = - new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfYearRegex = - new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex FollowedDateUnit = - new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithDateUnit = - new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags, RegexTimeOut); public static readonly Regex QuarterRegex = - new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags); + new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags, RegexTimeOut); public static readonly Regex QuarterRegexYearFront = - new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags); + new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags, RegexTimeOut); public static readonly Regex AllHalfYearRegex = - new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex SeasonRegex = - new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags); + new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags, RegexTimeOut); public static readonly Regex WhichWeekRegex = - new Regex(DateTimeDefinitions.WhichWeekRegex, 
RegexFlags); + new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfRegex = - new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthOfRegex = - new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ForPrefixRegex = + new Regex(DateTimeDefinitions.ForPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RestOfDateRegex = - new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterEarlyPeriodRegex = - new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekWithWeekDayRangeRegex = - new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearPlusNumberRegex = - new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex DecadeWithCenturyRegex = - new 
Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags); + new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearPeriodRegex = - new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex ComplexDatePeriodRegex = - new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDecadeRegex = - new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags, RegexTimeOut); public static readonly Regex ReferenceDatePeriodRegex = - new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoRegex, RegexFlags); + new Regex(DateTimeDefinitions.AgoRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterRegex, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex CenturySuffixRegex = - new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FirstLastRegex = + new Regex(DateTimeDefinitions.FirstLastRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex 
OfYearRegex = + new Regex(DateTimeDefinitions.OfYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex StartingRegex = + new Regex(DateTimeDefinitions.StartingRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + private static readonly Regex FromTokenRegex = + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex BetweenTokenRegex = + new Regex(DateTimeDefinitions.BetweenTokenRegex, RegexFlags, RegexTimeOut); + private static readonly Regex[] SimpleCasesRegexes = { // "3-5 Jan, 2018", @@ -241,10 +259,21 @@ public EnglishDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con : base(config) { DatePointExtractor = new BaseDateExtractor(new EnglishDateExtractorConfiguration(this)); - CardinalExtractor = Number.English.CardinalExtractor.GetInstance(); - OrdinalExtractor = Number.English.OrdinalExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.English.CardinalExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.English.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new EnglishNumberParserConfiguration(numConfig)); + DurationExtractor = new BaseDurationExtractor(new EnglishDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new EnglishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); } public IDateExtractor DatePointExtractor { get; } @@ -311,6 +340,10 @@ public EnglishDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con Regex IDatePeriodExtractorConfiguration.NowRegex => NowRegex; + Regex IDatePeriodExtractorConfiguration.FirstLastRegex => FirstLastRegex; + + Regex 
IDatePeriodExtractorConfiguration.OfYearRegex => OfYearRegex; + bool IDatePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; string[] IDatePeriodExtractorConfiguration.DurationDateRestrictions => DateTimeDefinitions.DurationDateRestrictions; @@ -318,25 +351,25 @@ public EnglishDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con public bool GetFromTokenIndex(string text, out int index) { index = -1; - if (text.EndsWith("from")) + var fromMatch = FromTokenRegex.Match(text); + if (fromMatch.Success) { - index = text.LastIndexOf("from", StringComparison.Ordinal); - return true; + index = fromMatch.Index; } - return false; + return fromMatch.Success; } public bool GetBetweenTokenIndex(string text, out int index) { index = -1; - if (text.EndsWith("between")) + var betweenMatch = BetweenTokenRegex.Match(text); + if (betweenMatch.Success) { - index = text.LastIndexOf("between", StringComparison.Ordinal); - return true; + index = betweenMatch.Index; } - return false; + return betweenMatch.Success; } public bool HasConnectorToken(string text) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimeAltExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimeAltExtractorConfiguration.cs index 899358f88f..dfaffefdd9 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimeAltExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimeAltExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; @@ -7,22 +10,22 @@ namespace Microsoft.Recognizers.Text.DateTime.English public class EnglishDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeAltExtractorConfiguration { public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangePrefixRegex = - new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] RelativePrefixList = { @@ -37,10 +40,10 @@ public class EnglishDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfi private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex OrRegex = - new Regex(DateTimeDefinitions.OrRegex, RegexFlags); + new Regex(DateTimeDefinitions.OrRegex, RegexFlags, RegexTimeOut); private static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public 
EnglishDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimeExtractorConfiguration.cs index 573d7b7636..2f46b55d69 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimeExtractorConfiguration.cs @@ -1,76 +1,91 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; using Microsoft.Recognizers.Text.DateTime.English.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.English { public class EnglishDateTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeExtractorConfiguration { public static readonly Regex PrepositionRegex = - new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixRegex = - new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfDayRegex = - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificEndOfRegex = - new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificEndOfRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConnectorRegex = - new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex NumberAsTimeRegex = - new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateNumberConnectorRegex = - new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags); 
+ new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public EnglishDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.English.IntegerExtractor.GetInstance(numConfig); + DatePointExtractor = new BaseDateExtractor(new EnglishDateExtractorConfiguration(this)); TimePointExtractor = new BaseTimeExtractor(new EnglishTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new EnglishDurationExtractorConfiguration(this)); UtilityConfiguration = new EnglishDatetimeUtilityConfiguration(); + HolidayExtractor = new BaseHolidayExtractor(new EnglishHolidayExtractorConfiguration(this)); } public IExtractor IntegerExtractor { get; } @@ -81,6 +96,8 @@ public EnglishDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration confi public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + public IDateTimeExtractor HolidayExtractor { get; } + Regex IDateTimeExtractorConfiguration.NowRegex => NowRegex; Regex 
IDateTimeExtractorConfiguration.SuffixRegex => SuffixRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimePeriodExtractorConfiguration.cs index 7f5db11ef5..ca45a0e792 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimePeriodExtractorConfiguration.cs @@ -1,7 +1,12 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.English; +using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.English @@ -10,46 +15,55 @@ public class EnglishDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsCo IDateTimePeriodExtractorConfiguration { public static readonly Regex TimeNumberCombinedWithUnit = - new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex HyphenDateRegex = + new Regex(BaseDateTime.HyphenDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodTimeOfDayWithDateRegex = - new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeTimeUnitRegex = - new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RestOfDateTimeRegex = - new 
Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixDayRegex = new Regex(DateTimeDefinitions.PrefixDayRegex, RegexFlags | RegexOptions.RightToLeft); public static readonly Regex SuffixRegex = - new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDaysRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodSpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.PeriodSpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodSpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TasksmodeMealTimeofDayRegex = + new 
Regex(DateTimeDefinitions.TasksmodeMealTimeofDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex StartingRegex = + new Regex(DateTimeDefinitions.StartingRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -60,32 +74,46 @@ public class EnglishDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsCo }; private static readonly Regex PeriodTimeOfDayRegex = - new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags, RegexTimeOut); private static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); private static readonly Regex TimeFollowedUnit = - new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); private static readonly Regex GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); private static readonly Regex MiddlePauseRegex = - new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags); + new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex RangeConnectorRegex = + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); public EnglishDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; - CardinalExtractor = Number.English.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = 
Number.English.CardinalExtractor.GetInstance(numConfig); + SingleDateExtractor = new BaseDateExtractor(new EnglishDateExtractorConfiguration(this)); SingleTimeExtractor = new BaseTimeExtractor(new EnglishTimeExtractorConfiguration(this)); SingleDateTimeExtractor = new BaseDateTimeExtractor(new EnglishDateTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new EnglishDurationExtractorConfiguration(this)); TimePeriodExtractor = new BaseTimePeriodExtractor(new EnglishTimePeriodExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new EnglishTimeZoneExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new EnglishHolidayExtractorConfiguration(this)); + } public IEnumerable SimpleCasesRegex => SimpleCases; @@ -128,6 +156,8 @@ public EnglishDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration Regex IDateTimePeriodExtractorConfiguration.PeriodTimeOfDayWithDateRegex => PeriodTimeOfDayWithDateRegex; + Regex IDateTimePeriodExtractorConfiguration.TasksmodeMealTimeofDayRegex => TasksmodeMealTimeofDayRegex; + Regex IDateTimePeriodExtractorConfiguration.AmDescRegex => AmDescRegex; Regex IDateTimePeriodExtractorConfiguration.PmDescRegex => PmDescRegex; @@ -156,11 +186,16 @@ public EnglishDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration public IDateTimeExtractor TimeZoneExtractor { get; } + public IDateTimeExtractor HolidayExtractor { get; } + // TODO: these three methods are the same in DatePeriod, should be abstracted public bool GetFromTokenIndex(string text, out int index) { index = -1; - if (text.EndsWith("from")) + + // @TODO move hardcoded values to resources file + + if (text.EndsWith("from", StringComparison.Ordinal)) { index = text.LastIndexOf("from", StringComparison.Ordinal); return true; @@ -172,7 +207,10 @@ public bool GetFromTokenIndex(string text, out int index) public bool GetBetweenTokenIndex(string text, out int index) { index = -1; - if 
(text.EndsWith("between")) + + // @TODO move hardcoded values to resources file + + if (text.EndsWith("between", StringComparison.Ordinal)) { index = text.LastIndexOf("between", StringComparison.Ordinal); return true; @@ -183,9 +221,7 @@ public bool GetBetweenTokenIndex(string text, out int index) public bool HasConnectorToken(string text) { - var rangeConnetorRegex = new Regex(DateTimeDefinitions.RangeConnectorRegex); - - return rangeConnetorRegex.IsExactMatch(text, trim: true); + return RangeConnectorRegex.IsExactMatch(text, trim: true); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDurationExtractorConfiguration.cs index 4c712ce4a0..8217607668 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDurationExtractorConfiguration.cs @@ -1,50 +1,62 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; +using Microsoft.Recognizers.Definitions.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.English { public class EnglishDurationExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDurationExtractorConfiguration { public static readonly Regex DurationUnitRegex = - new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAndRegex = - new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationFollowedUnit = - new Regex(DateTimeDefinitions.DurationFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.DurationFollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithDurationUnit = - new Regex(DateTimeDefinitions.NumberCombinedWithDurationUnit, RegexFlags); + new Regex(DateTimeDefinitions.NumberCombinedWithDurationUnit, RegexFlags, RegexTimeOut); public static readonly Regex AnUnitRegex = - new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DuringRegex = - new Regex(DateTimeDefinitions.DuringRegex, RegexFlags); + new Regex(DateTimeDefinitions.DuringRegex, RegexFlags, RegexTimeOut); public static readonly Regex AllRegex = - new Regex(DateTimeDefinitions.AllRegex, RegexFlags); + new Regex(DateTimeDefinitions.AllRegex, RegexFlags, RegexTimeOut); public static readonly Regex HalfRegex = - new Regex(DateTimeDefinitions.HalfRegex, RegexFlags); + new Regex(DateTimeDefinitions.HalfRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConjunctionRegex = - new 
Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags, RegexTimeOut); public static readonly Regex InexactNumberRegex = - new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex InexactNumberUnitRegex = - new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDurationUnitRegex = - new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationConnectorRegex = - new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModPrefixRegex = + new Regex(DateTimeDefinitions.ModPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModSuffixRegex = + new Regex(DateTimeDefinitions.ModSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialNumberUnitRegex = null; @@ -59,7 +71,17 @@ public class EnglishDurationExtractorConfiguration : BaseDateTimeOptionsConfigur public EnglishDurationExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - CardinalExtractor = Number.English.CardinalExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.English.CardinalExtractor.GetInstance(numConfig); + UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary(); UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary(); } @@ 
-103,5 +125,11 @@ public EnglishDurationExtractorConfiguration(IDateTimeOptionsConfiguration confi Regex IDurationExtractorConfiguration.MoreThanRegex => MoreThanRegex; Regex IDurationExtractorConfiguration.LessThanRegex => LessThanRegex; + + Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex; + + Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex; + + public Dictionary AmbiguityFiltersDict => DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityDurationFiltersDict); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishHolidayExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishHolidayExtractorConfiguration.cs index 8e8fc97cbb..5ef9087617 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishHolidayExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishHolidayExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; @@ -8,22 +11,14 @@ namespace Microsoft.Recognizers.Text.DateTime.English public class EnglishHolidayExtractorConfiguration : BaseDateTimeOptionsConfiguration, IHolidayExtractorConfiguration { public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); - - public static readonly Regex H1 = - new Regex(DateTimeDefinitions.HolidayRegex1, RegexFlags); - - public static readonly Regex H2 = - new Regex(DateTimeDefinitions.HolidayRegex2, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); - public static readonly Regex H3 = - new Regex(DateTimeDefinitions.HolidayRegex3, RegexFlags); + public static readonly Regex H = + new Regex(DateTimeDefinitions.HolidayRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] HolidayRegexList = { - H1, - H2, - H3, + H, }; private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishMergedExtractorConfiguration.cs index 0a50953332..61f8675023 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishMergedExtractorConfiguration.cs @@ -1,63 +1,70 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; + using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.English; using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Matcher; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.English { public class EnglishMergedExtractorConfiguration : BaseDateTimeOptionsConfiguration, IMergedExtractorConfiguration { public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); - - public static readonly Regex SinceRegex = - new Regex(DateTimeDefinitions.SinceRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex AroundRegex = - new Regex(DateTimeDefinitions.AroundRegex, RegexFlags); + new Regex(DateTimeDefinitions.AroundRegex, RegexFlags, RegexTimeOut); public static readonly Regex EqualRegex = - new Regex(BaseDateTime.EqualRegex, RegexFlags); + new Regex(BaseDateTime.EqualRegex, RegexFlags, RegexTimeOut); public static readonly Regex FromToRegex = - new Regex(DateTimeDefinitions.FromToRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromToRegex, RegexFlags, RegexTimeOut); public static readonly Regex SingleAmbiguousMonthRegex = - new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrepositionSuffixRegex = - new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmbiguousRangeModifierPrefix = - new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags); + new 
Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags, RegexTimeOut); public static readonly Regex NumberEndingPattern = - new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags); + new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificDatePeriodRegex = - new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificTimePeriodRegex = - new Regex(DateTimeDefinitions.UnspecificTimePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificTimePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex FailFastRegex = new Regex(DateTimeDefinitions.FailFastRegex, RegexFlags | RegexOptions.Compiled); + // Used to skip only year references in a text in TasksMode + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + public static readonly Regex[] TermFilterRegexes = { // one on one - new Regex(DateTimeDefinitions.OneOnOneRegex, RegexFlags), + new Regex(DateTimeDefinitions.OneOnOneRegex, RegexFlags, RegexTimeOut), // (the)? 
(day|week|month|year) - new Regex(DateTimeDefinitions.SingleAmbiguousTermsRegex, RegexFlags), + new Regex(DateTimeDefinitions.SingleAmbiguousTermsRegex, RegexFlags, RegexTimeOut), + }; public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher(); @@ -78,7 +85,36 @@ public EnglishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) HolidayExtractor = new BaseHolidayExtractor(new EnglishHolidayExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new EnglishTimeZoneExtractorConfiguration(this)); DateTimeAltExtractor = new BaseDateTimeAltExtractor(new EnglishDateTimeAltExtractorConfiguration(this)); - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + if ((config.Options & DateTimeOptions.ExperimentalMode) != 0) + { + SinceRegex = SinceRegexExp; + } + + /* + supression under tasksmode, cases like 1ampm, 1p --> pm, + holiday time reference whose celebration dates are region specific, + Remove decade regex ex seventies, twenties + Remove decade regex ex 1990s, 9s, + Suppress "Q1 2018", "2nd quarter", + "2016 Q1", "last year the 4th quarter", + "2015 the H1", "H2 of 2016", "1st half 2018", "2nd half this year", + summer winter etc + */ + if ((config.Options & DateTimeOptions.TasksMode) != 0) + { + TasksModeMentionFilters = new Regex(DateTimeDefinitions.TasksModeSupressionRegexes, RegexFlags, RegexTimeOut); + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.English.IntegerExtractor.GetInstance(numConfig); AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict); @@ -88,6 +124,12 @@ public EnglishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) } } + // Used in Standard mode + public static Regex SinceRegex { get; set; 
} = new Regex(DateTimeDefinitions.SinceRegex, RegexFlags, RegexTimeOut); + + // used in Experimental mode + public static Regex SinceRegexExp { get; } = new Regex(DateTimeDefinitions.SinceRegexExp, RegexFlags, RegexTimeOut); + public IDateExtractor DateExtractor { get; } public IDateTimeExtractor TimeExtractor { get; } @@ -142,10 +184,17 @@ public EnglishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => UnspecificTimePeriodRegex; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + Regex IMergedExtractorConfiguration.FailFastRegex => FailFastRegex; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; StringMatcher IMergedExtractorConfiguration.SuperfluousWordMatcher => SuperfluousWordMatcher; + + bool IMergedExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + public Regex TasksModeMentionFilters { get; } + } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishSetExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishSetExtractorConfiguration.cs index 590ac8195d..6d9a58d893 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishSetExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishSetExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; @@ -9,28 +12,31 @@ namespace Microsoft.Recognizers.Text.DateTime.English public class EnglishSetExtractorConfiguration : BaseDateTimeOptionsConfiguration, ISetExtractorConfiguration { public static readonly Regex SetUnitRegex = - new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodicRegex = - new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachUnitRegex = - new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachPrefixRegex = - new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetLastRegex = - new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachDayRegex = - new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex BeforeEachDayRegex = + new Regex(DateTimeDefinitions.DayRegex, RegexFlags); public static readonly Regex SetWeekDayRegex = - new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetEachRegex = - new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -60,6 +66,8 @@ public 
EnglishSetExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DateTimePeriodExtractor { get; } + bool ISetExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + Regex ISetExtractorConfiguration.LastRegex => SetLastRegex; Regex ISetExtractorConfiguration.EachPrefixRegex => EachPrefixRegex; @@ -70,7 +78,7 @@ public EnglishSetExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex ISetExtractorConfiguration.EachDayRegex => EachDayRegex; - Regex ISetExtractorConfiguration.BeforeEachDayRegex => null; + Regex ISetExtractorConfiguration.BeforeEachDayRegex => BeforeEachDayRegex; Regex ISetExtractorConfiguration.SetWeekDayRegex => SetWeekDayRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimeExtractorConfiguration.cs index 96348dd06c..d78b925a9d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimeExtractorConfiguration.cs @@ -1,7 +1,11 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; +using Microsoft.Recognizers.Definitions.Utilities; namespace Microsoft.Recognizers.Text.DateTime.English { @@ -10,111 +14,111 @@ public class EnglishTimeExtractorConfiguration : BaseDateTimeOptionsConfiguratio // part 1: smallest component // -------------------------------------- public static readonly Regex DescRegex = - new Regex(DateTimeDefinitions.DescRegex, RegexFlags); + new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut); public static readonly Regex HourNumRegex = - new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex MinuteNumRegex = - new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags, RegexTimeOut); // part 2: middle level component // -------------------------------------- // handle "... o'clock" public static readonly Regex OclockRegex = - new Regex(DateTimeDefinitions.OclockRegex, RegexFlags); + new Regex(DateTimeDefinitions.OclockRegex, RegexFlags, RegexTimeOut); // handle "... afternoon" public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); // handle "... in the morning" public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); // handle "half past ..." "a quarter to ..." 
// rename 'min' group to 'deltamin' public static readonly Regex LessThanOneHour = - new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags); + new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags, RegexTimeOut); // handle "six thirty", "six twenty one" public static readonly Regex WrittenTimeRegex = - new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimePrefix = - new Regex(DateTimeDefinitions.TimePrefix, RegexFlags); + new Regex(DateTimeDefinitions.TimePrefix, RegexFlags, RegexTimeOut); public static readonly Regex TimeSuffix = - new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags); + new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags, RegexTimeOut); public static readonly Regex BasicTime = - new Regex(DateTimeDefinitions.BasicTime, RegexFlags); + new Regex(DateTimeDefinitions.BasicTime, RegexFlags, RegexTimeOut); // handle special time such as 'at midnight', 'midnight', 'midday' public static readonly Regex MidnightRegex = - new Regex(DateTimeDefinitions.MidnightRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidnightRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidmorningRegex = - new Regex(DateTimeDefinitions.MidmorningRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidmorningRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidafternoonRegex = - new Regex(DateTimeDefinitions.MidafternoonRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidafternoonRegex, RegexFlags, RegexTimeOut); public static readonly Regex MiddayRegex = - new Regex(DateTimeDefinitions.MiddayRegex, RegexFlags); + new Regex(DateTimeDefinitions.MiddayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidTimeRegex = - new Regex(DateTimeDefinitions.MidTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidTimeRegex, RegexFlags, RegexTimeOut); // part 3: regex for time // -------------------------------------- // 
handle "at four" "at 3" public static readonly Regex AtRegex = - new Regex(DateTimeDefinitions.AtRegex, RegexFlags); + new Regex(DateTimeDefinitions.AtRegex, RegexFlags, RegexTimeOut); public static readonly Regex IshRegex = - new Regex(DateTimeDefinitions.IshRegex, RegexFlags); + new Regex(DateTimeDefinitions.IshRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConnectNumRegex = - new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeBeforeAfterRegex = - new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] TimeRegexList = { // (three min past)? seven|7|(seven thirty) pm - new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags, RegexTimeOut), // (three min past)? 3:00(:00)? (pm)? - new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags, RegexTimeOut), // (three min past)? 3.00 (pm) - new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags, RegexTimeOut), // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? (in the night) - new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags, RegexTimeOut), // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags, RegexTimeOut), // (five thirty|seven|7|7:00(:00)?) (pm)? 
(in the night) - new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags, RegexTimeOut), // (in the night) at? (five thirty|seven|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags, RegexTimeOut), - new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags, RegexTimeOut), // (three min past)? 3h00 (pm)? - new Regex(DateTimeDefinitions.TimeRegex10, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex10, RegexFlags, RegexTimeOut), // at 2.30, "at" prefix is required here // 3.30pm, "am/pm" suffix is required here - new Regex(DateTimeDefinitions.TimeRegex11, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex11, RegexFlags, RegexTimeOut), // 340pm ConnectNumRegex, @@ -140,5 +144,9 @@ public EnglishTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DurationExtractor { get; } public IDateTimeExtractor TimeZoneExtractor { get; } + + public string TimeTokenPrefix => DateTimeDefinitions.TimeTokenPrefix; + + public Dictionary AmbiguityFiltersDict => DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityTimeFiltersDict); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimePeriodExtractorConfiguration.cs index 88ac210c37..ad89094978 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimePeriodExtractorConfiguration.cs @@ -1,64 +1,71 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; using Microsoft.Recognizers.Text.DateTime.English.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.English { public class EnglishTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, ITimePeriodExtractorConfiguration { public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); public static readonly Regex HourRegex = - new Regex(DateTimeDefinitions.HourRegex, RegexFlags); + new Regex(DateTimeDefinitions.HourRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodHourNumRegex = - new Regex(DateTimeDefinitions.PeriodHourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodHourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodDescRegex = - new Regex(DateTimeDefinitions.DescRegex, RegexFlags); + new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); public static readonly Regex PureNumFromTo = - new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags); + new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags, RegexTimeOut); public static readonly Regex PureNumBetweenAnd = - new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeFromTo = - new Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags); + new 
Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeBetweenAnd = - new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex PrepositionRegex = - new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfDayRegex = - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeFollowedUnit = - new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex TimeNumberCombinedWithUnit = - new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); public static readonly Regex GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimePeriodWithDurationRegex = + new Regex(DateTimeDefinitions.TimePeriodWithDurationRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -68,7 +75,17 @@ public EnglishTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con TokenBeforeDate = 
DateTimeDefinitions.TokenBeforeDate; SingleTimeExtractor = new BaseTimeExtractor(new EnglishTimeExtractorConfiguration(this)); UtilityConfiguration = new EnglishDatetimeUtilityConfiguration(); - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.English.IntegerExtractor.GetInstance(numConfig); + TimeZoneExtractor = new BaseTimeZoneExtractor(new EnglishTimeZoneExtractorConfiguration(this)); } @@ -100,10 +117,12 @@ public EnglishTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con Regex ITimePeriodExtractorConfiguration.GeneralEndingRegex => GeneralEndingRegex; + // @TODO move hardcoded strings to YAML file public bool GetFromTokenIndex(string text, out int index) { index = -1; - if (text.EndsWith("from")) + + if (text.EndsWith("from", StringComparison.Ordinal)) { index = text.LastIndexOf("from", StringComparison.Ordinal); return true; @@ -115,7 +134,8 @@ public bool GetFromTokenIndex(string text, out int index) public bool GetBetweenTokenIndex(string text, out int index) { index = -1; - if (text.EndsWith("between")) + + if (text.EndsWith("between", StringComparison.Ordinal)) { index = text.LastIndexOf("between", StringComparison.Ordinal); return true; @@ -126,7 +146,9 @@ public bool GetBetweenTokenIndex(string text, out int index) public bool IsConnectorToken(string text) { - return text.Equals("and"); + return text.Equals("and", StringComparison.Ordinal); } + + public List ApplyPotentialPeriodAmbiguityHotfix(string text, List timePeriodErs) => TimePeriodFunctions.ApplyPotentialPeriodAmbiguityHotfix(text, timePeriodErs); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimeZoneExtractorConfiguration.cs 
b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimeZoneExtractorConfiguration.cs index 6fb26914b0..921f06845d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimeZoneExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimeZoneExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; @@ -11,7 +14,7 @@ namespace Microsoft.Recognizers.Text.DateTime.English public class EnglishTimeZoneExtractorConfiguration : BaseDateTimeOptionsConfiguration, ITimeZoneExtractorConfiguration { public static readonly Regex DirectUtcRegex = - new Regex(TimeZoneDefinitions.DirectUtcRegex, RegexOptions.IgnoreCase | RegexOptions.Singleline); + new Regex(TimeZoneDefinitions.DirectUtcRegex, RegexOptions.IgnoreCase | RegexOptions.Singleline, RegexTimeOut); public static readonly List AbbreviationsList = new List(TimeZoneDefinitions.AbbreviationsList); @@ -23,7 +26,7 @@ public class EnglishTimeZoneExtractorConfiguration : BaseDateTimeOptionsConfigur TimeZoneUtility.BuildMatcherFromLists(FullNameList, AbbreviationsList); public static readonly Regex LocationTimeSuffixRegex = - new Regex(TimeZoneDefinitions.LocationTimeSuffixRegex, RegexOptions.IgnoreCase | RegexOptions.Singleline); + new Regex(TimeZoneDefinitions.LocationTimeSuffixRegex, RegexOptions.IgnoreCase | RegexOptions.Singleline, RegexTimeOut); public static readonly StringMatcher LocationMatcher = new StringMatcher(); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishCommonDateTimeParserConfiguration.cs index a926b8d677..1c00159ceb 100644 --- 
a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishCommonDateTimeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.English; using Microsoft.Recognizers.Text.DateTime.English.Utilities; @@ -26,26 +29,41 @@ public EnglishCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration co WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); - CardinalExtractor = Number.English.CardinalExtractor.GetInstance(); - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.English.OrdinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.English.CardinalExtractor.GetInstance(numConfig); + IntegerExtractor = Number.English.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.English.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new EnglishNumberParserConfiguration(numConfig)); - TimeZoneParser = new BaseTimeZoneParser(); - NumberParser = new BaseNumberParser(new EnglishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + TimeZoneParser = new BaseTimeZoneParser(new EnglishTimeZoneParserConfiguration(this)); + + // Do not change order. 
The order of initialization can lead to side-effects DateExtractor = new BaseDateExtractor(new EnglishDateExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new EnglishHolidayExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new EnglishTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new EnglishDateTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new EnglishDurationExtractorConfiguration(this)); DatePeriodExtractor = new BaseDatePeriodExtractor(new EnglishDatePeriodExtractorConfiguration(this)); TimePeriodExtractor = new BaseTimePeriodExtractor(new EnglishTimePeriodExtractorConfiguration(this)); DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(new EnglishDateTimePeriodExtractorConfiguration(this)); + DurationParser = new BaseDurationParser(new EnglishDurationParserConfiguration(this)); DateParser = new BaseDateParser(new EnglishDateParserConfiguration(this)); + HolidayTimeParser = new BaseHolidayParser(new EnglishHolidayParserConfiguration(this)); TimeParser = new TimeParser(new EnglishTimeParserConfiguration(this)); DateTimeParser = new BaseDateTimeParser(new EnglishDateTimeParserConfiguration(this)); DatePeriodParser = new BaseDatePeriodParser(new EnglishDatePeriodParserConfiguration(this)); TimePeriodParser = new BaseTimePeriodParser(new EnglishTimePeriodParserConfiguration(this)); DateTimePeriodParser = new BaseDateTimePeriodParser(new EnglishDateTimePeriodParserConfiguration(this)); + DateTimeAltParser = new BaseDateTimeAltParser(new EnglishDateTimeAltParserConfiguration(this)); } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateParserConfiguration.cs index e25792ac8e..afda6c8ecd 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateParserConfiguration.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; @@ -14,13 +18,16 @@ public EnglishDateParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { DateTokenPrefix = DateTimeDefinitions.DateTokenPrefix; + IntegerExtractor = config.IntegerExtractor; OrdinalExtractor = config.OrdinalExtractor; CardinalExtractor = config.CardinalExtractor; NumberParser = config.NumberParser; - DurationExtractor = config.DurationExtractor; DateExtractor = config.DateExtractor; + DurationExtractor = config.DurationExtractor; DurationParser = config.DurationParser; + HolidayParser = new BaseHolidayParser(new EnglishHolidayParserConfiguration(this)); + DateRegexes = new EnglishDateExtractorConfiguration(this).DateRegexList; OnRegex = EnglishDateExtractorConfiguration.OnRegex; SpecialDayRegex = EnglishDateExtractorConfiguration.SpecialDayRegex; @@ -39,12 +46,13 @@ public EnglishDateParserConfiguration(ICommonDateTimeParserConfiguration config) StrictRelativeRegex = EnglishDateExtractorConfiguration.StrictRelativeRegex; YearSuffix = EnglishDateExtractorConfiguration.YearSuffix; RelativeWeekDayRegex = EnglishDateExtractorConfiguration.RelativeWeekDayRegex; + BeforeAfterRegex = EnglishDateExtractorConfiguration.BeforeAfterRegex; - RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexFlags); - NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); - PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); - UpcomingPrefixRegex = new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags); - PastPrefixRegex = new 
Regex(DateTimeDefinitions.PastPrefixRegex, RegexFlags); + RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexFlags, RegexTimeOut); + NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + UpcomingPrefixRegex = new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags, RegexTimeOut); + PastPrefixRegex = new Regex(DateTimeDefinitions.PastPrefixRegex, RegexFlags, RegexTimeOut); DayOfMonth = config.DayOfMonth; DayOfWeek = config.DayOfWeek; @@ -58,6 +66,11 @@ public EnglishDateParserConfiguration(ICommonDateTimeParserConfiguration config) PlusTwoDayTerms = DateTimeDefinitions.PlusTwoDayTerms.ToImmutableList(); MinusOneDayTerms = DateTimeDefinitions.MinusOneDayTerms.ToImmutableList(); MinusTwoDayTerms = DateTimeDefinitions.MinusTwoDayTerms.ToImmutableList(); + + if ((config.Options & DateTimeOptions.TasksMode) != 0) + { + TasksModeDurationToDatePatterns = new Regex(DateTimeDefinitions.TasksModeDurationToDatePatterns, RegexFlags, RegexTimeOut); + } } public string DateTokenPrefix { get; } @@ -76,6 +89,8 @@ public EnglishDateParserConfiguration(ICommonDateTimeParserConfiguration config) public IDateTimeParser DurationParser { get; } + public IDateTimeParser HolidayParser { get; } + public IEnumerable DateRegexes { get; } public IImmutableDictionary UnitMap { get; } @@ -124,6 +139,10 @@ public EnglishDateParserConfiguration(ICommonDateTimeParserConfiguration config) public Regex PastPrefixRegex { get; } + public Regex BeforeAfterRegex { get; } + + public Regex TasksModeDurationToDatePatterns { get; } + public IImmutableDictionary DayOfMonth { get; } public IImmutableDictionary DayOfWeek { get; } @@ -166,8 +185,12 @@ public int GetSwiftMonthOrYear(string text) public bool IsCardinalLast(string text) { + + // @TODO move hardcoded values to resources file + var trimmedText = text.Trim(); - return 
trimmedText.Equals("last"); + + return trimmedText.Equals("last", StringComparison.Ordinal); } public string Normalize(string text) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDatePeriodParserConfiguration.cs index cec83d2f8f..b67d2ac334 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDatePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; @@ -10,19 +14,22 @@ namespace Microsoft.Recognizers.Text.DateTime.English public class EnglishDatePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDatePeriodParserConfiguration { public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterNextSuffixRegex = - new Regex(DateTimeDefinitions.AfterNextSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterNextSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeRegex = - new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificEndOfRangeRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NowParseRegex = + new Regex(DateTimeDefinitions.NowParseRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -35,11 +42,14 @@ public class EnglishDatePeriodParserConfiguration : BaseDateTimeOptionsConfigura private static IList weekTermsPadded = DateTimeDefinitions.WeekTerms.Select(str => $" {str} ").ToList(); + private static IList fortnightTermsPadded = + DateTimeDefinitions.FortnightTerms.Select(str => $" {str} ").ToList(); + private static IList yearTermsPadded = DateTimeDefinitions.YearTerms.Select(str => $" {str} ").ToList(); private static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public EnglishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) @@ -85,13 +95,19 @@ public EnglishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c RelativeDecadeRegex = EnglishDatePeriodExtractorConfiguration.RelativeDecadeRegex; InConnectorRegex = config.UtilityConfiguration.InConnectorRegex; WithinNextPrefixRegex = EnglishDatePeriodExtractorConfiguration.WithinNextPrefixRegex; + ForPrefixRegex = EnglishDatePeriodExtractorConfiguration.ForPrefixRegex; ReferenceDatePeriodRegex = EnglishDatePeriodExtractorConfiguration.ReferenceDatePeriodRegex; AgoRegex = EnglishDatePeriodExtractorConfiguration.AgoRegex; LaterRegex = EnglishDatePeriodExtractorConfiguration.LaterRegex; LessThanRegex = EnglishDatePeriodExtractorConfiguration.LessThanRegex; MoreThanRegex = EnglishDatePeriodExtractorConfiguration.MoreThanRegex; CenturySuffixRegex = EnglishDatePeriodExtractorConfiguration.CenturySuffixRegex; - NowRegex = EnglishDatePeriodExtractorConfiguration.NowRegex; + FirstLastRegex = 
EnglishDatePeriodExtractorConfiguration.FirstLastRegex; + OfYearRegex = EnglishDatePeriodExtractorConfiguration.OfYearRegex; + NowRegex = NowParseRegex; + SpecialDayRegex = EnglishDateExtractorConfiguration.SpecialDayRegex; + TodayNowRegex = new Regex(DateTimeDefinitions.TodayNowRegex, RegexOptions.Singleline, RegexTimeOut); + StartingRegex = EnglishDatePeriodExtractorConfiguration.StartingRegex; UnitMap = config.UnitMap; CardinalMap = config.CardinalMap; @@ -172,6 +188,8 @@ public EnglishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c public Regex WithinNextPrefixRegex { get; } + public Regex ForPrefixRegex { get; } + public Regex RestOfDateRegex { get; } public Regex LaterEarlyPeriodRegex { get; } @@ -202,6 +220,16 @@ public EnglishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c public Regex NowRegex { get; } + public Regex SpecialDayRegex { get; } + + public Regex TodayNowRegex { get; } + + public Regex FirstLastRegex { get; } + + public Regex OfYearRegex { get; } + + public Regex StartingRegex { get; } + Regex ISimpleDatePeriodParserConfiguration.RelativeRegex => RelativeRegex; Regex IDatePeriodParserConfiguration.NextPrefixRegex => NextPrefixRegex; @@ -212,6 +240,8 @@ public EnglishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c Regex IDatePeriodParserConfiguration.UnspecificEndOfRangeRegex => UnspecificEndOfRangeRegex; + Regex IDatePeriodParserConfiguration.AmbiguousPointRangeRegex => null; + bool IDatePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; public IImmutableDictionary UnitMap { get; } @@ -285,54 +315,62 @@ public int GetSwiftYear(string text) public bool IsFuture(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.FutureTerms.Any(o => trimmedText.StartsWith(o)); + return DateTimeDefinitions.FutureTerms.Any(o => trimmedText.StartsWith(o, StringComparison.Ordinal)); } public bool IsLastCardinal(string text) { var trimmedText = 
text.Trim(); - return DateTimeDefinitions.LastCardinalTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.LastCardinalTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } public bool IsMonthOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || (monthTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); } public bool IsMonthToDate(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } public bool IsWeekend(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || (weekendTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); } public bool IsWeekOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || (weekTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); } + public bool IsFortnight(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.FortnightTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (fortnightTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); + } + public bool IsYearOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.YearTerms.Any(o => 
trimmedText.EndsWith(o, StringComparison.Ordinal)) || (yearTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)) || - (DateTimeDefinitions.GenericYearTerms.Any(o => trimmedText.EndsWith(o)) && UnspecificEndOfRangeRegex.IsMatch(trimmedText)); + (DateTimeDefinitions.GenericYearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) && + UnspecificEndOfRangeRegex.IsMatch(trimmedText)); } public bool IsYearToDate(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateTimeAltParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateTimeAltParserConfiguration.cs index db64393b12..b4707c3b1d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateTimeAltParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateTimeAltParserConfiguration.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.DateTime.English +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.DateTime.English { public class EnglishDateTimeAltParserConfiguration : IDateTimeAltParserConfiguration { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateTimeParserConfiguration.cs index fcbc04267e..337fb224a9 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateTimeParserConfiguration.cs @@ -1,20 +1,44 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Immutable; using System.Text.RegularExpressions; + using Microsoft.Recognizers.Definitions.English; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.English { public class EnglishDateTimeParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeParserConfiguration { public static readonly Regex AmTimeRegex = - new Regex(DateTimeDefinitions.AMTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.AMTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmTimeRegex = - new Regex(DateTimeDefinitions.PMTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.PMTimeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NightTimeRegex = + new Regex(DateTimeDefinitions.NightTimeRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + private static readonly Regex NowTimeRegex = + new Regex(DateTimeDefinitions.NowTimeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex RecentlyTimeRegex = + new Regex(DateTimeDefinitions.RecentlyTimeRegex, RegexFlags, RegexTimeOut); + + private static 
readonly Regex AsapTimeRegex = + new Regex(DateTimeDefinitions.AsapTimeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex NextPrefixRegex = + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex PreviousPrefixRegex = + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + public EnglishDateTimeParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -26,6 +50,9 @@ public EnglishDateTimeParserConfiguration(ICommonDateTimeParserConfiguration con DateParser = config.DateParser; TimeParser = config.TimeParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; + NowRegex = EnglishDateTimeExtractorConfiguration.NowRegex; SimpleTimeOfTodayAfterRegex = EnglishDateTimeExtractorConfiguration.SimpleTimeOfTodayAfterRegex; @@ -101,17 +128,22 @@ public EnglishDateTimeParserConfiguration(ICommonDateTimeParserConfiguration con public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + public int GetHour(string text, int hour) { int result = hour; var trimmedText = text.Trim(); - if (trimmedText.EndsWith("morning") && hour >= Constants.HalfDayHourCount) + if (AMTimeRegex.MatchEnd(trimmedText, trim: true).Success && hour >= Constants.HalfDayHourCount) { result -= Constants.HalfDayHourCount; } - else if (!trimmedText.EndsWith("morning") && hour < Constants.HalfDayHourCount) + else if (!AMTimeRegex.MatchEnd(trimmedText, trim: true).Success && hour < Constants.HalfDayHourCount && + !(NightTimeRegex.MatchEnd(trimmedText, trim: true).Success && hour < Constants.QuarterDayHourCount)) { result += Constants.HalfDayHourCount; } @@ -123,15 +155,15 @@ public bool GetMatchedNowTimex(string text, out string timex) { var trimmedText = text.Trim(); - if (trimmedText.EndsWith("now")) + if 
(NowTimeRegex.MatchEnd(trimmedText, trim: true).Success) { timex = "PRESENT_REF"; } - else if (trimmedText.Equals("recently") || trimmedText.Equals("previously")) + else if (RecentlyTimeRegex.IsExactMatch(trimmedText, trim: true)) { timex = "PAST_REF"; } - else if (trimmedText.Equals("as soon as possible") || trimmedText.Equals("asap")) + else if (AsapTimeRegex.IsExactMatch(trimmedText, trim: true)) { timex = "FUTURE_REF"; } @@ -149,11 +181,11 @@ public int GetSwiftDay(string text) var trimmedText = text.Trim(); var swift = 0; - if (trimmedText.StartsWith("next")) + if (NextPrefixRegex.MatchBegin(trimmedText, trim: true).Success) { swift = 1; } - else if (trimmedText.StartsWith("last")) + else if (PreviousPrefixRegex.MatchBegin(trimmedText, trim: true).Success) { swift = -1; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateTimePeriodParserConfiguration.cs index 21e7ab194c..22607d75a8 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDateTimePeriodParserConfiguration.cs @@ -1,4 +1,9 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; @@ -8,16 +13,16 @@ namespace Microsoft.Recognizers.Text.DateTime.English public class EnglishDateTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimePeriodParserConfiguration { public static readonly Regex MorningStartEndRegex = - new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfternoonStartEndRegex = - new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex EveningStartEndRegex = - new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex NightStartEndRegex = - new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -25,6 +30,7 @@ public EnglishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati : base(config) { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime; DateExtractor = config.DateExtractor; TimeExtractor = config.TimeExtractor; @@ -39,8 +45,11 @@ public EnglishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati DurationParser = config.DurationParser; DateTimeParser = config.DateTimeParser; TimeZoneParser = config.TimeZoneParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; PureNumberFromToRegex = EnglishTimePeriodExtractorConfiguration.PureNumFromTo; + HyphenDateRegex = 
EnglishDateTimePeriodExtractorConfiguration.HyphenDateRegex; PureNumberBetweenAndRegex = EnglishTimePeriodExtractorConfiguration.PureNumBetweenAnd; SpecificTimeOfDayRegex = EnglishDateTimePeriodExtractorConfiguration.PeriodSpecificTimeOfDayRegex; TimeOfDayRegex = EnglishDateTimeExtractorConfiguration.TimeOfDayRegex; @@ -58,13 +67,17 @@ public EnglishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati PrefixDayRegex = EnglishDateTimePeriodExtractorConfiguration.PrefixDayRegex; BeforeRegex = EnglishDateTimePeriodExtractorConfiguration.BeforeRegex; AfterRegex = EnglishDateTimePeriodExtractorConfiguration.AfterRegex; - UnitMap = config.UnitMap; Numbers = config.Numbers; + StartingRegex = EnglishDateTimePeriodExtractorConfiguration.StartingRegex; + + TasksmodeMealTimeofDayRegex = EnglishDateTimePeriodExtractorConfiguration.TasksmodeMealTimeofDayRegex; } public string TokenBeforeDate { get; } + public string TokenBeforeTime { get; } + public IDateExtractor DateExtractor { get; } public IDateTimeExtractor TimeExtractor { get; } @@ -93,6 +106,8 @@ public EnglishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati public Regex PureNumberFromToRegex { get; } + public Regex HyphenDateRegex { get; } + public Regex PureNumberBetweenAndRegex { get; } public Regex SpecificTimeOfDayRegex { get; } @@ -127,50 +142,116 @@ public EnglishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati public Regex AfterRegex { get; } + public Regex TasksmodeMealTimeofDayRegex { get; } + + public Regex StartingRegex { get; } + bool IDateTimePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; public IImmutableDictionary UnitMap { get; } public IImmutableDictionary Numbers { get; } - public bool GetMatchedTimeRange(string text, out string timeStr, out int beginHour, out int endHour, out int endMin) + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + + // @TODO 
Move time range resolution to common policy + + public bool GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMin) { var trimmedText = text.Trim(); - beginHour = 0; endHour = 0; endMin = 0; + if (MorningStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TMO"; + todSymbol = "TMO"; beginHour = 8; endHour = Constants.HalfDayHourCount; } else if (AfternoonStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TAF"; + todSymbol = "TAF"; beginHour = Constants.HalfDayHourCount; endHour = 16; } else if (EveningStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TEV"; + todSymbol = "TEV"; beginHour = 16; endHour = 20; } else if (NightStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TNI"; + todSymbol = "TNI"; beginHour = 20; endHour = 23; endMin = 59; + + } + else if (((Options & DateTimeOptions.TasksMode) != 0) && DateTimeDefinitions.MealtimeBreakfastTermList.Any(o => trimmedText.Contains(o))) + { + todSymbol = Constants.MealtimeBreakfast; + beginHour = Constants.MealtimeBreakfastBeginHour; + endHour = Constants.MealtimeBreakfastEndHour; + } + else if (((Options & DateTimeOptions.TasksMode) != 0) && DateTimeDefinitions.MealtimeBrunchTermList.Any(o => trimmedText.Contains(o))) + { + todSymbol = Constants.MealtimeBrunch; + beginHour = Constants.MealtimeBrunchBeginHour; + endHour = Constants.MealtimeBrunchEndHour; + } + else if (((Options & DateTimeOptions.TasksMode) != 0) && DateTimeDefinitions.MealtimeLunchTermList.Any(o => trimmedText.Contains(o))) + { + todSymbol = Constants.MealtimeLunch; + beginHour = Constants.MealtimeLunchBeginHour; + endHour = Constants.MealtimeLunchEndHour; + } + else if (((Options & DateTimeOptions.TasksMode) != 0) && DateTimeDefinitions.MealtimeDinnerTermList.Any(o => trimmedText.Contains(o))) + { + todSymbol = Constants.MealtimeDinner; + beginHour = Constants.MealtimeDinnerBeginHour; + endHour = Constants.MealtimeDinnerEndHour; + } + else if (((Options & DateTimeOptions.TasksMode) != 0) && 
DateTimeDefinitions.MealtimeBreakfastTermList.Any(o => trimmedText.Contains(o))) + { + todSymbol = Constants.MealtimeBreakfast; + beginHour = Constants.MealtimeBreakfastBeginHour; + endHour = Constants.MealtimeBreakfastEndHour; + } + else if (((Options & DateTimeOptions.TasksMode) != 0) && DateTimeDefinitions.MealtimeBrunchTermList.Any(o => trimmedText.Contains(o))) + { + todSymbol = Constants.MealtimeBrunch; + beginHour = Constants.MealtimeBrunchBeginHour; + endHour = Constants.MealtimeBrunchEndHour; + } + else if (((Options & DateTimeOptions.TasksMode) != 0) && DateTimeDefinitions.MealtimeLunchTermList.Any(o => trimmedText.Contains(o))) + { + todSymbol = Constants.MealtimeLunch; + beginHour = Constants.MealtimeLunchBeginHour; + endHour = Constants.MealtimeLunchEndHour; + } + else if (((Options & DateTimeOptions.TasksMode) != 0) && DateTimeDefinitions.MealtimeDinnerTermList.Any(o => trimmedText.Contains(o))) + { + todSymbol = Constants.MealtimeDinner; + beginHour = Constants.MealtimeDinnerBeginHour; + endHour = Constants.MealtimeDinnerEndHour; } else { - timeStr = null; + todSymbol = null; return false; } + // TasksMode modifies the values of Ambiguous time refrences like morning, lunchtime etc. 
+ if ((Options & DateTimeOptions.TasksMode) != 0) + { + return TasksModeProcessing.GetMatchedTimeRangeForTasksMode(text, todSymbol, + out beginHour, out endHour, out endMin); + } + return true; } @@ -178,12 +259,14 @@ public int GetSwiftPrefix(string text) { var trimmedText = text.Trim(); + // @TODO Move hardcoded terms to resource file + var swift = 0; - if (trimmedText.StartsWith("next")) + if (trimmedText.StartsWith("next", StringComparison.Ordinal)) { swift = 1; } - else if (trimmedText.StartsWith("last")) + else if (trimmedText.StartsWith("last", StringComparison.Ordinal)) { swift = -1; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDurationParserConfiguration.cs index e78fc6fbbb..f6f08e96fd 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDurationParserConfiguration.cs @@ -1,10 +1,20 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.English; namespace Microsoft.Recognizers.Text.DateTime.English { public class EnglishDurationParserConfiguration : BaseDateTimeOptionsConfiguration, IDurationParserConfiguration { + + public static readonly Regex PrefixArticleRegex = + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public EnglishDurationParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -34,7 +44,7 @@ public EnglishDurationParserConfiguration(ICommonDateTimeParserConfiguration con public IExtractor CardinalExtractor { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public IParser NumberParser { get; } @@ -42,6 +52,8 @@ public EnglishDurationParserConfiguration(ICommonDateTimeParserConfiguration con public Regex AnUnitRegex { get; } + Regex IDurationParserConfiguration.PrefixArticleRegex => PrefixArticleRegex; + public Regex DuringRegex { get; } public Regex AllDateUnitRegex { get; } @@ -62,6 +74,8 @@ public EnglishDurationParserConfiguration(ICommonDateTimeParserConfiguration con public Regex SpecialNumberUnitRegex { get; } + bool IDurationParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + public IImmutableDictionary UnitMap { get; } public IImmutableDictionary UnitValueMap { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishHolidayParserConfiguration.cs index 6b4a444b46..3cfa9def21 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishHolidayParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishHolidayParserConfiguration.cs @@ -1,35 
+1,52 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Collections.Immutable; +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; + using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.English { public class EnglishHolidayParserConfiguration : BaseHolidayParserConfiguration { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public EnglishHolidayParserConfiguration(IDateTimeOptionsConfiguration config) : base(config) { + ThisPrefixRegex = new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); + NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + this.HolidayRegexList = EnglishHolidayExtractorConfiguration.HolidayRegexList; this.HolidayNames = DateTimeDefinitions.HolidayNames.ToImmutableDictionary(); } + public Regex ThisPrefixRegex { get; } + + public Regex NextPrefixRegex { get; } + + public Regex PreviousPrefixRegex { get; } + public override int GetSwiftYear(string text) { var trimmedText = text.Trim(); var swift = -10; - if (trimmedText.StartsWith("next")) + if (NextPrefixRegex.IsMatch(trimmedText)) { swift = 1; } - else if (trimmedText.StartsWith("last")) + else if (PreviousPrefixRegex.IsMatch(trimmedText)) { swift = -1; } - else if (trimmedText.StartsWith("this")) + else if (ThisPrefixRegex.IsMatch(trimmedText)) { swift = 0; } @@ -43,7 +60,8 @@ public override string SanitizeHolidayToken(string holiday) .Replace("saint ", "st ") .Replace(" ", string.Empty) .Replace("'", string.Empty) - .Replace(".", string.Empty); + .Replace(".", string.Empty) + .Replace("-", string.Empty); } // @TODO Change to auto-generate. 
@@ -77,16 +95,16 @@ protected override IDictionary> InitHolidayFuncs() { "earthday", EarthDay }, { "stgeorgeday", StGeorgeDay }, { "mayday", Mayday }, - { "cincodemayoday", CincoDeMayoday }, + { "cincodemayoday", CincoDeMayoDay }, { "baptisteday", BaptisteDay }, { "usindependenceday", UsaIndependenceDay }, { "independenceday", UsaIndependenceDay }, { "bastilleday", BastilleDay }, { "halloweenday", HalloweenDay }, { "allhallowday", AllHallowDay }, - { "allsoulsday", AllSoulsday }, + { "allsoulsday", AllSoulsDay }, { "guyfawkesday", GuyFawkesDay }, - { "veteransday", Veteransday }, + { "veteransday", VeteransDay }, { "christmaseve", ChristmasEve }, { "newyeareve", NewYearEve }, { "easterday", EasterDay }, @@ -101,6 +119,11 @@ protected override IDictionary> InitHolidayFuncs() { "whitemonday", WhiteMonday }, { "trinitysunday", TrinitySunday }, { "corpuschristi", CorpusChristi }, + { "juneteenth", Juneteenth }, + { "ramadan", Ramadan }, + { "sacrifice", Sacrifice }, + { "eidalfitr", EidAlFitr }, + { "islamicnewyear", IslamicNewYear }, }; } @@ -146,7 +169,7 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject Mayday(int year) => new DateObject(year, 5, 1); - private static DateObject CincoDeMayoday(int year) => new DateObject(year, 5, 5); + private static DateObject CincoDeMayoDay(int year) => new DateObject(year, 5, 5); private static DateObject BaptisteDay(int year) => new DateObject(year, 6, 24); @@ -158,13 +181,15 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject AllHallowDay(int year) => new DateObject(year, 11, 1); - private static DateObject AllSoulsday(int year) => new DateObject(year, 11, 2); + private static DateObject AllSoulsDay(int year) => new DateObject(year, 11, 2); private static DateObject GuyFawkesDay(int year) => new DateObject(year, 11, 5); - private static DateObject Veteransday(int year) => new DateObject(year, 11, 11); + private static DateObject VeteransDay(int year) => new 
DateObject(year, 11, 11); - private static DateObject EasterDay(int year) => CalculateHolidayByEaster(year); + private static DateObject Juneteenth(int year) => new DateObject(year, 6, 19); + + private static DateObject EasterDay(int year) => HolidayFunctions.CalculateHolidayByEaster(year); private static DateObject AshWednesday(int year) => EasterDay(year).AddDays(-46); @@ -188,26 +213,12 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject CorpusChristi(int year) => EasterDay(year).AddDays(60); - // function adopted from German implementation - private static DateObject CalculateHolidayByEaster(int year, int days = 0) - { - int day = 0; - int month = 3; - - int g = year % 19; - int c = year / 100; - int h = (c - (int)(c / 4) - (int)(((8 * c) + 13) / 25) + (19 * g) + 15) % 30; - int i = h - ((int)(h / 28) * (1 - ((int)(h / 28) * (int)(29 / (h + 1)) * (int)((21 - g) / 11)))); + private static DateObject Ramadan(int year) => HolidayFunctions.IslamicHoliday(year, HolidayFunctions.IslamicHolidayType.Ramadan); - day = i - ((year + (int)(year / 4) + i + 2 - c + (int)(c / 4)) % 7) + 28; + private static DateObject Sacrifice(int year) => HolidayFunctions.IslamicHoliday(year, HolidayFunctions.IslamicHolidayType.Sacrifice); - if (day > 31) - { - month++; - day -= 31; - } + private static DateObject EidAlFitr(int year) => HolidayFunctions.IslamicHoliday(year, HolidayFunctions.IslamicHolidayType.EidAlFitr); - return DateObject.MinValue.SafeCreateFromValue(year, month, day).AddDays(days); - } + private static DateObject IslamicNewYear(int year) => HolidayFunctions.IslamicHoliday(year, HolidayFunctions.IslamicHolidayType.NewYear); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishMergedParserConfiguration.cs index 7a4566c2ba..aaf15ea98f 100644 --- 
a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishMergedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishMergedParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.English; using Microsoft.Recognizers.Text.Matcher; namespace Microsoft.Recognizers.Text.DateTime.English @@ -10,7 +14,8 @@ public EnglishMergedParserConfiguration(IDateTimeOptionsConfiguration config) { BeforeRegex = EnglishMergedExtractorConfiguration.BeforeRegex; AfterRegex = EnglishMergedExtractorConfiguration.AfterRegex; - SinceRegex = EnglishMergedExtractorConfiguration.SinceRegex; + SinceRegex = (config.Options & DateTimeOptions.ExperimentalMode) != 0 ? EnglishMergedExtractorConfiguration.SinceRegexExp : + EnglishMergedExtractorConfiguration.SinceRegex; AroundRegex = EnglishMergedExtractorConfiguration.AroundRegex; EqualRegex = EnglishMergedExtractorConfiguration.EqualRegex; SuffixAfter = EnglishMergedExtractorConfiguration.SuffixAfterRegex; @@ -23,7 +28,7 @@ public EnglishMergedParserConfiguration(IDateTimeOptionsConfiguration config) DateTimePeriodParser = new BaseDateTimePeriodParser(new EnglishDateTimePeriodParserConfiguration(this)); SetParser = new BaseSetParser(new EnglishSetParserConfiguration(this)); HolidayParser = new BaseHolidayParser(new EnglishHolidayParserConfiguration(this)); - TimeZoneParser = new BaseTimeZoneParser(); + TimeZoneParser = new BaseTimeZoneParser(new EnglishTimeZoneParserConfiguration(this)); } public Regex BeforeRegex { get; } @@ -45,5 +50,7 @@ public EnglishMergedParserConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeParser HolidayParser { get; } public StringMatcher SuperfluousWordMatcher { get; } + + bool IMergedParserConfiguration.CheckBothBeforeAfter => 
DateTimeDefinitions.CheckBothBeforeAfter; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishSetParserConfiguration.cs index a12f79e44b..3b8d32ccfd 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishSetParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishSetParserConfiguration.cs @@ -1,11 +1,53 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.English; using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime.English { public class EnglishSetParserConfiguration : BaseDateTimeOptionsConfiguration, ISetParserConfiguration { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex DoubleMultiplierRegex = + new Regex(DateTimeDefinitions.DoubleMultiplierRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex HalfMultiplierRegex = + new Regex(DateTimeDefinitions.HalfMultiplierRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex DayTypeRegex = + new Regex(DateTimeDefinitions.DayTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex WeekTypeRegex = + new Regex(DateTimeDefinitions.WeekTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex WeekendTypeRegex = + new Regex(DateTimeDefinitions.WeekendTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex MonthTypeRegex = + new Regex(DateTimeDefinitions.MonthTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex QuarterTypeRegex = + new 
Regex(DateTimeDefinitions.QuarterTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex YearTypeRegex = + new Regex(DateTimeDefinitions.YearTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex FortNightRegex = + new Regex(DateTimeDefinitions.FortNightRegex, RegexFlags); + + private static readonly Regex WeekDayTypeRegex = + new Regex(DateTimeDefinitions.WeekDayTypeRegex, RegexFlags); + + // pass FutureTerms as List to ReplaceValueInTextWithFutTerm function + private static readonly List ThisTerms = (List)DateTimeDefinitions.FutureTerms; + public EnglishSetParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -80,54 +122,52 @@ public bool GetMatchedDailyTimex(string text, out string timex) { var trimmedText = text.Trim(); - if (trimmedText.Equals("daily")) + float durationLength = 1; // Default value + float multiplier = 1; + string durationType; + + if (DoubleMultiplierRegex.IsMatch(trimmedText)) { - timex = "P1D"; + multiplier = 2; } - else if (trimmedText.Equals("weekly")) + else if (HalfMultiplierRegex.IsMatch(trimmedText)) { - timex = "P1W"; + multiplier = 0.5f; } - else if (trimmedText.Equals("biweekly")) + + if (WeekDayTypeRegex.IsMatch(trimmedText)) { - timex = "P2W"; + durationType = DateTimeDefinitions.UnitMap["weekday"]; } - else if (trimmedText.Equals("monthly")) + else if (DayTypeRegex.IsMatch(trimmedText)) { - timex = "P1M"; + durationType = DateTimeDefinitions.UnitMap["day"]; } - else if (trimmedText.Equals("yearly") || trimmedText.Equals("annually") || trimmedText.Equals("annual")) + else if (WeekTypeRegex.IsMatch(trimmedText)) { - timex = "P1Y"; + durationType = DateTimeDefinitions.UnitMap["week"]; } - else + else if (WeekendTypeRegex.IsMatch(trimmedText)) { - timex = null; - return false; + durationType = DateTimeDefinitions.UnitMap["weekend"]; } - - return true; - } - - public bool GetMatchedUnitTimex(string text, out string timex) - { - var trimmedText = text.Trim(); - - if 
(trimmedText.Equals("day")) + else if (FortNightRegex.IsMatch(trimmedText)) { - timex = "P1D"; + durationLength = 2; + durationType = DateTimeDefinitions.UnitMap["week"]; } - else if (trimmedText.Equals("week")) + else if (MonthTypeRegex.IsMatch(trimmedText)) { - timex = "P1W"; + durationType = DateTimeDefinitions.UnitMap["m"]; } - else if (trimmedText.Equals("month")) + else if (QuarterTypeRegex.IsMatch(trimmedText)) { - timex = "P1M"; + durationLength = 3; + durationType = DateTimeDefinitions.UnitMap["m"]; } - else if (trimmedText.Equals("year")) + else if (YearTypeRegex.IsMatch(trimmedText)) { - timex = "P1Y"; + durationType = DateTimeDefinitions.UnitMap["y"]; } else { @@ -135,9 +175,18 @@ public bool GetMatchedUnitTimex(string text, out string timex) return false; } + timex = TimexUtility.GenerateSetTimex(durationType, durationLength, multiplier); + return true; } + public bool GetMatchedUnitTimex(string text, out string timex) + { + return GetMatchedDailyTimex(text, out timex); + } + public string WeekDayGroupMatchString(Match match) => SetHandler.WeekDayGroupMatchString(match); + + public string ReplaceValueInTextWithFutTerm(string text, string value) => TasksModeSetHandler.ReplaceValueInTextWithFutTerm(text, value, ThisTerms); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishTimeParserConfiguration.cs index 662babeba5..cf9c8306e7 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishTimeParserConfiguration.cs @@ -1,5 +1,10 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -12,13 +17,25 @@ public class EnglishTimeParserConfiguration : BaseDateTimeOptionsConfiguration, private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex TimeSuffixFull = - new Regex(DateTimeDefinitions.TimeSuffixFull, RegexFlags); + new Regex(DateTimeDefinitions.TimeSuffixFull, RegexFlags, RegexTimeOut); private static readonly Regex LunchRegex = - new Regex(DateTimeDefinitions.LunchRegex, RegexFlags); + new Regex(DateTimeDefinitions.LunchRegex, RegexFlags, RegexTimeOut); private static readonly Regex NightRegex = - new Regex(DateTimeDefinitions.NightRegex, RegexFlags); + new Regex(DateTimeDefinitions.NightRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex HalfTokenRegex = + new Regex(DateTimeDefinitions.HalfTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex QuarterTokenRegex = + new Regex(DateTimeDefinitions.QuarterTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex ThreeQuarterTokenRegex = + new Regex(DateTimeDefinitions.ThreeQuarterTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex ToTokenRegex = + new Regex(DateTimeDefinitions.ToTokenRegex, RegexFlags, RegexTimeOut); public EnglishTimeParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) @@ -51,15 +68,15 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha var trimedPrefix = prefix.Trim(); - if (trimedPrefix.StartsWith("half")) + if (HalfTokenRegex.IsMatch(trimedPrefix)) { deltaMin = 30; } - else if (trimedPrefix.StartsWith("a quarter") || trimedPrefix.StartsWith("quarter")) + else if (QuarterTokenRegex.IsMatch(trimedPrefix)) { deltaMin = 15; } - else 
if (trimedPrefix.StartsWith("three quarter")) + else if (ThreeQuarterTokenRegex.IsMatch(trimedPrefix)) { deltaMin = 45; } @@ -69,7 +86,7 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha var minStr = match.Groups["deltamin"].Value; if (!string.IsNullOrWhiteSpace(minStr)) { - deltaMin = int.Parse(minStr); + deltaMin = int.Parse(minStr, CultureInfo.InvariantCulture); } else { @@ -78,7 +95,7 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha } } - if (trimedPrefix.EndsWith("to")) + if (ToTokenRegex.IsMatch(trimedPrefix)) { deltaMin = -deltaMin; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishTimePeriodParserConfiguration.cs index e66619cc17..6900777393 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishTimePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; @@ -23,6 +27,9 @@ public EnglishTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration c TimeOfDayRegex = EnglishTimePeriodExtractorConfiguration.TimeOfDayRegex; GeneralEndingRegex = EnglishTimePeriodExtractorConfiguration.GeneralEndingRegex; TillRegex = EnglishTimePeriodExtractorConfiguration.TillRegex; + TimePeriodWithDurationRegex = EnglishTimePeriodExtractorConfiguration.TimePeriodWithDurationRegex; + DurationParser = config.DurationParser; + DurationExtractor = config.DurationExtractor; Numbers = config.Numbers; UtilityConfiguration = config.UtilityConfiguration; @@ -36,6 +43,10 @@ public EnglishTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration c public IDateTimeParser TimeZoneParser { get; } + public IDateTimeParser DurationParser { get; } + + public IDateTimeExtractor DurationExtractor { get; } + public Regex SpecificTimeFromToRegex { get; } public Regex SpecificTimeBetweenAndRegex { get; } @@ -50,14 +61,16 @@ public EnglishTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration c public Regex TillRegex { get; } + public Regex TimePeriodWithDurationRegex { get; } + public IImmutableDictionary Numbers { get; } public IDateTimeUtilityConfiguration UtilityConfiguration { get; } - public bool GetMatchedTimexRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) + public bool GetMatchedTimeRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) { var trimmedText = text.Trim(); - if (trimmedText.EndsWith("s")) + if (trimmedText.EndsWith("s", StringComparison.Ordinal)) { trimmedText = trimmedText.Substring(0, trimmedText.Length - 1); } @@ -67,23 +80,27 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou endMin = 0; var timeOfDay = string.Empty; - if 
(DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o))) + if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Morning; } - else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Afternoon; } - else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Evening; } - else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o))) + else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Daytime; } - else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.NighttimeTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) + { + timeOfDay = Constants.Nighttime; + } + else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Night; } @@ -113,12 +130,24 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou return false; } - var parseResult = TimexUtility.ParseTimeOfDay(timeOfDay); + var parseResult = TimexUtility.ResolveTimeOfDay(timeOfDay); timex = parseResult.Timex; beginHour = parseResult.BeginHour; endHour = parseResult.EndHour; endMin = parseResult.EndMin; + if ((Options & DateTimeOptions.TasksMode) != 0) + { + beginHour = 0; + endHour = 0; + endMin = 0; + parseResult = TasksModeProcessing.TasksModeResolveTimeOfDay(timeOfDay); + timex = parseResult.Timex; + beginHour = parseResult.BeginHour; + endHour = parseResult.EndHour; + endMin = parseResult.EndMin; + } + return true; } } diff --git 
a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishTimeZoneParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishTimeZoneParserConfiguration.cs new file mode 100644 index 0000000000..52516e9a60 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishTimeZoneParserConfiguration.cs @@ -0,0 +1,33 @@ +using System; +using System.Collections.Generic; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.English; + +namespace Microsoft.Recognizers.Text.DateTime.English +{ + public class EnglishTimeZoneParserConfiguration : BaseDateTimeOptionsConfiguration, ITimeZoneParserConfiguration + { + public static readonly string TimeZoneEndRegex = TimeZoneDefinitions.TimeZoneEndRegex; + + public static readonly Dictionary FullToMinMapping = TimeZoneDefinitions.FullToMinMapping; + + public static readonly Regex DirectUtcRegex = + new Regex(TimeZoneDefinitions.DirectUtcRegex, RegexOptions.IgnoreCase | RegexOptions.Singleline, RegexTimeOut); + + public static readonly Dictionary AbbrToMinMapping = TimeZoneDefinitions.AbbrToMinMapping; + + public EnglishTimeZoneParserConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + } + + string ITimeZoneParserConfiguration.TimeZoneEndRegex => TimeZoneEndRegex; + + Dictionary ITimeZoneParserConfiguration.FullToMinMapping => FullToMinMapping; + + Regex ITimeZoneParserConfiguration.DirectUtcRegex => DirectUtcRegex; + + Dictionary ITimeZoneParserConfiguration.AbbrToMinMapping => AbbrToMinMapping; + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/TimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/TimeParser.cs index b05af9772c..8b7d7abc96 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/TimeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/TimeParser.cs @@ -1,4 +1,10 @@ -using Microsoft.Recognizers.Text.Utilities; +// Copyright (c) 
Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; + +using Microsoft.Recognizers.Text.Utilities; + using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.English @@ -35,10 +41,10 @@ private DateTimeResolutionResult ParseIsh(string text, DateObject referenceTime) var hour = Constants.HalfDayHourCount; if (!string.IsNullOrEmpty(hourStr)) { - hour = int.Parse(hourStr); + hour = int.Parse(hourStr, CultureInfo.InvariantCulture); } - ret.Timex = "T" + hour.ToString("D2"); + ret.Timex = "T" + hour.ToString("D2", CultureInfo.InvariantCulture); ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(referenceTime.Year, referenceTime.Month, referenceTime.Day, hour, 0, 0); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Utilities/EnglishDatetimeUtilityConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Utilities/EnglishDatetimeUtilityConfiguration.cs index 66e6724162..dfa6a7198b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Utilities/EnglishDatetimeUtilityConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Utilities/EnglishDatetimeUtilityConfiguration.cs @@ -1,73 +1,32 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime.English.Utilities { - public class EnglishDatetimeUtilityConfiguration : IDateTimeUtilityConfiguration + public class EnglishDatetimeUtilityConfiguration : BaseDatetimeUtilityConfiguration { - public static readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoRegex, RegexFlags); - - public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); - - public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); - - public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); - - public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); - - public static readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); - - public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); - - public static readonly Regex AmPmDescRegex = - new Regex(DateTimeDefinitions.AmPmDescRegex, RegexFlags); - - public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); - - public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); - - public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); - - public static readonly Regex CommonDatePrefixRegex = - new Regex(DateTimeDefinitions.CommonDatePrefixRegex, RegexFlags); - - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - - Regex IDateTimeUtilityConfiguration.LaterRegex => LaterRegex; - - Regex IDateTimeUtilityConfiguration.AgoRegex => AgoRegex; - - Regex 
IDateTimeUtilityConfiguration.InConnectorRegex => InConnectorRegex; - - Regex IDateTimeUtilityConfiguration.SinceYearSuffixRegex => SinceYearSuffixRegex; - - Regex IDateTimeUtilityConfiguration.WithinNextPrefixRegex => WithinNextPrefixRegex; - - Regex IDateTimeUtilityConfiguration.AmDescRegex => AmDescRegex; - - Regex IDateTimeUtilityConfiguration.PmDescRegex => PmDescRegex; - - Regex IDateTimeUtilityConfiguration.AmPmDescRegex => AmPmDescRegex; - - Regex IDateTimeUtilityConfiguration.RangeUnitRegex => RangeUnitRegex; - - Regex IDateTimeUtilityConfiguration.TimeUnitRegex => TimeUnitRegex; - - Regex IDateTimeUtilityConfiguration.DateUnitRegex => DateUnitRegex; - - Regex IDateTimeUtilityConfiguration.CommonDatePrefixRegex => CommonDatePrefixRegex; - - bool IDateTimeUtilityConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + public EnglishDatetimeUtilityConfiguration() + : base( + DateTimeDefinitions.AgoRegex, + DateTimeDefinitions.LaterRegex, + DateTimeDefinitions.InConnectorRegex, + DateTimeDefinitions.SinceYearSuffixRegex, + DateTimeDefinitions.WithinNextPrefixRegex, + DateTimeDefinitions.AmDescRegex, + DateTimeDefinitions.PmDescRegex, + DateTimeDefinitions.AmPmDescRegex, + DateTimeDefinitions.RangeUnitRegex, + DateTimeDefinitions.TimeUnitRegex, + DateTimeDefinitions.DateUnitRegex, + DateTimeDefinitions.CommonDatePrefixRegex, + DateTimeDefinitions.RangePrefixRegex, + RegexOptions.Singleline | RegexOptions.ExplicitCapture, + DateTimeDefinitions.CheckBothBeforeAfter) + { + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/AbstractYearExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/AbstractYearExtractor.cs index c5ee718275..4fdf95b6cd 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/AbstractYearExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/AbstractYearExtractor.cs @@ -1,12 +1,17 @@ -using System; +// Copyright (c) Microsoft 
Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; +using System.Globalization; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime { public abstract class AbstractYearExtractor : IDateExtractor { - public AbstractYearExtractor(IDateExtractorConfiguration config) + + protected AbstractYearExtractor(IDateExtractorConfiguration config) { this.Config = config; } @@ -22,16 +27,18 @@ public int GetYearFromText(Match match) int year = Constants.InvalidYear; var yearStr = match.Groups["year"].Value; - if (!string.IsNullOrEmpty(yearStr)) + var writtenYearStr = match.Groups["fullyear"].Value; + + if (!string.IsNullOrEmpty(yearStr) && !yearStr.Equals(writtenYearStr, StringComparison.Ordinal)) { - year = int.Parse(yearStr); + year = int.Parse(yearStr, CultureInfo.InvariantCulture); if (year < 100 && year >= Constants.MinTwoDigitYearPastNum) { - year += 1900; + year += Constants.BASE_YEAR_PAST_CENTURY; } else if (year >= 0 && year < Constants.MaxTwoDigitYearFutureNum) { - year += 2000; + year += Constants.BASE_YEAR_CURRENT_CENTURY; } } else @@ -76,6 +83,30 @@ public int GetYearFromText(Match match) year = (firstTwoYearNum * 100) + lastTwoYearNum; } } + else + { + + if (!string.IsNullOrEmpty(writtenYearStr)) + { + var er = new ExtractResult + { + Text = writtenYearStr, + Start = match.Groups["fullyear"].Index, + Length = match.Groups["fullyear"].Length, + }; + + year = Convert.ToInt32((double)(this.Config.NumberParser.Parse(er).Value ?? 
0)); + + if (year < 100 && year >= Constants.MinTwoDigitYearPastNum) + { + year += Constants.BASE_YEAR_PAST_CENTURY; + } + else if (year >= 0 && year < Constants.MaxTwoDigitYearFutureNum) + { + year += Constants.BASE_YEAR_CURRENT_CENTURY; + } + } + } } return year; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateExtractor.cs index 5105855702..7e0bfe3118 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateExtractor.cs @@ -1,7 +1,13 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Collections.Immutable; +using System.Linq; using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Text.InternalCache; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -11,9 +17,16 @@ public class BaseDateExtractor : AbstractYearExtractor, IDateExtractor { public static readonly string ExtractorName = Constants.SYS_DATETIME_DATE; // "Date"; + private static readonly ResultsCache ResultsCache = new ResultsCache(); + + private readonly string keyPrefix; + public BaseDateExtractor(IDateExtractorConfiguration config) : base(config) { + + keyPrefix = string.Intern(Config.Options + "_" + Config.LanguageMarker); + } public static bool IsOverlapWithExistExtractions(Token er, List existErs) @@ -36,13 +49,21 @@ public override List Extract(string text) public override List Extract(string text, DateObject reference) { - var tokens = new List(); - tokens.AddRange(BasicRegexMatch(text)); - tokens.AddRange(ImplicitDate(text)); - tokens.AddRange(NumberWithMonth(text, reference)); - tokens.AddRange(ExtractRelativeDurationDate(text, reference)); - return Token.MergeAllTokens(tokens, text, ExtractorName); + List results; + + if 
((this.Config.Options & DateTimeOptions.NoProtoCache) != 0) + { + results = ExtractImpl(text, reference); + } + else + { + var key = (keyPrefix, text, reference); + + results = ResultsCache.GetOrCreate(key, () => ExtractImpl(text, reference)); + } + + return results; } // "In 3 days/weeks/months/years" = "3 days/weeks/months/years from now" @@ -73,14 +94,12 @@ public List ExtractRelativeDurationDateWithInPrefix(string text, List ExtractImpl(string text, DateObject reference) + { + var tokens = new List(); + tokens.AddRange(BasicRegexMatch(text)); + tokens.AddRange(ImplicitDate(text)); + tokens.AddRange(NumberWithMonth(text, reference)); + tokens.AddRange(ExtractRelativeDurationDate(text, tokens, reference)); + + var results = Token.MergeAllTokens(tokens, text, ExtractorName); + + return results; } // match basic patterns in DateRegexList private List BasicRegexMatch(string text) { - var ret = new List(); + + var results = new List(); + foreach (var regex in this.Config.DateRegexList) { var matches = regex.Matches(text); + foreach (Match match in matches) { // some match might be part of the date range entity, and might be split in a wrong way - if (ValidateMatch(match, text)) + if (DateContext.ValidateMatch(match, text, this.Config.DateRegexList, this.Config.RangeConnectorSymbolRegex)) { // Cases that the relative term is before the detected date entity, like "this 5/12", "next friday 5/12" var preText = text.Substring(0, match.Index); var relativeRegex = this.Config.StrictRelativeRegex.MatchEnd(preText, trim: true); + if (relativeRegex.Success) { - ret.Add(new Token(relativeRegex.Index, match.Index + match.Length)); + results.Add(new Token(relativeRegex.Index, match.Index + match.Length)); } else { - ret.Add(new Token(match.Index, match.Index + match.Length)); + results.Add(new Token(match.Index, match.Index + match.Length)); } } } } - return ret; - } - - // this method is to validate whether the match is part of date range and is a correct split - // For example: 
in case "10-1 - 11-7", "10-1 - 11" can be matched by some of the Regexes, but the full text is a date range, so "10-1 - 11" is not a correct split - private bool ValidateMatch(Match match, string text) - { - // If the match doesn't contains "year" part, it will not be ambiguous and it's a valid match - var isValidMatch = !match.Groups["year"].Success; - - if (!isValidMatch) - { - var yearGroup = match.Groups["year"]; - - // If the "year" part is not at the end of the match, it's a valid match - if (!(yearGroup.Index + yearGroup.Length == match.Index + match.Length)) - { - isValidMatch = true; - } - else - { - var subText = text.Substring(yearGroup.Index); - - // If the following text (include the "year" part) doesn't start with a Date entity, it's a valid match - if (!StartsWithBasicDate(subText)) - { - isValidMatch = true; - } - else - { - // If the following text (include the "year" part) starts with a Date entity, but the following text (doesn't include the "year" part) also starts with a valid Date entity, the current match is still valid - // For example, "10-1-2018-10-2-2018". Match "10-1-2018" is valid because though "2018-10-2" a valid match (indicates the first year "2018" might belongs to the second Date entity), but "10-2-2018" is also a valid match. 
- subText = text.Substring(yearGroup.Index + yearGroup.Length).Trim(); - subText = TrimStartRangeConnectorSymbols(subText); - isValidMatch = StartsWithBasicDate(subText); - } - } - } - - return isValidMatch; - } - - // TODO: Simplify this method to improve the performance - private string TrimStartRangeConnectorSymbols(string text) - { - var rangeConnectorSymbolMatches = Config.RangeConnectorSymbolRegex.Matches(text); - - foreach (Match symbolMatch in rangeConnectorSymbolMatches) - { - var startSymbolLength = -1; - - if (symbolMatch.Success && symbolMatch.Index == 0 && symbolMatch.Length > startSymbolLength) - { - startSymbolLength = symbolMatch.Length; - } - - if (startSymbolLength > 0) - { - text = text.Substring(startSymbolLength); - } - } - - return text.Trim(); - } - - // TODO: Simplify this method to improve the performance - private bool StartsWithBasicDate(string text) - { - foreach (var regex in this.Config.DateRegexList) - { - var match = regex.MatchBegin(text, trim: true); - - if (match.Success) - { - return true; - } - } - - return false; + return results; } // match several other cases @@ -255,9 +213,9 @@ private List NumberWithMonth(string text, DateObject reference) foreach (var result in er) { - int.TryParse((this.Config.NumberParser.Parse(result).Value ?? 0).ToString(), out int num); + var parsed = int.TryParse((this.Config.NumberParser.Parse(result).Value ?? 0).ToString(), out int num); - if (num < 1 || num > 31) + if (!parsed || (num < 1 || num > 31)) { continue; } @@ -265,9 +223,16 @@ private List NumberWithMonth(string text, DateObject reference) if (result.Start >= 0) { // Handling cases like '(Monday,) Jan twenty two' - var frontStr = text.Substring(0, result.Start ?? 0); + var prefixStr = text.Substring(0, result.Start ?? 0); + + // Check that the extracted number is not part of a decimal number, time expression or currency + // (e.g. 
'123.24', '12:24', '$12') + if (MatchingUtil.IsInvalidDayNumberPrefix(prefixStr)) + { + continue; + } - var match = this.Config.MonthEnd.Match(frontStr); + var match = this.Config.MonthEnd.Match(prefixStr); if (match.Success) { var startIndex = match.Index; @@ -310,12 +275,13 @@ private List NumberWithMonth(string text, DateObject reference) // Handling cases like 'Thursday the 21st', which both 'Thursday' and '21st' refer to a same date matches = this.Config.WeekDayAndDayOfMonthRegex.Matches(text); + foreach (Match matchCase in matches) { if (matchCase.Success) { var ordinalNum = matchCase.Groups["DayOfMonth"].Value; - if (ordinalNum == result.Text) + if (ordinalNum == result.Text && matchCase.Groups["DayOfMonth"].Index == result.Start) { // Get week of day for the ordinal number which is regarded as a date of reference month var date = DateObject.MinValue.SafeCreateFromValue(reference.Year, reference.Month, num); @@ -325,21 +291,10 @@ private List NumberWithMonth(string text, DateObject reference) // to see whether they refer to the same week day var extractedWeekDayStr = matchCase.Groups["weekday"].Value; - // calculate matchLength considering that matchCase can preceed or follow result - var matchLength = matchCase.Index < result.Start ? result.Start + result.Length - matchCase.Index : matchCase.Index + matchCase.Length - result.Start; - if (!date.Equals(DateObject.MinValue) && - numWeekDayInt == Config.DayOfWeek[extractedWeekDayStr] && - matchCase.Length == matchLength) + numWeekDayInt == Config.DayOfWeek[extractedWeekDayStr]) { - if (matchCase.Index < result.Start) - { - ret.Add(new Token(matchCase.Index, result.Start + result.Length ?? 
0)); - } - else - { - ret.Add(new Token((int)result.Start, matchCase.Index + matchCase.Length)); - } + ret.Add(new Token(matchCase.Index, matchCase.Index + matchCase.Length)); isFound = true; } @@ -363,8 +318,12 @@ private List NumberWithMonth(string text, DateObject reference) if (matchLength == matchCase.Length) { - ret.Add(new Token(matchCase.Index, result.Start + result.Length ?? 0)); - isFound = true; + // check if day number is compatible with reference month + if (DateObjectExtension.IsValidDate(reference.Year, reference.Month, num) || !this.Config.CheckBothBeforeAfter) + { + ret.Add(new Token(matchCase.Index, result.Start + result.Length ?? 0)); + isFound = true; + } } } } @@ -377,6 +336,7 @@ private List NumberWithMonth(string text, DateObject reference) // Handling cases like '20th of next month' var suffixStr = text.Substring(result.Start + result.Length ?? 0); var beginMatch = this.Config.RelativeMonthRegex.MatchBegin(suffixStr.Trim(), trim: true); + if (beginMatch.Success && beginMatch.Index == 0) { var spaceLen = suffixStr.Length - suffixStr.Trim().Length; @@ -386,6 +346,7 @@ private List NumberWithMonth(string text, DateObject reference) // Check if prefix contains 'the', include it if any var prefix = text.Substring(0, resStart ?? 
0); var prefixMatch = this.Config.PrefixArticleRegex.Match(prefix); + if (prefixMatch.Success) { resStart = prefixMatch.Index; @@ -400,7 +361,8 @@ private List NumberWithMonth(string text, DateObject reference) beginMatch = this.Config.WeekDayRegex.MatchBegin(suffixStr.Trim(), trim: true); if (beginMatch.Success && num >= 1 && num <= 5 - && result.Type.Equals(Number.Constants.SYS_NUM_ORDINAL, StringComparison.Ordinal)) + && result.Type.Equals(Number.Constants.SYS_NUM_ORDINAL, StringComparison.Ordinal) + && !this.Config.WeekDayRegex.IsExactMatch(result.Text, trim: true)) { var weekDayStr = beginMatch.Groups["weekday"].Value; if (this.Config.DayOfWeek.ContainsKey(weekDayStr)) @@ -442,25 +404,25 @@ private void ExtendWithWeekdayAndYear(ref int startIndex, ref int endIndex, int // Check whether there's a year var suffix = text.Substring(endIndex); var prefix = text.Substring(0, startIndex); - bool inSuffix = true; - endIndex += GetYearIndex(suffix, inSuffix, ref year, out bool success); + endIndex += GetYearIndex(suffix, ref year, out bool success, inPrefix: false); // Check also in prefix if (!success && Config.CheckBothBeforeAfter) { - inSuffix = false; - startIndex -= GetYearIndex(prefix, inSuffix, ref year, out success); + startIndex -= GetYearIndex(prefix, ref year, out success, inPrefix: true); } var date = DateObject.MinValue.SafeCreateFromValue(year, month, day); // Check whether there's a weekday + bool isMatchInSuffix = false; var matchWeekDay = this.Config.WeekDayEnd.Match(prefix); // Check for weekday in the suffix if (!matchWeekDay.Success) { matchWeekDay = this.Config.WeekDayStart.Match(suffix); + isMatchInSuffix = matchWeekDay.Success; } if (matchWeekDay.Success) @@ -475,7 +437,7 @@ private void ExtendWithWeekdayAndYear(ref int startIndex, ref int endIndex, int { if (!date.Equals(DateObject.MinValue) && weekDay1 == weekDay2) { - if (matchWeekDay.Index < startIndex) + if (!isMatchInSuffix) { startIndex = matchWeekDay.Index; } @@ -490,9 +452,10 @@ private 
void ExtendWithWeekdayAndYear(ref int startIndex, ref int endIndex, int // Cases like "3 days from today", "5 weeks before yesterday", "2 months after tomorrow" // Note that these cases are of type "date" - private List ExtractRelativeDurationDate(string text, DateObject reference) + private List ExtractRelativeDurationDate(string text, List tokens, DateObject reference) { var ret = new List(); + var tempTokens = new List(tokens); var durationEr = Config.DurationExtractor.Extract(text, reference); foreach (var er in durationEr) @@ -518,6 +481,36 @@ private List ExtractRelativeDurationDate(string text, DateObject referenc if (match.Success) { ret.AddRange(AgoLaterUtil.ExtractorDurationWithBeforeAndAfter(text, er, ret, Config.UtilityConfiguration)); + + // Take into account also holiday dates + if (ret.Count < 1) + { + var holidayEr = Config.HolidayExtractor.Extract(text, reference); + foreach (var holiday in holidayEr) + { + tempTokens.Add(new Token((int)holiday.Start, (int)(holiday.Start + holiday.Length))); + } + } + + // Check for combined patterns Duration + Date, e.g. 
'3 days before Monday', '4 weeks after January 15th' + if (ret.Count < 1 && tempTokens.Count > 0 && er.Text != match.Value) + { + var afterStr = text.Substring((int)er.Start + (int)er.Length); + var connector = Config.BeforeAfterRegex.MatchBegin(afterStr, trim: true); + if (connector.Success) + { + foreach (var token in tempTokens) + { + var start = (int)er.Start + (int)er.Length + connector.Index + connector.Length; + var length = token.Start - start; + if (length > 0 && start + length < text.Length && string.IsNullOrWhiteSpace(text.Substring(start, length))) + { + Token tok = new Token((int)er.Start, token.End); + ret.Add(tok); + } + } + } + } } } @@ -533,6 +526,9 @@ private List ExtractRelativeDurationDate(string text, DateObject referenc } } + // Extend extraction with weekdays like in "Friday two weeks from now", "in 3 weeks on Monday" + ret.AddRange(ExtendWithWeekDay(ret, text)); + return ret; } @@ -540,22 +536,21 @@ private void StripInequalityDuration(ExtractResult er) { if (this.Config.CheckBothBeforeAfter) { - bool inPrefix = false; - StripInequality(er, Config.MoreThanRegex, inPrefix); - StripInequality(er, Config.LessThanRegex, inPrefix); + StripInequality(er, Config.MoreThanRegex, inPrefix: false); + StripInequality(er, Config.LessThanRegex, inPrefix: false); } else { - bool inPrefix = true; - StripInequality(er, Config.MoreThanRegex, inPrefix); - StripInequality(er, Config.LessThanRegex, inPrefix); + StripInequality(er, Config.MoreThanRegex, inPrefix: true); + StripInequality(er, Config.LessThanRegex, inPrefix: true); } } // Used in ExtractRelativeDurationDateWithInPrefix to extract the connector "in" in cases like "In 3 days/weeks/months/years" - private List ExtractInConnector(string text, string firstStr, string secondStr, Token duration, bool inPrefix, out bool success) + private List ExtractInConnector(string text, string firstStr, string secondStr, Token duration, out bool success, bool inPrefix) { List ret = new List(); + var match = inPrefix ? 
Config.InConnectorRegex.MatchEnd(firstStr, trim: true) : Config.InConnectorRegex.MatchBegin(firstStr, trim: true); success = match.Success; @@ -585,22 +580,51 @@ private List ExtractInConnector(string text, string firstStr, string seco return ret; } - private int GetYearIndex(string affix, bool inSuffix, ref int year, out bool success) + private int GetYearIndex(string affix, ref int year, out bool success, bool inPrefix) { int index = 0; + var matchYear = this.Config.YearSuffix.Match(affix); - success = inSuffix ? matchYear.Success && matchYear.Index == 0 : matchYear.Success && matchYear.Index + matchYear.Length == affix.TrimEnd().Length; + + success = !inPrefix ? + matchYear.Success && matchYear.Index == 0 : + matchYear.Success && matchYear.Index + matchYear.Length == affix.TrimEnd().Length; + if (success) { year = GetYearFromText(matchYear); if (year >= Constants.MinYearNum && year <= Constants.MaxYearNum) { - index = inSuffix ? matchYear.Length : matchYear.Length + (affix.Length - affix.TrimEnd().Length); + index = !inPrefix ? + matchYear.Length : + matchYear.Length + (affix.Length - affix.TrimEnd().Length); } } return index; } + + private List ExtendWithWeekDay(List ret, string text) + { + var newRet = new List(); + foreach (var er in ret) + { + var beforeStr = text.Substring(0, er.Start); + var afterStr = text.Substring(er.End); + var beforeMatch = Config.WeekDayEnd.Match(beforeStr); + var afterMatch = Config.WeekDayStart.Match(afterStr); + if (beforeMatch.Success || afterMatch.Success) + { + var start = beforeMatch.Success ? beforeMatch.Index : er.Start; + var end = beforeMatch.Success ? 
er.End : er.End + afterMatch.Index + afterMatch.Length; + Metadata metadata = new Metadata { IsDurationDateWithWeekday = true }; + Token tok = new Token(start, end, metadata); + newRet.Add(tok); + } + } + + return newRet; + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDatePeriodExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDatePeriodExtractor.cs index d19c56db6d..8e7b7e91c9 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDatePeriodExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDatePeriodExtractor.cs @@ -1,6 +1,12 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.DateTime.English; +using Microsoft.Recognizers.Text.InternalCache; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -10,11 +16,16 @@ public class BaseDatePeriodExtractor : IDateTimeExtractor { private const string ExtractorName = Constants.SYS_DATETIME_DATEPERIOD; + private static readonly ResultsCache ResultsCache = new ResultsCache(); + private readonly IDatePeriodExtractorConfiguration config; + private readonly string keyPrefix; + public BaseDatePeriodExtractor(IDatePeriodExtractorConfiguration config) { this.config = config; + keyPrefix = string.Intern(config.Options + "_" + config.LanguageMarker); } public List Extract(string text) @@ -24,20 +35,20 @@ public List Extract(string text) public List Extract(string text, DateObject reference) { - var tokens = new List(); - tokens.AddRange(MatchSimpleCases(text)); + List results; - var simpleCasesResults = Token.MergeAllTokens(tokens, text, ExtractorName); - var ordinalExtractions = config.OrdinalExtractor.Extract(text); + if ((this.config.Options & DateTimeOptions.NoProtoCache) != 0) + { + 
results = ExtractImpl(text, reference); + } + else + { + var key = (keyPrefix, text, reference); - tokens.AddRange(MergeTwoTimePoints(text, reference)); - tokens.AddRange(MatchDuration(text, reference)); - tokens.AddRange(SingleTimePointWithPatterns(text, new List(ordinalExtractions), reference)); - tokens.AddRange(MatchComplexCases(text, simpleCasesResults, reference)); - tokens.AddRange(MatchYearPeriod(text, reference)); - tokens.AddRange(MatchOrdinalNumberWithCenturySuffix(text, new List(ordinalExtractions))); + results = ResultsCache.GetOrCreate(key, () => ExtractImpl(text, reference)); + } - return Token.MergeAllTokens(tokens, text, ExtractorName); + return results; } public List MatchDuration(string text, DateObject reference) @@ -46,19 +57,17 @@ public List MatchDuration(string text, DateObject reference) var durations = new List(); var durationExtractions = config.DurationExtractor.Extract(text, reference); + foreach (var durationExtraction in durationExtractions) { - var match = config.DateUnitRegex.Match(durationExtraction.Text); - if (match.Success) + var dateUnitMatch = config.DateUnitRegex.Match(durationExtraction.Text); + if (!dateUnitMatch.Success) { - durations.Add(new Token( - durationExtraction.Start ?? 0, - durationExtraction.Start + durationExtraction.Length ?? 0)); + continue; } - } - foreach (var duration in durations) - { + var isPlurarUnit = dateUnitMatch.Groups[Constants.PluralUnit].Success; + var duration = new Token(durationExtraction.Start ?? 0, durationExtraction.Start + durationExtraction.Length ?? 
0); var beforeStr = text.Substring(0, duration.Start); var afterStr = text.Substring(duration.Start + duration.Length); @@ -67,10 +76,24 @@ public List MatchDuration(string text, DateObject reference) continue; } + // Patterns like 'first 3 weeks of 2018', 'last two months of 2020' + if (dateUnitMatch.Groups[Constants.UnitOfYearGroupName].Success) + { + var beforeMatch = this.config.FirstLastRegex.MatchEnd(beforeStr, trim: true); + if (beforeMatch.Success) + { + var afterMatch = this.config.OfYearRegex.MatchBegin(afterStr, trim: true); + if (afterMatch.Success) + { + ret.Add(new Token(beforeMatch.Index, duration.End + afterMatch.Index + afterMatch.Length)); + continue; + } + } + } + // within "Days/Weeks/Months/Years" should be handled as dateRange here // if duration contains "Seconds/Minutes/Hours", it should be treated as datetimeRange - bool inPrefix = true; - Token matchToken = MatchWithinNextPrexixRegex(text, duration, inPrefix); + Token matchToken = MatchWithinNextAffixRegex(text, duration, inPrefix: true); if (matchToken.Start >= 0) { ret.Add(matchToken); @@ -80,8 +103,7 @@ public List MatchDuration(string text, DateObject reference) // check also afterStr if (this.config.CheckBothBeforeAfter) { - inPrefix = false; - matchToken = MatchWithinNextPrexixRegex(text, duration, inPrefix); + matchToken = MatchWithinNextAffixRegex(text, duration, inPrefix: false); if (matchToken.Start >= 0) { ret.Add(matchToken); @@ -114,12 +136,13 @@ public List MatchDuration(string text, DateObject reference) { var prefix = beforeStr.Substring(0, index).Trim(); var durationText = text.Substring(duration.Start, duration.Length); + var numbersInPrefix = config.CardinalExtractor.Extract(prefix); var numbersInDuration = config.CardinalExtractor.Extract(durationText); // Cases like "2 upcoming days", should be supported here // Cases like "2 upcoming 3 days" is invalid, only extract "upcoming 3 days" by default - if (numbersInPrefix.Any() && !numbersInDuration.Any()) + if 
(numbersInPrefix.Any() && !numbersInDuration.Any() && isPlurarUnit) { var lastNumber = numbersInPrefix.OrderBy(t => t.Start + t.Length).Last(); @@ -146,14 +169,6 @@ public List MatchDuration(string text, DateObject reference) continue; } - match = this.config.FutureRegex.MatchBegin(afterStr, trim: true); - - if (match.Success) - { - ret.Add(new Token(duration.Start, duration.End + match.Index + match.Length)); - continue; - } - match = this.config.FutureSuffixRegex.MatchBegin(afterStr, trim: true); if (match.Success) @@ -169,13 +184,18 @@ public List MatchDuration(string text, DateObject reference) private static List GetTokenForRegexMatching(string text, Regex regex, ExtractResult er, bool inPrefix) { var ret = new List(); + var match = regex.Match(text); - bool isMatchAtEdge = inPrefix ? text.Trim().EndsWith(match.Value.Trim()) : text.Trim().StartsWith(match.Value.Trim()); + bool isMatchAtEdge = inPrefix ? + text.Trim().EndsWith(match.Value.Trim(), StringComparison.Ordinal) : + text.Trim().StartsWith(match.Value.Trim(), StringComparison.Ordinal); + if (match.Success && isMatchAtEdge) { - var startIndex = inPrefix ? text.LastIndexOf(match.Value) : (int)er.Start; + var startIndex = inPrefix ? text.LastIndexOf(match.Value, StringComparison.Ordinal) : (int)er.Start; var endIndex = (int)er.Start + (int)er.Length; endIndex += inPrefix ? 
0 : match.Index + match.Length; + ret.Add(new Token(startIndex, endIndex)); } @@ -188,7 +208,7 @@ private static bool InfixBoundaryCheck(Match match, string source) bool isMatchInfixOfSource = false; if (match.Index > 0 && match.Index + match.Length < source.Length) { - if (source.Substring(match.Index, match.Length).Equals(match.Value)) + if (source.AsSpan(match.Index, match.Length).Equals(match.Value.AsSpan(), StringComparison.InvariantCulture)) { isMatchInfixOfSource = true; } @@ -318,6 +338,24 @@ private static bool HasDigitNumberAfterDash(string source, int dashSuffixIndex, return hasDigitNumberAfterDash; } + private List ExtractImpl(string text, DateObject reference) + { + var tokens = new List(); + tokens.AddRange(MatchSimpleCases(text)); + + var simpleCasesResults = Token.MergeAllTokens(tokens, text, ExtractorName); + var ordinalExtractions = config.OrdinalExtractor.Extract(text); + + tokens.AddRange(MergeTwoTimePoints(text, reference)); + tokens.AddRange(MatchDuration(text, reference)); + tokens.AddRange(SingleTimePointWithPatterns(text, ordinalExtractions, reference)); + tokens.AddRange(MatchComplexCases(text, simpleCasesResults, reference)); + tokens.AddRange(MatchYearPeriod(text, reference)); + tokens.AddRange(MatchOrdinalNumberWithCenturySuffix(text, ordinalExtractions)); + + return Token.MergeAllTokens(tokens, text, ExtractorName); + } + // Cases like "21st century" private List MatchOrdinalNumberWithCenturySuffix(string text, List ordinalExtractions) { @@ -330,12 +368,12 @@ private List MatchOrdinalNumberWithCenturySuffix(string text, List MatchSimpleCases(string text) foreach (var regex in this.config.SimpleCasesRegexes) { var matches = regex.Matches(text); + foreach (Match match in matches) { var matchYear = this.config.YearRegex.Match(match.Value); + if (matchYear.Success && matchYear.Length == match.Value.Length) { var year = config.DatePointExtractor.GetYearFromText(matchYear); + if (!(year >= Constants.MinYearNum && year <= 
Constants.MaxYearNum)) { continue; @@ -437,6 +478,7 @@ private List MatchSimpleCases(string text) if (InfixBoundaryCheck(match, text)) { var substr = text.Substring(match.Index - 1, 6); + if (this.config.IllegalYearRegex.IsMatch(substr)) { continue; @@ -511,6 +553,9 @@ private List MatchComplexCases(string text, List simpleDat er = er.OrderBy(t => t.Start).ToList(); + // Handle "now" + er = MatchNow(text, er); + return MergeMultipleExtractions(text, er); } @@ -519,22 +564,7 @@ private List MergeTwoTimePoints(string text, DateObject reference) var er = this.config.DatePointExtractor.Extract(text, reference); // Handle "now" - var matches = this.config.NowRegex.Matches(text); - if (matches.Count != 0) - { - foreach (Match match in matches) - { - var nowEr = new ExtractResult - { - Start = match.Index, - Length = match.Length, - }; - er.Add(nowEr); - - } - - er = er.OrderBy(o => o.Start).ToList(); - } + er = MatchNow(text, er); return MergeMultipleExtractions(text, er); } @@ -565,20 +595,38 @@ private List MergeMultipleExtractions(string text, List ex } var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim(); + var endPointStr = extractionResults[idx + 1].Text; - if (config.TillRegex.IsExactMatch(middleStr, trim: true)) + if (config.TillRegex.IsExactMatch(middleStr, trim: true) || (string.IsNullOrEmpty(middleStr) && + config.TillRegex.MatchBegin(endPointStr, trim: true).Success)) { var periodBegin = extractionResults[idx].Start ?? 0; var periodEnd = (extractionResults[idx + 1].Start ?? 0) + (extractionResults[idx + 1].Length ?? 0); // handle "from/between" together with till words (till/until/through...) var beforeStr = text.Substring(0, periodBegin).Trim(); + if (this.config.GetFromTokenIndex(beforeStr, out int fromIndex) || this.config.GetBetweenTokenIndex(beforeStr, out fromIndex)) { periodBegin = fromIndex; } + // handle "between...and..." 
case when "between" follows the datepoints + if (this.config.CheckBothBeforeAfter) + { + var afterStr = text.Substring(periodEnd, text.Length - periodEnd); + if (this.config.GetBetweenTokenIndex(afterStr, out int afterIndex)) + { + periodEnd += afterIndex; + ret.Add(new Token(periodBegin, periodEnd, metadata)); + + // merge two tokens here, increase the index by two + idx += 2; + continue; + } + } + ret.Add(new Token(periodBegin, periodEnd, metadata)); // merge two tokens here, increase the index by two @@ -633,7 +681,7 @@ private List SingleTimePointWithPatterns(string text, List var datePoints = this.config.DatePointExtractor.Extract(text, reference); // For cases like "week of the 18th" - datePoints.AddRange(ordinalExtractions.Where(o => !datePoints.Any(er => er.IsOverlap(o)))); + datePoints.AddRange(ordinalExtractions.Where(o => !datePoints.Any(er => er.IsOverlap(o)) && !o.Metadata.IsOrdinalRelative)); if (datePoints.Count < 1) { @@ -644,52 +692,79 @@ private List SingleTimePointWithPatterns(string text, List { if (extractionResult.Start != null && extractionResult.Length != null) { - bool inPrefix = true; var beforeString = text.Substring(0, (int)extractionResult.Start); - var afterString = text.Substring((int)extractionResult.Start + (int)extractionResult.Length, text.Length - (int)extractionResult.Start - (int)extractionResult.Length); - ret.AddRange(GetTokenForRegexMatching(beforeString, config.WeekOfRegex, extractionResult, inPrefix)); - ret.AddRange(GetTokenForRegexMatching(beforeString, config.MonthOfRegex, extractionResult, inPrefix)); + var afterString = text.Substring((int)extractionResult.Start + (int)extractionResult.Length, + text.Length - (int)extractionResult.Start - (int)extractionResult.Length); + + ret.AddRange(GetTokenForRegexMatching(beforeString, config.WeekOfRegex, extractionResult, inPrefix: true)); + ret.AddRange(GetTokenForRegexMatching(beforeString, config.MonthOfRegex, extractionResult, inPrefix: true)); // Check also afterString if 
(this.config.CheckBothBeforeAfter) { - inPrefix = false; - ret.AddRange(GetTokenForRegexMatching(afterString, config.WeekOfRegex, extractionResult, inPrefix)); - ret.AddRange(GetTokenForRegexMatching(afterString, config.MonthOfRegex, extractionResult, inPrefix)); + ret.AddRange(GetTokenForRegexMatching(afterString, config.WeekOfRegex, extractionResult, inPrefix: false)); + ret.AddRange(GetTokenForRegexMatching(afterString, config.MonthOfRegex, extractionResult, inPrefix: false)); } // Cases like "3 days from today", "2 weeks before yesterday", "3 months after tomorrow" if (IsRelativeDurationDate(extractionResult)) { - inPrefix = true; - ret.AddRange(GetTokenForRegexMatching(beforeString, config.LessThanRegex, extractionResult, inPrefix)); - ret.AddRange(GetTokenForRegexMatching(beforeString, config.MoreThanRegex, extractionResult, inPrefix)); + ret.AddRange(GetTokenForRegexMatching(beforeString, config.LessThanRegex, extractionResult, inPrefix: true)); + ret.AddRange(GetTokenForRegexMatching(beforeString, config.MoreThanRegex, extractionResult, inPrefix: true)); // Check also afterString if (this.config.CheckBothBeforeAfter) { - inPrefix = false; - ret.AddRange(GetTokenForRegexMatching(afterString, config.LessThanRegex, extractionResult, inPrefix)); - ret.AddRange(GetTokenForRegexMatching(afterString, config.MoreThanRegex, extractionResult, inPrefix)); + ret.AddRange(GetTokenForRegexMatching(afterString, config.LessThanRegex, extractionResult, inPrefix: false)); + ret.AddRange(GetTokenForRegexMatching(afterString, config.MoreThanRegex, extractionResult, inPrefix: false)); } // For "within" case, only duration with relative to "today" or "now" makes sense // Cases like "within 3 days from yesterday/tomorrow" does not make any sense if (IsDateRelativeToNowOrToday(extractionResult)) { - inPrefix = true; - var tokens = ExtractWithinNextPrefix(beforeString, inPrefix, extractionResult); + var tokens = ExtractWithinNextPrefix(beforeString, extractionResult, inPrefix: 
true); ret.AddRange(tokens); // check also afterString if (this.config.CheckBothBeforeAfter && tokens.Count == 0) { - inPrefix = false; - tokens = ExtractWithinNextPrefix(afterString, inPrefix, extractionResult); + tokens = ExtractWithinNextPrefix(afterString, extractionResult, inPrefix: false); ret.AddRange(tokens); } } } + + // For cases like "for 1 week from today", "for 3 days from 20th May" etc.. + if (EnglishDatePeriodExtractorConfiguration.ForPrefixRegex != null) + { + Match prefixMatchFor = EnglishDatePeriodExtractorConfiguration.ForPrefixRegex.Match(beforeString); + Match datepointMatchFrom = EnglishDatePeriodExtractorConfiguration.ForPrefixRegex.Match(extractionResult.Text); + if (prefixMatchFor.Success && prefixMatchFor.Groups[Constants.ForGroupName].Success + && datepointMatchFrom.Success && datepointMatchFrom.Groups[Constants.FromGroupName].Success) + { + ret.AddRange(GetTokenForRegexMatching(beforeString, EnglishDatePeriodExtractorConfiguration.ForPrefixRegex, extractionResult, inPrefix: true)); + } + } + + // For cases like xx weeks/days starting (from) a date point + if (this.config as EnglishDatePeriodExtractorConfiguration != null) + { + var match = EnglishDatePeriodExtractorConfiguration.StartingRegex.MatchEnd(beforeString, true); + if (match.Success) + { + var durationERs = this.config.DurationExtractor.Extract(beforeString); + if (durationERs.Count >= 1) + { + var lastDuration = durationERs[durationERs.Count - 1]; + string startingWord = beforeString.Substring(beforeString.LastIndexOf(lastDuration.Text, StringComparison.Ordinal) + lastDuration.Text.Length); + if (startingWord.Trim() == match.Value.Trim()) + { + ret.Add(new Token(lastDuration.Start ?? 0, (extractionResult.Start ?? 0) + (extractionResult.Length ?? 0))); + } + } + } + } } } @@ -724,16 +799,22 @@ private bool IsDateRelativeToNowOrToday(ExtractResult er) } // Matches "within (the next)?" 
part (in beforeStr or afterStr) in "within Days/Weeks/Months/Years" - private Token MatchWithinNextPrexixRegex(string text, Token duration, bool inPrefix) + private Token MatchWithinNextAffixRegex(string text, Token duration, bool inPrefix) { - var beforeStr = text.Substring(0, duration.Start); - var afterStr = text.Substring(duration.Start + duration.Length); int startToken = -1; int endToken = -1; - var match = inPrefix ? config.WithinNextPrefixRegex.MatchEnd(beforeStr, trim: true) : config.WithinNextPrefixRegex.MatchBegin(afterStr, trim: true); + + var beforeStr = text.Substring(0, duration.Start); + var afterStr = text.Substring(duration.Start + duration.Length); + + var match = inPrefix ? + config.WithinNextPrefixRegex.MatchEnd(beforeStr, trim: true) : + config.WithinNextPrefixRegex.MatchBegin(afterStr, trim: true); + if (match.Success) { var durationStr = text.Substring(duration.Start, duration.Length); + var matchDate = config.DateUnitRegex.Match(durationStr); var matchTime = config.TimeUnitRegex.Match(durationStr); @@ -741,6 +822,7 @@ private Token MatchWithinNextPrexixRegex(string text, Token duration, bool inPre { startToken = inPrefix ? match.Index : duration.Start; endToken = inPrefix ? 
duration.End : duration.End + match.Index + match.Length; + if (!inPrefix) { // Check prefix for "next" @@ -756,10 +838,12 @@ private Token MatchWithinNextPrexixRegex(string text, Token duration, bool inPre return new Token(startToken, endToken); } - private List ExtractWithinNextPrefix(string subStr, bool inPrefix, ExtractResult extractionResult) + private List ExtractWithinNextPrefix(string subStr, ExtractResult extractionResult, bool inPrefix) { var tokens = new List(); + var match = this.config.WithinNextPrefixRegex.Match(subStr); + if (match.Success) { var isNext = !string.IsNullOrEmpty(match.Groups[Constants.NextGroupName].Value); @@ -774,5 +858,30 @@ private List ExtractWithinNextPrefix(string subStr, bool inPrefix, Extrac return tokens; } + + // Handle cases with "now" + private List MatchNow(string text, List er) + { + var matches = this.config.NowRegex.Matches(text); + if (matches.Count != 0) + { + foreach (Match match in matches) + { + var nowEr = new ExtractResult + { + Start = match.Index, + Length = match.Length, + Text = text.Substring(match.Index, match.Length), + }; + + er.Add(nowEr); + + } + + er = er.OrderBy(o => o.Start).ToList(); + } + + return er; + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeAltExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeAltExtractor.cs index 980b61d12d..77f69ab47e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeAltExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeAltExtractor.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; @@ -242,8 +245,8 @@ private static bool ApplyParentTextMetadata(List extractResults, private static void ApplyMetadata(List ers, Dictionary metadata, string parentText) { // Share the timeZone info - var metaDataOrigin = ers[0].Data as Dictionary; - if (metaDataOrigin != null && metaDataOrigin.ContainsKey(Constants.SYS_DATETIME_TIMEZONE)) + if (ers[0].Data is Dictionary metaDataOrigin && + metaDataOrigin.ContainsKey(Constants.SYS_DATETIME_TIMEZONE)) { metadata.Add(Constants.SYS_DATETIME_TIMEZONE, metaDataOrigin[Constants.SYS_DATETIME_TIMEZONE]); } @@ -361,7 +364,7 @@ private List AddImplicitDates(List originalErs, st // in which "18" is wrongly recognized as time without context. var nextEr = originalErs[i + 1]; if (nextEr.Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) && - originalErs[i].Text.Equals(dateEr.Text) && + originalErs[i].Text.Equals(dateEr.Text, StringComparison.Ordinal) && IsConnectorOrWhiteSpace((int)(dateEr.Start + dateEr.Length), (int)nextEr.Start, text)) { ret.Add(dateEr); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeExtractor.cs index 97ab5fb0bd..79e1561b1d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeExtractor.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; @@ -27,12 +30,17 @@ public List Extract(string text, DateObject reference) { var tokens = new List(); tokens.AddRange(MergeDateAndTime(text, reference)); + + if ((config.Options & DateTimeOptions.TasksMode) != 0) + { + tokens.AddRange(MergeHolidayAndTime(text, reference)); + } + tokens.AddRange(BasicRegexMatch(text)); tokens.AddRange(TimeOfTodayBefore(text, reference)); tokens.AddRange(TimeOfTodayAfter(text, reference)); tokens.AddRange(SpecialTimeOfDate(text, reference)); tokens.AddRange(DurationWithBeforeAndAfter(text, reference)); - tokens.AddRange(SpecialTimeOfDay(text, reference)); return Token.MergeAllTokens(tokens, text, ExtractorName); } @@ -87,12 +95,33 @@ public List MergeDateAndTime(string text, DateObject reference) Text = match.Value, Type = Number.Constants.SYS_NUM_INTEGER, }; + numErs.Add(node); } ers.AddRange(numErs); } + // handle cases which use numbers + desc as time points + if (timeNumMatches.Cast().Any(match => match.Groups[Constants.DescGroupName].Success)) + { + var numDescErs = new List(); + foreach (var match in timeNumMatches.Cast().Where(match => match.Groups[Constants.DescGroupName].Success)) + { + var node = new ExtractResult + { + Start = match.Index, + Length = match.Length, + Text = match.Value, + Type = Constants.SYS_DATETIME_TIME, + }; + + numDescErs.Add(node); + } + + ers.AddRange(numDescErs); + } + ers = ers.OrderBy(o => o.Start).ToList(); var i = 0; @@ -116,6 +145,7 @@ public List MergeDateAndTime(string text, DateObject reference) (ers[i].Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) && ers[j].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal))) { + var middleBegin = ers[i].Start + ers[i].Length ?? 0; var middleEnd = ers[j].Start ?? 
0; if (middleBegin > middleEnd) @@ -195,6 +225,151 @@ public List MergeDateAndTime(string text, DateObject reference) return ret; } + // Merge a Holiday entity and a Time entity, like "on christmas at 5" + public List MergeHolidayAndTime(string text, DateObject reference) + { + var ret = new List(); + var dateErs = this.config.HolidayExtractor.Extract(text, reference); + if (dateErs.Count == 0) + { + return ret; + } + + var timeErs = this.config.TimePointExtractor.Extract(text, reference); + var timeNumMatches = this.config.NumberAsTimeRegex.Matches(text); + if (timeErs.Count == 0 && timeNumMatches.Count == 0) + { + return ret; + } + + var ers = dateErs; + ers.AddRange(timeErs); + + // handle cases which use numbers as time points + // only enabled in CalendarMode + if ((this.config.Options & DateTimeOptions.CalendarMode) != 0) + { + var numErs = new List(); + for (var idx = 0; idx < timeNumMatches.Count; idx++) + { + var match = timeNumMatches[idx]; + var node = new ExtractResult + { + Start = match.Index, + Length = match.Length, + Text = match.Value, + Type = Number.Constants.SYS_NUM_INTEGER, + }; + + numErs.Add(node); + } + + ers.AddRange(numErs); + } + + ers = ers.OrderBy(o => o.Start).ToList(); + + var i = 0; + while (i < ers.Count - 1) + { + var j = i + 1; + while (j < ers.Count && ers[i].IsOverlap(ers[j])) + { + j++; + } + + if (j >= ers.Count) + { + break; + } + + if ((ers[i].Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) && + ers[j].Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) || + (ers[i].Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal) && + ers[j].Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal)) || + (ers[i].Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) && + ers[j].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal))) + { + + var middleBegin = ers[i].Start + ers[i].Length ?? 0; + var middleEnd = ers[j].Start ?? 
0; + if (middleBegin > middleEnd) + { + i = j + 1; + continue; + } + + var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim(); + var valid = false; + + // for cases like "christmas 3", "chritmas at 3" + if (ers[j].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) + { + var match = this.config.DateNumberConnectorRegex.Match(middleStr); + if (string.IsNullOrEmpty(middleStr) || match.Success) + { + valid = true; + } + } + else + { + // For case like "3 pm or later on christmas" + var match = this.config.SuffixAfterRegex.Match(middleStr); + if (match.Success) + { + middleStr = middleStr.Substring(match.Index + match.Length, middleStr.Length - match.Length).Trim(); + } + + if (!(match.Success && middleStr.Length == 0)) + { + if (this.config.IsConnector(middleStr)) + { + valid = true; + } + } + } + + if (valid) + { + var begin = ers[i].Start ?? 0; + var end = (ers[j].Start ?? 0) + (ers[j].Length ?? 0); + + ExtendWithDateTimeAndYear(ref begin, ref end, text, reference); + + ret.Add(new Token(begin, end)); + i = j + 1; + continue; + } + } + + i = j; + } + + // Handle "in the afternoon" at the end of entity + for (var idx = 0; idx < ret.Count; idx++) + { + var afterStr = text.Substring(ret[idx].End); + var match = this.config.SuffixRegex.Match(afterStr); + if (match.Success) + { + ret[idx] = new Token(ret[idx].Start, ret[idx].End + match.Length); + } + } + + // Handle "day" prefixes + for (var idx = 0; idx < ret.Count; idx++) + { + var beforeStr = text.Substring(0, ret[idx].Start); + var match = this.config.UtilityConfiguration.CommonDatePrefixRegex.Match(beforeStr); + if (match.Success) + { + ret[idx] = new Token(ret[idx].Start - match.Length, ret[idx].End); + } + } + + return ret; + } + // Parses a specific time of today, tonight, this afternoon, like "seven this afternoon" public List TimeOfTodayAfter(string text, DateObject reference) { @@ -305,19 +480,6 @@ public List SpecialTimeOfDate(string text, DateObject reference) 
return ret; } - // Special case for 'the end of today' - public List SpecialTimeOfDay(string text, DateObject reference) - { - var ret = new List(); - var match = this.config.SpecificEndOfRegex.Match(text); - if (match.Success) - { - ret.Add(new Token(match.Index, text.Length)); - } - - return ret; - } - // Process case like "two minutes ago" "three hours later" private List DurationWithBeforeAndAfter(string text, DateObject reference) { @@ -327,7 +489,7 @@ private List DurationWithBeforeAndAfter(string text, DateObject reference foreach (var er in durationEr) { // if it is a multiple duration and its type is equal to Date then skip it. - if (er.Data != null && er.Data.ToString() == Constants.MultipleDuration_Date) + if (er.Data != null && er.Data.ToString() is Constants.MultipleDuration_Date) { continue; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimePeriodExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimePeriodExtractor.cs index bed3e5803e..0bcbf84478 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimePeriodExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimePeriodExtractor.cs @@ -1,7 +1,11 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.DateTime.English; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -29,6 +33,15 @@ public List Extract(string text, DateObject reference) // Date and time Extractions should be extracted from the text only once, and shared in the methods below, passed by value var dateErs = config.SingleDateExtractor.Extract(text, reference); + + // adding support for merging holiday dates with timerange references. 
+ + if ((config.Options & DateTimeOptions.TasksMode) != 0) + { + var holidates = config.HolidayExtractor.Extract(text, reference); + dateErs.AddRange(holidates); + } + var timeErs = config.SingleTimeExtractor.Extract(text, reference); tokens.AddRange(MatchSimpleCases(text, reference)); @@ -39,6 +52,9 @@ public List Extract(string text, DateObject reference) tokens.AddRange(MatchDateWithPeriodPrefix(text, reference, new List(dateErs))); tokens.AddRange(MergeDateWithTimePeriodSuffix(text, new List(dateErs), new List(timeErs))); + // Extracting cases like [duration] starting [datetime] + tokens.AddRange(MatchStartingWithDuration(text, reference)); + var ers = Token.MergeAllTokens(tokens, text, ExtractorName); if ((this.config.Options & DateTimeOptions.EnablePreview) != 0) @@ -103,22 +119,22 @@ private IEnumerable MergeDateWithTimePeriodSuffix(string text, List MergeDateWithTimePeriodSuffix(string text, List @@ -186,6 +202,16 @@ private IEnumerable MatchDateWithPeriodPrefix(string text, DateObject ref { ret.Add(new Token(match.Index, dateStrEnd)); } + else if (this.config.CheckBothBeforeAfter) + { + // Check also afterStr + var afterStr = text.Substring(dateStrEnd, text.Length - dateStrEnd); + var matchAfter = this.config.PrefixDayRegex.MatchBegin(afterStr, trim: true); + if (matchAfter.Success) + { + ret.Add(new Token((int)dateEr.Start, dateStrEnd + matchAfter.Index + matchAfter.Length)); + } + } } return ret; @@ -205,6 +231,14 @@ private List MatchSimpleCases(string text, DateObject reference) if (!string.IsNullOrEmpty(beforeStr)) { var ers = this.config.SingleDateExtractor.Extract(beforeStr, reference); + + // adding support for merging holiday with timeperiod + if ((config.Options & DateTimeOptions.TasksMode) != 0) + { + var holidates = config.HolidayExtractor.Extract(beforeStr, reference); + ers.AddRange(holidates); + } + if (ers.Count > 0) { var er = ers.Last(); @@ -224,6 +258,14 @@ private List MatchSimpleCases(string text, DateObject reference) { // Is it 
followed by a date? var er = this.config.SingleDateExtractor.Extract(followedStr, reference); + + // check if follwed by holiday? + if ((config.Options & DateTimeOptions.TasksMode) != 0) + { + var holidates = config.HolidayExtractor.Extract(followedStr, reference); + er.AddRange(holidates); + } + if (er.Count > 0) { var begin = er[0].Start ?? 0; @@ -352,6 +394,18 @@ private List MergeTwoTimePoints(string text, DateObject reference, List x.Start).ToList(); for (idx = 0; idx < points.Count - 1; idx++) @@ -367,13 +421,30 @@ private List MergeTwoTimePoints(string text, DateObject reference, List 0) { var midStr = text.Substring(midBegin, midEnd - midBegin); - if (string.IsNullOrWhiteSpace(midStr) || midStr.TrimStart().StartsWith(config.TokenBeforeDate)) + bool isMatchTokenBeforeDate = string.IsNullOrWhiteSpace(midStr) || + (midStr.TrimStart().StartsWith(config.TokenBeforeDate, StringComparison.Ordinal) && + (points[idx + 1].Type == Constants.SYS_DATETIME_DATE || points[idx + 1].Type == Constants.SYS_DATETIME_DATETIME)); + + if (this.config.CheckBothBeforeAfter && !string.IsNullOrWhiteSpace(midStr)) + { + List tokenListBeforeDate = config.TokenBeforeDate.Split('|').ToList(); + foreach (string token in tokenListBeforeDate.Where(n => !string.IsNullOrEmpty(n))) + { + if (midStr.Trim().Equals(token, StringComparison.OrdinalIgnoreCase)) + { + isMatchTokenBeforeDate = true; + break; + } + } + } + + if (isMatchTokenBeforeDate) { // Extend date extraction for cases like "Monday evening next week" var extendedStr = points[idx].Text + text.Substring((int)(points[idx + 1].Start + points[idx + 1].Length)); - var extendedDateEr = config.SingleDateExtractor.Extract(extendedStr).FirstOrDefault(); + var extendedDateEr = config.SingleDateExtractor.Extract(extendedStr, reference).FirstOrDefault(); var offset = 0; - if (extendedDateEr != null && extendedDateEr.Start == 0) + if (extendedDateEr != null && extendedDateEr.Start == 0 && !this.config.CheckBothBeforeAfter) { offset = 
(int)(extendedDateEr.Length - points[idx].Length); } @@ -398,7 +469,7 @@ private List MatchTimeOfDay(string text, DateObject reference, List MatchTimeOfDay(string text, DateObject reference, List 0) { foreach (var tp in timeErs) @@ -517,7 +588,7 @@ private List MatchTimeOfDay(string text, DateObject reference, List 0) { foreach (var tp in timeErs) @@ -531,6 +602,26 @@ private List MatchTimeOfDay(string text, DateObject reference, List 0) + { + var beforeStr = text.Substring(0, e.Start); + if (!string.IsNullOrEmpty(beforeStr)) + { + ret.AddRange(MatchPureNumberCases(beforeStr, e, before: true)); + } + } + + // Try to extract a pure number period in after-string + if (e.End < text.Length) + { + var afterStr = text.Substring(e.End); + if (!string.IsNullOrEmpty(afterStr)) + { + ret.AddRange(MatchPureNumberCases(afterStr, e, before: false)); + } + } } return ret; @@ -546,17 +637,14 @@ private List MatchDuration(string text, DateObject reference) foreach (var durationExtraction in durationExtractions) { - var match = config.TimeUnitRegex.Match(durationExtraction.Text); - if (match.Success) + var timeUnitMatch = config.TimeUnitRegex.Match(durationExtraction.Text); + if (!timeUnitMatch.Success) { - durations.Add(new Token( - durationExtraction.Start ?? 0, - durationExtraction.Start + durationExtraction.Length ?? 0)); + continue; } - } - foreach (var duration in durations) - { + var isPlurarUnit = timeUnitMatch.Groups[Constants.PluralUnit].Success; + var duration = new Token(durationExtraction.Start ?? 0, durationExtraction.Start + durationExtraction.Length ?? 
0); var beforeStr = text.Substring(0, duration.Start); var afterStr = text.Substring(duration.Start + duration.Length); @@ -567,8 +655,7 @@ private List MatchDuration(string text, DateObject reference) // within (the) (next) "Seconds/Minutes/Hours" should be handled as datetimeRange here // within (the) (next) XX days/months/years + "Seconds/Minutes/Hours" should also be handled as datetimeRange here - bool inPrefix = true; - Token token = MatchWithinNextPrefix(beforeStr, text, duration, inPrefix); + Token token = MatchWithinNextPrefix(beforeStr, text, duration, inPrefix: true); if (token.Start >= 0) { ret.Add(token); @@ -578,8 +665,7 @@ private List MatchDuration(string text, DateObject reference) // check also afterStr if (this.config.CheckBothBeforeAfter) { - inPrefix = false; - token = MatchWithinNextPrefix(afterStr, text, duration, inPrefix); + token = MatchWithinNextPrefix(afterStr, text, duration, inPrefix: false); if (token.Start >= 0) { ret.Add(token); @@ -612,7 +698,7 @@ private List MatchDuration(string text, DateObject reference) // Cases like "2 upcoming days", should be supported here // Cases like "2 upcoming 3 days" is invalid, only extract "upcoming 3 days" by default - if (numbersInPrefix.Any() && !numbersInDuration.Any()) + if (numbersInPrefix.Any() && !numbersInDuration.Any() && isPlurarUnit) { var lastNumber = numbersInPrefix.OrderBy(t => t.Start + t.Length).Last(); @@ -710,5 +796,63 @@ private Token MatchWithinNextPrefix(string subStr, string text, Token duration, return new Token(startOut, endOut); } + // The method matches pure number ranges. It is used inside MatchTimeOfDay, so the condition IsNullOrWhiteSpace(midStr) implies + // that the range must be contiguous to a TimeOfDay expression (e.g. "last night from 7 to 9"). 
+ private List MatchPureNumberCases(string text, Token tok, bool before) + { + var ret = new List(); + foreach (var regex in this.config.SimpleCasesRegex) + { + var matches = regex.Matches(text); + foreach (Match match in matches) + { + var midStr = before ? text.Substring(match.Index + match.Length) : text.Substring(0, match.Index); + if (string.IsNullOrWhiteSpace(midStr)) + { + if (before) + { + ret.Add(new Token(match.Index, tok.Start + tok.Length)); + } + else + { + ret.Add(new Token(tok.Start, tok.End + match.Index + match.Length)); + } + } + } + } + + return ret; + } + + private List MatchStartingWithDuration(string text, DateObject reference) + { + var ret = new List(); + + if (this.config as EnglishDateTimePeriodExtractorConfiguration != null + && EnglishDateTimePeriodExtractorConfiguration.StartingRegex.Match(text).Success) + { + var dateTimeERs = this.config.SingleDateTimeExtractor.Extract(text, reference); + foreach (var dateTimeER in dateTimeERs) + { + var beforeString = text.Substring(0, (int)dateTimeER.Start); + var match = EnglishDatePeriodExtractorConfiguration.StartingRegex.MatchEnd(beforeString, true); + if (match.Success) + { + var durationERs = this.config.DurationExtractor.Extract(beforeString); + if (durationERs.Count >= 1) + { + var lastDuration = durationERs[durationERs.Count - 1]; + string startingWord = beforeString.Substring(beforeString.LastIndexOf(lastDuration.Text, StringComparison.Ordinal) + lastDuration.Text.Length); + if (startingWord.Trim() == match.Value.Trim()) + { + ret.Add(new Token((int)lastDuration.Start, (int)dateTimeER.Start + (int)dateTimeER.Length)); + } + } + } + } + } + + return ret; + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDurationExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDurationExtractor.cs index dce57584bb..95b4583e43 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDurationExtractor.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDurationExtractor.cs @@ -1,6 +1,9 @@ -using System.Collections.Generic; -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -36,6 +39,9 @@ public List Extract(string text, DateObject reference) var rets = Token.MergeAllTokens(tokens, text, ExtractorName); + // Remove common ambiguous cases + rets = ExtractResultExtension.FilterAmbiguity(rets, text, this.config.AmbiguityFiltersDict); + // First MergeMultipleDuration then ResolveMoreThanOrLessThanPrefix so cases like "more than 4 days and less than 1 week" will not be merged into one "multipleDuration" if (this.merge) { @@ -62,7 +68,7 @@ private List TagInequalityPrefix(string text, List // check also afterString if (this.config.CheckBothBeforeAfter && !match.Success) { - match = config.MoreThanRegex.MatchBegin(afterString, trim: true); + match = config.MoreThanRegex.MatchesBegin(afterString, trim: true); isMatchAfter = true; } @@ -80,7 +86,7 @@ private List TagInequalityPrefix(string text, List // check also afterString if (this.config.CheckBothBeforeAfter && !match.Success) { - match = config.LessThanRegex.MatchBegin(afterString, trim: true); + match = config.LessThanRegex.MatchesBegin(afterString, trim: true); isMatchAfter = true; } @@ -124,6 +130,16 @@ private List NumberWithUnitAndSuffix(string text, List ers) { ret.Add(new Token(er.Start, (er.Start + er.Length) + match.Length)); } + else if (this.config.CheckBothBeforeAfter) + { + // check also beforeStr + var beforeStr = text.Substring(0, er.Start); + match = this.config.SuffixAndRegex.MatchEnd(beforeStr, trim: true); + if (match.Success) + { + ret.Add(new Token(match.Index, er.Start + er.Length)); + } + } } return ret; @@ -214,7 +230,8 @@ 
private List MergeMultipleDuration(string text, List ret = new List(); + List results = new List(); + List> separateResults = new List>(); var firstExtractionIndex = 0; var timeUnit = 0; @@ -240,6 +257,9 @@ private List MergeMultipleDuration(string text, List separateList = new List() { extractorResults[firstExtractionIndex] }; + var secondExtractionIndex = firstExtractionIndex + 1; while (secondExtractionIndex < extractorResults.Count) { @@ -276,6 +296,9 @@ private List MergeMultipleDuration(string text, List MergeMultipleDuration(string text, List= 0; i--) + { + var start = (int)results[i].Start; + var end = start + (int)results[i].Length; + var beforeStr = text.Substring(0, start); + var afterStr = text.Substring(end); + var beforeMod = this.config.ModPrefixRegex.MatchEnd(beforeStr, trim: true); + var afterMod = this.config.ModSuffixRegex.MatchBegin(afterStr, trim: true); + if (beforeMod.Success && afterMod.Success) + { + results.RemoveAt(i); + results.InsertRange(i, separateResults[i]); + } + } + + return results; } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseHolidayExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseHolidayExtractor.cs index caaaa5927e..1999e9a7d1 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseHolidayExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseHolidayExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using DateObject = System.DateTime; @@ -7,6 +10,7 @@ namespace Microsoft.Recognizers.Text.DateTime public class BaseHolidayExtractor : IDateTimeExtractor { private const string ExtractorName = Constants.SYS_DATETIME_DATE; // "Date"; + private const string RangeExtractorName = Constants.SYS_DATETIME_DATEPERIOD; // "Daterange"; private readonly IHolidayExtractorConfiguration config; @@ -27,10 +31,13 @@ public List Extract(string text, DateObject reference) var ers = Token.MergeAllTokens(tokens, text, ExtractorName); foreach (var er in ers) { - er.Metadata = new Metadata + // If this is a daterange that contains a holiday, we should change its + // type to indicate that. + + if (er.Metadata?.IsHolidayRange ?? false) { - IsHoliday = true, - }; + er.Type = RangeExtractorName; + } } return ers; @@ -42,9 +49,28 @@ private List HolidayMatch(string text) foreach (var regex in this.config.HolidayRegexes) { var matches = regex.Matches(text); + foreach (Match match in matches) { - ret.Add(new Token(match.Index, match.Index + match.Length)); + var metaData = new Metadata(); + + // The objective here is to not lose the information of the holiday name + // and year (if captured) when choosing. The data is extracted from the match + // groups. 
+ + if (match.Groups[Constants.HolidayWeekend].Success) + { + metaData.IsHolidayRange = metaData.IsHolidayWeekend = true; + metaData.HolidayName = match.Groups["holiday"].Value; + if (match.Groups["year"].Success) + { + metaData.HolidayName = metaData.HolidayName + " " + match.Groups["year"].Value; + } + } + + metaData.IsHoliday = true; + + ret.Add(new Token(match.Index, match.Index + match.Length, metaData)); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseMergedDateTimeExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseMergedDateTimeExtractor.cs index 55566dea26..5f18a4b8c0 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseMergedDateTimeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseMergedDateTimeExtractor.cs @@ -1,8 +1,10 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; - using Microsoft.Recognizers.Text.Matcher; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -18,19 +20,24 @@ public BaseMergedDateTimeExtractor(IMergedExtractorConfiguration config) this.config = config; } - public static bool HasTokenIndex(string text, Regex regex, out int index) + public static bool HasTokenIndex(string text, Regex regex, out int index, bool inPrefix) { index = -1; // Support cases has two or more specific tokens // For example, "show me sales after 2010 and before 2018 or before 2000" // When extract "before 2000", we need the second "before" which will be matched in the second Regex match - var match = Regex.Match(text, regex.ToString(), RegexOptions.RightToLeft | RegexOptions.Singleline); + RegexOptions regexFlags = inPrefix ? 
RegexOptions.RightToLeft | RegexOptions.Singleline : RegexOptions.Singleline; + var match = Regex.Match(text, regex.ToString(), regexFlags); - if (match.Success && string.IsNullOrEmpty(text.Substring(match.Index + match.Length))) + if (match.Success) { - index = match.Index; - return true; + var subStr = inPrefix ? text.Substring(match.Index + match.Length) : text.Substring(0, match.Index); + if (string.IsNullOrEmpty(subStr)) + { + index = inPrefix ? match.Index : match.Length; + return true; + } } return false; @@ -39,19 +46,22 @@ public static bool HasTokenIndex(string text, Regex regex, out int index) public bool TryMergeModifierToken(ExtractResult er, Regex tokenRegex, string text, bool potentialAmbiguity = false) { var beforeStr = text.Substring(0, er.Start ?? 0); + var afterStr = text.Substring(er.Start + er.Length ?? 0); - // Avoid adding mod for ambiguity cases, such as "from" in "from ... to ..." should not add mod - if (potentialAmbiguity && this.config.AmbiguousRangeModifierPrefix != null && this.config.AmbiguousRangeModifierPrefix.IsMatch(beforeStr)) + if (HasTokenIndex(beforeStr.TrimEnd(), tokenRegex, out var tokenIndex, inPrefix: true)) { - var matches = this.config.PotentialAmbiguousRangeRegex.Matches(text).Cast(); - if (matches.Any(m => m.Index < er.Start + er.Length && m.Index + m.Length > er.Start)) + // Avoid adding mod for ambiguity cases, such as "from" in "from ... to ..." 
should not add mod + if (potentialAmbiguity && this.config.AmbiguousRangeModifierPrefix != null && this.config.AmbiguousRangeModifierPrefix.IsMatch(beforeStr.Substring(tokenIndex))) { - return false; + var matches = this.config.PotentialAmbiguousRangeRegex.Matches(text).Cast(); + + // Weak ambiguous matches are considered only if the extraction is of type range + if (matches.Any(m => m.Index < er.Start + er.Length && m.Index + m.Length > er.Start && !(m.Groups[Constants.AmbiguousPattern].Success && !er.Type.EndsWith("range")))) + { + return false; + } } - } - if (HasTokenIndex(beforeStr.TrimEnd(), tokenRegex, out var tokenIndex)) - { var modLength = beforeStr.Length - tokenIndex; er.Length += modLength; @@ -62,6 +72,22 @@ public bool TryMergeModifierToken(ExtractResult er, Regex tokenRegex, string tex return true; } + else if (this.config.CheckBothBeforeAfter) + { + // check also afterStr + afterStr = text.Substring(er.Start + er.Length ?? 0); + if (HasTokenIndex(afterStr.TrimStart(), tokenRegex, out tokenIndex, inPrefix: false)) + { + var modLength = tokenIndex + afterStr.Length - afterStr.TrimStart().Length; + + er.Length += modLength; + er.Text = text.Substring(er.Start ?? 0, er.Length ?? 
0); + er.Data = Constants.HAS_MOD; + er.Metadata = AssignModMetadata(er.Metadata); + + return true; + } + } return false; } @@ -86,12 +112,11 @@ public List Extract(string text, DateObject reference) var originText = text; List> superfluousWordMatches = null; + + // Push if ((this.config.Options & DateTimeOptions.EnablePreview) != 0) { - text = MatchingUtil.PreProcessTextRemoveSuperfluousWords( - text, - this.config.SuperfluousWordMatcher, - out superfluousWordMatches); + text = MatchingUtil.PreProcessTextRemoveSuperfluousWords(text, this.config.SuperfluousWordMatcher, out superfluousWordMatches); } // The order is important, since there can be conflicts in merging @@ -123,7 +148,7 @@ public List Extract(string text, DateObject reference) ret = FilterUnspecificDatePeriod(ret); // Remove common ambiguous cases - ret = FilterAmbiguity(ret, text); + ret = ExtractResultExtension.FilterAmbiguity(ret, text, this.config.AmbiguityFiltersDict); ret = AddMod(ret, text); @@ -135,6 +160,10 @@ public List Extract(string text, DateObject reference) ret = ret.OrderBy(p => p.Start).ToList(); + // Merge overlapping results + ret = ExtractResultExtension.MergeAllResults(ret); + + // Pop if ((this.config.Options & DateTimeOptions.EnablePreview) != 0) { ret = MatchingUtil.PostProcessRecoverSuperfluousWords(ret, superfluousWordMatches, originText); @@ -191,6 +220,14 @@ private void AddTo(List dst, List src, string text } } + if ((config.Options & DateTimeOptions.TasksMode) != 0) + { + if (ShouldSkipOnlyYear(result) || TasksModeFilters(result)) + { + continue; + } + } + var isFound = false; var overlapIndexes = new List(); var firstIndex = -1; @@ -238,31 +275,31 @@ private bool ShouldSkipFromToMerge(ExtractResult er) return config.FromToRegex.IsMatch(er.Text); } - private List FilterUnspecificDatePeriod(List ers) + private bool TasksModeFilters(ExtractResult er) { - ers.RemoveAll(o => this.config.UnspecificDatePeriodRegex.IsMatch(o.Text)); - return ers; - } + var match = 
config.TasksModeMentionFilters.Match(er.Text); - private List FilterAmbiguity(List extractResults, string text) - { - if (this.config.AmbiguityFiltersDict != null) + if (match.Success) { - foreach (var regex in this.config.AmbiguityFiltersDict) - { - foreach (var extractResult in extractResults) - { - if (regex.Key.IsMatch(extractResult.Text)) - { - var matches = regex.Value.Matches(text).Cast(); - extractResults = extractResults.Where(er => !matches.Any(m => m.Index < er.Start + er.Length && m.Index + m.Length > er.Start)) - .ToList(); - } - } - } + return true; } - return extractResults; + return false; + } + + /*Under TasksMode: Should not treat a four-digit number as a daterange if the input text does not include a month or year reference. + It should not treat 2005 as a daterange in statements like "Milk 2005." + (The year 2005 should be treated as a number only.) + */ + private bool ShouldSkipOnlyYear(ExtractResult er) + { + return config.YearRegex.Match(er.Text).Value == er.Text; + } + + private List FilterUnspecificDatePeriod(List ers) + { + ers.RemoveAll(o => this.config.UnspecificDatePeriodRegex.IsMatch(o.Text)); + return ers; } // Handle cases like "move 3pm appointment to 4" @@ -299,7 +336,11 @@ private List AddMod(List ers, string text) { foreach (var er in ers) { - var success = TryMergeModifierToken(er, config.BeforeRegex, text); + // AroundRegex is matched non-exclusively before the other relative regexes in order to catch also combined modifiers e.g. 
"before around 1pm" + TryMergeModifierToken(er, config.AroundRegex, text); + + // BeforeRegex in Dutch contains the term "voor" which is ambiguous (meaning both "for" and "before") + var success = TryMergeModifierToken(er, config.BeforeRegex, text, potentialAmbiguity: true); if (!success) { @@ -314,12 +355,7 @@ private List AddMod(List ers, string text) if (!success) { - TryMergeModifierToken(er, config.AroundRegex, text); - } - - if (!success) - { - TryMergeModifierToken(er, config.EqualRegex, text); + success = TryMergeModifierToken(er, config.EqualRegex, text); } if (er.Type.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal) || @@ -329,34 +365,38 @@ private List AddMod(List ers, string text) // 2012 or after/above, 3 pm or later var afterStr = text.Substring((er.Start ?? 0) + (er.Length ?? 0)); - var match = config.SuffixAfterRegex.MatchBegin(afterStr.TrimStart(), trim: true); - - if (match.Success) + if (afterStr.Length > 1) { - var isFollowedByOtherEntity = true; - if (match.Length == afterStr.Trim().Length) - { - isFollowedByOtherEntity = false; - } - else + var match = config.SuffixAfterRegex.MatchBegin(afterStr.TrimStart(), trim: true); + + if (match.Success && match.Value != ".") { - var nextStr = afterStr.Trim().Substring(match.Length).Trim(); - var nextEr = ers.FirstOrDefault(t => t.Start > er.Start); + var isFollowedByOtherEntity = true; - if (nextEr == null || !nextStr.StartsWith(nextEr.Text)) + if (match.Length == afterStr.Trim().Length) { isFollowedByOtherEntity = false; } - } + else + { + var nextStr = afterStr.Trim().Substring(match.Length).Trim(); + var nextEr = ers.FirstOrDefault(t => t.Start > er.Start); - if (!isFollowedByOtherEntity) - { - var modLength = match.Length + afterStr.IndexOf(match.Value, StringComparison.Ordinal); - er.Length += modLength; - er.Text = text.Substring(er.Start ?? 0, er.Length ?? 
0); + if (nextEr == null || !nextStr.StartsWith(nextEr.Text, StringComparison.Ordinal)) + { + isFollowedByOtherEntity = false; + } + } + + if (!isFollowedByOtherEntity) + { + var modLength = match.Length + afterStr.IndexOf(match.Value, StringComparison.Ordinal); + er.Length += modLength; + er.Text = text.Substring(er.Start ?? 0, er.Length ?? 0); - er.Metadata = AssignModMetadata(er.Metadata); + er.Metadata = AssignModMetadata(er.Metadata); + } } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseSetExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseSetExtractor.cs index 7e7a39faf0..e945cc79c2 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseSetExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseSetExtractor.cs @@ -1,6 +1,10 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.Number; using DateObject = System.DateTime; @@ -28,6 +32,12 @@ public List Extract(string text, DateObject reference) tokens.AddRange(MatchEachUnit(text)); tokens.AddRange(MatchEachDuration(text, reference)); tokens.AddRange(TimeEveryday(text, reference)); + + if ((config.Options & DateTimeOptions.TasksMode) != 0) + { + tokens.AddRange(DayEveryweek(text, reference)); + } + tokens.AddRange(MatchEach(config.DateExtractor, text, reference)); tokens.AddRange(MatchEach(config.TimeExtractor, text, reference)); tokens.AddRange(MatchEach(config.DateTimeExtractor, text, reference)); @@ -62,6 +72,7 @@ public List MatchEachDuration(string text, DateObject reference) return ret; } + // every month, weekly, quarterly etc public List MatchEachUnit(string text) { var ret = new List(); @@ -77,7 +88,24 @@ public List MatchEachUnit(string text) matches = this.config.EachUnitRegex.Matches(text); foreach (Match match in matches) { - ret.Add(new 
Token(match.Index, match.Index + match.Length)); + if (match.Groups["unit"].Value.Equals("month")) + { + var beforeStr = text.Substring(0, match.Index); + var dayMatch = this.config.BeforeEachDayRegex.Match(beforeStr); + + if (dayMatch.Success) + { + ret.Add(new Token(dayMatch.Index, match.Index + match.Length)); + } + else + { + ret.Add(new Token(match.Index, match.Index + match.Length)); + } + } + else + { + ret.Add(new Token(match.Index, match.Index + match.Length)); + } } return ret; @@ -87,25 +115,106 @@ public virtual List TimeEveryday(string text, DateObject reference) { var ret = new List(); var ers = this.config.TimeExtractor.Extract(text, reference); + + if ((config.Options & DateTimeOptions.TasksMode) != 0) + { + var ersTimePeriod = this.config.TimePeriodExtractor.Extract(text, reference); + if (ers.Count == 0 && ersTimePeriod.Count == 1) + { + ers = ersTimePeriod; + } + } + foreach (var er in ers) { var afterStr = text.Substring(er.Start + er.Length ?? 0); - if (string.IsNullOrEmpty(afterStr) && this.config.BeforeEachDayRegex != null) + var beforeStr = text.Substring(0, er.Start ?? 0); + var beforeMatch = this.config.EachDayRegex.Match(beforeStr); + var startIndexBeforeMatch = beforeMatch.Length + beforeMatch.Index - beforeMatch.Value.TrimStart().Length; + if (beforeMatch.Success) + { + ret.Add(new Token(startIndexBeforeMatch, er.Start + er.Length ?? 0)); + } + + var match = this.config.EachDayRegex.Match(afterStr); + if (match.Success) { - var beforeStr = text.Substring(0, er.Start ?? 0); - var beforeMatch = this.config.BeforeEachDayRegex.Match(beforeStr); - if (beforeMatch.Success) + ret.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 
0) + match.Length + match.Index)); + } + } + + return ret; + } + + // Handle cases like 19th of every month: For now specific to TasksMode + public virtual List DayEveryweek(string text, DateObject reference) + { + var ret = new List(); + var ers = this.config.DateExtractor.Extract(text, reference); + + // @TODO change call to the Number recognizer, it has to config specific. + if (NumberRecognizer.RecognizeOrdinal(text, config.Culture).Count > 0) + { + return ret; + } + + if (ers.Count != 1) + { + return ret; + } + + foreach (var er in ers) + { + var afterStr = text.Substring(er.Start + er.Length ?? 0); + var beforeStr = text.Substring(0, er.Start ?? 0); + var beforeMatch = MatchEachUnit(beforeStr); + var timeBeforeErs = this.config.TimeExtractor.Extract(beforeStr, reference); + var timePeriodBeforeErs = this.config.TimePeriodExtractor.Extract(beforeStr, reference); + if (timeBeforeErs.Count == 0 && (timePeriodBeforeErs.Count != 0)) + { + timeBeforeErs = timePeriodBeforeErs; + } + + var match = MatchEachUnit(afterStr); + var timeErs = this.config.TimeExtractor.Extract(afterStr, reference); + var timePeriodErs = this.config.TimePeriodExtractor.Extract(afterStr, reference); + if (timeErs.Count == 0 && (timePeriodErs.Count != 0)) + { + timeErs = timePeriodErs; + } + + if (beforeMatch.Count > 0) + { + var beforeMatchInd = beforeMatch[0].Start; + if (timeBeforeErs.Count > 0) { - ret.Add(new Token(beforeMatch.Index, er.Start + er.Length ?? 0)); + beforeMatchInd = Math.Min(beforeMatchInd, (int)timeBeforeErs[0].Start); } + + var erEnd = er.Start + er.Length ?? 0; + if (timeErs.Count > 0) + { + erEnd += (int)timeErs[0].Start + (int)timeErs[0].Length; + } + + ret.Add(new Token(beforeMatchInd, erEnd)); } - else + + if (match.Count > 0) { - var match = this.config.EachDayRegex.Match(afterStr); - if (match.Success) + var matchInd = match[0].Length + match[0].Start; + if (timeErs.Count > 0) { - ret.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 
0) + match.Length)); + matchInd = Math.Max(matchInd, (int)timeErs[0].Start + (int)timeErs[0].Length); } + + var erStart = er.Start ?? 0; + if (timeBeforeErs.Count > 0) + { + erStart = Math.Min(erStart, (int)timeBeforeErs[0].Start); + } + + ret.Add(new Token(erStart, (er.Start + er.Length ?? 0) + matchInd)); } } @@ -116,11 +225,14 @@ public List MatchEach(IDateTimeExtractor extractor, string text, DateObje { var ret = new List(); var matches = config.SetEachRegex.Matches(text); + foreach (Match match in matches) { if (match.Success) { + // "3pm *each* day" var trimmedText = text.Remove(match.Index, match.Length); + var ers = extractor.Extract(trimmedText, reference); foreach (var er in ers) { @@ -141,6 +253,7 @@ public List MatchEach(IDateTimeExtractor extractor, string text, DateObje Tuple weekdayTuple = config.WeekDayGroupMatchTuple(match); string weekday = weekdayTuple.Item1; int del = weekdayTuple.Item2; + var trimmedText = text.Remove(match.Index, match.Length); trimmedText = trimmedText.Insert(match.Index, weekday); @@ -150,11 +263,16 @@ public List MatchEach(IDateTimeExtractor extractor, string text, DateObje if (er.Start <= match.Index && er.Text.Contains(match.Groups["weekday"].Value)) { var len = (er.Length ?? 0) + del; - if (match.Groups[Constants.PrefixGroupName].ToString() != string.Empty) + if (match.Groups[Constants.PrefixGroupName].ToString().Length > 0) { len += match.Groups[Constants.PrefixGroupName].ToString().Length; } + if (match.Groups[Constants.SuffixGroupName].ToString().Length > 0) + { + len += match.Groups[Constants.SuffixGroupName].ToString().Length; + } + ret.Add(new Token(er.Start ?? 0, er.Start + len ?? 
0)); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimeExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimeExtractor.cs index b77e42fdaa..2cc1856114 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimeExtractor.cs @@ -1,6 +1,15 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; using System.Text.RegularExpressions; + using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Text.InternalCache; + using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime @@ -8,29 +17,55 @@ namespace Microsoft.Recognizers.Text.DateTime public class BaseTimeExtractor : IDateTimeExtractor { public static readonly Regex HourRegex = - new Regex(BaseDateTime.HourRegex, RegexOptions.Singleline); + new Regex(BaseDateTime.HourRegex, RegexOptions.Singleline | RegexOptions.Compiled, RegexTimeOut); public static readonly Regex MinuteRegex = - new Regex(BaseDateTime.MinuteRegex, RegexOptions.Singleline); + new Regex(BaseDateTime.MinuteRegex, RegexOptions.Singleline | RegexOptions.Compiled, RegexTimeOut); public static readonly Regex SecondRegex = - new Regex(BaseDateTime.SecondRegex, RegexOptions.Singleline); + new Regex(BaseDateTime.SecondRegex, RegexOptions.Singleline | RegexOptions.Compiled, RegexTimeOut); private const string ExtractorName = Constants.SYS_DATETIME_TIME; // "Time"; + private static readonly ResultsCache ResultsCache = new ResultsCache(); + + private readonly string keyPrefix; + private readonly ITimeExtractorConfiguration config; public BaseTimeExtractor(ITimeExtractorConfiguration config) { this.config = config; + keyPrefix = string.Intern(config.Options + "_" + config.LanguageMarker); } + protected static 
TimeSpan RegexTimeOut => DateTimeRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + public virtual List Extract(string text) { return Extract(text, DateObject.Now); } public virtual List Extract(string text, DateObject reference) + { + + List results; + + if ((this.config.Options & DateTimeOptions.NoProtoCache) != 0) + { + results = ExtractImpl(text, reference); + } + else + { + var key = (keyPrefix, text, reference); + + results = ResultsCache.GetOrCreate(key, () => ExtractImpl(text, reference)); + } + + return results; + } + + public virtual List ExtractImpl(string text, DateObject reference) { var tokens = new List(); tokens.AddRange(BasicRegexMatch(text)); @@ -45,12 +80,15 @@ public virtual List Extract(string text, DateObject reference) timeErs = TimeZoneUtility.MergeTimeZones(timeErs, config.TimeZoneExtractor.Extract(text, reference), text); } + // Remove common ambiguous cases + timeErs = ExtractResultExtension.FilterAmbiguity(timeErs, text, this.config.AmbiguityFiltersDict); + return timeErs; } private List BasicRegexMatch(string text) { - var result = new List(); + var results = new List(); foreach (var regex in this.config.TimeRegexList) { @@ -58,12 +96,19 @@ private List BasicRegexMatch(string text) foreach (Match match in matches) { - result.Add(new Token(match.Index, match.Index + match.Length)); + // @TODO Workaround to avoid incorrect partial-only matches. Remove after time regex reviews across languages. 
+ var lth = match.Groups["lth"].Value; + + if (string.IsNullOrEmpty(lth) || + (lth.Length != match.Length && !(match.Length == lth.Length + 1 && match.Value.EndsWith(" ", StringComparison.Ordinal)))) + { + results.Add(new Token(match.Index, match.Index + match.Length)); + } } } - return result; + return results; } private List AtRegexMatch(string text) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimePeriodExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimePeriodExtractor.cs index e464c3368e..5ef4c4126b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimePeriodExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimePeriodExtractor.cs @@ -1,6 +1,12 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.DateTime.English; +using Microsoft.Recognizers.Text.InternalCache; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -10,11 +16,16 @@ public class BaseTimePeriodExtractor : IDateTimeExtractor { public static readonly string ExtractorName = Constants.SYS_DATETIME_TIMEPERIOD; // "TimePeriod"; + private static readonly ResultsCache ResultsCache = new ResultsCache(); + private readonly ITimePeriodExtractorConfiguration config; + private readonly string keyPrefix; + public BaseTimePeriodExtractor(ITimePeriodExtractorConfiguration config) { this.config = config; + keyPrefix = string.Intern(config.Options + "_" + config.LanguageMarker); } public List Extract(string text) @@ -23,9 +34,28 @@ public List Extract(string text) } public List Extract(string text, DateObject reference) + { + List results; + + if ((this.config.Options & DateTimeOptions.NoProtoCache) != 0) + { + results = ExtractImpl(text, reference); + } + else + { + var key 
= (keyPrefix, text, reference); + + results = ResultsCache.GetOrCreate(key, () => ExtractImpl(text, reference)); + } + + return results; + } + + private List ExtractImpl(string text, DateObject reference) { var tokens = new List(); tokens.AddRange(MatchSimpleCases(text)); + tokens.AddRange(MatchTimePeriodWithDurationCases(text)); tokens.AddRange(MergeTwoTimePoints(text, reference)); tokens.AddRange(MatchTimeOfDay(text)); @@ -42,23 +72,8 @@ public List Extract(string text, DateObject reference) timePeriodErs = TimeZoneUtility.MergeTimeZones(timePeriodErs, config.TimeZoneExtractor.Extract(text, reference), text); } - // TODO: Fix to solve german morgen (morning) / morgen (tomorrow) ambiguity. To be removed after the first version of DateTimeV2 in German is in production. - timePeriodErs = GermanMorgenWorkaround(text, timePeriodErs); - - return timePeriodErs; - } - - // For German there is a problem with cases like "Morgen Abend" which is parsed as "Morning Evening" as "Morgen" can mean both "tomorrow" and "morning". - // When the extractor extracts "Abend" in this example it will take the string before that to look for a relative shift to another day like "yesterday", "tomorrow" etc. - // When trying to do this on the string "morgen" it will be extracted as a time period ("morning") by the TimePeriodExtractor, and not as "tomorrow". - // Filtering out the string "morgen" from the TimePeriodExtractor will fix the problem as only in the case where "morgen" is NOT a time period the string "morgen" will be passed to this extractor. - // It should also be solvable through the config but we do not want to introduce changes to the interface and configs for all other languages. - private List GermanMorgenWorkaround(string text, List timePeriodErs) - { - if (text.Equals("morgen")) - { - timePeriodErs.Clear(); - } + // Filter ambiguous extractions e.g. 
'morgen' in German and Dutch + timePeriodErs = this.config.ApplyPotentialPeriodAmbiguityHotfix(text, timePeriodErs); return timePeriodErs; } @@ -96,7 +111,7 @@ private List MatchSimpleCases(string text) var endWithGeneralEndings = this.config.GeneralEndingRegex.Match(afterStr).Success; var endWithAmPm = match.Groups[Constants.RightAmPmGroupName].Success; - if (endWithGeneralEndings || endWithAmPm || afterStr.TrimStart().StartsWith(this.config.TokenBeforeDate)) + if (endWithGeneralEndings || endWithAmPm || afterStr.TrimStart().StartsWith(this.config.TokenBeforeDate, StringComparison.Ordinal)) { endWithValidToken = true; } @@ -139,6 +154,22 @@ private List MatchSimpleCases(string text) return ret; } + // Cases like "from 6am for 3 hours" and "for 3 hours from 6 am" are extracted as timerange here. + private List MatchTimePeriodWithDurationCases(string text) + { + var ret = new List(); + if (this.config as EnglishTimePeriodExtractorConfiguration != null) + { + Match match = EnglishTimePeriodExtractorConfiguration.TimePeriodWithDurationRegex.Match(text); + if (match.Success) + { + ret.Add(new Token(match.Index, match.Index + match.Length)); + } + } + + return ret; + } + private bool StartsWithTimeZone(string afterText) { var startsWithTimeZone = false; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimeZoneExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimeZoneExtractor.cs index 48e20382e1..708a17fbd1 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimeZoneExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimeZoneExtractor.cs @@ -1,4 +1,9 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Matcher; using Microsoft.Recognizers.Text.Utilities; @@ -28,8 +33,11 @@ public List Extract(string text, DateObject reference) var normalizedText = QueryProcessor.RemoveDiacritics(text); - tokens.AddRange(MatchTimeZones(normalizedText)); - tokens.AddRange(MatchLocationTimes(normalizedText, tokens)); + // If normalized and original texts have different lengths, re-calculate indices + var reIndex = text.Length > normalizedText.Length; + + tokens.AddRange(MatchTimeZones(normalizedText, text, reIndex)); + tokens.AddRange(MatchLocationTimes(normalizedText, tokens, text, reIndex)); return Token.MergeAllTokens(tokens, text, ExtractorName); } @@ -40,7 +48,7 @@ public List RemoveAmbiguousTimezone(List ers) return ers; } - private IEnumerable MatchLocationTimes(string text, List tokens) + private IEnumerable MatchLocationTimes(string text, List tokens, string originalText, bool reIndex) { var ret = new List(); @@ -82,15 +90,31 @@ private IEnumerable MatchLocationTimes(string text, List tokens) if (timeMatch.Count != 0 && !isAllSuffixInsideTokens) { var lastMatchIndex = timeMatch[timeMatch.Count - 1].Index; + var matches = config.LocationMatcher.Find(text.Substring(0, lastMatchIndex)); var locationMatches = MatchingUtil.RemoveSubMatches(matches); + if (reIndex) + { + foreach (var locMatch in locationMatches) + { + locMatch.Start = originalText.IndexOf(locMatch.CanonicalValues.FirstOrDefault(), locMatch.Start, StringComparison.Ordinal); + } + } + var i = 0; foreach (Match match in timeMatch) { var hasCityBefore = false; - while (i < locationMatches.Count && locationMatches[i].End <= match.Index) + var index = match.Index; + + if (reIndex) + { + index = originalText.IndexOf(match.Value, match.Index, StringComparison.Ordinal); + } + + while (i < locationMatches.Count && locationMatches[i].End <= index) { hasCityBefore = true; i++; @@ 
-101,9 +125,9 @@ private IEnumerable MatchLocationTimes(string text, List tokens) } } - if (hasCityBefore && locationMatches[i - 1].End == match.Index) + if (hasCityBefore && locationMatches[i - 1].End == index) { - ret.Add(new Token(locationMatches[i - 1].Start, match.Index + match.Length)); + ret.Add(new Token(locationMatches[i - 1].Start, index + match.Length)); } if (i == locationMatches.Count) @@ -116,7 +140,7 @@ private IEnumerable MatchLocationTimes(string text, List tokens) return ret; } - private List MatchTimeZones(string text) + private List MatchTimeZones(string text, string originalText, bool reIndex) { var ret = new List(); @@ -126,13 +150,29 @@ private List MatchTimeZones(string text) var directUtc = this.config.DirectUtcRegex.Matches(text); foreach (Match match in directUtc) { - ret.Add(new Token(match.Index, match.Index + match.Length)); + + var index = match.Index; + + if (reIndex) + { + index = originalText.IndexOf(match.Value, match.Index, StringComparison.Ordinal); + } + + ret.Add(new Token(index, index + match.Length)); + } var matches = this.config.TimeZoneMatcher.Find(text); foreach (MatchResult match in matches) { - ret.Add(new Token(match.Start, match.Start + match.Length)); + var index = match.Start; + + if (reIndex) + { + index = originalText.IndexOf(match.CanonicalValues.FirstOrDefault(), match.Start, StringComparison.Ordinal); + } + + ret.Add(new Token(index, index + match.Length)); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDateExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDateExtractor.cs new file mode 100644 index 0000000000..f5fef80e5a --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDateExtractor.cs @@ -0,0 +1,148 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.InternalCache; +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public class BaseCJKDateExtractor : IDateTimeExtractor + { + public static readonly string ExtractorName = Constants.SYS_DATETIME_DATE; // "Date"; + + private static readonly ResultsCache ResultsCache = new ResultsCache(); + + private readonly ICJKDateExtractorConfiguration config; + + public BaseCJKDateExtractor(ICJKDateExtractorConfiguration config) + { + this.config = config; + } + + public List Extract(string text) + { + return Extract(text, DateObject.Now); + } + + public List Extract(string text, DateObject referenceTime) + { + var tokens = new List(); + var result = new List(); + + tokens.AddRange(BasicRegexMatch(text)); + tokens.AddRange(ImplicitDate(text)); + tokens.AddRange(DurationWithAgoAndLater(text, referenceTime)); + result = Token.MergeAllTokens(tokens, text, ExtractorName); + + result = ExtractResultExtension.FilterAmbiguity(result, text, this.config.AmbiguityDateFiltersDict); + + return result; + } + + // Match basic patterns in DateRegexList + private List BasicRegexMatch(string text) + { + var ret = new List(); + foreach (var regex in this.config.DateRegexList) + { + var matches = regex.Matches(text); + foreach (Match match in matches) + { + // some match might be part of the date range entity, and might be split in a wrong way + if (DateContext.ValidateMatch(match, text, this.config.DateRegexList, this.config.RangeConnectorSymbolRegex)) + { + ret.Add(new Token(match.Index, match.Index + match.Length)); + + } + } + } + + return ret; + } + + // Match several other implicit cases + private List ImplicitDate(string text) + { + var ret = new List(); + foreach (var regex in this.config.ImplicitDateList) + { + var 
matches = regex.Matches(text); + foreach (Match match in matches) + { + ret.Add(new Token(match.Index, match.Index + match.Length)); + } + } + + return ret; + } + + // process case like "三天前" "两个月前" + private List DurationWithAgoAndLater(string text, DateObject referenceTime) + { + var ret = new List(); + + var durationEr = this.config.DurationExtractor.Extract(text, referenceTime); + + foreach (var er in durationEr) + { + // Only handles date durations here + // Cases with dateTime durations will be handled in DateTime Extractor + if (this.config.DateTimePeriodUnitRegex.Match(er.Text).Success) + { + continue; + } + + var pos = (int)er.Start + (int)er.Length; + + if (pos < text.Length) + { + var suffix = text.Substring(pos); + var match = this.config.BeforeRegex.Match(suffix); + if (!match.Success) + { + match = this.config.AfterRegex.Match(suffix); + } + + if (match.Success && suffix.Trim().StartsWith(match.Value, StringComparison.Ordinal)) + { + var metadata = new Metadata() { IsDurationWithAgoAndLater = true }; + ret.Add(new Token((int)er.Start, (int)(er.Start + er.Length) + match.Index + match.Length, metadata)); + } + } + } + + // Extend extraction with weekdays like in "Friday two weeks from now", "in 3 weeks on Monday" + ret.AddRange(ExtendWithWeekDay(ret, text)); + + return ret; + } + + private List ExtendWithWeekDay(List ret, string text) + { + var newRet = new List(); + foreach (var er in ret) + { + var beforeStr = text.Substring(0, er.Start); + var afterStr = text.Substring(er.End); + var beforeMatch = this.config.WeekDayStartEnd.Match(beforeStr); + var afterMatch = this.config.WeekDayStartEnd.Match(afterStr); + if (beforeMatch.Success || afterMatch.Success) + { + var start = beforeMatch.Success ? beforeMatch.Index : er.Start; + var end = beforeMatch.Success ? 
er.End : er.End + afterMatch.Index + afterMatch.Length; + Metadata metadata = new Metadata { IsDurationDateWithWeekday = true }; + Token tok = new Token(start, end, metadata); + newRet.Add(tok); + } + } + + return newRet; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDatePeriodExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDatePeriodExtractor.cs new file mode 100644 index 0000000000..15bb370bbf --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDatePeriodExtractor.cs @@ -0,0 +1,333 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.InternalCache; +using Microsoft.Recognizers.Text.Utilities; +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public class BaseCJKDatePeriodExtractor : IDateTimeExtractor + { + private const string ExtractorName = Constants.SYS_DATETIME_DATEPERIOD; + + private static readonly ResultsCache ResultsCache = new ResultsCache(); + + private readonly ICJKDatePeriodExtractorConfiguration config; + + private readonly string keyPrefix; + + public BaseCJKDatePeriodExtractor(ICJKDatePeriodExtractorConfiguration config) + { + this.config = config; + keyPrefix = string.Intern(config.Options + "_" + config.LanguageMarker); + } + + public List Extract(string text) + { + return Extract(text, DateObject.Now); + } + + public List Extract(string text, DateObject referenceTime) + { + var tokens = new List(); + tokens.AddRange(MatchSimpleCases(text)); + var simpleCasesResults = Token.MergeAllTokens(tokens, text, ExtractorName); + tokens.AddRange(MatchComplexCases(text, simpleCasesResults, referenceTime)); + tokens.AddRange(MergeTwoTimePoints(text, referenceTime)); + 
tokens.AddRange(MatchNumberWithUnit(text)); + tokens.AddRange(MatchDurations(text, referenceTime)); + + var rets = Token.MergeAllTokens(tokens, text, ExtractorName); + + // Remove common ambiguous cases + rets = ExtractResultExtension.FilterAmbiguity(rets, text, this.config.AmbiguityFiltersDict); + + return rets; + } + + // match pattern in simple case + private List MatchSimpleCases(string text) + { + var ret = new List(); + foreach (var regex in this.config.SimpleCasesRegexes) + { + var matches = regex.Matches(text); + foreach (Match match in matches) + { + ret.Add(new Token(match.Index, match.Index + match.Length)); + } + } + + return ret; + } + + private List MatchDurations(string text, DateObject reference) + { + var ret = new List(); + + var durationExtractions = config.DurationExtractor.Extract(text, reference); + + foreach (var durationExtraction in durationExtractions) + { + var dateUnitMatch = config.DateUnitRegex.Match(durationExtraction.Text); + if (!dateUnitMatch.Success) + { + continue; + } + + var duration = new Token(durationExtraction.Start ?? 0, durationExtraction.Start + durationExtraction.Length ?? 
0); + var beforeStr = text.Substring(0, duration.Start); + var afterStr = text.Substring(duration.Start + duration.Length); + + if (string.IsNullOrWhiteSpace(beforeStr) && string.IsNullOrWhiteSpace(afterStr)) + { + continue; + } + + // handle cases with 'within' and 'next' + var matchWithin = config.FutureRegex.MatchBegin(afterStr, trim: true); + var matchNext = config.FutureRegex.MatchEnd(beforeStr, trim: true); + + if (matchWithin.Success && matchNext.Success && !matchNext.Groups[Constants.WithinGroupName].Success) + { + if (matchNext.Value == matchWithin.Value) + { + ret.Add(new Token(duration.Start - matchNext.Value.Length, duration.End)); + } + else + { + ret.Add(new Token(duration.Start - matchNext.Value.Length, duration.End + matchWithin.Value.Length)); + } + } + else if (matchWithin.Success) + { + ret.Add(new Token(duration.Start, duration.End + matchWithin.Value.Length)); + } + else if (matchNext.Success) + { + ret.Add(new Token(duration.Start - matchNext.Value.Length, duration.End)); + } + + } + + return ret; + } + + // merge two date + private List MergeTwoTimePoints(string text, DateObject referenceTime) + { + var ret = new List(); + var er = this.config.DatePointExtractor.Extract(text, referenceTime); + if (er.Count <= 1) + { + return ret; + } + + // merge '{TimePoint} 到 {TimePoint}' + var idx = 0; + while (idx < er.Count - 1) + { + var middleBegin = er[idx].Start + er[idx].Length ?? 0; + var middleEnd = er[idx + 1].Start ?? 0; + if (middleBegin >= middleEnd) + { + idx++; + continue; + } + + var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim(); + + if (this.config.TillRegex.IsExactMatch(middleStr, trim: true)) + { + var periodBegin = er[idx].Start ?? 0; + var periodEnd = (er[idx + 1].Start ?? 0) + (er[idx + 1].Length ?? 
0); + + // handle suffix + var afterStr = text.Substring(periodEnd); + var match = this.config.RangeSuffixRegex.MatchBegin(afterStr, true); + if (match.Success) + { + periodEnd = periodEnd + match.Index + match.Length; + } + + // handle prefix + var beforeStr = text.Substring(0, periodBegin); + match = this.config.RangePrefixRegex.MatchEnd(beforeStr, true); + if (match.Success) + { + periodBegin = match.Index; + } + + ret.Add(new Token(periodBegin, periodEnd)); + idx += 2; + continue; + } + + idx++; + } + + return ret; + } + + // extract case like "前两年" "前三个月" + private List MatchNumberWithUnit(string text) + { + var ret = new List(); + + var durations = new List(); + var ers = this.config.IntegerExtractor.Extract(text); + + foreach (var er in ers) + { + var afterStr = text.Substring(er.Start + er.Length ?? 0); + var match = this.config.FollowedUnit.MatchBegin(afterStr, trim: true); + + if (match.Success) + { + durations.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 0) + match.Length)); + } + } + + if (this.config.NumberCombinedWithUnit.IsMatch(text)) + { + var matches = this.config.NumberCombinedWithUnit.Matches(text); + foreach (Match match in matches) + { + durations.Add(new Token(match.Index, match.Index + match.Length)); + } + } + + foreach (var duration in durations) + { + var beforeStr = text.Substring(0, duration.Start); + if (string.IsNullOrWhiteSpace(beforeStr)) + { + continue; + } + + // Cases like 'first 2 weeks of 2018' (2021年的前2周) + var match = this.config.FirstLastOfYearRegex.MatchEnd(beforeStr, trim: true); + + if (match.Success) + { + // Check if the unit is compatible (day, week, month) + var durationStr = text.Substring(duration.Start, duration.Length); + var unitMatch = this.config.UnitRegex.Match(durationStr); + if (unitMatch.Groups[Constants.UnitOfYearGroupName].Success) + { + ret.Add(new Token(match.Index, duration.End)); + continue; + } + } + + match = this.config.PastRegex.MatchEnd(beforeStr, trim: true); + + if (match.Success) + { 
+ ret.Add(new Token(match.Index, duration.End)); + continue; + } + + match = this.config.FutureRegex.MatchEnd(beforeStr, trim: true); + + if (match.Success) + { + ret.Add(new Token(match.Index, duration.End)); + } + } + + return ret; + } + + // Complex cases refer to the combination of daterange and datepoint + // For Example: from|between {DateRange|DatePoint} to|till|and {DateRange|DatePoint} + private List MatchComplexCases(string text, List simpleDateRangeResults, DateObject reference) + { + var er = this.config.DatePointExtractor.Extract(text, reference); + + // Filter out DateRange results that are part of DatePoint results + // For example, "Feb 1st 2018" => "Feb" and "2018" should be filtered out here + er.AddRange(simpleDateRangeResults + .Where(simpleDateRange => !er.Any(datePoint => (datePoint.Start <= simpleDateRange.Start && datePoint.Start + datePoint.Length >= simpleDateRange.Start + simpleDateRange.Length)))); + + er = er.OrderBy(t => t.Start).ToList(); + + return MergeMultipleExtractions(text, er); + } + + private List MergeMultipleExtractions(string text, List extractionResults) + { + var ret = new List(); + var metadata = new Metadata + { + PossiblyIncludePeriodEnd = true, + }; + + if (extractionResults.Count <= 1) + { + return ret; + } + + var idx = 0; + + while (idx < extractionResults.Count - 1) + { + var middleBegin = extractionResults[idx].Start + extractionResults[idx].Length ?? 0; + var middleEnd = extractionResults[idx + 1].Start ?? 
0; + if (middleBegin > middleEnd) + { + idx++; + continue; + } + + var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim(); + var endPointStr = extractionResults[idx + 1].Text; + var startPointStr = extractionResults[idx].Text; + + if (config.TillRegex.IsExactMatch(middleStr, trim: true) || (string.IsNullOrEmpty(middleStr) && + (config.TillRegex.MatchBegin(endPointStr, trim: true).Success || config.TillRegex.MatchEnd(startPointStr, trim: true).Success))) + { + var periodBegin = extractionResults[idx].Start ?? 0; + var periodEnd = (extractionResults[idx + 1].Start ?? 0) + (extractionResults[idx + 1].Length ?? 0); + + // handle "from/between" together with till words (till/until/through...) + var beforeStr = text.Substring(0, periodBegin); + + var beforeMatch = this.config.RangePrefixRegex.MatchEnd(beforeStr, trim: true); + + if (beforeMatch.Success) + { + periodBegin = beforeMatch.Index; + } + else + { + var afterStr = text.Substring(periodEnd); + + var afterMatch = this.config.RangeSuffixRegex.MatchBegin(afterStr, trim: true); + + if (afterMatch.Success) + { + periodEnd += afterMatch.Index + afterMatch.Length; + } + } + + ret.Add(new Token(periodBegin, periodEnd, metadata)); + + // merge two tokens here, increase the index by two + idx += 2; + continue; + } + + idx++; + } + + return ret; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDateTimeExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDateTimeExtractor.cs new file mode 100644 index 0000000000..6263e22329 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDateTimeExtractor.cs @@ -0,0 +1,199 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+using Microsoft.Recognizers.Text.Utilities;
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// CJK extractor for combined date-and-time expressions. Candidate tokens produced by several
+    /// matching strategies are merged into results typed <see cref="ExtractorName"/>.
+    /// </summary>
+    public class BaseCJKDateTimeExtractor : IDateTimeExtractor
+    {
+        public static readonly string ExtractorName = Constants.SYS_DATETIME_DATETIME; // "DateTime";
+
+        private readonly ICJKDateTimeExtractorConfiguration config;
+
+        public BaseCJKDateTimeExtractor(ICJKDateTimeExtractorConfiguration config)
+        {
+            this.config = config;
+        }
+
+        /// <summary>Extracts using <see cref="DateObject.Now"/> as the reference time.</summary>
+        /// <param name="text">Text to scan.</param>
+        /// <returns>The merged extraction results.</returns>
+        public List<ExtractResult> Extract(string text)
+        {
+            return Extract(text, DateObject.Now);
+        }
+
+        /// <summary>
+        /// Collects tokens from every strategy of this class, merges them and then applies the
+        /// configuration's ambiguity filters.
+        /// </summary>
+        /// <param name="text">Text to scan.</param>
+        /// <param name="referenceTime">Reference time forwarded to the nested extractors.</param>
+        /// <returns>The merged and filtered extraction results.</returns>
+        public List<ExtractResult> Extract(string text, DateObject referenceTime)
+        {
+            var tokens = new List<Token>();
+
+            tokens.AddRange(MergeDateAndTime(text, referenceTime));
+            tokens.AddRange(BasicRegexMatch(text));
+            tokens.AddRange(TimeOfToday(text, referenceTime));
+            tokens.AddRange(DurationWithAgoAndLater(text, referenceTime));
+
+            var result = Token.MergeAllTokens(tokens, text, ExtractorName);
+
+            return ExtractResultExtension.FilterAmbiguity(result, text, this.config.AmbiguityDateTimeFiltersDict);
+        }
+
+        /// <summary>Matches "now"-like expressions with <c>NowRegex</c>.</summary>
+        /// <param name="text">Text to scan.</param>
+        /// <returns>One token per match, with offsets relative to <paramref name="text"/>.</returns>
+        public List<Token> BasicRegexMatch(string text)
+        {
+            var ret = new List<Token>();
+
+            // The regex still runs on the trimmed text, but the tokens are later merged against the
+            // caller's untrimmed text, so every match is shifted by the number of leading characters
+            // that Trim() removed. Without the shift, leading whitespace moved every token to the left.
+            var trimmed = text.Trim();
+            var leadingOffset = text.Length - text.TrimStart().Length;
+
+            // handle "now"
+            foreach (Match match in this.config.NowRegex.Matches(trimmed))
+            {
+                var begin = leadingOffset + match.Index;
+                ret.Add(new Token(begin, begin + match.Length));
+            }
+
+            return ret;
+        }
+
+        /// <summary>
+        /// Merges a Date entity and a following Time entity, like "明天早上七点".
+        /// </summary>
+        /// <param name="text">Text to scan.</param>
+        /// <param name="referenceTime">Reference time forwarded to the date and time point extractors.</param>
+        /// <returns>One token per merged date/time pair.</returns>
+        public List<Token> MergeDateAndTime(string text, DateObject referenceTime)
+        {
+            var ret = new List<Token>();
+            var ers = this.config.DatePointExtractor.Extract(text, referenceTime);
+            if (ers.Count == 0)
+            {
+                return ret;
+            }
+
+            ers.AddRange(this.config.TimePointExtractor.Extract(text, referenceTime));
+            if (ers.Count < 2)
+            {
+                return ret;
+            }
+
+            ers = ers.OrderBy(o => o.Start).ToList();
+
+            var i = 0;
+            while (i < ers.Count - 1)
+            {
+                // Find the first following result that does not overlap the current one.
+                var j = i + 1;
+                while (j < ers.Count && ers[i].IsOverlap(ers[j]))
+                {
+                    j++;
+                }
+
+                if (j >= ers.Count)
+                {
+                    break;
+                }
+
+                var isDateThenTime =
+                    ers[i].Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) &&
+                    ers[j].Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal);
+
+                if (!isDateThenTime)
+                {
+                    i = j;
+                    continue;
+                }
+
+                var middleBegin = ers[i].Start + ers[i].Length ?? 0;
+                var middleEnd = ers[j].Start ?? 0;
+                if (middleBegin > middleEnd)
+                {
+                    break;
+                }
+
+                // The pair is merged when nothing, a connector or a preposition separates the two parts.
+                var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim();
+                if (string.IsNullOrEmpty(middleStr) || this.config.ConnectorRegex.IsMatch(middleStr) || this.config.PrepositionRegex.IsMatch(middleStr))
+                {
+                    var begin = ers[i].Start ?? 0;
+                    var end = (ers[j].Start ?? 0) + (ers[j].Length ?? 0);
+                    ret.Add(new Token(begin, end));
+                }
+
+                i = j + 1;
+            }
+
+            return ret;
+        }
+
+        /// <summary>
+        /// Parses a specific time of today, tonight, this afternoon, e.g. "今天下午七点".
+        /// </summary>
+        /// <param name="text">Text to scan.</param>
+        /// <param name="referenceTime">Reference time forwarded to the time point extractor.</param>
+        /// <returns>Tokens covering the special-day prefix together with the time.</returns>
+        public List<Token> TimeOfToday(string text, DateObject referenceTime)
+        {
+            var ret = new List<Token>();
+            var ers = this.config.TimePointExtractor.Extract(text, referenceTime);
+            foreach (var er in ers)
+            {
+                var beforeStr = text.Substring(0, er.Start ?? 0);
+
+                // handle "今晚7点": the night word is part of the time entity itself, so the prefix
+                // is widened to include it before looking for the special-day expression.
+                var innerMatch = this.config.NightRegex.MatchBegin(er.Text, trim: true);
+
+                if (innerMatch.Success)
+                {
+                    beforeStr = text.Substring(0, (er.Start ?? 0) + innerMatch.Length);
+                }
+
+                if (string.IsNullOrEmpty(beforeStr))
+                {
+                    continue;
+                }
+
+                var match = this.config.TimeOfSpecialDayRegex.MatchEnd(beforeStr, trim: true);
+
+                if (match.Success)
+                {
+                    var begin = match.Index;
+                    var end = er.Start + er.Length ?? 0;
+                    ret.Add(new Token(begin, end));
+                }
+            }
+
+            // TimePeriodExtractor cases using TimeOfDayRegex are not processed here
+            var matchTimeOfToday = this.config.TimeOfSpecialDayRegex.Match(text);
+            var matchTimeOfDay = this.config.TimeOfDayRegex.Match(text);
+
+            if (matchTimeOfToday.Success && !matchTimeOfDay.Success)
+            {
+                ret.Add(new Token(matchTimeOfToday.Index, matchTimeOfToday.Index + matchTimeOfToday.Length));
+            }
+
+            return ret;
+        }
+
+        /// <summary>
+        /// Processes cases like "5分钟前" "二小时后": a duration immediately followed by a
+        /// before/after word.
+        /// </summary>
+        /// <param name="text">Text to scan.</param>
+        /// <param name="referenceTime">Reference time forwarded to the duration extractor.</param>
+        /// <returns>Tokens flagged with <c>IsDurationWithAgoAndLater</c> metadata.</returns>
+        private List<Token> DurationWithAgoAndLater(string text, DateObject referenceTime)
+        {
+            var ret = new List<Token>();
+
+            var durationEr = this.config.DurationExtractor.Extract(text, referenceTime);
+
+            foreach (var er in durationEr)
+            {
+                var pos = (er.Start ?? 0) + (er.Length ?? 0);
+                if (pos >= text.Length)
+                {
+                    continue;
+                }
+
+                var suffix = text.Substring(pos);
+
+                // Only a word that directly follows the duration counts. Previously a BeforeRegex hit
+                // further along the suffix prevented AfterRegex from being tried at all, so an "after"
+                // word right behind the duration was missed.
+                var match = this.config.BeforeRegex.Match(suffix);
+                if (!match.Success || !suffix.StartsWith(match.Value, StringComparison.Ordinal))
+                {
+                    match = this.config.AfterRegex.Match(suffix);
+                }
+
+                if (match.Success && suffix.StartsWith(match.Value, StringComparison.Ordinal))
+                {
+                    var metadata = new Metadata() { IsDurationWithAgoAndLater = true };
+                    ret.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 0) + match.Length, metadata));
+                }
+            }
+
+            return ret;
+        }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDateTimePeriodExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDateTimePeriodExtractor.cs
new file mode 100644
index 0000000000..64f0b84dd2
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDateTimePeriodExtractor.cs
@@ -0,0 +1,451 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text.RegularExpressions;
+
+using Microsoft.Recognizers.Text.Utilities;
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// CJK extractor for date-time periods. Tokens from several matching strategies are merged
+    /// into results typed <see cref="ExtractorName"/>.
+    /// </summary>
+    public class BaseCJKDateTimePeriodExtractor : IDateTimeExtractor
+    {
+        public static readonly string ExtractorName = Constants.SYS_DATETIME_DATETIMEPERIOD;
+
+        private readonly ICJKDateTimePeriodExtractorConfiguration config;
+
+        public BaseCJKDateTimePeriodExtractor(ICJKDateTimePeriodExtractorConfiguration config)
+        {
+            this.config = config;
+        }
+
+        /// <summary>Extracts using <see cref="DateObject.Now"/> as the reference time.</summary>
+        /// <param name="text">Text to scan.</param>
+        /// <returns>The merged extraction results.</returns>
+        public List<ExtractResult> Extract(string text)
+        {
+            return Extract(text, DateObject.Now);
+        }
+
+        /// <summary>Runs every period strategy of this class and merges the produced tokens.</summary>
+        /// <param name="text">Text to scan.</param>
+        /// <param name="referenceTime">Reference time forwarded to the nested extractors.</param>
+        /// <returns>The merged extraction results.</returns>
+        public List<ExtractResult> Extract(string text, DateObject referenceTime)
+        {
+            // Date and time extractions are computed once and shared by the methods below.
+            // The lists are shared by reference, so the helpers must not modify them.
+            var dateErs = this.config.SingleDateExtractor.Extract(text, referenceTime);
+            var timeErs = this.config.SingleTimeExtractor.Extract(text, referenceTime);
+            var timeRangeErs = this.config.TimePeriodExtractor.Extract(text, referenceTime);
+            var dateTimeErs = this.config.SingleDateTimeExtractor.Extract(text, referenceTime);
+
+            var tokens = new List<Token>();
+            tokens.AddRange(MergeDateAndTimePeriod(text, dateErs, timeRangeErs));
+            tokens.AddRange(MergeTwoTimePoints(text, dateTimeErs, timeErs));
+            tokens.AddRange(MatchDuration(text, referenceTime));
+            tokens.AddRange(MatchRelativeUnit(text));
+            tokens.AddRange(MatchDateWithPeriodSuffix(text, dateErs));
+            tokens.AddRange(MatchNumberWithUnit(text));
+            tokens.AddRange(MatchNight(text, referenceTime));
+            tokens.AddRange(MergeDateWithTimePeriodSuffix(text, dateErs, timeErs));
+
+            return Token.MergeAllTokens(tokens, text, ExtractorName);
+        }
+
+        /// <summary>
+        /// Interleaves two result lists, dropping every <paramref name="secondary"/> entry that
+        /// overlaps a <paramref name="primary"/> entry, and returns the combination ordered by start.
+        /// </summary>
+        private static List<ExtractResult> InterleaveWithoutOverlap(List<ExtractResult> primary, List<ExtractResult> secondary)
+        {
+            var points = new List<ExtractResult>();
+
+            var j = 0;
+            foreach (var anchor in primary)
+            {
+                points.Add(anchor);
+
+                // keep the secondary results that end before the anchor starts
+                while (j < secondary.Count && secondary[j].Start + secondary[j].Length <= anchor.Start)
+                {
+                    points.Add(secondary[j]);
+                    j++;
+                }
+
+                // skip the secondary results that overlap the anchor
+                while (j < secondary.Count && secondary[j].IsOverlap(anchor))
+                {
+                    j++;
+                }
+            }
+
+            for (; j < secondary.Count; j++)
+            {
+                points.Add(secondary[j]);
+            }
+
+            return points.OrderBy(o => o.Start).ToList();
+        }
+
+        /// <summary>Merges a Date followed by a TimePeriod into one token.</summary>
+        private List<Token> MergeDateAndTimePeriod(string text, List<ExtractResult> dateErs, List<ExtractResult> timeRangeErs)
+        {
+            var ret = new List<Token>();
+            var timePoints = InterleaveWithoutOverlap(dateErs, timeRangeErs);
+
+            // merge {Date} {TimePeriod}
+            var idx = 0;
+            while (idx < timePoints.Count - 1)
+            {
+                var isDateThenPeriod =
+                    timePoints[idx].Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) &&
+                    timePoints[idx + 1].Type.Equals(Constants.SYS_DATETIME_TIMEPERIOD, StringComparison.Ordinal);
+
+                if (!isDateThenPeriod)
+                {
+                    idx++;
+                    continue;
+                }
+
+                var middleBegin = timePoints[idx].Start + timePoints[idx].Length ?? 0;
+                var middleEnd = timePoints[idx + 1].Start ?? 0;
+
+                // Guard against a negative length, which would make Substring throw.
+                if (middleBegin <= middleEnd)
+                {
+                    var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim();
+                    if (string.IsNullOrWhiteSpace(middleStr) || this.config.PrepositionRegex.IsMatch(middleStr))
+                    {
+                        var periodBegin = timePoints[idx].Start ?? 0;
+                        var periodEnd = (timePoints[idx + 1].Start ?? 0) + (timePoints[idx + 1].Length ?? 0);
+                        ret.Add(new Token(periodBegin, periodEnd));
+                    }
+                }
+
+                // Merged or not, both elements are consumed: the element at idx + 1 is a TimePeriod
+                // and therefore cannot be the Date that opens another pair.
+                idx += 2;
+            }
+
+            return ret;
+        }
+
+        /// <summary>
+        /// Merges "{TimePoint} to {TimePoint}" and "between {TimePoint} and {TimePoint}" where at
+        /// least one side is a DateTime.
+        /// </summary>
+        private List<Token> MergeTwoTimePoints(string text, List<ExtractResult> dateTimeErs, List<ExtractResult> timeErs)
+        {
+            var ret = new List<Token>();
+            var timePoints = InterleaveWithoutOverlap(dateTimeErs, timeErs);
+
+            var idx = 0;
+            while (idx < timePoints.Count - 1)
+            {
+                // if both ends are Time. then this is a TimePeriod, not a DateTimePeriod
+                if (timePoints[idx].Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal) &&
+                    timePoints[idx + 1].Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal))
+                {
+                    idx++;
+                    continue;
+                }
+
+                var middleBegin = timePoints[idx].Start + timePoints[idx].Length ?? 0;
+                var middleEnd = timePoints[idx + 1].Start ?? 0;
+
+                // Guard against a negative length, which would make Substring throw.
+                if (middleBegin > middleEnd)
+                {
+                    idx++;
+                    continue;
+                }
+
+                var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim();
+
+                // handle "{TimePoint} to {TimePoint}"
+                if (this.config.TillRegex.IsExactMatch(middleStr, trim: true))
+                {
+                    var periodBegin = timePoints[idx].Start ?? 0;
+                    var periodEnd = (timePoints[idx + 1].Start ?? 0) + (timePoints[idx + 1].Length ?? 0);
+
+                    // handle "from": looked for before the period first, then after it
+                    var beforeStr = text.Substring(0, periodBegin);
+                    if (this.config.GetFromTokenIndex(beforeStr, out int index))
+                    {
+                        periodBegin = index;
+                    }
+                    else
+                    {
+                        var afterStr = text.Substring(periodEnd);
+                        if (this.config.GetFromTokenIndex(afterStr, out index))
+                        {
+                            periodEnd += index;
+                        }
+                    }
+
+                    ret.Add(new Token(periodBegin, periodEnd));
+                    idx += 2;
+                    continue;
+                }
+
+                // handle "between {TimePoint} and {TimePoint}"
+                if (this.config.HasConnectorToken(middleStr))
+                {
+                    var periodBegin = timePoints[idx].Start ?? 0;
+                    var periodEnd = (timePoints[idx + 1].Start ?? 0) + (timePoints[idx + 1].Length ?? 0);
+
+                    // handle "between", which is looked for after the second time point
+                    var afterStr = text.Substring(periodEnd);
+                    if (this.config.GetBetweenTokenIndex(afterStr, out int index))
+                    {
+                        ret.Add(new Token(periodBegin, periodEnd + index));
+                        idx += 2;
+                        continue;
+                    }
+                }
+
+                idx++;
+            }
+
+            return ret;
+        }
+
+        /// <summary>
+        /// Matches specific time-of-day expressions and dates followed by a time of day
+        /// (morning, afternoon, ...).
+        /// </summary>
+        private List<Token> MatchNight(string text, DateObject referenceTime)
+        {
+            var ret = new List<Token>();
+            foreach (Match match in this.config.SpecificTimeOfDayRegex.Matches(text))
+            {
+                ret.Add(new Token(match.Index, match.Index + match.Length));
+            }
+
+            // Date followed by morning, afternoon
+            var ers = this.config.SingleDateExtractor.Extract(text, referenceTime);
+            foreach (var er in ers)
+            {
+                var afterStr = text.Substring(er.Start + er.Length ?? 0);
+                var match = this.config.TimeOfDayRegex.Match(afterStr);
+                if (!match.Success)
+                {
+                    continue;
+                }
+
+                var middleStr = afterStr.Substring(0, match.Index);
+                if (string.IsNullOrWhiteSpace(middleStr) || this.config.PrepositionRegex.IsMatch(middleStr))
+                {
+                    ret.Add(new Token(er.Start ?? 0, er.Start + er.Length + match.Index + match.Length ?? 0));
+                }
+            }
+
+            return ret;
+        }
+
+        /// <summary>
+        /// Handles cases like "2015年1月1日の2時以降", "On January 1, 2015 after 2:00": a Date and a
+        /// Time separated by a before/after word.
+        /// </summary>
+        private IEnumerable<Token> MergeDateWithTimePeriodSuffix(string text, List<ExtractResult> dateErs, List<ExtractResult> timeErs)
+        {
+            var ret = new List<Token>();
+
+            if (!dateErs.Any() || !timeErs.Any())
+            {
+                return ret;
+            }
+
+            // Build a new list: the previous code aliased dateErs and appended timeErs to it,
+            // silently modifying the list that Extract shares between strategies.
+            var ers = dateErs.Concat(timeErs).OrderBy(o => o.Start).ToList();
+
+            var i = 0;
+            while (i < ers.Count - 1)
+            {
+                // Find the first following result that does not overlap the current one.
+                var j = i + 1;
+                while (j < ers.Count && ers[i].IsOverlap(ers[j]))
+                {
+                    j++;
+                }
+
+                if (j >= ers.Count)
+                {
+                    break;
+                }
+
+                var isDateThenTime =
+                    ers[i].Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) &&
+                    ers[j].Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal);
+
+                if (!isDateThenTime)
+                {
+                    i = j;
+                    continue;
+                }
+
+                var middleBegin = ers[i].Start + ers[i].Length ?? 0;
+                var middleEnd = ers[j].Start ?? 0;
+                if (middleBegin <= middleEnd)
+                {
+                    var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim();
+
+                    if (this.config.BeforeAfterRegex.IsMatch(middleStr))
+                    {
+                        var begin = ers[i].Start ?? 0;
+                        var end = (ers[j].Start ?? 0) + (ers[j].Length ?? 0);
+
+                        ret.Add(new Token(begin, end));
+                    }
+                }
+
+                i = j + 1;
+            }
+
+            return ret;
+        }
+
+        /// <summary>
+        /// Extracts patterns that involve durations e.g. "Within 5 hours from now".
+        /// </summary>
+        private List<Token> MatchDuration(string text, DateObject reference)
+        {
+            var ret = new List<Token>();
+            var durationExtractions = config.DurationExtractor.Extract(text, reference);
+
+            foreach (var durationExtraction in durationExtractions)
+            {
+                var timeUnitMatch = config.UnitRegex.Match(durationExtraction.Text);
+                if (!timeUnitMatch.Success)
+                {
+                    continue;
+                }
+
+                var duration = new Token(durationExtraction.Start ?? 0, durationExtraction.Start + durationExtraction.Length ?? 0);
+                var beforeStr = text.Substring(0, duration.Start);
+                var afterStr = text.Substring(duration.Start + duration.Length);
+
+                if (string.IsNullOrWhiteSpace(beforeStr) && string.IsNullOrWhiteSpace(afterStr))
+                {
+                    continue;
+                }
+
+                var match = config.FutureRegex.Match(afterStr);
+                var withinGroup = match.Groups[Constants.WithinGroupName];
+                if (!withinGroup.Success)
+                {
+                    continue;
+                }
+
+                var inPrefixMatch = config.ThisRegex.Match(beforeStr);
+                var inPrefix = inPrefixMatch.Success;
+
+                var startToken = inPrefix ? inPrefixMatch.Index : duration.Start;
+                var withinLength = withinGroup.Value.Length;
+                var endToken = duration.End + (inPrefix ? 0 : match.Index + match.Length);
+
+                // A token is only emitted when the duration span itself carries a unit; previously a
+                // failed re-match produced a Token(-1, -1).
+                var unitMatch = config.UnitRegex.Match(text.Substring(duration.Start, duration.Length));
+                if (unitMatch.Success)
+                {
+                    ret.Add(new Token(startToken, inPrefix ? endToken + withinLength : endToken));
+                }
+            }
+
+            return ret;
+        }
+
+        /// <summary>Matches every occurrence of <c>RestOfDateRegex</c>.</summary>
+        private List<Token> MatchRelativeUnit(string text)
+        {
+            var ret = new List<Token>();
+
+            foreach (Match match in this.config.RestOfDateRegex.Matches(text))
+            {
+                ret.Add(new Token(match.Index, match.Index + match.Length));
+            }
+
+            return ret;
+        }
+
+        /// <summary>
+        /// For cases like "Early in the day Wednesday": a date immediately followed by a match of
+        /// <c>TimePeriodLeftRegex</c>.
+        /// </summary>
+        private IEnumerable<Token> MatchDateWithPeriodSuffix(string text, List<ExtractResult> dateErs)
+        {
+            var ret = new List<Token>();
+
+            foreach (var dateEr in dateErs)
+            {
+                var dateStrEnd = dateEr.Start + dateEr.Length ?? 0;
+                var afterStr = text.Substring(dateStrEnd, text.Length - dateStrEnd);
+                var matchAfter = this.config.TimePeriodLeftRegex.MatchBegin(afterStr, trim: true);
+                if (matchAfter.Success)
+                {
+                    ret.Add(new Token(dateEr.Start ?? 0, dateStrEnd + matchAfter.Index + matchAfter.Length));
+                }
+            }
+
+            return ret;
+        }
+
+        /// <summary>
+        /// Builds duration candidates (number + unit, number + past word, bare unit) and keeps those
+        /// preceded by a past, future or time-period-left expression.
+        /// </summary>
+        private List<Token> MatchNumberWithUnit(string text)
+        {
+            var ret = new List<Token>();
+
+            var durations = new List<Token>();
+            var ers = this.config.CardinalExtractor.Extract(text);
+
+            foreach (var er in ers)
+            {
+                var afterStr = text.Substring(er.Start + er.Length ?? 0);
+                var match = this.config.FollowedUnit.MatchBegin(afterStr, trim: true);
+
+                if (match.Success)
+                {
+                    durations.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 0) + match.Length));
+                }
+
+                match = this.config.PastRegex.MatchBegin(afterStr, trim: true);
+
+                if (match.Success)
+                {
+                    durations.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 0) + match.Length));
+                }
+            }
+
+            foreach (Match match in this.config.UnitRegex.Matches(text))
+            {
+                durations.Add(new Token(match.Index, match.Index + match.Length));
+            }
+
+            foreach (var duration in durations)
+            {
+                var beforeStr = text.Substring(0, duration.Start);
+                if (string.IsNullOrWhiteSpace(beforeStr))
+                {
+                    continue;
+                }
+
+                var match = this.config.PastRegex.MatchEnd(beforeStr, trim: true);
+
+                if (match.Success)
+                {
+                    ret.Add(new Token(match.Index, duration.End));
+                    continue;
+                }
+
+                match = this.config.FutureRegex.MatchEnd(beforeStr, trim: true);
+
+                if (match.Success)
+                {
+                    ret.Add(new Token(match.Index, duration.End));
+                }
+
+                match = this.config.TimePeriodLeftRegex.MatchEnd(beforeStr, trim: true);
+
+                if (match.Success)
+                {
+                    ret.Add(new Token(match.Index, duration.End));
+                }
+            }
+
+            return ret;
+        }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDurationExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDurationExtractor.cs
new file mode 100644
index 0000000000..a01bf08f7e
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDurationExtractor.cs
@@ -0,0 +1,228 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Linq;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Text.Utilities;
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// CJK duration extractor. Unit-based results from the internal extractor are combined with
+    /// implicit durations and, when merging is enabled, adjacent durations are joined.
+    /// </summary>
+    public class BaseCJKDurationExtractor : IDateTimeExtractor
+    {
+        public static readonly string ExtractorName = Constants.SYS_DATETIME_DURATION;
+
+        private readonly ICJKDurationExtractorConfiguration config;
+
+        private readonly bool merge;
+
+        /// <param name="config">Language specific configuration.</param>
+        /// <param name="merge">When true, adjacent durations are merged and ambiguity filters are applied.</param>
+        public BaseCJKDurationExtractor(ICJKDurationExtractorConfiguration config, bool merge = true)
+        {
+            this.config = config;
+            this.merge = merge;
+        }
+
+        /// <summary>Extracts using <see cref="DateObject.Now"/> as the reference time.</summary>
+        public List<ExtractResult> Extract(string text)
+        {
+            return Extract(text, DateObject.Now);
+        }
+
+        /// <summary>Extracts durations from <paramref name="source"/>.</summary>
+        /// <param name="source">Text to scan.</param>
+        /// <param name="referenceTime">Not used by this extractor.</param>
+        /// <returns>The extracted duration results.</returns>
+        public List<ExtractResult> Extract(string source, DateObject referenceTime)
+        {
+            // Use Unit to extract, dropping every result whose text matches YearRegex
+            var res = this.config.InternalExtractor.Extract(source)
+                .Where(er => !this.config.YearRegex.Match(er.Text).Success)
+                .ToList();
+
+            // handle "all day", "more days", "few days"
+            res.AddRange(ImplicitDuration(source));
+
+            res = ExtractResultExtension.MergeAllResults(res);
+
+            if (this.merge)
+            {
+                res = MergeMultipleDuration(source, res);
+                res = ExtractResultExtension.FilterAmbiguity(res, source, this.config.AmbiguityDurationFiltersDict);
+            }
+
+            return res;
+        }
+
+        /// <summary>
+        /// Joins runs of durations that are separated by a duration connector and use different
+        /// units into one result whose <c>Data</c> tells whether the run is made of time units,
+        /// date units or both.
+        /// </summary>
+        /// <param name="text">The text the results were extracted from.</param>
+        /// <param name="extractorResults">Results to merge; returned unchanged when it holds at most
+        /// one element or when two consecutive results overlap.</param>
+        /// <returns>The merged results. Results without a known unit are not returned.</returns>
+        private List<ExtractResult> MergeMultipleDuration(string text, List<ExtractResult> extractorResults)
+        {
+            if (extractorResults.Count <= 1)
+            {
+                return extractorResults;
+            }
+
+            var unitMap = this.config.UnitMap;
+            var unitValueMap = this.config.UnitValueMap;
+            var unitRegex = this.config.DurationUnitRegex;
+            var ret = new List<ExtractResult>();
+
+            var firstExtractionIndex = 0;
+            while (firstExtractionIndex < extractorResults.Count)
+            {
+                // The counters describe one group only. They used to be reset solely after a
+                // successful merge, so results that were not merged leaked their counts into the
+                // next group and could turn a pure time (or date) group into a mixed one.
+                var timeUnit = 0;
+                var totalUnit = 0;
+
+                string curUnit = null;
+                var unitMatch = unitRegex.Match(extractorResults[firstExtractionIndex].Text);
+
+                if (unitMatch.Success && unitMap.ContainsKey(unitMatch.Groups[Constants.UnitGroupName].ToString()))
+                {
+                    curUnit = unitMatch.Groups[Constants.UnitGroupName].ToString();
+                    totalUnit++;
+                    if (DurationParsingUtil.IsTimeDurationUnit(unitMap[curUnit]))
+                    {
+                        timeUnit++;
+                    }
+                }
+
+                if (string.IsNullOrEmpty(curUnit))
+                {
+                    firstExtractionIndex++;
+                    continue;
+                }
+
+                var secondExtractionIndex = firstExtractionIndex + 1;
+                while (secondExtractionIndex < extractorResults.Count)
+                {
+                    var valid = false;
+                    var previous = extractorResults[secondExtractionIndex - 1];
+                    var midStrBegin = previous.Start + previous.Length ?? 0;
+                    var midStrEnd = extractorResults[secondExtractionIndex].Start ?? 0;
+                    if (midStrBegin > midStrEnd)
+                    {
+                        return extractorResults;
+                    }
+
+                    var midStr = text.Substring(midStrBegin, midStrEnd - midStrBegin);
+                    var match = this.config.DurationConnectorRegex.Match(midStr);
+                    if (match.Success)
+                    {
+                        // If the second element of a group is a modifier, it should not be merged with subsequent elements.
+                        // For example "4 days or more and 1 week or less" should return 2 separate extractions.
+                        if (secondExtractionIndex > 1 && previous.Metadata != null && previous.Metadata.HasMod)
+                        {
+                            break;
+                        }
+
+                        unitMatch = unitRegex.Match(extractorResults[secondExtractionIndex].Text);
+                        if (unitMatch.Success && unitMap.ContainsKey(unitMatch.Groups[Constants.UnitGroupName].ToString()))
+                        {
+                            var nextUnitStr = unitMatch.Groups[Constants.UnitGroupName].ToString();
+                            if (unitValueMap[unitMap[nextUnitStr]] != unitValueMap[unitMap[curUnit]])
+                            {
+                                valid = true;
+
+                                // track the smallest unit seen so far in the group
+                                if (unitValueMap[unitMap[nextUnitStr]] < unitValueMap[unitMap[curUnit]])
+                                {
+                                    curUnit = nextUnitStr;
+                                }
+                            }
+
+                            totalUnit++;
+                            if (DurationParsingUtil.IsTimeDurationUnit(unitMap[nextUnitStr]))
+                            {
+                                timeUnit++;
+                            }
+                        }
+                    }
+
+                    if (!valid)
+                    {
+                        break;
+                    }
+
+                    secondExtractionIndex++;
+                }
+
+                if (secondExtractionIndex - 1 > firstExtractionIndex)
+                {
+                    var last = extractorResults[secondExtractionIndex - 1];
+                    var node = new ExtractResult();
+                    node.Start = extractorResults[firstExtractionIndex].Start;
+                    node.Length = last.Start + last.Length - node.Start;
+                    node.Text = text.Substring(node.Start ?? 0, node.Length ?? 0);
+                    node.Type = extractorResults[firstExtractionIndex].Type;
+
+                    // Add multiple duration type to extract result
+                    string type = Constants.MultipleDuration_DateTime; // Default type
+                    if (timeUnit == totalUnit)
+                    {
+                        type = Constants.MultipleDuration_Time;
+                    }
+                    else if (timeUnit == 0)
+                    {
+                        type = Constants.MultipleDuration_Date;
+                    }
+
+                    node.Data = type;
+
+                    ret.Add(node);
+                }
+                else
+                {
+                    ret.Add(extractorResults[firstExtractionIndex]);
+                }
+
+                firstExtractionIndex = secondExtractionIndex;
+            }
+
+            return ret;
+        }
+
+        /// <summary>
+        /// Matches durations expressed without a number ("all day", "half year", "few days", ...).
+        /// Every result is flagged with <c>HasMod</c> metadata.
+        /// </summary>
+        private List<ExtractResult> ImplicitDuration(string text)
+        {
+            var ret = new List<Token>();
+
+            // handle "all day", "all year"
+            ret.AddRange(Token.GetTokenFromRegex(config.AllRegex, text));
+
+            // handle "half day", "half year"
+            ret.AddRange(Token.GetTokenFromRegex(config.HalfRegex, text));
+
+            // handle "next day", "last year"
+            ret.AddRange(Token.GetTokenFromRegex(config.RelativeDurationUnitRegex, text));
+
+            // handle "more day", "more year"
+            ret.AddRange(Token.GetTokenFromRegex(config.MoreOrLessRegex, text));
+
+            // handle "few days", "few months"
+            ret.AddRange(Token.GetTokenFromRegex(config.SomeRegex, text));
+
+            // handle "during/for the day/week/month/year"
+            if ((config.Options & DateTimeOptions.CalendarMode) != 0)
+            {
+                ret.AddRange(Token.GetTokenFromRegex(config.DuringRegex, text));
+            }
+
+            var result = new List<ExtractResult>();
+            foreach (var e in ret)
+            {
+                var node = new ExtractResult();
+                node.Start = e.Start;
+                node.Length = e.Length;
+                node.Text = text.Substring(node.Start ?? 0, node.Length ?? 0);
+                node.Type = ExtractorName;
+                node.Metadata = new Metadata { HasMod = true };
+
+                result.Add(node);
+            }
+
+            return result;
+        }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKHolidayExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKHolidayExtractor.cs
new file mode 100644
index 0000000000..7f34b1af5d
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKHolidayExtractor.cs
@@ -0,0 +1,57 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// CJK holiday extractor. Results are typed as dates and flagged with <c>IsHoliday</c> metadata.
+    /// </summary>
+    public class BaseCJKHolidayExtractor : IDateTimeExtractor
+    {
+        private const string ExtractorName = Constants.SYS_DATETIME_DATE; // "Date";
+
+        private readonly ICJKHolidayExtractorConfiguration config;
+
+        public BaseCJKHolidayExtractor(ICJKHolidayExtractorConfiguration config)
+        {
+            this.config = config;
+        }
+
+        /// <summary>Extracts using <see cref="DateObject.Now"/> as the reference time.</summary>
+        public List<ExtractResult> Extract(string text)
+        {
+            return Extract(text, DateObject.Now);
+        }
+
+        /// <summary>Extracts holidays from <paramref name="text"/>.</summary>
+        /// <param name="text">Text to scan.</param>
+        /// <param name="reference">Not used by this extractor.</param>
+        /// <returns>The holiday results, each carrying <c>IsHoliday</c> metadata.</returns>
+        public List<ExtractResult> Extract(string text, DateObject reference)
+        {
+            var ers = Token.MergeAllTokens(HolidayMatch(text), text, ExtractorName);
+            foreach (var er in ers)
+            {
+                er.Metadata = new Metadata
+                {
+                    IsHoliday = true,
+                };
+            }
+
+            return ers;
+        }
+
+        /// <summary>Returns one token per match of every configured holiday regex.</summary>
+        private List<Token> HolidayMatch(string text)
+        {
+            var ret = new List<Token>();
+            foreach (var regex in this.config.HolidayRegexes)
+            {
+                foreach (Match match in regex.Matches(text))
+                {
+                    ret.Add(new Token(match.Index, match.Index + match.Length));
+                }
+            }
+
+            return ret;
+        }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKMergedDateTimeExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKMergedDateTimeExtractor.cs
new file mode 100644
index 
0000000000..9bffc93b0d
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKMergedDateTimeExtractor.cs
@@ -0,0 +1,259 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Linq;
+using System.Text.RegularExpressions;
+
+using Microsoft.Recognizers.Text.Utilities;
+
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// CJK merged extractor: runs every configured date/time extractor, resolves overlaps between
+    /// their results and widens results with surrounding modifier words.
+    /// </summary>
+    public class BaseCJKMergedDateTimeExtractor : IDateTimeExtractor
+    {
+        private readonly ICJKMergedExtractorConfiguration config;
+
+        public BaseCJKMergedDateTimeExtractor(ICJKMergedExtractorConfiguration config)
+        {
+            this.config = config;
+        }
+
+        /// <summary>Extracts using <see cref="DateObject.Now"/> as the reference time.</summary>
+        public List<ExtractResult> Extract(string text)
+        {
+            return Extract(text, DateObject.Now);
+        }
+
+        /// <summary>Extracts every date/time entity from <paramref name="text"/>.</summary>
+        /// <param name="text">Text to scan.</param>
+        /// <param name="referenceTime">Reference time forwarded to the nested extractors.</param>
+        /// <returns>The combined results ordered by start offset.</returns>
+        public List<ExtractResult> Extract(string text, DateObject referenceTime)
+        {
+            var ret = this.config.DateExtractor.Extract(text, referenceTime);
+
+            // the order is important, since there is a problem in merging
+            AddTo(ret, this.config.TimeExtractor.Extract(text, referenceTime));
+            AddTo(ret, this.config.DurationExtractor.Extract(text, referenceTime));
+            AddTo(ret, this.config.DatePeriodExtractor.Extract(text, referenceTime));
+            AddTo(ret, this.config.DateTimeExtractor.Extract(text, referenceTime));
+            AddTo(ret, this.config.TimePeriodExtractor.Extract(text, referenceTime));
+            AddTo(ret, this.config.DateTimePeriodExtractor.Extract(text, referenceTime));
+            AddTo(ret, this.config.SetExtractor.Extract(text, referenceTime));
+            AddTo(ret, this.config.HolidayExtractor.Extract(text, referenceTime));
+
+            ret = FilterUnspecificDatePeriod(ret);
+
+            ret = ExtractResultExtension.FilterAmbiguity(ret, text, this.config.AmbiguityFiltersDict);
+
+            ret = AddMod(ret, text);
+
+            return ret.OrderBy(p => p.Start).ToList();
+        }
+
+        /// <summary>
+        /// Returns <paramref name="dst"/> without the entries whose text is contained in
+        /// <paramref name="result"/>'s text and that share its start or its end offset.
+        /// </summary>
+        private static List<ExtractResult> MoveOverlap(List<ExtractResult> dst, ExtractResult result)
+        {
+            return dst
+                .Where(item => !(result.Text.Contains(item.Text) &&
+                                 (result.Start == item.Start || result.Start + result.Length == item.Start + item.Length)))
+                .ToList();
+        }
+
+        /// <summary>Removes, in place, the results whose text matches <c>UnspecificDatePeriodRegex</c>.</summary>
+        private List<ExtractResult> FilterUnspecificDatePeriod(List<ExtractResult> ers)
+        {
+            ers.RemoveAll(o => this.config.UnspecificDatePeriodRegex.IsMatch(o.Text));
+            return ers;
+        }
+
+        /// <summary>
+        /// Widens each result with a modifier word found right before or right after it
+        /// (before/after/until/since/around/equal) and flags it with <c>HasMod</c> metadata.
+        /// </summary>
+        /// <param name="ers">Results to update in place.</param>
+        /// <param name="text">The text the results were extracted from.</param>
+        /// <returns>The same list instance.</returns>
+        private List<ExtractResult> AddMod(List<ExtractResult> ers, string text)
+        {
+            foreach (var er in ers)
+            {
+                // Both strings are computed once per result, before any widening takes place.
+                var beforeStr = text.Substring(0, er.Start ?? 0);
+                var afterStr = text.Substring((er.Start ?? 0) + (er.Length ?? 0));
+
+                var match = this.config.BeforeRegex.MatchBegin(afterStr, trim: true);
+                if (match.Success)
+                {
+                    ExtendRight(er, text, match.Index + match.Length);
+                }
+
+                match = this.config.AfterRegex.MatchBegin(afterStr, trim: true);
+                if (match.Success)
+                {
+                    ExtendRight(er, text, match.Index + match.Length);
+                }
+
+                match = this.config.UntilRegex.MatchEnd(beforeStr, trim: true);
+                if (match.Success)
+                {
+                    ExtendLeft(er, text, beforeStr.Length - match.Index);
+                }
+
+                match = this.config.UntilRegex.MatchBegin(afterStr, trim: true);
+                if (match.Success)
+                {
+                    ExtendRight(er, text, match.Index + match.Length);
+                }
+
+                match = this.config.SincePrefixRegex.MatchEnd(beforeStr, trim: true);
+                if (match.Success && AmbiguousRangeChecker(beforeStr, text, er))
+                {
+                    ExtendLeft(er, text, beforeStr.Length - match.Index);
+                }
+
+                match = this.config.SinceSuffixRegex.MatchBegin(afterStr, trim: true);
+                if (match.Success)
+                {
+                    ExtendRight(er, text, match.Index + match.Length);
+                }
+
+                match = this.config.AroundPrefixRegex.MatchEnd(beforeStr, trim: true);
+                if (match.Success && AmbiguousRangeChecker(beforeStr, text, er))
+                {
+                    ExtendLeft(er, text, beforeStr.Length - match.Index);
+                }
+
+                match = this.config.AroundSuffixRegex.MatchBegin(afterStr, trim: true);
+                if (match.Success)
+                {
+                    ExtendRight(er, text, match.Index + match.Length);
+                }
+
+                // The "equal" word is a prefix modifier, so like the other prefixes it is looked for
+                // at the END of the text preceding the entity. It used to be matched with MatchBegin,
+                // which accepted a word at the very start of the text and then widened the entity
+                // over everything in between.
+                // NOTE(review): assumes MatchEnd anchors the match at the end of its input, as the
+                // other prefix modifiers in this method rely on - confirm against RegExpUtility.
+                match = this.config.EqualRegex.MatchEnd(beforeStr, trim: true);
+                if (match.Success)
+                {
+                    ExtendLeft(er, text, beforeStr.Length - match.Index);
+                }
+            }
+
+            return ers;
+        }
+
+        /// <summary>Grows <paramref name="er"/> to the right by <paramref name="modLength"/> characters and flags it as modified.</summary>
+        private void ExtendRight(ExtractResult er, string text, int modLength)
+        {
+            er.Length += modLength;
+            er.Text = text.Substring(er.Start ?? 0, er.Length ?? 0);
+
+            er.Metadata = AssignModMetadata(er.Metadata);
+        }
+
+        /// <summary>Grows <paramref name="er"/> to the left by <paramref name="modLength"/> characters and flags it as modified.</summary>
+        private void ExtendLeft(ExtractResult er, string text, int modLength)
+        {
+            er.Length += modLength;
+            er.Start -= modLength;
+            er.Text = text.Substring(er.Start ?? 0, er.Length ?? 0);
+
+            er.Metadata = AssignModMetadata(er.Metadata);
+        }
+
+        /// <summary>
+        /// Adds every result of <paramref name="src"/> to <paramref name="dst"/>. A result that
+        /// overlaps an existing entry is dropped unless it is longer, in which case it replaces the
+        /// run of consecutive entries it overlaps.
+        /// </summary>
+        private void AddTo(List<ExtractResult> dst, List<ExtractResult> src)
+        {
+            foreach (var result in src)
+            {
+                var isFound = false;
+                int indexRm = -1, lengthRm = 1;
+                for (var i = 0; i < dst.Count; i++)
+                {
+                    if (!dst[i].IsOverlap(result))
+                    {
+                        continue;
+                    }
+
+                    // only the first overlapping entry decides what happens to the new result
+                    isFound = true;
+                    if (result.Length > dst[i].Length)
+                    {
+                        indexRm = i;
+                        var j = i + 1;
+                        while (j < dst.Count && dst[j].IsOverlap(result))
+                        {
+                            lengthRm++;
+                            j++;
+                        }
+                    }
+
+                    break;
+                }
+
+                if (!isFound)
+                {
+                    dst.Add(result);
+                }
+                else if (indexRm >= 0)
+                {
+                    dst.RemoveRange(indexRm, lengthRm);
+                    var tmpDst = MoveOverlap(dst, result);
+                    dst.Clear();
+                    dst.AddRange(tmpDst);
+                    dst.Insert(indexRm, result);
+                }
+            }
+        }
+
+        // Avoid adding mod for ambiguity cases, such as "从" in "从 ... 到 ..." should not add mod
+        // TODO: Revise PotentialAmbiguousRangeRegex to support cases like "从2015年起,哪所大学需要的分数在80到90之间"
+        private bool AmbiguousRangeChecker(string beforeStr, string text, ExtractResult er)
+        {
+            if (this.config.AmbiguousRangeModifierPrefix.MatchEnd(beforeStr, true).Success)
+            {
+                // the modifier is ambiguous when a potential range expression overlaps the entity
+                var matches = this.config.PotentialAmbiguousRangeRegex.Matches(text).Cast<Match>();
+                if (matches.Any(m => m.Index < er.Start + er.Length && m.Index + m.Length > er.Start))
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        /// <summary>Returns <paramref name="metadata"/> (created when null) with <c>HasMod</c> set.</summary>
+        private Metadata AssignModMetadata(Metadata metadata)
+        {
+            if (metadata == null)
+            {
+                metadata = new Metadata { HasMod = true };
+            }
+            else
+            {
+                metadata.HasMod = true;
+            }
+
+            return metadata;
+        }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKSetExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKSetExtractor.cs
new file mode 100644
index 0000000000..0e9091fc39
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKSetExtractor.cs
@@ -0,0 +1,116 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; + +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public class BaseCJKSetExtractor : IDateTimeExtractor + { + public static readonly string ExtractorName = Constants.SYS_DATETIME_SET; + + private readonly ICJKSetExtractorConfiguration config; + + public BaseCJKSetExtractor(ICJKSetExtractorConfiguration config) + { + this.config = config; + } + + public List Extract(string text) + { + return Extract(text, DateObject.Now); + } + + public List Extract(string text, DateObject referenceTime) + { + var tokens = new List(); + tokens.AddRange(MatchEachUnit(text)); + tokens.AddRange(MatchEachDuration(text, referenceTime)); + tokens.AddRange(MatchEach(this.config.DateExtractor, text, referenceTime)); + tokens.AddRange(MatchEach(this.config.DateTimeExtractor, text, referenceTime)); + tokens.AddRange(MatchEach(this.config.TimePeriodExtractor, text, referenceTime)); + tokens.AddRange(MatchEach(this.config.TimeExtractor, text, referenceTime)); + + return Token.MergeAllTokens(tokens, text, ExtractorName); + } + + public List MatchEachDuration(string text, DateObject referenceTime) + { + var ret = new List(); + + var ers = this.config.DurationExtractor.Extract(text, referenceTime); + foreach (var er in ers) + { + // "each last summer" doesn't make sense + if (this.config.LastRegex.IsMatch(er.Text)) + { + continue; + } + + var beforeStr = text.Substring(0, er.Start ?? 0); + var match = this.config.EachPrefixRegex.Match(beforeStr); + if (match.Success) + { + ret.Add(new Token(match.Index, er.Start + er.Length ?? 0)); + } + else + { + var afterStr = text.Substring(er.Start + er.Length ?? 0); + match = this.config.EachSuffixRegex.Match(afterStr); + if (match.Success) + { + ret.Add(new Token(er.Start ?? 0, er.Length + match.Length ?? 
00)); + } + } + } + + return ret; + } + + public List MatchEachUnit(string text) + { + var ret = new List(); + + // handle "each month" + var matches = this.config.EachUnitRegex.Matches(text); + foreach (Match match in matches) + { + ret.Add(new Token(match.Index, match.Index + match.Length)); + } + + return ret; + } + + public List MatchEach(IDateTimeExtractor extractor, string text, DateObject referenceTime) + { + var ret = new List(); + var ers = extractor.Extract(text, referenceTime); + foreach (var er in ers) + { + var beforeStr = text.Substring(0, er.Start ?? 0); + var match = this.config.EachPrefixRegex.Match(beforeStr); + if (match.Success) + { + ret.Add(new Token(match.Index, match.Index + match.Length + (er.Length ?? 0))); + } + else if (er.Type == Constants.SYS_DATETIME_TIME || er.Type == Constants.SYS_DATETIME_DATE) + { + // Cases like "every day at 2pm" or "every year on April 15th" + var eachRegex = er.Type == Constants.SYS_DATETIME_TIME ? this.config.EachDayRegex : this.config.EachDateUnitRegex; + match = eachRegex.Match(beforeStr); + if (match.Success) + { + ret.Add(new Token(match.Index, match.Index + match.Length + (er.Length ?? 
0))); + } + } + } + + return ret; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseBaseDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKTimeExtractor.cs similarity index 57% rename from .NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseBaseDateTimeExtractorConfiguration.cs rename to .NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKTimeExtractor.cs index 8977f75124..8e3af4a1af 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseBaseDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKTimeExtractor.cs @@ -1,91 +1,122 @@ -using System.Collections.Generic; -using System.Collections.Immutable; -using System.Linq; -using System.Text.RegularExpressions; -using Microsoft.Recognizers.Text.DateTime.Utilities; -using DateObject = System.DateTime; - -namespace Microsoft.Recognizers.Text.DateTime.Japanese -{ - public abstract class JapaneseBaseDateTimeExtractorConfiguration : IDateTimeExtractor - { - internal abstract ImmutableDictionary Regexes { get; } - - protected virtual string ExtractType { get; } = string.Empty; - - public List Extract(string text) - { - return Extract(text, DateObject.Now); - } - - public virtual List Extract(string source, DateObject referenceTime) - { - if (string.IsNullOrEmpty(source)) - { - return new List(); - } - - var result = new List(); - var matchSource = new Dictionary(); - var matched = new bool[source.Length]; - for (var i = 0; i < source.Length; i++) - { - matched[i] = false; - } - - foreach (var collection in Regexes.ToDictionary(o => o.Key.Matches(source), p => p.Value)) - { - foreach (Match m in collection.Key) - { - for (var j = 0; j < m.Length; j++) - { - matched[m.Index + j] = true; - } - - // Keep Source Data for extra information - matchSource.Add(m, collection.Value); - } - } - - var last = -1; - for (var i = 0; i < 
source.Length; i++) - { - if (matched[i]) - { - if (i + 1 == source.Length || !matched[i + 1]) - { - var start = last + 1; - var length = i - last; - var substr = source.Substring(start, length); - - if (matchSource.Keys.Any(o => o.Index == start && o.Length == length)) - { - var srcMatch = matchSource.Keys.First(o => o.Index == start && o.Length == length); - var er = new ExtractResult - { - Start = start, - Length = length, - Text = substr, - Type = ExtractType, - Data = matchSource.ContainsKey(srcMatch) ? - new DateTimeExtra - { - NamedEntity = srcMatch.Groups, - Type = matchSource[srcMatch], - } - : null, - }; - result.Add(er); - } - } - } - else - { - last = i; - } - } - - return result; - } - } -} \ No newline at end of file +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.InternalCache; + +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public class BaseCJKTimeExtractor : IDateTimeExtractor + { + public static readonly Regex HourRegex = + new Regex(BaseDateTime.HourRegex, RegexOptions.Singleline | RegexOptions.Compiled, RegexTimeOut); + + public static readonly Regex MinuteRegex = + new Regex(BaseDateTime.MinuteRegex, RegexOptions.Singleline | RegexOptions.Compiled, RegexTimeOut); + + public static readonly Regex SecondRegex = + new Regex(BaseDateTime.SecondRegex, RegexOptions.Singleline | RegexOptions.Compiled, RegexTimeOut); + + private const string ExtractorName = Constants.SYS_DATETIME_TIME; // "Time"; + + private static readonly ResultsCache ResultsCache = new ResultsCache(); + + private readonly string keyPrefix; + + private readonly ICJKTimeExtractorConfiguration config; + + public 
BaseCJKTimeExtractor(ICJKTimeExtractorConfiguration config) + { + this.config = config; + keyPrefix = string.Intern(config.Options + "_" + config.LanguageMarker); + } + + protected static TimeSpan RegexTimeOut => DateTimeRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + + public List Extract(string text) + { + return Extract(text, DateObject.Now); + } + + public virtual List Extract(string source, DateObject referenceTime) + { + if (string.IsNullOrEmpty(source)) + { + return new List(); + } + + var result = new List(); + var matchSource = new Dictionary(); + var matched = new bool[source.Length]; + for (var i = 0; i < source.Length; i++) + { + matched[i] = false; + } + + foreach (var collection in this.config.Regexes.ToDictionary(o => o.Key.Matches(source), p => p.Value)) + { + foreach (Match m in collection.Key) + { + for (var j = 0; j < m.Length; j++) + { + matched[m.Index + j] = true; + } + + // Keep Source Data for extra information + matchSource.Add(m, collection.Value); + } + } + + var last = -1; + for (var i = 0; i < source.Length; i++) + { + if (matched[i]) + { + if (i + 1 == source.Length || !matched[i + 1]) + { + var start = last + 1; + var length = i - last; + var substr = source.Substring(start, length); + + if (matchSource.Keys.Any(o => o.Index == start && o.Length == length)) + { + var srcMatch = matchSource.Keys.First(o => o.Index == start && o.Length == length); + var er = new ExtractResult + { + Start = start, + Length = length, + Text = substr, + Type = ExtractorName, + Data = matchSource.ContainsKey(srcMatch) ? 
+ new DateTimeExtra + { + NamedEntity = srcMatch.Groups, + Type = matchSource[srcMatch], + } + : null, + }; + result.Add(er); + } + } + } + else + { + last = i; + } + } + + result = ExtractResultExtension.FilterAmbiguity(result, source, this.config.AmbiguityTimeFiltersDict); + + return result; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseBaseDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKTimePeriodExtractor.cs similarity index 65% rename from .NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseBaseDateTimeExtractorConfiguration.cs rename to .NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKTimePeriodExtractor.cs index 8ff935743c..31af790412 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseBaseDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKTimePeriodExtractor.cs @@ -1,92 +1,108 @@ -using System.Collections.Generic; -using System.Collections.Immutable; -using System.Linq; -using System.Text.RegularExpressions; -using Microsoft.Recognizers.Text.DateTime.Utilities; - -using DateObject = System.DateTime; - -namespace Microsoft.Recognizers.Text.DateTime.Chinese -{ - public abstract class ChineseBaseDateTimeExtractorConfiguration : IDateTimeExtractor - { - internal abstract ImmutableDictionary Regexes { get; } - - protected virtual string ExtractType { get; } = string.Empty; - - public List Extract(string text) - { - return Extract(text, DateObject.Now); - } - - public virtual List Extract(string source, DateObject referenceTime) - { - if (string.IsNullOrEmpty(source)) - { - return new List(); - } - - var result = new List(); - var matchSource = new Dictionary(); - var matched = new bool[source.Length]; - for (var i = 0; i < source.Length; i++) - { - matched[i] = false; - } - - foreach (var collection in Regexes.ToDictionary(o => o.Key.Matches(source), p => 
p.Value)) - { - foreach (Match m in collection.Key) - { - for (var j = 0; j < m.Length; j++) - { - matched[m.Index + j] = true; - } - - // Keep Source Data for extra information - matchSource.Add(m, collection.Value); - } - } - - var last = -1; - for (var i = 0; i < source.Length; i++) - { - if (matched[i]) - { - if (i + 1 == source.Length || !matched[i + 1]) - { - var start = last + 1; - var length = i - last; - var substr = source.Substring(start, length); - - if (matchSource.Keys.Any(o => o.Index == start && o.Length == length)) - { - var srcMatch = matchSource.Keys.First(o => o.Index == start && o.Length == length); - var er = new ExtractResult - { - Start = start, - Length = length, - Text = substr, - Type = ExtractType, - Data = matchSource.ContainsKey(srcMatch) ? - new DateTimeExtra - { - NamedEntity = srcMatch.Groups, - Type = matchSource[srcMatch], - } - : null, - }; - result.Add(er); - } - } - } - else - { - last = i; - } - } - - return result; - } - } -} \ No newline at end of file +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.InternalCache; +using Microsoft.Recognizers.Text.Utilities; +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public class BaseCJKTimePeriodExtractor : IDateTimeExtractor + { + public static readonly string ExtractorName = Constants.SYS_DATETIME_TIMEPERIOD; // "TimePeriod"; + + private static readonly ResultsCache ResultsCache = new ResultsCache(); + + private readonly ICJKTimePeriodExtractorConfiguration config; + + private readonly string keyPrefix; + + public BaseCJKTimePeriodExtractor(ICJKTimePeriodExtractorConfiguration config) + { + this.config = config; + keyPrefix = string.Intern(config.Options + "_" + config.LanguageMarker); + } + + public List Extract(string text) + { + return Extract(text, DateObject.Now); + } + + public virtual List Extract(string source, DateObject referenceTime) + { + if (string.IsNullOrEmpty(source)) + { + return new List(); + } + + var result = new List(); + var matchSource = new Dictionary(); + var matched = new bool[source.Length]; + for (var i = 0; i < source.Length; i++) + { + matched[i] = false; + } + + foreach (var collection in this.config.Regexes.ToDictionary(o => o.Key.Matches(source), p => p.Value)) + { + foreach (Match m in collection.Key) + { + for (var j = 0; j < m.Length; j++) + { + matched[m.Index + j] = true; + } + + // Keep Source Data for extra information + matchSource.Add(m, collection.Value); + } + } + + var last = -1; + for (var i = 0; i < source.Length; i++) + { + if (matched[i]) + { + if (i + 1 == source.Length || !matched[i + 1]) + { + var start = last + 1; + var length = i - last; + var substr = source.Substring(start, length); + + if (matchSource.Keys.Any(o => o.Index == start && o.Length == length)) + { + var srcMatch = matchSource.Keys.First(o => o.Index == start && o.Length == 
length); + var er = new ExtractResult + { + Start = start, + Length = length, + Text = substr, + Type = ExtractorName, + Data = matchSource.ContainsKey(srcMatch) ? + new DateTimeExtra + { + NamedEntity = srcMatch.Groups, + Type = matchSource[srcMatch], + } + : null, + }; + result.Add(er); + } + } + } + else + { + last = i; + } + } + + // Remove common ambiguous cases + result = ExtractResultExtension.FilterAmbiguity(result, source, this.config.AmbiguityTimePeriodFiltersDict); + + return result; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDateExtractorConfiguration.cs new file mode 100644 index 0000000000..47aaa439c6 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDateExtractorConfiguration.cs @@ -0,0 +1,32 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.DateTime.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public interface ICJKDateExtractorConfiguration : IDateTimeOptionsConfiguration + { + IEnumerable DateRegexList { get; } + + IEnumerable ImplicitDateList { get; } + + Regex DateTimePeriodUnitRegex { get; } + + Regex BeforeRegex { get; } + + Regex AfterRegex { get; } + + Regex WeekDayStartEnd { get; } + + Regex RangeConnectorSymbolRegex { get; } + + IDateTimeExtractor DurationExtractor { get; } + + Dictionary AmbiguityDateFiltersDict { get; } + + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDatePeriodExtractorConfiguration.cs new file mode 100644 index 0000000000..83d1cc7df4 --- /dev/null +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDatePeriodExtractorConfiguration.cs @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public interface ICJKDatePeriodExtractorConfiguration : IDateTimeOptionsConfiguration + { + IEnumerable SimpleCasesRegexes { get; } + + Regex TillRegex { get; } + + Regex DateUnitRegex { get; } + + Regex RangePrefixRegex { get; } + + Regex RangeSuffixRegex { get; } + + Regex FutureRegex { get; } + + Regex PastRegex { get; } + + Regex FirstLastOfYearRegex { get; } + + Regex UnitRegex { get; } + + Regex NumberCombinedWithUnit { get; } + + Regex FollowedUnit { get; } + + IDateTimeExtractor DatePointExtractor { get; } + + IDateTimeExtractor DurationExtractor { get; } + + IExtractor IntegerExtractor { get; } + + Dictionary AmbiguityFiltersDict { get; } + + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDateTimeExtractorConfiguration.cs new file mode 100644 index 0000000000..dccdb604bc --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDateTimeExtractorConfiguration.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.DateTime.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public interface ICJKDateTimeExtractorConfiguration : IDateTimeOptionsConfiguration + { + Regex NowRegex { get; } + + Regex PrepositionRegex { get; } + + Regex NightRegex { get; } + + Regex TimeOfSpecialDayRegex { get; } + + Regex TimeOfDayRegex { get; } + + Regex BeforeRegex { get; } + + Regex AfterRegex { get; } + + Regex ConnectorRegex { get; } + + IDateTimeExtractor DurationExtractor { get; } + + IDateTimeExtractor DatePointExtractor { get; } + + IDateTimeExtractor TimePointExtractor { get; } + + Dictionary AmbiguityDateTimeFiltersDict { get; } + + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDateTimePeriodExtractorConfiguration.cs new file mode 100644 index 0000000000..ca4ad914b8 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDateTimePeriodExtractorConfiguration.cs @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public interface ICJKDateTimePeriodExtractorConfiguration : IDateTimeOptionsConfiguration + { + Regex PrepositionRegex { get; } + + Regex TillRegex { get; } + + Regex SpecificTimeOfDayRegex { get; } + + Regex TimeOfDayRegex { get; } + + Regex FollowedUnit { get; } + + Regex UnitRegex { get; } + + Regex PastRegex { get; } + + Regex FutureRegex { get; } + + Regex TimePeriodLeftRegex { get; } + + Regex RelativeRegex { get; } + + Regex RestOfDateRegex { get; } + + Regex AmPmDescRegex { get; } + + Regex ThisRegex { get; } + + Regex BeforeAfterRegex { get; } + + IExtractor CardinalExtractor { get; } + + IDateTimeExtractor SingleDateExtractor { get; } + + IDateTimeExtractor SingleTimeExtractor { get; } + + IDateTimeExtractor SingleDateTimeExtractor { get; } + + IDateTimeExtractor DurationExtractor { get; } + + IDateTimeExtractor TimePeriodExtractor { get; } + + bool GetFromTokenIndex(string text, out int index); + + bool HasConnectorToken(string text); + + bool GetBetweenTokenIndex(string text, out int index); + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDurationExtractorConfiguration.cs new file mode 100644 index 0000000000..d16a2a1d5f --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDurationExtractorConfiguration.cs @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public interface ICJKDurationExtractorConfiguration : IDateTimeOptionsConfiguration + { + Regex DurationUnitRegex { get; } + + Regex DurationConnectorRegex { get; } + + Regex YearRegex { get; } + + Regex AllRegex { get; } + + Regex HalfRegex { get; } + + Regex RelativeDurationUnitRegex { get; } + + Regex DuringRegex { get; } + + Regex SomeRegex { get; } + + Regex MoreOrLessRegex { get; } + + IExtractor InternalExtractor { get; } + + Dictionary UnitMap { get; } + + Dictionary UnitValueMap { get; } + + Dictionary AmbiguityDurationFiltersDict { get; } + + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKHolidayExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKHolidayExtractorConfiguration.cs new file mode 100644 index 0000000000..7d25c412cc --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKHolidayExtractorConfiguration.cs @@ -0,0 +1,13 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public interface ICJKHolidayExtractorConfiguration : IDateTimeOptionsConfiguration + { + IEnumerable HolidayRegexes { get; } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKMergedExtractorConfiguration.cs new file mode 100644 index 0000000000..65b0280ba3 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKMergedExtractorConfiguration.cs @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Text.Matcher; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public interface ICJKMergedExtractorConfiguration : IDateTimeOptionsConfiguration + { + IDateTimeExtractor DateExtractor { get; } + + IDateTimeExtractor TimeExtractor { get; } + + IDateTimeExtractor DateTimeExtractor { get; } + + IDateTimeExtractor DatePeriodExtractor { get; } + + IDateTimeExtractor TimePeriodExtractor { get; } + + IDateTimeExtractor DateTimePeriodExtractor { get; } + + IDateTimeExtractor DurationExtractor { get; } + + IDateTimeExtractor SetExtractor { get; } + + IDateTimeExtractor HolidayExtractor { get; } + + Regex AfterRegex { get; } + + Regex BeforeRegex { get; } + + Regex UnspecificDatePeriodRegex { get; } + + Regex SinceSuffixRegex { get; } + + Regex SincePrefixRegex { get; } + + Regex AroundSuffixRegex { get; } + + Regex AroundPrefixRegex { get; } + + Regex UntilRegex { get; } + + Regex EqualRegex { get; } + + Regex PotentialAmbiguousRangeRegex { get; } + + Regex AmbiguousRangeModifierPrefix { get; } + + Dictionary AmbiguityFiltersDict { get; } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKSetExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKSetExtractorConfiguration.cs new file mode 100644 index 0000000000..c74f8f015d --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKSetExtractorConfiguration.cs @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public interface ICJKSetExtractorConfiguration : IDateTimeOptionsConfiguration + { + Regex LastRegex { get; } + + Regex EachPrefixRegex { get; } + + Regex EachSuffixRegex { get; } + + Regex EachUnitRegex { get; } + + Regex UnitRegex { get; } + + Regex EachDayRegex { get; } + + Regex EachDateUnitRegex { get; } + + IDateTimeExtractor DurationExtractor { get; } + + IDateTimeExtractor TimeExtractor { get; } + + IDateTimeExtractor DateExtractor { get; } + + IDateTimeExtractor DateTimeExtractor { get; } + + IDateTimeExtractor DatePeriodExtractor { get; } + + IDateTimeExtractor TimePeriodExtractor { get; } + + IDateTimeExtractor DateTimePeriodExtractor { get; } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKTimeExtractorConfiguration.cs new file mode 100644 index 0000000000..71a6cf3882 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKTimeExtractorConfiguration.cs @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public interface ICJKTimeExtractorConfiguration : IDateTimeOptionsConfiguration + { + ImmutableDictionary Regexes { get; } + + Dictionary AmbiguityTimeFiltersDict { get; } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKTimePeriodExtractorConfiguration.cs new file mode 100644 index 0000000000..f7f46d4270 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKTimePeriodExtractorConfiguration.cs @@ -0,0 +1,17 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public interface ICJKTimePeriodExtractorConfiguration : IDateTimeOptionsConfiguration + { + ImmutableDictionary Regexes { get; } + + Dictionary AmbiguityTimePeriodFiltersDict { get; } + + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateExtractor.cs index 2005a6753b..b7fec065ec 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateExtractor.cs @@ -1,4 +1,5 @@ -// Enable GetYearFromText method +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
using System.Text.RegularExpressions; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateExtractorConfiguration.cs index a2243406a8..ea750d36de 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -51,6 +54,8 @@ public interface IDateExtractorConfiguration : IDateTimeOptionsConfiguration Regex RangeConnectorSymbolRegex { get; } + Regex BeforeAfterRegex { get; } + IExtractor IntegerExtractor { get; } IExtractor OrdinalExtractor { get; } @@ -59,6 +64,8 @@ public interface IDateExtractorConfiguration : IDateTimeOptionsConfiguration IDateTimeExtractor DurationExtractor { get; } + IDateTimeExtractor HolidayExtractor { get; } + IDateTimeUtilityConfiguration UtilityConfiguration { get; } IImmutableDictionary DayOfWeek { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDatePeriodExtractorConfiguration.cs index 0ad76b4fe7..b1b9f6f525 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDatePeriodExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime @@ -59,6 +62,10 @@ public interface IDatePeriodExtractorConfiguration : IDateTimeOptionsConfigurati Regex NowRegex { get; } + Regex FirstLastRegex { get; } + + Regex OfYearRegex { get; } + bool CheckBothBeforeAfter { get; } IDateExtractor DatePointExtractor { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeAltExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeAltExtractorConfiguration.cs index 274fbaa0be..501f66da5b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeAltExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeAltExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeExtractor.cs index b0326af918..7271a1e5e5 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeExtractor.cs @@ -1,4 +1,5 @@ -// Enable a reference time pass to the extractor +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
using System.Collections.Generic; using DateObject = System.DateTime; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeExtractorConfiguration.cs index 82ab3144b6..7447be1c0c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime @@ -45,6 +48,8 @@ public interface IDateTimeExtractorConfiguration : IDateTimeOptionsConfiguration IDateTimeUtilityConfiguration UtilityConfiguration { get; } + IDateTimeExtractor HolidayExtractor { get; } + bool IsConnector(string text); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeListExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeListExtractor.cs index 784c486bf0..83e8e0d49c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeListExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeListExtractor.cs @@ -1,4 +1,5 @@ -// A type of Extractors receiving ExtractResult List as its input +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
using System.Collections.Generic; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimePeriodExtractorConfiguration.cs index bc8f86f6d5..17a518de72 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimePeriodExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime @@ -57,6 +60,8 @@ public interface IDateTimePeriodExtractorConfiguration : IDateTimeOptionsConfigu Regex AfterRegex { get; } + Regex TasksmodeMealTimeofDayRegex { get; } + bool CheckBothBeforeAfter { get; } IExtractor CardinalExtractor { get; } @@ -73,6 +78,8 @@ public interface IDateTimePeriodExtractorConfiguration : IDateTimeOptionsConfigu IDateTimeExtractor TimeZoneExtractor { get; } + IDateTimeExtractor HolidayExtractor { get; } + bool GetFromTokenIndex(string text, out int index); bool HasConnectorToken(string text); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeZoneExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeZoneExtractor.cs index dc223f3bc6..e6deee24bd 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeZoneExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeZoneExtractor.cs @@ -1,5 +1,7 @@ -using System.Collections.Generic; -using DateObject = System.DateTime; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.DateTime { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDurationExtractorConfiguration.cs index 0c1bbd5d92..cb0008a363 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDurationExtractorConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime @@ -35,6 +39,10 @@ public interface IDurationExtractorConfiguration : IDateTimeOptionsConfiguration Regex MoreThanRegex { get; } + Regex ModPrefixRegex { get; } + + Regex ModSuffixRegex { get; } + Regex SpecialNumberUnitRegex { get; } bool CheckBothBeforeAfter { get; } @@ -45,5 +53,7 @@ public interface IDurationExtractorConfiguration : IDateTimeOptionsConfiguration IImmutableDictionary UnitValueMap { get; } + Dictionary AmbiguityFiltersDict { get; } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IHolidayExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IHolidayExtractorConfiguration.cs index bb24988f1b..c36a1e0feb 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IHolidayExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IHolidayExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IMergedExtractorConfiguration.cs index 6d54736b7e..89e231a847 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IMergedExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Matcher; @@ -33,6 +36,11 @@ public interface IMergedExtractorConfiguration : IDateTimeOptionsConfiguration IEnumerable TermFilterRegexes { get; } + /* TasksModeMentionFilters should be used only under TasksMode. + Some scenarios that don't need to be classified are suppressed. 
+ */ + Regex TasksModeMentionFilters { get; } + Regex AfterRegex { get; } Regex BeforeRegex { get; } @@ -64,8 +72,12 @@ public interface IMergedExtractorConfiguration : IDateTimeOptionsConfiguration // Regex to act as umbrella for key terms so that sentences that clearly don't have entities can be rejected quickly Regex FailFastRegex { get; } + Regex YearRegex { get; } + StringMatcher SuperfluousWordMatcher { get; } Dictionary AmbiguityFiltersDict { get; } + + bool CheckBothBeforeAfter { get; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ISetExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ISetExtractorConfiguration.cs index 8b2ab880f0..608841495b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ISetExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ISetExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime @@ -21,6 +24,8 @@ public interface ISetExtractorConfiguration : IDateTimeOptionsConfiguration Regex SetEachRegex { get; } + bool CheckBothBeforeAfter { get; } + IDateTimeExtractor DurationExtractor { get; } IDateTimeExtractor TimeExtractor { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ITimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ITimeExtractorConfiguration.cs index fa914b111c..1e7be72f53 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ITimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ITimeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime @@ -14,5 +17,10 @@ public interface ITimeExtractorConfiguration : IDateTimeOptionsConfiguration Regex IshRegex { get; } Regex TimeBeforeAfterRegex { get; } + + string TimeTokenPrefix { get; } + + Dictionary AmbiguityFiltersDict { get; } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ITimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ITimePeriodExtractorConfiguration.cs index 2e9b04d880..497599df90 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ITimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ITimePeriodExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime @@ -30,5 +33,7 @@ public interface ITimePeriodExtractorConfiguration : IDateTimeOptionsConfigurati bool IsConnectorToken(string text); bool GetBetweenTokenIndex(string text, out int index); + + List ApplyPotentialPeriodAmbiguityHotfix(string text, List timePeriodErs); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ITimeZoneExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ITimeZoneExtractorConfiguration.cs index 158b5cefd9..9276b684e9 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ITimeZoneExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/ITimeZoneExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Matcher; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateExtractorConfiguration.cs index 9b95539446..22471157d4 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; @@ -13,77 +16,77 @@ namespace Microsoft.Recognizers.Text.DateTime.French public class FrenchDateExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateExtractorConfiguration { public static readonly Regex MonthRegex = - new Regex(DateTimeDefinitions.MonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex OnRegex = - new Regex(DateTimeDefinitions.OnRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.OnRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelaxedOnRegex = - new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisRegex = - new Regex(DateTimeDefinitions.ThisRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisRegex, RegexFlags, RegexTimeOut); public static readonly Regex LastDateRegex = - new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextDateRegex = - new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); // day before yesterday, day after tomorrow, next day, last day, the day yesterday, the day tomorrow public static readonly Regex SpecialDayRegex = - new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDayWithNumRegex = - new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex StrictWeekDay = - new Regex(DateTimeDefinitions.StrictWeekDay, RegexFlags); + new Regex(DateTimeDefinitions.StrictWeekDay, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayOfMonthRegex = - new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut); public 
static readonly Regex SpecialDate = - new Regex(DateTimeDefinitions.SpecialDate, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDate, RegexFlags, RegexTimeOut); public static readonly Regex RelativeWeekDayRegex = - new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex ForTheRegex = - new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags); + new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayAndDayOfMothRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayAndDayRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex StrictRelativeRegex = - new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixArticleRegex = - new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] ImplicitDateList = { @@ -92,37 +95,37 @@ public class FrenchDateExtractorConfiguration : BaseDateTimeOptionsConfiguration }; public static readonly Regex OfMonth = - new Regex(DateTimeDefinitions.OfMonth, RegexFlags); + new Regex(DateTimeDefinitions.OfMonth, RegexFlags, RegexTimeOut); public static readonly Regex MonthEnd = - new Regex(DateTimeDefinitions.MonthEnd, RegexFlags); + new Regex(DateTimeDefinitions.MonthEnd, RegexFlags, 
RegexTimeOut); public static readonly Regex WeekDayEnd = - new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayStart = - new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags, RegexTimeOut); public static readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeConnectorSymbolRegex = - new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags); + new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags, RegexTimeOut); public static readonly ImmutableDictionary DayOfWeek = DateTimeDefinitions.DayOfWeek.ToImmutableDictionary(); @@ -132,50 +135,64 @@ public class FrenchDateExtractorConfiguration : BaseDateTimeOptionsConfiguration // @TODO move out to resources file public static readonly Regex NonDateUnitRegex = - new 
Regex(@"(?heures?|hrs|secondes?|secs?|minutes?|mins?)\b", RegexFlags); + new Regex(@"(?heures?|hrs|secondes?|secs?|minutes?|mins?)\b", RegexFlags, RegexTimeOut); + + public static readonly Regex BeforeAfterRegex = + new Regex(DateTimeDefinitions.BeforeAfterRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public FrenchDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.French.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.French.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new FrenchNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.French.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.French.OrdinalExtractor.GetInstance(numConfig); + NumberParser = new BaseNumberParser(new FrenchNumberParserConfiguration(numConfig)); + DurationExtractor = new BaseDurationExtractor(new FrenchDurationExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new FrenchHolidayExtractorConfiguration(this)); UtilityConfiguration = new FrenchDatetimeUtilityConfiguration(); // 3-23-2017 - var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags); + var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags, RegexTimeOut); // 23-3-2015 - var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags); + var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags, RegexTimeOut); // on 1.3 - var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags); + var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, 
RegexFlags, RegexTimeOut); // on 24-12 - var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags); + var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags, RegexTimeOut); // 7/23 - var dateRegex7 = new Regex(DateTimeDefinitions.DateExtractor7, RegexFlags); + var dateRegex7 = new Regex(DateTimeDefinitions.DateExtractor7, RegexFlags, RegexTimeOut); // 23/7 - var dateRegex9 = new Regex(DateTimeDefinitions.DateExtractor9, RegexFlags); + var dateRegex9 = new Regex(DateTimeDefinitions.DateExtractor9, RegexFlags, RegexTimeOut); // 2015-12-23 - var dateRegexA = new Regex(DateTimeDefinitions.DateExtractorA, RegexFlags); + var dateRegexA = new Regex(DateTimeDefinitions.DateExtractorA, RegexFlags, RegexTimeOut); DateRegexList = new List { // (Sunday,)? April 5 - new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags, RegexTimeOut), // (Sunday,)? April 5, 2016 - new Regex(DateTimeDefinitions.DateExtractor2, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor2, RegexFlags, RegexTimeOut), // (Sunday,)? 
6th of April - new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags, RegexTimeOut), }; var enableDmy = DmyDateFormat || @@ -196,6 +213,8 @@ public FrenchDateExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DurationExtractor { get; } + public IDateTimeExtractor HolidayExtractor { get; } + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } IEnumerable IDateExtractorConfiguration.ImplicitDateList => ImplicitDateList; @@ -243,5 +262,7 @@ public FrenchDateExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IDateExtractorConfiguration.RangeUnitRegex => RangeUnitRegex; Regex IDateExtractorConfiguration.RangeConnectorSymbolRegex => RangeConnectorSymbolRegex; + + Regex IDateExtractorConfiguration.BeforeAfterRegex => BeforeAfterRegex; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDatePeriodExtractorConfiguration.cs index ca747b230d..97088c4688 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDatePeriodExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.French; @@ -13,185 +16,191 @@ public class FrenchDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfigu // until public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); // and public static readonly Regex RangeConnectorRegex = - new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex IllegalYearRegex = - new Regex(BaseDateTime.IllegalYearRegex, RegexFlags); + new Regex(BaseDateTime.IllegalYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); // this month, next month, last month public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WrittenMonthRegex = - new Regex(DateTimeDefinitions.WrittenMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WrittenMonthRegex, RegexFlags, RegexTimeOut); // in, of, no "on"... 
public static readonly Regex MonthSuffixRegex = - new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags, RegexTimeOut); // year, month, week, day public static readonly Regex DateUnitRegex = new Regex( - DateTimeDefinitions.DateUnitRegex, RegexFlags); + DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); // **In French, Past/Next is suffix, but interface enforces this // past, last, previous public static readonly Regex PastPrefixRegex = - new Regex(DateTimeDefinitions.PastSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PastSuffixRegex, RegexFlags, RegexTimeOut); // **In French, Past/Next is suffix, but interface enforces this // next, in public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex FutureSuffixRegex = - new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); // composite regexes public static readonly Regex SimpleCasesRegex = - new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags, RegexTimeOut); // between 'x' until 'y', from 'x' until 'y' public static readonly Regex MonthFrontSimpleCasesRegex = - new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthFrontBetweenRegex 
= - new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags, RegexTimeOut); public static readonly Regex BetweenRegex = - new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthWithYear = - new Regex(DateTimeDefinitions.MonthWithYear, RegexFlags); + new Regex(DateTimeDefinitions.MonthWithYear, RegexFlags, RegexTimeOut); // a cote de - 'next to', cette - 'this', dernier - 'last' (always after the noun, i.e annee dernier - 'last year' public static readonly Regex OneWordPeriodRegex = - new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumWithYear = - new Regex(DateTimeDefinitions.MonthNumWithYear, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumWithYear, RegexFlags, RegexTimeOut); // le/la - masc/fem 'the' public static readonly Regex WeekOfMonthRegex = - new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfYearRegex = - new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex FollowedDateUnit = - new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithDateUnit = - new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags, RegexTimeOut); // 1st quarter of this year, 2nd quarter of next/last year, etc public static readonly Regex QuarterRegex = - new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.QuarterRegex, RegexFlags, RegexTimeOut); public static readonly Regex QuarterRegexYearFront = - new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags); + new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags, RegexTimeOut); public static readonly Regex AllHalfYearRegex = - new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex SeasonRegex = - new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags); + new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags, RegexTimeOut); public static readonly Regex WhichWeekRegex = - new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags); + new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfRegex = - new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthOfRegex = - new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags, RegexTimeOut); // TODO: add regexs below public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RestOfDateRegex = - new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterEarlyPeriodRegex = 
- new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekWithWeekDayRangeRegex = - new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearPlusNumberRegex = - new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex DecadeWithCenturyRegex = - new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags); + new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearPeriodRegex = - new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex ComplexDatePeriodRegex = - new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDecadeRegex = - new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags, RegexTimeOut); public static readonly Regex ReferenceDatePeriodRegex = - new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoRegex, RegexFlags); + new Regex(DateTimeDefinitions.AgoRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterRegex, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, 
RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex CenturySuffixRegex = - new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FirstLastRegex = + new Regex(DateTimeDefinitions.FirstLastRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex OfYearRegex = + new Regex(DateTimeDefinitions.OfYearRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex FromRegex = - new Regex(DateTimeDefinitions.FromRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); private static readonly Regex ConnectorAndRegex = - new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags, RegexTimeOut); private static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex2, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex2, RegexFlags, RegexTimeOut); private static readonly Regex[] SimpleCasesRegexes = { @@ -220,10 +229,20 @@ public FrenchDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration conf : base(config) { DatePointExtractor = new BaseDateExtractor(new FrenchDateExtractorConfiguration(this)); - CardinalExtractor = Number.French.CardinalExtractor.GetInstance(); - OrdinalExtractor = Number.French.OrdinalExtractor.GetInstance(); DurationExtractor = new BaseDurationExtractor(new FrenchDurationExtractorConfiguration(this)); - 
NumberParser = new BaseNumberParser(new FrenchNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.French.CardinalExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.French.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new FrenchNumberParserConfiguration(numConfig)); } public IDateExtractor DatePointExtractor { get; } @@ -290,6 +309,10 @@ public FrenchDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration conf Regex IDatePeriodExtractorConfiguration.NowRegex => NowRegex; + Regex IDatePeriodExtractorConfiguration.FirstLastRegex => FirstLastRegex; + + Regex IDatePeriodExtractorConfiguration.OfYearRegex => OfYearRegex; + bool IDatePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; string[] IDatePeriodExtractorConfiguration.DurationDateRestrictions => DateTimeDefinitions.DurationDateRestrictions; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimeAltExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimeAltExtractorConfiguration.cs index dd89ddcc5d..4353e68d41 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimeAltExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimeAltExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.French; @@ -7,16 +10,16 @@ namespace Microsoft.Recognizers.Text.DateTime.French public class FrenchDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeAltExtractorConfiguration { public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangePrefixRegex = - new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] RelativePrefixList = { @@ -31,10 +34,10 @@ public class FrenchDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfig private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex OrRegex = - new Regex(DateTimeDefinitions.OrRegex, RegexFlags); + new Regex(DateTimeDefinitions.OrRegex, RegexFlags, RegexTimeOut); private static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public FrenchDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimeExtractorConfiguration.cs index 9b124429f6..ff8fc5ac32 100644 --- 
a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimeExtractorConfiguration.cs @@ -1,7 +1,11 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.French; using Microsoft.Recognizers.Text.DateTime.French.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.French { @@ -9,71 +13,83 @@ public class FrenchDateTimeExtractorConfiguration : BaseDateTimeOptionsConfigura { // à - time at which, en - length of time, dans - amount of time public static readonly Regex PrepositionRegex = - new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); // right now, as soon as possible, recently, previously public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); // in the evening, afternoon, morning, night public static readonly Regex SuffixRegex = - new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfDayRegex = - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificEndOfRegex = - new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificEndOfRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConnectorRegex = - new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex NumberAsTimeRegex = - new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateNumberConnectorRegex = - new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public FrenchDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.French.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.French.IntegerExtractor.GetInstance(numConfig); + DatePointExtractor = new BaseDateExtractor(new FrenchDateExtractorConfiguration(this)); TimePointExtractor = new BaseTimeExtractor(new FrenchTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new FrenchDurationExtractorConfiguration(this)); UtilityConfiguration = new FrenchDatetimeUtilityConfiguration(); + HolidayExtractor = new BaseHolidayExtractor(new FrenchHolidayExtractorConfiguration(this)); + } public IExtractor IntegerExtractor { get; } @@ -84,6 +100,8 @@ public FrenchDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + public IDateTimeExtractor HolidayExtractor { get; } + Regex IDateTimeExtractorConfiguration.NowRegex => NowRegex; Regex IDateTimeExtractorConfiguration.SuffixRegex => SuffixRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimePeriodExtractorConfiguration.cs 
b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimePeriodExtractorConfiguration.cs index ab4e7a0586..f16e13b545 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimePeriodExtractorConfiguration.cs @@ -1,56 +1,64 @@ -using System.Collections.Generic; -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +using System.Collections.Generic; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.French; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.French { public class FrenchDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimePeriodExtractorConfiguration { public static readonly Regex TimeNumberCombinedWithUnit = - new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex HyphenDateRegex = + new Regex(BaseDateTime.HyphenDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodTimeOfDayWithDateRegex = - new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeTimeUnitRegex = - new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RestOfDateTimeRegex = - new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex 
GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); public static readonly Regex MiddlePauseRegex = - new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags); + new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixDayRegex = new Regex(DateTimeDefinitions.PrefixDayRegex, RegexFlags | RegexOptions.RightToLeft); public static readonly Regex SuffixRegex = - new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDaysRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | 
RegexOptions.ExplicitCapture; @@ -62,35 +70,46 @@ public class FrenchDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsCon }; private static readonly Regex FromRegex = - new Regex(DateTimeDefinitions.FromRegex2, RegexFlags); + new Regex(DateTimeDefinitions.FromRegex2, RegexFlags, RegexTimeOut); private static readonly Regex ConnectorAndRegex = - new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags, RegexTimeOut); private static readonly Regex PeriodTimeOfDayRegex = - new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags, RegexTimeOut); private static readonly Regex PeriodSpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.PeriodSpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodSpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); private static readonly Regex TimeTimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); private static readonly Regex TimeFollowedUnit = - new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); public FrenchDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; - CardinalExtractor = Number.English.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.English.CardinalExtractor.GetInstance(numConfig); + SingleDateExtractor = new BaseDateExtractor(new FrenchDateExtractorConfiguration(this)); SingleTimeExtractor = new BaseTimeExtractor(new 
FrenchTimeExtractorConfiguration(this)); SingleDateTimeExtractor = new BaseDateTimeExtractor(new FrenchDateTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new FrenchDurationExtractorConfiguration(this)); TimePeriodExtractor = new BaseTimePeriodExtractor(new FrenchTimePeriodExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new FrenchTimeZoneExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new FrenchHolidayExtractorConfiguration(this)); + } public IEnumerable SimpleCasesRegex => SimpleCases; @@ -131,6 +150,8 @@ public FrenchDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration public IDateTimeExtractor TimeZoneExtractor { get; } + public IDateTimeExtractor HolidayExtractor { get; } + bool IDateTimePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; Regex IDateTimePeriodExtractorConfiguration.PrefixDayRegex => PrefixDayRegex; @@ -163,6 +184,8 @@ public FrenchDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration Regex IDateTimePeriodExtractorConfiguration.AfterRegex => AfterRegex; + Regex IDateTimePeriodExtractorConfiguration.TasksmodeMealTimeofDayRegex => null; + public bool GetFromTokenIndex(string text, out int index) { index = -1; @@ -191,7 +214,7 @@ public bool GetBetweenTokenIndex(string text, out int index) public bool HasConnectorToken(string text) { - return ConnectorAndRegex.IsMatch(text); + return ConnectorAndRegex.IsExactMatch(text, trim: true); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDurationExtractorConfiguration.cs index cdc24dfccf..35021c932c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDurationExtractorConfiguration.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDurationExtractorConfiguration.cs @@ -1,66 +1,86 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.French; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.French { public class FrenchDurationExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDurationExtractorConfiguration { public static readonly Regex DurationUnitRegex = - new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAndRegex = - new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationFollowedUnit = - new Regex(DateTimeDefinitions.DurationFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.DurationFollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithDurationUnit = - new Regex(DateTimeDefinitions.NumberCombinedWithDurationUnit, RegexFlags); + new Regex(DateTimeDefinitions.NumberCombinedWithDurationUnit, RegexFlags, RegexTimeOut); public static readonly Regex AnUnitRegex = - new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DuringRegex = - new Regex(DateTimeDefinitions.DuringRegex, RegexFlags); + new Regex(DateTimeDefinitions.DuringRegex, RegexFlags, RegexTimeOut); public static readonly Regex AllRegex = - new Regex(DateTimeDefinitions.AllRegex, RegexFlags); + new Regex(DateTimeDefinitions.AllRegex, RegexFlags, RegexTimeOut); public static readonly Regex HalfRegex = - new 
Regex(DateTimeDefinitions.HalfRegex, RegexFlags); + new Regex(DateTimeDefinitions.HalfRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConjunctionRegex = - new Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags, RegexTimeOut); // quelques = "a few, some," etc public static readonly Regex InexactNumberRegex = - new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex InexactNumberUnitRegex = - new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDurationUnitRegex = - new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationConnectorRegex = - new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModPrefixRegex = + new Regex(DateTimeDefinitions.ModPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModSuffixRegex = + new Regex(DateTimeDefinitions.ModSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialNumberUnitRegex = null; public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public FrenchDurationExtractorConfiguration(IDateTimeOptionsConfiguration 
config) : base(config) { - CardinalExtractor = Number.French.CardinalExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.French.CardinalExtractor.GetInstance(numConfig); UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary(); UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary(); } @@ -104,5 +124,11 @@ public FrenchDurationExtractorConfiguration(IDateTimeOptionsConfiguration config Regex IDurationExtractorConfiguration.MoreThanRegex => MoreThanRegex; Regex IDurationExtractorConfiguration.LessThanRegex => LessThanRegex; + + Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex; + + Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex; + + public Dictionary AmbiguityFiltersDict => null; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchHolidayExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchHolidayExtractorConfiguration.cs index 4e0d6f6c43..fe09071536 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchHolidayExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchHolidayExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.French; @@ -8,20 +11,20 @@ namespace Microsoft.Recognizers.Text.DateTime.French public class FrenchHolidayExtractorConfiguration : BaseDateTimeOptionsConfiguration, IHolidayExtractorConfiguration { public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex H1 = - new Regex(DateTimeDefinitions.HolidayRegex1, RegexFlags); + new Regex(DateTimeDefinitions.HolidayRegex1, RegexFlags, RegexTimeOut); public static readonly Regex H2 = - new Regex(DateTimeDefinitions.HolidayRegex2, RegexFlags); + new Regex(DateTimeDefinitions.HolidayRegex2, RegexFlags, RegexTimeOut); public static readonly Regex H3 = - new Regex(DateTimeDefinitions.HolidayRegex3, RegexFlags); + new Regex(DateTimeDefinitions.HolidayRegex3, RegexFlags, RegexTimeOut); // added to include more options, "fete des meres" mothers day, etc public static readonly Regex H4 = - new Regex(DateTimeDefinitions.HolidayRegex4, RegexFlags); + new Regex(DateTimeDefinitions.HolidayRegex4, RegexFlags, RegexTimeOut); public static readonly Regex[] HolidayRegexList = { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchMergedExtractorConfiguration.cs index 0f14210ac4..8e2dea6275 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchMergedExtractorConfiguration.cs @@ -1,8 +1,14 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; + using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.French; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Matcher; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.French { @@ -10,47 +16,50 @@ public class FrenchMergedExtractorConfiguration : BaseDateTimeOptionsConfigurati { // avant - 'before' public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); // ensuite/puis are for adverbs, i.e 'i ate and then i walked', so we'll use apres public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex SinceRegex = - new Regex(DateTimeDefinitions.SinceRegex, RegexFlags); + new Regex(DateTimeDefinitions.SinceRegex, RegexFlags, RegexTimeOut); public static readonly Regex AroundRegex = - new Regex(DateTimeDefinitions.AroundRegex, RegexFlags); + new Regex(DateTimeDefinitions.AroundRegex, RegexFlags, RegexTimeOut); public static readonly Regex EqualRegex = - new Regex(BaseDateTime.EqualRegex, RegexFlags); + new Regex(BaseDateTime.EqualRegex, RegexFlags, RegexTimeOut); // 'Je vais du lundi au mecredi' - I will go from monday to weds public static readonly Regex FromToRegex = - new Regex(DateTimeDefinitions.FromToRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromToRegex, RegexFlags, RegexTimeOut); public static readonly Regex SingleAmbiguousMonthRegex = - new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrepositionSuffixRegex = - new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmbiguousRangeModifierPrefix = - new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags); + new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags, RegexTimeOut); public static readonly Regex NumberEndingPattern = - new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags); + new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificDatePeriodRegex = - new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags, RegexTimeOut); - public static readonly Regex[] TermFilterRegexes = { }; + public static readonly Regex[] TermFilterRegexes = System.Array.Empty(); public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher(); + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public FrenchMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) @@ -67,7 +76,18 @@ public FrenchMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) HolidayExtractor = new BaseHolidayExtractor(new FrenchHolidayExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new FrenchTimeZoneExtractorConfiguration(this)); DateTimeAltExtractor = new BaseDateTimeAltExtractor(new FrenchDateTimeAltExtractorConfiguration(this)); - IntegerExtractor = Number.French.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = 
NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.French.IntegerExtractor.GetInstance(numConfig); + + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict); } public IDateExtractor DateExtractor { get; } @@ -94,7 +114,7 @@ public FrenchMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) public IExtractor IntegerExtractor { get; } - public Dictionary AmbiguityFiltersDict { get; } = null; + public Dictionary AmbiguityFiltersDict { get; } Regex IMergedExtractorConfiguration.AfterRegex => AfterRegex; @@ -124,10 +144,16 @@ public FrenchMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + public Regex FailFastRegex { get; } = null; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; StringMatcher IMergedExtractorConfiguration.SuperfluousWordMatcher => SuperfluousWordMatcher; + + bool IMergedExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + public Regex TasksModeMentionFilters { get; } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchSetExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchSetExtractorConfiguration.cs index b1a8f7863d..a902e5cb28 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchSetExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchSetExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.French; @@ -11,29 +14,29 @@ public class FrenchSetExtractorConfiguration : BaseDateTimeOptionsConfiguration, public static readonly string ExtractorName = Constants.SYS_DATETIME_SET; public static readonly Regex SetUnitRegex = - new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); // TODO: Decide between adjective and adverb, i.e monthly - 'mensuel' vs 'mensuellement' public static readonly Regex PeriodicRegex = - new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachUnitRegex = - new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachPrefixRegex = - new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachDayRegex = - new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetLastRegex = - new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetWeekDayRegex = - new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetEachRegex = - new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -63,6 +66,8 @@ public FrenchSetExtractorConfiguration(IDateTimeOptionsConfiguration config) 
public IDateTimeExtractor DateTimePeriodExtractor { get; } + bool ISetExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + Regex ISetExtractorConfiguration.LastRegex => SetLastRegex; Regex ISetExtractorConfiguration.EachPrefixRegex => EachPrefixRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimeExtractorConfiguration.cs index 330a532b12..58ac0bbe16 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimeExtractorConfiguration.cs @@ -1,7 +1,11 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.French; +using Microsoft.Recognizers.Definitions.Utilities; namespace Microsoft.Recognizers.Text.DateTime.French { @@ -10,112 +14,112 @@ public class FrenchTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration // part 1: smallest component // -------------------------------- public static readonly Regex DescRegex = - new Regex(DateTimeDefinitions.DescRegex, RegexFlags); + new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut); public static readonly Regex HourNumRegex = - new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex MinuteNumRegex = - new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags, RegexTimeOut); // part 2: middle level component // -------------------------------------- // handle "... 
heures (o'clock, en punto)" public static readonly Regex OclockRegex = - new Regex(DateTimeDefinitions.OclockRegex, RegexFlags); + new Regex(DateTimeDefinitions.OclockRegex, RegexFlags, RegexTimeOut); // handle "... après midi (afternoon, tarde)" public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); // handle "... dans la matinee (in the morning)" public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); // handle "half past ..." "a quarter to ..." // rename 'min' group to 'deltamin' public static readonly Regex LessThanOneHour = - new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags); + new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags, RegexTimeOut); public static readonly Regex WrittenTimeRegex = - new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags, RegexTimeOut); // TODO - will have change below // handle "six heures et demie" (six thirty), "six heures et vingt-et-un" (six twenty one) public static readonly Regex TimePrefix = - new Regex(DateTimeDefinitions.TimePrefix, RegexFlags); + new Regex(DateTimeDefinitions.TimePrefix, RegexFlags, RegexTimeOut); public static readonly Regex TimeSuffix = - new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags); + new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags, RegexTimeOut); public static readonly Regex BasicTime = - new Regex(DateTimeDefinitions.BasicTime, RegexFlags); + new Regex(DateTimeDefinitions.BasicTime, RegexFlags, RegexTimeOut); // handle special time such as 'at midnight', 'midnight', 'midday' // midnight - le minuit, la zero heure // midday - midi public static readonly Regex MidnightRegex = - new Regex(DateTimeDefinitions.MidnightRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidnightRegex, RegexFlags, 
RegexTimeOut); public static readonly Regex MidmorningRegex = - new Regex(DateTimeDefinitions.MidmorningRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidmorningRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidafternoonRegex = - new Regex(DateTimeDefinitions.MidafternoonRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidafternoonRegex, RegexFlags, RegexTimeOut); public static readonly Regex MiddayRegex = - new Regex(DateTimeDefinitions.MiddayRegex, RegexFlags); + new Regex(DateTimeDefinitions.MiddayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidTimeRegex = - new Regex(DateTimeDefinitions.MidTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidTimeRegex, RegexFlags, RegexTimeOut); // part 3: regex for time // -------------------------------------- // handle "at four" "at 3" public static readonly Regex AtRegex = - new Regex(DateTimeDefinitions.AtRegex, RegexFlags); + new Regex(DateTimeDefinitions.AtRegex, RegexFlags, RegexTimeOut); public static readonly Regex IshRegex = - new Regex(DateTimeDefinitions.IshRegex, RegexFlags); + new Regex(DateTimeDefinitions.IshRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConnectNumRegex = - new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeBeforeAfterRegex = - new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] TimeRegexList = { // (three min past)? seven|7|(seven thirty) pm - new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags, RegexTimeOut), // (three min past)? 3:00(:00)? (pm)? 
- new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags, RegexTimeOut), // (three min past)? 3.00 (pm)? - new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags, RegexTimeOut), // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? (in the night) - new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags, RegexTimeOut), // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags, RegexTimeOut), // (five thirty|seven|7|7:00(:00)?) (pm)? (in the night) - new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags, RegexTimeOut), // (in the night) at (five thirty|seven|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags, RegexTimeOut), // (in the night) (five thirty|seven|7|7:00(:00)?) (pm)? 
- new Regex(DateTimeDefinitions.TimeRegex8, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex8, RegexFlags, RegexTimeOut), - new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags, RegexTimeOut), - new Regex(DateTimeDefinitions.TimeRegex10, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex10, RegexFlags, RegexTimeOut), // 340pm ConnectNumRegex, @@ -141,5 +145,9 @@ public FrenchTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DurationExtractor { get; } public IDateTimeExtractor TimeZoneExtractor { get; } + + public string TimeTokenPrefix => DateTimeDefinitions.TimeTokenPrefix; + + public Dictionary AmbiguityFiltersDict => DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityTimeFiltersDict); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimePeriodExtractorConfiguration.cs index b2312292fc..0a30c108e4 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimePeriodExtractorConfiguration.cs @@ -1,9 +1,13 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.French; using Microsoft.Recognizers.Text.DateTime.French.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.French { @@ -12,66 +16,66 @@ public class FrenchTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfigu public static readonly string ExtractorName = Constants.SYS_DATETIME_TIMEPERIOD; // "TimePeriod"; public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); public static readonly Regex HourRegex = - new Regex(DateTimeDefinitions.HourRegex, RegexFlags); + new Regex(DateTimeDefinitions.HourRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodHourNumRegex = - new Regex(DateTimeDefinitions.PeriodHourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodHourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodDescRegex = - new Regex(DateTimeDefinitions.PeriodDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); public static readonly Regex PureNumFromTo = - new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags); + new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags, RegexTimeOut); public static readonly Regex PureNumBetweenAnd = - new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeFromTo = - new 
Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeBetweenAnd = - new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex PrepositionRegex = - new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfDayRegex = - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeFollowedUnit = - new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex TimeNumberCombinedWithUnit = - new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); public static readonly Regex GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex FromRegex = - new Regex(DateTimeDefinitions.FromRegex2, RegexFlags); + new Regex(DateTimeDefinitions.FromRegex2, RegexFlags, RegexTimeOut); private static readonly Regex 
ConnectorAndRegex = - new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags, RegexTimeOut); private static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex2, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex2, RegexFlags, RegexTimeOut); public FrenchTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) @@ -79,7 +83,17 @@ public FrenchTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration conf TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; SingleTimeExtractor = new BaseTimeExtractor(new FrenchTimeExtractorConfiguration(this)); UtilityConfiguration = new FrenchDatetimeUtilityConfiguration(); - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.English.IntegerExtractor.GetInstance(numConfig); + TimeZoneExtractor = new BaseTimeZoneExtractor(new FrenchTimeZoneExtractorConfiguration(this)); } @@ -135,5 +149,7 @@ public bool IsConnectorToken(string text) { return ConnectorAndRegex.IsMatch(text); } + + public List ApplyPotentialPeriodAmbiguityHotfix(string text, List timePeriodErs) => TimePeriodFunctions.ApplyPotentialPeriodAmbiguityHotfix(text, timePeriodErs); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimeZoneExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimeZoneExtractorConfiguration.cs index 3b80821ac7..caf2628dde 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimeZoneExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimeZoneExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using 
System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Matcher; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchCommonDateTimeParserConfiguration.cs index e4cddd2032..1a38c2007c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchCommonDateTimeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.French; using Microsoft.Recognizers.Text.DateTime.French.Utilities; @@ -26,12 +29,22 @@ public FrenchCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration con WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); - CardinalExtractor = Number.French.CardinalExtractor.GetInstance(); - IntegerExtractor = Number.French.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.French.OrdinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.French.CardinalExtractor.GetInstance(numConfig); + IntegerExtractor = Number.French.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.French.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new 
BaseNumberParser(new FrenchNumberParserConfiguration(numConfig)); - NumberParser = new BaseNumberParser(new FrenchNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); DateExtractor = new BaseDateExtractor(new FrenchDateExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new FrenchHolidayExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new FrenchTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new FrenchDateTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new FrenchDurationExtractorConfiguration(this)); @@ -42,6 +55,7 @@ public FrenchCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration con // DurationParser should be assigned first, as DateParser would reference the DurationParser DurationParser = new BaseDurationParser(new FrenchDurationParserConfiguration(this)); DateParser = new BaseDateParser(new FrenchDateParserConfiguration(this)); + HolidayTimeParser = new BaseHolidayParser(new FrenchHolidayParserConfiguration(this)); TimeParser = new BaseTimeParser(new FrenchTimeParserConfiguration(this)); DateTimeParser = new BaseDateTimeParser(new FrenchDateTimeParserConfiguration(this)); DatePeriodParser = new BaseDatePeriodParser(new FrenchDatePeriodParserConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateParserConfiguration.cs index 76cdc586d3..e842842da5 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -20,6 +24,8 @@ public FrenchDateParserConfiguration(ICommonDateTimeParserConfiguration config) DurationExtractor = config.DurationExtractor; DateExtractor = config.DateExtractor; DurationParser = config.DurationParser; + HolidayParser = new BaseHolidayParser(new FrenchHolidayParserConfiguration(this)); + DateRegexes = new FrenchDateExtractorConfiguration(this).DateRegexList; OnRegex = FrenchDateExtractorConfiguration.OnRegex; SpecialDayRegex = FrenchDateExtractorConfiguration.SpecialDayRegex; @@ -38,12 +44,14 @@ public FrenchDateParserConfiguration(ICommonDateTimeParserConfiguration config) RelativeMonthRegex = FrenchDateExtractorConfiguration.RelativeMonthRegex; StrictRelativeRegex = FrenchDateExtractorConfiguration.StrictRelativeRegex; YearSuffix = FrenchDateExtractorConfiguration.YearSuffix; + BeforeAfterRegex = FrenchDateExtractorConfiguration.BeforeAfterRegex; RelativeWeekDayRegex = FrenchDateExtractorConfiguration.RelativeWeekDayRegex; RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexOptions.Singleline); NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexOptions.Singleline); PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexOptions.Singleline); UpcomingPrefixRegex = new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexOptions.Singleline); PastPrefixRegex = new Regex(DateTimeDefinitions.PastPrefixRegex, RegexOptions.Singleline); + DayOfMonth = config.DayOfMonth; DayOfWeek = config.DayOfWeek; MonthOfYear = config.MonthOfYear; @@ -73,6 +81,8 @@ public FrenchDateParserConfiguration(ICommonDateTimeParserConfiguration config) public IDateTimeParser DurationParser { get; } + public IDateTimeParser HolidayParser { get; } + public IImmutableDictionary UnitMap { get; } public IEnumerable DateRegexes { get; } @@ -123,6 +133,10 @@ public 
FrenchDateParserConfiguration(ICommonDateTimeParserConfiguration config) public Regex PastPrefixRegex { get; } + public Regex BeforeAfterRegex { get; } + + public Regex TasksModeDurationToDatePatterns { get; } + public IImmutableDictionary DayOfMonth { get; } public IImmutableDictionary DayOfWeek { get; } @@ -151,31 +165,36 @@ public static int GetSwiftDay(string text) var swift = 0; + // @TODO move hardcoded values to resource files + // today - if (trimmedText.Equals("aujourd'hui") || trimmedText.Equals("auj")) + if (trimmedText.Equals("aujourd'hui", StringComparison.Ordinal) || + trimmedText.Equals("auj", StringComparison.Ordinal)) { swift = 0; } - else if (trimmedText.Equals("demain") || trimmedText.Equals("a2m1") || - trimmedText.Equals("lendemain") || trimmedText.Equals("jour suivant")) + else if (trimmedText.Equals("demain", StringComparison.Ordinal) || + trimmedText.Equals("a2m1", StringComparison.Ordinal) || + trimmedText.Equals("lendemain", StringComparison.Ordinal) || + trimmedText.Equals("jour suivant", StringComparison.Ordinal)) { swift = 1; } // yesterday - else if (trimmedText.Equals("hier")) + else if (trimmedText.Equals("hier", StringComparison.Ordinal)) { swift = -1; } - else if (trimmedText.EndsWith("après demain") || // day after tomorrow - trimmedText.EndsWith("après-demain")) + else if (trimmedText.EndsWith("après demain", StringComparison.Ordinal) || // day after tomorrow + trimmedText.EndsWith("après-demain", StringComparison.Ordinal)) { swift = 2; } - else if (trimmedText.StartsWith("avant-hier") || // day before yesterday - trimmedText.StartsWith("avant hier")) + else if (trimmedText.StartsWith("avant-hier", StringComparison.Ordinal) || // day before yesterday + trimmedText.StartsWith("avant hier", StringComparison.Ordinal)) { swift = -2; } // dernier - else if (trimmedText.EndsWith("dernier")) + else if (trimmedText.EndsWith("dernier", StringComparison.Ordinal)) { swift = -1; } @@ -186,13 +205,20 @@ public static int GetSwiftDay(string 
text) public int GetSwiftMonthOrYear(string text) { var trimmedText = text.Trim(); + var swift = 0; - if (trimmedText.EndsWith("prochaine") || trimmedText.EndsWith("prochain")) + + // @TODO move hardcoded values to resource files + + if (trimmedText.EndsWith("prochaine", StringComparison.Ordinal) || + trimmedText.EndsWith("prochain", StringComparison.Ordinal)) { swift = 1; } - else if (trimmedText.Equals("dernière") || trimmedText.Equals("dernières") || - trimmedText.Equals("derniere") || trimmedText.Equals("dernieres")) + else if (trimmedText.Equals("dernière", StringComparison.Ordinal) || + trimmedText.Equals("dernières", StringComparison.Ordinal) || + trimmedText.Equals("derniere", StringComparison.Ordinal) || + trimmedText.Equals("dernieres", StringComparison.Ordinal)) { swift = -1; } @@ -203,8 +229,13 @@ public int GetSwiftMonthOrYear(string text) public bool IsCardinalLast(string text) { var trimmedText = text.Trim(); - return trimmedText.Equals("dernière") || trimmedText.Equals("dernières") || - trimmedText.Equals("derniere") || trimmedText.Equals("dernieres"); + + // @TODO move hardcoded values to resource files + + return trimmedText.Equals("dernière", StringComparison.Ordinal) || + trimmedText.Equals("dernières", StringComparison.Ordinal) || + trimmedText.Equals("derniere", StringComparison.Ordinal) || + trimmedText.Equals("dernieres", StringComparison.Ordinal); } public string Normalize(string text) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDatePeriodParserConfiguration.cs index 1cb23f4a5c..b8314eba69 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDatePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System; +using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; @@ -10,25 +14,28 @@ public class FrenchDatePeriodParserConfiguration : BaseDateTimeOptionsConfigurat { // @TODO move to resources - French - relative public static readonly Regex NextPrefixRegex = - new Regex(@"(prochain|prochaine)\b", RegexFlags); + new Regex(@"(prochain|prochaine)\b", RegexFlags, RegexTimeOut); public static readonly Regex PastPrefixRegex = - new Regex(@"(dernier)\b", RegexFlags); + new Regex(@"(dernier)\b", RegexFlags, RegexTimeOut); public static readonly Regex ThisPrefixRegex = - new Regex(@"(ce|cette)\b", RegexFlags); + new Regex(@"(ce|cette)\b", RegexFlags, RegexTimeOut); public static readonly Regex NextSuffixRegex = - new Regex(DateTimeDefinitions.NextSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PastSuffixRegex = - new Regex(DateTimeDefinitions.PastSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PastSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeRegex = - new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificEndOfRangeRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AmbiguousPointRangeRegex = + new Regex(DateTimeDefinitions.AmbiguousPointRangeRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -85,6 +92,10 @@ public FrenchDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration co MoreThanRegex = FrenchDatePeriodExtractorConfiguration.MoreThanRegex; CenturySuffixRegex = 
FrenchDatePeriodExtractorConfiguration.CenturySuffixRegex; NowRegex = FrenchDatePeriodExtractorConfiguration.NowRegex; + FirstLastRegex = FrenchDatePeriodExtractorConfiguration.FirstLastRegex; + OfYearRegex = FrenchDatePeriodExtractorConfiguration.OfYearRegex; + SpecialDayRegex = FrenchDateExtractorConfiguration.SpecialDayRegex; + TodayNowRegex = new Regex(DateTimeDefinitions.TodayNowRegex, RegexOptions.Singleline); UnitMap = config.UnitMap; CardinalMap = config.CardinalMap; @@ -93,6 +104,7 @@ public FrenchDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration co SeasonMap = config.SeasonMap; SpecialYearPrefixesMap = config.SpecialYearPrefixesMap; WrittenDecades = config.WrittenDecades; + Numbers = config.Numbers; SpecialDecadeCases = config.SpecialDecadeCases; } @@ -194,6 +206,14 @@ public FrenchDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration co public Regex NowRegex { get; } + public Regex SpecialDayRegex { get; } + + public Regex TodayNowRegex { get; } + + public Regex FirstLastRegex { get; } + + public Regex OfYearRegex { get; } + Regex ISimpleDatePeriodParserConfiguration.RelativeRegex => RelativeRegex; Regex IDatePeriodParserConfiguration.NextPrefixRegex => NextPrefixRegex; @@ -204,6 +224,8 @@ public FrenchDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration co Regex IDatePeriodParserConfiguration.UnspecificEndOfRangeRegex => UnspecificEndOfRangeRegex; + Regex IDatePeriodParserConfiguration.AmbiguousPointRangeRegex => AmbiguousPointRangeRegex; + bool IDatePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; public IImmutableDictionary UnitMap { get; } @@ -230,22 +252,23 @@ public int GetSwiftDayOrMonth(string text) { var swift = 0; - var trimmedText = text.Trim().ToLowerInvariant(); + var trimmedText = text.Trim(); - // TODO: Replace with a regex - // TODO: Add 'upcoming' key word + // @TODO move hardcoded values to resources file + // @TODO Add 'upcoming' key word // example: "nous 
serons ensemble cette fois la semaine prochaine" - "We'll be together this time next week" - if (trimmedText.EndsWith("prochain") || trimmedText.EndsWith("prochaine")) + if (trimmedText.EndsWith("prochain", StringComparison.Ordinal) || + trimmedText.EndsWith("prochaine", StringComparison.Ordinal)) { swift = 1; } - // TODO: Replace with a regex - // example: Je l'ai vue pas plus tard que la semaine derniere - "I saw her only last week" - if (trimmedText.EndsWith("dernière") || trimmedText.EndsWith("dernières") || - trimmedText.EndsWith("derniere") || trimmedText.EndsWith("dernieres")) + if (trimmedText.EndsWith("dernière", StringComparison.Ordinal) || + trimmedText.EndsWith("dernières", StringComparison.Ordinal) || + trimmedText.EndsWith("derniere", StringComparison.Ordinal) || + trimmedText.EndsWith("dernieres", StringComparison.Ordinal)) { swift = -1; } @@ -257,19 +280,25 @@ public int GetSwiftYear(string text) { var swift = -10; - var trimmedText = text.Trim().ToLowerInvariant(); + var trimmedText = text.Trim(); - if (trimmedText.EndsWith("prochain") || trimmedText.EndsWith("prochaine")) + // @TODO move hardcoded values to resources file + + if (trimmedText.EndsWith("prochain", StringComparison.Ordinal) || + trimmedText.EndsWith("prochaine", StringComparison.Ordinal)) { swift = 1; } - if (trimmedText.EndsWith("dernières") || trimmedText.EndsWith("dernière") || - trimmedText.EndsWith("dernieres") || trimmedText.EndsWith("derniere") || trimmedText.EndsWith("dernier")) + if (trimmedText.EndsWith("dernières", StringComparison.Ordinal) || + trimmedText.EndsWith("dernière", StringComparison.Ordinal) || + trimmedText.EndsWith("dernieres", StringComparison.Ordinal) || + trimmedText.EndsWith("derniere", StringComparison.Ordinal) || + trimmedText.EndsWith("dernier", StringComparison.Ordinal)) { swift = -1; } - else if (trimmedText.StartsWith("cette")) + else if (trimmedText.StartsWith("cette", StringComparison.Ordinal)) { swift = 0; } @@ -279,53 +308,63 @@ public int 
GetSwiftYear(string text) public bool IsFuture(string text) { - var trimmedText = text.Trim().ToLowerInvariant(); - return DateTimeDefinitions.FutureStartTerms.Any(o => trimmedText.StartsWith(o)) || - DateTimeDefinitions.FutureEndTerms.Any(o => trimmedText.EndsWith(o)); + var trimmedText = text.Trim(); + + return DateTimeDefinitions.FutureStartTerms.Any(o => trimmedText.StartsWith(o, StringComparison.Ordinal)) || + DateTimeDefinitions.FutureEndTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); } public bool IsLastCardinal(string text) { - var trimmedText = text.Trim().ToLowerInvariant(); - return DateTimeDefinitions.LastCardinalTerms.Any(o => trimmedText.Equals(o)); + var trimmedText = text.Trim(); + return DateTimeDefinitions.LastCardinalTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } public bool IsMonthOnly(string text) { - var trimmedText = text.Trim().ToLowerInvariant(); - return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o)); + var trimmedText = text.Trim(); + return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); } public bool IsMonthToDate(string text) { - var trimmedText = text.Trim().ToLowerInvariant(); - return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o)); + var trimmedText = text.Trim(); + return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } public bool IsWeekend(string text) { - var trimmedText = text.Trim().ToLowerInvariant(); - return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o)); + var trimmedText = text.Trim(); + return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); } public bool IsWeekOnly(string text) { - var trimmedText = text.Trim().ToLowerInvariant(); - return (DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o)) || - (DateTimeDefinitions.WeekTerms.Any(o => trimmedText.Contains(o)) && 
(NextSuffixRegex.IsMatch(trimmedText) || PastSuffixRegex.IsMatch(trimmedText)))) && - !DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o)); + var trimmedText = text.Trim(); + + return (DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (DateTimeDefinitions.WeekTerms.Any(o => trimmedText.Contains(o)) && + (NextSuffixRegex.IsMatch(trimmedText) || PastSuffixRegex.IsMatch(trimmedText)))) && + !DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); + } + + public bool IsFortnight(string text) + { + return false; } public bool IsYearOnly(string text) { - var trimmedText = text.Trim().ToLowerInvariant(); - return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o)); + var trimmedText = text.Trim(); + + return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); } public bool IsYearToDate(string text) { - var trimmedText = text.Trim().ToLowerInvariant(); - return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o)); + var trimmedText = text.Trim(); + + return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateTimeAltParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateTimeAltParserConfiguration.cs index ec0005310c..2a8e46d7ae 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateTimeAltParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateTimeAltParserConfiguration.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.DateTime.French +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.DateTime.French { public class FrenchDateTimeAltParserConfiguration : IDateTimeAltParserConfiguration { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateTimeParserConfiguration.cs index 33d3802853..6f76fdb29d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateTimeParserConfiguration.cs @@ -1,21 +1,29 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.French; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.French { public class FrenchDateTimeParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeParserConfiguration { public static readonly Regex AmTimeRegex = - new Regex(DateTimeDefinitions.AMTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.AMTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmTimeRegex = - new Regex(DateTimeDefinitions.PMTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.PMTimeRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + private static readonly Regex AsapTimeRegex = + new Regex(DateTimeDefinitions.AsapTimeRegex, RegexFlags, RegexTimeOut); + public FrenchDateTimeParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -28,6 +36,9 @@ public FrenchDateTimeParserConfiguration(ICommonDateTimeParserConfiguration conf DateParser = config.DateParser; TimeParser = config.TimeParser; + HolidayExtractor = 
config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; + NowRegex = FrenchDateTimeExtractorConfiguration.NowRegex; SimpleTimeOfTodayAfterRegex = FrenchDateTimeExtractorConfiguration.SimpleTimeOfTodayAfterRegex; @@ -102,18 +113,24 @@ public FrenchDateTimeParserConfiguration(ICommonDateTimeParserConfiguration conf public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + // Note: French typically uses 24:00 time, consider removing 12:00 am/pm public int GetHour(string text, int hour) { int result = hour; - var trimmedText = text.Trim().ToLowerInvariant(); + var trimmedText = text.Trim(); + + // @TODO move hardcoded values to resources file - if (trimmedText.EndsWith("matin") && hour >= Constants.HalfDayHourCount) + if (trimmedText.EndsWith("matin", StringComparison.Ordinal) && hour >= Constants.HalfDayHourCount) { result -= Constants.HalfDayHourCount; } - else if (!trimmedText.EndsWith("matin") && hour < Constants.HalfDayHourCount) + else if (!trimmedText.EndsWith("matin", StringComparison.Ordinal) && hour < Constants.HalfDayHourCount) { result += Constants.HalfDayHourCount; } @@ -123,17 +140,21 @@ public int GetHour(string text, int hour) public bool GetMatchedNowTimex(string text, out string timex) { - var trimmedText = text.Trim().ToLowerInvariant(); + var trimmedText = text.Trim(); - if (trimmedText.EndsWith("maintenant")) + // @TODO move hardcoded values to resources file + + if (trimmedText.EndsWith("maintenant", StringComparison.Ordinal)) { timex = "PRESENT_REF"; } - else if (trimmedText.Equals("récemment") || trimmedText.Equals("précédemment") || trimmedText.Equals("auparavant")) + else if (trimmedText.Equals("récemment", StringComparison.Ordinal) || + trimmedText.Equals("précédemment", StringComparison.Ordinal) || + trimmedText.Equals("auparavant", StringComparison.Ordinal)) { timex = "PAST_REF"; } - else if 
(trimmedText.Equals("dès que possible") || trimmedText.Equals("dqp")) + else if (AsapTimeRegex.IsExactMatch(trimmedText, trim: true)) { timex = "FUTURE_REF"; } @@ -150,15 +171,21 @@ public int GetSwiftDay(string text) { var swift = 0; - var trimmedText = text.Trim().ToLowerInvariant(); + var trimmedText = text.Trim(); + + // @TODO move hardcoded values to resources file - if (trimmedText.StartsWith("prochain") || trimmedText.EndsWith("prochain") || - trimmedText.StartsWith("prochaine") || trimmedText.EndsWith("prochaine")) + if (trimmedText.StartsWith("prochain", StringComparison.Ordinal) || + trimmedText.EndsWith("prochain", StringComparison.Ordinal) || + trimmedText.StartsWith("prochaine", StringComparison.Ordinal) || + trimmedText.EndsWith("prochaine", StringComparison.Ordinal)) { swift = 1; } - else if (trimmedText.StartsWith("dernier") || trimmedText.StartsWith("dernière") || - trimmedText.EndsWith("dernier") || trimmedText.EndsWith("dernière")) + else if (trimmedText.StartsWith("dernier", StringComparison.Ordinal) || + trimmedText.StartsWith("dernière", StringComparison.Ordinal) || + trimmedText.EndsWith("dernier", StringComparison.Ordinal) || + trimmedText.EndsWith("dernière", StringComparison.Ordinal)) { swift = -1; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateTimePeriodParserConfiguration.cs index 75d0c22ee3..1726b626cd 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDateTimePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.French; @@ -8,16 +12,16 @@ namespace Microsoft.Recognizers.Text.DateTime.French public class FrenchDateTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimePeriodParserConfiguration { public static readonly Regex MorningStartEndRegex = - new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfternoonStartEndRegex = - new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex EveningStartEndRegex = - new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex NightStartEndRegex = - new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -25,6 +29,8 @@ public FrenchDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguratio : base(config) { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime; + DateExtractor = config.DateExtractor; TimeExtractor = config.TimeExtractor; DateTimeExtractor = config.DateTimeExtractor; @@ -39,7 +45,11 @@ public FrenchDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguratio DateTimeParser = config.DateTimeParser; TimeZoneParser = config.TimeZoneParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; + PureNumberFromToRegex = FrenchTimePeriodExtractorConfiguration.PureNumFromTo; + HyphenDateRegex = 
FrenchDateTimePeriodExtractorConfiguration.HyphenDateRegex; PureNumberBetweenAndRegex = FrenchTimePeriodExtractorConfiguration.PureNumBetweenAnd; SpecificTimeOfDayRegex = FrenchDateTimeExtractorConfiguration.SpecificTimeOfDayRegex; TimeOfDayRegex = FrenchDateTimeExtractorConfiguration.TimeOfDayRegex; @@ -64,6 +74,8 @@ public FrenchDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguratio public string TokenBeforeDate { get; } + public string TokenBeforeTime { get; } + public IDateExtractor DateExtractor { get; } public IDateTimeExtractor TimeExtractor { get; } @@ -92,6 +104,8 @@ public FrenchDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguratio public Regex PureNumberFromToRegex { get; } + public Regex HyphenDateRegex { get; } + public Regex PureNumberBetweenAndRegex { get; } public Regex SpecificTimeOfDayRegex { get; } @@ -128,65 +142,75 @@ public FrenchDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguratio bool IDateTimePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + Regex IDateTimePeriodParserConfiguration.TasksmodeMealTimeofDayRegex => null; + public IImmutableDictionary UnitMap { get; } public IImmutableDictionary Numbers { get; } - public bool GetMatchedTimeRange(string text, out string timeStr, out int beginHour, out int endHour, out int endMin) + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + + public bool GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMin) { beginHour = 0; endHour = 0; endMin = 0; - var trimmedText = text.Trim().ToLowerInvariant(); + var trimmedText = text.Trim(); if (MorningStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TMO"; - beginHour = 8; - endHour = Constants.HalfDayHourCount; + todSymbol = Constants.Morning; } else if (AfternoonStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TAF"; - beginHour = Constants.HalfDayHourCount; - 
endHour = 16; + todSymbol = Constants.Afternoon; } else if (EveningStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TEV"; - beginHour = 16; - endHour = 20; + todSymbol = Constants.Evening; } else if (NightStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TNI"; - beginHour = 20; - endHour = 23; - endMin = 59; + todSymbol = Constants.Night; } else { - timeStr = null; + todSymbol = null; return false; } + var parseResult = TimexUtility.ResolveTimeOfDay(todSymbol); + todSymbol = parseResult.Timex; + beginHour = parseResult.BeginHour; + endHour = parseResult.EndHour; + endMin = parseResult.EndMin; + return true; } // **NOTE: for certain cases, prochain/dernier (next, last) are suffix OR prefix public int GetSwiftPrefix(string text) { - var trimmedText = text.Trim().ToLowerInvariant(); + var trimmedText = text.Trim(); + var swift = 0; - if (trimmedText.StartsWith("prochain") || trimmedText.EndsWith("prochain") || - trimmedText.StartsWith("prochaine") || trimmedText.EndsWith("prochaine")) + // @TODO move hardcoded values to resources file + + if (trimmedText.StartsWith("prochain", StringComparison.Ordinal) || + trimmedText.EndsWith("prochain", StringComparison.Ordinal) || + trimmedText.StartsWith("prochaine", StringComparison.Ordinal) || + trimmedText.EndsWith("prochaine", StringComparison.Ordinal)) { swift = 1; } - else if (trimmedText.StartsWith("derniere") || trimmedText.StartsWith("dernier") || - trimmedText.EndsWith("derniere") || trimmedText.EndsWith("dernier")) + else if (trimmedText.StartsWith("derniere", StringComparison.Ordinal) || + trimmedText.StartsWith("dernier", StringComparison.Ordinal) || + trimmedText.EndsWith("derniere", StringComparison.Ordinal) || + trimmedText.EndsWith("dernier", StringComparison.Ordinal)) { swift = -1; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDurationParserConfiguration.cs index 40a7162b3f..352fa6f90e 
100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDurationParserConfiguration.cs @@ -1,10 +1,19 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.French; namespace Microsoft.Recognizers.Text.DateTime.French { public class FrenchDurationParserConfiguration : BaseDateTimeOptionsConfiguration, IDurationParserConfiguration { + public static readonly Regex PrefixArticleRegex = + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public FrenchDurationParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -32,7 +41,7 @@ public FrenchDurationParserConfiguration(ICommonDateTimeParserConfiguration conf public IExtractor CardinalExtractor { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public IParser NumberParser { get; } @@ -40,6 +49,8 @@ public FrenchDurationParserConfiguration(ICommonDateTimeParserConfiguration conf public Regex AnUnitRegex { get; } + Regex IDurationParserConfiguration.PrefixArticleRegex => PrefixArticleRegex; + public Regex DuringRegex { get; } public Regex AllDateUnitRegex { get; } @@ -60,6 +71,8 @@ public FrenchDurationParserConfiguration(ICommonDateTimeParserConfiguration conf public Regex SpecialNumberUnitRegex { get; } + bool IDurationParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + public IImmutableDictionary UnitMap { get; } public IImmutableDictionary UnitValueMap { get; } diff --git 
a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchHolidayParserConfiguration.cs index 91d3310ca7..9f31eaadb7 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchHolidayParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchHolidayParserConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Collections.Immutable; using Microsoft.Recognizers.Definitions.French; @@ -19,18 +22,22 @@ public FrenchHolidayParserConfiguration(IDateTimeOptionsConfiguration config) public override int GetSwiftYear(string text) { var trimmedText = text.Trim(); + var swift = -10; - if (trimmedText.EndsWith("prochain")) + + // @TODO move hardcoded values to resources file + + if (trimmedText.EndsWith("prochain", StringComparison.Ordinal)) { // next - 'l'annee prochain' swift = 1; } - else if (trimmedText.EndsWith("dernier")) + else if (trimmedText.EndsWith("dernier", StringComparison.Ordinal)) { // last - 'l'annee dernier' swift = -1; } - else if (trimmedText.StartsWith("cette")) + else if (trimmedText.StartsWith("cette", StringComparison.Ordinal)) { // this - 'cette annees' swift = 0; @@ -75,21 +82,21 @@ protected override IDictionary> InitHolidayFuncs() { "aprilfools", FoolDay }, { "stgeorgeday", StGeorgeDay }, { "mayday", Mayday }, - { "cincodemayoday", CincoDeMayoday }, + { "cincodemayoday", CincoDeMayoDay }, { "baptisteday", BaptisteDay }, { "usindependenceday", UsaIndependenceDay }, { "independenceday", UsaIndependenceDay }, { "bastilleday", BastilleDay }, { "halloweenday", HalloweenDay }, - { "allhallowday", AllHallowDay }, - { "allsoulsday", AllSoulsday }, + { "allhallowday", AllHallowsDay }, + { "allsoulsday", AllSoulsDay }, { "guyfawkesday", GuyFawkesDay }, - { 
"veteransday", Veteransday }, + { "veteransday", VeteransDay }, { "christmaseve", ChristmasEve }, { "newyeareve", NewYearEve }, { "fathersday", FathersDay }, { "mothersday", MothersDay }, - { "labourday", LabourDay }, + { "labourday", InternationalWorkersDay }, }; } @@ -133,7 +140,7 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject Mayday(int year) => new DateObject(year, 5, 1); - private static DateObject CincoDeMayoday(int year) => new DateObject(year, 5, 5); + private static DateObject CincoDeMayoDay(int year) => new DateObject(year, 5, 5); private static DateObject BaptisteDay(int year) => new DateObject(year, 6, 24); @@ -143,18 +150,17 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject HalloweenDay(int year) => new DateObject(year, 10, 31); - private static DateObject AllHallowDay(int year) => new DateObject(year, 11, 1); + private static DateObject AllHallowsDay(int year) => new DateObject(year, 11, 1); - private static DateObject AllSoulsday(int year) => new DateObject(year, 11, 2); + private static DateObject AllSoulsDay(int year) => new DateObject(year, 11, 2); private static DateObject GuyFawkesDay(int year) => new DateObject(year, 11, 5); - private static DateObject Veteransday(int year) => new DateObject(year, 11, 11); + private static DateObject VeteransDay(int year) => new DateObject(year, 11, 11); private static new DateObject FathersDay(int year) => new DateObject(year, 6, 17); private static new DateObject MothersDay(int year) => new DateObject(year, 5, 27); - private static new DateObject LabourDay(int year) => new DateObject(year, 5, 1); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchMergedParserConfiguration.cs index fe5ade69e0..43e08fb37e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchMergedParserConfiguration.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchMergedParserConfiguration.cs @@ -1,5 +1,8 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.French; using Microsoft.Recognizers.Text.Matcher; namespace Microsoft.Recognizers.Text.DateTime.French @@ -16,7 +19,9 @@ public FrenchMergedParserConfiguration(IDateTimeOptionsConfiguration config) EqualRegex = FrenchMergedExtractorConfiguration.EqualRegex; SuffixAfter = FrenchMergedExtractorConfiguration.SuffixAfterRegex; YearRegex = FrenchDatePeriodExtractorConfiguration.YearRegex; + SuperfluousWordMatcher = FrenchMergedExtractorConfiguration.SuperfluousWordMatcher; + SetParser = new BaseSetParser(new FrenchSetParserConfiguration(this)); HolidayParser = new BaseHolidayParser(new FrenchHolidayParserConfiguration(this)); TimeZoneParser = new DummyTimeZoneParser(); @@ -41,5 +46,7 @@ public FrenchMergedParserConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeParser HolidayParser { get; } public StringMatcher SuperfluousWordMatcher { get; } + + bool IMergedParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchSetParserConfiguration.cs index 25b3555c8b..7e71060579 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchSetParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchSetParserConfiguration.cs @@ -1,11 +1,20 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.French; using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime.French { public class FrenchSetParserConfiguration : BaseDateTimeOptionsConfiguration, ISetParserConfiguration { + // pass FutureTerms as List to ReplaceValueInTextWithFutTerm function + private static readonly List ThisTerms = (List)DateTimeDefinitions.FutureStartTerms; + public FrenchSetParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -79,28 +88,35 @@ public FrenchSetParserConfiguration(ICommonDateTimeParserConfiguration config) public bool GetMatchedDailyTimex(string text, out string timex) { var trimmedText = text.Trim(); - if (trimmedText.Equals("quotidien") || trimmedText.Equals("quotidienne") || - trimmedText.Equals("jours") || trimmedText.Equals("journellement")) + + // @TODO move hardcoded values to resources file + + if (trimmedText.Equals("quotidien", StringComparison.Ordinal) || + trimmedText.Equals("quotidienne", StringComparison.Ordinal) || + trimmedText.Equals("jours", StringComparison.Ordinal) || + trimmedText.Equals("journellement", StringComparison.Ordinal)) { // daily timex = "P1D"; } - else if (trimmedText.Equals("hebdomadaire")) + else if (trimmedText.Equals("hebdomadaire", StringComparison.Ordinal)) { // weekly timex = "P1W"; } - else if (trimmedText.Equals("bihebdomadaire")) + else if (trimmedText.Equals("bihebdomadaire", StringComparison.Ordinal)) { // bi weekly timex = "P2W"; } - else if (trimmedText.Equals("mensuel") || trimmedText.Equals("mensuelle")) + else if (trimmedText.Equals("mensuel", StringComparison.Ordinal) || + trimmedText.Equals("mensuelle", StringComparison.Ordinal)) { // monthly timex = "P1M"; } - else if (trimmedText.Equals("annuel") || trimmedText.Equals("annuellement")) + else if (trimmedText.Equals("annuel", 
StringComparison.Ordinal) || + trimmedText.Equals("annuellement", StringComparison.Ordinal)) { // yearly/annually timex = "P1Y"; @@ -117,19 +133,24 @@ public bool GetMatchedDailyTimex(string text, out string timex) public bool GetMatchedUnitTimex(string text, out string timex) { var trimmedText = text.Trim(); - if (trimmedText.Equals("jour") || trimmedText.Equals("journee")) + + // @TODO move hardcoded values to resources file + + if (trimmedText.Equals("jour", StringComparison.Ordinal) || + trimmedText.Equals("journee", StringComparison.Ordinal)) { timex = "P1D"; } - else if (trimmedText.Equals("semaine")) + else if (trimmedText.Equals("semaine", StringComparison.Ordinal)) { timex = "P1W"; } - else if (trimmedText.Equals("mois")) + else if (trimmedText.Equals("mois", StringComparison.Ordinal)) { timex = "P1M"; } - else if (trimmedText.Equals("an") || trimmedText.Equals("annee")) + else if (trimmedText.Equals("an", StringComparison.Ordinal) || + trimmedText.Equals("annee", StringComparison.Ordinal)) { // year timex = "P1Y"; @@ -144,5 +165,7 @@ public bool GetMatchedUnitTimex(string text, out string timex) } public string WeekDayGroupMatchString(Match match) => SetHandler.WeekDayGroupMatchString(match); + + public string ReplaceValueInTextWithFutTerm(string text, string value) => TasksModeSetHandler.ReplaceValueInTextWithFutTerm(text, value, ThisTerms); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchTimeParserConfiguration.cs index f1c9bee780..63a718603f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchTimeParserConfiguration.cs @@ -1,5 +1,10 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.French; @@ -10,6 +15,21 @@ namespace Microsoft.Recognizers.Text.DateTime.French { public class FrenchTimeParserConfiguration : BaseDateTimeOptionsConfiguration, ITimeParserConfiguration { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex HalfTokenRegex = + new Regex(DateTimeDefinitions.HalfTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex QuarterTokenRegex = + new Regex(DateTimeDefinitions.QuarterTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex ThreeQuarterTokenRegex = + new Regex(DateTimeDefinitions.ThreeQuarterTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex ToTokenRegex = + new Regex(DateTimeDefinitions.ToTokenRegex, RegexFlags, RegexTimeOut); + public FrenchTimeParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -35,39 +55,38 @@ public FrenchTimeParserConfiguration(ICommonDateTimeParserConfiguration config) public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool hasMin) { - var deltaMin = 0; - var trimmedPrefix = prefix.Trim(); + int deltaMin; + + var trimedPrefix = prefix.Trim(); - // c'este 8 heures et demie, - "it's half past 8" - if (trimmedPrefix.EndsWith("demie")) + if (HalfTokenRegex.IsMatch(trimedPrefix)) { deltaMin = 30; } - else if (trimmedPrefix.EndsWith("un quart") || trimmedPrefix.EndsWith("quart")) + else if (QuarterTokenRegex.IsMatch(trimedPrefix)) { deltaMin = 15; } - else if (trimmedPrefix.EndsWith("trois quarts")) + else if (ThreeQuarterTokenRegex.IsMatch(trimedPrefix)) { deltaMin = 45; } else { - var match = FrenchTimeExtractorConfiguration.LessThanOneHour.Match(trimmedPrefix); + var match = 
FrenchTimeExtractorConfiguration.LessThanOneHour.Match(trimedPrefix); var minStr = match.Groups["deltamin"].Value; if (!string.IsNullOrWhiteSpace(minStr)) { - deltaMin = int.Parse(minStr); + deltaMin = int.Parse(minStr, CultureInfo.InvariantCulture); } else { - minStr = match.Groups["deltaminnum"].Value.ToLower(); + minStr = match.Groups["deltaminnum"].Value; deltaMin = Numbers[minStr]; } } - // 'to' i.e 'one to five' = 'un à cinq' - if (trimmedPrefix.EndsWith("à")) + if (ToTokenRegex.IsMatch(trimedPrefix)) { deltaMin = -deltaMin; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchTimePeriodParserConfiguration.cs index 5f790acf23..ff1ab8e17c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchTimePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.French; @@ -52,10 +56,11 @@ public FrenchTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration co public IDateTimeUtilityConfiguration UtilityConfiguration { get; } - public bool GetMatchedTimexRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) + public bool GetMatchedTimeRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) { var trimmedText = text.Trim(); - if (trimmedText.EndsWith("s")) + + if (trimmedText.EndsWith("s", StringComparison.Ordinal)) { trimmedText = trimmedText.Substring(0, trimmedText.Length - 1); } @@ -65,23 +70,24 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou endMin = 0; var timeOfDay = string.Empty; - if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o))) + + if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Morning; } - else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Afternoon; } - else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Evening; } - else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o))) + else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o, StringComparison.Ordinal))) { timeOfDay = Constants.Daytime; } - else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = 
Constants.Night; } @@ -91,7 +97,7 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou return false; } - var parseResult = TimexUtility.ParseTimeOfDay(timeOfDay); + var parseResult = TimexUtility.ResolveTimeOfDay(timeOfDay); timex = parseResult.Timex; beginHour = parseResult.BeginHour; endHour = parseResult.EndHour; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/TimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/TimeParser.cs index eb6fb9bbd8..7ee5f3ea49 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/TimeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/TimeParser.cs @@ -1,4 +1,10 @@ -using Microsoft.Recognizers.Text.Utilities; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; + +using Microsoft.Recognizers.Text.Utilities; + using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.French @@ -35,10 +41,10 @@ private DateTimeResolutionResult ParseIsh(string text, DateObject referenceTime) var hour = Constants.HalfDayHourCount; if (!string.IsNullOrEmpty(hourStr)) { - hour = int.Parse(hourStr); + hour = int.Parse(hourStr, CultureInfo.InvariantCulture); } - ret.Timex = "T" + hour.ToString("D2"); + ret.Timex = "T" + hour.ToString("D2", CultureInfo.InvariantCulture); ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(referenceTime.Year, referenceTime.Month, referenceTime.Day, hour, 0, 0); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Utilities/FrenchDatetimeUtilityConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Utilities/FrenchDatetimeUtilityConfiguration.cs index e58462e433..752f5f4371 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Utilities/FrenchDatetimeUtilityConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Utilities/FrenchDatetimeUtilityConfiguration.cs @@ 
-1,74 +1,34 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.French; using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime.French.Utilities { - public class FrenchDatetimeUtilityConfiguration : IDateTimeUtilityConfiguration + public class FrenchDatetimeUtilityConfiguration : BaseDatetimeUtilityConfiguration { - public static readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoPrefixRegex, RegexFlags); - - public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); - - public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); - - public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); - - public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); - - public static readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); - - public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); - - public static readonly Regex AmPmDescRegex = - new Regex(DateTimeDefinitions.AmPmDescRegex, RegexFlags); - - public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); - - public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); - - public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); - - public static readonly Regex CommonDatePrefixRegex = - new Regex(DateTimeDefinitions.CommonDatePrefixRegex, RegexFlags); - - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - - Regex 
IDateTimeUtilityConfiguration.LaterRegex => LaterRegex; - - Regex IDateTimeUtilityConfiguration.AgoRegex => AgoRegex; - - Regex IDateTimeUtilityConfiguration.InConnectorRegex => InConnectorRegex; - - Regex IDateTimeUtilityConfiguration.SinceYearSuffixRegex => SinceYearSuffixRegex; - - Regex IDateTimeUtilityConfiguration.WithinNextPrefixRegex => WithinNextPrefixRegex; - - Regex IDateTimeUtilityConfiguration.AmDescRegex => AmDescRegex; - - Regex IDateTimeUtilityConfiguration.PmDescRegex => PmDescRegex; - - Regex IDateTimeUtilityConfiguration.AmPmDescRegex => AmPmDescRegex; - - Regex IDateTimeUtilityConfiguration.RangeUnitRegex => RangeUnitRegex; - - Regex IDateTimeUtilityConfiguration.TimeUnitRegex => TimeUnitRegex; - - Regex IDateTimeUtilityConfiguration.DateUnitRegex => DateUnitRegex; - - Regex IDateTimeUtilityConfiguration.CommonDatePrefixRegex => CommonDatePrefixRegex; - - bool IDateTimeUtilityConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + public FrenchDatetimeUtilityConfiguration() + : base( + DateTimeDefinitions.AgoRegex, + DateTimeDefinitions.LaterRegex, + DateTimeDefinitions.InConnectorRegex, + DateTimeDefinitions.SinceYearSuffixRegex, + DateTimeDefinitions.WithinNextPrefixRegex, + DateTimeDefinitions.AmDescRegex, + DateTimeDefinitions.PmDescRegex, + DateTimeDefinitions.AmPmDescRegex, + DateTimeDefinitions.RangeUnitRegex, + DateTimeDefinitions.TimeUnitRegex, + DateTimeDefinitions.DateUnitRegex, + DateTimeDefinitions.CommonDatePrefixRegex, + DateTimeDefinitions.RangePrefixRegex, + RegexOptions.Singleline | RegexOptions.ExplicitCapture, + true) + { + // CheckBothBeforeAfter normally gets its value from DateTimeDefinitions.CheckBothBeforeAfter which however for French is false. + // It only needs to be true here to extract 'ago/later' in prefixes (e.g. 'il y a 30 minutes'). 
+ } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateExtractorConfiguration.cs index 125c58bb87..2bcfe915ca 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; @@ -13,73 +16,73 @@ namespace Microsoft.Recognizers.Text.DateTime.German public class GermanDateExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateExtractorConfiguration { public static readonly Regex MonthRegex = - new Regex(DateTimeDefinitions.MonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SingleWeekDayRegex = - new Regex(DateTimeDefinitions.SingleWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SingleWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex 
OnRegex = - new Regex(DateTimeDefinitions.OnRegex, RegexFlags); + new Regex(DateTimeDefinitions.OnRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelaxedOnRegex = - new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisRegex = - new Regex(DateTimeDefinitions.ThisRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisRegex, RegexFlags, RegexTimeOut); public static readonly Regex LastDateRegex = - new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextDateRegex = - new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDayRegex = - new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDayWithNumRegex = - new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayOfMonthRegex = - new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeWeekDayRegex = - new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDate = - new Regex(DateTimeDefinitions.SpecialDate, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDate, RegexFlags, RegexTimeOut); public static 
readonly Regex ForTheRegex = - new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags); + new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayAndDayOfMothRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayAndDayRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex StrictRelativeRegex = - new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixArticleRegex = - new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] ImplicitDateList = { @@ -88,37 +91,37 @@ public class GermanDateExtractorConfiguration : BaseDateTimeOptionsConfiguration }; public static readonly Regex OfMonth = - new Regex(DateTimeDefinitions.OfMonth, RegexFlags); + new Regex(DateTimeDefinitions.OfMonth, RegexFlags, RegexTimeOut); public static readonly Regex MonthEnd = - new Regex(DateTimeDefinitions.MonthEnd, RegexFlags); + new Regex(DateTimeDefinitions.MonthEnd, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayEnd = - new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayStart = - new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags, RegexTimeOut); public static 
readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeConnectorSymbolRegex = - new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags); + new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags, RegexTimeOut); public static readonly ImmutableDictionary DayOfWeek = DateTimeDefinitions.DayOfWeek.ToImmutableDictionary(); @@ -126,59 +129,76 @@ public class GermanDateExtractorConfiguration : BaseDateTimeOptionsConfiguration public static readonly ImmutableDictionary MonthOfYear = DateTimeDefinitions.MonthOfYear.ToImmutableDictionary(); + public static readonly Regex BeforeAfterRegex = + new Regex(DateTimeDefinitions.BeforeAfterRegex, RegexFlags, RegexTimeOut); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public GermanDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = 
Number.German.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.German.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new GermanNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.German.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.German.OrdinalExtractor.GetInstance(numConfig); + NumberParser = new BaseNumberParser(new GermanNumberParserConfiguration(new BaseNumberOptionsConfiguration(numConfig))); + DurationExtractor = new BaseDurationExtractor(new GermanDurationExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new GermanHolidayExtractorConfiguration(this)); UtilityConfiguration = new GermanDatetimeUtilityConfiguration(); // 3-23-2017 - var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags); + var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags, RegexTimeOut); // 23-3-2015 - var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags); + var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags, RegexTimeOut); // am 1.3 - var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags); + var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags, RegexTimeOut); // am 24-12 - var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags); + var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags, RegexTimeOut); // 7/23 - var dateRegex7 = new Regex(DateTimeDefinitions.DateExtractor7, RegexFlags); + var dateRegex7 = new Regex(DateTimeDefinitions.DateExtractor7, RegexFlags, RegexTimeOut); // 23/7 - var dateRegex9 = new Regex(DateTimeDefinitions.DateExtractor9, 
RegexFlags); + var dateRegex9 = new Regex(DateTimeDefinitions.DateExtractor9, RegexFlags, RegexTimeOut); // Nächstes Jahr (im Sommer)? - var dateRegex10 = new Regex(DateTimeDefinitions.DateExtractor10, RegexFlags); + var dateRegex10 = new Regex(DateTimeDefinitions.DateExtractor10, RegexFlags, RegexTimeOut); + + // (Sonntag,)? 23. siebter (2016)? + var dateRegex11 = new Regex(DateTimeDefinitions.DateExtractor11, RegexFlags, RegexTimeOut); // 2015-12-23 - var dateRegexA = new Regex(DateTimeDefinitions.DateExtractorA, RegexFlags); + var dateRegexA = new Regex(DateTimeDefinitions.DateExtractorA, RegexFlags, RegexTimeOut); DateRegexList = new List { // (Sonntag,)? 5. April - new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags, RegexTimeOut), // (Sonntag,)? 5. April, 2016 - new Regex(DateTimeDefinitions.DateExtractor2, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor2, RegexFlags, RegexTimeOut), // (Sonntag,)? der 6. April, 2016 - new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags, RegexTimeOut), }; var enableDmy = DmyDateFormat || DateTimeDefinitions.DefaultLanguageFallback == Constants.DefaultLanguageFallback_DMY; DateRegexList = DateRegexList.Concat(enableDmy ? 
- new[] { dateRegex5, dateRegex8, dateRegex9, dateRegex4, dateRegex6, dateRegex7, dateRegex10, dateRegexA } : - new[] { dateRegex4, dateRegex6, dateRegex7, dateRegex5, dateRegex8, dateRegex9, dateRegex10, dateRegexA }); + new[] { dateRegex5, dateRegex8, dateRegex9, dateRegex4, dateRegex6, dateRegex7, dateRegex10, dateRegex11, dateRegexA } : + new[] { dateRegex4, dateRegex6, dateRegex7, dateRegex5, dateRegex8, dateRegex9, dateRegex10, dateRegex11, dateRegexA }); } public IEnumerable DateRegexList { get; } @@ -191,6 +211,8 @@ public GermanDateExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DurationExtractor { get; } + public IDateTimeExtractor HolidayExtractor { get; } + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } Regex IDateExtractorConfiguration.PrefixArticleRegex => PrefixArticleRegex; @@ -238,5 +260,7 @@ public GermanDateExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IDateExtractorConfiguration.RangeUnitRegex => RangeUnitRegex; Regex IDateExtractorConfiguration.RangeConnectorSymbolRegex => RangeConnectorSymbolRegex; + + Regex IDateExtractorConfiguration.BeforeAfterRegex => BeforeAfterRegex; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDatePeriodExtractorConfiguration.cs index fe02ea252e..c4755510fb 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDatePeriodExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; @@ -13,161 +16,173 @@ public class GermanDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfigu { // base regexes public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeConnectorRegex = - new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex IllegalYearRegex = - new Regex(BaseDateTime.IllegalYearRegex, RegexFlags); + new Regex(BaseDateTime.IllegalYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WrittenMonthRegex = - new Regex(DateTimeDefinitions.WrittenMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WrittenMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthSuffixRegex = - new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex FutureSuffixRegex = - new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags, RegexTimeOut); // composite regexes public static readonly Regex SimpleCasesRegex = - new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthFrontSimpleCasesRegex = - new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthFrontBetweenRegex = - new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags, RegexTimeOut); public static readonly Regex BetweenRegex = - new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthWithYear = - new Regex(DateTimeDefinitions.MonthWithYear, RegexFlags); + new Regex(DateTimeDefinitions.MonthWithYear, RegexFlags, RegexTimeOut); public 
static readonly Regex OneWordPeriodRegex = - new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumWithYear = - new Regex(DateTimeDefinitions.MonthNumWithYear, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumWithYear, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfMonthRegex = - new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfYearRegex = - new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex FollowedDateUnit = - new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithDateUnit = - new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags, RegexTimeOut); public static readonly Regex QuarterRegex = - new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags); + new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags, RegexTimeOut); public static readonly Regex QuarterRegexYearFront = - new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags); + new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags, RegexTimeOut); public static readonly Regex AllHalfYearRegex = - new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex SeasonRegex = - new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags); + new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags, RegexTimeOut); public static readonly Regex WhichWeekRegex = - new Regex(DateTimeDefinitions.WhichWeekRegex, 
RegexFlags); + new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfRegex = - new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthOfRegex = - new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RestOfDateRegex = - new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterEarlyPeriodRegex = - new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekWithWeekDayRangeRegex = - new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearPlusNumberRegex = - new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex DecadeWithCenturyRegex = - new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags); + new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, 
RegexFlags, RegexTimeOut); public static readonly Regex YearPeriodRegex = - new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex ComplexDatePeriodRegex = - new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDecadeRegex = - new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags, RegexTimeOut); public static readonly Regex ReferenceDatePeriodRegex = - new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoRegex, RegexFlags); + new Regex(DateTimeDefinitions.AgoRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterRegex, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex CenturySuffixRegex = - new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FirstLastRegex = + new Regex(DateTimeDefinitions.FirstLastRegex, RegexFlags, 
RegexTimeOut); + + public static readonly Regex OfYearRegex = + new Regex(DateTimeDefinitions.OfYearRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + private static readonly Regex FromTokenRegex = + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex BetweenTokenRegex = + new Regex(DateTimeDefinitions.BetweenTokenRegex, RegexFlags, RegexTimeOut); + private static readonly Regex[] SimpleCasesRegexes = { SimpleCasesRegex, @@ -198,10 +213,20 @@ public GermanDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration conf : base(config) { DatePointExtractor = new BaseDateExtractor(new GermanDateExtractorConfiguration(this)); - CardinalExtractor = Number.German.CardinalExtractor.GetInstance(); - OrdinalExtractor = Number.German.OrdinalExtractor.GetInstance(); DurationExtractor = new BaseDurationExtractor(new GermanDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new GermanNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.German.CardinalExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.German.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new GermanNumberParserConfiguration(numConfig)); } public IDateExtractor DatePointExtractor { get; } @@ -268,6 +293,10 @@ public GermanDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration conf Regex IDatePeriodExtractorConfiguration.NowRegex => NowRegex; + Regex IDatePeriodExtractorConfiguration.FirstLastRegex => FirstLastRegex; + + Regex IDatePeriodExtractorConfiguration.OfYearRegex => OfYearRegex; + bool 
IDatePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; string[] IDatePeriodExtractorConfiguration.DurationDateRestrictions => DateTimeDefinitions.DurationDateRestrictions; @@ -275,25 +304,25 @@ public GermanDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration conf public bool GetFromTokenIndex(string text, out int index) { index = -1; - if (text.EndsWith("vom")) + var fromMatch = FromTokenRegex.Match(text); + if (fromMatch.Success) { - index = text.LastIndexOf("vom", StringComparison.Ordinal); - return true; + index = fromMatch.Index; } - return false; + return fromMatch.Success; } public bool GetBetweenTokenIndex(string text, out int index) { index = -1; - if (text.EndsWith("zwischen")) + var betweenMatch = BetweenTokenRegex.Match(text); + if (betweenMatch.Success) { - index = text.LastIndexOf("zwischen", StringComparison.Ordinal); - return true; + index = betweenMatch.Index; } - return false; + return betweenMatch.Success; } public bool HasConnectorToken(string text) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateTimeAltExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateTimeAltExtractorConfiguration.cs index d8b8999d41..79273bbb40 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateTimeAltExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateTimeAltExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.German; @@ -7,22 +10,22 @@ namespace Microsoft.Recognizers.Text.DateTime.German public class GermanDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeAltExtractorConfiguration { public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangePrefixRegex = - new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] RelativePrefixList = { @@ -37,10 +40,10 @@ public class GermanDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfig private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex OrRegex = - new Regex(DateTimeDefinitions.OrRegex, RegexFlags); + new Regex(DateTimeDefinitions.OrRegex, RegexFlags, RegexTimeOut); private static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public 
GermanDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateTimeExtractorConfiguration.cs index 3a351bc356..d0d1ee3a6e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateTimeExtractorConfiguration.cs @@ -1,76 +1,92 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.German; using Microsoft.Recognizers.Text.DateTime.German.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.German { public class GermanDateTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeExtractorConfiguration { public static readonly Regex PrepositionRegex = - new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixRegex = - new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfDayRegex = - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificEndOfRegex = - new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificEndOfRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConnectorRegex = - new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex NumberAsTimeRegex = - new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateNumberConnectorRegex = - new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags); 
+ new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public GermanDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.German.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.German.IntegerExtractor.GetInstance(numConfig); + DatePointExtractor = new BaseDateExtractor(new GermanDateExtractorConfiguration(this)); TimePointExtractor = new BaseTimeExtractor(new GermanTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new GermanDurationExtractorConfiguration(this)); UtilityConfiguration = new GermanDatetimeUtilityConfiguration(); + HolidayExtractor = new BaseHolidayExtractor(new GermanHolidayExtractorConfiguration(this)); + } public IExtractor IntegerExtractor { get; } @@ -81,6 +97,8 @@ public GermanDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + public IDateTimeExtractor HolidayExtractor { get; } + Regex IDateTimeExtractorConfiguration.NowRegex => NowRegex; Regex IDateTimeExtractorConfiguration.SuffixRegex 
=> SuffixRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateTimePeriodExtractorConfiguration.cs index ce187685b6..9d3b410d9d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateTimePeriodExtractorConfiguration.cs @@ -1,7 +1,12 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.German; +using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.German @@ -10,40 +15,43 @@ public class GermanDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsCon IDateTimePeriodExtractorConfiguration { public static readonly Regex TimeNumberCombinedWithUnit = - new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex HyphenDateRegex = + new Regex(BaseDateTime.HyphenDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodTimeOfDayWithDateRegex = - new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeTimeUnitRegex = - new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RestOfDateTimeRegex = - new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixDayRegex = new Regex(DateTimeDefinitions.PrefixDayRegex, RegexFlags | RegexOptions.RightToLeft); public static readonly Regex SuffixRegex = - new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -54,35 +62,55 @@ public class GermanDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsCon }; private static readonly Regex PeriodTimeOfDayRegex = - new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags, RegexTimeOut); private static readonly Regex PeriodSpecificTimeOfDayRegex = new Regex(DateTimeDefinitions.PeriodSpecificTimeOfDayRegex, RegexOptions.Singleline); 
private static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); private static readonly Regex TimeFollowedUnit = - new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); private static readonly Regex GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); private static readonly Regex MiddlePauseRegex = - new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags); + new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex FromTokenRegex = + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex BetweenTokenRegex = + new Regex(DateTimeDefinitions.BetweenTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex RangeConnectorRegex = + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); public GermanDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; - CardinalExtractor = Number.German.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.German.CardinalExtractor.GetInstance(numConfig); + SingleDateExtractor = new BaseDateExtractor(new GermanDateExtractorConfiguration(this)); SingleTimeExtractor = new BaseTimeExtractor(new GermanTimeExtractorConfiguration(this)); SingleDateTimeExtractor = new BaseDateTimeExtractor(new GermanDateTimeExtractorConfiguration(this)); DurationExtractor = new 
BaseDurationExtractor(new GermanDurationExtractorConfiguration(this)); TimePeriodExtractor = new BaseTimePeriodExtractor(new GermanTimePeriodExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new GermanTimeZoneExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new GermanHolidayExtractorConfiguration(this)); + } public IEnumerable SimpleCasesRegex => SimpleCases; @@ -139,6 +167,8 @@ public GermanDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration Regex IDateTimePeriodExtractorConfiguration.AfterRegex => AfterRegex; + Regex IDateTimePeriodExtractorConfiguration.TasksmodeMealTimeofDayRegex => null; + public string TokenBeforeDate { get; } public IExtractor CardinalExtractor { get; } @@ -155,36 +185,35 @@ public GermanDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration public IDateTimeExtractor TimeZoneExtractor { get; } - // TODO: these three methods are the same in DatePeriod, should be abstracted + public IDateTimeExtractor HolidayExtractor { get; } + public bool GetFromTokenIndex(string text, out int index) { index = -1; - if (text.EndsWith("vom")) + var fromMatch = FromTokenRegex.Match(text); + if (fromMatch.Success) { - index = text.LastIndexOf("vom", StringComparison.Ordinal); - return true; + index = fromMatch.Index; } - return false; + return fromMatch.Success; } public bool GetBetweenTokenIndex(string text, out int index) { index = -1; - if (text.EndsWith("zwischen")) + var betweenMatch = BetweenTokenRegex.Match(text); + if (betweenMatch.Success) { - index = text.LastIndexOf("zwischen", StringComparison.Ordinal); - return true; + index = betweenMatch.Index; } - return false; + return betweenMatch.Success; } public bool HasConnectorToken(string text) { - var rangeConnectorRegex = new Regex(DateTimeDefinitions.RangeConnectorRegex); - - return rangeConnectorRegex.IsExactMatch(text, trim: true); + return RangeConnectorRegex.IsExactMatch(text, trim: true); } } } \ No newline at end 
of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDurationExtractorConfiguration.cs index 015bf9bb93..2be2c979e6 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDurationExtractorConfiguration.cs @@ -1,65 +1,86 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.German; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.German { public class GermanDurationExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDurationExtractorConfiguration { public static readonly Regex DurationUnitRegex = - new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAndRegex = - new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationFollowedUnit = - new Regex(DateTimeDefinitions.DurationFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.DurationFollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithDurationUnit = - new Regex(DateTimeDefinitions.NumberCombinedWithDurationUnit, RegexFlags); + new Regex(DateTimeDefinitions.NumberCombinedWithDurationUnit, RegexFlags, RegexTimeOut); public static readonly Regex AnUnitRegex = - new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags, RegexTimeOut); public static readonly 
Regex DuringRegex = - new Regex(DateTimeDefinitions.DuringRegex, RegexFlags); + new Regex(DateTimeDefinitions.DuringRegex, RegexFlags, RegexTimeOut); public static readonly Regex AllRegex = - new Regex(DateTimeDefinitions.AllRegex, RegexFlags); + new Regex(DateTimeDefinitions.AllRegex, RegexFlags, RegexTimeOut); public static readonly Regex HalfRegex = - new Regex(DateTimeDefinitions.HalfRegex, RegexFlags); + new Regex(DateTimeDefinitions.HalfRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConjunctionRegex = - new Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags, RegexTimeOut); public static readonly Regex InexactNumberRegex = - new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex InexactNumberUnitRegex = - new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDurationUnitRegex = - new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationConnectorRegex = - new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModPrefixRegex = + new Regex(DateTimeDefinitions.ModPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModSuffixRegex = + new Regex(DateTimeDefinitions.ModSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialNumberUnitRegex = - new Regex(DateTimeDefinitions.SpecialNumberUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialNumberUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new 
Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public GermanDurationExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - CardinalExtractor = Number.German.CardinalExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.German.NumberExtractor.GetInstance(numConfig); + UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary(); UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary(); } @@ -103,5 +124,11 @@ public GermanDurationExtractorConfiguration(IDateTimeOptionsConfiguration config Regex IDurationExtractorConfiguration.MoreThanRegex => MoreThanRegex; Regex IDurationExtractorConfiguration.LessThanRegex => LessThanRegex; + + Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex; + + Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex; + + public Dictionary AmbiguityFiltersDict => null; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanHolidayExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanHolidayExtractorConfiguration.cs index e663849b85..aa469779e7 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanHolidayExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanHolidayExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using 
System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.German; @@ -7,16 +10,16 @@ namespace Microsoft.Recognizers.Text.DateTime.German public class GermanHolidayExtractorConfiguration : BaseDateTimeOptionsConfiguration, IHolidayExtractorConfiguration { public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex H1 = - new Regex(DateTimeDefinitions.HolidayRegex1, RegexFlags); + new Regex(DateTimeDefinitions.HolidayRegex1, RegexFlags, RegexTimeOut); public static readonly Regex H2 = - new Regex(DateTimeDefinitions.HolidayRegex2, RegexFlags); + new Regex(DateTimeDefinitions.HolidayRegex2, RegexFlags, RegexTimeOut); public static readonly Regex H3 = - new Regex(DateTimeDefinitions.HolidayRegex3, RegexFlags); + new Regex(DateTimeDefinitions.HolidayRegex3, RegexFlags, RegexTimeOut); public static readonly Regex[] HolidayRegexList = { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanMergedExtractorConfiguration.cs index 74f34266c1..329c334b79 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanMergedExtractorConfiguration.cs @@ -1,56 +1,63 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.German; using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Matcher; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.German { public class GermanMergedExtractorConfiguration : BaseDateTimeOptionsConfiguration, IMergedExtractorConfiguration { public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex SinceRegex = - new Regex(DateTimeDefinitions.SinceRegex, RegexFlags); + new Regex(DateTimeDefinitions.SinceRegex, RegexFlags, RegexTimeOut); public static readonly Regex AroundRegex = - new Regex(DateTimeDefinitions.AroundRegex, RegexFlags); + new Regex(DateTimeDefinitions.AroundRegex, RegexFlags, RegexTimeOut); public static readonly Regex EqualRegex = - new Regex(BaseDateTime.EqualRegex, RegexFlags); + new Regex(BaseDateTime.EqualRegex, RegexFlags, RegexTimeOut); public static readonly Regex FromToRegex = - new Regex(DateTimeDefinitions.FromToRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromToRegex, RegexFlags, RegexTimeOut); public static readonly Regex SingleAmbiguousMonthRegex = - new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrepositionSuffixRegex = - new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmbiguousRangeModifierPrefix = - new 
Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags); + new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags, RegexTimeOut); public static readonly Regex NumberEndingPattern = - new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags); + new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificDatePeriodRegex = - new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher(); public static readonly Regex[] TermFilterRegexes = { // one on one - new Regex(DateTimeDefinitions.OneOnOneRegex, RegexFlags), + new Regex(DateTimeDefinitions.OneOnOneRegex, RegexFlags, RegexTimeOut), }; private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -68,9 +75,19 @@ public GermanMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) SetExtractor = new BaseSetExtractor(new GermanSetExtractorConfiguration(this)); HolidayExtractor = new BaseHolidayExtractor(new GermanHolidayExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new GermanTimeZoneExtractorConfiguration(this)); - IntegerExtractor = Number.German.IntegerExtractor.GetInstance(); + DateTimeAltExtractor = new BaseDateTimeAltExtractor(new GermanDateTimeAltExtractorConfiguration(this)); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new 
BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.German.IntegerExtractor.GetInstance(numConfig); + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict); } @@ -128,10 +145,16 @@ public GermanMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + public Regex FailFastRegex { get; } = null; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; StringMatcher IMergedExtractorConfiguration.SuperfluousWordMatcher => SuperfluousWordMatcher; + + bool IMergedExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + public Regex TasksModeMentionFilters { get; } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanSetExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanSetExtractorConfiguration.cs index ff77f56d6a..e333408b28 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanSetExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanSetExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.German; @@ -9,31 +12,31 @@ namespace Microsoft.Recognizers.Text.DateTime.German public class GermanSetExtractorConfiguration : BaseDateTimeOptionsConfiguration, ISetExtractorConfiguration { public static readonly Regex SetUnitRegex = - new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodicRegex = - new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachUnitRegex = - new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachPrefixRegex = - new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetLastRegex = - new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachDayRegex = - new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex BeforeEachDayRegex = - new Regex(DateTimeDefinitions.BeforeEachDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeEachDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetWeekDayRegex = - new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetEachRegex = - new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | 
RegexOptions.ExplicitCapture; @@ -63,6 +66,8 @@ public GermanSetExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DateTimePeriodExtractor { get; } + bool ISetExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + Regex ISetExtractorConfiguration.LastRegex => SetLastRegex; Regex ISetExtractorConfiguration.EachPrefixRegex => EachPrefixRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanTimeExtractorConfiguration.cs index e40c3a37f5..0e8ce2007c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanTimeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.German; @@ -10,110 +13,110 @@ public class GermanTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration // part 1: smallest component // -------------------------------------- public static readonly Regex DescRegex = - new Regex(DateTimeDefinitions.DescRegex, RegexFlags); + new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut); public static readonly Regex HourNumRegex = - new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex MinuteNumRegex = - new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags, RegexTimeOut); // part 2: middle level component // -------------------------------------- // handle "... 
o'clock" public static readonly Regex OclockRegex = - new Regex(DateTimeDefinitions.OclockRegex, RegexFlags); + new Regex(DateTimeDefinitions.OclockRegex, RegexFlags, RegexTimeOut); // handle "... afternoon" public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); // handle "... in the morning" public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); // handle "half past ..." "a quarter to ..." // rename 'min' group to 'deltamin' public static readonly Regex LessThanOneHour = - new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags); + new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags, RegexTimeOut); // handle "six thirty", "six twenty one" public static readonly Regex WrittenTimeRegex = - new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimePrefix = - new Regex(DateTimeDefinitions.TimePrefix, RegexFlags); + new Regex(DateTimeDefinitions.TimePrefix, RegexFlags, RegexTimeOut); public static readonly Regex TimeSuffix = - new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags); + new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags, RegexTimeOut); public static readonly Regex BasicTime = - new Regex(DateTimeDefinitions.BasicTime, RegexFlags); + new Regex(DateTimeDefinitions.BasicTime, RegexFlags, RegexTimeOut); // handle special time such as 'at midnight', 'midnight', 'midday' public static readonly Regex MidnightRegex = - new Regex(DateTimeDefinitions.MidnightRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidnightRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidmorningRegex = - new Regex(DateTimeDefinitions.MidmorningRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidmorningRegex, RegexFlags, RegexTimeOut); 
public static readonly Regex MidafternoonRegex = - new Regex(DateTimeDefinitions.MidafternoonRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidafternoonRegex, RegexFlags, RegexTimeOut); public static readonly Regex MiddayRegex = - new Regex(DateTimeDefinitions.MiddayRegex, RegexFlags); + new Regex(DateTimeDefinitions.MiddayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidTimeRegex = - new Regex(DateTimeDefinitions.MidTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidTimeRegex, RegexFlags, RegexTimeOut); // part 3: regex for time // -------------------------------------- // handle "at four" "at 3" public static readonly Regex AtRegex = - new Regex(DateTimeDefinitions.AtRegex, RegexFlags); + new Regex(DateTimeDefinitions.AtRegex, RegexFlags, RegexTimeOut); public static readonly Regex IshRegex = - new Regex(DateTimeDefinitions.IshRegex, RegexFlags); + new Regex(DateTimeDefinitions.IshRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConnectNumRegex = - new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeBeforeAfterRegex = - new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] TimeRegexList = { // (three min past)? seven|7|(senven thirty) pm - new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags, RegexTimeOut), // (three min past)? 3:00(:00)? (pm)? - new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags, RegexTimeOut), // (three min past)? 
3.00 (pm) - new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags, RegexTimeOut), // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? (in the night) - new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags, RegexTimeOut), // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags, RegexTimeOut), // (five thirty|seven|7|7:00(:00)?) (pm)? (in the night) - new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags, RegexTimeOut), // (in the night) at (five thirty|seven|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags, RegexTimeOut), // (in the night) (five thirty|seven|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex8, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex8, RegexFlags, RegexTimeOut), - new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags, RegexTimeOut), // (three min past)? 3h00 (pm)? 
- new Regex(DateTimeDefinitions.TimeRegex10, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex10, RegexFlags, RegexTimeOut), // 340pm ConnectNumRegex, @@ -139,5 +142,9 @@ public GermanTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DurationExtractor { get; } public IDateTimeExtractor TimeZoneExtractor { get; } + + public string TimeTokenPrefix => DateTimeDefinitions.TimeTokenPrefix; + + public Dictionary AmbiguityFiltersDict => null; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanTimePeriodExtractorConfiguration.cs index b2b07656ce..94b6c16dc8 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanTimePeriodExtractorConfiguration.cs @@ -1,64 +1,72 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.German; using Microsoft.Recognizers.Text.DateTime.German.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.German { public class GermanTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, ITimePeriodExtractorConfiguration { public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); public static readonly Regex HourRegex = - new Regex(DateTimeDefinitions.HourRegex, RegexFlags); + new Regex(DateTimeDefinitions.HourRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodHourNumRegex = - new Regex(DateTimeDefinitions.PeriodHourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodHourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodDescRegex = - new Regex(DateTimeDefinitions.DescRegex, RegexFlags); + new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); public static readonly Regex PureNumFromTo = - new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags); + new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags, RegexTimeOut); public static readonly Regex PureNumBetweenAnd = - new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeFromTo = - new 
Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeBetweenAnd = - new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex PrepositionRegex = - new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfDayRegex = - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeFollowedUnit = - new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex TimeNumberCombinedWithUnit = - new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); public static readonly Regex GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AmbiguousTimePeriodRegex = + new Regex(DateTimeDefinitions.AmbiguousTimePeriodRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -68,7 +76,17 @@ public 
GermanTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration conf TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; SingleTimeExtractor = new BaseTimeExtractor(new GermanTimeExtractorConfiguration(this)); UtilityConfiguration = new GermanDatetimeUtilityConfiguration(); - IntegerExtractor = Number.German.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.German.IntegerExtractor.GetInstance(numConfig); + TimeZoneExtractor = new BaseTimeZoneExtractor(new GermanTimeZoneExtractorConfiguration(this)); } @@ -94,10 +112,12 @@ public GermanTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration conf Regex ITimePeriodExtractorConfiguration.GeneralEndingRegex => GeneralEndingRegex; + // @TODO move hardcoded strings to YAML file public bool GetFromTokenIndex(string text, out int index) { index = -1; - if (text.EndsWith("von")) + + if (text.EndsWith("von", StringComparison.Ordinal)) { index = text.LastIndexOf("von", StringComparison.Ordinal); return true; @@ -109,7 +129,8 @@ public bool GetFromTokenIndex(string text, out int index) public bool GetBetweenTokenIndex(string text, out int index) { index = -1; - if (text.EndsWith("zwischen")) + + if (text.EndsWith("zwischen", StringComparison.Ordinal)) { index = text.LastIndexOf("zwischen", StringComparison.Ordinal); return true; @@ -120,7 +141,37 @@ public bool GetBetweenTokenIndex(string text, out int index) public bool IsConnectorToken(string text) { - return text.Equals("und"); + return text.Equals("und", StringComparison.Ordinal); + } + + // For German there is a problem with cases like "Morgen Abend" which is parsed as "Morning Evening" as "Morgen" can mean both "tomorrow" and "morning". 
+ // When the extractor extracts "Abend" in this example it will take the string before that to look for a relative shift to another day like "yesterday", "tomorrow" etc. + // When trying to do this on the string "morgen" it will be extracted as a time period ("morning") by the TimePeriodExtractor, and not as "tomorrow". + // Filtering out the string "morgen" from the TimePeriodExtractor will fix the problem as only in the case where "morgen" is NOT a time period the string "morgen" will be passed to this extractor. + // It should also be solvable through the config but we do not want to introduce changes to the interface and configs for all other languages. + public List ApplyPotentialPeriodAmbiguityHotfix(string text, List timePeriodErs) + { + List timePeriodErsResult = new List(); + var matches = AmbiguousTimePeriodRegex.Matches(text); + foreach (var timePeriodEr in timePeriodErs) + { + if (matches.Count > 0) + { + foreach (Match match in matches) + { + if (!(timePeriodEr.Text == match.Value && timePeriodEr.Start == match.Index && timePeriodEr.Length == match.Length)) + { + timePeriodErsResult.Add(timePeriodEr); + } + } + } + else + { + timePeriodErsResult.Add(timePeriodEr); + } + } + + return timePeriodErsResult; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanTimeZoneExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanTimeZoneExtractorConfiguration.cs index 6c73594872..4745e1bbbe 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanTimeZoneExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanTimeZoneExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Matcher; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanCommonDateTimeParserConfiguration.cs index 435f41cce2..b9ea3ba81b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanCommonDateTimeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.German; using Microsoft.Recognizers.Text.DateTime.German.Utilities; @@ -26,12 +29,22 @@ public GermanCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration con WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); - CardinalExtractor = Number.German.CardinalExtractor.GetInstance(); - IntegerExtractor = Number.German.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.German.OrdinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.German.CardinalExtractor.GetInstance(numConfig); + IntegerExtractor = Number.German.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.German.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new GermanNumberParserConfiguration(numConfig)); - NumberParser = new BaseNumberParser(new 
GermanNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); DateExtractor = new BaseDateExtractor(new GermanDateExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new GermanHolidayExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new GermanTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new GermanDateTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new GermanDurationExtractorConfiguration(this)); @@ -40,6 +53,7 @@ public GermanCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration con DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(new GermanDateTimePeriodExtractorConfiguration(this)); DurationParser = new BaseDurationParser(new GermanDurationParserConfiguration(this)); DateParser = new BaseDateParser(new GermanDateParserConfiguration(this)); + HolidayTimeParser = new HolidayParserGer(new GermanHolidayParserConfiguration(this)); TimeParser = new TimeParser(new GermanTimeParserConfiguration(this)); DateTimeParser = new BaseDateTimeParser(new GermanDateTimeParserConfiguration(this)); DatePeriodParser = new BaseDatePeriodParser(new GermanDatePeriodParserConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDateParserConfiguration.cs index e6c5d1bb99..bfc75cd432 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDateParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDateParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.German; @@ -19,6 +23,8 @@ public GermanDateParserConfiguration(ICommonDateTimeParserConfiguration config) DurationExtractor = config.DurationExtractor; DateExtractor = config.DateExtractor; DurationParser = config.DurationParser; + HolidayParser = new BaseHolidayParser(new GermanHolidayParserConfiguration(this)); + DateRegexes = new GermanDateExtractorConfiguration(this).DateRegexList; OnRegex = GermanDateExtractorConfiguration.OnRegex; SpecialDayRegex = GermanDateExtractorConfiguration.SpecialDayRegex; @@ -36,12 +42,15 @@ public GermanDateParserConfiguration(ICommonDateTimeParserConfiguration config) RelativeMonthRegex = GermanDateExtractorConfiguration.RelativeMonthRegex; StrictRelativeRegex = GermanDateExtractorConfiguration.StrictRelativeRegex; YearSuffix = GermanDateExtractorConfiguration.YearSuffix; + BeforeAfterRegex = GermanDateExtractorConfiguration.BeforeAfterRegex; RelativeWeekDayRegex = GermanDateExtractorConfiguration.RelativeWeekDayRegex; RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexOptions.Singleline); NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexOptions.Singleline); + AfterNextPrefixRegex = new Regex(DateTimeDefinitions.AfterNextPrefixRegex, RegexOptions.Singleline); PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexOptions.Singleline); UpcomingPrefixRegex = new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexOptions.Singleline); PastPrefixRegex = new Regex(DateTimeDefinitions.PastPrefixRegex, RegexOptions.Singleline); + DayOfMonth = config.DayOfMonth; DayOfWeek = config.DayOfWeek; MonthOfYear = config.MonthOfYear; @@ -71,6 +80,8 @@ public GermanDateParserConfiguration(ICommonDateTimeParserConfiguration config) public IDateTimeParser DurationParser { get; } + public IDateTimeParser HolidayParser { get; } 
+ public IEnumerable DateRegexes { get; } public IImmutableDictionary UnitMap { get; } @@ -113,12 +124,18 @@ public GermanDateParserConfiguration(ICommonDateTimeParserConfiguration config) public Regex NextPrefixRegex { get; } + public Regex AfterNextPrefixRegex { get; } + public Regex PreviousPrefixRegex { get; } public Regex UpcomingPrefixRegex { get; } public Regex PastPrefixRegex { get; } + public Regex BeforeAfterRegex { get; } + + public Regex TasksModeDurationToDatePatterns { get; } + public IImmutableDictionary DayOfMonth { get; } public IImmutableDictionary DayOfWeek { get; } @@ -145,7 +162,14 @@ public int GetSwiftMonthOrYear(string text) { var trimmedText = text.Trim(); var swift = 0; - if (NextPrefixRegex.IsMatch(trimmedText)) + + var afterNextMatch = AfterNextPrefixRegex.Match(text); + + if (afterNextMatch.Success) + { + swift = 2; + } + else if (NextPrefixRegex.IsMatch(trimmedText)) { swift = 1; } @@ -160,7 +184,10 @@ public int GetSwiftMonthOrYear(string text) public bool IsCardinalLast(string text) { var trimmedText = text.Trim(); - return trimmedText.Equals("letzten"); + + // @TODO move hardcoded values to resources file + + return trimmedText.Equals("letzten", StringComparison.Ordinal); } public string Normalize(string text) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDatePeriodParserConfiguration.cs index dbdbda916f..cbe97d9c9a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDatePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; @@ -9,19 +13,25 @@ namespace Microsoft.Recognizers.Text.DateTime.German public class GermanDatePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDatePeriodParserConfiguration { public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PenultimatePrefixRegex = + new Regex(DateTimeDefinitions.PenultimatePrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeRegex = - new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificEndOfRangeRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AfterNextPrefixRegex = + new Regex(DateTimeDefinitions.AfterNextPrefixRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -37,6 +47,7 @@ public GermanDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration co DurationExtractor = config.DurationExtractor; DurationParser = config.DurationParser; DateParser = config.DateParser; + MonthFrontBetweenRegex = GermanDatePeriodExtractorConfiguration.MonthFrontBetweenRegex; BetweenRegex = 
GermanDatePeriodExtractorConfiguration.BetweenRegex; MonthFrontSimpleCasesRegex = GermanDatePeriodExtractorConfiguration.MonthFrontSimpleCasesRegex; @@ -75,6 +86,11 @@ public GermanDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration co MoreThanRegex = GermanDatePeriodExtractorConfiguration.MoreThanRegex; CenturySuffixRegex = GermanDatePeriodExtractorConfiguration.CenturySuffixRegex; NowRegex = GermanDatePeriodExtractorConfiguration.NowRegex; + FirstLastRegex = GermanDatePeriodExtractorConfiguration.FirstLastRegex; + OfYearRegex = GermanDatePeriodExtractorConfiguration.OfYearRegex; + SpecialDayRegex = GermanDateExtractorConfiguration.SpecialDayRegex; + TodayNowRegex = new Regex(DateTimeDefinitions.TodayNowRegex, RegexOptions.Singleline); + UnitMap = config.UnitMap; CardinalMap = config.CardinalMap; DayOfMonth = config.DayOfMonth; @@ -184,6 +200,14 @@ public GermanDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration co public Regex NowRegex { get; } + public Regex SpecialDayRegex { get; } + + public Regex TodayNowRegex { get; } + + public Regex FirstLastRegex { get; } + + public Regex OfYearRegex { get; } + Regex ISimpleDatePeriodParserConfiguration.RelativeRegex => RelativeRegex; Regex IDatePeriodParserConfiguration.NextPrefixRegex => NextPrefixRegex; @@ -194,6 +218,8 @@ public GermanDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration co Regex IDatePeriodParserConfiguration.UnspecificEndOfRangeRegex => UnspecificEndOfRangeRegex; + Regex IDatePeriodParserConfiguration.AmbiguousPointRangeRegex => null; + bool IDatePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; public IImmutableDictionary UnitMap { get; } @@ -220,7 +246,14 @@ public int GetSwiftDayOrMonth(string text) { var trimmedText = text.Trim(); var swift = 0; - if (NextPrefixRegex.IsMatch(trimmedText)) + + var afterNextMatch = AfterNextPrefixRegex.Match(text); + + if (afterNextMatch.Success) + { + swift = 2; + } + else if 
(NextPrefixRegex.IsMatch(trimmedText)) { swift = 1; } @@ -228,6 +261,10 @@ public int GetSwiftDayOrMonth(string text) { swift = -1; } + else if (PenultimatePrefixRegex.IsMatch(trimmedText)) + { + swift = -2; + } return swift; } @@ -244,6 +281,10 @@ public int GetSwiftYear(string text) { swift = -1; } + else if (PenultimatePrefixRegex.IsMatch(trimmedText)) + { + swift = -2; + } else if (ThisPrefixRegex.IsMatch(trimmedText)) { swift = 0; @@ -255,49 +296,54 @@ public int GetSwiftYear(string text) public bool IsFuture(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.FutureTerms.Any(o => trimmedText.StartsWith(o)); + return DateTimeDefinitions.FutureTerms.Any(o => trimmedText.StartsWith(o, StringComparison.Ordinal)); } public bool IsLastCardinal(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.LastCardinalTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.LastCardinalTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } public bool IsMonthOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); } public bool IsMonthToDate(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } public bool IsWeekend(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); } public bool IsWeekOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o, 
StringComparison.Ordinal)); + } + + public bool IsFortnight(string text) + { + return false; } public bool IsYearOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); } public bool IsYearToDate(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDateTimeParserConfiguration.cs index 25c7dc6e95..063c916526 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDateTimeParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.German; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -21,10 +25,13 @@ public GermanDateTimeParserConfiguration(ICommonDateTimeParserConfiguration conf DateParser = config.DateParser; TimeParser = config.TimeParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; + NowRegex = GermanDateTimeExtractorConfiguration.NowRegex; - AMTimeRegex = new Regex(DateTimeDefinitions.AMTimeRegex, RegexFlags); - PMTimeRegex = new Regex(DateTimeDefinitions.PMTimeRegex, RegexFlags); + AMTimeRegex = new Regex(DateTimeDefinitions.AMTimeRegex, RegexFlags, RegexTimeOut); + PMTimeRegex = new Regex(DateTimeDefinitions.PMTimeRegex, RegexFlags, RegexTimeOut); SimpleTimeOfTodayAfterRegex = GermanDateTimeExtractorConfiguration.SimpleTimeOfTodayAfterRegex; SimpleTimeOfTodayBeforeRegex = GermanDateTimeExtractorConfiguration.SimpleTimeOfTodayBeforeRegex; @@ -95,15 +102,26 @@ public GermanDateTimeParserConfiguration(ICommonDateTimeParserConfiguration conf public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + public int GetHour(string text, int hour) { var trimmedText = text.Trim(); int result = hour; - if ((trimmedText.EndsWith("morgen") || trimmedText.EndsWith("morgens")) && hour >= Constants.HalfDayHourCount) + + // @TODO Move all hardcoded strings to resource file + + if ((trimmedText.EndsWith("morgen", StringComparison.Ordinal) || + trimmedText.EndsWith("morgens", StringComparison.Ordinal)) && + hour >= Constants.HalfDayHourCount) { result -= Constants.HalfDayHourCount; } - else if (!(trimmedText.EndsWith("morgen") || trimmedText.EndsWith("morgens")) && hour < Constants.HalfDayHourCount) + else if (!(trimmedText.EndsWith("morgen", StringComparison.Ordinal) || + 
trimmedText.EndsWith("morgens", StringComparison.Ordinal)) && + hour < Constants.HalfDayHourCount) { result += Constants.HalfDayHourCount; } @@ -114,16 +132,28 @@ public int GetHour(string text, int hour) public bool GetMatchedNowTimex(string text, out string timex) { var trimmedText = text.Trim(); - if (trimmedText.EndsWith("jetzt") || trimmedText.Equals("momentan") || trimmedText.Equals("gerade") || trimmedText.Equals("aktuell") || - trimmedText.Equals("im moment") || trimmedText.Equals("in diesem moment") || trimmedText.Equals("derzeit")) + + // @TODO Move all hardcoded strings to resource file + + if (trimmedText.EndsWith("jetzt", StringComparison.Ordinal) || + trimmedText.Equals("momentan", StringComparison.Ordinal) || + trimmedText.Equals("gerade", StringComparison.Ordinal) || + trimmedText.Equals("aktuell", StringComparison.Ordinal) || + trimmedText.Equals("aktuelle", StringComparison.Ordinal) || + trimmedText.Equals("im moment", StringComparison.Ordinal) || + trimmedText.Equals("in diesem moment", StringComparison.Ordinal) || + trimmedText.Equals("derzeit", StringComparison.Ordinal)) { timex = "PRESENT_REF"; } - else if (trimmedText.Equals("neulich") || trimmedText.Equals("vorher") || trimmedText.Equals("vorhin")) + else if (trimmedText.Equals("neulich", StringComparison.Ordinal) || + trimmedText.Equals("vorher", StringComparison.Ordinal) || + trimmedText.Equals("vorhin", StringComparison.Ordinal)) { timex = "PAST_REF"; } - else if (trimmedText.Equals("so früh wie möglich") || trimmedText.Equals("asap")) + else if (trimmedText.Equals("so früh wie möglich", StringComparison.Ordinal) || + trimmedText.Equals("asap", StringComparison.Ordinal)) { timex = "FUTURE_REF"; } @@ -141,11 +171,17 @@ public int GetSwiftDay(string text) var trimmedText = text.Trim(); var swift = 0; - if (trimmedText.StartsWith("nächsten") || trimmedText.StartsWith("nächste") || trimmedText.StartsWith("nächstes") || trimmedText.StartsWith("nächster")) + if 
(trimmedText.StartsWith("nächsten", StringComparison.Ordinal) || + trimmedText.StartsWith("nächste", StringComparison.Ordinal) || + trimmedText.StartsWith("nächstes", StringComparison.Ordinal) || + trimmedText.StartsWith("nächster", StringComparison.Ordinal)) { swift = 1; } - else if (trimmedText.StartsWith("letzten") || trimmedText.StartsWith("letzte") || trimmedText.StartsWith("letztes") || trimmedText.StartsWith("letzter")) + else if (trimmedText.StartsWith("letzten", StringComparison.Ordinal) || + trimmedText.StartsWith("letzte", StringComparison.Ordinal) || + trimmedText.StartsWith("letztes", StringComparison.Ordinal) || + trimmedText.StartsWith("letzter", StringComparison.Ordinal)) { swift = -1; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDateTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDateTimePeriodParserConfiguration.cs index f906166ad3..18a0054b49 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDateTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDateTimePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.German; @@ -8,16 +12,16 @@ namespace Microsoft.Recognizers.Text.DateTime.German public class GermanDateTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimePeriodParserConfiguration { public static readonly Regex MorningStartEndRegex = - new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfternoonStartEndRegex = - new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex EveningStartEndRegex = - new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex NightStartEndRegex = - new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -25,6 +29,7 @@ public GermanDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguratio : base(config) { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime; DateExtractor = config.DateExtractor; TimeExtractor = config.TimeExtractor; @@ -40,7 +45,11 @@ public GermanDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguratio DateTimeParser = config.DateTimeParser; TimeZoneParser = config.TimeZoneParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; + PureNumberFromToRegex = GermanTimePeriodExtractorConfiguration.PureNumFromTo; + HyphenDateRegex = GermanDateTimePeriodExtractorConfiguration.HyphenDateRegex; PureNumberBetweenAndRegex = 
GermanTimePeriodExtractorConfiguration.PureNumBetweenAnd; SpecificTimeOfDayRegex = GermanDateTimeExtractorConfiguration.SpecificTimeOfDayRegex; TimeOfDayRegex = GermanDateTimeExtractorConfiguration.TimeOfDayRegex; @@ -64,6 +73,8 @@ public GermanDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguratio public string TokenBeforeDate { get; } + public string TokenBeforeTime { get; } + public IDateExtractor DateExtractor { get; } public IDateTimeExtractor TimeExtractor { get; } @@ -92,6 +103,8 @@ public GermanDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguratio public Regex PureNumberFromToRegex { get; } + public Regex HyphenDateRegex { get; } + public Regex PureNumberBetweenAndRegex { get; } public Regex SpecificTimeOfDayRegex { get; } @@ -128,48 +141,52 @@ public GermanDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguratio bool IDateTimePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + Regex IDateTimePeriodParserConfiguration.TasksmodeMealTimeofDayRegex => null; + public IImmutableDictionary UnitMap { get; } public IImmutableDictionary Numbers { get; } - public bool GetMatchedTimeRange(string text, out string timeStr, out int beginHour, out int endHour, out int endMin) + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + + public bool GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMin) { var trimmedText = text.Trim(); beginHour = 0; endHour = 0; endMin = 0; + if (MorningStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TMO"; - beginHour = 8; - endHour = Constants.HalfDayHourCount; + todSymbol = Constants.Morning; } else if (AfternoonStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TAF"; - beginHour = Constants.HalfDayHourCount; - endHour = 16; + todSymbol = Constants.Afternoon; } else if (EveningStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TEV"; - beginHour = 16; - 
endHour = 20; + todSymbol = Constants.Evening; } else if (NightStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TNI"; - beginHour = 20; - endHour = 23; - endMin = 59; + todSymbol = Constants.Night; } else { - timeStr = null; + todSymbol = null; return false; } + var parseResult = TimexUtility.ResolveTimeOfDay(todSymbol); + todSymbol = parseResult.Timex; + beginHour = parseResult.BeginHour; + endHour = parseResult.EndHour; + endMin = parseResult.EndMin; + return true; } @@ -178,11 +195,20 @@ public int GetSwiftPrefix(string text) var trimmedText = text.Trim(); var swift = 0; - if (trimmedText.StartsWith("nächster") || trimmedText.StartsWith("nächste") || trimmedText.StartsWith("nächsten") || trimmedText.StartsWith("nächstes")) + + // @TODO move hardcoded values to resources file + + if (trimmedText.StartsWith("nächster", StringComparison.Ordinal) || + trimmedText.StartsWith("nächste", StringComparison.Ordinal) || + trimmedText.StartsWith("nächsten", StringComparison.Ordinal) || + trimmedText.StartsWith("nächstes", StringComparison.Ordinal)) { swift = 1; } - else if (trimmedText.StartsWith("letzter") || trimmedText.StartsWith("letzte") || trimmedText.StartsWith("letzten") || trimmedText.StartsWith("letztes")) + else if (trimmedText.StartsWith("letzter", StringComparison.Ordinal) || + trimmedText.StartsWith("letzte", StringComparison.Ordinal) || + trimmedText.StartsWith("letzten", StringComparison.Ordinal) || + trimmedText.StartsWith("letztes", StringComparison.Ordinal)) { swift = -1; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDurationParserConfiguration.cs index a00996ff4c..e0d387a22c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDurationParserConfiguration.cs @@ -1,6 +1,12 @@ -using System.Collections.Immutable; 
+// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.German; +using Microsoft.Recognizers.Text.Number; + namespace Microsoft.Recognizers.Text.DateTime.German { public class GermanDurationParserConfiguration : BaseDateTimeOptionsConfiguration, IDurationParserConfiguration @@ -8,10 +14,15 @@ public class GermanDurationParserConfiguration : BaseDateTimeOptionsConfiguratio public GermanDurationParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { - CardinalExtractor = config.CardinalExtractor; + + var numOptions = ((BaseNumberExtractor)config.CardinalExtractor).Options; + var numConfig = new BaseNumberOptionsConfiguration(Text.Culture.German, numOptions, NumberMode.PureNumber); + + CardinalExtractor = Number.German.NumberExtractor.GetInstance(numConfig); NumberParser = config.NumberParser; DurationExtractor = new BaseDurationExtractor(new GermanDurationExtractorConfiguration(this), false); + NumberCombinedWithUnit = GermanDurationExtractorConfiguration.NumberCombinedWithDurationUnit; AnUnitRegex = GermanDurationExtractorConfiguration.AnUnitRegex; @@ -33,7 +44,7 @@ public GermanDurationParserConfiguration(ICommonDateTimeParserConfiguration conf public IExtractor CardinalExtractor { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public IParser NumberParser { get; } @@ -41,6 +52,8 @@ public GermanDurationParserConfiguration(ICommonDateTimeParserConfiguration conf public Regex AnUnitRegex { get; } + public Regex PrefixArticleRegex { get; } = null; + public Regex DuringRegex { get; } public Regex AllDateUnitRegex { get; } @@ -61,6 +74,8 @@ public GermanDurationParserConfiguration(ICommonDateTimeParserConfiguration conf public Regex SpecialNumberUnitRegex { get; } + bool IDurationParserConfiguration.CheckBothBeforeAfter => 
DateTimeDefinitions.CheckBothBeforeAfter; + public IImmutableDictionary UnitMap { get; } public IImmutableDictionary UnitValueMap { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanHolidayParserConfiguration.cs index e660706aab..dc490a0cb1 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanHolidayParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanHolidayParserConfiguration.cs @@ -1,30 +1,49 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Immutable; +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.German; namespace Microsoft.Recognizers.Text.DateTime.German { public class GermanHolidayParserConfiguration : BaseHolidayParserConfiguration { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public GermanHolidayParserConfiguration(IDateTimeOptionsConfiguration config) : base(config) { + ThisPrefixRegex = new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); + NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + this.HolidayRegexList = GermanHolidayExtractorConfiguration.HolidayRegexList; this.HolidayNames = DateTimeDefinitions.HolidayNames.ToImmutableDictionary(); } + public Regex ThisPrefixRegex { get; } + + public Regex NextPrefixRegex { get; } + + public Regex PreviousPrefixRegex { get; } + public override int GetSwiftYear(string text) { var trimmedText = text.Trim(); var swift = -10; - if (trimmedText.StartsWith("nächster") || trimmedText.StartsWith("nächstes") || trimmedText.StartsWith("nächsten") || 
trimmedText.StartsWith("nächste")) + + if (NextPrefixRegex.IsMatch(trimmedText)) { swift = 1; } - else if (trimmedText.StartsWith("letzter") || trimmedText.StartsWith("letztes") || trimmedText.StartsWith("letzten") || trimmedText.StartsWith("letzte")) + else if (PreviousPrefixRegex.IsMatch(trimmedText)) { swift = -1; } - else if (trimmedText.StartsWith("dieser") || trimmedText.StartsWith("dieses") || trimmedText.StartsWith("diesen") || trimmedText.StartsWith("diese")) + else if (ThisPrefixRegex.IsMatch(trimmedText)) { swift = 0; } @@ -34,9 +53,9 @@ public override int GetSwiftYear(string text) public override string SanitizeHolidayToken(string holiday) { - return holiday - .Replace(" ", string.Empty) - .Replace("'", string.Empty); + return holiday.Replace(" ", string.Empty) + .Replace("-", string.Empty) + .Replace("'", string.Empty); } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanMergedParserConfiguration.cs index c9102dbdff..b92c8a307b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanMergedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanMergedParserConfiguration.cs @@ -1,5 +1,8 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.German; using Microsoft.Recognizers.Text.Matcher; namespace Microsoft.Recognizers.Text.DateTime.German @@ -46,5 +49,7 @@ public GermanMergedParserConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeParser HolidayParser { get; } public StringMatcher SuperfluousWordMatcher { get; } + + bool IMergedParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanSetParserConfiguration.cs index 36e9a1da6f..7fa28bbd97 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanSetParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanSetParserConfiguration.cs @@ -1,11 +1,20 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.German; using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime.German { public class GermanSetParserConfiguration : BaseDateTimeOptionsConfiguration, ISetParserConfiguration { + // pass FutureTerms as List to ReplaceValueInTextWithFutTerm function + private static readonly List ThisTerms = (List)DateTimeDefinitions.FutureTerms; + public GermanSetParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -79,32 +88,59 @@ public GermanSetParserConfiguration(ICommonDateTimeParserConfiguration config) public bool GetMatchedDailyTimex(string text, out string timex) { var trimmedText = text.Trim(); - if (trimmedText.Equals("täglich") || trimmedText.Equals("täglicher") || trimmedText.Equals("tägliches") || - trimmedText.Equals("tägliche") || trimmedText.Equals("täglichen") || trimmedText.Equals("alltäglich") || - trimmedText.Equals("alltäglicher") || trimmedText.Equals("alltägliches") || trimmedText.Equals("alltägliche") || - trimmedText.Equals("alltäglichen") || - trimmedText.Equals("jeden tag")) + + // @TODO move hardcoded values to resources file + + if (trimmedText.Equals("täglich", StringComparison.Ordinal) || + trimmedText.Equals("täglicher", StringComparison.Ordinal) || + trimmedText.Equals("tägliches", StringComparison.Ordinal) || + trimmedText.Equals("tägliche", StringComparison.Ordinal) || + trimmedText.Equals("täglichen", StringComparison.Ordinal) || + trimmedText.Equals("alltäglich", StringComparison.Ordinal) || + trimmedText.Equals("alltäglicher", StringComparison.Ordinal) || + trimmedText.Equals("alltägliches", StringComparison.Ordinal) || + trimmedText.Equals("alltägliche", StringComparison.Ordinal) || + trimmedText.Equals("alltäglichen", StringComparison.Ordinal) || + trimmedText.Equals("jeden tag", 
StringComparison.Ordinal)) { timex = "P1D"; } - else if (trimmedText.Equals("wöchentlich") || trimmedText.Equals("wöchentlicher") || trimmedText.Equals("wöchentliches") || - trimmedText.Equals("wöchentliche") || trimmedText.Equals("wöchentlichen") || trimmedText.Equals("allwöchentlich") || - trimmedText.Equals("allwöchentlicher") || trimmedText.Equals("allwöchentliches") || trimmedText.Equals("allwöchentliche") || - trimmedText.Equals("allwöchentlichen")) + else if (trimmedText.Equals("wöchentlich", StringComparison.Ordinal) || + trimmedText.Equals("wöchentlicher", StringComparison.Ordinal) || + trimmedText.Equals("wöchentliches", StringComparison.Ordinal) || + trimmedText.Equals("wöchentliche", StringComparison.Ordinal) || + trimmedText.Equals("wöchentlichen", StringComparison.Ordinal) || + trimmedText.Equals("allwöchentlich", StringComparison.Ordinal) || + trimmedText.Equals("allwöchentlicher", StringComparison.Ordinal) || + trimmedText.Equals("allwöchentliches", StringComparison.Ordinal) || + trimmedText.Equals("allwöchentliche", StringComparison.Ordinal) || + trimmedText.Equals("allwöchentlichen", StringComparison.Ordinal)) { timex = "P1W"; } - else if (trimmedText.Equals("monatlich") || trimmedText.Equals("monatlicher") || trimmedText.Equals("monatliches") || - trimmedText.Equals("monatliche") || trimmedText.Equals("monatlichen") || trimmedText.Equals("allmonatlich") || - trimmedText.Equals("allmonatlicher") || trimmedText.Equals("allmonatliches") || trimmedText.Equals("allmonatliche") || - trimmedText.Equals("allmonatlichen")) + else if (trimmedText.Equals("monatlich", StringComparison.Ordinal) || + trimmedText.Equals("monatlicher", StringComparison.Ordinal) || + trimmedText.Equals("monatliches", StringComparison.Ordinal) || + trimmedText.Equals("monatliche", StringComparison.Ordinal) || + trimmedText.Equals("monatlichen", StringComparison.Ordinal) || + trimmedText.Equals("allmonatlich", StringComparison.Ordinal) || + trimmedText.Equals("allmonatlicher", 
StringComparison.Ordinal) || + trimmedText.Equals("allmonatliches", StringComparison.Ordinal) || + trimmedText.Equals("allmonatliche", StringComparison.Ordinal) || + trimmedText.Equals("allmonatlichen", StringComparison.Ordinal)) { timex = "P1M"; } - else if (trimmedText.Equals("jährlich") || trimmedText.Equals("jährlicher") || trimmedText.Equals("jährliches") || - trimmedText.Equals("jährliche") || trimmedText.Equals("jährlichen") || trimmedText.Equals("alljährlich") || - trimmedText.Equals("alljährlicher") || trimmedText.Equals("alljährliches") || trimmedText.Equals("alljährliche") || - trimmedText.Equals("alljährlichen")) + else if (trimmedText.Equals("jährlich", StringComparison.Ordinal) || + trimmedText.Equals("jährlicher", StringComparison.Ordinal) || + trimmedText.Equals("jährliches", StringComparison.Ordinal) || + trimmedText.Equals("jährliche", StringComparison.Ordinal) || + trimmedText.Equals("jährlichen", StringComparison.Ordinal) || + trimmedText.Equals("alljährlich", StringComparison.Ordinal) || + trimmedText.Equals("alljährlicher", StringComparison.Ordinal) || + trimmedText.Equals("alljährliches", StringComparison.Ordinal) || + trimmedText.Equals("alljährliche", StringComparison.Ordinal) || + trimmedText.Equals("alljährlichen", StringComparison.Ordinal)) { timex = "P1Y"; } @@ -120,19 +156,22 @@ public bool GetMatchedDailyTimex(string text, out string timex) public bool GetMatchedUnitTimex(string text, out string timex) { var trimmedText = text.Trim(); - if (trimmedText.Equals("tag")) + + // @TODO move hardcoded values to resources file + + if (trimmedText.Equals("tag", StringComparison.Ordinal)) { timex = "P1D"; } - else if (trimmedText.Equals("woche")) + else if (trimmedText.Equals("woche", StringComparison.Ordinal)) { timex = "P1W"; } - else if (trimmedText.Equals("monat")) + else if (trimmedText.Equals("monat", StringComparison.Ordinal)) { timex = "P1M"; } - else if (trimmedText.Equals("jahr")) + else if (trimmedText.Equals("jahr", 
StringComparison.Ordinal)) { timex = "P1Y"; } @@ -146,5 +185,7 @@ public bool GetMatchedUnitTimex(string text, out string timex) } public string WeekDayGroupMatchString(Match match) => SetHandler.WeekDayGroupMatchString(match); + + public string ReplaceValueInTextWithFutTerm(string text, string value) => TasksModeSetHandler.ReplaceValueInTextWithFutTerm(text, value, ThisTerms); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanTimeParserConfiguration.cs index 2d921e8d53..b9b46203ae 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanTimeParserConfiguration.cs @@ -1,5 +1,10 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; using System.Collections.Immutable; +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.German; @@ -10,14 +15,32 @@ namespace Microsoft.Recognizers.Text.DateTime.German { public class GermanTimeParserConfiguration : BaseDateTimeOptionsConfiguration, ITimeParserConfiguration { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + private static readonly Regex TimeSuffixFull = - new Regex(DateTimeDefinitions.TimeSuffixFull, RegexOptions.Singleline); + new Regex(DateTimeDefinitions.TimeSuffixFull, RegexFlags, RegexTimeOut); private static readonly Regex LunchRegex = - new Regex(DateTimeDefinitions.LunchRegex, RegexOptions.Singleline); + new Regex(DateTimeDefinitions.LunchRegex, RegexFlags, RegexTimeOut); private static readonly Regex NightRegex = - new Regex(DateTimeDefinitions.NightRegex, RegexOptions.Singleline); + new 
Regex(DateTimeDefinitions.NightRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex HalfTokenRegex = + new Regex(DateTimeDefinitions.HalfTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex QuarterToTokenRegex = + new Regex(DateTimeDefinitions.QuarterToTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex QuarterPastTokenRegex = + new Regex(DateTimeDefinitions.QuarterPastTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex ThreeQuarterToTokenRegex = + new Regex(DateTimeDefinitions.ThreeQuarterToTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex ThreeQuarterPastTokenRegex = + new Regex(DateTimeDefinitions.ThreeQuarterPastTokenRegex, RegexFlags, RegexTimeOut); public GermanTimeParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) @@ -49,17 +72,25 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha var deltaMin = 0; var trimmedPrefix = prefix.Trim(); - if (trimmedPrefix.StartsWith("halb")) + if (HalfTokenRegex.IsMatch(trimmedPrefix)) { deltaMin = -30; } - else if (trimmedPrefix.StartsWith("viertel nach")) + else if (QuarterToTokenRegex.IsMatch(trimmedPrefix)) + { + deltaMin = -15; + } + else if (QuarterPastTokenRegex.IsMatch(trimmedPrefix)) { deltaMin = 15; } - else if (trimmedPrefix.StartsWith("viertel vor")) + else if (ThreeQuarterToTokenRegex.IsMatch(trimmedPrefix)) { - deltaMin = -15; + deltaMin = -45; + } + else if (ThreeQuarterPastTokenRegex.IsMatch(trimmedPrefix)) + { + deltaMin = 45; } else { @@ -67,7 +98,7 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha var minStr = match.Groups["deltamin"].Value; if (!string.IsNullOrWhiteSpace(minStr)) { - deltaMin = int.Parse(minStr); + deltaMin = int.Parse(minStr, CultureInfo.InvariantCulture); } else { @@ -76,7 +107,9 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha } } - if 
(trimmedPrefix.EndsWith("zum")) + // @TODO move hardcoded values to resources file + + if (trimmedPrefix.EndsWith("zum", StringComparison.Ordinal)) { deltaMin = -deltaMin; } @@ -100,6 +133,7 @@ public void AdjustBySuffix(string suffix, ref int hour, ref int min, ref bool ha if (match.Success) { var oclockStr = match.Groups["oclock"].Value; + if (string.IsNullOrEmpty(oclockStr)) { var matchAmStr = match.Groups[Constants.AmGroupName].Value; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanTimePeriodParserConfiguration.cs index 07810385c7..52f5dc82f4 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanTimePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.German; @@ -54,10 +58,10 @@ public GermanTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration co public IDateTimeUtilityConfiguration UtilityConfiguration { get; } - public bool GetMatchedTimexRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) + public bool GetMatchedTimeRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) { var trimmedText = text.Trim(); - if (trimmedText.EndsWith("s")) + if (trimmedText.EndsWith("s", StringComparison.Ordinal)) { trimmedText = trimmedText.Substring(0, trimmedText.Length - 1); } @@ -67,23 +71,24 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou endMin = 0; var timeOfDay = string.Empty; - if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o))) + + if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Morning; } - else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Afternoon; } - else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Evening; } - else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o))) + else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o, StringComparison.Ordinal))) { timeOfDay = Constants.Daytime; } - else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = 
Constants.Night; } @@ -93,7 +98,7 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou return false; } - var parseResult = TimexUtility.ParseTimeOfDay(timeOfDay); + var parseResult = TimexUtility.ResolveTimeOfDay(timeOfDay); timex = parseResult.Timex; beginHour = parseResult.BeginHour; endHour = parseResult.EndHour; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/HolidayParserGer.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/HolidayParserGer.cs index a9900e0dd3..df92625d24 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/HolidayParserGer.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/HolidayParserGer.cs @@ -1,5 +1,9 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; +using System.Globalization; using System.Linq; using System.Text.RegularExpressions; @@ -9,6 +13,9 @@ namespace Microsoft.Recognizers.Text.DateTime.German { public class HolidayParserGer : IDateTimeParser { + + // Refactor this class to follow framework across languages + public static readonly string ParserName = Constants.SYS_DATETIME_DATE; // "Date"; public static readonly Dictionary> FixedHolidaysDict = new Dictionary> @@ -23,14 +30,14 @@ public class HolidayParserGer : IDateTimeParser { "austriannationalday", AustrianNationalDay }, // 26. Oktober { "immaculateconception", ImmaculateConception }, // 8. Dezember { "secondchristmasday", SecondChristmasDay }, // 26. Dezember - { "berchtoldsday", BerchtoldsDay }, // 2. Januar + { "berchtoldsday", BerchtoldDay }, // 2. Januar { "saintjosephsday", SaintJosephsDay }, // 19. März { "swissnationalday", SwissNationalDay }, // 1. 
August { "maosbirthday", MaoBirthday }, { "yuandan", NewYear }, { "teachersday", TeacherDay }, { "singleday", SinglesDay }, - { "allsaintsday", HalloweenDay }, + { "allsaintsday", AllHallowDay }, { "youthday", YouthDay }, { "childrenday", ChildrenDay }, { "femaleday", FemaleDay }, @@ -39,11 +46,16 @@ public class HolidayParserGer : IDateTimeParser { "girlsday", GirlsDay }, { "whiteloverday", WhiteLoverDay }, { "loverday", ValentinesDay }, + { "barbaratag", BarbaraTag }, + { "augsburgerfriedensfest", AugsburgerFriedensFest }, + { "johannistag", JohannisTag }, + { "peterundpaul", PeterUndPaul }, { "firstchristmasday", ChristmasDay }, { "xmas", ChristmasDay }, { "newyear", NewYear }, { "newyearday", NewYear }, { "newyearsday", NewYear }, + { "heiligedreikönige", HeiligeDreiKönige }, { "inaugurationday", InaugurationDay }, { "groundhougday", GroundhogDay }, { "valentinesday", ValentinesDay }, @@ -51,16 +63,18 @@ public class HolidayParserGer : IDateTimeParser { "aprilfools", FoolDay }, { "stgeorgeday", StGeorgeDay }, { "mayday", Mayday }, - { "cincodemayoday", CincoDeMayoday }, + { "labour", LaborDay }, + { "laborday", LaborDay }, + { "cincodemayoday", CincoDeMayo }, { "baptisteday", BaptisteDay }, { "usindependenceday", UsaIndependenceDay }, { "independenceday", UsaIndependenceDay }, { "bastilleday", BastilleDay }, { "halloweenday", HalloweenDay }, { "allhallowday", AllHallowDay }, - { "allsoulsday", AllSoulsday }, + { "allsoulsday", AllSoulsDay }, { "guyfawkesday", GuyFawkesDay }, - { "veteransday", Veteransday }, + { "veteransday", VeteransDay }, { "christmaseve", ChristmasEve }, { "newyeareve", NewYearEve }, { "piday", PiDay }, @@ -77,17 +91,20 @@ public class HolidayParserGer : IDateTimeParser { "easterday", GetEasterDay }, { "eastersunday", GetEasterDay }, { "eastermonday", GetEasterMondayOfYear }, + { "eastersaturday", GetEasterSaturday }, { "weiberfastnacht", GetWeiberfastnacht }, { "carnival", GetCarnival }, - { "ashwednesday", GetAshwednesday }, - { 
"palmsunday", GetPalmsunday }, - { "goodfriday", GetGoodfriday }, + { "ashwednesday", GetAshWednesday }, + { "palmsunday", GetPalmSunday }, + { "goodfriday", GetGoodFriday }, { "ascensionofchrist", GetAscensionOfChrist }, - { "whitsunday", GetWhitsunday }, - { "whitemonday", GetWhitMonday }, + { "whitesunday", GetWhiteSunday }, + { "whitemonday", GetWhiteMonday }, { "corpuschristi", GetCorpusChristi }, { "rosenmontag", GetRosenmontag }, { "fastnacht", GetFastnacht }, + { "fastnachtssamstag", GetFastnachtSaturday }, + { "fastnachtssonntag", GetFastnachtSunday }, { "holythursday", GetHolyThursday }, { "memorialdaygermany", GetMemorialDayGermany }, { "dayofrepentance", GetDayOfRepentance }, @@ -98,6 +115,7 @@ public class HolidayParserGer : IDateTimeParser { "fourthadvent", GetFourthAdvent }, { "chedayofrepentance", GetCheDayOfRepentance }, { "mothers", GetMothersDayOfYear }, + { "thanksgiving", GetThanksgivingDayOfYear }, }; private readonly IHolidayParserConfiguration config; @@ -212,7 +230,7 @@ private static DateObject GetPastValue(DateObject value, DateObject referenceDat private static DateObject SecondChristmasDay(int year) => new DateObject(year, 12, 26); - private static DateObject BerchtoldsDay(int year) => new DateObject(year, 1, 2); + private static DateObject BerchtoldDay(int year) => new DateObject(year, 1, 2); private static DateObject SaintJosephsDay(int year) => new DateObject(year, 3, 19); @@ -222,7 +240,7 @@ private static DateObject GetPastValue(DateObject value, DateObject referenceDat private static DateObject LaborDay(int year) => new DateObject(year, 5, 1); - private static DateObject MidautumnDay(int year) => new DateObject(year, 8, 15); + private static DateObject MidAutumnDay(int year) => new DateObject(year, 8, 15); private static DateObject SpringDay(int year) => new DateObject(year, 1, 1); @@ -278,7 +296,7 @@ private static DateObject GetPastValue(DateObject value, DateObject referenceDat private static DateObject Mayday(int year) => new 
DateObject(year, 5, 1); - private static DateObject CincoDeMayoday(int year) => new DateObject(year, 5, 5); + private static DateObject CincoDeMayo(int year) => new DateObject(year, 5, 5); private static DateObject BaptisteDay(int year) => new DateObject(year, 6, 24); @@ -290,11 +308,11 @@ private static DateObject GetPastValue(DateObject value, DateObject referenceDat private static DateObject AllHallowDay(int year) => new DateObject(year, 11, 1); - private static DateObject AllSoulsday(int year) => new DateObject(year, 11, 2); + private static DateObject AllSoulsDay(int year) => new DateObject(year, 11, 2); private static DateObject GuyFawkesDay(int year) => new DateObject(year, 11, 5); - private static DateObject Veteransday(int year) => new DateObject(year, 11, 11); + private static DateObject VeteransDay(int year) => new DateObject(year, 11, 11); private static DateObject PiDay(int year) => new DateObject(year, 3, 14); @@ -304,12 +322,17 @@ private static DateObject GetPastValue(DateObject value, DateObject referenceDat private static DateObject BeginningOfSpring(int year) => new DateObject(year, 3, 20); - private static DateObject BeginningOfFall(int year) => new DateObject(year, 9, 23); + private static DateObject BeginningOfFall(int year) => new DateObject(year, 9, 22); - private static DateObject GetEasterDay(int year) - { - return CalculateHolydaysByEaster(year); - } + private static DateObject BarbaraTag(int year) => new DateObject(year, 12, 4); + + private static DateObject AugsburgerFriedensFest(int year) => new DateObject(year, 8, 8); + + private static DateObject PeterUndPaul(int year) => new DateObject(year, 6, 29); + + private static DateObject JohannisTag(int year) => new DateObject(year, 6, 24); + + private static DateObject HeiligeDreiKönige(int year) => new DateObject(year, 1, 6); private static DateObject GetMothersDayOfYear(int year) { @@ -320,7 +343,7 @@ where DateObject.MinValue.SafeCreateFromValue(year, 5, day).DayOfWeek == DayOfWe private 
static DateObject GetFathersDayOfYear(int year) { - return CalculateHolydaysByEaster(year, 39); + return HolidayFunctions.CalculateHolidayByEaster(year, 39); } private static DateObject GetMartinLutherKingDayOfYear(int year) @@ -367,14 +390,19 @@ where DateObject.MinValue.SafeCreateFromValue(year, 10, day).DayOfWeek == DayOfW private static DateObject GetThanksgivingDayOfYear(int year) { - return DateObject.MinValue.SafeCreateFromValue(year, 11, (from day in Enumerable.Range(1, 30) - where DateObject.MinValue.SafeCreateFromValue(year, 11, day).DayOfWeek == DayOfWeek.Thursday - select day).ElementAt(3)); + return DateObject.MinValue.SafeCreateFromValue(year, 10, (from day in Enumerable.Range(1, 31) + where DateObject.MinValue.SafeCreateFromValue(year, 10, day).DayOfWeek == DayOfWeek.Sunday + select day).ElementAt(0)); + } + + private static DateObject GetEasterDay(int year) + { + return HolidayFunctions.CalculateHolidayByEaster(year); } private static DateObject GetEasterMondayOfYear(int year) { - return CalculateHolydaysByEaster(year, 1); + return HolidayFunctions.CalculateHolidayByEaster(year, 1); } private static DateObject GetCheDayOfRepentance(int year) @@ -386,27 +414,27 @@ where DateObject.MinValue.SafeCreateFromValue(year, 9, day).DayOfWeek == DayOfWe private static DateObject GetFourthAdvent(int year) { - return GetDateAdvent(year); + return HolidayFunctions.CalculateAdventDate(year); } private static DateObject GetThirdAdvent(int year) { - return GetDateAdvent(year, 7); + return HolidayFunctions.CalculateAdventDate(year, 7); } private static DateObject GetSecondAdvent(int year) { - return GetDateAdvent(year, 14); + return HolidayFunctions.CalculateAdventDate(year, 14); } private static DateObject GetFirstAdvent(int year) { - return GetDateAdvent(year, 21); + return HolidayFunctions.CalculateAdventDate(year, 21); } private static DateObject GetTotenSonntag(int year) { - return GetDateAdvent(year, 28); + return HolidayFunctions.CalculateAdventDate(year, 28); 
} private static DateObject GetDayOfRepentance(int year) @@ -418,107 +446,82 @@ where DateObject.MinValue.SafeCreateFromValue(year, 11, day).DayOfWeek == DayOfW private static DateObject GetMemorialDayGermany(int year) { - return GetDateAdvent(year, 35); + return HolidayFunctions.CalculateAdventDate(year, 35); } private static DateObject GetHolyThursday(int year) { - return CalculateHolydaysByEaster(year, -3); + return HolidayFunctions.CalculateHolidayByEaster(year, -3); } private static DateObject GetFastnacht(int year) { - return CalculateHolydaysByEaster(year, -47); + return HolidayFunctions.CalculateHolidayByEaster(year, -47); } private static DateObject GetRosenmontag(int year) { - return CalculateHolydaysByEaster(year, -48); + return HolidayFunctions.CalculateHolidayByEaster(year, -48); } private static DateObject GetCorpusChristi(int year) { - return CalculateHolydaysByEaster(year, 60); + return HolidayFunctions.CalculateHolidayByEaster(year, 60); } - private static DateObject GetWhitsunday(int year) + private static DateObject GetWhiteSunday(int year) { - return CalculateHolydaysByEaster(year, 49); + return HolidayFunctions.CalculateHolidayByEaster(year, 49); } - private static DateObject GetWhitMonday(int year) + private static DateObject GetWhiteMonday(int year) { - return CalculateHolydaysByEaster(year, 50); + return HolidayFunctions.CalculateHolidayByEaster(year, 50); } private static DateObject GetAscensionOfChrist(int year) { - return CalculateHolydaysByEaster(year, 39); + return HolidayFunctions.CalculateHolidayByEaster(year, 39); } - private static DateObject GetGoodfriday(int year) + private static DateObject GetGoodFriday(int year) { - return CalculateHolydaysByEaster(year, -2); + return HolidayFunctions.CalculateHolidayByEaster(year, -2); } - private static DateObject GetPalmsunday(int year) + private static DateObject GetPalmSunday(int year) { - return CalculateHolydaysByEaster(year, -7); + return HolidayFunctions.CalculateHolidayByEaster(year, 
-7); } - private static DateObject GetAshwednesday(int year) + private static DateObject GetAshWednesday(int year) { - return CalculateHolydaysByEaster(year, -46); + return HolidayFunctions.CalculateHolidayByEaster(year, -46); } private static DateObject GetCarnival(int year) { - return CalculateHolydaysByEaster(year, -49); + return HolidayFunctions.CalculateHolidayByEaster(year, -49); } private static DateObject GetWeiberfastnacht(int year) { - return CalculateHolydaysByEaster(year, -52); + return HolidayFunctions.CalculateHolidayByEaster(year, -52); } - private static DateObject CalculateHolydaysByEaster(int year, int days = 0) + private static DateObject GetEasterSaturday(int year) { - int day = 0; - int month = 3; - - int g = year % 19; - int c = year / 100; - int h = (c - (int)(c / 4) - (int)(((8 * c) + 13) / 25) + (19 * g) + 15) % 30; - int i = h - ((int)(h / 28) * (1 - ((int)(h / 28) * (int)(29 / (h + 1)) * (int)((21 - g) / 11)))); - - day = i - ((year + (int)(year / 4) + i + 2 - c + (int)(c / 4)) % 7) + 28; - - if (day > 31) - { - month++; - day -= 31; - } - - return DateObject.MinValue.SafeCreateFromValue(year, month, day).AddDays(days); + return HolidayFunctions.CalculateHolidayByEaster(year, -1); } - private static DateObject GetDateAdvent(int year, int days = 0) + private static DateObject GetFastnachtSaturday(int year) { - DateObject xmas = new DateObject(year, 12, 25); - int weekday = (int)xmas.DayOfWeek; - - DateObject aday; - - if (weekday == 0) - { - aday = xmas.AddDays(-7 - days); - } - else - { - aday = xmas.AddDays(-weekday - days); - } + return HolidayFunctions.CalculateHolidayByEaster(year, -50); + } - return aday; + private static DateObject GetFastnachtSunday(int year) + { + return HolidayFunctions.CalculateHolidayByEaster(year, -49); } private DateTimeResolutionResult ParseHolidayRegexMatch(string text, DateObject referenceDate) @@ -553,7 +556,7 @@ private DateTimeResolutionResult Match2Date(Match match, DateObject referenceDat if 
(!string.IsNullOrEmpty(yearStr)) { - year = int.Parse(yearStr); + year = int.Parse(yearStr, CultureInfo.InvariantCulture); hasYear = true; } else if (!string.IsNullOrEmpty(orderStr)) @@ -609,7 +612,7 @@ private DateTimeResolutionResult Match2Date(Match match, DateObject referenceDat if (hasYear) { - ret.Timex = year.ToString("D4") + timexStr; + ret.Timex = year.ToString("D4", CultureInfo.InvariantCulture) + timexStr; ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(year, value.Month, value.Day); ret.Success = true; return ret; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/TimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/TimeParser.cs index 83fc0cec1b..f8bfa9fc58 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/TimeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/TimeParser.cs @@ -1,4 +1,10 @@ -using Microsoft.Recognizers.Text.Utilities; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; + +using Microsoft.Recognizers.Text.Utilities; + using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.German @@ -34,10 +40,10 @@ private DateTimeResolutionResult ParseIsh(string text, DateObject referenceTime) var hour = Constants.HalfDayHourCount; if (!string.IsNullOrEmpty(hourStr)) { - hour = int.Parse(hourStr); + hour = int.Parse(hourStr, CultureInfo.InvariantCulture); } - ret.Timex = "T" + hour.ToString("D2"); + ret.Timex = "T" + hour.ToString("D2", CultureInfo.InvariantCulture); ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(referenceTime.Year, referenceTime.Month, referenceTime.Day, hour, 0, 0); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Utilities/GermanDatetimeUtilityConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Utilities/GermanDatetimeUtilityConfiguration.cs index 4788a69f6f..ea86d34cd8 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Utilities/GermanDatetimeUtilityConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Utilities/GermanDatetimeUtilityConfiguration.cs @@ -1,73 +1,32 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.German; using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime.German.Utilities { - public class GermanDatetimeUtilityConfiguration : IDateTimeUtilityConfiguration + public class GermanDatetimeUtilityConfiguration : BaseDatetimeUtilityConfiguration { - public static readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoRegex, RegexFlags); - - public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); - - public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); - - public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); - - public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); - - public static readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); - - public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); - - public static readonly Regex AmPmDescRegex = - new Regex(DateTimeDefinitions.AmPmDescRegex, RegexFlags); - - public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); - - public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); - - public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); - - public static readonly Regex CommonDatePrefixRegex = - new Regex(DateTimeDefinitions.CommonDatePrefixRegex, RegexFlags); - - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - - Regex IDateTimeUtilityConfiguration.LaterRegex => LaterRegex; - - Regex IDateTimeUtilityConfiguration.AgoRegex => AgoRegex; - - Regex IDateTimeUtilityConfiguration.InConnectorRegex => 
InConnectorRegex; - - Regex IDateTimeUtilityConfiguration.SinceYearSuffixRegex => SinceYearSuffixRegex; - - Regex IDateTimeUtilityConfiguration.WithinNextPrefixRegex => WithinNextPrefixRegex; - - Regex IDateTimeUtilityConfiguration.AmDescRegex => AmDescRegex; - - Regex IDateTimeUtilityConfiguration.PmDescRegex => PmDescRegex; - - Regex IDateTimeUtilityConfiguration.AmPmDescRegex => AmPmDescRegex; - - Regex IDateTimeUtilityConfiguration.RangeUnitRegex => RangeUnitRegex; - - Regex IDateTimeUtilityConfiguration.TimeUnitRegex => TimeUnitRegex; - - Regex IDateTimeUtilityConfiguration.DateUnitRegex => DateUnitRegex; - - Regex IDateTimeUtilityConfiguration.CommonDatePrefixRegex => CommonDatePrefixRegex; - - bool IDateTimeUtilityConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + public GermanDatetimeUtilityConfiguration() + : base( + DateTimeDefinitions.AgoRegex, + DateTimeDefinitions.LaterRegex, + DateTimeDefinitions.InConnectorRegex, + DateTimeDefinitions.SinceYearSuffixRegex, + DateTimeDefinitions.WithinNextPrefixRegex, + DateTimeDefinitions.AmDescRegex, + DateTimeDefinitions.PmDescRegex, + DateTimeDefinitions.AmPmDescRegex, + DateTimeDefinitions.RangeUnitRegex, + DateTimeDefinitions.TimeUnitRegex, + DateTimeDefinitions.DateUnitRegex, + DateTimeDefinitions.CommonDatePrefixRegex, + DateTimeDefinitions.RangePrefixRegex, + RegexOptions.Singleline | RegexOptions.ExplicitCapture, + DateTimeDefinitions.CheckBothBeforeAfter) + { + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateExtractorConfiguration.cs new file mode 100644 index 0000000000..2e10fce351 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateExtractorConfiguration.cs @@ -0,0 +1,299 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Hindi; +using Microsoft.Recognizers.Text.DateTime.Hindi.Utilities; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Hindi; + +namespace Microsoft.Recognizers.Text.DateTime.Hindi +{ + public class HindiDateExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateExtractorConfiguration + { + public static readonly Regex MonthRegex = + new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthNumRegex = + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayRegex = + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SingleWeekDayRegex = + new Regex(DateTimeDefinitions.SingleWeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex OnRegex = + new Regex(DateTimeDefinitions.OnRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelaxedOnRegex = + new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ThisRegex = + new Regex(DateTimeDefinitions.ThisRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LastDateRegex = + new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NextDateRegex = + new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DateUnitRegex = + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialDayRegex = + new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, 
RegexTimeOut); + + public static readonly Regex WeekDayOfMonthRegex = + new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeWeekDayRegex = + new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialDate = + new Regex(DateTimeDefinitions.SpecialDate, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialDayWithNumRegex = + new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ForTheRegex = + new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayAndDayOfMothRegex = + new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayAndDayRegex = + new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeMonthRegex = + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex StrictRelativeRegex = + new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PrefixArticleRegex = + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex OfMonth = + new Regex(DateTimeDefinitions.OfMonth, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthEnd = + new Regex(DateTimeDefinitions.MonthEnd, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayEnd = + new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayStart = + new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags, RegexTimeOut); + + public static readonly Regex YearSuffix = + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); + + public static readonly Regex LessThanRegex = + new 
Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MoreThanRegex = + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex InConnectorRegex = + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SinceYearSuffixRegex = + new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangeUnitRegex = + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangeConnectorSymbolRegex = + new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex BeforeAfterRegex = + new Regex(DateTimeDefinitions.BeforeAfterRegex, RegexFlags, RegexTimeOut); + + public static readonly ImmutableDictionary DayOfWeek = + DateTimeDefinitions.DayOfWeek.ToImmutableDictionary(); + + public static readonly ImmutableDictionary MonthOfYear = + DateTimeDefinitions.MonthOfYear.ToImmutableDictionary(); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex DayRegex = + new Regex(DateTimeDefinitions.ImplicitDayRegex, RegexFlags, RegexTimeOut); + + public HindiDateExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Hindi.IntegerExtractor.GetInstance(); + OrdinalExtractor = Number.Hindi.OrdinalExtractor.GetInstance(); + + NumberParser = new BaseIndianNumberParser(new HindiNumberParserConfiguration(numConfig)); + + DurationExtractor = new BaseDurationExtractor(new HindiDurationExtractorConfiguration(this)); + 
HolidayExtractor = new BaseHolidayExtractor(new HindiHolidayExtractorConfiguration(this)); + UtilityConfiguration = new HindiDatetimeUtilityConfiguration(); + + ImplicitDateList = new List + { + // extract "12" from "on 12" + OnRegex, + + // extract "12th" from "on/at/in 12th" + RelaxedOnRegex, + + // "the day before yesterday", "previous day", "today", "yesterday", "tomorrow" + SpecialDayRegex, + + // "this Monday", "Tuesday of this week" + ThisRegex, + + // "last/previous Monday", "Monday of last week" + LastDateRegex, + + // "next/following Monday", "Monday of next week" + NextDateRegex, + + // "Sunday", "Weds" + SingleWeekDayRegex, + + // "2nd Monday of April" + WeekDayOfMonthRegex, + + // "on the 12th" + SpecialDate, + + // "two days from today", "five days from tomorrow" + SpecialDayWithNumRegex, + + // "three Monday from now" + RelativeWeekDayRegex, + }; + + if ((Options & DateTimeOptions.CalendarMode) != 0) + { + ImplicitDateList = ImplicitDateList.Concat(new[] { DayRegex }); + } + + // 3-23-2017 + var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags, RegexTimeOut); + + // 23-3-2015 + var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags, RegexTimeOut); + + // on (Sunday,)? 1.3 + var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags, RegexTimeOut); + + // on (Sunday,)? 24-12 + var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags, RegexTimeOut); + + // "(Sunday,)? 7/23, 2018", year part is required + var dateRegex7L = new Regex(DateTimeDefinitions.DateExtractor7L, RegexFlags, RegexTimeOut); + + // "(Sunday,)? 7/23", year part is not required + var dateRegex7S = new Regex(DateTimeDefinitions.DateExtractor7S, RegexFlags, RegexTimeOut); + + // "(Sunday,)? 23/7, 2018", year part is required + var dateRegex9L = new Regex(DateTimeDefinitions.DateExtractor9L, RegexFlags, RegexTimeOut); + + // "(Sunday,)? 
23/7", year part is not required + var dateRegex9S = new Regex(DateTimeDefinitions.DateExtractor9S, RegexFlags, RegexTimeOut); + + // (Sunday,)? 2015-12-23 + var dateRegexA = new Regex(DateTimeDefinitions.DateExtractorA, RegexFlags, RegexTimeOut); + + DateRegexList = new List + { + // (Sunday,)? April 5 or (Sunday,)? April 5, 2016 + new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags, RegexTimeOut), + + // (Sunday,)? 6th of April + new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags, RegexTimeOut), + }; + + var enableDmy = DmyDateFormat || + DateTimeDefinitions.DefaultLanguageFallback == Constants.DefaultLanguageFallback_DMY; + + DateRegexList = DateRegexList.Concat(enableDmy ? + new[] { dateRegex5, dateRegex8, dateRegex9L, dateRegex9S, dateRegex4, dateRegex6, dateRegex7L, dateRegex7S, dateRegexA } : + new[] { dateRegex4, dateRegex6, dateRegex7L, dateRegex7S, dateRegex5, dateRegex8, dateRegex9L, dateRegex9S, dateRegexA }); + } + + public IEnumerable DateRegexList { get; } + + public IExtractor IntegerExtractor { get; } + + public IExtractor OrdinalExtractor { get; } + + public IParser NumberParser { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + + public IEnumerable ImplicitDateList { get; } + + IImmutableDictionary IDateExtractorConfiguration.DayOfWeek => DayOfWeek; + + IImmutableDictionary IDateExtractorConfiguration.MonthOfYear => MonthOfYear; + + bool IDateExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + Regex IDateExtractorConfiguration.OfMonth => OfMonth; + + Regex IDateExtractorConfiguration.MonthEnd => MonthEnd; + + Regex IDateExtractorConfiguration.WeekDayEnd => WeekDayEnd; + + Regex IDateExtractorConfiguration.WeekDayStart => WeekDayStart; + + Regex IDateExtractorConfiguration.DateUnitRegex => DateUnitRegex; + + Regex IDateExtractorConfiguration.ForTheRegex 
=> ForTheRegex; + + Regex IDateExtractorConfiguration.WeekDayAndDayOfMonthRegex => WeekDayAndDayOfMothRegex; + + Regex IDateExtractorConfiguration.WeekDayAndDayRegex => WeekDayAndDayRegex; + + Regex IDateExtractorConfiguration.RelativeMonthRegex => RelativeMonthRegex; + + Regex IDateExtractorConfiguration.StrictRelativeRegex => StrictRelativeRegex; + + Regex IDateExtractorConfiguration.WeekDayRegex => WeekDayRegex; + + Regex IDateExtractorConfiguration.PrefixArticleRegex => PrefixArticleRegex; + + Regex IDateExtractorConfiguration.YearSuffix => YearSuffix; + + Regex IDateExtractorConfiguration.LessThanRegex => LessThanRegex; + + Regex IDateExtractorConfiguration.MoreThanRegex => MoreThanRegex; + + Regex IDateExtractorConfiguration.InConnectorRegex => InConnectorRegex; + + Regex IDateExtractorConfiguration.SinceYearSuffixRegex => SinceYearSuffixRegex; + + Regex IDateExtractorConfiguration.RangeUnitRegex => RangeUnitRegex; + + Regex IDateExtractorConfiguration.RangeConnectorSymbolRegex => RangeConnectorSymbolRegex; + + Regex IDateExtractorConfiguration.BeforeAfterRegex => BeforeAfterRegex; + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDatePeriodExtractorConfiguration.cs new file mode 100644 index 0000000000..5d0a9d5671 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDatePeriodExtractorConfiguration.cs @@ -0,0 +1,382 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.Hindi; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Hindi; +using Microsoft.Recognizers.Text.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Hindi +{ + public class HindiDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDatePeriodExtractorConfiguration + { + // Base regexes + public static readonly Regex TillRegex = + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangeConnectorRegex = + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DayRegex = + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthNumRegex = + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex IllegalYearRegex = + new Regex(BaseDateTime.IllegalYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayRegex = + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeMonthRegex = + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WrittenMonthRegex = + new Regex(DateTimeDefinitions.WrittenMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthSuffixRegex = + new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DateUnitRegex = + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeUnitRegex = + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); 
+ + public static readonly Regex PreviousPrefixRegex = + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NextPrefixRegex = + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FutureSuffixRegex = + new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NowRegex = + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); + + // composite regexes + public static readonly Regex SimpleCasesRegex = + new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NumberBeforeWeekRegex = + new Regex(DateTimeDefinitions.NumberBeforeWeekRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthFrontSimpleCasesRegex = + new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthFrontBetweenRegex = + new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex BetweenRegex = + new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthWithYear = + new Regex(DateTimeDefinitions.MonthWithYear, RegexFlags, RegexTimeOut); + + public static readonly Regex OneWordPeriodRegex = + new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthNumWithYear = + new Regex(DateTimeDefinitions.MonthNumWithYear, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekOfMonthRegex = + new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekOfYearRegex = + new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FollowedDateUnit = + new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags, RegexTimeOut); + + public static 
readonly Regex NumberCombinedWithDateUnit = + new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex QuarterRegex = + new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex QuarterRegexYearFront = + new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags, RegexTimeOut); + + public static readonly Regex AllHalfYearRegex = + new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SeasonRegex = + new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WhichWeekRegex = + new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekOfRegex = + new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthOfRegex = + new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangeUnitRegex = + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex InConnectorRegex = + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WithinNextPrefixRegex = + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RestOfDateRegex = + new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LaterEarlyPeriodRegex = + new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekWithWeekDayRangeRegex = + new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearPlusNumberRegex = + new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex 
DecadeWithCenturyRegex = + new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearPeriodRegex = + new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ComplexDatePeriodRegex = + new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeDecadeRegex = + new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ReferenceDatePeriodRegex = + new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AgoRegex = + new Regex(DateTimeDefinitions.AgoRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LaterRegex = + new Regex(DateTimeDefinitions.LaterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LessThanRegex = + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MoreThanRegex = + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex CenturySuffixRegex = + new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FirstLastRegex = + new Regex(DateTimeDefinitions.FirstLastRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex OfYearRegex = + new Regex(DateTimeDefinitions.OfYearRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex FromTokenRegex = + new Regex(DateTimeDefinitions.FromTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex RangePrefixRegex = + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex[] SimpleCasesRegexes = + { + // "3-5 Jan, 2018", + SimpleCasesRegex, + + // "18 तारीख वाली सप्ताह" + 
NumberBeforeWeekRegex, + + // "between 3 and 5 Jan, 2018" + BetweenRegex, + + // "next april", "year to date", "previous year" + OneWordPeriodRegex, + + // "January, 2018", "this year Feb" + MonthWithYear, + + // "2018-3", "2018.3", "5-2015", only FourDigitYear is allow in this Regex + MonthNumWithYear, + + // "2018", "two thousand and ten" + YearRegex, + + // "4th week of Feb" + WeekOfMonthRegex, + + // "3rd week of 2018", "4th week last year" + WeekOfYearRegex, + + // "Jan between 8-10" + MonthFrontBetweenRegex, + + // "from Jan 5th-10th", "Feb from 5-10" + MonthFrontSimpleCasesRegex, + + // "Q1 2018", "2nd quarter" + QuarterRegex, + + // "2016 Q1", "last year the 4th quarter" + QuarterRegexYearFront, + + // "2015 the H1", "H2 of 2016", "1st half 2018", "2nd half this year" + AllHalfYearRegex, + + // "last summer", "fall of 2018", "early this summer" + SeasonRegex, + + // "week 25", "week 06" + WhichWeekRegex, + + // "rest of this week", "rest of current year" + RestOfDateRegex, + + // "early this year", "late next April" + LaterEarlyPeriodRegex, + + // "this week between Mon and Wed", "next week from Tuesday to Wednesday" + WeekWithWeekDayRangeRegex, + + // "year 834", "two thousand and nine" + YearPlusNumberRegex, + + // "21st century 30's" + DecadeWithCenturyRegex, + + // "next five decades", "previous 2 decades" + RelativeDecadeRegex, + + // "this week", "same year" + ReferenceDatePeriodRegex, + }; + + public HindiDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + DatePointExtractor = new BaseDateExtractor(new HindiDateExtractorConfiguration(this)); + DurationExtractor = new BaseDurationExtractor(new HindiDurationExtractorConfiguration(this)); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = 
Number.Hindi.CardinalExtractor.GetInstance(); + OrdinalExtractor = Number.Hindi.OrdinalExtractor.GetInstance(); + + NumberParser = new BaseNumberParser(new HindiNumberParserConfiguration(numConfig)); + } + + public IDateExtractor DatePointExtractor { get; } + + public IExtractor CardinalExtractor { get; } + + public IExtractor OrdinalExtractor { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IParser NumberParser { get; } + + IEnumerable IDatePeriodExtractorConfiguration.SimpleCasesRegexes => SimpleCasesRegexes; + + Regex IDatePeriodExtractorConfiguration.IllegalYearRegex => IllegalYearRegex; + + Regex IDatePeriodExtractorConfiguration.YearRegex => YearRegex; + + Regex IDatePeriodExtractorConfiguration.TillRegex => TillRegex; + + Regex IDatePeriodExtractorConfiguration.FollowedDateUnit => FollowedDateUnit; + + Regex IDatePeriodExtractorConfiguration.DateUnitRegex => DateUnitRegex; + + Regex IDatePeriodExtractorConfiguration.TimeUnitRegex => TimeUnitRegex; + + Regex IDatePeriodExtractorConfiguration.NumberCombinedWithDateUnit => NumberCombinedWithDateUnit; + + Regex IDatePeriodExtractorConfiguration.PreviousPrefixRegex => PreviousPrefixRegex; + + Regex IDatePeriodExtractorConfiguration.FutureRegex => NextPrefixRegex; + + Regex IDatePeriodExtractorConfiguration.FutureSuffixRegex => FutureSuffixRegex; + + Regex IDatePeriodExtractorConfiguration.WeekOfRegex => WeekOfRegex; + + Regex IDatePeriodExtractorConfiguration.MonthOfRegex => MonthOfRegex; + + Regex IDatePeriodExtractorConfiguration.RangeUnitRegex => RangeUnitRegex; + + Regex IDatePeriodExtractorConfiguration.InConnectorRegex => InConnectorRegex; + + Regex IDatePeriodExtractorConfiguration.WithinNextPrefixRegex => WithinNextPrefixRegex; + + Regex IDatePeriodExtractorConfiguration.YearPeriodRegex => YearPeriodRegex; + + Regex IDatePeriodExtractorConfiguration.ComplexDatePeriodRegex => ComplexDatePeriodRegex; + + Regex IDatePeriodExtractorConfiguration.RelativeDecadeRegex => 
RelativeDecadeRegex; + + Regex IDatePeriodExtractorConfiguration.ReferenceDatePeriodRegex => ReferenceDatePeriodRegex; + + Regex IDatePeriodExtractorConfiguration.AgoRegex => AgoRegex; + + Regex IDatePeriodExtractorConfiguration.LaterRegex => LaterRegex; + + Regex IDatePeriodExtractorConfiguration.LessThanRegex => LessThanRegex; + + Regex IDatePeriodExtractorConfiguration.MoreThanRegex => MoreThanRegex; + + Regex IDatePeriodExtractorConfiguration.CenturySuffixRegex => CenturySuffixRegex; + + Regex IDatePeriodExtractorConfiguration.MonthNumRegex => MonthNumRegex; + + Regex IDatePeriodExtractorConfiguration.NowRegex => NowRegex; + + Regex IDatePeriodExtractorConfiguration.FirstLastRegex => FirstLastRegex; + + Regex IDatePeriodExtractorConfiguration.OfYearRegex => OfYearRegex; + + bool IDatePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + string[] IDatePeriodExtractorConfiguration.DurationDateRestrictions => DateTimeDefinitions.DurationDateRestrictions; + + public bool GetFromTokenIndex(string text, out int index) + { + index = -1; + var fromMatch = FromTokenRegex.Match(text); + if (fromMatch.Success) + { + index = fromMatch.Index; + } + + return fromMatch.Success; + } + + public bool GetBetweenTokenIndex(string text, out int index) + { + index = -1; + var betweenMatch = RangePrefixRegex.MatchBegin(text, trim: true); + if (betweenMatch.Success) + { + index = betweenMatch.Index + betweenMatch.Length; + } + + return betweenMatch.Success; + } + + public bool HasConnectorToken(string text) + { + return RangeConnectorRegex.IsExactMatch(text, trim: true); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateTimeAltExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateTimeAltExtractorConfiguration.cs new file mode 100644 index 0000000000..9572d3ae5f --- /dev/null +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateTimeAltExtractorConfiguration.cs
@@ -0,0 +1,71 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Definitions.Hindi;
+
+namespace Microsoft.Recognizers.Text.DateTime.Hindi
+{
+    /// <summary>
+    /// Hindi implementation of <see cref="IDateTimeAltExtractorConfiguration"/>: exposes the regexes built
+    /// from <see cref="DateTimeDefinitions"/> together with a date extractor and a date-period extractor.
+    /// </summary>
+    public class HindiDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeAltExtractorConfiguration
+    {
+        public static readonly Regex ThisPrefixRegex =
+            new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PreviousPrefixRegex =
+            new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex NextPrefixRegex =
+            new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex AmRegex =
+            new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PmRegex =
+            new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex RangePrefixRegex =
+            new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut);
+
+        // Must stay below the three prefix regexes: static initializers run in textual order.
+        public static readonly Regex[] RelativePrefixList =
+        {
+            ThisPrefixRegex, PreviousPrefixRegex, NextPrefixRegex,
+        };
+
+        // Must stay below AmRegex and PmRegex for the same reason.
+        public static readonly Regex[] AmPmRegexList =
+        {
+            AmRegex, PmRegex,
+        };
+
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
+
+        private static readonly Regex OrRegex =
+            new Regex(DateTimeDefinitions.OrRegex, RegexFlags, RegexTimeOut);
+
+        private static readonly Regex DayRegex =
+            new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut);
+
+        /// <summary>
+        /// Initializes the configuration and creates the Hindi date and date-period extractors,
+        /// handing this instance to their configurations.
+        /// </summary>
+        /// <param name="config">Options configuration forwarded to the base class.</param>
+        public HindiDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+            DateExtractor = new BaseDateExtractor(new HindiDateExtractorConfiguration(this));
+            DatePeriodExtractor = new BaseDatePeriodExtractor(new HindiDatePeriodExtractorConfiguration(this));
+        }
+
+        IEnumerable<Regex> IDateTimeAltExtractorConfiguration.RelativePrefixList => RelativePrefixList;
+
+        IEnumerable<Regex> IDateTimeAltExtractorConfiguration.AmPmRegexList => AmPmRegexList;
+
+        Regex IDateTimeAltExtractorConfiguration.OrRegex => OrRegex;
+
+        Regex IDateTimeAltExtractorConfiguration.ThisPrefixRegex => ThisPrefixRegex;
+
+        Regex IDateTimeAltExtractorConfiguration.DayRegex => DayRegex;
+
+        Regex IDateTimeAltExtractorConfiguration.RangePrefixRegex => RangePrefixRegex;
+
+        public IDateExtractor DateExtractor { get; }
+
+        public IDateTimeExtractor DatePeriodExtractor { get; }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateTimeExtractorConfiguration.cs
new file mode 100644
index 0000000000..c72e32ac49
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateTimeExtractorConfiguration.cs
@@ -0,0 +1,129 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Definitions.Hindi;
+using Microsoft.Recognizers.Text.DateTime.Hindi.Utilities;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+
+namespace Microsoft.Recognizers.Text.DateTime.Hindi
+{
+    /// <summary>
+    /// Hindi implementation of <see cref="IDateTimeExtractorConfiguration"/>: exposes the regexes built
+    /// from <see cref="DateTimeDefinitions"/> and the extractors created in the constructor.
+    /// </summary>
+    public class HindiDateTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeExtractorConfiguration
+    {
+        public static readonly Regex PrepositionRegex =
+            new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex NowRegex =
+            new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SuffixRegex =
+            new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex TimeOfDayRegex =
+            new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SpecificTimeOfDayRegex =
+            new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex TimeOfTodayAfterRegex =
+            new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex TimeOfTodayBeforeRegex =
+            new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SimpleTimeOfTodayAfterRegex =
+            new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SimpleTimeOfTodayBeforeRegex =
+            new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SpecificEndOfRegex =
+            new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex UnspecificEndOfRegex =
+            new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags, RegexTimeOut);
+
+        // Built from the TimeUnitRegex definition even though the field is named UnitRegex.
+        public static readonly Regex UnitRegex =
+            new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex ConnectorRegex =
+            new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex NumberAsTimeRegex =
+            new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex DateNumberConnectorRegex =
+            new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex YearRegex =
+            new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex YearSuffix =
+            new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SuffixAfterRegex =
+            new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut);
+
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
+
+        /// <summary>
+        /// Initializes the configuration and creates the integer, date, time, duration and holiday
+        /// extractors plus the Hindi utility configuration.
+        /// </summary>
+        /// <param name="config">Options configuration forwarded to the base class.</param>
+        public HindiDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+            IntegerExtractor = Number.Hindi.IntegerExtractor.GetInstance();
+            DatePointExtractor = new BaseDateExtractor(new HindiDateExtractorConfiguration(this));
+            TimePointExtractor = new BaseTimeExtractor(new HindiTimeExtractorConfiguration(this));
+            DurationExtractor = new BaseDurationExtractor(new HindiDurationExtractorConfiguration(this));
+            UtilityConfiguration = new HindiDatetimeUtilityConfiguration();
+            HolidayExtractor = new BaseHolidayExtractor(new HindiHolidayExtractorConfiguration(this));
+        }
+
+        public IExtractor IntegerExtractor { get; }
+
+        public IDateExtractor DatePointExtractor { get; }
+
+        public IDateTimeExtractor TimePointExtractor { get; }
+
+        public IDateTimeUtilityConfiguration UtilityConfiguration { get; }
+
+        public IDateTimeExtractor HolidayExtractor { get; }
+
+        Regex IDateTimeExtractorConfiguration.NowRegex => NowRegex;
+
+        Regex IDateTimeExtractorConfiguration.SuffixRegex => SuffixRegex;
+
+        Regex IDateTimeExtractorConfiguration.TimeOfTodayAfterRegex => TimeOfTodayAfterRegex;
+
+        Regex IDateTimeExtractorConfiguration.SimpleTimeOfTodayAfterRegex => SimpleTimeOfTodayAfterRegex;
+
+        Regex IDateTimeExtractorConfiguration.TimeOfTodayBeforeRegex => TimeOfTodayBeforeRegex;
+
+        Regex IDateTimeExtractorConfiguration.SimpleTimeOfTodayBeforeRegex => SimpleTimeOfTodayBeforeRegex;
+
+        Regex IDateTimeExtractorConfiguration.TimeOfDayRegex => TimeOfDayRegex;
+
+        Regex IDateTimeExtractorConfiguration.SpecificEndOfRegex => SpecificEndOfRegex;
+
+        Regex IDateTimeExtractorConfiguration.UnspecificEndOfRegex => UnspecificEndOfRegex;
+
+        Regex IDateTimeExtractorConfiguration.UnitRegex => UnitRegex;
+
+        Regex IDateTimeExtractorConfiguration.NumberAsTimeRegex => NumberAsTimeRegex;
+
+        Regex IDateTimeExtractorConfiguration.DateNumberConnectorRegex => DateNumberConnectorRegex;
+
+        Regex IDateTimeExtractorConfiguration.YearRegex => YearRegex;
+
+        Regex IDateTimeExtractorConfiguration.YearSuffix => YearSuffix;
+
+        Regex IDateTimeExtractorConfiguration.SuffixAfterRegex => SuffixAfterRegex;
+
+        public IDateTimeExtractor DurationExtractor { get; }
+
+        /// <summary>
+        /// Tells whether <paramref name="text"/> can be treated as a connector.
+        /// </summary>
+        /// <param name="text">Text to test; it is trimmed first and must not be null.</param>
+        /// <returns>
+        /// <c>true</c> when the trimmed text is empty, or when <see cref="PrepositionRegex"/> or
+        /// <see cref="ConnectorRegex"/> finds a match in it; otherwise <c>false</c>.
+        /// </returns>
+        public bool IsConnector(string text)
+        {
+            text = text.Trim();
+            return string.IsNullOrEmpty(text) || PrepositionRegex.IsMatch(text) || ConnectorRegex.IsMatch(text);
+        }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateTimePeriodExtractorConfiguration.cs
new file mode 100644
index 0000000000..b9f482232e
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateTimePeriodExtractorConfiguration.cs
@@ -0,0 +1,208 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Definitions;
+using Microsoft.Recognizers.Definitions.Hindi;
+using Microsoft.Recognizers.Text.Utilities;
+
+namespace Microsoft.Recognizers.Text.DateTime.Hindi
+{
+    /// <summary>
+    /// Hindi implementation of <see cref="IDateTimePeriodExtractorConfiguration"/>: exposes the regexes
+    /// built from <see cref="DateTimeDefinitions"/> and <see cref="BaseDateTime"/>, and the extractors
+    /// created in the constructor.
+    /// </summary>
+    public class HindiDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimePeriodExtractorConfiguration
+    {
+        public static readonly Regex TimeNumberCombinedWithUnit =
+            new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex HyphenDateRegex =
+            new Regex(BaseDateTime.HyphenDateRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PeriodTimeOfDayWithDateRegex =
+            new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex RelativeTimeUnitRegex =
+            new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex RestOfDateTimeRegex =
+            new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex AmDescRegex =
+            new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PmDescRegex =
+            new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex WithinNextPrefixRegex =
+            new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex DateUnitRegex =
+            new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut);
+
+        // Scans right-to-left; bounded by the same timeout as every other pattern in this class.
+        public static readonly Regex PrefixDayRegex =
+            new Regex(DateTimeDefinitions.PrefixDayRegex, RegexFlags | RegexOptions.RightToLeft, RegexTimeOut);
+
+        public static readonly Regex SuffixRegex =
+            new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex BeforeRegex =
+            new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex AfterRegex =
+            new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex WeekDaysRegex =
+            new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PeriodSpecificTimeOfDayRegex =
+            new Regex(DateTimeDefinitions.PeriodSpecificTimeOfDayRegex, RegexFlags, RegexTimeOut);
+
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
+
+        private static readonly Regex[] SimpleCases =
+        {
+            HindiTimePeriodExtractorConfiguration.PureNumFromTo,
+            HindiTimePeriodExtractorConfiguration.PureNumBetweenAnd,
+        };
+
+        private static readonly Regex PeriodTimeOfDayRegex =
+            new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags, RegexTimeOut);
+
+        private static readonly Regex TimeUnitRegex =
+            new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut);
+
+        private static readonly Regex TimeFollowedUnit =
+            new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut);
+
+        private static readonly Regex GeneralEndingRegex =
+            new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut);
+
+        private static readonly Regex MiddlePauseRegex =
+            new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags, RegexTimeOut);
+
+        private static readonly Regex FromRegex =
+            new Regex(DateTimeDefinitions.FromTokenRegex, RegexFlags, RegexTimeOut);
+
+        private static readonly Regex BetweenRegex =
+            new Regex(DateTimeDefinitions.BetweenTokenRegex, RegexFlags, RegexTimeOut);
+
+        // Cached so HasConnectorToken does not rebuild the pattern on every call.
+        private static readonly Regex RangeConnectorRegex =
+            new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut);
+
+        /// <summary>
+        /// Initializes the configuration, reads <c>TokenBeforeDate</c> from the definitions and creates
+        /// the cardinal, date, time, date-time, duration, time-period and holiday extractors.
+        /// </summary>
+        /// <param name="config">Options configuration forwarded to the base class.</param>
+        public HindiDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+            TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate;
+
+            CardinalExtractor = Number.Hindi.CardinalExtractor.GetInstance();
+            SingleDateExtractor = new BaseDateExtractor(new HindiDateExtractorConfiguration(this));
+            SingleTimeExtractor = new BaseTimeExtractor(new HindiTimeExtractorConfiguration(this));
+            SingleDateTimeExtractor = new BaseDateTimeExtractor(new HindiDateTimeExtractorConfiguration(this));
+            DurationExtractor = new BaseDurationExtractor(new HindiDurationExtractorConfiguration(this));
+            TimePeriodExtractor = new BaseTimePeriodExtractor(new HindiTimePeriodExtractorConfiguration(this));
+            HolidayExtractor = new BaseHolidayExtractor(new HindiHolidayExtractorConfiguration(this));
+        }
+
+        public IEnumerable<Regex> SimpleCasesRegex => SimpleCases;
+
+        public Regex PrepositionRegex => HindiTimePeriodExtractorConfiguration.PrepositionRegex;
+
+        public Regex TillRegex => HindiTimePeriodExtractorConfiguration.TillRegex;
+
+        public Regex TimeOfDayRegex => PeriodTimeOfDayRegex;
+
+        public Regex SpecificTimeOfDayRegex => PeriodSpecificTimeOfDayRegex;
+
+        public Regex PreviousPrefixRegex => HindiDatePeriodExtractorConfiguration.PreviousPrefixRegex;
+
+        public Regex NextPrefixRegex => HindiDatePeriodExtractorConfiguration.NextPrefixRegex;
+
+        public Regex FutureSuffixRegex => HindiDatePeriodExtractorConfiguration.FutureSuffixRegex;
+
+        public Regex WeekDayRegex => WeekDaysRegex;
+
+        public Regex FollowedUnit => TimeFollowedUnit;
+
+        bool IDateTimePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;
+
+        Regex IDateTimePeriodExtractorConfiguration.PrefixDayRegex => PrefixDayRegex;
+
+        Regex IDateTimePeriodExtractorConfiguration.DateUnitRegex => DateUnitRegex;
+
+        Regex IDateTimePeriodExtractorConfiguration.NumberCombinedWithUnit => TimeNumberCombinedWithUnit;
+
+        Regex IDateTimePeriodExtractorConfiguration.TimeUnitRegex => TimeUnitRegex;
+
+        Regex IDateTimePeriodExtractorConfiguration.RelativeTimeUnitRegex => RelativeTimeUnitRegex;
+
+        Regex IDateTimePeriodExtractorConfiguration.RestOfDateTimeRegex => RestOfDateTimeRegex;
+
+        Regex IDateTimePeriodExtractorConfiguration.GeneralEndingRegex => GeneralEndingRegex;
+
+        Regex IDateTimePeriodExtractorConfiguration.MiddlePauseRegex => MiddlePauseRegex;
+
+        Regex IDateTimePeriodExtractorConfiguration.PeriodTimeOfDayWithDateRegex => PeriodTimeOfDayWithDateRegex;
+
+        Regex IDateTimePeriodExtractorConfiguration.AmDescRegex => AmDescRegex;
+
+        Regex IDateTimePeriodExtractorConfiguration.PmDescRegex => PmDescRegex;
+
+        Regex IDateTimePeriodExtractorConfiguration.WithinNextPrefixRegex => WithinNextPrefixRegex;
+
+        Regex IDateTimePeriodExtractorConfiguration.SuffixRegex => SuffixRegex;
+
+        Regex IDateTimePeriodExtractorConfiguration.BeforeRegex => BeforeRegex;
+
+        Regex IDateTimePeriodExtractorConfiguration.AfterRegex => AfterRegex;
+
+        Regex IDateTimePeriodExtractorConfiguration.TasksmodeMealTimeofDayRegex => null;
+
+        public string TokenBeforeDate { get; }
+
+        public IExtractor CardinalExtractor { get; }
+
+        public IDateTimeExtractor SingleDateExtractor { get; }
+
+        public IDateTimeExtractor SingleTimeExtractor { get; }
+
+        public IDateTimeExtractor SingleDateTimeExtractor { get; }
+
+        public IDateTimeExtractor DurationExtractor { get; }
+
+        public IDateTimeExtractor TimePeriodExtractor { get; }
+
+        // NOTE: never assigned by the constructor, so this getter-only property is always null.
+        public IDateTimeExtractor TimeZoneExtractor { get; }
+
+        public IDateTimeExtractor HolidayExtractor { get; }
+
+        // TODO: these three methods are the same in DatePeriod, should be abstracted
+
+        /// <summary>
+        /// Looks for the first match of the "from" token regex in <paramref name="text"/>.
+        /// </summary>
+        /// <param name="text">Text to search.</param>
+        /// <param name="index">Start index of the match, or -1 when there is none.</param>
+        /// <returns><c>true</c> when a match was found; otherwise <c>false</c>.</returns>
+        public bool GetFromTokenIndex(string text, out int index)
+        {
+            index = -1;
+            var fromMatch = FromRegex.Match(text);
+            if (fromMatch.Success)
+            {
+                index = fromMatch.Index;
+            }
+
+            return fromMatch.Success;
+        }
+
+        /// <summary>
+        /// Looks for the first match of the "between" token regex in <paramref name="text"/>.
+        /// </summary>
+        /// <param name="text">Text to search.</param>
+        /// <param name="index">Start index of the match, or -1 when there is none.</param>
+        /// <returns><c>true</c> when a match was found; otherwise <c>false</c>.</returns>
+        public bool GetBetweenTokenIndex(string text, out int index)
+        {
+            index = -1;
+            var betweenMatch = BetweenRegex.Match(text);
+            if (betweenMatch.Success)
+            {
+                index = betweenMatch.Index;
+            }
+
+            return betweenMatch.Success;
+        }
+
+        /// <summary>
+        /// Tests <paramref name="text"/> against the range connector regex through
+        /// <c>IsExactMatch</c> with <c>trim: true</c>.
+        /// </summary>
+        /// <param name="text">Text to test.</param>
+        /// <returns>The result of the exact-match test.</returns>
+        public bool HasConnectorToken(string text)
+        {
+            return RangeConnectorRegex.IsExactMatch(text, trim: true);
+        }
+    }
+}
diff --git
a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDurationExtractorConfiguration.cs
new file mode 100644
index 0000000000..ed394011f5
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDurationExtractorConfiguration.cs
@@ -0,0 +1,123 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Text.RegularExpressions;
+
+using Microsoft.Recognizers.Definitions.Hindi;
+
+namespace Microsoft.Recognizers.Text.DateTime.Hindi
+{
+    /// <summary>
+    /// Hindi implementation of <see cref="IDurationExtractorConfiguration"/>: exposes the regexes built
+    /// from <see cref="DateTimeDefinitions"/>, the cardinal extractor and the unit maps.
+    /// </summary>
+    public class HindiDurationExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDurationExtractorConfiguration
+    {
+        public static readonly Regex DurationUnitRegex =
+            new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SuffixAndRegex =
+            new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex DurationFollowedUnit =
+            new Regex(DateTimeDefinitions.DurationFollowedUnit, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex NumberCombinedWithDurationUnit =
+            new Regex(DateTimeDefinitions.NumberCombinedWithDurationUnit, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex AnUnitRegex =
+            new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex DuringRegex =
+            new Regex(DateTimeDefinitions.DuringRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex AllRegex =
+            new Regex(DateTimeDefinitions.AllRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex HalfRegex =
+            new Regex(DateTimeDefinitions.HalfRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex ConjunctionRegex =
+            new Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex InexactNumberRegex =
+            new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex InexactNumberUnitRegex =
+            new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex RelativeDurationUnitRegex =
+            new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex DurationConnectorRegex =
+            new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex ModPrefixRegex =
+            new Regex(DateTimeDefinitions.ModPrefixRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex ModSuffixRegex =
+            new Regex(DateTimeDefinitions.ModSuffixRegex, RegexFlags, RegexTimeOut);
+
+        // No Hindi pattern is supplied here; the interface member below therefore returns null.
+        public static readonly Regex SpecialNumberUnitRegex = null;
+
+        // The two right-to-left patterns are bounded by the same timeout as the rest of the class.
+        public static readonly Regex MoreThanRegex =
+            new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags | RegexOptions.RightToLeft, RegexTimeOut);
+
+        public static readonly Regex LessThanRegex =
+            new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags | RegexOptions.RightToLeft, RegexTimeOut);
+
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
+
+        /// <summary>
+        /// Initializes the configuration, obtains the Hindi cardinal extractor and takes immutable
+        /// copies of the unit maps from <see cref="DateTimeDefinitions"/>.
+        /// </summary>
+        /// <param name="config">Options configuration forwarded to the base class.</param>
+        public HindiDurationExtractorConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+            CardinalExtractor = Number.Hindi.CardinalExtractor.GetInstance();
+            UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary();
+            UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary();
+        }
+
+        public IExtractor CardinalExtractor { get; }
+
+        // NOTE(review): the type arguments on the next two properties were missing in the reviewed copy
+        // and are restored on the assumption that the maps are string-to-string and string-to-long;
+        // confirm against IDurationExtractorConfiguration.
+        public IImmutableDictionary<string, string> UnitMap { get; }
+
+        public IImmutableDictionary<string, long> UnitValueMap { get; }
+
+        bool IDurationExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;
+
+        Regex IDurationExtractorConfiguration.FollowedUnit => DurationFollowedUnit;
+
+        Regex IDurationExtractorConfiguration.NumberCombinedWithUnit => NumberCombinedWithDurationUnit;
+
+        Regex IDurationExtractorConfiguration.AnUnitRegex => AnUnitRegex;
+
+        Regex IDurationExtractorConfiguration.DuringRegex => DuringRegex;
+
+        Regex IDurationExtractorConfiguration.AllRegex => AllRegex;
+
+        Regex IDurationExtractorConfiguration.HalfRegex => HalfRegex;
+
+        Regex IDurationExtractorConfiguration.SuffixAndRegex => SuffixAndRegex;
+
+        Regex IDurationExtractorConfiguration.ConjunctionRegex => ConjunctionRegex;
+
+        Regex IDurationExtractorConfiguration.InexactNumberRegex => InexactNumberRegex;
+
+        Regex IDurationExtractorConfiguration.InexactNumberUnitRegex => InexactNumberUnitRegex;
+
+        Regex IDurationExtractorConfiguration.RelativeDurationUnitRegex => RelativeDurationUnitRegex;
+
+        Regex IDurationExtractorConfiguration.DurationUnitRegex => DurationUnitRegex;
+
+        Regex IDurationExtractorConfiguration.DurationConnectorRegex => DurationConnectorRegex;
+
+        Regex IDurationExtractorConfiguration.SpecialNumberUnitRegex => SpecialNumberUnitRegex;
+
+        Regex IDurationExtractorConfiguration.MoreThanRegex => MoreThanRegex;
+
+        Regex IDurationExtractorConfiguration.LessThanRegex => LessThanRegex;
+
+        Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex;
+
+        Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex;
+
+        // NOTE(review): type arguments restored as <Regex, Regex> by assumption; confirm against
+        // IDurationExtractorConfiguration. The property always returns null in this class.
+        public Dictionary<Regex, Regex> AmbiguityFiltersDict => null;
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiHolidayExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiHolidayExtractorConfiguration.cs
new file mode 100644
index 0000000000..4d160d9ad0
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiHolidayExtractorConfiguration.cs
@@ -0,0 +1,41 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+
+using Microsoft.Recognizers.Definitions.Hindi;
+
+namespace Microsoft.Recognizers.Text.DateTime.Hindi
+{
+    /// <summary>
+    /// Hindi implementation of <see cref="IHolidayExtractorConfiguration"/>: exposes the three holiday
+    /// regexes built from <see cref="DateTimeDefinitions"/>.
+    /// </summary>
+    public class HindiHolidayExtractorConfiguration : BaseDateTimeOptionsConfiguration, IHolidayExtractorConfiguration
+    {
+        public static readonly Regex YearRegex =
+            new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex H1 =
+            new Regex(DateTimeDefinitions.HolidayRegex1, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex H2 =
+            new Regex(DateTimeDefinitions.HolidayRegex2, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex H3 =
+            new Regex(DateTimeDefinitions.HolidayRegex3, RegexFlags, RegexTimeOut);
+
+        // Must stay below H1-H3: static initializers run in textual order.
+        public static readonly Regex[] HolidayRegexList =
+        {
+            H1,
+            H2,
+            H3,
+        };
+
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
+
+        /// <summary>
+        /// Initializes the configuration; all state is static, so only the base class is set up.
+        /// </summary>
+        /// <param name="config">Options configuration forwarded to the base class.</param>
+        public HindiHolidayExtractorConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+        }
+
+        /// <summary>Gets the holiday regexes, in the order H1, H2, H3.</summary>
+        public IEnumerable<Regex> HolidayRegexes => HolidayRegexList;
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiMergedExtractorConfiguration.cs
new file mode 100644
index 0000000000..54cf74443f
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiMergedExtractorConfiguration.cs
@@ -0,0 +1,159 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+ +using System.Collections.Generic; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.Hindi; +using Microsoft.Recognizers.Definitions.Utilities; +using Microsoft.Recognizers.Text.Matcher; + +namespace Microsoft.Recognizers.Text.DateTime.Hindi +{ + public class HindiMergedExtractorConfiguration : BaseDateTimeOptionsConfiguration, IMergedExtractorConfiguration + { + public static readonly Regex BeforeRegex = + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AfterRegex = + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SinceRegex = + new Regex(DateTimeDefinitions.SinceRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AroundRegex = + new Regex(DateTimeDefinitions.AroundRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex EqualRegex = + new Regex(BaseDateTime.EqualRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FromToRegex = + new Regex(DateTimeDefinitions.FromToRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SingleAmbiguousMonthRegex = + new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PrepositionSuffixRegex = + new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AmbiguousRangeModifierPrefix = + new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags, RegexTimeOut); + + public static readonly Regex NumberEndingPattern = + new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags, RegexTimeOut); + + public static readonly Regex SuffixAfterRegex = + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex UnspecificDatePeriodRegex = + new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags, RegexTimeOut); + + public 
static readonly Regex FailFastRegex = + new Regex(DateTimeDefinitions.FailFastRegex, RegexFlags | RegexOptions.Compiled); + + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex[] TermFilterRegexes = + { + // one on one + new Regex(DateTimeDefinitions.OneOnOneRegex, RegexFlags, RegexTimeOut), + + // (the)? (day|week|month|year) + new Regex(DateTimeDefinitions.SingleAmbiguousTermsRegex, RegexFlags, RegexTimeOut), + }; + + public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher(); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public HindiMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + DateExtractor = new BaseDateExtractor(new HindiDateExtractorConfiguration(this)); + TimeExtractor = new BaseTimeExtractor(new HindiTimeExtractorConfiguration(this)); + DateTimeExtractor = new BaseDateTimeExtractor(new HindiDateTimeExtractorConfiguration(this)); + DatePeriodExtractor = new BaseDatePeriodExtractor(new HindiDatePeriodExtractorConfiguration(this)); + TimePeriodExtractor = new BaseTimePeriodExtractor(new HindiTimePeriodExtractorConfiguration(this)); + DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(new HindiDateTimePeriodExtractorConfiguration(this)); + DurationExtractor = new BaseDurationExtractor(new HindiDurationExtractorConfiguration(this)); + SetExtractor = new BaseSetExtractor(new HindiSetExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new HindiHolidayExtractorConfiguration(this)); + DateTimeAltExtractor = new BaseDateTimeAltExtractor(new HindiDateTimeAltExtractorConfiguration(this)); + IntegerExtractor = Number.Hindi.IntegerExtractor.GetInstance(); + + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict); + + if ((Options & DateTimeOptions.EnablePreview) != 0) + { + 
SuperfluousWordMatcher.Init(DateTimeDefinitions.SuperfluousWordList); + } + } + + public IDateExtractor DateExtractor { get; } + + public IDateTimeExtractor TimeExtractor { get; } + + public IDateTimeExtractor DateTimeExtractor { get; } + + public IDateTimeExtractor DatePeriodExtractor { get; } + + public IDateTimeExtractor TimePeriodExtractor { get; } + + public IDateTimeExtractor DateTimePeriodExtractor { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateTimeExtractor SetExtractor { get; } + + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeZoneExtractor TimeZoneExtractor { get; } + + public IDateTimeListExtractor DateTimeAltExtractor { get; } + + public IExtractor IntegerExtractor { get; } + + public Dictionary AmbiguityFiltersDict { get; } + + Regex IMergedExtractorConfiguration.AfterRegex => AfterRegex; + + Regex IMergedExtractorConfiguration.BeforeRegex => BeforeRegex; + + Regex IMergedExtractorConfiguration.SinceRegex => SinceRegex; + + Regex IMergedExtractorConfiguration.AroundRegex => AroundRegex; + + Regex IMergedExtractorConfiguration.EqualRegex => EqualRegex; + + Regex IMergedExtractorConfiguration.FromToRegex => FromToRegex; + + Regex IMergedExtractorConfiguration.SingleAmbiguousMonthRegex => SingleAmbiguousMonthRegex; + + Regex IMergedExtractorConfiguration.PrepositionSuffixRegex => PrepositionSuffixRegex; + + Regex IMergedExtractorConfiguration.AmbiguousRangeModifierPrefix => AmbiguousRangeModifierPrefix; + + Regex IMergedExtractorConfiguration.PotentialAmbiguousRangeRegex => FromToRegex; + + Regex IMergedExtractorConfiguration.NumberEndingPattern => NumberEndingPattern; + + Regex IMergedExtractorConfiguration.SuffixAfterRegex => SuffixAfterRegex; + + Regex IMergedExtractorConfiguration.UnspecificDatePeriodRegex => UnspecificDatePeriodRegex; + + Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null; + + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + + Regex 
IMergedExtractorConfiguration.FailFastRegex => FailFastRegex; + + IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; + + StringMatcher IMergedExtractorConfiguration.SuperfluousWordMatcher => SuperfluousWordMatcher; + + bool IMergedExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + public Regex TasksModeMentionFilters { get; } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiSetExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiSetExtractorConfiguration.cs new file mode 100644 index 0000000000..a3fa0f87d7 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiSetExtractorConfiguration.cs @@ -0,0 +1,89 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Hindi; +using Microsoft.Recognizers.Text.DateTime.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Hindi +{ + public class HindiSetExtractorConfiguration : BaseDateTimeOptionsConfiguration, ISetExtractorConfiguration + { + public static readonly Regex SetUnitRegex = + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PeriodicRegex = + new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex EachUnitRegex = + new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex EachPrefixRegex = + new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SetLastRegex = + new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex EachDayRegex = + new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex 
SetWeekDayRegex = + new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SetEachRegex = + new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex EachDayRegexPrefix = + new Regex(DateTimeDefinitions.EachDayRegexPrefix, RegexFlags, RegexTimeOut); + + public HindiSetExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + DurationExtractor = new BaseDurationExtractor(new HindiDurationExtractorConfiguration(this)); + TimeExtractor = new BaseTimeExtractor(new HindiTimeExtractorConfiguration(this)); + DateExtractor = new BaseDateExtractor(new HindiDateExtractorConfiguration(this)); + DateTimeExtractor = new BaseDateTimeExtractor(new HindiDateTimeExtractorConfiguration(this)); + DatePeriodExtractor = new BaseDatePeriodExtractor(new HindiDatePeriodExtractorConfiguration(this)); + TimePeriodExtractor = new BaseTimePeriodExtractor(new HindiTimePeriodExtractorConfiguration(this)); + DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(new HindiDateTimePeriodExtractorConfiguration(this)); + } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateTimeExtractor TimeExtractor { get; } + + public IDateExtractor DateExtractor { get; } + + public IDateTimeExtractor DateTimeExtractor { get; } + + public IDateTimeExtractor DatePeriodExtractor { get; } + + public IDateTimeExtractor TimePeriodExtractor { get; } + + public IDateTimeExtractor DateTimePeriodExtractor { get; } + + bool ISetExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + Regex ISetExtractorConfiguration.LastRegex => SetLastRegex; + + Regex ISetExtractorConfiguration.EachPrefixRegex => EachPrefixRegex; + + Regex ISetExtractorConfiguration.PeriodicRegex => PeriodicRegex; + + Regex ISetExtractorConfiguration.EachUnitRegex => 
EachUnitRegex; + + Regex ISetExtractorConfiguration.EachDayRegex => EachDayRegex; + + Regex ISetExtractorConfiguration.BeforeEachDayRegex => EachDayRegexPrefix; + + Regex ISetExtractorConfiguration.SetWeekDayRegex => SetWeekDayRegex; + + Regex ISetExtractorConfiguration.SetEachRegex => SetEachRegex; + + public Tuple WeekDayGroupMatchTuple(Match match) => SetHandler.WeekDayGroupMatchTuple(match); + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiTimeExtractorConfiguration.cs new file mode 100644 index 0000000000..81a31ee7d5 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiTimeExtractorConfiguration.cs @@ -0,0 +1,153 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Hindi; + +namespace Microsoft.Recognizers.Text.DateTime.Hindi +{ + public class HindiTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, ITimeExtractorConfiguration + { + // part 1: smallest component + // -------------------------------------- + public static readonly Regex DescRegex = + new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex HourNumRegex = + new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MinuteNumRegex = + new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags, RegexTimeOut); + + // part 2: middle level component + // -------------------------------------- + // handle "... o'clock" + public static readonly Regex OclockRegex = + new Regex(DateTimeDefinitions.OclockRegex, RegexFlags, RegexTimeOut); + + // handle "... 
afternoon" + public static readonly Regex PmRegex = + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); + + // handle "... in the morning" + public static readonly Regex AmRegex = + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); + + // handle "half past ..." "a quarter to ..." + // rename 'min' group to 'deltamin' + public static readonly Regex LessThanOneHour = + new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags, RegexTimeOut); + + // handle "six thirty", "six twenty one" + public static readonly Regex WrittenTimeRegex = + new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimePrefix = + new Regex(DateTimeDefinitions.TimePrefix, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeSuffix = + new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags, RegexTimeOut); + + public static readonly Regex BasicTime = + new Regex(DateTimeDefinitions.BasicTime, RegexFlags, RegexTimeOut); + + // handle special time such as 'at midnight', 'midnight', 'midday' + public static readonly Regex MidnightRegex = + new Regex(DateTimeDefinitions.MidnightRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MidmorningRegex = + new Regex(DateTimeDefinitions.MidmorningRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MidafternoonRegex = + new Regex(DateTimeDefinitions.MidafternoonRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MiddayRegex = + new Regex(DateTimeDefinitions.MiddayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MidTimeRegex = + new Regex(DateTimeDefinitions.MidTimeRegex, RegexFlags, RegexTimeOut); + + // part 3: regex for time + // -------------------------------------- + // handle "at four" "at 3" + public static readonly Regex AtRegex = + new Regex(DateTimeDefinitions.AtRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex IshRegex = + new Regex(DateTimeDefinitions.IshRegex, RegexFlags, 
RegexTimeOut); + + public static readonly Regex TimeUnitRegex = + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ConnectNumRegex = + new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeBeforeAfterRegex = + new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex[] TimeRegexList = + { + // (three min past)? seven|7|(seven thirty) pm + new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags, RegexTimeOut), + + // (three min past)? 3:00(:00)? (pm)? + new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags, RegexTimeOut), + + // (three min past)? 3.00 (pm) + new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags, RegexTimeOut), + + // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? (in the night) + new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags, RegexTimeOut), + + // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? + new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags, RegexTimeOut), + + // (five thirty|seven|7|7:00(:00)?) (pm)? (in the night) + new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags, RegexTimeOut), + + // (in the night) at? (five thirty|seven|7|7:00(:00)?) (pm)? + new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags, RegexTimeOut), + + // (?<=to) 4 + new Regex(DateTimeDefinitions.TimeRegex8, RegexFlags, RegexTimeOut), + + new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags, RegexTimeOut), + + // (three min past)? 3h00 (pm)? 
+ new Regex(DateTimeDefinitions.TimeRegex10, RegexFlags, RegexTimeOut), + + // at 2.30, "at" prefix is required here + // 3.30pm, "am/pm" suffix is required here + new Regex(DateTimeDefinitions.TimeRegex11, RegexFlags, RegexTimeOut), + + // 340pm + ConnectNumRegex, + }; + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public HindiTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + DurationExtractor = new BaseDurationExtractor(new HindiDurationExtractorConfiguration(this)); + } + + IEnumerable ITimeExtractorConfiguration.TimeRegexList => TimeRegexList; + + Regex ITimeExtractorConfiguration.AtRegex => AtRegex; + + Regex ITimeExtractorConfiguration.IshRegex => IshRegex; + + Regex ITimeExtractorConfiguration.TimeBeforeAfterRegex => TimeBeforeAfterRegex; + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateTimeExtractor TimeZoneExtractor { get; } + + public string TimeTokenPrefix => DateTimeDefinitions.TimeTokenPrefix; + + public Dictionary AmbiguityFiltersDict => null; + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiTimePeriodExtractorConfiguration.cs new file mode 100644 index 0000000000..eccab7e821 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiTimePeriodExtractorConfiguration.cs @@ -0,0 +1,146 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Hindi; +using Microsoft.Recognizers.Text.DateTime.Hindi.Utilities; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Hindi +{ + public class HindiTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, ITimePeriodExtractorConfiguration + { + public static readonly Regex TillRegex = + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex HourRegex = + new Regex(DateTimeDefinitions.HourRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PeriodHourNumRegex = + new Regex(DateTimeDefinitions.PeriodHourNumRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PeriodDescRegex = + new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PmRegex = + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AmRegex = + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PureNumFromTo = + new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags, RegexTimeOut); + + public static readonly Regex PureNumBetweenAnd = + new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecificTimeFromTo = + new Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecificTimeBetweenAnd = + new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags, RegexTimeOut); + + public static readonly Regex PrepositionRegex = + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeOfDayRegex = + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); + + public static readonly 
Regex SpecificTimeOfDayRegex = + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeUnitRegex = + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeFollowedUnit = + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeNumberCombinedWithUnit = + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex GeneralEndingRegex = + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex FromRegex = + new Regex(DateTimeDefinitions.FromTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex BetweenRegex = + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex RangeConnectorRegex = + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); + + public HindiTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + SingleTimeExtractor = new BaseTimeExtractor(new HindiTimeExtractorConfiguration(this)); + UtilityConfiguration = new HindiDatetimeUtilityConfiguration(); + IntegerExtractor = Number.Hindi.IntegerExtractor.GetInstance(); + } + + public string TokenBeforeDate { get; } + + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + + public IDateTimeExtractor SingleTimeExtractor { get; } + + public IDateTimeExtractor TimeZoneExtractor { get; } + + public IExtractor IntegerExtractor { get; } + + public IEnumerable SimpleCasesRegex => new[] + { + PureNumFromTo, PureNumBetweenAnd, SpecificTimeFromTo, SpecificTimeBetweenAnd, + }; + + public IEnumerable PureNumberRegex => new[] + { + 
PureNumFromTo, PureNumBetweenAnd, + }; + + bool ITimePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + Regex ITimePeriodExtractorConfiguration.TillRegex => TillRegex; + + Regex ITimePeriodExtractorConfiguration.TimeOfDayRegex => TimeOfDayRegex; + + Regex ITimePeriodExtractorConfiguration.GeneralEndingRegex => GeneralEndingRegex; + + public bool GetFromTokenIndex(string text, out int index) + { + index = -1; + var fromMatch = FromRegex.Match(text); + if (fromMatch.Success) + { + index = fromMatch.Index; + } + + return fromMatch.Success; + } + + public bool GetBetweenTokenIndex(string text, out int index) + { + index = -1; + var betweenMatch = BetweenRegex.Match(text); + if (betweenMatch.Success) + { + index = betweenMatch.Index + betweenMatch.Length; + } + + return betweenMatch.Success; + } + + public bool IsConnectorToken(string text) + { + return RangeConnectorRegex.IsExactMatch(text, trim: true); + } + + public List ApplyPotentialPeriodAmbiguityHotfix(string text, List timePeriodErs) => TimePeriodFunctions.ApplyPotentialPeriodAmbiguityHotfix(text, timePeriodErs); + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiCommonDateTimeParserConfiguration.cs new file mode 100644 index 0000000000..4935b72540 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiCommonDateTimeParserConfiguration.cs @@ -0,0 +1,69 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.Hindi; +using Microsoft.Recognizers.Text.DateTime.Hindi.Utilities; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Hindi; + +namespace Microsoft.Recognizers.Text.DateTime.Hindi +{ + public class HindiCommonDateTimeParserConfiguration : BaseDateParserConfiguration, ICommonDateTimeParserConfiguration + { + public HindiCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + UtilityConfiguration = new HindiDatetimeUtilityConfiguration(); + + UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary(); + UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary(); + SeasonMap = DateTimeDefinitions.SeasonMap.ToImmutableDictionary(); + SpecialYearPrefixesMap = DateTimeDefinitions.SpecialYearPrefixesMap.ToImmutableDictionary(); + CardinalMap = DateTimeDefinitions.CardinalMap.ToImmutableDictionary(); + DayOfWeek = DateTimeDefinitions.DayOfWeek.ToImmutableDictionary(); + MonthOfYear = DateTimeDefinitions.MonthOfYear.ToImmutableDictionary(); + Numbers = DateTimeDefinitions.Numbers.ToImmutableDictionary(); + DoubleNumbers = DateTimeDefinitions.DoubleNumbers.ToImmutableDictionary(); + WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); + SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Hindi.CardinalExtractor.GetInstance(); + IntegerExtractor = Number.Hindi.IntegerExtractor.GetInstance(); + OrdinalExtractor = Number.Hindi.OrdinalExtractor.GetInstance(); + + NumberParser = new BaseIndianNumberParser(new HindiNumberParserConfiguration(numConfig)); + + 
TimeZoneParser = new BaseTimeZoneParser(new HindiTimeZoneParserConfiguration(this)); + + DateExtractor = new BaseDateExtractor(new HindiDateExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new HindiHolidayExtractorConfiguration(this)); + TimeExtractor = new BaseTimeExtractor(new HindiTimeExtractorConfiguration(this)); + DateTimeExtractor = new BaseDateTimeExtractor(new HindiDateTimeExtractorConfiguration(this)); + DurationExtractor = new BaseDurationExtractor(new HindiDurationExtractorConfiguration(this)); + DatePeriodExtractor = new BaseDatePeriodExtractor(new HindiDatePeriodExtractorConfiguration(this)); + TimePeriodExtractor = new BaseTimePeriodExtractor(new HindiTimePeriodExtractorConfiguration(this)); + DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(new HindiDateTimePeriodExtractorConfiguration(this)); + DurationParser = new BaseDurationParser(new HindiDurationParserConfiguration(this)); + DateParser = new BaseDateParser(new HindiDateParserConfiguration(this)); + HolidayTimeParser = new BaseHolidayParser(new HindiHolidayParserConfiguration(this)); + TimeParser = new TimeParser(new HindiTimeParserConfiguration(this)); + DateTimeParser = new BaseDateTimeParser(new HindiDateTimeParserConfiguration(this)); + DatePeriodParser = new BaseDatePeriodParser(new HindiDatePeriodParserConfiguration(this)); + TimePeriodParser = new BaseTimePeriodParser(new HindiTimePeriodParserConfiguration(this)); + DateTimePeriodParser = new BaseDateTimePeriodParser(new HindiDateTimePeriodParserConfiguration(this)); + DateTimeAltParser = new BaseDateTimeAltParser(new HindiDateTimeAltParserConfiguration(this)); + } + + public override IImmutableDictionary DayOfMonth => BaseDateTime.DayOfMonthDictionary.ToImmutableDictionary().AddRange(DateTimeDefinitions.DayOfMonth); + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDateParserConfiguration.cs 
b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDateParserConfiguration.cs new file mode 100644 index 0000000000..7dc6d337c4 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDateParserConfiguration.cs @@ -0,0 +1,192 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Hindi; +using Microsoft.Recognizers.Text.DateTime.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Hindi +{ + public class HindiDateParserConfiguration : BaseDateTimeOptionsConfiguration, IDateParserConfiguration + { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private IImmutableList lastCardinalTerms = DateTimeDefinitions.LastCardinalTerms.ToImmutableList(); + + public HindiDateParserConfiguration(ICommonDateTimeParserConfiguration config) + : base(config) + { + DateTokenPrefix = DateTimeDefinitions.DateTokenPrefix; + IntegerExtractor = config.IntegerExtractor; + OrdinalExtractor = config.OrdinalExtractor; + CardinalExtractor = config.CardinalExtractor; + NumberParser = new HindiDateExtractorConfiguration(this).NumberParser; + DurationExtractor = config.DurationExtractor; + DateExtractor = config.DateExtractor; + DurationParser = config.DurationParser; + HolidayParser = new BaseHolidayParser(new HindiHolidayParserConfiguration(this)); + DateRegexes = new HindiDateExtractorConfiguration(this).DateRegexList; + OnRegex = HindiDateExtractorConfiguration.OnRegex; + SpecialDayRegex = HindiDateExtractorConfiguration.SpecialDayRegex; + SpecialDayWithNumRegex = HindiDateExtractorConfiguration.SpecialDayWithNumRegex; + NextRegex = HindiDateExtractorConfiguration.NextDateRegex; + ThisRegex = HindiDateExtractorConfiguration.ThisRegex; + LastRegex = 
HindiDateExtractorConfiguration.LastDateRegex; + UnitRegex = HindiDateExtractorConfiguration.DateUnitRegex; + WeekDayRegex = HindiDateExtractorConfiguration.WeekDayRegex; + MonthRegex = HindiDateExtractorConfiguration.MonthRegex; + WeekDayOfMonthRegex = HindiDateExtractorConfiguration.WeekDayOfMonthRegex; + ForTheRegex = HindiDateExtractorConfiguration.ForTheRegex; + WeekDayAndDayOfMothRegex = HindiDateExtractorConfiguration.WeekDayAndDayOfMothRegex; + WeekDayAndDayRegex = HindiDateExtractorConfiguration.WeekDayAndDayRegex; + RelativeMonthRegex = HindiDateExtractorConfiguration.RelativeMonthRegex; + StrictRelativeRegex = HindiDateExtractorConfiguration.StrictRelativeRegex; + YearSuffix = HindiDateExtractorConfiguration.YearSuffix; + RelativeWeekDayRegex = HindiDateExtractorConfiguration.RelativeWeekDayRegex; + BeforeAfterRegex = HindiDateExtractorConfiguration.BeforeAfterRegex; + + RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexFlags, RegexTimeOut); + NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + UpcomingPrefixRegex = new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags, RegexTimeOut); + PastPrefixRegex = new Regex(DateTimeDefinitions.PastPrefixRegex, RegexFlags, RegexTimeOut); + + DayOfMonth = config.DayOfMonth; + DayOfWeek = config.DayOfWeek; + MonthOfYear = config.MonthOfYear; + CardinalMap = config.CardinalMap; + UnitMap = config.UnitMap; + UtilityConfiguration = config.UtilityConfiguration; + + SameDayTerms = DateTimeDefinitions.SameDayTerms.ToImmutableList(); + PlusOneDayTerms = DateTimeDefinitions.PlusOneDayTerms.ToImmutableList(); + PlusTwoDayTerms = DateTimeDefinitions.PlusTwoDayTerms.ToImmutableList(); + MinusOneDayTerms = DateTimeDefinitions.MinusOneDayTerms.ToImmutableList(); + MinusTwoDayTerms = DateTimeDefinitions.MinusTwoDayTerms.ToImmutableList(); + } + + public string 
DateTokenPrefix { get; } + + public IExtractor IntegerExtractor { get; } + + public IExtractor OrdinalExtractor { get; } + + public IExtractor CardinalExtractor { get; } + + public IParser NumberParser { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateExtractor DateExtractor { get; } + + public IDateTimeParser DurationParser { get; } + + public IDateTimeParser HolidayParser { get; } + + public IEnumerable DateRegexes { get; } + + public IImmutableDictionary UnitMap { get; } + + public Regex OnRegex { get; } + + public Regex SpecialDayRegex { get; } + + public Regex SpecialDayWithNumRegex { get; } + + public Regex NextRegex { get; } + + public Regex ThisRegex { get; } + + public Regex LastRegex { get; } + + public Regex UnitRegex { get; } + + public Regex WeekDayRegex { get; } + + public Regex MonthRegex { get; } + + public Regex WeekDayOfMonthRegex { get; } + + public Regex ForTheRegex { get; } + + public Regex WeekDayAndDayOfMothRegex { get; } + + public Regex WeekDayAndDayRegex { get; } + + public Regex RelativeMonthRegex { get; } + + public Regex StrictRelativeRegex { get; } + + public Regex YearSuffix { get; } + + public Regex RelativeWeekDayRegex { get; } + + public Regex RelativeDayRegex { get; } + + public Regex NextPrefixRegex { get; } + + public Regex PreviousPrefixRegex { get; } + + public Regex UpcomingPrefixRegex { get; } + + public Regex PastPrefixRegex { get; } + + public Regex BeforeAfterRegex { get; } + + public Regex TasksModeDurationToDatePatterns { get; } + + public IImmutableDictionary DayOfMonth { get; } + + public IImmutableDictionary DayOfWeek { get; } + + public IImmutableDictionary MonthOfYear { get; } + + public IImmutableDictionary CardinalMap { get; } + + public IImmutableList SameDayTerms { get; } + + public IImmutableList PlusOneDayTerms { get; } + + public IImmutableList MinusOneDayTerms { get; } + + public IImmutableList PlusTwoDayTerms { get; } + + public IImmutableList MinusTwoDayTerms { get; } + + 
/// <summary>
/// Explicit <see cref="IDateParserConfiguration"/> member that returns
/// <c>DateTimeDefinitions.CheckBothBeforeAfter</c>.
/// </summary>
bool IDateParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;

/// <summary>Gets the date-time utility configuration held by this parser configuration.</summary>
public IDateTimeUtilityConfiguration UtilityConfiguration { get; }

/// <summary>
/// Maps a relative prefix found in <paramref name="text"/> to a month/year offset.
/// </summary>
/// <param name="text">Text to inspect; it is trimmed before matching.</param>
/// <returns>
/// <c>-1</c> when <c>PreviousPrefixRegex</c> matches, otherwise <c>1</c> when
/// <c>NextPrefixRegex</c> matches, otherwise <c>0</c>.
/// </returns>
public int GetSwiftMonthOrYear(string text)
{
    var candidate = text.Trim();

    // Both patterns are always evaluated (next first); a "previous" match takes
    // precedence when both of them match.
    var matchesNext = NextPrefixRegex.IsMatch(candidate);
    var matchesPrevious = PreviousPrefixRegex.IsMatch(candidate);

    if (matchesPrevious)
    {
        return -1;
    }

    return matchesNext ? 1 : 0;
}

/// <summary>
/// Tells whether the trimmed <paramref name="text"/> is contained in <c>lastCardinalTerms</c>.
/// </summary>
/// <param name="text">Text to look up; it is trimmed first.</param>
/// <returns><c>true</c> when the trimmed text is one of the stored terms.</returns>
public bool IsCardinalLast(string text) => lastCardinalTerms.Contains(text.Trim());

/// <summary>Returns <paramref name="text"/> unchanged.</summary>
/// <param name="text">Text to normalize.</param>
/// <returns>The same string that was passed in.</returns>
public string Normalize(string text) => text;
} // end of class
} // end of namespace
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDatePeriodParserConfiguration.cs
new file mode 100644
index 0000000000..2d40aa3a3f
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDatePeriodParserConfiguration.cs
@@ -0,0 +1,364 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.Hindi;

namespace Microsoft.Recognizers.Text.DateTime.Hindi
{
    /// <summary>
    /// Hindi implementation of <see cref="IDatePeriodParserConfiguration"/>. The constructor
    /// copies extractors, parsers and lookup maps from the common configuration and takes its
    /// regular expressions from the Hindi extractor configurations.
    /// </summary>
    public class HindiDatePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDatePeriodParserConfiguration
    {
        /// <summary>Built from <c>DateTimeDefinitions.PreviousPrefixRegex</c>.</summary>
        public static readonly Regex PreviousPrefixRegex =
            new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut);

        /// <summary>Built from <c>DateTimeDefinitions.ThisPrefixRegex</c>.</summary>
        public static readonly Regex ThisPrefixRegex =
            new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut);

        /// <summary>Built from <c>DateTimeDefinitions.AfterNextSuffixRegex</c>.</summary>
        public static readonly Regex AfterNextSuffixRegex =
            new Regex(DateTimeDefinitions.AfterNextSuffixRegex, RegexFlags, RegexTimeOut);

        /// <summary>Built from <c>DateTimeDefinitions.RelativeRegex</c>.</summary>
        public static readonly Regex RelativeRegex =
            new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut);

        /// <summary>Built from <c>DateTimeDefinitions.UnspecificEndOfRangeRegex</c>.</summary>
        public static readonly Regex UnspecificEndOfRangeRegex =
            new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags, RegexTimeOut);

        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

        // Each term wrapped in one leading and one trailing space. The element type is
        // string because every element is produced by the interpolation $" {str} ".
        private static readonly IList<string> monthTermsPadded =
            DateTimeDefinitions.MonthTerms.Select(str => $" {str} ").ToList();

        private static readonly IList<string> weekendTermsPadded =
            DateTimeDefinitions.WeekendTerms.Select(str => $" {str} ").ToList();

        private static readonly IList<string> weekTermsPadded =
            DateTimeDefinitions.WeekTerms.Select(str => $" {str} ").ToList();

        private static readonly IList<string> yearTermsPadded =
            DateTimeDefinitions.YearTerms.Select(str => $" {str} ").ToList();

        private static readonly Regex NextPrefixRegex =
            new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex NextPrefixRegexNoWeek =
            new Regex(DateTimeDefinitions.NextPrefixRegexNoWeek, RegexFlags, RegexTimeOut);

        /// <summary>
        /// Initializes the configuration from the shared parser configuration.
        /// </summary>
        /// <param name="config">
        /// Common configuration that supplies the extractors, parsers, utility configuration
        /// and lookup maps copied below; it is also passed to the base constructor.
        /// </param>
        public HindiDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration config)
            : base(config)
        {
            TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate;
            CardinalExtractor = config.CardinalExtractor;
            OrdinalExtractor = config.OrdinalExtractor;
            IntegerExtractor = config.IntegerExtractor;
            NumberParser = config.NumberParser;
            DateExtractor = config.DateExtractor;
            DurationExtractor = config.DurationExtractor;
            DurationParser = config.DurationParser;
            DateParser = config.DateParser;

            MonthFrontBetweenRegex = HindiDatePeriodExtractorConfiguration.MonthFrontBetweenRegex;
            BetweenRegex = HindiDatePeriodExtractorConfiguration.BetweenRegex;
            MonthFrontSimpleCasesRegex = HindiDatePeriodExtractorConfiguration.MonthFrontSimpleCasesRegex;
            SimpleCasesRegex = HindiDatePeriodExtractorConfiguration.SimpleCasesRegex;
            OneWordPeriodRegex = HindiDatePeriodExtractorConfiguration.OneWordPeriodRegex;
            MonthWithYear = HindiDatePeriodExtractorConfiguration.MonthWithYear;
            MonthNumWithYear = HindiDatePeriodExtractorConfiguration.MonthNumWithYear;
            YearRegex = HindiDatePeriodExtractorConfiguration.YearRegex;
            PastRegex = HindiDatePeriodExtractorConfiguration.PreviousPrefixRegex;

            // FutureRegex uses the "no week" variant of the next-prefix pattern, while the
            // interface's NextPrefixRegex member is served by the full NextPrefixRegex field.
            FutureRegex = NextPrefixRegexNoWeek;
            FutureSuffixRegex = HindiDatePeriodExtractorConfiguration.FutureSuffixRegex;
            NumberCombinedWithUnit = HindiDurationExtractorConfiguration.NumberCombinedWithDurationUnit;
            WeekOfMonthRegex = HindiDatePeriodExtractorConfiguration.WeekOfMonthRegex;
            WeekOfYearRegex = HindiDatePeriodExtractorConfiguration.WeekOfYearRegex;
            QuarterRegex = HindiDatePeriodExtractorConfiguration.QuarterRegex;
            QuarterRegexYearFront = HindiDatePeriodExtractorConfiguration.QuarterRegexYearFront;
            AllHalfYearRegex = HindiDatePeriodExtractorConfiguration.AllHalfYearRegex;
            SeasonRegex = HindiDatePeriodExtractorConfiguration.SeasonRegex;
            WhichWeekRegex = HindiDatePeriodExtractorConfiguration.WhichWeekRegex;
            WeekOfRegex = HindiDatePeriodExtractorConfiguration.WeekOfRegex;
            MonthOfRegex = HindiDatePeriodExtractorConfiguration.MonthOfRegex;
            RestOfDateRegex = HindiDatePeriodExtractorConfiguration.RestOfDateRegex;
            LaterEarlyPeriodRegex = HindiDatePeriodExtractorConfiguration.LaterEarlyPeriodRegex;
            WeekWithWeekDayRangeRegex = HindiDatePeriodExtractorConfiguration.WeekWithWeekDayRangeRegex;
            YearPlusNumberRegex = HindiDatePeriodExtractorConfiguration.YearPlusNumberRegex;
            DecadeWithCenturyRegex = HindiDatePeriodExtractorConfiguration.DecadeWithCenturyRegex;
            YearPeriodRegex = HindiDatePeriodExtractorConfiguration.YearPeriodRegex;
            ComplexDatePeriodRegex = HindiDatePeriodExtractorConfiguration.ComplexDatePeriodRegex;
            RelativeDecadeRegex = HindiDatePeriodExtractorConfiguration.RelativeDecadeRegex;
            InConnectorRegex = config.UtilityConfiguration.InConnectorRegex;
            WithinNextPrefixRegex = HindiDatePeriodExtractorConfiguration.WithinNextPrefixRegex;
            ReferenceDatePeriodRegex = HindiDatePeriodExtractorConfiguration.ReferenceDatePeriodRegex;
            AgoRegex = HindiDatePeriodExtractorConfiguration.AgoRegex;
            LaterRegex = HindiDatePeriodExtractorConfiguration.LaterRegex;
            LessThanRegex = HindiDatePeriodExtractorConfiguration.LessThanRegex;
            MoreThanRegex = HindiDatePeriodExtractorConfiguration.MoreThanRegex;
            CenturySuffixRegex = HindiDatePeriodExtractorConfiguration.CenturySuffixRegex;
            NowRegex = HindiDatePeriodExtractorConfiguration.NowRegex;
            FirstLastRegex = HindiDatePeriodExtractorConfiguration.FirstLastRegex;
            OfYearRegex = HindiDatePeriodExtractorConfiguration.OfYearRegex;
            SpecialDayRegex = HindiDateExtractorConfiguration.SpecialDayRegex;

            // Deliberately Singleline only (no ExplicitCapture), but bounded by the same
            // match timeout as every other regex built in this class.
            TodayNowRegex = new Regex(DateTimeDefinitions.TodayNowRegex, RegexOptions.Singleline, RegexTimeOut);

            UnitMap = config.UnitMap;
            CardinalMap = config.CardinalMap;
            DayOfMonth = config.DayOfMonth;
            MonthOfYear = config.MonthOfYear;
            SeasonMap = config.SeasonMap;
            SpecialYearPrefixesMap = config.SpecialYearPrefixesMap;
            WrittenDecades = config.WrittenDecades;
            Numbers = config.Numbers;
            SpecialDecadeCases = config.SpecialDecadeCases;
        }

        /// <summary>
        /// Gets the minimum year number. The constructor above does not assign it, so it
        /// keeps the default value 0.
        /// </summary>
        public int MinYearNum { get; }

public int MaxYearNum { get; } + + public string TokenBeforeDate { get; } + + public IDateExtractor DateExtractor { get; } + + public IExtractor CardinalExtractor { get; } + + public IExtractor OrdinalExtractor { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IExtractor IntegerExtractor { get; } + + public IParser NumberParser { get; } + + public IDateTimeParser DateParser { get; } + + public IDateTimeParser DurationParser { get; } + + public Regex MonthFrontBetweenRegex { get; } + + public Regex BetweenRegex { get; } + + public Regex MonthFrontSimpleCasesRegex { get; } + + public Regex SimpleCasesRegex { get; } + + public Regex OneWordPeriodRegex { get; } + + public Regex MonthWithYear { get; } + + public Regex MonthNumWithYear { get; } + + public Regex YearRegex { get; } + + public Regex PastRegex { get; } + + public Regex FutureRegex { get; } + + public Regex FutureSuffixRegex { get; } + + public Regex NumberCombinedWithUnit { get; } + + public Regex WeekOfMonthRegex { get; } + + public Regex WeekOfYearRegex { get; } + + public Regex QuarterRegex { get; } + + public Regex QuarterRegexYearFront { get; } + + public Regex AllHalfYearRegex { get; } + + public Regex SeasonRegex { get; } + + public Regex WhichWeekRegex { get; } + + public Regex WeekOfRegex { get; } + + public Regex MonthOfRegex { get; } + + public Regex InConnectorRegex { get; } + + public Regex WithinNextPrefixRegex { get; } + + public Regex RestOfDateRegex { get; } + + public Regex LaterEarlyPeriodRegex { get; } + + public Regex WeekWithWeekDayRangeRegex { get; } + + public Regex YearPlusNumberRegex { get; } + + public Regex DecadeWithCenturyRegex { get; } + + public Regex YearPeriodRegex { get; } + + public Regex ComplexDatePeriodRegex { get; } + + public Regex RelativeDecadeRegex { get; } + + public Regex ReferenceDatePeriodRegex { get; } + + public Regex AgoRegex { get; } + + public Regex LaterRegex { get; } + + public Regex LessThanRegex { get; } + + public Regex 
MoreThanRegex { get; } + + public Regex CenturySuffixRegex { get; } + + public Regex NowRegex { get; } + + public Regex SpecialDayRegex { get; } + + public Regex TodayNowRegex { get; } + + public Regex FirstLastRegex { get; } + + public Regex OfYearRegex { get; } + + Regex ISimpleDatePeriodParserConfiguration.RelativeRegex => RelativeRegex; + + Regex IDatePeriodParserConfiguration.NextPrefixRegex => NextPrefixRegex; + + Regex IDatePeriodParserConfiguration.PreviousPrefixRegex => PreviousPrefixRegex; + + Regex IDatePeriodParserConfiguration.ThisPrefixRegex => ThisPrefixRegex; + + Regex IDatePeriodParserConfiguration.UnspecificEndOfRangeRegex => UnspecificEndOfRangeRegex; + + Regex IDatePeriodParserConfiguration.AmbiguousPointRangeRegex => null; + + bool IDatePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + public IImmutableDictionary UnitMap { get; } + + public IImmutableDictionary CardinalMap { get; } + + public IImmutableDictionary DayOfMonth { get; } + + public IImmutableDictionary MonthOfYear { get; } + + public IImmutableDictionary SeasonMap { get; } + + public IImmutableDictionary SpecialYearPrefixesMap { get; } + + public IImmutableDictionary WrittenDecades { get; } + + public IImmutableDictionary Numbers { get; } + + public IImmutableDictionary SpecialDecadeCases { get; } + + public IImmutableList InStringList { get; } + + public int GetSwiftDayOrMonth(string text) + { + var swift = 0; + + var trimmedText = text.Trim(); + + if (AfterNextSuffixRegex.IsMatch(trimmedText)) + { + swift = 2; + } + else if (NextPrefixRegex.IsMatch(trimmedText)) + { + swift = 1; + } + else if (PreviousPrefixRegex.IsMatch(trimmedText)) + { + swift = -1; + } + + return swift; + } + + public int GetSwiftYear(string text) + { + var swift = -10; + + var trimmedText = text.Trim(); + + if (AfterNextSuffixRegex.IsMatch(trimmedText)) + { + swift = 2; + } + else if (NextPrefixRegex.IsMatch(trimmedText)) + { + swift = 1; + } + else if 
(PreviousPrefixRegex.IsMatch(trimmedText)) + { + swift = -1; + } + else if (ThisPrefixRegex.IsMatch(trimmedText)) + { + swift = 0; + } + + return swift; + } + + public bool IsFuture(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.FutureTerms.Any(o => trimmedText.StartsWith(o, StringComparison.Ordinal)); + } + + public bool IsLastCardinal(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.LastCardinalTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } + + public bool IsMonthOnly(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (monthTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); + } + + public bool IsMonthToDate(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } + + public bool IsWeekend(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (weekendTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); + } + + public bool IsWeekOnly(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (weekTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); + } + + public bool IsFortnight(string text) + { + return false; + } + + public bool IsYearOnly(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (yearTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)) || + (DateTimeDefinitions.GenericYearTerms.Any(o => trimmedText.EndsWith(o, 
StringComparison.Ordinal)) && + UnspecificEndOfRangeRegex.IsMatch(trimmedText)); + } + + public bool IsYearToDate(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDateTimeAltParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDateTimeAltParserConfiguration.cs new file mode 100644 index 0000000000..59fa624c32 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDateTimeAltParserConfiguration.cs @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.DateTime.Hindi +{ + public class HindiDateTimeAltParserConfiguration : IDateTimeAltParserConfiguration + { + public HindiDateTimeAltParserConfiguration(ICommonDateTimeParserConfiguration config) + { + DateTimeParser = config.DateTimeParser; + DateParser = config.DateParser; + TimeParser = config.TimeParser; + DateTimePeriodParser = config.DateTimePeriodParser; + TimePeriodParser = config.TimePeriodParser; + DatePeriodParser = config.DatePeriodParser; + } + + public IDateTimeParser DateTimeParser { get; } + + public IDateTimeParser DateParser { get; } + + public IDateTimeParser TimeParser { get; } + + public IDateTimeParser DateTimePeriodParser { get; } + + public IDateTimeParser TimePeriodParser { get; } + + public IDateTimeParser DatePeriodParser { get; } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDateTimeParserConfiguration.cs new file mode 100644 index 0000000000..b248ba025d --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDateTimeParserConfiguration.cs @@ -0,0 +1,191 @@ +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Hindi; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Hindi +{ + public class HindiDateTimeParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeParserConfiguration + { + public static readonly Regex AmTimeRegex = + new Regex(DateTimeDefinitions.AMTimeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PmTimeRegex = + new Regex(DateTimeDefinitions.PMTimeRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex NowTimeRegex = + new Regex(DateTimeDefinitions.NowTimeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex RecentlyTimeRegex = + new Regex(DateTimeDefinitions.RecentlyTimeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex AsapTimeRegex = + new Regex(DateTimeDefinitions.AsapTimeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex NextPrefixRegex = + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex PreviousPrefixRegex = + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + + public HindiDateTimeParserConfiguration(ICommonDateTimeParserConfiguration config) + : base(config) + { + TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime; + + DateExtractor = config.DateExtractor; + TimeExtractor = config.TimeExtractor; + DateParser = config.DateParser; + TimeParser = config.TimeParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; + + NowRegex = HindiDateTimeExtractorConfiguration.NowRegex; + + SimpleTimeOfTodayAfterRegex = 
HindiDateTimeExtractorConfiguration.SimpleTimeOfTodayAfterRegex; + SimpleTimeOfTodayBeforeRegex = HindiDateTimeExtractorConfiguration.SimpleTimeOfTodayBeforeRegex; + SpecificTimeOfDayRegex = HindiDateTimeExtractorConfiguration.SpecificTimeOfDayRegex; + SpecificEndOfRegex = HindiDateTimeExtractorConfiguration.SpecificEndOfRegex; + UnspecificEndOfRegex = HindiDateTimeExtractorConfiguration.UnspecificEndOfRegex; + UnitRegex = HindiTimeExtractorConfiguration.TimeUnitRegex; + DateNumberConnectorRegex = HindiDateTimeExtractorConfiguration.DateNumberConnectorRegex; + YearRegex = HindiDateTimeExtractorConfiguration.YearRegex; + + Numbers = config.Numbers; + CardinalExtractor = config.CardinalExtractor; + IntegerExtractor = config.IntegerExtractor; + NumberParser = config.NumberParser; + DurationExtractor = config.DurationExtractor; + DurationParser = config.DurationParser; + UnitMap = config.UnitMap; + UtilityConfiguration = config.UtilityConfiguration; + } + + public string TokenBeforeDate { get; } + + public string TokenBeforeTime { get; } + + public IDateExtractor DateExtractor { get; } + + public IDateTimeExtractor TimeExtractor { get; } + + public IDateTimeParser DateParser { get; } + + public IDateTimeParser TimeParser { get; } + + public IExtractor CardinalExtractor { get; } + + public IExtractor IntegerExtractor { get; } + + public IParser NumberParser { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateTimeParser DurationParser { get; } + + public IImmutableDictionary UnitMap { get; } + + public Regex NowRegex { get; } + + public Regex AMTimeRegex => AmTimeRegex; + + public Regex PMTimeRegex => PmTimeRegex; + + public Regex SimpleTimeOfTodayAfterRegex { get; } + + public Regex SimpleTimeOfTodayBeforeRegex { get; } + + public Regex SpecificTimeOfDayRegex { get; } + + public Regex SpecificEndOfRegex { get; } + + public Regex UnspecificEndOfRegex { get; } + + public Regex UnitRegex { get; } + + public Regex DateNumberConnectorRegex { 
get; } + + public Regex PrepositionRegex { get; } + + public Regex ConnectorRegex { get; } + + public Regex YearRegex { get; } + + public IImmutableDictionary Numbers { get; } + + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + + public int GetHour(string text, int hour) + { + int result = hour; + + var trimmedText = text.Trim(); + + if (AMTimeRegex.MatchEnd(trimmedText, trim: true).Success && hour >= Constants.HalfDayHourCount) + { + result -= Constants.HalfDayHourCount; + } + else if (!AMTimeRegex.MatchEnd(trimmedText, trim: true).Success && hour < Constants.HalfDayHourCount) + { + result += Constants.HalfDayHourCount; + } + + return result; + } + + public bool GetMatchedNowTimex(string text, out string timex) + { + var trimmedText = text.Trim(); + + if (NowTimeRegex.MatchEnd(trimmedText, trim: true).Success) + { + timex = "PRESENT_REF"; + } + else if (RecentlyTimeRegex.IsExactMatch(trimmedText, trim: true)) + { + timex = "PAST_REF"; + } + else if (AsapTimeRegex.IsExactMatch(trimmedText, trim: true)) + { + timex = "FUTURE_REF"; + } + else + { + timex = null; + return false; + } + + return true; + } + + public int GetSwiftDay(string text) + { + var trimmedText = text.Trim(); + + var swift = 0; + if (NextPrefixRegex.MatchBegin(trimmedText, trim: true).Success) + { + swift = 1; + } + else if (PreviousPrefixRegex.MatchBegin(trimmedText, trim: true).Success) + { + swift = -1; + } + + return swift; + } + + public bool ContainsAmbiguousToken(string text, string matchedText) => false; + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDateTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDateTimePeriodParserConfiguration.cs new file mode 100644 index 0000000000..1ce5dfb3e5 --- /dev/null +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDateTimePeriodParserConfiguration.cs @@ -0,0 +1,209 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Hindi; + +namespace Microsoft.Recognizers.Text.DateTime.Hindi +{ + public class HindiDateTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimePeriodParserConfiguration + { + public static readonly Regex MorningStartEndRegex = + new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AfternoonStartEndRegex = + new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex EveningStartEndRegex = + new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NightStartEndRegex = + new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public HindiDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration config) + : base(config) + { + TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime; + + DateExtractor = config.DateExtractor; + TimeExtractor = config.TimeExtractor; + DateTimeExtractor = config.DateTimeExtractor; + TimePeriodExtractor = config.TimePeriodExtractor; + CardinalExtractor = config.CardinalExtractor; + DurationExtractor = config.DurationExtractor; + NumberParser = config.NumberParser; + DateParser = config.DateParser; + TimeParser = config.TimeParser; + TimePeriodParser = config.TimePeriodParser; + DurationParser = config.DurationParser; + DateTimeParser = config.DateTimeParser; + TimeZoneParser = config.TimeZoneParser; + HolidayExtractor = 
config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; + + PureNumberFromToRegex = HindiTimePeriodExtractorConfiguration.PureNumFromTo; + HyphenDateRegex = HindiDateTimePeriodExtractorConfiguration.HyphenDateRegex; + PureNumberBetweenAndRegex = HindiTimePeriodExtractorConfiguration.PureNumBetweenAnd; + SpecificTimeOfDayRegex = HindiDateTimePeriodExtractorConfiguration.PeriodSpecificTimeOfDayRegex; + TimeOfDayRegex = HindiDateTimeExtractorConfiguration.TimeOfDayRegex; + PreviousPrefixRegex = HindiDatePeriodExtractorConfiguration.PreviousPrefixRegex; + FutureRegex = HindiDatePeriodExtractorConfiguration.NextPrefixRegex; + FutureSuffixRegex = HindiDatePeriodExtractorConfiguration.FutureSuffixRegex; + NumberCombinedWithUnitRegex = HindiDateTimePeriodExtractorConfiguration.TimeNumberCombinedWithUnit; + UnitRegex = HindiTimePeriodExtractorConfiguration.TimeUnitRegex; + PeriodTimeOfDayWithDateRegex = HindiDateTimePeriodExtractorConfiguration.PeriodTimeOfDayWithDateRegex; + RelativeTimeUnitRegex = HindiDateTimePeriodExtractorConfiguration.RelativeTimeUnitRegex; + RestOfDateTimeRegex = HindiDateTimePeriodExtractorConfiguration.RestOfDateTimeRegex; + AmDescRegex = HindiDateTimePeriodExtractorConfiguration.AmDescRegex; + PmDescRegex = HindiDateTimePeriodExtractorConfiguration.PmDescRegex; + WithinNextPrefixRegex = HindiDateTimePeriodExtractorConfiguration.WithinNextPrefixRegex; + PrefixDayRegex = HindiDateTimePeriodExtractorConfiguration.PrefixDayRegex; + BeforeRegex = HindiDateTimePeriodExtractorConfiguration.BeforeRegex; + AfterRegex = HindiDateTimePeriodExtractorConfiguration.AfterRegex; + + UnitMap = config.UnitMap; + Numbers = config.Numbers; + } + + public string TokenBeforeDate { get; } + + public string TokenBeforeTime { get; } + + public IDateExtractor DateExtractor { get; } + + public IDateTimeExtractor TimeExtractor { get; } + + public IDateTimeExtractor DateTimeExtractor { get; } + + public IDateTimeExtractor TimePeriodExtractor { get; } + + 
// Extractors and parsers (get-only; set once by the constructor).
public IExtractor CardinalExtractor { get; }

public IDateTimeExtractor DurationExtractor { get; }

public IParser NumberParser { get; }

public IDateTimeParser DateParser { get; }

public IDateTimeParser TimeParser { get; }

public IDateTimeParser DateTimeParser { get; }

public IDateTimeParser TimePeriodParser { get; }

public IDateTimeParser DurationParser { get; }

public IDateTimeParser TimeZoneParser { get; }

// Regular expressions (get-only; set once by the constructor).
public Regex PureNumberFromToRegex { get; }

public Regex HyphenDateRegex { get; }

public Regex PureNumberBetweenAndRegex { get; }

public Regex SpecificTimeOfDayRegex { get; }

public Regex TimeOfDayRegex { get; }

public Regex PreviousPrefixRegex { get; }

public Regex FutureRegex { get; }

public Regex FutureSuffixRegex { get; }

public Regex NumberCombinedWithUnitRegex { get; }

public Regex UnitRegex { get; }

public Regex PeriodTimeOfDayWithDateRegex { get; }

public Regex RelativeTimeUnitRegex { get; }

public Regex RestOfDateTimeRegex { get; }

public Regex AmDescRegex { get; }

public Regex PmDescRegex { get; }

public Regex WithinNextPrefixRegex { get; }

public Regex PrefixDayRegex { get; }

public Regex BeforeRegex { get; }

public Regex AfterRegex { get; }

/// <summary>Returns <c>DateTimeDefinitions.CheckBothBeforeAfter</c>.</summary>
bool IDateTimePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;

/// <summary>Always <c>null</c> in this configuration.</summary>
Regex IDateTimePeriodParserConfiguration.TasksmodeMealTimeofDayRegex => null;

// NOTE(review): the generic type arguments of the two map properties below were missing
// from the source text (bare "IImmutableDictionary"). The arguments used here are
// assumed — confirm them against IDateTimePeriodParserConfiguration.
public IImmutableDictionary<string, string> UnitMap { get; }

public IImmutableDictionary<string, int> Numbers { get; }

public IDateTimeExtractor HolidayExtractor { get; }

public IDateTimeParser HolidayTimeParser { get; }

/// <summary>
/// Classifies <paramref name="text"/> as morning, afternoon, evening or night and resolves
/// that part of day through <c>TimexUtility.ResolveTimeOfDay</c>.
/// </summary>
/// <param name="text">Text to classify; it is trimmed before matching.</param>
/// <param name="todSymbol">
/// On success, the <c>Timex</c> of the resolved part of day; <c>null</c> when nothing matches.
/// </param>
/// <param name="beginHour">On success, the resolved begin hour; otherwise 0.</param>
/// <param name="endHour">On success, the resolved end hour; otherwise 0.</param>
/// <param name="endMin">On success, the resolved end minute; otherwise 0.</param>
/// <returns><c>true</c> when one of the four start/end patterns matches.</returns>
public bool GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMin)
{
    var candidate = text.Trim();

    beginHour = 0;
    endHour = 0;
    endMin = 0;

    // The patterns are tried in this fixed order and the first match wins; the
    // conditional operator stops evaluating as soon as one pattern matches.
    var partOfDay =
        MorningStartEndRegex.IsMatch(candidate) ? Constants.Morning :
        AfternoonStartEndRegex.IsMatch(candidate) ? Constants.Afternoon :
        EveningStartEndRegex.IsMatch(candidate) ? Constants.Evening :
        NightStartEndRegex.IsMatch(candidate) ? Constants.Night :
        null;

    if (partOfDay == null)
    {
        todSymbol = null;
        return false;
    }

    var resolved = TimexUtility.ResolveTimeOfDay(partOfDay);
    todSymbol = resolved.Timex;
    beginHour = resolved.BeginHour;
    endHour = resolved.EndHour;
    endMin = resolved.EndMin;

    return true;
}

/// <summary>
/// Maps a relative prefix in <paramref name="text"/> to an offset.
/// </summary>
/// <param name="text">Text to inspect; it is trimmed before matching.</param>
/// <returns>
/// <c>1</c> when <c>FutureRegex</c> matches, otherwise <c>-1</c> when
/// <c>PreviousPrefixRegex</c> matches, otherwise <c>0</c>.
/// </returns>
public int GetSwiftPrefix(string text)
{
    var candidate = text.Trim();

    if (FutureRegex.IsMatch(candidate))
    {
        return 1;
    }

    return PreviousPrefixRegex.IsMatch(candidate) ? -1 : 0;
}
} // end of class HindiDateTimePeriodParserConfiguration
} // end of namespace Microsoft.Recognizers.Text.DateTime.Hindi
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDurationParserConfiguration.cs
new file mode 100644
index 0000000000..196d555083
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDurationParserConfiguration.cs
@@ -0,0 +1,84 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.Hindi;

namespace Microsoft.Recognizers.Text.DateTime.Hindi
{
    /// <summary>
    /// Hindi implementation of <see cref="IDurationParserConfiguration"/>. It copies the
    /// number extractor/parser and unit maps from the common configuration, builds its own
    /// duration extractor, and takes its regular expressions from
    /// <c>HindiDurationExtractorConfiguration</c>.
    /// </summary>
    public class HindiDurationParserConfiguration : BaseDateTimeOptionsConfiguration, IDurationParserConfiguration
    {
        /// <summary>Built from <c>DateTimeDefinitions.PrefixArticleRegex</c>.</summary>
        public static readonly Regex PrefixArticleRegex =
            new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut);

        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

        /// <summary>
        /// Initializes the configuration from the shared parser configuration.
        /// </summary>
        /// <param name="config">
        /// Common configuration that supplies the cardinal extractor, number parser and the
        /// unit maps; it is also passed to the base constructor.
        /// </param>
        public HindiDurationParserConfiguration(ICommonDateTimeParserConfiguration config)
            : base(config)
        {
            CardinalExtractor = config.CardinalExtractor;
            NumberParser = config.NumberParser;

            // The extractor configuration receives `this` at this point, i.e. before the
            // regex and map properties below have been assigned; keep this statement order.
            DurationExtractor = new BaseDurationExtractor(new HindiDurationExtractorConfiguration(this), false);

            NumberCombinedWithUnit = HindiDurationExtractorConfiguration.NumberCombinedWithDurationUnit;

            AnUnitRegex = HindiDurationExtractorConfiguration.AnUnitRegex;
            DuringRegex = HindiDurationExtractorConfiguration.DuringRegex;
            AllDateUnitRegex = HindiDurationExtractorConfiguration.AllRegex;
            HalfDateUnitRegex = HindiDurationExtractorConfiguration.HalfRegex;
            SuffixAndRegex = HindiDurationExtractorConfiguration.SuffixAndRegex;
            FollowedUnit = HindiDurationExtractorConfiguration.DurationFollowedUnit;
            ConjunctionRegex = HindiDurationExtractorConfiguration.ConjunctionRegex;
            InexactNumberRegex = HindiDurationExtractorConfiguration.InexactNumberRegex;
            InexactNumberUnitRegex = HindiDurationExtractorConfiguration.InexactNumberUnitRegex;
            DurationUnitRegex = HindiDurationExtractorConfiguration.DurationUnitRegex;

            UnitMap = config.UnitMap;
            UnitValueMap = config.UnitValueMap;
            DoubleNumbers = config.DoubleNumbers;
        }

        public IExtractor CardinalExtractor { get; }

        public IDateTimeExtractor DurationExtractor { get; }

        public IParser NumberParser { get; }

        public Regex NumberCombinedWithUnit { get; }

        public Regex AnUnitRegex { get; }

        /// <summary>Exposes the static <see cref="PrefixArticleRegex"/> field through the interface.</summary>
        Regex IDurationParserConfiguration.PrefixArticleRegex => PrefixArticleRegex;

        public Regex DuringRegex { get; }

        public Regex AllDateUnitRegex { get; }

        public Regex HalfDateUnitRegex { get; }

        public Regex SuffixAndRegex { get; }

        public Regex FollowedUnit { get; }

        public Regex ConjunctionRegex { get; }

        public Regex InexactNumberRegex { get; }

        public Regex InexactNumberUnitRegex { get; }

        public Regex DurationUnitRegex { get; }

        /// <summary>
        /// Not assigned anywhere in this class, so it is always <c>null</c> here.
        /// </summary>
        public Regex SpecialNumberUnitRegex { get; }

        /// <summary>Returns <c>DateTimeDefinitions.CheckBothBeforeAfter</c>.</summary>
        bool IDurationParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;

        // NOTE(review): the generic type arguments of the three map properties below were
        // missing from the source text (bare "IImmutableDictionary"). The arguments used
        // here are assumed — confirm them against IDurationParserConfiguration.
        public IImmutableDictionary<string, string> UnitMap { get; }

        public IImmutableDictionary<string, long> UnitValueMap { get; }

        public IImmutableDictionary<string, double> DoubleNumbers { get; }
    }
}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiHolidayParserConfiguration.cs
new file mode 100644
index 0000000000..58669b2361
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiHolidayParserConfiguration.cs
@@ -0,0 +1,229 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.Hindi;
using DateObject = System.DateTime;

namespace Microsoft.Recognizers.Text.DateTime.Hindi
{
    /// <summary>
    /// Holiday parser configuration for Hindi, layered on <see cref="BaseHolidayParserConfiguration"/>.
    /// </summary>
    public class HindiHolidayParserConfiguration : BaseHolidayParserConfiguration
    {
        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

        /// <summary>
        /// Builds the this/next/previous prefix patterns and loads the holiday regex list and
        /// holiday names.
        /// </summary>
        /// <param name="config">Options configuration forwarded to the base constructor.</param>
        public HindiHolidayParserConfiguration(IDateTimeOptionsConfiguration config)
            : base(config)
        {
            ThisPrefixRegex = new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut);
            NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut);
            PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut);

            HolidayRegexList = HindiHolidayExtractorConfiguration.HolidayRegexList;
            HolidayNames = DateTimeDefinitions.HolidayNames.ToImmutableDictionary();
        }

        /// <summary>Pattern built from <c>DateTimeDefinitions.ThisPrefixRegex</c>.</summary>
        public Regex ThisPrefixRegex { get; }

        /// <summary>Pattern built from <c>DateTimeDefinitions.NextPrefixRegex</c>.</summary>
        public Regex NextPrefixRegex { get; }

        /// <summary>Pattern built from <c>DateTimeDefinitions.PreviousPrefixRegex</c>.</summary>
        public Regex PreviousPrefixRegex { get; }

        /// <summary>
        /// Maps a relative prefix in <paramref name="text"/> to a year offset.
        /// </summary>
        /// <param name="text">Text to inspect; it is trimmed before matching.</param>
        /// <returns>
        /// <c>1</c> for a "next" prefix, <c>-1</c> for a "previous" prefix, <c>0</c> for a
        /// "this" prefix (checked in that order), and <c>-10</c> when none of them matches.
        /// </returns>
        public override int GetSwiftYear(string text)
        {
            var candidate = text.Trim();

            if (NextPrefixRegex.IsMatch(candidate))
            {
                return 1;
            }

            if (PreviousPrefixRegex.IsMatch(candidate))
            {
                return -1;
            }

            // -10 is the value returned when no prefix pattern matched at all.
            return ThisPrefixRegex.IsMatch(candidate) ? 0 : -10;
        }

        /// <summary>
        /// Reduces a holiday name to a compact token: "saint " becomes "st ", then spaces,
        /// apostrophes and periods are removed, in that order.
        /// </summary>
        /// <param name="holiday">Holiday text to sanitize.</param>
        /// <returns>The sanitized token.</returns>
        public override string SanitizeHolidayToken(string holiday)
        {
            // "saint " must be abbreviated before the spaces are stripped.
            var abbreviated = holiday.Replace("saint ", "st ");
            var withoutSpaces = abbreviated.Replace(" ", string.Empty);
            var withoutApostrophes = withoutSpaces.Replace("'", string.Empty);

            return withoutApostrophes.Replace(".", string.Empty);
        }

        // @TODO Change to auto-generate.
+
+        /// <summary>
+        /// Builds the table that maps a holiday key to the function computing that holiday's
+        /// date for a given year. The table is seeded with the entries returned by the base class
+        /// and then extended with the keys listed here.
+        /// </summary>
+        /// <returns>The combined holiday-key to date-function table.</returns>
+        // NOTE(review): the generic type arguments on the return type and on the Dictionary
+        // construction appear to be missing — confirm against the original source.
+        // NOTE(review): a collection initializer adds entries one by one; presumably a key that is
+        // already present in the base table would make this throw — confirm there is no overlap.
+        protected override IDictionary> InitHolidayFuncs()
+        {
+            return new Dictionary>(base.InitHolidayFuncs())
+            {
+                { "maosbirthday", MaoBirthday },
+                { "yuandan", NewYear },
+                { "teachersday", TeacherDay },
+                { "singleday", SinglesDay },
+                // NOTE(review): "allsaintsday" resolves to HalloweenDay (Oct 31) while "allhallowday"
+                // resolves to Nov 1 — confirm this is intended.
+                { "allsaintsday", HalloweenDay },
+                { "youthday", YouthDay },
+                { "childrenday", ChildrenDay },
+                { "femaleday", FemaleDay },
+                { "treeplantingday", TreePlantDay },
+                { "arborday", TreePlantDay },
+                { "girlsday", GirlsDay },
+                { "whiteloverday", WhiteLoverDay },
+                { "loverday", ValentinesDay },
+                { "christmas", ChristmasDay },
+                { "xmas", ChristmasDay },
+                { "newyear", NewYear },
+                { "newyearday", NewYear },
+                { "newyearsday", NewYear },
+                { "inaugurationday", InaugurationDay },
+                // NOTE(review): key is spelled "groundhougday"; verify it matches the key used in the
+                // holiday name definitions before changing it.
+                { "groundhougday", GroundhogDay },
+                { "valentinesday", ValentinesDay },
+                { "stpatrickday", StPatrickDay },
+                { "aprilfools", FoolDay },
+                { "earthday", EarthDay },
+                { "stgeorgeday", StGeorgeDay },
+                { "mayday", Mayday },
+                { "cincodemayoday", CincoDeMayoday },
+                { "baptisteday", BaptisteDay },
+                { "usindependenceday", UsaIndependenceDay },
+                // NOTE(review): plain "independenceday" resolves to July 4 here; the Indian date is
+                // only reachable through "indianindependence" — confirm this is intended for Hindi.
+                { "independenceday", UsaIndependenceDay },
+                { "bastilleday", BastilleDay },
+                { "halloweenday", HalloweenDay },
+                { "allhallowday", AllHallowDay },
+                { "allsoulsday", AllSoulsday },
+                { "guyfawkesday", GuyFawkesDay },
+                { "veteransday", Veteransday },
+                { "christmaseve", ChristmasEve },
+                { "newyeareve", NewYearEve },
+                { "easterday", EasterDay },
+                { "ashwednesday", AshWednesday },
+                { "palmsunday", PalmSunday },
+                { "maundythursday", MaundyThursday },
+                { "goodfriday", GoodFriday },
+                { "eastersaturday", EasterSaturday },
+                { "eastermonday", EasterMonday },
+                { "ascensionday", AscensionDay },
+                { "whitesunday", WhiteSunday },
+                { "whitemonday", WhiteMonday },
+                { "trinitysunday", TrinitySunday },
+                { "corpuschristi", CorpusChristi },
+                { "indianindependence", IndianIndependence },
+                { "republicday", RepublicDay },
+                { "yogaday", YogaDay },
+                { "holi", HoliDay },
+                { "diwali", DiwaliDay },
+                { "gandhijayanti", GandhiJayanti },
+                { "rakshabandhan", RakshaBandhanDay },
+                { "vaishakhi", VaishakhiDay },
+            };
+        }
+
+        // Date calculators referenced by the table above. Unless noted otherwise each one returns
+        // a fixed month/day in the requested year.
+        private static DateObject IndianIndependence(int year) => new DateObject(year, 8, 15);
+
+        private static DateObject RepublicDay(int year) => new DateObject(year, 1, 26);
+
+        private static DateObject YogaDay(int year) => new DateObject(year, 6, 21);
+
+        // Holi and Diwali are not fixed dates; both delegate to the same HolidayFunctions helper,
+        // selected by the isHoli flag.
+        private static DateObject HoliDay(int year) => HolidayFunctions.CalculateHoliDiwaliDate(year, isHoli: true);
+
+        private static DateObject DiwaliDay(int year) => HolidayFunctions.CalculateHoliDiwaliDate(year, isHoli: false);
+
+        private static DateObject GandhiJayanti(int year) => new DateObject(year, 10, 2);
+
+        private static DateObject NewYear(int year) => new DateObject(year, 1, 1);
+
+        private static DateObject NewYearEve(int year) => new DateObject(year, 12, 31);
+
+        private static DateObject ChristmasDay(int year) => new DateObject(year, 12, 25);
+
+        private static DateObject ChristmasEve(int year) => new DateObject(year, 12, 24);
+
+        private static DateObject ValentinesDay(int year) => new DateObject(year, 2, 14);
+
+        private static DateObject WhiteLoverDay(int year) => new DateObject(year, 3, 14);
+
+        private static DateObject FoolDay(int year) => new DateObject(year, 4, 1);
+
+        private static DateObject EarthDay(int year) => new DateObject(year, 4, 22);
+
+        private static DateObject GirlsDay(int year) => new DateObject(year, 3, 7);
+
+        private static DateObject TreePlantDay(int year) => new DateObject(year, 3, 12);
+
+        private static DateObject FemaleDay(int year) => new DateObject(year, 3, 8);
+
+        private static DateObject ChildrenDay(int year) => new DateObject(year, 6, 1);
+
+        private static DateObject YouthDay(int year) => new DateObject(year, 5, 4);
+
+        private static DateObject TeacherDay(int year) => new DateObject(year, 9, 10);
+
+        private static DateObject SinglesDay(int year) => new DateObject(year, 11, 11);
+
+        private static DateObject MaoBirthday(int year) => new DateObject(year, 12, 26);
+
+        // Fixed-date holidays (continued): each returns the given month/day in the requested year.
+        private static DateObject InaugurationDay(int year) => new DateObject(year, 1, 20);
+
+        private static DateObject GroundhogDay(int year) => new DateObject(year, 2, 2);
+
+        private static DateObject StPatrickDay(int year) => new DateObject(year, 3, 17);
+
+        private static DateObject StGeorgeDay(int year) => new DateObject(year, 4, 23);
+
+        private static DateObject Mayday(int year) => new DateObject(year, 5, 1);
+
+        private static DateObject CincoDeMayoday(int year) => new DateObject(year, 5, 5);
+
+        private static DateObject BaptisteDay(int year) => new DateObject(year, 6, 24);
+
+        private static DateObject UsaIndependenceDay(int year) => new DateObject(year, 7, 4);
+
+        private static DateObject BastilleDay(int year) => new DateObject(year, 7, 14);
+
+        private static DateObject HalloweenDay(int year) => new DateObject(year, 10, 31);
+
+        private static DateObject AllHallowDay(int year) => new DateObject(year, 11, 1);
+
+        private static DateObject AllSoulsday(int year) => new DateObject(year, 11, 2);
+
+        private static DateObject GuyFawkesDay(int year) => new DateObject(year, 11, 5);
+
+        private static DateObject Veteransday(int year) => new DateObject(year, 11, 11);
+
+        // Easter comes from HolidayFunctions; the holidays below are expressed as a day offset
+        // from that date (negative = before Easter, positive = after).
+        private static DateObject EasterDay(int year) => HolidayFunctions.CalculateHolidayByEaster(year);
+
+        private static DateObject AshWednesday(int year) => EasterDay(year).AddDays(-46);
+
+        private static DateObject PalmSunday(int year) => EasterDay(year).AddDays(-7);
+
+        private static DateObject MaundyThursday(int year) => EasterDay(year).AddDays(-3);
+
+        private static DateObject GoodFriday(int year) => EasterDay(year).AddDays(-2);
+
+        private static DateObject EasterSaturday(int year) => EasterDay(year).AddDays(-1);
+
+        private static DateObject EasterMonday(int year) => EasterDay(year).AddDays(1);
+
+        private static DateObject AscensionDay(int year) => EasterDay(year).AddDays(39);
+
+        private static DateObject WhiteSunday(int year) => EasterDay(year).AddDays(49);
+
+        private static DateObject WhiteMonday(int year) => EasterDay(year).AddDays(50);
+
+        private static DateObject TrinitySunday(int year) => EasterDay(year).AddDays(56);
+
+        private static DateObject CorpusChristi(int year) => EasterDay(year).AddDays(60);
+
+        // Raksha Bandhan and Vaishakhi share one HolidayFunctions helper, selected by the
+        // isRakshabandhan flag.
+        private static DateObject RakshaBandhanDay(int year) => HolidayFunctions.CalculateRakshaBandhanVaishakhiDate(year, isRakshabandhan: true);
+
+        private static DateObject VaishakhiDay(int year) => HolidayFunctions.CalculateRakshaBandhanVaishakhiDate(year, isRakshabandhan: false);
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiMergedParserConfiguration.cs
new file mode 100644
index 0000000000..d2c6ace6d1
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiMergedParserConfiguration.cs
@@ -0,0 +1,55 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Definitions.Hindi;
+using Microsoft.Recognizers.Text.Matcher;
+
+namespace Microsoft.Recognizers.Text.DateTime.Hindi
+{
+    /// <summary>
+    /// Merged parser configuration for Hindi. Reuses the regexes and the superfluous-word matcher
+    /// of the Hindi merged extractor configuration and creates the period, set, holiday and
+    /// time-zone parsers.
+    /// </summary>
+    public class HindiMergedParserConfiguration : HindiCommonDateTimeParserConfiguration, IMergedParserConfiguration
+    {
+        /// <summary>
+        /// Wires up the regexes and the sub-parsers.
+        /// </summary>
+        /// <param name="config">Options configuration forwarded to the base class.</param>
+        public HindiMergedParserConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+            BeforeRegex = HindiMergedExtractorConfiguration.BeforeRegex;
+            AfterRegex = HindiMergedExtractorConfiguration.AfterRegex;
+            SinceRegex = HindiMergedExtractorConfiguration.SinceRegex;
+            AroundRegex = HindiMergedExtractorConfiguration.AroundRegex;
+            EqualRegex = HindiMergedExtractorConfiguration.EqualRegex;
+            SuffixAfter = HindiMergedExtractorConfiguration.SuffixAfterRegex;
+            YearRegex = HindiDatePeriodExtractorConfiguration.YearRegex;
+
+            SuperfluousWordMatcher = HindiMergedExtractorConfiguration.SuperfluousWordMatcher;
+
+            // Each sub-parser configuration receives this instance while it is still being
+            // constructed, so the order of these statements should not be changed casually.
+            DatePeriodParser = new BaseDatePeriodParser(new HindiDatePeriodParserConfiguration(this));
+            TimePeriodParser = new BaseTimePeriodParser(new HindiTimePeriodParserConfiguration(this));
+            DateTimePeriodParser = new BaseDateTimePeriodParser(new HindiDateTimePeriodParserConfiguration(this));
+            SetParser = new BaseSetParser(new HindiSetParserConfiguration(this));
+            HolidayParser = new BaseHolidayParser(new HindiHolidayParserConfiguration(this));
+            TimeZoneParser = new BaseTimeZoneParser(new HindiTimeZoneParserConfiguration(this));
+        }
+
+        public Regex BeforeRegex { get; }
+
+        public Regex AfterRegex { get; }
+
+        public Regex SinceRegex { get; }
+
+        public Regex AroundRegex { get; }
+
+        public Regex EqualRegex { get; }
+
+        public Regex SuffixAfter { get; }
+
+        public Regex YearRegex { get; }
+
+        public IDateTimeParser SetParser { get; }
+
+        public IDateTimeParser HolidayParser { get; }
+
+        public StringMatcher SuperfluousWordMatcher { get; }
+
+        // Explicit interface member: forwards the language-level flag from DateTimeDefinitions.
+        bool IMergedParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiSetParserConfiguration.cs
new file mode 100644
index 0000000000..ec000545b1
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiSetParserConfiguration.cs
@@ -0,0 +1,161 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Definitions.Hindi;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+using Microsoft.Recognizers.Text.Utilities;
+
+namespace Microsoft.Recognizers.Text.DateTime.Hindi
+{
+    /// <summary>
+    /// Set ("every day", "weekly", ...) parser configuration for Hindi. Extractors, parsers and
+    /// the unit map are copied from the common parser configuration; the regexes come from the
+    /// Hindi set extractor configuration.
+    /// </summary>
+    public class HindiSetParserConfiguration : BaseDateTimeOptionsConfiguration, ISetParserConfiguration
+    {
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
+
+        private static readonly Regex DateUnitRegex =
+            new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut);
+
+        // pass FutureTerms as List to ReplaceValueInTextWithFutTerm function
+        // NOTE(review): the generic type arguments on this declaration and cast appear to be
+        // missing — confirm against the original source.
+        private static readonly List ThisTerms = (List)DateTimeDefinitions.FutureTerms;
+
+        /// <summary>
+        /// Copies the shared extractors, parsers and unit map from <paramref name="config"/> and
+        /// takes the set regexes from the Hindi set extractor configuration.
+        /// </summary>
+        /// <param name="config">Common date-time parser configuration to copy from.</param>
+        public HindiSetParserConfiguration(ICommonDateTimeParserConfiguration config)
+            : base(config)
+        {
+            DurationExtractor = config.DurationExtractor;
+            TimeExtractor = config.TimeExtractor;
+            DateExtractor = config.DateExtractor;
+            DateTimeExtractor = config.DateTimeExtractor;
+            DatePeriodExtractor = config.DatePeriodExtractor;
+            TimePeriodExtractor = config.TimePeriodExtractor;
+            DateTimePeriodExtractor = config.DateTimePeriodExtractor;
+
+            DurationParser = config.DurationParser;
+            TimeParser = config.TimeParser;
+            DateParser = config.DateParser;
+            DateTimeParser = config.DateTimeParser;
+            DatePeriodParser = config.DatePeriodParser;
+            TimePeriodParser = config.TimePeriodParser;
+            DateTimePeriodParser = config.DateTimePeriodParser;
+            UnitMap = config.UnitMap;
+
+            EachPrefixRegex = HindiSetExtractorConfiguration.EachPrefixRegex;
+            PeriodicRegex = HindiSetExtractorConfiguration.PeriodicRegex;
+            EachUnitRegex = HindiSetExtractorConfiguration.EachUnitRegex;
+            EachDayRegex = HindiSetExtractorConfiguration.EachDayRegex;
+            SetWeekDayRegex = HindiSetExtractorConfiguration.SetWeekDayRegex;
+            SetEachRegex = HindiSetExtractorConfiguration.SetEachRegex;
+        }
+
+        public IDateTimeExtractor DurationExtractor { get; }
+
+        public IDateTimeParser DurationParser { get; }
+
+        public IDateTimeExtractor TimeExtractor { get; }
+
+        public IDateTimeParser TimeParser { get; }
+
+        public IDateExtractor DateExtractor { get; }
+
+        public IDateTimeParser DateParser { get; }
+
+        public IDateTimeExtractor DateTimeExtractor { get; }
+
+        public IDateTimeParser DateTimeParser { get; }
+
+        public IDateTimeExtractor DatePeriodExtractor { get; }
+
+        public IDateTimeParser DatePeriodParser { get; }
+
+        public IDateTimeExtractor TimePeriodExtractor { get; }
+
+        public IDateTimeParser TimePeriodParser { get; }
+
+        public IDateTimeExtractor DateTimePeriodExtractor { get; }
+
+        public IDateTimeParser DateTimePeriodParser { get; }
+
+        public IImmutableDictionary UnitMap { get; }
+
+        public Regex EachPrefixRegex { get; }
+
+        public Regex PeriodicRegex { get; }
+
+        public Regex EachUnitRegex { get; }
+
+        public Regex EachDayRegex { get; }
+
+        public Regex SetWeekDayRegex { get; }
+
+        public Regex SetEachRegex { get; }
+
+        /// <summary>
+        /// Matches <paramref name="text"/> against <see cref="PeriodicRegex"/> and maps the named
+        /// group that succeeded to a duration timex: daily = P1D, weekly = P1W, biweekly = P2W,
+        /// monthly = P1M, yearly = P1Y.
+        /// </summary>
+        /// <param name="text">Text to match; it is trimmed first.</param>
+        /// <param name="timex">The timex on success; null when no group matched.</param>
+        /// <returns>True when one of the groups matched, false otherwise.</returns>
+        public bool GetMatchedDailyTimex(string text, out string timex)
+        {
+            var trimmedText = text.Trim();
+            var match = PeriodicRegex.MatchExact(trimmedText, trim: true);
+
+            if (match.Groups["daily"].Success)
+            {
+                timex = "P1D";
+            }
+            else if (match.Groups["weekly"].Success)
+            {
+                timex = "P1W";
+            }
+            else if (match.Groups["biweekly"].Success)
+            {
+                timex = "P2W";
+            }
+            else if (match.Groups["monthly"].Success)
+            {
+                timex = "P1M";
+            }
+            else if (match.Groups["yearly"].Success)
+            {
+                timex = "P1Y";
+            }
+            else
+            {
+                timex = null;
+                return false;
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Matches <paramref name="text"/> against the date-unit regex and maps the named group
+        /// that succeeded to a one-unit duration timex: day = P1D, week = P1W, month = P1M,
+        /// year = P1Y.
+        /// </summary>
+        /// <param name="text">Text to match; it is trimmed first.</param>
+        /// <param name="timex">The timex on success; null when no group matched.</param>
+        /// <returns>True when one of the groups matched, false otherwise.</returns>
+        public bool GetMatchedUnitTimex(string text, out string timex)
+        {
+            var trimmedText = text.Trim();
+            var match = DateUnitRegex.MatchExact(trimmedText, trim: true);
+
+            if (match.Groups["day"].Success)
+            {
+                timex = "P1D";
+            }
+            else if (match.Groups["week"].Success)
+            {
+                timex = "P1W";
+            }
+            else if (match.Groups["month"].Success)
+            {
+                timex = "P1M";
+            }
+            else if (match.Groups["year"].Success)
+            {
+                timex = "P1Y";
+            }
+            else
+            {
+                timex = null;
+                return false;
+            }
+
+            return true;
+        }
+
+        // Both members below delegate to shared handlers; the second one passes ThisTerms along.
+        public string WeekDayGroupMatchString(Match match) => SetHandler.WeekDayGroupMatchString(match);
+
+        public string ReplaceValueInTextWithFutTerm(string text, string value) => TasksModeSetHandler.ReplaceValueInTextWithFutTerm(text, value, ThisTerms);
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiTimeParserConfiguration.cs
new file mode 100644
index 0000000000..1ec54253d9
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiTimeParserConfiguration.cs
@@ -0,0 +1,191 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Globalization;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Definitions.Hindi;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+using Microsoft.Recognizers.Text.Utilities;
+
+namespace Microsoft.Recognizers.Text.DateTime.Hindi
+{
+    /// <summary>
+    /// Time parser configuration for Hindi: holds the regexes used to interpret time prefixes
+    /// (half / quarter / three-quarter / "to") and suffixes (am / pm, lunch, night) and adjusts a
+    /// parsed hour and minute accordingly.
+    /// </summary>
+    public class HindiTimeParserConfiguration : BaseDateTimeOptionsConfiguration, ITimeParserConfiguration
+    {
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
+
+        private static readonly Regex TimeSuffixFull =
+            new Regex(DateTimeDefinitions.TimeSuffixFull, RegexFlags, RegexTimeOut);
+
+        private static readonly Regex LunchRegex =
+            new Regex(DateTimeDefinitions.LunchRegex, RegexFlags, RegexTimeOut);
+
+        private static readonly Regex NightRegex =
+            new Regex(DateTimeDefinitions.NightRegex, RegexFlags, RegexTimeOut);
+
+        private static readonly Regex HalfTokenRegex =
+            new Regex(DateTimeDefinitions.HalfTokenRegex, RegexFlags, RegexTimeOut);
+
+        private static readonly Regex QuarterTokenRegex =
+            new Regex(DateTimeDefinitions.QuarterTokenRegex, RegexFlags, RegexTimeOut);
+
+        private static readonly Regex ThreeQuarterTokenRegex =
+            new Regex(DateTimeDefinitions.ThreeQuarterTokenRegex, RegexFlags, RegexTimeOut);
+
+        private static readonly Regex ToTokenRegex =
+            new Regex(DateTimeDefinitions.ToTokenRegex, RegexFlags, RegexTimeOut);
+
+        /// <summary>
+        /// Takes the time token prefix from the definitions, the "at" regex and time regex list
+        /// from the Hindi time extractor configuration, and the remaining members from
+        /// <paramref name="config"/>.
+        /// </summary>
+        /// <param name="config">Common date-time parser configuration to copy from.</param>
+        public HindiTimeParserConfiguration(ICommonDateTimeParserConfiguration config)
+            : base(config)
+        {
+            TimeTokenPrefix = DateTimeDefinitions.TimeTokenPrefix;
+            AtRegex = HindiTimeExtractorConfiguration.AtRegex;
+            TimeRegexes = HindiTimeExtractorConfiguration.TimeRegexList;
+            UtilityConfiguration = config.UtilityConfiguration;
+            Numbers = config.Numbers;
+            TimeZoneParser = config.TimeZoneParser;
+        }
+
+        public string TimeTokenPrefix { get; }
+
+        public Regex AtRegex { get; }
+
+        // NOTE(review): this get-only property is never assigned in the constructor, so it stays
+        // null — confirm that no caller dereferences it.
+        public Regex MealTimeRegex { get; }
+
+        public IEnumerable TimeRegexes { get; }
+
+        public IImmutableDictionary Numbers { get; }
+
+        public IDateTimeUtilityConfiguration UtilityConfiguration { get; }
+
+        public IDateTimeParser TimeZoneParser { get; }
+
+        /// <summary>
+        /// Applies a minute offset described by <paramref name="prefix"/> to the parsed time.
+        /// The offset is 30, 15 or 45 for the half, quarter and three-quarter tokens; otherwise it
+        /// is read from the "deltamin" (digits) or "deltaminnum" (number word) group of the
+        /// less-than-one-hour regex. When the "to" token is present the offset is subtracted.
+        /// </summary>
+        /// <param name="prefix">Prefix text; it is trimmed before matching.</param>
+        /// <param name="hour">Hour to adjust; decremented when the minutes go below zero.</param>
+        /// <param name="min">Minutes to adjust.</param>
+        /// <param name="hasMin">Always set to true.</param>
+        public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool hasMin)
+        {
+            int deltaMin;
+
+            var trimedPrefix = prefix.Trim();
+
+            if (HalfTokenRegex.IsMatch(trimedPrefix))
+            {
+                deltaMin = 30;
+            }
+            else if (QuarterTokenRegex.IsMatch(trimedPrefix))
+            {
+                deltaMin = 15;
+            }
+            else if (ThreeQuarterTokenRegex.IsMatch(trimedPrefix))
+            {
+                deltaMin = 45;
+            }
+            else
+            {
+                var match = HindiTimeExtractorConfiguration.LessThanOneHour.Match(trimedPrefix);
+                var minStr = match.Groups["deltamin"].Value;
+                if (!string.IsNullOrWhiteSpace(minStr))
+                {
+                    deltaMin = int.Parse(minStr, CultureInfo.InvariantCulture);
+                }
+                else
+                {
+                    // NOTE(review): if neither group matched, minStr is empty here and the lookup
+                    // presumably throws — confirm callers only pass prefixes that match.
+                    minStr = match.Groups["deltaminnum"].Value;
+                    deltaMin = Numbers[minStr];
+                }
+            }
+
+            if (ToTokenRegex.IsMatch(trimedPrefix))
+            {
+                deltaMin = -deltaMin;
+            }
+
+            min += deltaMin;
+            if (min < 0)
+            {
+                // Borrow one hour. NOTE(review): hour is not wrapped here, so an input hour of 0
+                // becomes -1 — confirm this is handled downstream.
+                min += 60;
+                hour -= 1;
+            }
+
+            hasMin = true;
+        }
+
+        /// <summary>
+        /// Applies an am/pm style suffix to the parsed hour. Nothing is adjusted when the suffix
+        /// does not match the full suffix regex or when its "oclock" group is non-empty. The hour
+        /// is always reduced modulo 24 at the end.
+        /// </summary>
+        /// <param name="suffix">Suffix text following the time.</param>
+        /// <param name="hour">Hour to adjust.</param>
+        /// <param name="min">Not modified by this method.</param>
+        /// <param name="hasMin">Not modified by this method.</param>
+        /// <param name="hasAm">Set to true when the result is treated as an AM time.</param>
+        /// <param name="hasPm">Set to true when the result is treated as a PM time.</param>
+        public void AdjustBySuffix(string suffix, ref int hour, ref int min, ref bool hasMin, ref bool hasAm, ref bool hasPm)
+        {
+            var deltaHour = 0;
+            var match = TimeSuffixFull.MatchExact(suffix, trim: true);
+
+            if (match.Success)
+            {
+                var oclockStr = match.Groups["oclock"].Value;
+
+                if (string.IsNullOrEmpty(oclockStr))
+                {
+                    var matchAmStr = match.Groups[Constants.AmGroupName].Value;
+                    if (!string.IsNullOrEmpty(matchAmStr))
+                    {
+                        // AM with an hour at or above the half-day count: shift back by half a day.
+                        if (hour >= Constants.HalfDayHourCount)
+                        {
+                            deltaHour = -Constants.HalfDayHourCount;
+                        }
+                        else
+                        {
+                            hasAm = true;
+                        }
+                    }
+
+                    var matchPmStr = match.Groups[Constants.PmGroupName].Value;
+                    if (!string.IsNullOrEmpty(matchPmStr))
+                    {
+                        // Default PM handling: hours below the half-day count move forward by it.
+                        if (hour < Constants.HalfDayHourCount)
+                        {
+                            deltaHour = Constants.HalfDayHourCount;
+                        }
+
+                        if (LunchRegex.IsMatch(matchPmStr))
+                        {
+                            // Lunch-time suffix: hours from 10 up to the half-day count are kept
+                            // as written instead of being shifted.
+                            if (hour >= 10 && hour <= Constants.HalfDayHourCount)
+                            {
+                                deltaHour = 0;
+                                if (hour == Constants.HalfDayHourCount)
+                                {
+                                    hasPm = true;
+                                }
+                                else
+                                {
+                                    hasAm = true;
+                                }
+                            }
+                            else
+                            {
+                                hasPm = true;
+                            }
+                        }
+                        else if (NightRegex.IsMatch(matchPmStr))
+                        {
+                            // Night suffix: hours up to 3, or exactly the half-day count, are
+                            // treated as AM; the half-day count itself becomes hour 0.
+                            if (hour <= 3 || hour == Constants.HalfDayHourCount)
+                            {
+                                if (hour == Constants.HalfDayHourCount)
+                                {
+                                    hour = 0;
+                                }
+
+                                deltaHour = 0;
+                                hasAm = true;
+                            }
+                            else
+                            {
+                                hasPm = true;
+                            }
+                        }
+                        else
+                        {
+                            hasPm = true;
+                        }
+                    }
+                }
+            }
+
+            hour = (hour + deltaHour) % 24;
+        }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiTimePeriodParserConfiguration.cs
new file mode 100644
index 0000000000..da3ef257b7
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiTimePeriodParserConfiguration.cs
@@ -0,0 +1,120 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Immutable;
+using System.Linq;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Definitions.Hindi;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+using Microsoft.Recognizers.Text.Utilities;
+
+namespace Microsoft.Recognizers.Text.DateTime.Hindi
+{
+    /// <summary>
+    /// Time-period parser configuration for Hindi. Extractors, parsers, numbers and the utility
+    /// configuration are copied from the common parser configuration; the regexes come from the
+    /// Hindi time-period extractor configuration.
+    /// </summary>
+    public class HindiTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, ITimePeriodParserConfiguration
+    {
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
+
+        private static readonly Regex PluralTokenRegex =
+            new Regex(DateTimeDefinitions.PluralTokenRegex, RegexFlags, RegexTimeOut);
+
+        /// <summary>
+        /// Copies the shared members from <paramref name="config"/> and the regexes from the
+        /// Hindi time-period extractor configuration.
+        /// </summary>
+        /// <param name="config">Common date-time parser configuration to copy from.</param>
+        public HindiTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration config)
+            : base(config)
+        {
+            TimeExtractor = config.TimeExtractor;
+            IntegerExtractor = config.IntegerExtractor;
+            TimeParser = config.TimeParser;
+            TimeZoneParser = config.TimeZoneParser;
+
+            PureNumberFromToRegex = HindiTimePeriodExtractorConfiguration.PureNumFromTo;
+            PureNumberBetweenAndRegex = HindiTimePeriodExtractorConfiguration.PureNumBetweenAnd;
+            SpecificTimeFromToRegex = HindiTimePeriodExtractorConfiguration.SpecificTimeFromTo;
+            SpecificTimeBetweenAndRegex = HindiTimePeriodExtractorConfiguration.SpecificTimeBetweenAnd;
+            TimeOfDayRegex = HindiTimePeriodExtractorConfiguration.TimeOfDayRegex;
+            GeneralEndingRegex = HindiTimePeriodExtractorConfiguration.GeneralEndingRegex;
+            TillRegex = HindiTimePeriodExtractorConfiguration.TillRegex;
+
+            Numbers = config.Numbers;
+            UtilityConfiguration = config.UtilityConfiguration;
+        }
+
+        public IDateTimeExtractor TimeExtractor { get; }
+
+        public IDateTimeParser TimeParser { get; }
+
+        public IExtractor IntegerExtractor { get; }
+
+        public IDateTimeParser TimeZoneParser { get; }
+
+        public Regex SpecificTimeFromToRegex { get; }
+
+        public Regex SpecificTimeBetweenAndRegex { get; }
+
+        public Regex PureNumberFromToRegex { get; }
+
+        public Regex PureNumberBetweenAndRegex { get; }
+
+        public Regex TimeOfDayRegex { get; }
+
+        public Regex GeneralEndingRegex { get; }
+
+        public Regex TillRegex { get; }
+
+        // NOTE(review): the generic type arguments on this declaration appear to be missing —
+        // confirm against the original source.
+        public IImmutableDictionary Numbers { get; }
+
+        public IDateTimeUtilityConfiguration UtilityConfiguration { get; }
+
+        /// <summary>
+        /// Classifies <paramref name="text"/> as a time of day (morning, afternoon, evening,
+        /// daytime, night or business hours) and resolves it to a timex and an hour range.
+        /// </summary>
+        /// <param name="text">Text to classify; it is trimmed first.</param>
+        /// <param name="timex">Timex of the matched time of day; null when nothing matched.</param>
+        /// <param name="beginHour">Begin hour of the range; 0 when nothing matched.</param>
+        /// <param name="endHour">End hour of the range; 0 when nothing matched.</param>
+        /// <param name="endMin">End minute of the range; 0 when nothing matched.</param>
+        /// <returns>True when a time-of-day term was recognized, false otherwise.</returns>
+        public bool GetMatchedTimeRange(string text, out string timex, out int beginHour, out int endHour, out int endMin)
+        {
+            var trimmedText = text.Trim();
+            var pluralMatch = PluralTokenRegex.MatchBegin(trimmedText, trim: true);
+            if (pluralMatch.Success)
+            {
+                // Drop the leading plural token so the term lists below see the bare term.
+                // NOTE(review): this removes pluralMatch.Length characters from the start, which
+                // assumes the match begins at index 0 — confirm against MatchBegin.
+                trimmedText = trimmedText.Substring(pluralMatch.Length).Trim();
+            }
+
+            beginHour = 0;
+            endHour = 0;
+            endMin = 0;
+
+            // The checks run in this order and the first hit wins. Morning, afternoon, evening and
+            // night are prefix matches, daytime requires the whole text to equal a term, and
+            // business hours only need to contain one of the split strings.
+            var timeOfDay = string.Empty;
+            if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.StartsWith(o, StringComparison.InvariantCulture)))
+            {
+                timeOfDay = Constants.Morning;
+            }
+            else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.StartsWith(o, StringComparison.InvariantCulture)))
+            {
+                timeOfDay = Constants.Afternoon;
+            }
+            else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.StartsWith(o, StringComparison.InvariantCulture)))
+            {
+                timeOfDay = Constants.Evening;
+            }
+            else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o, StringComparison.InvariantCulture)))
+            {
+                timeOfDay = Constants.Daytime;
+            }
+            else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.StartsWith(o, StringComparison.InvariantCulture)))
+            {
+                timeOfDay = Constants.Night;
+            }
+            else if (DateTimeDefinitions.BusinessHourSplitStrings.Any(o => trimmedText.Contains(o)))
+            {
+                timeOfDay = Constants.BusinessHour;
+            }
+            else
+            {
+                timex = null;
+                return false;
+            }
+
+            var parseResult = TimexUtility.ResolveTimeOfDay(timeOfDay);
+            timex = parseResult.Timex;
+            beginHour = parseResult.BeginHour;
+            endHour = parseResult.EndHour;
+            endMin = parseResult.EndMin;
+
+            return true;
+        }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiTimeZoneParserConfiguration.cs
b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiTimeZoneParserConfiguration.cs
new file mode 100644
index 0000000000..ddbeac63a7
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiTimeZoneParserConfiguration.cs
@@ -0,0 +1,33 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+
+using Microsoft.Recognizers.Definitions.Hindi;
+
+namespace Microsoft.Recognizers.Text.DateTime.Hindi
+{
+    /// <summary>
+    /// Time-zone parser configuration for Hindi. Every value is taken from
+    /// <see cref="TimeZoneDefinitions"/> and exposed both as a public static member and through
+    /// the explicit <see cref="ITimeZoneParserConfiguration"/> implementation.
+    /// </summary>
+    public class HindiTimeZoneParserConfiguration : BaseDateTimeOptionsConfiguration, ITimeZoneParserConfiguration
+    {
+        public static readonly string TimeZoneEndRegex = TimeZoneDefinitions.TimeZoneEndRegex;
+
+        // NOTE(review): the generic type arguments on the Dictionary declarations in this class
+        // appear to be missing — confirm against the original source.
+        public static readonly Dictionary FullToMinMapping = TimeZoneDefinitions.FullToMinMapping;
+
+        // Constructed with RegexTimeOut, like the other Hindi regexes, so that matching against
+        // arbitrary input is bounded instead of being able to run indefinitely.
+        public static readonly Regex DirectUtcRegex =
+            new Regex(TimeZoneDefinitions.DirectUtcRegex, RegexOptions.IgnoreCase | RegexOptions.Singleline, RegexTimeOut);
+
+        public static readonly Dictionary AbbrToMinMapping = TimeZoneDefinitions.AbbrToMinMapping;
+
+        /// <summary>
+        /// Creates the configuration; all state is static, so only the options are forwarded.
+        /// </summary>
+        /// <param name="config">Options configuration forwarded to the base class.</param>
+        public HindiTimeZoneParserConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+        }
+
+        // Explicit interface members: each one returns the static member of the same name.
+        string ITimeZoneParserConfiguration.TimeZoneEndRegex => TimeZoneEndRegex;
+
+        Dictionary ITimeZoneParserConfiguration.FullToMinMapping => FullToMinMapping;
+
+        Regex ITimeZoneParserConfiguration.DirectUtcRegex => DirectUtcRegex;
+
+        Dictionary ITimeZoneParserConfiguration.AbbrToMinMapping => AbbrToMinMapping;
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/TimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/TimeParser.cs
new file mode 100644
index 0000000000..ac87c5285a
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/TimeParser.cs
@@ -0,0 +1,58 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Globalization;
+
+using Microsoft.Recognizers.Text.Utilities;
+
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime.Hindi
+{
+    /// <summary>
+    /// Hindi time parser: tries the base time parsing first and, when that fails, falls back to
+    /// approximate "-ish" expressions.
+    /// </summary>
+    public class TimeParser : BaseTimeParser
+    {
+        public TimeParser(ITimeParserConfiguration configuration)
+            : base(configuration)
+        {
+        }
+
+        /// <summary>
+        /// Parses <paramref name="text"/> with the base implementation and only consults
+        /// <see cref="ParseIsh"/> when the base result is unsuccessful.
+        /// </summary>
+        /// <param name="text">Text to parse.</param>
+        /// <param name="referenceTime">Reference date used to build the resolved value.</param>
+        /// <returns>The base result when it succeeded, otherwise the "-ish" result.</returns>
+        protected override DateTimeResolutionResult InternalParse(string text, DateObject referenceTime)
+        {
+            var parsed = base.InternalParse(text, referenceTime);
+            return parsed.Success ? parsed : ParseIsh(text, referenceTime);
+        }
+
+        /// <summary>
+        /// Handles approximate times such as "noonish" or "11-ish". When the hour group is empty
+        /// the hour defaults to <c>Constants.HalfDayHourCount</c>; minutes and seconds are zero.
+        /// </summary>
+        /// <param name="text">Text to match against the "-ish" regex.</param>
+        /// <param name="referenceTime">Supplies the year, month and day of the result.</param>
+        /// <returns>A successful result when the regex matches, otherwise an empty result.</returns>
+        private DateTimeResolutionResult ParseIsh(string text, DateObject referenceTime)
+        {
+            var result = new DateTimeResolutionResult();
+
+            var ishMatch = HindiTimeExtractorConfiguration.IshRegex.MatchExact(text, trim: true);
+            if (!ishMatch.Success)
+            {
+                return result;
+            }
+
+            var hourText = ishMatch.Groups[Constants.HourGroupName].Value;
+            var hour = string.IsNullOrEmpty(hourText)
+                ? Constants.HalfDayHourCount
+                : int.Parse(hourText, CultureInfo.InvariantCulture);
+
+            result.Timex = "T" + hour.ToString("D2", CultureInfo.InvariantCulture);
+
+            // Past and future resolutions are the same instant on the reference date.
+            result.FutureValue =
+                result.PastValue =
+                    DateObject.MinValue.SafeCreateFromValue(referenceTime.Year, referenceTime.Month, referenceTime.Day, hour, 0, 0);
+            result.Success = true;
+
+            return result;
+        }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Utilities/HindiDatetimeUtilityConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Utilities/HindiDatetimeUtilityConfiguration.cs
new file mode 100644
index 0000000000..329ce4830b
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Utilities/HindiDatetimeUtilityConfiguration.cs
@@ -0,0 +1,32 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Definitions.Hindi;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+
+namespace Microsoft.Recognizers.Text.DateTime.Hindi.Utilities
+{
+    /// <summary>
+    /// Date-time utility configuration for Hindi. It has no behavior of its own: the constructor
+    /// hands the Hindi regex definitions, the regex options and the before/after flag to the
+    /// base class.
+    /// </summary>
+    public class HindiDatetimeUtilityConfiguration : BaseDatetimeUtilityConfiguration
+    {
+        // Options applied to every regex pattern handed to the base class.
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
+
+        /// <summary>
+        /// Passes the Hindi patterns to the base constructor. The arguments are positional, so
+        /// their order must stay exactly as listed.
+        /// </summary>
+        public HindiDatetimeUtilityConfiguration()
+            : base(
+                  DateTimeDefinitions.AgoRegex,
+                  DateTimeDefinitions.LaterRegex,
+                  DateTimeDefinitions.InConnectorRegex,
+                  DateTimeDefinitions.SinceYearSuffixRegex,
+                  DateTimeDefinitions.WithinNextPrefixRegex,
+                  DateTimeDefinitions.AmDescRegex,
+                  DateTimeDefinitions.PmDescRegex,
+                  DateTimeDefinitions.AmPmDescRegex,
+                  DateTimeDefinitions.RangeUnitRegex,
+                  DateTimeDefinitions.TimeUnitRegex,
+                  DateTimeDefinitions.DateUnitRegex,
+                  DateTimeDefinitions.CommonDatePrefixRegex,
+                  DateTimeDefinitions.RangePrefixRegex,
+                  RegexFlags,
+                  DateTimeDefinitions.CheckBothBeforeAfter)
+        {
+        }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateExtractorConfiguration.cs
index e9ae145f5f..f327bbbc79 100644
--- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateExtractorConfiguration.cs
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateExtractorConfiguration.cs
@@ -1,4 +1,7 @@
-using System.Collections.Generic;
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; @@ -13,77 +16,77 @@ namespace Microsoft.Recognizers.Text.DateTime.Italian public class ItalianDateExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateExtractorConfiguration { public static readonly Regex MonthRegex = - new Regex(DateTimeDefinitions.MonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex OnRegex = - new Regex(DateTimeDefinitions.OnRegex, RegexFlags); + new Regex(DateTimeDefinitions.OnRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelaxedOnRegex = - new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisRegex = - new Regex(DateTimeDefinitions.ThisRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisRegex, RegexFlags, RegexTimeOut); public static readonly Regex LastDateRegex = - new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextDateRegex = - new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.NextDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); // day before yesterday, day after tomorrow, next day, last day, the day yesterday, the day tomorrow public static readonly Regex SpecialDayRegex = - new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDayWithNumRegex = - new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex StrictWeekDay = - new Regex(DateTimeDefinitions.StrictWeekDay, RegexFlags); + new Regex(DateTimeDefinitions.StrictWeekDay, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayOfMonthRegex = - new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDate = - new Regex(DateTimeDefinitions.SpecialDate, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDate, RegexFlags, RegexTimeOut); public static readonly Regex RelativeWeekDayRegex = - new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex ForTheRegex = - new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags); + new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayAndDayOfMothRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayAndDayRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex StrictRelativeRegex = - new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixArticleRegex = - new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] ImplicitDateList = { @@ -92,37 +95,37 @@ public class ItalianDateExtractorConfiguration : BaseDateTimeOptionsConfiguratio }; public static readonly Regex OfMonth = - new Regex(DateTimeDefinitions.OfMonth, RegexFlags); + new Regex(DateTimeDefinitions.OfMonth, RegexFlags, RegexTimeOut); public static readonly Regex MonthEnd = - new Regex(DateTimeDefinitions.MonthEnd, RegexFlags); + new Regex(DateTimeDefinitions.MonthEnd, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayEnd = - new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayStart = - new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags, RegexTimeOut); public static readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeConnectorSymbolRegex = - new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags); + new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags, RegexTimeOut); public static readonly ImmutableDictionary DayOfWeek = DateTimeDefinitions.DayOfWeek.ToImmutableDictionary(); @@ -130,15 +133,29 @@ public class ItalianDateExtractorConfiguration : BaseDateTimeOptionsConfiguratio public static readonly ImmutableDictionary MonthOfYear = DateTimeDefinitions.MonthOfYear.ToImmutableDictionary(); + public static readonly Regex BeforeAfterRegex = + new Regex(DateTimeDefinitions.BeforeAfterRegex, RegexFlags, RegexTimeOut); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public ItalianDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + IntegerExtractor = Number.Italian.IntegerExtractor.GetInstance(); - 
OrdinalExtractor = Number.Italian.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new ItalianNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + OrdinalExtractor = Number.Italian.OrdinalExtractor.GetInstance(numConfig); + NumberParser = new BaseNumberParser(new ItalianNumberParserConfiguration(numConfig)); + DurationExtractor = new BaseDurationExtractor(new ItalianDurationExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new ItalianHolidayExtractorConfiguration(this)); UtilityConfiguration = new ItalianDatetimeUtilityConfiguration(); const RegexOptions dateRegexOption = RegexOptions.Singleline; @@ -194,6 +211,8 @@ public ItalianDateExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DurationExtractor { get; } + public IDateTimeExtractor HolidayExtractor { get; } + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } IEnumerable IDateExtractorConfiguration.ImplicitDateList => ImplicitDateList; @@ -241,5 +260,7 @@ public ItalianDateExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IDateExtractorConfiguration.RangeUnitRegex => RangeUnitRegex; Regex IDateExtractorConfiguration.RangeConnectorSymbolRegex => RangeConnectorSymbolRegex; + + Regex IDateExtractorConfiguration.BeforeAfterRegex => BeforeAfterRegex; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDatePeriodExtractorConfiguration.cs index fe8be3fbce..0cc14e58a3 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDatePeriodExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Italian; @@ -14,195 +17,201 @@ public class ItalianDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfig // until public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.RestrictedTillRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestrictedTillRegex, RegexFlags, RegexTimeOut); public static readonly Regex FullTillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); // and public static readonly Regex RangeConnectorRegex = - new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex IllegalYearRegex = - new Regex(BaseDateTime.IllegalYearRegex, RegexFlags); + new Regex(BaseDateTime.IllegalYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); // this month, next month, last month public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new 
Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex EngMonthRegex = - new Regex(DateTimeDefinitions.EngMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.EngMonthRegex, RegexFlags, RegexTimeOut); // in, of, no "on"... public static readonly Regex MonthSuffixRegex = - new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags, RegexTimeOut); // year, month, week, day public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); // **In Italian, Past/Next is suffix, but interface enforces this // past, last, previous public static readonly Regex PastPrefixRegex = - new Regex(DateTimeDefinitions.PastSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PastSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); // **In Italian, Past/Next is suffix, but interface enforces this // next, in public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex FutureSuffixRegex = - new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); // composite regexes public static readonly Regex SimpleCasesRegex = - new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags, RegexTimeOut); // between 'x' until 'y', from 'x' until 'y' public static readonly Regex MonthFrontSimpleCasesRegex = - new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthFrontBetweenRegex = - new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags, RegexTimeOut); public static readonly Regex BetweenRegex = - new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthWithYear = - new Regex(DateTimeDefinitions.MonthWithYear, RegexFlags); + new Regex(DateTimeDefinitions.MonthWithYear, RegexFlags, RegexTimeOut); // @TODO localize comment? // a cote de - 'next to', cette - 'this', dernier - 'last' (always after the noun, i.e annee dernier - 'last year' public static readonly Regex OneWordPeriodRegex = - new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumWithYear = - new Regex(DateTimeDefinitions.MonthNumWithYear, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumWithYear, RegexFlags, RegexTimeOut); // le/la - masc/fem 'the' public static readonly Regex WeekOfMonthRegex = - new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfYearRegex = - new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex FollowedDateUnit = - new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags, 
RegexTimeOut); public static readonly Regex NumberCombinedWithDateUnit = - new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags, RegexTimeOut); // 1st quarter of this year, 2nd quarter of next/last year, etc public static readonly Regex QuarterRegex = - new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags); + new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags, RegexTimeOut); public static readonly Regex QuarterRegexYearFront = - new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags); + new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags, RegexTimeOut); public static readonly Regex AllHalfYearRegex = - new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex SeasonRegex = - new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags); + new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags, RegexTimeOut); public static readonly Regex WhichWeekRegex = - new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags); + new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfRegex = - new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthOfRegex = - new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags, RegexTimeOut); // TODO: add regexs below public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex 
WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RestOfDateRegex = - new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterEarlyPeriodRegex = - new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekWithWeekDayRangeRegex = - new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearPlusNumberRegex = - new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex DecadeWithCenturyRegex = - new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags); + new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearPeriodRegex = - new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex ComplexDatePeriodRegex = - new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDecadeRegex = - new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags, RegexTimeOut); public static readonly Regex ReferenceDatePeriodRegex = - new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags, RegexTimeOut); public static 
readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoRegex, RegexFlags); + new Regex(DateTimeDefinitions.AgoRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterRegex, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex CenturySuffixRegex = - new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FirstLastRegex = + new Regex(DateTimeDefinitions.FirstLastRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex OfYearRegex = + new Regex(DateTimeDefinitions.OfYearRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex FromRegex = - new Regex(DateTimeDefinitions.FromRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); private static readonly Regex ConnectorAndRegex = - new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags, RegexTimeOut); private static readonly Regex RangePrefixRegex = - new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); private static readonly Regex BeforeRegex = - new 
Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); private static readonly Regex[] SimpleCasesRegexes = { @@ -235,10 +244,20 @@ public ItalianDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con : base(config) { DatePointExtractor = new BaseDateExtractor(new ItalianDateExtractorConfiguration(this)); - CardinalExtractor = Number.Italian.CardinalExtractor.GetInstance(); - OrdinalExtractor = Number.Italian.OrdinalExtractor.GetInstance(); DurationExtractor = new BaseDurationExtractor(new ItalianDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new ItalianNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Italian.CardinalExtractor.GetInstance(); + OrdinalExtractor = Number.Italian.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new ItalianNumberParserConfiguration(numConfig)); } public IDateExtractor DatePointExtractor { get; } @@ -305,6 +324,10 @@ public ItalianDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con Regex IDatePeriodExtractorConfiguration.NowRegex => NowRegex; + Regex IDatePeriodExtractorConfiguration.FirstLastRegex => FirstLastRegex; + + Regex IDatePeriodExtractorConfiguration.OfYearRegex => OfYearRegex; + bool IDatePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; string[] IDatePeriodExtractorConfiguration.DurationDateRestrictions => DateTimeDefinitions.DurationDateRestrictions; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimeAltExtractorConfiguration.cs 
b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimeAltExtractorConfiguration.cs index 3b7e8913d3..fae8089581 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimeAltExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimeAltExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; @@ -7,16 +10,16 @@ namespace Microsoft.Recognizers.Text.DateTime.Italian public class ItalianDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeAltExtractorConfiguration { public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangePrefixRegex = - new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] RelativePrefixList = { @@ -31,10 +34,10 @@ public class ItalianDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfi private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex OrRegex = - new Regex(DateTimeDefinitions.OrRegex, RegexFlags); + new Regex(DateTimeDefinitions.OrRegex, RegexFlags, RegexTimeOut); private static readonly Regex DayRegex = - new 
Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public ItalianDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimeExtractorConfiguration.cs index ccca528eb8..f5369ec6b6 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; using Microsoft.Recognizers.Text.DateTime.Italian.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -10,60 +13,60 @@ public class ItalianDateTimeExtractorConfiguration : BaseDateTimeOptionsConfigur // à - time at which, en - length of time, dans - amount of time public static readonly Regex PrepositionRegex = - new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); // right now, as soon as possible, recently, previously public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); // in the evening, afternoon, morning, night public static readonly Regex SuffixRegex = - new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfDayRegex = - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfDayRegex, 
RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificEndOfRegex = - new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificEndOfRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConnectorRegex = - new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex NumberAsTimeRegex = - new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateNumberConnectorRegex = - new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -75,6 +78,8 @@ public ItalianDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration confi TimePointExtractor = new BaseTimeExtractor(new ItalianTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new ItalianDurationExtractorConfiguration(this)); UtilityConfiguration = new ItalianDatetimeUtilityConfiguration(); + HolidayExtractor = new BaseHolidayExtractor(new ItalianHolidayExtractorConfiguration(this)); + } public IExtractor IntegerExtractor { get; } @@ -85,6 +90,8 @@ public ItalianDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration confi public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + public IDateTimeExtractor HolidayExtractor { get; } + Regex IDateTimeExtractorConfiguration.NowRegex => NowRegex; Regex IDateTimeExtractorConfiguration.SuffixRegex => SuffixRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimePeriodExtractorConfiguration.cs index 
2148a6fe84..7f13ee0ef8 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimePeriodExtractorConfiguration.cs @@ -1,7 +1,11 @@ -using System.Collections.Generic; -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +using System.Collections.Generic; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Italian; +using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Italian @@ -9,43 +13,46 @@ namespace Microsoft.Recognizers.Text.DateTime.Italian public class ItalianDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimePeriodExtractorConfiguration { public static readonly Regex SuffixRegex = - new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeNumberCombinedWithUnit = - new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex HyphenDateRegex = + new Regex(BaseDateTime.HyphenDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodTimeOfDayWithDateRegex = - new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeTimeUnitRegex = - new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RestOfDateTimeRegex = - new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); public static readonly Regex MiddlePauseRegex = - new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags); + new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixDayRegex = new Regex(DateTimeDefinitions.PrefixDayRegex, RegexFlags | RegexOptions.RightToLeft); @@ -60,38 +67,49 @@ public class ItalianDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsCo }; private static readonly Regex FromRegex = - new Regex(DateTimeDefinitions.FromRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); 
private static readonly Regex ConnectorAndRegex = - new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags, RegexTimeOut); private static readonly Regex RangePrefixRegex = - new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); private static readonly Regex PeriodTimeOfDayRegex = - new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags, RegexTimeOut); private static readonly Regex PeriodSpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.PeriodSpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodSpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); private static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); private static readonly Regex TimeFollowedUnit = - new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); public ItalianDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; - CardinalExtractor = Number.English.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.English.CardinalExtractor.GetInstance(numConfig); + SingleDateExtractor = new BaseDateExtractor(new ItalianDateExtractorConfiguration(this)); SingleTimeExtractor = new BaseTimeExtractor(new ItalianTimeExtractorConfiguration(this)); SingleDateTimeExtractor = new BaseDateTimeExtractor(new 
ItalianDateTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new ItalianDurationExtractorConfiguration(this)); TimePeriodExtractor = new BaseTimePeriodExtractor(new ItalianTimePeriodExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new ItalianTimeZoneExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new ItalianHolidayExtractorConfiguration(this)); + } public IEnumerable SimpleCasesRegex => SimpleCases; @@ -124,7 +142,7 @@ public ItalianDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration public Regex FutureSuffixRegex => ItalianDatePeriodExtractorConfiguration.FutureSuffixRegex; - public Regex WeekDayRegex => new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + public Regex WeekDayRegex => new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); Regex IDateTimePeriodExtractorConfiguration.PeriodTimeOfDayWithDateRegex => PeriodTimeOfDayWithDateRegex; @@ -138,6 +156,8 @@ public ItalianDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration Regex IDateTimePeriodExtractorConfiguration.WithinNextPrefixRegex => WithinNextPrefixRegex; + Regex IDateTimePeriodExtractorConfiguration.TasksmodeMealTimeofDayRegex => null; + public string TokenBeforeDate { get; } public IExtractor CardinalExtractor { get; } @@ -154,6 +174,8 @@ public ItalianDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration public IDateTimeExtractor TimeZoneExtractor { get; } + public IDateTimeExtractor HolidayExtractor { get; } + Regex IDateTimePeriodExtractorConfiguration.PrefixDayRegex => PrefixDayRegex; Regex IDateTimePeriodExtractorConfiguration.DateUnitRegex => DateUnitRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDurationExtractorConfiguration.cs index a365524a8f..74c5e679be 100644 --- 
a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDurationExtractorConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; @@ -8,51 +12,57 @@ namespace Microsoft.Recognizers.Text.DateTime.Italian public class ItalianDurationExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDurationExtractorConfiguration { public static readonly Regex DurationUnitRegex = - new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAndRegex = - new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationFollowedUnit = - new Regex(DateTimeDefinitions.DurationFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.DurationFollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithDurationUnit = - new Regex(DateTimeDefinitions.NumberCombinedWithDurationUnit, RegexFlags); + new Regex(DateTimeDefinitions.NumberCombinedWithDurationUnit, RegexFlags, RegexTimeOut); public static readonly Regex AnUnitRegex = - new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DuringRegex = - new Regex(DateTimeDefinitions.DuringRegex, RegexFlags); + new Regex(DateTimeDefinitions.DuringRegex, RegexFlags, RegexTimeOut); public static readonly Regex AllRegex = - new Regex(DateTimeDefinitions.AllRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.AllRegex, RegexFlags, RegexTimeOut); public static readonly Regex HalfRegex = - new Regex(DateTimeDefinitions.HalfRegex, RegexFlags); + new Regex(DateTimeDefinitions.HalfRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConjunctionRegex = - new Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags, RegexTimeOut); public static readonly Regex InexactNumberRegex = - new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex InexactNumberUnitRegex = - new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDurationUnitRegex = - new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationConnectorRegex = - new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModPrefixRegex = + new Regex(DateTimeDefinitions.ModPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModSuffixRegex = + new Regex(DateTimeDefinitions.ModSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialNumberUnitRegex = null; public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | 
RegexOptions.ExplicitCapture; @@ -103,5 +113,11 @@ public ItalianDurationExtractorConfiguration(IDateTimeOptionsConfiguration confi Regex IDurationExtractorConfiguration.MoreThanRegex => MoreThanRegex; Regex IDurationExtractorConfiguration.LessThanRegex => LessThanRegex; + + Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex; + + Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex; + + public Dictionary AmbiguityFiltersDict => null; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianHolidayExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianHolidayExtractorConfiguration.cs index 9a594e9ba1..427217a6aa 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianHolidayExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianHolidayExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; @@ -8,16 +11,16 @@ namespace Microsoft.Recognizers.Text.DateTime.Italian public class ItalianHolidayExtractorConfiguration : BaseDateTimeOptionsConfiguration, IHolidayExtractorConfiguration { public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex H1 = - new Regex(DateTimeDefinitions.HolidayRegex1, RegexFlags); + new Regex(DateTimeDefinitions.HolidayRegex1, RegexFlags, RegexTimeOut); public static readonly Regex H2 = - new Regex(DateTimeDefinitions.HolidayRegex2, RegexFlags); + new Regex(DateTimeDefinitions.HolidayRegex2, RegexFlags, RegexTimeOut); public static readonly Regex H3 = - new Regex(DateTimeDefinitions.HolidayRegex3, RegexFlags); + new Regex(DateTimeDefinitions.HolidayRegex3, RegexFlags, RegexTimeOut); public static readonly Regex[] HolidayRegexList = { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianMergedExtractorConfiguration.cs index 78cecb58c0..a7ff2347aa 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianMergedExtractorConfiguration.cs @@ -1,7 +1,12 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; + using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Italian; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Matcher; namespace Microsoft.Recognizers.Text.DateTime.Italian @@ -9,48 +14,51 @@ namespace Microsoft.Recognizers.Text.DateTime.Italian public class ItalianMergedExtractorConfiguration : BaseDateTimeOptionsConfiguration, IMergedExtractorConfiguration { public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); // avant - 'before' + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); // avant - 'before' public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); // ensuite/puis are for adverbs, i.e 'i ate and then i walked', so we'll use apres + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); // ensuite/puis are for adverbs, i.e 'i ate and then i walked', so we'll use apres public static readonly Regex SinceRegex = - new Regex(DateTimeDefinitions.SinceRegex, RegexFlags); + new Regex(DateTimeDefinitions.SinceRegex, RegexFlags, RegexTimeOut); public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.RestrictedTillRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestrictedTillRegex, RegexFlags, RegexTimeOut); public static readonly Regex AroundRegex = - new Regex(DateTimeDefinitions.AroundRegex, RegexFlags); + new Regex(DateTimeDefinitions.AroundRegex, RegexFlags, RegexTimeOut); public static readonly Regex EqualRegex = - new Regex(BaseDateTime.EqualRegex, RegexFlags); + new Regex(BaseDateTime.EqualRegex, RegexFlags, RegexTimeOut); public static readonly Regex FromToRegex = - new Regex(DateTimeDefinitions.FromToRegex, RegexFlags); // 'Je vais du lundi au mecredi' - I will go from monday to weds + new Regex(DateTimeDefinitions.FromToRegex, RegexFlags, RegexTimeOut); // 'Je vais du lundi 
au mecredi' - I will go from monday to weds public static readonly Regex SingleAmbiguousMonthRegex = - new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrepositionSuffixRegex = - new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmbiguousRangeModifierPrefix = - new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags); + new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags, RegexTimeOut); public static readonly Regex NumberEndingPattern = - new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags); + new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificDatePeriodRegex = - new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags, RegexTimeOut); - public static readonly Regex[] TermFilterRegexes = { }; + public static readonly Regex[] TermFilterRegexes = System.Array.Empty(); public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher(); + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public ItalianMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) @@ -68,6 +76,8 @@ public ItalianMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) TimeZoneExtractor = new BaseTimeZoneExtractor(new ItalianTimeZoneExtractorConfiguration(this)); 
DateTimeAltExtractor = new BaseDateTimeAltExtractor(new ItalianDateTimeAltExtractorConfiguration(this)); IntegerExtractor = Number.Italian.IntegerExtractor.GetInstance(); + + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict); } public IDateExtractor DateExtractor { get; } @@ -94,7 +104,7 @@ public ItalianMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) public IExtractor IntegerExtractor { get; } - public Dictionary AmbiguityFiltersDict { get; } = null; + public Dictionary AmbiguityFiltersDict { get; } Regex IMergedExtractorConfiguration.AfterRegex => AfterRegex; @@ -124,10 +134,17 @@ public ItalianMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + public Regex FailFastRegex { get; } = null; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; StringMatcher IMergedExtractorConfiguration.SuperfluousWordMatcher => SuperfluousWordMatcher; + + bool IMergedExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + public Regex TasksModeMentionFilters { get; } + } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianSetExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianSetExtractorConfiguration.cs index 08a4101f2d..ef887ef097 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianSetExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianSetExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; @@ -10,28 +13,28 @@ public class ItalianSetExtractorConfiguration : BaseDateTimeOptionsConfiguration public static readonly string ExtractorName = Constants.SYS_DATETIME_SET; public static readonly Regex SetUnitRegex = - new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodicRegex = - new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachUnitRegex = - new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachPrefixRegex = - new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachDayRegex = - new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetLastRegex = - new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetWeekDayRegex = - new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetEachRegex = - new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -61,6 +64,8 @@ public ItalianSetExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DateTimePeriodExtractor { get; } + bool 
ISetExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + Regex ISetExtractorConfiguration.LastRegex => SetLastRegex; Regex ISetExtractorConfiguration.EachPrefixRegex => EachPrefixRegex; @@ -82,34 +87,34 @@ public Tuple WeekDayGroupMatchTuple(Match match) string weekday = string.Empty; int del = 0; - if (match.Groups["g0"].ToString() != string.Empty) + if (match.Groups["g0"].Length != 0) { - weekday = match.Groups["g0"].ToString() + "a"; + weekday = match.Groups["g0"] + "a"; del = 0; } - else if (match.Groups["g1"].ToString() != string.Empty) + else if (match.Groups["g1"].Length != 0) { - weekday = match.Groups["g1"].ToString() + "io"; + weekday = match.Groups["g1"] + "io"; del = -1; } - else if (match.Groups["g2"].ToString() != string.Empty) + else if (match.Groups["g2"].Length != 0) { - weekday = match.Groups["g2"].ToString() + "e"; + weekday = match.Groups["g2"] + "e"; del = 0; } - else if (match.Groups["g3"].ToString() != string.Empty) + else if (match.Groups["g3"].Length != 0) { - weekday = match.Groups["g3"].ToString() + "ì"; + weekday = match.Groups["g3"] + "ì"; del = 0; } - else if (match.Groups["g4"].ToString() != string.Empty) + else if (match.Groups["g4"].Length != 0) { - weekday = match.Groups["g4"].ToString() + "a"; + weekday = match.Groups["g4"] + "a"; del = 1; } - else if (match.Groups["g5"].ToString() != string.Empty) + else if (match.Groups["g5"].Length != 0) { - weekday = match.Groups["g5"].ToString() + "o"; + weekday = match.Groups["g5"] + "o"; del = 0; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianTimeExtractorConfiguration.cs index ad083ef7d9..ab1433fde2 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianTimeExtractorConfiguration.cs @@ -1,4 +1,7 
@@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; @@ -10,110 +13,110 @@ public class ItalianTimeExtractorConfiguration : BaseDateTimeOptionsConfiguratio // part 1: smallest component // -------------------------------- public static readonly Regex DescRegex = - new Regex(DateTimeDefinitions.DescRegex, RegexFlags); + new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut); public static readonly Regex HourNumRegex = - new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex MinuteNumRegex = - new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags, RegexTimeOut); // part 2: middle level component // -------------------------------------- // handle "... heures (o'clock, en punto)" public static readonly Regex OclockRegex = - new Regex(DateTimeDefinitions.OclockRegex, RegexFlags); + new Regex(DateTimeDefinitions.OclockRegex, RegexFlags, RegexTimeOut); // handle "... après midi (afternoon, tarde)" public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); // handle "... dans la matinee (in the morning)" public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); // handle "half past ..." "a quarter to ..." 
// rename 'min' group to 'deltamin' public static readonly Regex LessThanOneHour = - new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags); + new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags, RegexTimeOut); public static readonly Regex FrTimeRegex = - new Regex(DateTimeDefinitions.EngTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.EngTimeRegex, RegexFlags, RegexTimeOut); // TODO - will have change below // handle "six heures et demie" (six thirty), "six heures et vingt-et-un" (six twenty one) public static readonly Regex TimePrefix = - new Regex(DateTimeDefinitions.TimePrefix, RegexFlags); + new Regex(DateTimeDefinitions.TimePrefix, RegexFlags, RegexTimeOut); public static readonly Regex TimeSuffix = - new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags); + new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags, RegexTimeOut); public static readonly Regex BasicTime = - new Regex(DateTimeDefinitions.BasicTime, RegexFlags); + new Regex(DateTimeDefinitions.BasicTime, RegexFlags, RegexTimeOut); // handle special time such as 'at midnight', 'midnight', 'midday' // midnight - le minuit, la zero heure // midday - midi public static readonly Regex MidnightRegex = - new Regex(DateTimeDefinitions.MidnightRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidnightRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidmorningRegex = - new Regex(DateTimeDefinitions.MidmorningRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidmorningRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidafternoonRegex = - new Regex(DateTimeDefinitions.MidafternoonRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidafternoonRegex, RegexFlags, RegexTimeOut); public static readonly Regex MiddayRegex = - new Regex(DateTimeDefinitions.MiddayRegex, RegexFlags); + new Regex(DateTimeDefinitions.MiddayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidTimeRegex = - new Regex(DateTimeDefinitions.MidTimeRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.MidTimeRegex, RegexFlags, RegexTimeOut); // part 3: regex for time // -------------------------------------- // handle "at four" "at 3" public static readonly Regex AtRegex = - new Regex(DateTimeDefinitions.AtRegex, RegexFlags); + new Regex(DateTimeDefinitions.AtRegex, RegexFlags, RegexTimeOut); public static readonly Regex IshRegex = - new Regex(DateTimeDefinitions.IshRegex, RegexFlags); + new Regex(DateTimeDefinitions.IshRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConnectNumRegex = - new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeBeforeAfterRegex = - new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] TimeRegexList = { // (three min past)? seven|7|(seven thirty) pm - new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags, RegexTimeOut), // (three min past)? 3:00(:00)? (pm)? - new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags, RegexTimeOut), // (three min past)? 3.00 (pm)? - new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags, RegexTimeOut), // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? (in the night) - new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags, RegexTimeOut), // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? 
- new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags, RegexTimeOut), // (five thirty|seven|7|7:00(:00)?) (pm)? (in the night) - new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags, RegexTimeOut), // (in the night) at (five thirty|seven|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags, RegexTimeOut), // (in the night) (five thirty|seven|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex8, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex8, RegexFlags, RegexTimeOut), - new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags, RegexTimeOut), // 340pm ConnectNumRegex, @@ -139,5 +142,9 @@ public ItalianTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DurationExtractor { get; } public IDateTimeExtractor TimeZoneExtractor { get; } + + public string TimeTokenPrefix => DateTimeDefinitions.TimeTokenPrefix; + + public Dictionary AmbiguityFiltersDict => null; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianTimePeriodExtractorConfiguration.cs index 5c7f30f2bd..9156fcc8d0 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianTimePeriodExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; @@ -13,72 +16,75 @@ public class ItalianTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfig public static readonly string ExtractorName = Constants.SYS_DATETIME_TIMEPERIOD; // "TimePeriod"; public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.RestrictedTillRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestrictedTillRegex, RegexFlags, RegexTimeOut); public static readonly Regex FullTillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); public static readonly Regex HourRegex = - new Regex(DateTimeDefinitions.HourRegex, RegexFlags); + new Regex(DateTimeDefinitions.HourRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodHourNumRegex = - new Regex(DateTimeDefinitions.PeriodHourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodHourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodDescRegex = - new Regex(DateTimeDefinitions.PeriodDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); public static readonly Regex PureNumFromTo = - new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags); + new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags, RegexTimeOut); public static readonly Regex PureNumBetweenAnd = - new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeFromTo = - new 
Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeBetweenAnd = - new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex PrepositionRegex = - new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfDayRegex = - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeFollowedUnit = - new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex TimeNumberCombinedWithUnit = - new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); public static readonly Regex GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex FromRegex = - new Regex(DateTimeDefinitions.FromRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); private static readonly Regex 
RangePrefixRegex = - new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); private static readonly Regex ConnectorAndRegex = - new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags, RegexTimeOut); private static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex RangePmRegex = + new Regex(DateTimeDefinitions.RangePmRegex, RegexFlags, RegexTimeOut); public ItalianTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) @@ -102,7 +108,7 @@ public ItalianTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con public IEnumerable SimpleCasesRegex => new[] { - PureNumFromTo, PureNumBetweenAnd, AmRegex, PmRegex, SpecificTimeFromTo, SpecificTimeBetweenAnd, + PureNumFromTo, PureNumBetweenAnd, AmRegex, RangePmRegex, SpecificTimeFromTo, SpecificTimeBetweenAnd, }; public IEnumerable PureNumberRegex => new[] { PureNumFromTo, PureNumBetweenAnd }; @@ -156,5 +162,7 @@ public bool IsConnectorToken(string text) { return ConnectorAndRegex.IsMatch(text) || FullTillRegex.IsExactMatch(text, false); } + + public List ApplyPotentialPeriodAmbiguityHotfix(string text, List timePeriodErs) => TimePeriodFunctions.ApplyPotentialPeriodAmbiguityHotfix(text, timePeriodErs); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianTimeZoneExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianTimeZoneExtractorConfiguration.cs index 93de7f2e71..85f6baa20d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianTimeZoneExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianTimeZoneExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using 
System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Matcher; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianCommonDateTimeParserConfiguration.cs index 8e7389562d..62901d2b4c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianCommonDateTimeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Italian; using Microsoft.Recognizers.Text.DateTime.Italian.Utilities; @@ -25,12 +28,22 @@ public ItalianCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration co WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + CardinalExtractor = Number.Italian.CardinalExtractor.GetInstance(); IntegerExtractor = Number.Italian.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.Italian.OrdinalExtractor.GetInstance(); + OrdinalExtractor = Number.Italian.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new ItalianNumberParserConfiguration(numConfig)); - NumberParser = new BaseNumberParser(new ItalianNumberParserConfiguration(new 
BaseNumberOptionsConfiguration(config.Culture))); DateExtractor = new BaseDateExtractor(new ItalianDateExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new ItalianHolidayExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new ItalianTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new ItalianDateTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new ItalianDurationExtractorConfiguration(this)); @@ -38,6 +51,7 @@ public ItalianCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration co TimePeriodExtractor = new BaseTimePeriodExtractor(new ItalianTimePeriodExtractorConfiguration(this)); DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(new ItalianDateTimePeriodExtractorConfiguration(this)); DateParser = new BaseDateParser(new ItalianDateParserConfiguration(this)); + HolidayTimeParser = new BaseHolidayParser(new ItalianHolidayParserConfiguration(this)); TimeParser = new TimeParser(new ItalianTimeParserConfiguration(this)); DateTimeParser = new BaseDateTimeParser(new ItalianDateTimeParserConfiguration(this)); DurationParser = new BaseDurationParser(new ItalianDurationParserConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateParserConfiguration.cs index af494a9911..09e6b42a0e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -22,6 +25,7 @@ public ItalianDateParserConfiguration(ICommonDateTimeParserConfiguration config) DurationExtractor = config.DurationExtractor; DateExtractor = config.DateExtractor; DurationParser = config.DurationParser; + HolidayParser = new BaseHolidayParser(new ItalianHolidayParserConfiguration(this)); DateRegexes = new ItalianDateExtractorConfiguration(this).DateRegexList; OnRegex = ItalianDateExtractorConfiguration.OnRegex; SpecialDayRegex = ItalianDateExtractorConfiguration.SpecialDayRegex; @@ -41,13 +45,14 @@ public ItalianDateParserConfiguration(ICommonDateTimeParserConfiguration config) StrictRelativeRegex = ItalianDateExtractorConfiguration.StrictRelativeRegex; YearSuffix = ItalianDateExtractorConfiguration.YearSuffix; RelativeWeekDayRegex = ItalianDateExtractorConfiguration.RelativeWeekDayRegex; + BeforeAfterRegex = ItalianDateExtractorConfiguration.BeforeAfterRegex; // @TODO move to config - RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexFlags); - NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); - PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); - UpcomingPrefixRegex = new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags); - PastPrefixRegex = new Regex(DateTimeDefinitions.PastPrefixRegex, RegexFlags); + RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexFlags, RegexTimeOut); + NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + UpcomingPrefixRegex = new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags, RegexTimeOut); + PastPrefixRegex = new Regex(DateTimeDefinitions.PastPrefixRegex, RegexFlags, RegexTimeOut); DayOfMonth = config.DayOfMonth; DayOfWeek = config.DayOfWeek; @@ 
-80,6 +85,8 @@ public ItalianDateParserConfiguration(ICommonDateTimeParserConfiguration config) public IDateTimeParser DurationParser { get; } + public IDateTimeParser HolidayParser { get; } + public IImmutableDictionary UnitMap { get; } public IEnumerable DateRegexes { get; } @@ -130,6 +137,10 @@ public ItalianDateParserConfiguration(ICommonDateTimeParserConfiguration config) public Regex PastPrefixRegex { get; } + public Regex BeforeAfterRegex { get; } + + public Regex TasksModeDurationToDatePatterns { get; } + public IImmutableDictionary DayOfMonth { get; } public IImmutableDictionary DayOfWeek { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDatePeriodParserConfiguration.cs index 34167a883f..ac294b79ad 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDatePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; @@ -9,34 +13,37 @@ namespace Microsoft.Recognizers.Text.DateTime.Italian public class ItalianDatePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDatePeriodParserConfiguration { public static readonly Regex UpcomingPrefixRegex = - new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PastPrefixRegex = - new Regex(DateTimeDefinitions.PastPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PastPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextSuffixRegex = - new Regex(DateTimeDefinitions.NextSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PastSuffixRegex = - new Regex(DateTimeDefinitions.PastSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PastSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterNextSuffixRegex = - new Regex(DateTimeDefinitions.AfterNextSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterNextSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeRegex = - new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, 
RegexTimeOut); public static readonly Regex UnspecificEndOfRangeRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AmbiguousPointRangeRegex = + new Regex(DateTimeDefinitions.AmbiguousPointRangeRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -91,6 +98,10 @@ public ItalianDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c MoreThanRegex = ItalianDatePeriodExtractorConfiguration.MoreThanRegex; CenturySuffixRegex = ItalianDatePeriodExtractorConfiguration.CenturySuffixRegex; NowRegex = ItalianDatePeriodExtractorConfiguration.NowRegex; + FirstLastRegex = ItalianDatePeriodExtractorConfiguration.FirstLastRegex; + OfYearRegex = ItalianDatePeriodExtractorConfiguration.OfYearRegex; + SpecialDayRegex = ItalianDateExtractorConfiguration.SpecialDayRegex; + TodayNowRegex = new Regex(DateTimeDefinitions.TodayNowRegex, RegexOptions.Singleline); UnitMap = config.UnitMap; CardinalMap = config.CardinalMap; @@ -201,6 +212,14 @@ public ItalianDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c public Regex NowRegex { get; } + public Regex SpecialDayRegex { get; } + + public Regex TodayNowRegex { get; } + + public Regex FirstLastRegex { get; } + + public Regex OfYearRegex { get; } + Regex ISimpleDatePeriodParserConfiguration.RelativeRegex => RelativeRegex; Regex IDatePeriodParserConfiguration.NextPrefixRegex => NextPrefixRegex; @@ -211,6 +230,8 @@ public ItalianDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c Regex IDatePeriodParserConfiguration.UnspecificEndOfRangeRegex => UnspecificEndOfRangeRegex; + Regex IDatePeriodParserConfiguration.AmbiguousPointRangeRegex => AmbiguousPointRangeRegex; + bool IDatePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; public 
IImmutableDictionary UnitMap { get; } @@ -284,34 +305,39 @@ public int GetSwiftYear(string text) public bool IsFuture(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.FutureStartTerms.Any(o => trimmedText.StartsWith(o)) || - DateTimeDefinitions.FutureEndTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.FutureStartTerms.Any(o => trimmedText.StartsWith(o, StringComparison.Ordinal)) || + DateTimeDefinitions.FutureEndTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); } public bool IsLastCardinal(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.LastCardinalTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.LastCardinalTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } public bool IsMonthOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || (DateTimeDefinitions.MonthTerms.Any(o => trimmedText.Contains(o)) && (AfterNextSuffixRegex.IsMatch(trimmedText) || ThisPrefixRegex.IsMatch(trimmedText) || NextSuffixRegex.IsMatch(trimmedText) || PastSuffixRegex.IsMatch(trimmedText))); } + public bool IsFortnight(string text) + { + return false; + } + public bool IsMonthToDate(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } public bool IsWeekend(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || (DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.Contains(o)) && (AfterNextSuffixRegex.IsMatch(trimmedText) || 
ThisPrefixRegex.IsMatch(trimmedText) || NextSuffixRegex.IsMatch(trimmedText) || PastSuffixRegex.IsMatch(trimmedText))); } @@ -319,7 +345,7 @@ public bool IsWeekend(string text) public bool IsWeekOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || (DateTimeDefinitions.WeekTerms.Any(o => trimmedText.Contains(o)) && (AfterNextSuffixRegex.IsMatch(trimmedText) || ThisPrefixRegex.IsMatch(trimmedText) || NextSuffixRegex.IsMatch(trimmedText) || PastSuffixRegex.IsMatch(trimmedText))); } @@ -327,7 +353,7 @@ public bool IsWeekOnly(string text) public bool IsYearOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || (DateTimeDefinitions.YearTerms.Any(o => trimmedText.Contains(o)) && (AfterNextSuffixRegex.IsMatch(trimmedText) || ThisPrefixRegex.IsMatch(trimmedText) || NextSuffixRegex.IsMatch(trimmedText) || PastSuffixRegex.IsMatch(trimmedText))); } @@ -335,7 +361,7 @@ public bool IsYearOnly(string text) public bool IsYearToDate(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateTimeAltParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateTimeAltParserConfiguration.cs index b8ecf28a04..a03b0927aa 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateTimeAltParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateTimeAltParserConfiguration.cs @@ -1,4 +1,7 @@ -namespace 
Microsoft.Recognizers.Text.DateTime.Italian +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.DateTime.Italian { public class ItalianDateTimeAltParserConfiguration : IDateTimeAltParserConfiguration { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateTimeParserConfiguration.cs index 8d0bb439dd..7ebb5d4b45 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateTimeParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -18,12 +22,14 @@ public ItalianDateTimeParserConfiguration(ICommonDateTimeParserConfiguration con TimeExtractor = config.TimeExtractor; DateParser = config.DateParser; TimeParser = config.TimeParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; NowRegex = ItalianDateTimeExtractorConfiguration.NowRegex; - AMTimeRegex = new Regex(DateTimeDefinitions.AMTimeRegex, RegexFlags); - PMTimeRegex = new Regex(DateTimeDefinitions.PMTimeRegex, RegexFlags); - NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); - PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + AMTimeRegex = new Regex(DateTimeDefinitions.AMTimeRegex, RegexFlags, RegexTimeOut); + PMTimeRegex = new Regex(DateTimeDefinitions.PMTimeRegex, RegexFlags, RegexTimeOut); + NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, 
RegexTimeOut); + PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); SimpleTimeOfTodayAfterRegex = ItalianDateTimeExtractorConfiguration.SimpleTimeOfTodayAfterRegex; SimpleTimeOfTodayBeforeRegex = ItalianDateTimeExtractorConfiguration.SimpleTimeOfTodayBeforeRegex; @@ -102,16 +108,28 @@ public ItalianDateTimeParserConfiguration(ICommonDateTimeParserConfiguration con public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + // Note: Italian typically uses 24:00 time, consider removing 12:00 am/pm public int GetHour(string text, int hour) { var trimmedText = text.Trim(); + int result = hour; - if ((trimmedText.EndsWith("mattino") || trimmedText.EndsWith("mattina")) && hour >= 12) + + // @TODO move hardcoded values to resources file + + if ((trimmedText.EndsWith("mattino", StringComparison.Ordinal) || + trimmedText.EndsWith("mattina", StringComparison.Ordinal)) && + hour >= 12) { result -= 12; } - else if (!(trimmedText.EndsWith("mattino") || trimmedText.EndsWith("mattina")) && hour < 12) + else if (!(trimmedText.EndsWith("mattino", StringComparison.Ordinal) || + trimmedText.EndsWith("mattina", StringComparison.Ordinal)) && + hour < 12) { result += 12; } @@ -122,15 +140,22 @@ public int GetHour(string text, int hour) public bool GetMatchedNowTimex(string text, out string timex) { var trimmedText = text.Trim(); - if (trimmedText.EndsWith("ora") || trimmedText.EndsWith("adesso") || trimmedText.EndsWith("in questo momento")) + + // @TODO move hardcoded values to resources file + + if (trimmedText.EndsWith("ora", StringComparison.Ordinal) || + trimmedText.EndsWith("adesso", StringComparison.Ordinal) || + trimmedText.EndsWith("in questo momento", StringComparison.Ordinal)) { timex = "PRESENT_REF"; } - else if (trimmedText.Equals("recentemente") || trimmedText.Equals("precedentemente")) + else if 
(trimmedText.Equals("recentemente", StringComparison.Ordinal) || + trimmedText.Equals("precedentemente", StringComparison.Ordinal)) { timex = "PAST_REF"; } - else if (trimmedText.Equals("il prima possibile") || trimmedText.Equals("asap")) + else if (trimmedText.Equals("il prima possibile", StringComparison.Ordinal) || + trimmedText.Equals("asap", StringComparison.Ordinal)) { timex = "FUTURE_REF"; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateTimePeriodParserConfiguration.cs index eaa5e94a56..89c15d7668 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDateTimePeriodParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; @@ -8,22 +11,22 @@ namespace Microsoft.Recognizers.Text.DateTime.Italian public class ItalianDateTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimePeriodParserConfiguration { public static readonly Regex MorningStartEndRegex = - new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfternoonStartEndRegex = - new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex EveningStartEndRegex = - new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex NightStartEndRegex = - new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex PastSuffixRegex = - new Regex(DateTimeDefinitions.PastSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PastSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextSuffixRegex = - new Regex(DateTimeDefinitions.NextSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextSuffixRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -31,6 +34,8 @@ public ItalianDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati : base(config) { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime; + DateExtractor = config.DateExtractor; TimeExtractor = config.TimeExtractor; DateTimeExtractor = config.DateTimeExtractor; @@ -44,8 +49,11 @@ public 
ItalianDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati DurationParser = config.DurationParser; DateTimeParser = config.DateTimeParser; TimeZoneParser = config.TimeZoneParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; PureNumberFromToRegex = ItalianTimePeriodExtractorConfiguration.PureNumFromTo; + HyphenDateRegex = ItalianDateTimePeriodExtractorConfiguration.HyphenDateRegex; PureNumberBetweenAndRegex = ItalianTimePeriodExtractorConfiguration.PureNumBetweenAnd; SpecificTimeOfDayRegex = ItalianDateTimeExtractorConfiguration.SpecificTimeOfDayRegex; TimeOfDayRegex = ItalianDateTimeExtractorConfiguration.TimeOfDayRegex; @@ -70,6 +78,8 @@ public ItalianDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati public string TokenBeforeDate { get; } + public string TokenBeforeTime { get; } + public IDateExtractor DateExtractor { get; } public IDateTimeExtractor TimeExtractor { get; } @@ -98,6 +108,8 @@ public ItalianDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati public Regex PureNumberFromToRegex { get; } + public Regex HyphenDateRegex { get; } + public Regex PureNumberBetweenAndRegex { get; } public Regex SpecificTimeOfDayRegex { get; } @@ -134,47 +146,52 @@ public ItalianDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati bool IDateTimePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + Regex IDateTimePeriodParserConfiguration.TasksmodeMealTimeofDayRegex => null; + public IImmutableDictionary UnitMap { get; } public IImmutableDictionary Numbers { get; } - public bool GetMatchedTimeRange(string text, out string timeStr, out int beginHour, out int endHour, out int endMin) + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + + public bool GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMin) { var trimmedText = 
text.Trim(); + beginHour = 0; endHour = 0; endMin = 0; + if (MorningStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TMO"; - beginHour = 8; - endHour = 12; + todSymbol = Constants.Morning; } else if (AfternoonStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TAF"; - beginHour = 12; - endHour = 16; + todSymbol = Constants.Afternoon; } else if (EveningStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TEV"; - beginHour = 16; - endHour = 20; + todSymbol = Constants.Evening; } else if (NightStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TNI"; - beginHour = 20; - endHour = 23; - endMin = 59; + todSymbol = Constants.Night; } else { - timeStr = null; + todSymbol = null; return false; } + var parseResult = TimexUtility.ResolveTimeOfDay(todSymbol); + todSymbol = parseResult.Timex; + beginHour = parseResult.BeginHour; + endHour = parseResult.EndHour; + endMin = parseResult.EndMin; + return true; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDurationParserConfiguration.cs index 9daaa67afc..55482d1e98 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDurationParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; @@ -7,7 +10,11 @@ namespace Microsoft.Recognizers.Text.DateTime.Italian { public class ItalianDurationParserConfiguration : BaseDateTimeOptionsConfiguration, IDurationParserConfiguration { - public static readonly Regex InexactNumberUnitRegex2 = new Regex(DateTimeDefinitions.InexactNumberUnitRegex2, RegexFlags); + public static readonly Regex InexactNumberUnitRegex2 = + new Regex(DateTimeDefinitions.InexactNumberUnitRegex2, RegexFlags, RegexTimeOut); + + public static readonly Regex PrefixArticleRegex = + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -16,7 +23,9 @@ public ItalianDurationParserConfiguration(ICommonDateTimeParserConfiguration con { CardinalExtractor = config.CardinalExtractor; NumberParser = config.NumberParser; + DurationExtractor = new BaseDurationExtractor(new ItalianDurationExtractorConfiguration(this), false); + NumberCombinedWithUnit = ItalianDurationExtractorConfiguration.NumberCombinedWithDurationUnit; AnUnitRegex = ItalianDurationExtractorConfiguration.AnUnitRegex; @@ -39,7 +48,7 @@ public ItalianDurationParserConfiguration(ICommonDateTimeParserConfiguration con public IExtractor CardinalExtractor { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public IParser NumberParser { get; } @@ -47,6 +56,8 @@ public ItalianDurationParserConfiguration(ICommonDateTimeParserConfiguration con public Regex AnUnitRegex { get; } + Regex IDurationParserConfiguration.PrefixArticleRegex => PrefixArticleRegex; + public Regex DuringRegex { get; } public Regex AllDateUnitRegex { get; } @@ -67,6 +78,8 @@ public ItalianDurationParserConfiguration(ICommonDateTimeParserConfiguration con public Regex SpecialNumberUnitRegex { get; } + bool 
IDurationParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + public IImmutableDictionary UnitMap { get; } public IImmutableDictionary UnitValueMap { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianHolidayParserConfiguration.cs index 9d2931710d..85f837ce8f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianHolidayParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianHolidayParserConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -15,9 +18,9 @@ public class ItalianHolidayParserConfiguration : BaseHolidayParserConfiguration public ItalianHolidayParserConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - ThisPrefixRegex = new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); - NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); - PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + ThisPrefixRegex = new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); + NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); this.HolidayRegexList = ItalianHolidayExtractorConfiguration.HolidayRegexList; this.HolidayNames = DateTimeDefinitions.HolidayNames.ToImmutableDictionary(); } @@ -98,7 +101,7 @@ protected override IDictionary> InitHolidayFuncs() { "newyeareve", NewYearEve }, { "fathersday", FathersDay }, { "mothersday", MothersDay }, - { "labourday", LabourDay }, + { "labourday", 
InternationalWorkersDay }, { "memorialday", MemorialDay }, { "ferragosto", Ferragosto }, { "liberationday", LiberationDay }, @@ -171,8 +174,6 @@ protected override IDictionary> InitHolidayFuncs() private static new DateObject MothersDay(int year) => new DateObject(year, 5, 27); - private static new DateObject LabourDay(int year) => new DateObject(year, 5, 1); - private static new DateObject MemorialDay(int year) => new DateObject(year, 1, 27); private static DateObject Ferragosto(int year) => new DateObject(year, 8, 15); @@ -181,28 +182,7 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject RepublicDay(int year) => new DateObject(year, 6, 2); - private static DateObject EasterDay(int year) => CalculateHolidayByEaster(year); - - // function adopted from German implementation - private static DateObject CalculateHolidayByEaster(int year, int days = 0) - { - int day = 0; - int month = 3; - - int g = year % 19; - int c = year / 100; - int h = (c - (int)(c / 4) - (int)(((8 * c) + 13) / 25) + (19 * g) + 15) % 30; - int i = h - ((int)(h / 28) * (1 - ((int)(h / 28) * (int)(29 / (h + 1)) * (int)((21 - g) / 11)))); + private static DateObject EasterDay(int year) => HolidayFunctions.CalculateHolidayByEaster(year); - day = i - ((year + (int)(year / 4) + i + 2 - c + (int)(c / 4)) % 7) + 28; - - if (day > 31) - { - month++; - day -= 31; - } - - return DateObject.MinValue.SafeCreateFromValue(year, month, day).AddDays(days); - } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianMergedParserConfiguration.cs index 1ffffb4a32..5f0cd00b9f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianMergedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianMergedParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Italian; using Microsoft.Recognizers.Text.Matcher; namespace Microsoft.Recognizers.Text.DateTime.Italian @@ -47,5 +51,7 @@ public ItalianMergedParserConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeParser HolidayParser { get; } public StringMatcher SuperfluousWordMatcher { get; } + + bool IMergedParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianSetParserConfiguration.cs index 523e0b4c7e..cec331a044 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianSetParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianSetParserConfiguration.cs @@ -1,11 +1,20 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Italian; +using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Italian { public class ItalianSetParserConfiguration : BaseDateTimeOptionsConfiguration, ISetParserConfiguration { + // pass FutureTerms as List to ReplaceValueInTextWithFutTerm function + private static readonly List ThisTerms = (List)DateTimeDefinitions.FutureStartTerms; + public ItalianSetParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -79,28 +88,38 @@ public ItalianSetParserConfiguration(ICommonDateTimeParserConfiguration config) public bool GetMatchedDailyTimex(string text, out string timex) { var trimmedText = text.Trim(); - if (trimmedText.Equals("quotidianamente") || trimmedText.Equals("quotidiano") || trimmedText.Equals("quotidiana") || - trimmedText.Equals("giornalmente") || trimmedText.Equals("giornaliero") || trimmedText.Equals("giornaliera")) + + // @TODO move hardcoded values to resources file + + if (trimmedText.Equals("quotidianamente", StringComparison.Ordinal) || + trimmedText.Equals("quotidiano", StringComparison.Ordinal) || + trimmedText.Equals("quotidiana", StringComparison.Ordinal) || + trimmedText.Equals("giornalmente", StringComparison.Ordinal) || + trimmedText.Equals("giornaliero", StringComparison.Ordinal) || + trimmedText.Equals("giornaliera", StringComparison.Ordinal)) { // daily timex = "P1D"; } - else if (trimmedText.Equals("settimanale") || trimmedText.Equals("settimanalmente")) + else if (trimmedText.Equals("settimanale", StringComparison.Ordinal) || + trimmedText.Equals("settimanalmente", StringComparison.Ordinal)) { // weekly timex = "P1W"; } - else if (trimmedText.Equals("bisettimanale")) + else if (trimmedText.Equals("bisettimanale", StringComparison.Ordinal)) { // bi weekly timex = "P2W"; } - else if 
(trimmedText.Equals("mensile") || trimmedText.Equals("mensilmente")) + else if (trimmedText.Equals("mensile", StringComparison.Ordinal) || + trimmedText.Equals("mensilmente", StringComparison.Ordinal)) { // monthly timex = "P1M"; } - else if (trimmedText.Equals("annuale") || trimmedText.Equals("annualmente")) + else if (trimmedText.Equals("annuale", StringComparison.Ordinal) || + trimmedText.Equals("annualmente", StringComparison.Ordinal)) { // yearly/annually timex = "P1Y"; @@ -117,19 +136,28 @@ public bool GetMatchedDailyTimex(string text, out string timex) public bool GetMatchedUnitTimex(string text, out string timex) { var trimmedText = text.Trim(); - if (trimmedText.Equals("giorno") || trimmedText.Equals("giornata") || trimmedText.Equals("giorni")) + + // @TODO move hardcoded values to resources file + + if (trimmedText.Equals("giorno", StringComparison.Ordinal) || + trimmedText.Equals("giornata", StringComparison.Ordinal) || + trimmedText.Equals("giorni", StringComparison.Ordinal)) { timex = "P1D"; } - else if (trimmedText.Equals("settimana") || trimmedText.Equals("settimane")) + else if (trimmedText.Equals("settimana", StringComparison.Ordinal) || + trimmedText.Equals("settimane", StringComparison.Ordinal)) { timex = "P1W"; } - else if (trimmedText.Equals("mese") || trimmedText.Equals("mesi")) + else if (trimmedText.Equals("mese", StringComparison.Ordinal) || + trimmedText.Equals("mesi", StringComparison.Ordinal)) { timex = "P1M"; } - else if (trimmedText.Equals("anno") || trimmedText.Equals("annata") || trimmedText.Equals("anni")) + else if (trimmedText.Equals("anno", StringComparison.Ordinal) || + trimmedText.Equals("annata", StringComparison.Ordinal) || + trimmedText.Equals("anni", StringComparison.Ordinal)) { // year timex = "P1Y"; @@ -146,32 +174,35 @@ public bool GetMatchedUnitTimex(string text, out string timex) public string WeekDayGroupMatchString(Match match) { string weekday = string.Empty; - if (match.Groups["g0"].ToString() != string.Empty) + + 
if (match.Groups["g0"].Length != 0) { - weekday = match.Groups["g0"].ToString() + "a"; + weekday = match.Groups["g0"] + "a"; } - else if (match.Groups["g1"].ToString() != string.Empty) + else if (match.Groups["g1"].Length != 0) { - weekday = match.Groups["g1"].ToString() + "io"; + weekday = match.Groups["g1"] + "io"; } - else if (match.Groups["g2"].ToString() != string.Empty) + else if (match.Groups["g2"].Length != 0) { - weekday = match.Groups["g2"].ToString() + "e"; + weekday = match.Groups["g2"] + "e"; } - else if (match.Groups["g3"].ToString() != string.Empty) + else if (match.Groups["g3"].Length != 0) { - weekday = match.Groups["g3"].ToString() + "ì"; + weekday = match.Groups["g3"] + "ì"; } - else if (match.Groups["g4"].ToString() != string.Empty) + else if (match.Groups["g4"].Length != 0) { - weekday = match.Groups["g4"].ToString() + "a"; + weekday = match.Groups["g4"] + "a"; } - else if (match.Groups["g5"].ToString() != string.Empty) + else if (match.Groups["g5"].Length != 0) { - weekday = match.Groups["g5"].ToString() + "o"; + weekday = match.Groups["g5"] + "o"; } return weekday; } + + public string ReplaceValueInTextWithFutTerm(string text, string value) => TasksModeSetHandler.ReplaceValueInTextWithFutTerm(text, value, ThisTerms); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianTimeParserConfiguration.cs index c39f2e0927..2cef97b99a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianTimeParserConfiguration.cs @@ -1,5 +1,10 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -43,16 +48,20 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha var deltaMin = 0; var trimmedPrefix = prefix.Trim(); + // @TODO move hardcoded values to resources file + // "it's half past 8" - if (trimmedPrefix.EndsWith("mezza") || trimmedPrefix.EndsWith("mezzo")) + if (trimmedPrefix.EndsWith("mezza", StringComparison.Ordinal) || + trimmedPrefix.EndsWith("mezzo", StringComparison.Ordinal)) { deltaMin = 30; } - else if (trimmedPrefix.EndsWith("un quarto") || trimmedPrefix.EndsWith("quarto")) + else if (trimmedPrefix.EndsWith("un quarto", StringComparison.Ordinal) || + trimmedPrefix.EndsWith("quarto", StringComparison.Ordinal)) { deltaMin = 15; } - else if (trimmedPrefix.EndsWith("tre quarti")) + else if (trimmedPrefix.EndsWith("tre quarti", StringComparison.Ordinal)) { deltaMin = 45; } @@ -62,7 +71,7 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha var minStr = match.Groups["deltamin"].Value; if (!string.IsNullOrWhiteSpace(minStr)) { - deltaMin = int.Parse(minStr); + deltaMin = int.Parse(minStr, CultureInfo.InvariantCulture); } else { @@ -72,7 +81,8 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha } // 'to' i.e 'one to five' - if (trimmedPrefix.StartsWith("meno")) + if (trimmedPrefix.StartsWith("meno", StringComparison.Ordinal) || + trimmedPrefix.EndsWith("alle", StringComparison.Ordinal)) { deltaMin = -deltaMin; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianTimePeriodParserConfiguration.cs index 96229ac04b..0ce74457bc 100644 --- 
a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianTimePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; @@ -54,10 +58,10 @@ public ItalianTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration c public IDateTimeUtilityConfiguration UtilityConfiguration { get; } - public bool GetMatchedTimexRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) + public bool GetMatchedTimeRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) { var trimmedText = text.Trim(); - if (trimmedText.EndsWith("s")) + if (trimmedText.EndsWith("s", StringComparison.Ordinal)) { trimmedText = trimmedText.Substring(0, trimmedText.Length - 1); } @@ -67,23 +71,24 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou endMin = 0; var timeOfDay = string.Empty; - if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o))) + + if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Morning; } - else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Afternoon; } - else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Evening; } - else if (DateTimeDefinitions.DaytimeTermList.Any(o => 
trimmedText.Equals(o))) + else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o, StringComparison.Ordinal))) { timeOfDay = Constants.Daytime; } - else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Night; } @@ -93,7 +98,7 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou return false; } - var parseResult = TimexUtility.ParseTimeOfDay(timeOfDay); + var parseResult = TimexUtility.ResolveTimeOfDay(timeOfDay); timex = parseResult.Timex; beginHour = parseResult.BeginHour; endHour = parseResult.EndHour; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/TimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/TimeParser.cs index e44a84c769..6d8a5c0a74 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/TimeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/TimeParser.cs @@ -1,4 +1,10 @@ -using Microsoft.Recognizers.Text.Utilities; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; + +using Microsoft.Recognizers.Text.Utilities; + using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Italian @@ -36,10 +42,10 @@ private DateTimeResolutionResult ParseIsh(string text, DateObject referenceTime) var hour = 12; if (!string.IsNullOrEmpty(hourStr)) { - hour = int.Parse(hourStr); + hour = int.Parse(hourStr, CultureInfo.InvariantCulture); } - ret.Timex = "T" + hour.ToString("D2"); + ret.Timex = "T" + hour.ToString("D2", CultureInfo.InvariantCulture); ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(referenceTime.Year, referenceTime.Month, referenceTime.Day, hour, 0, 0); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Utilities/ItalianDatetimeUtilityConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Utilities/ItalianDatetimeUtilityConfiguration.cs index 00430a8c26..af5ef48ae8 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Utilities/ItalianDatetimeUtilityConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Utilities/ItalianDatetimeUtilityConfiguration.cs @@ -1,74 +1,32 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Italian.Utilities { - public class ItalianDatetimeUtilityConfiguration : IDateTimeUtilityConfiguration + public class ItalianDatetimeUtilityConfiguration : BaseDatetimeUtilityConfiguration { - public static readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoRegex, RegexFlags); - - public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); - - public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); - - public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); - - public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); - - public static readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); - - public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); - - public static readonly Regex AmPmDescRegex = - new Regex(DateTimeDefinitions.AmPmDescRegex, RegexFlags); - - public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); - - public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); - - public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); - - public static readonly Regex CommonDatePrefixRegex = - new Regex(DateTimeDefinitions.CommonDatePrefixRegex, RegexFlags); - - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - - Regex IDateTimeUtilityConfiguration.LaterRegex => LaterRegex; - - Regex IDateTimeUtilityConfiguration.AgoRegex => AgoRegex; - - Regex IDateTimeUtilityConfiguration.InConnectorRegex 
=> InConnectorRegex; - - Regex IDateTimeUtilityConfiguration.SinceYearSuffixRegex => SinceYearSuffixRegex; - - Regex IDateTimeUtilityConfiguration.WithinNextPrefixRegex => WithinNextPrefixRegex; - - Regex IDateTimeUtilityConfiguration.AmDescRegex => AmDescRegex; - - Regex IDateTimeUtilityConfiguration.PmDescRegex => PmDescRegex; - - Regex IDateTimeUtilityConfiguration.AmPmDescRegex => AmPmDescRegex; - - Regex IDateTimeUtilityConfiguration.RangeUnitRegex => RangeUnitRegex; - - Regex IDateTimeUtilityConfiguration.TimeUnitRegex => TimeUnitRegex; - - Regex IDateTimeUtilityConfiguration.DateUnitRegex => DateUnitRegex; - - Regex IDateTimeUtilityConfiguration.CommonDatePrefixRegex => CommonDatePrefixRegex; - - bool IDateTimeUtilityConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + public ItalianDatetimeUtilityConfiguration() + : base( + DateTimeDefinitions.AgoRegex, + DateTimeDefinitions.LaterRegex, + DateTimeDefinitions.InConnectorRegex, + DateTimeDefinitions.SinceYearSuffixRegex, + DateTimeDefinitions.WithinNextPrefixRegex, + DateTimeDefinitions.AmDescRegex, + DateTimeDefinitions.PmDescRegex, + DateTimeDefinitions.AmPmDescRegex, + DateTimeDefinitions.RangeUnitRegex, + DateTimeDefinitions.TimeUnitRegex, + DateTimeDefinitions.DateUnitRegex, + DateTimeDefinitions.CommonDatePrefixRegex, + DateTimeDefinitions.RangePrefixRegex, + RegexOptions.Singleline | RegexOptions.ExplicitCapture, + DateTimeDefinitions.CheckBothBeforeAfter) + { + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDateExtractorConfiguration.cs index a74df5776b..65e11d253e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDateExtractorConfiguration.cs @@ -1,214 +1,146 @@ -using System.Collections.Generic; 
+// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Definitions.Utilities; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Japanese; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseDateExtractorConfiguration : AbstractYearExtractor, IDateTimeExtractor + public class JapaneseDateExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKDateExtractorConfiguration { - public static readonly string ExtractorName = Constants.SYS_DATETIME_DATE; // "Date"; - - public static readonly Regex MonthRegex = new Regex(DateTimeDefinitions.MonthRegex, RegexFlags); - - public static readonly Regex DayRegex = new Regex(DateTimeDefinitions.DayRegex, RegexFlags); - public static readonly Regex DayRegexInJapanese = new Regex(DateTimeDefinitions.DateDayRegexInJapanese, RegexFlags); + public static readonly Regex WeekDayRegex = new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); - public static readonly Regex DayRegexNumInJapanese = new Regex(DateTimeDefinitions.DayRegexNumInJapanese, RegexFlags); + public static readonly Regex LunarRegex = new Regex(DateTimeDefinitions.LunarRegex, RegexFlags, RegexTimeOut); - public static readonly Regex MonthNumRegex = new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + public static readonly Regex ThisRegex = new Regex(DateTimeDefinitions.DateThisRegex, RegexFlags, RegexTimeOut); - public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DateLastRegex, RegexFlags, RegexTimeOut); - public static readonly Regex RelativeRegex = new 
Regex(DateTimeDefinitions.RelativeRegex, RegexFlags); + public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DateNextRegex, RegexFlags, RegexTimeOut); - public static readonly Regex ZeroToNineIntegerRegexJap = new Regex(DateTimeDefinitions.ZeroToNineIntegerRegexJap, RegexFlags); + public static readonly Regex SpecialDayRegex = new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, RegexTimeOut); - public static readonly Regex YearInJapaneseRegex = new Regex(DateTimeDefinitions.DateYearInJapaneseRegex, RegexFlags); + public static readonly Regex WeekDayOfMonthRegex = new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut); - public static readonly Regex WeekDayRegex = new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + public static readonly Regex SpecialDate = new Regex(DateTimeDefinitions.SpecialDate, RegexFlags, RegexTimeOut); - public static readonly Regex LunarRegex = new Regex(DateTimeDefinitions.LunarRegex, RegexFlags); + public static readonly Regex SpecialDayWithNumRegex = new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags, RegexTimeOut); - public static readonly Regex ThisRegex = new Regex(DateTimeDefinitions.DateThisRegex, RegexFlags); + public static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); - public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DateLastRegex, RegexFlags); + public static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); - public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DateNextRegex, RegexFlags); + public static readonly Regex WeekDayStartEnd = new Regex(DateTimeDefinitions.WeekDayStartEnd, RegexFlags, RegexTimeOut); - public static readonly Regex SpecialDayRegex = new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags); + public static readonly Regex DateTimePeriodUnitRegex = new 
Regex(DateTimeDefinitions.DateTimePeriodUnitRegex, RegexFlags, RegexTimeOut); - public static readonly Regex SpecialMonthRegex = new Regex(DateTimeDefinitions.SpecialMonthRegex, RegexFlags); + public static readonly Regex RangeConnectorSymbolRegex = new Regex(DateTimeDefinitions.DatePeriodTillRegex, RegexFlags, RegexTimeOut); - public static readonly Regex SpecialYearRegex = new Regex(DateTimeDefinitions.SpecialYearRegex, RegexFlags); + public static readonly Regex MonthRegex = new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DayRegex = new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DayRegexInCJK = new Regex(DateTimeDefinitions.DateDayRegexInCJK, RegexFlags, RegexTimeOut); + public static readonly Regex DayRegexNumInCJK = new Regex(DateTimeDefinitions.DayRegexNumInCJK, RegexFlags, RegexTimeOut); + public static readonly Regex MonthNumRegex = new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); + public static readonly Regex WeekDayAndDayRegex = new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DurationRelativeDurationUnitRegex = new Regex(DateTimeDefinitions.DurationRelativeDurationUnitRegex, RegexFlags, RegexTimeOut); + public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + public static readonly Regex RelativeRegex = new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut); + public static readonly Regex RelativeMonthRegex = new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex ZeroToNineIntegerRegexCJK = new Regex(DateTimeDefinitions.ZeroToNineIntegerRegexCJK, RegexFlags, RegexTimeOut); + public static readonly Regex YearInCJKRegex = new Regex(DateTimeDefinitions.DateYearInCJKRegex, RegexFlags, RegexTimeOut); + public static readonly Regex ThisRe = new 
Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastRe = new Regex(DateTimeDefinitions.LastPrefixRegex, RegexFlags, RegexTimeOut); + public static readonly Regex NextRe = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DynastyYearRegex = new Regex(DateTimeDefinitions.DynastyYearRegex, RegexFlags, RegexTimeOut); + public static readonly string DynastyStartYear = DateTimeDefinitions.DynastyStartYear; + public static readonly ImmutableDictionary DynastyYearMap = DateTimeDefinitions.DynastyYearMap.ToImmutableDictionary(); - public static readonly Regex WeekDayOfMonthRegex = new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags); - - public static readonly Regex ThisRe = new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); - - public static readonly Regex LastRe = new Regex(DateTimeDefinitions.LastPrefixRegex, RegexFlags); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public static readonly Regex NextRe = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + public JapaneseDateExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + var durationConfig = new BaseDateTimeOptionsConfiguration(config.Culture, DateTimeOptions.None); - public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + DurationExtractor = new BaseCJKDurationExtractor(new JapaneseDurationExtractorConfiguration(durationConfig)); - public static readonly Regex SpecialDate = new Regex(DateTimeDefinitions.SpecialDate, RegexFlags); + ImplicitDateList = new List + { + SpecialDayWithNumRegex, SpecialDayRegex, ThisRegex, LastRegex, NextRegex, + WeekDayRegex, WeekDayOfMonthRegex, SpecialDate, WeekDayAndDayRegex, + }; - public static readonly 
Regex[] DateRegexList = - { // 2016年12月1日 - new Regex(DateTimeDefinitions.DateRegexList1, RegexFlags), - - // 2015/12/23 - new Regex(DateTimeDefinitions.DateRegexList10, RegexFlags), + var dateRegex1 = new Regex(DateTimeDefinitions.DateRegexList1, RegexFlags, RegexTimeOut); - // # 2016年12月 - new Regex(DateTimeDefinitions.DateRegexList2, RegexFlags), - - // 12月1日 - new Regex(DateTimeDefinitions.DateRegexList9, RegexFlags), + // 金曜日 6月 15日 + var dateRegex2 = new Regex(DateTimeDefinitions.DateRegexList2, RegexFlags, RegexTimeOut); // (2015年)?(农历)?十月二十(星期三)? - new Regex(DateTimeDefinitions.DateRegexList3, RegexFlags), + var dateRegex3 = new Regex(DateTimeDefinitions.DateRegexList3, RegexFlags, RegexTimeOut); - // 7/23 - new Regex(DateTimeDefinitions.DateRegexList4, RegexFlags), + // 2015-12-23 + var dateRegex8 = new Regex(DateTimeDefinitions.DateRegexList8, RegexFlags, RegexTimeOut); // 23/7 - new Regex(DateTimeDefinitions.DateRegexList5, RegexFlags), - - DateTimeDefinitions.DefaultLanguageFallback == Constants.DefaultLanguageFallback_DMY ? - - // 23-3-2015 - new Regex(DateTimeDefinitions.DateRegexList7, RegexFlags) : + var dateRegex5 = new Regex(DateTimeDefinitions.DateRegexList5, RegexFlags, RegexTimeOut); - // 3-23-2017 - new Regex(DateTimeDefinitions.DateRegexList6, RegexFlags), - - DateTimeDefinitions.DefaultLanguageFallback == Constants.DefaultLanguageFallback_DMY ? 
+ // 7/23 + var dateRegex4 = new Regex(DateTimeDefinitions.DateRegexList4, RegexFlags, RegexTimeOut); - // 3-23-2017 - new Regex(DateTimeDefinitions.DateRegexList6, RegexFlags) : + // 23-3-2017 + var dateRegex7 = new Regex(DateTimeDefinitions.DateRegexList7, RegexFlags, RegexTimeOut); - // 23-3-2015 - new Regex(DateTimeDefinitions.DateRegexList7, RegexFlags), + // 3-23-2015 + var dateRegex6 = new Regex(DateTimeDefinitions.DateRegexList6, RegexFlags, RegexTimeOut); - // 2015-12-23 - new Regex(DateTimeDefinitions.DateRegexList8, RegexFlags), - - // 2016/12 - new Regex(DateTimeDefinitions.DateRegexList11, RegexFlags), - }; + // 12月1日 + var dateRegex9 = new Regex(DateTimeDefinitions.DateRegexList9, RegexFlags, RegexTimeOut); - public static readonly Regex[] ImplicitDateList = - { - LunarRegex, SpecialDayRegex, ThisRegex, LastRegex, NextRegex, - WeekDayRegex, WeekDayOfMonthRegex, SpecialMonthRegex, SpecialYearRegex, SpecialDate, - }; + // 2015/12/23 + var dateRegex10 = new Regex(DateTimeDefinitions.DateRegexList10, RegexFlags, RegexTimeOut); - public static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + // 2016/12 (this is not a Date) + // var dateRegex11 = new Regex(DateTimeDefinitions.DateRegexList11, RegexFlags, RegexTimeOut); - public static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + // Regex precedence where the order between D and M varies is controlled by DefaultLanguageFallback + var enableDmy = DateTimeDefinitions.DefaultLanguageFallback == Constants.DefaultLanguageFallback_DMY; - public static readonly Regex DateTimePeriodUnitRegex = new Regex(DateTimeDefinitions.DateTimePeriodUnitRegex, RegexFlags); + DateRegexList = new List { dateRegex1, dateRegex10, dateRegex2, dateRegex9, dateRegex3, dateRegex4, dateRegex5 }; + DateRegexList = DateRegexList.Concat( + enableDmy ? 
+ new[] { dateRegex7, dateRegex6, dateRegex8/*, dateRegex11*/ } : + new[] { dateRegex6, dateRegex7, dateRegex8/*, dateRegex11*/ }); - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + } - private static readonly JapaneseDurationExtractorConfiguration DurationExtractor = new JapaneseDurationExtractorConfiguration(); + public IEnumerable DateRegexList { get; } - public JapaneseDateExtractorConfiguration(IDateExtractorConfiguration config = null) - : base(config) - { - } + public IEnumerable ImplicitDateList { get; } - public static List ExtractRaw(string text) - { - var tokens = new List(); - tokens.AddRange(BasicRegexMatch(text)); - tokens.AddRange(ImplicitDate(text)); + public IDateTimeExtractor DurationExtractor { get; } - return tokens; - } + Regex ICJKDateExtractorConfiguration.DateTimePeriodUnitRegex => DateTimePeriodUnitRegex; - public override List Extract(string text) - { - return Extract(text, DateObject.Now); - } + Regex ICJKDateExtractorConfiguration.BeforeRegex => BeforeRegex; - public override List Extract(string text, DateObject referenceTime) - { - var tokens = new List(); - tokens.AddRange(BasicRegexMatch(text)); - tokens.AddRange(ImplicitDate(text)); - tokens.AddRange(DurationWithBeforeAndAfter(text, referenceTime)); + Regex ICJKDateExtractorConfiguration.AfterRegex => AfterRegex; - return Token.MergeAllTokens(tokens, text, ExtractorName); - } + Regex ICJKDateExtractorConfiguration.WeekDayStartEnd => WeekDayStartEnd; - // Match basic patterns in DateRegexList - private static List BasicRegexMatch(string text) - { - var ret = new List(); - foreach (var regex in DateRegexList) - { - var matches = regex.Matches(text); - foreach (Match match in matches) - { - ret.Add(new Token(match.Index, match.Index + match.Length)); - } - } - - return ret; - } + Regex ICJKDateExtractorConfiguration.RangeConnectorSymbolRegex => RangeConnectorSymbolRegex; - // Match several other implicit cases - private static List 
ImplicitDate(string text) - { - var ret = new List(); - foreach (var regex in ImplicitDateList) - { - var matches = regex.Matches(text); - foreach (Match match in matches) - { - ret.Add(new Token(match.Index, match.Index + match.Length)); - } - } - - return ret; - } + public Dictionary AmbiguityDateFiltersDict => DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityDateFiltersDict); - // Process case like "三天前" "两个月前" - private List DurationWithBeforeAndAfter(string text, DateObject referenceTime) - { - var ret = new List(); - var durationEr = DurationExtractor.Extract(text, referenceTime); - foreach (var er in durationEr) - { - // Only handles date durations here - // Cases with dateTime durations will be handled in DateTime Extractor - if (DateTimePeriodUnitRegex.Match(er.Text).Success) - { - continue; - } - - var pos = (int)er.Start + (int)er.Length; - if (pos < text.Length) - { - var suffix = text.Substring(pos); - var beforeMatch = BeforeRegex.Match(suffix); - var afterMatch = AfterRegex.Match(suffix); - - if ((beforeMatch.Success && suffix.StartsWith(beforeMatch.Value)) || - (afterMatch.Success && suffix.StartsWith(afterMatch.Value))) - { - var metadata = new Metadata() { IsDurationWithBeforeAndAfter = true }; - ret.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 0) + 1, metadata)); - } - } - } - - return ret; - } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDatePeriodExtractorConfiguration.cs index 4e3f3626e0..4b678a8689 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDatePeriodExtractorConfiguration.cs @@ -1,247 +1,194 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Definitions.Utilities; +using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Japanese; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseDatePeriodExtractorConfiguration : IDateTimeExtractor + public class JapaneseDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKDatePeriodExtractorConfiguration { - public static readonly string ExtractorName = Constants.SYS_DATETIME_DATEPERIOD; // "DatePeriod"; + public static readonly Regex TillRegex = new Regex(DateTimeDefinitions.DatePeriodTillRegex, RegexFlags, RegexTimeOut); - public static readonly Regex TillRegex = new Regex(DateTimeDefinitions.DatePeriodTillRegex, RegexFlags); + public static readonly Regex RangePrefixRegex = new Regex(DateTimeDefinitions.DatePeriodRangePrefixRegex, RegexFlags, RegexTimeOut); - public static readonly Regex DayRegex = new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + public static readonly Regex RangeSuffixRegex = new Regex(DateTimeDefinitions.DatePeriodRangeSuffixRegex, RegexFlags, RegexTimeOut); - public static readonly Regex DayRegexForPeriod = new Regex(DateTimeDefinitions.DayRegexForPeriod, RegexFlags); + public static readonly Regex StrictYearRegex = new Regex(DateTimeDefinitions.StrictYearRegex, RegexFlags, RegexTimeOut); - public static readonly Regex DayRegexInJapanese = new Regex(DateTimeDefinitions.DatePeriodDayRegexInJapanese, RegexFlags); + public static readonly Regex YearInCJKRegex = new Regex(DateTimeDefinitions.DatePeriodYearInCJKRegex, RegexFlags, RegexTimeOut); - public static readonly Regex MonthNumRegex = new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + // for case "(より)?(2017)?12日に1月10日" + 
public static readonly Regex SimpleCasesRegex = new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags, RegexTimeOut); - public static readonly Regex ThisRegex = new Regex(DateTimeDefinitions.DatePeriodThisRegex, RegexFlags); + public static readonly Regex YearAndMonth = new Regex(DateTimeDefinitions.YearAndMonth, RegexFlags, RegexTimeOut); - public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DatePeriodLastRegex, RegexFlags); + // 2017.12, 2017-12, 2017/12, 12/2017 + public static readonly Regex PureNumYearAndMonth = new Regex(DateTimeDefinitions.PureNumYearAndMonth, RegexFlags, RegexTimeOut); - public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DatePeriodNextRegex, RegexFlags); + public static readonly Regex SimpleYearAndMonth = new Regex(DateTimeDefinitions.SimpleYearAndMonth, RegexFlags, RegexTimeOut); - public static readonly Regex RelativeMonthRegex = new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + public static readonly Regex OneWordPeriodRegex = new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags, RegexTimeOut); - public static readonly Regex MonthRegex = new Regex(DateTimeDefinitions.MonthRegex, RegexFlags); + public static readonly Regex WeekOfMonthRegex = new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags, RegexTimeOut); - public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + public static readonly Regex WeekOfYearRegex = new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags, RegexTimeOut); - public static readonly Regex StrictYearRegex = new Regex(DateTimeDefinitions.StrictYearRegex, RegexFlags); + public static readonly Regex WeekOfDateRegex = new Regex(DateTimeDefinitions.WeekOfDateRegex, RegexFlags, RegexTimeOut); - public static readonly Regex YearRegexInNumber = new Regex(DateTimeDefinitions.YearRegexInNumber, RegexFlags); + public static readonly Regex MonthOfDateRegex = new Regex(DateTimeDefinitions.MonthOfDateRegex, 
RegexFlags, RegexTimeOut); - public static readonly Regex ZeroToNineIntegerRegexJap = new Regex(DateTimeDefinitions.ZeroToNineIntegerRegexJap, RegexFlags); + public static readonly Regex WhichWeekRegex = new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags, RegexTimeOut); - public static readonly Regex YearInJapaneseRegex = new Regex(DateTimeDefinitions.DatePeriodYearInJapaneseRegex, RegexFlags); + public static readonly Regex FollowedUnit = new Regex(DateTimeDefinitions.FollowedUnit, RegexFlags, RegexTimeOut); - public static readonly Regex MonthSuffixRegex = new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags); + public static readonly Regex NumberCombinedWithUnit = new Regex(DateTimeDefinitions.NumberCombinedWithUnit, RegexFlags, RegexTimeOut); - // for case "(从)?(2017年)?一月十日到十二日" - public static readonly Regex SimpleCasesRegex = new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags); + public static readonly Regex YearToYear = new Regex(DateTimeDefinitions.YearToYear, RegexFlags, RegexTimeOut); - public static readonly Regex YearAndMonth = new Regex(DateTimeDefinitions.YearAndMonth, RegexFlags); + public static readonly Regex YearToYearSuffixRequired = new Regex(DateTimeDefinitions.YearToYearSuffixRequired, RegexFlags, RegexTimeOut); - public static readonly Regex SimpleYearAndMonth = new Regex(DateTimeDefinitions.SimpleYearAndMonth, RegexFlags); + public static readonly Regex MonthToMonth = new Regex(DateTimeDefinitions.MonthToMonth, RegexFlags, RegexTimeOut); - // 2017.12, 2017-12, 2017/12, 12/2017 - public static readonly Regex PureNumYearAndMonth = new Regex(DateTimeDefinitions.PureNumYearAndMonth, RegexFlags); + public static readonly Regex MonthToMonthSuffixRequired = new Regex(DateTimeDefinitions.MonthToMonthSuffixRequired, RegexFlags, RegexTimeOut); - public static readonly Regex OneWordPeriodRegex = new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags); + public static readonly Regex DayToDay = new 
Regex(DateTimeDefinitions.DayToDay, RegexFlags, RegexTimeOut); - public static readonly Regex WeekOfMonthRegex = new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags); + public static readonly Regex DayRegexForPeriod = new Regex(DateTimeDefinitions.DayRegexForPeriod, RegexFlags, RegexTimeOut); - public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.UnitRegex, RegexFlags); + public static readonly Regex MonthDayRange = new Regex(DateTimeDefinitions.MonthDayRange, RegexFlags, RegexTimeOut); - public static readonly Regex FollowedUnit = new Regex(DateTimeDefinitions.FollowedUnit, RegexFlags); + public static readonly Regex YearMonthRange = new Regex(DateTimeDefinitions.YearMonthRange, RegexFlags, RegexTimeOut); - public static readonly Regex NumberCombinedWithUnit = new Regex(DateTimeDefinitions.NumberCombinedWithUnit, RegexFlags); + public static readonly Regex YearMonthDayRange = new Regex(DateTimeDefinitions.YearMonthDayRange, RegexFlags, RegexTimeOut); - public static readonly Regex YearToYear = new Regex(DateTimeDefinitions.YearToYear, RegexFlags); + public static readonly Regex PastRegex = new Regex(DateTimeDefinitions.PastRegex, RegexFlags, RegexTimeOut); - public static readonly Regex MonthToMonth = new Regex(DateTimeDefinitions.MonthToMonth, RegexFlags); + public static readonly Regex FutureRegex = new Regex(DateTimeDefinitions.FutureRegex, RegexFlags, RegexTimeOut); - public static readonly Regex DayToDay = new Regex(DateTimeDefinitions.DayToDay, RegexFlags); + public static readonly Regex WeekWithWeekDayRangeRegex = new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags, RegexTimeOut); - public static readonly Regex MonthDayRange = new Regex(DateTimeDefinitions.MonthDayRange, RegexFlags); + public static readonly Regex FirstLastOfYearRegex = new Regex(DateTimeDefinitions.FirstLastOfYearRegex, RegexFlags, RegexTimeOut); - public static readonly Regex YearMonthRange = new Regex(DateTimeDefinitions.YearMonthRange, 
RegexFlags); + public static readonly Regex SeasonWithYear = new Regex(DateTimeDefinitions.SeasonWithYear, RegexFlags, RegexTimeOut); - public static readonly Regex YearMonthDayRange = new Regex(DateTimeDefinitions.YearMonthDayRange, RegexFlags); + public static readonly Regex QuarterRegex = new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags, RegexTimeOut); - public static readonly Regex PastRegex = new Regex(DateTimeDefinitions.PastRegex, RegexFlags); + public static readonly Regex DecadeRegex = new Regex(DateTimeDefinitions.DecadeRegex, RegexFlags, RegexTimeOut); - public static readonly Regex FutureRegex = new Regex(DateTimeDefinitions.FutureRegex, RegexFlags); + public static readonly Regex CenturyRegex = new Regex(DateTimeDefinitions.CenturyRegex, RegexFlags, RegexTimeOut); - public static readonly Regex SeasonRegex = new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags); + public static readonly Regex SpecialMonthRegex = new Regex(DateTimeDefinitions.SpecialMonthRegex, RegexFlags, RegexTimeOut); - public static readonly Regex SeasonWithYear = new Regex(DateTimeDefinitions.SeasonWithYear, RegexFlags); + public static readonly Regex SpecialYearRegex = new Regex(DateTimeDefinitions.SpecialYearRegex, RegexFlags, RegexTimeOut); - public static readonly Regex QuarterRegex = new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags); - - public static readonly Regex DecadeRegex = new Regex(DateTimeDefinitions.DecadeRegex, RegexFlags); + public static readonly Regex DayRegex = new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DayRegexInCJK = new Regex(DateTimeDefinitions.DatePeriodDayRegexInCJK, RegexFlags, RegexTimeOut); + public static readonly Regex MonthNumRegex = new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); + public static readonly Regex ThisRegex = new Regex(DateTimeDefinitions.DatePeriodThisRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DateUnitRegex = new 
Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DatePeriodLastRegex, RegexFlags, RegexTimeOut); + public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DatePeriodNextRegex, RegexFlags, RegexTimeOut); + public static readonly Regex RelativeMonthRegex = new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LaterEarlyPeriodRegex = new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DatePointWithAgoAndLater = new Regex(DateTimeDefinitions.DatePointWithAgoAndLater, RegexFlags, RegexTimeOut); + public static readonly Regex ReferenceDatePeriodRegex = new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags, RegexTimeOut); + public static readonly Regex ComplexDatePeriodRegex = new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags, RegexTimeOut); + public static readonly Regex MonthRegex = new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + public static readonly Regex YearRegexInNumber = new Regex(DateTimeDefinitions.YearRegexInNumber, RegexFlags, RegexTimeOut); + public static readonly Regex ZeroToNineIntegerRegexCJK = new Regex(DateTimeDefinitions.ZeroToNineIntegerRegexCJK, RegexFlags, RegexTimeOut); + public static readonly Regex MonthSuffixRegex = new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags, RegexTimeOut); + public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.UnitRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DurationUnitRegex = new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); + public static readonly Regex SeasonRegex = new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags, RegexTimeOut); private const RegexOptions 
RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly JapaneseDateExtractorConfiguration DatePointExtractor = new JapaneseDateExtractorConfiguration(); - - private static readonly IntegerExtractor IntegerExtractor = new IntegerExtractor(); - private static readonly Regex[] SimpleCasesRegexes = { SimpleCasesRegex, OneWordPeriodRegex, StrictYearRegex, YearToYear, + YearToYearSuffixRequired, MonthToMonth, DayToDay, - MonthDayRange, YearMonthRange, MonthDayRange, YearMonthDayRange, + MonthToMonthSuffixRequired, + YearAndMonth, PureNumYearAndMonth, - YearInJapaneseRegex, + YearInCJKRegex, + SpecialMonthRegex, + SpecialYearRegex, WeekOfMonthRegex, + WeekOfYearRegex, + WeekOfDateRegex, + MonthOfDateRegex, + WhichWeekRegex, + LaterEarlyPeriodRegex, SeasonWithYear, QuarterRegex, DecadeRegex, + CenturyRegex, + ReferenceDatePeriodRegex, + DatePointWithAgoAndLater, }; - public List Extract(string text) + public JapaneseDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) { - return Extract(text, DateObject.Now); - } + DatePointExtractor = new BaseCJKDateExtractor(new JapaneseDateExtractorConfiguration(this)); + DurationExtractor = new BaseCJKDurationExtractor(new JapaneseDurationExtractorConfiguration(this)); - public List Extract(string text, DateObject referenceTime) - { - var tokens = new List(); - tokens.AddRange(MatchSimpleCases(text)); - tokens.AddRange(MergeTwoTimePoints(text, referenceTime)); - tokens.AddRange(MatchNumberWithUnit(text)); - - return Token.MergeAllTokens(tokens, text, ExtractorName); - } - - // match pattern in simple case - private static List MatchSimpleCases(string text) - { - var ret = new List(); - foreach (var regex in SimpleCasesRegexes) + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) { - var matches = regex.Matches(text); - foreach (Match match in matches) - { - ret.Add(new Token(match.Index, match.Index + match.Length)); 
- } + numOptions = NumberOptions.NoProtoCache; } - return ret; + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = new IntegerExtractor(numConfig); } - // merge two date - private static List MergeTwoTimePoints(string text, DateObject referenceTime) - { - var ret = new List(); - var er = DatePointExtractor.Extract(text, referenceTime); - if (er.Count <= 1) - { - return ret; - } + public IDateTimeExtractor DatePointExtractor { get; } - // merge '{TimePoint} 到 {TimePoint}' - var idx = 0; - while (idx < er.Count - 1) - { - var middleBegin = er[idx].Start + er[idx].Length ?? 0; - var middleEnd = er[idx + 1].Start ?? 0; - if (middleBegin >= middleEnd) - { - idx++; - continue; - } - - var middleStr = text.Substring(middleBegin, middleEnd - middleBegin); - - if (TillRegex.IsExactMatch(middleStr, trim: true)) - { - var periodBegin = er[idx].Start ?? 0; - var periodEnd = (er[idx + 1].Start ?? 0) + (er[idx + 1].Length ?? 0); - - // handle "从" - var beforeStr = text.Substring(0, periodBegin); - if (beforeStr.Trim().EndsWith("从")) - { - periodBegin = beforeStr.LastIndexOf("从", StringComparison.Ordinal); - } - - ret.Add(new Token(periodBegin, periodEnd)); - idx += 2; - continue; - } - - idx++; - } + public IDateTimeExtractor DurationExtractor { get; } - return ret; - } + public IExtractor IntegerExtractor { get; } - // extract case like "前两年" "前三个月" - private static List MatchNumberWithUnit(string text) - { - var ret = new List(); + IEnumerable ICJKDatePeriodExtractorConfiguration.SimpleCasesRegexes => SimpleCasesRegexes; - var durations = new List(); - var ers = IntegerExtractor.Extract(text); - foreach (var er in ers) - { - var afterStr = text.Substring(er.Start + er.Length ?? 0); - var match = FollowedUnit.MatchBegin(afterStr, trim: true); + Regex ICJKDatePeriodExtractorConfiguration.TillRegex => TillRegex; - if (match.Success) - { - durations.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 
0) + match.Length)); - } - } + Regex ICJKDatePeriodExtractorConfiguration.FutureRegex => FutureRegex; - if (NumberCombinedWithUnit.IsMatch(text)) - { - var matches = NumberCombinedWithUnit.Matches(text); - foreach (Match match in matches) - { - durations.Add(new Token(match.Index, match.Index + match.Length)); - } - } + Regex ICJKDatePeriodExtractorConfiguration.PastRegex => PastRegex; - foreach (var duration in durations) - { - var beforeStr = text.Substring(0, duration.Start); - if (string.IsNullOrWhiteSpace(beforeStr)) - { - continue; - } - - var match = PastRegex.Match(beforeStr); - if (match.Success && string.IsNullOrWhiteSpace(beforeStr.Substring(match.Index + match.Length))) - { - ret.Add(new Token(match.Index, duration.End)); - continue; - } - - match = FutureRegex.Match(beforeStr); - if (match.Success && string.IsNullOrWhiteSpace(beforeStr.Substring(match.Index + match.Length))) - { - ret.Add(new Token(match.Index, duration.End)); - } - } + Regex ICJKDatePeriodExtractorConfiguration.DateUnitRegex => DateUnitRegex; - return ret; - } + Regex ICJKDatePeriodExtractorConfiguration.FirstLastOfYearRegex => FirstLastOfYearRegex; + + Regex ICJKDatePeriodExtractorConfiguration.UnitRegex => UnitRegex; + + Regex ICJKDatePeriodExtractorConfiguration.NumberCombinedWithUnit => NumberCombinedWithUnit; + + Regex ICJKDatePeriodExtractorConfiguration.FollowedUnit => FollowedUnit; + + Regex ICJKDatePeriodExtractorConfiguration.RangePrefixRegex => RangePrefixRegex; + + Regex ICJKDatePeriodExtractorConfiguration.RangeSuffixRegex => RangeSuffixRegex; + + public Dictionary AmbiguityFiltersDict => DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityDatePeriodFiltersDict); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDateTimeExtractorConfiguration.cs index 16a0633f5b..8cd40518ef 100644 --- 
a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDateTimeExtractorConfiguration.cs @@ -1,189 +1,77 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseDateTimeExtractorConfiguration : IDateTimeExtractor + public class JapaneseDateTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKDateTimeExtractorConfiguration { public static readonly string ExtractorName = Constants.SYS_DATETIME_DATETIME; // "DateTime"; - public static readonly Regex PrepositionRegex = new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + public static readonly Regex PrepositionRegex = new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); - public static readonly Regex NowRegex = new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + public static readonly Regex NowRegex = new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); - public static readonly Regex NightRegex = new Regex(DateTimeDefinitions.NightRegex, RegexFlags); + public static readonly Regex NightRegex = new Regex(DateTimeDefinitions.NightRegex, RegexFlags, RegexTimeOut); - public static readonly Regex TimeOfTodayRegex = new Regex(DateTimeDefinitions.TimeOfTodayRegex, RegexFlags); + public static readonly Regex TimeOfSpecialDayRegex = new Regex(DateTimeDefinitions.TimeOfSpecialDayRegex, RegexFlags, RegexTimeOut); - public static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + public 
static readonly Regex TimeOfDayRegex = new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); - public static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + public static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); - public static readonly Regex DateTimePeriodUnitRegex = new Regex(DateTimeDefinitions.DateTimePeriodUnitRegex, RegexFlags); + public static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public static readonly Regex DateTimePeriodUnitRegex = new Regex(DateTimeDefinitions.DateTimePeriodUnitRegex, RegexFlags, RegexTimeOut); - private static readonly JapaneseDateExtractorConfiguration DatePointExtractor = new JapaneseDateExtractorConfiguration(); + public static readonly Regex DurationRelativeDurationUnitRegex = new Regex(DateTimeDefinitions.DurationRelativeDurationUnitRegex, RegexFlags, RegexTimeOut); - private static readonly JapaneseTimeExtractorConfiguration TimePointExtractor = new JapaneseTimeExtractorConfiguration(); + public static readonly Regex AgoLaterRegex = new Regex(DateTimeDefinitions.AgoLaterRegex, RegexFlags, RegexTimeOut); - private static readonly JapaneseDurationExtractorConfiguration DurationExtractor = new JapaneseDurationExtractorConfiguration(); + public static readonly Regex ConnectorRegex = new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); - // Match now - public static List BasicRegexMatch(string text) - { - var ret = new List(); - text = text.Trim(); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - // handle "now" - var matches = NowRegex.Matches(text); - foreach (Match match in matches) - { - ret.Add(new Token(match.Index, match.Index + match.Length)); - } + public 
JapaneseDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { - return ret; + DatePointExtractor = new BaseCJKDateExtractor(new JapaneseDateExtractorConfiguration(this)); + TimePointExtractor = new BaseCJKTimeExtractor(new JapaneseTimeExtractorConfiguration(this)); + DurationExtractor = new BaseCJKDurationExtractor(new JapaneseDurationExtractorConfiguration(this)); } - // Merge a Date entity and a Time entity, like "明天早上七点" - public static List MergeDateAndTime(string text, DateObject referenceTime) - { - var ret = new List(); - var ers = DatePointExtractor.Extract(text, referenceTime); - if (ers.Count == 0) - { - return ret; - } - - ers.AddRange(TimePointExtractor.Extract(text, referenceTime)); - if (ers.Count < 2) - { - return ret; - } - - ers = ers.OrderBy(o => o.Start).ToList(); - - var i = 0; - while (i < ers.Count - 1) - { - var j = i + 1; - while (j < ers.Count && ers[i].IsOverlap(ers[j])) - { - j++; - } - - if (j >= ers.Count) - { - break; - } - - if (ers[i].Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) && - ers[j].Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) - { - var middleBegin = ers[i].Start + ers[i].Length ?? 0; - var middleEnd = ers[j].Start ?? 0; - if (middleBegin > middleEnd) - { - break; - } - - var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim(); - if (string.IsNullOrEmpty(middleStr) || middleStr.Equals(",") || PrepositionRegex.IsMatch(middleStr)) - { - var begin = ers[i].Start ?? 0; - var end = (ers[j].Start ?? 0) + (ers[j].Length ?? 
0); - ret.Add(new Token(begin, end)); - } - - i = j + 1; - continue; - } - - i = j; - } - - return ret; - } + public IDateTimeExtractor DatePointExtractor { get; } - // Parse a specific time of today, tonight, this afternoon, "今天下午七点" - public static List TimeOfToday(string text, DateObject referenceTime) - { - var ret = new List(); - var ers = TimePointExtractor.Extract(text, referenceTime); - foreach (var er in ers) - { - var beforeStr = text.Substring(0, er.Start ?? 0); - - // handle "今晚7点" - var innerMatch = NightRegex.MatchBegin(er.Text, trim: true); - - if (innerMatch.Success) - { - beforeStr = text.Substring(0, (er.Start ?? 0) + innerMatch.Length); - } - - if (string.IsNullOrEmpty(beforeStr)) - { - continue; - } - - var match = TimeOfTodayRegex.Match(beforeStr); - if (match.Success && string.IsNullOrWhiteSpace(beforeStr.Substring(match.Index + match.Length))) - { - var begin = match.Index; - var end = er.Start + er.Length ?? 0; - ret.Add(new Token(begin, end)); - } - } - - return ret; - } + public IDateTimeExtractor TimePointExtractor { get; } - public List Extract(string text) - { - return Extract(text, DateObject.Now); - } + public IDateTimeExtractor DurationExtractor { get; } - public List Extract(string text, DateObject referenceTime) - { - var tokens = new List(); - tokens.AddRange(MergeDateAndTime(text, referenceTime)); - tokens.AddRange(BasicRegexMatch(text)); - tokens.AddRange(TimeOfToday(text, referenceTime)); - tokens.AddRange(DurationWithBeforeAndAfter(text, referenceTime)); + Regex ICJKDateTimeExtractorConfiguration.NowRegex => NowRegex; - return Token.MergeAllTokens(tokens, text, ExtractorName); - } + Regex ICJKDateTimeExtractorConfiguration.PrepositionRegex => PrepositionRegex; + + Regex ICJKDateTimeExtractorConfiguration.NightRegex => NightRegex; + + Regex ICJKDateTimeExtractorConfiguration.TimeOfSpecialDayRegex => TimeOfSpecialDayRegex; + + Regex ICJKDateTimeExtractorConfiguration.TimeOfDayRegex => TimeOfDayRegex; + + Regex 
ICJKDateTimeExtractorConfiguration.BeforeRegex => BeforeRegex; + + Regex ICJKDateTimeExtractorConfiguration.AfterRegex => AfterRegex; + + Regex ICJKDateTimeExtractorConfiguration.ConnectorRegex => ConnectorRegex; + + public Dictionary AmbiguityDateTimeFiltersDict => DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityDateTimeFiltersDict); - // Process case like "5分钟前" "二小时后" - private List DurationWithBeforeAndAfter(string text, DateObject referenceTime) - { - var ret = new List(); - var durationEr = DurationExtractor.Extract(text, referenceTime); - foreach (var er in durationEr) - { - var pos = (int)er.Start + (int)er.Length; - if (pos < text.Length) - { - var suffix = text.Substring(pos); - var beforeMatch = BeforeRegex.Match(suffix); - var afterMatch = AfterRegex.Match(suffix); - - if ((beforeMatch.Success && suffix.StartsWith(beforeMatch.Value)) || - (afterMatch.Success && suffix.StartsWith(afterMatch.Value))) - { - var metadata = new Metadata() { IsDurationWithBeforeAndAfter = true }; - ret.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 0) + 1, metadata)); - } - } - } - - return ret; - } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDateTimePeriodExtractorConfiguration.cs index 6832fcd591..f721e1b8d1 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDateTimePeriodExtractorConfiguration.cs @@ -1,307 +1,167 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Config; using Microsoft.Recognizers.Text.Number.Japanese; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseDateTimePeriodExtractorConfiguration : IDateTimeExtractor + public class JapaneseDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, + ICJKDateTimePeriodExtractorConfiguration { - public static readonly string ExtractorName = Constants.SYS_DATETIME_DATETIMEPERIOD; - - public static readonly Regex TillRegex = new Regex(DateTimeDefinitions.DateTimePeriodTillRegex, RegexFlags); - public static readonly Regex PrepositionRegex = new Regex(DateTimeDefinitions.DateTimePeriodPrepositionRegex, RegexFlags); + public static readonly Regex TillRegex = new Regex(DateTimeDefinitions.DateTimePeriodTillRegex, RegexFlags, RegexTimeOut); - public static readonly Regex HourRegex = new Regex(DateTimeDefinitions.HourRegex, RegexFlags); + public static readonly Regex FromPrefixRegex = new Regex(DateTimeDefinitions.DateTimePeriodFromPrefixRegex, RegexFlags, RegexTimeOut); - public static readonly Regex HourNumRegex = new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags); + public static readonly Regex FromSuffixRegex = new Regex(DateTimeDefinitions.DateTimePeriodFromSuffixRegex, RegexFlags, RegexTimeOut); - public static readonly Regex ZhijianRegex = new Regex(DateTimeDefinitions.ZhijianRegex, RegexFlags); + public static readonly Regex ConnectorRegex = new Regex(DateTimeDefinitions.DateTimePeriodConnectorRegex, RegexFlags, RegexTimeOut); - public static readonly Regex ThisRegex = new Regex(DateTimeDefinitions.DateTimePeriodThisRegex, RegexFlags); + public static readonly Regex PrepositionRegex = new 
Regex(DateTimeDefinitions.DateTimePeriodPrepositionRegex, RegexFlags, RegexTimeOut); - public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DateTimePeriodLastRegex, RegexFlags); + public static readonly Regex ZhijianRegex = new Regex(DateTimeDefinitions.ZhijianRegex, RegexFlags, RegexTimeOut); - public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DateTimePeriodNextRegex, RegexFlags); + public static readonly Regex TimeOfDayRegex = new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); - public static readonly Regex TimeOfDayRegex = new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + public static readonly Regex SpecificTimeOfDayRegex = new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); - public static readonly Regex SpecificTimeOfDayRegex = new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.DateTimePeriodUnitRegex, RegexFlags, RegexTimeOut); - public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.DateTimePeriodUnitRegex, RegexFlags); + public static readonly Regex FollowedUnit = new Regex(DateTimeDefinitions.DateTimePeriodFollowedUnit, RegexFlags, RegexTimeOut); - public static readonly Regex FollowedUnit = new Regex(DateTimeDefinitions.DateTimePeriodFollowedUnit, RegexFlags); + public static readonly Regex PastRegex = new Regex(DateTimeDefinitions.PastRegex, RegexFlags, RegexTimeOut); - public static readonly Regex NumberCombinedWithUnit = new Regex(DateTimeDefinitions.DateTimePeriodNumberCombinedWithUnit, RegexFlags); + public static readonly Regex FutureRegex = new Regex(DateTimeDefinitions.FutureRegex, RegexFlags, RegexTimeOut); - public static readonly Regex PastRegex = new Regex(DateTimeDefinitions.PastRegex, RegexFlags); + public static readonly Regex WeekDayRegex = new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); - public static readonly 
Regex FutureRegex = new Regex(DateTimeDefinitions.FutureRegex, RegexFlags); + public static readonly Regex TimePeriodLeftRegex = new Regex(DateTimeDefinitions.TimePeriodLeftRegex, RegexFlags, RegexTimeOut); - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public static readonly Regex RelativeRegex = new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut); - private static readonly JapaneseTimeExtractorConfiguration SingleTimeExtractor = new JapaneseTimeExtractorConfiguration(); + public static readonly Regex RestOfDateRegex = new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags, RegexTimeOut); - private static readonly JapaneseDateTimeExtractorConfiguration TimeWithDateExtractor = new JapaneseDateTimeExtractorConfiguration(); + public static readonly Regex AmPmDescRegex = new Regex(DateTimeDefinitions.AmPmDescRegex, RegexFlags, RegexTimeOut); - private static readonly JapaneseDateExtractorConfiguration SingleDateExtractor = new JapaneseDateExtractorConfiguration(); + public static readonly Regex BeforeAfterRegex = new Regex(DateTimeDefinitions.BeforeAfterRegex, RegexFlags, RegexTimeOut); - private static readonly CardinalExtractor CardinalExtractor = new CardinalExtractor(); + public static readonly Regex HourRegex = new Regex(DateTimeDefinitions.HourRegex, RegexFlags, RegexTimeOut); + public static readonly Regex HourNumRegex = new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags, RegexTimeOut); + public static readonly Regex ThisRegex = new Regex(DateTimeDefinitions.DateTimePeriodThisRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DateTimePeriodLastRegex, RegexFlags, RegexTimeOut); + public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DateTimePeriodNextRegex, RegexFlags, RegexTimeOut); + public static readonly Regex NumberCombinedWithUnit = new Regex(DateTimeDefinitions.DateTimePeriodNumberCombinedWithUnit, 
RegexFlags, RegexTimeOut); - private static readonly JapaneseTimePeriodExtractorConfiguration TimePeriodExtractor = new JapaneseTimePeriodExtractorConfiguration(); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public List Extract(string text) + public JapaneseDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) { - return Extract(text, DateObject.Now); - } + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } - public List Extract(string text, DateObject referenceTime) - { - var tokens = new List(); - tokens.AddRange(MergeDateAndTimePeriod(text, referenceTime)); - tokens.AddRange(MergeTwoTimePoints(text, referenceTime)); - tokens.AddRange(MatchNumberWithUnit(text)); - tokens.AddRange(MatchNight(text, referenceTime)); + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = new CardinalExtractor(numConfig, CJKNumberExtractorMode.ExtractAll); - return Token.MergeAllTokens(tokens, text, ExtractorName); + SingleDateExtractor = new BaseCJKDateExtractor(new JapaneseDateExtractorConfiguration(this)); + SingleTimeExtractor = new BaseCJKTimeExtractor(new JapaneseTimeExtractorConfiguration(this)); + SingleDateTimeExtractor = new BaseCJKDateTimeExtractor(new JapaneseDateTimeExtractorConfiguration(this)); + DurationExtractor = new BaseCJKDurationExtractor(new JapaneseDurationExtractorConfiguration(this)); + TimePeriodExtractor = new BaseCJKTimePeriodExtractor(new JapaneseTimePeriodExtractorConfiguration(this)); } - // merge Date and Time period - private List MergeDateAndTimePeriod(string text, DateObject referenceTime) - { - var ret = new List(); - var er1 = SingleDateExtractor.Extract(text, referenceTime); - var er2 = TimePeriodExtractor.Extract(text, referenceTime); - var timePoints = new List(); - - // handle the overlap problem - var j = 
0; - for (var i = 0; i < er1.Count; i++) - { - timePoints.Add(er1[i]); - while (j < er2.Count && er2[j].Start + er2[j].Length <= er1[i].Start) - { - timePoints.Add(er2[j]); - j++; - } + public IExtractor CardinalExtractor { get; } - while (j < er2.Count && er2[j].IsOverlap(er1[i])) - { - j++; - } - } + public IDateTimeExtractor SingleDateExtractor { get; } - for (; j < er2.Count; j++) - { - timePoints.Add(er2[j]); - } + public IDateTimeExtractor SingleTimeExtractor { get; } - timePoints = timePoints.OrderBy(o => o.Start).ToList(); + public IDateTimeExtractor SingleDateTimeExtractor { get; } - // merge {Date} {TimePeriod} - var idx = 0; - while (idx < timePoints.Count - 1) - { - if (timePoints[idx].Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) && - timePoints[idx + 1].Type.Equals(Constants.SYS_DATETIME_TIMEPERIOD, StringComparison.Ordinal)) - { - var middleBegin = timePoints[idx].Start + timePoints[idx].Length ?? 0; - var middleEnd = timePoints[idx + 1].Start ?? 0; - - var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim(); - if (string.IsNullOrWhiteSpace(middleStr) || PrepositionRegex.IsMatch(middleStr)) - { - var periodBegin = timePoints[idx].Start ?? 0; - var periodEnd = (timePoints[idx + 1].Start ?? 0) + (timePoints[idx + 1].Length ?? 
0); - ret.Add(new Token(periodBegin, periodEnd)); - idx += 2; - continue; - } - - idx++; - } + public IDateTimeExtractor DurationExtractor { get; } - idx++; - } + public IDateTimeExtractor TimePeriodExtractor { get; } - return ret; - } + Regex ICJKDateTimePeriodExtractorConfiguration.PrepositionRegex => PrepositionRegex; - private List MergeTwoTimePoints(string text, DateObject referenceTime) - { - var ret = new List(); - var er1 = TimeWithDateExtractor.Extract(text, referenceTime); - var er2 = SingleTimeExtractor.Extract(text, referenceTime); - var timePoints = new List(); - - // handle the overlap problem - var j = 0; - for (var i = 0; i < er1.Count; i++) - { - timePoints.Add(er1[i]); - while (j < er2.Count && er2[j].Start + er2[j].Length <= er1[i].Start) - { - timePoints.Add(er2[j]); - j++; - } + Regex ICJKDateTimePeriodExtractorConfiguration.TillRegex => TillRegex; - while (j < er2.Count && er2[j].IsOverlap(er1[i])) - { - j++; - } - } + Regex ICJKDateTimePeriodExtractorConfiguration.SpecificTimeOfDayRegex => SpecificTimeOfDayRegex; - for (; j < er2.Count; j++) - { - timePoints.Add(er2[j]); - } + Regex ICJKDateTimePeriodExtractorConfiguration.TimeOfDayRegex => TimeOfDayRegex; - timePoints = timePoints.OrderBy(o => o.Start).ToList(); + Regex ICJKDateTimePeriodExtractorConfiguration.FollowedUnit => FollowedUnit; - // merge "{TimePoint} to {TimePoint}", "between {TimePoint} and {TimePoint}" - var idx = 0; - while (idx < timePoints.Count - 1) - { - // if both ends are Time. then this is a TimePeriod, not a DateTimePeriod - if (timePoints[idx].Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal) && - timePoints[idx + 1].Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) - { - idx++; - continue; - } + Regex ICJKDateTimePeriodExtractorConfiguration.UnitRegex => UnitRegex; - var middleBegin = timePoints[idx].Start + timePoints[idx].Length ?? 0; - var middleEnd = timePoints[idx + 1].Start ?? 
0; + Regex ICJKDateTimePeriodExtractorConfiguration.PastRegex => PastRegex; - var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim(); + Regex ICJKDateTimePeriodExtractorConfiguration.FutureRegex => FutureRegex; - // handle "{TimePoint} to {TimePoint}" - if (TillRegex.IsExactMatch(middleStr, trim: true)) - { - var periodBegin = timePoints[idx].Start ?? 0; - var periodEnd = (timePoints[idx + 1].Start ?? 0) + (timePoints[idx + 1].Length ?? 0); - - // handle "from" - var beforeStr = text.Substring(0, periodBegin); - if (beforeStr.Trim().EndsWith("从")) - { - periodBegin = beforeStr.LastIndexOf("从", StringComparison.Ordinal); - } - - ret.Add(new Token(periodBegin, periodEnd)); - idx += 2; - continue; - } + Regex ICJKDateTimePeriodExtractorConfiguration.TimePeriodLeftRegex => TimePeriodLeftRegex; - // handle "between {TimePoint} and {TimePoint}" - if (middleStr.Equals("和") || middleStr.Equals("与") || middleStr.Equals("到")) - { - var periodBegin = timePoints[idx].Start ?? 0; - var periodEnd = (timePoints[idx + 1].Start ?? 0) + (timePoints[idx + 1].Length ?? 
0); - - // handle "between" - var afterStr = text.Substring(periodEnd); - var match = ZhijianRegex.Match(afterStr); - - if (match.Success) - { - ret.Add(new Token(periodBegin, periodEnd + match.Length)); - idx += 2; - continue; - } - } + Regex ICJKDateTimePeriodExtractorConfiguration.RelativeRegex => RelativeRegex; - idx++; - } + Regex ICJKDateTimePeriodExtractorConfiguration.RestOfDateRegex => RestOfDateRegex; - return ret; - } + Regex ICJKDateTimePeriodExtractorConfiguration.AmPmDescRegex => AmPmDescRegex; + + Regex ICJKDateTimePeriodExtractorConfiguration.ThisRegex => ThisRegex; + + Regex ICJKDateTimePeriodExtractorConfiguration.BeforeAfterRegex => BeforeAfterRegex; - private List MatchNight(string text, DateObject referenceTime) + public bool GetFromTokenIndex(string text, out int index) { - var ret = new List(); - var matches = SpecificTimeOfDayRegex.Matches(text); - foreach (Match match in matches) - { - ret.Add(new Token(match.Index, match.Index + match.Length)); - } + index = -1; - // Date followed by morning, afternoon - var ers = SingleDateExtractor.Extract(text, referenceTime); - if (ers.Count == 0) + var match = FromPrefixRegex.MatchEnd(text, trim: true); + if (match.Success) { - return ret; + index = match.Index; + return true; } - - foreach (var er in ers) + else { - var afterStr = text.Substring(er.Start + er.Length ?? 0); - var match = TimeOfDayRegex.Match(afterStr); + match = FromSuffixRegex.MatchBegin(text, trim: true); if (match.Success) { - var middleStr = afterStr.Substring(0, match.Index); - if (string.IsNullOrWhiteSpace(middleStr) || PrepositionRegex.IsMatch(middleStr)) - { - ret.Add(new Token(er.Start ?? 0, er.Start + er.Length + match.Index + match.Length ?? 
0)); - } + index = match.Index + match.Length; + return true; } } - return ret; + return false; } - private List MatchNumberWithUnit(string text) + public bool GetBetweenTokenIndex(string text, out int index) { - var ret = new List(); - - var durations = new List(); - var ers = CardinalExtractor.Extract(text); - - foreach (var er in ers) + index = -1; + var match = ZhijianRegex.Match(text); + if (match.Success) { - var afterStr = text.Substring(er.Start + er.Length ?? 0); - var match = FollowedUnit.MatchBegin(afterStr, trim: true); - - if (match.Success) - { - durations.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 0) + match.Length)); - } - } - - var matches = UnitRegex.Matches(text); - foreach (Match match in matches) - { - durations.Add(new Token(match.Index, match.Index + match.Length)); + index = match.Length; + return true; } - foreach (var duration in durations) - { - var beforeStr = text.Substring(0, duration.Start); - if (string.IsNullOrWhiteSpace(beforeStr)) - { - continue; - } - - var match = PastRegex.Match(beforeStr); - if (match.Success && string.IsNullOrWhiteSpace(beforeStr.Substring(match.Index + match.Length))) - { - ret.Add(new Token(match.Index, duration.End)); - continue; - } - - match = FutureRegex.Match(beforeStr); - if (match.Success && string.IsNullOrWhiteSpace(beforeStr.Substring(match.Index + match.Length))) - { - ret.Add(new Token(match.Index, duration.End)); - } - } + return false; + } - return ret; + public bool HasConnectorToken(string text) + { + return ConnectorRegex.IsExactMatch(text, trim: true); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDurationExtractorConfiguration.cs index 82ae230ef1..3203d52ba9 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDurationExtractorConfiguration.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDurationExtractorConfiguration.cs @@ -1,78 +1,91 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.NumberWithUnit; using Microsoft.Recognizers.Text.NumberWithUnit.Japanese; -using Microsoft.Recognizers.Text.Utilities; -using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public enum DurationType - { - /// - /// Types of DurationType. - /// - WithNumber, - } - public class JapaneseDurationExtractorConfiguration : - JapaneseBaseDateTimeExtractorConfiguration + public class JapaneseDurationExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKDurationExtractorConfiguration { - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.DurationYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DurationUnitRegex = new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AnUnitRegex = new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DurationConnectorRegex = new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags, RegexTimeOut); - private static readonly IExtractor InternalExtractor = new NumberWithUnitExtractor(new DurationExtractorConfiguration()); + public static readonly Regex AllRegex = new Regex(DateTimeDefinitions.DurationAllRegex, RegexFlags, RegexTimeOut); - private static readonly Regex YearRegex = new 
Regex(DateTimeDefinitions.DurationYearRegex, RegexFlags); + public static readonly Regex HalfRegex = new Regex(DateTimeDefinitions.DurationHalfRegex, RegexFlags, RegexTimeOut); - private static readonly Regex HalfSuffixRegex = new Regex(DateTimeDefinitions.DurationHalfSuffixRegex, RegexFlags); + public static readonly Regex RelativeDurationUnitRegex = new Regex(DateTimeDefinitions.DurationRelativeDurationUnitRegex, RegexFlags, RegexTimeOut); - internal override ImmutableDictionary Regexes { get; } + public static readonly Regex DuringRegex = new Regex(DateTimeDefinitions.DurationDuringRegex, RegexFlags, RegexTimeOut); - protected sealed override string ExtractType { get; } = Constants.SYS_DATETIME_DURATION; // "Duration"; + public static readonly Regex SomeRegex = new Regex(DateTimeDefinitions.DurationSomeRegex, RegexFlags, RegexTimeOut); - // extract by number with unit - public override List Extract(string source, DateObject referenceTime) + public static readonly Regex MoreOrLessRegex = new Regex(DateTimeDefinitions.DurationMoreOrLessRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private readonly bool merge; + + public JapaneseDurationExtractorConfiguration(IDateTimeOptionsConfiguration config, bool merge = true) + : base(config) { - // Use Unit to extract - var retList = InternalExtractor.Extract(source); - var res = new List(); - foreach (var ret in retList) - { - // filter - var match = YearRegex.Match(ret.Text); - if (match.Success) - { - continue; - } - - // match suffix "半" - var suffix = source.Substring((int)(ret.Start + ret.Length)); - var beginMatch = HalfSuffixRegex.MatchBegin(suffix, trim: true); - - if (beginMatch.Success) - { - var matchString = suffix.Substring(beginMatch.Index, beginMatch.Length); - ret.Text = ret.Text + matchString; - ret.Length = ret.Length + beginMatch.Length; - } - - res.Add(ret); - } + this.merge = merge; + + InternalExtractor = new 
NumberWithUnitExtractor(new DurationExtractorConfiguration()); + + UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToDictionary(k => k.Key, k => k.Value); + UnitValueMap = DateTimeDefinitions.DurationUnitValueMap; + AmbiguityDurationFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityDurationFiltersDict); - return res; } + public IExtractor InternalExtractor { get; } + + public Dictionary UnitMap { get; } + + public Dictionary UnitValueMap { get; } + + public Dictionary AmbiguityDurationFiltersDict { get; } + + Regex ICJKDurationExtractorConfiguration.DurationUnitRegex => DurationUnitRegex; + + Regex ICJKDurationExtractorConfiguration.DurationConnectorRegex => DurationConnectorRegex; + + Regex ICJKDurationExtractorConfiguration.YearRegex => YearRegex; + + Regex ICJKDurationExtractorConfiguration.AllRegex => AllRegex; + + Regex ICJKDurationExtractorConfiguration.HalfRegex => HalfRegex; + + Regex ICJKDurationExtractorConfiguration.RelativeDurationUnitRegex => RelativeDurationUnitRegex; + + Regex ICJKDurationExtractorConfiguration.DuringRegex => DuringRegex; + + Regex ICJKDurationExtractorConfiguration.SomeRegex => SomeRegex; + + Regex ICJKDurationExtractorConfiguration.MoreOrLessRegex => MoreOrLessRegex; + internal class DurationExtractorConfiguration : JapaneseNumberWithUnitExtractorConfiguration { - public static readonly ImmutableDictionary DurationSuffixList = - DateTimeDefinitions.DurationSuffixList.ToImmutableDictionary(); + public static readonly ImmutableDictionary DurationSuffixList = DateTimeDefinitions.DurationSuffixList.ToImmutableDictionary(); public DurationExtractorConfiguration() - : base(new CultureInfo(Culture.Japanese)) + : base(new CultureInfo(Text.Culture.Japanese)) { } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseHolidayExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseHolidayExtractorConfiguration.cs index 
888ea7954c..97c83b149c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseHolidayExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseHolidayExtractorConfiguration.cs @@ -1,18 +1,21 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseHolidayExtractorConfiguration : BaseDateTimeOptionsConfiguration, IHolidayExtractorConfiguration + public class JapaneseHolidayExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKHolidayExtractorConfiguration { - public static readonly Regex LunarHolidayRegex = - new Regex(DateTimeDefinitions.LunarHolidayRegex, RegexFlags); + + public static readonly Regex LunarHolidayRegex = new Regex(DateTimeDefinitions.LunarHolidayRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] HolidayRegexList = { - new Regex(DateTimeDefinitions.HolidayRegexList1, RegexFlags), - new Regex(DateTimeDefinitions.HolidayRegexList2, RegexFlags), + new Regex(DateTimeDefinitions.HolidayRegexList1, RegexFlags, RegexTimeOut), + new Regex(DateTimeDefinitions.HolidayRegexList2, RegexFlags, RegexTimeOut), LunarHolidayRegex, }; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseMergedExtractorConfiguration.cs index 45844ea8a1..632a50545f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseMergedExtractorConfiguration.cs @@ -1,244 +1,89 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseMergedExtractorConfiguration : IDateTimeExtractor + public class JapaneseMergedExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKMergedExtractorConfiguration { - public static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.ParserConfigurationBefore, RegexFlags); - - public static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.ParserConfigurationAfter, RegexFlags); - - public static readonly Regex UntilRegex = new Regex(DateTimeDefinitions.ParserConfigurationUntil, RegexFlags); - - public static readonly Regex SincePrefixRegex = new Regex(DateTimeDefinitions.ParserConfigurationSincePrefix, RegexFlags); - - public static readonly Regex SinceSuffixRegex = new Regex(DateTimeDefinitions.ParserConfigurationSinceSuffix, RegexFlags); - - public static readonly Regex EqualRegex = new Regex(BaseDateTime.EqualRegex, RegexFlags); + public static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.ParserConfigurationBefore, RegexFlags, RegexTimeOut); + public static readonly Regex UnspecificDatePeriodRegex = new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags, RegexTimeOut); + public static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.ParserConfigurationAfter, RegexFlags, RegexTimeOut); + public static readonly Regex UntilRegex = new Regex(DateTimeDefinitions.ParserConfigurationUntil, RegexFlags, RegexTimeOut); + public static readonly Regex SincePrefixRegex = new Regex(DateTimeDefinitions.ParserConfigurationSincePrefix, RegexFlags, RegexTimeOut); + public 
static readonly Regex SinceSuffixRegex = new Regex(DateTimeDefinitions.ParserConfigurationSinceSuffix, RegexFlags, RegexTimeOut); + public static readonly Regex AroundPrefixRegex = new Regex(DateTimeDefinitions.ParserConfigurationAroundPrefix, RegexFlags, RegexTimeOut); + public static readonly Regex AroundSuffixRegex = new Regex(DateTimeDefinitions.ParserConfigurationAroundSuffix, RegexFlags, RegexTimeOut); + public static readonly Regex EqualRegex = new Regex(BaseDateTime.EqualRegex, RegexFlags, RegexTimeOut); + public static readonly Regex PotentialAmbiguousRangeRegex = new Regex(DateTimeDefinitions.FromToRegex, RegexFlags, RegexTimeOut); + public static readonly Regex AmbiguousRangeModifierPrefix = new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly JapaneseDateExtractorConfiguration DateExtractor = new JapaneseDateExtractorConfiguration(); + public JapaneseMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict); + + DateExtractor = new BaseCJKDateExtractor(new JapaneseDateExtractorConfiguration(this)); + TimeExtractor = new BaseCJKTimeExtractor(new JapaneseTimeExtractorConfiguration(this)); + DateTimeExtractor = new BaseCJKDateTimeExtractor(new JapaneseDateTimeExtractorConfiguration(this)); + DatePeriodExtractor = new BaseCJKDatePeriodExtractor(new JapaneseDatePeriodExtractorConfiguration(this)); + TimePeriodExtractor = new BaseCJKTimePeriodExtractor(new JapaneseTimePeriodExtractorConfiguration(this)); + DateTimePeriodExtractor = new BaseCJKDateTimePeriodExtractor(new JapaneseDateTimePeriodExtractorConfiguration(this)); + DurationExtractor = new BaseCJKDurationExtractor(new JapaneseDurationExtractorConfiguration(this)); + SetExtractor = new BaseCJKSetExtractor(new 
JapaneseSetExtractorConfiguration(this)); + HolidayExtractor = new BaseCJKHolidayExtractor(new JapaneseHolidayExtractorConfiguration(this)); + } - private static readonly JapaneseTimeExtractorConfiguration TimeExtractor = new JapaneseTimeExtractorConfiguration(); + public IDateTimeExtractor DateExtractor { get; } - private static readonly JapaneseDateTimeExtractorConfiguration DateTimeExtractor = new JapaneseDateTimeExtractorConfiguration(); + public IDateTimeExtractor TimeExtractor { get; } - private static readonly JapaneseDatePeriodExtractorConfiguration DatePeriodExtractor = new JapaneseDatePeriodExtractorConfiguration(); + public IDateTimeExtractor DateTimeExtractor { get; } - private static readonly JapaneseTimePeriodExtractorConfiguration TimePeriodExtractor = new JapaneseTimePeriodExtractorConfiguration(); + public IDateTimeExtractor DatePeriodExtractor { get; } - private static readonly JapaneseDateTimePeriodExtractorConfiguration DateTimePeriodExtractor = new JapaneseDateTimePeriodExtractorConfiguration(); + public IDateTimeExtractor TimePeriodExtractor { get; } - private static readonly JapaneseDurationExtractorConfiguration DurationExtractor = new JapaneseDurationExtractorConfiguration(); + public IDateTimeExtractor DateTimePeriodExtractor { get; } - private static readonly JapaneseSetExtractorConfiguration SetExtractor = new JapaneseSetExtractorConfiguration(); + public IDateTimeExtractor DurationExtractor { get; } - private readonly IDateTimeOptionsConfiguration config; + public IDateTimeExtractor SetExtractor { get; } - public JapaneseMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) - { - this.config = config; + public IDateTimeExtractor HolidayExtractor { get; } - HolidayExtractor = new BaseHolidayExtractor(new JapaneseHolidayExtractorConfiguration(config)); - } + Regex ICJKMergedExtractorConfiguration.AfterRegex => AfterRegex; - private BaseHolidayExtractor HolidayExtractor { get; } + Regex 
ICJKMergedExtractorConfiguration.BeforeRegex => BeforeRegex; - public List Extract(string text) - { - return Extract(text, DateObject.Now); - } + Regex ICJKMergedExtractorConfiguration.UnspecificDatePeriodRegex => UnspecificDatePeriodRegex; - public List Extract(string text, DateObject referenceTime) - { - var ret = DateExtractor.Extract(text, referenceTime); + Regex ICJKMergedExtractorConfiguration.SincePrefixRegex => SincePrefixRegex; - // the order is important, since there is a problem in merging - AddTo(ret, TimeExtractor.Extract(text, referenceTime)); - AddTo(ret, DurationExtractor.Extract(text, referenceTime)); - AddTo(ret, DatePeriodExtractor.Extract(text, referenceTime)); - AddTo(ret, DateTimeExtractor.Extract(text, referenceTime)); - AddTo(ret, TimePeriodExtractor.Extract(text, referenceTime)); - AddTo(ret, DateTimePeriodExtractor.Extract(text, referenceTime)); - AddTo(ret, SetExtractor.Extract(text, referenceTime)); - AddTo(ret, HolidayExtractor.Extract(text, referenceTime)); + Regex ICJKMergedExtractorConfiguration.SinceSuffixRegex => SinceSuffixRegex; - CheckBlackList(ref ret, text); + Regex ICJKMergedExtractorConfiguration.AroundPrefixRegex => AroundPrefixRegex; - AddMod(ret, text); + Regex ICJKMergedExtractorConfiguration.AroundSuffixRegex => AroundSuffixRegex; - ret = ret.OrderBy(p => p.Start).ToList(); + Regex ICJKMergedExtractorConfiguration.UntilRegex => UntilRegex; - return ret; - } + Regex ICJKMergedExtractorConfiguration.EqualRegex => EqualRegex; - // add some negative case - private static void CheckBlackList(ref List extractResults, string text) - { - var ret = new List(); - var regex = new Regex(@"^\d{1,2}号"); - - foreach (var extractResult in extractResults) - { - var endIndex = (int)extractResult.Start + (int)extractResult.Length; - if (endIndex != text.Length) - { - var tmpChar = text.Substring(endIndex, 1); - - // for cases like "12周岁" - if (extractResult.Text.EndsWith("周") && endIndex < text.Length && tmpChar.Equals("岁")) - { - 
continue; - } - } - - // for cases like "12号" - if (regex.Match(extractResult.Text).Success) - { - continue; - } - - ret.Add(extractResult); - } - - extractResults = ret; - } - - private static List MoveOverlap(List dst, ExtractResult result) - { - var duplicate = new List(); - for (var i = 0; i < dst.Count; ++i) - { - if (result.Text.Contains(dst[i].Text) && - (result.Start == dst[i].Start || result.Start + result.Length == dst[i].Start + dst[i].Length)) - { - duplicate.Add(i); - } - } - - var tempDst = dst.Where((_, i) => !duplicate.Contains(i)).ToList(); - - return tempDst; - } - - private void AddMod(List ers, string text) - { - var lastEnd = 0; - foreach (var er in ers) - { - var beforeStr = text.Substring(lastEnd, er.Start ?? 0); - var afterStr = text.Substring((er.Start ?? 0) + (er.Length ?? 0)); - - var match = BeforeRegex.MatchBegin(afterStr, trim: true); - - if (match.Success) - { - var modLength = match.Index + match.Length; - er.Length += modLength; - er.Text = text.Substring(er.Start ?? 0, er.Length ?? 0); - } - - match = AfterRegex.MatchBegin(afterStr, trim: true); - - if (match.Success) - { - var modLength = match.Index + match.Length; - er.Length += modLength; - er.Text = text.Substring(er.Start ?? 0, er.Length ?? 0); - } - - match = UntilRegex.MatchEnd(beforeStr, trim: true); - - if (match.Success) - { - var modLength = beforeStr.Length - match.Index; - er.Length += modLength; - er.Start -= modLength; - er.Text = text.Substring(er.Start ?? 0, er.Length ?? 0); - } - - match = SincePrefixRegex.MatchEnd(beforeStr, trim: true); - - if (match.Success) - { - var modLength = beforeStr.Length - match.Index; - er.Length += modLength; - er.Start -= modLength; - er.Text = text.Substring(er.Start ?? 0, er.Length ?? 0); - } - - match = SinceSuffixRegex.MatchBegin(afterStr, trim: true); - - if (match.Success) - { - var modLength = match.Index + match.Length; - er.Length += modLength; - er.Text = text.Substring(er.Start ?? 0, er.Length ?? 
0); - } - } - } + Regex ICJKMergedExtractorConfiguration.PotentialAmbiguousRangeRegex => PotentialAmbiguousRangeRegex; - private bool HasTokenIndexBeforeStr(string text, Regex regex, out int index) - { - index = -1; - var match = regex.Match(text); - - if (match.Success && string.IsNullOrWhiteSpace(text.Substring(match.Index + match.Length))) - { - index = match.Index; - return true; - } - - return false; - } - - private void AddTo(List dst, List src) - { - foreach (var result in src) - { - var isFound = false; - int resultMatchIndex = -1, resultMatchLength = 1; - for (var i = 0; i < dst.Count; i++) - { - if (dst[i].IsOverlap(result)) - { - isFound = true; - if (result.Length > dst[i].Length) - { - resultMatchIndex = i; - var j = i + 1; - while (j < dst.Count && dst[j].IsOverlap(result)) - { - resultMatchLength++; - j++; - } - } - - break; - } - } - - if (!isFound) - { - dst.Add(result); - } - else if (resultMatchIndex >= 0) - { - dst.RemoveRange(resultMatchIndex, resultMatchLength); - var tmpDst = MoveOverlap(dst, result); - dst.Clear(); - dst.AddRange(tmpDst); - dst.Insert(resultMatchIndex, result); - } - } - } + Regex ICJKMergedExtractorConfiguration.AmbiguousRangeModifierPrefix => AmbiguousRangeModifierPrefix; + public Dictionary AmbiguityFiltersDict { get; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseSetExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseSetExtractorConfiguration.cs index 7fba44b807..ab58010e21 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseSetExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseSetExtractorConfiguration.cs @@ -1,136 +1,70 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; + using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseSetExtractorConfiguration : IDateTimeExtractor + public class JapaneseSetExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKSetExtractorConfiguration /* Implements ICJKSetExtractorConfiguration: regexes compiled from DateTimeDefinitions.Set* patterns plus the sub-extractors assigned in the constructor. */ { - public static readonly string ExtractorName = Constants.SYS_DATETIME_SET; + public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.SetUnitRegex, RegexFlags, RegexTimeOut); /* NOTE(review): RegexTimeOut is not declared in this class; presumably inherited from BaseDateTimeOptionsConfiguration - confirm. */ + + public static readonly Regex EachUnitRegex = new Regex(DateTimeDefinitions.SetEachUnitRegex, RegexFlags, RegexTimeOut); - public static readonly Regex EachUnitRegex = new Regex(DateTimeDefinitions.SetEachUnitRegex, RegexFlags); + public static readonly Regex EachPrefixRegex = new Regex(DateTimeDefinitions.SetEachPrefixRegex, RegexFlags, RegexTimeOut); - public static readonly Regex EachPrefixRegex = new Regex(DateTimeDefinitions.SetEachPrefixRegex, RegexFlags); + public static readonly Regex EachSuffixRegex = new Regex(DateTimeDefinitions.SetEachSuffixRegex, RegexFlags, RegexTimeOut); - public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags); + public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags, RegexTimeOut); - public static readonly Regex EachDayRegex = new Regex(DateTimeDefinitions.SetEachDayRegex, RegexFlags); + public static readonly Regex EachDayRegex = new Regex(DateTimeDefinitions.SetEachDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex EachDateUnitRegex = new Regex(DateTimeDefinitions.SetEachDateUnitRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly JapaneseDurationExtractorConfiguration DurationExtractor = new JapaneseDurationExtractorConfiguration(); + 
public JapaneseSetExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) /* Every sub-extractor below receives this instance as its options configuration. */ + { + DurationExtractor = new BaseCJKDurationExtractor(new JapaneseDurationExtractorConfiguration(this)); + TimeExtractor = new BaseCJKTimeExtractor(new JapaneseTimeExtractorConfiguration(this)); + DateExtractor = new BaseCJKDateExtractor(new JapaneseDateExtractorConfiguration(this)); + DateTimeExtractor = new BaseCJKDateTimeExtractor(new JapaneseDateTimeExtractorConfiguration(this)); + DatePeriodExtractor = new BaseCJKDatePeriodExtractor(new JapaneseDatePeriodExtractorConfiguration(this)); + TimePeriodExtractor = new BaseCJKTimePeriodExtractor(new JapaneseTimePeriodExtractorConfiguration(this)); + DateTimePeriodExtractor = new BaseCJKDateTimePeriodExtractor(new JapaneseDateTimePeriodExtractorConfiguration(this)); + } - private static readonly JapaneseTimeExtractorConfiguration TimeExtractor = new JapaneseTimeExtractorConfiguration(); + public IDateTimeExtractor DurationExtractor { get; } - private static readonly JapaneseDateExtractorConfiguration DateExtractor = new JapaneseDateExtractorConfiguration(); + public IDateTimeExtractor TimeExtractor { get; } - private static readonly JapaneseDateTimeExtractorConfiguration DateTimeExtractor = new JapaneseDateTimeExtractorConfiguration(); + public IDateTimeExtractor DateExtractor { get; } - public static List MatchEachDuration(string text, DateObject referenceTime) - { - var ret = new List(); - - var ers = DurationExtractor.Extract(text, referenceTime); - foreach (var er in ers) - { - // "each last summer" doesn't make sense - if (LastRegex.IsMatch(er.Text)) - { - continue; - } - - var beforeStr = text.Substring(0, er.Start ?? 0); - var match = EachPrefixRegex.Match(beforeStr); - if (match.Success) - { - ret.Add(new Token(match.Index, er.Start + er.Length ?? 
0)); - } - } - - return ret; - } + public IDateTimeExtractor DateTimeExtractor { get; } - public static List MatchEachUnit(string text) - { - var ret = new List(); + public IDateTimeExtractor DatePeriodExtractor { get; } - // handle "each month" - var matches = EachUnitRegex.Matches(text); - foreach (Match match in matches) - { - ret.Add(new Token(match.Index, match.Index + match.Length)); - } + public IDateTimeExtractor TimePeriodExtractor { get; } - return ret; - } + public IDateTimeExtractor DateTimePeriodExtractor { get; } - public static List TimeEveryday(string text, DateObject referenceTime) - { - var ret = new List(); - var ers = TimeExtractor.Extract(text, referenceTime); - foreach (var er in ers) - { - var beforeStr = text.Substring(0, er.Start ?? 0); - var match = EachDayRegex.Match(beforeStr); - if (match.Success) - { - ret.Add(new Token(match.Index, match.Index + match.Length + (er.Length ?? 0))); - } - } - - return ret; - } + Regex ICJKSetExtractorConfiguration.LastRegex => LastRegex; /* Explicit ICJKSetExtractorConfiguration members forward to the static fields of the same name. */ - public static List MatchEachDate(string text, DateObject referenceTime) - { - var ret = new List(); - var ers = DateExtractor.Extract(text, referenceTime); - foreach (var er in ers) - { - var beforeStr = text.Substring(0, er.Start ?? 0); - var match = EachPrefixRegex.Match(beforeStr); - if (match.Success) - { - ret.Add(new Token(match.Index, match.Index + match.Length + (er.Length ?? 0))); - } - } - - return ret; - } + Regex ICJKSetExtractorConfiguration.EachPrefixRegex => EachPrefixRegex; - public static List MatchEachDateTime(string text, DateObject referenceTime) - { - var ret = new List(); - var ers = DateTimeExtractor.Extract(text, referenceTime); - foreach (var er in ers) - { - var beforeStr = text.Substring(0, er.Start ?? 0); - var match = EachPrefixRegex.Match(beforeStr); - if (match.Success) - { - ret.Add(new Token(match.Index, match.Index + match.Length + (er.Length ?? 
0))); - } - } - - return ret; - } + Regex ICJKSetExtractorConfiguration.EachSuffixRegex => EachSuffixRegex; - public List Extract(string text) - { - return Extract(text, DateObject.Now); - } + Regex ICJKSetExtractorConfiguration.EachUnitRegex => EachUnitRegex; - public List Extract(string text, DateObject referenceTime) - { - var tokens = new List(); - tokens.AddRange(MatchEachUnit(text)); - tokens.AddRange(MatchEachDuration(text, referenceTime)); - tokens.AddRange(TimeEveryday(text, referenceTime)); - tokens.AddRange(MatchEachDate(text, referenceTime)); - tokens.AddRange(MatchEachDateTime(text, referenceTime)); - - return Token.MergeAllTokens(tokens, text, ExtractorName); - } + Regex ICJKSetExtractorConfiguration.UnitRegex => UnitRegex; + + Regex ICJKSetExtractorConfiguration.EachDayRegex => EachDayRegex; + + Regex ICJKSetExtractorConfiguration.EachDateUnitRegex => EachDateUnitRegex; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseTimeExtractorConfiguration.cs index 413f04c25c..c42b3261f3 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseTimeExtractorConfiguration.cs @@ -1,41 +1,89 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Definitions.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseTimeExtractorConfiguration : JapaneseBaseDateTimeExtractorConfiguration + public class JapaneseTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKTimeExtractorConfiguration /* Implements ICJKTimeExtractorConfiguration: pattern strings taken from DateTimeDefinitions.Time* plus the compiled Regexes map built in the constructor. */ { - // e.g: 早上九点 + public static readonly string HourNumRegex = DateTimeDefinitions.TimeHourNumRegex; + + public static readonly string MinuteNumRegex = DateTimeDefinitions.TimeMinuteNumRegex; + + public static readonly string SecondNumRegex = DateTimeDefinitions.TimeSecondNumRegex; + + public static readonly string HourCJKRegex = DateTimeDefinitions.TimeHourCJKRegex; + + public static readonly string MinuteCJKRegex = DateTimeDefinitions.TimeMinuteCJKRegex; + + public static readonly string SecondCJKRegex = DateTimeDefinitions.TimeSecondCJKRegex; + + public static readonly string ClockDescRegex = DateTimeDefinitions.TimeClockDescRegex; + + public static readonly string MinuteDescRegex = DateTimeDefinitions.TimeMinuteDescRegex; + + public static readonly string SecondDescRegex = DateTimeDefinitions.TimeSecondDescRegex; + + public static readonly string BanHourPrefixRegex = DateTimeDefinitions.TimeBanHourPrefixRegex; + + // e.g: 12時 + public static readonly string HourRegex = DateTimeDefinitions.TimeHourRegex; + + public static readonly string MinuteRegex = DateTimeDefinitions.TimeMinuteRegex; + + public static readonly string SecondRegex = DateTimeDefinitions.TimeSecondRegex; + + public static readonly string HalfRegex = DateTimeDefinitions.TimeHalfRegex; + + public static readonly string QuarterRegex = DateTimeDefinitions.TimeQuarterRegex; + + // e.g: 十二五十から八|半分|瞬間 + public static readonly string CJKTimeRegex = DateTimeDefinitions.TimeCJKTimeRegex; + + // e.g: 12:23 + public static readonly 
string DigitTimeRegex = DateTimeDefinitions.TimeDigitTimeRegex; + + // e.g: 朝の9時 public static readonly string DayDescRegex = DateTimeDefinitions.TimeDayDescRegex; + public static readonly string ApproximateDescPreffixRegex = DateTimeDefinitions.TimeApproximateDescPreffixRegex; /* "Preffix" spelling mirrors DateTimeDefinitions.TimeApproximateDescPreffixRegex. */ + + public static readonly string ApproximateDescSuffixRegex = DateTimeDefinitions.TimeApproximateDescSuffixRegex; + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public JapaneseTimeExtractorConfiguration() + public JapaneseTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) /* The dictionary below registers each compiled pattern under its TimeType (CjkTime, DigitTime, LessTime). */ { var regexes = new Dictionary { { - new Regex(DateTimeDefinitions.TimeRegexes1, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegexes1, RegexFlags, RegexTimeOut), TimeType.CjkTime }, { - new Regex(DateTimeDefinitions.TimeRegexes2, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegexes2, RegexFlags, RegexTimeOut), TimeType.DigitTime }, { - new Regex(DateTimeDefinitions.TimeRegexes3, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegexes3, RegexFlags, RegexTimeOut), TimeType.LessTime }, }; - Regexes = regexes.ToImmutableDictionary(); + AmbiguityTimeFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityTimeFiltersDict); /* Loaded once per instance; the property below is get-only. */ + } - internal sealed override ImmutableDictionary Regexes { get; } + public ImmutableDictionary Regexes { get; } + + public Dictionary AmbiguityTimeFiltersDict { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_DATETIME_TIME; // "Fraction"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseTimePeriodExtractorConfiguration.cs index c7936f7740..2fdd95074d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseTimePeriodExtractorConfiguration.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseTimePeriodExtractorConfiguration.cs @@ -1,30 +1,55 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Definitions.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseTimePeriodExtractorConfiguration : JapaneseBaseDateTimeExtractorConfiguration + public class JapaneseTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKTimePeriodExtractorConfiguration /* Implements ICJKTimePeriodExtractorConfiguration: pattern strings from DateTimeDefinitions.TimePeriod* (two reused from JapaneseTimeExtractorConfiguration) plus the compiled Regexes map built in the constructor. */ { + public const string TimePeriodConnectWords = DateTimeDefinitions.TimePeriodTimePeriodConnectWords; + + // e.g. 五点十分四十八秒 (5:10:48 written in CJK numerals) + public static readonly string CJKTimeRegex = JapaneseTimeExtractorConfiguration.CJKTimeRegex; + + // e.g. 6時から9時| 6時から9時 (from 6 o'clock to 9 o'clock) + public static readonly string LeftCJKTimeRegex = DateTimeDefinitions.TimePeriodLeftCJKTimeRegex; + + public static readonly string RightCJKTimeRegex = DateTimeDefinitions.TimePeriodRightCJKTimeRegex; + + // e.g. 2:45 + public static readonly string DigitTimeRegex = JapaneseTimeExtractorConfiguration.DigitTimeRegex; + + public static readonly string LeftDigitTimeRegex = DateTimeDefinitions.TimePeriodLeftDigitTimeRegex; + + public static readonly string RightDigitTimeRegex = DateTimeDefinitions.TimePeriodRightDigitTimeRegex; + + public static readonly string ShortLeftCJKTimeRegex = DateTimeDefinitions.TimePeriodShortLeftCJKTimeRegex; + + public static readonly string ShortLeftDigitTimeRegex = DateTimeDefinitions.TimePeriodShortLeftDigitTimeRegex; private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public JapaneseTimePeriodExtractorConfiguration() + public JapaneseTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) /* The dictionary below registers each compiled pattern under its PeriodType (FullTime or ShortTime). */ 
{ var regexes = new Dictionary { { - new Regex(DateTimeDefinitions.TimePeriodRegexes1, RegexFlags), + new Regex(DateTimeDefinitions.TimePeriodRegexes1, RegexFlags, RegexTimeOut), PeriodType.FullTime }, { - new Regex(DateTimeDefinitions.TimePeriodRegexes2, RegexFlags), + new Regex(DateTimeDefinitions.TimePeriodRegexes2, RegexFlags, RegexTimeOut), PeriodType.ShortTime }, { - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags), + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut), PeriodType.ShortTime }, }; @@ -32,8 +57,9 @@ public JapaneseTimePeriodExtractorConfiguration() Regexes = regexes.ToImmutableDictionary(); } - internal sealed override ImmutableDictionary Regexes { get; } + public ImmutableDictionary Regexes { get; } + + public Dictionary AmbiguityTimePeriodFiltersDict { get; } = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityTimePeriodFiltersDict); /* Get-only auto-property with an initializer: the filters are loaded once per instance instead of on every property read, matching how JapaneseTimeExtractorConfiguration loads AmbiguityTimeFiltersDict once in its constructor. */ - protected sealed override string ExtractType { get; } = Constants.SYS_DATETIME_TIMEPERIOD; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseCommonDateTimeParserConfiguration.cs new file mode 100644 index 0000000000..a1c84fae75 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseCommonDateTimeParserConfiguration.cs @@ -0,0 +1,63 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Text.DateTime.Japanese; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Japanese; + +namespace Microsoft.Recognizers.Text.DateTime.Japanese +{ + public class JapaneseCommonDateTimeParserConfiguration : BaseCJKDateParserConfiguration, ICJKCommonDateTimeParserConfiguration /* Assigns the lookup maps, number recognizers, extractors and parsers for Japanese date-time parsing; all wiring happens in the constructor. */ + { + public JapaneseCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToImmutableDictionary(k => k.Key, k => k.Value); + UnitValueMap = DateTimeDefinitions.DurationUnitValueMap.ToImmutableDictionary(); + CardinalMap = DateTimeDefinitions.ParserConfigurationCardinalMap.ToImmutableDictionary(); + DayOfMonth = DateTimeDefinitions.ParserConfigurationDayOfMonth.ToImmutableDictionary(); + DayOfWeek = DateTimeDefinitions.ParserConfigurationDayOfWeek.ToImmutableDictionary(); + MonthOfYear = DateTimeDefinitions.ParserConfigurationMonthOfYear.ToImmutableDictionary(); + + var numOptions = NumberOptions.None; /* The check below carries the NoProtoCache date-time option over to the number recognizers created from numConfig. */ + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = new IntegerExtractor(numConfig); + CardinalExtractor = new CardinalExtractor(numConfig); + OrdinalExtractor = Number.Japanese.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseCJKNumberParser(new JapaneseNumberParserConfiguration(numConfig)); + + // Do not change order. 
The order of initialization can lead to side-effects + DateExtractor = new BaseCJKDateExtractor(new JapaneseDateExtractorConfiguration(this)); + TimeExtractor = new BaseCJKTimeExtractor(new JapaneseTimeExtractorConfiguration(this)); + DateTimeExtractor = new BaseCJKDateTimeExtractor(new JapaneseDateTimeExtractorConfiguration(this)); + DurationExtractor = new BaseCJKDurationExtractor(new JapaneseDurationExtractorConfiguration(this)); + DatePeriodExtractor = new BaseCJKDatePeriodExtractor(new JapaneseDatePeriodExtractorConfiguration(this)); + TimePeriodExtractor = new BaseCJKTimePeriodExtractor(new JapaneseTimePeriodExtractorConfiguration(this)); + DateTimePeriodExtractor = new BaseCJKDateTimePeriodExtractor(new JapaneseDateTimePeriodExtractorConfiguration(this)); + HolidayExtractor = new BaseCJKDurationExtractor(new JapaneseDurationExtractorConfiguration(this)); /* NOTE(review): HolidayExtractor and SetExtractor (next statement) are both built as a BaseCJKDurationExtractor over JapaneseDurationExtractorConfiguration, while every other extractor above uses its own matching extractor and configuration types. This looks like copy-paste; confirm whether it is intentional before changing it, given the ordering warning above. */ + SetExtractor = new BaseCJKDurationExtractor(new JapaneseDurationExtractorConfiguration(this)); + + DurationParser = new BaseCJKDurationParser(new JapaneseDurationParserConfiguration(this)); + DateParser = new BaseCJKDateParser(new JapaneseDateParserConfiguration(this)); + TimeParser = new BaseCJKTimeParser(new JapaneseTimeParserConfiguration(this)); + DateTimeParser = new BaseCJKDateTimeParser(new JapaneseDateTimeParserConfiguration(this)); + DatePeriodParser = new BaseCJKDatePeriodParser(new JapaneseDatePeriodParserConfiguration(this)); + TimePeriodParser = new BaseCJKTimePeriodParser(new JapaneseTimePeriodParserConfiguration(this)); + DateTimePeriodParser = new BaseCJKDateTimePeriodParser(new JapaneseDateTimePeriodParserConfiguration(this)); + HolidayParser = new BaseCJKHolidayParser(new JapaneseHolidayParserConfiguration(this)); + SetParser = new BaseCJKSetParser(new JapaneseSetParserConfiguration(this)); + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateParserConfiguration.cs 
b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateParserConfiguration.cs index 5082d7a06f..daf30c4b15 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateParserConfiguration.cs @@ -1,696 +1,186 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Text.Number; -using Microsoft.Recognizers.Text.Number.Japanese; +using Microsoft.Recognizers.Definitions.Japanese; using Microsoft.Recognizers.Text.Utilities; -using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseDateParserConfiguration : IDateTimeParser + public class JapaneseDateParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKDateParserConfiguration { - public static readonly string ParserName = Constants.SYS_DATETIME_DATE; // "Date"; - - private static readonly int[] MonthMaxDays = { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; + public static readonly Regex PlusOneDayRegex = new Regex(DateTimeDefinitions.PlusOneDayRegex, RegexFlags, RegexTimeOut); + public static readonly Regex MinusOneDayRegex = new Regex(DateTimeDefinitions.MinusOneDayRegex, RegexFlags, RegexTimeOut); + public static readonly Regex PlusTwoDayRegex = new Regex(DateTimeDefinitions.PlusTwoDayRegex, RegexFlags, RegexTimeOut); + public static readonly Regex MinusTwoDayRegex = new Regex(DateTimeDefinitions.MinusTwoDayRegex, RegexFlags, RegexTimeOut); + public static readonly Regex PlusThreeDayRegex = new Regex(DateTimeDefinitions.PlusThreeDayRegex, RegexFlags, RegexTimeOut); + public static readonly Regex MinusThreeDayRegex = new Regex(DateTimeDefinitions.MinusThreeDayRegex, RegexFlags, RegexTimeOut); + public 
static readonly Regex PlusFourDayRegex = new Regex(DateTimeDefinitions.PlusFourDayRegex, RegexFlags, RegexTimeOut); + public static readonly Regex NextMonthRegex = new Regex(DateTimeDefinitions.ParserConfigurationNextMonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastMonthRegex = new Regex(DateTimeDefinitions.ParserConfigurationLastMonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastWeekDayRegex = new Regex(DateTimeDefinitions.ParserConfigurationLastWeekDayRegex, RegexFlags, RegexTimeOut); - private readonly JapaneseDateTimeParserConfiguration config; + public static readonly string ParserName = Constants.SYS_DATETIME_DATE; // "Date"; - private readonly IExtractor integerExtractor; - private readonly IParser numberParser; - private readonly IDateTimeExtractor durationExtractor; + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public JapaneseDateParserConfiguration(JapaneseDateTimeParserConfiguration configuration) + public JapaneseDateParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) { - config = configuration; - integerExtractor = new IntegerExtractor(); - durationExtractor = new JapaneseDurationExtractorConfiguration(); - numberParser = new BaseCJKNumberParser(new JapaneseNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); - } + IntegerExtractor = config.IntegerExtractor; + OrdinalExtractor = config.OrdinalExtractor; - public ParseResult Parse(ExtractResult extResult) - { - return this.Parse(extResult, DateObject.Now); - } + NumberParser = config.NumberParser; - public virtual DateTimeParseResult Parse(ExtractResult er, DateObject referenceDate) - { - object value = null; + DateExtractor = config.DateExtractor; + DurationExtractor = config.DurationExtractor; + DurationParser = config.DurationParser; - if (er.Type.Equals(ParserName, StringComparison.Ordinal)) - { - value = InnerParser(er.Text, referenceDate); 
- } + DateRegexList = new JapaneseDateExtractorConfiguration(this).DateRegexList; + SpecialDate = JapaneseDateExtractorConfiguration.SpecialDate; + NextRe = JapaneseDateExtractorConfiguration.NextRe; + LastRe = JapaneseDateExtractorConfiguration.LastRe; + SpecialDayRegex = JapaneseDateExtractorConfiguration.SpecialDayRegex; + StrictWeekDayRegex = JapaneseDateExtractorConfiguration.WeekDayRegex; + LunarRegex = JapaneseDateExtractorConfiguration.LunarRegex; + UnitRegex = JapaneseDateExtractorConfiguration.UnitRegex; + BeforeRegex = JapaneseDateExtractorConfiguration.BeforeRegex; + AfterRegex = JapaneseDateExtractorConfiguration.AfterRegex; + DynastyYearRegex = JapaneseDateExtractorConfiguration.DynastyYearRegex; + DynastyStartYear = JapaneseDateExtractorConfiguration.DynastyStartYear; + DynastyYearMap = JapaneseDateExtractorConfiguration.DynastyYearMap; + NextRegex = JapaneseDateExtractorConfiguration.NextRegex; + ThisRegex = JapaneseDateExtractorConfiguration.ThisRegex; + LastRegex = JapaneseDateExtractorConfiguration.LastRegex; + WeekDayOfMonthRegex = JapaneseDateExtractorConfiguration.WeekDayOfMonthRegex; + WeekDayAndDayRegex = JapaneseDateExtractorConfiguration.WeekDayAndDayRegex; + DurationRelativeDurationUnitRegex = JapaneseDateExtractorConfiguration.DurationRelativeDurationUnitRegex; + SpecialDayWithNumRegex = JapaneseDateExtractorConfiguration.SpecialDayWithNumRegex; - var ret = new DateTimeParseResult - { - Text = er.Text, - Start = er.Start, - Length = er.Length, - Type = er.Type, - Data = er.Data, - Value = value, - TimexStr = value == null ? 
string.Empty : ((DateTimeResolutionResult)value).Timex, - ResolutionStr = string.Empty, - }; - - return ret; + CardinalMap = config.CardinalMap; + UnitMap = config.UnitMap; + DayOfMonth = config.DayOfMonth; + DayOfWeek = config.DayOfWeek; + MonthOfYear = config.MonthOfYear; } - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } + public IExtractor IntegerExtractor { get; } - protected DateTimeResolutionResult InnerParser(string text, DateObject reference) - { - var innerResult = ParseBasicRegexMatch(text, reference); + public IExtractor OrdinalExtractor { get; } - if (!innerResult.Success) - { - innerResult = ParseImplicitDate(text, reference); - } + public IParser NumberParser { get; } - if (!innerResult.Success) - { - innerResult = ParseWeekdayOfMonth(text, reference); - } + public IDateTimeExtractor DateExtractor { get; } - if (!innerResult.Success) - { - innerResult = ParserDurationWithBeforeAndAfter(text, reference); - } + public IDateTimeExtractor DurationExtractor { get; } - if (innerResult.Success) - { - innerResult.FutureResolution = new Dictionary - { - { TimeTypeConstants.DATE, DateTimeFormatUtil.FormatDate((DateObject)innerResult.FutureValue) }, - }; + public IDateTimeParser DurationParser { get; } - innerResult.PastResolution = new Dictionary - { - { TimeTypeConstants.DATE, DateTimeFormatUtil.FormatDate((DateObject)innerResult.PastValue) }, - }; + public IEnumerable DateRegexList { get; } - innerResult.IsLunar = IsLunarCalendar(text); + public Regex SpecialDate { get; } - return innerResult; - } + public Regex NextRe { get; } - return null; - } + public Regex LastRe { get; } - // parse basic patterns in DateRegexList - protected DateTimeResolutionResult ParseBasicRegexMatch(string text, DateObject referenceDate) - { - foreach (var regex in JapaneseDateExtractorConfiguration.DateRegexList) - { - var match = regex.MatchExact(text, trim: true); - - if (match.Success) - { - // Value string will be set in 
Match2Date method - var ret = Match2Date(match.Match, referenceDate); - return ret; - } - } + public Regex SpecialDayRegex { get; } - return new DateTimeResolutionResult(); - } + public Regex StrictWeekDayRegex { get; } - // match several other cases - // including '今天', '后天', '十三日' - protected DateTimeResolutionResult ParseImplicitDate(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); + public Regex LunarRegex { get; } - // handle "十二日" "明年这个月三日" "本月十一日" - var match = JapaneseDateExtractorConfiguration.SpecialDate.MatchExact(text, trim: true); + public Regex UnitRegex { get; } - if (match.Success) - { - var yearStr = match.Groups["thisyear"].Value; - var monthStr = match.Groups["thismonth"].Value; - var dayStr = match.Groups["day"].Value; - - int month = referenceDate.Month, year = referenceDate.Year; - var day = this.config.DayOfMonth[dayStr]; - - bool hasYear = false, hasMonth = false; - - if (!string.IsNullOrEmpty(monthStr)) - { - hasMonth = true; - if (JapaneseDateExtractorConfiguration.NextRe.Match(monthStr).Success) - { - month++; - if (month == 13) - { - month = 1; - year++; - } - } - else if (JapaneseDateExtractorConfiguration.LastRe.Match(monthStr).Success) - { - month--; - if (month == 0) - { - month = 12; - year--; - } - } - - if (!string.IsNullOrEmpty(yearStr)) - { - hasYear = true; - if (JapaneseDateExtractorConfiguration.NextRe.Match(yearStr).Success) - { - ++year; - } - else if (JapaneseDateExtractorConfiguration.LastRe.Match(yearStr).Success) - { - --year; - } - } - } - - ret.Timex = DateTimeFormatUtil.LuisDate(hasYear ? year : -1, hasMonth ? 
month : -1, day); - - DateObject futureDate, pastDate; - - if (day > MonthMaxDays[month - 1]) - { - futureDate = DateObject.MinValue.SafeCreateFromValue(year, month + 1, day); - pastDate = DateObject.MinValue.SafeCreateFromValue(year, month - 1, day); - } - else - { - futureDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); - pastDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); - if (!hasMonth) - { - if (futureDate < referenceDate) - { - futureDate = futureDate.AddMonths(1); - } - - if (pastDate >= referenceDate) - { - pastDate = pastDate.AddMonths(-1); - } - } - else if (!hasYear) - { - if (futureDate < referenceDate) - { - futureDate = futureDate.AddYears(1); - } - - if (pastDate >= referenceDate) - { - pastDate = pastDate.AddYears(-1); - } - } - } - - ret.FutureValue = futureDate; - ret.PastValue = pastDate; - ret.Success = true; - - return ret; - } + public Regex BeforeRegex { get; } - // handle cases like "昨日", "明日", "大后天" - match = JapaneseDateExtractorConfiguration.SpecialDayRegex.MatchExact(text, trim: true); + public Regex AfterRegex { get; } - if (match.Success) - { - var value = referenceDate.AddDays(JapaneseDateTimeParserConfiguration.GetSwiftDay(match.Value)); - ret.Timex = DateTimeFormatUtil.LuisDate(value); - ret.FutureValue = ret.PastValue = value; - ret.Success = true; + public Regex NextRegex { get; } - return ret; - } + public Regex ThisRegex { get; } - match = JapaneseDateExtractorConfiguration.SpecialMonthRegex.MatchExact(text, trim: true); + public Regex LastRegex { get; } - if (match.Success) - { - var value = referenceDate.AddMonths(JapaneseDateTimeParserConfiguration.GetSwiftMonth(match.Value)); - ret.Timex = DateTimeFormatUtil.LuisDate(value); - ret.FutureValue = ret.PastValue = value; - ret.Success = true; + public Regex WeekDayOfMonthRegex { get; } - return ret; - } - - match = JapaneseDateExtractorConfiguration.SpecialYearRegex.MatchExact(text, trim: true); - - if (match.Success) - { - var value = 
referenceDate.AddYears(JapaneseDateTimeParserConfiguration.GetSwiftYear(match.Value)); - ret.Timex = DateTimeFormatUtil.LuisDate(value); - ret.FutureValue = ret.PastValue = value; - ret.Success = true; - - return ret; - } - - if (!ret.Success) - { - ret = MatchThisWeekday(text, referenceDate); - } - - if (!ret.Success) - { - ret = MatchNextWeekday(text, referenceDate); - } - - if (!ret.Success) - { - ret = MatchLastWeekday(text, referenceDate); - } - - if (!ret.Success) - { - ret = MatchWeekdayAlone(text, referenceDate); - } - - return ret; - } - - protected DateTimeResolutionResult MatchNextWeekday(string text, DateObject reference) - { - var result = new DateTimeResolutionResult(); - var match = this.config.NextRegex.MatchExact(text, trim: true); - - if (match.Success) - { - var weekdayKey = match.Groups["weekday"].Value; - var value = reference.Next((DayOfWeek)this.config.DayOfWeek[weekdayKey]); + public Regex WeekDayAndDayRegex { get; } - result.Timex = DateTimeFormatUtil.LuisDate(value); - result.FutureValue = result.PastValue = value; - result.Success = true; - } + public Regex DurationRelativeDurationUnitRegex { get; } - return result; - } + public Regex SpecialDayWithNumRegex { get; } - protected DateTimeResolutionResult MatchThisWeekday(string text, DateObject reference) - { - var result = new DateTimeResolutionResult(); - var match = this.config.ThisRegex.MatchExact(text, trim: true); + public Regex DynastyYearRegex { get; } - if (match.Success) - { - var weekdayKey = match.Groups["weekday"].Value; - var value = reference.This((DayOfWeek)this.config.DayOfWeek[weekdayKey]); + public ImmutableDictionary DynastyYearMap { get; } - result.Timex = DateTimeFormatUtil.LuisDate(value); - result.FutureValue = result.PastValue = value; - result.Success = true; - } + public IImmutableDictionary CardinalMap { get; } - return result; - } + public IImmutableDictionary UnitMap { get; } - protected DateTimeResolutionResult MatchLastWeekday(string text, DateObject 
reference) - { - var result = new DateTimeResolutionResult(); - var match = this.config.LastRegex.MatchExact(text, trim: true); + public IImmutableDictionary DayOfMonth { get; } - if (match.Success) - { - var weekdayKey = match.Groups["weekday"].Value; - var value = reference.Last((DayOfWeek)this.config.DayOfWeek[weekdayKey]); + public IImmutableDictionary DayOfWeek { get; } - result.Timex = DateTimeFormatUtil.LuisDate(value); - result.FutureValue = result.PastValue = value; - result.Success = true; - } + public IImmutableDictionary MonthOfYear { get; } - return result; - } + public string DynastyStartYear { get; } - protected DateTimeResolutionResult MatchWeekdayAlone(string text, DateObject reference) - { - var result = new DateTimeResolutionResult(); - var match = this.config.StrictWeekDayRegex.MatchExact(text, trim: true); + Regex ICJKDateParserConfiguration.LastWeekDayRegex => LastWeekDayRegex; - if (match.Success) - { - var weekdayKey = match.Groups["weekday"].Value; - var weekday = this.config.DayOfWeek[weekdayKey]; - var value = reference.This((DayOfWeek)weekday); - - if (weekday == 0) - { - weekday = 7; - } - - if (weekday < (int)reference.DayOfWeek) - { - value = reference.Next((DayOfWeek)weekday); - } - - result.Timex = "XXXX-WXX-" + weekday; - var futureDate = value; - var pastDate = value; - if (futureDate < reference) - { - futureDate = futureDate.AddDays(7); - } - - if (pastDate >= reference) - { - pastDate = pastDate.AddDays(-7); - } - - result.FutureValue = futureDate; - result.PastValue = pastDate; - result.Success = true; - } + Regex ICJKDateParserConfiguration.NextMonthRegex => NextMonthRegex; - return result; - } + Regex ICJKDateParserConfiguration.LastMonthRegex => LastMonthRegex; - protected virtual DateTimeResolutionResult ParseWeekdayOfMonth(string text, DateObject referenceDate) + public int GetSwiftDay(string text) { - var ret = new DateTimeResolutionResult(); + var swift = 0; - var trimmedText = text.Trim(); - var match = 
this.config.WeekDayOfMonthRegex.Match(trimmedText); - if (!match.Success) + if (PlusOneDayRegex.MatchBegin(text, trim: true).Success) { - return ret; + swift = 1; } - - var cardinalStr = match.Groups["cardinal"].Value; - var weekdayStr = match.Groups["weekday"].Value; - var monthStr = match.Groups["month"].Value; - var noYear = false; - int year; - - int cardinal; - if (cardinalStr.Equals(this.config.LastWeekDayToken)) + else if (MinusOneDayRegex.MatchBegin(text, trim: true).Success) { - cardinal = 5; - } - else - { - cardinal = this.config.CardinalMap[cardinalStr]; + swift = -1; } - var weekday = this.config.DayOfWeek[weekdayStr]; - int month; - if (string.IsNullOrEmpty(monthStr)) + if (PlusOneDayRegex.IsExactMatch(text, trim: false)) { - var swift = 0; - if (trimmedText.StartsWith(this.config.NextMonthToken)) - { - swift = 1; - } - else if (trimmedText.StartsWith(this.config.LastMonthToken)) - { - swift = -1; - } - - month = referenceDate.AddMonths(swift).Month; - year = referenceDate.AddMonths(swift).Year; + swift = 1; } - else + else if (PlusThreeDayRegex.IsExactMatch(text, trim: false)) { - month = this.config.MonthOfYear[monthStr]; - year = referenceDate.Year; - noYear = true; + swift = 3; } - - var value = ComputeDate(cardinal, weekday, month, year); - if (value.Month != month) + else if (PlusFourDayRegex.IsExactMatch(text, trim: false)) { - cardinal -= 1; - value = value.AddDays(-7); + swift = 4; } - - var futureDate = value; - var pastDate = value; - if (noYear && futureDate < referenceDate) + else if (MinusThreeDayRegex.IsExactMatch(text, trim: false)) { - futureDate = ComputeDate(cardinal, weekday, month, year + 1); - if (futureDate.Month != month) - { - futureDate = futureDate.AddDays(-7); - } + swift = -3; } - - if (noYear && pastDate >= referenceDate) + else if (MinusOneDayRegex.IsExactMatch(text, trim: false)) { - pastDate = ComputeDate(cardinal, weekday, month, year - 1); - if (pastDate.Month != month) - { - pastDate = pastDate.AddDays(-7); - } - } 
- - // here is a very special case, timeX follows future date - ret.Timex = $@"XXXX-{month:D2}-WXX-{weekday}-#{cardinal}"; - ret.FutureValue = futureDate; - ret.PastValue = pastDate; - ret.Success = true; - - return ret; - } + swift = -1; - // parse a regex match which includes 'day', 'month' and 'year' (optional) group - protected DateTimeResolutionResult Match2Date(Match match, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - - var monthStr = match.Groups["month"].Value; - var dayStr = match.Groups["day"].Value; - var yearStr = match.Groups["year"].Value; - var yearJapStr = match.Groups["yearJap"].Value; - int month = 1, day = 1, year = 0; - - var tmp = ConvertJapaneseYearToInteger(yearJapStr); - year = tmp == -1 ? 0 : tmp; - - if (this.config.MonthOfYear.ContainsKey(monthStr)) - { - month = this.config.MonthOfYear[monthStr] > 12 ? this.config.MonthOfYear[monthStr] % 12 : this.config.MonthOfYear[monthStr]; - if (!string.IsNullOrEmpty(yearStr)) - { - year = int.Parse(yearStr); - if (year < 100 && year >= Constants.MinTwoDigitYearPastNum) - { - year += 1900; - } - else if (year >= 0 && year < Constants.MaxTwoDigitYearFutureNum) - { - year += 2000; - } - } - - if (this.config.DayOfMonth.ContainsKey(dayStr)) - { - day = this.config.DayOfMonth[dayStr] > 31 ? 
this.config.DayOfMonth[dayStr] % 31 : this.config.DayOfMonth[dayStr]; - } - } - - var noYear = false; - if (year == 0) - { - year = referenceDate.Year; - ret.Timex = DateTimeFormatUtil.LuisDate(-1, month, day); - noYear = true; } - else + else if (PlusTwoDayRegex.IsExactMatch(text, trim: false)) { - ret.Timex = DateTimeFormatUtil.LuisDate(year, month, day); - } + swift = 2; - var futureDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); - var pastDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); - if (noYear && futureDate < referenceDate) - { - futureDate = futureDate.AddYears(+1); } - - if (noYear && pastDate >= referenceDate) + else if (MinusTwoDayRegex.IsExactMatch(text, trim: false)) { - pastDate = pastDate.AddYears(-1); - } + swift = -2; - ret.FutureValue = futureDate; - ret.PastValue = pastDate; - ret.Success = true; - - return ret; - } - - // parse if lunar contains - private static bool IsLunarCalendar(string text) - { - var trimmedText = text.Trim(); - var match = JapaneseDateExtractorConfiguration.LunarRegex.Match(trimmedText); - - return match.Success; - } - - private static DateObject ComputeDate(int cardinal, int weekday, int month, int year) - { - var firstDay = DateObject.MinValue.SafeCreateFromValue(year, month, 1); - var firstWeekday = firstDay.This((DayOfWeek)weekday); - if (weekday == 0) - { - weekday = 7; - } - - if (weekday < (int)firstDay.DayOfWeek) - { - firstWeekday = firstDay.Next((DayOfWeek)weekday); - } - - return firstWeekday.AddDays(7 * (cardinal - 1)); - } - - // handle cases like "三天前" - private DateTimeResolutionResult ParserDurationWithBeforeAndAfter(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - var durationRes = durationExtractor.Extract(text, referenceDate); - var unitStr = string.Empty; - - if (durationRes.Count > 0) - { - var match = JapaneseDateExtractorConfiguration.UnitRegex.Match(text); - if (match.Success) - { - var suffix = 
text.Substring((int)durationRes[0].Start + (int)durationRes[0].Length).Trim(); - var srcUnit = match.Groups["unit"].Value; - var numberStr = text.Substring((int)durationRes[0].Start, match.Index - (int)durationRes[0].Start).Trim(); - var number = ConvertJapaneseToNum(numberStr); - - if (this.config.UnitMap.ContainsKey(srcUnit)) - { - unitStr = this.config.UnitMap[srcUnit]; - - var beforeMatch = JapaneseDateExtractorConfiguration.BeforeRegex.Match(suffix); - if (beforeMatch.Success && suffix.StartsWith(beforeMatch.Value)) - { - DateObject date; - switch (unitStr) - { - case Constants.TimexDay: - date = referenceDate.AddDays(-number); - break; - case Constants.TimexWeek: - date = referenceDate.AddDays(-7 * number); - break; - case Constants.TimexMonthFull: - date = referenceDate.AddMonths(-number); - break; - case Constants.TimexYear: - date = referenceDate.AddYears(-number); - break; - default: - return ret; - } - - ret.Timex = $"{DateTimeFormatUtil.LuisDate(date)}"; - ret.FutureValue = ret.PastValue = date; - ret.Success = true; - return ret; - } - - var afterMatch = JapaneseDateExtractorConfiguration.AfterRegex.Match(suffix); - if (afterMatch.Success && suffix.StartsWith(afterMatch.Value)) - { - DateObject date; - switch (unitStr) - { - case Constants.TimexDay: - date = referenceDate.AddDays(number); - break; - case Constants.TimexWeek: - date = referenceDate.AddDays(7 * number); - break; - case Constants.TimexMonthFull: - date = referenceDate.AddMonths(number); - break; - case Constants.TimexYear: - date = referenceDate.AddYears(number); - break; - default: - return ret; - } - - ret.Timex = $"{DateTimeFormatUtil.LuisDate(date)}"; - ret.FutureValue = ret.PastValue = date; - ret.Success = true; - return ret; - } - } - } - } - - return ret; - } - - // concert Japanese Number to Integer - private int ConvertJapaneseToNum(string numStr) - { - var num = -1; - var er = integerExtractor.Extract(numStr); - if (er.Count != 0) - { - if 
(er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) - { - num = Convert.ToInt32((double)(numberParser.Parse(er[0]).Value ?? 0)); - } - } - - return num; - } - - // convert Japanese Year to Integer - private int ConvertJapaneseYearToInteger(string yearJapStr) - { - var year = 0; - var num = 0; - - var er = integerExtractor.Extract(yearJapStr); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) - { - num = Convert.ToInt32((double)(numberParser.Parse(er[0]).Value ?? 0)); - } } - if (num < 10) - { - num = 0; - foreach (var ch in yearJapStr) - { - num *= 10; - er = integerExtractor.Extract(ch.ToString()); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) - { - num += Convert.ToInt32((double)(numberParser.Parse(er[0]).Value ?? 0)); - } - } - } - } - - year = num; - - return year < 10 ? -1 : year; + return swift; } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDatePeriodParserConfiguration.cs index 98d67a1058..8b4258bf14 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDatePeriodParserConfiguration.cs @@ -1,6 +1,13 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; +using System.Collections.Immutable; using System.Globalization; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Japanese; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Japanese; using Microsoft.Recognizers.Text.Utilities; @@ -8,1459 +15,324 @@ namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseDatePeriodParserConfiguration : IDateTimeParser + public class JapaneseDatePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKDatePeriodParserConfiguration { - public static readonly string ParserName = Constants.SYS_DATETIME_DATEPERIOD; // "DatePeriod"; - - private const int LastMonthOfYear = 12; - - private static readonly IDateTimeExtractor SingleDateExtractor = new JapaneseDateExtractorConfiguration(); - private static readonly IExtractor IntegerExtractor = new IntegerExtractor(); + public static readonly Regex WoMLastRegex = new Regex(DateTimeDefinitions.WoMLastRegex, RegexFlags, RegexTimeOut); + public static readonly Regex WoMPreviousRegex = new Regex(DateTimeDefinitions.WoMPreviousRegex, RegexFlags, RegexTimeOut); + public static readonly Regex WoMNextRegex = new Regex(DateTimeDefinitions.WoMNextRegex, RegexFlags, RegexTimeOut); - private static readonly IParser IntegerParser = new BaseCJKNumberParser(new JapaneseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Japanese))); + public static readonly ImmutableDictionary MonthOfYear = DateTimeDefinitions.ParserConfigurationMonthOfYear.ToImmutableDictionary(); - private static readonly IDateTimeExtractor DurationExtractor = new JapaneseDurationExtractorConfiguration(); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly Calendar Cal = DateTimeFormatInfo.InvariantInfo.Calendar; + private static readonly Regex NextMonthRegex = new 
Regex(DateTimeDefinitions.ParserConfigurationNextMonthRegex, RegexFlags, RegexTimeOut); + private static readonly Regex AfterNextMonthRegex = new Regex(DateTimeDefinitions.ParserConfigurationAfterNextMonthRegex, RegexFlags, RegexTimeOut); + private static readonly Regex LastMonthRegex = new Regex(DateTimeDefinitions.ParserConfigurationLastMonthRegex, RegexFlags, RegexTimeOut); + private static readonly Regex NextYearRegex = new Regex(DateTimeDefinitions.ParserConfigurationNextYearRegex, RegexFlags, RegexTimeOut); + private static readonly Regex AfterNextYearRegex = new Regex(DateTimeDefinitions.ParserConfigurationAfterNextYearRegex, RegexFlags, RegexTimeOut); + private static readonly Regex LastYearRegex = new Regex(DateTimeDefinitions.ParserConfigurationLastYearRegex, RegexFlags, RegexTimeOut); + private static readonly Regex ThisYearRegex = new Regex(DateTimeDefinitions.ParserConfigurationThisYearRegex, RegexFlags, RegexTimeOut); - private readonly IFullDateTimeParserConfiguration config; - - public JapaneseDatePeriodParserConfiguration(IFullDateTimeParserConfiguration configuration) + public JapaneseDatePeriodParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) { - config = configuration; - } + IntegerExtractor = config.IntegerExtractor; + NumberParser = config.NumberParser; + DateExtractor = config.DateExtractor; + DurationExtractor = config.DurationExtractor; + CardinalExtractor = config.CardinalExtractor; + DurationParser = config.DurationParser; + DateParser = config.DateParser; + + DynastyYearRegex = JapaneseDateExtractorConfiguration.DynastyYearRegex; + DynastyStartYear = JapaneseDateExtractorConfiguration.DynastyStartYear; + DynastyYearMap = JapaneseDateExtractorConfiguration.DynastyYearMap; + SimpleCasesRegex = JapaneseDatePeriodExtractorConfiguration.SimpleCasesRegex; + ThisRegex = JapaneseDatePeriodExtractorConfiguration.ThisRegex; + NextRegex = JapaneseDatePeriodExtractorConfiguration.NextRegex; + LastRegex = 
JapaneseDatePeriodExtractorConfiguration.LastRegex; + YearToYear = JapaneseDatePeriodExtractorConfiguration.YearToYear; + YearToYearSuffixRequired = JapaneseDatePeriodExtractorConfiguration.YearToYearSuffixRequired; + YearRegex = JapaneseDatePeriodExtractorConfiguration.YearRegex; + YearInCJKRegex = JapaneseDatePeriodExtractorConfiguration.YearInCJKRegex; + MonthToMonth = JapaneseDatePeriodExtractorConfiguration.MonthToMonth; + MonthToMonthSuffixRequired = JapaneseDatePeriodExtractorConfiguration.MonthToMonthSuffixRequired; + DayToDay = JapaneseDatePeriodExtractorConfiguration.DayToDay; + MonthDayRange = JapaneseDatePeriodExtractorConfiguration.MonthDayRange; + DayRegexForPeriod = JapaneseDatePeriodExtractorConfiguration.DayRegexForPeriod; + MonthRegex = JapaneseDatePeriodExtractorConfiguration.MonthRegex; + SpecialMonthRegex = JapaneseDatePeriodExtractorConfiguration.SpecialMonthRegex; + SpecialYearRegex = JapaneseDatePeriodExtractorConfiguration.SpecialYearRegex; + YearAndMonth = JapaneseDatePeriodExtractorConfiguration.YearAndMonth; + PureNumYearAndMonth = JapaneseDatePeriodExtractorConfiguration.PureNumYearAndMonth; + SimpleYearAndMonth = JapaneseDatePeriodExtractorConfiguration.SimpleYearAndMonth; + OneWordPeriodRegex = JapaneseDatePeriodExtractorConfiguration.OneWordPeriodRegex; + NumberCombinedWithUnit = JapaneseDatePeriodExtractorConfiguration.NumberCombinedWithUnit; + PastRegex = JapaneseDatePeriodExtractorConfiguration.PastRegex; + FutureRegex = JapaneseDatePeriodExtractorConfiguration.FutureRegex; + WeekWithWeekDayRangeRegex = JapaneseDatePeriodExtractorConfiguration.WeekWithWeekDayRangeRegex; + UnitRegex = JapaneseDatePeriodExtractorConfiguration.UnitRegex; + DurationUnitRegex = JapaneseDatePeriodExtractorConfiguration.DurationUnitRegex; + WeekOfMonthRegex = JapaneseDatePeriodExtractorConfiguration.WeekOfMonthRegex; + WeekOfYearRegex = JapaneseDatePeriodExtractorConfiguration.WeekOfYearRegex; + WeekOfDateRegex = 
JapaneseDatePeriodExtractorConfiguration.WeekOfDateRegex; + MonthOfDateRegex = JapaneseDatePeriodExtractorConfiguration.MonthOfDateRegex; + WhichWeekRegex = JapaneseDatePeriodExtractorConfiguration.WhichWeekRegex; + FirstLastOfYearRegex = JapaneseDatePeriodExtractorConfiguration.FirstLastOfYearRegex; + SeasonWithYear = JapaneseDatePeriodExtractorConfiguration.SeasonWithYear; + QuarterRegex = JapaneseDatePeriodExtractorConfiguration.QuarterRegex; + DecadeRegex = JapaneseDatePeriodExtractorConfiguration.DecadeRegex; + CenturyRegex = JapaneseDatePeriodExtractorConfiguration.CenturyRegex; + RelativeRegex = JapaneseDateExtractorConfiguration.RelativeRegex; + RelativeMonthRegex = JapaneseDateExtractorConfiguration.RelativeMonthRegex; + LaterEarlyPeriodRegex = JapaneseDatePeriodExtractorConfiguration.LaterEarlyPeriodRegex; + DatePointWithAgoAndLater = JapaneseDatePeriodExtractorConfiguration.DatePointWithAgoAndLater; + ReferenceDatePeriodRegex = JapaneseDatePeriodExtractorConfiguration.ReferenceDatePeriodRegex; + ComplexDatePeriodRegex = JapaneseDatePeriodExtractorConfiguration.ComplexDatePeriodRegex; + DurationRelativeDurationUnitRegex = JapaneseDateExtractorConfiguration.DurationRelativeDurationUnitRegex; + UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToImmutableDictionary(); + CardinalMap = DateTimeDefinitions.ParserConfigurationCardinalMap.ToImmutableDictionary(); + DayOfMonth = DateTimeDefinitions.ParserConfigurationDayOfMonth.ToImmutableDictionary(); + SeasonMap = DateTimeDefinitions.ParserConfigurationSeasonMap.ToImmutableDictionary(); - public ParseResult Parse(ExtractResult extResult) - { - return this.Parse(extResult, DateObject.Now); } - public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) - { - var referenceDate = refDate; + public IDateTimeExtractor DateExtractor { get; } - object value = null; + public IDateTimeExtractor DurationExtractor { get; } - if (er.Type.Equals(ParserName, StringComparison.Ordinal)) - { - var innerResult 
= ParseSimpleCases(er.Text, referenceDate); - if (!innerResult.Success) - { - innerResult = ParseOneWordPeriod(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = MergeTwoTimePoints(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseNumberWithUnit(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseYearToYear(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseMonthToMonth(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseDayToDay(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseYear(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseWeekOfMonth(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseSeason(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseQuarter(er.Text, referenceDate); - } - - if (!innerResult.Success) - { - innerResult = ParseDecade(er.Text, referenceDate); - } - - if (innerResult.Success) - { - if (innerResult.FutureValue != null && innerResult.PastValue != null) - { - innerResult.FutureResolution = new Dictionary - { - { - TimeTypeConstants.START_DATE, - DateTimeFormatUtil.FormatDate(((Tuple)innerResult.FutureValue).Item1) - }, - { - TimeTypeConstants.END_DATE, - DateTimeFormatUtil.FormatDate(((Tuple)innerResult.FutureValue).Item2) - }, - }; - - innerResult.PastResolution = new Dictionary - { - { - TimeTypeConstants.START_DATE, - DateTimeFormatUtil.FormatDate(((Tuple)innerResult.PastValue).Item1) - }, - { - TimeTypeConstants.END_DATE, - DateTimeFormatUtil.FormatDate(((Tuple)innerResult.PastValue).Item2) - }, - }; - } - else - { - innerResult.PastResolution = innerResult.FutureResolution = new Dictionary(); - } - - value = innerResult; - } - } + public IExtractor CardinalExtractor { get; } - var ret = new DateTimeParseResult - { - Text = er.Text, - Start = 
er.Start, - Length = er.Length, - Type = er.Type, - Data = er.Data, - Value = value, - TimexStr = value == null ? string.Empty : ((DateTimeResolutionResult)value).Timex, - ResolutionStr = string.Empty, - }; - - return ret; - } + public IDateTimeParser DurationParser { get; } - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } + public IDateTimeParser DateParser { get; } - // convert Japanese Number to Integer - private static int ConvertJapaneseToNum(string numStr) - { - var num = -1; - var er = IntegerExtractor.Extract(numStr); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) - { - num = Convert.ToInt32((double)(IntegerParser.Parse(er[0]).Value ?? 0)); - } - } + public IExtractor IntegerExtractor { get; } - return num; - } + public IParser NumberParser { get; } - // convert Japanese Year to Integer - private static int ConvertJapaneseToInteger(string yearJapStr) - { - var year = 0; - var num = 0; + public ImmutableDictionary DynastyYearMap { get; } - var er = IntegerExtractor.Extract(yearJapStr); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) - { - num = Convert.ToInt32((double)(IntegerParser.Parse(er[0]).Value ?? 0)); - } - } + public IImmutableDictionary UnitMap { get; } - if (num < 10) - { - num = 0; - foreach (var ch in yearJapStr) - { - num *= 10; - er = IntegerExtractor.Extract(ch.ToString()); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) - { - num += Convert.ToInt32((double)(IntegerParser.Parse(er[0]).Value ?? 0)); - } - } - } - - year = num; - } - else - { - year = num; - } + public IImmutableDictionary CardinalMap { get; } - return year == 0 ? 
-1 : year; - } + public IImmutableDictionary DayOfMonth { get; } - private static DateObject ComputeDate(int cardinal, int weekday, int month, int year) - { - var firstDay = DateObject.MinValue.SafeCreateFromValue(year, month, 1); - var firstWeekday = firstDay.This((DayOfWeek)weekday); - if (weekday == 0) - { - weekday = 7; - } + IImmutableDictionary ICJKDatePeriodParserConfiguration.MonthOfYear => MonthOfYear; - if (weekday < (int)firstDay.DayOfWeek) - { - firstWeekday = firstDay.Next((DayOfWeek)weekday); - } + public IImmutableDictionary SeasonMap { get; } - return firstWeekday.AddDays(7 * (cardinal - 1)); - } + public string DynastyStartYear { get; } - private DateTimeResolutionResult ParseSimpleCases(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - int year = referenceDate.Year, month = referenceDate.Month; - int beginDay, endDay; - var noYear = false; - var inputYear = false; + public string TokenBeforeDate => string.Empty; - var match = JapaneseDatePeriodExtractorConfiguration.SimpleCasesRegex.MatchExact(text, trim: true); - string beginLuisStr, endLuisStr; + public Regex DynastyYearRegex { get; } - if (match.Success) - { - var days = match.Groups["day"]; - beginDay = this.config.DayOfMonth[days.Captures[0].Value]; - endDay = this.config.DayOfMonth[days.Captures[1].Value]; - - var monthStr = match.Groups["month"].Value; - var yearStr = match.Groups["year"].Value; - if (!string.IsNullOrEmpty(yearStr)) - { - year = int.Parse(yearStr); - if (year < 100 && year >= this.config.TwoNumYear) - { - year += 1900; - } - else if (year < 100 && year < this.config.TwoNumYear) - { - year += 2000; - } - - inputYear = true; - } - else - { - noYear = true; - } - - if (!string.IsNullOrEmpty(monthStr)) - { - month = ToMonthNumber(monthStr); - } - else - { - monthStr = match.Groups["relmonth"].Value.Trim(); - var thisMatch = JapaneseDatePeriodExtractorConfiguration.ThisRegex.Match(monthStr); - var nextMatch = 
JapaneseDatePeriodExtractorConfiguration.NextRegex.Match(monthStr); - var lastMatch = JapaneseDatePeriodExtractorConfiguration.LastRegex.Match(monthStr); - - if (thisMatch.Success) - { - // do nothing - } - else if (nextMatch.Success) - { - if (month != 12) - { - month += 1; - } - else - { - month = 1; - year += 1; - } - } - else - { - if (month != 1) - { - month -= 1; - } - else - { - month = 12; - year -= 1; - } - } - } - - if (inputYear || JapaneseDatePeriodExtractorConfiguration.ThisRegex.Match(monthStr).Success || - JapaneseDatePeriodExtractorConfiguration.NextRegex.Match(monthStr).Success) - { - beginLuisStr = DateTimeFormatUtil.LuisDate(year, month, beginDay); - endLuisStr = DateTimeFormatUtil.LuisDate(year, month, endDay); - } - else - { - beginLuisStr = DateTimeFormatUtil.LuisDate(-1, month, beginDay); - endLuisStr = DateTimeFormatUtil.LuisDate(-1, month, endDay); - } - } - else - { - return ret; - } + public Regex SimpleCasesRegex { get; } - int futureYear = year, pastYear = year; - var startDate = DateObject.MinValue.SafeCreateFromValue(year, month, beginDay); - if (noYear && startDate < referenceDate) - { - futureYear++; - } + public Regex ThisRegex { get; } - if (noYear && startDate >= referenceDate) - { - pastYear--; - } + public Regex NextRegex { get; } - ret.Timex = $"({beginLuisStr},{endLuisStr},P{endDay - beginDay}D)"; + public Regex LastRegex { get; } - ret.FutureValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(futureYear, month, beginDay), - DateObject.MinValue.SafeCreateFromValue(futureYear, month, endDay)); + public Regex YearToYear { get; } - ret.PastValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(pastYear, month, beginDay), - DateObject.MinValue.SafeCreateFromValue(pastYear, month, endDay)); + public Regex YearToYearSuffixRequired { get; } - ret.Success = true; + public Regex YearRegex { get; } - return ret; - } + public Regex RelativeRegex { get; } - // handle like "2008年から2012年まで - private DateTimeResolutionResult 
ParseYearToYear(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - var match = JapaneseDatePeriodExtractorConfiguration.YearToYear.Match(text); + public Regex RelativeMonthRegex { get; } - if (match.Success) - { - var yearMatch = JapaneseDatePeriodExtractorConfiguration.YearRegex.Matches(text); - var yearInJapaneseMatch = JapaneseDatePeriodExtractorConfiguration.YearInJapaneseRegex.Matches(text); - var beginYear = 0; - var endYear = 0; - - if (yearMatch.Count == 2) - { - var yearFrom = yearMatch[0].Groups["year"].Value; - var yearTo = yearMatch[1].Groups["year"].Value; - beginYear = int.Parse(yearFrom); - endYear = int.Parse(yearTo); - } - else if (yearInJapaneseMatch.Count == 2) - { - var yearFrom = yearInJapaneseMatch[0].Groups["yearJap"].Value; - var yearTo = yearInJapaneseMatch[1].Groups["yearJap"].Value; - beginYear = ConvertJapaneseToInteger(yearFrom); - endYear = ConvertJapaneseToInteger(yearTo); - } - else if (yearInJapaneseMatch.Count == 1 && yearMatch.Count == 1) - { - if (yearMatch[0].Index < yearInJapaneseMatch[0].Index) - { - var yearFrom = yearMatch[0].Groups["year"].Value; - var yearTo = yearInJapaneseMatch[0].Groups["yearch"].Value; - beginYear = int.Parse(yearFrom); - endYear = ConvertJapaneseToInteger(yearTo); - } - else - { - var yearFrom = yearInJapaneseMatch[0].Groups["yearch"].Value; - var yearTo = yearMatch[0].Groups["year"].Value; - beginYear = ConvertJapaneseToInteger(yearFrom); - endYear = int.Parse(yearTo); - } - } - - if (beginYear < 100 && beginYear >= this.config.TwoNumYear) - { - beginYear += 1900; - } - else if (beginYear < 100 && beginYear < this.config.TwoNumYear) - { - beginYear += 2000; - } - - if (endYear < 100 && endYear >= this.config.TwoNumYear) - { - endYear += 1900; - } - else if (endYear < 100 && endYear < this.config.TwoNumYear) - { - endYear += 2000; - } - - var beginDate = DateObject.MinValue.SafeCreateFromValue(beginYear, 1, 1); - var endDate = 
DateObject.MinValue.SafeCreateFromValue(endYear, 1, 1); - var beginTimex = DateTimeFormatUtil.LuisDate(beginYear, 1, 1); - var endTimex = DateTimeFormatUtil.LuisDate(endYear, 1, 1); - ret.Timex = $"({beginTimex},{endTimex},P{endYear - beginYear}Y)"; - ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); - ret.Success = true; - return ret; - } + public Regex LaterEarlyPeriodRegex { get; } - return ret; - } + public Regex DatePointWithAgoAndLater { get; } - // handle like "08月から12月まで - private DateTimeResolutionResult ParseMonthToMonth(string text, DateObject referenceDate) - { - int undefinedValue = -1; + public Regex ReferenceDatePeriodRegex { get; } - var ret = new DateTimeResolutionResult(); - var match = JapaneseDatePeriodExtractorConfiguration.MonthToMonth.Match(text); + public Regex ComplexDatePeriodRegex { get; } - if (match.Success) - { - var monthMatch = JapaneseDatePeriodExtractorConfiguration.MonthRegex.Matches(text); - var beginMonth = 0; - var endMonth = 0; - - if (monthMatch.Count == 2) - { - var monthFrom = monthMatch[0].Groups["month"].Value; - var monthTo = monthMatch[1].Groups["month"].Value; - beginMonth = ToMonthNumber(monthFrom); - endMonth = ToMonthNumber(monthTo); - } - - var currentYear = referenceDate.Year; - var currentMonth = referenceDate.Month; - var beginYearForPastResolution = currentYear; - var endYearForPastResolution = currentYear; - var beginYearForFutureResolution = currentYear; - var endYearForFutureResolution = currentYear; - var durationMonths = 0; - - if (beginMonth < endMonth) - { - // For this case, FutureValue and PastValue share the same resolution - if (beginMonth < currentMonth && endMonth >= currentMonth) - { - // Keep the beginYear and endYear equal to currentYear - } - else if (beginMonth >= currentMonth) - { - beginYearForPastResolution = endYearForPastResolution = currentYear - 1; - } - else if (endMonth < currentMonth) - { - beginYearForFutureResolution = endYearForFutureResolution = currentYear + 1; - 
} - - durationMonths = endMonth - beginMonth; - } - else if (beginMonth > endMonth) - { - // For this case, FutureValue and PastValue share the same resolution - if (beginMonth < currentMonth) - { - endYearForPastResolution = endYearForFutureResolution = currentYear + 1; - } - else - { - beginYearForPastResolution = currentYear - 1; - endYearForFutureResolution = currentYear + 1; - } - - durationMonths = beginMonth - endMonth; - } - - if (durationMonths != 0) - { - var beginDateForPastResolution = DateObject.MinValue.SafeCreateFromValue(beginYearForPastResolution, beginMonth, 1); - var endDateForPastResolution = DateObject.MinValue.SafeCreateFromValue(endYearForPastResolution, endMonth, 1); - var beginDateForFutureResolution = DateObject.MinValue.SafeCreateFromValue(beginYearForFutureResolution, beginMonth, 1); - var endDateForFutureResolution = DateObject.MinValue.SafeCreateFromValue(endYearForFutureResolution, endMonth, 1); - - var beginTimex = DateTimeFormatUtil.LuisDate(undefinedValue, beginMonth, 1); - var endTimex = DateTimeFormatUtil.LuisDate(undefinedValue, endMonth, 1); - ret.Timex = $"({beginTimex},{endTimex},P{durationMonths}M)"; - ret.PastValue = new Tuple(beginDateForPastResolution, endDateForPastResolution); - ret.FutureValue = new Tuple(beginDateForFutureResolution, endDateForFutureResolution); - ret.Success = true; - } - } + public Regex DurationRelativeDurationUnitRegex { get; } - return ret; - } + public Regex YearInCJKRegex { get; } - private DateTimeResolutionResult ParseDayToDay(string text, DateObject referenceDate) - { - int undefinedValue = -1; - var ret = new DateTimeResolutionResult(); - var match = JapaneseDatePeriodExtractorConfiguration.DayToDay.Match(text); + public Regex MonthToMonth { get; } - if (match.Success) - { - var dayMatchMatch = JapaneseDatePeriodExtractorConfiguration.DayRegexForPeriod.Matches(text); - var beginDay = 0; - var endDay = 0; - - if (dayMatchMatch.Count == 2) - { - var dayFrom = 
dayMatchMatch[0].Groups["day"].Value; - var dayTo = dayMatchMatch[1].Groups["day"].Value; - beginDay = this.config.DayOfMonth[dayFrom]; - endDay = this.config.DayOfMonth[dayTo]; - } - - var beginYearForPastResolution = referenceDate.Year; - var endYearForPastResolution = referenceDate.Year; - var beginYearForFutureResolution = referenceDate.Year; - var endYearForFutureResolution = referenceDate.Year; - var currentMonth = referenceDate.Month; - var currentDay = referenceDate.Day; - var beginMonthForPastResolution = currentMonth; - var endMonthForPastResolution = currentMonth; - var beginMonthForFutureResolution = currentMonth; - var endMonthForFutureResolution = currentMonth; - var durationDays = 0; - - if (beginDay < endDay) - { - // For this case, FutureValue and PastValue share the same resolution - if (beginDay < currentDay && endDay >= currentDay) - { - // Keep the beginMonth and endMonth equal to currentMonth - } - else if (beginDay >= currentDay) - { - if (currentMonth == 1) - { - beginMonthForPastResolution = endMonthForPastResolution = LastMonthOfYear; - beginYearForPastResolution--; - endYearForPastResolution--; - } - else - { - beginMonthForPastResolution = endMonthForPastResolution = currentMonth - 1; - } - } - else if (endDay < currentDay) - { - if (currentMonth == LastMonthOfYear) - { - beginMonthForFutureResolution = endMonthForFutureResolution = 1; - beginYearForFutureResolution++; - endYearForFutureResolution++; - } - else - { - beginMonthForFutureResolution = endMonthForFutureResolution = currentMonth + 1; - } - } - - durationDays = endDay - beginDay; - } - else if (beginDay > endDay) - { - // For this case, FutureValue and PastValue share the same resolution - if (beginDay < currentDay) - { - if (currentMonth == LastMonthOfYear) - { - endMonthForPastResolution = endMonthForFutureResolution = 1; - endYearForPastResolution++; - endYearForFutureResolution++; - } - else - { - endMonthForPastResolution = endMonthForFutureResolution = currentMonth + 1; 
- } - } - else - { - if (currentMonth == LastMonthOfYear) - { - beginMonthForPastResolution = currentMonth - 1; - endMonthForFutureResolution = 1; - endYearForFutureResolution++; - } - else if (currentMonth == 1) - { - beginMonthForPastResolution = 12; - beginYearForPastResolution--; - endMonthForFutureResolution = currentMonth + 1; - } - else - { - beginMonthForPastResolution = currentMonth - 1; - endMonthForFutureResolution = currentMonth + 1; - } - } - - durationDays = beginDay - endDay; - } - - if (durationDays != 0) - { - var beginDateForPastResolution = DateObject.MinValue.SafeCreateFromValue(beginYearForPastResolution, beginMonthForPastResolution, beginDay); - var endDateForPastResolution = DateObject.MinValue.SafeCreateFromValue(endYearForPastResolution, endMonthForPastResolution, endDay); - var beginDateForFutureResolution = DateObject.MinValue.SafeCreateFromValue(beginYearForFutureResolution, beginMonthForFutureResolution, beginDay); - var endDateForFutureResolution = DateObject.MinValue.SafeCreateFromValue(endYearForFutureResolution, endMonthForFutureResolution, endDay); - var beginTimex = DateTimeFormatUtil.LuisDate(undefinedValue, undefinedValue, beginDay); - var endTimex = DateTimeFormatUtil.LuisDate(undefinedValue, undefinedValue, endDay); - - ret.Timex = $"({beginTimex},{endTimex},P{durationDays}D)"; - ret.PastValue = new Tuple(beginDateForPastResolution, endDateForPastResolution); - ret.FutureValue = new Tuple(beginDateForFutureResolution, endDateForFutureResolution); - ret.Success = true; - } - } + public Regex MonthToMonthSuffixRequired { get; } - return ret; - } + public Regex MonthRegex { get; } - // case like "今年三月" "这个周末" "五月" - private DateTimeResolutionResult ParseOneWordPeriod(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - int year = referenceDate.Year, month = referenceDate.Month; - int futureYear = year, pastYear = year; + public Regex YearAndMonth { get; } - var trimmedText = text.Trim(); - var 
match = JapaneseDatePeriodExtractorConfiguration.OneWordPeriodRegex.MatchExact(trimmedText, trim: true); + public Regex PureNumYearAndMonth { get; } - if (match.Success) - { - var monthStr = match.Groups["month"].Value; - if (trimmedText.Equals("今年")) - { - ret.Timex = referenceDate.Year.ToString("D4"); - ret.FutureValue = - ret.PastValue = - new Tuple(DateObject.MinValue.SafeCreateFromValue(referenceDate.Year, 1, 1), referenceDate); - ret.Success = true; - return ret; - } - - var thisMatch = JapaneseDatePeriodExtractorConfiguration.ThisRegex.Match(trimmedText); - var nextMatch = JapaneseDatePeriodExtractorConfiguration.NextRegex.Match(trimmedText); - var lastMatch = JapaneseDatePeriodExtractorConfiguration.LastRegex.Match(trimmedText); - - if (!string.IsNullOrEmpty(monthStr)) - { - var swift = -10; - - if (trimmedText.StartsWith("来年") || trimmedText.StartsWith("先年")) - { - swift = 1; - } - else if (trimmedText.StartsWith("前年")) - { - swift = -1; - } - else if (trimmedText.StartsWith("今年")) - { - swift = 0; - } - - month = ToMonthNumber(monthStr); - - if (swift >= -1) - { - ret.Timex = (referenceDate.Year + swift).ToString("D4") + "-" + month.ToString("D2"); - year = year + swift; - futureYear = pastYear = year; - } - else - { - ret.Timex = "XXXX-" + month.ToString("D2"); - if (month < referenceDate.Month) - { - futureYear++; - } - - if (month >= referenceDate.Month) - { - pastYear--; - } - } - } - else - { - var swift = 0; - if (nextMatch.Success) - { - swift = 1; - } - else if (lastMatch.Success) - { - swift = -1; - } - - if (trimmedText.EndsWith("周") | trimmedText.EndsWith("星期")) - { - var monday = referenceDate.This(DayOfWeek.Monday).AddDays(7 * swift); - ret.Timex = DateTimeFormatUtil.ToIsoWeekTimex(monday); - ret.FutureValue = - ret.PastValue = - new Tuple( - referenceDate.This(DayOfWeek.Monday).AddDays(7 * swift), - referenceDate.This(DayOfWeek.Sunday).AddDays(7 * swift).AddDays(1)); - ret.Success = true; - return ret; - } - - if (trimmedText.EndsWith("周末")) 
- { - var beginDate = referenceDate.This(DayOfWeek.Saturday).AddDays(7 * swift); - var endDate = referenceDate.This(DayOfWeek.Sunday).AddDays(7 * swift); - - ret.Timex = beginDate.Year.ToString("D4") + "-W" + - Cal.GetWeekOfYear(beginDate, CalendarWeekRule.FirstFourDayWeek, DayOfWeek.Monday) - .ToString("D2") + "-WE"; - - ret.FutureValue = - ret.PastValue = new Tuple(beginDate, endDate.AddDays(1)); - - ret.Success = true; - - return ret; - } - - if (trimmedText.EndsWith("月")) - { - month = referenceDate.AddMonths(swift).Month; - year = referenceDate.AddMonths(swift).Year; - ret.Timex = year.ToString("D4") + "-" + month.ToString("D2"); - futureYear = pastYear = year; - } - else if (trimmedText.EndsWith("年")) - { - year = referenceDate.AddYears(swift).Year; - if (trimmedText.EndsWith("前年") || trimmedText.EndsWith("先年")) - { - year--; - } - else if (trimmedText.EndsWith("来年")) - { - year++; - } - else if (trimmedText.EndsWith("前年")) - { - year -= 2; - } - else if (trimmedText.EndsWith("后年")) - { - year += 2; - } - - ret.Timex = year.ToString("D4"); - ret.FutureValue = - ret.PastValue = - new Tuple( - DateObject.MinValue.SafeCreateFromValue(year, 1, 1), - DateObject.MinValue.SafeCreateFromValue(year, 12, 31).AddDays(1)); - ret.Success = true; - return ret; - } - } - } - else - { - return ret; - } + public Regex OneWordPeriodRegex { get; } - // only "month" will come to here - ret.FutureValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(futureYear, month, 1), - DateObject.MinValue.SafeCreateFromValue(futureYear, month, 1).AddMonths(1)); + public Regex NumberCombinedWithUnit { get; } - ret.PastValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(pastYear, month, 1), - DateObject.MinValue.SafeCreateFromValue(pastYear, month, 1).AddMonths(1)); + public Regex PastRegex { get; } - ret.Success = true; + public Regex FutureRegex { get; } - return ret; - } + public Regex WeekWithWeekDayRangeRegex { get; } - // only contains year like "2016年" - private 
DateTimeResolutionResult ParseYear(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - var match = JapaneseDatePeriodExtractorConfiguration.YearRegex.MatchExact(text, trim: true); + public Regex UnitRegex { get; } - if (match.Success) - { - var tmp = match.Value; - - // Trim() to handle extra whitespaces like '07 年' - if (tmp.EndsWith("年")) - { - tmp = tmp.Substring(0, tmp.Length - 1).Trim(); - } - - var num = 0; - var year = 0; - if (tmp.Length == 2) - { - num = int.Parse(tmp); - if (num < 100 && num >= 30) - { - num += 1900; - } - else if (num < 30) - { - num += 2000; - } - - year = num; - } - else - { - year = int.Parse(tmp); - } - - var beginDay = DateObject.MinValue.SafeCreateFromValue(year, 1, 1); - var endDay = DateObject.MinValue.SafeCreateFromValue(year + 1, 1, 1); - - ret.Timex = year.ToString("D4"); - ret.FutureValue = ret.PastValue = new Tuple(beginDay, endDay); - ret.Success = true; - - return ret; - } + public Regex DurationUnitRegex { get; } - match = JapaneseDatePeriodExtractorConfiguration.YearInJapaneseRegex.MatchExact(text, trim: true); + public Regex WeekOfMonthRegex { get; } - if (match.Success) - { - var tmp = match.Value; - if (tmp.EndsWith("年")) - { - tmp = tmp.Substring(0, tmp.Length - 1); - } - - if (tmp.Length == 1) - { - return ret; - } - - var re = ConvertJapaneseToInteger(tmp); - var year = re; - - if (year < 100 && year >= this.config.TwoNumYear) - { - year += 1900; - } - else if (year < 100 && year < this.config.TwoNumYear) - { - year += 2000; - } - - var beginDay = DateObject.MinValue.SafeCreateFromValue(year, 1, 1); - var endDay = DateObject.MinValue.SafeCreateFromValue(year + 1, 1, 1); - - ret.Timex = year.ToString("D4"); - ret.FutureValue = ret.PastValue = new Tuple(beginDay, endDay); - ret.Success = true; - - return ret; - } + public Regex WeekOfYearRegex { get; } - return ret; - } + public Regex WeekOfDateRegex { get; } - // parse entities that made up by two time points - private 
DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - var er = SingleDateExtractor.Extract(text, referenceDate); - if (er.Count < 2) - { - er = SingleDateExtractor.Extract("on " + text, referenceDate); - if (er.Count < 2) - { - return ret; - } - - er[0].Start -= 3; - er[1].Start -= 3; - } + public Regex MonthOfDateRegex { get; } - var pr1 = this.config.DateParser.Parse(er[0], referenceDate); - var pr2 = this.config.DateParser.Parse(er[1], referenceDate); - if (pr1.Value == null || pr2.Value == null) - { - return ret; - } + public Regex WhichWeekRegex { get; } - DateObject futureBegin = (DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue, - futureEnd = (DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue; - DateObject pastBegin = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue, - pastEnd = (DateObject)((DateTimeResolutionResult)pr2.Value).PastValue; + public Regex FirstLastOfYearRegex { get; } - if (futureBegin > futureEnd) - { - futureBegin = pastBegin; - } + public Regex SeasonWithYear { get; } - if (pastEnd < pastBegin) - { - pastEnd = futureEnd; - } + public Regex QuarterRegex { get; } - if ((JapaneseDatePeriodExtractorConfiguration.YearAndMonth.IsMatch(pr1.Text) && - JapaneseDatePeriodExtractorConfiguration.YearAndMonth.IsMatch(pr2.Text)) || - (JapaneseDatePeriodExtractorConfiguration.SimpleYearAndMonth.IsMatch(pr1.Text) && - JapaneseDatePeriodExtractorConfiguration.SimpleYearAndMonth.IsMatch(pr2.Text))) - { - ret.Timex = $"({pr1.TimexStr},{pr2.TimexStr},P{(int)(futureEnd - futureBegin).TotalDays / 30}M)"; - } - else - { - ret.Timex = $"({pr1.TimexStr},{pr2.TimexStr},P{(futureEnd - futureBegin).TotalDays}D)"; - } + public Regex DecadeRegex { get; } - ret.FutureValue = new Tuple(futureBegin, futureEnd); - ret.PastValue = new Tuple(pastBegin, pastEnd); - ret.Success = true; + public Regex CenturyRegex { get; } - return ret; - } + public Regex DayToDay { 
get; } - // handle like "前两年" "前三个月" - private DateTimeResolutionResult ParseNumberWithUnit(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); + public Regex MonthDayRange { get; } - string numStr, unitStr; + public Regex DayRegexForPeriod { get; } - // if there are NO spaces between number and unit - var match = JapaneseDatePeriodExtractorConfiguration.NumberCombinedWithUnit.Match(text); - if (match.Success) - { - var srcUnit = match.Groups["unit"].Value; - var beforeStr = text.Substring(0, match.Index); - if (this.config.UnitMap.ContainsKey(srcUnit)) - { - unitStr = this.config.UnitMap[srcUnit]; - numStr = match.Groups["num"].Value; - - var prefixMatch = JapaneseDatePeriodExtractorConfiguration.PastRegex.MatchExact(beforeStr, trim: true); - - if (prefixMatch.Success) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case Constants.TimexDay: - beginDate = referenceDate.AddDays(-double.Parse(numStr)); - endDate = referenceDate; - break; - case Constants.TimexWeek: - beginDate = referenceDate.AddDays(-7 * double.Parse(numStr)); - endDate = referenceDate; - break; - case Constants.TimexMonthFull: - beginDate = referenceDate.AddMonths(-Convert.ToInt32(double.Parse(numStr))); - endDate = referenceDate; - break; - case Constants.TimexYear: - beginDate = referenceDate.AddYears(-Convert.ToInt32(double.Parse(numStr))); - endDate = referenceDate; - break; - default: - return ret; - } - - ret.Timex = $"({DateTimeFormatUtil.LuisDate(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)},P{numStr}{unitStr[0]})"; - ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); - ret.Success = true; - return ret; - } - - prefixMatch = JapaneseDatePeriodExtractorConfiguration.FutureRegex.MatchExact(beforeStr, trim: true); - - if (prefixMatch.Success) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case Constants.TimexDay: - beginDate = referenceDate; - endDate = referenceDate.AddDays(double.Parse(numStr)); - break; - 
case Constants.TimexWeek: - beginDate = referenceDate; - endDate = referenceDate.AddDays(7 * double.Parse(numStr)); - break; - case Constants.TimexMonthFull: - beginDate = referenceDate; - endDate = referenceDate.AddMonths(Convert.ToInt32(double.Parse(numStr))); - break; - case Constants.TimexYear: - beginDate = referenceDate; - endDate = referenceDate.AddYears(Convert.ToInt32(double.Parse(numStr))); - break; - default: - return ret; - } - - ret.Timex = - $"({DateTimeFormatUtil.LuisDate(beginDate.AddDays(1))},{DateTimeFormatUtil.LuisDate(endDate.AddDays(1))},P{numStr}{unitStr[0]})"; - ret.FutureValue = - ret.PastValue = new Tuple(beginDate.AddDays(1), endDate.AddDays(1)); - ret.Success = true; - return ret; - } - } - } + public Regex SimpleYearAndMonth { get; } - // for case "前两年" "后三年" - var durationRes = DurationExtractor.Extract(text, referenceDate); - if (durationRes.Count > 0) - { - var beforeStr = text.Substring(0, (int)durationRes[0].Start); - match = JapaneseDatePeriodExtractorConfiguration.UnitRegex.Match(durationRes[0].Text); - if (match.Success) - { - var srcUnit = match.Groups["unit"].Value; - var numberStr = durationRes[0].Text.Substring(0, match.Index).Trim(); - var number = ConvertJapaneseToNum(numberStr); - - if (this.config.UnitMap.ContainsKey(srcUnit)) - { - unitStr = this.config.UnitMap[srcUnit]; - - var prefixMatch = JapaneseDatePeriodExtractorConfiguration.PastRegex.MatchExact(beforeStr, trim: true); - - if (prefixMatch.Success) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case Constants.TimexDay: - beginDate = referenceDate.AddDays(-number); - endDate = referenceDate; - break; - case Constants.TimexWeek: - beginDate = referenceDate.AddDays(-7 * number); - endDate = referenceDate; - break; - case Constants.TimexMonthFull: - beginDate = referenceDate.AddMonths(-number); - endDate = referenceDate; - break; - case Constants.TimexYear: - beginDate = referenceDate.AddYears(-number); - endDate = referenceDate; - break; - default: - 
return ret; - } - - ret.Timex = $"({DateTimeFormatUtil.LuisDate(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)},P{number}{unitStr[0]})"; - ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); - ret.Success = true; - return ret; - } - - prefixMatch = JapaneseDatePeriodExtractorConfiguration.FutureRegex.MatchExact(beforeStr, trim: true); - - if (prefixMatch.Success) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case Constants.TimexDay: - beginDate = referenceDate; - endDate = referenceDate.AddDays(number); - break; - case Constants.TimexWeek: - beginDate = referenceDate; - endDate = referenceDate.AddDays(7 * number); - break; - case Constants.TimexMonthFull: - beginDate = referenceDate; - endDate = referenceDate.AddMonths(number); - break; - case Constants.TimexYear: - beginDate = referenceDate; - endDate = referenceDate.AddYears(number); - break; - default: - return ret; - } - - ret.Timex = - $"({DateTimeFormatUtil.LuisDate(beginDate.AddDays(1))},{DateTimeFormatUtil.LuisDate(endDate.AddDays(1))},P{number}{unitStr[0]})"; - ret.FutureValue = - ret.PastValue = - new Tuple(beginDate.AddDays(1), endDate.AddDays(1)); - ret.Success = true; - return ret; - } - } - } - } + public Regex SpecialMonthRegex { get; } - return ret; - } + public Regex SpecialYearRegex { get; } - // case like "三月的第一周" - private DateTimeResolutionResult ParseWeekOfMonth(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - var trimmedText = text.Trim(); - var match = JapaneseDatePeriodExtractorConfiguration.WeekOfMonthRegex.Match(text); - if (!match.Success) - { - return ret; - } + Regex ICJKDatePeriodParserConfiguration.WoMLastRegex => WoMLastRegex; - var cardinalStr = match.Groups["cardinal"].Value; - var monthStr = match.Groups["month"].Value; - var noYear = false; - int year; + Regex ICJKDatePeriodParserConfiguration.WoMPreviousRegex => WoMPreviousRegex; - int cardinal; - if (cardinalStr.Equals("最后一")) - { - cardinal = 5; - } - 
else - { - cardinal = this.config.CardinalMap[cardinalStr]; - } + Regex ICJKDatePeriodParserConfiguration.WoMNextRegex => WoMNextRegex; - int month; - if (string.IsNullOrEmpty(monthStr)) - { - var swift = 0; - if (trimmedText.StartsWith("下个")) - { - swift = 1; - } - else if (trimmedText.StartsWith("上个")) - { - swift = -1; - } - - month = referenceDate.AddMonths(swift).Month; - year = referenceDate.AddMonths(swift).Year; - ret.Timex = referenceDate.Year.ToString("D4") + "-" + month.ToString("D2"); - } - else - { - month = ToMonthNumber(monthStr); - ret.Timex = "XXXX" + "-" + month.ToString("D2"); - year = referenceDate.Year; - noYear = true; - } + public int TwoNumYear => int.Parse(DateTimeDefinitions.TwoNumYear, CultureInfo.InvariantCulture); - var value = ComputeDate(cardinal, 1, month, year); + public int ToMonthNumber(string monthStr) + { + return MonthOfYear[monthStr] > 12 ? MonthOfYear[monthStr] % 12 : MonthOfYear[monthStr]; + } - var futureDate = value; - var pastDate = value; - if (noYear && futureDate < referenceDate) - { - futureDate = ComputeDate(cardinal, 1, month, year + 1); - if (futureDate.Month != month) - { - futureDate = futureDate.AddDays(-7); - } - } + public bool IsMonthOnly(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); + } - if (noYear && pastDate >= referenceDate) - { - pastDate = ComputeDate(cardinal, 1, month, year - 1); - if (pastDate.Month != month) - { - pastDate = pastDate.AddDays(-7); - } - } + public bool IsWeekend(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); + } - ret.Timex += "-W" + cardinal.ToString("D2"); - ret.FutureValue = new Tuple(futureDate, futureDate.AddDays(7)); - ret.PastValue = new Tuple(pastDate, pastDate.AddDays(7)); - ret.Success = true; + public bool IsWeekOnly(string text) + { + var trimmedText = 
text.Trim(); + return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); + } - return ret; + public bool IsYearOnly(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal) || trimmedText.StartsWith(o, StringComparison.Ordinal)); } - // parse "今年夏天" - private DateTimeResolutionResult ParseSeason(string text, DateObject referenceDate) + public bool IsThisYear(string text) { - var ret = new DateTimeResolutionResult(); - var match = JapaneseDatePeriodExtractorConfiguration.SeasonWithYear.MatchExact(text, trim: true); + var trimmedText = text.Trim(); + return DateTimeDefinitions.ThisYearTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } - if (match.Success) - { - // parse year - var year = referenceDate.Year; - var hasYear = false; - var yearNum = match.Groups["year"].Value; - var yearJap = match.Groups["yearJap"].Value; - var yearRel = match.Groups["yearrel"].Value; - - if (!string.IsNullOrEmpty(yearNum)) - { - hasYear = true; - if (yearNum.EndsWith("年")) - { - yearNum = yearNum.Substring(0, yearNum.Length - 1); - } - - year = int.Parse(yearNum); - } - else if (!string.IsNullOrEmpty(yearJap)) - { - hasYear = true; - if (yearJap.EndsWith("年")) - { - yearJap = yearJap.Substring(0, yearJap.Length - 1); - } - - year = ConvertJapaneseToInteger(yearJap); - } - else if (!string.IsNullOrEmpty(yearRel)) - { - hasYear = true; - if (yearRel.EndsWith("前年") || yearRel.EndsWith("先年")) - { - year--; - } - else if (yearRel.EndsWith("来年")) - { - year++; - } - } - - if (year < 100 && year >= this.config.TwoNumYear) - { - year += 1900; - } - else if (year < 100 && year < this.config.TwoNumYear) - { - year += 2000; - } - - // parse season - var seasonStr = match.Groups["season"].Value; - ret.Timex = this.config.SeasonMap[seasonStr]; - if (hasYear) - { - ret.Timex = year.ToString("D4") + "-" + ret.Timex; - } - - ret.Success = true; - return 
ret; - } + public bool IsYearToDate(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } - return ret; + public bool IsLastYear(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.LastYearTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } - private DateTimeResolutionResult ParseQuarter(string text, DateObject referenceDate) + public bool IsNextYear(string text) { - var ret = new DateTimeResolutionResult(); - var match = JapaneseDatePeriodExtractorConfiguration.QuarterRegex.MatchExact(text, trim: true); + var trimmedText = text.Trim(); + return DateTimeDefinitions.NextYearTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } - if (!match.Success) - { - return ret; - } + public bool IsYearAfterNext(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.YearAfterNextTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } - // pare year - var year = referenceDate.Year; - var yearNum = match.Groups["year"].Value; - var yearJap = match.Groups["yearJap"].Value; - var yearRel = match.Groups["yearrel"].Value; - if (!string.IsNullOrEmpty(yearNum)) - { - if (yearNum.EndsWith("年")) - { - yearNum = yearNum.Substring(0, yearNum.Length - 1); - } + public bool IsYearBeforeLast(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.YearBeforeLastTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } - year = int.Parse(yearNum); - } - else if (!string.IsNullOrEmpty(yearJap)) - { - if (yearJap.EndsWith("年")) - { - yearJap = yearJap.Substring(0, yearJap.Length - 1); - } + public int GetSwiftMonth(string text) + { + // Current month: 今月 + var value = 0; - year = ConvertJapaneseToInteger(yearJap); - } - else if (!string.IsNullOrEmpty(yearRel)) + if (NextMonthRegex.IsMatch(text)) { - if (yearRel.EndsWith("前年") || yearRel.EndsWith("先年")) - { - year--; - } 
- else if (yearRel.EndsWith("来年")) - { - year++; - } + value = 1; } - - if (year < 100 && year >= this.config.TwoNumYear) + else if (LastMonthRegex.IsMatch(text)) { - year += 1900; + value = -1; } - else if (year < 100 && year < this.config.TwoNumYear) + else if (AfterNextMonthRegex.IsMatch(text)) { - year += 2000; + value = 2; } - // parse quarterNum - var cardinalStr = match.Groups["cardinal"].Value; - var quarterNum = this.config.CardinalMap[cardinalStr]; - - var beginDate = DateObject.MinValue.SafeCreateFromValue(year, (quarterNum * 3) - 2, 1); - var endDate = DateObject.MinValue.SafeCreateFromValue(year, (quarterNum * 3) + 1, 1); - ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); - ret.Timex = $"({DateTimeFormatUtil.LuisDate(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)},P3M)"; - ret.Success = true; - - return ret; + return value; } - private DateTimeResolutionResult ParseDecade(string text, DateObject referenceDate) + public int GetSwiftYear(string text) { - var ret = new DateTimeResolutionResult(); - int century = (referenceDate.Year / 100) + 1; - int decade; - int beginYear, endYear; - int decadeLastYear = 10; - var inputCentury = false; - - var match = JapaneseDatePeriodExtractorConfiguration.DecadeRegex.MatchExact(text, trim: true); - string beginLuisStr, endLuisStr; + var value = -10; - if (match.Success) - { - var decadeStr = match.Groups["decade"].Value; - if (!int.TryParse(decadeStr, out decade)) - { - decade = ConvertJapaneseToNum(decadeStr); - } - - var centuryStr = match.Groups["century"].Value; - if (!string.IsNullOrEmpty(centuryStr)) - { - if (!int.TryParse(centuryStr, out century)) - { - century = ConvertJapaneseToNum(centuryStr); - } - - inputCentury = true; - } - else - { - centuryStr = match.Groups["relcentury"].Value; - - if (!string.IsNullOrEmpty(centuryStr)) - { - centuryStr = centuryStr.Trim(); - var thisMatch = JapaneseDatePeriodExtractorConfiguration.ThisRegex.Match(centuryStr); - var nextMatch = 
JapaneseDatePeriodExtractorConfiguration.NextRegex.Match(centuryStr); - var lastMatch = JapaneseDatePeriodExtractorConfiguration.LastRegex.Match(centuryStr); - - if (thisMatch.Success) - { - // do nothing - } - else if (nextMatch.Success) - { - century++; - } - else - { - century--; - } - - inputCentury = true; - } - } - } - else + if (AfterNextYearRegex.IsMatch(text)) { - return ret; + value = 2; } - - beginYear = ((century - 1) * 100) + decade; - endYear = beginYear + decadeLastYear; - - if (inputCentury) + else if (NextYearRegex.IsMatch(text)) { - beginLuisStr = DateTimeFormatUtil.LuisDate(beginYear, 1, 1); - endLuisStr = DateTimeFormatUtil.LuisDate(endYear, 1, 1); + value = 1; } - else + else if (LastYearRegex.IsMatch(text)) { - var beginYearStr = "XX" + decade; - beginLuisStr = DateTimeFormatUtil.LuisDate(-1, 1, 1); - beginLuisStr = beginLuisStr.Replace("XXXX", beginYearStr); - - var endYearStr = "XX" + (endYear % 100).ToString("D2"); - endLuisStr = DateTimeFormatUtil.LuisDate(-1, 1, 1); - endLuisStr = endLuisStr.Replace("XXXX", endYearStr); + value = -1; } - - ret.Timex = $"({beginLuisStr},{endLuisStr},P10Y)"; - - int futureYear = beginYear, pastYear = beginYear; - var startDate = DateObject.MinValue.SafeCreateFromValue(beginYear, 1, 1); - if (!inputCentury && startDate < referenceDate) + else if (ThisYearRegex.IsMatch(text)) { - futureYear += 100; + // Current year: 今年 + value = 0; } - if (!inputCentury && startDate >= referenceDate) - { - pastYear -= 100; - } - - ret.FutureValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(futureYear, 1, 1), - DateObject.MinValue.SafeCreateFromValue(futureYear + decadeLastYear, 1, 1)); - - ret.PastValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(pastYear, 1, 1), - DateObject.MinValue.SafeCreateFromValue(pastYear + decadeLastYear, 1, 1)); - - ret.Success = true; - - return ret; - } - - private int ToMonthNumber(string monthStr) - { - return this.config.MonthOfYear[monthStr] > 12 ? 
this.config.MonthOfYear[monthStr] % 12 : this.config.MonthOfYear[monthStr]; + return value; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateTimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateTimeParser.cs deleted file mode 100644 index 6baf6e0d11..0000000000 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateTimeParser.cs +++ /dev/null @@ -1,404 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions.Japanese; -using Microsoft.Recognizers.Text.Number; -using Microsoft.Recognizers.Text.Number.Japanese; -using Microsoft.Recognizers.Text.Utilities; -using DateObject = System.DateTime; - -namespace Microsoft.Recognizers.Text.DateTime.Japanese -{ - public class JapaneseDateTimeParser : IDateTimeParser - { - public static readonly string ParserName = Constants.SYS_DATETIME_DATETIME; - - public static readonly Regex SimpleAmRegex = new Regex(DateTimeDefinitions.DateTimeSimpleAmRegex, RegexFlags); - - public static readonly Regex SimplePmRegex = new Regex(DateTimeDefinitions.DateTimeSimplePmRegex, RegexFlags); - - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - - private static readonly IDateTimeExtractor SingleDateExtractor = new JapaneseDateExtractorConfiguration(); - - private static readonly IDateTimeExtractor SingleTimeExtractor = new JapaneseTimeExtractorConfiguration(); - - private readonly IDateTimeExtractor durationExtractor = new JapaneseDurationExtractorConfiguration(); - - private readonly IExtractor integerExtractor = new IntegerExtractor(); - - private readonly IParser numberParser = new BaseCJKNumberParser(new JapaneseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Japanese))); - - private readonly IFullDateTimeParserConfiguration config; - - public 
JapaneseDateTimeParser(IFullDateTimeParserConfiguration configuration) - { - config = configuration; - } - - public ParseResult Parse(ExtractResult extResult) - { - return this.Parse(extResult, DateObject.Now); - } - - public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) - { - var referenceTime = refDate; - - object value = null; - if (er.Type.Equals(ParserName, StringComparison.Ordinal)) - { - var innerResult = MergeDateAndTime(er.Text, referenceTime); - if (!innerResult.Success) - { - innerResult = ParseBasicRegex(er.Text, referenceTime); - } - - if (!innerResult.Success) - { - innerResult = ParseTimeOfToday(er.Text, referenceTime); - } - - if (!innerResult.Success) - { - innerResult = ParserDurationWithBeforeAndAfter(er.Text, referenceTime); - } - - if (innerResult.Success) - { - innerResult.FutureResolution = new Dictionary - { - { TimeTypeConstants.DATETIME, DateTimeFormatUtil.FormatDateTime((DateObject)innerResult.FutureValue) }, - }; - - innerResult.PastResolution = new Dictionary - { - { TimeTypeConstants.DATETIME, DateTimeFormatUtil.FormatDateTime((DateObject)innerResult.PastValue) }, - }; - - innerResult.IsLunar = IsLunarCalendar(er.Text); - - value = innerResult; - } - } - - var ret = new DateTimeParseResult - { - Text = er.Text, - Start = er.Start, - Length = er.Length, - Type = er.Type, - Data = er.Data, - Value = value, - TimexStr = value == null ? 
string.Empty : ((DateTimeResolutionResult)value).Timex, - ResolutionStr = string.Empty, - }; - return ret; - } - - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } - - private static DateTimeResolutionResult ParseBasicRegex(string text, DateObject referenceTime) - { - var ret = new DateTimeResolutionResult(); - var trimmedText = text.Trim(); - - // handle "现在" - var match = JapaneseDateTimeExtractorConfiguration.NowRegex.MatchExact(trimmedText, trim: true); - - if (match.Success) - { - if (trimmedText.EndsWith("现在")) - { - ret.Timex = "PRESENT_REF"; - } - else if (trimmedText.Equals("刚刚才") || trimmedText.Equals("刚刚") || trimmedText.Equals("刚才")) - { - ret.Timex = "PAST_REF"; - } - else if (trimmedText.Equals("立刻") || trimmedText.Equals("马上")) - { - ret.Timex = "FUTURE_REF"; - } - - ret.FutureValue = ret.PastValue = referenceTime; - ret.Success = true; - return ret; - } - - return ret; - } - - // parse if lunar contains - private bool IsLunarCalendar(string text) - { - var trimmedText = text.Trim(); - var match = JapaneseDateExtractorConfiguration.LunarRegex.Match(trimmedText); - if (match.Success) - { - return true; - } - - return JapaneseHolidayExtractorConfiguration.LunarHolidayRegex.IsMatch(trimmedText); - } - - // merge a Date entity and a Time entity - private DateTimeResolutionResult MergeDateAndTime(string text, DateObject referenceTime) - { - var ret = new DateTimeResolutionResult(); - - var er1 = SingleDateExtractor.Extract(text, referenceTime); - if (er1.Count == 0) - { - return ret; - } - - var er2 = SingleTimeExtractor.Extract(text, referenceTime); - if (er2.Count == 0) - { - return ret; - } - - // TODO: Add reference time - var pr1 = this.config.DateParser.Parse(er1[0], referenceTime.Date); - var pr2 = this.config.TimeParser.Parse(er2[0], referenceTime); - - if (pr1.Value == null || pr2.Value == null) - { - return ret; - } - - var futureDate = 
(DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue; - var pastDate = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue; - var time = (DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue; - - var hour = time.Hour; - var min = time.Minute; - var sec = time.Second; - - // handle morning, afternoon - if (SimplePmRegex.IsMatch(text) && hour < Constants.HalfDayHourCount) - { - hour += Constants.HalfDayHourCount; - } - else if (SimpleAmRegex.IsMatch(text) && hour >= Constants.HalfDayHourCount) - { - hour -= Constants.HalfDayHourCount; - } - - var timeStr = pr2.TimexStr; - if (timeStr.EndsWith(Constants.Comment_AmPm, StringComparison.Ordinal)) - { - timeStr = timeStr.Substring(0, timeStr.Length - 4); - } - - timeStr = "T" + hour.ToString("D2") + timeStr.Substring(3); - ret.Timex = pr1.TimexStr + timeStr; - - var val = (DateTimeResolutionResult)pr2.Value; - - if (hour <= Constants.HalfDayHourCount && !SimplePmRegex.IsMatch(text) && !SimpleAmRegex.IsMatch(text) && - !string.IsNullOrEmpty(val.Comment)) - { - // ret.Timex += "ampm"; - ret.Comment = Constants.Comment_AmPm; - } - - ret.FutureValue = DateObject.MinValue.SafeCreateFromValue(futureDate.Year, futureDate.Month, futureDate.Day, hour, min, sec); - ret.PastValue = DateObject.MinValue.SafeCreateFromValue(pastDate.Year, pastDate.Month, pastDate.Day, hour, min, sec); - ret.Success = true; - - return ret; - } - - private DateTimeResolutionResult ParseTimeOfToday(string text, DateObject referenceTime) - { - var ret = new DateTimeResolutionResult(); - var ers = SingleTimeExtractor.Extract(text, referenceTime); - if (ers.Count != 1) - { - return ret; - } - - // TODO: Add reference time - var pr = this.config.TimeParser.Parse(ers[0], referenceTime); - if (pr.Value == null) - { - return ret; - } - - var time = (DateObject)((DateTimeResolutionResult)pr.Value).FutureValue; - - var hour = time.Hour; - var min = time.Minute; - var sec = time.Second; - - var match = 
JapaneseDateTimeExtractorConfiguration.TimeOfTodayRegex.Match(text); - - if (match.Success) - { - var matchStr = match.Value; - var swift = 0; - switch (matchStr) - { - case "今晚": - if (hour < Constants.HalfDayHourCount) - { - hour += Constants.HalfDayHourCount; - } - - break; - case "今早": - case "今晨": - if (hour >= Constants.HalfDayHourCount) - { - hour -= Constants.HalfDayHourCount; - } - - break; - case "明晚": - swift = 1; - if (hour < Constants.HalfDayHourCount) - { - hour += Constants.HalfDayHourCount; - } - - break; - case "明早": - case "明晨": - swift = 1; - if (hour >= Constants.HalfDayHourCount) - { - hour -= Constants.HalfDayHourCount; - } - - break; - case "昨晚": - swift = -1; - if (hour < Constants.HalfDayHourCount) - { - hour += Constants.HalfDayHourCount; - } - - break; - default: - break; - } - - var date = referenceTime.AddDays(swift).Date; - - // in this situation, luisStr cannot end up with "ampm", because we always have a "morning" or "night" - var timeStr = pr.TimexStr; - if (timeStr.EndsWith(Constants.Comment_AmPm, StringComparison.Ordinal)) - { - timeStr = timeStr.Substring(0, timeStr.Length - 4); - } - - timeStr = "T" + hour.ToString("D2") + timeStr.Substring(3); - - ret.Timex = DateTimeFormatUtil.FormatDate(date) + timeStr; - ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(date.Year, date.Month, date.Day, hour, min, sec); - ret.Success = true; - return ret; - } - - return ret; - } - - // handle cases like "5分钟前", "1小时以后" - private DateTimeResolutionResult ParserDurationWithBeforeAndAfter(string text, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - var durationRes = durationExtractor.Extract(text, referenceDate); - var unitStr = string.Empty; - - if (durationRes.Count > 0) - { - var match = JapaneseDateTimeExtractorConfiguration.DateTimePeriodUnitRegex.Match(text); - if (match.Success) - { - var suffix = text.Substring((int)durationRes[0].Start + (int)durationRes[0].Length).Trim(); - var 
srcUnit = match.Groups["unit"].Value; - var numberStr = text.Substring((int)durationRes[0].Start, match.Index - (int)durationRes[0].Start).Trim(); - var number = ConvertJapaneseToNum(numberStr); - - if (this.config.UnitMap.ContainsKey(srcUnit)) - { - unitStr = this.config.UnitMap[srcUnit]; - - var beforeMatch = JapaneseDateTimeExtractorConfiguration.BeforeRegex.Match(suffix); - if (beforeMatch.Success && suffix.StartsWith(beforeMatch.Value)) - { - DateObject date; - switch (unitStr) - { - case Constants.TimexHour: - date = referenceDate.AddHours(-number); - break; - case Constants.TimexMinute: - date = referenceDate.AddMinutes(-number); - break; - case Constants.TimexSecond: - date = referenceDate.AddSeconds(-number); - break; - default: - return ret; - } - - ret.Timex = $"{DateTimeFormatUtil.LuisDate(date)}"; - ret.FutureValue = ret.PastValue = date; - ret.Success = true; - return ret; - } - - var afterMatch = JapaneseDateTimeExtractorConfiguration.AfterRegex.Match(suffix); - if (afterMatch.Success && suffix.StartsWith(afterMatch.Value)) - { - DateObject date; - switch (unitStr) - { - case Constants.TimexHour: - date = referenceDate.AddHours(number); - break; - case Constants.TimexMinute: - date = referenceDate.AddMinutes(number); - break; - case Constants.TimexSecond: - date = referenceDate.AddSeconds(number); - break; - default: - return ret; - } - - ret.Timex = $"{DateTimeFormatUtil.LuisDate(date)}"; - ret.FutureValue = ret.PastValue = date; - ret.Success = true; - return ret; - } - } - } - } - - return ret; - } - - // convert Japanese Number to Integer - private int ConvertJapaneseToNum(string numStr) - { - var num = -1; - var er = integerExtractor.Extract(numStr); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) - { - num = Convert.ToInt32((double)(numberParser.Parse(er[0]).Value ?? 
0)); - } - } - - return num; - } - } -} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateTimeParserConfiguration.cs index e65c7bb623..ebe817b435 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateTimeParserConfiguration.cs @@ -1,209 +1,210 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseDateTimeParserConfiguration : BaseDateTimeOptionsConfiguration, IFullDateTimeParserConfiguration + public class JapaneseDateTimeParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKDateTimeParserConfiguration { - public JapaneseDateTimeParserConfiguration(IDateTimeOptionsConfiguration config) - : base(config) - { - DateExtractor = new JapaneseDateExtractorConfiguration(); - - DateParser = new JapaneseDateParserConfiguration(this); - TimeParser = new JapaneseTimeParserConfiguration(this); - DateTimeParser = new JapaneseDateTimeParser(this); - DatePeriodParser = new JapaneseDatePeriodParserConfiguration(this); - TimePeriodParser = new JapaneseTimePeriodParserConfiguration(this); - DateTimePeriodParser = new JapaneseDateTimePeriodParserConfiguration(this); - DurationParser = new JapaneseDurationParserConfiguration(this); - GetParser = new JapaneseSetParserConfiguration(this); - HolidayParser = new JapaneseHolidayParserConfiguration(this); - - UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToImmutableDictionary(); - UnitValueMap = 
DateTimeDefinitions.ParserConfigurationUnitValueMap.ToImmutableDictionary(); - SeasonMap = DateTimeDefinitions.ParserConfigurationSeasonMap.ToImmutableDictionary(); - SeasonValueMap = DateTimeDefinitions.ParserConfigurationSeasonValueMap.ToImmutableDictionary(); - CardinalMap = DateTimeDefinitions.ParserConfigurationCardinalMap.ToImmutableDictionary(); - DayOfMonth = DateTimeDefinitions.ParserConfigurationDayOfMonth.ToImmutableDictionary(); - DayOfWeek = DateTimeDefinitions.ParserConfigurationDayOfWeek.ToImmutableDictionary(); - MonthOfYear = DateTimeDefinitions.ParserConfigurationMonthOfYear.ToImmutableDictionary(); - - Numbers = InitNumbers(); - - DateRegexList = JapaneseDateExtractorConfiguration.DateRegexList; - NextRegex = JapaneseDateExtractorConfiguration.NextRegex; - ThisRegex = JapaneseDateExtractorConfiguration.ThisRegex; - LastRegex = JapaneseDateExtractorConfiguration.LastRegex; - YearRegex = JapaneseDateExtractorConfiguration.YearRegex; - RelativeRegex = JapaneseDateExtractorConfiguration.RelativeRegex; - StrictWeekDayRegex = JapaneseDateExtractorConfiguration.WeekDayRegex; - WeekDayOfMonthRegex = JapaneseDateExtractorConfiguration.WeekDayOfMonthRegex; - BeforeRegex = JapaneseMergedExtractorConfiguration.BeforeRegex; - AfterRegex = JapaneseMergedExtractorConfiguration.AfterRegex; - UntilRegex = JapaneseMergedExtractorConfiguration.UntilRegex; - SincePrefixRegex = JapaneseMergedExtractorConfiguration.SincePrefixRegex; - SinceSuffixRegex = JapaneseMergedExtractorConfiguration.SinceSuffixRegex; - EqualRegex = JapaneseMergedExtractorConfiguration.EqualRegex; - } - - public int TwoNumYear => int.Parse(DateTimeDefinitions.TwoNumYear); - - public string LastWeekDayToken => DateTimeDefinitions.ParserConfigurationLastWeekDayToken; - - public string NextMonthToken => DateTimeDefinitions.ParserConfigurationNextMonthToken; - - public string LastMonthToken => DateTimeDefinitions.ParserConfigurationLastMonthToken; - - public string DatePrefix => 
DateTimeDefinitions.ParserConfigurationDatePrefix; - - public IDateExtractor DateExtractor { get; } - - public IDateTimeParser DateParser { get; } - - public IDateTimeParser TimeParser { get; } + public static readonly Regex LunarRegex = new Regex(DateTimeDefinitions.LunarRegex, RegexFlags, RegexTimeOut); - public IDateTimeParser DateTimeParser { get; } + public static readonly Regex LunarHolidayRegex = new Regex(DateTimeDefinitions.LunarHolidayRegex, RegexFlags, RegexTimeOut); - public IDateTimeParser DatePeriodParser { get; } + public static readonly Regex SimpleAmRegex = new Regex(DateTimeDefinitions.DateTimeSimpleAmRegex, RegexFlags, RegexTimeOut); - public IDateTimeParser TimePeriodParser { get; } + public static readonly Regex SimplePmRegex = new Regex(DateTimeDefinitions.DateTimeSimplePmRegex, RegexFlags, RegexTimeOut); - public IDateTimeParser DateTimePeriodParser { get; } + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public IDateTimeParser DurationParser { get; } - - public IDateTimeParser GetParser { get; } - - public IDateTimeParser HolidayParser { get; } - - public ImmutableDictionary UnitMap { get; } + private static readonly Regex NowTimeRegex = new Regex(DateTimeDefinitions.NowTimeRegex, RegexFlags, RegexTimeOut); - public ImmutableDictionary UnitValueMap { get; } + private static readonly Regex RecentlyTimeRegex = new Regex(DateTimeDefinitions.RecentlyTimeRegex, RegexFlags, RegexTimeOut); - public ImmutableDictionary SeasonMap { get; } + private static readonly Regex AsapTimeRegex = new Regex(DateTimeDefinitions.AsapTimeRegex, RegexFlags, RegexTimeOut); - public ImmutableDictionary SeasonValueMap { get; } + public JapaneseDateTimeParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) + { + IntegerExtractor = config.IntegerExtractor; + DateExtractor = config.DateExtractor; + TimeExtractor = config.TimeExtractor; + DurationExtractor = config.DurationExtractor; - public 
ImmutableDictionary CardinalMap { get; } + DateParser = config.DateParser; + DurationParser = config.DurationParser; + TimeParser = config.TimeParser; + NumberParser = config.NumberParser; - public ImmutableDictionary DayOfMonth { get; } + UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToImmutableDictionary(); + NowRegex = JapaneseDateTimeExtractorConfiguration.NowRegex; + TimeOfSpecialDayRegex = JapaneseDateTimeExtractorConfiguration.TimeOfSpecialDayRegex; + DateTimePeriodUnitRegex = JapaneseDateTimeExtractorConfiguration.DateTimePeriodUnitRegex; + BeforeRegex = JapaneseDateTimeExtractorConfiguration.BeforeRegex; + AfterRegex = JapaneseDateTimeExtractorConfiguration.AfterRegex; + DurationRelativeDurationUnitRegex = JapaneseDateTimeExtractorConfiguration.DurationRelativeDurationUnitRegex; + AgoLaterRegex = JapaneseDateTimeExtractorConfiguration.AgoLaterRegex; + } - public ImmutableDictionary DayOfWeek { get; } + public IDateTimeExtractor DateExtractor { get; } - public ImmutableDictionary MonthOfYear { get; } + public IDateTimeExtractor TimeExtractor { get; } - public ImmutableDictionary Numbers { get; } + public IDateTimeExtractor DurationExtractor { get; } - public IEnumerable DateRegexList { get; } + public IDateTimeParser DateParser { get; } - public Regex NextRegex { get; } + public IDateTimeParser DurationParser { get; } - public Regex ThisRegex { get; } + public IDateTimeParser TimeParser { get; } - public Regex LastRegex { get; } + public IExtractor IntegerExtractor { get; } - public Regex YearRegex { get; } + public IParser NumberParser { get; } - public Regex DatePeriodYearRegex { get; } + public ImmutableDictionary UnitMap { get; } - public Regex RelativeRegex { get; } + public Regex NowRegex { get; } - public Regex StrictWeekDayRegex { get; } + public Regex TimeOfSpecialDayRegex { get; } - public Regex WeekDayOfMonthRegex { get; } + public Regex DateTimePeriodUnitRegex { get; } public Regex BeforeRegex { get; } public Regex AfterRegex { get; } 
- public Regex UntilRegex { get; } + public Regex DurationRelativeDurationUnitRegex { get; } - public Regex SincePrefixRegex { get; } + public Regex AgoLaterRegex { get; } - public Regex SinceSuffixRegex { get; } + Regex ICJKDateTimeParserConfiguration.LunarRegex => LunarRegex; - public Regex EqualRegex { get; } + Regex ICJKDateTimeParserConfiguration.LunarHolidayRegex => LunarHolidayRegex; - public static int GetSwiftDay(string text) + Regex ICJKDateTimeParserConfiguration.SimpleAmRegex => SimpleAmRegex; + + Regex ICJKDateTimeParserConfiguration.SimplePmRegex => SimplePmRegex; + + public bool GetMatchedNowTimex(string text, out string timex) { - // Today: 今天, 今日, 最近, きょう, この日 - var value = 0; + var trimmedText = text.Trim(); - if (text.StartsWith("来") || text.Equals("あす") || text.Equals("あした") || text.Equals("明日")) + if (NowTimeRegex.MatchEnd(trimmedText, trim: true).Success) { - value = 1; - } - else if (text.StartsWith("昨") || text.Equals("きのう") || text.Equals("前日")) - { - value = -1; + timex = "PRESENT_REF"; } - else if (text.Equals("大后天") || text.Equals("大後天")) + else if (RecentlyTimeRegex.IsExactMatch(trimmedText, trim: true)) { - value = 3; + timex = "PAST_REF"; } - else if (text.Equals("大前天")) + else if (AsapTimeRegex.IsExactMatch(trimmedText, trim: true)) { - value = -3; + timex = "FUTURE_REF"; } - else if (text.Equals("后天") || text.Equals("後天") || text.Equals("明後日") || text.Equals("あさって")) + else { - value = 2; - } - else if (text.Equals("前天") || text.Equals("一昨日") || text.Equals("二日前") || text.Equals("おととい")) - { - value = -2; + timex = null; + return false; } - return value; + return true; } - public static int GetSwiftMonth(string text) + public int GetSwiftDay(string text) { - // Current month: 今月 var value = 0; - if (text.Equals("来月")) + // @TODO move hardcoded values to resources file + if (text.Equals("今天", StringComparison.Ordinal) || + text.Equals("今日", StringComparison.Ordinal) || + text.Equals("最近", StringComparison.Ordinal)) + { + value = 0; + 
} + else if (text.StartsWith("明", StringComparison.Ordinal)) { value = 1; } - else if (text.Equals("前月") || text.Equals("先月") || text.Equals("昨月") || text.Equals("先々月")) + else if (text.StartsWith("昨", StringComparison.Ordinal)) { value = -1; } - else if (text.Equals("再来月")) + else if (text.Equals("大后天", StringComparison.Ordinal) || + text.Equals("大後天", StringComparison.Ordinal)) { - value = 2; + value = 3; } - - return value; - } - - public static int GetSwiftYear(string text) - { - // Current year: 今年 - var value = 0; - - if (text.Equals("来年") || text.Equals("らいねん")) + else if (text.Equals("大前天", StringComparison.Ordinal)) { - value = 1; + value = -3; } - else if (text.Equals("昨年") || text.Equals("前年")) + else if (text.Equals("后天", StringComparison.Ordinal) || + text.Equals("後天", StringComparison.Ordinal)) { - value = -1; + value = 2; + } + else if (text.Equals("前天", StringComparison.Ordinal)) + { + value = -2; } return value; } - private static ImmutableDictionary InitNumbers() + public void AdjustByTimeOfDay(string matchStr, ref int hour, ref int swift) { - return new Dictionary + // @TODO move hardcoded values to resources file + switch (matchStr) { - }.ToImmutableDictionary(); + case "今晚": + if (hour < Constants.HalfDayHourCount) + { + hour += Constants.HalfDayHourCount; + } + + break; + case "今早": + case "今晨": + if (hour >= Constants.HalfDayHourCount) + { + hour -= Constants.HalfDayHourCount; + } + + break; + case "明晚": + swift = 1; + if (hour < Constants.HalfDayHourCount) + { + hour += Constants.HalfDayHourCount; + } + + break; + case "明早": + case "明晨": + swift = 1; + if (hour >= Constants.HalfDayHourCount) + { + hour -= Constants.HalfDayHourCount; + } + + break; + case "昨晚": + swift = -1; + if (hour < Constants.HalfDayHourCount) + { + hour += Constants.HalfDayHourCount; + } + + break; + default: + break; + } } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateTimePeriodParserConfiguration.cs 
b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateTimePeriodParserConfiguration.cs index 871586ad58..f9a9061744 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateTimePeriodParserConfiguration.cs @@ -1,685 +1,202 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; +using System.Collections.Immutable; +using System.Globalization; using System.Text; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; using Microsoft.Recognizers.Text.DateTime.Utilities; using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Config; using Microsoft.Recognizers.Text.Number.Japanese; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseDateTimePeriodParserConfiguration : IDateTimeParser + public class JapaneseDateTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKDateTimePeriodParserConfiguration { - public static readonly string ParserName = Constants.SYS_DATETIME_DATETIMEPERIOD; - - public static readonly Regex MORegex = new Regex(DateTimeDefinitions.DateTimePeriodMORegex, RegexFlags); - - public static readonly Regex AFRegex = new Regex(DateTimeDefinitions.DateTimePeriodAFRegex, RegexFlags); - - public static readonly Regex EVRegex = new Regex(DateTimeDefinitions.DateTimePeriodEVRegex, RegexFlags); - - public static readonly Regex NIRegex = new Regex(DateTimeDefinitions.DateTimePeriodNIRegex, RegexFlags); - - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - - private static readonly IDateTimeExtractor SingleDateExtractor = new JapaneseDateExtractorConfiguration(); - private 
static readonly IDateTimeExtractor SingleTimeExtractor = new JapaneseTimeExtractorConfiguration(); + public static readonly Regex MORegex = new Regex(DateTimeDefinitions.DateTimePeriodMORegex, RegexFlags, RegexTimeOut); - private static readonly IDateTimeExtractor TimeWithDateExtractor = new JapaneseDateTimeExtractorConfiguration(); + public static readonly Regex MIRegex = new Regex(DateTimeDefinitions.DateTimePeriodMIRegex, RegexFlags, RegexTimeOut); - private static readonly IDateTimeExtractor TimePeriodExtractor = new JapaneseTimePeriodExtractorConfiguration(); + public static readonly Regex AFRegex = new Regex(DateTimeDefinitions.DateTimePeriodAFRegex, RegexFlags, RegexTimeOut); - private static readonly IExtractor CardinalExtractor = new CardinalExtractor(); + public static readonly Regex EVRegex = new Regex(DateTimeDefinitions.DateTimePeriodEVRegex, RegexFlags, RegexTimeOut); - private static readonly IParser CardinalParser = AgnosticNumberParserFactory.GetParser( - AgnosticNumberParserType.Cardinal, new JapaneseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Japanese))); + public static readonly Regex NIRegex = new Regex(DateTimeDefinitions.DateTimePeriodNIRegex, RegexFlags, RegexTimeOut); - private readonly IFullDateTimeParserConfiguration config; + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public JapaneseDateTimePeriodParserConfiguration(IFullDateTimeParserConfiguration configuration) + public JapaneseDateTimePeriodParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) { - config = configuration; - } - public static string BuildTimex(TimeResult timeResult) - { - var build = new StringBuilder("T"); - if (timeResult.Hour >= 0) - { - build.Append(timeResult.Hour.ToString("D2")); - } - - if (timeResult.Minute >= 0) - { - build.Append(":" + timeResult.Minute.ToString("D2")); - } - - if (timeResult.Second >= 0) - { - build.Append(":" + 
timeResult.Second.ToString("D2")); - } - - return build.ToString(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = new CardinalExtractor(numConfig, CJKNumberExtractorMode.ExtractAll); + CardinalParser = AgnosticNumberParserFactory.GetParser( + AgnosticNumberParserType.Cardinal, new JapaneseNumberParserConfiguration(numConfig)); + + DateExtractor = config.DateExtractor; + DurationExtractor = config.DurationExtractor; + DateTimeExtractor = config.DateTimeExtractor; + TimeExtractor = config.TimeExtractor; + TimePeriodExtractor = config.TimePeriodExtractor; + DateParser = config.DateParser; + TimeParser = config.TimeParser; + DateTimeParser = config.DateTimeParser; + TimePeriodParser = config.TimePeriodParser; + DurationParser = config.DurationParser; + + SpecificTimeOfDayRegex = JapaneseDateTimePeriodExtractorConfiguration.SpecificTimeOfDayRegex; + TimeOfDayRegex = JapaneseDateTimePeriodExtractorConfiguration.TimeOfDayRegex; + NextRegex = JapaneseDateTimePeriodExtractorConfiguration.NextRegex; + LastRegex = JapaneseDateTimePeriodExtractorConfiguration.LastRegex; + PastRegex = JapaneseDateTimePeriodExtractorConfiguration.PastRegex; + FutureRegex = JapaneseDateTimePeriodExtractorConfiguration.FutureRegex; + WeekDayRegex = JapaneseDateTimePeriodExtractorConfiguration.WeekDayRegex; + TimePeriodLeftRegex = JapaneseDateTimePeriodExtractorConfiguration.TimePeriodLeftRegex; + UnitRegex = JapaneseDateTimePeriodExtractorConfiguration.UnitRegex; + RestOfDateRegex = JapaneseDateTimePeriodExtractorConfiguration.RestOfDateRegex; + AmPmDescRegex = JapaneseDateTimePeriodExtractorConfiguration.AmPmDescRegex; + UnitMap = config.UnitMap; } - public static TimeResult DateObject2TimeResult(DateObject dateTime) - { - var timeResult = new TimeResult - { - Hour = dateTime.Hour, - 
Minute = dateTime.Minute, - Second = dateTime.Second, - }; + public IDateTimeExtractor DateExtractor { get; } - return timeResult; - } + public IDateTimeExtractor TimeExtractor { get; } - public ParseResult Parse(ExtractResult extResult) - { - return this.Parse(extResult, DateObject.Now); - } + public IDateTimeExtractor DateTimeExtractor { get; } - public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) - { - var referenceTime = refDate; + public IDateTimeExtractor TimePeriodExtractor { get; } - object value = null; - if (er.Type.Equals(ParserName, StringComparison.Ordinal)) - { - var innerResult = MergeDateAndTimePeriod(er.Text, referenceTime); - if (!innerResult.Success) - { - innerResult = MergeTwoTimePoints(er.Text, referenceTime); - } - - if (!innerResult.Success) - { - innerResult = ParseSpecificNight(er.Text, referenceTime); - } - - if (!innerResult.Success) - { - innerResult = ParseNumberWithUnit(er.Text, referenceTime); - } - - if (innerResult.Success) - { - innerResult.FutureResolution = new Dictionary - { - { - TimeTypeConstants.START_DATETIME, - DateTimeFormatUtil.FormatDateTime(((Tuple)innerResult.FutureValue).Item1) - }, - { - TimeTypeConstants.END_DATETIME, - DateTimeFormatUtil.FormatDateTime(((Tuple)innerResult.FutureValue).Item2) - }, - }; - - innerResult.PastResolution = new Dictionary - { - { - TimeTypeConstants.START_DATETIME, - DateTimeFormatUtil.FormatDateTime(((Tuple)innerResult.PastValue).Item1) - }, - { - TimeTypeConstants.END_DATETIME, - DateTimeFormatUtil.FormatDateTime(((Tuple)innerResult.PastValue).Item2) - }, - }; - - value = innerResult; - } - } + public IExtractor CardinalExtractor { get; } - var ret = new DateTimeParseResult - { - Text = er.Text, - Start = er.Start, - Length = er.Length, - Type = er.Type, - Data = er.Data, - Value = value, - TimexStr = value == null ? 
string.Empty : ((DateTimeResolutionResult)value).Timex, - ResolutionStr = string.Empty, - }; - - return ret; - } + public IDateTimeExtractor DurationExtractor { get; } - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } + public IParser CardinalParser { get; } - private DateTimeResolutionResult MergeDateAndTimePeriod(string text, DateObject referenceTime) - { - var ret = new DateTimeResolutionResult(); + public IDateTimeParser DateParser { get; } - var er1 = SingleDateExtractor.Extract(text, referenceTime); - var er2 = TimePeriodExtractor.Extract(text, referenceTime); - if (er1.Count != 1 || er2.Count != 1) - { - return ret; - } + public IDateTimeParser TimeParser { get; } - var pr1 = this.config.DateParser.Parse(er1[0], referenceTime); - var pr2 = this.config.TimePeriodParser.Parse(er2[0], referenceTime); - var timerange = (Tuple)((DateTimeResolutionResult)pr2.Value).FutureValue; - var beginTime = timerange.Item1; - var endTime = timerange.Item2; - var futureDate = (DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue; - var pastDate = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue; - - ret.FutureValue = - new Tuple( - DateObject.MinValue.SafeCreateFromValue( - futureDate.Year, futureDate.Month, futureDate.Day, beginTime.Hour, beginTime.Minute, beginTime.Second), - DateObject.MinValue.SafeCreateFromValue( - futureDate.Year, futureDate.Month, futureDate.Day, endTime.Hour, endTime.Minute, endTime.Second)); - - ret.PastValue = - new Tuple( - DateObject.MinValue.SafeCreateFromValue( - pastDate.Year, pastDate.Month, pastDate.Day, beginTime.Hour, beginTime.Minute, beginTime.Second), - DateObject.MinValue.SafeCreateFromValue( - pastDate.Year, pastDate.Month, pastDate.Day, endTime.Hour, endTime.Minute, endTime.Second)); - - var splited = pr2.TimexStr.Split('T'); - if (splited.Length != 4) - { - return ret; - } + public IDateTimeParser DateTimeParser { get; } - var dateStr = pr1.TimexStr; + public 
IDateTimeParser TimePeriodParser { get; } - ret.Timex = splited[0] + dateStr + "T" + splited[1] + dateStr + "T" + splited[2] + "T" + splited[3]; + public IDateTimeParser DurationParser { get; } - ret.Success = true; - return ret; - } + public Regex SpecificTimeOfDayRegex { get; } - private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject referenceTime) - { - var ret = new DateTimeResolutionResult(); - DateTimeParseResult pr1 = null, pr2 = null; - bool bothHaveDates = false, beginHasDate = false, endHasDate = false; + public Regex TimeOfDayRegex { get; } - var er1 = SingleTimeExtractor.Extract(text, referenceTime); - var er2 = TimeWithDateExtractor.Extract(text, referenceTime); + public Regex NextRegex { get; } - var rightTime = DateObject.MinValue.SafeCreateFromValue(referenceTime.Year, referenceTime.Month, referenceTime.Day); - var leftTime = DateObject.MinValue.SafeCreateFromValue(referenceTime.Year, referenceTime.Month, referenceTime.Day); + public Regex LastRegex { get; } - if (er2.Count == 2) - { - pr1 = this.config.DateTimeParser.Parse(er2[0], referenceTime); - pr2 = this.config.DateTimeParser.Parse(er2[1], referenceTime); - bothHaveDates = true; - } - else if (er2.Count == 1 && er1.Count == 2) - { - if (!er2[0].IsOverlap(er1[0])) - { - pr1 = this.config.TimeParser.Parse(er1[0], referenceTime); - pr2 = this.config.DateTimeParser.Parse(er2[0], referenceTime); - endHasDate = true; - } - else - { - pr1 = this.config.DateTimeParser.Parse(er2[0], referenceTime); - pr2 = this.config.TimeParser.Parse(er1[1], referenceTime); - beginHasDate = true; - } - } - else if (er2.Count == 1 && er1.Count == 1) - { - if (er1[0].Start < er2[0].Start) - { - pr1 = this.config.TimeParser.Parse(er1[0], referenceTime); - pr2 = this.config.DateTimeParser.Parse(er2[0], referenceTime); - endHasDate = true; - } - else - { - pr1 = this.config.DateTimeParser.Parse(er2[0], referenceTime); - pr2 = this.config.TimeParser.Parse(er1[0], referenceTime); - beginHasDate = true; - 
} - } - else if (er1.Count == 2) - { - // if both ends are Time. then this is a TimePeriod, not a DateTimePeriod - return ret; - } - else - { - return ret; - } + public Regex PastRegex { get; } - if (pr1.Value == null || pr2.Value == null) - { - return ret; - } + public Regex FutureRegex { get; } - DateObject futureBegin = (DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue, - futureEnd = (DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue; + public Regex WeekDayRegex { get; } - DateObject pastBegin = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue, - pastEnd = (DateObject)((DateTimeResolutionResult)pr2.Value).PastValue; + public Regex TimePeriodLeftRegex { get; } - if (futureBegin > futureEnd) - { - futureBegin = pastBegin; - } + public Regex UnitRegex { get; } - if (pastEnd < pastBegin) - { - pastEnd = futureEnd; - } + public Regex RestOfDateRegex { get; } - if (bothHaveDates) - { - rightTime = DateObject.MinValue.SafeCreateFromValue(futureEnd.Year, futureEnd.Month, futureEnd.Day); - leftTime = DateObject.MinValue.SafeCreateFromValue(futureBegin.Year, futureBegin.Month, futureBegin.Day); - } - else if (beginHasDate) - { - // TODO: Handle "明天下午两点到五点" - futureEnd = DateObject.MinValue.SafeCreateFromValue( - futureBegin.Year, futureBegin.Month, futureBegin.Day, futureEnd.Hour, futureEnd.Minute, futureEnd.Second); - pastEnd = DateObject.MinValue.SafeCreateFromValue( - pastBegin.Year, pastBegin.Month, pastBegin.Day, pastEnd.Hour, pastEnd.Minute, pastEnd.Second); - - leftTime = DateObject.MinValue.SafeCreateFromValue(futureBegin.Year, futureBegin.Month, futureBegin.Day); - } - else if (endHasDate) - { - // TODO: Handle "明天下午两点到五点" - futureBegin = DateObject.MinValue.SafeCreateFromValue( - futureEnd.Year, futureEnd.Month, futureEnd.Day, futureBegin.Hour, futureBegin.Minute, futureBegin.Second); - pastBegin = DateObject.MinValue.SafeCreateFromValue( - pastEnd.Year, pastEnd.Month, pastEnd.Day, pastBegin.Hour, pastBegin.Minute, 
pastBegin.Second); - - rightTime = DateObject.MinValue.SafeCreateFromValue(futureEnd.Year, futureEnd.Month, futureEnd.Day); - } - - var leftResult = (DateTimeResolutionResult)pr1.Value; - var rightResult = (DateTimeResolutionResult)pr2.Value; - var leftResultTime = (DateObject)leftResult.FutureValue; - var rightResultTime = (DateObject)rightResult.FutureValue; - - int day = referenceTime.Day, - month = referenceTime.Month, - year = referenceTime.Year; - - // check if the right time is smaller than the left time, if yes, add one day - int hour = leftResultTime.Hour > 0 ? leftResultTime.Hour : 0, - min = leftResultTime.Minute > 0 ? leftResultTime.Minute : 0, - second = leftResultTime.Second > 0 ? leftResultTime.Second : 0; - - leftTime = leftTime.AddHours(hour); - leftTime = leftTime.AddMinutes(min); - leftTime = leftTime.AddSeconds(second); - DateObject.MinValue.SafeCreateFromValue(year, month, day, hour, min, second); - - hour = rightResultTime.Hour > 0 ? rightResultTime.Hour : 0; - min = rightResultTime.Minute > 0 ? rightResultTime.Minute : 0; - second = rightResultTime.Second > 0 ? 
rightResultTime.Second : 0; - - rightTime = rightTime.AddHours(hour); - rightTime = rightTime.AddMinutes(min); - rightTime = rightTime.AddSeconds(second); - - // the right side time contains "ampm", while the left side doesn't - if (rightResult.Comment != null && rightResult.Comment.Equals(Constants.Comment_AmPm, StringComparison.Ordinal) && - leftResult.Comment == null && rightTime < leftTime) - { - rightTime = rightTime.AddHours(Constants.HalfDayHourCount); - } - - if (rightTime < leftTime) - { - rightTime = rightTime.AddDays(1); - } - - ret.FutureValue = ret.PastValue = new Tuple(leftTime, rightTime); - - var leftTimex = string.Empty; - var rightTimex = string.Empty; - - // "X" is timex token for not determined time - if (!pr1.TimexStr.Contains("X") && !pr2.TimexStr.Contains("X")) - { - leftTimex = DateTimeFormatUtil.LuisDateTime(leftTime); - rightTimex = DateTimeFormatUtil.LuisDateTime(rightTime); - } - else - { - leftTimex = pr1.TimexStr; - rightTimex = pr2.TimexStr; - } + public Regex AmPmDescRegex { get; } - ret.Timex = $"({leftTimex},{rightTimex},PT{Convert.ToInt32((rightTime - leftTime).TotalHours)}H)"; + public IImmutableDictionary UnitMap { get; } - ret.Success = true; - return ret; - } - - // parse "this night" - private DateTimeResolutionResult ParseSpecificNight(string text, DateObject referenceTime) + public bool GetMatchedTimeRangeAndSwift(string text, out string todSymbol, out int beginHour, out int endHour, out int endMinute, out int swift) { - var ret = new DateTimeResolutionResult(); var trimmedText = text.Trim(); - int beginHour, endHour, endMin = 0; - string timeStr; - - // handle 昨晚,今晨 - var exactMatch = JapaneseDateTimePeriodExtractorConfiguration.SpecificTimeOfDayRegex.MatchExact(trimmedText, trim: true); - if (exactMatch.Success) - { - var swift = 0; - switch (trimmedText) - { - case "今晚": - swift = 0; - timeStr = "TEV"; - beginHour = 16; - endHour = 20; - break; - case "今早": - case "今晨": - swift = 0; - timeStr = "TMO"; - beginHour = 8; - 
endHour = Constants.HalfDayHourCount; - break; - case "明晚": - swift = 1; - timeStr = "TEV"; - beginHour = 16; - endHour = 20; - break; - case "明早": - case "明晨": - swift = 1; - timeStr = "TMO"; - beginHour = 8; - endHour = Constants.HalfDayHourCount; - break; - case "昨晚": - swift = -1; - timeStr = "TEV"; - beginHour = 16; - endHour = 20; - break; - default: - return ret; - } - - var date = referenceTime.AddDays(swift).Date; - int day = date.Day, month = date.Month, year = date.Year; - - ret.Timex = DateTimeFormatUtil.FormatDate(date) + timeStr; - ret.FutureValue = - ret.PastValue = - new Tuple( - DateObject.MinValue.SafeCreateFromValue(year, month, day, beginHour, 0, 0), - DateObject.MinValue.SafeCreateFromValue(year, month, day, endHour, endMin, endMin)); - ret.Success = true; - return ret; + // @TODO move hardcoded values to resources file + beginHour = 0; + endHour = 0; + endMinute = 0; + swift = 0; + + var tod = string.Empty; + + switch (trimmedText) + { + case "今晚": + swift = 0; + tod = Constants.Evening; + break; + case "今早": + case "今晨": + swift = 0; + tod = Constants.Morning; + break; + case "明晚": + swift = 1; + tod = Constants.Evening; + break; + case "明早": + case "明晨": + swift = 1; + tod = Constants.Morning; + break; + case "昨晚": + swift = -1; + tod = Constants.Evening; + break; } - // handle morning, afternoon.. 
if (MORegex.IsMatch(trimmedText)) { - timeStr = "TMO"; - beginHour = 8; - endHour = Constants.HalfDayHourCount; + tod = Constants.Morning; + } + else if (MIRegex.IsMatch(trimmedText)) + { + tod = Constants.MidDay; } else if (AFRegex.IsMatch(trimmedText)) { - timeStr = "TAF"; - beginHour = Constants.HalfDayHourCount; - endHour = 16; + tod = Constants.Afternoon; } else if (EVRegex.IsMatch(trimmedText)) { - timeStr = "TEV"; - beginHour = 16; - endHour = 20; + tod = Constants.Evening; } else if (NIRegex.IsMatch(trimmedText)) { - timeStr = "TNI"; - beginHour = 20; - endHour = 23; - endMin = 59; - } - else - { - return ret; + tod = Constants.Night; } - - exactMatch = JapaneseDateTimePeriodExtractorConfiguration.SpecificTimeOfDayRegex.MatchExact(trimmedText, trim: true); - - if (exactMatch.Success) + else if (string.IsNullOrEmpty(tod)) { - var swift = 0; - if (JapaneseDateTimePeriodExtractorConfiguration.NextRegex.IsMatch(trimmedText)) - { - swift = 1; - } - else if (JapaneseDateTimePeriodExtractorConfiguration.LastRegex.IsMatch(trimmedText)) - { - swift = -1; - } - - var date = referenceTime.AddDays(swift).Date; - int day = date.Day, month = date.Month, year = date.Year; - - ret.Timex = DateTimeFormatUtil.FormatDate(date) + timeStr; - ret.FutureValue = - ret.PastValue = - new Tuple( - DateObject.MinValue.SafeCreateFromValue(year, month, day, beginHour, 0, 0), - DateObject.MinValue.SafeCreateFromValue(year, month, day, endHour, endMin, endMin)); - ret.Success = true; - return ret; + todSymbol = null; + return false; } - // handle Date followed by morning, afternoon - var match = JapaneseDateTimePeriodExtractorConfiguration.TimeOfDayRegex.Match(trimmedText); - - if (match.Success) - { - var beforeStr = trimmedText.Substring(0, match.Index).Trim(); - var ers = SingleDateExtractor.Extract(beforeStr, referenceTime); - if (ers.Count == 0 || ers[0].Length != beforeStr.Length) - { - return ret; - } - - var pr = this.config.DateParser.Parse(ers[0], referenceTime); - var 
futureDate = (DateObject)((DateTimeResolutionResult)pr.Value).FutureValue; - var pastDate = (DateObject)((DateTimeResolutionResult)pr.Value).PastValue; - - ret.Timex = pr.TimexStr + timeStr; - - ret.FutureValue = - new Tuple( - DateObject.MinValue.SafeCreateFromValue(futureDate.Year, futureDate.Month, futureDate.Day, beginHour, 0, 0), - DateObject.MinValue.SafeCreateFromValue(futureDate.Year, futureDate.Month, futureDate.Day, endHour, endMin, endMin)); - - ret.PastValue = - new Tuple( - DateObject.MinValue.SafeCreateFromValue(pastDate.Year, pastDate.Month, pastDate.Day, beginHour, 0, 0), - DateObject.MinValue.SafeCreateFromValue(pastDate.Year, pastDate.Month, pastDate.Day, endHour, endMin, endMin)); - - ret.Success = true; - - return ret; - } + var parseResult = TimexUtility.ResolveTimeOfDay(tod); + todSymbol = parseResult.Timex; + beginHour = parseResult.BeginHour; + endHour = parseResult.EndHour; + endMinute = parseResult.EndMin; - return ret; + return true; } - // parse "in 20 minutes" - private DateTimeResolutionResult ParseNumberWithUnit(string text, DateObject referenceTime) + public bool GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMin) { - var ret = new DateTimeResolutionResult(); - string numStr, unitStr; - - // if there are spaces between number and unit - var ers = CardinalExtractor.Extract(text); - if (ers.Count == 1) - { - var pr = CardinalParser.Parse(ers[0]); - var srcUnit = text.Substring(ers[0].Start + ers[0].Length ?? 0).Trim(); - if (srcUnit.StartsWith("个")) - { - srcUnit = srcUnit.Substring(1); - } - - var beforeStr = text.Substring(0, ers[0].Start ?? 
0); - if (this.config.UnitMap.ContainsKey(srcUnit)) - { - numStr = pr.ResolutionStr; - unitStr = this.config.UnitMap[srcUnit]; - - if (JapaneseDateTimePeriodExtractorConfiguration.PastRegex.IsExactMatch(beforeStr, trim: true)) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case "H": - beginDate = referenceTime.AddHours(-(double)pr.Value); - endDate = referenceTime; - break; - case "M": - beginDate = referenceTime.AddMinutes(-(double)pr.Value); - endDate = referenceTime; - break; - case "S": - beginDate = referenceTime.AddSeconds(-(double)pr.Value); - endDate = referenceTime; - break; - default: - return ret; - } - - ret.Timex = - $"({DateTimeFormatUtil.LuisDate(beginDate)}T{DateTimeFormatUtil.LuisTime(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)}T{DateTimeFormatUtil.LuisTime(endDate)},PT{numStr}{unitStr[0]})"; - ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); - ret.Success = true; - return ret; - } - - if (JapaneseDateTimePeriodExtractorConfiguration.FutureRegex.IsExactMatch(beforeStr, trim: true)) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case "H": - beginDate = referenceTime; - endDate = referenceTime.AddHours((double)pr.Value); - break; - case "M": - beginDate = referenceTime; - endDate = referenceTime.AddMinutes((double)pr.Value); - break; - case "S": - beginDate = referenceTime; - endDate = referenceTime.AddSeconds((double)pr.Value); - break; - default: - return ret; - } - - ret.Timex = - $"({DateTimeFormatUtil.LuisDate(beginDate)}T{DateTimeFormatUtil.LuisTime(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)}T{DateTimeFormatUtil.LuisTime(endDate)},PT{numStr}{unitStr[0]})"; - ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); - ret.Success = true; - return ret; - } - } - } - - // handle "last hour" - var match = JapaneseDateTimePeriodExtractorConfiguration.UnitRegex.Match(text); - if (match.Success) - { - var srcUnit = match.Groups["unit"].Value; - var beforeStr = text.Substring(0, 
match.Index); - - if (this.config.UnitMap.ContainsKey(srcUnit)) - { - unitStr = this.config.UnitMap[srcUnit]; - - if (JapaneseDateTimePeriodExtractorConfiguration.PastRegex.IsExactMatch(beforeStr, trim: true)) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case "H": - beginDate = referenceTime.AddHours(-1); - endDate = referenceTime; - break; - case "M": - beginDate = referenceTime.AddMinutes(-1); - endDate = referenceTime; - break; - case "S": - beginDate = referenceTime.AddSeconds(-1); - endDate = referenceTime; - break; - default: - return ret; - } - - ret.Timex = - $"({DateTimeFormatUtil.LuisDate(beginDate)}T{DateTimeFormatUtil.LuisTime(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)}T{DateTimeFormatUtil.LuisTime(endDate)},PT1{unitStr[0]})"; - ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); - ret.Success = true; - return ret; - } - - if (JapaneseDateTimePeriodExtractorConfiguration.FutureRegex.IsExactMatch(beforeStr, trim: true)) - { - DateObject beginDate, endDate; - switch (unitStr) - { - case "H": - beginDate = referenceTime; - endDate = referenceTime.AddHours(1); - break; - case "M": - beginDate = referenceTime; - endDate = referenceTime.AddMinutes(1); - break; - case "S": - beginDate = referenceTime; - endDate = referenceTime.AddSeconds(1); - break; - default: - return ret; - } - - ret.Timex = - $"({DateTimeFormatUtil.LuisDate(beginDate)}T{DateTimeFormatUtil.LuisTime(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)}T{DateTimeFormatUtil.LuisTime(endDate)},PT1{unitStr[0]})"; - ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); - ret.Success = true; - return ret; - } - } - } - - return ret; + return GetMatchedTimeRangeAndSwift(text, out todSymbol, out beginHour, out endHour, out endMin, out int swift); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDurationParserConfiguration.cs 
b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDurationParserConfiguration.cs index 20dec0b0b6..7d63c7713a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDurationParserConfiguration.cs @@ -1,104 +1,61 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; -using Microsoft.Recognizers.Definitions.Japanese; +using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.NumberWithUnit; using Microsoft.Recognizers.Text.NumberWithUnit.Japanese; using static Microsoft.Recognizers.Text.DateTime.Japanese.JapaneseDurationExtractorConfiguration; -using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseDurationParserConfiguration : IDateTimeParser + public class JapaneseDurationParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKDurationParserConfiguration { - public static readonly Dictionary UnitValueMap = DateTimeDefinitions.DurationUnitValueMap; - public static readonly string ParserName = Constants.SYS_DATETIME_DURATION; // "Duration"; + public JapaneseDurationParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) + { + InternalParser = new NumberWithUnitParser(new DurationParserConfiguration()); - private static readonly IParser InternalParser = new NumberWithUnitParser(new DurationParserConfiguration()); + var durationConfig = new BaseDateTimeOptionsConfiguration(config.Culture, DateTimeOptions.None); + DurationExtractor = new BaseCJKDurationExtractor(new JapaneseDurationExtractorConfiguration(durationConfig), false); - private readonly IFullDateTimeParserConfiguration config; + YearRegex = JapaneseDurationExtractorConfiguration.YearRegex; + SomeRegex = 
JapaneseDurationExtractorConfiguration.SomeRegex; + MoreOrLessRegex = JapaneseDurationExtractorConfiguration.MoreOrLessRegex; + DurationUnitRegex = JapaneseDurationExtractorConfiguration.DurationUnitRegex; + AnUnitRegex = JapaneseDurationExtractorConfiguration.AnUnitRegex; + DurationConnectorRegex = JapaneseDurationExtractorConfiguration.DurationConnectorRegex; - public JapaneseDurationParserConfiguration(IFullDateTimeParserConfiguration configuration) - { - config = configuration; + UnitMap = config.UnitMap; + UnitValueMap = config.UnitValueMap; } - public ParseResult Parse(ExtractResult extResult) - { - return this.Parse(extResult, DateObject.Now); - } + public IDateTimeExtractor DurationExtractor { get; } - public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) - { - var referenceTime = refDate; + public IParser InternalParser { get; } - // handle cases like "三年半" - var hasHalfSuffix = false; - if (er.Text.EndsWith("半")) - { - er.Length -= 1; - er.Text = er.Text.Substring(0, er.Text.Length - 1); - hasHalfSuffix = true; - } + public Regex YearRegex { get; } - var parseResult = InternalParser.Parse(er); - var unitResult = parseResult.Value as UnitValue; + public Regex SomeRegex { get; } - if (unitResult == null) - { - return null; - } + public Regex MoreOrLessRegex { get; } - var dateTimeParseResult = new DateTimeResolutionResult(); - var unitStr = unitResult.Unit; - var numStr = unitResult.Number; + public Regex DurationUnitRegex { get; } - if (hasHalfSuffix) - { - numStr = (double.Parse(numStr) + 0.5).ToString(CultureInfo.InvariantCulture); - } + public Regex AnUnitRegex { get; } - dateTimeParseResult.Timex = "P" + (BaseDurationParser.IsLessThanDay(unitStr) ? 
"T" : string.Empty) + numStr + unitStr[0]; - dateTimeParseResult.FutureValue = dateTimeParseResult.PastValue = double.Parse(numStr) * UnitValueMap[unitStr]; - dateTimeParseResult.Success = true; + public Regex DurationConnectorRegex { get; } - if (dateTimeParseResult.Success) - { - dateTimeParseResult.FutureResolution = new Dictionary - { - { TimeTypeConstants.DURATION, dateTimeParseResult.FutureValue.ToString() }, - }; - - dateTimeParseResult.PastResolution = new Dictionary - { - { TimeTypeConstants.DURATION, dateTimeParseResult.PastValue.ToString() }, - }; - } + public IImmutableDictionary UnitMap { get; } - var ret = new DateTimeParseResult - { - Text = er.Text, - Start = er.Start, - Length = er.Length, - Type = er.Type, - Data = er.Data, - Value = dateTimeParseResult, - TimexStr = dateTimeParseResult.Timex, - ResolutionStr = string.Empty, - }; - - return ret; - } - - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } + public IImmutableDictionary UnitValueMap { get; } internal class DurationParserConfiguration : JapaneseNumberWithUnitParserConfiguration { public DurationParserConfiguration() - : base(new CultureInfo(Culture.Japanese)) + : base(new CultureInfo(Text.Culture.Japanese)) { this.BindDictionary(DurationExtractorConfiguration.DurationSuffixList); } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseHolidayParserConfiguration.cs index 09e0d8da3c..6cd1dc7e06 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseHolidayParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseHolidayParserConfiguration.cs @@ -1,272 +1,158 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; +using System.Globalization; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Japanese; -using Microsoft.Recognizers.Text.Utilities; + using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseHolidayParserConfiguration : IDateTimeParser + public class JapaneseHolidayParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKHolidayParserConfiguration { - public static readonly string ParserName = Constants.SYS_DATETIME_DATE; // "Date"; - + // @TODO Move dictionaries and hardcoded terms to resource file public static readonly Dictionary> FixedHolidaysDict = new Dictionary> { { "元旦", NewYear }, + { "旧正月", NewYear }, { "元旦节", NewYear }, + { "お正月", NewYear }, + { "独立記念日", UsaIndependenceDay }, + { "旧暦の正月初一", SpringDay }, { "教师节", TeacherDay }, + { "教師の日", TeacherDay }, { "青年节", YouthDay }, + { "青年の日", YouthDay }, { "儿童节", ChildrenDay }, + { "子供の日", ChildrenDay }, { "妇女节", FemaleDay }, + { "国際婦人デー", FemaleDay }, { "植树节", TreePlantDay }, + { "植樹祭", TreePlantDay }, { "情人节", LoverDay }, + { "バレンタインデー", LoverDay }, { "圣诞节", ChristmasDay }, + { "クリスマスの日", ChristmasDay }, + { "クリスマス", ChristmasDay }, + { "クリスマスイブ", ChristmasEve }, { "新年", NewYear }, + { "復活祭", EasterDay }, { "愚人节", FoolDay }, + { "エイプリルフール", FoolDay }, { "五一", LaborDay }, { "劳动节", LaborDay }, + { "メーデー", LaborDay }, + { "国際的な労働者の日", LaborDay }, { "万圣节", HalloweenDay }, + { "ハロウィン", HalloweenDay }, { "中秋节", MidautumnDay }, { "中秋", MidautumnDay }, + { "中秋節", MidautumnDay }, { "春节", SpringDay }, { "除夕", NewYearEve }, + { "大晦日", NewYearEve }, { "元宵节", LanternDay }, + { "元宵節", LanternDay }, { "清明节", QingMingDay }, + { "清明節", QingMingDay }, { "清明", QingMingDay }, { "端午节", DragonBoatDay }, + { "端午の節句", BoysFestival }, { "端午", DragonBoatDay }, { "国庆节", JapNationalDay }, + 
{ "国慶節", JapNationalDay }, { "建军节", JapMilBuildDay }, + { "建軍節", JapMilBuildDay }, { "女生节", GirlsDay }, + { "ガールズデー", GirlsDay }, { "光棍节", SinglesDay }, { "双十一", SinglesDay }, + { "ダブル十一", SinglesDay }, + { "シングルデー", SinglesDay }, { "重阳节", ChongYangDay }, + { "重陽節", ChongYangDay }, }; public static readonly Dictionary> HolidayFuncDict = new Dictionary > { { "父亲节", GetFathersDayOfYear }, + { "父の日", GetFathersDayOfYear }, { "母亲节", GetMothersDayOfYear }, + { "母の日", GetMothersDayOfYear }, { "感恩节", GetThanksgivingDayOfYear }, + { "感謝祭の日", GetThanksgivingDayOfYear }, + { "感謝祭", GetThanksgivingDayOfYear }, + { "キング牧師記念日", GetMartinLutherKingDayOfYear }, + }; public static readonly Dictionary NoFixedTimex = DateTimeDefinitions.HolidayNoFixedTimex; - private static readonly IExtractor IntegerExtractor = new IntegerExtractor(); - - private static readonly IParser IntegerParser = new BaseCJKNumberParser(new JapaneseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Japanese))); - - private readonly IFullDateTimeParserConfiguration config; - - public JapaneseHolidayParserConfiguration(IFullDateTimeParserConfiguration configuration) + public JapaneseHolidayParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) { - config = configuration; - } + IntegerExtractor = config.IntegerExtractor; + NumberParser = config.NumberParser; - public ParseResult Parse(ExtractResult extResult) - { - return this.Parse(extResult, DateObject.Now); + HolidayRegexList = JapaneseHolidayExtractorConfiguration.HolidayRegexList; + LunarHolidayRegex = JapaneseHolidayExtractorConfiguration.LunarHolidayRegex; } - public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) - { - var referenceDate = refDate; - object value = null; - - if (er.Type.Equals(ParserName, StringComparison.Ordinal)) - { - var innerResult = ParseHolidayRegexMatch(er.Text, referenceDate); - - if (innerResult.Success) - { - innerResult.FutureResolution = new Dictionary - { - { 
TimeTypeConstants.DATE, DateTimeFormatUtil.FormatDate((DateObject)innerResult.FutureValue) }, - }; - - innerResult.PastResolution = new Dictionary - { - { TimeTypeConstants.DATE, DateTimeFormatUtil.FormatDate((DateObject)innerResult.PastValue) }, - }; - - innerResult.IsLunar = IsLunarCalendar(er.Text); - value = innerResult; - } - } - - var ret = new DateTimeParseResult - { - Text = er.Text, - Start = er.Start, - Length = er.Length, - Type = er.Type, - Data = er.Data, - Value = value, - TimexStr = value == null ? string.Empty : ((DateTimeResolutionResult)value).Timex, - ResolutionStr = string.Empty, - }; - - return ret; - } + public IExtractor IntegerExtractor { get; } - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } + public IParser NumberParser { get; } - private static DateTimeResolutionResult ParseHolidayRegexMatch(string text, DateObject referenceDate) - { - foreach (var regex in JapaneseHolidayExtractorConfiguration.HolidayRegexList) - { - var match = regex.MatchExact(text, trim: true); - - if (match.Success) - { - // Value string will be set in Match2Date method - var ret = Match2Date(match.Match, referenceDate); - return ret; - } - } + Dictionary> ICJKHolidayParserConfiguration.FixedHolidaysDict => FixedHolidaysDict; - return new DateTimeResolutionResult(); - } + Dictionary> ICJKHolidayParserConfiguration.HolidayFuncDict => HolidayFuncDict; - private static DateTimeResolutionResult Match2Date(Match match, DateObject referenceDate) - { - var ret = new DateTimeResolutionResult(); - var holidayStr = match.Groups["holiday"].Value; - - var year = referenceDate.Year; - var hasYear = false; - var yearNum = match.Groups["year"].Value; - var yearJap = match.Groups["yearJap"].Value; - var yearRel = match.Groups["yearrel"].Value; - if (!string.IsNullOrEmpty(yearNum)) - { - hasYear = true; - if (yearNum.EndsWith("年")) - { - yearNum = yearNum.Substring(0, yearNum.Length - 1); - } + Dictionary 
ICJKHolidayParserConfiguration.NoFixedTimex => NoFixedTimex; - year = int.Parse(yearNum); - } - else if (!string.IsNullOrEmpty(yearJap)) - { - hasYear = true; - if (yearJap.EndsWith("年")) - { - yearJap = yearJap.Substring(0, yearJap.Length - 1); - } + public IEnumerable HolidayRegexList { get; } - year = ConvertJapaneseToInteger(yearJap); - } - else if (!string.IsNullOrEmpty(yearRel)) - { - hasYear = true; - if (yearRel.EndsWith("前年") || yearRel.EndsWith("先年")) - { - year--; - } - else if (yearRel.EndsWith("来年")) - { - year++; - } - } + public Regex LunarHolidayRegex { get; } - if (year < 100 && year >= 90) - { - year += 1900; - } - else if (year < 20) - { - year += 2000; - } + public int GetSwiftYear(string text) + { + // @TODO move hardcoded values to resource file + var trimmedText = text.Trim(); + var swift = -10; - if (!string.IsNullOrEmpty(holidayStr)) + if (text.EndsWith("前年", StringComparison.Ordinal) || text.EndsWith("先年", StringComparison.Ordinal)) { - DateObject value; - string timexStr; - if (FixedHolidaysDict.ContainsKey(holidayStr)) - { - value = FixedHolidaysDict[holidayStr](year); - timexStr = $"-{value.Month:D2}-{value.Day:D2}"; - } - else - { - if (HolidayFuncDict.ContainsKey(holidayStr)) - { - value = HolidayFuncDict[holidayStr](year); - timexStr = NoFixedTimex[holidayStr]; - } - else - { - return ret; - } - } - - if (hasYear) - { - ret.Timex = year.ToString("D4") + timexStr; - ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(year, value.Month, value.Day); - ret.Success = true; - return ret; - } - - ret.Timex = "XXXX" + timexStr; - ret.FutureValue = GetFutureValue(value, referenceDate, holidayStr); - ret.PastValue = GetPastValue(value, referenceDate, holidayStr); - ret.Success = true; - return ret; + swift = -1; } - - return ret; - } - - private static DateObject GetFutureValue(DateObject value, DateObject referenceDate, string holiday) - { - if (value < referenceDate) + else if (text.EndsWith("来年", 
StringComparison.Ordinal)) { - if (FixedHolidaysDict.ContainsKey(holiday)) - { - return value.AddYears(1); - } - - if (HolidayFuncDict.ContainsKey(holiday)) - { - value = HolidayFuncDict[holiday](referenceDate.Year + 1); - } + swift = +1; } - return value; + return swift; } - private static DateObject GetPastValue(DateObject value, DateObject referenceDate, string holiday) + public string SanitizeYearToken(string yearStr) { - if (value >= referenceDate) + // @TODO move hardcoded values to resource file + if (yearStr.EndsWith("年", StringComparison.Ordinal)) { - if (FixedHolidaysDict.ContainsKey(holiday)) - { - return value.AddYears(-1); - } - - if (HolidayFuncDict.ContainsKey(holiday)) - { - value = HolidayFuncDict[holiday](referenceDate.Year - 1); - } + yearStr = yearStr.Substring(0, yearStr.Length - 1); } - return value; + return yearStr; } private static DateObject NewYear(int year) => new DateObject(year, 1, 1); + private static DateObject UsaIndependenceDay(int year) => new DateObject(year, 7, 4); + private static DateObject TeacherDay(int year) => new DateObject(year, 9, 10); private static DateObject YouthDay(int year) => new DateObject(year, 5, 4); @@ -281,6 +167,10 @@ private static DateObject GetPastValue(DateObject value, DateObject referenceDat private static DateObject ChristmasDay(int year) => new DateObject(year, 12, 25); + private static DateObject ChristmasEve(int year) => new DateObject(year, 12, 24); + + private static DateObject EasterDay(int year) => HolidayFunctions.CalculateHolidayByEaster(year); + private static DateObject FoolDay(int year) => new DateObject(year, 4, 1); private static DateObject LaborDay(int year) => new DateObject(year, 5, 1); @@ -299,6 +189,8 @@ private static DateObject GetPastValue(DateObject value, DateObject referenceDat private static DateObject DragonBoatDay(int year) => new DateObject(year, 5, 5); + private static DateObject BoysFestival(int year) => new DateObject(year, 5, 5); + private static DateObject 
JapNationalDay(int year) => new DateObject(year, 10, 1); private static DateObject JapMilBuildDay(int year) => new DateObject(year, 8, 1); @@ -371,53 +263,5 @@ private static DateObject GetThanksgivingDayOfYear(int year) where DateObject.MinValue.SafeCreateFromValue(year, 11, day).DayOfWeek == DayOfWeek.Thursday select day).ElementAt(3)); } - - private static int ConvertJapaneseToInteger(string yearJapStr) - { - var year = 0; - var num = 0; - - var er = IntegerExtractor.Extract(yearJapStr); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) - { - num = Convert.ToInt32((double)(IntegerParser.Parse(er[0]).Value ?? 0)); - } - } - - if (num < 10) - { - num = 0; - foreach (var ch in yearJapStr) - { - num *= 10; - er = IntegerExtractor.Extract(ch.ToString()); - if (er.Count != 0) - { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) - { - num += Convert.ToInt32((double)(IntegerParser.Parse(er[0]).Value ?? 0)); - } - } - } - - year = num; - } - else - { - year = num; - } - - return year == 0 ? 
-1 : year; - } - - // parse if lunar contains - private bool IsLunarCalendar(string text) - { - var trimmedText = text.Trim(); - var match = JapaneseHolidayExtractorConfiguration.LunarHolidayRegex.Match(trimmedText); - return match.Success; - } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseMergedDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseMergedDateTimeParserConfiguration.cs deleted file mode 100644 index 9f900fb06b..0000000000 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseMergedDateTimeParserConfiguration.cs +++ /dev/null @@ -1,131 +0,0 @@ -using System; -using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions.Japanese; -using DateObject = System.DateTime; - -namespace Microsoft.Recognizers.Text.DateTime.Japanese -{ - public class JapaneseMergedDateTimeParserConfiguration : BaseMergedDateTimeParser - { - - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - - private static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.MergedBeforeRegex, RegexFlags); - - private static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.MergedAfterRegex, RegexFlags); - - // TODO implement SinceRegex - private static readonly Regex SinceRegex = new Regex(DateTimeDefinitions.MergedAfterRegex, RegexFlags); - - public JapaneseMergedDateTimeParserConfiguration(IMergedParserConfiguration configuration) - : base(configuration) - { - } - - public new ParseResult Parse(ExtractResult er) - { - return Parse(er, DateObject.Now); - } - - public new ParseResult Parse(ExtractResult er, DateObject refTime) - { - var referenceTime = refTime; - DateTimeParseResult pr; - - // push, save teh MOD string - bool hasBefore = false, hasAfter = false, hasSince = false; - if (BeforeRegex.IsMatch(er.Text)) - { - hasBefore = true; - } - else if (AfterRegex.IsMatch(er.Text)) - { - 
hasAfter = true; - } - else if (SinceRegex.IsMatch(er.Text)) - { - hasSince = true; - } - - if (er.Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal)) - { - pr = this.Config.DateParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) - { - pr = this.Config.TimeParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_DATETIME, StringComparison.Ordinal)) - { - pr = this.Config.DateTimeParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal)) - { - pr = this.Config.DatePeriodParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_TIMEPERIOD, StringComparison.Ordinal)) - { - pr = this.Config.TimePeriodParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_DATETIMEPERIOD, StringComparison.Ordinal)) - { - pr = this.Config.DateTimePeriodParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_DURATION, StringComparison.Ordinal)) - { - pr = this.Config.DurationParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_SET, StringComparison.Ordinal)) - { - pr = this.Config.SetParser.Parse(er, referenceTime); - } - else - { - return null; - } - - // pop, restore the MOD string - if (hasBefore) - { - var val = (DateTimeResolutionResult)pr.Value; - if (val != null) - { - val.Mod = Constants.BEFORE_MOD; - } - - pr.Value = val; - } - - if (hasAfter) - { - var val = (DateTimeResolutionResult)pr.Value; - if (val != null) - { - val.Mod = Constants.AFTER_MOD; - } - - pr.Value = val; - } - - if (hasSince) - { - var val = (DateTimeResolutionResult)pr.Value; - if (val != null) - { - val.Mod = Constants.SINCE_MOD; - } - - pr.Value = val; - } - - pr.Value = DateTimeResolution(pr); - - var hasModifier = hasBefore || hasAfter || hasSince; - - // change the type at last for the after or before mode - 
pr.Type = $"{ParserTypeName}.{DetermineDateTimeType(er.Type, hasModifier)}"; - - return pr; - } - } -} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseMergedParserConfiguration.cs new file mode 100644 index 0000000000..002d4710c7 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseMergedParserConfiguration.cs @@ -0,0 +1,43 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Japanese; + +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime.Japanese +{ + public class JapaneseMergedParserConfiguration : JapaneseCommonDateTimeParserConfiguration, ICJKMergedParserConfiguration + { + public JapaneseMergedParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) + { + BeforeRegex = JapaneseMergedExtractorConfiguration.BeforeRegex; + AfterRegex = JapaneseMergedExtractorConfiguration.AfterRegex; + SincePrefixRegex = JapaneseMergedExtractorConfiguration.SincePrefixRegex; + SinceSuffixRegex = JapaneseMergedExtractorConfiguration.SinceSuffixRegex; + AroundPrefixRegex = JapaneseMergedExtractorConfiguration.AroundPrefixRegex; + AroundSuffixRegex = JapaneseMergedExtractorConfiguration.AroundSuffixRegex; + EqualRegex = JapaneseMergedExtractorConfiguration.EqualRegex; + UntilRegex = JapaneseMergedExtractorConfiguration.UntilRegex; + } + + public Regex BeforeRegex { get; } + + public Regex AfterRegex { get; } + + public Regex SincePrefixRegex { get; } + + public Regex SinceSuffixRegex { get; } + + public Regex AroundPrefixRegex { get; } + + public Regex AroundSuffixRegex { get; } + + public Regex UntilRegex { get; } + + public Regex EqualRegex { get; } + } +} \ No newline at end of file diff --git 
a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseSetParserConfiguration.cs index 9120b97207..9afc451e9f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseSetParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseSetParserConfiguration.cs @@ -1,234 +1,118 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Japanese; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseSetParserConfiguration : IDateTimeParser + public class JapaneseSetParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKSetParserConfiguration { - public static readonly string ParserName = Constants.SYS_DATETIME_SET; + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly IDateTimeExtractor DurationExtractor = new JapaneseDurationExtractorConfiguration(); + private static readonly Regex DayTypeRegex = + new Regex(DateTimeDefinitions.DayTypeRegex, RegexFlags, RegexTimeOut); - private static readonly IDateTimeExtractor TimeExtractor = new JapaneseTimeExtractorConfiguration(); + private static readonly Regex WeekTypeRegex = + new Regex(DateTimeDefinitions.WeekTypeRegex, RegexFlags, RegexTimeOut); - private static readonly IDateTimeExtractor DateExtractor = new JapaneseDateExtractorConfiguration(); + private static readonly Regex BiWeekTypeRegex = + new Regex(DateTimeDefinitions.BiWeekTypeRegex, RegexFlags, RegexTimeOut); - private static readonly IDateTimeExtractor DateTimeExtractor = new 
JapaneseDateTimeExtractorConfiguration(); + private static readonly Regex MonthTypeRegex = + new Regex(DateTimeDefinitions.MonthTypeRegex, RegexFlags, RegexTimeOut); - private readonly IFullDateTimeParserConfiguration config; + private static readonly Regex YearTypeRegex = + new Regex(DateTimeDefinitions.YearTypeRegex, RegexFlags, RegexTimeOut); - public JapaneseSetParserConfiguration(IFullDateTimeParserConfiguration configuration) + public JapaneseSetParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) { - config = configuration; + DurationExtractor = config.DurationExtractor; + TimeExtractor = config.TimeExtractor; + TimePeriodExtractor = config.TimePeriodExtractor; + DateExtractor = config.DateExtractor; + DateTimeExtractor = config.DateTimeExtractor; + + DurationParser = config.DurationParser; + TimeParser = config.TimeParser; + TimePeriodParser = config.TimePeriodParser; + DateParser = config.DateParser; + DateTimeParser = config.DateTimeParser; + + EachPrefixRegex = JapaneseSetExtractorConfiguration.EachPrefixRegex; + EachUnitRegex = JapaneseSetExtractorConfiguration.EachUnitRegex; + EachDayRegex = JapaneseSetExtractorConfiguration.EachDayRegex; + EachDateUnitRegex = JapaneseSetExtractorConfiguration.EachDateUnitRegex; + UnitMap = config.UnitMap; } - public ParseResult Parse(ExtractResult extResult) - { - return this.Parse(extResult, DateObject.Now); - } + public IDateTimeExtractor DurationExtractor { get; } - public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) - { - var referenceDate = refDate; - object value = null; - if (er.Type.Equals(ParserName, StringComparison.Ordinal)) - { - var innerResult = ParseEachUnit(er.Text); - if (!innerResult.Success) - { - innerResult = ParseEachDuration(er.Text, refDate); - } - - if (!innerResult.Success) - { - innerResult = ParserTimeEveryday(er.Text, refDate); - } - - // NOTE: Please do not change the order of following function - // we must consider datetime before date - 
if (!innerResult.Success) - { - innerResult = ParseEachDateTime(er.Text, refDate); - } - - if (!innerResult.Success) - { - innerResult = ParseEachDate(er.Text, refDate); - } - - if (innerResult.Success) - { - innerResult.FutureResolution = new Dictionary - { - { TimeTypeConstants.SET, (string)innerResult.FutureValue }, - }; - - innerResult.PastResolution = new Dictionary - { - { TimeTypeConstants.SET, (string)innerResult.PastValue }, - }; - - value = innerResult; - } - } + public IDateTimeExtractor TimeExtractor { get; } - var ret = new DateTimeParseResult - { - Text = er.Text, - Start = er.Start, - Length = er.Length, - Type = er.Type, - Data = er.Data, - Value = value, - TimexStr = value == null ? string.Empty : ((DateTimeResolutionResult)value).Timex, - ResolutionStr = string.Empty, - }; - return ret; - } + public IDateTimeExtractor TimePeriodExtractor { get; } - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } + public IDateTimeExtractor DateExtractor { get; } - private static bool IsLessThanDay(string unit) - { - return unit.Equals("S") || unit.Equals("M") || unit.Equals("H"); - } + public IDateTimeExtractor DateTimeExtractor { get; } - private DateTimeResolutionResult ParseEachDuration(string text, DateObject refDate) - { - var ret = new DateTimeResolutionResult(); - var ers = DurationExtractor.Extract(text, refDate); - if (ers.Count != 1 || !string.IsNullOrWhiteSpace(text.Substring(ers[0].Start + ers[0].Length ?? 0))) - { - return ret; - } + public IDateTimeParser DurationParser { get; } - var beforeStr = text.Substring(0, ers[0].Start ?? 
0); - if (JapaneseSetExtractorConfiguration.EachPrefixRegex.IsMatch(beforeStr)) - { - var pr = this.config.DurationParser.Parse(ers[0], DateObject.Now); - ret.Timex = pr.TimexStr; - ret.FutureValue = ret.PastValue = "Set: " + pr.TimexStr; - ret.Success = true; - return ret; - } + public IDateTimeParser TimeParser { get; } - return ret; - } + public IDateTimeParser TimePeriodParser { get; } - private DateTimeResolutionResult ParseEachUnit(string text) - { - var ret = new DateTimeResolutionResult(); + public IDateTimeParser DateParser { get; } - // handle "each month" - var match = JapaneseSetExtractorConfiguration.EachUnitRegex.MatchExact(text, trim: true); + public IDateTimeParser DateTimeParser { get; } - if (match.Success) - { - var sourceUnit = match.Groups["unit"].Value; - if (!string.IsNullOrEmpty(sourceUnit) && this.config.UnitMap.ContainsKey(sourceUnit)) - { - if (sourceUnit.Equals("天") || sourceUnit.Equals("日")) - { - ret.Timex = "P1D"; - } - else if (sourceUnit.Equals("周") || sourceUnit.Equals("星期")) - { - ret.Timex = "P1W"; - } - else if (sourceUnit.Equals("月")) - { - ret.Timex = "P1M"; - } - else if (sourceUnit.Equals("年")) - { - ret.Timex = "P1Y"; - } - else - { - return ret; - } - - ret.FutureValue = ret.PastValue = "Set: " + ret.Timex; - ret.Success = true; - return ret; - } - } + public Regex EachPrefixRegex { get; } - return ret; - } + public Regex EachUnitRegex { get; } + + public Regex EachDayRegex { get; } + + public Regex EachDateUnitRegex { get; } - private DateTimeResolutionResult ParserTimeEveryday(string text, DateObject refDate) + public IImmutableDictionary UnitMap { get; } + + public bool GetMatchedUnitTimex(string text, out string timex) { - var ret = new DateTimeResolutionResult(); - var ers = TimeExtractor.Extract(text, refDate); - if (ers.Count != 1) + var trimmedText = text.Trim(); + + if (DayTypeRegex.IsMatch(trimmedText)) { - return ret; + timex = "P1D"; } - - var beforeStr = text.Substring(0, ers[0].Start ?? 
0); - var match = JapaneseSetExtractorConfiguration.EachDayRegex.Match(beforeStr); - if (match.Success) + else if (BiWeekTypeRegex.IsMatch(trimmedText)) { - var pr = this.config.TimeParser.Parse(ers[0], DateObject.Now); - ret.Timex = pr.TimexStr; - ret.FutureValue = ret.PastValue = "Set: " + ret.Timex; - ret.Success = true; - return ret; + timex = "P2W"; } - - return ret; - } - - private DateTimeResolutionResult ParseEachDate(string text, DateObject refDate) - { - var ret = new DateTimeResolutionResult(); - var ers = DateExtractor.Extract(text, refDate); - if (ers.Count != 1) + else if (WeekTypeRegex.IsMatch(trimmedText)) { - return ret; + timex = "P1W"; } - - var beforeStr = text.Substring(0, ers[0].Start ?? 0); - var match = JapaneseSetExtractorConfiguration.EachPrefixRegex.Match(beforeStr); - if (match.Success) + else if (MonthTypeRegex.IsMatch(trimmedText)) { - var pr = this.config.DateParser.Parse(ers[0], DateObject.Now); - ret.Timex = pr.TimexStr; - ret.FutureValue = ret.PastValue = "Set: " + ret.Timex; - ret.Success = true; - return ret; + timex = "P1M"; } - - return ret; - } - - private DateTimeResolutionResult ParseEachDateTime(string text, DateObject refDate) - { - var ret = new DateTimeResolutionResult(); - var ers = DateTimeExtractor.Extract(text, refDate); - if (ers.Count != 1) + else if (YearTypeRegex.IsMatch(trimmedText)) { - return ret; + timex = "P1Y"; } - - var beforeStr = text.Substring(0, ers[0].Start ?? 
0); - var match = JapaneseSetExtractorConfiguration.EachPrefixRegex.Match(beforeStr); - if (match.Success) + else { - var pr = this.config.DateTimeParser.Parse(ers[0], DateObject.Now); - ret.Timex = pr.TimexStr; - ret.FutureValue = ret.PastValue = "Set: " + ret.Timex; - ret.Success = true; - return ret; + timex = null; + return false; } - return ret; + return true; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseTimeParserConfiguration.cs index 0d1260a181..44a0a6e318 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseTimeParserConfiguration.cs @@ -1,93 +1,43 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using Microsoft.Recognizers.Definitions.Japanese; using Microsoft.Recognizers.Text.DateTime.Utilities; using DateObject = System.DateTime; -using TimeExtractorJpn = Microsoft.Recognizers.Text.DateTime.Japanese.JapaneseTimeExtractorConfiguration; namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseTimeParserConfiguration : IDateTimeParser + public class JapaneseTimeParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKTimeParserConfiguration { - public static readonly IDateTimeExtractor TimeExtractor = new JapaneseTimeExtractorConfiguration(); - - private static TimeFunctions timeFunctions = new TimeFunctions + private static TimeFunctions timeFunc = new TimeFunctions { NumberDictionary = DateTimeDefinitions.TimeNumberDictionary, LowBoundDesc = DateTimeDefinitions.TimeLowBoundDesc, - DayDescRegex = TimeExtractorJpn.DayDescRegex, + DayDescRegex = JapaneseTimeExtractorConfiguration.DayDescRegex, }; private static readonly 
Dictionary FunctionMap = new Dictionary { - { TimeType.DigitTime, timeFunctions.HandleDigit }, - { TimeType.CjkTime, timeFunctions.HandleKanji }, - { TimeType.LessTime, timeFunctions.HandleLess }, + { TimeType.DigitTime, timeFunc.HandleDigit }, + { TimeType.CjkTime, timeFunc.HandleKanji }, + { TimeType.LessTime, timeFunc.HandleLess }, }; - private readonly IFullDateTimeParserConfiguration config; - - public JapaneseTimeParserConfiguration(IFullDateTimeParserConfiguration configuration) - { - config = configuration; - } - - private delegate TimeResult TimeFunction(DateTimeExtra extra); - - public ParseResult Parse(ExtractResult extResult) + public JapaneseTimeParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) { - return this.Parse(extResult, DateObject.Now); + TimeExtractor = config.TimeExtractor; } - public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) - { - var referenceTime = refDate; - var extra = er.Data as DateTimeExtra; - if (extra == null) - { - var result = TimeExtractor.Extract(er.Text, refDate); - extra = result[0]?.Data as DateTimeExtra; - } - - if (extra != null) - { - var timeResult = FunctionMap[extra.Type](extra); - var parseResult = timeFunctions.PackTimeResult(extra, timeResult, referenceTime); - if (parseResult.Success) - { - parseResult.FutureResolution = new Dictionary - { - { TimeTypeConstants.TIME, DateTimeFormatUtil.FormatTime((DateObject)parseResult.FutureValue) }, - }; - - parseResult.PastResolution = new Dictionary - { - { TimeTypeConstants.TIME, DateTimeFormatUtil.FormatTime((DateObject)parseResult.PastValue) }, - }; - } + // public delegate TimeResult TimeFunction(DateTimeExtra extra); - var ret = new DateTimeParseResult - { - Start = er.Start, - Text = er.Text, - Type = er.Type, - Length = er.Length, - Value = parseResult, - Data = timeResult, - ResolutionStr = string.Empty, - TimexStr = parseResult.Timex, - }; + public IDateTimeExtractor TimeExtractor { get; } - return ret; - } + 
TimeFunctions ICJKTimeParserConfiguration.TimeFunc => timeFunc; - return null; - } - - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } + Dictionary ICJKTimeParserConfiguration.FunctionMap => FunctionMap; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseTimePeriodParserConfiguration.cs index d11786a31d..ee4abb9d44 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseTimePeriodParserConfiguration.cs @@ -1,5 +1,9 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; +using System.Linq; using Microsoft.Recognizers.Definitions.Japanese; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -7,7 +11,7 @@ namespace Microsoft.Recognizers.Text.DateTime.Japanese { - public class JapaneseTimePeriodParserConfiguration : IDateTimeParser + public class JapaneseTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKTimePeriodParserConfiguration { private static TimeFunctions timeFunc = new TimeFunctions { @@ -16,124 +20,55 @@ public class JapaneseTimePeriodParserConfiguration : IDateTimeParser DayDescRegex = JapaneseTimeExtractorConfiguration.DayDescRegex, }; - private readonly IFullDateTimeParserConfiguration config; - - public JapaneseTimePeriodParserConfiguration(IFullDateTimeParserConfiguration configuration) - { - config = configuration; - } - - public ParseResult Parse(ExtractResult extResult) + public JapaneseTimePeriodParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) { - return this.Parse(extResult, DateObject.Now); + TimeExtractor = config.TimeExtractor; + TimeParser = 
config.TimeParser; } - public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) - { - var referenceTime = refDate; - var extra = er.Data as DateTimeExtra; - if (extra == null) - { - var result = new JapaneseTimeExtractorConfiguration().Extract(er.Text, refDate); - extra = result[0]?.Data as DateTimeExtra; - } - - if (extra != null) - { - // Handle special case like '上午', '下午' - var parseResult = ParseJapaneseTimeOfDay(er.Text, referenceTime); - - if (!parseResult.Success) - { - parseResult = TimePeriodFunctions.Handle(this.config.TimeParser, extra, referenceTime, timeFunc); - } - - if (parseResult.Success) - { - parseResult.FutureResolution = new Dictionary - { - { - TimeTypeConstants.START_TIME, - DateTimeFormatUtil.FormatTime(((Tuple)parseResult.FutureValue).Item1) - }, - { - TimeTypeConstants.END_TIME, - DateTimeFormatUtil.FormatTime(((Tuple)parseResult.FutureValue).Item2) - }, - }; - - parseResult.PastResolution = new Dictionary - { - { - TimeTypeConstants.START_TIME, - DateTimeFormatUtil.FormatTime(((Tuple)parseResult.PastValue).Item1) - }, - { - TimeTypeConstants.END_TIME, - DateTimeFormatUtil.FormatTime(((Tuple)parseResult.PastValue).Item2) - }, - }; - } + public IDateTimeExtractor TimeExtractor { get; } - var ret = new DateTimeParseResult - { - Start = er.Start, - Text = er.Text, - Type = er.Type, - Length = er.Length, - Value = parseResult, - ResolutionStr = string.Empty, - TimexStr = parseResult.Timex, - }; + public IDateTimeParser TimeParser { get; } - return ret; - } + TimeFunctions ICJKTimePeriodParserConfiguration.TimeFunc => timeFunc; - return null; - } - - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } - - private static bool GetMatchedTimexRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) + public bool GetMatchedTimexRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) { var trimmedText = text.Trim(); beginHour 
= 0; endHour = 0; endMin = 0; - if (trimmedText.EndsWith("上午")) + var timeOfDay = string.Empty; + + if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { - timex = "TMO"; - beginHour = 8; - endHour = Constants.HalfDayHourCount; + timeOfDay = Constants.Morning; } - else if (trimmedText.EndsWith("下午")) + else if (DateTimeDefinitions.MidDayTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { - timex = "TAF"; - beginHour = Constants.HalfDayHourCount; - endHour = 16; + timeOfDay = Constants.MidDay; } - else if (trimmedText.EndsWith("晚上")) + else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { - timex = "TEV"; - beginHour = 16; - endHour = 20; + timeOfDay = Constants.Afternoon; } - else if (trimmedText.Equals("白天")) + else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { - timex = "TDT"; - beginHour = 8; - endHour = 18; + timeOfDay = Constants.Evening; } - else if (trimmedText.EndsWith("深夜")) + else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o, StringComparison.Ordinal))) { - timex = "TNI"; - beginHour = 20; - endHour = 23; - endMin = 59; + timeOfDay = Constants.Daytime; + } + else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) + { + timeOfDay = Constants.Night; + } + else if (DateTimeDefinitions.BusinessHourTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) + { + timeOfDay = Constants.BusinessHour; } else { @@ -141,28 +76,30 @@ private static bool GetMatchedTimexRange(string text, out string timex, out int return false; } - return true; - } - - private DateTimeResolutionResult ParseJapaneseTimeOfDay(string text, DateObject referenceTime) - { - int day = referenceTime.Day, - month = referenceTime.Month, - year = referenceTime.Year; - var ret = new DateTimeResolutionResult(); + var parseResult = 
TimexUtility.ResolveTimeOfDay(timeOfDay); + timex = parseResult.Timex; + beginHour = parseResult.BeginHour; + endHour = parseResult.EndHour; + endMin = parseResult.EndMin; - if (!GetMatchedTimexRange(text, out string timex, out int beginHour, out int endHour, out int endMinSeg)) + // Modify time period if "early"/"late" is present + if (DateTimeDefinitions.EarlyHourTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { - return new DateTimeResolutionResult(); + endHour = beginHour + Constants.HalfMidDayDurationHourCount; + + // Handling special case: night ends with 23:59. + if (endMin == 59) + { + endMin = 0; + } } - ret.Timex = timex; - ret.FutureValue = ret.PastValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(year, month, day, beginHour, 0, 0), - DateObject.MinValue.SafeCreateFromValue(year, month, day, endHour, endMinSeg, 0)); - ret.Success = true; + if (DateTimeDefinitions.LateHourTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) + { + beginHour = beginHour + Constants.HalfMidDayDurationHourCount; + } - return ret; + return true; } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDateExtractorConfiguration.cs new file mode 100644 index 0000000000..d7fa051949 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDateExtractorConfiguration.cs @@ -0,0 +1,146 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Korean; +using Microsoft.Recognizers.Definitions.Utilities; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Korean; +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime.Korean +{ + public class KoreanDateExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKDateExtractorConfiguration + { + + public static readonly Regex WeekDayRegex = new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LunarRegex = new Regex(DateTimeDefinitions.LunarRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ThisRegex = new Regex(DateTimeDefinitions.DateThisRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DateLastRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DateNextRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialDayRegex = new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayOfMonthRegex = new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayAndDayRegex = new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DurationRelativeDurationUnitRegex = new Regex(DateTimeDefinitions.DurationRelativeDurationUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialDayWithNumRegex = new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialDate = new Regex(DateTimeDefinitions.SpecialDate, RegexFlags, RegexTimeOut); + + public static readonly Regex 
DurationFromSpecialDayRegex = new Regex(DateTimeDefinitions.DurationFromSpecialDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayStartEnd = new Regex(DateTimeDefinitions.WeekDayStartEnd, RegexFlags, RegexTimeOut); + + public static readonly Regex DateTimePeriodUnitRegex = new Regex(DateTimeDefinitions.DateTimePeriodUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangeConnectorSymbolRegex = new Regex(DateTimeDefinitions.DatePeriodTillRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthRegex = new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DayRegex = new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DayRegexInCJK = new Regex(DateTimeDefinitions.DateDayRegexInCJK, RegexFlags, RegexTimeOut); + public static readonly Regex DayRegexNumInCJK = new Regex(DateTimeDefinitions.DayRegexNumInCJK, RegexFlags, RegexTimeOut); + public static readonly Regex MonthNumRegex = new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); + public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + public static readonly Regex RelativeRegex = new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut); + public static readonly Regex RelativeMonthRegex = new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex ZeroToNineIntegerRegexCJK = new Regex(DateTimeDefinitions.ZeroToNineIntegerRegexCJK, RegexFlags, RegexTimeOut); + public static readonly Regex YearInCJKRegex = new Regex(DateTimeDefinitions.DateYearInCJKRegex, RegexFlags, RegexTimeOut); + public static readonly 
Regex ThisRe = new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastRe = new Regex(DateTimeDefinitions.LastPrefixRegex, RegexFlags, RegexTimeOut); + public static readonly Regex NextRe = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DynastyYearRegex = new Regex(DateTimeDefinitions.DynastyYearRegex, RegexFlags, RegexTimeOut); + public static readonly string DynastyStartYear = DateTimeDefinitions.DynastyStartYear; + public static readonly ImmutableDictionary DynastyYearMap = DateTimeDefinitions.DynastyYearMap.ToImmutableDictionary(); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public KoreanDateExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + var durationConfig = new BaseDateTimeOptionsConfiguration(config.Culture, DateTimeOptions.None); + + DurationExtractor = new BaseCJKDurationExtractor(new KoreanDurationExtractorConfiguration(durationConfig)); + + ImplicitDateList = new List + { + LunarRegex, SpecialDayRegex, ThisRegex, LastRegex, NextRegex, + WeekDayRegex, WeekDayOfMonthRegex, SpecialDate, DurationFromSpecialDayRegex, + }; + + // (음력)? (2016)? 1 월 3 일 (수)? + var dateRegex1 = new Regex(DateTimeDefinitions.DateRegexList1, RegexFlags, RegexTimeOut); + + // (2015)? (음력)? 10 월 1 일 (수)? + var dateRegex2 = new Regex(DateTimeDefinitions.DateRegexList2, RegexFlags, RegexTimeOut); + + // (2015)? (음력)? 10 월 20 일 (수)? 
+ var dateRegex3 = new Regex(DateTimeDefinitions.DateRegexList3, RegexFlags, RegexTimeOut); + + // 2015-12-23 + var dateRegex8 = new Regex(DateTimeDefinitions.DateRegexList8, RegexFlags, RegexTimeOut); + + var dateRegex9 = new Regex(DateTimeDefinitions.DateRegexList9, RegexFlags, RegexTimeOut); + + // 23/7 + var dateRegex5 = new Regex(DateTimeDefinitions.DateRegexList5, RegexFlags, RegexTimeOut); + + // 7/23 + var dateRegex4 = new Regex(DateTimeDefinitions.DateRegexList4, RegexFlags, RegexTimeOut); + + // 23-3-2017 + var dateRegex7 = new Regex(DateTimeDefinitions.DateRegexList7, RegexFlags, RegexTimeOut); + + // 3-23-2015 + var dateRegex6 = new Regex(DateTimeDefinitions.DateRegexList6, RegexFlags, RegexTimeOut); + + // Regex precedence where the order between D and M varies is controlled by DefaultLanguageFallback + var enableDmy = DateTimeDefinitions.DefaultLanguageFallback == Constants.DefaultLanguageFallback_DMY; + var enableYmd = DateTimeDefinitions.DefaultLanguageFallback == Constants.DefaultLanguageFallback_YMD; + + DateRegexList = new List { dateRegex1, dateRegex2, dateRegex3, dateRegex8, dateRegex9 }; + DateRegexList = DateRegexList.Concat( + enableDmy ? + new[] { dateRegex5, dateRegex4, dateRegex7, dateRegex6 } : + enableYmd ? 
+ new[] { dateRegex4, dateRegex5, dateRegex7, dateRegex6 } : + new[] { dateRegex4, dateRegex5, dateRegex6, dateRegex7 }); + + } + + public IEnumerable DateRegexList { get; } + + public IEnumerable ImplicitDateList { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + Regex ICJKDateExtractorConfiguration.DateTimePeriodUnitRegex => DateTimePeriodUnitRegex; + + Regex ICJKDateExtractorConfiguration.BeforeRegex => BeforeRegex; + + Regex ICJKDateExtractorConfiguration.AfterRegex => AfterRegex; + + Regex ICJKDateExtractorConfiguration.WeekDayStartEnd => WeekDayStartEnd; + + Regex ICJKDateExtractorConfiguration.RangeConnectorSymbolRegex => RangeConnectorSymbolRegex; + + public Dictionary AmbiguityDateFiltersDict => DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityDateFiltersDict); + + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDatePeriodExtractorConfiguration.cs new file mode 100644 index 0000000000..3af95168f0 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDatePeriodExtractorConfiguration.cs @@ -0,0 +1,186 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Korean; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Korean; +using Microsoft.Recognizers.Text.Utilities; +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime.Korean +{ + public class KoreanDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKDatePeriodExtractorConfiguration + { + public static readonly Regex TillRegex = new Regex(DateTimeDefinitions.DatePeriodTillRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangePrefixRegex = new Regex(DateTimeDefinitions.DatePeriodRangePrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangeSuffixRegex = new Regex(DateTimeDefinitions.DatePeriodRangeSuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex StrictYearRegex = new Regex(DateTimeDefinitions.StrictYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearInCJKRegex = new Regex(DateTimeDefinitions.DatePeriodYearInCJKRegex, RegexFlags, RegexTimeOut); + + // for case "(에서)? (2017)? 
12 월 10 일" + public static readonly Regex SimpleCasesRegex = new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearAndMonth = new Regex(DateTimeDefinitions.YearAndMonth, RegexFlags, RegexTimeOut); + + // 2017.12, 2017-12, 2017/12, 12/2017 + public static readonly Regex PureNumYearAndMonth = new Regex(DateTimeDefinitions.PureNumYearAndMonth, RegexFlags, RegexTimeOut); + + public static readonly Regex SimpleYearAndMonth = new Regex(DateTimeDefinitions.SimpleYearAndMonth, RegexFlags, RegexTimeOut); + + public static readonly Regex OneWordPeriodRegex = new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekOfMonthRegex = new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekOfYearRegex = new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekOfDateRegex = new Regex(DateTimeDefinitions.WeekOfDateRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthOfDateRegex = new Regex(DateTimeDefinitions.MonthOfDateRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WhichWeekRegex = new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FollowedUnit = new Regex(DateTimeDefinitions.FollowedUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex NumberCombinedWithUnit = new Regex(DateTimeDefinitions.NumberCombinedWithUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex YearToYear = new Regex(DateTimeDefinitions.YearToYear, RegexFlags, RegexTimeOut); + + public static readonly Regex YearToYearSuffixRequired = new Regex(DateTimeDefinitions.YearToYearSuffixRequired, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthToMonth = new Regex(DateTimeDefinitions.MonthToMonth, RegexFlags, RegexTimeOut); + + public static readonly Regex 
MonthToMonthSuffixRequired = new Regex(DateTimeDefinitions.MonthToMonthSuffixRequired, RegexFlags, RegexTimeOut); + + public static readonly Regex DayToDay = new Regex(DateTimeDefinitions.DayToDay, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthDayRange = new Regex(DateTimeDefinitions.MonthDayRange, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekToWeek = new Regex(DateTimeDefinitions.WeekToWeek, RegexFlags, RegexTimeOut); + + public static readonly Regex DayRegexForPeriod = new Regex(DateTimeDefinitions.DayRegexForPeriod, RegexFlags, RegexTimeOut); + + public static readonly Regex PastRegex = new Regex(DateTimeDefinitions.PastRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FutureRegex = new Regex(DateTimeDefinitions.FutureRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekWithWeekDayRangeRegex = new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FirstLastOfYearRegex = new Regex(DateTimeDefinitions.FirstLastOfYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SeasonWithYear = new Regex(DateTimeDefinitions.SeasonWithYear, RegexFlags, RegexTimeOut); + + public static readonly Regex QuarterRegex = new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DecadeRegex = new Regex(DateTimeDefinitions.DecadeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex CenturyRegex = new Regex(DateTimeDefinitions.CenturyRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativePeriodRegex = new Regex(DateTimeDefinitions.RelativePeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialMonthRegex = new Regex(DateTimeDefinitions.SpecialMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialYearRegex = new Regex(DateTimeDefinitions.SpecialYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex 
DayRegex = new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DayRegexInCJK = new Regex(DateTimeDefinitions.DatePeriodDayRegexInCJK, RegexFlags, RegexTimeOut); + public static readonly Regex MonthNumRegex = new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); + public static readonly Regex ThisRegex = new Regex(DateTimeDefinitions.DatePeriodThisRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DateUnitRegex = new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DatePeriodLastRegex, RegexFlags, RegexTimeOut); + public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DatePeriodNextRegex, RegexFlags, RegexTimeOut); + public static readonly Regex RelativeMonthRegex = new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LaterEarlyPeriodRegex = new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DatePointWithAgoAndLater = new Regex(DateTimeDefinitions.DatePointWithAgoAndLater, RegexFlags, RegexTimeOut); + public static readonly Regex ReferenceDatePeriodRegex = new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags, RegexTimeOut); + public static readonly Regex ComplexDatePeriodRegex = new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags, RegexTimeOut); + public static readonly Regex MonthRegex = new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + public static readonly Regex YearRegexInNumber = new Regex(DateTimeDefinitions.YearRegexInNumber, RegexFlags, RegexTimeOut); + public static readonly Regex ZeroToNineIntegerRegexCJK = new Regex(DateTimeDefinitions.ZeroToNineIntegerRegexCJK, RegexFlags, RegexTimeOut); + public 
static readonly Regex MonthSuffixRegex = new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags, RegexTimeOut); + public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.UnitRegex, RegexFlags, RegexTimeOut); + public static readonly Regex DurationUnitRegex = new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); + public static readonly Regex SeasonRegex = new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex[] SimpleCasesRegexes = + { + SimpleCasesRegex, + OneWordPeriodRegex, + YearRegex, + StrictYearRegex, + YearAndMonth, + PureNumYearAndMonth, + YearInCJKRegex, + SpecialMonthRegex, + SpecialYearRegex, + WeekOfMonthRegex, + WeekOfYearRegex, + WeekOfDateRegex, + MonthOfDateRegex, + WhichWeekRegex, + WeekToWeek, + SeasonWithYear, + QuarterRegex, + DecadeRegex, + CenturyRegex, + RelativePeriodRegex, + ComplexDatePeriodRegex, + }; + + public KoreanDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + DatePointExtractor = new BaseCJKDateExtractor(new KoreanDateExtractorConfiguration(this)); + DurationExtractor = new BaseCJKDurationExtractor(new KoreanDurationExtractorConfiguration(this)); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = new IntegerExtractor(numConfig); + } + + public IDateTimeExtractor DatePointExtractor { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IExtractor IntegerExtractor { get; } + + IEnumerable ICJKDatePeriodExtractorConfiguration.SimpleCasesRegexes => SimpleCasesRegexes; + + Regex ICJKDatePeriodExtractorConfiguration.TillRegex => TillRegex; + + Regex 
ICJKDatePeriodExtractorConfiguration.FutureRegex => FutureRegex; + + Regex ICJKDatePeriodExtractorConfiguration.PastRegex => PastRegex; + + Regex ICJKDatePeriodExtractorConfiguration.DateUnitRegex => DateUnitRegex; + + Regex ICJKDatePeriodExtractorConfiguration.FirstLastOfYearRegex => FirstLastOfYearRegex; + + Regex ICJKDatePeriodExtractorConfiguration.UnitRegex => UnitRegex; + + Regex ICJKDatePeriodExtractorConfiguration.NumberCombinedWithUnit => NumberCombinedWithUnit; + + Regex ICJKDatePeriodExtractorConfiguration.FollowedUnit => FollowedUnit; + + Regex ICJKDatePeriodExtractorConfiguration.RangePrefixRegex => RangePrefixRegex; + + Regex ICJKDatePeriodExtractorConfiguration.RangeSuffixRegex => RangeSuffixRegex; + + public Dictionary AmbiguityFiltersDict => null; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDateTimeExtractorConfiguration.cs new file mode 100644 index 0000000000..9e35a304bb --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDateTimeExtractorConfiguration.cs @@ -0,0 +1,77 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Korean; +using Microsoft.Recognizers.Definitions.Utilities; +using Microsoft.Recognizers.Text.Utilities; +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime.Korean +{ + public class KoreanDateTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKDateTimeExtractorConfiguration + { + public static readonly string ExtractorName = Constants.SYS_DATETIME_DATETIME; // "DateTime"; + + public static readonly Regex PrepositionRegex = new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NowRegex = new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NightRegex = new Regex(DateTimeDefinitions.NightRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeOfSpecialDayRegex = new Regex(DateTimeDefinitions.TimeOfSpecialDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeOfDayRegex = new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DurationRelativeDurationUnitRegex = new Regex(DateTimeDefinitions.DurationRelativeDurationUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AgoLaterRegex = new Regex(DateTimeDefinitions.AgoLaterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DateTimePeriodUnitRegex = new Regex(DateTimeDefinitions.DateTimePeriodUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ConnectorRegex = new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions 
RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public KoreanDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + + DatePointExtractor = new BaseCJKDateExtractor(new KoreanDateExtractorConfiguration(this)); + TimePointExtractor = new BaseCJKTimeExtractor(new KoreanTimeExtractorConfiguration(this)); + DurationExtractor = new BaseCJKDurationExtractor(new KoreanDurationExtractorConfiguration(this)); + } + + public IDateTimeExtractor DatePointExtractor { get; } + + public IDateTimeExtractor TimePointExtractor { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + Regex ICJKDateTimeExtractorConfiguration.NowRegex => NowRegex; + + Regex ICJKDateTimeExtractorConfiguration.PrepositionRegex => PrepositionRegex; + + Regex ICJKDateTimeExtractorConfiguration.NightRegex => NightRegex; + + Regex ICJKDateTimeExtractorConfiguration.TimeOfSpecialDayRegex => TimeOfSpecialDayRegex; + + Regex ICJKDateTimeExtractorConfiguration.TimeOfDayRegex => TimeOfDayRegex; + + Regex ICJKDateTimeExtractorConfiguration.BeforeRegex => BeforeRegex; + + Regex ICJKDateTimeExtractorConfiguration.AfterRegex => AfterRegex; + + Regex ICJKDateTimeExtractorConfiguration.ConnectorRegex => ConnectorRegex; + + public Dictionary AmbiguityDateTimeFiltersDict => DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityDateTimeFiltersDict); + + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDateTimePeriodExtractorConfiguration.cs new file mode 100644 index 0000000000..74c436555f --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDateTimePeriodExtractorConfiguration.cs @@ -0,0 +1,154 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Korean; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Korean; +using Microsoft.Recognizers.Text.Utilities; +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime.Korean +{ + public class KoreanDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, + ICJKDateTimePeriodExtractorConfiguration + { + + public static readonly Regex TillRegex = new Regex(DateTimeDefinitions.DateTimePeriodTillRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PrepositionRegex = new Regex(DateTimeDefinitions.DateTimePeriodPrepositionRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ZhijianRegex = new Regex(DateTimeDefinitions.ZhijianRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeOfDayRegex = new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecificTimeOfDayRegex = new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.DateTimePeriodUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FollowedUnit = new Regex(DateTimeDefinitions.DateTimePeriodFollowedUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex PastRegex = new Regex(DateTimeDefinitions.PastRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FutureRegex = new Regex(DateTimeDefinitions.FutureRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayRegex = new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimePeriodLeftRegex = new Regex(DateTimeDefinitions.TimePeriodLeftRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeRegex = new 
Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RestOfDateRegex = new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AmPmDescRegex = new Regex(DateTimeDefinitions.AmPmDescRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex BeforeAfterRegex = new Regex(DateTimeDefinitions.BeforeAfterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex HourRegex = new Regex(DateTimeDefinitions.HourRegex, RegexFlags, RegexTimeOut); + public static readonly Regex HourNumRegex = new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags, RegexTimeOut); + public static readonly Regex ThisRegex = new Regex(DateTimeDefinitions.DateTimePeriodThisRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DateTimePeriodLastRegex, RegexFlags, RegexTimeOut); + public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DateTimePeriodNextRegex, RegexFlags, RegexTimeOut); + public static readonly Regex NumberCombinedWithUnit = new Regex(DateTimeDefinitions.DateTimePeriodNumberCombinedWithUnit, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public KoreanDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = new CardinalExtractor(numConfig); + + SingleDateExtractor = new BaseCJKDateExtractor(new KoreanDateExtractorConfiguration(this)); + SingleTimeExtractor = new BaseCJKTimeExtractor(new KoreanTimeExtractorConfiguration(this)); + SingleDateTimeExtractor = new BaseCJKDateTimeExtractor(new KoreanDateTimeExtractorConfiguration(this)); + 
DurationExtractor = new BaseCJKDurationExtractor(new KoreanDurationExtractorConfiguration(this)); + TimePeriodExtractor = new BaseCJKTimePeriodExtractor(new KoreanTimePeriodExtractorConfiguration(this)); + } + + public IExtractor CardinalExtractor { get; } + + public IDateTimeExtractor SingleDateExtractor { get; } + + public IDateTimeExtractor SingleTimeExtractor { get; } + + public IDateTimeExtractor SingleDateTimeExtractor { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateTimeExtractor TimePeriodExtractor { get; } + + Regex ICJKDateTimePeriodExtractorConfiguration.PrepositionRegex => PrepositionRegex; + + Regex ICJKDateTimePeriodExtractorConfiguration.TillRegex => TillRegex; + + Regex ICJKDateTimePeriodExtractorConfiguration.SpecificTimeOfDayRegex => SpecificTimeOfDayRegex; + + Regex ICJKDateTimePeriodExtractorConfiguration.TimeOfDayRegex => TimeOfDayRegex; + + Regex ICJKDateTimePeriodExtractorConfiguration.FollowedUnit => FollowedUnit; + + Regex ICJKDateTimePeriodExtractorConfiguration.UnitRegex => UnitRegex; + + Regex ICJKDateTimePeriodExtractorConfiguration.PastRegex => PastRegex; + + Regex ICJKDateTimePeriodExtractorConfiguration.FutureRegex => FutureRegex; + + Regex ICJKDateTimePeriodExtractorConfiguration.TimePeriodLeftRegex => TimePeriodLeftRegex; + + Regex ICJKDateTimePeriodExtractorConfiguration.RelativeRegex => RelativeRegex; + + Regex ICJKDateTimePeriodExtractorConfiguration.RestOfDateRegex => RestOfDateRegex; + + Regex ICJKDateTimePeriodExtractorConfiguration.AmPmDescRegex => AmPmDescRegex; + + Regex ICJKDateTimePeriodExtractorConfiguration.ThisRegex => ThisRegex; + + Regex ICJKDateTimePeriodExtractorConfiguration.BeforeAfterRegex => BeforeAfterRegex; + + public bool GetFromTokenIndex(string text, out int index) + { + index = -1; + + // @TODO move hardcoded values to resources file + if (text.Trim().EndsWith("从", StringComparison.Ordinal)) + { + index = text.LastIndexOf("从", StringComparison.Ordinal); + return true; + } 
+ + return false; + } + + public bool GetBetweenTokenIndex(string text, out int index) + { + index = -1; + var match = ZhijianRegex.Match(text); + if (match.Success) + { + index = match.Length; + return true; + } + + return false; + } + + public bool HasConnectorToken(string text) + { + // @TODO move hardcoded values to resources file + return text.Equals("和", StringComparison.Ordinal) || + text.Equals("与", StringComparison.Ordinal) || + text.Equals("到", StringComparison.Ordinal); + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDurationExtractorConfiguration.cs new file mode 100644 index 0000000000..db6ec81fde --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDurationExtractorConfiguration.cs @@ -0,0 +1,99 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Globalization; +using System.Linq; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Korean; +using Microsoft.Recognizers.Text.NumberWithUnit; +using Microsoft.Recognizers.Text.NumberWithUnit.Korean; + +namespace Microsoft.Recognizers.Text.DateTime.Korean +{ + + public class KoreanDurationExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKDurationExtractorConfiguration + { + + public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.DurationYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DurationUnitRegex = new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AnUnitRegex = new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DurationConnectorRegex = new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AllRegex = new Regex(DateTimeDefinitions.DurationAllRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex HalfRegex = new Regex(DateTimeDefinitions.DurationHalfRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeDurationUnitRegex = new Regex(DateTimeDefinitions.DurationRelativeDurationUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DuringRegex = new Regex(DateTimeDefinitions.DurationDuringRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SomeRegex = new Regex(DateTimeDefinitions.DurationSomeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MoreOrLessRegex = new Regex(DateTimeDefinitions.DurationMoreOrLessRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private readonly bool merge; + + public KoreanDurationExtractorConfiguration(IDateTimeOptionsConfiguration 
config, bool merge = true) + : base(config) + { + this.merge = merge; + + InternalExtractor = new NumberWithUnitExtractor(new DurationExtractorConfiguration()); + + UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToDictionary(k => k.Key, k => k.Value); + UnitValueMap = DateTimeDefinitions.DurationUnitValueMap; + } + + public IExtractor InternalExtractor { get; } + + public Dictionary UnitMap { get; } + + public Dictionary UnitValueMap { get; } + + public Dictionary AmbiguityDurationFiltersDict => null; + + Regex ICJKDurationExtractorConfiguration.DurationUnitRegex => DurationUnitRegex; + + Regex ICJKDurationExtractorConfiguration.DurationConnectorRegex => DurationConnectorRegex; + + Regex ICJKDurationExtractorConfiguration.YearRegex => YearRegex; + + Regex ICJKDurationExtractorConfiguration.AllRegex => AllRegex; + + Regex ICJKDurationExtractorConfiguration.HalfRegex => HalfRegex; + + Regex ICJKDurationExtractorConfiguration.RelativeDurationUnitRegex => RelativeDurationUnitRegex; + + Regex ICJKDurationExtractorConfiguration.DuringRegex => DuringRegex; + + Regex ICJKDurationExtractorConfiguration.SomeRegex => SomeRegex; + + Regex ICJKDurationExtractorConfiguration.MoreOrLessRegex => MoreOrLessRegex; + + internal class DurationExtractorConfiguration : KoreanNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary DurationSuffixList = DateTimeDefinitions.DurationSuffixList.ToImmutableDictionary(); + + public DurationExtractorConfiguration() + : base(new CultureInfo(Text.Culture.Korean)) + { + } + + public override ImmutableDictionary SuffixList => DurationSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override string ExtractType => Constants.SYS_DATETIME_DURATION; + + public override ImmutableList AmbiguousUnitList => DateTimeDefinitions.DurationAmbiguousUnits.ToImmutableList(); + } + } +} \ No newline at end of file diff --git 
a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanHolidayExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanHolidayExtractorConfiguration.cs new file mode 100644 index 0000000000..07df3f7d0b --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanHolidayExtractorConfiguration.cs @@ -0,0 +1,31 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Korean; + +namespace Microsoft.Recognizers.Text.DateTime.Korean +{ + public class KoreanHolidayExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKHolidayExtractorConfiguration + { + + public static readonly Regex LunarHolidayRegex = new Regex(DateTimeDefinitions.LunarHolidayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex[] HolidayRegexList = + { + new Regex(DateTimeDefinitions.HolidayRegexList1, RegexFlags, RegexTimeOut), + new Regex(DateTimeDefinitions.HolidayRegexList2, RegexFlags, RegexTimeOut), + LunarHolidayRegex, + }; + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public KoreanHolidayExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + } + + public IEnumerable HolidayRegexes => HolidayRegexList; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanMergedExtractorConfiguration.cs new file mode 100644 index 0000000000..8a667b4f65 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanMergedExtractorConfiguration.cs @@ -0,0 +1,89 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.Korean; +using Microsoft.Recognizers.Definitions.Utilities; +using Microsoft.Recognizers.Text.Utilities; +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime.Korean +{ + public class KoreanMergedExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKMergedExtractorConfiguration + { + public static readonly Regex BeforeRegex = new Regex(DateTimeDefinitions.ParserConfigurationBefore, RegexFlags, RegexTimeOut); + public static readonly Regex UnspecificDatePeriodRegex = new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags, RegexTimeOut); + public static readonly Regex AfterRegex = new Regex(DateTimeDefinitions.ParserConfigurationAfter, RegexFlags, RegexTimeOut); + public static readonly Regex UntilRegex = new Regex(DateTimeDefinitions.ParserConfigurationUntil, RegexFlags, RegexTimeOut); + public static readonly Regex SincePrefixRegex = new Regex(DateTimeDefinitions.ParserConfigurationSincePrefix, RegexFlags, RegexTimeOut); + public static readonly Regex SinceSuffixRegex = new Regex(DateTimeDefinitions.ParserConfigurationSinceSuffix, RegexFlags, RegexTimeOut); + public static readonly Regex AroundPrefixRegex = new Regex(DateTimeDefinitions.ParserConfigurationAroundPrefix, RegexFlags, RegexTimeOut); + public static readonly Regex AroundSuffixRegex = new Regex(DateTimeDefinitions.ParserConfigurationAroundSuffix, RegexFlags, RegexTimeOut); + public static readonly Regex EqualRegex = new Regex(BaseDateTime.EqualRegex, RegexFlags, RegexTimeOut); + public static readonly Regex PotentialAmbiguousRangeRegex = new Regex(DateTimeDefinitions.FromToRegex, RegexFlags, RegexTimeOut); + public static readonly Regex AmbiguousRangeModifierPrefix = new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags, RegexTimeOut); + + private const 
RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public KoreanMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict); + + DateExtractor = new BaseCJKDateExtractor(new KoreanDateExtractorConfiguration(this)); + TimeExtractor = new BaseCJKTimeExtractor(new KoreanTimeExtractorConfiguration(this)); + DateTimeExtractor = new BaseCJKDateTimeExtractor(new KoreanDateTimeExtractorConfiguration(this)); + DatePeriodExtractor = new BaseCJKDatePeriodExtractor(new KoreanDatePeriodExtractorConfiguration(this)); + TimePeriodExtractor = new BaseCJKTimePeriodExtractor(new KoreanTimePeriodExtractorConfiguration(this)); + DateTimePeriodExtractor = new BaseCJKDateTimePeriodExtractor(new KoreanDateTimePeriodExtractorConfiguration(this)); + DurationExtractor = new BaseCJKDurationExtractor(new KoreanDurationExtractorConfiguration(this)); + SetExtractor = new BaseCJKSetExtractor(new KoreanSetExtractorConfiguration(this)); + HolidayExtractor = new BaseCJKHolidayExtractor(new KoreanHolidayExtractorConfiguration(this)); + } + + public IDateTimeExtractor DateExtractor { get; } + + public IDateTimeExtractor TimeExtractor { get; } + + public IDateTimeExtractor DateTimeExtractor { get; } + + public IDateTimeExtractor DatePeriodExtractor { get; } + + public IDateTimeExtractor TimePeriodExtractor { get; } + + public IDateTimeExtractor DateTimePeriodExtractor { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateTimeExtractor SetExtractor { get; } + + public IDateTimeExtractor HolidayExtractor { get; } + + Regex ICJKMergedExtractorConfiguration.AfterRegex => AfterRegex; + + Regex ICJKMergedExtractorConfiguration.BeforeRegex => BeforeRegex; + + Regex ICJKMergedExtractorConfiguration.UnspecificDatePeriodRegex => UnspecificDatePeriodRegex; + + Regex ICJKMergedExtractorConfiguration.SincePrefixRegex => 
SincePrefixRegex; + + Regex ICJKMergedExtractorConfiguration.SinceSuffixRegex => SinceSuffixRegex; + + Regex ICJKMergedExtractorConfiguration.AroundPrefixRegex => AroundPrefixRegex; + + Regex ICJKMergedExtractorConfiguration.AroundSuffixRegex => AroundSuffixRegex; + + Regex ICJKMergedExtractorConfiguration.UntilRegex => UntilRegex; + + Regex ICJKMergedExtractorConfiguration.EqualRegex => EqualRegex; + + Regex ICJKMergedExtractorConfiguration.PotentialAmbiguousRangeRegex => PotentialAmbiguousRangeRegex; + + Regex ICJKMergedExtractorConfiguration.AmbiguousRangeModifierPrefix => AmbiguousRangeModifierPrefix; + + public Dictionary AmbiguityFiltersDict { get; } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanSetExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanSetExtractorConfiguration.cs new file mode 100644 index 0000000000..062eef3efe --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanSetExtractorConfiguration.cs @@ -0,0 +1,70 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Korean; + +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime.Korean +{ + public class KoreanSetExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKSetExtractorConfiguration + { + public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.SetUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex EachUnitRegex = new Regex(DateTimeDefinitions.SetEachUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex EachPrefixRegex = new Regex(DateTimeDefinitions.SetEachPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex EachSuffixRegex = new Regex(DateTimeDefinitions.SetEachSuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex EachDayRegex = new Regex(DateTimeDefinitions.SetEachDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex EachDateUnitRegex = new Regex(DateTimeDefinitions.SetEachDateUnitRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public KoreanSetExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + DurationExtractor = new BaseCJKDurationExtractor(new KoreanDurationExtractorConfiguration(this)); + TimeExtractor = new BaseCJKTimeExtractor(new KoreanTimeExtractorConfiguration(this)); + DateExtractor = new BaseCJKDateExtractor(new KoreanDateExtractorConfiguration(this)); + DateTimeExtractor = new BaseCJKDateTimeExtractor(new KoreanDateTimeExtractorConfiguration(this)); + DatePeriodExtractor = new BaseCJKDatePeriodExtractor(new KoreanDatePeriodExtractorConfiguration(this)); + TimePeriodExtractor = new BaseCJKTimePeriodExtractor(new KoreanTimePeriodExtractorConfiguration(this)); + 
DateTimePeriodExtractor = new BaseCJKDateTimePeriodExtractor(new KoreanDateTimePeriodExtractorConfiguration(this)); + } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateTimeExtractor TimeExtractor { get; } + + public IDateTimeExtractor DateExtractor { get; } + + public IDateTimeExtractor DateTimeExtractor { get; } + + public IDateTimeExtractor DatePeriodExtractor { get; } + + public IDateTimeExtractor TimePeriodExtractor { get; } + + public IDateTimeExtractor DateTimePeriodExtractor { get; } + + Regex ICJKSetExtractorConfiguration.LastRegex => LastRegex; + + Regex ICJKSetExtractorConfiguration.EachPrefixRegex => EachPrefixRegex; + + Regex ICJKSetExtractorConfiguration.EachSuffixRegex => EachSuffixRegex; + + Regex ICJKSetExtractorConfiguration.EachUnitRegex => EachUnitRegex; + + Regex ICJKSetExtractorConfiguration.UnitRegex => UnitRegex; + + Regex ICJKSetExtractorConfiguration.EachDayRegex => EachDayRegex; + + Regex ICJKSetExtractorConfiguration.EachDateUnitRegex => EachDateUnitRegex; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanTimeExtractorConfiguration.cs new file mode 100644 index 0000000000..c86be06e42 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanTimeExtractorConfiguration.cs @@ -0,0 +1,88 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Korean; +using Microsoft.Recognizers.Definitions.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Korean +{ + public class KoreanTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKTimeExtractorConfiguration + { + public static readonly string HourNumRegex = DateTimeDefinitions.TimeHourNumRegex; + + public static readonly string MinuteNumRegex = DateTimeDefinitions.TimeMinuteNumRegex; + + public static readonly string SecondNumRegex = DateTimeDefinitions.TimeSecondNumRegex; + + public static readonly string HourCJKRegex = DateTimeDefinitions.TimeHourCJKRegex; + + public static readonly string MinuteCJKRegex = DateTimeDefinitions.TimeMinuteCJKRegex; + + public static readonly string SecondCJKRegex = DateTimeDefinitions.TimeSecondCJKRegex; + + public static readonly string ClockDescRegex = DateTimeDefinitions.TimeClockDescRegex; + + public static readonly string MinuteDescRegex = DateTimeDefinitions.TimeMinuteDescRegex; + + public static readonly string SecondDescRegex = DateTimeDefinitions.TimeSecondDescRegex; + + public static readonly string BanHourPrefixRegex = DateTimeDefinitions.TimeBanHourPrefixRegex; + + // e.g: 12시 + public static readonly string HourRegex = DateTimeDefinitions.TimeHourRegex; + + public static readonly string MinuteRegex = DateTimeDefinitions.TimeMinuteRegex; + + public static readonly string SecondRegex = DateTimeDefinitions.TimeSecondRegex; + + public static readonly string HalfRegex = DateTimeDefinitions.TimeHalfRegex; + + public static readonly string QuarterRegex = DateTimeDefinitions.TimeQuarterRegex; + + // e.g: 열두 쉰여덟 | 반 | 순간 + public static readonly string CJKTimeRegex = DateTimeDefinitions.TimeCJKTimeRegex; + + // e.g: 12:23 + public static readonly string DigitTimeRegex = DateTimeDefinitions.TimeDigitTimeRegex; + + // e.g: 오전 9시 + public static 
readonly string DayDescRegex = DateTimeDefinitions.TimeDayDescRegex; + + public static readonly string ApproximateDescPreffixRegex = DateTimeDefinitions.TimeApproximateDescPreffixRegex; + + public static readonly string ApproximateDescSuffixRegex = DateTimeDefinitions.TimeApproximateDescSuffixRegex; + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public KoreanTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + var regexes = new Dictionary + { + { + new Regex(DateTimeDefinitions.TimeRegexes1, RegexFlags, RegexTimeOut), + TimeType.CjkTime + }, + { + new Regex(DateTimeDefinitions.TimeRegexes2, RegexFlags, RegexTimeOut), + TimeType.DigitTime + }, + { + new Regex(DateTimeDefinitions.TimeRegexes3, RegexFlags, RegexTimeOut), + TimeType.LessTime + }, + }; + Regexes = regexes.ToImmutableDictionary(); + AmbiguityTimeFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityTimeFiltersDict); + } + + public ImmutableDictionary Regexes { get; } + + public Dictionary AmbiguityTimeFiltersDict { get; } + + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanTimePeriodExtractorConfiguration.cs new file mode 100644 index 0000000000..4fef7ba3be --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanTimePeriodExtractorConfiguration.cs @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Definitions.Korean;
+using Microsoft.Recognizers.Definitions.Utilities;
+
+namespace Microsoft.Recognizers.Text.DateTime.Korean
+{
+    // Korean time-period extractor settings: pattern strings from DateTimeDefinitions, the regex -> PeriodType table and the ambiguity filters.
+    public class KoreanTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, ICJKTimePeriodExtractorConfiguration
+    {
+        public const string TimePeriodConnectWords = DateTimeDefinitions.TimePeriodTimePeriodConnectWords;
+
+        // 다섯 점 충분히 마흔 여덟 초
+        public static readonly string CJKTimeRegex = KoreanTimeExtractorConfiguration.CJKTimeRegex;
+
+        // 6 ~ 9시 | 6 ~ 9시
+        public static readonly string LeftCJKTimeRegex = DateTimeDefinitions.TimePeriodLeftCJKTimeRegex;
+
+        public static readonly string RightCJKTimeRegex = DateTimeDefinitions.TimePeriodRightCJKTimeRegex;
+
+        // 2:45
+        public static readonly string DigitTimeRegex = KoreanTimeExtractorConfiguration.DigitTimeRegex;
+
+        public static readonly string LeftDigitTimeRegex = DateTimeDefinitions.TimePeriodLeftDigitTimeRegex;
+
+        public static readonly string RightDigitTimeRegex = DateTimeDefinitions.TimePeriodRightDigitTimeRegex;
+
+        public static readonly string ShortLeftCJKTimeRegex = DateTimeDefinitions.TimePeriodShortLeftCJKTimeRegex;
+
+        public static readonly string ShortLeftDigitTimeRegex = DateTimeDefinitions.TimePeriodShortLeftDigitTimeRegex;
+
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
+
+        public KoreanTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+            var regexes = new Dictionary<Regex, PeriodType>
+            {
+                {
+                    new Regex(DateTimeDefinitions.TimePeriodRegexes1, RegexFlags, RegexTimeOut),
+                    PeriodType.FullTime
+                },
+                {
+                    new Regex(DateTimeDefinitions.TimePeriodRegexes2, RegexFlags, RegexTimeOut),
+                    PeriodType.ShortTime
+                },
+                {
+                    new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut),
+                    PeriodType.ShortTime
+                },
+            };
+            Regexes = regexes.ToImmutableDictionary();
+            // Load the ambiguity filters once here rather than on every property read.
+            AmbiguityTimePeriodFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityTimePeriodFiltersDict);
+        }
+
+        public ImmutableDictionary<Regex, PeriodType> Regexes { get; }
+
+        public Dictionary<Regex, Regex> AmbiguityTimePeriodFiltersDict { get; }
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanCommonDateTimeParserConfiguration.cs
new file mode 100644
index 0000000000..8a9230ac44
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanCommonDateTimeParserConfiguration.cs
@@ -0,0 +1,63 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Linq;
+using Microsoft.Recognizers.Definitions;
+using Microsoft.Recognizers.Definitions.Korean;
+using Microsoft.Recognizers.Text.DateTime.Korean;
+using Microsoft.Recognizers.Text.Number;
+using Microsoft.Recognizers.Text.Number.Korean;
+
+namespace Microsoft.Recognizers.Text.DateTime.Korean
+{
+    // Wires up the maps, number recognizers, extractors and parsers shared by the Korean date-time parser configurations.
+    public class KoreanCommonDateTimeParserConfiguration : BaseCJKDateParserConfiguration, ICJKCommonDateTimeParserConfiguration
+    {
+        public KoreanCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+            UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToImmutableDictionary();
+            UnitValueMap = DateTimeDefinitions.DurationUnitValueMap.ToImmutableDictionary();
+            CardinalMap = DateTimeDefinitions.ParserConfigurationCardinalMap.ToImmutableDictionary();
+            DayOfMonth = DateTimeDefinitions.ParserConfigurationDayOfMonth.ToImmutableDictionary();
+            DayOfWeek = DateTimeDefinitions.ParserConfigurationDayOfWeek.ToImmutableDictionary();
+            MonthOfYear = DateTimeDefinitions.ParserConfigurationMonthOfYear.ToImmutableDictionary();
+
+            // Carry the NoProtoCache flag over from the date-time options to the number recognizers.
+            var numOptions = (config.Options & DateTimeOptions.NoProtoCache) != 0 ? NumberOptions.NoProtoCache : NumberOptions.None;
+
+            var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions);
+
+            IntegerExtractor = new IntegerExtractor(numConfig);
+            CardinalExtractor = new CardinalExtractor(numConfig);
+            OrdinalExtractor = Number.Korean.OrdinalExtractor.GetInstance(numConfig);
+
+            NumberParser = new BaseCJKNumberParser(new KoreanNumberParserConfiguration(numConfig));
+
+            // Do not change order. The order of initialization can lead to side-effects
+            DateExtractor = new BaseCJKDateExtractor(new KoreanDateExtractorConfiguration(this));
+            TimeExtractor = new BaseCJKTimeExtractor(new KoreanTimeExtractorConfiguration(this));
+            DateTimeExtractor = new BaseCJKDateTimeExtractor(new KoreanDateTimeExtractorConfiguration(this));
+            DurationExtractor = new BaseCJKDurationExtractor(new KoreanDurationExtractorConfiguration(this));
+            DatePeriodExtractor = new BaseCJKDatePeriodExtractor(new KoreanDatePeriodExtractorConfiguration(this));
+            TimePeriodExtractor = new BaseCJKTimePeriodExtractor(new KoreanTimePeriodExtractorConfiguration(this));
+            DateTimePeriodExtractor = new BaseCJKDateTimePeriodExtractor(new KoreanDateTimePeriodExtractorConfiguration(this));
+            // NOTE(review): HolidayExtractor and SetExtractor are given duration extractors, not holiday/set ones.
+            // This looks like a placeholder - confirm whether dedicated Korean holiday/set extractors should be used.
+            HolidayExtractor = new BaseCJKDurationExtractor(new KoreanDurationExtractorConfiguration(this));
+            SetExtractor = new BaseCJKDurationExtractor(new KoreanDurationExtractorConfiguration(this));
+
+            DurationParser = new BaseCJKDurationParser(new KoreanDurationParserConfiguration(this));
+            DateParser = new BaseCJKDateParser(new KoreanDateParserConfiguration(this));
+            TimeParser = new BaseCJKTimeParser(new KoreanTimeParserConfiguration(this));
+            DateTimeParser = new BaseCJKDateTimeParser(new KoreanDateTimeParserConfiguration(this));
+            DatePeriodParser = new BaseCJKDatePeriodParser(new KoreanDatePeriodParserConfiguration(this));
+            TimePeriodParser = new BaseCJKTimePeriodParser(new KoreanTimePeriodParserConfiguration(this));
+            DateTimePeriodParser = new BaseCJKDateTimePeriodParser(new KoreanDateTimePeriodParserConfiguration(this));
+            HolidayParser = new BaseCJKHolidayParser(new KoreanHolidayParserConfiguration(this));
+            SetParser = new BaseCJKSetParser(new KoreanSetParserConfiguration(this));
+        }
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanDateParserConfiguration.cs new file mode 100644 index 0000000000..9da05d8bbc --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanDateParserConfiguration.cs @@ -0,0 +1,172 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Korean; + +namespace Microsoft.Recognizers.Text.DateTime.Korean +{ + public class KoreanDateParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKDateParserConfiguration + { + public static readonly Regex NextMonthRegex = new Regex(DateTimeDefinitions.ParserConfigurationNextMonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastMonthRegex = new Regex(DateTimeDefinitions.ParserConfigurationLastMonthRegex, RegexFlags, RegexTimeOut); + public static readonly Regex LastWeekDayRegex = new Regex(DateTimeDefinitions.ParserConfigurationLastWeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly string ParserName = Constants.SYS_DATETIME_DATE; // "Date"; + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public KoreanDateParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) + { + IntegerExtractor = config.IntegerExtractor; + OrdinalExtractor = config.OrdinalExtractor; + + NumberParser = config.NumberParser; + + DateExtractor = 
config.DateExtractor; + DurationExtractor = config.DurationExtractor; + DurationParser = config.DurationParser; + + DateRegexList = new KoreanDateExtractorConfiguration(this).DateRegexList; + SpecialDate = KoreanDateExtractorConfiguration.SpecialDate; + NextRe = KoreanDateExtractorConfiguration.NextRe; + LastRe = KoreanDateExtractorConfiguration.LastRe; + SpecialDayRegex = KoreanDateExtractorConfiguration.SpecialDayRegex; + StrictWeekDayRegex = KoreanDateExtractorConfiguration.WeekDayRegex; + LunarRegex = KoreanDateExtractorConfiguration.LunarRegex; + UnitRegex = KoreanDateExtractorConfiguration.UnitRegex; + BeforeRegex = KoreanDateExtractorConfiguration.BeforeRegex; + AfterRegex = KoreanDateExtractorConfiguration.AfterRegex; + DynastyYearRegex = KoreanDateExtractorConfiguration.DynastyYearRegex; + DynastyStartYear = KoreanDateExtractorConfiguration.DynastyStartYear; + DynastyYearMap = KoreanDateExtractorConfiguration.DynastyYearMap; + NextRegex = KoreanDateExtractorConfiguration.NextRegex; + ThisRegex = KoreanDateExtractorConfiguration.ThisRegex; + LastRegex = KoreanDateExtractorConfiguration.LastRegex; + WeekDayOfMonthRegex = KoreanDateExtractorConfiguration.WeekDayOfMonthRegex; + WeekDayAndDayRegex = KoreanDateExtractorConfiguration.WeekDayAndDayRegex; + DurationRelativeDurationUnitRegex = KoreanDateExtractorConfiguration.DurationRelativeDurationUnitRegex; + SpecialDayWithNumRegex = KoreanDateExtractorConfiguration.SpecialDayWithNumRegex; + + CardinalMap = config.CardinalMap; + UnitMap = config.UnitMap; + DayOfMonth = config.DayOfMonth; + DayOfWeek = config.DayOfWeek; + MonthOfYear = config.MonthOfYear; + + } + + public IExtractor IntegerExtractor { get; } + + public IExtractor OrdinalExtractor { get; } + + public IParser NumberParser { get; } + + public IDateTimeExtractor DateExtractor { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateTimeParser DurationParser { get; } + + public IEnumerable DateRegexList { get; } + + public 
Regex SpecialDate { get; } + + public Regex NextRe { get; } + + public Regex LastRe { get; } + + public Regex SpecialDayRegex { get; } + + public Regex StrictWeekDayRegex { get; } + + public Regex LunarRegex { get; } + + public Regex UnitRegex { get; } + + public Regex BeforeRegex { get; } + + public Regex AfterRegex { get; } + + public Regex NextRegex { get; } + + public Regex ThisRegex { get; } + + public Regex LastRegex { get; } + + public Regex WeekDayOfMonthRegex { get; } + + public Regex WeekDayAndDayRegex { get; } + + public Regex DurationRelativeDurationUnitRegex { get; } + + public Regex SpecialDayWithNumRegex { get; } + + public Regex DynastyYearRegex { get; } + + public ImmutableDictionary DynastyYearMap { get; } + + public IImmutableDictionary CardinalMap { get; } + + public IImmutableDictionary UnitMap { get; } + + public IImmutableDictionary DayOfMonth { get; } + + public IImmutableDictionary DayOfWeek { get; } + + public IImmutableDictionary MonthOfYear { get; } + + public string DynastyStartYear { get; } + + Regex ICJKDateParserConfiguration.LastWeekDayRegex => LastWeekDayRegex; + + Regex ICJKDateParserConfiguration.NextMonthRegex => NextMonthRegex; + + Regex ICJKDateParserConfiguration.LastMonthRegex => LastMonthRegex; + + public int GetSwiftDay(string text) + { + var value = 0; + + // @TODO move hardcoded values to resources file + if (text.Equals("今天", StringComparison.Ordinal) || + text.Equals("今日", StringComparison.Ordinal) || + text.Equals("最近", StringComparison.Ordinal)) + { + value = 0; + } + else if (text.StartsWith("明", StringComparison.Ordinal)) + { + value = 1; + } + else if (text.StartsWith("昨", StringComparison.Ordinal)) + { + value = -1; + } + else if (text.Equals("大后天", StringComparison.Ordinal) || + text.Equals("大後天", StringComparison.Ordinal)) + { + value = 3; + } + else if (text.Equals("大前天", StringComparison.Ordinal)) + { + value = -3; + } + else if (text.Equals("后天", StringComparison.Ordinal) || + text.Equals("後天", 
StringComparison.Ordinal)) + { + value = 2; + } + else if (text.Equals("前天", StringComparison.Ordinal)) + { + value = -2; + } + + return value; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanDatePeriodParserConfiguration.cs new file mode 100644 index 0000000000..9b2f107545 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanDatePeriodParserConfiguration.cs @@ -0,0 +1,331 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Globalization; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Korean; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Korean; +using Microsoft.Recognizers.Text.Utilities; +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime.Korean +{ + public class KoreanDatePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKDatePeriodParserConfiguration + { + + public static readonly Regex WoMLastRegex = new Regex(DateTimeDefinitions.WoMLastRegex, RegexFlags, RegexTimeOut); + public static readonly Regex WoMPreviousRegex = new Regex(DateTimeDefinitions.WoMPreviousRegex, RegexFlags, RegexTimeOut); + public static readonly Regex WoMNextRegex = new Regex(DateTimeDefinitions.WoMNextRegex, RegexFlags, RegexTimeOut); + + public static readonly ImmutableDictionary MonthOfYear = DateTimeDefinitions.ParserConfigurationMonthOfYear.ToImmutableDictionary(); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public KoreanDatePeriodParserConfiguration(ICJKCommonDateTimeParserConfiguration config) + : base(config) + { + IntegerExtractor = config.IntegerExtractor; + 
CardinalExtractor = config.CardinalExtractor; + NumberParser = config.NumberParser; + DateExtractor = config.DateExtractor; + DurationExtractor = config.DurationExtractor; + DurationParser = config.DurationParser; + DateParser = config.DateParser; + + DynastyYearRegex = KoreanDateExtractorConfiguration.DynastyYearRegex; + DynastyStartYear = KoreanDateExtractorConfiguration.DynastyStartYear; + DynastyYearMap = KoreanDateExtractorConfiguration.DynastyYearMap; + SimpleCasesRegex = KoreanDatePeriodExtractorConfiguration.SimpleCasesRegex; + ThisRegex = KoreanDatePeriodExtractorConfiguration.ThisRegex; + NextRegex = KoreanDatePeriodExtractorConfiguration.NextRegex; + LastRegex = KoreanDatePeriodExtractorConfiguration.LastRegex; + YearToYear = KoreanDatePeriodExtractorConfiguration.YearToYear; + YearToYearSuffixRequired = KoreanDatePeriodExtractorConfiguration.YearToYearSuffixRequired; + YearRegex = KoreanDatePeriodExtractorConfiguration.YearRegex; + YearInCJKRegex = KoreanDatePeriodExtractorConfiguration.YearInCJKRegex; + MonthToMonth = KoreanDatePeriodExtractorConfiguration.MonthToMonth; + MonthToMonthSuffixRequired = KoreanDatePeriodExtractorConfiguration.MonthToMonthSuffixRequired; + DayToDay = KoreanDatePeriodExtractorConfiguration.DayToDay; + MonthDayRange = KoreanDatePeriodExtractorConfiguration.MonthDayRange; + DayRegexForPeriod = KoreanDatePeriodExtractorConfiguration.DayRegexForPeriod; + MonthRegex = KoreanDatePeriodExtractorConfiguration.MonthRegex; + SpecialMonthRegex = KoreanDatePeriodExtractorConfiguration.SpecialMonthRegex; + SpecialYearRegex = KoreanDatePeriodExtractorConfiguration.SpecialYearRegex; + YearAndMonth = KoreanDatePeriodExtractorConfiguration.YearAndMonth; + PureNumYearAndMonth = KoreanDatePeriodExtractorConfiguration.PureNumYearAndMonth; + SimpleYearAndMonth = KoreanDatePeriodExtractorConfiguration.SimpleYearAndMonth; + OneWordPeriodRegex = KoreanDatePeriodExtractorConfiguration.OneWordPeriodRegex; + NumberCombinedWithUnit = 
KoreanDatePeriodExtractorConfiguration.NumberCombinedWithUnit; + PastRegex = KoreanDatePeriodExtractorConfiguration.PastRegex; + FutureRegex = KoreanDatePeriodExtractorConfiguration.FutureRegex; + WeekWithWeekDayRangeRegex = KoreanDatePeriodExtractorConfiguration.WeekWithWeekDayRangeRegex; + UnitRegex = KoreanDatePeriodExtractorConfiguration.UnitRegex; + DurationUnitRegex = KoreanDatePeriodExtractorConfiguration.DurationUnitRegex; + WeekOfMonthRegex = KoreanDatePeriodExtractorConfiguration.WeekOfMonthRegex; + WeekOfYearRegex = KoreanDatePeriodExtractorConfiguration.WeekOfYearRegex; + WeekOfDateRegex = KoreanDatePeriodExtractorConfiguration.WeekOfDateRegex; + MonthOfDateRegex = KoreanDatePeriodExtractorConfiguration.MonthOfDateRegex; + WhichWeekRegex = KoreanDatePeriodExtractorConfiguration.WhichWeekRegex; + FirstLastOfYearRegex = KoreanDatePeriodExtractorConfiguration.FirstLastOfYearRegex; + SeasonWithYear = KoreanDatePeriodExtractorConfiguration.SeasonWithYear; + QuarterRegex = KoreanDatePeriodExtractorConfiguration.QuarterRegex; + DecadeRegex = KoreanDatePeriodExtractorConfiguration.DecadeRegex; + CenturyRegex = KoreanDatePeriodExtractorConfiguration.CenturyRegex; + RelativeMonthRegex = KoreanDateExtractorConfiguration.RelativeMonthRegex; + LaterEarlyPeriodRegex = KoreanDatePeriodExtractorConfiguration.LaterEarlyPeriodRegex; + DatePointWithAgoAndLater = KoreanDatePeriodExtractorConfiguration.DatePointWithAgoAndLater; + ReferenceDatePeriodRegex = KoreanDatePeriodExtractorConfiguration.ReferenceDatePeriodRegex; + ComplexDatePeriodRegex = KoreanDatePeriodExtractorConfiguration.ComplexDatePeriodRegex; + DurationRelativeDurationUnitRegex = KoreanDateExtractorConfiguration.DurationRelativeDurationUnitRegex; + RelativeRegex = KoreanDateExtractorConfiguration.RelativeRegex; + UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToImmutableDictionary(); + CardinalMap = DateTimeDefinitions.ParserConfigurationCardinalMap.ToImmutableDictionary(); + DayOfMonth = 
DateTimeDefinitions.ParserConfigurationDayOfMonth.ToImmutableDictionary(); + SeasonMap = DateTimeDefinitions.ParserConfigurationSeasonMap.ToImmutableDictionary(); + + } + + public IDateTimeExtractor DateExtractor { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateTimeParser DurationParser { get; } + + public IDateTimeParser DateParser { get; } + + public IExtractor IntegerExtractor { get; } + + public IExtractor CardinalExtractor { get; } + + public IParser NumberParser { get; } + + public ImmutableDictionary DynastyYearMap { get; } + + public IImmutableDictionary UnitMap { get; } + + public IImmutableDictionary CardinalMap { get; } + + public IImmutableDictionary DayOfMonth { get; } + + IImmutableDictionary ICJKDatePeriodParserConfiguration.MonthOfYear => MonthOfYear; + + public IImmutableDictionary SeasonMap { get; } + + public string DynastyStartYear { get; } + + public string TokenBeforeDate => string.Empty; + + public Regex DynastyYearRegex { get; } + + public Regex SimpleCasesRegex { get; } + + public Regex ThisRegex { get; } + + public Regex NextRegex { get; } + + public Regex LastRegex { get; } + + public Regex YearToYear { get; } + + public Regex YearToYearSuffixRequired { get; } + + public Regex YearRegex { get; } + + public Regex RelativeRegex { get; } + + public Regex RelativeMonthRegex { get; } + + public Regex LaterEarlyPeriodRegex { get; } + + public Regex DatePointWithAgoAndLater { get; } + + public Regex ReferenceDatePeriodRegex { get; } + + public Regex ComplexDatePeriodRegex { get; } + + public Regex DurationRelativeDurationUnitRegex { get; } + + public Regex YearInCJKRegex { get; } + + public Regex MonthToMonth { get; } + + public Regex MonthToMonthSuffixRequired { get; } + + public Regex MonthRegex { get; } + + public Regex YearAndMonth { get; } + + public Regex PureNumYearAndMonth { get; } + + public Regex OneWordPeriodRegex { get; } + + public Regex NumberCombinedWithUnit { get; } + + public Regex PastRegex { 
get; } + + public Regex FutureRegex { get; } + + public Regex WeekWithWeekDayRangeRegex { get; } + + public Regex UnitRegex { get; } + + public Regex DurationUnitRegex { get; } + + public Regex WeekOfMonthRegex { get; } + + public Regex WeekOfYearRegex { get; } + + public Regex WeekOfDateRegex { get; } + + public Regex MonthOfDateRegex { get; } + + public Regex WhichWeekRegex { get; } + + public Regex FirstLastOfYearRegex { get; } + + public Regex SeasonWithYear { get; } + + public Regex QuarterRegex { get; } + + public Regex DecadeRegex { get; } + + public Regex CenturyRegex { get; } + + public Regex DayToDay { get; } + + public Regex MonthDayRange { get; } + + public Regex DayRegexForPeriod { get; } + + public Regex SimpleYearAndMonth { get; } + + public Regex SpecialMonthRegex { get; } + + public Regex SpecialYearRegex { get; } + + Regex ICJKDatePeriodParserConfiguration.WoMLastRegex => WoMLastRegex; + + Regex ICJKDatePeriodParserConfiguration.WoMPreviousRegex => WoMPreviousRegex; + + Regex ICJKDatePeriodParserConfiguration.WoMNextRegex => WoMNextRegex; + + public int TwoNumYear => int.Parse(DateTimeDefinitions.TwoNumYear, CultureInfo.InvariantCulture); + + public int ToMonthNumber(string monthStr) + { + return MonthOfYear[monthStr] > 12 ? 
MonthOfYear[monthStr] % 12 : MonthOfYear[monthStr]; + } + + public bool IsMonthOnly(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); + } + + public bool IsWeekend(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); + } + + public bool IsWeekOnly(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); + } + + public bool IsYearOnly(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); + } + + public bool IsThisYear(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.ThisYearTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } + + public bool IsYearToDate(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } + + public bool IsLastYear(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.LastYearTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } + + public bool IsNextYear(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.NextYearTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } + + public bool IsYearAfterNext(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.YearAfterNextTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } + + public bool IsYearBeforeLast(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.YearBeforeLastTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } + + public int GetSwiftMonth(string text) + { + // Current month: 今月 + var value = 0; + + 
// @TODO move hardcoded values to resources file + + if (text.Equals("来月", StringComparison.Ordinal)) + { + value = 1; + } + else if (text.Equals("前月", StringComparison.Ordinal) || + text.Equals("先月", StringComparison.Ordinal) || + text.Equals("昨月", StringComparison.Ordinal) || + text.Equals("先々月", StringComparison.Ordinal)) + { + value = -1; + } + else if (text.Equals("再来月", StringComparison.Ordinal)) + { + value = 2; + } + + return value; + } + + public int GetSwiftYear(string text) + { + // Current year: 今年 + var value = 0; + + // @TODO move hardcoded values to resources file + + if (text.Equals("来年", StringComparison.Ordinal) || + text.Equals("らいねん", StringComparison.Ordinal)) + { + value = 1; + } + else if (text.Equals("昨年", StringComparison.Ordinal) || + text.Equals("前年", StringComparison.Ordinal)) + { + value = -1; + } + + return value; + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanDateTimeParserConfiguration.cs new file mode 100644 index 0000000000..b80d712c2e --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanDateTimeParserConfiguration.cs @@ -0,0 +1,206 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+
+using System;
+using System.Collections.Immutable;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Definitions.Korean;
+
+namespace Microsoft.Recognizers.Text.DateTime.Korean
+{
+    // Korean date-time parser settings: copies extractors/parsers from the common configuration and regexes from the date-time extractor configuration.
+    public class KoreanDateTimeParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKDateTimeParserConfiguration
+    {
+        public static readonly Regex LunarRegex = new Regex(DateTimeDefinitions.LunarRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex LunarHolidayRegex = new Regex(DateTimeDefinitions.LunarHolidayRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SimpleAmRegex = new Regex(DateTimeDefinitions.DateTimeSimpleAmRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SimplePmRegex = new Regex(DateTimeDefinitions.DateTimeSimplePmRegex, RegexFlags, RegexTimeOut);
+
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
+
+        public KoreanDateTimeParserConfiguration(ICJKCommonDateTimeParserConfiguration config)
+            : base(config)
+        {
+            IntegerExtractor = config.IntegerExtractor;
+            DateExtractor = config.DateExtractor;
+            TimeExtractor = config.TimeExtractor;
+            DurationExtractor = config.DurationExtractor;
+            DateParser = config.DateParser;
+            DurationParser = config.DurationParser; // was never assigned, so the property was always null
+            TimeParser = config.TimeParser;
+            NumberParser = config.NumberParser;
+
+            UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToImmutableDictionary();
+            NowRegex = KoreanDateTimeExtractorConfiguration.NowRegex;
+            TimeOfSpecialDayRegex = KoreanDateTimeExtractorConfiguration.TimeOfSpecialDayRegex;
+            DateTimePeriodUnitRegex = KoreanDateTimeExtractorConfiguration.DateTimePeriodUnitRegex;
+            BeforeRegex = KoreanDateTimeExtractorConfiguration.BeforeRegex;
+            AfterRegex = KoreanDateTimeExtractorConfiguration.AfterRegex;
+            DurationRelativeDurationUnitRegex = KoreanDateTimeExtractorConfiguration.DurationRelativeDurationUnitRegex;
+            AgoLaterRegex = KoreanDateTimeExtractorConfiguration.AgoLaterRegex;
+        }
+
+        public IDateTimeExtractor DateExtractor { get; }
+
+        public IDateTimeExtractor TimeExtractor { get; }
+
+        public IDateTimeExtractor DurationExtractor { get; }
+
+        public IDateTimeParser DateParser { get; }
+
+        public IDateTimeParser DurationParser { get; }
+
+        public IDateTimeParser TimeParser { get; }
+
+        public IExtractor IntegerExtractor { get; }
+
+        public IParser NumberParser { get; }
+
+        public ImmutableDictionary<string, string> UnitMap { get; }
+
+        public Regex NowRegex { get; }
+
+        public Regex TimeOfSpecialDayRegex { get; }
+
+        public Regex DateTimePeriodUnitRegex { get; }
+
+        public Regex BeforeRegex { get; }
+
+        public Regex AfterRegex { get; }
+
+        public Regex DurationRelativeDurationUnitRegex { get; }
+
+        public Regex AgoLaterRegex { get; }
+
+        Regex ICJKDateTimeParserConfiguration.LunarRegex => LunarRegex;
+
+        Regex ICJKDateTimeParserConfiguration.LunarHolidayRegex => LunarHolidayRegex;
+
+        Regex ICJKDateTimeParserConfiguration.SimpleAmRegex => SimpleAmRegex;
+
+        Regex ICJKDateTimeParserConfiguration.SimplePmRegex => SimplePmRegex;
+
+        public bool GetMatchedNowTimex(string text, out string timex)
+        {
+            var trimmedText = text.Trim();
+
+            // @TODO move hardcoded values to resources file. NOTE(review): literals are Han-script, not Hangul - confirm the Korean terms.
+            if (trimmedText.EndsWith("现在", StringComparison.Ordinal))
+            {
+                timex = "PRESENT_REF";
+            }
+            else if (trimmedText.Equals("刚刚才", StringComparison.Ordinal) ||
+                     trimmedText.Equals("刚刚", StringComparison.Ordinal) ||
+                     trimmedText.Equals("刚才", StringComparison.Ordinal))
+            {
+                timex = "PAST_REF";
+            }
+            else if (trimmedText.Equals("立刻", StringComparison.Ordinal) ||
+                     trimmedText.Equals("马上", StringComparison.Ordinal))
+            {
+                timex = "FUTURE_REF";
+            }
+            else
+            {
+                timex = null;
+                return false;
+            }
+
+            return true;
+        }
+
+        public int GetSwiftDay(string text)
+        {
+            var value = 0;
+
+            // @TODO move hardcoded values to resources file. Returns the day offset for the matched term; 0 when nothing matches.
+            if (text.Equals("今天", StringComparison.Ordinal) ||
+                text.Equals("今日", StringComparison.Ordinal) ||
+                text.Equals("最近", StringComparison.Ordinal))
+            {
+                value = 0;
+            }
+            else if (text.StartsWith("明", StringComparison.Ordinal))
+            {
+                value = 1;
+            }
+            else if (text.StartsWith("昨", StringComparison.Ordinal))
+            {
+                value = -1;
+            }
+            else if (text.Equals("大后天", StringComparison.Ordinal) ||
+                     text.Equals("大後天", StringComparison.Ordinal))
+            {
+                value = 3;
+            }
+            else if (text.Equals("大前天", StringComparison.Ordinal))
+            {
+                value = -3;
+            }
+            else if (text.Equals("后天", StringComparison.Ordinal) ||
+                     text.Equals("後天", StringComparison.Ordinal))
+            {
+                value = 2;
+            }
+            else if (text.Equals("前天", StringComparison.Ordinal))
+            {
+                value = -2;
+            }
+
+            return value;
+        }
+
+        public void AdjustByTimeOfDay(string matchStr, ref int hour, ref int swift)
+        {
+            // @TODO move hardcoded values to resources file. Shifts hour by half a day and sets the day offset for the matched term.
+            switch (matchStr)
+            {
+                case "今晚":
+                    if (hour < Constants.HalfDayHourCount)
+                    {
+                        hour += Constants.HalfDayHourCount;
+                    }
+
+                    break;
+                case "今早":
+                case "今晨":
+                    if (hour >= Constants.HalfDayHourCount)
+                    {
+                        hour -= Constants.HalfDayHourCount;
+                    }
+
+                    break;
+                case "明晚":
+                    swift = 1;
+                    if (hour < Constants.HalfDayHourCount)
+                    {
+                        hour += Constants.HalfDayHourCount;
+                    }
+
+                    break;
+                case "明早":
+                case "明晨":
+                    swift = 1;
+                    if (hour >= Constants.HalfDayHourCount)
+                    {
+                        hour -= Constants.HalfDayHourCount;
+                    }
+
+                    break;
+                case "昨晚":
+                    swift = -1;
+                    if (hour < Constants.HalfDayHourCount)
+                    {
+                        hour += Constants.HalfDayHourCount;
+                    }
+
+                    break;
+                default:
+                    break;
+            }
+        }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanDateTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanDateTimePeriodParserConfiguration.cs
new file mode 100644
index 0000000000..94bd07f04e
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanDateTimePeriodParserConfiguration.cs
@@ -0,0 +1,201 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Globalization;
+using System.Text;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Definitions.Korean;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+using Microsoft.Recognizers.Text.Number;
+using Microsoft.Recognizers.Text.Number.Korean;
+using Microsoft.Recognizers.Text.Utilities;
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime.Korean
+{
+    // Korean date-time-period parser settings: shared extractors/parsers, period regexes and time-of-day resolution.
+    public class KoreanDateTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKDateTimePeriodParserConfiguration
+    {
+        // Time-of-day regexes. GetMatchedTimeRangeAndSwift maps them to Constants values:
+        // MO -> Morning, MI -> MidDay, AF -> Afternoon, EV -> Evening, NI -> Night.
+        public static readonly Regex MORegex = new Regex(DateTimeDefinitions.DateTimePeriodMORegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex MIRegex = new Regex(DateTimeDefinitions.DateTimePeriodMIRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex AFRegex = new Regex(DateTimeDefinitions.DateTimePeriodAFRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex EVRegex = new Regex(DateTimeDefinitions.DateTimePeriodEVRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex NIRegex = new Regex(DateTimeDefinitions.DateTimePeriodNIRegex, RegexFlags, RegexTimeOut);
+
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
+
+        public KoreanDateTimePeriodParserConfiguration(ICJKCommonDateTimeParserConfiguration config)
+            : base(config)
+        {
+            // Carry the NoProtoCache flag over from the date-time options to the number parser configuration.
+            var numOptions = (config.Options & DateTimeOptions.NoProtoCache) != 0 ? NumberOptions.NoProtoCache : NumberOptions.None;
+
+            var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions);
+
+            CardinalExtractor = config.CardinalExtractor;
+            CardinalParser = AgnosticNumberParserFactory.GetParser(
+                AgnosticNumberParserType.Cardinal, new KoreanNumberParserConfiguration(numConfig));
+
+            DateExtractor = config.DateExtractor;
+            DurationExtractor = config.DurationExtractor;
+            DateTimeExtractor = config.DateTimeExtractor;
+            TimeExtractor = config.TimeExtractor;
+            TimePeriodExtractor = config.TimePeriodExtractor;
+            DateParser = config.DateParser;
+            TimeParser = config.TimeParser;
+            DateTimeParser = config.DateTimeParser;
+            TimePeriodParser = config.TimePeriodParser;
+            DurationParser = config.DurationParser;
+
+            SpecificTimeOfDayRegex = KoreanDateTimePeriodExtractorConfiguration.SpecificTimeOfDayRegex;
+            TimeOfDayRegex = KoreanDateTimePeriodExtractorConfiguration.TimeOfDayRegex;
+            NextRegex = KoreanDateTimePeriodExtractorConfiguration.NextRegex;
+            LastRegex = KoreanDateTimePeriodExtractorConfiguration.LastRegex;
+            PastRegex = KoreanDateTimePeriodExtractorConfiguration.PastRegex;
+            FutureRegex = KoreanDateTimePeriodExtractorConfiguration.FutureRegex;
+            WeekDayRegex = KoreanDateTimePeriodExtractorConfiguration.WeekDayRegex;
+            TimePeriodLeftRegex = KoreanDateTimePeriodExtractorConfiguration.TimePeriodLeftRegex;
+            UnitRegex = KoreanDateTimePeriodExtractorConfiguration.UnitRegex;
+            RestOfDateRegex = KoreanDateTimePeriodExtractorConfiguration.RestOfDateRegex;
+            AmPmDescRegex = KoreanDateTimePeriodExtractorConfiguration.AmPmDescRegex;
+            UnitMap = config.UnitMap;
+        }
+
+        public IDateTimeExtractor DateExtractor { get; }
+
+        public IDateTimeExtractor TimeExtractor { get; }
+
+        public IDateTimeExtractor DateTimeExtractor { get; }
+
+        public IDateTimeExtractor TimePeriodExtractor { get; }
+
+        public IExtractor CardinalExtractor { get; }
+
+        public IDateTimeExtractor DurationExtractor { get; }
+
+        public IParser CardinalParser { get; }
+
+        public IDateTimeParser DateParser { get; }
+
+        public IDateTimeParser TimeParser { get; }
+
+        public IDateTimeParser DateTimeParser { get; }
+
+        public IDateTimeParser TimePeriodParser { get; }
+
+        public IDateTimeParser DurationParser { get; }
+
+        public Regex SpecificTimeOfDayRegex { get; }
+
+        public Regex TimeOfDayRegex { get; }
+
+        public Regex NextRegex { get; }
+
+        public Regex LastRegex { get; }
+
+        public Regex PastRegex { get; }
+
+        public Regex FutureRegex { get; }
+
+        public Regex WeekDayRegex { get; }
+
+        public Regex TimePeriodLeftRegex { get; }
+
+        public Regex UnitRegex { get; }
+
+        public Regex RestOfDateRegex { get; }
+
+        public Regex AmPmDescRegex { get; }
+
+        public IImmutableDictionary<string, string> UnitMap { get; }
+
+        public bool GetMatchedTimeRangeAndSwift(string text, out string todSymbol, out int beginHour, out int endHour, out int endMinute, out int swift)
+        {
+            var trimmedText = text.Trim();
+
+            // @TODO move hardcoded values to resources file
+            beginHour = 0;
+            endHour = 0;
+            endMinute = 0;
+            swift = 0;
+
+            var tod = string.Empty;
+
+            // Literal terms give a day offset and a provisional time of day. NOTE(review): Han-script literals, not Hangul - confirm.
+            switch (trimmedText)
+            {
+                case "今晚":
+                    swift = 0;
+                    tod = Constants.Evening;
+                    break;
+                case "今早":
+                case "今晨":
+                    swift = 0;
+                    tod = Constants.Morning;
+                    break;
+                case "明晚":
+                    swift = 1;
+                    tod = Constants.Evening;
+                    break;
+                case "明早":
+                case "明晨":
+                    swift = 1;
+                    tod = Constants.Morning;
+                    break;
+                case "昨晚":
+                    swift = -1;
+                    tod = Constants.Evening;
+                    break;
+            }
+
+            if (MORegex.IsMatch(trimmedText))
+            {
+                tod = Constants.Morning;
+            }
+            else if (MIRegex.IsMatch(trimmedText))
+            {
+                tod = Constants.MidDay;
+            }
+            else if (AFRegex.IsMatch(trimmedText))
+            {
+                tod = Constants.Afternoon;
+            }
+            else if (EVRegex.IsMatch(trimmedText))
+            {
+                tod = Constants.Evening;
+            }
+            else if (NIRegex.IsMatch(trimmedText))
+            {
+                tod = Constants.Night;
+            }
+            else if (string.IsNullOrEmpty(tod))
+            {
+                todSymbol = null;
+                return false;
+            }
+
+            var parseResult = TimexUtility.ResolveTimeOfDay(tod);
+            todSymbol = parseResult.Timex;
+            beginHour = parseResult.BeginHour;
+            endHour = parseResult.EndHour;
+            endMinute = parseResult.EndMin;
+
+            return true;
+        }
+
+        public bool GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMin)
+        {
+            // Same lookup as GetMatchedTimeRangeAndSwift; the day offset is not needed here, so it is discarded.
+            return GetMatchedTimeRangeAndSwift(text, out todSymbol, out beginHour, out endHour, out endMin, out _);
+        }
+    }
+}
\ No newline at end of file
diff --git 
a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanDurationParserConfiguration.cs
new file mode 100644
index 0000000000..07c6ba3724
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanDurationParserConfiguration.cs
@@ -0,0 +1,64 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Immutable;
+using System.Globalization;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Text.NumberWithUnit;
+using Microsoft.Recognizers.Text.NumberWithUnit.Korean;
+using static Microsoft.Recognizers.Text.DateTime.Korean.KoreanDurationExtractorConfiguration;
+
+namespace Microsoft.Recognizers.Text.DateTime.Korean
+{
+    /// <summary>
+    /// Korean configuration for the CJK duration parser: exposes the duration extractor, the
+    /// number-with-unit parser used internally, the duration regexes and the unit maps.
+    /// </summary>
+    public class KoreanDurationParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKDurationParserConfiguration
+    {
+
+        /// <summary>
+        /// Builds the configuration from the shared CJK parser configuration.
+        /// </summary>
+        /// <param name="config">Common configuration supplying culture and the unit maps.</param>
+        public KoreanDurationParserConfiguration(ICJKCommonDateTimeParserConfiguration config)
+            : base(config)
+        {
+            InternalParser = new NumberWithUnitParser(new DurationParserConfiguration());
+
+            // The duration extractor gets its own options object created with DateTimeOptions.None,
+            // i.e. it does not inherit the options carried by `config`.
+            var durationConfig = new BaseDateTimeOptionsConfiguration(config.Culture, DateTimeOptions.None);
+            DurationExtractor = new BaseCJKDurationExtractor(new KoreanDurationExtractorConfiguration(durationConfig), false);
+
+            YearRegex = KoreanDurationExtractorConfiguration.YearRegex;
+            SomeRegex = KoreanDurationExtractorConfiguration.SomeRegex;
+            MoreOrLessRegex = KoreanDurationExtractorConfiguration.MoreOrLessRegex;
+            DurationUnitRegex = KoreanDurationExtractorConfiguration.DurationUnitRegex;
+            AnUnitRegex = KoreanDurationExtractorConfiguration.AnUnitRegex;
+            DurationConnectorRegex = KoreanDurationExtractorConfiguration.DurationConnectorRegex;
+
+            UnitMap = config.UnitMap;
+            UnitValueMap = config.UnitValueMap;
+        }
+
+        public IDateTimeExtractor DurationExtractor { get; }
+
+        public IParser InternalParser { get; }
+
+        public Regex YearRegex { get; }
+
+        public Regex SomeRegex { get; }
+
+        public Regex MoreOrLessRegex { get; }
+
+        public Regex DurationUnitRegex { get; }
+
+        public Regex AnUnitRegex { get; }
+
+        public Regex DurationConnectorRegex { get; }
+
+        // NOTE(review): type arguments restored as <string, string> / <string, long>;
+        // confirm against ICJKDurationParserConfiguration.
+        public IImmutableDictionary<string, string> UnitMap { get; }
+
+        public IImmutableDictionary<string, long> UnitValueMap { get; }
+
+        /// <summary>
+        /// Number-with-unit parser configuration for the Korean culture, bound to the duration suffix list.
+        /// </summary>
+        internal class DurationParserConfiguration : KoreanNumberWithUnitParserConfiguration
+        {
+            public DurationParserConfiguration()
+                : base(new CultureInfo(Text.Culture.Korean))
+            {
+                this.BindDictionary(DurationExtractorConfiguration.DurationSuffixList);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanHolidayParserConfiguration.cs
new file mode 100644
index 0000000000..43f2764cc8
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanHolidayParserConfiguration.cs
@@ -0,0 +1,227 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text.RegularExpressions;
+
+using Microsoft.Recognizers.Definitions.Korean;
+using Microsoft.Recognizers.Text.Number;
+using Microsoft.Recognizers.Text.Number.Korean;
+
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime.Korean
+{
+    /// <summary>
+    /// Korean configuration for the CJK holiday parser: maps holiday names to date factories
+    /// and provides the year-token helpers used while resolving a holiday expression.
+    /// </summary>
+    public class KoreanHolidayParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKHolidayParserConfiguration
+    {
+        // @TODO Move dictionaries and hardcoded terms to resource file
+
+        // Holidays that fall on the same month/day every year: name -> factory taking the year.
+        public static readonly Dictionary<string, Func<int, DateObject>> FixedHolidaysDict = new Dictionary<string, Func<int, DateObject>>
+        {
+            { "元旦", NewYear },
+            { "元旦节", NewYear },
+            { "教师节", TeacherDay },
+            { "青年节", YouthDay },
+            { "儿童节", ChildrenDay },
+            { "妇女节", FemaleDay },
+            { "植树节", TreePlantDay },
+            { "情人节", LoverDay },
+            { "平安夜", ChristmasEve },
+            { "圣诞节", ChristmasDay },
+            { "新年", NewYear },
+            { "愚人节", FoolDay },
+            { "五一", LaborDay },
+            { "劳动节", LaborDay },
+            { "万圣节", HalloweenDay },
+            { "中秋节", MidautumnDay },
+            { "中秋", MidautumnDay },
+            { "春节", SpringDay },
+            { "除夕", NewYearEve },
+            { "元宵节", LanternDay },
+            { "清明节", QingMingDay },
+            { "清明", QingMingDay },
+            { "端午节", DragonBoatDay },
+            { "端午", DragonBoatDay },
+            { "国庆节", ChsNationalDay },
+            { "建军节", ChsMilBuildDay },
+            { "女生节", GirlsDay },
+            { "光棍节", SinglesDay },
+            { "双十一", SinglesDay },
+            { "重阳节", ChongYangDay },
+        };
+
+        // Holidays defined as "n-th weekday of a month": name -> factory taking the year.
+        public static readonly Dictionary<string, Func<int, DateObject>> HolidayFuncDict = new Dictionary<string, Func<int, DateObject>>
+        {
+            { "父亲节", GetFathersDayOfYear },
+            { "母亲节", GetMothersDayOfYear },
+            { "感恩节", GetThanksgivingDayOfYear },
+        };
+
+        // NOTE(review): type arguments restored as <string, string>; confirm against DateTimeDefinitions.HolidayNoFixedTimex.
+        public static readonly Dictionary<string, string> NoFixedTimex = DateTimeDefinitions.HolidayNoFixedTimex;
+
+        /// <summary>
+        /// Builds the configuration from the shared CJK parser configuration.
+        /// </summary>
+        /// <param name="config">Common configuration supplying the integer extractor and number parser.</param>
+        public KoreanHolidayParserConfiguration(ICJKCommonDateTimeParserConfiguration config)
+            : base(config)
+        {
+            IntegerExtractor = config.IntegerExtractor;
+            NumberParser = config.NumberParser;
+
+            HolidayRegexList = KoreanHolidayExtractorConfiguration.HolidayRegexList;
+            LunarHolidayRegex = KoreanHolidayExtractorConfiguration.LunarHolidayRegex;
+        }
+
+        public IExtractor IntegerExtractor { get; }
+
+        public IParser NumberParser { get; }
+
+        Dictionary<string, Func<int, DateObject>> ICJKHolidayParserConfiguration.FixedHolidaysDict => FixedHolidaysDict;
+
+        Dictionary<string, Func<int, DateObject>> ICJKHolidayParserConfiguration.HolidayFuncDict => HolidayFuncDict;
+
+        Dictionary<string, string> ICJKHolidayParserConfiguration.NoFixedTimex => NoFixedTimex;
+
+        // NOTE(review): element type restored as Regex; confirm against KoreanHolidayExtractorConfiguration.HolidayRegexList.
+        public IEnumerable<Regex> HolidayRegexList { get; }
+
+        public Regex LunarHolidayRegex { get; }
+
+        /// <summary>
+        /// Derives the year offset expressed by the trailing year term of <paramref name="text"/>.
+        /// </summary>
+        /// <param name="text">Year expression; surrounding whitespace is ignored.</param>
+        /// <returns>-1 or +1 when the text ends with one of the two hardcoded terms; otherwise -10.</returns>
+        public int GetSwiftYear(string text)
+        {
+            // @TODO move hardcoded values to resource file
+            var trimmedText = text.Trim();
+            var swift = -10;
+
+            // Test the trimmed text: trailing whitespace must not hide the year term.
+            if (trimmedText.EndsWith("去年", StringComparison.Ordinal))
+            {
+                swift = -1;
+            }
+            else if (trimmedText.EndsWith("明年", StringComparison.Ordinal))
+            {
+                swift = +1;
+            }
+
+            return swift;
+        }
+
+        /// <summary>
+        /// Removes a single trailing "年" from a year token, if present.
+        /// </summary>
+        /// <param name="yearStr">Year token to clean.</param>
+        /// <returns>The token without its trailing "年", or the token unchanged.</returns>
+        public string SanitizeYearToken(string yearStr)
+        {
+            // @TODO move hardcoded values to resource file
+            if (yearStr.EndsWith("年", StringComparison.Ordinal))
+            {
+                yearStr = yearStr.Substring(0, yearStr.Length - 1);
+            }
+
+            return yearStr;
+        }
+
+        private static DateObject NewYear(int year) => new DateObject(year, 1, 1);
+
+        private static DateObject TeacherDay(int year) => new DateObject(year, 9, 10);
+
+        private static DateObject YouthDay(int year) => new DateObject(year, 5, 4);
+
+        private static DateObject ChildrenDay(int year) => new DateObject(year, 6, 1);
+
+        private static DateObject FemaleDay(int year) => new DateObject(year, 3, 8);
+
+        private static DateObject TreePlantDay(int year) => new DateObject(year, 3, 12);
+
+        private static DateObject LoverDay(int year) => new DateObject(year, 2, 14);
+
+        private static DateObject ChristmasEve(int year) => new DateObject(year, 12, 24);
+
+        private static DateObject ChristmasDay(int year) => new DateObject(year, 12, 25);
+
+        private static DateObject FoolDay(int year) => new DateObject(year, 4, 1);
+
+        private static DateObject LaborDay(int year) => new DateObject(year, 5, 1);
+
+        private static DateObject HalloweenDay(int year) => new DateObject(year, 10, 31);
+
+        private static DateObject MidautumnDay(int year) => new DateObject(year, 8, 15);
+
+        private static DateObject SpringDay(int year) => new DateObject(year, 1, 1);
+
+        private static DateObject NewYearEve(int year) => new DateObject(year, 1, 1).AddDays(-1);
+
+        private static DateObject LanternDay(int year) => new DateObject(year, 1, 15);
+
+        private static DateObject QingMingDay(int year) => new DateObject(year, 4, 4);
+
+        private static DateObject DragonBoatDay(int year) => new DateObject(year, 5, 5);
+
+        private static DateObject ChsNationalDay(int year) => new DateObject(year, 10, 1);
+
+        private static DateObject ChsMilBuildDay(int year) => new DateObject(year, 8, 1);
+
+        private static DateObject GirlsDay(int year) => new DateObject(year, 3, 7);
+
+        private static DateObject SinglesDay(int year) => new DateObject(year, 11, 11);
+
+        private static DateObject ChongYangDay(int year) => new DateObject(year, 9, 9);
+
+        // The helpers below enumerate the days of a month, keep those on the wanted weekday
+        // and pick one by position (ElementAt is zero-based, so ElementAt(1) is the second match).
+
+        // Second Sunday of May.
+        private static DateObject GetMothersDayOfYear(int year)
+        {
+            return DateObject.MinValue.SafeCreateFromValue(year, 5, (from day in Enumerable.Range(1, 31)
+                                                                       where DateObject.MinValue.SafeCreateFromValue(year, 5, day).DayOfWeek == DayOfWeek.Sunday
+                                                                       select day).ElementAt(1));
+        }
+
+        // Third Sunday of June.
+        private static DateObject GetFathersDayOfYear(int year)
+        {
+            return DateObject.MinValue.SafeCreateFromValue(year, 6, (from day in Enumerable.Range(1, 30)
+                                                                       where DateObject.MinValue.SafeCreateFromValue(year, 6, day).DayOfWeek == DayOfWeek.Sunday
+                                                                       select day).ElementAt(2));
+        }
+
+        // Third Monday of January.
+        private static DateObject GetMartinLutherKingDayOfYear(int year)
+        {
+            return DateObject.MinValue.SafeCreateFromValue(year, 1, (from day in Enumerable.Range(1, 31)
+                                                                       where DateObject.MinValue.SafeCreateFromValue(year, 1, day).DayOfWeek == DayOfWeek.Monday
+                                                                       select day).ElementAt(2));
+        }
+
+        // Third Monday of February.
+        private static DateObject GetWashingtonsBirthdayOfYear(int year)
+        {
+            return DateObject.MinValue.SafeCreateFromValue(year, 2, (from day in Enumerable.Range(1, 29)
+                                                                       where DateObject.MinValue.SafeCreateFromValue(year, 2, day).DayOfWeek == DayOfWeek.Monday
+                                                                       select day).ElementAt(2));
+        }
+
+        // First Monday of March.
+        private static DateObject GetCanberraDayOfYear(int year)
+        {
+            return DateObject.MinValue.SafeCreateFromValue(year, 3, (from day in Enumerable.Range(1, 31)
+                                                                       where DateObject.MinValue.SafeCreateFromValue(year, 3, day).DayOfWeek == DayOfWeek.Monday
+                                                                       select day).ElementAt(0));
+        }
+
+        // Last Monday of May.
+        private static DateObject GetMemorialDayOfYear(int year)
+        {
+            return DateObject.MinValue.SafeCreateFromValue(year, 5, (from day in Enumerable.Range(1, 31)
+                                                                       where DateObject.MinValue.SafeCreateFromValue(year, 5, day).DayOfWeek == DayOfWeek.Monday
+                                                                       select day).Last());
+        }
+
+        // First Monday of September.
+        private static DateObject GetLabourDayOfYear(int year)
+        {
+            return DateObject.MinValue.SafeCreateFromValue(year, 9, (from day in Enumerable.Range(1, 30)
+                                                                       where DateObject.MinValue.SafeCreateFromValue(year, 9, day).DayOfWeek == DayOfWeek.Monday
+                                                                       select day).ElementAt(0));
+        }
+
+        // Second Monday of October.
+        private static DateObject GetColumbusDayOfYear(int year)
+        {
+            return DateObject.MinValue.SafeCreateFromValue(year, 10, (from day in Enumerable.Range(1, 31)
+                                                                        where DateObject.MinValue.SafeCreateFromValue(year, 10, day).DayOfWeek == DayOfWeek.Monday
+                                                                        select day).ElementAt(1));
+        }
+
+        // Fourth Thursday of November.
+        private static DateObject GetThanksgivingDayOfYear(int year)
+        {
+            return DateObject.MinValue.SafeCreateFromValue(year, 11, (from day in Enumerable.Range(1, 30)
+                                                                        where DateObject.MinValue.SafeCreateFromValue(year, 11, day).DayOfWeek == DayOfWeek.Thursday
+                                                                        select day).ElementAt(3));
+        }
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanMergedParserConfiguration.cs
new file mode 100644
index 0000000000..409e40cbed
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanMergedParserConfiguration.cs
@@ -0,0 +1,43 @@
+// Copyright
(c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Definitions.Korean;
+
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime.Korean
+{
+    /// <summary>
+    /// Korean configuration for the CJK merged parser; every regex is taken from
+    /// <see cref="KoreanMergedExtractorConfiguration"/>.
+    /// </summary>
+    public class KoreanMergedParserConfiguration : KoreanCommonDateTimeParserConfiguration, ICJKMergedParserConfiguration
+    {
+        /// <summary>
+        /// Builds the configuration on top of the shared CJK parser configuration.
+        /// </summary>
+        /// <param name="config">Common configuration forwarded to the base class.</param>
+        public KoreanMergedParserConfiguration(ICJKCommonDateTimeParserConfiguration config)
+            : base(config)
+        {
+            // Modifiers placed relative to a date/time expression.
+            BeforeRegex = KoreanMergedExtractorConfiguration.BeforeRegex;
+            AfterRegex = KoreanMergedExtractorConfiguration.AfterRegex;
+            UntilRegex = KoreanMergedExtractorConfiguration.UntilRegex;
+            EqualRegex = KoreanMergedExtractorConfiguration.EqualRegex;
+
+            // Prefix/suffix pairs.
+            SincePrefixRegex = KoreanMergedExtractorConfiguration.SincePrefixRegex;
+            SinceSuffixRegex = KoreanMergedExtractorConfiguration.SinceSuffixRegex;
+            AroundPrefixRegex = KoreanMergedExtractorConfiguration.AroundPrefixRegex;
+            AroundSuffixRegex = KoreanMergedExtractorConfiguration.AroundSuffixRegex;
+        }
+
+        public Regex BeforeRegex { get; }
+
+        public Regex AfterRegex { get; }
+
+        public Regex SincePrefixRegex { get; }
+
+        public Regex SinceSuffixRegex { get; }
+
+        public Regex AroundPrefixRegex { get; }
+
+        public Regex AroundSuffixRegex { get; }
+
+        public Regex UntilRegex { get; }
+
+        public Regex EqualRegex { get; }
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanSetParserConfiguration.cs
new file mode 100644
index 0000000000..986c5f35ec
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanSetParserConfiguration.cs
@@ -0,0 +1,99 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Text.Utilities;
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime.Korean
+{
+    /// <summary>
+    /// Korean configuration for the CJK set (recurrence) parser: wires the shared extractors/parsers
+    /// and maps a bare unit term to its one-unit period timex.
+    /// </summary>
+    public class KoreanSetParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKSetParserConfiguration
+    {
+        /// <summary>
+        /// Builds the configuration from the shared CJK parser configuration.
+        /// </summary>
+        /// <param name="config">Common configuration supplying extractors, parsers and the unit map.</param>
+        public KoreanSetParserConfiguration(ICJKCommonDateTimeParserConfiguration config)
+            : base(config)
+        {
+            DurationExtractor = config.DurationExtractor;
+            TimeExtractor = config.TimeExtractor;
+            TimePeriodExtractor = config.TimePeriodExtractor;
+            DateExtractor = config.DateExtractor;
+            DateTimeExtractor = config.DateTimeExtractor;
+
+            DurationParser = config.DurationParser;
+            TimeParser = config.TimeParser;
+            TimePeriodParser = config.TimePeriodParser;
+            DateParser = config.DateParser;
+            DateTimeParser = config.DateTimeParser;
+
+            EachPrefixRegex = KoreanSetExtractorConfiguration.EachPrefixRegex;
+            EachUnitRegex = KoreanSetExtractorConfiguration.EachUnitRegex;
+            EachDayRegex = KoreanSetExtractorConfiguration.EachDayRegex;
+            EachDateUnitRegex = KoreanSetExtractorConfiguration.EachDateUnitRegex;
+            UnitMap = config.UnitMap;
+        }
+
+        public IDateTimeExtractor DurationExtractor { get; }
+
+        public IDateTimeExtractor TimeExtractor { get; }
+
+        public IDateTimeExtractor TimePeriodExtractor { get; }
+
+        public IDateTimeExtractor DateExtractor { get; }
+
+        public IDateTimeExtractor DateTimeExtractor { get; }
+
+        public IDateTimeParser DurationParser { get; }
+
+        public IDateTimeParser TimeParser { get; }
+
+        public IDateTimeParser TimePeriodParser { get; }
+
+        public IDateTimeParser DateParser { get; }
+
+        public IDateTimeParser DateTimeParser { get; }
+
+        public Regex EachPrefixRegex { get; }
+
+        public Regex EachUnitRegex { get; }
+
+        public Regex EachDayRegex { get; }
+
+        public Regex EachDateUnitRegex { get; }
+
+        // NOTE(review): type arguments restored as <string, string>; confirm against ICJKSetParserConfiguration.
+        public IImmutableDictionary<string, string> UnitMap { get; }
+
+        /// <summary>
+        /// Maps a unit term to the timex of a one-unit period.
+        /// </summary>
+        /// <param name="text">Unit term; surrounding whitespace is ignored.</param>
+        /// <param name="timex">"P1D", "P1W", "P1M" or "P1Y" on success; null otherwise.</param>
+        /// <returns>True when the term is one of the hardcoded units; otherwise false.</returns>
+        public bool GetMatchedUnitTimex(string text, out string timex)
+        {
+            // @TODO move hardcoded values to resources file
+            // A string switch compares ordinally, matching the explicit Ordinal comparisons it replaces.
+            switch (text.Trim())
+            {
+                case "天":
+                case "日":
+                    timex = "P1D";
+                    return true;
+                case "周":
+                case "星期":
+                    timex = "P1W";
+                    return true;
+                case "月":
+                    timex = "P1M";
+                    return true;
+                case "年":
+                    timex = "P1Y";
+                    return true;
+                default:
+                    timex = null;
+                    return false;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanTimeParserConfiguration.cs
new file mode 100644
index 0000000000..ca84f28a67
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanTimeParserConfiguration.cs
@@ -0,0 +1,43 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+
+using Microsoft.Recognizers.Definitions.Korean;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime.Korean
+{
+    /// <summary>
+    /// Korean configuration for the CJK time parser: exposes the time extractor and the
+    /// handler table that dispatches on <see cref="TimeType"/>.
+    /// </summary>
+    public class KoreanTimeParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKTimeParserConfiguration
+    {
+        // Shared helper; readonly because it is assigned once here and never replaced.
+        // It must stay declared before FunctionMap: static initializers run in textual order
+        // and FunctionMap captures its methods.
+        private static readonly TimeFunctions timeFunc = new TimeFunctions
+        {
+            NumberDictionary = DateTimeDefinitions.TimeNumberDictionary,
+            LowBoundDesc = DateTimeDefinitions.TimeLowBoundDesc,
+            DayDescRegex = KoreanTimeExtractorConfiguration.DayDescRegex,
+        };
+
+        // NOTE(review): type arguments restored as <TimeType, TimeFunction>; confirm against ICJKTimeParserConfiguration.
+        private static readonly Dictionary<TimeType, TimeFunction> FunctionMap =
+            new Dictionary<TimeType, TimeFunction>
+            {
+                { TimeType.DigitTime, timeFunc.HandleDigit },
+                { TimeType.CjkTime, timeFunc.HandleKanji },
+                { TimeType.LessTime, timeFunc.HandleLess },
+            };
+
+        /// <summary>
+        /// Builds the configuration from the shared CJK parser configuration.
+        /// </summary>
+        /// <param name="config">Common configuration supplying the time extractor.</param>
+        public KoreanTimeParserConfiguration(ICJKCommonDateTimeParserConfiguration config)
+            : base(config)
+        {
+            TimeExtractor = config.TimeExtractor;
+        }
+
+        // public delegate TimeResult TimeFunction(DateTimeExtra extra);
+
+        public IDateTimeExtractor TimeExtractor { get; }
+
+        TimeFunctions ICJKTimeParserConfiguration.TimeFunc => timeFunc;
+
+        Dictionary<TimeType, TimeFunction> ICJKTimeParserConfiguration.FunctionMap => FunctionMap;
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanTimePeriodParserConfiguration.cs
new file mode 100644
index 0000000000..465e476d83
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Parsers/KoreanTimePeriodParserConfiguration.cs
@@ -0,0 +1,84 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+using Microsoft.Recognizers.Definitions.Korean;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime.Korean
+{
+    /// <summary>
+    /// Korean configuration for the CJK time-period parser: exposes the time extractor/parser
+    /// and resolves time-of-day terms to hour ranges.
+    /// </summary>
+    public class KoreanTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKTimePeriodParserConfiguration
+    {
+        // Shared helper; readonly because it is assigned once here and never replaced.
+        private static readonly TimeFunctions timeFunc = new TimeFunctions
+        {
+            NumberDictionary = DateTimeDefinitions.TimeNumberDictionary,
+            LowBoundDesc = DateTimeDefinitions.TimeLowBoundDesc,
+            DayDescRegex = KoreanTimeExtractorConfiguration.DayDescRegex,
+        };
+
+        /// <summary>
+        /// Builds the configuration from the shared CJK parser configuration.
+        /// </summary>
+        /// <param name="config">Common configuration supplying the time extractor and parser.</param>
+        public KoreanTimePeriodParserConfiguration(ICJKCommonDateTimeParserConfiguration config)
+            : base(config)
+        {
+            TimeExtractor = config.TimeExtractor;
+            TimeParser = config.TimeParser;
+        }
+
+        public IDateTimeExtractor TimeExtractor { get; }
+
+        public IDateTimeParser TimeParser { get; }
+
+        TimeFunctions ICJKTimePeriodParserConfiguration.TimeFunc => timeFunc;
+
+        /// <summary>
+        /// Resolves a time-of-day term to its timex and hour range.
+        /// </summary>
+        /// <param name="text">Expression to resolve; surrounding whitespace is ignored.</param>
+        /// <param name="timex">Timex of the resolved time of day, or null when nothing matched.</param>
+        /// <param name="beginHour">First hour of the resolved range (0 when nothing matched).</param>
+        /// <param name="endHour">Last hour of the resolved range (0 when nothing matched).</param>
+        /// <param name="endMin">End minute of the resolved range (0 when nothing matched).</param>
+        /// <returns>True when a time-of-day term was recognized; otherwise false.</returns>
+        public bool GetMatchedTimexRange(string text, out string timex, out int beginHour, out int endHour, out int endMin)
+        {
+            var trimmedText = text.Trim();
+            beginHour = 0;
+            endHour = 0;
+            endMin = 0;
+
+            var timeOfDay = string.Empty;
+
+            // Order matters: the first list with a matching term wins.
+            // Every list is matched as a suffix except the daytime list, which requires an exact match.
+            if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)))
+            {
+                timeOfDay = Constants.Morning;
+            }
+            else if (DateTimeDefinitions.MidDayTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)))
+            {
+                timeOfDay = Constants.MidDay;
+            }
+            else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)))
+            {
+                timeOfDay = Constants.Afternoon;
+            }
+            else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)))
+            {
+                timeOfDay = Constants.Evening;
+            }
+            else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)))
+            {
+                timeOfDay = Constants.Daytime;
+            }
+            else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)))
+            {
+                timeOfDay = Constants.Night;
+            }
+            else
+            {
+                timex = null;
+                return false;
+            }
+
+            var parseResult = TimexUtility.ResolveTimeOfDay(timeOfDay);
+            timex = parseResult.Timex;
+            beginHour = parseResult.BeginHour;
+            endHour = parseResult.EndHour;
+            endMin = parseResult.EndMin;
+
+            return true;
+        }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.csproj b/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.csproj
index eb8be9a7c0..e662b0f56f 100644
--- a/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.csproj
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.csproj
@@ -1,13 +1,18 @@  - netstandard2.0;net462;net452;net45 + netstandard2.1;netstandard2.0;net462;net6.0 + 9 false false ../Recognizers-Text.ruleset - + + + true + ..\buildtools\35MSSharedLib1024.snk + true $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + Microsoft + nlp, entity-extraction, parser-library, recognizer, date, time, datetime, numex, timex, netstandard2.0 + Microsoft.Recognizers.Text.DateTime provides robust recognition and resolution of Date and Time expressed in English, Spanish, French, Portuguese, Chinese, + German, Italian, and Turkish. + MIT + https://github.com/Microsoft/Recognizers-Text + images\icon.png + © Microsoft Corporation. All rights reserved.
+ + - + all runtime; build; native; contentfiles; analyzers - + all runtime; build; native; contentfiles; analyzers - - + + @@ -44,4 +58,5 @@ + diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.nuspec b/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.nuspec index 399c5d9479..eee054326c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.nuspec +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.nuspec @@ -6,24 +6,24 @@ $title$ Microsoft true - Microsoft.Recognizers.Text.DateTime provides robust recognition and resolution of Date and Time expressed in English, Spanish, French, Portuguese, and Chinese. + Microsoft.Recognizers.Text.DateTime provides robust recognition and resolution of Date and Time expressed in English, Spanish, French, Portuguese, Chinese, + German, Italian, and Turkish. MIT https://github.com/Microsoft/Recognizers-Text - http://docs.botframework.com/images/bot_icon.png + images\icon.png © Microsoft Corporation. All rights reserved. nlp entity-extraction parser-library recognizer date time datetime numex timex netstandard2.0 - + - - + diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.xml b/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.xml index a51e52154d..617c00fbf0 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.xml +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.xml @@ -4,11 +4,6 @@ Microsoft.Recognizers.Text.DateTime - - - Types of DurationType. 
- - Represents a day Period @@ -19,6 +14,11 @@ Represents a week Period + + + Represents a fortnight Period + + Represents a month Period @@ -79,6 +79,16 @@ ExtendedTypes + + + NoProtoCache + + + + + TasksMode, specific functionality that changes default behaviour for business reasons. + + FailFast, mode that aborts extraction/tagging quickly for non-entity cases. May be removed later. @@ -94,11 +104,6 @@ EnablePreview - - - Types of DurationType. - - Date @@ -129,5 +134,37 @@ Represents the time of the date + + Ramadan + + + Eid al-Adha (Feast of the Sacrifice) + + + Eid al-Fitr (Festival of Breaking the Fast) + + + Islamic New Year + + + + None + + + + + NonspecificYear + + + + + NonspecificMonth + + + + + NonspecificDay + + diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Models/DateTimeModel.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Models/DateTimeModel.cs index 91e0f5e077..30a32a191d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Models/DateTimeModel.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Models/DateTimeModel.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Linq; using Microsoft.Recognizers.Text.Utilities; @@ -8,6 +11,11 @@ namespace Microsoft.Recognizers.Text.DateTime { public class DateTimeModel : IModel { + + private string culture; + + private string requestedCulture; + public DateTimeModel(IDateTimeParser parser, IDateTimeExtractor extractor) { this.Parser = parser; @@ -16,6 +24,10 @@ public DateTimeModel(IDateTimeParser parser, IDateTimeExtractor extractor) public string ModelTypeName => Constants.MODEL_DATETIME; + public string Culture => this.culture; + + public string RequestedCulture => this.requestedCulture; + protected IDateTimeExtractor Extractor { get; private set; } protected IDateTimeParser Parser { get; private set; } @@ -25,6 +37,7 @@ public List Parse(string query) return this.Parse(query, DateObject.Now); } + [System.Diagnostics.CodeAnalysis.SuppressMessage("Design", "CA1031:Do not catch general exception types", Justification = "By design")] public List Parse(string query, DateObject refTime) { var parsedDateTimes = new List(); @@ -62,6 +75,12 @@ public List Parse(string query, DateObject refTime) return parsedDateTimes.Select(o => GetModelResult(o)).ToList(); } + public void SetCultureInfo(string culture, string requestedCulture = null) + { + this.culture = culture; + this.requestedCulture = requestedCulture; + } + private static string GetParentText(DateTimeParseResult parsedDateTime) { return ((Dictionary)parsedDateTime.Data)[ExtendedModelResult.ParentTextKey].ToString(); @@ -94,5 +113,6 @@ private ModelResult GetModelResult(DateTimeParseResult parsedDateTime) return ret; } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateParser.cs index 3405f360c4..458a251673 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateParser.cs 
@@ -1,6 +1,10 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Collections.Immutable; +using System.Globalization; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Utilities; @@ -39,6 +43,11 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject reference) innerResult = ParseImplicitDate(er.Text, referenceDate); } + if (!innerResult.Success && ((config.Options & DateTimeOptions.TasksMode) != 0)) + { + innerResult = ParseTasksModeDurationToDatePattern(er.Text, referenceDate); + } + if (!innerResult.Success) { innerResult = ParseWeekdayOfMonth(er.Text, referenceDate); @@ -49,6 +58,11 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject reference) innerResult = ParseDurationWithAgoAndLater(er.Text, referenceDate); } + if (!innerResult.Success) + { + innerResult = ParseDurationWithDate(er.Text, referenceDate); + } + // NumberWithMonth must be the second last one, because it only need to find a number and a month to get a "success" if (!innerResult.Success) { @@ -61,6 +75,13 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject reference) innerResult = ParseSingleNumber(er.Text, referenceDate); } + // In cases like "Monday two weeks from now", the resolution of "two weeks from now" needs to be shifted + // to correspond to the weekday "Monday". 
+ if (innerResult.Success && er.Metadata != null && er.Metadata.IsDurationDateWithWeekday) + { + innerResult = SwiftResolutionByWeekday(innerResult, er.Text); + } + if (innerResult.Success) { innerResult.FutureResolution = new Dictionary @@ -127,7 +148,7 @@ private static bool EndsWithTerms(string text, IImmutableList terms) foreach (var term in terms) { - if (text.EndsWith(term)) + if (text.EndsWith(term, StringComparison.Ordinal)) { result = true; break; @@ -141,11 +162,14 @@ private static bool EndsWithTerms(string text, IImmutableList terms) private DateTimeResolutionResult ParseBasicRegexMatch(string text, DateObject referenceDate) { var trimmedText = text.Trim(); + foreach (var regex in this.config.DateRegexes) { var offset = 0; string relativeStr = null; + var match = regex.Match(trimmedText); + if (!match.Success) { match = regex.Match(this.config.DateTokenPrefix + trimmedText); @@ -172,6 +196,7 @@ private DateTimeResolutionResult ParseBasicRegexMatch(string text, DateObject re // Value string will be set in Match2Date method var ret = Match2Date(match, referenceDate, relativeStr); + return ret; } } @@ -303,6 +328,10 @@ private DateTimeResolutionResult ParseImplicitDate(string text, DateObject refer { value = referenceDate.Upcoming((DayOfWeek)this.config.DayOfWeek[weekdayStr]); } + else if (config.GetSwiftMonthOrYear(trimmedText) == 2) + { + value = value.AddDays(7); + } ret.Timex = DateTimeFormatUtil.LuisDate(value); ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(value.Year, value.Month, value.Day); @@ -407,19 +436,31 @@ private DateTimeResolutionResult ParseImplicitDate(string text, DateObject refer ret.Timex = DateTimeFormatUtil.LuisDate(-1, -1, day); - DateObject futureDate; + DateObject futureDate, pastDate; var tryStr = DateTimeFormatUtil.LuisDate(year, month, day); if (DateObject.TryParse(tryStr, out DateObject _)) { futureDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); + pastDate = 
DateObject.MinValue.SafeCreateFromValue(year, month, day); + + if (futureDate < referenceDate) + { + futureDate = futureDate.AddMonths(+1); + } + + if (pastDate >= referenceDate) + { + pastDate = pastDate.AddMonths(-1); + } } else { futureDate = DateObject.MinValue.SafeCreateFromValue(year, month + 1, day); + pastDate = DateObject.MinValue.SafeCreateFromValue(year, month - 1, day); } ret.FutureValue = futureDate; - ret.PastValue = ret.FutureValue; + ret.PastValue = pastDate; ret.Success = true; return ret; @@ -456,6 +497,13 @@ private DateTimeResolutionResult ParseImplicitDate(string text, DateObject refer match = this.config.WeekDayAndDayRegex.Match(text); if (match.Success) { + // avoid parsing "Monday 3" from "Monday 3 weeks from now" + var afterStr = text.Substring(match.Index + match.Length); + if (config.UnitRegex.MatchBegin(afterStr, trim: true).Success) + { + return ret; + } + int month = referenceDate.Month, year = referenceDate.Year; // Create a extract result which content ordinal string of text @@ -552,6 +600,54 @@ private DateTimeResolutionResult ParseImplicitDate(string text, DateObject refer return ret; } + /* + under tasksmode parse addtitonal Implicit date references under tasksmode. + eg next week will get mapped to same day of next week, + next month will get mapped to starting day of comming month, + next year will get mapped to starting date of coming year. 
+ + Input text : meet me next week (refrence time 01-08-2022) + Tasksmode: next week --> 08-08-2022 datetime type: date + Default mode: next week --> (08-08-2022 - 15-08-2022) datetime type: daterange + */ + private DateTimeResolutionResult ParseTasksModeDurationToDatePattern(string text, DateObject referenceDate) + { + var trimmedText = text.Trim(); + var ret = new DateTimeResolutionResult(); + + var match = this.config.TasksModeDurationToDatePatterns.Match(trimmedText); + if (match.Success) + { + if (match.Groups["week"].Value.Trim() != string.Empty) + { + var value = referenceDate.AddDays(TasksModeConstants.WeekDayCount); + ret.Timex = DateTimeFormatUtil.LuisDate(value); + ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(value.Year, value.Month, value.Day); + ret.Success = true; + return ret; + } + else if (match.Groups["month"].Value.Trim() != string.Empty) + { + var value = referenceDate.AddMonths(1); + ret.Timex = DateTimeFormatUtil.LuisDate(value.Year, value.Month, 1); + ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(value.Year, value.Month, 1); + ret.Success = true; + return ret; + } + else if (match.Groups["year"].Value.Trim() != string.Empty) + { + var value = referenceDate.AddYears(1); + ret.Timex = DateTimeFormatUtil.LuisDate(value.Year, 1, 1); + ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(value.Year, 1, 1); + ret.Success = true; + return ret; + } + + } + + return ret; + } + // Handle cases like "January first", "twenty-two of August" // Handle cases like "20th of next month" private DateTimeResolutionResult ParseNumberWithMonth(string text, DateObject referenceDate) @@ -563,7 +659,9 @@ private DateTimeResolutionResult ParseNumberWithMonth(string text, DateObject re bool ambiguous = true; var er = this.config.OrdinalExtractor.Extract(trimmedText); - if (er.Count == 0) + + // check if the extraction is empty or a relative ordinal (e.g. 
"next", "previous") + if (er.Count == 0 || er[0].Metadata.IsOrdinalRelative) { er = this.config.IntegerExtractor.Extract(trimmedText); } @@ -619,8 +717,23 @@ private DateTimeResolutionResult ParseNumberWithMonth(string text, DateObject re var wantedWeekDay = this.config.DayOfWeek[match.Groups["weekday"].Value]; var firstDate = DateObject.MinValue.SafeCreateFromValue(referenceDate.Year, referenceDate.Month, 1); var firstWeekDay = (int)firstDate.DayOfWeek; - var firstWantedWeekDay = firstDate.AddDays(wantedWeekDay > firstWeekDay ? wantedWeekDay - firstWeekDay : wantedWeekDay - firstWeekDay + 7); + var firstWantedWeekDay = firstDate.AddDays(wantedWeekDay >= firstWeekDay ? wantedWeekDay - firstWeekDay : wantedWeekDay - firstWeekDay + 7); var answerDay = firstWantedWeekDay.Day + ((num - 1) * 7); + if ((answerDay < referenceDate.Day) && ((config.Options & DateTimeOptions.TasksMode) != 0)) + { + DateObject nextReferenceDate = referenceDate.AddMonths(1); + month = nextReferenceDate.Month; + firstDate = DateObject.MinValue.SafeCreateFromValue(nextReferenceDate.Year, nextReferenceDate.Month, 1); + firstWeekDay = (int)firstDate.DayOfWeek; + firstWantedWeekDay = firstDate.AddDays(wantedWeekDay >= firstWeekDay ? wantedWeekDay - firstWeekDay : wantedWeekDay - firstWeekDay + 7); + answerDay = firstWantedWeekDay.Day + ((num - 1) * 7); + } + else + { + firstWantedWeekDay = firstDate.AddDays(wantedWeekDay > firstWeekDay ? wantedWeekDay - firstWeekDay : wantedWeekDay - firstWeekDay + 7); + answerDay = firstWantedWeekDay.Day + ((num - 1) * 7); + } + day = answerDay; ambiguous = false; } @@ -718,31 +831,127 @@ private DateTimeResolutionResult ParseDurationWithAgoAndLater(string text, DateO GetSwiftDay); } + // Parse combined patterns Duration + Date, e.g. 
'3 days before Monday', '4 weeks after January 15th' + private DateTimeResolutionResult ParseDurationWithDate(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var durationRes = config.DurationExtractor.Extract(text, referenceDate); + + foreach (var duration in durationRes) + { + var matches = config.UnitRegex.Matches(duration.Text); + if (matches.Count > 0) + { + var afterStr = text.Substring((int)duration.Start + (int)duration.Length); + + // Check if the Duration entity is followed by "before|from|after" + var connector = config.BeforeAfterRegex.MatchBegin(afterStr, trim: true); + if (connector.Success) + { + // Parse Duration + var pr = config.DurationParser.Parse(duration, referenceDate); + + // Parse Date + if (pr.Value != null) + { + var dateString = afterStr.Substring(connector.Index + connector.Length).Trim(); + var innerResult = ParseBasicRegexMatch(dateString, referenceDate); + if (!innerResult.Success) + { + innerResult = ParseImplicitDate(dateString, referenceDate); + } + + if (!innerResult.Success && ((config.Options & DateTimeOptions.TasksMode) != 0)) + { + innerResult = ParseTasksModeDurationToDatePattern(dateString, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseWeekdayOfMonth(dateString, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseNumberWithMonth(dateString, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseSingleNumber(dateString, referenceDate); + } + + if (!innerResult.Success) + { + var holidayEr = new ExtractResult + { + Start = 0, + Length = dateString.Length, + Text = dateString, + Type = Constants.SYS_DATETIME_DATE, + Data = null, + Metadata = new Metadata { IsHoliday = true }, + }; + innerResult = (DateTimeResolutionResult)config.HolidayParser.Parse(holidayEr, referenceDate).Value; + } + + // Combine parsed results Duration + Date + if (innerResult.Success) + { + var isFuture = connector.Groups["after"].Success ? 
true : false; + DateObject date = (DateObject)innerResult.FutureValue; + var resultDateTime = DurationParsingUtil.ShiftDateTime(pr.TimexStr, date, future: isFuture); + ret.Timex = $"{DateTimeFormatUtil.LuisDate(resultDateTime)}"; + + ret.FutureValue = ret.PastValue = resultDateTime; + ret.SubDateTimeEntities = new List { pr }; + ret.Success = true; + } + } + } + } + } + + return ret; + } + // Parse a regex match which includes 'day', 'month' and 'year' (optional) group private DateTimeResolutionResult Match2Date(Match match, DateObject referenceDate, string relativeStr) { var ret = new DateTimeResolutionResult(); + int month = 0, day = 0, year = 0; var monthStr = match.Groups["month"].Value; var dayStr = match.Groups["day"].Value; - var yearStr = match.Groups["year"].Value; var weekdayStr = match.Groups["weekday"].Value; - int month = 0, day = 0, year = 0; + var yearStr = match.Groups["year"].Value; + var writtenYear = match.Groups["fullyear"].Value; + var ambiguousCentury = false; if (this.config.MonthOfYear.ContainsKey(monthStr) && this.config.DayOfMonth.ContainsKey(dayStr)) { month = this.config.MonthOfYear[monthStr]; day = this.config.DayOfMonth[dayStr]; - if (!string.IsNullOrEmpty(yearStr)) + + if (!string.IsNullOrEmpty(writtenYear)) { - year = int.Parse(yearStr); + year = this.config.DateExtractor.GetYearFromText(match); + } + else if (!string.IsNullOrEmpty(yearStr)) + { + year = int.Parse(yearStr, CultureInfo.InvariantCulture); if (year < 100 && year >= Constants.MinTwoDigitYearPastNum) { - year += 1900; + year += Constants.BASE_YEAR_PAST_CENTURY; } else if (year >= 0 && year < Constants.MaxTwoDigitYearFutureNum) { - year += 2000; + year += Constants.BASE_YEAR_CURRENT_CENTURY; + } + else if (year >= Constants.MaxTwoDigitYearFutureNum && year < Constants.MinTwoDigitYearPastNum) + { + // Two-digit years in the range [30, 40) are ambiguos + ambiguousCentury = true; } } } @@ -775,23 +984,19 @@ private DateTimeResolutionResult Match2Date(Match match, DateObject 
referenceDat ret.Timex = DateTimeFormatUtil.LuisDate(year, month, day); } - var futureDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); - var pastDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); - - if (noYear && futureDate < referenceDate && !futureDate.IsDefaultValue()) - { - futureDate = futureDate.AddYears(+1); - } + var futurePastDates = DateContext.GenerateDates(noYear, referenceDate, year, month, day); + ret.FutureValue = futurePastDates.future; + ret.PastValue = futurePastDates.past; + ret.Success = true; - if (noYear && pastDate >= referenceDate && !pastDate.IsDefaultValue()) + // Ambiguous two-digit years are assigned values in both centuries (e.g. 35 -> 1935, 2035) + if (ambiguousCentury) { - pastDate = pastDate.AddYears(-1); + ret.PastValue = futurePastDates.past.AddYears(Constants.BASE_YEAR_PAST_CENTURY); + ret.FutureValue = futurePastDates.future.AddYears(Constants.BASE_YEAR_CURRENT_CENTURY); + ret.Timex = TimexUtility.ModifyAmbiguousCenturyTimex(ret.Timex); } - ret.FutureValue = futureDate; - ret.PastValue = pastDate; - ret.Success = true; - return ret; } @@ -858,7 +1063,7 @@ private DateTimeResolutionResult ParseWeekdayOfMonth(string text, DateObject ref } // Here is a very special case, timeX follow future date - ret.Timex = $@"XXXX-{month.ToString("D2")}-WXX-{weekday}-#{cardinal}"; + ret.Timex = $@"XXXX-{month.ToString("D2", CultureInfo.InvariantCulture)}-WXX-{weekday}-#{cardinal}"; ret.FutureValue = futureDate; ret.PastValue = pastDate; ret.Success = true; @@ -933,5 +1138,52 @@ private void GetYearInAffix(string affix, ref int year, ref bool ambiguous, out } } } + + private DateTimeResolutionResult SwiftResolutionByWeekday(DateTimeResolutionResult ret, string text) + { + var match = config.WeekDayRegex.MatchBegin(text, trim: true); + if (!match.Success) + { + match = config.WeekDayRegex.MatchEnd(text, trim: true); + } + + if (match.Success) + { + DateObject futureValue = (DateObject)ret.FutureValue; + var 
weekDayNr = (int)futureValue.DayOfWeek; + var extractedWeekDayStr = match.Groups["weekday"].Value; + var extractedWeekDayNr = this.config.DayOfWeek[extractedWeekDayStr]; + if (weekDayNr != extractedWeekDayNr) + { + var diffDay = extractedWeekDayNr - weekDayNr; + if (ret.SubDateTimeEntities.Count > 0) + { + var retDuration = (DateTimeParseResult)ret.SubDateTimeEntities[0]; + if (retDuration.TimexStr.EndsWith("M")) + { + // In cases like "two months/years from now", the returned date cannot be less than 2 months/years from now. + if (futureValue.Day + diffDay < 1) + { + diffDay += 7; + } + } + else if (retDuration.TimexStr.EndsWith("Y")) + { + if (futureValue.DayOfYear + diffDay < 1) + { + diffDay += 7; + } + } + } + + var newFutureValue = futureValue.AddDays(diffDay); + ret.FutureValue = newFutureValue; + ret.PastValue = newFutureValue; + ret.Timex = DateTimeFormatUtil.LuisDate(newFutureValue.Year, newFutureValue.Month, newFutureValue.Day); + } + } + + return ret; + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateParserConfiguration.cs index e8a5595d75..506e86245e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -46,6 +49,8 @@ protected BaseDateParserConfiguration(IDateTimeOptionsConfiguration config) public virtual IDateTimeParser TimePeriodParser { get; protected set; } + public virtual IDateTimeExtractor HolidayExtractor { get; protected set; } + public virtual IDateTimeParser DateTimePeriodParser { get; protected set; } public virtual IDateTimeParser DateTimeAltParser { get; protected set; } @@ -77,5 +82,8 @@ protected BaseDateParserConfiguration(IDateTimeOptionsConfiguration config) public virtual IImmutableDictionary SpecialDecadeCases { get; protected set; } public virtual IDateTimeUtilityConfiguration UtilityConfiguration { get; protected set; } + + public virtual IDateTimeParser HolidayTimeParser { get; protected set; } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDatePeriodParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDatePeriodParser.cs index 2f71413909..f94f322ab1 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDatePeriodParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDatePeriodParser.cs @@ -1,8 +1,12 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Globalization; using System.Linq; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.DateTime.English; using Microsoft.Recognizers.Text.DateTime.Utilities; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -24,6 +28,36 @@ public BaseDatePeriodParser(IDatePeriodParserConfiguration configuration) config = configuration; } + public static bool IsPresent(int swift) + { + return swift == 0; + } + + public static Tuple GetWeekRangeFromDate(DateObject date) + { + var startDate = date.This(DayOfWeek.Monday); + var endDate = inclusiveEndPeriod ? startDate.AddDays(Constants.WeekDayCount - 1) : startDate.AddDays(Constants.WeekDayCount); + return new Tuple(startDate, endDate); + } + + public static Tuple GetMonthRangeFromDate(DateObject date) + { + var startDate = DateObject.MinValue.SafeCreateFromValue(date.Year, date.Month, 1); + DateObject endDate; + if (date.Month < 12) + { + endDate = DateObject.MinValue.SafeCreateFromValue(date.Year, date.Month + 1, 1); + } + else + { + endDate = DateObject.MinValue.SafeCreateFromValue(date.Year + 1, 1, 1); + } + + endDate = inclusiveEndPeriod ? endDate.AddDays(-1) : endDate; + + return new Tuple(startDate, endDate); + } + // @TODO Refactor code to remove the cycle between BaseDatePeriodParser and its config. 
public static DateContext GetYearContext(ISimpleDatePeriodParserConfiguration config, string startDateStr, string endDateStr, string text) { @@ -182,142 +216,28 @@ public List FilterResults(string query, List date, Match match, bool start) { - DateObject beginDateResult = beginDate; - DateObject endDateResult = endDate; - var isBusinessDay = timex.EndsWith(Constants.TimexBusinessDay); - var businessDayCount = 0; - List dateList = null; - - if (isBusinessDay) - { - businessDayCount = int.Parse(timex.Substring(1, timex.Length - 3)); - } - - if (future) - { - string mod = Constants.AFTER_MOD; - - // For future the beginDate should add 1 first - if (isBusinessDay) - { - beginDateResult = DurationParsingUtil.GetNextBusinessDay(referenceDate); - endDateResult = DurationParsingUtil.GetNthBusinessDay(beginDateResult, businessDayCount - 1, true, out dateList); - endDateResult = endDateResult.AddDays(1); - return new ModAndDateResult(beginDateResult, endDateResult, mod, dateList); - } - else - { - beginDateResult = referenceDate.AddDays(1); - endDateResult = DurationParsingUtil.ShiftDateTime(timex, beginDateResult, true); - return new ModAndDateResult(beginDateResult, endDateResult, mod, null); - } - } - else + DateObject result; + int i = start ? 
0 : 1; + if (match.Groups["EndOf"].Captures.Count >= 2 && match.Groups["EndOf"].Captures[i].Length > 0) { - const string mod = Constants.BEFORE_MOD; - - if (isBusinessDay) - { - endDateResult = DurationParsingUtil.GetNextBusinessDay(endDateResult, false); - beginDateResult = DurationParsingUtil.GetNthBusinessDay(endDateResult, businessDayCount - 1, false, out dateList); - endDateResult = endDateResult.AddDays(1); - return new ModAndDateResult(beginDateResult, endDateResult, mod, dateList); - } - else - { - beginDateResult = DurationParsingUtil.ShiftDateTime(timex, endDateResult, false); - return new ModAndDateResult(beginDateResult, endDateResult, mod, null); - } + result = date.Item2; } - } - - private static bool IsPresent(int swift) - { - return swift == 0; - } - - private static Tuple GetWeekRangeFromDate(DateObject date) - { - var startDate = date.This(DayOfWeek.Monday); - var endDate = inclusiveEndPeriod ? startDate.AddDays(Constants.WeekDayCount - 1) : startDate.AddDays(Constants.WeekDayCount); - return new Tuple(startDate, endDate); - } - - private static Tuple GetMonthRangeFromDate(DateObject date) - { - var startDate = DateObject.MinValue.SafeCreateFromValue(date.Year, date.Month, 1); - DateObject endDate; - if (date.Month < 12) + else if (match.Groups["MiddleOf"].Captures.Count >= 2 && match.Groups["MiddleOf"].Captures[i].Length > 0) { - endDate = DateObject.MinValue.SafeCreateFromValue(date.Year, date.Month + 1, 1); + var startDate = date.Item1; + var endDate = date.Item2; + var shift = (int)((endDate - startDate).TotalDays / 2); + result = startDate.AddDays(shift); } else { - endDate = DateObject.MinValue.SafeCreateFromValue(date.Year + 1, 1, 1); - } - - endDate = inclusiveEndPeriod ? 
endDate.AddDays(-1) : endDate; - - return new Tuple(startDate, endDate); - } - - private static DateObject GetFirstThursday(int year, int month = Constants.InvalidMonth) - { - var targetMonth = month; - - if (month == Constants.InvalidMonth) - { - targetMonth = 1; + result = date.Item1; } - var firstDay = DateObject.MinValue.SafeCreateFromValue(year, targetMonth, 1); - DateObject firstThursday = firstDay.This(DayOfWeek.Thursday); - - // Thursday falls into previous year or previous month - if (firstThursday.Month != targetMonth) - { - firstThursday = firstDay.AddDays(Constants.WeekDayCount); - } - - return firstThursday; - } - - private static DateObject GetLastThursday(int year, int month = Constants.InvalidMonth) - { - var targetMonth = month; - - if (month == Constants.InvalidMonth) - { - targetMonth = 12; - } - - var lastDay = GetLastDay(year, targetMonth); - DateObject lastThursday = lastDay.This(DayOfWeek.Thursday); - - // Thursday falls into next year or next month - if (lastThursday.Month != targetMonth) - { - lastThursday = lastThursday.AddDays(-Constants.WeekDayCount); - } - - return lastThursday; - } - - private static DateObject GetLastDay(int year, int month) - { - month++; - - if (month == 13) - { - year++; - month = 1; - } - - var firstDayOfNextMonth = DateObject.MinValue.SafeCreateFromValue(year, month, 1); - - return firstDayOfNextMonth.AddDays(-1); + return result; } // Process case like "from|between START to|and END" where START/END can be daterange or datepoint @@ -335,24 +255,37 @@ private DateTimeResolutionResult ParseComplexDatePeriod(string text, DateObject var isSpecificDate = false; var isStartByWeek = false; var isEndByWeek = false; + bool isAmbiguousStart = false, isAmbiguousEnd = false; + var ambiguousRes = new DateTimeResolutionResult(); var dateContext = GetYearContext(this.config, match.Groups["start"].Value.Trim(), match.Groups["end"].Value.Trim(), text); var startResolution = 
ParseSingleTimePoint(match.Groups["start"].Value.Trim(), referenceDate, dateContext); if (startResolution.Success) { - futureBegin = (DateObject)startResolution.FutureValue; - pastBegin = (DateObject)startResolution.PastValue; - isSpecificDate = true; + // Check if the extraction is ambiguous (e.g. "mar" can be resolved to both "March" and "Tuesday" in FR, IT and ES) + if (this.config.AmbiguousPointRangeRegex != null && this.config.AmbiguousPointRangeRegex.IsMatch(match.Groups["start"].Value.Trim())) + { + ambiguousRes = startResolution; + isAmbiguousStart = true; + } + else + { + futureBegin = (DateObject)startResolution.FutureValue; + pastBegin = (DateObject)startResolution.PastValue; + isSpecificDate = true; + } } - else + + if (!startResolution.Success || isAmbiguousStart) { startResolution = ParseBaseDatePeriod(match.Groups["start"].Value.Trim(), referenceDate, dateContext); if (startResolution.Success) { - futureBegin = ((Tuple)startResolution.FutureValue).Item1; - pastBegin = ((Tuple)startResolution.PastValue).Item1; + // When the start group contains modifiers such as 'end of', 'middle of', the begin resolution must be updated accordingly. 
+ futureBegin = ShiftResolution((Tuple)startResolution.FutureValue, match, start: true); + pastBegin = ShiftResolution((Tuple)startResolution.PastValue, match, start: true); if (startResolution.Timex.Contains("-W")) { @@ -367,18 +300,30 @@ private DateTimeResolutionResult ParseComplexDatePeriod(string text, DateObject if (endResolution.Success) { - futureEnd = (DateObject)endResolution.FutureValue; - pastEnd = (DateObject)endResolution.PastValue; - isSpecificDate = true; + // Check if the extraction is ambiguous + if (this.config.AmbiguousPointRangeRegex != null && this.config.AmbiguousPointRangeRegex.IsMatch(match.Groups["end"].Value.Trim())) + { + ambiguousRes = endResolution; + isAmbiguousEnd = true; + } + else + { + futureEnd = (DateObject)endResolution.FutureValue; + pastEnd = (DateObject)endResolution.PastValue; + isSpecificDate = true; + } } - else + + if (!endResolution.Success || isAmbiguousEnd) { endResolution = ParseBaseDatePeriod(match.Groups["end"].Value.Trim(), referenceDate, dateContext); if (endResolution.Success) { - futureEnd = ((Tuple)endResolution.FutureValue).Item1; - pastEnd = ((Tuple)endResolution.PastValue).Item1; + // When the end group contains modifiers such as 'end of', 'middle of', the end resolution must be updated accordingly. + futureEnd = ShiftResolution((Tuple)endResolution.FutureValue, match, start: false); + pastEnd = ShiftResolution((Tuple)endResolution.PastValue, match, start: false); + if (endResolution.Timex.Contains("-W")) { isEndByWeek = true; @@ -388,6 +333,22 @@ private DateTimeResolutionResult ParseComplexDatePeriod(string text, DateObject if (endResolution.Success) { + // When start or end is ambiguous it is better to resolve it to the type of the unambiguous extraction. + // In Spanish, for example, 'de lunes a mar' (from Monday to Tuesday) or 'de enero a mar' (from January to March). + // In the first case 'mar' is resolved as Date (weekday), in the second case it is resolved as DatePeriod (month). 
+ if (isAmbiguousStart && isSpecificDate) + { + startResolution = ambiguousRes; + futureBegin = (DateObject)startResolution.FutureValue; + pastBegin = (DateObject)startResolution.PastValue; + } + else if (isAmbiguousEnd && isSpecificDate) + { + endResolution = ambiguousRes; + futureEnd = (DateObject)endResolution.FutureValue; + pastEnd = (DateObject)endResolution.PastValue; + } + if (futureBegin > futureEnd) { if (dateContext == null || dateContext.IsEmpty()) @@ -427,8 +388,11 @@ private DateTimeResolutionResult ParseComplexDatePeriod(string text, DateObject datePeriodTimexType = DatePeriodTimexType.ByWeek; } - ret.Timex = TimexUtility.GenerateDatePeriodTimex(futureBegin, futureEnd, datePeriodTimexType, pastBegin, pastEnd); + var hasYear = !startResolution.Timex.StartsWith(Constants.TimexFuzzyYear, StringComparison.Ordinal) || + !endResolution.Timex.StartsWith(Constants.TimexFuzzyYear, StringComparison.Ordinal); + // If the year is not specified, the combined range timex will use fuzzy years. + ret.Timex = TimexUtility.GenerateDatePeriodTimex(futureBegin, futureEnd, datePeriodTimexType, pastBegin, pastEnd, hasYear); ret.FutureValue = new Tuple(futureBegin, futureEnd); ret.PastValue = new Tuple(pastBegin, pastEnd); ret.Success = true; @@ -452,6 +416,12 @@ private DateTimeResolutionResult ParseBaseDatePeriod(string text, DateObject ref innerResult = ParseOneWordPeriod(text, referenceDate); } + // Cases like "x weeks/days starting (from) today/12 sep etc." + if (!innerResult.Success) + { + innerResult = ParseStartingWithDuration(text, referenceDate); + } + if (!innerResult.Success) { innerResult = MergeTwoTimePoints(text, referenceDate); @@ -513,6 +483,12 @@ private DateTimeResolutionResult ParseBaseDatePeriod(string text, DateObject ref innerResult = ParseDatePointWithAgoAndLater(text, referenceDate); } + // Cases like "for x weeks/days from today/12 sep etc." 
+ if (!innerResult.Success) + { + innerResult = ParseDatePointWithForPrefix(text, referenceDate); + } + // Parse duration should be at the end since it will extract "the last week" from "the last week of July" if (!innerResult.Success) { @@ -588,9 +564,8 @@ private DateTimeResolutionResult ParseDatePointWithAgoAndLater(string text, Date var isLessThanOrWithIn = false; var isMoreThan = false; - // @TODO move hardcoded English strings to definition // cases like "within 3 days from yesterday/tomorrow" does not make any sense - if (er.Text.Contains("today") || er.Text.Contains("now")) + if (this.config.TodayNowRegex.IsMatch(er.Text)) { MatchWithinNextPrefix(beforeString, isAgo, ref isLessThanOrWithIn, ref isMoreThan); } @@ -607,7 +582,7 @@ private DateTimeResolutionResult ParseDatePointWithAgoAndLater(string text, Date } var pr = this.config.DateParser.Parse(er, referenceDate); - var durationExtractionResult = this.config.DurationExtractor.Extract(er.Text).FirstOrDefault(); + var durationExtractionResult = this.config.DurationExtractor.Extract(er.Text, referenceDate).FirstOrDefault(); if (durationExtractionResult != null) { @@ -658,6 +633,111 @@ private DateTimeResolutionResult ParseDatePointWithAgoAndLater(string text, Date return ret; } + // Only handle cases like "for x weeks/days from today/tomorrow/some day" + private DateTimeResolutionResult ParseDatePointWithForPrefix(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var er = this.config.DateExtractor.Extract(text, referenceDate).FirstOrDefault(); + + if (er != null) + { + var beforeString = text.Substring(0, (int)er.Start); + var isAgo = this.config.AgoRegex.Match(er.Text).Success; + var config = this.config as EnglishDatePeriodParserConfiguration; + + if (!string.IsNullOrEmpty(beforeString) && config != null) + { + var matchFor = config.ForPrefixRegex.Match(beforeString); + + if (matchFor.Success && matchFor.Groups[Constants.ForGroupName].Success) + { + var pr = 
this.config.DateParser.Parse(er, referenceDate); + var durationExtractionResult = this.config.DurationExtractor.Extract(er.Text, referenceDate).FirstOrDefault(); + + if (durationExtractionResult != null) + { + var duration = this.config.DurationParser.Parse(durationExtractionResult); + var durationInSeconds = (double)((DateTimeResolutionResult)duration.Value).PastValue; + + DateObject startDate; + DateObject endDate; + + if (isAgo) + { + startDate = (DateObject)((DateTimeResolutionResult)pr.Value).PastValue; + endDate = startDate.AddSeconds(durationInSeconds); + } + else + { + endDate = (DateObject)((DateTimeResolutionResult)pr.Value).FutureValue; + startDate = endDate.AddSeconds(-durationInSeconds); + } + + if (startDate != DateObject.MinValue) + { + var startLuisStr = DateTimeFormatUtil.LuisDate(startDate); + var endLuisStr = DateTimeFormatUtil.LuisDate(endDate); + var durationTimex = ((DateTimeResolutionResult)duration.Value).Timex; + + ret.Timex = $"({startLuisStr},{endLuisStr},{durationTimex})"; + ret.FutureValue = new Tuple(startDate, endDate); + ret.PastValue = new Tuple(startDate, endDate); + ret.Success = true; + } + } + } + } + } + + return ret; + } + + // Only handle cases like "x weeks/days starting (from) today/tomorrow/some day" + private DateTimeResolutionResult ParseStartingWithDuration(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var dateER = this.config.DateExtractor.Extract(text, referenceDate); + var enConfig = this.config as EnglishDatePeriodParserConfiguration; + + if (enConfig != null && enConfig.StartingRegex.Match(text).Success && dateER.Count == 1) + { + var beforeString = text.Substring(0, (int)dateER[0].Start); + + if (!string.IsNullOrEmpty(beforeString) && enConfig.StartingRegex.MatchEnd(beforeString, true).Success) + { + var pr = this.config.DateParser.Parse(dateER[0], referenceDate); + var durationER = this.config.DurationExtractor.Extract(beforeString, referenceDate); + + if 
(durationER.Count == 1) + { + var duration = this.config.DurationParser.Parse(durationER[0]); + var durationInSeconds = (double)((DateTimeResolutionResult)duration.Value).PastValue; + + DateObject startDate; + DateObject endDate; + + startDate = (DateObject)((DateTimeResolutionResult)pr.Value).PastValue; + endDate = startDate.AddSeconds(durationInSeconds); + + if (startDate != DateObject.MinValue) + { + var startLuisStr = DateTimeFormatUtil.LuisDate(startDate); + var endLuisStr = DateTimeFormatUtil.LuisDate(endDate); + var durationTimex = ((DateTimeResolutionResult)duration.Value).Timex; + + ret.Timex = $"({startLuisStr},{endLuisStr},{durationTimex})"; + ret.FutureValue = new Tuple(startDate, endDate); + ret.PastValue = new Tuple(startDate, endDate); + ret.SubDateTimeEntities = new List { pr, duration }; + ret.Success = true; + } + } + } + } + + return ret; + } + private DateTimeResolutionResult ParseSingleTimePoint(string text, DateObject referenceDate, DateContext dateContext = null) { var ret = new DateTimeResolutionResult(); @@ -687,12 +767,26 @@ private DateTimeResolutionResult ParseSingleTimePoint(string text, DateObject re ret.Success = true; } - if (dateContext != null) + // Expressions like "today", "tomorrow",... 
should keep their original year + if (dateContext != null && !this.config.SpecialDayRegex.IsMatch(er.Text)) { ret = dateContext.ProcessDateEntityResolution(ret); } } + // Handle expressions with "now" + if (er == null) + { + var nowPr = ParseNowAsDate(text, referenceDate); + if (nowPr.Value != null) + { + ret.Timex = $"({nowPr.TimexStr}"; + ret.FutureValue = (DateObject)((DateTimeResolutionResult)nowPr.Value).FutureValue; + ret.PastValue = (DateObject)((DateTimeResolutionResult)nowPr.Value).PastValue; + ret.Success = true; + } + } + return ret; } @@ -723,9 +817,47 @@ private DateTimeResolutionResult ParseSimpleCases(string text, DateObject refere if (match.Success) { - var days = match.Groups["day"]; - beginDay = this.config.DayOfMonth[days.Captures[0].Value]; - endDay = this.config.DayOfMonth[days.Captures[1].Value]; + var days = match.Groups[Constants.DayGroupName]; + var writtenDay = match.Groups[Constants.OrdinalGroupName]; + if (writtenDay.Captures.Count > 0 && days.Captures[0].Value == writtenDay.Captures[0].Value) + { + // Parse beginDay in written form + var dayMatch = writtenDay.Captures[0]; + var dayEr = new ExtractResult + { + Start = dayMatch.Index, + Length = dayMatch.Length, + Text = dayMatch.Value, + Type = Constants.SYS_NUMBER_ORDINAL, + Metadata = new Metadata { IsOrdinalRelative = false, }, + }; + var dayPr = this.config.NumberParser.Parse(dayEr); + beginDay = (int)(double)dayPr.Value; + } + else + { + beginDay = this.config.DayOfMonth[days.Captures[0].Value]; + } + + if (writtenDay.Captures.Count > 0 && days.Captures[1].Value == writtenDay.Captures[writtenDay.Captures.Count - 1].Value) + { + // Parse endDay in written form + var dayMatch = writtenDay.Captures[writtenDay.Captures.Count - 1]; + var dayEr = new ExtractResult + { + Start = dayMatch.Index, + Length = dayMatch.Length, + Text = dayMatch.Value, + Type = Constants.SYS_NUMBER_ORDINAL, + Metadata = new Metadata { IsOrdinalRelative = false, }, + }; + var dayPr = 
this.config.NumberParser.Parse(dayEr); + endDay = (int)(double)dayPr.Value; + } + else + { + endDay = this.config.DayOfMonth[days.Captures[1].Value]; + } // parse year year = config.DateExtractor.GetYearFromText(match.Match); @@ -801,26 +933,12 @@ private DateTimeResolutionResult ParseSimpleCases(string text, DateObject refere endLuisStr = DateTimeFormatUtil.LuisDate(year, month, endDay); } - int futureYear = year, pastYear = year; - var startDate = DateObject.MinValue.SafeCreateFromValue(year, month, beginDay); - - if (noYear && startDate < referenceDate) - { - futureYear++; - } - - if (noYear && startDate >= referenceDate) - { - pastYear--; - } + var futurePastBeginDates = DateContext.GenerateDates(noYear, referenceDate, year, month, beginDay); + var futurePastEndDates = DateContext.GenerateDates(noYear, referenceDate, year, month, endDay); ret.Timex = $"({beginLuisStr},{endLuisStr},P{endDay - beginDay}D)"; - ret.FutureValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(futureYear, month, beginDay), - DateObject.MinValue.SafeCreateFromValue(futureYear, month, endDay)); - ret.PastValue = new Tuple( - DateObject.MinValue.SafeCreateFromValue(pastYear, month, beginDay), - DateObject.MinValue.SafeCreateFromValue(pastYear, month, endDay)); + ret.FutureValue = new Tuple(futurePastBeginDates.future, futurePastEndDates.future); + ret.PastValue = new Tuple(futurePastBeginDates.past, futurePastEndDates.past); ret.Success = true; return ret; @@ -914,7 +1032,7 @@ private DateTimeResolutionResult ParseOneWordPeriod(string text, DateObject refe var monthStr = match.Groups["month"].Value; if (this.config.IsYearToDate(trimmedText)) { - ret.Timex = referenceDate.Year.ToString("D4"); + ret.Timex = referenceDate.Year.ToString("D4", CultureInfo.InvariantCulture); ret.FutureValue = ret.PastValue = new Tuple(DateObject.MinValue.SafeCreateFromValue(referenceDate.Year, 1, 1), referenceDate); @@ -924,7 +1042,7 @@ private DateTimeResolutionResult ParseOneWordPeriod(string text, 
DateObject refe if (this.config.IsMonthToDate(trimmedText)) { - ret.Timex = referenceDate.Year.ToString("D4") + "-" + referenceDate.Month.ToString("D2"); + ret.Timex = referenceDate.Year.ToString("D4", CultureInfo.InvariantCulture) + "-" + referenceDate.Month.ToString("D2", CultureInfo.InvariantCulture); ret.FutureValue = ret.PastValue = new Tuple( @@ -933,6 +1051,16 @@ private DateTimeResolutionResult ParseOneWordPeriod(string text, DateObject refe return ret; } + // Parse expressions "till date", "to date" + if (match.Groups["toDate"].Success) + { + ret.Timex = "PRESENT_REF"; + ret.FutureValue = ret.PastValue = referenceDate; + ret.Mod = Constants.BEFORE_MOD; + ret.Success = true; + return ret; + } + if (!string.IsNullOrEmpty(monthStr)) { swift = this.config.GetSwiftYear(trimmedText); @@ -941,13 +1069,13 @@ private DateTimeResolutionResult ParseOneWordPeriod(string text, DateObject refe if (swift >= -1) { - ret.Timex = (referenceDate.Year + swift).ToString("D4") + "-" + month.ToString("D2"); + ret.Timex = (referenceDate.Year + swift).ToString("D4", CultureInfo.InvariantCulture) + "-" + month.ToString("D2", CultureInfo.InvariantCulture); year = year + swift; futureYear = pastYear = year; } else { - ret.Timex = "XXXX-" + month.ToString("D2"); + ret.Timex = "XXXX-" + month.ToString("D2", CultureInfo.InvariantCulture); if (month < referenceDate.Month) { futureYear++; @@ -963,32 +1091,45 @@ private DateTimeResolutionResult ParseOneWordPeriod(string text, DateObject refe { swift = this.config.GetSwiftDayOrMonth(trimmedText); - if (this.config.IsWeekOnly(trimmedText)) + var isWorkingWeek = match.Groups["business"].Success; + var isFortnight = this.config.IsFortnight(trimmedText); + + if (isWorkingWeek || this.config.IsWeekOnly(trimmedText) || isFortnight) { - var monday = referenceDate.This(DayOfWeek.Monday).AddDays(Constants.WeekDayCount * swift); + var delta = Constants.WeekDayCount * swift; + var endDelta = delta; - ret.Timex = isReferenceDatePeriod ? 
TimexUtility.GenerateWeekTimex() : TimexUtility.GenerateWeekTimex(monday); - var beginDate = referenceDate.This(DayOfWeek.Monday).AddDays(Constants.WeekDayCount * swift); - var endDate = inclusiveEndPeriod - ? referenceDate.This(DayOfWeek.Sunday).AddDays(Constants.WeekDayCount * swift) - : referenceDate.This(DayOfWeek.Sunday).AddDays(Constants.WeekDayCount * swift).AddDays(1); + if (isFortnight) + { + // One more week + delta *= 2; + endDelta = delta + Constants.WeekDayCount; + } + + var monday = referenceDate.This(DayOfWeek.Monday).AddDays(delta); + var endDay = isWorkingWeek ? DayOfWeek.Friday : DayOfWeek.Sunday; + + var beginDate = referenceDate.This(DayOfWeek.Monday).AddDays(delta); + var endDate = inclusiveEndPeriod ? + referenceDate.This(endDay).AddDays(endDelta) : + referenceDate.This(endDay).AddDays(endDelta).AddDays(1); if (earlyPrefix) { - endDate = inclusiveEndPeriod - ? referenceDate.This(DayOfWeek.Wednesday).AddDays(Constants.WeekDayCount * swift) - : referenceDate.This(DayOfWeek.Wednesday).AddDays(Constants.WeekDayCount * swift).AddDays(1); + endDate = inclusiveEndPeriod ? + referenceDate.This(DayOfWeek.Wednesday).AddDays(endDelta) : + referenceDate.This(DayOfWeek.Wednesday).AddDays(endDelta).AddDays(1); } else if (midPrefix) { - beginDate = referenceDate.This(DayOfWeek.Tuesday).AddDays(Constants.WeekDayCount * swift); - endDate = inclusiveEndPeriod - ? referenceDate.This(DayOfWeek.Friday).AddDays(Constants.WeekDayCount * swift) - : referenceDate.This(DayOfWeek.Friday).AddDays(Constants.WeekDayCount * swift).AddDays(1); + beginDate = referenceDate.This(DayOfWeek.Tuesday).AddDays(delta); + endDate = inclusiveEndPeriod ? 
+ referenceDate.This(DayOfWeek.Friday).AddDays(endDelta) : + referenceDate.This(DayOfWeek.Friday).AddDays(endDelta).AddDays(1); } else if (latePrefix) { - beginDate = referenceDate.This(DayOfWeek.Thursday).AddDays(Constants.WeekDayCount * swift); + beginDate = referenceDate.This(DayOfWeek.Thursday).AddDays(delta); } if (earlierPrefix && swift == 0) @@ -1006,6 +1147,15 @@ private DateTimeResolutionResult ParseOneWordPeriod(string text, DateObject refe } } + if (isFortnight) + { + ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDate, endDate, DatePeriodTimexType.ByFortnight); + } + else + { + ret.Timex = isReferenceDatePeriod ? TimexUtility.GenerateWeekTimex() : TimexUtility.GenerateWeekTimex(monday); + } + if (latePrefix && swift != 0) { ret.Mod = Constants.LATE_MOD; @@ -1016,6 +1166,7 @@ private DateTimeResolutionResult ParseOneWordPeriod(string text, DateObject refe new Tuple(beginDate, endDate); ret.Success = true; + return ret; } @@ -1105,6 +1256,45 @@ private DateTimeResolutionResult ParseOneWordPeriod(string text, DateObject refe ret.Success = true; return ret; } + + // Early/mid/late are resolved in this policy to 4 month ranges at the start/middle/end of the year. + else if (!string.IsNullOrEmpty(match.Groups["FourDigitYear"].Value)) + { + var date = referenceDate.AddYears(swift); + year = int.Parse(match.Groups["FourDigitYear"].Value, CultureInfo.InvariantCulture); + + var beginDate = DateObject.MinValue.SafeCreateFromValue(year, 1, 1); + var endDate = inclusiveEndPeriod ? + DateObject.MinValue.SafeCreateFromValue(year, 12, 31) : + DateObject.MinValue.SafeCreateFromValue(year, 12, 31).AddDays(1); + + if (earlyPrefix) + { + endDate = inclusiveEndPeriod ? + DateObject.MinValue.SafeCreateFromValue(year, 4, 30) : + DateObject.MinValue.SafeCreateFromValue(year, 4, 30).AddDays(1); + } + else if (midPrefix) + { + beginDate = DateObject.MinValue.SafeCreateFromValue(year, 5, 1); + endDate = inclusiveEndPeriod ? 
+ DateObject.MinValue.SafeCreateFromValue(year, 8, 31) : + DateObject.MinValue.SafeCreateFromValue(year, 8, 31).AddDays(1); + } + else if (latePrefix) + { + beginDate = DateObject.MinValue.SafeCreateFromValue(year, 9, 1); + } + + ret.Timex = isReferenceDatePeriod ? TimexUtility.GenerateYearTimex() : TimexUtility.GenerateYearTimex(beginDate); + + ret.FutureValue = + ret.PastValue = + new Tuple(beginDate, endDate); + + ret.Success = true; + return ret; + } } } else @@ -1188,8 +1378,12 @@ private DateTimeResolutionResult ParseMonthWithYear(string text, DateObject refe { var monthStr = match.Groups["month"].Value; var orderStr = match.Groups["order"].Value; + var cardinalStr = match.Groups["cardinal"].Value; - var month = this.config.MonthOfYear[monthStr]; + // Get month number from MonthOfYear dictionary if month name is defined (e.g. 'May 2018'), + // otherwise use CardinalMap (e.g. 'third month of 2018'). + var month = match.Groups["month"].Success ? this.config.MonthOfYear[monthStr] : + this.config.IsLastCardinal(cardinalStr) ? 
12 : this.config.CardinalMap[cardinalStr]; var year = config.DateExtractor.GetYearFromText(match.Match); @@ -1263,7 +1457,7 @@ private DateTimeResolutionResult ParseYear(string text, DateObject referenceDate DateObject.MinValue.SafeCreateFromValue(endYear, 1, 1).AddDays(-1) : DateObject.MinValue.SafeCreateFromValue(endYear, 1, 1); - ret.Timex = $"({DateTimeFormatUtil.LuisDate(beginDay)},{DateTimeFormatUtil.LuisDate(endDay)},P{endYear - beginYear}Y)"; + ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDay, endDay, DatePeriodTimexType.ByYear); ret.FutureValue = ret.PastValue = new Tuple(beginDay, endDay); ret.Success = true; @@ -1392,8 +1586,23 @@ private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject refe return ret; } - pr1 = dateContext.ProcessDateEntityParsingResult(pr1); - pr2 = dateContext.ProcessDateEntityParsingResult(pr2); + // When the case has no specified year, we should sync the future/past year due to invalid date Feb 29th. + if (dateContext.IsEmpty() && (DateContext.IsFeb29th((DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue) + || DateContext.IsFeb29th((DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue))) + { + (pr1, pr2) = dateContext.SyncYear(pr1, pr2); + } + + // Expressions like "today", "tomorrow",... 
should keep their original year + if (!this.config.SpecialDayRegex.IsMatch(pr1.Text)) + { + pr1 = dateContext.ProcessDateEntityParsingResult(pr1); + } + + if (!this.config.SpecialDayRegex.IsMatch(pr2.Text)) + { + pr2 = dateContext.ProcessDateEntityParsingResult(pr2); + } } ret.SubDateTimeEntities = new List { pr1, pr2 }; @@ -1414,13 +1623,17 @@ private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject refe pastEnd = futureEnd; } - if (!futureEnd.IsDefaultValue() && !futureBegin.IsDefaultValue()) - { - ret.Timex = $"({pr1.TimexStr},{pr2.TimexStr},P{(futureEnd - futureBegin).TotalDays}D)"; - } - else + ret.Timex = TimexUtility.GenerateDatePeriodTimex(futureBegin, futureEnd, DatePeriodTimexType.ByDay, pr1.TimexStr, pr2.TimexStr); + + if (pr1.TimexStr.StartsWith(Constants.TimexFuzzyYear, StringComparison.Ordinal) && + futureBegin.CompareTo(DateObject.MinValue.SafeCreateFromValue(futureBegin.Year, 2, 28)) <= 0 && + futureEnd.CompareTo(DateObject.MinValue.SafeCreateFromValue(futureBegin.Year, 3, 1)) >= 0) { - ret.Timex = $"({pr1.TimexStr},{pr2.TimexStr})"; + // Handle cases like "Feb 28th - March 1st". + // There may be different timexes for FutureValue and PastValue due to the different validity of Feb 29th. + ret.Comment = Constants.Comment_DoubleTimex; + var pastTimex = TimexUtility.GenerateDatePeriodTimex(pastBegin, pastEnd, DatePeriodTimexType.ByDay, pr1.TimexStr, pr2.TimexStr); + ret.Timex = TimexUtility.MergeTimexAlternatives(ret.Timex, pastTimex); } ret.FutureValue = new Tuple(futureBegin, futureEnd); @@ -1469,7 +1682,7 @@ private DateTimeResolutionResult ParseDuration(string text, DateObject reference var restNowSunday = false; var durationErs = config.DurationExtractor.Extract(text, referenceDate); - if (durationErs.Count == 1) + if (durationErs.Count > 0) { var durationPr = config.DurationParser.Parse(durationErs[0]); var beforeStr = text.Substring(0, durationPr.Start ?? 
0).Trim(); @@ -1508,21 +1721,23 @@ private DateTimeResolutionResult ParseDuration(string text, DateObject reference if (config.PastRegex.IsMatch(beforeStr) || config.PastRegex.IsMatch(afterStr)) { - modAndDateResult = GetModAndDate(beginDate, endDate, referenceDate, durationResult.Timex, false); + modAndDateResult = ModAndDateResult.GetModAndDate(beginDate, endDate, referenceDate, durationResult.Timex, false); beginDate = modAndDateResult.BeginDate; } // Handle the "within two weeks" case which means from today to the end of next two weeks // Cases like "within 3 days before/after today" is not handled here (4th condition) + var isMatch = false; if (config.WithinNextPrefixRegex.IsExactMatch(beforeStr, trim: true) && DurationParsingUtil.IsDateDuration(durationResult.Timex) && string.IsNullOrEmpty(afterStr)) { - modAndDateResult = GetModAndDate(beginDate, endDate, referenceDate, durationResult.Timex, true); + modAndDateResult = ModAndDateResult.GetModAndDate(beginDate, endDate, referenceDate, durationResult.Timex, true); // In GetModAndDate, this "future" resolution will add one day to beginDate/endDate, // but for the "within" case it should start from the current day. 
beginDate = modAndDateResult.BeginDate.AddDays(-1); endDate = modAndDateResult.EndDate.AddDays(-1); + isMatch = true; } else if (this.config.CheckBothBeforeAfter) { @@ -1530,7 +1745,7 @@ private DateTimeResolutionResult ParseDuration(string text, DateObject reference if (config.WithinNextPrefixRegex.IsExactMatch(afterStr, trim: true) && DurationParsingUtil.IsDateDuration(durationResult.Timex) && (config.FutureRegex.MatchEnd(beforeStr, trim: true).Success || string.IsNullOrEmpty(beforeStr))) { - modAndDateResult = GetModAndDate(beginDate, endDate, referenceDate, durationResult.Timex, true); + modAndDateResult = ModAndDateResult.GetModAndDate(beginDate, endDate, referenceDate, durationResult.Timex, true); // In GetModAndDate, this "future" resolution will add one day to beginDate/endDate, // but for the "within" case it should start from the current day. @@ -1542,29 +1757,24 @@ private DateTimeResolutionResult ParseDuration(string text, DateObject reference if (config.FutureRegex.IsExactMatch(beforeStr, trim: true)) { - modAndDateResult = GetModAndDate(beginDate, endDate, referenceDate, durationResult.Timex, true); - beginDate = modAndDateResult.BeginDate; - endDate = modAndDateResult.EndDate; - } - else if (config.FutureRegex.IsMatch(afterStr)) - { - modAndDateResult = GetModAndDate(beginDate, endDate, referenceDate, durationResult.Timex, true); + modAndDateResult = ModAndDateResult.GetModAndDate(beginDate, endDate, referenceDate, durationResult.Timex, true); beginDate = modAndDateResult.BeginDate; endDate = modAndDateResult.EndDate; + isMatch = true; } if (config.FutureSuffixRegex.IsMatch(afterStr)) { - modAndDateResult = GetModAndDate(beginDate, endDate, referenceDate, durationResult.Timex, true); + modAndDateResult = ModAndDateResult.GetModAndDate(beginDate, endDate, referenceDate, durationResult.Timex, true); beginDate = modAndDateResult.BeginDate; endDate = modAndDateResult.EndDate; } // Handle the "in two weeks" case which means the second week if 
(config.InConnectorRegex.IsExactMatch(beforeStr, trim: true) && - !DurationParsingUtil.IsMultipleDuration(durationResult.Timex)) + !DurationParsingUtil.IsMultipleDuration(durationResult.Timex) && !isMatch) { - modAndDateResult = GetModAndDate(beginDate, endDate, referenceDate, durationResult.Timex, true); + modAndDateResult = ModAndDateResult.GetModAndDate(beginDate, endDate, referenceDate, durationResult.Timex, true); // Change the duration value and the beginDate var unit = durationResult.Timex.Substring(durationResult.Timex.Length - 1); @@ -1574,6 +1784,53 @@ private DateTimeResolutionResult ParseDuration(string text, DateObject reference endDate = modAndDateResult.EndDate; } + // Handle cases like "first 2 weeks of 2019", "last 3 months of this year" + var matchBefore = this.config.FirstLastRegex.Match(beforeStr); + if (matchBefore.Success) + { + var matchAfter = this.config.OfYearRegex.Match(afterStr); + + if (matchAfter.Success) + { + // Get year + var year = config.DateExtractor.GetYearFromText(matchAfter); + if (year == Constants.InvalidYear) + { + var orderStr = matchAfter.Groups["order"].Value; + var swift = this.config.GetSwiftYear(orderStr); + if (swift < -1) + { + return ret; + } + + year = referenceDate.Year + swift; + } + + // Get begin/end dates for year + if (durationResult.Timex.EndsWith(Constants.TimexWeek, StringComparison.Ordinal)) + { + // First/last week of the year is calculated according to ISO definition + beginDate = DateObjectExtension.GetFirstThursday(year).This(DayOfWeek.Monday); + endDate = DateObjectExtension.GetLastThursday(year).This(DayOfWeek.Monday).AddDays(7); + } + else + { + beginDate = DateObject.MinValue.SafeCreateFromValue(year, 1, 1); + endDate = DateObject.MinValue.SafeCreateFromValue(year, 12, 31).AddDays(1); + } + + // Shift begin/end dates by duration span + if (matchBefore.Groups[Constants.FirstGroupName].Success) + { + endDate = DurationParsingUtil.ShiftDateTime(durationResult.Timex, beginDate, true); + } + else + { 
+ beginDate = DurationParsingUtil.ShiftDateTime(durationResult.Timex, endDate, false); + } + } + } + if (!string.IsNullOrEmpty(modAndDateResult.Mod)) { ((DateTimeResolutionResult)durationPr.Value).Mod = modAndDateResult.Mod; @@ -1627,8 +1884,8 @@ private DateTimeResolutionResult ParseDuration(string text, DateObject reference { endDate = inclusiveEndPeriod ? endDate.AddDays(-1) : endDate; - ret.Timex = - $"({DateTimeFormatUtil.LuisDate(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)},{durationTimex})"; + // TODO: analyse upper code and use GenerateDatePeriodTimex to create this Timex. + ret.Timex = $"({DateTimeFormatUtil.LuisDate(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)},{durationTimex})"; ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); ret.Success = true; } @@ -1659,7 +1916,9 @@ private DateTimeResolutionResult ParseWeekOfMonth(string text, DateObject refere int month; if (string.IsNullOrEmpty(monthStr)) { - var swift = this.config.GetSwiftDayOrMonth(trimmedText); + var relMonthValue = match.Groups["relmonth"].Value; + var monthText = !string.IsNullOrEmpty(relMonthValue) ? 
relMonthValue : trimmedText; + var swift = this.config.GetSwiftDayOrMonth(monthText); month = referenceDate.AddMonths(swift).Month; year = referenceDate.AddMonths(swift).Year; @@ -1714,14 +1973,14 @@ private DateTimeResolutionResult ParseWeekOfYear(string text, DateObject referen if (this.config.IsLastCardinal(cardinalStr)) { - targetWeekMonday = GetLastThursday(year).This(DayOfWeek.Monday); + targetWeekMonday = DateObjectExtension.GetLastThursday(year).This(DayOfWeek.Monday); ret.Timex = TimexUtility.GenerateWeekTimex(targetWeekMonday); } else { var weekNum = this.config.CardinalMap[cardinalStr]; - targetWeekMonday = GetFirstThursday(year).This(DayOfWeek.Monday) + targetWeekMonday = DateObjectExtension.GetFirstThursday(year).This(DayOfWeek.Monday) .AddDays(Constants.WeekDayCount * (weekNum - 1)); ret.Timex = TimexUtility.GenerateWeekOfYearTimex(year, weekNum); @@ -1761,7 +2020,7 @@ private DateTimeResolutionResult ParseHalfYear(string text, DateObject reference var swift = this.config.GetSwiftYear(orderStr); if (swift < -1) { - return ret; + swift = 0; } year = referenceDate.Year + swift; @@ -1770,7 +2029,7 @@ private DateTimeResolutionResult ParseHalfYear(string text, DateObject reference int halfNum; if (!string.IsNullOrEmpty(numberStr)) { - halfNum = int.Parse(numberStr); + halfNum = int.Parse(numberStr, CultureInfo.InvariantCulture); } else { @@ -1780,7 +2039,8 @@ private DateTimeResolutionResult ParseHalfYear(string text, DateObject reference var beginDate = DateObject.MinValue.SafeCreateFromValue(year, ((halfNum - 1) * Constants.SemesterMonthCount) + 1, 1); var endDate = DateObject.MinValue.SafeCreateFromValue(year, halfNum * Constants.SemesterMonthCount, 1).AddMonths(1); ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); - ret.Timex = $"({DateTimeFormatUtil.LuisDate(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)},P6M)"; + + ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDate, endDate, DatePeriodTimexType.ByMonth); ret.Success = true; 
return ret; @@ -1822,9 +2082,10 @@ private DateTimeResolutionResult ParseQuarter(string text, DateObject referenceD } int quarterNum; + int numOfQuarters = 0; if (!string.IsNullOrEmpty(numberStr)) { - quarterNum = int.Parse(numberStr); + quarterNum = int.Parse(numberStr, CultureInfo.InvariantCulture); } else if (!string.IsNullOrEmpty(orderQuarterStr)) { @@ -1832,6 +2093,18 @@ private DateTimeResolutionResult ParseQuarter(string text, DateObject referenceD quarterNum = decimal.ToInt16(Math.Ceiling((decimal)month / Constants.TrimesterMonthCount)); var swift = this.config.GetSwiftYear(orderQuarterStr); quarterNum += swift; + var numStr = match.Groups[Constants.NumGroupName].Value; + var er = this.config.IntegerExtractor.Extract(numStr); + if (er.Count == 1) + { + numOfQuarters = Convert.ToInt32((double)(this.config.NumberParser.Parse(er[0]).Value ?? 0)) - 1; + } + + if (numOfQuarters > 0 && swift >= 0) + { + quarterNum += numOfQuarters; + } + if (quarterNum <= 0) { quarterNum += Constants.QuarterCount; @@ -1839,8 +2112,8 @@ private DateTimeResolutionResult ParseQuarter(string text, DateObject referenceD } else if (quarterNum > Constants.QuarterCount) { - quarterNum -= Constants.QuarterCount; - year += 1; + year += quarterNum / Constants.QuarterCount; + quarterNum = quarterNum % Constants.QuarterCount; } } else @@ -1848,7 +2121,7 @@ private DateTimeResolutionResult ParseQuarter(string text, DateObject referenceD quarterNum = this.config.CardinalMap[cardinalStr]; } - var beginDate = DateObject.MinValue.SafeCreateFromValue(year, ((quarterNum - 1) * Constants.TrimesterMonthCount) + 1, 1); + var beginDate = DateObject.MinValue.SafeCreateFromValue(year, ((quarterNum - 1) * Constants.TrimesterMonthCount) + 1, 1).AddMonths(-numOfQuarters * Constants.TrimesterMonthCount); var endDate = DateObject.MinValue.SafeCreateFromValue(year, quarterNum * Constants.TrimesterMonthCount, 1).AddMonths(1); if (noSpecificYear) @@ -1873,13 +2146,15 @@ private DateTimeResolutionResult 
ParseQuarter(string text, DateObject referenceD { ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); } + + ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDate, endDate, DatePeriodTimexType.ByMonth, UnspecificDateTimeTerms.NonspecificYear); } else { ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); + ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDate, endDate, DatePeriodTimexType.ByMonth); } - ret.Timex = $"({DateTimeFormatUtil.LuisDate(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)},P3M)"; ret.Success = true; return ret; @@ -1921,7 +2196,7 @@ private DateTimeResolutionResult ParseSeason(string text, DateObject referenceDa year = referenceDate.Year + swift; } - var yearStr = year.ToString("D4"); + var yearStr = year.ToString("D4", CultureInfo.InvariantCulture); ret.Timex = yearStr + "-" + seasonTimex; ret.Success = true; @@ -1994,14 +2269,27 @@ private DateTimeResolutionResult ParseWhichWeek(string text, DateObject referenc if (match.Success) { - var num = int.Parse(match.Groups["number"].ToString()); + var num = int.Parse(match.Groups[Constants.NumberGroupName].ToString(), CultureInfo.InvariantCulture); if (num == 0) { return ret; } - var year = referenceDate.Year; - ret.Timex = year.ToString("D4") + "-W" + num.ToString("D2"); + // cases like "week 23 of 2019", "week 12 of last year" + var year = config.DateExtractor.GetYearFromText(match.Match); + if (year == Constants.InvalidYear) + { + var orderStr = match.Groups[Constants.OrderGroupName].Value; + var swift = this.config.GetSwiftYear(orderStr); + if (swift < -1) + { + swift = 0; + } + + year = referenceDate.Year + swift; + } + + ret.Timex = year.ToString("D4", CultureInfo.InvariantCulture) + "-W" + num.ToString("D2", CultureInfo.InvariantCulture); var firstDay = DateObject.MinValue.SafeCreateFromValue(year, 1, 1); var firstThursday = firstDay.AddDays(DayOfWeek.Thursday - firstDay.DayOfWeek); @@ -2072,13 +2360,13 @@ private DateObject GetMondayOfTargetWeek(string 
cardinalStr, int month, int year DateObject result; if (config.IsLastCardinal(cardinalStr)) { - var lastThursday = GetLastThursday(year, month); + var lastThursday = DateObjectExtension.GetLastThursday(year, month); result = lastThursday.This(DayOfWeek.Monday); } else { int cardinal = GetWeekNumberForMonth(cardinalStr); - var firstThursday = GetFirstThursday(year, month); + var firstThursday = DateObjectExtension.GetFirstThursday(year, month); result = firstThursday.This(DayOfWeek.Monday) .AddDays(Constants.WeekDayCount * (cardinal - 1)); @@ -2108,7 +2396,6 @@ private DateTimeResolutionResult ParseDecade(string text, DateObject referenceDa var trimmedText = text.Trim(); var match = this.config.DecadeWithCenturyRegex.MatchExact(trimmedText, trim: true); - string beginLuisStr, endLuisStr; if (match.Success) { @@ -2199,23 +2486,7 @@ private DateTimeResolutionResult ParseDecade(string text, DateObject referenceDa // swift = 0 corresponding to the/this decade var totalLastYear = decadeLastYear * Math.Abs(swift == 0 ? 
1 : swift); - if (inputCentury) - { - beginLuisStr = DateTimeFormatUtil.LuisDate(beginYear, 1, 1); - endLuisStr = DateTimeFormatUtil.LuisDate(beginYear + totalLastYear, 1, 1); - } - else - { - var beginYearStr = "XX" + decade; - beginLuisStr = DateTimeFormatUtil.LuisDate(-1, 1, 1); - beginLuisStr = beginLuisStr.Replace("XXXX", beginYearStr); - - var endYearStr = "XX" + (decade + totalLastYear); - endLuisStr = DateTimeFormatUtil.LuisDate(-1, 1, 1); - endLuisStr = endLuisStr.Replace("XXXX", endYearStr); - } - - ret.Timex = $"({beginLuisStr},{endLuisStr},P{totalLastYear}Y)"; + ret.Timex = TimexUtility.GenerateDecadeTimex(beginYear, totalLastYear, decade, inputCentury); int futureYear = beginYear, pastYear = beginYear; var startDate = DateObject.MinValue.SafeCreateFromValue(beginYear, 1, 1); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimeAltParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimeAltParser.cs index c319cf605c..776098e07f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimeAltParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimeAltParser.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using DateObject = System.DateTime; @@ -25,7 +28,7 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) var referenceTime = refTime; object value = null; - if (er.Type.Equals(ParserName)) + if (er.Type.Equals(ParserName, StringComparison.Ordinal)) { var innerResult = ParseDateTimeAndTimeAlt(er, referenceTime); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimeParser.cs index 0dba2cf074..7dce1b5c4d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimeParser.cs @@ -1,5 +1,10 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; +using System.Globalization; + using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -26,9 +31,15 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) var referenceTime = refTime; object value = null; - if (er.Type.Equals(ParserName)) + if (er.Type.Equals(ParserName, StringComparison.Ordinal)) { var innerResult = MergeDateAndTime(er.Text, referenceTime); + + if ((!innerResult.Success) && ((config.Options & DateTimeOptions.TasksMode) != 0)) + { + innerResult = MergeHolidayAndTime(er.Text, referenceTime); + } + if (!innerResult.Success) { innerResult = ParseBasicRegex(er.Text, referenceTime); @@ -85,19 +96,6 @@ public List FilterResults(string query, List 0 && er1.Count == 1) + { + foreach (var num in numErs) + { + var middleBegin = er1[0].Start + er1[0].Length ?? 0; + var middleEnd = num.Start ?? 
0; + if (middleBegin > middleEnd) + { + continue; + } + + var middleStr = text.Substring(middleBegin, middleEnd - middleBegin).Trim(); + var match = this.config.DateNumberConnectorRegex.Match(middleStr); + if (string.IsNullOrEmpty(middleStr) || match.Success) + { + num.Type = Constants.SYS_DATETIME_TIME; + er2.Add(num); + hasTimeNumber = true; + } + } + } + + if (!hasTimeNumber) + { + return ret; + } + } + } + + var correctTimeIdx = 0; + while (correctTimeIdx < er2.Count && er2[correctTimeIdx].IsOverlap(er1[0])) + { + correctTimeIdx++; + } + + if (correctTimeIdx >= er2.Count) + { + return ret; + } + + var pr1 = this.config.HolidayTimeParser.Parse(er1[0], referenceTime.Date); + + var pr2 = this.config.TimeParser.Parse(er2[correctTimeIdx], referenceTime); + if (pr1.Value == null || pr2.Value == null) + { + return ret; + } + + var futureDate = (DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue; + var pastDate = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue; + var time = (DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue; + + var hour = time.Hour; + var min = time.Minute; + var sec = time.Second; + + // Handle morning, afternoon + if (this.config.PMTimeRegex.IsMatch(text) && WithinAfternoonHours(hour)) + { + hour += Constants.HalfDayHourCount; + } + else if (this.config.AMTimeRegex.IsMatch(text) && WithinMorningHoursAndNoon(hour, min, sec)) + { + hour -= Constants.HalfDayHourCount; + } + + var timeStr = pr2.TimexStr; + if (timeStr.EndsWith(Constants.Comment_AmPm, StringComparison.Ordinal)) + { + timeStr = timeStr.Substring(0, timeStr.Length - 4); + } + + timeStr = "T" + hour.ToString("D2", CultureInfo.InvariantCulture) + timeStr.Substring(3); + ret.Timex = pr1.TimexStr + timeStr; + + var val = (DateTimeResolutionResult)pr2.Value; + if (hour <= Constants.HalfDayHourCount && !this.config.PMTimeRegex.IsMatch(text) && !this.config.AMTimeRegex.IsMatch(text) && + !string.IsNullOrEmpty(val.Comment)) + { + ret.Comment = 
Constants.Comment_AmPm; + } + + ret.FutureValue = DateObject.MinValue.SafeCreateFromValue(futureDate.Year, futureDate.Month, futureDate.Day, hour, min, sec); + ret.PastValue = DateObject.MinValue.SafeCreateFromValue(pastDate.Year, pastDate.Month, pastDate.Day, hour, min, sec); + + // Handle case like "on christmas 15:50:06 2016" which year and holiday separated by time. + var timeSuffix = text.Substring(er2[0].Start + er2[0].Length ?? 0); + var matchYear = this.config.YearRegex.Match(timeSuffix); + if (matchYear.Success && ((DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue).Year != ((DateObject)((DateTimeResolutionResult)pr1.Value).PastValue).Year) + { + var year = ((BaseDateExtractor)this.config.DateExtractor).GetYearFromText(matchYear); + var dateSuffix = text.Substring(er1[0].Start + er1[0].Length ?? 0); + var checkYear = this.config.DateExtractor.GetYearFromText(this.config.YearRegex.Match(dateSuffix)); + + if (year >= Constants.MinYearNum && year <= Constants.MaxYearNum && year == checkYear) + { + ret.FutureValue = DateObject.MinValue.SafeCreateFromValue(year, futureDate.Month, futureDate.Day, hour, min, sec); + ret.PastValue = DateObject.MinValue.SafeCreateFromValue(year, pastDate.Month, pastDate.Day, hour, min, sec); + ret.Timex = year + pr1.TimexStr.Substring(4) + timeStr; + } + } + + ret.Success = true; + + // Change the value of time object + pr2.TimexStr = timeStr; + if (!string.IsNullOrEmpty(ret.Comment)) + { + ((DateTimeResolutionResult)pr2.Value).Comment = ret.Comment.Equals(Constants.Comment_AmPm, StringComparison.Ordinal) ? 
+ Constants.Comment_AmPm : string.Empty; + } + + // Add the date and time object in case we want to split them + ret.SubDateTimeEntities = new List { pr1, pr2 }; + + // Add timezone + ret.TimeZoneResolution = ((DateTimeResolutionResult)pr2.Value).TimeZoneResolution; + + return ret; + } + private DateTimeResolutionResult ParseTimeOfToday(string text, DateObject referenceTime) { var ret = new DateTimeResolutionResult(); @@ -315,20 +481,22 @@ private DateTimeResolutionResult ParseTimeOfToday(string text, DateObject refere } else { - hour = int.Parse(hourStr); + hour = int.Parse(hourStr, CultureInfo.InvariantCulture); } - timeStr = "T" + hour.ToString("D2"); + timeStr = "T" + hour.ToString("D2", CultureInfo.InvariantCulture); } else { var ers = this.config.TimeExtractor.Extract(trimmedText, referenceTime); if (ers.Count != 1) { - ers = this.config.TimeExtractor.Extract(this.config.TokenBeforeTime + trimmedText, referenceTime); + var prefixToken = this.config.TokenBeforeTime; + ers = this.config.TimeExtractor.Extract(prefixToken + trimmedText, referenceTime); + if (ers.Count == 1) { - ers[0].Start -= this.config.TokenBeforeTime.Length; + ers[0].Start -= prefixToken.Length; } else { @@ -342,6 +510,9 @@ private DateTimeResolutionResult ParseTimeOfToday(string text, DateObject refere return ret; } + // Add timezone + ret.TimeZoneResolution = ((DateTimeResolutionResult)pr.Value).TimeZoneResolution; + var time = (DateObject)((DateTimeResolutionResult)pr.Value).FutureValue; hour = time.Hour; @@ -370,7 +541,7 @@ private DateTimeResolutionResult ParseTimeOfToday(string text, DateObject refere timeStr = timeStr.Substring(0, timeStr.Length - 4); } - timeStr = "T" + hour.ToString("D2") + timeStr.Substring(3); + timeStr = "T" + hour.ToString("D2", CultureInfo.InvariantCulture) + timeStr.Substring(3); ret.Timex = DateTimeFormatUtil.FormatDate(date) + timeStr; ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(date.Year, date.Month, date.Day, hour, min, sec); 
@@ -383,9 +554,8 @@ private DateTimeResolutionResult ParseTimeOfToday(string text, DateObject refere private DateTimeResolutionResult ParseSpecialTimeOfDate(string text, DateObject refDateTime) { - var ret = new DateTimeResolutionResult(); + var ret = ParseUnspecificTimeOfDate(text, refDateTime); - ret = ParseUnspecificTimeOfDate(text, refDateTime); if (ret.Success) { return ret; @@ -405,7 +575,7 @@ private DateTimeResolutionResult ParseSpecialTimeOfDate(string text, DateObject var futureDate = (DateObject)((DateTimeResolutionResult)pr.Value).FutureValue; var pastDate = (DateObject)((DateTimeResolutionResult)pr.Value).PastValue; - ret = ResolveEndOfDay(pr.TimexStr, futureDate, pastDate); + ret = DateTimeFormatUtil.ResolveEndOfDay(pr.TimexStr, futureDate, pastDate); } return ret; @@ -418,7 +588,7 @@ private DateTimeResolutionResult ParseUnspecificTimeOfDate(string text, DateObje var eod = this.config.UnspecificEndOfRegex.Match(text); if (eod.Success) { - ret = ResolveEndOfDay(DateTimeFormatUtil.FormatDate(refDateTime), refDateTime, refDateTime); + ret = DateTimeFormatUtil.ResolveEndOfDay(DateTimeFormatUtil.FormatDate(refDateTime), refDateTime, refDateTime); } return ret; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimePeriodParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimePeriodParser.cs index 108603304c..fcad359082 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimePeriodParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimePeriodParser.cs @@ -1,7 +1,13 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; +using System.Globalization; using System.Linq; +using System.Reflection; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.DateTime.English; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -16,6 +22,8 @@ public BaseDateTimePeriodParser(IDateTimePeriodParserConfiguration configuration Config = configuration; } + protected static TimeSpan RegexTimeOut => DateTimeRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + protected IDateTimePeriodParserConfiguration Config { get; private set; } public ParseResult Parse(ExtractResult result) @@ -28,7 +36,7 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) var referenceTime = refTime; object value = null; - if (er.Type.Equals(ParserName)) + if (er.Type.Equals(ParserName, StringComparison.Ordinal)) { var innerResult = InternalParse(er.Text, referenceTime); @@ -154,23 +162,29 @@ protected DateTimeResolutionResult InternalParse(string entityText, DateObject r if (!innerResult.Success) { - innerResult = ParseDuration(entityText, referenceTime); + innerResult = ParseDateWithPeriodPrefix(entityText, referenceTime); } if (!innerResult.Success) { - innerResult = ParseRelativeUnit(entityText, referenceTime); + // Cases like "today after 2:00pm", "1/1/2015 before 2:00 in the afternoon" + innerResult = ParseDateWithTimePeriodSuffix(entityText, referenceTime); } if (!innerResult.Success) { - innerResult = ParseDateWithPeriodPrefix(entityText, referenceTime); + // Parsing cases like [duration] starting [datetime] + innerResult = ParseStartingWithDuration(entityText, referenceTime); } if (!innerResult.Success) { - // Cases like "today after 2:00pm", "1/1/2015 before 2:00 in the afternoon" - innerResult = ParseDateWithTimePeriodSuffix(entityText, referenceTime); + innerResult = ParseDuration(entityText, referenceTime); + } + + if (!innerResult.Success) + { + innerResult = 
ParseRelativeUnit(entityText, referenceTime); } return innerResult; @@ -233,9 +247,9 @@ protected virtual DateTimeResolutionResult ParseSpecificTimeOfDay(string text, D // the first 2 hours represent early, the later 2 hours represent late if (hasEarly) { - endHour = beginHour + 2; + endHour = beginHour + Constants.EARLY_LATE_TIME_DELTA; - // Handling special case: night ends with 23:59 due to C# issues. + // Handling special case: night ends at 23:59 due to .NET issues. if (endMin == 59) { endMin = 0; @@ -243,7 +257,7 @@ protected virtual DateTimeResolutionResult ParseSpecificTimeOfDay(string text, D } else if (hasLate) { - beginHour = beginHour + 2; + beginHour += Constants.EARLY_LATE_TIME_DELTA; } if (Config.SpecificTimeOfDayRegex.IsExactMatch(trimmedText, trim: true)) @@ -277,20 +291,26 @@ protected virtual DateTimeResolutionResult ParseSpecificTimeOfDay(string text, D } } + if (!match.Success && ((Config.Options & DateTimeOptions.TasksMode) != 0) + && this.Config.TasksmodeMealTimeofDayRegex != null) + { + match = this.Config.TasksmodeMealTimeofDayRegex.Match(trimmedText); + } + if (match.Success) { var beforeStr = trimmedText.Substring(0, match.Index).Trim(); var afterStr = trimmedText.Substring(match.Index + match.Length).Trim(); // Eliminate time period, if any - var timePeriodErs = this.Config.TimePeriodExtractor.Extract(beforeStr); + var timePeriodErs = this.Config.TimePeriodExtractor.Extract(beforeStr, referenceTime); if (timePeriodErs.Count > 0) { beforeStr = beforeStr.Remove(timePeriodErs[0].Start ?? 0, timePeriodErs[0].Length ?? 0).Trim(); } else { - timePeriodErs = this.Config.TimePeriodExtractor.Extract(afterStr); + timePeriodErs = this.Config.TimePeriodExtractor.Extract(afterStr, referenceTime); if (timePeriodErs.Count > 0) { afterStr = afterStr.Remove(timePeriodErs[0].Start ?? 0, timePeriodErs[0].Length ?? 
0).Trim(); @@ -299,6 +319,89 @@ protected virtual DateTimeResolutionResult ParseSpecificTimeOfDay(string text, D var ers = this.Config.DateExtractor.Extract(beforeStr + ' ' + afterStr, referenceTime); + if ((Config.Options & DateTimeOptions.TasksMode) != 0) + { + ers.AddRange(this.Config.HolidayExtractor.Extract(beforeStr + ' ' + afterStr, referenceTime)); + } + + // Consider cases with specific time of day e.g. "between 7 and 9:30 last night" + if (ers.Count == 0) + { + match = Config.SpecificTimeOfDayRegex.Match(trimmedText); + if (match.Success) + { + var matchStr = match.Value; + + // Handle "last", "next" + var swift = this.Config.GetSwiftPrefix(matchStr); + var timeOfDayDate = referenceTime.AddDays(swift).Date; + + var dateTimexStr = DateTimeFormatUtil.FormatDate(timeOfDayDate); + + var futDate = DateObject.MinValue.SafeCreateFromValue(timeOfDayDate.Year, timeOfDayDate.Month, timeOfDayDate.Day, 0, 0, 0); + var pasDate = futDate; + + var timePeriodParseResult = timePeriodErs.Count > 0 ? Config.TimePeriodParser.Parse(timePeriodErs[0]) : null; + var timePeriodResolutionResult = timePeriodErs.Count > 0 ? 
(DateTimeResolutionResult)timePeriodParseResult.Value : null; + + if (timePeriodResolutionResult == null) + { + return ParsePureNumberCases(text, referenceTime); + } + + var periodTimex = timePeriodResolutionResult.Timex; + + var rangeTimexComponents = TimexUtility.GetRangeTimexComponents(periodTimex); + + if (rangeTimexComponents.IsValid) + { + var beginTimex = TimexUtility.CombineDateAndTimeTimex(dateTimexStr, rangeTimexComponents.BeginTimex); + var endTimex = TimexUtility.CombineDateAndTimeTimex(dateTimexStr, rangeTimexComponents.EndTimex); + + var timePeriodFutureValue = (Tuple)timePeriodResolutionResult.FutureValue; + var beginTime = timePeriodFutureValue.Item1; + var endTime = timePeriodFutureValue.Item2; + var hour1 = beginTime.Hour; + var hour2 = endTime.Hour; + + if (match.Groups["pm"].Success) + { + if (hour1 <= Constants.HalfDayHourCount) + { + hour1 += Constants.HalfDayHourCount; + List timexList = new List(beginTimex.Split('T')); + beginTimex = timexList[0] + 'T' + hour1 + timexList[1].Substring(2); + } + + if (hour2 <= Constants.HalfDayHourCount) + { + hour2 += Constants.HalfDayHourCount; + List timexList = new List(endTimex.Split('T')); + endTimex = timexList[0] + 'T' + hour2 + timexList[1].Substring(2); + } + } + + ret.Timex = TimexUtility.GenerateDateTimePeriodTimex(beginTimex, endTimex, rangeTimexComponents.DurationTimex); + + ret.FutureValue = new Tuple( + DateObject.MinValue.SafeCreateFromValue( + futDate.Year, futDate.Month, futDate.Day, hour1, beginTime.Minute, beginTime.Second), + DateObject.MinValue.SafeCreateFromValue( + futDate.Year, futDate.Month, futDate.Day, hour2, endTime.Minute, endTime.Second)); + + ret.PastValue = new Tuple( + DateObject.MinValue.SafeCreateFromValue( + pasDate.Year, pasDate.Month, pasDate.Day, hour1, beginTime.Minute, beginTime.Second), + DateObject.MinValue.SafeCreateFromValue( + pasDate.Year, pasDate.Month, pasDate.Day, hour2, endTime.Minute, endTime.Second)); + + ret.Success = true; + + return ret; + } + } + } + 
if (ers.Count == 0 || ers[0].Length < beforeStr.Length) { var valid = false; @@ -316,6 +419,11 @@ protected virtual DateTimeResolutionResult ParseSpecificTimeOfDay(string text, D { ers = this.Config.DateExtractor.Extract(afterStr, referenceTime); + if ((Config.Options & DateTimeOptions.TasksMode) != 0) + { + ers.AddRange(this.Config.HolidayExtractor.Extract(beforeStr + ' ' + afterStr, referenceTime)); + } + if (ers.Count == 0 || ers[0].Length != afterStr.Length) { if (ers.Count > 0 && ers[0].Start + ers[0].Length == afterStr.Length) @@ -372,6 +480,12 @@ protected virtual DateTimeResolutionResult ParseSpecificTimeOfDay(string text, D } var pr = this.Config.DateParser.Parse(ers[0], referenceTime); + + if (pr.Value == null && ((Config.Options & DateTimeOptions.TasksMode) != 0)) + { + pr = this.Config.HolidayTimeParser.Parse(ers[0], referenceTime); + } + var futureDate = (DateObject)((DateTimeResolutionResult)pr.Value).FutureValue; var pastDate = (DateObject)((DateTimeResolutionResult)pr.Value).PastValue; @@ -381,7 +495,7 @@ protected virtual DateTimeResolutionResult ParseSpecificTimeOfDay(string text, D } else { - ret.Timex = string.Format("({0}T{1},{0}T{2},PT{3}H)", pr.TimexStr, beginHour, endHour, endHour - beginHour); + ret.Timex = string.Format(CultureInfo.InvariantCulture, "({0}T{1},{0}T{2},PT{3}H)", pr.TimexStr, beginHour, endHour, endHour - beginHour); } ret.FutureValue = @@ -404,7 +518,15 @@ protected virtual DateTimeResolutionResult ParseSpecificTimeOfDay(string text, D private bool IsBeforeOrAfterMod(string mod) { - return !string.IsNullOrEmpty(mod) && (mod == Constants.BEFORE_MOD || mod == Constants.AFTER_MOD); + if (!this.Config.CheckBothBeforeAfter) + { + return !string.IsNullOrEmpty(mod) && (mod == Constants.BEFORE_MOD || mod == Constants.AFTER_MOD); + } + else + { + // matches with InclusiveModPrepositions are also parsed here + return !string.IsNullOrEmpty(mod) && (mod == Constants.BEFORE_MOD || mod == Constants.AFTER_MOD || mod == Constants.UNTIL_MOD 
|| mod == Constants.SINCE_MOD); + } } // Cases like "today after 2:00pm", "1/1/2015 before 2:00 in the afternoon" @@ -412,20 +534,42 @@ private DateTimeResolutionResult ParseDateWithTimePeriodSuffix(string text, Date { var ret = new DateTimeResolutionResult(); - var dateEr = this.Config.DateExtractor.Extract(text).FirstOrDefault(); - var timeEr = this.Config.TimeExtractor.Extract(text).FirstOrDefault(); + var dateEr = this.Config.DateExtractor.Extract(text, referenceTime).FirstOrDefault(); + + if (dateEr == null && ((Config.Options & DateTimeOptions.TasksMode) != 0)) + { + dateEr = this.Config.HolidayExtractor.Extract(text, referenceTime).FirstOrDefault(); + } + + var timeEr = this.Config.TimeExtractor.Extract(text, referenceTime).FirstOrDefault(); if (dateEr != null && timeEr != null) { var dateStrEnd = (int)(dateEr.Start + dateEr.Length); + var timeStrEnd = (int)(timeEr.Start + timeEr.Length); if (dateStrEnd < timeEr.Start) { var midStr = text.Substring(dateStrEnd, timeEr.Start.Value - dateStrEnd).Trim(); + var afterStr = text.Substring(timeStrEnd); + + string modStr = GetValidConnectorModForDateAndTimePeriod(midStr, inPrefix: true); - if (IsValidConnectorForDateAndTimePeriod(midStr)) + // check also afterStr + if (string.IsNullOrEmpty(modStr) && this.Config.CheckBothBeforeAfter) + { + modStr = midStr.Length <= 4 ? 
GetValidConnectorModForDateAndTimePeriod(afterStr, inPrefix: false) : null; + } + + if (!string.IsNullOrEmpty(modStr)) { var datePr = this.Config.DateParser.Parse(dateEr, referenceTime); + + if (((Config.Options & DateTimeOptions.TasksMode) != 0) && (datePr.Value == null)) + { + datePr = this.Config.HolidayTimeParser.Parse(dateEr, referenceTime); + } + var timePr = this.Config.TimeParser.Parse(timeEr, referenceTime); if (datePr != null && timePr != null) @@ -444,7 +588,7 @@ private DateTimeResolutionResult ParseDateWithTimePeriodSuffix(string text, Date ret.PastValue = DateObject.MinValue.SafeCreateFromValue(pastDateValue.Year, pastDateValue.Month, pastDateValue.Day, pastTimeValue.Hour, pastTimeValue.Minute, pastTimeValue.Second); - ret.Mod = this.Config.BeforeRegex.Match(midStr).Success ? Constants.BEFORE_MOD : Constants.AFTER_MOD; + ret.Mod = modStr; ret.SubDateTimeEntities = new List() { datePr, @@ -463,39 +607,77 @@ private DateTimeResolutionResult ParseDateWithTimePeriodSuffix(string text, Date // Cases like "today after 2:00pm", "1/1/2015 before 2:00 in the afternoon" // Valid connector in English for Before include: "before", "no later than", "in advance of", "prior to", "earlier than", "sooner than", "by", "till", "until"... // Valid connector in English for After include: "after", "later than" - private bool IsValidConnectorForDateAndTimePeriod(string text) + private string GetValidConnectorModForDateAndTimePeriod(string text, bool inPrefix) { - var beforeAfterRegexes = new List() + string mod = null; + + // Item1 is the regex to be tested + // Item2 is the mod corresponding to an inclusive match (i.e. containing an InclusiveModPrepositions, e.g. "at or before 3") + // Item3 is the mod corresponding to a non-inclusive match (e.g. 
"before 3") + var beforeAfterRegexTuples = new List<(Regex, string, string)> { - this.Config.BeforeRegex, - this.Config.AfterRegex, + (this.Config.BeforeRegex, Constants.UNTIL_MOD, Constants.BEFORE_MOD), + (this.Config.AfterRegex, Constants.SINCE_MOD, Constants.AFTER_MOD), }; - foreach (var regex in beforeAfterRegexes) + foreach (var regex in beforeAfterRegexTuples) { - var match = regex.MatchExact(text, trim: true); - + var match = inPrefix ? regex.Item1.MatchExact(text, trim: true) : regex.Item1.MatchBegin(text, trim: true); if (match.Success) { - return true; + mod = inPrefix ? regex.Item3 : (match.Groups[Constants.IncludeGroupName].Success ? regex.Item2 : regex.Item3); + return mod; } } - return false; + return mod; } private DateTimeResolutionResult ParseDateWithPeriodPrefix(string text, DateObject referenceTime) { var ret = new DateTimeResolutionResult(); - var dateResult = this.Config.DateExtractor.Extract(text); + var dateResult = this.Config.DateExtractor.Extract(text, referenceTime); + + if ((Config.Options & DateTimeOptions.TasksMode) != 0) + { + dateResult.AddRange(this.Config.HolidayExtractor.Extract(text, referenceTime)); + } + if (dateResult.Count > 0) { + DateTimeParseResult pr = new DateTimeParseResult(); var beforeString = text.Substring(0, (int)dateResult.Last().Start).TrimEnd(); var match = Config.PrefixDayRegex.Match(beforeString); if (match.Success) { - var pr = this.Config.DateParser.Parse(dateResult.Last(), referenceTime); + pr = this.Config.DateParser.Parse(dateResult.Last(), referenceTime); + + if ((pr.Value == null) && ((Config.Options & DateTimeOptions.TasksMode) != 0)) + { + pr = this.Config.HolidayTimeParser.Parse(dateResult.Last(), referenceTime); + } + } + + // Check also afterString + if (!match.Success && this.Config.CheckBothBeforeAfter) + { + var afterString = text.Substring((int)(dateResult.First().Start + dateResult.First().Length), + text.Length - ((int)(dateResult.First().Start + dateResult.First().Length))).TrimStart(); + 
match = Config.PrefixDayRegex.Match(afterString); + if (match.Success) + { + pr = this.Config.DateParser.Parse(dateResult.First(), referenceTime); + + if ((pr.Value == null) && ((Config.Options & DateTimeOptions.TasksMode) != 0)) + { + pr = this.Config.HolidayTimeParser.Parse(dateResult.First(), referenceTime); + } + } + } + + if (match.Success) + { if (pr.Value != null) { var startTime = (DateObject)((DateTimeResolutionResult)pr.Value).FutureValue; @@ -564,8 +746,27 @@ private DateTimeResolutionResult MergeDateWithSingleTimePeriod(string text, Date { var dateResult = this.Config.DateExtractor.Extract(trimmedText.Replace(ers[0].Text, string.Empty), referenceTime); - // check if TokenBeforeDate is null - var dateText = !string.IsNullOrEmpty(Config.TokenBeforeDate) ? trimmedText.Replace(ers[0].Text, string.Empty).Replace(Config.TokenBeforeDate, string.Empty).Trim() : trimmedText.Replace(ers[0].Text, string.Empty).Trim(); + if ((Config.Options & DateTimeOptions.TasksMode) != 0) + { + dateResult.AddRange(this.Config.HolidayExtractor.Extract(trimmedText.Replace(ers[0].Text, string.Empty), referenceTime)); + } + + // Try to add TokenBeforeDate if no result is found because it is not always included in the DateTimePeriod extraction + // (e.g. "I'll leave on the 17 from 2 to 4 pm" -> "the 17 from 2 to 4 pm") + if (dateResult.Count == 0) + { + dateResult = this.Config.DateExtractor.Extract(Config.TokenBeforeDate + trimmedText.Substring(0, (int)ers[0].Start), referenceTime); + + if ((Config.Options & DateTimeOptions.TasksMode) != 0) + { + dateResult.AddRange(this.Config.HolidayExtractor.Extract(Config.TokenBeforeDate + trimmedText.Substring(0, (int)ers[0].Start), referenceTime)); + } + } + + // check if TokenBeforeDate and TokenBeforeTime are null + var dateText = trimmedText.Replace(ers[0].Text, string.Empty).Trim(); + dateText = !string.IsNullOrEmpty(Config.TokenBeforeDate) && dateText.StartsWith(Config.TokenBeforeDate) ? 
dateText.Replace(Config.TokenBeforeDate, string.Empty).Trim() : dateText; + dateText = !string.IsNullOrEmpty(Config.TokenBeforeTime) ? dateText.Replace(Config.TokenBeforeTime.Trim(), string.Empty).Trim() : dateText; if (this.Config.CheckBothBeforeAfter) { List tokenListBeforeDate = Config.TokenBeforeDate.Split('|').ToList(); @@ -576,7 +777,7 @@ private DateTimeResolutionResult MergeDateWithSingleTimePeriod(string text, Date } // If only one Date is extracted and the Date text equals to the rest part of source text - if (dateResult.Count == 1 && dateText.Equals(dateResult[0].Text)) + if (dateResult.Count == 1 && dateText.Equals(dateResult[0].Text, StringComparison.Ordinal)) { string dateTimex; DateObject futureTime; @@ -584,6 +785,11 @@ private DateTimeResolutionResult MergeDateWithSingleTimePeriod(string text, Date var pr = this.Config.DateParser.Parse(dateResult[0], referenceTime); + if ((pr.Value == null) && ((Config.Options & DateTimeOptions.TasksMode) != 0)) + { + pr = this.Config.HolidayTimeParser.Parse(dateResult[0], referenceTime); + } + if (pr.Value != null) { futureTime = (DateObject)((DateTimeResolutionResult)pr.Value).FutureValue; @@ -661,6 +867,15 @@ private DateTimeResolutionResult ParsePureNumberCases(string text, DateObject re if (match.Success && (match.Index == 0 || match.Index + match.Length == trimmedText.Length)) { + + // Just because we think we found a time period doesn't mean it is one, it could be the start of a hyphenated date + var hyphenDateMatch = this.Config.HyphenDateRegex.Match(trimmedText); + + if (hyphenDateMatch.Success && hyphenDateMatch.Index >= match.Index && (match.Index + match.Length) <= (hyphenDateMatch.Index + hyphenDateMatch.Length)) + { + return ret; + } + int beginHour, endHour; ret.Comment = ParseTimePeriod(match, out beginHour, out endHour); @@ -669,10 +884,33 @@ private DateTimeResolutionResult ParsePureNumberCases(string text, DateObject re // Parse following date var dateExtractResult = 
this.Config.DateExtractor.Extract(trimmedText.Replace(match.Value, string.Empty), referenceTime); + if ((Config.Options & DateTimeOptions.TasksMode) != 0) + { + dateExtractResult.AddRange(this.Config.HolidayExtractor.Extract(trimmedText.Replace(match.Value, string.Empty), referenceTime)); + } + + // Try to add TokenBeforeDate if no result is found because it is not always included in the DateTimePeriod extraction + // (e.g. "I'll leave on the 17 from 2 to 4 pm" -> "the 17 from 2 to 4 pm") + if (dateExtractResult.Count == 0) + { + dateExtractResult = this.Config.DateExtractor.Extract(Config.TokenBeforeDate + trimmedText.Substring(0, match.Index), referenceTime); + + if ((Config.Options & DateTimeOptions.TasksMode) != 0) + { + dateExtractResult.AddRange(this.Config.HolidayExtractor.Extract(Config.TokenBeforeDate + trimmedText.Substring(0, match.Index), referenceTime)); + } + } + DateObject futureDate, pastDate; if (dateExtractResult.Count > 0) { var pr = this.Config.DateParser.Parse(dateExtractResult[0], referenceTime); + + if (((Config.Options & DateTimeOptions.TasksMode) != 0) && (pr.Value == null)) + { + pr = this.Config.HolidayTimeParser.Parse(dateExtractResult[0], referenceTime); + } + if (pr.Value != null) { futureDate = (DateObject)((DateTimeResolutionResult)pr.Value).FutureValue; @@ -688,7 +926,39 @@ private DateTimeResolutionResult ParsePureNumberCases(string text, DateObject re } else { - return ret; + // Consider cases with specific time of day e.g. 
"between 7 and 9 last night" + match = Config.SpecificTimeOfDayRegex.Match(trimmedText); + if (match.Success) + { + var matchStr = match.Value; + ret.Comment = null; + + // Handle "last", "next" + var swift = this.Config.GetSwiftPrefix(matchStr); + var timeOfDayDate = referenceTime.AddDays(swift).Date; + + dateStr = DateTimeFormatUtil.FormatDate(timeOfDayDate); + + futureDate = DateObject.MinValue.SafeCreateFromValue(timeOfDayDate.Year, timeOfDayDate.Month, timeOfDayDate.Day, 0, 0, 0); + pastDate = futureDate; + + if (match.Groups["pm"].Success) + { + if (beginHour <= Constants.HalfDayHourCount) + { + beginHour += Constants.HalfDayHourCount; + } + + if (endHour <= Constants.HalfDayHourCount) + { + endHour += Constants.HalfDayHourCount; + } + } + } + else + { + return ret; + } } var pastHours = endHour - beginHour; @@ -729,7 +999,7 @@ private string ParseTimePeriod(Match match, out int beginHour, out int endHour) } else { - beginHour = int.Parse(hourStr); + beginHour = int.Parse(hourStr, CultureInfo.InvariantCulture); } hourStr = hourGroup.Captures[1].Value; @@ -740,7 +1010,7 @@ private string ParseTimePeriod(Match match, out int beginHour, out int endHour) } else { - endHour = int.Parse(hourStr); + endHour = int.Parse(hourStr, CultureInfo.InvariantCulture); } // Parse "pm" @@ -752,7 +1022,7 @@ private string ParseTimePeriod(Match match, out int beginHour, out int endHour) if (!string.IsNullOrEmpty(beginDescStr) && !string.IsNullOrEmpty(endDescStr)) { - if (beginDescStr.StartsWith("a")) + if (beginDescStr.StartsWith("a", StringComparison.Ordinal)) { if (beginHour >= Constants.HalfDayHourCount) { @@ -761,7 +1031,7 @@ private string ParseTimePeriod(Match match, out int beginHour, out int endHour) hasAm = true; } - else if (beginDescStr.StartsWith("p")) + else if (beginDescStr.StartsWith("p", StringComparison.Ordinal)) { if (beginHour < Constants.HalfDayHourCount) { @@ -771,7 +1041,7 @@ private string ParseTimePeriod(Match match, out int beginHour, out int endHour) 
hasPm = true; } - if (!string.IsNullOrEmpty(endDescStr) && endDescStr.StartsWith("a")) + if (!string.IsNullOrEmpty(endDescStr) && endDescStr.StartsWith("a", StringComparison.Ordinal)) { if (endHour >= Constants.HalfDayHourCount) { @@ -780,7 +1050,7 @@ private string ParseTimePeriod(Match match, out int beginHour, out int endHour) hasAm = true; } - else if (endDescStr.StartsWith("p")) + else if (endDescStr.StartsWith("p", StringComparison.Ordinal)) { if (endHour < Constants.HalfDayHourCount) { @@ -792,7 +1062,7 @@ private string ParseTimePeriod(Match match, out int beginHour, out int endHour) } else { - if (!string.IsNullOrEmpty(matchAmStr) || (!string.IsNullOrEmpty(descStr) && descStr.StartsWith("a"))) + if (!string.IsNullOrEmpty(matchAmStr) || (!string.IsNullOrEmpty(descStr) && descStr.StartsWith("a", StringComparison.Ordinal))) { if (beginHour >= Constants.HalfDayHourCount) { @@ -806,7 +1076,7 @@ private string ParseTimePeriod(Match match, out int beginHour, out int endHour) hasAm = true; } - else if (!string.IsNullOrEmpty(matchPmStr) || (!string.IsNullOrEmpty(descStr) && descStr.StartsWith("p"))) + else if (!string.IsNullOrEmpty(matchPmStr) || (!string.IsNullOrEmpty(descStr) && descStr.StartsWith("p", StringComparison.Ordinal))) { if (beginHour < Constants.HalfDayHourCount) { @@ -918,54 +1188,60 @@ private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject refe DateObject pastBegin = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue, pastEnd = (DateObject)((DateTimeResolutionResult)pr2.Value).PastValue; - if (bothHaveDates) + // If one side contains "ampm" while the other doesn't, shift the time appropriately + var ampmStr1 = ((DateTimeResolutionResult)pr1.Value).Comment; + var ampmStr2 = ((DateTimeResolutionResult)pr2.Value).Comment; + if (ampmStr1 is Constants.Comment_AmPm ^ ampmStr2 is Constants.Comment_AmPm) { - if (futureBegin > futureEnd) + if (futureBegin > futureEnd && futureBegin.Date == futureEnd.Date) { - futureBegin = 
pastBegin; + futureEnd = futureEnd.AddHours(Constants.HalfDayHourCount); } - if (pastEnd < pastBegin) + if (pastBegin > pastEnd && pastBegin.Date == pastEnd.Date) { - pastEnd = futureEnd; + pastEnd = pastEnd.AddHours(Constants.HalfDayHourCount); } } + var leftTimex = pr1.TimexStr; + var rightTimex = pr2.TimexStr; + if (bothHaveDates) { - ret.Timex = $"({pr1.TimexStr},{pr2.TimexStr},PT{Convert.ToInt32((futureEnd - futureBegin).TotalHours)}H)"; + if (futureBegin > futureEnd) + { + futureBegin = pastBegin; + } - // Do nothing + if (pastEnd < pastBegin) + { + pastEnd = futureEnd; + } } else if (beginHasDate) { futureEnd = DateObject.MinValue.SafeCreateFromValue( futureBegin.Year, futureBegin.Month, futureBegin.Day, futureEnd.Hour, futureEnd.Minute, futureEnd.Second); - pastEnd = DateObject.MinValue.SafeCreateFromValue( pastBegin.Year, pastBegin.Month, pastBegin.Day, pastEnd.Hour, pastEnd.Minute, pastEnd.Second); - var dateStr = pr1.TimexStr.Split('T')[0]; - var durationStr = DateTimeFormatUtil.LuisTimeSpan(futureEnd - futureBegin); - ret.Timex = $"({pr1.TimexStr},{dateStr + pr2.TimexStr},{durationStr})"; + rightTimex = TimexUtility.CombineDateTimeTimex(pr2.TimexStr, pr1.TimexStr, futureEnd); } else if (endHasDate) { futureBegin = DateObject.MinValue.SafeCreateFromValue( futureEnd.Year, futureEnd.Month, futureEnd.Day, futureBegin.Hour, futureBegin.Minute, futureBegin.Second); - pastBegin = DateObject.MinValue.SafeCreateFromValue( pastEnd.Year, pastEnd.Month, pastEnd.Day, pastBegin.Hour, pastBegin.Minute, pastBegin.Second); - var dateStr = pr2.TimexStr.Split('T')[0]; - var durationStr = DateTimeFormatUtil.LuisTimeSpan(pastEnd - pastBegin); - ret.Timex = $"({dateStr + pr1.TimexStr},{pr2.TimexStr},{durationStr})"; + leftTimex = TimexUtility.CombineDateTimeTimex(pr1.TimexStr, pr2.TimexStr, pastBegin); } - var ampmStr1 = ((DateTimeResolutionResult)pr1.Value).Comment; - var ampmStr2 = ((DateTimeResolutionResult)pr2.Value).Comment; - if (!string.IsNullOrEmpty(ampmStr1) && 
ampmStr1.EndsWith(Constants.Comment_AmPm) && - !string.IsNullOrEmpty(ampmStr2) && ampmStr2.EndsWith(Constants.Comment_AmPm)) + ret.Timex = TimexUtility.GenerateDateTimePeriodTimex(leftTimex, rightTimex, futureEnd - futureBegin); + + if (!string.IsNullOrEmpty(ampmStr1) && ampmStr1.EndsWith(Constants.Comment_AmPm, StringComparison.Ordinal) && + !string.IsNullOrEmpty(ampmStr2) && ampmStr2.EndsWith(Constants.Comment_AmPm, StringComparison.Ordinal)) { ret.Comment = Constants.Comment_AmPm; } @@ -1109,6 +1385,51 @@ private DateTimeResolutionResult ParseDuration(string text, DateObject reference return ret; } + private DateTimeResolutionResult ParseStartingWithDuration(string text, DateObject referenceTime) + { + var ret = new DateTimeResolutionResult(); + var datetimeERs = Config.DateTimeExtractor.Extract(text, referenceTime); + var enConfig = Config as EnglishDateTimePeriodParserConfiguration; + + if (enConfig != null && enConfig.StartingRegex.Match(text).Success && datetimeERs.Count == 1) + { + var beforeString = text.Substring(0, (int)datetimeERs[0].Start); + + if (!string.IsNullOrEmpty(beforeString) && enConfig.StartingRegex.MatchEnd(beforeString, true).Success) + { + var pr = Config.DateTimeParser.Parse(datetimeERs[0], referenceTime); + var durationERs = Config.DurationExtractor.Extract(beforeString, referenceTime); + + if (durationERs.Count == 1) + { + var duration = Config.DurationParser.Parse(durationERs[0]); + var durationInSeconds = (double)((DateTimeResolutionResult)duration.Value).PastValue; + + DateObject startDate; + DateObject endDate; + + startDate = (DateObject)((DateTimeResolutionResult)pr.Value).PastValue; + endDate = startDate.AddSeconds(durationInSeconds); + + if (startDate != DateObject.MinValue) + { + var startLuisStr = $"{DateTimeFormatUtil.LuisDate(startDate)}{DateTimeFormatUtil.ShortTime(startDate.Hour, startDate.Minute, startDate.Second)}"; + var endLuisStr = $"{DateTimeFormatUtil.LuisDate(endDate)}{DateTimeFormatUtil.ShortTime(endDate.Hour, 
endDate.Minute, endDate.Second)}"; + var durationTimex = ((DateTimeResolutionResult)duration.Value).Timex; + + ret.Timex = $"({startLuisStr},{endLuisStr},{durationTimex})"; + ret.FutureValue = new Tuple(startDate, endDate); + ret.PastValue = new Tuple(startDate, endDate); + ret.SubDateTimeEntities = new List { pr, duration }; + ret.Success = true; + } + } + } + } + + return ret; + } + // Parse "last minute", "next hour" private DateTimeResolutionResult ParseRelativeUnit(string text, DateObject referenceTime) { @@ -1136,42 +1457,13 @@ private DateTimeResolutionResult ParseRelativeUnit(string text, DateObject refer DateObject beginTime; var endTime = beginTime = referenceTime; - var sufixPtTimex = string.Empty; if (Config.UnitMap.ContainsKey(srcUnit)) { - switch (unitStr) - { - case "D": - endTime = DateObject.MinValue.SafeCreateFromValue(beginTime.Year, beginTime.Month, beginTime.Day); - endTime = endTime.AddDays(1).AddSeconds(-1); - sufixPtTimex = "PT" + (endTime - beginTime).TotalSeconds + "S"; - break; - case "H": - beginTime = swiftValue > 0 ? beginTime : referenceTime.AddHours(swiftValue); - endTime = swiftValue > 0 ? referenceTime.AddHours(swiftValue) : endTime; - sufixPtTimex = "PT1H"; - break; - case "M": - beginTime = swiftValue > 0 ? beginTime : referenceTime.AddMinutes(swiftValue); - endTime = swiftValue > 0 ? referenceTime.AddMinutes(swiftValue) : endTime; - sufixPtTimex = "PT1M"; - break; - case "S": - beginTime = swiftValue > 0 ? beginTime : referenceTime.AddSeconds(swiftValue); - endTime = swiftValue > 0 ? 
referenceTime.AddSeconds(swiftValue) : endTime; - sufixPtTimex = "PT1S"; - break; - default: - return ret; - } - - ret.Timex = - $"({DateTimeFormatUtil.LuisDate(beginTime)}T{DateTimeFormatUtil.LuisTime(beginTime)}," + - $"{DateTimeFormatUtil.LuisDate(endTime)}T{DateTimeFormatUtil.LuisTime(endTime)},{sufixPtTimex})"; + ret.Timex = TimexUtility.GenerateRelativeUnitDateTimePeriodTimex(ref beginTime, ref endTime, referenceTime, unitStr, swiftValue); ret.FutureValue = ret.PastValue = new Tuple(beginTime, endTime); - ret.Success = true; + ret.Success = !string.IsNullOrEmpty(ret.Timex); return ret; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDurationParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDurationParser.cs index 4a9c3abf8c..3088611999 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDurationParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDurationParser.cs @@ -1,7 +1,12 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime @@ -17,13 +22,6 @@ public BaseDurationParser(IDurationParserConfiguration configuration) config = configuration; } - public static bool IsLessThanDay(string unit) - { - return unit.Equals("S", StringComparison.Ordinal) || - unit.Equals("M", StringComparison.Ordinal) || - unit.Equals("H", StringComparison.Ordinal); - } - public ParseResult Parse(ExtractResult result) { return this.Parse(result, DateObject.Now); @@ -134,7 +132,7 @@ private DateTimeResolutionResult ParseNumberWithUnit(string text, DateObject ref return ret; } - if ((ret = ParseInexactNumberUnit(text)).Success) + if ((ret = DurationParsingUtil.ParseInexactNumberUnit(text, this.config)).Success) { return ret; } @@ -163,13 +161,27 @@ private DateTimeResolutionResult ParseNumberSpaceUnit(string text) { srcUnit = match.Groups["unit"].Value; suffixStr = match.Groups[Constants.SuffixGroupName].Value; + + // check also beforeStr for "and an half" + if (this.config.CheckBothBeforeAfter && string.IsNullOrEmpty(suffixStr)) + { + noNum = text.Substring(0, (int)ers[0].Start).Trim(); + var prefixMatch = this.config.SuffixAndRegex.Match(noNum); + if (prefixMatch.Success) + { + suffixStr = prefixMatch.Groups[Constants.SuffixGroupName].Value; + } + } } if (match.Success && match.Groups[Constants.BusinessDayGroupName].Success) { var numVal = int.Parse(pr.Value.ToString(), CultureInfo.InvariantCulture); ret.Timex = TimexUtility.GenerateDurationTimex(numVal, Constants.TimexBusinessDay, false); - ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit.Split()[1]]; + + // The line below was containing this.config.UnitValueMap[srcUnit.Split()[1]] + // it was updated to accommodate single word "business day" expressions. 
+ ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit.Split()[srcUnit.Split().Length - 1]]; ret.Success = true; return ret; @@ -177,9 +189,19 @@ private DateTimeResolutionResult ParseNumberSpaceUnit(string text) if (this.config.UnitMap.TryGetValue(srcUnit, out var unitStr)) { - var numVal = double.Parse(pr.Value.ToString(), CultureInfo.InvariantCulture) + ParseNumberWithUnitAndSuffix(suffixStr); + // First try to parse combined expression 'num + suffix' + double numVal; + var combStr = pr.Text + " " + suffixStr; + if (this.config.DoubleNumbers.ContainsKey(combStr)) + { + numVal = ParseNumberWithUnitAndSuffix(combStr); + } + else + { + numVal = double.Parse(pr.Value.ToString(), CultureInfo.InvariantCulture) + ParseNumberWithUnitAndSuffix(suffixStr); + } - ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, IsLessThanDay(unitStr)); + ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, DurationParsingUtil.IsLessThanDay(unitStr)); ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit]; ret.Success = true; return ret; @@ -235,7 +257,7 @@ private DateTimeResolutionResult ParseNumberCombinedUnit(string text) return ret; } - ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, IsLessThanDay(unitStr)); + ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, DurationParsingUtil.IsLessThanDay(unitStr)); ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit]; ret.Success = true; @@ -270,51 +292,17 @@ private DateTimeResolutionResult ParseAnUnit(string text) { var unitStr = this.config.UnitMap[srcUnit]; - ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, IsLessThanDay(unitStr)); + ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, DurationParsingUtil.IsLessThanDay(unitStr)); ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit]; ret.Success = true; } else if (match.Groups[Constants.BusinessDayGroupName].Success) { 
ret.Timex = TimexUtility.GenerateDurationTimex(numVal, Constants.TimexBusinessDay, false); - ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit.Split()[1]]; - ret.Success = true; - } - } - - return ret; - } - private DateTimeResolutionResult ParseInexactNumberUnit(string text) - { - var ret = new DateTimeResolutionResult(); - - var match = config.InexactNumberUnitRegex.Match(text); - if (match.Success) - { - // set the inexact number "few", "some" to 3 for now - double numVal = match.Groups["NumTwoTerm"].Success ? 2 : 3; - var srcUnit = match.Groups["unit"].Value; - - if (this.config.UnitMap.ContainsKey(srcUnit)) - { - var unitStr = this.config.UnitMap[srcUnit]; - - if (numVal > 1000 && (unitStr.Equals(Constants.TimexYear, StringComparison.Ordinal) || - unitStr.Equals(Constants.TimexMonthFull, StringComparison.Ordinal) || - unitStr.Equals(Constants.TimexWeek, StringComparison.Ordinal))) - { - return ret; - } - - ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, IsLessThanDay(unitStr)); - ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit]; - ret.Success = true; - } - else if (match.Groups[Constants.BusinessDayGroupName].Success) - { - ret.Timex = TimexUtility.GenerateDurationTimex(numVal, Constants.TimexBusinessDay, false); - ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit.Split()[1]]; + // The line below was containing this.config.UnitValueMap[srcUnit.Split()[1]] + // it was updated to accommodate single word "business day" expressions. 
+ ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit.Split()[srcUnit.Split().Length - 1]]; ret.Success = true; } } @@ -333,13 +321,6 @@ private DateTimeResolutionResult ParseImplicitDuration(string text, DateObject r ret = result; } - // handle "during/for the day/week/month/year" - if ((config.Options & DateTimeOptions.CalendarMode) != 0 && - TryGetResultFromRegex(config.DuringRegex, text, "1", out result)) - { - ret = result; - } - // handle "half day", "half year" if (TryGetResultFromRegex(config.HalfDateUnitRegex, text, "0.5", out result)) { @@ -352,6 +333,41 @@ private DateTimeResolutionResult ParseImplicitDuration(string text, DateObject r ret = result; } + // handle "during/for the day/week/month/year" + if ((config.Options & DateTimeOptions.CalendarMode) != 0 && + TryGetResultFromRegex(config.DuringRegex, text, "1", out result)) + { + ret = result; + } + else + { + // handle cases like "the hour", which are special durations always not in CalendarMode + if ((this.config.Options & DateTimeOptions.CalendarMode) == 0) + { + var regex = this.config.PrefixArticleRegex; + + if (regex != null) + { + var match = RegExpUtility.MatchBegin(regex, text, false); + if (match.Success) + { + var srcUnit = text.Substring(match.Length); + if (this.config.UnitValueMap.ContainsKey(srcUnit)) + { + var numStr = "1"; + var unitStr = this.config.UnitMap[srcUnit]; + var numVal = double.Parse(numStr, CultureInfo.InvariantCulture); + + ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, DurationParsingUtil.IsLessThanDay(unitStr)); + ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit]; + ret.Success = true; + } + } + } + } + + } + return ret; } @@ -367,7 +383,7 @@ private bool TryGetResultFromRegex(Regex regex, string text, string numStr, out { var unitStr = this.config.UnitMap[srcUnit]; var numVal = double.Parse(numStr, CultureInfo.InvariantCulture); - ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, 
IsLessThanDay(unitStr)); + ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, DurationParsingUtil.IsLessThanDay(unitStr)); ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[srcUnit]; ret.Success = true; } @@ -382,7 +398,36 @@ private DateTimeResolutionResult ParseMergedDuration(string text, DateObject ref var durationExtractor = this.config.DurationExtractor; // DurationExtractor without parameter will not extract merged duration - var ers = durationExtractor.Extract(text); + + // TrimStart() was added to address a bug with french duration expression "depuis ans" + // for which the basecase of the recursive call (i.e., if(ers.Count <= 1)) + // would never be reached in which case the stack would overflow. + // The statement if(minStart){...} is meant to find the isolated unit as explained in + // the below comment. However, if there is whitespace before the extacted entity + // (as in " ans") the minStart will be greater than 1 and the Followed Unit regex + // keeps on matching with "ans" and it adds it to ers and it always has + // more than one item in it, hence the recursion never ends. + var ers = durationExtractor.Extract(text.TrimStart(), referenceTime); + + // If the duration extractions do not start at 0, check if the input starts with an isolated unit. + // This happens for example with patterns like "next week and 3 days" where "next" is not part of the extraction. 
+ var minStart = ers.Min(er => er.Start); + if (minStart > 0) + { + var match = config.FollowedUnit.Match(text); + if (match.Success) + { + var er = new ExtractResult + { + Start = match.Index, + Length = match.Length, + Text = match.Value, + Type = ParserName, + Data = null, + }; + ers.Insert(0, er); + } + } // only handle merged duration cases like "1 month 21 days" if (ers.Count <= 1) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseHolidayParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseHolidayParser.cs index 4bb64069d6..5786517dcb 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseHolidayParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseHolidayParser.cs @@ -1,5 +1,9 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; +using System.Globalization; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Utilities; @@ -11,6 +15,8 @@ public class BaseHolidayParser : IDateTimeParser { public static readonly string ParserName = Constants.SYS_DATETIME_DATE; // "Date" + private static bool inclusiveEndPeriod = false; + private readonly IHolidayParserConfiguration config; public BaseHolidayParser(IHolidayParserConfiguration config) @@ -24,13 +30,31 @@ public ParseResult Parse(ExtractResult result) } public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) + { + if (er.Metadata?.IsHolidayWeekend ?? 
false) + { + return ParseHolidayWeekend(er, refDate); + } + else + { + return ParseSingleDate(er, refDate); + } + } + + public List FilterResults(string query, List candidateResults) + { + return candidateResults; + } + + // This will parse a holiday to the date of a single day + private DateTimeParseResult ParseSingleDate(ExtractResult er, DateObject refDate) { var referenceDate = refDate; object value = null; if (er.Type.Equals(ParserName, StringComparison.Ordinal)) { - var innerResult = ParseHolidayRegexMatch(er.Text, referenceDate); + var innerResult = ParseHolidayRegexMatch(er, referenceDate); if (innerResult.Success) { @@ -61,21 +85,147 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) return ret; } - public List FilterResults(string query, List candidateResults) + // This will parse to a date ranging between the holiday and the closest weekend + // cases: "Thanksgiving weekend", "weekend of Halloween" + private DateTimeParseResult ParseHolidayWeekend(ExtractResult er, DateObject referenceDate) { - return candidateResults; + var dateTimeRes = new DateTimeResolutionResult(); + + if (!string.IsNullOrEmpty(er.Metadata?.HolidayName)) + { + var holidayName = er.Metadata.HolidayName; + + // resolve holiday + var holidayEr = new ExtractResult + { + Start = 0, + Length = holidayName.Length, + Text = holidayName, + Type = Constants.SYS_DATETIME_DATE, + Data = null, + Metadata = new Metadata { IsHoliday = true }, + }; + var result = (DateTimeResolutionResult)this.Parse(holidayEr, referenceDate).Value; + + if (!result.Success) + { + dateTimeRes.FutureResolution = dateTimeRes.PastResolution = new Dictionary(); + } + else + { + // get closest weekend to the holiday(s) + var futureWeekend = GetClosestHolidayWeekend((DateObject)result.FutureValue); + var pastWeekend = futureWeekend; + + if (result.FutureValue == result.PastValue) + { + dateTimeRes.Timex = TimexUtility.GenerateWeekendTimex(futureWeekend.Item1); + } + else + { + dateTimeRes.Timex = 
result.Timex; + pastWeekend = GetClosestHolidayWeekend((DateObject)result.PastValue); + } + + dateTimeRes.Success = true; + dateTimeRes.FutureValue = futureWeekend; + dateTimeRes.PastValue = pastWeekend; + + dateTimeRes.FutureResolution = new Dictionary + { + { + TimeTypeConstants.START_DATE, + DateTimeFormatUtil.FormatDate(((Tuple)dateTimeRes.FutureValue).Item1) + }, + { + TimeTypeConstants.END_DATE, + DateTimeFormatUtil.FormatDate(((Tuple)dateTimeRes.FutureValue).Item2) + }, + { + DateTimeResolutionKey.Timex, + TimexUtility.GenerateWeekendTimex(futureWeekend.Item1) + }, + }; + + dateTimeRes.PastResolution = new Dictionary + { + { + TimeTypeConstants.START_DATE, + DateTimeFormatUtil.FormatDate(((Tuple)dateTimeRes.PastValue).Item1) + }, + { + TimeTypeConstants.END_DATE, + DateTimeFormatUtil.FormatDate(((Tuple)dateTimeRes.PastValue).Item2) + }, + { + DateTimeResolutionKey.Timex, + TimexUtility.GenerateWeekendTimex(pastWeekend.Item1) + }, + }; + } + } + else + { + dateTimeRes.FutureResolution = dateTimeRes.PastResolution = new Dictionary(); + } + + var ret = new DateTimeParseResult + { + Text = er.Text, + Start = er.Start, + Length = er.Length, + Type = er.Type, + Data = er.Data, + Metadata = er.Metadata, + Value = dateTimeRes, + TimexStr = dateTimeRes == null ? string.Empty : ((DateTimeResolutionResult)dateTimeRes).Timex, + ResolutionStr = string.Empty, + }; + + return ret; + } + + private Tuple GetClosestHolidayWeekend(DateObject dateTimeObject) + { + // this week's Saturday + var startDate = dateTimeObject.This(DayOfWeek.Saturday); + var endDate = dateTimeObject.This(DayOfWeek.Sunday); + + // is last weekend closer than this one? i.e. is the input Monday or Tuesday? + if (dateTimeObject.DayOfWeek == DayOfWeek.Monday || dateTimeObject.DayOfWeek == DayOfWeek.Tuesday) + { + startDate = startDate.AddDays(-7); + endDate = dateTimeObject; + } + else if (dateTimeObject.DayOfWeek != DayOfWeek.Sunday) + { + startDate = dateTimeObject; + } + + endDate = inclusiveEndPeriod ? 
endDate : endDate.AddDays(1); + + return new Tuple(startDate, endDate); } - private DateTimeResolutionResult ParseHolidayRegexMatch(string text, DateObject referenceDate) + private DateTimeResolutionResult ParseHolidayRegexMatch(ExtractResult er, DateObject referenceDate) { foreach (var regex in this.config.HolidayRegexList) { - var match = regex.MatchExact(text, trim: true); + Match match; + if (er.Metadata != null && er.Metadata.IsHoliday) + { + match = regex.Match(er.Text); + } + else + { + var exacMatch = regex.MatchExact(er.Text, trim: true); + match = exacMatch.Match; + } if (match.Success) { // Value string will be set in Match2Date method - var ret = Match2Date(match.Match, referenceDate); + var ret = Match2Date(match, referenceDate); return ret; } } @@ -93,15 +243,16 @@ private DateTimeResolutionResult Match2Date(Match match, DateObject referenceDat var orderStr = match.Groups["order"].Value; int year; var hasYear = false; + var swift = 0; if (!string.IsNullOrEmpty(yearStr)) { - year = int.Parse(yearStr); + year = int.Parse(yearStr, CultureInfo.InvariantCulture); hasYear = true; } else if (!string.IsNullOrEmpty(orderStr)) { - var swift = this.config.GetSwiftYear(orderStr); + swift = this.config.GetSwiftYear(orderStr); if (swift < -1) { return ret; @@ -131,7 +282,26 @@ private DateTimeResolutionResult Match2Date(Match match, DateObject referenceDat var value = referenceDate; if (this.config.HolidayFuncDictionary.TryGetValue(holidayKey, out Func function)) { - value = function(year); + // With relative holidays like 'next(last) easter' the year must not be shifted + // when the reference date precedes(follows) the holiday date. 
+ if (string.IsNullOrEmpty(yearStr) && swift != 0) + { + value = function(referenceDate.Year); + if ((swift > 0 && value < referenceDate) || (swift < 0 && value > referenceDate)) + { + value = function(year); + } + else + { + year = referenceDate.Year; + } + } + else + { + value = function(year); + } + + // @TODO should be checking if variable holiday to produce better timex. Fixing is a breaking change. this.config.VariableHolidaysTimexDictionary.TryGetValue(holidayKey, out timexStr); if (string.IsNullOrEmpty(timexStr)) { @@ -154,7 +324,7 @@ private DateTimeResolutionResult Match2Date(Match match, DateObject referenceDat if (hasYear) { - ret.Timex = year.ToString("D4") + timexStr; + ret.Timex = year.ToString("D4", CultureInfo.InvariantCulture) + timexStr; ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(year, value.Month, value.Day); ret.Success = true; return ret; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseHolidayParserConfiguration.cs index 4efae82901..d2168d7e97 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseHolidayParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseHolidayParserConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; @@ -29,13 +32,16 @@ protected BaseHolidayParserConfiguration(IDateTimeOptionsConfiguration config) public abstract string SanitizeHolidayToken(string holiday); + // @TODO move all old holiday definitions to resource files protected static DateObject MothersDay(int year) => DateObject.MinValue.SafeCreateFromValue(year, 5, GetDay(year, 5, 1, DayOfWeek.Sunday)); protected static DateObject FathersDay(int year) => DateObject.MinValue.SafeCreateFromValue(year, 6, GetDay(year, 6, 2, DayOfWeek.Sunday)); protected static DateObject MemorialDay(int year) => DateObject.MinValue.SafeCreateFromValue(year, 5, GetLastDay(year, 5, DayOfWeek.Monday)); - protected static DateObject LabourDay(int year) => DateObject.MinValue.SafeCreateFromValue(year, 9, GetDay(year, 9, 0, DayOfWeek.Monday)); + protected static DateObject UsLabourDay(int year) => DateObject.MinValue.SafeCreateFromValue(year, 9, GetDay(year, 9, 0, DayOfWeek.Monday)); + + protected static DateObject InternationalWorkersDay(int year) => new DateObject(year, 5, 1); protected static DateObject ColumbusDay(int year) => DateObject.MinValue.SafeCreateFromValue(year, 10, GetDay(year, 10, 1, DayOfWeek.Monday)); @@ -61,14 +67,14 @@ protected virtual IDictionary> InitHolidayFuncs() { { "fathers", FathersDay }, { "mothers", MothersDay }, - { "thanksgivingday", ThanksgivingDay }, { "thanksgiving", ThanksgivingDay }, { "blackfriday", BlackFriday }, { "cybermonday", CyberMonday }, { "martinlutherking", MartinLutherKingDay }, { "washingtonsbirthday", WashingtonsBirthday }, { "canberra", CanberraDay }, - { "labour", LabourDay }, + { "labour", UsLabourDay }, + { "internationalworkers", InternationalWorkersDay }, { "columbus", ColumbusDay }, { "memorial", MemorialDay }, }; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseMergedDateTimeParser.cs 
b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseMergedDateTimeParser.cs index beee1785bd..a6052eb607 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseMergedDateTimeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseMergedDateTimeParser.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Globalization; using System.Linq; @@ -9,11 +12,6 @@ namespace Microsoft.Recognizers.Text.DateTime { public class BaseMergedDateTimeParser : IDateTimeParser { - public const string ParserTypeName = "datetimeV2"; - - public static readonly string DateMinString = DateTimeFormatUtil.FormatDate(DateObject.MinValue); - public static readonly string DateTimeMinString = DateTimeFormatUtil.FormatDateTime(DateObject.MinValue); - private static readonly Calendar Cal = DateTimeFormatInfo.InvariantInfo.Calendar; public BaseMergedDateTimeParser(IMergedParserConfiguration configuration) { @@ -22,273 +20,117 @@ public BaseMergedDateTimeParser(IMergedParserConfiguration configuration) protected IMergedParserConfiguration Config { get; private set; } - public static void AddAltSingleDateTimeToResolution(Dictionary resolutionDic, string type, string mod, - Dictionary res) + public List FilterResults(string query, List candidateResults) { - if (resolutionDic.ContainsKey(TimeTypeConstants.DATE)) - { - AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.DATE, mod, res); - } - else if (resolutionDic.ContainsKey(TimeTypeConstants.DATETIME)) - { - AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.DATETIME, mod, res); - } - else if (resolutionDic.ContainsKey(TimeTypeConstants.TIME)) - { - AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.TIME, mod, res); - } + return candidateResults; } - public static void AddSingleDateTimeToResolution(Dictionary resolutionDic, string type, string mod, 
Dictionary res) + public ParseResult Parse(ExtractResult er) { - // If an "invalid" Date or DateTime is extracted, it should not have an assigned resolution. - // Only valid entities should pass this condition. - if (resolutionDic.ContainsKey(type) && - !resolutionDic[type].StartsWith(DateMinString, StringComparison.Ordinal)) - { - if (!string.IsNullOrEmpty(mod)) - { - if (mod.StartsWith(Constants.BEFORE_MOD, StringComparison.Ordinal)) - { - res.Add(DateTimeResolutionKey.End, resolutionDic[type]); - return; - } - - if (mod.StartsWith(Constants.AFTER_MOD, StringComparison.Ordinal)) - { - res.Add(DateTimeResolutionKey.Start, resolutionDic[type]); - return; - } - - if (mod.StartsWith(Constants.SINCE_MOD, StringComparison.Ordinal)) - { - res.Add(DateTimeResolutionKey.Start, resolutionDic[type]); - return; - } - - if (mod.StartsWith(Constants.UNTIL_MOD, StringComparison.Ordinal)) - { - res.Add(DateTimeResolutionKey.End, resolutionDic[type]); - return; - } - } - - res.Add(ResolutionKey.Value, resolutionDic[type]); - } + return Parse(er, DateObject.Now); } - public static void AddPeriodToResolution(Dictionary resolutionDic, string startType, string endType, string mod, Dictionary res) + public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) { - var start = string.Empty; - var end = string.Empty; - - if (resolutionDic.ContainsKey(startType)) - { - start = resolutionDic[startType]; - } + var referenceTime = refTime; + DateTimeParseResult pr = null; - if (resolutionDic.ContainsKey(endType)) + var originText = er.Text; + if ((this.Config.Options & DateTimeOptions.EnablePreview) != 0) { - end = resolutionDic[endType]; + er.Text = MatchingUtil.PreProcessTextRemoveSuperfluousWords(er.Text, Config.SuperfluousWordMatcher, out var _); + er.Length += er.Text.Length - originText.Length; } - if (!string.IsNullOrEmpty(mod)) - { - // For the 'before' mod - // 1. 
Cases like "Before December", the start of the period should be the end of the new period, not the start - // 2. Cases like "More than 3 days before today", the date point should be the end of the new period - if (mod.StartsWith(Constants.BEFORE_MOD, StringComparison.Ordinal)) - { - if (!string.IsNullOrEmpty(start) && !string.IsNullOrEmpty(end)) - { - res.Add(DateTimeResolutionKey.End, start); - } - else - { - res.Add(DateTimeResolutionKey.End, end); - } + bool hasBefore = false, hasAfter = false, hasSince = false, hasAround = false, hasEqual = false, hasDateAfter = false; - return; - } + // "InclusiveModifier" means MOD should include the start/end time + // For example, cases like "on or later than", "earlier than or in" have inclusive modifier + var hasInclusiveModifier = false; + var matchIsAfter = false; + var modStr = string.Empty; - // For the 'after' mod - // 1. Cases like "After January", the end of the period should be the start of the new period, not the end - // 2. Cases like "More than 3 days after today", the date point should be the start of the new period - if (mod.StartsWith(Constants.AFTER_MOD, StringComparison.Ordinal)) + // Analyze and process modifiers + // Push, save the MOD string + if (er.Metadata != null && er.Metadata.HasMod) + { + var beforeMatch = Config.BeforeRegex.MatchBegin(er.Text, trim: true); + var afterMatch = Config.AfterRegex.MatchBegin(er.Text, trim: true); + var sinceMatch = Config.SinceRegex.MatchBegin(er.Text, trim: true); + var preLength = 0; + if (beforeMatch.Success) { - if (!string.IsNullOrEmpty(start) && !string.IsNullOrEmpty(end)) - { - res.Add(DateTimeResolutionKey.Start, end); - } - else - { - res.Add(DateTimeResolutionKey.Start, start); - } - - return; + preLength = beforeMatch.Index + beforeMatch.Length; } - - // For the 'since' mod, the start of the period should be the start of the new period, not the end - if (mod.StartsWith(Constants.SINCE_MOD, StringComparison.Ordinal)) + else if (afterMatch.Success) { - 
res.Add(DateTimeResolutionKey.Start, start); - return; + preLength = afterMatch.Index + afterMatch.Length; } - - // For the 'until' mod, the end of the period should be the end of the new period, not the start - if (mod.StartsWith(Constants.UNTIL_MOD, StringComparison.Ordinal)) + else if (sinceMatch.Success) { - res.Add(DateTimeResolutionKey.End, end); - return; + preLength = sinceMatch.Index + sinceMatch.Length; } - } - if (!AreUnresolvedDates(start, end)) - { - res.Add(DateTimeResolutionKey.Start, start); - res.Add(DateTimeResolutionKey.End, end); - } - } - - public static string GenerateEndInclusiveTimex(string originalTimex, DatePeriodTimexType datePeriodTimexType, DateObject startDate, DateObject endDate) - { - var timexEndInclusive = TimexUtility.GenerateDatePeriodTimex(startDate, endDate, datePeriodTimexType); - - // Sometimes the original timex contains fuzzy part like "XXXX-05-31" - // The fuzzy part needs to stay the same in the new end-inclusive timex - if (originalTimex.Contains(Constants.TimexFuzzy) && originalTimex.Length == timexEndInclusive.Length) - { - var timexCharSet = new char[timexEndInclusive.Length]; + var aroundText = er.Text.Substring(preLength); + var aroundMatch = Config.AroundRegex.MatchBegin(aroundText, trim: true); + var equalMatch = Config.EqualRegex.MatchBegin(er.Text, trim: true); - for (int i = 0; i < originalTimex.Length; i++) + // check also after match + if (this.Config.CheckBothBeforeAfter && er.Data != null && er.Data.Equals(Constants.HAS_MOD)) { - if (originalTimex[i] != Constants.TimexFuzzy) + if (!beforeMatch.Success) { - timexCharSet[i] = timexEndInclusive[i]; + beforeMatch = Config.BeforeRegex.MatchEnd(er.Text, trim: true); + matchIsAfter = matchIsAfter || beforeMatch.Success; } - else + + if (!afterMatch.Success) { - timexCharSet[i] = Constants.TimexFuzzy; + afterMatch = Config.AfterRegex.MatchEnd(er.Text, trim: true); + matchIsAfter = matchIsAfter || afterMatch.Success; } - } - timexEndInclusive = new 
string(timexCharSet); - } - - return timexEndInclusive; - } - - public static DateTimeParseResult SetInclusivePeriodEnd(DateTimeParseResult slot) - { - if (slot.Type == $"{ParserTypeName}.{Constants.SYS_DATETIME_DATEPERIOD}") - { - var timexComponents = slot.TimexStr.Split(Constants.DatePeriodTimexSplitter, StringSplitOptions.RemoveEmptyEntries); - - // Only handle DatePeriod like "(StartDate,EndDate,Duration)" - if (timexComponents.Length == 3) - { - var value = (SortedDictionary)slot.Value; - var altTimex = string.Empty; + if (!sinceMatch.Success) + { + sinceMatch = Config.SinceRegex.MatchEnd(er.Text, trim: true); + matchIsAfter = matchIsAfter || sinceMatch.Success; + } - if (value != null && value.ContainsKey(ResolutionKey.ValueSet)) + if (!aroundMatch.Success) { - if (value[ResolutionKey.ValueSet] is IList> valueSet && valueSet.Any()) - { - foreach (var values in valueSet) - { - // This is only a sanity check, as here we only handle DatePeriod like "(StartDate,EndDate,Duration)" - if (values.ContainsKey(DateTimeResolutionKey.Start) && values.ContainsKey(DateTimeResolutionKey.End) && - values.ContainsKey(DateTimeResolutionKey.Timex)) - { - var startDate = DateObject.Parse(values[DateTimeResolutionKey.Start]); - var endDate = DateObject.Parse(values[DateTimeResolutionKey.End]); - var durationStr = timexComponents[2]; - var datePeriodTimexType = TimexUtility.GetDatePeriodTimexType(durationStr); - endDate = TimexUtility.OffsetDateObject(endDate, offset: 1, timexType: datePeriodTimexType); - values[DateTimeResolutionKey.End] = DateTimeFormatUtil.LuisDate(endDate); - values[DateTimeResolutionKey.Timex] = - GenerateEndInclusiveTimex(slot.TimexStr, datePeriodTimexType, startDate, endDate); - - if (string.IsNullOrEmpty(altTimex)) - { - altTimex = values[DateTimeResolutionKey.Timex]; - } - } - } - } + aroundMatch = Config.AroundRegex.MatchEnd(er.Text, trim: true); + matchIsAfter = matchIsAfter || aroundMatch.Success; } - slot.Value = value; - slot.TimexStr = altTimex; + 
if (!equalMatch.Success) + { + equalMatch = Config.EqualRegex.MatchEnd(er.Text, trim: true); + matchIsAfter = matchIsAfter || equalMatch.Success; + } } - } - return slot; - } - - public static void AddAltPeriodToResolution(Dictionary resolutionDic, string mod, Dictionary res) - { - if (resolutionDic.ContainsKey(TimeTypeConstants.START_DATETIME) || resolutionDic.ContainsKey(TimeTypeConstants.END_DATETIME)) - { - AddPeriodToResolution(resolutionDic, TimeTypeConstants.START_DATETIME, TimeTypeConstants.END_DATETIME, mod, res); - } - else if (resolutionDic.ContainsKey(TimeTypeConstants.START_DATE) || resolutionDic.ContainsKey(TimeTypeConstants.END_DATE)) - { - AddPeriodToResolution(resolutionDic, TimeTypeConstants.START_DATE, TimeTypeConstants.END_DATE, mod, res); - } - else if (resolutionDic.ContainsKey(TimeTypeConstants.START_TIME) || resolutionDic.ContainsKey(TimeTypeConstants.END_TIME)) - { - AddPeriodToResolution(resolutionDic, TimeTypeConstants.START_TIME, TimeTypeConstants.END_TIME, mod, res); - } - } - - public static bool AreUnresolvedDates(string startDate, string endDate) - { - return string.IsNullOrEmpty(startDate) || string.IsNullOrEmpty(endDate) || - startDate.StartsWith(DateMinString, StringComparison.Ordinal) || endDate.StartsWith(DateMinString, StringComparison.Ordinal); - } - - public ParseResult Parse(ExtractResult er) - { - return Parse(er, DateObject.Now); - } - - public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) - { - var referenceTime = refTime; - DateTimeParseResult pr = null; - - var originText = er.Text; - if ((this.Config.Options & DateTimeOptions.EnablePreview) != 0) - { - er.Text = MatchingUtil.PreProcessTextRemoveSuperfluousWords(er.Text, Config.SuperfluousWordMatcher, out var _); - er.Length += er.Text.Length - originText.Length; - } - - // Push, save the MOD string - bool hasBefore = false, hasAfter = false, hasSince = false, hasAround = false, hasEqual = false, hasDateAfter = false; - - // "InclusiveModifier" means 
MOD should include the start/end time - // For example, cases like "on or later than", "earlier than or in" have inclusive modifier - var hasInclusiveModifier = false; - var modStr = string.Empty; - if (er.Metadata != null && er.Metadata.HasMod) - { - var beforeMatch = Config.BeforeRegex.MatchBegin(er.Text, trim: true); - var afterMatch = Config.AfterRegex.MatchBegin(er.Text, trim: true); - var sinceMatch = Config.SinceRegex.MatchBegin(er.Text, trim: true); - var aroundMatch = Config.AroundRegex.MatchBegin(er.Text, trim: true); - var equalMatch = Config.EqualRegex.MatchBegin(er.Text, trim: true); + if (aroundMatch.Success) + { + hasAround = true; + er.Start += matchIsAfter ? 0 : preLength + aroundMatch.Index + aroundMatch.Length; + er.Length -= matchIsAfter ? aroundMatch.Length : preLength + aroundMatch.Index + aroundMatch.Length; + er.Text = matchIsAfter ? er.Text.Substring(0, (int)er.Length) : er.Text.Substring(preLength + aroundMatch.Index + aroundMatch.Length); + modStr = matchIsAfter ? aroundMatch.Value : aroundText.Substring(0, aroundMatch.Index + aroundMatch.Length); + } if (beforeMatch.Success) { hasBefore = true; - er.Start += beforeMatch.Length; - er.Length -= beforeMatch.Length; - er.Text = er.Text.Substring(beforeMatch.Length); - modStr = beforeMatch.Value; + if (!hasAround) + { + er.Start += matchIsAfter ? 0 : beforeMatch.Length; + er.Length -= beforeMatch.Length; + er.Text = matchIsAfter ? 
er.Text.Substring(0, (int)er.Length) : er.Text.Substring(beforeMatch.Length); + } + + modStr = beforeMatch.Value + modStr; - if (!string.IsNullOrEmpty(beforeMatch.Groups["include"].Value)) + if (!string.IsNullOrEmpty(beforeMatch.Groups[Constants.IncludeGroupName].Value)) { hasInclusiveModifier = true; } @@ -296,12 +138,16 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) else if (afterMatch.Success) { hasAfter = true; - er.Start += afterMatch.Length; - er.Length -= afterMatch.Length; - er.Text = er.Text.Substring(afterMatch.Length); - modStr = afterMatch.Value; + if (!hasAround) + { + er.Start += matchIsAfter ? 0 : afterMatch.Length; + er.Length -= afterMatch.Length; + er.Text = matchIsAfter ? er.Text.Substring(0, (int)er.Length) : er.Text.Substring(afterMatch.Length); + } + + modStr = afterMatch.Value + modStr; - if (!string.IsNullOrEmpty(afterMatch.Groups["include"].Value)) + if (!string.IsNullOrEmpty(afterMatch.Groups[Constants.IncludeGroupName].Value)) { hasInclusiveModifier = true; } @@ -309,30 +155,26 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) else if (sinceMatch.Success) { hasSince = true; - er.Start += sinceMatch.Length; - er.Length -= sinceMatch.Length; - er.Text = er.Text.Substring(sinceMatch.Length); - modStr = sinceMatch.Value; - } - else if (aroundMatch.Success) - { - hasAround = true; - er.Start += aroundMatch.Length; - er.Length -= aroundMatch.Length; - er.Text = er.Text.Substring(aroundMatch.Length); - modStr = aroundMatch.Value; + if (!hasAround) + { + er.Start += matchIsAfter ? 0 : sinceMatch.Length; + er.Length -= sinceMatch.Length; + er.Text = matchIsAfter ? er.Text.Substring(0, (int)er.Length) : er.Text.Substring(sinceMatch.Length); + } + + modStr = sinceMatch.Value + modStr; } else if (equalMatch.Success) { hasEqual = true; - er.Start += equalMatch.Length; + er.Start += matchIsAfter ? 
0 : equalMatch.Length; er.Length -= equalMatch.Length; - er.Text = er.Text.Substring(equalMatch.Length); + er.Text = matchIsAfter ? er.Text.Substring(0, (int)er.Length) : er.Text.Substring(equalMatch.Length); modStr = equalMatch.Value; } else if ((er.Type.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal) && Config.YearRegex.Match(er.Text).Success) || - er.Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) || - er.Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) + er.Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) || + er.Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) { // This has to be put at the end of the if, or cases like "before 2012" and "after 2012" would fall into this // 2012 or after/above @@ -348,21 +190,29 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) } } + // Parse extracted datetime mention pr = ParseResult(er, referenceTime); if (pr == null) { return null; } + // Apply processed modifiers // Pop, restore the MOD string if (hasBefore && pr.Value != null) { pr.Length += modStr.Length; - pr.Start -= modStr.Length; - pr.Text = modStr + pr.Text; + pr.Start -= matchIsAfter ? 0 : modStr.Length; + pr.Text = matchIsAfter ? pr.Text + modStr : modStr + pr.Text; var val = (DateTimeResolutionResult)pr.Value; - val.Mod = CombineMod(val.Mod, !hasInclusiveModifier ? Constants.BEFORE_MOD : Constants.UNTIL_MOD); + val.Mod = MergedParserUtil.CombineMod(val.Mod, !hasInclusiveModifier ? Constants.BEFORE_MOD : Constants.UNTIL_MOD); + + if (hasAround) + { + val.Mod = MergedParserUtil.CombineMod(Constants.APPROX_MOD, val.Mod); + hasAround = false; + } pr.Value = val; } @@ -370,17 +220,16 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) if (hasAfter && pr.Value != null) { pr.Length += modStr.Length; - pr.Start -= modStr.Length; - pr.Text = modStr + pr.Text; + pr.Start -= matchIsAfter ? 0 : modStr.Length; + pr.Text = matchIsAfter ? 
pr.Text + modStr : modStr + pr.Text; var val = (DateTimeResolutionResult)pr.Value; - if (!hasInclusiveModifier) - { - val.Mod = CombineMod(val.Mod, Constants.AFTER_MOD); - } - else + val.Mod = MergedParserUtil.CombineMod(val.Mod, !hasInclusiveModifier ? Constants.AFTER_MOD : Constants.SINCE_MOD); + + if (hasAround) { - val.Mod = CombineMod(val.Mod, Constants.SINCE_MOD); + val.Mod = MergedParserUtil.CombineMod(Constants.APPROX_MOD, val.Mod); + hasAround = false; } pr.Value = val; @@ -389,46 +238,53 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) if (hasSince && pr.Value != null) { pr.Length += modStr.Length; - pr.Start -= modStr.Length; - pr.Text = modStr + pr.Text; + pr.Start -= matchIsAfter ? 0 : modStr.Length; + pr.Text = matchIsAfter ? pr.Text + modStr : modStr + pr.Text; var val = (DateTimeResolutionResult)pr.Value; - val.Mod = CombineMod(val.Mod, Constants.SINCE_MOD); + val.Mod = MergedParserUtil.CombineMod(val.Mod, Constants.SINCE_MOD); + + if (hasAround) + { + val.Mod = MergedParserUtil.CombineMod(Constants.APPROX_MOD, val.Mod); + hasAround = false; + } + pr.Value = val; } if (hasAround && pr.Value != null) { pr.Length += modStr.Length; - pr.Start -= modStr.Length; - pr.Text = modStr + pr.Text; + pr.Start -= matchIsAfter ? 0 : modStr.Length; + pr.Text = matchIsAfter ? pr.Text + modStr : modStr + pr.Text; var val = (DateTimeResolutionResult)pr.Value; - val.Mod = CombineMod(val.Mod, Constants.APPROX_MOD); + val.Mod = MergedParserUtil.CombineMod(val.Mod, Constants.APPROX_MOD); pr.Value = val; } if (hasEqual && pr.Value != null) { pr.Length += modStr.Length; - pr.Start -= modStr.Length; - pr.Text = modStr + pr.Text; + pr.Start -= matchIsAfter ? 0 : modStr.Length; + pr.Text = matchIsAfter ? 
pr.Text + modStr : modStr + pr.Text; } if (hasDateAfter && pr.Value != null) { pr.Length += modStr.Length; - pr.Text = pr.Text + modStr; + pr.Text += modStr; var val = (DateTimeResolutionResult)pr.Value; - val.Mod = CombineMod(val.Mod, Constants.SINCE_MOD); + val.Mod = MergedParserUtil.CombineMod(val.Mod, Constants.SINCE_MOD); pr.Value = val; hasSince = true; } // For cases like "3 pm or later on monday" if (pr.Value != null && Config.SuffixAfter.Match(pr.Text)?.Index != 0 && - pr.Type.Equals(Constants.SYS_DATETIME_DATETIME, StringComparison.Ordinal)) + pr.Type.Equals(Constants.SYS_DATETIME_DATETIME, StringComparison.Ordinal) && !this.Config.CheckBothBeforeAfter) { var val = (DateTimeResolutionResult)pr.Value; - val.Mod = CombineMod(val.Mod, Constants.SINCE_MOD); + val.Mod = MergedParserUtil.CombineMod(val.Mod, Constants.SINCE_MOD); pr.Value = val; hasSince = true; } @@ -436,7 +292,7 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) if ((Config.Options & DateTimeOptions.SplitDateAndTime) != 0 && ((DateTimeResolutionResult)pr?.Value)?.SubDateTimeEntities != null) { - pr.Value = DateTimeResolutionForSplit(pr); + pr.Value = MergedParserUtil.DateTimeResolutionForSplit(pr, this.Config); } else { @@ -446,7 +302,7 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) ((DateTimeResolutionResult)pr.Value).HasRangeChangingMod = hasRangeChangingMod; } - pr = SetParseResult(pr, hasRangeChangingMod); + pr = MergedParserUtil.SetParseResult(pr, hasRangeChangingMod, this.Config); } // In this version, ExperimentalMode only cope with the "IncludePeriodEnd" case @@ -454,7 +310,7 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) { if (pr?.Metadata != null && pr.Metadata.PossiblyIncludePeriodEnd) { - pr = SetInclusivePeriodEnd(pr); + pr = MergedParserUtil.SetInclusivePeriodEnd(pr); } } @@ -467,469 +323,31 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) } } - return pr; - } - - public List 
FilterResults(string query, List candidateResults) - { - return candidateResults; - } - - public DateTimeParseResult SetParseResult(DateTimeParseResult slot, bool hasMod) - { - slot.Value = DateTimeResolution(slot); - - // Change the type at last for the after or before modes - slot.Type = $"{ParserTypeName}.{DetermineDateTimeType(slot.Type, hasMod)}"; - return slot; - } - - public string DetermineDateTimeType(string type, bool hasMod) - { - if ((Config.Options & DateTimeOptions.SplitDateAndTime) != 0) - { - if (type.Equals(Constants.SYS_DATETIME_DATETIME, StringComparison.Ordinal)) - { - return Constants.SYS_DATETIME_TIME; - } - } - else - { - if (hasMod) - { - if (type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal)) - { - return Constants.SYS_DATETIME_DATEPERIOD; - } - - if (type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) - { - return Constants.SYS_DATETIME_TIMEPERIOD; - } - - if (type.Equals(Constants.SYS_DATETIME_DATETIME, StringComparison.Ordinal)) - { - return Constants.SYS_DATETIME_DATETIMEPERIOD; - } - } - } - - return type; - } - - public string DetermineSourceEntityType(string sourceType, string newType, bool hasMod) - { - if (!hasMod) - { - return null; - } - - if (!newType.Equals(sourceType, StringComparison.Ordinal)) - { - return Constants.SYS_DATETIME_DATETIMEPOINT; - } - - if (newType.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal)) - { - return Constants.SYS_DATETIME_DATETIMEPERIOD; - } - - return null; - } - - public List DateTimeResolutionForSplit(DateTimeParseResult slot) - { - var results = new List(); - if (((DateTimeResolutionResult)slot.Value).SubDateTimeEntities != null) - { - var subEntities = ((DateTimeResolutionResult)slot.Value).SubDateTimeEntities; - foreach (var subEntity in subEntities) - { - var result = (DateTimeParseResult)subEntity; - result.Start += slot.Start; - results.AddRange(DateTimeResolutionForSplit(result)); - } - } - else - { - slot.Value = DateTimeResolution(slot); 
- slot.Type = $"{ParserTypeName}.{DetermineDateTimeType(slot.Type, hasMod: false)}"; - results.Add(slot); - } - - return results; - } - - public SortedDictionary DateTimeResolution(DateTimeParseResult slot) - { - if (slot == null) - { - return null; - } - - var resolutions = new List>(); - var res = new Dictionary(); - - var type = slot.Type; - var timex = slot.TimexStr; - - var val = (DateTimeResolutionResult)slot.Value; - if (val == null) - { - return null; - } - - var isLunar = val.IsLunar; - var mod = val.Mod; - string list = null; - - // Resolve dates list for date periods - if (slot.Type.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal) && val.List != null) - { - list = string.Join(",", val.List.Select(o => DateTimeFormatUtil.LuisDate((DateObject)o)).ToArray()); - } - - // With modifier, output Type might not be the same with type in resolution result - // For example, if the resolution type is "date", with modifier the output type should be "daterange" - var typeOutput = DetermineDateTimeType(slot.Type, hasMod: !string.IsNullOrEmpty(mod)); - - var sourceEntity = DetermineSourceEntityType(slot.Type, typeOutput, val.HasRangeChangingMod); - - var comment = val.Comment; - - // The following should be added to res first, since ResolveAmPm requires these fields. - AddResolutionFields(res, DateTimeResolutionKey.Timex, timex); - AddResolutionFields(res, Constants.Comment, comment); - AddResolutionFields(res, DateTimeResolutionKey.Mod, mod); - AddResolutionFields(res, ResolutionKey.Type, typeOutput); - AddResolutionFields(res, DateTimeResolutionKey.IsLunar, isLunar ? isLunar.ToString() : string.Empty); - - var hasTimeZone = false; - - // For standalone timezone entity recognition, we generate TimeZoneResolution for each entity we extracted. - // We also merge time entity with timezone entity and add the information in TimeZoneResolution to every DateTime resolutions. 
- if (val.TimeZoneResolution != null) - { - if (slot.Type.Equals(Constants.SYS_DATETIME_TIMEZONE, StringComparison.Ordinal)) - { - // single timezone - AddResolutionFields(res, Constants.ResolveTimeZone, new Dictionary - { - { ResolutionKey.Value, val.TimeZoneResolution.Value }, - { Constants.UtcOffsetMinsKey, val.TimeZoneResolution.UtcOffsetMins.ToString() }, - }); - } - else - { - // timezone as clarification of datetime - hasTimeZone = true; - AddResolutionFields(res, Constants.TimeZone, val.TimeZoneResolution.Value); - AddResolutionFields(res, Constants.TimeZoneText, val.TimeZoneResolution.TimeZoneText); - AddResolutionFields(res, Constants.UtcOffsetMinsKey, val.TimeZoneResolution.UtcOffsetMins.ToString()); - } - } - - var pastResolutionStr = ((DateTimeResolutionResult)slot.Value).PastResolution; - var futureResolutionStr = ((DateTimeResolutionResult)slot.Value).FutureResolution; - - if (typeOutput == Constants.SYS_DATETIME_DATETIMEALT && pastResolutionStr.Count > 0) - { - typeOutput = DetermineResolutionDateTimeType(pastResolutionStr); - } - - var resolutionPast = GenerateResolution(type, pastResolutionStr, mod); - var resolutionFuture = GenerateResolution(type, futureResolutionStr, mod); - - // If past and future are same, keep only one - if (resolutionFuture.OrderBy(t => t.Key).Select(t => t.Value) - .SequenceEqual(resolutionPast.OrderBy(t => t.Key).Select(t => t.Value))) + /* Modification of datetime value under tasksmode, + for example when input text is 9 april at 2 pm and current datetime value is 9 april 2022 1pm, + then output for current query should be "9 april 2022, 9pm" under tasksmode which is different + from default mode, under which it is "9 april 2023, 9pm". 
+ */ + if ((this.Config.Options & DateTimeOptions.TasksMode) != 0) { - if (resolutionPast.Count > 0) - { - AddResolutionFields(res, Constants.Resolve, resolutionPast); - } - } - else - { - if (resolutionPast.Count > 0) - { - AddResolutionFields(res, Constants.ResolveToPast, resolutionPast); - } - - if (resolutionFuture.Count > 0) - { - AddResolutionFields(res, Constants.ResolveToFuture, resolutionFuture); - } - } - - // If 'ampm', double our resolution accordingly - if (!string.IsNullOrEmpty(comment) && comment.Equals(Constants.Comment_AmPm, StringComparison.Ordinal)) - { - if (res.ContainsKey(Constants.Resolve)) - { - ResolveAmpm(res, Constants.Resolve); - } - else - { - ResolveAmpm(res, Constants.ResolveToPast); - ResolveAmpm(res, Constants.ResolveToFuture); - } - } - - // If WeekOf and in CalendarMode, modify the past part of our resolution - if ((Config.Options & DateTimeOptions.CalendarMode) != 0 && - !string.IsNullOrEmpty(comment) && comment.Equals(Constants.Comment_WeekOf, StringComparison.Ordinal)) - { - ResolveWeekOf(res, Constants.ResolveToPast); - } - - foreach (var p in res) - { - if (p.Value is Dictionary dictionary) - { - var value = new Dictionary(); - - AddResolutionFields(value, DateTimeResolutionKey.Timex, timex); - AddResolutionFields(value, DateTimeResolutionKey.Mod, mod); - AddResolutionFields(value, ResolutionKey.Type, typeOutput); - AddResolutionFields(value, DateTimeResolutionKey.IsLunar, isLunar ? 
isLunar.ToString() : string.Empty); - AddResolutionFields(value, DateTimeResolutionKey.List, list); - AddResolutionFields(value, DateTimeResolutionKey.SourceEntity, sourceEntity); - - if (hasTimeZone) - { - AddResolutionFields(value, Constants.TimeZone, val.TimeZoneResolution.Value); - AddResolutionFields(value, Constants.TimeZoneText, val.TimeZoneResolution.TimeZoneText); - AddResolutionFields(value, Constants.UtcOffsetMinsKey, val.TimeZoneResolution.UtcOffsetMins.ToString()); - } - - foreach (var q in dictionary) - { - if (value.ContainsKey(q.Key)) - { - value[q.Key] = q.Value; - } - else - { - value.Add(q.Key, q.Value); - } - } - - resolutions.Add(value); - } - } - - if (resolutionPast.Count == 0 && resolutionFuture.Count == 0 && val.TimeZoneResolution == null) - { - var notResolved = new Dictionary - { - { - DateTimeResolutionKey.Timex, timex - }, - { - ResolutionKey.Type, typeOutput - }, - { - ResolutionKey.Value, "not resolved" - }, - }; - - resolutions.Add(notResolved); - } - - return new SortedDictionary { { ResolutionKey.ValueSet, resolutions } }; - } - - internal static void AddResolutionFields(Dictionary dic, string key, string value) - { - if (!string.IsNullOrEmpty(value)) - { - dic.Add(key, value); - } - } - - internal static void AddResolutionFields(Dictionary dic, string key, object value) - { - if (value != null) - { - dic.Add(key, value); - } - } - - internal static void ResolveAmpm(Dictionary resolutionDic, string keyName) - { - if (resolutionDic.ContainsKey(keyName)) - { - var resolution = (Dictionary)resolutionDic[keyName]; - var resolutionPm = new Dictionary(); - - if (!resolutionDic.ContainsKey(DateTimeResolutionKey.Timex)) - { - return; - } - - var timex = (string)resolutionDic[DateTimeResolutionKey.Timex]; - - resolutionDic.Remove(keyName); - resolutionDic.Add(keyName + "Am", resolution); - - switch ((string)resolutionDic[ResolutionKey.Type]) - { - case Constants.SYS_DATETIME_TIME: - resolutionPm[ResolutionKey.Value] = 
DateTimeFormatUtil.ToPm(resolution[ResolutionKey.Value]); - resolutionPm[DateTimeResolutionKey.Timex] = DateTimeFormatUtil.ToPm(timex); - break; - - case Constants.SYS_DATETIME_DATETIME: - var split = resolution[ResolutionKey.Value].Split(' '); - resolutionPm[ResolutionKey.Value] = split[0] + " " + DateTimeFormatUtil.ToPm(split[1]); - resolutionPm[DateTimeResolutionKey.Timex] = DateTimeFormatUtil.AllStringToPm(timex); - break; - - case Constants.SYS_DATETIME_TIMEPERIOD: - if (resolution.ContainsKey(DateTimeResolutionKey.Start)) - { - resolutionPm[DateTimeResolutionKey.Start] = DateTimeFormatUtil.ToPm(resolution[DateTimeResolutionKey.Start]); - } - - if (resolution.ContainsKey(DateTimeResolutionKey.End)) - { - resolutionPm[DateTimeResolutionKey.End] = DateTimeFormatUtil.ToPm(resolution[DateTimeResolutionKey.End]); - } - - resolutionPm[DateTimeResolutionKey.Timex] = DateTimeFormatUtil.AllStringToPm(timex); - break; - - case Constants.SYS_DATETIME_DATETIMEPERIOD: - if (resolution.ContainsKey(DateTimeResolutionKey.Start)) - { - var start = Convert.ToDateTime(resolution[DateTimeResolutionKey.Start]); - start = start.Hour == Constants.HalfDayHourCount ? start.AddHours(-Constants.HalfDayHourCount) : start.AddHours(Constants.HalfDayHourCount); - - resolutionPm[DateTimeResolutionKey.Start] = DateTimeFormatUtil.FormatDateTime(start); - } - - if (resolution.ContainsKey(DateTimeResolutionKey.End)) - { - var end = Convert.ToDateTime(resolution[DateTimeResolutionKey.End]); - end = end.Hour == Constants.HalfDayHourCount ? 
end.AddHours(-Constants.HalfDayHourCount) : end.AddHours(Constants.HalfDayHourCount); - - resolutionPm[DateTimeResolutionKey.End] = DateTimeFormatUtil.FormatDateTime(end); - } - - resolutionPm[DateTimeResolutionKey.Timex] = DateTimeFormatUtil.AllStringToPm(timex); - break; - } - - resolutionDic.Add(keyName + "Pm", resolutionPm); - } - } - - internal static void ResolveWeekOf(Dictionary resolutionDic, string keyName) - { - if (resolutionDic.ContainsKey(keyName)) - { - var resolution = (Dictionary)resolutionDic[keyName]; - - var monday = DateObject.Parse(resolution[DateTimeResolutionKey.Start]); - resolution[DateTimeResolutionKey.Timex] = DateTimeFormatUtil.ToIsoWeekTimex(monday); - - resolutionDic.Remove(keyName); - resolutionDic.Add(keyName, resolution); - } - } - - internal static Dictionary GenerateResolution(string type, Dictionary resolutionDic, string mod) - { - var res = new Dictionary(); - - if (type.Equals(Constants.SYS_DATETIME_DATETIME, StringComparison.Ordinal)) - { - AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.DATETIME, mod, res); - } - else if (type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) - { - AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.TIME, mod, res); - } - else if (type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal)) - { - AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.DATE, mod, res); - } - else if (type.Equals(Constants.SYS_DATETIME_DURATION, StringComparison.Ordinal)) - { - if (resolutionDic.ContainsKey(TimeTypeConstants.DURATION)) - { - res.Add(ResolutionKey.Value, resolutionDic[TimeTypeConstants.DURATION]); - } - } - else if (type.Equals(Constants.SYS_DATETIME_TIMEPERIOD, StringComparison.Ordinal)) - { - AddPeriodToResolution(resolutionDic, TimeTypeConstants.START_TIME, TimeTypeConstants.END_TIME, mod, res); - } - else if (type.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal)) - { - AddPeriodToResolution(resolutionDic, 
TimeTypeConstants.START_DATE, TimeTypeConstants.END_DATE, mod, res); - } - else if (type.Equals(Constants.SYS_DATETIME_DATETIMEPERIOD, StringComparison.Ordinal)) - { - AddPeriodToResolution(resolutionDic, TimeTypeConstants.START_DATETIME, TimeTypeConstants.END_DATETIME, mod, res); - } - else if (type.Equals(Constants.SYS_DATETIME_DATETIMEALT, StringComparison.Ordinal)) - { - // for a period - if (resolutionDic.Count > 2 || !string.IsNullOrEmpty(mod)) - { - AddAltPeriodToResolution(resolutionDic, mod, res); - } - else + if (pr != null) { - // for a datetime point - AddAltSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.DATETIMEALT, mod, res); + pr = TasksModeProcessing.TasksModeModification(pr, referenceTime); + pr.Text = originText; } } - return res; - } - - private static string CombineMod(string originalMod, string newMod) - { - var combinedMod = newMod; - - if (!string.IsNullOrEmpty(originalMod)) - { - combinedMod = $"{newMod}-{originalMod}"; - } - - return combinedMod; - } - - private static string DetermineResolutionDateTimeType(Dictionary pastResolutionStr) - { - switch (pastResolutionStr.Keys.First()) - { - case TimeTypeConstants.START_DATE: - return Constants.SYS_DATETIME_DATEPERIOD; - - case TimeTypeConstants.START_DATETIME: - return Constants.SYS_DATETIME_DATETIMEPERIOD; - - case TimeTypeConstants.START_TIME: - return Constants.SYS_DATETIME_TIMEPERIOD; - - default: - // ToLowerInvariant needed for legacy reasons with subtype code. - // @TODO remove in future refactoring of test code and double-check there's no impact in output schema. 
- return pastResolutionStr.Keys.First().ToLowerInvariant(); - } + return pr; } + // @TODO move to MergedParserUtil (if possible) private DateTimeParseResult ParseResult(ExtractResult extractResult, DateObject referenceTime) { DateTimeParseResult parseResult = null; switch (extractResult.Type) { case Constants.SYS_DATETIME_DATE: - if (extractResult.Metadata != null && extractResult.Metadata.IsHoliday) + if (extractResult.Metadata != null && extractResult.Metadata.IsHoliday && !extractResult.Metadata.IsHolidayRange) { parseResult = Config.HolidayParser.Parse(extractResult, referenceTime); } @@ -948,7 +366,14 @@ private DateTimeParseResult ParseResult(ExtractResult extractResult, DateObject break; case Constants.SYS_DATETIME_DATEPERIOD: - parseResult = this.Config.DatePeriodParser.Parse(extractResult, referenceTime); + if (extractResult.Metadata != null && extractResult.Metadata.IsHolidayRange) + { + parseResult = this.Config.HolidayParser.Parse(extractResult, referenceTime); + } + else + { + parseResult = this.Config.DatePeriodParser.Parse(extractResult, referenceTime); + } break; case Constants.SYS_DATETIME_TIMEPERIOD: @@ -972,6 +397,7 @@ private DateTimeParseResult ParseResult(ExtractResult extractResult, DateObject break; case Constants.SYS_DATETIME_TIMEZONE: + if ((Config.Options & DateTimeOptions.EnablePreview) != 0) { parseResult = this.Config.TimeZoneParser.Parse(extractResult, referenceTime); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseSetParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseSetParser.cs index 7044b3b43d..20e17f8217 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseSetParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseSetParser.cs @@ -1,6 +1,10 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Linq; +using Microsoft.Recognizers.Text.DateTime.Utilities; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -25,9 +29,11 @@ public ParseResult Parse(ExtractResult result) public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) { object value = null; - if (er.Type.Equals(ParserName)) + + if (er.Type.Equals(ParserName, StringComparison.Ordinal)) { var innerResult = ParseEachUnit(er.Text); + if (!innerResult.Success) { innerResult = ParseEachDuration(er.Text, refDate); @@ -38,8 +44,8 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) innerResult = ParserTimeEveryday(er.Text, refDate); } - // NOTE: Please do not change the order of following function - // datetimeperiod>dateperiod>timeperiod>datetime>date>time + // NOTE: Do not change the order of the following calls, due to type precedence + // datetimeperiod > dateperiod > timeperiod > datetime > date > time if (!innerResult.Success) { innerResult = ParseEach(config.DateTimePeriodExtractor, config.DateTimePeriodParser, er.Text, refDate); @@ -70,17 +76,34 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) innerResult = ParseEach(config.TimeExtractor, config.TimeParser, er.Text, refDate); } + if (!innerResult.Success) + { + innerResult = ParserDayEveryweek(er.Text, refDate); + } + + if (!innerResult.Success) + { + innerResult = ParserSingleNumberMonth(er.Text, refDate); + } + if (innerResult.Success) { - innerResult.FutureResolution = new Dictionary + if ((config.Options & DateTimeOptions.TasksMode) != 0) { - { TimeTypeConstants.SET, (string)innerResult.FutureValue }, - }; - - innerResult.PastResolution = new Dictionary + innerResult = TasksModeSetHandler.TasksModeAddResolution(ref innerResult, er, refDate); + } + else { - { TimeTypeConstants.SET, (string)innerResult.PastValue }, - }; + innerResult.FutureResolution = new Dictionary + { + { TimeTypeConstants.SET, 
(string)innerResult.FutureValue }, + }; + + innerResult.PastResolution = new Dictionary + { + { TimeTypeConstants.SET, (string)innerResult.PastValue }, + }; + } value = innerResult; } @@ -109,7 +132,9 @@ public List FilterResults(string query, List 0) + { + var timePr = this.config.TimeParser.Parse(timeErs[0], DateObject.Now); + ret = SetHandler.ResolveSet(ref ret, pr.TimexStr + timePr.TimexStr); + if ((config.Options & DateTimeOptions.TasksMode) != 0) + { + ret = TasksModeSetHandler.TasksModeResolveSet(ref ret, pr.TimexStr + timePr.TimexStr + eachResult.Timex); + } + } + else + { + ret = SetHandler.ResolveSet(ref ret, pr.TimexStr); + if ((config.Options & DateTimeOptions.TasksMode) != 0) + { + ret = TasksModeSetHandler.TasksModeResolveSet(ref ret, pr.TimexStr + eachResult.Timex); + } + } } return ret; } - private DateTimeResolutionResult ParseEach(IDateTimeExtractor extractor, IDateTimeParser parser, string text, DateObject refDate) + // parse value for input date like 19th for every month + private DateTimeResolutionResult ParserSingleNumberMonth(string text, DateObject refDate) { var ret = new DateTimeResolutionResult(); + List ers = null; + var success = false; // remove key words of set type from text var match = config.SetEachRegex.Match(text); + if (match.Success) + { + // if match value equals 19th of every month then newText = 19th of this month + var newText = config.ReplaceValueInTextWithFutTerm(text, match.Value); + + ers = this.config.DateExtractor.Extract(newText, refDate); + if (ers.Count == 1 && ers.First().Length == newText.Length) + { + success = true; + } + } + + if (success) + { + var eachMatch = this.config.EachUnitRegex.Match(text); + if (!eachMatch.Success) + { + eachMatch = this.config.PeriodicRegex.Match(text); + } + + if (eachMatch.Success) + { + var pr = this.config.DateParser.Parse(ers[0], DateObject.Now); + var eachResult = ParseEachUnit(eachMatch.Value); + + if ((config.Options & DateTimeOptions.TasksMode) != 0) + { + ret = 
TasksModeSetHandler.TasksModeResolveSet(ref ret, TasksModeConstants.FuzzyYearAndMonth + pr.TimexStr.Substring(8) + eachResult.Timex, pr); + } + else + { + ret = SetHandler.ResolveSet(ref ret, TasksModeConstants.FuzzyYearAndMonth + pr.TimexStr.Substring(8)); + + } + } + } + + return ret; + } + + private DateTimeResolutionResult ParseEach(IDateTimeExtractor extractor, IDateTimeParser parser, string text, DateObject refDate) + { + var ret = new DateTimeResolutionResult(); + + List ers = null; var success = false; + + // remove key words of set type from text + var match = config.SetEachRegex.Match(text); if (match.Success) { var trimmedText = text.Remove(match.Index, match.Length); + ers = extractor.Extract(trimmedText, refDate); if (ers.Count == 1 && ers.First().Length == trimmedText.Length) { @@ -219,12 +388,12 @@ private DateTimeResolutionResult ParseEach(IDateTimeExtractor extractor, IDateTi } // remove suffix 's' and "on" if existed and re-try - match = this.config.SetWeekDayRegex.Match(text); - if (match.Success) + var matchWeekDay = this.config.SetWeekDayRegex.Match(text); + if (matchWeekDay.Success) { - var trimmedText = text.Remove(match.Index, match.Length); + var trimmedText = text.Remove(matchWeekDay.Index, matchWeekDay.Length); + trimmedText = trimmedText.Insert(matchWeekDay.Index, config.WeekDayGroupMatchString(matchWeekDay)); - trimmedText = trimmedText.Insert(match.Index, config.WeekDayGroupMatchString(match)); ers = extractor.Extract(trimmedText, refDate); if (ers.Count == 1 && ers.First().Length == trimmedText.Length) { @@ -235,10 +404,26 @@ private DateTimeResolutionResult ParseEach(IDateTimeExtractor extractor, IDateTi if (success) { var pr = parser.Parse(ers[0], refDate); - ret.Timex = pr.TimexStr; - ret.FutureValue = ret.PastValue = "Set: " + ret.Timex; - ret.Success = true; - return ret; + + if ((config.Options & DateTimeOptions.TasksMode) != 0) + { + if (match.Success) + { + pr.TimexStr = 
TasksModeSetHandler.TasksModeTimexIntervalExt(pr.TimexStr); + } + + if (match.Groups["other"].Success) + { + // function replaces timex P1 with timex P2 when parsing values i.e. every other day at 2pm. + pr.TimexStr = TasksModeSetHandler.TasksModeTimexIntervalReplace(pr.TimexStr); + } + + ret = TasksModeSetHandler.TasksModeResolveSet(ref ret, pr.TimexStr, pr); + } + else + { + ret = SetHandler.ResolveSet(ref ret, pr.TimexStr); + } } return ret; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimeParser.cs index a3d4a3db82..427ead5bfa 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimeParser.cs @@ -1,6 +1,13 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Globalization; using System.Text.RegularExpressions; + using Microsoft.Recognizers.Text.Utilities; + using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime @@ -24,7 +31,7 @@ public ParseResult Parse(ExtractResult result) public DateTimeParseResult Parse(ExtractResult er, DateObject referenceTime) { object value = null; - if (er.Type.Equals(ParserName)) + if (er.Type.Equals(ParserName, StringComparison.Ordinal)) { DateTimeResolutionResult innerResult; @@ -127,7 +134,7 @@ private DateTimeResolutionResult ParseBasicRegexMatch(string text, DateObject re ret.Comment = Constants.Comment_AmPm; } - ret.Timex = "T" + hour.ToString("D2"); + ret.Timex = "T" + hour.ToString("D2", CultureInfo.InvariantCulture); ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(referenceTime.Year, referenceTime.Month, referenceTime.Day, hour, 0, 0); ret.Success = true; @@ -193,6 +200,12 @@ private DateTimeResolutionResult Match2Time(Match match, DateObject referenceTim 
min = 0; second = 0; } + else if (!string.IsNullOrEmpty(match.Groups["midearlymorning"].Value)) + { + hour = 6; + min = 0; + second = 0; + } else if (!string.IsNullOrEmpty(match.Groups["midmorning"].Value)) { hour = 10; @@ -255,7 +268,7 @@ private DateTimeResolutionResult Match2Time(Match match, DateObject referenceTim } else { - min = int.Parse(minStr); + min = int.Parse(minStr, CultureInfo.InvariantCulture); hasMin = true; } @@ -263,9 +276,19 @@ private DateTimeResolutionResult Match2Time(Match match, DateObject referenceTim var secStr = match.Groups[Constants.SecondGroupName].Value; if (!string.IsNullOrEmpty(secStr)) { - second = int.Parse(secStr); + second = int.Parse(secStr, CultureInfo.InvariantCulture); hasSec = true; } + else + { + // as for minStr, check if secStr is defined in Numbers + secStr = match.Groups["secnum"].Value; + if (!string.IsNullOrEmpty(secStr)) + { + second = this.config.Numbers[secStr]; + hasSec = true; + } + } } // Adjust by desc string @@ -316,18 +339,18 @@ private DateTimeResolutionResult Match2Time(Match match, DateObject referenceTim hour = 0; } - ret.Timex = "T" + hour.ToString("D2"); + ret.Timex = "T" + hour.ToString("D2", CultureInfo.InvariantCulture); if (hasMin) { - ret.Timex += ":" + min.ToString("D2"); + ret.Timex += ":" + min.ToString("D2", CultureInfo.InvariantCulture); } if (hasSec) { - ret.Timex += ":" + second.ToString("D2"); + ret.Timex += ":" + second.ToString("D2", CultureInfo.InvariantCulture); } - if (hour <= Constants.HalfDayHourCount && !hasPm && !hasAm && !hasMid) + if (hour <= Constants.HalfDayHourCount && hour != 0 && !hasPm && !hasAm && !hasMid) { ret.Comment = Constants.Comment_AmPm; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimePeriodParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimePeriodParser.cs index 298ae9725d..35dff5df1f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimePeriodParser.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimePeriodParser.cs @@ -1,6 +1,12 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; +using System.Globalization; +using Microsoft.Recognizers.Text.DateTime.English; using Microsoft.Recognizers.Text.Utilities; + using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime @@ -26,7 +32,7 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) var referenceTime = refTime; object value = null; - if (er.Type.Equals(ParserName)) + if (er.Type.Equals(ParserName, StringComparison.Ordinal)) { DateTimeResolutionResult innerResult; @@ -114,6 +120,11 @@ private DateTimeResolutionResult InternalParse(string entityText, DateObject ref innerResult = ParseTimeOfDay(entityText, referenceTime); } + if (!innerResult.Success) + { + innerResult = ParseTimePeroidWithDuration(entityText, referenceTime); + } + return innerResult; } @@ -170,22 +181,22 @@ private DateTimeResolutionResult ParsePureDigitNumCases(string text, DateObject if (!this.config.Numbers.TryGetValue(startHourStr, out int beginHour)) { - beginHour = int.Parse(startHourStr); + beginHour = int.Parse(startHourStr, CultureInfo.InvariantCulture); } if (!this.config.Numbers.TryGetValue(startMinuteStr, out int beginMinute)) { - beginMinute = int.Parse(startMinuteStr); + beginMinute = int.Parse(startMinuteStr, CultureInfo.InvariantCulture); } if (!this.config.Numbers.TryGetValue(endHourStr, out int endHour)) { - endHour = int.Parse(endHourStr); + endHour = int.Parse(endHourStr, CultureInfo.InvariantCulture); } if (!this.config.Numbers.TryGetValue(endMinuteStr, out int endMinute)) { - endMinute = int.Parse(endMinuteStr); + endMinute = int.Parse(endMinuteStr, CultureInfo.InvariantCulture); } var beginDateTime = DateObject.MinValue.SafeCreateFromValue(year, month, day, beginHour, beginMinute, 0); @@ -259,17 +270,17 @@ 
private DateTimeResolutionResult ParsePureNumCases(string text, DateObject refer { if (!this.config.Numbers.TryGetValue(hourStr, out int beginHour)) { - beginHour = int.Parse(hourStr); + beginHour = int.Parse(hourStr, CultureInfo.InvariantCulture); } hourStr = hourGroup.Captures[1].Value; afterHourIndex = hourGroup.Captures[1].Index + hourGroup.Captures[1].Length; - if (afterHourIndex == trimmedText.Length || !trimmedText.Substring(afterHourIndex).Trim().StartsWith(":")) + if (afterHourIndex == trimmedText.Length || !trimmedText.Substring(afterHourIndex).Trim().StartsWith(":", StringComparison.Ordinal)) { if (!this.config.Numbers.TryGetValue(hourStr, out int endHour)) { - endHour = int.Parse(hourStr); + endHour = int.Parse(hourStr, CultureInfo.InvariantCulture); } // parse "pm" @@ -289,12 +300,12 @@ private DateTimeResolutionResult ParsePureNumCases(string text, DateObject refer if (!string.IsNullOrEmpty(matchAmStr) || rightAmValid) { - if (endHour >= Constants.HalfDayHourCount) + if (endHour > Constants.HalfDayHourCount) { endHour -= Constants.HalfDayHourCount; } - if (beginHour >= Constants.HalfDayHourCount && beginHour - Constants.HalfDayHourCount < endHour) + if (beginHour > Constants.HalfDayHourCount && beginHour - Constants.HalfDayHourCount < endHour) { beginHour -= Constants.HalfDayHourCount; } @@ -309,7 +320,7 @@ private DateTimeResolutionResult ParsePureNumCases(string text, DateObject refer } else if (!string.IsNullOrEmpty(matchPmStr) || rightPmValid) { - if (endHour < Constants.HalfDayHourCount) + if (endHour <= Constants.HalfDayHourCount) { endHour += Constants.HalfDayHourCount; } @@ -326,8 +337,8 @@ private DateTimeResolutionResult ParsePureNumCases(string text, DateObject refer if (isValid) { - var beginStr = "T" + beginHour.ToString("D2"); - var endStr = "T" + endHour.ToString("D2"); + var beginStr = "T" + beginHour.ToString("D2", CultureInfo.InvariantCulture); + var endStr = "T" + endHour.ToString("D2", CultureInfo.InvariantCulture); if (endHour >= 
beginHour) { @@ -339,7 +350,7 @@ private DateTimeResolutionResult ParsePureNumCases(string text, DateObject refer } // Try to get the timezone resolution - var timeErs = config.TimeExtractor.Extract(trimmedText); + var timeErs = config.TimeExtractor.Extract(trimmedText, referenceTime); foreach (var er in timeErs) { var pr = config.TimeParser.Parse(er, referenceTime); @@ -405,7 +416,7 @@ private DateTimeResolutionResult ParseSpecificTimeCases(string text, DateObject } else { - beginHour = int.Parse(hourStr); + beginHour = int.Parse(hourStr, CultureInfo.InvariantCulture); } hourStr = hourGroup.Captures[1].Value; @@ -416,7 +427,7 @@ private DateTimeResolutionResult ParseSpecificTimeCases(string text, DateObject } else { - endHour = int.Parse(hourStr); + endHour = int.Parse(hourStr, CultureInfo.InvariantCulture); } var time1StartIndex = match.Groups["time1"].Index; @@ -430,11 +441,11 @@ private DateTimeResolutionResult ParseSpecificTimeCases(string text, DateObject var minuteCapture = match.Groups[Constants.MinuteGroupName].Captures[i]; if (minuteCapture.Index >= time1StartIndex && minuteCapture.Index + minuteCapture.Length <= time1EndIndex) { - beginMinute = int.Parse(minuteCapture.Value); + beginMinute = int.Parse(minuteCapture.Value, CultureInfo.InvariantCulture); } else if (minuteCapture.Index >= time2StartIndex && minuteCapture.Index + minuteCapture.Length <= time2EndIndex) { - endMinute = int.Parse(minuteCapture.Value); + endMinute = int.Parse(minuteCapture.Value, CultureInfo.InvariantCulture); } } @@ -444,11 +455,11 @@ private DateTimeResolutionResult ParseSpecificTimeCases(string text, DateObject var secondCapture = match.Groups[Constants.SecondGroupName].Captures[i]; if (secondCapture.Index >= time1StartIndex && secondCapture.Index + secondCapture.Length <= time1EndIndex) { - beginSecond = int.Parse(secondCapture.Value); + beginSecond = int.Parse(secondCapture.Value, CultureInfo.InvariantCulture); } else if (secondCapture.Index >= time2StartIndex && 
secondCapture.Index + secondCapture.Length <= time2EndIndex) { - endSecond = int.Parse(secondCapture.Value); + endSecond = int.Parse(secondCapture.Value, CultureInfo.InvariantCulture); } } @@ -552,7 +563,7 @@ private DateTimeResolutionResult ParseSpecificTimeCases(string text, DateObject if (hasRightAm) { - if (endHour >= Constants.HalfDayHourCount) + if (endHour > Constants.HalfDayHourCount) { endDateTime = endDateTime.AddHours(-Constants.HalfDayHourCount); } @@ -567,7 +578,7 @@ private DateTimeResolutionResult ParseSpecificTimeCases(string text, DateObject } else if (hasRightPm) { - if (endHour < Constants.HalfDayHourCount) + if (endHour <= Constants.HalfDayHourCount) { endDateTime = endDateTime.AddHours(Constants.HalfDayHourCount); } @@ -663,6 +674,138 @@ private DateTimeResolutionResult ParseSpecificTimeCases(string text, DateObject return ret; } + // Cases like "from 6am for 3 hours" and "for 3 hours from 6am" are parsing here. + private DateTimeResolutionResult ParseTimePeroidWithDuration(string text, DateObject referenceTime) + { + var parserConfig = this.config as EnglishTimePeriodParserConfiguration; + var ret = new DateTimeResolutionResult(); + if (parserConfig != null) + { + var match = parserConfig.TimePeriodWithDurationRegex.MatchExact(text, trim: true); + + if (match.Success) + { + var erDuration = parserConfig.DurationExtractor.Extract(text); + + if (erDuration is null || erDuration.Count == 0) + { + return ret; + } + + var prDuration = parserConfig.DurationParser.Parse(erDuration[0]); + int year = referenceTime.Year, month = referenceTime.Month, day = referenceTime.Day; + + // Cases like "half past seven" are not handled here + if (match.Groups[Constants.PrefixGroupName].Success) + { + return ret; + } + + // Cases like "4" is different with "4:00" as the Timex is different "T04H" vs "T04H00M" + int beginHour; + int beginMinute = Constants.InvalidMinute; + int beginSecond = Constants.InvalidSecond; + + // Get time1 + var hourGroup = 
match.Groups[Constants.HourGroupName]; + + var hourStr = hourGroup.Captures[0].Value; + + if (config.Numbers.ContainsKey(hourStr)) + { + beginHour = config.Numbers[hourStr]; + } + else + { + beginHour = int.Parse(hourStr, CultureInfo.InvariantCulture); + } + + var time1StartIndex = match.Groups["time1"].Index; + var time1EndIndex = time1StartIndex + match.Groups["time1"].Length; + + // Get beginMinute (if exists) + for (int i = 0; i < match.Groups[Constants.MinuteGroupName].Captures.Count; i++) + { + var minuteCapture = match.Groups[Constants.MinuteGroupName].Captures[i]; + if (minuteCapture.Index >= time1StartIndex && minuteCapture.Index + minuteCapture.Length <= time1EndIndex) + { + beginMinute = int.Parse(minuteCapture.Value, CultureInfo.InvariantCulture); + } + } + + // Get beginSecond (if exists) + for (int i = 0; i < match.Groups[Constants.SecondGroupName].Captures.Count; i++) + { + var secondCapture = match.Groups[Constants.SecondGroupName].Captures[i]; + if (secondCapture.Index >= time1StartIndex && secondCapture.Index + secondCapture.Length <= time1EndIndex) + { + beginSecond = int.Parse(secondCapture.Value, CultureInfo.InvariantCulture); + } + } + + // Desc here means descriptions like "am / pm / o'clock" + // Get leftDesc (if exists) + var leftDesc = match.Groups["leftDesc"].Value; + for (int i = 0; i < match.Groups[Constants.DescGroupName].Captures.Count; i++) + { + var descCapture = match.Groups[Constants.DescGroupName].Captures[i]; + if (descCapture.Index >= time1StartIndex && descCapture.Index + descCapture.Length <= time1EndIndex && string.IsNullOrEmpty(leftDesc)) + { + leftDesc = descCapture.Value; + } + } + + var beginDateTime = DateObject.MinValue.SafeCreateFromValue(year, month, day, beginHour, beginMinute >= 0 ? beginMinute : 0, beginSecond >= 0 ? 
beginSecond : 0); + + var hasLeftAm = !string.IsNullOrEmpty(leftDesc) && leftDesc.StartsWith("a", StringComparison.Ordinal); + var hasLeftPm = !string.IsNullOrEmpty(leftDesc) && leftDesc.StartsWith("p", StringComparison.Ordinal); + + // one of the time point has description like 'am' or 'pm' + if (hasLeftAm) + { + if (beginHour >= Constants.HalfDayHourCount) + { + beginDateTime = beginDateTime.AddHours(-Constants.HalfDayHourCount); + } + } + else if (hasLeftPm) + { + if (beginHour < Constants.HalfDayHourCount) + { + beginDateTime = beginDateTime.AddHours(Constants.HalfDayHourCount); + } + } + + var endDateTime = beginDateTime.AddSeconds(Convert.ToInt32((prDuration.Value as DateTimeResolutionResult).FutureValue, CultureInfo.InvariantCulture)); + + var beginStr = DateTimeFormatUtil.ShortTime(beginDateTime.Hour, beginMinute, beginSecond); + var endStr = DateTimeFormatUtil.ShortTime(endDateTime.Hour, endDateTime.Minute, endDateTime.Second); + + ret.Success = true; + + ret.Timex = $"({beginStr},{endStr},{DateTimeFormatUtil.LuisTimeSpan(endDateTime - beginDateTime)})"; + + ret.FutureValue = ret.PastValue = new Tuple( + beginDateTime, + endDateTime); + + ret.SubDateTimeEntities = new List(); + var er = new ExtractResult() + { + Start = time1StartIndex, + Length = time1EndIndex - time1StartIndex, + Text = text.Substring(time1StartIndex, time1EndIndex - time1StartIndex), + Type = $"{Constants.SYS_DATETIME_TIME}", + }; + + var pr = this.config.TimeParser.Parse(er, referenceTime); + ret.SubDateTimeEntities.Add(pr); + } + } + + return ret; + } + private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject referenceTime) { var ret = new DateTimeResolutionResult(); @@ -717,7 +860,9 @@ private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject refe pr1 = this.config.TimeParser.Parse(ers[0], referenceTime); pr2 = this.config.TimeParser.Parse(ers[1], referenceTime); - if (pr1.Value == null || pr2.Value == null) + // cases with time1 = time2 are 
excluded to avoid parsing here expressions like + // "morning-morning" (which in Hindi means "early-morning") + if (pr1.Value == null || pr2.Value == null || pr1.Text == pr2.Text) { return ret; } @@ -728,7 +873,7 @@ private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject refe var beginTime = (DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue; var endTime = (DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue; - if (!string.IsNullOrEmpty(ampmStr2) && ampmStr2.EndsWith(Constants.Comment_AmPm) && endTime <= beginTime && endTime.AddHours(Constants.HalfDayHourCount) > beginTime) + if (!string.IsNullOrEmpty(ampmStr2) && ampmStr2.EndsWith(Constants.Comment_AmPm, StringComparison.Ordinal) && endTime <= beginTime && endTime.AddHours(Constants.HalfDayHourCount) > beginTime) { endTime = endTime.AddHours(Constants.HalfDayHourCount); ((DateTimeResolutionResult)pr2.Value).FutureValue = endTime; @@ -739,7 +884,7 @@ private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject refe } } - if (!string.IsNullOrEmpty(ampmStr1) && ampmStr1.EndsWith(Constants.Comment_AmPm) && endTime > beginTime.AddHours(Constants.HalfDayHourCount)) + if (!string.IsNullOrEmpty(ampmStr1) && ampmStr1.EndsWith(Constants.Comment_AmPm, StringComparison.Ordinal) && endTime > beginTime.AddHours(Constants.HalfDayHourCount)) { beginTime = beginTime.AddHours(Constants.HalfDayHourCount); ((DateTimeResolutionResult)pr1.Value).FutureValue = beginTime; @@ -762,8 +907,8 @@ private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject refe ret.FutureValue = ret.PastValue = new Tuple(beginTime, endTime); ret.Success = true; - if (!string.IsNullOrEmpty(ampmStr1) && ampmStr1.EndsWith(Constants.Comment_AmPm) && - !string.IsNullOrEmpty(ampmStr2) && ampmStr2.EndsWith(Constants.Comment_AmPm)) + if (!string.IsNullOrEmpty(ampmStr1) && ampmStr1.EndsWith(Constants.Comment_AmPm, StringComparison.Ordinal) && + !string.IsNullOrEmpty(ampmStr2) && 
ampmStr2.EndsWith(Constants.Comment_AmPm, StringComparison.Ordinal)) { ret.Comment = Constants.Comment_AmPm; } @@ -782,7 +927,7 @@ private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject refe return ret; } - // parse "morning", "afternoon", "night" + // Parse "morning", "afternoon", "night" private DateTimeResolutionResult ParseTimeOfDay(string text, DateObject referenceTime) { int day = referenceTime.Day, @@ -790,7 +935,7 @@ private DateTimeResolutionResult ParseTimeOfDay(string text, DateObject referenc year = referenceTime.Year; var ret = new DateTimeResolutionResult(); - // extract early/late prefix from text + // Extract early/late prefix from text var match = this.config.TimeOfDayRegex.Match(text); bool hasEarly = false, hasLate = false; if (match.Success) @@ -814,17 +959,17 @@ private DateTimeResolutionResult ParseTimeOfDay(string text, DateObject referenc } } - if (!this.config.GetMatchedTimexRange(text, out string timex, out int beginHour, out int endHour, out int endMinSeg)) + if (!this.config.GetMatchedTimeRange(text, out string timex, out int beginHour, out int endHour, out int endMinSeg)) { return new DateTimeResolutionResult(); } - // modify time period if "early" or "late" is existed + // Modify time period if "early" or "late" modifiers exist if (hasEarly) { - endHour = beginHour + 2; + endHour = beginHour + Constants.EARLY_LATE_TIME_DELTA; - // handling case: night end with 23:59 + // Handling case: night ends at 23:59, due to .NET limitation if (endMinSeg == 59) { endMinSeg = 0; @@ -832,7 +977,7 @@ private DateTimeResolutionResult ParseTimeOfDay(string text, DateObject referenc } else if (hasLate) { - beginHour = beginHour + 2; + beginHour += Constants.EARLY_LATE_TIME_DELTA; } ret.Timex = timex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimeZoneParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimeZoneParser.cs index 5557d91354..015bc410c5 100644 --- 
a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimeZoneParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimeZoneParser.cs @@ -1,8 +1,11 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; +using System.Globalization; using System.Linq; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions.English; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime @@ -11,7 +14,15 @@ public class BaseTimeZoneParser : IDateTimeParser { public static readonly string ParserName = Constants.SYS_DATETIME_TIMEZONE; // "TimeZone"; - public static readonly Regex TimeZoneEndRegex = new Regex("time$|timezone$", RegexOptions.Singleline); + private readonly ITimeZoneParserConfiguration config; + + public BaseTimeZoneParser(ITimeZoneParserConfiguration config) + { + this.config = config; + TimeZoneEndRegex = new Regex(config.TimeZoneEndRegex, RegexOptions.Singleline); + } + + public Regex TimeZoneEndRegex { get; } // Compute UTC offset in minutes from matched timezone offset in text. e.g. "-4:30" -> -270; "+8"-> 480. 
public static int ComputeMinutes(string utcOffset) @@ -21,12 +32,16 @@ public static int ComputeMinutes(string utcOffset) return Constants.InvalidOffsetValue; } - utcOffset = utcOffset.Trim(); + utcOffset = utcOffset.Trim().TrimEnd('h'); int sign = Constants.PositiveSign; // later than utc, default value - if (utcOffset.StartsWith("+") || utcOffset.StartsWith("-") || utcOffset.StartsWith("±")) + bool hasOffset = utcOffset.StartsWith("+", StringComparison.Ordinal) || + utcOffset.StartsWith("-", StringComparison.Ordinal) || + utcOffset.StartsWith("±", StringComparison.Ordinal); + + if (hasOffset) { - if (utcOffset.StartsWith("-")) + if (utcOffset.StartsWith("-", StringComparison.Ordinal)) { sign = Constants.NegativeSign; // earlier than utc 0 } @@ -39,15 +54,16 @@ public static int ComputeMinutes(string utcOffset) if (utcOffset.Contains(":")) { var tokens = utcOffset.Split(':').ToList(); - hours = int.Parse(tokens[0]); - minutes = int.Parse(tokens[1]); + hours = int.Parse(tokens[0], CultureInfo.InvariantCulture); + minutes = int.Parse(tokens[1], CultureInfo.InvariantCulture); } else if (int.TryParse(utcOffset, out hours)) { minutes = 0; } - if (hours > Constants.HalfDayHourCount) + // Timezones go from -12 to +14 + if (sign < 0 ? 
hours > Constants.HalfDayHourCount : hours > Constants.HalfDayHourCount + 2) { return Constants.InvalidOffsetValue; } @@ -70,10 +86,10 @@ public static string ConvertOffsetInMinsToOffsetString(int offsetMins) public static string ConvertMinsToRegularFormat(int offsetMins) { - return TimeSpan.FromMinutes(offsetMins).ToString(@"hh\:mm"); + return TimeSpan.FromMinutes(offsetMins).ToString(@"hh\:mm", CultureInfo.InvariantCulture); } - public static string NormalizeText(string text) + public string NormalizeText(string text) { text = Regex.Replace(text, @"\s+", " "); text = TimeZoneEndRegex.Replace(text, string.Empty); @@ -102,7 +118,8 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) string text = er.Text; string normalizedText = NormalizeText(text); - string matched = Regex.Match(text, TimeZoneDefinitions.DirectUtcRegex).Groups[2].Value; + string matched = config.DirectUtcRegex.Match(text).Groups[2].Value; + int offsetInMinutes = ComputeMinutes(matched); if (offsetInMinutes != Constants.InvalidOffsetValue) @@ -110,24 +127,25 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) result.Value = GetDateTimeResolutionResult(offsetInMinutes, text); result.ResolutionStr = Constants.UtcOffsetMinsKey + ": " + offsetInMinutes; } - else if (TimeZoneDefinitions.AbbrToMinMapping.ContainsKey(normalizedText) && - TimeZoneDefinitions.AbbrToMinMapping[normalizedText] != Constants.InvalidOffsetValue) + else if (config.AbbrToMinMapping.ContainsKey(normalizedText) && + config.AbbrToMinMapping[normalizedText] != Constants.InvalidOffsetValue) { - int utcMinuteShift = TimeZoneDefinitions.AbbrToMinMapping[normalizedText]; + int utcMinuteShift = config.AbbrToMinMapping[normalizedText]; result.Value = GetDateTimeResolutionResult(utcMinuteShift, text); result.ResolutionStr = Constants.UtcOffsetMinsKey + ": " + utcMinuteShift; } - else if (TimeZoneDefinitions.FullToMinMapping.ContainsKey(normalizedText) && - 
TimeZoneDefinitions.FullToMinMapping[normalizedText] != Constants.InvalidOffsetValue) + else if (config.FullToMinMapping.ContainsKey(normalizedText) && + config.FullToMinMapping[normalizedText] != Constants.InvalidOffsetValue) { - int utcMinuteShift = TimeZoneDefinitions.FullToMinMapping[normalizedText]; + int utcMinuteShift = config.FullToMinMapping[normalizedText]; + result.Value = GetDateTimeResolutionResult(utcMinuteShift, text); result.ResolutionStr = Constants.UtcOffsetMinsKey + ": " + utcMinuteShift; } else { - // TODO: Temporary solution for city timezone and ambiguous data + // @TODO: Temporary solution for city timezone and ambiguous data result.Value = new DateTimeResolutionResult { Success = true, diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParser.cs new file mode 100644 index 0000000000..e7d7b3d296 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParser.cs @@ -0,0 +1,965 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.DateTime.Chinese; +using Microsoft.Recognizers.Text.Utilities; +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public class BaseCJKDateParser : IDateTimeParser + { + public static readonly string ParserName = Constants.SYS_DATETIME_DATE; // "Date"; + + public static readonly DateObject NoDate = DateObject.MinValue.SafeCreateFromValue(0, 0, 0); + + private readonly ICJKDateParserConfiguration config; + + public BaseCJKDateParser(ICJKDateParserConfiguration config) + { + this.config = config; + } + + public ParseResult Parse(ExtractResult extResult) + { + return this.Parse(extResult, DateObject.Now); + } + + public virtual DateTimeParseResult Parse(ExtractResult er, DateObject referenceDate) + { + object value = null; + + if (er.Type.Equals(ParserName, StringComparison.Ordinal)) + { + value = InnerParser(er.Text, referenceDate); + } + + var ret = new DateTimeParseResult + { + Text = er.Text, + Start = er.Start, + Length = er.Length, + Type = er.Type, + Data = er.Data, + Value = value, + TimexStr = value == null ? 
string.Empty : ((DateTimeResolutionResult)value).Timex, + ResolutionStr = string.Empty, + }; + + return ret; + } + + public List FilterResults(string query, List candidateResults) + { + return candidateResults; + } + + protected DateTimeResolutionResult InnerParser(string text, DateObject reference) + { + var innerResult = ParseBasicRegexMatch(text, reference); + + if (!innerResult.Success) + { + innerResult = ParseWeekdayOfMonth(text, reference); + } + + if (!innerResult.Success) + { + innerResult = ParseImplicitDate(text, reference); + } + + if (!innerResult.Success) + { + innerResult = ParserDurationWithAgoAndLater(text, reference); + } + + if (innerResult.Success) + { + innerResult.FutureResolution = new Dictionary + { + { TimeTypeConstants.DATE, DateTimeFormatUtil.FormatDate((DateObject)innerResult.FutureValue) }, + }; + + innerResult.PastResolution = new Dictionary + { + { TimeTypeConstants.DATE, DateTimeFormatUtil.FormatDate((DateObject)innerResult.PastValue) }, + }; + + innerResult.IsLunar = IsLunarCalendar(text); + + return innerResult; + } + + return null; + } + + // parse basic patterns in DateRegexList + protected DateTimeResolutionResult ParseBasicRegexMatch(string text, DateObject referenceDate) + { + foreach (var regex in this.config.DateRegexList) + { + var match = regex.MatchExact(text, trim: true); + + if (match.Success) + { + // Value string will be set in Match2Date method + var ret = Match2Date(match.Match, referenceDate); + return ret; + } + } + + return new DateTimeResolutionResult(); + } + + // match several other cases + // including '今天', '后天', '十三日' + protected DateTimeResolutionResult ParseImplicitDate(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + + // handle "十二日" "明年这个月三日" "本月十一日" + var match = this.config.SpecialDate.MatchExact(text, trim: true); + if (match.Success) + { + var yearStr = match.Groups["thisyear"].Value; + var monthStr = match.Groups["thismonth"].Value; + var dayStr = 
match.Groups["day"].Value; + + int month = referenceDate.Month, year = referenceDate.Year; + var day = this.config.DayOfMonth[dayStr]; + + bool hasYear = false, hasMonth = false; + + if (!string.IsNullOrEmpty(monthStr)) + { + hasMonth = true; + hasYear = true; + if (this.config.NextRe.Match(monthStr).Success) + { + month++; + if (month == Constants.MaxMonth + 1) + { + month = Constants.MinMonth; + year++; + } + } + else if (this.config.LastRe.Match(monthStr).Success) + { + month--; + if (month == Constants.MinMonth - 1) + { + month = Constants.MaxMonth; + year--; + } + } + + if (!string.IsNullOrEmpty(yearStr)) + { + hasYear = true; + if (this.config.NextRe.Match(yearStr).Success) + { + ++year; + } + else if (this.config.LastRe.Match(yearStr).Success) + { + --year; + } + } + } + + ret.Timex = DateTimeFormatUtil.LuisDate(hasYear ? year : -1, hasMonth ? month : -1, day); + + DateObject futureDate, pastDate; + + if (day > DateObjectExtension.GetMonthMaxDay(year, month)) + { + var futureMonth = month + 1; + var pastMonth = month - 1; + var futureYear = year; + var pastYear = year; + + if (futureMonth == Constants.MaxMonth + 1) + { + futureMonth = Constants.MinMonth; + futureYear = year++; + } + + if (pastMonth == Constants.MinMonth - 1) + { + pastMonth = Constants.MaxMonth; + pastYear = year--; + } + + var isFutureValid = DateObjectExtension.IsValidDate(futureYear, futureMonth, day); + var isPastValid = DateObjectExtension.IsValidDate(pastYear, pastMonth, day); + + if (isFutureValid && isPastValid) + { + futureDate = DateObject.MinValue.SafeCreateFromValue(futureYear, futureMonth, day); + pastDate = DateObject.MinValue.SafeCreateFromValue(pastYear, pastMonth, day); + } + else if (isFutureValid && !isPastValid) + { + futureDate = pastDate = DateObject.MinValue.SafeCreateFromValue(futureYear, futureMonth, day); + } + else if (!isFutureValid && !isPastValid) + { + futureDate = pastDate = DateObject.MinValue.SafeCreateFromValue(pastYear, pastMonth, day); + } + else + { + // 
Fall back to normal cases, might lead to resolution failure + // TODO: Ideally, this failure should be filtered out in extract phase + futureDate = pastDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); + } + } + else + { + futureDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); + pastDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); + + if (!hasMonth) + { + if (futureDate < referenceDate) + { + if (IsValidDate(year, month + 1, day)) + { + futureDate = futureDate.AddMonths(1); + } + } + + if (pastDate >= referenceDate) + { + if (IsValidDate(year, month - 1, day)) + { + pastDate = pastDate.AddMonths(-1); + } + else if (DateContext.IsFeb29th(year, month - 1, day)) + { + pastDate = pastDate.AddMonths(-2); + } + } + } + else if (!hasYear) + { + if (futureDate < referenceDate) + { + if (IsValidDate(year + 1, month, day)) + { + futureDate = futureDate.AddYears(1); + } + } + + if (pastDate >= referenceDate) + { + if (IsValidDate(year - 1, month, day)) + { + pastDate = pastDate.AddYears(-1); + } + } + } + } + + ret.FutureValue = futureDate; + ret.PastValue = pastDate; + ret.Success = true; + + return ret; + } + + // handle cases like "昨日", "明日", "大后天" + match = this.config.SpecialDayRegex.MatchExact(text, trim: true); + + if (match.Success) + { + var value = referenceDate.AddDays(this.config.GetSwiftDay(match.Value)); + ret.Timex = DateTimeFormatUtil.LuisDate(value); + ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(value.Year, value.Month, value.Day); + ret.Success = true; + + return ret; + } + + // Handle "今から2日曜日" (2 Sundays from now) + var exactMatch = this.config.SpecialDayWithNumRegex.MatchExact(text, trim: true); + + if (exactMatch.Success) + { + var numErs = this.config.IntegerExtractor.Extract(text); + var weekdayStr = exactMatch.Groups["weekday"].Value; + + if (!string.IsNullOrEmpty(weekdayStr) && numErs.Count > 0) + { + var num = 
Convert.ToInt32((double)(this.config.NumberParser.Parse(numErs[0]).Value ?? 0)); + var value = referenceDate; + + // Check whether the determined day of this week has passed. + if (value.DayOfWeek > (DayOfWeek)this.config.DayOfWeek[weekdayStr]) + { + num--; + } + + while (num-- > 0) + { + value = value.Next((DayOfWeek)this.config.DayOfWeek[weekdayStr]); + } + + ret.Timex = DateTimeFormatUtil.LuisDate(value); + ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(value.Year, value.Month, value.Day); + ret.Success = true; + + return ret; + } + + } + + // handle "明日から3週間" (3 weeks from tomorrow) + var durationResult = this.config.DurationExtractor.Extract(text, referenceDate); + var unitMatch = this.config.DurationRelativeDurationUnitRegex.Match(text); + var isWithin = this.config.DurationRelativeDurationUnitRegex.MatchEnd(text, trim: true).Groups[Constants.WithinGroupName].Success; + + if ((exactMatch.Success || isWithin) && unitMatch.Success && (durationResult.Count > 0) && + string.IsNullOrEmpty(unitMatch.Groups["few"].Value)) + { + var pr = this.config.DurationParser.Parse(durationResult[0], referenceDate); + var dayStr = unitMatch.Groups["later"].Value; + var future = true; + int swift = 0; + + if (pr != null) + { + if (!string.IsNullOrEmpty(dayStr)) + { + swift = this.config.GetSwiftDay(dayStr); + } + + var resultDateTime = DurationParsingUtil.ShiftDateTime(pr.TimexStr, referenceDate.AddDays(swift), future); + ret.Timex = $"{DateTimeFormatUtil.LuisDate(resultDateTime)}"; + ret.FutureValue = ret.PastValue = resultDateTime; + ret.Success = true; + return ret; + } + + } + + if (!ret.Success) + { + ret = MatchWeekdayAndDay(text, referenceDate); + } + + if (!ret.Success) + { + ret = MatchThisWeekday(text, referenceDate); + } + + if (!ret.Success) + { + ret = MatchNextNextWeekday(text, referenceDate); + } + + if (!ret.Success) + { + ret = MatchNextWeekday(text, referenceDate); + } + + if (!ret.Success) + { + ret = MatchLastLastWeekday(text, 
// Resolves text that pairs a weekday with a day of month (e.g. 'Monday 21'), where both
// 'Monday' and '21' refer to the same date.
//   text:      the text to match against WeekDayAndDayRegex.
//   reference: the date the resolution is relative to.
// Returns a result with Success = true and Timex/FutureValue/PastValue filled in, or an
// unresolved result (Success = false) when the regex does not match or when no valid pivot
// date can be built from the parsed day.
protected DateTimeResolutionResult MatchWeekdayAndDay(string text, DateObject reference)
{
    var ret = new DateTimeResolutionResult();

    // The year of the expected date can be different from the year of the reference date.
    var match = this.config.WeekDayAndDayRegex.Match(text);
    if (!match.Success)
    {
        return ret;
    }

    int month = reference.Month, year = reference.Year;

    // Parse the day in text into a number (ConvertCJKToNum yields -1 when no integer is extracted).
    var day = ConvertCJKToNum(match.Groups["day"].Value);

    // Firstly, find the nearest date carrying the "day" as pivotDate.
    // If the pivotDate already falls on the requested weekday, it is both the future and the past value.
    // Otherwise, move the pivotDate forward month by month to find the nearest futureDate and backward
    // month by month to find the nearest pastDate.
    // Notice: if the "day" is larger than 28, some months are skipped while stepping.
    var pivotDate = new DateObject(year, month, 1);
    if (DateObject.DaysInMonth(year, month) >= day)
    {
        pivotDate = DateObject.MinValue.SafeCreateFromValue(year, month, day);
    }
    else
    {
        // Adding 1 month is enough, since months 1, 3, 5, 7, 8, 10, 12 have 31 days.
        pivotDate = pivotDate.AddMonths(1);
        pivotDate = DateObject.MinValue.SafeCreateFromValue(pivotDate.Year, pivotDate.Month, day);
    }

    // DateObject.MinValue is treated as the "could not build a date" marker (as the original check did).
    // Report the text as unresolved instead of returning Success with no Timex and no values.
    if (pivotDate.Equals(DateObject.MinValue))
    {
        return ret;
    }

    var weekDay = this.config.DayOfWeek[match.Groups["weekday"].Value];

    if ((int)pivotDate.DayOfWeek == weekDay)
    {
        // The pivot is the requested weekday and carries the "day".
        // Use the pivot itself: it may lie in the month after the reference month, in which case
        // rebuilding the date from (year, month, day) would throw ArgumentOutOfRangeException.
        ret.FutureValue = pivotDate;
        ret.PastValue = pivotDate;
        ret.Timex = DateTimeFormatUtil.LuisDate(pivotDate);
    }
    else
    {
        var futureDate = pivotDate;
        var pastDate = pivotDate;

        while ((int)futureDate.DayOfWeek != weekDay || futureDate.Day != day || futureDate < reference)
        {
            // Increase the futureDate month by month to find the expected date (the "day" is the weekday)
            // and make sure the futureDate is not earlier than the reference date.
            futureDate = futureDate.AddMonths(1);
            if (DateObject.DaysInMonth(futureDate.Year, futureDate.Month) >= day)
            {
                // AddMonths clamps e.g. January 31 to the end of February, so restore the
                // requested day as soon as a month is long enough.
                futureDate = futureDate.SafeCreateFromValue(futureDate.Year, futureDate.Month, day);
            }
        }

        ret.FutureValue = futureDate;

        while ((int)pastDate.DayOfWeek != weekDay || pastDate.Day != day || pastDate > reference)
        {
            // Decrease the pastDate month by month to find the expected date (the "day" is the weekday)
            // and make sure the pastDate is not later than the reference date.
            pastDate = pastDate.AddMonths(-1);
            if (DateObject.DaysInMonth(pastDate.Year, pastDate.Month) >= day)
            {
                // Same clamping issue when stepping backwards (e.g. March 31 minus one month).
                pastDate = pastDate.SafeCreateFromValue(pastDate.Year, pastDate.Month, day);
            }
        }

        ret.PastValue = pastDate;

        // The weekday timex is generated from 7 for Sunday instead of 0.
        if (weekDay == 0)
        {
            weekDay = 7;
        }

        ret.Timex = TimexUtility.GenerateWeekdayTimex(weekDay);
    }

    ret.Success = true;

    return ret;
}
// Resolves a bare weekday (text that exactly matches StrictWeekDayRegex).
// The timex is the generic weekday timex; FutureValue is the occurrence on or after the
// reference date and PastValue is the occurrence strictly before it.
protected DateTimeResolutionResult MatchWeekdayAlone(string text, DateObject reference)
{
    var outcome = new DateTimeResolutionResult();

    var exact = this.config.StrictWeekDayRegex.MatchExact(text, trim: true);
    if (!exact.Success)
    {
        return outcome;
    }

    var dayIndex = this.config.DayOfWeek[exact.Groups[Constants.WeekdayGroupName].Value];

    // The anchor is computed from the raw index, before Sunday is remapped below.
    var anchor = reference.This((DayOfWeek)dayIndex);

    // From here on Sunday is handled as 7 rather than 0.
    if (dayIndex == 0)
    {
        dayIndex = 7;
    }

    if (dayIndex < (int)reference.DayOfWeek)
    {
        anchor = reference.Next((DayOfWeek)dayIndex);
    }

    outcome.Timex = TimexUtility.GenerateWeekdayTimex(dayIndex);
    outcome.FutureValue = anchor < reference ? anchor.AddDays(7) : anchor;
    outcome.PastValue = anchor >= reference ? anchor.AddDays(-7) : anchor;
    outcome.Success = true;

    return outcome;
}
// Parses a regex match which includes 'day', 'month' and 'year' (optional) groups.
//   match:         a match exposing the "month", "day", "year" and CJK-year groups.
//   referenceDate: supplies the year when the text carries none.
// Returns a result that is always marked successful. When no year is found the timex carries
// no year (-1 is passed to LuisDate) and the future/past values are derived by
// DateContext.GenerateDates from the reference year.
// May throw FormatException/OverflowException from int.Parse if the "year" group is not a valid integer.
protected DateTimeResolutionResult Match2Date(Match match, DateObject referenceDate)
{
    var ret = new DateTimeResolutionResult();

    var monthStr = match.Groups["month"].Value;
    var dayStr = match.Groups["day"].Value;
    var yearStr = match.Groups["year"].Value;
    var yearCJKStr = match.Groups[Constants.YearCJKGroupName].Value;
    int month = 0, day = 0;

    // ConvertCJKYearToInteger reports "no usable year" as -1; this method uses 0 for that.
    var cjkYear = ConvertCJKYearToInteger(yearCJKStr);
    var year = cjkYear == -1 ? 0 : cjkYear;

    // Single lookup per dictionary instead of ContainsKey followed by repeated indexer reads.
    if (this.config.MonthOfYear.TryGetValue(monthStr, out var monthValue) &&
        this.config.DayOfMonth.TryGetValue(dayStr, out var dayValue))
    {
        // Values above 12 / 31 are folded back into range with a modulo.
        month = monthValue > 12 ? monthValue % 12 : monthValue;
        day = dayValue > 31 ? dayValue % 31 : dayValue;

        // A numeric "year" group takes precedence over the CJK year parsed above.
        if (!string.IsNullOrEmpty(yearStr))
        {
            year = int.Parse(yearStr, CultureInfo.InvariantCulture);

            // Expand two-digit years into the past or the current century.
            if (year < 100 && year >= Constants.MinTwoDigitYearPastNum)
            {
                year += Constants.BASE_YEAR_PAST_CENTURY;
            }
            else if (year >= 0 && year < Constants.MaxTwoDigitYearFutureNum)
            {
                year += Constants.BASE_YEAR_CURRENT_CENTURY;
            }
        }
    }

    var noYear = year == 0;
    if (noYear)
    {
        year = referenceDate.Year;
        ret.Timex = DateTimeFormatUtil.LuisDate(-1, month, day);
    }
    else
    {
        ret.Timex = DateTimeFormatUtil.LuisDate(year, month, day);
    }

    var futurePastDates = DateContext.GenerateDates(noYear, referenceDate, year, month, day);

    ret.FutureValue = futurePastDates.future;
    ret.PastValue = futurePastDates.past;
    ret.Success = true;

    return ret;
}
// Handles a duration followed by a relative marker, e.g. "三天前" ("three days ago").
//   text:          the text to resolve.
//   referenceDate: the date the duration is applied to.
// Returns a successful result whose Timex/FutureValue/PastValue hold the shifted date, or an
// unresolved result (Success = false) when no duration, unit or before/after marker applies.
private DateTimeResolutionResult ParserDurationWithAgoAndLater(string text, DateObject referenceDate)
{
    var ret = new DateTimeResolutionResult();

    var durationRes = this.config.DurationExtractor.Extract(text, referenceDate);
    if (durationRes.Count == 0)
    {
        return ret;
    }

    var match = this.config.UnitRegex.Match(text);
    if (!match.Success)
    {
        return ret;
    }

    var durationStart = (int)durationRes[0].Start;
    var durationEnd = durationStart + (int)durationRes[0].Length;

    // The number is read from the span between the start of the duration and the unit.
    // If the first unit match lies before the duration, that span would have a negative
    // length and Substring would throw, so treat the text as unresolved instead.
    if (match.Index < durationStart)
    {
        return ret;
    }

    var suffix = text.Substring(durationEnd).Trim();
    var srcUnit = match.Groups["unit"].Value;
    var numberStr = text.Substring(durationStart, match.Index - durationStart).Trim();

    var unitMatch = this.config.DurationRelativeDurationUnitRegex.Match(text);

    // set the inexact number "数" (few) to 3 for now
    var isFew = numberStr.Equals(unitMatch.Groups["few"].Value, StringComparison.Ordinal);
    var number = isFew ? 3 : ConvertCJKToNum(numberStr);

    // Exact number whose suffix is exactly the relative-unit match: let the duration parser
    // produce the timex and shift the reference date by it.
    if (!isFew && suffix.Equals(unitMatch.Value, StringComparison.Ordinal))
    {
        var pr = this.config.DurationParser.Parse(durationRes[0], referenceDate);
        var future = suffix.Equals(unitMatch.Groups["later"].Value, StringComparison.Ordinal);

        if (pr != null)
        {
            var resultDateTime = DurationParsingUtil.ShiftDateTime(pr.TimexStr, referenceDate, future);
            ret.Timex = $"{DateTimeFormatUtil.LuisDate(resultDateTime)}";
            ret.FutureValue = ret.PastValue = resultDateTime;
            ret.Success = true;
            return ret;
        }
    }

    // Otherwise build the duration timex from the number and the mapped unit, then shift
    // backwards or forwards depending on which marker the suffix starts with.
    if (this.config.UnitMap.TryGetValue(srcUnit, out var unitStr))
    {
        ret.Timex = TimexUtility.GenerateDurationTimex(number, unitStr, DurationParsingUtil.IsLessThanDay(unitStr));
        DateObject date = Constants.InvalidDate;

        var beforeMatch = this.config.BeforeRegex.Match(suffix);
        if (beforeMatch.Success && suffix.StartsWith(beforeMatch.Value, StringComparison.Ordinal))
        {
            date = DurationParsingUtil.ShiftDateTime(ret.Timex, referenceDate, future: false);
        }

        // Checked second, so an "after" marker wins if both markers match at the start.
        var afterMatch = this.config.AfterRegex.Match(suffix);
        if (afterMatch.Success && suffix.StartsWith(afterMatch.Value, StringComparison.Ordinal))
        {
            date = DurationParsingUtil.ShiftDateTime(ret.Timex, referenceDate, future: true);
        }

        if (date != Constants.InvalidDate)
        {
            ret.Timex = $"{DateTimeFormatUtil.LuisDate(date)}";
            ret.FutureValue = ret.PastValue = date;
            ret.Success = true;
            return ret;
        }
    }

    return ret;
}
0)); + } + } + } + } + + year = num; + + return year < 10 ? -1 : year; + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParserConfiguration.cs new file mode 100644 index 0000000000..b953c19bbc --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParserConfiguration.cs @@ -0,0 +1,93 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; + +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Text.DateTime.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public abstract class BaseCJKDateParserConfiguration : BaseDateTimeOptionsConfiguration, ICJKCommonDateTimeParserConfiguration + { + protected BaseCJKDateParserConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + } + + public virtual IExtractor CardinalExtractor { get; protected set; } + + public virtual IExtractor IntegerExtractor { get; protected set; } + + public virtual IExtractor OrdinalExtractor { get; protected set; } + + public virtual IParser NumberParser { get; protected set; } + + public virtual IDateTimeExtractor DateExtractor { get; protected set; } + + public virtual IDateTimeExtractor TimeExtractor { get; protected set; } + + public virtual IDateTimeExtractor DateTimeExtractor { get; protected set; } + + public virtual IDateTimeExtractor DurationExtractor { get; protected set; } + + public virtual IDateTimeExtractor DatePeriodExtractor { get; protected set; } + + public virtual IDateTimeExtractor TimePeriodExtractor { get; protected set; } + + public virtual IDateTimeExtractor DateTimePeriodExtractor { get; protected set; } + + public virtual IDateTimeExtractor SetExtractor { get; protected set; } + + public virtual IDateTimeExtractor 
HolidayExtractor { get; protected set; } + + public virtual IDateTimeParser DateParser { get; protected set; } + + public virtual IDateTimeParser TimeParser { get; protected set; } + + public virtual IDateTimeParser DateTimeParser { get; protected set; } + + public virtual IDateTimeParser DurationParser { get; protected set; } + + public virtual IDateTimeParser DatePeriodParser { get; protected set; } + + public virtual IDateTimeParser TimePeriodParser { get; protected set; } + + public virtual IDateTimeParser DateTimePeriodParser { get; protected set; } + + public virtual IDateTimeParser SetParser { get; protected set; } + + public virtual IDateTimeParser HolidayParser { get; protected set; } + + public virtual IDateTimeParser DateTimeAltParser { get; protected set; } + + public virtual IDateTimeParser TimeZoneParser { get; protected set; } + + public virtual IImmutableDictionary MonthOfYear { get; protected set; } + + public virtual IImmutableDictionary Numbers { get; protected set; } + + public virtual IImmutableDictionary DoubleNumbers { get; protected set; } + + public virtual IImmutableDictionary UnitValueMap { get; protected set; } + + public virtual IImmutableDictionary SeasonMap { get; protected set; } + + public virtual IImmutableDictionary SpecialYearPrefixesMap { get; protected set; } + + public virtual IImmutableDictionary UnitMap { get; protected set; } + + public virtual IImmutableDictionary CardinalMap { get; protected set; } + + public virtual IImmutableDictionary DayOfWeek { get; protected set; } + + public virtual IImmutableDictionary DayOfMonth { get; protected set; } + + public virtual IImmutableDictionary WrittenDecades { get; protected set; } + + public virtual IImmutableDictionary SpecialDecadeCases { get; protected set; } + + public virtual IDateTimeUtilityConfiguration UtilityConfiguration { get; protected set; } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDatePeriodParser.cs 
b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDatePeriodParser.cs new file mode 100644 index 0000000000..cc2544d471 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDatePeriodParser.cs @@ -0,0 +1,3102 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.DateTime.Chinese; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Utilities; +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public class BaseCJKDatePeriodParser : IDateTimeParser + { + public static readonly string ParserName = Constants.SYS_DATETIME_DATEPERIOD; // "DatePeriod"; + + private static bool inclusiveEndPeriod = false; + + private static readonly Calendar Cal = DateTimeFormatInfo.InvariantInfo.Calendar; + + private readonly ICJKDatePeriodParserConfiguration config; + + public BaseCJKDatePeriodParser(ICJKDatePeriodParserConfiguration configuration) + { + config = configuration; + } + + // @TODO Refactor code to remove the cycle between BaseDatePeriodParser and its config. 
// Builds the year context shared by the two ends of a date range.
// The context year stays Constants.InvalidYear when the end date is nothing but a year, when
// either end matches RelativeRegex, or when the text mentions two different years; otherwise
// it is the single year found in the text.
public DateContext GetYearContext(string startDateStr, string endDateStr, string text)
{
    var endYearMatch = this.config.YearRegex.Match(endDateStr);
    var endIsBareYear = endYearMatch.Success && endYearMatch.Length == endDateStr.Length;

    var hasRelativePart = this.config.RelativeRegex.Match(startDateStr).Success ||
                          this.config.RelativeRegex.Match(endDateStr).Success;

    var sharedYear = Constants.InvalidYear;

    if (!(endIsBareYear || hasRelativePart))
    {
        foreach (Match candidate in this.config.YearRegex.Matches(text))
        {
            var candidateYear = GetYearFromText(candidate);
            if (candidateYear == Constants.InvalidYear)
            {
                continue;
            }

            if (sharedYear == Constants.InvalidYear)
            {
                sharedYear = candidateYear;
                continue;
            }

            // Two different year values in the text: there is no common context year.
            if (sharedYear != candidateYear)
            {
                sharedYear = Constants.InvalidYear;
                break;
            }
        }
    }

    return new DateContext { Year = sharedYear };
}
DateTimeFormatUtil.FormatDate((DateObject)innerResult.PastValue) + }, + }; + } + else if (innerResult.Mod == Constants.AFTER_MOD) + { + innerResult.FutureResolution = new Dictionary + { + { + TimeTypeConstants.START_DATE, + DateTimeFormatUtil.FormatDate((DateObject)innerResult.FutureValue) + }, + }; + + innerResult.PastResolution = new Dictionary + { + { + TimeTypeConstants.START_DATE, + DateTimeFormatUtil.FormatDate((DateObject)innerResult.PastValue) + }, + }; + } + else if (innerResult.FutureValue != null && innerResult.PastValue != null) + { + innerResult.FutureResolution = new Dictionary + { + { + TimeTypeConstants.START_DATE, + DateTimeFormatUtil.FormatDate(((Tuple)innerResult.FutureValue).Item1) + }, + { + TimeTypeConstants.END_DATE, + DateTimeFormatUtil.FormatDate(((Tuple)innerResult.FutureValue).Item2) + }, + }; + + innerResult.PastResolution = new Dictionary + { + { + TimeTypeConstants.START_DATE, + DateTimeFormatUtil.FormatDate(((Tuple)innerResult.PastValue).Item1) + }, + { + TimeTypeConstants.END_DATE, + DateTimeFormatUtil.FormatDate(((Tuple)innerResult.PastValue).Item2) + }, + }; + } + else + { + innerResult.PastResolution = innerResult.FutureResolution = new Dictionary(); + } + + value = innerResult; + } + } + + var ret = new DateTimeParseResult + { + Text = er.Text, + Start = er.Start, + Length = er.Length, + Type = er.Type, + Data = er.Data, + Value = value, + TimexStr = value == null ? 
// @TODO use the method defined in AbstractYearExtractor
// Extracts a year from a year-regex match. Three shapes are handled, in this order:
//   1. a "year" group that differs from the "full year" group;
//   2. a year split into a first and a last two-digit part;
//   3. a written "full year" group.
// Returns Constants.InvalidYear when none applies or when the split form is a plain number.
public int GetYearFromText(Match match)
{
    // Expands a two-digit year into the past or the current century; other values pass through.
    int ExpandTwoDigitYear(int value)
    {
        if (value < 100 && value >= Constants.MinTwoDigitYearPastNum)
        {
            return value + Constants.BASE_YEAR_PAST_CENTURY;
        }

        if (value >= 0 && value < Constants.MaxTwoDigitYearFutureNum)
        {
            return value + Constants.BASE_YEAR_CURRENT_CENTURY;
        }

        return value;
    }

    // Runs the number parser over the text captured by a single regex group.
    int ParseGroup(Group group)
    {
        var groupResult = new ExtractResult
        {
            Text = group.Value,
            Start = group.Index,
            Length = group.Length,
        };

        return Convert.ToInt32((double)(this.config.NumberParser.Parse(groupResult).Value ?? 0));
    }

    var yearStr = match.Groups[Constants.YearGroupName].Value;
    var writtenYearStr = match.Groups[Constants.FullYearGroupName].Value;

    if (!string.IsNullOrEmpty(yearStr) && !yearStr.Equals(writtenYearStr, StringComparison.Ordinal))
    {
        return ExpandTwoDigitYear(ConvertCJKToInteger(yearStr));
    }

    var firstTwoGroup = match.Groups[Constants.FirstTwoYearGroupName];
    if (!string.IsNullOrEmpty(firstTwoGroup.Value))
    {
        var firstTwoYearNum = ParseGroup(firstTwoGroup);

        var lastTwoGroup = match.Groups[Constants.LastTwoYearGroupName];
        var lastTwoYearNumStr = lastTwoGroup.Value;
        var lastTwoYearNum = string.IsNullOrEmpty(lastTwoYearNumStr) ? 0 : ParseGroup(lastTwoGroup);

        // Exclude pure number like "nineteen", "twenty four"
        var looksLikePlainNumber =
            firstTwoYearNum < 100 &&
            (lastTwoYearNum == 0 ||
             (firstTwoYearNum % 10 == 0 && lastTwoYearNumStr.Trim().Split(' ').Length == 1));

        if (looksLikePlainNumber)
        {
            return Constants.InvalidYear;
        }

        return firstTwoYearNum >= 100
            ? firstTwoYearNum + lastTwoYearNum
            : (firstTwoYearNum * 100) + lastTwoYearNum;
    }

    if (!string.IsNullOrEmpty(writtenYearStr))
    {
        return ExpandTwoDigitYear(ParseGroup(match.Groups[Constants.FullYearGroupName]));
    }

    return Constants.InvalidYear;
}
// convert CJK Year to Integer
// A positive dynasty year from DateTimeFormatUtil.ParseDynastyYear is returned as is.
// Otherwise the text is parsed as one integer; if that yields less than 10, the text is
// re-read character by character as decimal digits. Returns -1 when the result is 0.
private int ConvertCJKToInteger(string yearCJKStr)
{
    // Parses the first extracted entity of a string when it is an integer; 0 otherwise.
    int FirstIntegerOrZero(string source)
    {
        var extracted = this.config.IntegerExtractor.Extract(source);
        if (extracted.Count != 0 &&
            extracted[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal))
        {
            return Convert.ToInt32((double)(this.config.NumberParser.Parse(extracted[0]).Value ?? 0));
        }

        return 0;
    }

    int dynastyYear = DateTimeFormatUtil.ParseDynastyYear(yearCJKStr,
                                                          this.config.DynastyYearRegex,
                                                          this.config.DynastyStartYear,
                                                          this.config.DynastyYearMap,
                                                          this.config.IntegerExtractor,
                                                          this.config.NumberParser);
    if (dynastyYear > 0)
    {
        return dynastyYear;
    }

    var parsed = FirstIntegerOrZero(yearCJKStr);

    if (parsed < 10)
    {
        // Rebuild the number digit by digit; characters that are not integers contribute 0.
        parsed = 0;
        foreach (var digitChar in yearCJKStr)
        {
            parsed = (parsed * 10) + FirstIntegerOrZero(digitChar.ToString(CultureInfo.InvariantCulture));
        }
    }

    return parsed == 0 ? -1 : parsed;
}
this.config.ThisRegex.Match(monthStr); + var nextMatch = this.config.NextRegex.Match(monthStr); + var lastMatch = this.config.LastRegex.Match(monthStr); + + if (thisMatch.Success) + { + // do nothing + } + else if (nextMatch.Success) + { + if (month != 12) + { + month += 1; + } + else + { + month = 1; + year += 1; + } + } + else + { + if (month != 1) + { + month -= 1; + } + else + { + month = 12; + year -= 1; + } + } + } + + if (inputYear || this.config.ThisRegex.Match(monthStr).Success || + this.config.NextRegex.Match(monthStr).Success) + { + beginLuisStr = DateTimeFormatUtil.LuisDate(year, month, beginDay); + endLuisStr = DateTimeFormatUtil.LuisDate(year, month, endDay); + } + else + { + beginLuisStr = DateTimeFormatUtil.LuisDate(-1, month, beginDay); + endLuisStr = DateTimeFormatUtil.LuisDate(-1, month, endDay); + } + } + else + { + match = this.config.SpecialYearRegex.MatchExact(text, trim: true); + + if (match.Success) + { + var value = referenceDate.AddYears(this.config.GetSwiftYear(match.Value)); + ret.Timex = DateTimeFormatUtil.LuisDate(value); + ret.FutureValue = ret.PastValue = value; + ret.Success = true; + + return ret; + } + + return ret; + } + + var futurePastBeginDates = DateContext.GenerateDates(noYear, referenceDate, year, month, beginDay); + var futurePastEndDates = DateContext.GenerateDates(noYear, referenceDate, year, month, endDay); + + ret.Timex = $"({beginLuisStr},{endLuisStr},P{endDay - beginDay}D)"; + ret.FutureValue = new Tuple(futurePastBeginDates.future, futurePastEndDates.future); + ret.PastValue = new Tuple(futurePastBeginDates.past, futurePastEndDates.past); + ret.Success = true; + + return ret; + } + + // handle like "2016年到2017年", "2016年和2017年之间" + private DateTimeResolutionResult ParseYearToYear(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var match = this.config.YearToYear.Match(text); + + if (!match.Success) + { + match = this.config.YearToYearSuffixRequired.Match(text); + } + + if 
(match.Success) + { + var yearMatch = this.config.YearRegex.Matches(text); + var yearInCJKMatch = this.config.YearInCJKRegex.Matches(text); + var beginYear = 0; + var endYear = 0; + + if (yearMatch.Count == 2) + { + var yearFrom = yearMatch[0].Groups[Constants.YearGroupName].Value; + var yearTo = yearMatch[1].Groups[Constants.YearGroupName].Value; + beginYear = ConvertCJKToInteger(yearFrom); + endYear = ConvertCJKToInteger(yearTo); + } + else if (yearInCJKMatch.Count == 2) + { + var yearFrom = yearInCJKMatch[0].Groups[Constants.YearCJKGroupName].Value; + var yearTo = yearInCJKMatch[1].Groups[Constants.YearCJKGroupName].Value; + beginYear = ConvertCJKToInteger(yearFrom); + endYear = ConvertCJKToInteger(yearTo); + } + else if (yearInCJKMatch.Count == 1 && yearMatch.Count == 1) + { + if (yearMatch[0].Index < yearInCJKMatch[0].Index) + { + var yearFrom = yearMatch[0].Groups[Constants.YearGroupName].Value; + var yearTo = yearInCJKMatch[0].Groups[Constants.YearCJKGroupName].Value; + beginYear = int.Parse(yearFrom, CultureInfo.InvariantCulture); + endYear = ConvertCJKToInteger(yearTo); + } + else + { + var yearFrom = yearInCJKMatch[0].Groups[Constants.YearCJKGroupName].Value; + var yearTo = yearMatch[0].Groups[Constants.YearGroupName].Value; + beginYear = ConvertCJKToInteger(yearFrom); + endYear = int.Parse(yearTo, CultureInfo.InvariantCulture); + } + } + + if (beginYear < 100 && beginYear >= this.config.TwoNumYear) + { + beginYear += Constants.BASE_YEAR_PAST_CENTURY; + } + else if (beginYear < 100 && beginYear < this.config.TwoNumYear) + { + beginYear += Constants.BASE_YEAR_CURRENT_CENTURY; + } + + if (endYear < 100 && endYear >= this.config.TwoNumYear) + { + endYear += Constants.BASE_YEAR_PAST_CENTURY; + } + else if (endYear < 100 && endYear < this.config.TwoNumYear) + { + endYear += Constants.BASE_YEAR_CURRENT_CENTURY; + } + + var beginDay = DateObject.MinValue.SafeCreateFromValue(beginYear, 1, 1); + var endDay = DateObject.MinValue.SafeCreateFromValue(endYear, 1, 1); 
+ ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDay, endDay, DatePeriodTimexType.ByYear); + ret.FutureValue = ret.PastValue = new Tuple(beginDay, endDay); + ret.Success = true; + return ret; + } + + return ret; + } + + // handle like "3月到5月", "3月和5月之间" + private DateTimeResolutionResult ParseMonthToMonth(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var match = this.config.MonthToMonth.Match(text); + + if (!match.Success) + { + match = this.config.MonthToMonthSuffixRequired.Match(text); + } + + if (match.Success) + { + var monthMatch = this.config.MonthRegex.Matches(text); + var beginMonth = 0; + var endMonth = 0; + + if (monthMatch.Count == 2) + { + var monthFrom = monthMatch[0].Groups[Constants.MonthGroupName].Value; + var monthTo = monthMatch[1].Groups[Constants.MonthGroupName].Value; + beginMonth = this.config.ToMonthNumber(monthFrom); + endMonth = this.config.ToMonthNumber(monthTo); + } + else if (match.Groups[Constants.MonthFromGroupName].Success && match.Groups[Constants.MonthToGroupName].Success) + { + var monthFrom = match.Groups[Constants.MonthFromGroupName].Value; + var monthTo = match.Groups[Constants.MonthToGroupName].Value; + beginMonth = this.config.ToMonthNumber(monthFrom); + endMonth = this.config.ToMonthNumber(monthTo); + } + + var yearMatch = this.config.YearRegex.Matches(text); + var hasYear = false; + var beginYear = 0; + var endYear = 0; + if (yearMatch.Count > 0 && match.Groups[Constants.YearGroupName].Success) + { + hasYear = true; + if (yearMatch.Count == 2) + { + var yearFrom = yearMatch[0].Groups[Constants.YearGroupName].Value; + var yearTo = yearMatch[1].Groups[Constants.YearGroupName].Value; + beginYear = ParseNumYear(yearFrom); + endYear = ParseNumYear(yearTo); + } + else + { + var year = yearMatch[0].Groups[Constants.YearGroupName].Value; + beginYear = endYear = ParseNumYear(year); + } + } + else + { + beginYear = endYear = referenceDate.Year; + } + + var currentYear = 
referenceDate.Year; + var currentMonth = referenceDate.Month; + var beginYearForPastResolution = beginYear; + var endYearForPastResolution = endYear; + var beginYearForFutureResolution = beginYear; + var endYearForFutureResolution = endYear; + var durationMonths = 0; + + if (hasYear) + { + var diffmoths = endMonth - beginMonth; + var diffyear = endYear - beginYear; + durationMonths = (diffyear * 12) + diffmoths; + } + else + { + if (beginMonth < endMonth) + { + // For this case, FutureValue and PastValue share the same resolution + if (beginMonth < currentMonth && endMonth >= currentMonth) + { + // Keep the beginYear and endYear equal to currentYear + } + else if (beginMonth >= currentMonth) + { + beginYearForPastResolution = endYearForPastResolution = currentYear - 1; + } + else if (endMonth < currentMonth) + { + beginYearForFutureResolution = endYearForFutureResolution = currentYear + 1; + } + + durationMonths = endMonth - beginMonth; + } + else if (beginMonth > endMonth) + { + // For this case, FutureValue and PastValue share the same resolution + if (beginMonth < currentMonth) + { + endYearForPastResolution = endYearForFutureResolution = currentYear + 1; + } + else + { + beginYearForPastResolution = currentYear - 1; + endYearForFutureResolution = currentYear + 1; + } + + durationMonths = beginMonth - endMonth; + } + } + + if (durationMonths != 0) + { + var beginDateForPastResolution = DateObject.MinValue.SafeCreateFromValue(beginYearForPastResolution, beginMonth, 1); + var endDateForPastResolution = DateObject.MinValue.SafeCreateFromValue(endYearForPastResolution, endMonth, 1); + var beginDateForFutureResolution = DateObject.MinValue.SafeCreateFromValue(beginYearForFutureResolution, beginMonth, 1); + var endDateForFutureResolution = DateObject.MinValue.SafeCreateFromValue(endYearForFutureResolution, endMonth, 1); + + /*var beginTimex = hasYear || beginYearForPastResolution == endYearForFutureResolution ? 
DateTimeFormatUtil.LuisDate(beginDateForPastResolution, beginDateForFutureResolution) : + DateTimeFormatUtil.LuisDate(-1, beginMonth, 1); + var endTimex = hasYear || beginYearForPastResolution == endYearForFutureResolution ? DateTimeFormatUtil.LuisDate(endDateForPastResolution, endDateForFutureResolution) : + DateTimeFormatUtil.LuisDate(-1, endMonth, 1);*/ + var dayMatch = this.config.DayRegexForPeriod.Matches(text); + + // handle cases like 2019年2月1日から3月まで + if (dayMatch.Count > 0 && match.Groups[Constants.DayGroupName].Success) + { + ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDateForFutureResolution, endDateForFutureResolution, DatePeriodTimexType.ByDay, beginDateForPastResolution, endDateForPastResolution, hasYear); + } + + // If the year is not specified, the combined range timex will use fuzzy years. + else + { + ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDateForFutureResolution, endDateForFutureResolution, DatePeriodTimexType.ByMonth, beginDateForPastResolution, endDateForPastResolution, hasYear); + } + + ret.PastValue = new Tuple(beginDateForPastResolution, endDateForPastResolution); + ret.FutureValue = new Tuple(beginDateForFutureResolution, endDateForFutureResolution); + ret.Success = true; + } + } + + return ret; + } + + private int ParseNumYear(string yearNum) + { + int year = int.Parse(yearNum, CultureInfo.InvariantCulture); + + if (year < 100 && year >= this.config.TwoNumYear) + { + year += Constants.BASE_YEAR_PAST_CENTURY; + } + else if (year < 100 && year < this.config.TwoNumYear) + { + year += Constants.BASE_YEAR_CURRENT_CENTURY; + } + + return year; + } + + private DateTimeResolutionResult ParseDayToDay(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var match = this.config.DayToDay.Match(text); + + if (match.Success) + { + var dayMatchMatch = this.config.DayRegexForPeriod.Matches(text); + var beginDay = 0; + var endDay = 0; + + if (dayMatchMatch.Count == 2) + { + var dayFrom = 
dayMatchMatch[0].Groups[Constants.DayGroupName].Value; + var dayTo = dayMatchMatch[1].Groups[Constants.DayGroupName].Value; + beginDay = this.config.DayOfMonth[dayFrom]; + endDay = this.config.DayOfMonth[dayTo]; + } + else if (match.Groups[Constants.HalfGroupName].Success) + { + var er = this.config.DurationExtractor.Extract(match.Groups[Constants.HalfGroupName].Value, referenceDate); + var pr = this.config.DurationParser.Parse(er[0], referenceDate); + + float number = TimexUtility.ParseNumberFromDurationTimex(pr.TimexStr); + + DateObject beginDay1 = referenceDate; + DateObject endDay1 = referenceDate.AddDays(number); + + ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDay1, endDay1, DatePeriodTimexType.ByDay); + ret.PastValue = ret.FutureValue = new Tuple(beginDay1, endDay1); + ret.Success = true; + return ret; + + } + + var beginYearForPastResolution = referenceDate.Year; + var endYearForPastResolution = referenceDate.Year; + var beginYearForFutureResolution = referenceDate.Year; + var endYearForFutureResolution = referenceDate.Year; + var currentMonth = referenceDate.Month; + var currentDay = referenceDate.Day; + var durationDays = 0; + + var relativeMonth = this.config.RelativeMonthRegex.Match(text); + currentMonth += this.config.GetSwiftMonth(relativeMonth.Value); + + var beginMonthForPastResolution = currentMonth; + var endMonthForPastResolution = currentMonth; + var beginMonthForFutureResolution = currentMonth; + var endMonthForFutureResolution = currentMonth; + + if (beginDay < endDay) + { + // For this case, FutureValue and PastValue share the same resolution + if (beginDay < currentDay && endDay >= currentDay) + { + // Keep the beginMonth and endMonth equal to currentMonth + } + else if (beginDay >= currentDay) + { + if (currentMonth == 1) + { + beginMonthForPastResolution = endMonthForPastResolution = Constants.MaxMonth; + beginYearForPastResolution--; + endYearForPastResolution--; + } + else + { + beginMonthForPastResolution = 
endMonthForPastResolution = currentMonth - 1; + } + } + else if (endDay < currentDay) + { + if (currentMonth == Constants.MaxMonth) + { + beginMonthForFutureResolution = endMonthForFutureResolution = 1; + beginYearForFutureResolution++; + endYearForFutureResolution++; + } + else + { + beginMonthForFutureResolution = endMonthForFutureResolution = currentMonth + 1; + } + } + + durationDays = endDay - beginDay; + } + else if (beginDay > endDay) + { + // For this case, FutureValue and PastValue share the same resolution + if (beginDay < currentDay) + { + if (currentMonth == Constants.MaxMonth) + { + endMonthForPastResolution = endMonthForFutureResolution = 1; + endYearForPastResolution++; + endYearForFutureResolution++; + } + else + { + endMonthForPastResolution = endMonthForFutureResolution = currentMonth + 1; + } + } + else + { + if (currentMonth == Constants.MaxMonth) + { + beginMonthForPastResolution = currentMonth - 1; + endMonthForFutureResolution = 1; + endYearForFutureResolution++; + } + else if (currentMonth == 1) + { + beginMonthForPastResolution = 12; + beginYearForPastResolution--; + endMonthForFutureResolution = currentMonth + 1; + } + else + { + beginMonthForPastResolution = currentMonth - 1; + endMonthForFutureResolution = currentMonth + 1; + } + } + + durationDays = beginDay - endDay; + } + + if (durationDays != 0) + { + var beginDateForPastResolution = DateObject.MinValue.SafeCreateFromValue(beginYearForPastResolution, beginMonthForPastResolution, beginDay); + var endDateForPastResolution = DateObject.MinValue.SafeCreateFromValue(endYearForPastResolution, endMonthForPastResolution, endDay); + var beginDateForFutureResolution = DateObject.MinValue.SafeCreateFromValue(beginYearForFutureResolution, beginMonthForFutureResolution, beginDay); + var endDateForFutureResolution = DateObject.MinValue.SafeCreateFromValue(endYearForFutureResolution, endMonthForFutureResolution, endDay); + + ret.Timex = relativeMonth.Success ? 
TimexUtility.GenerateDatePeriodTimex(beginDateForFutureResolution, endDateForFutureResolution, DatePeriodTimexType.ByDay) : + TimexUtility.GenerateDatePeriodTimex(beginDateForFutureResolution, endDateForFutureResolution, DatePeriodTimexType.ByDay, UnspecificDateTimeTerms.NonspecificYear | UnspecificDateTimeTerms.NonspecificMonth); + + ret.PastValue = new Tuple(beginDateForPastResolution, endDateForPastResolution); + ret.FutureValue = new Tuple(beginDateForFutureResolution, endDateForFutureResolution); + ret.Success = true; + } + } + + return ret; + } + + // for case "2016年5月" + private DateTimeResolutionResult ParseYearAndMonth(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var match = this.config.YearAndMonth.MatchExact(text, trim: true); + + if (!match.Success) + { + match = this.config.PureNumYearAndMonth.MatchExact(text, trim: true); + } + + if (!match.Success) + { + return ret; + } + + // parse year + var year = referenceDate.Year; + var yearNum = match.Groups[Constants.YearGroupName].Value; + var yearCJK = match.Groups[Constants.YearCJKGroupName].Value; + var yearRel = match.Groups[Constants.YearRelGroupName].Value; + var cardinalStr = match.Groups[Constants.CardinalGroupName].Value; + if (!string.IsNullOrEmpty(yearNum)) + { + if (this.config.IsYearOnly(yearNum)) + { + yearNum = yearNum.Substring(0, yearNum.Length - 1); + } + + year = ConvertCJKToInteger(yearNum); + } + else if (!string.IsNullOrEmpty(yearCJK)) + { + if (this.config.IsYearOnly(yearCJK)) + { + yearCJK = yearCJK.Substring(0, yearCJK.Length - 1); + } + + year = ConvertCJKToInteger(yearCJK); + } + else if (!string.IsNullOrEmpty(yearRel)) + { + if (this.config.IsLastYear(yearRel)) + { + year--; + } + else if (this.config.IsNextYear(yearRel)) + { + year++; + } + } + + if (year < 100 && year >= this.config.TwoNumYear) + { + year += Constants.BASE_YEAR_PAST_CENTURY; + } + else if (year < this.config.TwoNumYear) + { + year += 
Constants.BASE_YEAR_CURRENT_CENTURY; + } + + var monthStr = match.Groups[Constants.MonthGroupName].Value; + var month = match.Groups[Constants.MonthGroupName].Success ? this.config.ToMonthNumber(monthStr) : + config.WoMLastRegex.IsExactMatch(cardinalStr, trim: true) ? 12 : this.config.CardinalMap[cardinalStr]; + var beginDay = DateObject.MinValue.SafeCreateFromValue(year, month, 1); + DateObject endDay; + + if (month == 12) + { + endDay = DateObject.MinValue.SafeCreateFromValue(year + 1, 1, 1); + } + else + { + endDay = DateObject.MinValue.SafeCreateFromValue(year, month + 1, 1); + } + + ret.Timex = DateTimeFormatUtil.LuisDate(year, month); + ret.FutureValue = ret.PastValue = new Tuple(beginDay, endDay); + ret.Success = true; + return ret; + } + + // case like "今年三月" "这个周末" "五月" + private DateTimeResolutionResult ParseOneWordPeriod(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + int year = referenceDate.Year, month = referenceDate.Month; + int futureYear = year, pastYear = year; + + var isReferenceDatePeriod = false; + + var trimmedText = text.Trim(); + var match = this.config.OneWordPeriodRegex.MatchExact(trimmedText, trim: true); + + // For cases "that week|month|year" + if (!match.Success) + { + match = this.config.ReferenceDatePeriodRegex.MatchExact(trimmedText, trim: true); + if (match.Success) + { + isReferenceDatePeriod = true; + ret.Mod = Constants.REF_UNDEF_MOD; + } + } + + if (!match.Success) + { + match = this.config.LaterEarlyPeriodRegex.MatchExact(trimmedText, trim: true); + + if (match.Success) + { + return ParseLaterEarlyPeriod(text, referenceDate); + } + } + + if (match.Success) + { + var monthStr = match.Groups[Constants.MonthGroupName].Value; + if (this.config.IsThisYear(trimmedText)) + { + ret.Timex = TimexUtility.GenerateYearTimex(referenceDate); + ret.FutureValue = + ret.PastValue = + new Tuple(DateObject.MinValue.SafeCreateFromValue(referenceDate.Year, 1, 1), referenceDate); + ret.Success = true; + 
return ret; + } + + if (this.config.IsYearToDate(trimmedText)) + { + ret.Timex = TimexUtility.GenerateYearTimex(referenceDate.Year); + ret.FutureValue = + ret.PastValue = + new Tuple(DateObject.MinValue.SafeCreateFromValue(referenceDate.Year, 1, 1), referenceDate); + ret.Success = true; + return ret; + } + + // In Chinese, "下" means next, "下下周" means next next week, "下下周末" means next next weekend, need to check whether the text match "下下" + // "上" means last, "上上周" means last last week, "上上周末" means last last weekend, need to check whether the text match "上上" + ChineseDatePeriodParserConfiguration config = this.config as ChineseDatePeriodParserConfiguration; + bool nextNextMatch = config == null ? false : config.NextNextRegex.Match(trimmedText).Success; + bool lastlastMatch = config == null ? false : config.LastLastRegex.Match(trimmedText).Success; + + var nextMatch = this.config.NextRegex.Match(trimmedText); + var lastMatch = this.config.LastRegex.Match(trimmedText); + + if (!string.IsNullOrEmpty(monthStr)) + { + var swift = -10; + + var yearRel = match.Groups[Constants.YearRelGroupName].Value; + + if (!string.IsNullOrEmpty(yearRel)) + { + if (this.config.IsNextYear(yearRel)) + { + swift = 1; + } + else if (this.config.IsLastYear(yearRel)) + { + swift = -1; + } + else if (this.config.IsThisYear(yearRel)) + { + swift = 0; + } + } + + month = this.config.ToMonthNumber(monthStr); + + if (swift >= -1) + { + year += swift; + ret.Timex = DateTimeFormatUtil.LuisDate(year, month); + futureYear = pastYear = year; + } + else + { + ret.Timex = DateTimeFormatUtil.LuisDate(Constants.InvalidYear, month); + if (month < referenceDate.Month) + { + futureYear++; + } + + if (month >= referenceDate.Month) + { + pastYear--; + } + } + } + else + { + var swift = 0; + if (nextNextMatch) + { + // If it is Chinese "下下周" (next next week), "下下周末" (next next weekend), then swift is 2 + swift = 2; + } + else if (lastlastMatch) + { + // If it is Chinese "上上周" (last last week), "上上周末" (last last 
weekend), then swift is -2 + swift = -2; + } + else if (nextMatch.Success) + { + if (nextMatch.Groups[Constants.AfterGroupName].Success) + { + swift = 2; + } + else + { + swift = 1; + } + } + else if (lastMatch.Success) + { + swift = -1; + } + + // Handle cases with "(上|下)半" like "上半月"、 "下半年" + if (!string.IsNullOrEmpty(match.Groups[Constants.HalfTagGroupName].Value)) + { + return HandleWithHalfTag(trimmedText, referenceDate, ret, swift); + } + + if (this.config.IsWeekOnly(trimmedText)) + { + var monday = referenceDate.This(DayOfWeek.Monday).AddDays(7 * swift); + ret.Timex = isReferenceDatePeriod ? TimexUtility.GenerateWeekTimex() : TimexUtility.GenerateWeekTimex(monday); + ret.FutureValue = + ret.PastValue = + new Tuple( + referenceDate.This(DayOfWeek.Monday).AddDays(7 * swift), + referenceDate.This(DayOfWeek.Sunday).AddDays(7 * swift).AddDays(1)); + ret.Success = true; + return ret; + } + else if (!string.IsNullOrEmpty(match.Groups[Constants.RestOfGroupName].Value)) + { + var durationStr = match.Groups[Constants.DurationGroupName].Value; + var durationUnit = this.config.UnitMap[durationStr]; + DateObject beginDate; + DateObject endDate = beginDate = referenceDate; + + ret.Timex = TimexUtility.GenerateDatePeriodTimexWithDiff(beginDate, ref endDate, durationUnit); + ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); + ret.Success = true; + return ret; + } + + if (this.config.IsWeekend(trimmedText)) + { + var beginDate = referenceDate.This(DayOfWeek.Saturday).AddDays(7 * swift); + var endDate = referenceDate.This(DayOfWeek.Sunday).AddDays(7 * swift); + match = this.config.ReferenceDatePeriodRegex.MatchExact(trimmedText, trim: true); + + if (match.Success) + { + isReferenceDatePeriod = true; + ret.Mod = Constants.REF_UNDEF_MOD; + } + + ret.Timex = isReferenceDatePeriod ? 
TimexUtility.GenerateWeekendTimex() : TimexUtility.GenerateWeekendTimex(beginDate); + ret.FutureValue = + ret.PastValue = new Tuple(beginDate, endDate.AddDays(1)); + ret.Success = true; + return ret; + } + + if (this.config.IsMonthOnly(trimmedText)) + { + month = referenceDate.AddMonths(swift).Month; + year = referenceDate.AddMonths(swift).Year; + ret.Timex = DateTimeFormatUtil.LuisDate(year, month); + ret.Timex = isReferenceDatePeriod ? TimexUtility.GenerateMonthTimex() : DateTimeFormatUtil.LuisDate(year, month); + futureYear = pastYear = year; + } + else if (this.config.IsYearOnly(trimmedText)) + { + // Handle like "今年上半年","明年下半年" + swift = 0; + trimmedText = HandleWithHalfYear(match, trimmedText, out bool hasHalf, out bool isFirstHalf); + swift = hasHalf ? 0 : swift; + + year = referenceDate.AddYears(swift).Year; + if (this.config.IsLastYear(trimmedText)) + { + year--; + } + else if (this.config.IsNextYear(trimmedText)) + { + year++; + } + else if (this.config.IsYearBeforeLast(trimmedText)) + { + year -= 2; + } + else if (this.config.IsYearAfterNext(trimmedText)) + { + year += 2; + } + + return HandleYearResult(ret, hasHalf, isFirstHalf, isReferenceDatePeriod, year); + } + } + } + else + { + return ret; + } + + // only "month" will come to here + ret.FutureValue = new Tuple( + DateObject.MinValue.SafeCreateFromValue(futureYear, month, 1), + DateObject.MinValue.SafeCreateFromValue(futureYear, month, 1).AddMonths(1)); + + ret.PastValue = new Tuple( + DateObject.MinValue.SafeCreateFromValue(pastYear, month, 1), + DateObject.MinValue.SafeCreateFromValue(pastYear, month, 1).AddMonths(1)); + + ret.Success = true; + + return ret; + } + + private DateTimeResolutionResult ParseLaterEarlyPeriod(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + int year = referenceDate.Year, month = referenceDate.Month; + int futureYear = year, pastYear = year; + var earlyPrefix = false; + var latePrefix = false; + var midPrefix = false; + var 
earlierPrefix = false; + var laterPrefix = false; + var isReferenceDatePeriod = false; + + var trimmedText = text.Trim(); + + var match = this.config.LaterEarlyPeriodRegex.MatchExact(trimmedText, trim: true); + + if (match.Success) + { + if (match.Groups[Constants.EarlyPrefixGroupName].Success) + { + earlyPrefix = true; + trimmedText = match.Groups[Constants.SuffixGroupName].ToString(); + ret.Mod = Constants.EARLY_MOD; + } + else if (match.Groups[Constants.LatePrefixGroupName].Success) + { + latePrefix = true; + trimmedText = match.Groups[Constants.SuffixGroupName].ToString(); + ret.Mod = Constants.LATE_MOD; + } + else if (match.Groups[Constants.MidPrefixGroupName].Success) + { + midPrefix = true; + trimmedText = match.Groups[Constants.SuffixGroupName].ToString(); + ret.Mod = Constants.MID_MOD; + } + + var swift = 0; + if (!string.IsNullOrEmpty(match.Groups[Constants.MonthGroupName].Value)) + { + swift = this.config.GetSwiftYear(trimmedText); + } + else + { + if (match.Groups[Constants.NextGroupName].Success) + { + swift = 1; + } + else if (match.Groups[Constants.LastGroupName].Success) + { + swift = -1; + } + } + + if (match.Groups[Constants.RelEarlyGroupName].Success) + { + earlierPrefix = true; + if (BaseDatePeriodParser.IsPresent(swift)) + { + ret.Mod = null; + } + } + else if (match.Groups[Constants.RelLateGroupName].Success) + { + laterPrefix = true; + if (BaseDatePeriodParser.IsPresent(swift)) + { + ret.Mod = null; + } + } + + var monthStr = match.Groups[Constants.MonthGroupName].Value; + + // Parse expressions "till date", "to date" + if (match.Groups[Constants.ToDateGroupName].Success) + { + ret.Timex = "PRESENT_REF"; + ret.FutureValue = ret.PastValue = referenceDate; + ret.Mod = Constants.BEFORE_MOD; + ret.Success = true; + return ret; + } + + if (!string.IsNullOrEmpty(monthStr)) + { + swift = this.config.GetSwiftYear(trimmedText); + + month = this.config.MonthOfYear[monthStr]; + + if (swift >= -1) + { + ret.Timex = (referenceDate.Year + 
swift).ToString("D4", CultureInfo.InvariantCulture) + "-" + month.ToString("D2", CultureInfo.InvariantCulture); + year = year + swift; + futureYear = pastYear = year; + } + else + { + ret.Timex = "XXXX-" + month.ToString("D2", CultureInfo.InvariantCulture); + if (month < referenceDate.Month) + { + futureYear++; + } + + if (month >= referenceDate.Month) + { + pastYear--; + } + } + } + else + { + if (match.Groups[Constants.NextGroupName].Success) + { + swift = 1; + } + else if (match.Groups[Constants.LastGroupName].Success) + { + swift = -1; + } + + var isWorkingWeek = match.Groups[Constants.BusinessDayGroupName].Success; + + var isWeekOnly = this.config.IsWeekOnly(trimmedText); + + if (isWorkingWeek || isWeekOnly) + { + var delta = Constants.WeekDayCount * swift; + var endDelta = delta; + + var monday = referenceDate.This(DayOfWeek.Monday).AddDays(delta); + var endDay = isWorkingWeek ? DayOfWeek.Friday : DayOfWeek.Sunday; + + var beginDate = referenceDate.This(DayOfWeek.Monday).AddDays(delta); + var endDate = inclusiveEndPeriod ? + referenceDate.This(endDay).AddDays(endDelta) : + referenceDate.This(endDay).AddDays(endDelta).AddDays(1); + + if (earlyPrefix) + { + endDate = inclusiveEndPeriod ? + referenceDate.This(DayOfWeek.Wednesday).AddDays(endDelta) : + referenceDate.This(DayOfWeek.Wednesday).AddDays(endDelta).AddDays(1); + } + else if (midPrefix) + { + beginDate = referenceDate.This(DayOfWeek.Tuesday).AddDays(delta); + endDate = inclusiveEndPeriod ? + referenceDate.This(DayOfWeek.Friday).AddDays(endDelta) : + referenceDate.This(DayOfWeek.Friday).AddDays(endDelta).AddDays(1); + } + else if (latePrefix) + { + beginDate = referenceDate.This(DayOfWeek.Thursday).AddDays(delta); + } + + if (earlierPrefix && swift == 0) + { + if (endDate > referenceDate) + { + endDate = referenceDate; + } + } + else if (laterPrefix && swift == 0) + { + if (beginDate < referenceDate) + { + beginDate = referenceDate; + } + } + + ret.Timex = isReferenceDatePeriod ? 
TimexUtility.GenerateWeekTimex() : TimexUtility.GenerateWeekTimex(monday); + + if (latePrefix && swift != 0) + { + ret.Mod = Constants.LATE_MOD; + } + + ret.FutureValue = + ret.PastValue = + new Tuple(beginDate, endDate); + + ret.Success = true; + + return ret; + } + + if (this.config.IsWeekend(trimmedText)) + { + var beginDate = referenceDate.This(DayOfWeek.Saturday).AddDays(Constants.WeekDayCount * swift); + var endDate = referenceDate.This(DayOfWeek.Sunday).AddDays(Constants.WeekDayCount * swift); + ret.Timex = isReferenceDatePeriod ? TimexUtility.GenerateWeekendTimex() : TimexUtility.GenerateWeekendTimex(beginDate); + endDate = inclusiveEndPeriod ? endDate : endDate.AddDays(1); + ret.FutureValue = + ret.PastValue = new Tuple(beginDate, endDate); + ret.Success = true; + return ret; + } + + if (this.config.IsMonthOnly(trimmedText)) + { + var date = referenceDate.AddMonths(swift); + month = date.Month; + year = date.Year; + ret.Timex = isReferenceDatePeriod ? TimexUtility.GenerateMonthTimex() : TimexUtility.GenerateMonthTimex(date); + futureYear = pastYear = year; + } + else if (this.config.IsYearOnly(trimmedText)) + { + var date = referenceDate.AddYears(swift); + year = date.Year; + + if (!string.IsNullOrEmpty(match.Groups[Constants.SpecialGroupName].Value)) + { + swift = this.config.GetSwiftYear(trimmedText); + date = swift < -1 ? Constants.InvalidDate : date; + ret.Timex = TimexUtility.GenerateYearTimex(date, null); + ret.Success = true; + return ret; + } + + var beginDate = DateObject.MinValue.SafeCreateFromValue(year, 1, 1); + var endDate = inclusiveEndPeriod ? + DateObject.MinValue.SafeCreateFromValue(year, 12, 31) : + DateObject.MinValue.SafeCreateFromValue(year, 12, 31).AddDays(1); + + if (earlyPrefix) + { + endDate = inclusiveEndPeriod ? 
+ DateObject.MinValue.SafeCreateFromValue(year, 6, 30) : + DateObject.MinValue.SafeCreateFromValue(year, 6, 30).AddDays(1); + } + else if (midPrefix) + { + beginDate = DateObject.MinValue.SafeCreateFromValue(year, 4, 1); + endDate = inclusiveEndPeriod ? + DateObject.MinValue.SafeCreateFromValue(year, 9, 30) : + DateObject.MinValue.SafeCreateFromValue(year, 9, 30).AddDays(1); + } + else if (latePrefix) + { + beginDate = DateObject.MinValue.SafeCreateFromValue(year, Constants.WeekDayCount, 1); + } + + if (earlierPrefix && swift == 0) + { + if (endDate > referenceDate) + { + endDate = referenceDate; + } + } + else if (laterPrefix && swift == 0) + { + if (beginDate < referenceDate) + { + beginDate = referenceDate; + } + } + + ret.Timex = isReferenceDatePeriod ? TimexUtility.GenerateYearTimex() : TimexUtility.GenerateYearTimex(date); + ret.FutureValue = + ret.PastValue = + new Tuple(beginDate, endDate); + ret.Success = true; + return ret; + } + + // Early/mid/late are resolved in this policy to 4 month ranges at the start/middle/end of the year. + else if (!string.IsNullOrEmpty(match.Groups[Constants.FourDigitYearGroupName].Value)) + { + var date = referenceDate.AddYears(swift); + year = int.Parse(match.Groups[Constants.FourDigitYearGroupName].Value, CultureInfo.InvariantCulture); + + var beginDate = DateObject.MinValue.SafeCreateFromValue(year, 1, 1); + var endDate = inclusiveEndPeriod ? + DateObject.MinValue.SafeCreateFromValue(year, 12, 31) : + DateObject.MinValue.SafeCreateFromValue(year, 12, 31).AddDays(1); + + if (earlyPrefix) + { + endDate = inclusiveEndPeriod ? + DateObject.MinValue.SafeCreateFromValue(year, 4, 30) : + DateObject.MinValue.SafeCreateFromValue(year, 4, 30).AddDays(1); + } + else if (midPrefix) + { + beginDate = DateObject.MinValue.SafeCreateFromValue(year, 5, 1); + endDate = inclusiveEndPeriod ? 
+ DateObject.MinValue.SafeCreateFromValue(year, 8, 31) : + DateObject.MinValue.SafeCreateFromValue(year, 8, 31).AddDays(1); + } + else if (latePrefix) + { + beginDate = DateObject.MinValue.SafeCreateFromValue(year, 9, 1); + } + + ret.Timex = isReferenceDatePeriod ? TimexUtility.GenerateYearTimex() : TimexUtility.GenerateYearTimex(beginDate); + ret.FutureValue = + ret.PastValue = + new Tuple(beginDate, endDate); + ret.Success = true; + return ret; + } + } + } + else + { + return ret; + } + + // only "month" will come to here + var futureStart = DateObject.MinValue.SafeCreateFromValue(futureYear, month, 1); + var futureEnd = inclusiveEndPeriod ? + DateObject.MinValue.SafeCreateFromValue(futureYear, month, 1).AddMonths(1).AddDays(-1) : + DateObject.MinValue.SafeCreateFromValue(futureYear, month, 1).AddMonths(1); + + var pastStart = DateObject.MinValue.SafeCreateFromValue(pastYear, month, 1); + var pastEnd = inclusiveEndPeriod ? + DateObject.MinValue.SafeCreateFromValue(pastYear, month, 1).AddMonths(1).AddDays(-1) : + DateObject.MinValue.SafeCreateFromValue(pastYear, month, 1).AddMonths(1); + + if (earlyPrefix) + { + futureEnd = inclusiveEndPeriod ? + DateObject.MinValue.SafeCreateFromValue(futureYear, month, 15) : + DateObject.MinValue.SafeCreateFromValue(futureYear, month, 15).AddDays(1); + + pastEnd = inclusiveEndPeriod ? + DateObject.MinValue.SafeCreateFromValue(pastYear, month, 15) : + DateObject.MinValue.SafeCreateFromValue(pastYear, month, 15).AddDays(1); + } + else if (midPrefix) + { + futureStart = DateObject.MinValue.SafeCreateFromValue(futureYear, month, 10); + pastStart = DateObject.MinValue.SafeCreateFromValue(pastYear, month, 10); + futureEnd = inclusiveEndPeriod ? + DateObject.MinValue.SafeCreateFromValue(futureYear, month, 20) : + DateObject.MinValue.SafeCreateFromValue(futureYear, month, 20).AddDays(1); + + pastEnd = inclusiveEndPeriod ? 
+                    DateObject.MinValue.SafeCreateFromValue(pastYear, month, 20) :
+                    DateObject.MinValue.SafeCreateFromValue(pastYear, month, 20).AddDays(1);
+            }
+            else if (latePrefix)
+            {
+                futureStart = DateObject.MinValue.SafeCreateFromValue(futureYear, month, 16);
+                pastStart = DateObject.MinValue.SafeCreateFromValue(pastYear, month, 16);
+            }
+
+            if (earlierPrefix && futureEnd == pastEnd)
+            {
+                if (futureEnd > referenceDate)
+                {
+                    futureEnd = pastEnd = referenceDate;
+                }
+            }
+            else if (laterPrefix && futureStart == pastStart)
+            {
+                if (futureStart < referenceDate)
+                {
+                    futureStart = pastStart = referenceDate;
+                }
+            }
+
+            ret.FutureValue = new Tuple(futureStart, futureEnd);
+            ret.PastValue = new Tuple(pastStart, pastEnd);
+            ret.Success = true;
+
+            return ret;
+        }
+
+        // Resolves "half" periods: the first or second half of a week, a month, or a year.
+        //   text:          period text; config.IsWeekOnly / config.IsMonthOnly select the unit, anything
+        //                  else is handled as a half-year.
+        //   referenceDate: anchor date whose week (via referenceDate.This), month, or year is split.
+        //   ret:           result object to fill in; it is mutated and returned with Success = true.
+        //   swift:         -1 selects the first half; any other value selects the second half.
+        // The resolved range is half-open: endDay is the first day after the period.
+        private DateTimeResolutionResult HandleWithHalfTag(string text, DateObject referenceDate, DateTimeResolutionResult ret, int swift)
+        {
+            DateObject beginDay, endDay;
+            int year = referenceDate.Year, month = referenceDate.Month;
+
+            if (this.config.IsWeekOnly(text))
+            {
+                // Half of a week, e.g. "上半周" / "下半周": Monday..Thursday or Thursday..next Monday.
+                beginDay = swift == -1 ? referenceDate.This(DayOfWeek.Monday) : referenceDate.This(DayOfWeek.Thursday);
+                endDay = swift == -1 ? referenceDate.This(DayOfWeek.Thursday) : referenceDate.This(DayOfWeek.Sunday).AddDays(1);
+                ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDay, endDay, DatePeriodTimexType.ByDay);
+            }
+            else if (this.config.IsMonthOnly(text))
+            {
+                // Half of a month, e.g. "上半月" / "下半月".
+                var monthStartDay = DateObject.MinValue.SafeCreateFromValue(year, month, 1);
+
+                // Derive the exclusive month end from the month start so that December rolls over to
+                // January of the following year. Building it from (year, month + 1, 1) passed month 13
+                // for a December reference date, which is not a valid calendar month.
+                var monthEndDay = monthStartDay.AddMonths(1);
+
+                // Integer division of the month length: the first half gets the shorter part of an odd-length month.
+                var halfMonthDay = (int)((monthEndDay - monthStartDay).TotalDays / 2);
+
+                beginDay = swift == -1 ? monthStartDay : monthStartDay.AddDays(halfMonthDay);
+                endDay = swift == -1 ? monthStartDay.AddDays(halfMonthDay) : monthEndDay;
+                ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDay, endDay, DatePeriodTimexType.ByDay);
+            }
+            else
+            {
+                // Half of a year, e.g. "上(个)半年" / "下(个)半年": Jan 1..Jul 1 or Jul 1..Jan 1 of the next year.
+                beginDay = swift == -1 ? DateObject.MinValue.SafeCreateFromValue(year, 1, 1) : DateObject.MinValue.SafeCreateFromValue(year, 7, 1);
+                endDay = swift == -1 ? DateObject.MinValue.SafeCreateFromValue(year, 7, 1) : DateObject.MinValue.SafeCreateFromValue(year + 1, 1, 1);
+                ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDay, endDay, DatePeriodTimexType.ByMonth);
+            }
+
+            // Future and past resolutions are identical for these periods.
+            ret.FutureValue = ret.PastValue = new Tuple<DateObject, DateObject>(beginDay, endDay);
+            ret.Success = true;
+            return ret;
+        }
+
+        // Parses text that contains only a year, optionally followed by a half-year marker,
+        // e.g. "2016年" or "2016年上半年". Tries config.YearRegex first, then config.YearInCJKRegex.
+        // Returns a result with Success == false when neither pattern matches exactly.
+        private DateTimeResolutionResult ParseYear(string text, DateObject referenceDate)
+        {
+            var ret = new DateTimeResolutionResult();
+            var match = this.config.YearRegex.MatchExact(text, trim: true);
+
+            if (match.Success)
+            {
+                var yearStr = match.Value;
+
+                // Strip a trailing half-year marker, e.g. "2016年上半年", "2017年下半年"
+                yearStr = HandleWithHalfYear(match, yearStr, out bool hasHalf, out bool isFirstHalf);
+
+                // Drop the trailing year character; Trim() handles extra whitespace like '07 年'
+                if (this.config.IsYearOnly(yearStr))
+                {
+                    yearStr = yearStr.Substring(0, yearStr.Length - 1).Trim();
+                }
+
+                var year = ConvertCJKToInteger(yearStr);
+
+                return HandleYearResult(ret, hasHalf, isFirstHalf, false, year);
+            }
+
+            match = this.config.YearInCJKRegex.MatchExact(text, trim: true);
+
+            if (match.Success)
+            {
+                var yearStr = match.Value;
+
+                // Strip a trailing half-year marker, e.g. "二零一七年上半年", "二零一七年下半年"
+                yearStr = HandleWithHalfYear(match, yearStr, out bool hasHalf, out bool isFirstHalf);
+
+                if (this.config.IsYearOnly(yearStr))
+                {
+                    yearStr = yearStr.Substring(0, yearStr.Length - 1);
+                }
+
+                // A single remaining character is not accepted as a year.
+                if (yearStr.Length == 1)
+                {
+                    return ret;
+                }
+
+                var year = ConvertCJKToInteger(yearStr);
+
+                return HandleYearResult(ret, hasHalf, isFirstHalf, false, year);
+            }
+
+            return ret;
+        }
+
+        private string HandleWithHalfYear(ConditionalMatch
match, string text, out bool hasHalf, out bool isFirstHalf) + { + var firstHalf = match.Groups[Constants.FirstHalfGroupName].Value; + var secondHalf = match.Groups[Constants.SecondHalfGroupName].Value; + + hasHalf = false; + isFirstHalf = !string.IsNullOrEmpty(firstHalf); + + if (isFirstHalf || !string.IsNullOrEmpty(secondHalf)) + { + var halfText = isFirstHalf ? firstHalf : secondHalf; + text = text.Substring(0, text.Length - halfText.Length); + hasHalf = true; + } + + return text.Trim(); + } + + private DateTimeResolutionResult HandleYearResult(DateTimeResolutionResult ret, bool hasHalf, bool isFirstHalf, bool isReferenceDatePeriod, int year) + { + if (year < 100 && year >= this.config.TwoNumYear) + { + year += Constants.BASE_YEAR_PAST_CENTURY; + } + else if (year < 100 && year < this.config.TwoNumYear) + { + year += Constants.BASE_YEAR_CURRENT_CENTURY; + } + + var beginDay = DateObject.MinValue.SafeCreateFromValue(year, 1, 1); + var endDay = DateObject.MinValue.SafeCreateFromValue(year + 1, 1, 1); + + ret.Timex = isReferenceDatePeriod ? 
TimexUtility.GenerateYearTimex() : DateTimeFormatUtil.LuisDate(year); + + if (hasHalf) + { + if (isFirstHalf) + { + endDay = DateObject.MinValue.SafeCreateFromValue(year, 7, 1); + } + else + { + beginDay = DateObject.MinValue.SafeCreateFromValue(year, 7, 1); + } + + ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDay, endDay, DatePeriodTimexType.ByMonth); + } + + ret.FutureValue = ret.PastValue = new Tuple(beginDay, endDay); + ret.Success = true; + + return ret; + } + + // @TODO Unify this method with its counterpart in BaseDatePeriodParser (if possible) and move it to Utilities + // parse entities that made up by two time points + private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var er = this.config.DateExtractor.Extract(text, referenceDate); + if (er.Count < 2) + { + er = this.config.DateExtractor.Extract(this.config.TokenBeforeDate + text, referenceDate); + if (er.Count < 2) + { + return ret; + } + + er[0].Start -= this.config.TokenBeforeDate.Length; + er[1].Start -= this.config.TokenBeforeDate.Length; + } + + var pr1 = this.config.DateParser.Parse(er[0], referenceDate); + var pr2 = this.config.DateParser.Parse(er[1], referenceDate); + + if (er.Count >= 2) + { + var match = this.config.WeekWithWeekDayRangeRegex.Match(text); + + if (match.Success) + { + var weekPrefix = match.Groups[Constants.WeekGroupName].ToString(); + + // Check if weekPrefix is already included in the extractions otherwise include it + if (!string.IsNullOrEmpty(weekPrefix)) + { + if (!er[0].Text.Contains(weekPrefix)) + { + er[0].Text = weekPrefix + er[0].Text; + } + + if (!er[1].Text.Contains(weekPrefix)) + { + er[1].Text = weekPrefix + er[1].Text; + } + } + + pr1 = this.config.DateParser.Parse(er[0], referenceDate); + pr2 = this.config.DateParser.Parse(er[1], referenceDate); + + } + else + { + // @TODO Refactor code to remove the cycle between BaseDatePeriodParser and its config. 
+ var dateContext = GetYearContext(er[0].Text, er[1].Text, text); + + if (pr1.Value == null || pr2.Value == null) + { + return ret; + } + + pr1 = dateContext.ProcessDateEntityParsingResult(pr1); + pr2 = dateContext.ProcessDateEntityParsingResult(pr2); + + // When the case has no specified year, we should sync the future/past year due to invalid date Feb 29th. + if (dateContext.IsEmpty() && (DateContext.IsFeb29th((DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue) + || DateContext.IsFeb29th((DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue))) + { + (pr1, pr2) = dateContext.SyncYear(pr1, pr2); + } + } + } + + DateObject futureBegin = (DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue, + futureEnd = (DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue; + DateObject pastBegin = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue, + pastEnd = (DateObject)((DateTimeResolutionResult)pr2.Value).PastValue; + + if (futureBegin > futureEnd) + { + futureBegin = pastBegin; + } + + if (pastEnd < pastBegin) + { + pastEnd = futureEnd; + } + + ret.Timex = TimexUtility.GenerateDatePeriodTimex(futureBegin, futureEnd, DatePeriodTimexType.ByDay, pr1.TimexStr, pr2.TimexStr); + + if (pr1.TimexStr.StartsWith(Constants.TimexFuzzyYear, StringComparison.Ordinal) && + futureBegin.CompareTo(DateObject.MinValue.SafeCreateFromValue(futureBegin.Year, 2, 28)) <= 0 && + futureEnd.CompareTo(DateObject.MinValue.SafeCreateFromValue(futureBegin.Year, 3, 1)) >= 0) + { + // Handle cases like "2月28日到3月1日". + // There may be different timexes for FutureValue and PastValue due to the different validity of Feb 29th. 
+                ret.Comment = Constants.Comment_DoubleTimex;
+                var pastTimex = TimexUtility.GenerateDatePeriodTimex(pastBegin, pastEnd, DatePeriodTimexType.ByDay, pr1.TimexStr, pr2.TimexStr);
+                ret.Timex = TimexUtility.MergeTimexAlternatives(ret.Timex, pastTimex);
+            }
+
+            ret.FutureValue = new Tuple(futureBegin, futureEnd);
+            ret.PastValue = new Tuple(pastBegin, pastEnd);
+            ret.Success = true;
+
+            return ret;
+        }
+
+        // Handles a number written directly against a unit and preceded by a past/future marker,
+        // e.g. "前两年" (past two years), "前三个月" (past three months).
+        // Returns a result with Success == false when the text does not match config.NumberCombinedWithUnit,
+        // the unit is missing from config.UnitMap, the mapped unit is not day/week/month/year, or the text
+        // before the match is neither an exact config.PastRegex nor an exact config.FutureRegex match.
+        private DateTimeResolutionResult ParseNumberWithUnit(string text, DateObject referenceDate)
+        {
+            var ret = new DateTimeResolutionResult();
+
+            string numStr, unitStr;
+
+            // if there are NO spaces between number and unit
+            var match = this.config.NumberCombinedWithUnit.Match(text);
+
+            if (match.Success)
+            {
+                var srcUnit = match.Groups[Constants.UnitGroupName].Value;
+                var beforeStr = text.Substring(0, match.Index);
+
+                if (this.config.UnitMap.ContainsKey(srcUnit))
+                {
+                    unitStr = this.config.UnitMap[srcUnit];
+                    numStr = match.Groups[Constants.NumGroupName].Value;
+
+                    if (this.config.PastRegex.IsExactMatch(beforeStr, trim: true))
+                    {
+                        // Past: the range ends at the reference date and starts N units earlier.
+                        DateObject beginDate, endDate;
+                        switch (unitStr)
+                        {
+                            case Constants.TimexDay:
+                                beginDate = referenceDate.AddDays(-double.Parse(numStr, CultureInfo.InvariantCulture));
+                                endDate = referenceDate;
+                                break;
+                            case Constants.TimexWeek:
+                                beginDate = referenceDate.AddDays(-7 * double.Parse(numStr, CultureInfo.InvariantCulture));
+                                endDate = referenceDate;
+                                break;
+                            case Constants.TimexMonthFull:
+                                beginDate = referenceDate.AddMonths(-Convert.ToInt32(double.Parse(numStr, CultureInfo.InvariantCulture)));
+                                endDate = referenceDate;
+                                break;
+                            case Constants.TimexYear:
+                                beginDate = referenceDate.AddYears(-Convert.ToInt32(double.Parse(numStr, CultureInfo.InvariantCulture)));
+                                endDate = referenceDate;
+                                break;
+                            default:
+                                return ret;
+                        }
+
+                        ret.Timex = $"({DateTimeFormatUtil.LuisDate(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)},P{numStr}{unitStr[0]})";
+                        ret.FutureValue = ret.PastValue = new Tuple<DateObject, DateObject>(beginDate, endDate);
+                        ret.Success = true;
+                        return ret;
+                    }
+
+                    if (this.config.FutureRegex.IsExactMatch(beforeStr, trim: true))
+                    {
+                        // Future: the number is parsed with the invariant culture, exactly as in the past
+                        // branch, so the result does not depend on the thread's current culture.
+                        DateObject beginDate, endDate;
+                        switch (unitStr)
+                        {
+                            case Constants.TimexDay:
+                                beginDate = referenceDate;
+                                endDate = referenceDate.AddDays(double.Parse(numStr, CultureInfo.InvariantCulture));
+                                break;
+                            case Constants.TimexWeek:
+                                beginDate = referenceDate;
+                                endDate = referenceDate.AddDays(7 * double.Parse(numStr, CultureInfo.InvariantCulture));
+                                break;
+                            case Constants.TimexMonthFull:
+                                beginDate = referenceDate;
+                                endDate = referenceDate.AddMonths(Convert.ToInt32(double.Parse(numStr, CultureInfo.InvariantCulture)));
+                                break;
+                            case Constants.TimexYear:
+                                beginDate = referenceDate;
+                                endDate = referenceDate.AddYears(Convert.ToInt32(double.Parse(numStr, CultureInfo.InvariantCulture)));
+                                break;
+                            default:
+                                return ret;
+                        }
+
+                        // The future range is shifted by one day, so it starts the day after the reference date.
+                        ret.Timex =
+                            $"({DateTimeFormatUtil.LuisDate(beginDate.AddDays(1))},{DateTimeFormatUtil.LuisDate(endDate.AddDays(1))},P{numStr}{unitStr[0]})";
+                        ret.FutureValue =
+                            ret.PastValue = new Tuple<DateObject, DateObject>(beginDate.AddDays(1), endDate.AddDays(1));
+                        ret.Success = true;
+                        return ret;
+                    }
+                }
+            }
+
+            return ret;
+        }
+
+        // Analogous to the same method in BaseDatePeriodParser, it deals with date periods that involve durations
+        // e.g. "past 2 years", "within 2 days", "first 2 weeks of 2018".
+ private DateTimeResolutionResult ParseDuration(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + + // For cases like 'first 2 weeks of 2021' (2021年的前2周), 'past 2 years' (前两年), 'next 3 years' (后三年) + var durationRes = this.config.DurationExtractor.Extract(text, referenceDate); + + var matchHalf = this.config.OneWordPeriodRegex.MatchExact(text, trim: true); + + // halfTag cases are processed in ParseOneWordPeriod + if (!string.IsNullOrEmpty(matchHalf.Groups[Constants.HalfTagGroupName].Value)) + { + ret.Success = false; + return ret; + } + + if (durationRes.Count > 0) + { + var beforeStr = text.Substring(0, (int)durationRes[0].Start); + var afterStr = text.Substring((int)durationRes[0].Start + (int)durationRes[0].Length).Trim(); + var matches = this.config.UnitRegex.Matches(durationRes[0].Text); + var matchBusinessDays = this.config.DurationUnitRegex.MatchExact(text, trim: true); + + // handle duration cases like "5 years 1 month 21 days" and "multiple business days" + if ((matches.Count > 1 && matches.Count <= 3) || + matchBusinessDays.Groups[Constants.BusinessDayGroupName].Success) + { + ret = ParseMultipleDatesDuration(text, referenceDate); + return ret; + } + else if (matches.Count == 1 && matches[0].Success) + { + var srcUnit = matches[0].Groups[Constants.UnitGroupName].Value; + + var numberStr = durationRes[0].Text.Substring(0, matches[0].Index).Trim(); + var matchFew = this.config.DurationRelativeDurationUnitRegex.Match(text); + var number = numberStr.Equals(matchFew.Groups[Constants.FewGroupName].Value, StringComparison.Ordinal) ? 
3 : ConvertCJKToNum(numberStr); + + if (this.config.UnitMap.ContainsKey(srcUnit)) + { + var beginDate = referenceDate; + var endDate = referenceDate; + + var unitStr = this.config.UnitMap[srcUnit]; + + // Get prefix + var prefixMatch = new ConditionalMatch(Match.Empty, false); + if (this.config.UnitRegex.Match(srcUnit).Groups[Constants.UnitOfYearGroupName].Success) + { + // Patterns like 'first 2 weeks of 2018' are considered only if the unit is compatible + prefixMatch = this.config.FirstLastOfYearRegex.MatchExact(beforeStr, trim: true); + } + + var isOfYearMatch = prefixMatch.Success; + var isPastMatch = prefixMatch.Groups[Constants.LastGroupName].Success; + var isFuture = false; + + if (!prefixMatch.Success) + { + prefixMatch = this.config.PastRegex.MatchExact(beforeStr, trim: true); + isPastMatch = prefixMatch.Success; + } + + if (!prefixMatch.Success) + { + prefixMatch = this.config.FutureRegex.MatchExact(beforeStr, trim: true); + isFuture = prefixMatch.Success; + } + + if (!prefixMatch.Success) + { + prefixMatch = this.config.FutureRegex.MatchExact(afterStr, trim: true); + isFuture = prefixMatch.Success; + } + + if (isFuture && !this.config.FutureRegex.MatchExact(afterStr, trim: true).Groups[Constants.WithinGroupName].Success) + { + // for the "within" case it should start from the current day + beginDate = beginDate.AddDays(1); + endDate = endDate.AddDays(1); + } + + // Shift by year (if present) + if (isOfYearMatch) + { + // Get year + var year = GetYearFromText(prefixMatch.Match); + if (year == Constants.InvalidYear) + { + var swift = 0; + var yearRel = prefixMatch.Groups[Constants.YearRelGroupName].Value; + if (this.config.IsLastYear(yearRel)) + { + swift = -1; + } + else if (this.config.IsNextYear(yearRel)) + { + swift = 1; + } + + year = referenceDate.Year + swift; + } + + // Get begin/end dates for year + if (unitStr == Constants.TimexWeek) + { + // First/last week of the year is calculated according to ISO definition + beginDate = 
DateObjectExtension.GetFirstThursday(year).This(DayOfWeek.Monday); + endDate = DateObjectExtension.GetLastThursday(year).This(DayOfWeek.Monday).AddDays(7); + } + else + { + beginDate = DateObject.MinValue.SafeCreateFromValue(year, 1, 1); + endDate = DateObject.MinValue.SafeCreateFromValue(year, 12, 31).AddDays(1); + } + } + + // Shift begin/end dates by duration span + if (prefixMatch.Success) + { + if (isPastMatch) + { + beginDate = endDate; + switch (unitStr) + { + case Constants.TimexDay: + beginDate = beginDate.AddDays(-number); + break; + case Constants.TimexWeek: + beginDate = beginDate.AddDays(-7 * number); + break; + case Constants.TimexMonthFull: + beginDate = beginDate.AddMonths(-number); + break; + case Constants.TimexYear: + beginDate = beginDate.AddYears(-number); + break; + default: + return ret; + } + } + else + { + endDate = beginDate; + switch (unitStr) + { + case Constants.TimexDay: + endDate = endDate.AddDays(number); + break; + case Constants.TimexWeek: + endDate = endDate.AddDays(7 * number); + break; + case Constants.TimexMonthFull: + endDate = endDate.AddMonths(number); + break; + case Constants.TimexYear: + endDate = endDate.AddYears(number); + break; + default: + return ret; + } + + } + + ret.Timex = $"({DateTimeFormatUtil.LuisDate(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)},P{number}{unitStr[0]})"; + ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); + ret.Success = true; + return ret; + } + } + } + } + + return ret; + } + + private DateTimeResolutionResult ParseMultipleDatesDuration(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + DateObject beginDate; + DateObject endDate = beginDate = referenceDate; + var durationTimex = string.Empty; + var restNowSunday = false; + + var durationErs = config.DurationExtractor.Extract(text, referenceDate); + + if (durationErs.Count > 0) + { + var durationPr = config.DurationParser.Parse(durationErs[0]); + var beforeStr = text.Substring(0, 
durationPr.Start ?? 0).Trim(); + var afterStr = text.Substring((durationPr.Start ?? 0) + (durationPr.Length ?? 0)).Trim(); + + ModAndDateResult modAndDateResult = new ModAndDateResult(beginDate, endDate); + + if (durationPr.Value != null) + { + var durationResult = (DateTimeResolutionResult)durationPr.Value; + + if (string.IsNullOrEmpty(durationResult.Timex)) + { + return ret; + } + + if (config.PastRegex.IsMatch(beforeStr) || config.PastRegex.IsMatch(afterStr)) + { + modAndDateResult = ModAndDateResult.GetModAndDate(beginDate, endDate, referenceDate, durationResult.Timex, false); + beginDate = modAndDateResult.BeginDate; + } + + if ((config.FutureRegex.IsExactMatch(beforeStr, trim: true) || config.FutureRegex.IsExactMatch(afterStr, trim: true)) && + DurationParsingUtil.IsDateDuration(durationResult.Timex)) + { + modAndDateResult = ModAndDateResult.GetModAndDate(beginDate, endDate, referenceDate, durationResult.Timex, true); + + beginDate = modAndDateResult.BeginDate; + endDate = modAndDateResult.EndDate; + + // In GetModAndDate, this "future" resolution will add one day to beginDate/endDate, + // but for the "within" case it should start from the current day. + if (this.config.FutureRegex.MatchExact(afterStr, trim: true).Groups[Constants.WithinGroupName].Success) + { + beginDate = beginDate.AddDays(-1); + endDate = endDate.AddDays(-1); + } + } + + if (!string.IsNullOrEmpty(modAndDateResult.Mod)) + { + ((DateTimeResolutionResult)durationPr.Value).Mod = modAndDateResult.Mod; + } + + durationTimex = durationResult.Timex; + ret.SubDateTimeEntities = new List { durationPr }; + if (modAndDateResult.DateList != null) + { + ret.List = modAndDateResult.DateList.Cast().ToList(); + } + } + + if (!beginDate.Equals(endDate) || restNowSunday) + { + endDate = inclusiveEndPeriod ? endDate.AddDays(-1) : endDate; + + // TODO: analyse upper code and use GenerateDatePeriodTimex to create this Timex. 
+                    ret.Timex = $"({DateTimeFormatUtil.LuisDate(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)},{durationTimex})";
+                    ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate);
+                    ret.Success = true;
+                    return ret;
+                }
+            }
+
+            ret.Success = false;
+            return ret;
+        }
+
+        // Resolves "Nth / last week of a month" expressions, e.g. "三月的第一周" (first week of March).
+        // - No month in the text: the month and year come from referenceDate, shifted by +1 month when the
+        //   text begins with a config.WoMNextRegex match and by -1 month for config.WoMPreviousRegex.
+        // - Month without a year: referenceDate.Year is used and noYear is passed to GetWeekOfMonth.
+        // Returns a result with Success == false when config.WeekOfMonthRegex does not match.
+        private DateTimeResolutionResult ParseWeekOfMonth(string text, DateObject referenceDate)
+        {
+            var trimmedText = text.Trim();
+
+            var match = this.config.WeekOfMonthRegex.Match(text);
+            if (!match.Success)
+            {
+                return new DateTimeResolutionResult();
+            }
+
+            var cardinalStr = match.Groups[Constants.CardinalGroupName].Value;
+            var monthStr = match.Groups[Constants.MonthGroupName].Value;
+            var noYear = false;
+            int year;
+            int month;
+
+            if (string.IsNullOrEmpty(monthStr))
+            {
+                var swift = 0;
+                if (config.WoMNextRegex.MatchBegin(trimmedText, trim: true).Success)
+                {
+                    swift = 1;
+                }
+                else if (config.WoMPreviousRegex.MatchBegin(trimmedText, trim: true).Success)
+                {
+                    swift = -1;
+                }
+
+                // Shift once and read both parts, so a December/January rollover keeps month and year in sync.
+                var shiftedDate = referenceDate.AddMonths(swift);
+                month = shiftedDate.Month;
+                year = shiftedDate.Year;
+            }
+            else
+            {
+                month = this.config.ToMonthNumber(monthStr);
+                year = GetYearFromText(match);
+
+                if (year == Constants.InvalidYear)
+                {
+                    year = referenceDate.Year;
+                    noYear = true;
+                }
+            }
+
+            // GetWeekOfMonth interprets the cardinal text and builds the Timex and both resolutions itself,
+            // so no cardinal lookup or Timex is prepared here.
+            return GetWeekOfMonth(cardinalStr, month, year, referenceDate, noYear);
+        }
+
+        private DateTimeResolutionResult ParseWeekOfDate(string text, DateObject referenceDate)
+        {
+            var ret = new DateTimeResolutionResult();
+            var match = config.WeekOfDateRegex.Match(text);
+            var dateErs = config.DateExtractor.Extract(text, referenceDate);
+
+            // Cases like 'week of september 16th' (9月16日の週)
+            if (match.Success && dateErs.Count == 1)
+            {
+                var pr
= (DateTimeResolutionResult)config.DateParser.Parse(dateErs[0], referenceDate).Value; + if ((config.Options & DateTimeOptions.CalendarMode) != 0) + { + var monday = ((DateObject)pr.FutureValue).This(DayOfWeek.Monday); + ret.Timex = DateTimeFormatUtil.ToIsoWeekTimex(monday); + } + else + { + ret.Timex = pr.Timex; + } + + ret.Comment = Constants.Comment_WeekOf; + ret.FutureValue = BaseDatePeriodParser.GetWeekRangeFromDate((DateObject)pr.FutureValue); + ret.PastValue = BaseDatePeriodParser.GetWeekRangeFromDate((DateObject)pr.PastValue); + ret.Success = true; + } + + return ret; + } + + private DateTimeResolutionResult ParseMonthOfDate(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var match = config.MonthOfDateRegex.Match(text); + var ex = config.DateExtractor.Extract(text, referenceDate); + + // Cases like 'month of september 16th' (9月16日の月) + if (match.Success && ex.Count == 1) + { + var pr = (DateTimeResolutionResult)config.DateParser.Parse(ex[0], referenceDate).Value; + ret.Timex = pr.Timex; + ret.Comment = Constants.Comment_MonthOf; + ret.FutureValue = BaseDatePeriodParser.GetMonthRangeFromDate((DateObject)pr.FutureValue); + ret.PastValue = BaseDatePeriodParser.GetMonthRangeFromDate((DateObject)pr.PastValue); + ret.Success = true; + } + + return ret; + } + + private DateTimeResolutionResult ParseWhichWeek(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var match = this.config.WhichWeekRegex.MatchExact(text, trim: true); + + if (match.Success) + { + var num = int.Parse(match.Groups[Constants.NumberGroupName].ToString(), CultureInfo.InvariantCulture); + if (num == 0) + { + return ret; + } + + var year = referenceDate.Year; + ret.Timex = year.ToString("D4", CultureInfo.InvariantCulture) + "-W" + num.ToString("D2", CultureInfo.InvariantCulture); + + var firstDay = DateObject.MinValue.SafeCreateFromValue(year, 1, 1); + var firstThursday = firstDay.AddDays(DayOfWeek.Thursday - 
firstDay.DayOfWeek); + var firstWeek = Cal.GetWeekOfYear(firstThursday, CalendarWeekRule.FirstFourDayWeek, DayOfWeek.Monday); + + if (firstWeek == 1) + { + num -= 1; + } + + var value = firstThursday.AddDays((num * 7) - 3); + var futureDate = value; + var pastDate = value; + + ret.FutureValue = new Tuple(futureDate, futureDate.AddDays(Constants.WeekDayCount)); + ret.PastValue = new Tuple(pastDate, pastDate.AddDays(Constants.WeekDayCount)); + ret.Success = true; + } + + return ret; + } + + private DateTimeResolutionResult GetWeekOfMonth(string cardinalStr, int month, int year, DateObject referenceDate, bool noYear) + { + var ret = new DateTimeResolutionResult(); + var targetMonday = GetMondayOfTargetWeek(cardinalStr, month, year); + + var futureDate = targetMonday; + var pastDate = targetMonday; + + if (noYear && futureDate < referenceDate) + { + futureDate = GetMondayOfTargetWeek(cardinalStr, month, year + 1); + } + + if (noYear && pastDate >= referenceDate) + { + pastDate = GetMondayOfTargetWeek(cardinalStr, month, year - 1); + } + + if (noYear) + { + year = Constants.InvalidYear; + } + + // Note that if the cardinalStr equals to "last", the weekNumber would be fixed at "5" + // This may lead to some inconsistency between Timex and Resolution + // the StartDate and EndDate of the resolution would always be correct (following ISO week definition) + // But week number for "last week" might be inconsistent with the resolution as we only have one Timex, but we may have past and future resolutions which may have different week numbers + var weekNum = GetWeekNumberForMonth(cardinalStr); + ret.Timex = TimexUtility.GenerateWeekOfMonthTimex(year, month, weekNum); + + ret.FutureValue = inclusiveEndPeriod + ? new Tuple(futureDate, futureDate.AddDays(Constants.WeekDayCount - 1)) + : new Tuple(futureDate, futureDate.AddDays(Constants.WeekDayCount)); + + ret.PastValue = inclusiveEndPeriod + ? 
new Tuple(pastDate, pastDate.AddDays(Constants.WeekDayCount - 1)) + : new Tuple(pastDate, pastDate.AddDays(Constants.WeekDayCount)); + + ret.Success = true; + + return ret; + } + + private DateObject GetMondayOfTargetWeek(string cardinalStr, int month, int year) + { + DateObject result; + if (config.WoMLastRegex.IsExactMatch(cardinalStr, trim: true)) + { + var lastThursday = DateObjectExtension.GetLastThursday(year, month); + result = lastThursday.This(DayOfWeek.Monday); + } + else + { + int cardinal = GetWeekNumberForMonth(cardinalStr); + var firstThursday = DateObjectExtension.GetFirstThursday(year, month); + + result = firstThursday.This(DayOfWeek.Monday) + .AddDays(Constants.WeekDayCount * (cardinal - 1)); + } + + return result; + } + + private int GetWeekNumberForMonth(string cardinalStr) + { + // "last week of month" might not be "5th week of month" + // Sometimes it can also be "4th week of month" depends on specific year and month + // But as we only have one Timex, we use "5" to indicate it's the "last week" + int cardinal; + if (config.WoMLastRegex.IsExactMatch(cardinalStr, trim: true)) + { + cardinal = 5; + } + else + { + cardinal = this.config.CardinalMap[cardinalStr]; + } + + return cardinal; + } + + // Cases like 'second week of 2021' (2021年的第二周) + private DateTimeResolutionResult ParseWeekOfYear(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var trimmedText = text.Trim(); + + var match = this.config.WeekOfYearRegex.Match(text); + if (!match.Success) + { + return ret; + } + + var cardinalStr = match.Groups[Constants.CardinalGroupName].Value; + var orderStr = match.Groups[Constants.OrderGroupName].Value; + + var year = GetYearFromText(match); + if (year == Constants.InvalidYear) + { + var swift = this.config.GetSwiftYear(orderStr); + if (swift < -1) + { + return ret; + } + + year = referenceDate.Year + swift; + } + + DateObject targetWeekMonday; + + if (config.WoMLastRegex.IsExactMatch(cardinalStr, trim: true)) 
+ { + targetWeekMonday = DateObjectExtension.GetLastThursday(year).This(DayOfWeek.Monday); + + ret.Timex = TimexUtility.GenerateWeekTimex(targetWeekMonday); + } + else + { + var weekNum = this.config.CardinalMap[cardinalStr]; + targetWeekMonday = DateObjectExtension.GetFirstThursday(year).This(DayOfWeek.Monday) + .AddDays(Constants.WeekDayCount * (weekNum - 1)); + + ret.Timex = TimexUtility.GenerateWeekOfYearTimex(year, weekNum); + } + + ret.FutureValue = inclusiveEndPeriod ? + new Tuple(targetWeekMonday, targetWeekMonday.AddDays(Constants.WeekDayCount - 1)) : + new Tuple(targetWeekMonday, targetWeekMonday.AddDays(Constants.WeekDayCount)); + + ret.PastValue = inclusiveEndPeriod ? + new Tuple(targetWeekMonday, targetWeekMonday.AddDays(Constants.WeekDayCount - 1)) : + new Tuple(targetWeekMonday, targetWeekMonday.AddDays(Constants.WeekDayCount)); + + ret.Success = true; + + return ret; + } + + // parse "今年夏天" + private DateTimeResolutionResult ParseSeason(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var match = this.config.SeasonWithYear.MatchExact(text, trim: true); + + if (match.Success) + { + // parse year + var year = referenceDate.Year; + var hasYear = false; + var yearNum = match.Groups[Constants.YearGroupName].Value; + var yearCJK = match.Groups[Constants.YearCJKGroupName].Value; + var yearRel = match.Groups[Constants.YearRelGroupName].Value; + + if (!string.IsNullOrEmpty(yearNum)) + { + hasYear = true; + if (this.config.IsYearOnly(yearNum)) + { + yearNum = yearNum.Substring(0, yearNum.Length - 1); + } + + year = int.Parse(yearNum, CultureInfo.InvariantCulture); + } + else if (!string.IsNullOrEmpty(yearCJK)) + { + hasYear = true; + if (this.config.IsYearOnly(yearCJK)) + { + yearCJK = yearCJK.Substring(0, yearCJK.Length - 1); + } + + year = ConvertCJKToInteger(yearCJK); + } + else if (!string.IsNullOrEmpty(yearRel)) + { + hasYear = true; + if (this.config.IsLastYear(yearRel)) + { + year--; + } + else if 
(this.config.IsNextYear(yearRel)) + { + year++; + } + } + + // handle cases like "this summer" 今夏 + if (!hasYear && config.ThisRegex.MatchBegin(text, trim: true).Success) + { + hasYear = true; + year = referenceDate.Year; + } + else if (!hasYear && config.NextRegex.MatchBegin(text, trim: true).Success) + { + hasYear = true; + year = referenceDate.Year + 1; + } + else if (!hasYear && config.LastRegex.MatchBegin(text, trim: true).Success) + { + hasYear = true; + year = referenceDate.Year - 1; + } + + if (year < 100 && year >= this.config.TwoNumYear) + { + year += Constants.BASE_YEAR_PAST_CENTURY; + } + else if (year < 100 && year < this.config.TwoNumYear) + { + year += Constants.BASE_YEAR_CURRENT_CENTURY; + } + + // parse season + var seasonStr = match.Groups[Constants.SeasonGroupName].Value; + + if (match.Groups[Constants.EarlyPrefixGroupName].Success) + { + ret.Mod = Constants.EARLY_MOD; + } + else if (match.Groups[Constants.MidPrefixGroupName].Success) + { + ret.Mod = Constants.MID_MOD; + } + else if (match.Groups[Constants.LatePrefixGroupName].Success) + { + ret.Mod = Constants.LATE_MOD; + } + + ret.Timex = this.config.SeasonMap[seasonStr]; + + if (hasYear) + { + ret.Timex = year.ToString("D4", CultureInfo.InvariantCulture) + "-" + ret.Timex; + } + + ret.Success = true; + return ret; + } + + return ret; + } + + private DateTimeResolutionResult ParseQuarter(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var match = this.config.QuarterRegex.MatchExact(text, trim: true); + + if (!match.Success) + { + return ret; + } + + // parse year + var year = referenceDate.Year; + var yearNum = match.Groups[Constants.YearGroupName].Value; + var yearCJK = match.Groups[Constants.YearCJKGroupName].Value; + var yearRel = match.Groups[Constants.YearRelGroupName].Value; + if (!string.IsNullOrEmpty(yearNum)) + { + if (this.config.IsYearOnly(yearNum)) + { + yearNum = yearNum.Substring(0, yearNum.Length - 1); + } + + year = int.Parse(yearNum, 
CultureInfo.InvariantCulture); + } + else if (!string.IsNullOrEmpty(yearCJK)) + { + if (this.config.IsYearOnly(yearCJK)) + { + yearCJK = yearCJK.Substring(0, yearCJK.Length - 1); + } + + year = ConvertCJKToInteger(yearCJK); + } + else if (!string.IsNullOrEmpty(yearRel)) + { + if (this.config.IsLastYear(yearRel)) + { + year--; + } + else if (this.config.IsNextYear(yearRel)) + { + year++; + } + } + + if (year < 100 && year >= this.config.TwoNumYear) + { + year += Constants.BASE_YEAR_PAST_CENTURY; + } + else if (year < 100 && year < this.config.TwoNumYear) + { + year += Constants.BASE_YEAR_CURRENT_CENTURY; + } + + // parse quarterNum + var cardinalStr = match.Groups[Constants.CardinalGroupName].Value; + var quarterNum = this.config.CardinalMap[cardinalStr]; + + if (!string.IsNullOrEmpty(yearNum) || !string.IsNullOrEmpty(yearRel)) + { + var beginDate = DateObject.MinValue.SafeCreateFromValue(year, ((quarterNum - 1) * Constants.TrimesterMonthCount) + 1, 1); + var endDate = DateObject.MinValue.SafeCreateFromValue(year, quarterNum * Constants.TrimesterMonthCount, 1).AddMonths(1); + ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); + ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); + ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDate, endDate, DatePeriodTimexType.ByMonth); + ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDate, endDate, DatePeriodTimexType.ByMonth); + ret.Success = true; + } + else + { + var beginDate = DateObject.MinValue.SafeCreateFromValue(year, ((quarterNum - 1) * Constants.TrimesterMonthCount) + 1, 1); + var endDate = DateObject.MinValue.SafeCreateFromValue(year, quarterNum * Constants.TrimesterMonthCount, 1).AddMonths(1); + ret.PastValue = new Tuple(beginDate, endDate); + ret.Timex = TimexUtility.GenerateDatePeriodTimex(beginDate, endDate, DatePeriodTimexType.ByMonth); + beginDate = DateObject.MinValue.SafeCreateFromValue(year + 1, ((quarterNum - 1) * Constants.TrimesterMonthCount) + 1, 1); + endDate = 
DateObject.MinValue.SafeCreateFromValue(year + 1, quarterNum * Constants.TrimesterMonthCount, 1).AddMonths(1); + ret.FutureValue = new Tuple(beginDate, endDate); + } + + ret.Success = true; + return ret; + } + + private DateTimeResolutionResult ParseDecade(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + int century = (referenceDate.Year / 100) + 1; + int decade; + int decadeLastYear = 10; + var inputCentury = false; + + var match = this.config.DecadeRegex.MatchExact(text, trim: true); + + if (match.Success) + { + var decadeStr = match.Groups[Constants.DecadeGroupName].Value; + if (!int.TryParse(decadeStr, out decade)) + { + decade = ConvertCJKToNum(decadeStr); + } + + var centuryStr = match.Groups[Constants.CenturyGroupName].Value; + if (!string.IsNullOrEmpty(centuryStr)) + { + if (!int.TryParse(centuryStr, out century)) + { + century = ConvertCJKToNum(centuryStr); + } + + inputCentury = true; + } + else + { + centuryStr = match.Groups[Constants.RelCenturyGroupName].Value; + + if (!string.IsNullOrEmpty(centuryStr)) + { + centuryStr = centuryStr.Trim(); + var thisMatch = this.config.ThisRegex.Match(centuryStr); + var nextMatch = this.config.NextRegex.Match(centuryStr); + var lastMatch = this.config.LastRegex.Match(centuryStr); + + if (thisMatch.Success) + { + // do nothing + } + else if (nextMatch.Success) + { + century++; + } + else + { + century--; + } + + inputCentury = true; + } + } + } + else + { + return ret; + } + + var beginYear = ((century - 1) * 100) + decade; + var firstTwoNumOfYear = match.Groups[Constants.FirstTwoYearGroupName].Value; + + // handle cases like "2000年代" + if (!string.IsNullOrEmpty(firstTwoNumOfYear)) + { + beginYear = (ConvertCJKToInteger(firstTwoNumOfYear) * 100) + decade; + } + + ret.Timex = TimexUtility.GenerateDecadeTimex(beginYear, decadeLastYear, decade, inputCentury); + + int futureYear = beginYear, pastYear = beginYear; + var startDate = DateObject.MinValue.SafeCreateFromValue(beginYear, 1, 
1); + if (!inputCentury && startDate < referenceDate && string.IsNullOrEmpty(firstTwoNumOfYear)) + { + futureYear += 100; + } + + if (!inputCentury && startDate >= referenceDate && string.IsNullOrEmpty(firstTwoNumOfYear)) + { + pastYear -= 100; + } + + ret.FutureValue = new Tuple( + DateObject.MinValue.SafeCreateFromValue(futureYear, 1, 1), + DateObject.MinValue.SafeCreateFromValue(futureYear + decadeLastYear, 1, 1)); + + ret.PastValue = new Tuple( + DateObject.MinValue.SafeCreateFromValue(pastYear, 1, 1), + DateObject.MinValue.SafeCreateFromValue(pastYear + decadeLastYear, 1, 1)); + + ret.Success = true; + + return ret; + } + + private DateTimeResolutionResult ParseCentury(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + int century = (referenceDate.Year / 100) + 1; + + var match = this.config.CenturyRegex.MatchExact(text, trim: true); + + if (match.Success) + { + var centuryStr = match.Groups[Constants.CenturyGroupName].Value; + if (!string.IsNullOrEmpty(centuryStr)) + { + if (!int.TryParse(centuryStr, out century)) + { + century = ConvertCJKToNum(centuryStr); + } + } + + var beginYear = (century - 1) * Constants.CenturyYearsCount; + var endYear = beginYear + Constants.CenturyYearsCount; + + var startDate = new DateObject(beginYear, 1, 1); + var endDate = new DateObject(endYear, 1, 1); + + ret.Timex = TimexUtility.GenerateDatePeriodTimex(startDate, endDate, DatePeriodTimexType.ByYear); + ret.FutureValue = new Tuple(startDate, endDate); + ret.PastValue = new Tuple(startDate, endDate); + ret.Success = true; + } + + return ret; + } + + // Only handle cases like "within/less than/more than x weeks from/before/after today" + private DateTimeResolutionResult ParseDatePointWithAgoAndLater(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var er = this.config.DateExtractor.Extract(text, referenceDate).FirstOrDefault(); + var trimmedText = text.Trim(); + var match = 
this.config.DatePointWithAgoAndLater.MatchExact(trimmedText, trim: true); + + if (er != null && match.Success) + { + var isAgo = match.Groups[Constants.AgoGroupName].Success; + var isWithin = match.Groups[Constants.WithinGroupName].Success; + var isMoreThan = match.Groups[Constants.MoreGroupName].Success; + + if (match.Groups[Constants.YesterdayGroupName].Success) + { + referenceDate = referenceDate.AddDays(-1); + } + + er.Text = text; + var pr = this.config.DateParser.Parse(er, referenceDate); + var durationExtractionResult = this.config.DurationExtractor.Extract(er.Text, referenceDate).FirstOrDefault(); + + if (durationExtractionResult != null) + { + var duration = this.config.DurationParser.Parse(durationExtractionResult); + var durationInSeconds = (double)((DateTimeResolutionResult)duration.Value).PastValue; + + if (isWithin) + { + DateObject startDate; + DateObject endDate; + + if (isAgo) + { + startDate = (DateObject)((DateTimeResolutionResult)pr.Value).PastValue; + endDate = startDate.AddSeconds(durationInSeconds); + } + else + { + endDate = (DateObject)((DateTimeResolutionResult)pr.Value).FutureValue; + startDate = endDate.AddSeconds(-durationInSeconds); + } + + if (startDate != DateObject.MinValue) + { + var durationTimex = ((DateTimeResolutionResult)duration.Value).Timex; + + ret.Timex = TimexUtility.GenerateDatePeriodTimexWithDuration(startDate, endDate, durationTimex); + ret.FutureValue = new Tuple(startDate, endDate); + ret.PastValue = new Tuple(startDate, endDate); + ret.Success = true; + return ret; + } + } + else if (isMoreThan) + { + ret.Mod = isAgo ? 
Constants.BEFORE_MOD : Constants.AFTER_MOD; + ret.Timex = pr.TimexStr; + ret.FutureValue = (DateObject)((DateTimeResolutionResult)pr.Value).FutureValue; + ret.PastValue = (DateObject)((DateTimeResolutionResult)pr.Value).PastValue; + ret.Success = true; + return ret; + } + } + } + + ret.Success = false; + return ret; + } + + private DateTimeResolutionResult ParseComplexDatePeriod(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var match = this.config.ComplexDatePeriodRegex.Match(text); + + if (match.Success) + { + var futureBegin = DateObject.MinValue; + var futureEnd = DateObject.MinValue; + var pastBegin = DateObject.MinValue; + var pastEnd = DateObject.MinValue; + var isSpecificDate = false; + var isStartByWeek = false; + var isEndByWeek = false; + bool isAmbiguousStart = false, isAmbiguousEnd = false; + var ambiguousRes = new DateTimeResolutionResult(); + var dateContext = GetYearContext(match.Groups[Constants.StartGroupName].Value.Trim(), match.Groups[Constants.EndGroupName].Value.Trim(), text); + + var startResolution = ParseSingleTimePoint(match.Groups[Constants.StartGroupName].Value.Trim(), referenceDate, dateContext); + + if (startResolution.Success) + { + futureBegin = (DateObject)startResolution.FutureValue; + pastBegin = (DateObject)startResolution.PastValue; + isSpecificDate = true; + } + + if (!startResolution.Success) + { + startResolution = ParseBaseDatePeriod(match.Groups[Constants.StartGroupName].Value.Trim(), referenceDate, dateContext); + + if (startResolution.Success) + { + futureBegin = ShiftResolution((Tuple)startResolution.FutureValue, match, start: true); + pastBegin = ShiftResolution((Tuple)startResolution.PastValue, match, start: true); + + if (startResolution.Timex.Contains("-W")) + { + isStartByWeek = true; + } + } + } + + if (startResolution.Success) + { + var endResolution = ParseSingleTimePoint(match.Groups[Constants.EndGroupName].Value.Trim(), referenceDate, dateContext); + + if 
(endResolution.Success) + { + + futureEnd = (DateObject)endResolution.FutureValue; + pastEnd = (DateObject)endResolution.PastValue; + isSpecificDate = true; + } + + if (!endResolution.Success || isAmbiguousEnd) + { + endResolution = ParseBaseDatePeriod(match.Groups[Constants.EndGroupName].Value.Trim(), referenceDate, dateContext); + + if (endResolution.Success) + { + // When the end group contains modifiers such as 'end of', 'middle of', the end resolution must be updated accordingly. + futureEnd = ShiftResolution((Tuple)endResolution.FutureValue, match, start: false); + pastEnd = ShiftResolution((Tuple)endResolution.PastValue, match, start: false); + + if (endResolution.Timex.Contains("-W")) + { + isEndByWeek = true; + } + } + } + + if (endResolution.Success) + { + // When start or end is ambiguous it is better to resolve it to the type of the unambiguous extraction. + // In Spanish, for example, 'de lunes a mar' (from Monday to Tuesday) or 'de enero a mar' (from January to March). + // In the first case 'mar' is resolved as Date (weekday), in the second case it is resolved as DatePeriod (month). 
+ if (isAmbiguousStart && isSpecificDate) + { + startResolution = ambiguousRes; + futureBegin = (DateObject)startResolution.FutureValue; + pastBegin = (DateObject)startResolution.PastValue; + } + else if (isAmbiguousEnd && isSpecificDate) + { + endResolution = ambiguousRes; + futureEnd = (DateObject)endResolution.FutureValue; + pastEnd = (DateObject)endResolution.PastValue; + } + + if (futureBegin > futureEnd) + { + if (dateContext == null || dateContext.IsEmpty()) + { + futureBegin = pastBegin; + } + else + { + futureBegin = DateContext.SwiftDateObject(futureBegin, futureEnd); + } + } + + if (pastEnd < pastBegin) + { + if (dateContext == null || dateContext.IsEmpty()) + { + pastEnd = futureEnd; + } + else + { + pastBegin = DateContext.SwiftDateObject(pastBegin, pastEnd); + } + } + + // If both begin/end are date ranges in "Month", the Timex should be ByMonth + // The year period case should already be handled in Basic Cases + var datePeriodTimexType = DatePeriodTimexType.ByMonth; + + if (isSpecificDate) + { + // If at least one of the begin/end is specific date, the Timex should be ByDay + datePeriodTimexType = DatePeriodTimexType.ByDay; + } + else if (isStartByWeek && isEndByWeek) + { + // If both begin/end are date ranges in "Week", the Timex should be ByWeek + datePeriodTimexType = DatePeriodTimexType.ByWeek; + } + + var hasYear = !startResolution.Timex.StartsWith(Constants.TimexFuzzyYear, StringComparison.Ordinal) || + !endResolution.Timex.StartsWith(Constants.TimexFuzzyYear, StringComparison.Ordinal); + + // If the year is not specified, the combined range timex will use fuzzy years. 
+ ret.Timex = TimexUtility.GenerateDatePeriodTimex(futureBegin, futureEnd, datePeriodTimexType, pastBegin, pastEnd, hasYear); + ret.FutureValue = new Tuple(futureBegin, futureEnd); + ret.PastValue = new Tuple(pastBegin, pastEnd); + ret.Success = true; + } + } + } + + return ret; + } + + private DateTimeResolutionResult ParseBaseDatePeriod(string text, DateObject referenceDate, DateContext dateContext = null) + { + var innerResult = ParseSimpleCases(text, referenceDate); + + if (!innerResult.Success) + { + innerResult = ParseDuration(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseOneWordPeriod(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseNumberWithUnit(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseDayToDay(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = MergeTwoTimePoints(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseYearAndMonth(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseYearToYear(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseMonthToMonth(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseYear(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseWeekOfMonth(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseWeekOfYear(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseWeekOfDate(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseMonthOfDate(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseWhichWeek(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseSeason(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseQuarter(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = 
ParseDecade(text, referenceDate); + } + + // Cases like "21st century" + if (!innerResult.Success) + { + innerResult = ParseCentury(text, referenceDate); + } + + if (!innerResult.Success) + { + innerResult = ParseDatePointWithAgoAndLater(text, referenceDate); + } + + if (innerResult.Success && dateContext != null) + { + innerResult = dateContext.ProcessDatePeriodEntityResolution(innerResult); + } + + return innerResult; + } + + } + + } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateTimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimeParser.cs similarity index 57% rename from .NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateTimeParser.cs rename to .NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimeParser.cs index f299cf56e0..f0f7c6135f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateTimeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimeParser.cs @@ -1,37 +1,22 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; -using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions.Chinese; -using Microsoft.Recognizers.Text.Number; -using Microsoft.Recognizers.Text.Number.Chinese; +using System.Globalization; + using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; -namespace Microsoft.Recognizers.Text.DateTime.Chinese +namespace Microsoft.Recognizers.Text.DateTime { - public class ChineseDateTimeParser : IDateTimeParser + public class BaseCJKDateTimeParser : IDateTimeParser { - public static readonly string ParserName = Constants.SYS_DATETIME_DATETIME; - - public static readonly Regex SimpleAmRegex = new Regex(DateTimeDefinitions.DateTimeSimpleAmRegex, RegexFlags); - - public static readonly Regex SimplePmRegex = new Regex(DateTimeDefinitions.DateTimeSimplePmRegex, RegexFlags); - - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - - private static readonly IDateTimeExtractor SingleDateExtractor = new ChineseDateExtractorConfiguration(); + public static readonly string ParserName = Constants.SYS_DATETIME_DATETIME; // "DateTime"; - private static readonly IDateTimeExtractor SingleTimeExtractor = new ChineseTimeExtractorConfiguration(); + private readonly ICJKDateTimeParserConfiguration config; - private readonly IDateTimeExtractor durationExtractor = new ChineseDurationExtractorConfiguration(); - - private readonly IExtractor integerExtractor = new IntegerExtractor(); - - private readonly IParser numberParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Chinese))); - - private readonly IFullDateTimeParserConfiguration config; - - public ChineseDateTimeParser(IFullDateTimeParserConfiguration configuration) + public BaseCJKDateTimeParser(ICJKDateTimeParserConfiguration configuration) { config = configuration; } @@ -56,12 +41,12 @@ public DateTimeParseResult Parse(ExtractResult er, 
DateObject refDate) if (!innerResult.Success) { - innerResult = ParseTimeOfToday(er.Text, referenceTime); + innerResult = ParseTimeOfSpecialDayRegex(er.Text, referenceTime); } if (!innerResult.Success) { - innerResult = ParserDurationWithBeforeAndAfter(er.Text, referenceTime); + innerResult = ParserDurationWithAgoAndLater(er.Text, referenceTime); } if (innerResult.Success) @@ -101,29 +86,19 @@ public List FilterResults(string query, List Constants.DayHourCount) + { + futureDate = futureDate.AddDays(1); + pastDate = pastDate.AddDays(1); + } + var hour = time.Hour; var min = time.Minute; var sec = time.Second; // handle morning, afternoon - if (SimplePmRegex.IsMatch(text) && hour < Constants.HalfDayHourCount) + if (this.config.SimplePmRegex.IsMatch(text) && hour < Constants.HalfDayHourCount) { hour += Constants.HalfDayHourCount; } - else if (SimpleAmRegex.IsMatch(text) && hour >= Constants.HalfDayHourCount) + else if (this.config.SimpleAmRegex.IsMatch(text) && hour >= Constants.HalfDayHourCount) { hour -= Constants.HalfDayHourCount; } @@ -196,12 +186,11 @@ private DateTimeResolutionResult MergeDateAndTime(string text, DateObject refere timeStr = timeStr.Substring(0, timeStr.Length - 4); } - timeStr = "T" + hour.ToString("D2") + timeStr.Substring(3); ret.Timex = pr1.TimexStr + timeStr; var val = (DateTimeResolutionResult)pr2.Value; - if (hour <= Constants.HalfDayHourCount && !SimplePmRegex.IsMatch(text) && !SimpleAmRegex.IsMatch(text) && + if (hour <= Constants.HalfDayHourCount && !this.config.SimplePmRegex.IsMatch(text) && !this.config.SimpleAmRegex.IsMatch(text) && !string.IsNullOrEmpty(val.Comment)) { // ret.Timex += "ampm"; @@ -215,10 +204,60 @@ private DateTimeResolutionResult MergeDateAndTime(string text, DateObject refere return ret; } - private DateTimeResolutionResult ParseTimeOfToday(string text, DateObject referenceTime) + private DateTimeResolutionResult ParseTimeOfSpecialDayRegex(string text, DateObject referenceTime) { var ret = new 
DateTimeResolutionResult(); - var ers = SingleTimeExtractor.Extract(text, referenceTime); + var ers = this.config.TimeExtractor.Extract(text, referenceTime); + + // Handle 'eod', 'end of day' + var eod = this.config.TimeOfSpecialDayRegex.Match(text); + var matchAgoLater = this.config.AgoLaterRegex.Match(text); + if (matchAgoLater.Success) + { + var durationRes = this.config.DurationExtractor.Extract(text, referenceTime); + var pr1 = config.DurationParser.Parse(durationRes[0], referenceTime); + var isFuture = matchAgoLater.Groups[Constants.LaterGroupName].Success; + var timex = pr1.TimexStr; + + // handle less and more mode + if (eod.Groups[Constants.LessGroupName].Success) + { + ret.Mod = Constants.LESS_THAN_MOD; + } + else if (eod.Groups[Constants.MoreGroupName].Success) + { + ret.Mod = Constants.MORE_THAN_MOD; + } + + var resultDateTime = DurationParsingUtil.ShiftDateTime(timex, referenceTime, future: isFuture); + ret.Timex = TimexUtility.GenerateDateTimeTimex(resultDateTime); + ret.FutureValue = ret.PastValue = resultDateTime; + ret.SubDateTimeEntities = new List { pr1 }; + + ret.Success = true; + return ret; + } + + if (eod.Groups[Constants.SpecificEndOfGroupName].Success && ers.Count == 0) + { + ret = ParseSpecialTimeOfDate(text, referenceTime); + return ret; + } + + if (eod.Success && ers.Count != 1) + { + if (eod.Groups[Constants.TomorrowGroupName].Success) + { + ret = DateTimeFormatUtil.ResolveEndOfDay(DateTimeFormatUtil.FormatDate(referenceTime.AddDays(1)), referenceTime.AddDays(1), referenceTime.AddDays(1)); + } + else + { + ret = DateTimeFormatUtil.ResolveEndOfDay(DateTimeFormatUtil.FormatDate(referenceTime), referenceTime, referenceTime); + } + + return ret; + } + if (ers.Count != 1) { return ret; @@ -237,58 +276,15 @@ private DateTimeResolutionResult ParseTimeOfToday(string text, DateObject refere var min = time.Minute; var sec = time.Second; - var match = ChineseDateTimeExtractorConfiguration.TimeOfTodayRegex.Match(text); + var match = 
this.config.TimeOfSpecialDayRegex.Match(text); if (match.Success) { var matchStr = match.Value; var swift = 0; - switch (matchStr) - { - case "今晚": - if (hour < Constants.HalfDayHourCount) - { - hour += Constants.HalfDayHourCount; - } - - break; - case "今早": - case "今晨": - if (hour >= Constants.HalfDayHourCount) - { - hour -= Constants.HalfDayHourCount; - } - - break; - case "明晚": - swift = 1; - if (hour < Constants.HalfDayHourCount) - { - hour += Constants.HalfDayHourCount; - } - break; - case "明早": - case "明晨": - swift = 1; - if (hour >= Constants.HalfDayHourCount) - { - hour -= Constants.HalfDayHourCount; - } - - break; - case "昨晚": - swift = -1; - if (hour < Constants.HalfDayHourCount) - { - hour += Constants.HalfDayHourCount; - } - - break; - default: - break; - } + this.config.AdjustByTimeOfDay(matchStr, ref hour, ref swift); var date = referenceTime.AddDays(swift).Date; @@ -299,7 +295,17 @@ private DateTimeResolutionResult ParseTimeOfToday(string text, DateObject refere timeStr = timeStr.Substring(0, timeStr.Length - 4); } - timeStr = "T" + hour.ToString("D2") + timeStr.Substring(3); + // handle less and more mode + if (match.Groups[Constants.LessGroupName].Success) + { + ret.Mod = Constants.LESS_THAN_MOD; + } + else if (match.Groups[Constants.MoreGroupName].Success) + { + ret.Mod = Constants.MORE_THAN_MOD; + } + + timeStr = "T" + hour.ToString("D2", CultureInfo.InvariantCulture) + timeStr.Substring(3); ret.Timex = DateTimeFormatUtil.FormatDate(date) + timeStr; ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(date.Year, date.Month, date.Day, hour, min, sec); @@ -310,28 +316,64 @@ private DateTimeResolutionResult ParseTimeOfToday(string text, DateObject refere return ret; } + private DateTimeResolutionResult ParseSpecialTimeOfDate(string text, DateObject refDateTime) + { + + var ret = new DateTimeResolutionResult(); + var ers = this.config.DateExtractor.Extract(text, refDateTime); + if (ers.Count != 1) + { + return ret; + } + + var pr 
= this.config.DateParser.Parse(ers[0], refDateTime); + var futureDate = (DateObject)((DateTimeResolutionResult)pr.Value).FutureValue; + var pastDate = (DateObject)((DateTimeResolutionResult)pr.Value).PastValue; + + ret = DateTimeFormatUtil.ResolveEndOfDay(pr.TimexStr, futureDate, pastDate); + + return ret; + } + // handle cases like "5分钟前", "1小时以后" - private DateTimeResolutionResult ParserDurationWithBeforeAndAfter(string text, DateObject referenceDate) + private DateTimeResolutionResult ParserDurationWithAgoAndLater(string text, DateObject referenceDate) { var ret = new DateTimeResolutionResult(); - var durationRes = durationExtractor.Extract(text, referenceDate); + var durationRes = this.config.DurationExtractor.Extract(text, referenceDate); if (durationRes.Count > 0) { - var match = ChineseDateTimeExtractorConfiguration.DateTimePeriodUnitRegex.Match(text); + var matchAgoLater = this.config.AgoLaterRegex.Match(text); + + if (matchAgoLater.Success) + { + var pr = config.DurationParser.Parse(durationRes[0], referenceDate); + var isFuture = matchAgoLater.Groups[Constants.LaterGroupName].Success; + var timex = pr.TimexStr; + + var resultDateTime = DurationParsingUtil.ShiftDateTime(timex, referenceDate, future: isFuture); + ret.Timex = TimexUtility.GenerateDateTimeTimex(resultDateTime); + ret.FutureValue = ret.PastValue = resultDateTime; + ret.SubDateTimeEntities = new List { pr }; + + ret.Success = true; + return ret; + } + + var match = this.config.DateTimePeriodUnitRegex.Match(text); if (match.Success) { var suffix = text.Substring((int)durationRes[0].Start + (int)durationRes[0].Length).Trim(); - var srcUnit = match.Groups["unit"].Value; + var srcUnit = match.Groups[Constants.UnitGroupName].Value; var numberStr = text.Substring((int)durationRes[0].Start, match.Index - (int)durationRes[0].Start).Trim(); - var number = ConvertChineseToNum(numberStr); + var number = ConvertCJKToNum(numberStr); if (this.config.UnitMap.ContainsKey(srcUnit)) { var unitStr = 
this.config.UnitMap[srcUnit]; - var beforeMatch = ChineseDateTimeExtractorConfiguration.BeforeRegex.Match(suffix); + var beforeMatch = this.config.BeforeRegex.Match(suffix); if (beforeMatch.Success && suffix.StartsWith(beforeMatch.Value, StringComparison.InvariantCulture)) { DateObject date; @@ -356,8 +398,8 @@ private DateTimeResolutionResult ParserDurationWithBeforeAndAfter(string text, D return ret; } - var afterMatch = ChineseDateTimeExtractorConfiguration.AfterRegex.Match(suffix); - if (afterMatch.Success && suffix.StartsWith(afterMatch.Value)) + var afterMatch = this.config.AfterRegex.Match(suffix); + if (afterMatch.Success && suffix.StartsWith(afterMatch.Value, StringComparison.Ordinal)) { DateObject date; switch (unitStr) @@ -387,16 +429,16 @@ private DateTimeResolutionResult ParserDurationWithBeforeAndAfter(string text, D return ret; } - // convert Chinese Number to Integer - private int ConvertChineseToNum(string numStr) + // convert CJK Number to Integer + private int ConvertCJKToNum(string numStr) { var num = -1; - var er = integerExtractor.Extract(numStr); + var er = this.config.IntegerExtractor.Extract(numStr); if (er.Count != 0) { if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) { - num = Convert.ToInt32((double)(numberParser.Parse(er[0]).Value ?? 0)); + num = Convert.ToInt32((double)(this.config.NumberParser.Parse(er[0]).Value ?? 0)); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimePeriodParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimePeriodParser.cs new file mode 100644 index 0000000000..eb96de978f --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimePeriodParser.cs @@ -0,0 +1,815 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using Microsoft.Recognizers.Text.Utilities; +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public class BaseCJKDateTimePeriodParser : IDateTimeParser + { + public static readonly string ParserName = Constants.SYS_DATETIME_DATETIMEPERIOD; + + private readonly ICJKDateTimePeriodParserConfiguration config; + + public BaseCJKDateTimePeriodParser(ICJKDateTimePeriodParserConfiguration configuration) + { + config = configuration; + } + + public ParseResult Parse(ExtractResult extResult) + { + return this.Parse(extResult, DateObject.Now); + } + + public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) + { + var referenceTime = refDate; + + object value = null; + if (er.Type.Equals(ParserName, StringComparison.Ordinal)) + { + var innerResult = MergeDateAndTimePeriod(er.Text, referenceTime); + if (!innerResult.Success) + { + innerResult = MergeTwoTimePoints(er.Text, referenceTime); + } + + if (!innerResult.Success) + { + innerResult = ParseDuration(er.Text, referenceTime); + } + + if (!innerResult.Success) + { + innerResult = ParseSpecificNight(er.Text, referenceTime); + } + + if (!innerResult.Success) + { + innerResult = ParseNumberWithUnit(er.Text, referenceTime); + } + + if (!innerResult.Success) + { + innerResult = ParseRelativeUnit(er.Text, referenceTime); + } + + if (!innerResult.Success) + { + innerResult = ParseDateWithPeriodSuffix(er.Text, referenceTime); + } + + if (!innerResult.Success) + { + innerResult = ParseDateWithTimePeriodSuffix(er.Text, referenceTime); + } + + if (innerResult.Success) + { + if (innerResult.Mod == Constants.BEFORE_MOD) + { + // Cases like "last tuesday by 2:00 pm" there is no StartTime + innerResult.FutureResolution = new Dictionary + { + { + TimeTypeConstants.END_DATETIME, + DateTimeFormatUtil.FormatDateTime((DateObject)innerResult.FutureValue) + }, + }; + + innerResult.PastResolution = new Dictionary + { + { + 
TimeTypeConstants.END_DATETIME, + DateTimeFormatUtil.FormatDateTime((DateObject)innerResult.PastValue) + }, + }; + } + else + { + innerResult.FutureResolution = new Dictionary + { + { + TimeTypeConstants.START_DATETIME, + DateTimeFormatUtil.FormatDateTime(((Tuple)innerResult.FutureValue).Item1) + }, + { + TimeTypeConstants.END_DATETIME, + DateTimeFormatUtil.FormatDateTime(((Tuple)innerResult.FutureValue).Item2) + }, + }; + + innerResult.PastResolution = new Dictionary + { + { + TimeTypeConstants.START_DATETIME, + DateTimeFormatUtil.FormatDateTime(((Tuple)innerResult.PastValue).Item1) + }, + { + TimeTypeConstants.END_DATETIME, + DateTimeFormatUtil.FormatDateTime(((Tuple)innerResult.PastValue).Item2) + }, + }; + } + + value = innerResult; + } + } + + var ret = new DateTimeParseResult + { + Text = er.Text, + Start = er.Start, + Length = er.Length, + Type = er.Type, + Data = er.Data, + Value = value, + TimexStr = value == null ? string.Empty : ((DateTimeResolutionResult)value).Timex, + ResolutionStr = string.Empty, + }; + + return ret; + } + + public List FilterResults(string query, List candidateResults) + { + return candidateResults; + } + + private DateTimeResolutionResult MergeDateAndTimePeriod(string text, DateObject referenceTime) + { + var ret = new DateTimeResolutionResult(); + + var er1 = this.config.DateExtractor.Extract(text, referenceTime); + var er2 = this.config.TimePeriodExtractor.Extract(text, referenceTime); + if (er1.Count != 1 || er2.Count != 1) + { + return ret; + } + + var pr1 = this.config.DateParser.Parse(er1[0], referenceTime); + var pr2 = this.config.TimePeriodParser.Parse(er2[0], referenceTime); + var timeRange = (Tuple)((DateTimeResolutionResult)pr2.Value).FutureValue; + var beginTime = timeRange.Item1; + var endTime = timeRange.Item2; + var futureDate = (DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue; + var pastDate = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue; + + // handle cases with time like 25時 which 
resolve to the next day + var swiftDay = 0; + var timexHours = TimexUtility.ParseHoursFromTimePeriodTimex(pr2.TimexStr); + if (!this.config.AmPmDescRegex.Match(text).Success && timexHours.Item1 < Constants.HalfDayHourCount && timexHours.Item2 < Constants.HalfDayHourCount) + { + ret.Comment = Constants.Comment_AmPm; + } + + if (timexHours.Item1 > Constants.DayHourCount) + { + pastDate = pastDate.AddDays(1); + futureDate = futureDate.AddDays(1); + } + else if (timexHours.Item2 > Constants.DayHourCount) + { + swiftDay++; + } + + var pastDateAlt = pastDate.AddDays(swiftDay); + var futureDateAlt = futureDate.AddDays(swiftDay); + + ret.FutureValue = + new Tuple( + DateObject.MinValue.SafeCreateFromValue(futureDate.Year, futureDate.Month, futureDate.Day, beginTime.Hour, beginTime.Minute, beginTime.Second), + DateObject.MinValue.SafeCreateFromValue(futureDateAlt.Year, futureDateAlt.Month, futureDateAlt.Day, endTime.Hour, endTime.Minute, endTime.Second)); + + ret.PastValue = + new Tuple( + DateObject.MinValue.SafeCreateFromValue(pastDate.Year, pastDate.Month, pastDate.Day, beginTime.Hour, beginTime.Minute, beginTime.Second), + DateObject.MinValue.SafeCreateFromValue(pastDateAlt.Year, pastDateAlt.Month, pastDateAlt.Day, endTime.Hour, endTime.Minute, endTime.Second)); + + ret.Timex = TimexUtility.GenerateSplitDateTimePeriodTimex(pr1.TimexStr, pr2.TimexStr); + ret.Success = !string.IsNullOrEmpty(ret.Timex); + + return ret; + } + + // Cases like "last tuesday by 2:00pm" + private DateTimeResolutionResult ParseDateWithTimePeriodSuffix(string text, DateObject referenceTime) + { + var ret = new DateTimeResolutionResult(); + + var dateEr = this.config.DateExtractor.Extract(text, referenceTime); + var timeEr = this.config.TimeExtractor.Extract(text, referenceTime); + + if (dateEr.Count > 0 && timeEr.Count > 0) + { + var match = config.PastRegex.MatchEnd(text, trim: true); + + if (match.Groups[Constants.BEFORE_MOD].Success) + { + ret.Mod = Constants.BEFORE_MOD; + } + + var datePr = 
// Builds a date-time range from two time points found in `text`, where at least one
// of them carries a date. Texts matching the 'within' group of FutureRegex are
// delegated to ParseDuration; texts matching WeekDayRegex are rejected here.
private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject referenceTime)
{
    var ret = new DateTimeResolutionResult();
    DateTimeParseResult pr1, pr2;
    bool bothHaveDates = false, beginHasDate = false, endHasDate = false;

    var er1 = this.config.TimeExtractor.Extract(text, referenceTime);
    var er2 = this.config.DateTimeExtractor.Extract(text, referenceTime);

    // cases including 'within' are processed in ParseDuration
    var match = config.FutureRegex.Match(text);
    if (match.Groups[Constants.WithinGroupName].Success)
    {
        return ParseDuration(text, referenceTime);
    }

    if (config.WeekDayRegex.Match(text).Success)
    {
        return ret;
    }

    if (er2.Count == 2)
    {
        pr1 = this.config.DateTimeParser.Parse(er2[0], referenceTime);
        pr2 = this.config.DateTimeParser.Parse(er2[1], referenceTime);
        bothHaveDates = true;
    }
    else if (er2.Count == 1 && er1.Count == 2)
    {
        if (!er2[0].IsOverlap(er1[0]))
        {
            pr1 = this.config.TimeParser.Parse(er1[0], referenceTime);
            pr2 = this.config.DateTimeParser.Parse(er2[0], referenceTime);
            endHasDate = true;
        }
        else
        {
            pr1 = this.config.DateTimeParser.Parse(er2[0], referenceTime);
            pr2 = this.config.TimeParser.Parse(er1[1], referenceTime);
            beginHasDate = true;
        }
    }
    else if (er2.Count == 1 && er1.Count == 1)
    {
        if (er1[0].Start < er2[0].Start)
        {
            pr1 = this.config.TimeParser.Parse(er1[0], referenceTime);
            pr2 = this.config.DateTimeParser.Parse(er2[0], referenceTime);
            endHasDate = true;
        }
        else
        {
            pr1 = this.config.DateTimeParser.Parse(er2[0], referenceTime);
            pr2 = this.config.TimeParser.Parse(er1[0], referenceTime);
            beginHasDate = true;
        }
    }
    else
    {
        // Includes the case where both ends are plain times: that is a TimePeriod,
        // not a DateTimePeriod.
        return ret;
    }

    if (pr1?.Value == null || pr2?.Value == null)
    {
        return ret;
    }

    var leftResult = (DateTimeResolutionResult)pr1.Value;
    var rightResult = (DateTimeResolutionResult)pr2.Value;
    var leftResultTime = (DateObject)leftResult.FutureValue;
    var rightResultTime = (DateObject)rightResult.FutureValue;

    // When the future begin lands after the end, use the past resolution of the begin for its date.
    var futureBegin = leftResultTime;
    var futureEnd = rightResultTime;
    if (futureBegin > futureEnd)
    {
        futureBegin = (DateObject)leftResult.PastValue;
    }

    // A side without its own date defaults to the reference date.
    var referenceDay = DateObject.MinValue.SafeCreateFromValue(referenceTime.Year, referenceTime.Month, referenceTime.Day);
    var leftTime = referenceDay;
    var rightTime = referenceDay;

    if (bothHaveDates || beginHasDate)
    {
        leftTime = DateObject.MinValue.SafeCreateFromValue(futureBegin.Year, futureBegin.Month, futureBegin.Day);
    }

    if (bothHaveDates || endHasDate)
    {
        rightTime = DateObject.MinValue.SafeCreateFromValue(futureEnd.Year, futureEnd.Month, futureEnd.Day);
    }

    // Hour/Minute/Second of a DateTime are never negative, so they are added as-is.
    leftTime = leftTime.AddHours(leftResultTime.Hour).AddMinutes(leftResultTime.Minute).AddSeconds(leftResultTime.Second);
    rightTime = rightTime.AddHours(rightResultTime.Hour).AddMinutes(rightResultTime.Minute).AddSeconds(rightResultTime.Second);

    // the right side time contains "ampm", while the left side doesn't
    if (rightResult.Comment is Constants.Comment_AmPm &&
        leftResult.Comment == null && rightTime < leftTime)
    {
        rightTime = rightTime.AddHours(Constants.HalfDayHourCount);
    }

    // check if the right time is smaller than the left time, if yes, add one day
    if (rightTime < leftTime)
    {
        rightTime = rightTime.AddDays(1);
    }

    ret.FutureValue = ret.PastValue = new Tuple<DateObject, DateObject>(leftTime, rightTime);

    var leftTimex = pr1.TimexStr;
    var rightTimex = pr2.TimexStr;
    if (beginHasDate)
    {
        rightTimex = DateTimeFormatUtil.LuisDateShortTime(rightTime, pr2.TimexStr);
    }
    else if (endHasDate)
    {
        leftTimex = DateTimeFormatUtil.LuisDateShortTime(leftTime, pr1.TimexStr);
    }

    ret.Timex = TimexUtility.GenerateDateTimePeriodTimex(leftTimex, rightTimex, rightTime - leftTime);
    ret.Success = true;
    return ret;
}
// Parse cases like "this night": a specific time of day (optionally shifted to the
// next/last day), or a date followed by a time of day such as morning/afternoon.
private DateTimeResolutionResult ParseSpecificNight(string text, DateObject referenceTime)
{
    var ret = new DateTimeResolutionResult();
    var trimmedText = text.Trim();
    int beginHour, endHour, endMin = 0;
    string timeStr;

    // Builds the [begin, end] pair on the given day. The end minute value is also used
    // for the seconds component, exactly as the surrounding parsers do.
    Tuple<DateObject, DateObject> BuildRange(DateObject day, int fromHour, int toHour, int toMin)
    {
        return new Tuple<DateObject, DateObject>(
            DateObject.MinValue.SafeCreateFromValue(day.Year, day.Month, day.Day, fromHour, 0, 0),
            DateObject.MinValue.SafeCreateFromValue(day.Year, day.Month, day.Day, toHour, toMin, toMin));
    }

    // Handle 昨晚 (last night), 今晨 (this morning)
    if (this.config.SpecificTimeOfDayRegex.IsExactMatch(trimmedText, trim: true))
    {
        // handle the ambiguous case "ぎりぎり" [the latest possible time]
        var latest = this.config.SpecificTimeOfDayRegex.Match(text);
        if (latest.Groups[Constants.LatestGroupName].Success)
        {
            // Resolved as the one-minute window ending at the reference time.
            var beginDate = referenceTime.AddMinutes(-1);
            var endDate = referenceTime;
            ret.Timex = TimexUtility.GenerateDateTimePeriodTimex(beginDate, endDate);
            ret.FutureValue = ret.PastValue = new Tuple<DateObject, DateObject>(beginDate, endDate);
            ret.Success = true;
            return ret;
        }

        if (!this.config.GetMatchedTimeRangeAndSwift(trimmedText, out timeStr, out beginHour, out endHour, out endMin, out int swift))
        {
            return ret;
        }

        // An explicit next/last marker overrides the swift reported by the configuration.
        if (this.config.NextRegex.IsMatch(trimmedText))
        {
            swift = 1;
        }
        else if (this.config.LastRegex.IsMatch(trimmedText))
        {
            swift = -1;
        }

        var date = referenceTime.AddDays(swift).Date;

        ret.Timex = DateTimeFormatUtil.FormatDate(date) + timeStr;
        ret.FutureValue = ret.PastValue = BuildRange(date, beginHour, endHour, endMin);
        ret.Success = true;
        return ret;
    }

    // Handle cases like morning, afternoon
    if (!this.config.GetMatchedTimeRange(trimmedText, out timeStr, out beginHour, out endHour, out endMin))
    {
        return ret;
    }

    // handle Date followed by morning, afternoon
    var match = this.config.TimeOfDayRegex.Match(trimmedText);
    if (!match.Success)
    {
        return ret;
    }

    var beforeStr = trimmedText.Substring(0, match.Index).Trim();
    var ers = this.config.DateExtractor.Extract(beforeStr, referenceTime);

    // The date must cover everything that precedes the time-of-day expression.
    if (ers.Count == 0 || ers[0].Length != beforeStr.Length)
    {
        return ret;
    }

    var pr = this.config.DateParser.Parse(ers[0], referenceTime);

    // A failed date parse carries a null Value; report "no match" instead of throwing.
    if (!(pr?.Value is DateTimeResolutionResult dateResult))
    {
        return ret;
    }

    var futureDate = (DateObject)dateResult.FutureValue;
    var pastDate = (DateObject)dateResult.PastValue;

    ret.Timex = pr.TimexStr + timeStr;
    ret.FutureValue = BuildRange(futureDate, beginHour, endHour, endMin);
    ret.PastValue = BuildRange(pastDate, beginHour, endHour, endMin);
    ret.Success = true;

    return ret;
}
0); + if (this.config.UnitMap.ContainsKey(srcUnit)) + { + var numStr = pr.ResolutionStr; + unitStr = this.config.UnitMap[srcUnit]; + var prefixMatch = this.config.PastRegex.MatchExact(beforeStr, trim: true); + + if (prefixMatch.Success) + { + DateObject beginDate, endDate; + switch (unitStr) + { + case "H": + beginDate = referenceTime.AddHours(-(double)pr.Value); + endDate = referenceTime; + break; + case "M": + beginDate = referenceTime.AddMinutes(-(double)pr.Value); + endDate = referenceTime; + break; + case "S": + beginDate = referenceTime.AddSeconds(-(double)pr.Value); + endDate = referenceTime; + break; + default: + return ret; + } + + ret.Timex = + $"({DateTimeFormatUtil.LuisDate(beginDate)}T{DateTimeFormatUtil.LuisTime(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)}T{DateTimeFormatUtil.LuisTime(endDate)},PT{numStr}{unitStr[0]})"; + ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); + ret.Success = true; + return ret; + } + + prefixMatch = this.config.FutureRegex.MatchExact(beforeStr, trim: true); + + if (!prefixMatch.Success) + { + prefixMatch = this.config.TimePeriodLeftRegex.MatchEnd(beforeStr, trim: true); + + } + + if (prefixMatch.Success) + { + DateObject beginDate, endDate; + switch (unitStr) + { + case "H": + beginDate = referenceTime; + endDate = referenceTime.AddHours((double)pr.Value); + break; + case "M": + beginDate = referenceTime; + endDate = referenceTime.AddMinutes((double)pr.Value); + break; + case "S": + beginDate = referenceTime; + endDate = referenceTime.AddSeconds((double)pr.Value); + break; + default: + return ret; + } + + ret.Timex = + $"({DateTimeFormatUtil.LuisDate(beginDate)}T{DateTimeFormatUtil.LuisTime(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)}T{DateTimeFormatUtil.LuisTime(endDate)},PT{numStr}{unitStr[0]})"; + ret.FutureValue = ret.PastValue = new Tuple(beginDate, endDate); + ret.Success = true; + return ret; + } + } + } + + // handle "last hour" + var match = this.config.UnitRegex.Match(text); + if 
// Resolves texts matched by RestOfDateRegex into a period built by
// TimexUtility.GenerateRelativeUnitDateTimePeriodTimex from the reference time.
// Returns an unsuccessful result when the regex does not match or the matched unit
// is not present in the configuration's UnitMap.
private DateTimeResolutionResult ParseRelativeUnit(string text, DateObject referenceTime)
{
    var ret = new DateTimeResolutionResult();

    var match = this.config.RestOfDateRegex.Match(text);
    if (!match.Success)
    {
        return ret;
    }

    var srcUnit = match.Groups[Constants.UnitGroupName].Value;

    // Look the unit up safely: indexing UnitMap before checking for the key threw
    // KeyNotFoundException for units the configuration does not know.
    if (!this.config.UnitMap.TryGetValue(srcUnit, out var unitStr))
    {
        return ret;
    }

    const int swiftValue = 1;
    var beginTime = referenceTime;
    var endTime = referenceTime;

    ret.Timex = TimexUtility.GenerateRelativeUnitDateTimePeriodTimex(ref beginTime, ref endTime, referenceTime, unitStr, swiftValue);

    ret.FutureValue = ret.PastValue = new Tuple<DateObject, DateObject>(beginTime, endTime);
    ret.Success = !string.IsNullOrEmpty(ret.Timex);

    return ret;
}
// Parser for CJK duration mentions. Strategies are tried in order: merged durations
// (several units in one mention), inexact number + unit, the "an unit" regex, and
// finally the configuration's internal number-with-unit parser.
public class BaseCJKDurationParser : IDateTimeParser
{
    public static readonly string ParserName = Constants.SYS_DATETIME_DURATION;

    private readonly ICJKDurationParserConfiguration config;

    public BaseCJKDurationParser(ICJKDurationParserConfiguration configuration)
    {
        config = configuration;
    }

    // Parses relative to the current local time.
    public ParseResult Parse(ExtractResult extResult)
    {
        return this.Parse(extResult, DateObject.Now);
    }

    // Parses one extracted duration. Returns null when no strategy applies and the
    // internal parser does not yield a UnitValue.
    public DateTimeParseResult Parse(ExtractResult er, DateObject refDate)
    {
        var dateTimeParseResult = ParseMergedDuration(er.Text, refDate);

        if (!dateTimeParseResult.Success)
        {
            dateTimeParseResult = DurationParsingUtil.ParseInexactNumberUnit(er.Text, this.config);
        }

        if (!dateTimeParseResult.Success)
        {
            dateTimeParseResult = ParseAnUnit(er.Text);
        }

        if (!dateTimeParseResult.Success)
        {
            var parseResult = this.config.InternalParser.Parse(er);
            var unitResult = parseResult.Value as UnitValue;

            if (unitResult == null)
            {
                return null;
            }

            var unitStr = unitResult.Unit;

            // A unit without an explicit number counts as one unit.
            var number = string.IsNullOrEmpty(unitResult.Number) ? 1 : double.Parse(unitResult.Number, CultureInfo.InvariantCulture);

            dateTimeParseResult.Timex = TimexUtility.GenerateDurationTimex(number, unitStr, DurationParsingUtil.IsLessThanDay(unitStr));
            dateTimeParseResult.FutureValue = dateTimeParseResult.PastValue = number * this.config.UnitValueMap[unitStr];
            dateTimeParseResult.Success = true;
        }

        if (dateTimeParseResult.Success)
        {
            // Resolution strings are machine-readable, so format them culture-invariantly.
            dateTimeParseResult.FutureResolution = new Dictionary<string, string>
            {
                { TimeTypeConstants.DURATION, Convert.ToString(dateTimeParseResult.FutureValue, CultureInfo.InvariantCulture) },
            };

            dateTimeParseResult.PastResolution = new Dictionary<string, string>
            {
                { TimeTypeConstants.DURATION, Convert.ToString(dateTimeParseResult.PastValue, CultureInfo.InvariantCulture) },
            };

            var moreOrLessMatch = config.MoreOrLessRegex.Match(er.Text);
            if (moreOrLessMatch.Success)
            {
                if (moreOrLessMatch.Groups[Constants.LessGroupName].Success)
                {
                    dateTimeParseResult.Mod = Constants.LESS_THAN_MOD;
                }
                else if (moreOrLessMatch.Groups[Constants.MoreGroupName].Success)
                {
                    dateTimeParseResult.Mod = Constants.MORE_THAN_MOD;
                }
            }
        }

        var ret = new DateTimeParseResult
        {
            Text = er.Text,
            Start = er.Start,
            Length = er.Length,
            Type = er.Type,
            Data = er.Data,
            Value = dateTimeParseResult,
            TimexStr = dateTimeParseResult.Timex,
            ResolutionStr = string.Empty,
        };

        return ret;
    }

    public List<DateTimeParseResult> FilterResults(string query, List<DateTimeParseResult> candidateResults)
    {
        return candidateResults;
    }

    // Handles mentions matched by AnUnitRegex with its "another" group set; the
    // half / quarter / three-quarter groups select the numeric factor (default 1).
    private DateTimeResolutionResult ParseAnUnit(string text)
    {
        var ret = new DateTimeResolutionResult();

        var match = this.config.AnUnitRegex.Match(text);
        if (!match.Groups[Constants.AnotherGroupName].Success)
        {
            return ret;
        }

        // Later groups take precedence: three-quarter over quarter over half.
        var numVal = match.Groups[Constants.HalfGroupName].Success ? 0.5 : 1;
        numVal = match.Groups[Constants.QuarterGroupName].Success ? 0.25 : numVal;
        numVal = match.Groups[Constants.ThreeQuarterGroupName].Success ? 0.75 : numVal;

        var srcUnit = match.Groups[Constants.UnitGroupName].Value;
        if (this.config.UnitMap.TryGetValue(srcUnit, out var unitStr))
        {
            ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, DurationParsingUtil.IsLessThanDay(unitStr));
            ret.FutureValue = ret.PastValue = numVal * this.config.UnitValueMap[unitStr];
            ret.Success = true;
        }

        return ret;
    }

    // Handles merged durations like "1 month 21 days": several durations that together
    // span the whole text. Succeeds only when at least one part was actually merged.
    private DateTimeResolutionResult ParseMergedDuration(string text, DateObject referenceTime)
    {
        var ret = new DateTimeResolutionResult();
        var durationExtractor = this.config.DurationExtractor;

        // DurationExtractor without parameter will not extract merged duration
        var ers = durationExtractor.Extract(text, referenceTime);

        // only handle merged duration cases like "1 month 21 days"
        if (ers.Count <= 1)
        {
            return ret;
        }

        var start = ers[0].Start ?? 0;
        if (start != 0)
        {
            // NOTE(review): "start - 1" skips the character right before the first
            // duration; this looks like an off-by-one (expected "start") but is kept
            // as-is because existing results may depend on it - confirm before changing.
            var beforeStr = text.Substring(0, start - 1);
            if (!string.IsNullOrWhiteSpace(beforeStr))
            {
                return ret;
            }
        }

        var end = ers[ers.Count - 1].Start + ers[ers.Count - 1].Length ?? 0;
        if (end != text.Length)
        {
            var afterStr = text.Substring(end);
            if (!string.IsNullOrWhiteSpace(afterStr))
            {
                return ret;
            }
        }

        var prs = new List<DateTimeParseResult>();
        var timexDict = new Dictionary<string, string>();
        var unitRegex = this.config.DurationUnitRegex;

        // insert timex into a dictionary
        foreach (var er in ers)
        {
            var unitMatch = unitRegex.Match(er.Text);
            if (!unitMatch.Success ||
                !this.config.UnitMap.TryGetValue(unitMatch.Groups[Constants.UnitGroupName].Value, out var unitKey))
            {
                continue;
            }

            var pr = Parse(er, referenceTime);
            if (pr?.Value == null)
            {
                continue;
            }

            // The same unit appearing twice cannot be expressed as one compound timex;
            // treat the text as not mergeable rather than throwing on the duplicate key.
            if (timexDict.ContainsKey(unitKey))
            {
                return new DateTimeResolutionResult();
            }

            timexDict.Add(unitKey, pr.TimexStr);
            prs.Add(pr);
        }

        // Nothing merged: stay unsuccessful so the caller falls back to the other
        // strategies instead of dereferencing a null FutureValue.
        if (prs.Count == 0)
        {
            return ret;
        }

        // sort the timex using the granularity of the duration, "P1M23D" for "1 month 23 days" and "23 days 1 month"
        ret.Timex = TimexUtility.GenerateCompoundDurationTimex(timexDict, this.config.UnitValueMap);

        double value = 0;
        foreach (var pr in prs)
        {
            // Convert directly instead of a culture-sensitive ToString()/Parse round-trip.
            value += Convert.ToDouble(((DateTimeResolutionResult)pr.Value).FutureValue, CultureInfo.InvariantCulture);
        }

        ret.FutureValue = ret.PastValue = value;
        ret.Success = true;
        return ret;
    }
}
// Parser for CJK holiday mentions. A holiday name is resolved through the
// configuration's fixed-holiday or computed-holiday dictionaries, optionally combined
// with a numeric, CJK-numeral or relative year found in the same match.
public class BaseCJKHolidayParser : IDateTimeParser
{
    public static readonly string ParserName = Constants.SYS_DATETIME_DATE; // "Date"

    private readonly ICJKHolidayParserConfiguration config;

    public BaseCJKHolidayParser(ICJKHolidayParserConfiguration config)
    {
        this.config = config;
    }

    // Parses relative to the current local time.
    public ParseResult Parse(ExtractResult extResult)
    {
        return this.Parse(extResult, DateObject.Now);
    }

    // Parses one extracted holiday. Value stays null (and TimexStr empty) when the
    // extraction is not of the date type or no holiday regex resolves it.
    public DateTimeParseResult Parse(ExtractResult er, DateObject refDate)
    {
        var referenceDate = refDate;
        object value = null;

        if (string.Equals(er.Type, ParserName, StringComparison.Ordinal))
        {
            var innerResult = ParseHolidayRegexMatch(er.Text, referenceDate);

            if (innerResult.Success)
            {
                innerResult.FutureResolution = new Dictionary<string, string>
                {
                    { TimeTypeConstants.DATE, DateTimeFormatUtil.FormatDate((DateObject)innerResult.FutureValue) },
                };

                innerResult.PastResolution = new Dictionary<string, string>
                {
                    { TimeTypeConstants.DATE, DateTimeFormatUtil.FormatDate((DateObject)innerResult.PastValue) },
                };

                innerResult.IsLunar = IsLunarCalendar(er.Text);
                value = innerResult;
            }
        }

        var ret = new DateTimeParseResult
        {
            Text = er.Text,
            Start = er.Start,
            Length = er.Length,
            Type = er.Type,
            Data = er.Data,
            Value = value,
            TimexStr = value == null ? string.Empty : ((DateTimeResolutionResult)value).Timex,
            ResolutionStr = string.Empty,
        };

        return ret;
    }

    public List<DateTimeParseResult> FilterResults(string query, List<DateTimeParseResult> candidateResults)
    {
        return candidateResults;
    }

    // Moves a holiday date that lies before the reference date to next year's occurrence.
    private DateObject GetFutureValue(DateObject value, DateObject referenceDate, string holiday)
    {
        if (value >= referenceDate)
        {
            return value;
        }

        if (this.config.FixedHolidaysDict.ContainsKey(holiday))
        {
            return value.AddYears(1);
        }

        if (this.config.HolidayFuncDict.TryGetValue(holiday, out var holidayFunc))
        {
            return holidayFunc(referenceDate.Year + 1);
        }

        return value;
    }

    // Moves a holiday date that is on or after the reference date to last year's occurrence.
    private DateObject GetPastValue(DateObject value, DateObject referenceDate, string holiday)
    {
        if (value < referenceDate)
        {
            return value;
        }

        if (this.config.FixedHolidaysDict.ContainsKey(holiday))
        {
            return value.AddYears(-1);
        }

        if (this.config.HolidayFuncDict.TryGetValue(holiday, out var holidayFunc))
        {
            return holidayFunc(referenceDate.Year - 1);
        }

        return value;
    }

    // Resolves the text with the first holiday regex that matches it.
    private DateTimeResolutionResult ParseHolidayRegexMatch(string text, DateObject referenceDate)
    {
        foreach (var regex in this.config.HolidayRegexList)
        {
            var match = regex.Match(text);

            if (match.Success)
            {
                // Value string will be set in Match2Date method
                return Match2Date(match, referenceDate);
            }
        }

        return new DateTimeResolutionResult();
    }

    // Converts a holiday regex match into a resolution using the "holiday", "year",
    // CJK-year and "yearrel" groups.
    private DateTimeResolutionResult Match2Date(Match match, DateObject referenceDate)
    {
        var ret = new DateTimeResolutionResult();
        var holidayStr = match.Groups["holiday"].Value;

        var year = referenceDate.Year;
        var hasYear = false;
        var yearNum = match.Groups["year"].Value;
        var yearCJK = match.Groups[Constants.YearCJKGroupName].Value;
        var yearRel = match.Groups["yearrel"].Value;

        if (!string.IsNullOrEmpty(yearNum))
        {
            hasYear = true;
            yearNum = this.config.SanitizeYearToken(yearNum);

            year = int.Parse(yearNum, CultureInfo.InvariantCulture);
        }
        else if (!string.IsNullOrEmpty(yearCJK))
        {
            hasYear = true;
            yearCJK = this.config.SanitizeYearToken(yearCJK);

            year = ConvertToInteger(yearCJK);
        }
        else if (!string.IsNullOrEmpty(yearRel))
        {
            hasYear = true;
            int swift = this.config.GetSwiftYear(yearRel);
            if (swift >= -1)
            {
                year += swift;
            }
        }

        // Expand two-digit years: 90-99 go to the past century, values below 20 to the current one.
        if (year < 100 && year >= 90)
        {
            year += Constants.BASE_YEAR_PAST_CENTURY;
        }
        else if (year < 20)
        {
            year += Constants.BASE_YEAR_CURRENT_CENTURY;
        }

        if (string.IsNullOrEmpty(holidayStr))
        {
            return ret;
        }

        DateObject value;
        string timexStr;
        if (this.config.FixedHolidaysDict.TryGetValue(holidayStr, out var fixedHoliday))
        {
            value = fixedHoliday(year);
            timexStr = $"-{value.Month:D2}-{value.Day:D2}";
        }
        else if (this.config.HolidayFuncDict.TryGetValue(holidayStr, out var holidayFunc) &&
                 this.config.NoFixedTimex.TryGetValue(holidayStr, out timexStr))
        {
            value = holidayFunc(year);
        }
        else
        {
            // Unknown holiday, or no timex registered for a computed holiday.
            return ret;
        }

        if (hasYear)
        {
            ret.Timex = year.ToString("D4", CultureInfo.InvariantCulture) + timexStr;
            ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(year, value.Month, value.Day);
            ret.Success = true;
            return ret;
        }

        ret.Timex = "XXXX" + timexStr;
        ret.FutureValue = GetFutureValue(value, referenceDate, holidayStr);
        ret.PastValue = GetPastValue(value, referenceDate, holidayStr);
        ret.Success = true;
        return ret;
    }

    // Converts a CJK year string to an integer. The string is first read as a whole
    // number; if that yields less than 10, it is re-read digit by digit.
    // Returns -1 when the result is 0.
    private int ConvertToInteger(string yearCJKStr)
    {
        var num = ExtractInteger(yearCJKStr);

        if (num < 10)
        {
            num = 0;
            foreach (var ch in yearCJKStr)
            {
                num = (num * 10) + ExtractInteger(ch.ToString(CultureInfo.InvariantCulture));
            }
        }

        return num == 0 ? -1 : num;
    }

    // Returns the integer the first extraction in `text` resolves to, or 0 when there is
    // no extraction or it is not of the integer type.
    private int ExtractInteger(string text)
    {
        var er = this.config.IntegerExtractor.Extract(text);
        if (er.Count == 0 || !er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal))
        {
            return 0;
        }

        // The fallback must be a double: unboxing a boxed int as double throws InvalidCastException.
        return Convert.ToInt32((double)(this.config.NumberParser.Parse(er[0]).Value ?? 0d));
    }

    // parse if lunar contains
    private bool IsLunarCalendar(string text)
    {
        var trimmedText = text.Trim();
        return this.config.LunarHolidayRegex.Match(trimmedText).Success;
    }
}
+
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using Microsoft.Recognizers.Text.Utilities;
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// Merged CJK date/time parser: strips a leading/trailing modifier (before, after, until,
+    /// since, equal, around) from the extracted text, dispatches the remaining mention to the
+    /// type-specific parser exposed by the configuration, then restores the modifier text and
+    /// records the corresponding Mod value on the resolution.
+    /// </summary>
+    public class BaseCJKMergedDateTimeParser : IDateTimeParser
+    {
+        private readonly ICJKMergedParserConfiguration config;
+
+        public BaseCJKMergedDateTimeParser(ICJKMergedParserConfiguration configuration)
+        {
+            config = configuration;
+        }
+
+        /// <summary>Returns the candidates unchanged; no filtering is applied by this parser.</summary>
+        public List FilterResults(string query, List candidateResults)
+        {
+            return candidateResults;
+        }
+
+        /// <summary>Parses <paramref name="er"/> using the current local time as reference.</summary>
+        public ParseResult Parse(ExtractResult er)
+        {
+            return Parse(er, DateObject.Now);
+        }
+
+        /// <summary>
+        /// Parses an extracted mention relative to <paramref name="refTime"/>.
+        /// Note that <paramref name="er"/> is modified in place while the modifier text is stripped.
+        /// </summary>
+        /// <returns>
+        /// The parse result with the modifier text restored, or null when the type-specific
+        /// parser produced no result (unknown type, or time zone without the preview option).
+        /// </returns>
+        public DateTimeParseResult Parse(ExtractResult er, DateObject refTime)
+        {
+            var referenceTime = refTime;
+            DateTimeParseResult pr;
+
+            // push, save the MOD string
+            var hasInclusiveModifier = false;
+            bool hasBefore = false, hasAfter = false, hasUntil = false, hasSince = false, hasEqual = false, hasAround = false;
+            string modStr = string.Empty, modStrPrefix = string.Empty, modStrSuffix = string.Empty;
+            if (er.Metadata != null && er.Metadata.HasMod)
+            {
+                var beforeMatch = config.BeforeRegex.MatchEnd(er.Text, trim: true);
+                var afterMatch = config.AfterRegex.MatchEnd(er.Text, trim: true);
+                var untilMatch = config.UntilRegex.MatchBegin(er.Text, trim: true);
+                var sinceMatchPrefix = config.SincePrefixRegex.MatchBegin(er.Text, trim: true);
+                var sinceMatchSuffix = config.SinceSuffixRegex.MatchEnd(er.Text, trim: true);
+                var equalMatch = config.EqualRegex.MatchBegin(er.Text, trim: true);
+                var aroundMatchPrefix = config.AroundPrefixRegex.MatchBegin(er.Text, trim: true);
+                var aroundMatchSuffix = config.AroundSuffixRegex.MatchEnd(er.Text, trim: true);
+
+                if (beforeMatch.Success && !MergedParserUtil.IsDurationWithAgoAndLater(er))
+                {
+                    hasBefore = true;
+                    er.Length -= beforeMatch.Length;
+                    er.Text = er.Text.Substring(0, er.Length ?? 0);
+                    modStr = beforeMatch.Value;
+
+                    if (!string.IsNullOrEmpty(beforeMatch.Groups[Constants.IncludeGroupName].Value))
+                    {
+                        hasInclusiveModifier = true;
+                    }
+                }
+                else if (afterMatch.Success && !MergedParserUtil.IsDurationWithAgoAndLater(er) && !sinceMatchSuffix.Success)
+                {
+                    hasAfter = true;
+                    er.Length -= afterMatch.Length;
+                    er.Text = er.Text.Substring(0, er.Length ?? 0);
+                    modStr = afterMatch.Value;
+
+                    if (!string.IsNullOrEmpty(afterMatch.Groups[Constants.IncludeGroupName].Value))
+                    {
+                        hasInclusiveModifier = true;
+                    }
+                }
+                else if (untilMatch.Success)
+                {
+                    hasUntil = true;
+                    er.Start += untilMatch.Length;
+                    er.Length -= untilMatch.Length;
+                    er.Text = er.Text.Substring(untilMatch.Length);
+                    modStr = untilMatch.Value;
+                }
+                else if (equalMatch.Success)
+                {
+                    hasEqual = true;
+                    er.Start += equalMatch.Length;
+                    er.Length -= equalMatch.Length;
+                    er.Text = er.Text.Substring(equalMatch.Length);
+                    modStr = equalMatch.Value;
+                }
+                else
+                {
+                    // NOTE(review): "since" and "around" share modStrPrefix/modStrSuffix and are not
+                    // mutually exclusive; if both match, the restore step below re-adds the same
+                    // prefix/suffix twice. Confirm whether that combination can occur.
+                    if (sinceMatchPrefix.Success)
+                    {
+                        hasSince = true;
+                        er.Start += sinceMatchPrefix.Length;
+                        er.Length -= sinceMatchPrefix.Length;
+                        er.Text = er.Text.Substring(sinceMatchPrefix.Length);
+                        modStrPrefix = sinceMatchPrefix.Value;
+                    }
+
+                    if (sinceMatchSuffix.Success)
+                    {
+                        hasSince = true;
+                        er.Length -= sinceMatchSuffix.Length;
+                        er.Text = er.Text.Substring(0, er.Length ?? 0);
+                        modStrSuffix = sinceMatchSuffix.Value;
+                    }
+
+                    if (aroundMatchPrefix.Success)
+                    {
+                        hasAround = true;
+                        er.Start += aroundMatchPrefix.Length;
+                        er.Length -= aroundMatchPrefix.Length;
+                        er.Text = er.Text.Substring(aroundMatchPrefix.Length);
+                        modStrPrefix = aroundMatchPrefix.Value;
+                    }
+
+                    if (aroundMatchSuffix.Success)
+                    {
+                        hasAround = true;
+                        er.Length -= aroundMatchSuffix.Length;
+                        er.Text = er.Text.Substring(0, er.Length ?? 0);
+                        modStrSuffix = aroundMatchSuffix.Value;
+                    }
+                }
+            }
+
+            // Parse extracted datetime mention
+            pr = ParseResult(er, referenceTime);
+            if (pr == null)
+            {
+                return null;
+            }
+
+            // pop, restore the MOD string.
+            // The inner parser may return a result whose Value is null (unresolved mention), so the
+            // Mod is only written when a resolution exists; the text/offsets are restored either way.
+            if (hasBefore)
+            {
+                pr.Length += modStr.Length;
+                pr.Text = pr.Text + modStr;
+                var val = (DateTimeResolutionResult)pr.Value;
+                if (val != null)
+                {
+                    val.Mod = MergedParserUtil.CombineMod(val.Mod, !hasInclusiveModifier ? Constants.BEFORE_MOD : Constants.UNTIL_MOD);
+                    pr.Value = val;
+                }
+            }
+
+            if (hasAfter)
+            {
+                pr.Length += modStr.Length;
+                pr.Text = pr.Text + modStr;
+                var val = (DateTimeResolutionResult)pr.Value;
+                if (val != null)
+                {
+                    val.Mod = MergedParserUtil.CombineMod(val.Mod, !hasInclusiveModifier ? Constants.AFTER_MOD : Constants.SINCE_MOD);
+                    pr.Value = val;
+                }
+            }
+
+            if (hasUntil)
+            {
+                pr.Length += modStr.Length;
+                pr.Start -= modStr.Length;
+                pr.Text = modStr + pr.Text;
+                var val = (DateTimeResolutionResult)pr.Value;
+                if (val != null)
+                {
+                    val.Mod = Constants.BEFORE_MOD;
+                    pr.Value = val;
+                }
+
+                // "until" counts as a range-changing "before" for the flag computed below.
+                hasBefore = true;
+            }
+
+            if (hasSince)
+            {
+                pr.Length += modStrPrefix.Length + modStrSuffix.Length;
+                pr.Start -= modStrPrefix.Length;
+                pr.Text = modStrPrefix + pr.Text + modStrSuffix;
+                var val = (DateTimeResolutionResult)pr.Value;
+                if (val != null)
+                {
+                    val.Mod = Constants.SINCE_MOD;
+                    pr.Value = val;
+                }
+            }
+
+            if (hasEqual)
+            {
+                // "equal" only restores the text; no Mod is recorded.
+                pr.Length += modStr.Length;
+                pr.Start -= modStr.Length;
+                pr.Text = modStr + pr.Text;
+            }
+
+            if (hasAround)
+            {
+                pr.Length += modStrPrefix.Length + modStrSuffix.Length;
+                pr.Start -= modStrPrefix.Length;
+                pr.Text = modStrPrefix + pr.Text + modStrSuffix;
+                var val = (DateTimeResolutionResult)pr.Value;
+                if (val != null)
+                {
+                    val.Mod = Constants.APPROX_MOD;
+                    pr.Value = val;
+                }
+            }
+
+            var hasRangeChangingMod = hasBefore || hasAfter || hasSince;
+            if (pr.Value != null)
+            {
+                ((DateTimeResolutionResult)pr.Value).HasRangeChangingMod = hasRangeChangingMod;
+            }
+
+            pr = MergedParserUtil.SetParseResult(pr, hasRangeChangingMod, this.config);
+
+            return pr;
+        }
+
+        // @TODO move to MergedParserUtil (if possible)
+        // Dispatches to the parser matching extractResult.Type. Returns null for unknown types and
+        // for time zones when the EnablePreview option is not set.
+        private DateTimeParseResult ParseResult(ExtractResult extractResult, DateObject referenceTime)
+        {
+            DateTimeParseResult parseResult = null;
+            switch (extractResult.Type)
+            {
+                case Constants.SYS_DATETIME_DATE:
+                    if (extractResult.Metadata != null && extractResult.Metadata.IsHoliday)
+                    {
+                        parseResult = config.HolidayParser.Parse(extractResult, referenceTime);
+                    }
+                    else
+                    {
+                        parseResult = this.config.DateParser.Parse(extractResult, referenceTime);
+                    }
+
+                    break;
+                case Constants.SYS_DATETIME_TIME:
+                    parseResult = this.config.TimeParser.Parse(extractResult, referenceTime);
+
+                    break;
+                case Constants.SYS_DATETIME_DATETIME:
+                    parseResult = this.config.DateTimeParser.Parse(extractResult, referenceTime);
+
+                    break;
+                case Constants.SYS_DATETIME_DATEPERIOD:
+                    parseResult = this.config.DatePeriodParser.Parse(extractResult, referenceTime);
+
+                    break;
+                case Constants.SYS_DATETIME_TIMEPERIOD:
+                    parseResult = this.config.TimePeriodParser.Parse(extractResult, referenceTime);
+
+                    break;
+                case Constants.SYS_DATETIME_DATETIMEPERIOD:
+                    parseResult = this.config.DateTimePeriodParser.Parse(extractResult, referenceTime);
+
+                    break;
+                case Constants.SYS_DATETIME_DURATION:
+                    parseResult = this.config.DurationParser.Parse(extractResult, referenceTime);
+
+                    break;
+                case Constants.SYS_DATETIME_SET:
+                    parseResult = this.config.SetParser.Parse(extractResult, referenceTime);
+
+                    break;
+                case Constants.SYS_DATETIME_DATETIMEALT:
+                    parseResult = this.config.DateTimeAltParser.Parse(extractResult, referenceTime);
+
+                    break;
+                case Constants.SYS_DATETIME_TIMEZONE:
+                    if ((config.Options & DateTimeOptions.EnablePreview) != 0)
+                    {
+                        parseResult = this.config.TimeZoneParser.Parse(extractResult, referenceTime);
+                    }
+
+                    break;
+                default:
+                    return null;
+            }
+
+            return parseResult;
+        }
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKSetParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKSetParser.cs
new file mode 100644
index 
0000000000..c1f18c37ae
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKSetParser.cs
@@ -0,0 +1,178 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+using Microsoft.Recognizers.Text.Utilities;
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// Parses recurring ("set") CJK date/time mentions by trying, in order: a bare unit,
+    /// a duration, a datetime, a date, a time period and a time, each combined with the
+    /// "each" markers supplied by the configuration.
+    /// </summary>
+    public class BaseCJKSetParser : IDateTimeParser
+    {
+        public static readonly string ParserName = Constants.SYS_DATETIME_SET;
+
+        private readonly ICJKSetParserConfiguration config;
+
+        public BaseCJKSetParser(ICJKSetParserConfiguration configuration)
+        {
+            config = configuration;
+        }
+
+        /// <summary>Parses <paramref name="extResult"/> using the current local time as reference.</summary>
+        public ParseResult Parse(ExtractResult extResult)
+        {
+            return this.Parse(extResult, DateObject.Now);
+        }
+
+        /// <summary>
+        /// Parses a set mention relative to <paramref name="refDate"/>.
+        /// </summary>
+        /// <returns>
+        /// A result that always echoes the extraction's text and offsets; Value is null and
+        /// TimexStr is empty when the type is not a set or no sub-parser succeeded.
+        /// </returns>
+        public DateTimeParseResult Parse(ExtractResult er, DateObject refDate)
+        {
+            object value = null;
+            if (er.Type.Equals(ParserName, StringComparison.Ordinal))
+            {
+                var innerResult = ParseEachUnit(er.Text);
+                if (!innerResult.Success)
+                {
+                    innerResult = ParseEachDuration(er.Text, refDate);
+                }
+
+                // NOTE: Please do not change the order of the following calls;
+                // datetime must be considered before date.
+                if (!innerResult.Success)
+                {
+                    innerResult = ParseEach(config.DateTimeExtractor, config.DateTimeParser, er.Text, refDate);
+                }
+
+                if (!innerResult.Success)
+                {
+                    innerResult = ParseEach(config.DateExtractor, config.DateParser, er.Text, refDate);
+                }
+
+                if (!innerResult.Success)
+                {
+                    innerResult = ParseEach(config.TimePeriodExtractor, config.TimePeriodParser, er.Text, refDate);
+                }
+
+                if (!innerResult.Success)
+                {
+                    innerResult = ParseEach(config.TimeExtractor, config.TimeParser, er.Text, refDate);
+                }
+
+                if (innerResult.Success)
+                {
+                    innerResult.FutureResolution = new Dictionary
+                    {
+                        { TimeTypeConstants.SET, (string)innerResult.FutureValue },
+                    };
+
+                    innerResult.PastResolution = new Dictionary
+                    {
+                        { TimeTypeConstants.SET, (string)innerResult.PastValue },
+                    };
+
+                    value = innerResult;
+                }
+            }
+
+            var ret = new DateTimeParseResult
+            {
+                Text = er.Text,
+                Start = er.Start,
+                Length = er.Length,
+                Type = er.Type,
+                Data = er.Data,
+                Value = value,
+                TimexStr = value == null ? string.Empty : ((DateTimeResolutionResult)value).Timex,
+                ResolutionStr = string.Empty,
+            };
+            return ret;
+        }
+
+        /// <summary>Returns the candidates unchanged; no filtering is applied by this parser.</summary>
+        public List FilterResults(string query, List candidateResults)
+        {
+            return candidateResults;
+        }
+
+        // Handles a single duration followed by text matching EachPrefixRegex.
+        // The duration is parsed against the caller's reference date so that results are
+        // reproducible for a given refDate (previously DateObject.Now was used here).
+        private DateTimeResolutionResult ParseEachDuration(string text, DateObject refDate)
+        {
+            var ret = new DateTimeResolutionResult();
+
+            var ers = this.config.DurationExtractor.Extract(text, refDate);
+            if (ers.Count != 1)
+            {
+                return ret;
+            }
+
+            // Text that follows the extracted duration.
+            var afterStr = text.Substring(ers[0].Start + ers[0].Length ?? 0);
+            if (string.IsNullOrWhiteSpace(afterStr))
+            {
+                return ret;
+            }
+
+            if (this.config.EachPrefixRegex.IsMatch(afterStr))
+            {
+                var pr = this.config.DurationParser.Parse(ers[0], refDate);
+                ret = SetHandler.ResolveSet(ref ret, pr.TimexStr);
+            }
+
+            return ret;
+        }
+
+        // Handles a bare unit such as "each month": the whole (trimmed) text must match EachUnitRegex.
+        private DateTimeResolutionResult ParseEachUnit(string text)
+        {
+            var ret = new DateTimeResolutionResult();
+
+            // handle "each month"
+            var match = this.config.EachUnitRegex.MatchExact(text, trim: true);
+
+            if (match.Success)
+            {
+                var sourceUnit = match.Groups["unit"].Value;
+                if (!string.IsNullOrEmpty(sourceUnit) && this.config.UnitMap.ContainsKey(sourceUnit))
+                {
+                    if (this.config.GetMatchedUnitTimex(sourceUnit, out string timexStr))
+                    {
+                        ret = SetHandler.ResolveSet(ref ret, timexStr);
+                    }
+                }
+            }
+
+            return ret;
+        }
+
+        // Handles "<each-marker><entity>": the first extracted entity whose preceding marker plus
+        // the entity itself span the whole text is parsed and turned into a set resolution.
+        private DateTimeResolutionResult ParseEach(IDateTimeExtractor extractor, IDateTimeParser parser, string text, DateObject refDate)
+        {
+            var ret = new DateTimeResolutionResult();
+            var ers = extractor.Extract(text, refDate);
+            var success = false;
+            foreach (var er in ers)
+            {
+                var beforeStr = text.Substring(0, er.Start ?? 0);
+                var match = this.config.EachPrefixRegex.Match(beforeStr);
+
+                if (match.Success && match.Length + er.Length == text.Length)
+                {
+                    success = true;
+                }
+                else if (er.Type == Constants.SYS_DATETIME_TIME || er.Type == Constants.SYS_DATETIME_DATE)
+                {
+                    // Cases like "every day at 2pm" or "every year on April 15th"
+                    var eachRegex = er.Type == Constants.SYS_DATETIME_TIME ? this.config.EachDayRegex : this.config.EachDateUnitRegex;
+                    match = eachRegex.Match(beforeStr);
+                    if (match.Success && match.Length + er.Length == text.Length)
+                    {
+                        success = true;
+                    }
+                }
+
+                if (success)
+                {
+                    var pr = parser.Parse(er, refDate);
+                    ret = SetHandler.ResolveSet(ref ret, pr.TimexStr);
+                    break;
+                }
+            }
+
+            return ret;
+        }
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKTimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKTimeParser.cs
new file mode 100644
index 0000000000..6062b2c367
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKTimeParser.cs
@@ -0,0 +1,81 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+using Microsoft.Recognizers.Text.Utilities;
+
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// Parses CJK time mentions by handing the extractor's <see cref="DateTimeExtra"/> payload to the
+    /// handler registered for its type in the configuration's FunctionMap.
+    /// </summary>
+    public class BaseCJKTimeParser : IDateTimeParser
+    {
+        public static readonly string ParserName = Constants.SYS_DATETIME_TIME; // "Time";
+
+        private readonly ICJKTimeParserConfiguration config;
+
+        public BaseCJKTimeParser(ICJKTimeParserConfiguration configuration)
+        {
+            config = configuration;
+        }
+
+        /// <summary>Parses <paramref name="extResult"/> using the current local time as reference.</summary>
+        public ParseResult Parse(ExtractResult extResult)
+        {
+            return this.Parse(extResult, DateObject.Now);
+        }
+
+        /// <summary>
+        /// Parses a time mention relative to <paramref name="refDate"/>.
+        /// </summary>
+        /// <returns>
+        /// The parse result, or null when no <see cref="DateTimeExtra"/> payload is available,
+        /// including the case where re-extracting the text yields no result.
+        /// </returns>
+        public DateTimeParseResult Parse(ExtractResult er, DateObject refDate)
+        {
+            var referenceTime = refDate;
+            var extra = er.Data as DateTimeExtra;
+            if (extra == null)
+            {
+                // The payload is missing: run the time extractor again on the text.
+                // Guard against an empty extraction instead of indexing it blindly.
+                var result = this.config.TimeExtractor.Extract(er.Text, refDate);
+                extra = result.Count > 0 ? result[0]?.Data as DateTimeExtra : null;
+            }
+
+            if (extra == null)
+            {
+                return null;
+            }
+
+            var timeResult = this.config.FunctionMap[extra.Type](extra);
+            var parseResult = this.config.TimeFunc.PackTimeResult(extra, timeResult, referenceTime);
+            if (parseResult.Success)
+            {
+                parseResult.FutureResolution = new Dictionary
+                {
+                    { TimeTypeConstants.TIME, DateTimeFormatUtil.FormatTime((DateObject)parseResult.FutureValue) },
+                };
+
+                parseResult.PastResolution = new Dictionary
+                {
+                    { TimeTypeConstants.TIME, DateTimeFormatUtil.FormatTime((DateObject)parseResult.PastValue) },
+                };
+            }
+
+            var ret = new DateTimeParseResult
+            {
+                Start = er.Start,
+                Text = er.Text,
+                Type = er.Type,
+                Length = er.Length,
+                Value = parseResult,
+                Data = timeResult,
+                ResolutionStr = string.Empty,
+                TimexStr = parseResult.Timex,
+            };
+
+            return ret;
+        }
+
+        /// <summary>Returns the candidates unchanged; no filtering is applied by this parser.</summary>
+        public List FilterResults(string query, List candidateResults)
+        {
+            return candidateResults;
+        }
+    }
+}
\ No newline at end of file
diff --git
a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKTimePeriodParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKTimePeriodParser.cs
new file mode 100644
index 0000000000..e9e96f5350
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKTimePeriodParser.cs
@@ -0,0 +1,133 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+using Microsoft.Recognizers.Text.Utilities;
+
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// Parses CJK time-period mentions: first as a named part of the day (via the configuration's
+    /// timex-range lookup), otherwise through <see cref="TimePeriodFunctions.Handle"/>.
+    /// </summary>
+    public class BaseCJKTimePeriodParser : IDateTimeParser
+    {
+        public static readonly string ParserName = Constants.SYS_DATETIME_TIMEPERIOD; // "TimePeriod";
+
+        private readonly ICJKTimePeriodParserConfiguration config;
+
+        public BaseCJKTimePeriodParser(ICJKTimePeriodParserConfiguration configuration)
+        {
+            config = configuration;
+        }
+
+        /// <summary>Parses <paramref name="extResult"/> using the current local time as reference.</summary>
+        public ParseResult Parse(ExtractResult extResult)
+        {
+            return this.Parse(extResult, DateObject.Now);
+        }
+
+        /// <summary>
+        /// Parses a time-period mention relative to <paramref name="refDate"/>.
+        /// </summary>
+        /// <returns>
+        /// The parse result, or null when no <see cref="DateTimeExtra"/> payload is available,
+        /// including the case where re-extracting the text yields no result.
+        /// </returns>
+        public DateTimeParseResult Parse(ExtractResult er, DateObject refDate)
+        {
+            var referenceTime = refDate;
+            var extra = er.Data as DateTimeExtra;
+            if (extra == null)
+            {
+                // The payload is missing: run the extractor again on the text.
+                // Guard against an empty extraction instead of indexing it blindly.
+                var result = this.config.TimeExtractor.Extract(er.Text, refDate);
+                extra = result.Count > 0 ? result[0]?.Data as DateTimeExtra : null;
+            }
+
+            if (extra == null)
+            {
+                return null;
+            }
+
+            // Handle special case like '上午' (morning), '下午' (afternoon)
+            var parseResult = ParseTimeOfDay(er.Text, referenceTime);
+
+            if (!parseResult.Success)
+            {
+                parseResult = TimePeriodFunctions.Handle(this.config.TimeParser, extra, referenceTime, this.config.TimeFunc);
+            }
+
+            if (parseResult.Success)
+            {
+                parseResult.FutureResolution = new Dictionary
+                {
+                    {
+                        TimeTypeConstants.START_TIME,
+                        DateTimeFormatUtil.FormatTime(((Tuple)parseResult.FutureValue).Item1)
+                    },
+                    {
+                        TimeTypeConstants.END_TIME,
+                        DateTimeFormatUtil.FormatTime(((Tuple)parseResult.FutureValue).Item2)
+                    },
+                };
+
+                parseResult.PastResolution = new Dictionary
+                {
+                    {
+                        TimeTypeConstants.START_TIME,
+                        DateTimeFormatUtil.FormatTime(((Tuple)parseResult.PastValue).Item1)
+                    },
+                    {
+                        TimeTypeConstants.END_TIME,
+                        DateTimeFormatUtil.FormatTime(((Tuple)parseResult.PastValue).Item2)
+                    },
+                };
+            }
+
+            var ret = new DateTimeParseResult
+            {
+                Start = er.Start,
+                Text = er.Text,
+                Type = er.Type,
+                Length = er.Length,
+                Value = parseResult,
+                ResolutionStr = string.Empty,
+                TimexStr = parseResult.Timex,
+            };
+
+            return ret;
+        }
+
+        /// <summary>Returns the candidates unchanged; no filtering is applied by this parser.</summary>
+        public List FilterResults(string query, List candidateResults)
+        {
+            return candidateResults;
+        }
+
+        // Resolves a part-of-day expression to a begin/end pair on the reference date.
+        // Returns an unsuccessful (default) result when the configuration has no range for the text.
+        private DateTimeResolutionResult ParseTimeOfDay(string text, DateObject referenceTime)
+        {
+            var ret = new DateTimeResolutionResult();
+
+            if (!this.config.GetMatchedTimexRange(text, out string timex, out int beginHour, out int endHour, out int endMinSeg))
+            {
+                return ret;
+            }
+
+            int day = referenceTime.Day,
+                month = referenceTime.Month,
+                year = referenceTime.Year;
+
+            // Add "early"/"late" Mod
+            if (endHour == beginHour + Constants.HalfMidDayDurationHourCount && (beginHour == Constants.MorningBeginHour || beginHour == Constants.AfternoonBeginHour))
+            {
+                ret.Comment = Constants.Comment_Early;
+                ret.Mod = Constants.EARLY_MOD;
+            }
+            else if (beginHour == endHour - Constants.HalfMidDayDurationHourCount && (endHour == Constants.MorningEndHour || endHour == Constants.AfternoonEndHour))
+            {
+                ret.Comment = Constants.Comment_Late;
+                ret.Mod = Constants.LATE_MOD;
+            }
+
+            ret.Timex = timex;
+
+            // endMinSeg is used for both the minute and the second of the end time.
+            ret.FutureValue = ret.PastValue = new Tuple(
+                DateObject.MinValue.SafeCreateFromValue(year, month, day, beginHour, 0, 0),
+                DateObject.MinValue.SafeCreateFromValue(year, month, day, endHour, endMinSeg, endMinSeg));
+            ret.Success = true;
+
+            return ret;
+        }
+    }
+}
\ No newline at end of file
diff --git
a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKCommonDateTimeParserConfiguration.cs
new file mode 100644
index 0000000000..1d2c83cb12
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKCommonDateTimeParserConfiguration.cs
@@ -0,0 +1,87 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Collections.Immutable;
+
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// Shared CJK configuration contract: number extractors/parser, one extractor and one parser
+    /// per date/time entity type, and the lookup maps listed below.
+    /// ICJKMergedParserConfiguration extends this interface.
+    /// </summary>
+    public interface ICJKCommonDateTimeParserConfiguration : IDateTimeOptionsConfiguration
+    {
+        IExtractor CardinalExtractor { get; }
+
+        IExtractor IntegerExtractor { get; }
+
+        IExtractor OrdinalExtractor { get; }
+
+        IParser NumberParser { get; }
+
+        IDateTimeExtractor DateExtractor { get; }
+
+        IDateTimeExtractor TimeExtractor { get; }
+
+        IDateTimeExtractor DateTimeExtractor { get; }
+
+        IDateTimeExtractor DurationExtractor { get; }
+
+        IDateTimeExtractor DatePeriodExtractor { get; }
+
+        IDateTimeExtractor TimePeriodExtractor { get; }
+
+        IDateTimeExtractor DateTimePeriodExtractor { get; }
+
+        IDateTimeExtractor SetExtractor { get; }
+
+        IDateTimeExtractor HolidayExtractor { get; }
+
+        IDateTimeParser DateParser { get; }
+
+        IDateTimeParser TimeParser { get; }
+
+        IDateTimeParser DateTimeParser { get; }
+
+        IDateTimeParser DurationParser { get; }
+
+        IDateTimeParser DatePeriodParser { get; }
+
+        IDateTimeParser TimePeriodParser { get; }
+
+        IDateTimeParser DateTimePeriodParser { get; }
+
+        IDateTimeParser SetParser { get; }
+
+        IDateTimeParser HolidayParser { get; }
+
+        IDateTimeParser DateTimeAltParser { get; }
+
+        IDateTimeParser TimeZoneParser { get; }
+
+        IImmutableDictionary MonthOfYear { get; }
+
+        IImmutableDictionary Numbers { get; }
+
+        IImmutableDictionary UnitValueMap { get; }
+
+        IImmutableDictionary SeasonMap { get; }
+
+        IImmutableDictionary SpecialYearPrefixesMap { get; }
+
+        IImmutableDictionary UnitMap { get; }
+
+        IImmutableDictionary CardinalMap { get; }
+
+        IImmutableDictionary DayOfMonth { get; }
+
+        IImmutableDictionary DayOfWeek { get; }
+
+        IImmutableDictionary DoubleNumbers { get; }
+
+        IImmutableDictionary WrittenDecades { get; }
+
+        IImmutableDictionary SpecialDecadeCases { get; }
+
+        IDateTimeUtilityConfiguration UtilityConfiguration { get; }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKDateParserConfiguration.cs
new file mode 100644
index 0000000000..6013694a5d
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKDateParserConfiguration.cs
@@ -0,0 +1,83 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// Configuration contract for date parsing: number/duration/date components, the regexes
+    /// listed below, lookup maps and a day-offset helper.
+    /// NOTE(review): presumably consumed by the CJK date parser; the consumer is not in this file.
+    /// </summary>
+    public interface ICJKDateParserConfiguration : IDateTimeOptionsConfiguration
+    {
+        IExtractor IntegerExtractor { get; }
+
+        IExtractor OrdinalExtractor { get; }
+
+        IParser NumberParser { get; }
+
+        IDateTimeExtractor DurationExtractor { get; }
+
+        IDateTimeParser DurationParser { get; }
+
+        IDateTimeExtractor DateExtractor { get; }
+
+        IEnumerable DateRegexList { get; }
+
+        Regex SpecialDate { get; }
+
+        Regex NextRe { get; }
+
+        Regex LastRe { get; }
+
+        Regex SpecialDayRegex { get; }
+
+        Regex StrictWeekDayRegex { get; }
+
+        Regex SpecialDayWithNumRegex { get; }
+
+        Regex LunarRegex { get; }
+
+        Regex UnitRegex { get; }
+
+        Regex BeforeRegex { get; }
+
+        Regex AfterRegex { get; }
+
+        Regex DynastyYearRegex { get; }
+
+        ImmutableDictionary DynastyYearMap { get; }
+
+        string DynastyStartYear { get; }
+
+        Regex NextRegex { get; }
+
+        Regex ThisRegex { get; }
+
+        Regex LastRegex { get; }
+
+        Regex WeekDayOfMonthRegex { get; }
+
+        Regex WeekDayAndDayRegex { get; }
+
+        Regex DurationRelativeDurationUnitRegex { get; }
+
+        IImmutableDictionary UnitMap { get; }
+
+        IImmutableDictionary DayOfMonth { get; }
+
+        IImmutableDictionary DayOfWeek { get; }
+
+        IImmutableDictionary MonthOfYear { get; }
+
+        IImmutableDictionary CardinalMap { get; }
+
+        Regex LastWeekDayRegex { get; }
+
+        Regex NextMonthRegex { get; }
+
+        Regex LastMonthRegex { get; }
+
+        int GetSwiftDay(string text);
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKDatePeriodParserConfiguration.cs
new file mode 100644
index 0000000000..58831bfb67
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKDatePeriodParserConfiguration.cs
@@ -0,0 +1,163 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Immutable;
+using System.Text.RegularExpressions;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// Configuration contract for date-period parsing: number/date/duration components, lookup
+    /// maps, the regexes listed below and text-classification helpers.
+    /// All members are implicitly public; no member carries an explicit access modifier.
+    /// NOTE(review): presumably consumed by the CJK date-period parser; the consumer is not in this file.
+    /// </summary>
+    public interface ICJKDatePeriodParserConfiguration : IDateTimeOptionsConfiguration
+    {
+        IExtractor IntegerExtractor { get; }
+
+        IParser NumberParser { get; }
+
+        IDateTimeExtractor DateExtractor { get; }
+
+        IDateTimeExtractor DurationExtractor { get; }
+
+        IExtractor CardinalExtractor { get; }
+
+        IDateTimeParser DurationParser { get; }
+
+        IDateTimeParser DateParser { get; }
+
+        ImmutableDictionary DynastyYearMap { get; }
+
+        IImmutableDictionary UnitMap { get; }
+
+        IImmutableDictionary CardinalMap { get; }
+
+        IImmutableDictionary DayOfMonth { get; }
+
+        IImmutableDictionary MonthOfYear { get; }
+
+        IImmutableDictionary SeasonMap { get; }
+
+        string DynastyStartYear { get; }
+
+        string TokenBeforeDate { get; }
+
+        int TwoNumYear { get; }
+
+        Regex SimpleCasesRegex { get; }
+
+        Regex DynastyYearRegex { get; }
+
+        Regex YearRegex { get; }
+
+        Regex RelativeRegex { get; }
+
+        Regex RelativeMonthRegex { get; }
+
+        Regex DurationRelativeDurationUnitRegex { get; }
+
+        Regex ThisRegex { get; }
+
+        Regex LastRegex { get; }
+
+        Regex NextRegex { get; }
+
+        Regex YearToYear { get; }
+
+        Regex YearToYearSuffixRequired { get; }
+
+        Regex YearInCJKRegex { get; }
+
+        Regex MonthToMonth { get; }
+
+        Regex MonthToMonthSuffixRequired { get; }
+
+        Regex MonthRegex { get; }
+
+        Regex YearAndMonth { get; }
+
+        Regex PureNumYearAndMonth { get; }
+
+        Regex OneWordPeriodRegex { get; }
+
+        Regex NumberCombinedWithUnit { get; }
+
+        Regex PastRegex { get; }
+
+        Regex FutureRegex { get; }
+
+        Regex WeekWithWeekDayRangeRegex { get; }
+
+        Regex UnitRegex { get; }
+
+        Regex DurationUnitRegex { get; }
+
+        Regex WeekOfMonthRegex { get; }
+
+        Regex WeekOfYearRegex { get; }
+
+        Regex WeekOfDateRegex { get; }
+
+        Regex MonthOfDateRegex { get; }
+
+        Regex WhichWeekRegex { get; }
+
+        Regex FirstLastOfYearRegex { get; }
+
+        Regex SeasonWithYear { get; }
+
+        Regex QuarterRegex { get; }
+
+        Regex DecadeRegex { get; }
+
+        Regex CenturyRegex { get; }
+
+        Regex DayToDay { get; }
+
+        Regex MonthDayRange { get; }
+
+        Regex DayRegexForPeriod { get; }
+
+        Regex SimpleYearAndMonth { get; }
+
+        Regex SpecialMonthRegex { get; }
+
+        Regex SpecialYearRegex { get; }
+
+        Regex LaterEarlyPeriodRegex { get; }
+
+        Regex DatePointWithAgoAndLater { get; }
+
+        Regex ReferenceDatePeriodRegex { get; }
+
+        Regex WoMLastRegex { get; }
+
+        Regex WoMPreviousRegex { get; }
+
+        Regex WoMNextRegex { get; }
+
+        Regex ComplexDatePeriodRegex { get; }
+
+        int ToMonthNumber(string monthStr);
+
+        bool IsMonthOnly(string text);
+
+        bool IsWeekend(string text);
+
+        bool IsWeekOnly(string text);
+
+        bool IsYearOnly(string text);
+
+        bool IsThisYear(string text);
+
+        bool IsYearToDate(string text);
+
+        bool IsLastYear(string text);
+
+        bool IsNextYear(string text);
+
+        bool IsYearAfterNext(string text);
+
+        bool IsYearBeforeLast(string text);
+
+        int GetSwiftMonth(string text);
+
+        int GetSwiftYear(string text);
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKDateTimeParserConfiguration.cs
new file mode 100644
index 0000000000..7464637fb4
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKDateTimeParserConfiguration.cs
@@ -0,0 +1,58 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Immutable;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// Configuration contract for date-time parsing: date/time/duration components, number
+    /// components, the regexes listed below, a unit map and three language-specific helpers.
+    /// NOTE(review): presumably consumed by the CJK date-time parser; the consumer is not in this file.
+    /// </summary>
+    public interface ICJKDateTimeParserConfiguration : IDateTimeOptionsConfiguration
+    {
+        IDateTimeExtractor DateExtractor { get; }
+
+        IDateTimeExtractor TimeExtractor { get; }
+
+        IDateTimeParser DateParser { get; }
+
+        IDateTimeParser DurationParser { get; }
+
+        IDateTimeParser TimeParser { get; }
+
+        IExtractor IntegerExtractor { get; }
+
+        IParser NumberParser { get; }
+
+        IDateTimeExtractor DurationExtractor { get; }
+
+        Regex NowRegex { get; }
+
+        Regex LunarRegex { get; }
+
+        Regex LunarHolidayRegex { get; }
+
+        Regex SimplePmRegex { get; }
+
+        Regex SimpleAmRegex { get; }
+
+        Regex TimeOfSpecialDayRegex { get; }
+
+        Regex DateTimePeriodUnitRegex { get; }
+
+        Regex DurationRelativeDurationUnitRegex { get; }
+
+        Regex AgoLaterRegex { get; }
+
+        Regex BeforeRegex { get; }
+
+        Regex AfterRegex { get; }
+
+        ImmutableDictionary UnitMap { get; }
+
+        bool GetMatchedNowTimex(string text, out string timex);
+
+        int GetSwiftDay(string text);
+
+        // hour and swift are passed by reference and may be modified by the implementation.
+        void AdjustByTimeOfDay(string matchStr, ref int hour, ref int swift);
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKDateTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKDateTimePeriodParserConfiguration.cs
new file mode 100644
index 0000000000..ca78cca743
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKDateTimePeriodParserConfiguration.cs
@@ -0,0 +1,63 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Immutable;
+using System.Text.RegularExpressions;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// Configuration contract for date-time-period parsing: extractors and parsers for the
+    /// component entity types, the regexes listed below, a unit map and two time-range lookups.
+    /// NOTE(review): presumably consumed by the CJK date-time-period parser; the consumer is not in this file.
+    /// </summary>
+    public interface ICJKDateTimePeriodParserConfiguration : IDateTimeOptionsConfiguration
+    {
+        IDateTimeExtractor DateExtractor { get; }
+
+        IDateTimeExtractor TimeExtractor { get; }
+
+        IDateTimeExtractor DateTimeExtractor { get; }
+
+        IDateTimeExtractor TimePeriodExtractor { get; }
+
+        IDateTimeExtractor DurationExtractor { get; }
+
+        IDateTimeParser DurationParser { get; }
+
+        IExtractor CardinalExtractor { get; }
+
+        IParser CardinalParser { get; }
+
+        IDateTimeParser DateParser { get; }
+
+        IDateTimeParser TimeParser { get; }
+
+        IDateTimeParser DateTimeParser { get; }
+
+        IDateTimeParser TimePeriodParser { get; }
+
+        Regex SpecificTimeOfDayRegex { get; }
+
+        Regex TimeOfDayRegex { get; }
+
+        Regex NextRegex { get; }
+
+        Regex LastRegex { get; }
+
+        Regex PastRegex { get; }
+
+        Regex FutureRegex { get; }
+
+        Regex WeekDayRegex { get; }
+
+        Regex TimePeriodLeftRegex { get; }
+
+        Regex UnitRegex { get; }
+
+        Regex RestOfDateRegex { get; }
+
+        Regex AmPmDescRegex { get; }
+
+        IImmutableDictionary UnitMap { get; }
+
+        bool GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMinute);
+
+        // Same outputs as GetMatchedTimeRange plus an additional swift value.
+        bool GetMatchedTimeRangeAndSwift(string text, out string todSymbol, out int beginHour, out int endHour, out int endMinute, out int swift);
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKDurationParserConfiguration.cs
new file mode 100644
index 0000000000..ec600d0ee7
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKDurationParserConfiguration.cs
@@ -0,0 +1,32 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Immutable;
+using System.Text.RegularExpressions;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// Configuration contract for duration parsing: a duration extractor, an internal parser,
+    /// the regexes listed below and two unit lookup maps.
+    /// NOTE(review): presumably consumed by the CJK duration parser; the consumer is not in this file.
+    /// </summary>
+    public interface ICJKDurationParserConfiguration : IDateTimeOptionsConfiguration
+    {
+        IDateTimeExtractor DurationExtractor { get; }
+
+        IParser InternalParser { get; }
+
+        Regex YearRegex { get; }
+
+        Regex SomeRegex { get; }
+
+        Regex MoreOrLessRegex { get; }
+
+        Regex DurationUnitRegex { get; }
+
+        Regex AnUnitRegex { get; }
+
+        Regex DurationConnectorRegex { get; }
+
+        IImmutableDictionary UnitMap { get; }
+
+        IImmutableDictionary UnitValueMap { get; }
+
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKHolidayParserConfiguration.cs
new file mode 100644
index 0000000000..e2218d7ed5
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKHolidayParserConfiguration.cs
@@ -0,0 +1,32 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Text.RegularExpressions;
+using DateObject = System.DateTime;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// Configuration contract for holiday parsing: number components, holiday dictionaries,
+    /// the holiday regexes and two year-related helpers.
+    /// NOTE(review): presumably consumed by the CJK holiday parser; the consumer is not fully visible here.
+    /// </summary>
+    public interface ICJKHolidayParserConfiguration : IDateTimeOptionsConfiguration
+    {
+        IExtractor IntegerExtractor { get; }
+
+        IParser NumberParser { get; }
+
+        Dictionary> FixedHolidaysDict { get; }
+
+        Dictionary> HolidayFuncDict { get; }
+
+        Dictionary NoFixedTimex { get; }
+
+        IEnumerable HolidayRegexList { get; }
+
+        Regex LunarHolidayRegex { get; }
+
+        int GetSwiftYear(string text);
+
+        string SanitizeYearToken(string holiday);
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKMergedParserConfiguration.cs
new file mode 100644
index 0000000000..b8f3038c57
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKMergedParserConfiguration.cs
@@ -0,0 +1,28 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Text.RegularExpressions;
+
+using Microsoft.Recognizers.Text.Matcher;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// Adds the modifier regexes (before, after, since, until, equal, around) to the common
+    /// CJK configuration. Prefix regexes are declared separately from suffix regexes for
+    /// "since" and "around".
+    /// </summary>
+    public interface ICJKMergedParserConfiguration : ICJKCommonDateTimeParserConfiguration
+    {
+        Regex BeforeRegex { get; }
+
+        Regex AfterRegex { get; }
+
+        Regex SincePrefixRegex { get; }
+
+        Regex SinceSuffixRegex { get; }
+
+        Regex UntilRegex { get; }
+
+        Regex EqualRegex { get; }
+
+        Regex AroundPrefixRegex { get; }
+
+        Regex AroundSuffixRegex { get; }
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKSetParserConfiguration.cs
new file mode 100644
index 0000000000..d030e3dee4
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKSetParserConfiguration.cs
@@ -0,0 +1,43 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Immutable;
+using System.Text.RegularExpressions;
+
+namespace Microsoft.Recognizers.Text.DateTime
+{
+    /// <summary>
+    /// Configuration contract for set (recurrence) parsing: an extractor/parser pair for each
+    /// component entity type, a unit map, the "each" regexes and a unit-to-timex lookup.
+    /// </summary>
+    public interface ICJKSetParserConfiguration : IDateTimeOptionsConfiguration
+    {
+        IDateTimeExtractor DurationExtractor { get; }
+
+        IDateTimeParser DurationParser { get; }
+
+        IDateTimeExtractor TimeExtractor { get; }
+
+        IDateTimeParser TimeParser { get; }
+
+        IDateTimeExtractor TimePeriodExtractor { get; }
+
+        IDateTimeParser TimePeriodParser { get; }
+
+        IDateTimeExtractor DateExtractor { get; }
+
+        IDateTimeParser DateParser { get; }
+
+        IDateTimeExtractor DateTimeExtractor { get; }
+
+        IDateTimeParser DateTimeParser { get; }
+
+        IImmutableDictionary UnitMap { get; }
+
+        Regex EachPrefixRegex { get; }
+
+        Regex EachUnitRegex { get; }
+
+        Regex EachDayRegex { get; }
+
+        Regex EachDateUnitRegex { get; }
+
+        bool GetMatchedUnitTimex(string text, out string timex);
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKTimeParserConfiguration.cs
b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKTimeParserConfiguration.cs new file mode 100644 index 0000000000..8f4825e0c2 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKTimeParserConfiguration.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.DateTime.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public delegate TimeResult TimeFunction(DateTimeExtra extra); + + public interface ICJKTimeParserConfiguration : IDateTimeOptionsConfiguration + { + IDateTimeExtractor TimeExtractor { get; } + + TimeFunctions TimeFunc { get; } + + Dictionary FunctionMap { get; } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKTimePeriodParserConfiguration.cs new file mode 100644 index 0000000000..dac8e094a1 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/ICJKTimePeriodParserConfiguration.cs @@ -0,0 +1,20 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.DateTime.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public interface ICJKTimePeriodParserConfiguration : IDateTimeOptionsConfiguration + { + IDateTimeExtractor TimeExtractor { get; } + + IDateTimeParser TimeParser { get; } + + TimeFunctions TimeFunc { get; } + + bool GetMatchedTimexRange(string text, out string timex, out int beginHour, out int endHour, out int endMin); + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/DummyTimeZoneParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/DummyTimeZoneParser.cs index 9e51de48b3..b5380081e5 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/DummyTimeZoneParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/DummyTimeZoneParser.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using DateObject = System.DateTime; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/FullDateTimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/FullDateTimeParser.cs deleted file mode 100644 index 19e5d76df7..0000000000 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/FullDateTimeParser.cs +++ /dev/null @@ -1,570 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using Microsoft.Recognizers.Text.Utilities; -using DateObject = System.DateTime; - -namespace Microsoft.Recognizers.Text.DateTime -{ - public class FullDateTimeParser : IDateTimeParser - { - public const string ParserTypeName = "datetimeV2"; - - private readonly IFullDateTimeParserConfiguration config; - - public FullDateTimeParser(IFullDateTimeParserConfiguration configuration) - { - config = configuration; - } - - public static void AddSingleDateTimeToResolution(Dictionary resolutionDic, string type, string mod, Dictionary res) - { - if (resolutionDic.ContainsKey(type)) - { - if (!string.IsNullOrEmpty(mod)) - { - if (mod.Equals(Constants.BEFORE_MOD, StringComparison.Ordinal)) - { - res.Add(DateTimeResolutionKey.End, resolutionDic[type]); - return; - } - - if (mod.Equals(Constants.AFTER_MOD, StringComparison.Ordinal)) - { - res.Add(DateTimeResolutionKey.Start, resolutionDic[type]); - return; - } - } - - res.Add(ResolutionKey.Value, resolutionDic[type]); - } - } - - public static void AddPeriodToResolution(Dictionary resolutionDic, string startType, string endType, string mod, Dictionary res) - { - var start = string.Empty; - var end = string.Empty; - - if (resolutionDic.ContainsKey(startType)) - { - start = resolutionDic[startType]; - } - - if (resolutionDic.ContainsKey(endType)) - { - end = resolutionDic[endType]; - } - - if (!string.IsNullOrEmpty(mod)) - { - // For before mode, the start of the period should be the end the new period, no start - if (mod.Equals(Constants.BEFORE_MOD, 
StringComparison.Ordinal)) - { - res.Add(DateTimeResolutionKey.End, start); - return; - } - - // For after mode, the end of the period should be the start the new period, no end - if (mod.Equals(Constants.AFTER_MOD, StringComparison.Ordinal)) - { - res.Add(DateTimeResolutionKey.Start, end); - return; - } - - // For since mode, the start of the period should be the start the new period, no end - if (mod.Equals(Constants.SINCE_MOD, StringComparison.Ordinal)) - { - res.Add(DateTimeResolutionKey.Start, start); - return; - } - - // For until mode, the end of the period should be the end the new period, no start - if (mod.Equals(Constants.UNTIL_MOD, StringComparison.Ordinal)) - { - res.Add(DateTimeResolutionKey.End, start); - return; - } - } - - if (!string.IsNullOrEmpty(start) && !string.IsNullOrEmpty(end)) - { - res.Add(DateTimeResolutionKey.Start, start); - res.Add(DateTimeResolutionKey.End, end); - } - } - - public static string DetermineDateTimeType(string type, bool hasRangeChangingMod) - { - if (hasRangeChangingMod) - { - if (type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal)) - { - return Constants.SYS_DATETIME_DATEPERIOD; - } - - if (type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) - { - return Constants.SYS_DATETIME_TIMEPERIOD; - } - - if (type.Equals(Constants.SYS_DATETIME_DATETIME, StringComparison.Ordinal)) - { - return Constants.SYS_DATETIME_DATETIMEPERIOD; - } - } - - return type; - } - - public ParseResult Parse(ExtractResult extResult) - { - return Parse(extResult, DateObject.Now); - } - - public DateTimeParseResult Parse(ExtractResult er, DateObject referenceTime) - { - DateTimeParseResult pr = null; - - // push, save teh MOD string - bool hasBefore = false, hasAfter = false, hasUntil = false, hasSince = false, hasEqual = false; - string modStr = string.Empty, modStrPrefix = string.Empty, modStrSuffix = string.Empty; - var beforeMatch = config.BeforeRegex.MatchEnd(er.Text, trim: true); - var afterMatch = 
config.AfterRegex.MatchEnd(er.Text, trim: true); - var untilMatch = config.UntilRegex.MatchBegin(er.Text, trim: true); - var sinceMatchPrefix = config.SincePrefixRegex.MatchBegin(er.Text, trim: true); - var sinceMatchSuffix = config.SinceSuffixRegex.MatchEnd(er.Text, trim: true); - var equalMatch = config.EqualRegex.MatchBegin(er.Text, trim: true); - - if (beforeMatch.Success && !IsDurationWithBeforeAndAfter(er)) - { - hasBefore = true; - er.Length -= beforeMatch.Length; - er.Text = er.Text.Substring(0, er.Length ?? 0); - modStr = beforeMatch.Value; - } - else if (afterMatch.Success && !IsDurationWithBeforeAndAfter(er)) - { - hasAfter = true; - er.Length -= afterMatch.Length; - er.Text = er.Text.Substring(0, er.Length ?? 0); - modStr = afterMatch.Value; - } - else if (untilMatch.Success) - { - hasUntil = true; - er.Start += untilMatch.Length; - er.Length -= untilMatch.Length; - er.Text = er.Text.Substring(untilMatch.Length); - modStr = untilMatch.Value; - } - else if (equalMatch.Success) - { - hasEqual = true; - er.Start += equalMatch.Length; - er.Length -= equalMatch.Length; - er.Text = er.Text.Substring(equalMatch.Length); - modStr = equalMatch.Value; - } - else - { - if (sinceMatchPrefix.Success) - { - hasSince = true; - er.Start += sinceMatchPrefix.Length; - er.Length -= sinceMatchPrefix.Length; - er.Text = er.Text.Substring(sinceMatchPrefix.Length); - modStrPrefix = sinceMatchPrefix.Value; - } - - if (sinceMatchSuffix.Success) - { - hasSince = true; - er.Length -= sinceMatchSuffix.Length; - er.Text = er.Text.Substring(0, er.Length ?? 
0); - modStrSuffix = sinceMatchSuffix.Value; - } - } - - if (er.Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal)) - { - pr = config.DateParser.Parse(er, referenceTime); - if (pr.Value == null) - { - pr = config.HolidayParser.Parse(er, referenceTime); - } - } - else if (er.Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) - { - pr = config.TimeParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_DATETIME, StringComparison.Ordinal)) - { - pr = config.DateTimeParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal)) - { - pr = config.DatePeriodParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_TIMEPERIOD, StringComparison.Ordinal)) - { - pr = config.TimePeriodParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_DATETIMEPERIOD, StringComparison.Ordinal)) - { - pr = config.DateTimePeriodParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_DURATION, StringComparison.Ordinal)) - { - pr = config.DurationParser.Parse(er, referenceTime); - } - else if (er.Type.Equals(Constants.SYS_DATETIME_SET, StringComparison.Ordinal)) - { - pr = config.GetParser.Parse(er, referenceTime); - } - else - { - return null; - } - - // pop, restore the MOD string - if (hasBefore) - { - pr.Length += modStr.Length; - pr.Text = pr.Text + modStr; - var val = (DateTimeResolutionResult)pr.Value; - val.Mod = Constants.BEFORE_MOD; - pr.Value = val; - } - - if (hasAfter) - { - pr.Length += modStr.Length; - pr.Text = pr.Text + modStr; - var val = (DateTimeResolutionResult)pr.Value; - val.Mod = Constants.AFTER_MOD; - pr.Value = val; - } - - if (hasUntil) - { - pr.Length += modStr.Length; - pr.Start -= modStr.Length; - pr.Text = modStr + pr.Text; - var val = (DateTimeResolutionResult)pr.Value; - val.Mod = Constants.BEFORE_MOD; - pr.Value = val; - hasBefore = true; - } - - if 
(hasSince) - { - pr.Length += modStrPrefix.Length + modStrSuffix.Length; - pr.Start -= modStrPrefix.Length; - pr.Text = modStrPrefix + pr.Text + modStrSuffix; - var val = (DateTimeResolutionResult)pr.Value; - val.Mod = Constants.SINCE_MOD; - pr.Value = val; - } - - if (hasEqual) - { - pr.Length += modStr.Length; - pr.Start -= modStr.Length; - pr.Text = modStr + pr.Text; - } - - var hasRangeChangingMod = hasBefore || hasAfter || hasSince; - if (pr.Value != null) - { - ((DateTimeResolutionResult)pr.Value).HasRangeChangingMod = hasRangeChangingMod; - } - - pr.Value = DateTimeResolution(pr, hasRangeChangingMod); - - // change the type at last for the after or before mode - pr.Type = $"{ParserTypeName}.{DetermineDateTimeType(er.Type, hasRangeChangingMod)}"; - - return pr; - } - - public SortedDictionary DateTimeResolution(DateTimeParseResult slot, bool hasRangeChangingMod) - { - var resolutions = new List>(); - var res = new Dictionary(); - - var val = (DateTimeResolutionResult)slot.Value; - if (val == null) - { - return null; - } - - var type = slot.Type; - var typeOutput = DetermineDateTimeType(slot.Type, hasRangeChangingMod); - var sourceEntity = DetermineSourceEntityType(slot.Type, typeOutput, val.HasRangeChangingMod); - var timex = slot.TimexStr; - - var isLunar = val.IsLunar; - var mod = val.Mod; - var comment = val.Comment; - - if (!string.IsNullOrEmpty(timex)) - { - res.Add(DateTimeResolutionKey.Timex, timex); - } - - if (!string.IsNullOrEmpty(comment)) - { - res.Add(Constants.Comment, comment); - } - - if (!string.IsNullOrEmpty(mod)) - { - res.Add(DateTimeResolutionKey.Mod, mod); - } - - if (!string.IsNullOrEmpty(type)) - { - res.Add(ResolutionKey.Type, typeOutput); - } - - var pastResolutionStr = ((DateTimeResolutionResult)slot.Value).PastResolution; - var futureResolutionStr = ((DateTimeResolutionResult)slot.Value).FutureResolution; - - var resolutionPast = GenerateResolution(type, pastResolutionStr, mod); - var resolutionFuture = GenerateResolution(type, 
futureResolutionStr, mod); - - // if past and future are same, keep only one - if (resolutionFuture.OrderBy(t => t.Key).Select(t => t.Value).SequenceEqual(resolutionPast.OrderBy(t => t.Key).Select(t => t.Value))) - { - if (resolutionPast.Count > 0) - { - res.Add(Constants.Resolve, resolutionPast); - } - } - else - { - if (resolutionPast.Count > 0) - { - res.Add(Constants.ResolveToPast, resolutionPast); - } - - if (resolutionFuture.Count > 0) - { - res.Add(Constants.ResolveToFuture, resolutionFuture); - } - } - - // if ampm, double our resolution accordingly - if (!string.IsNullOrEmpty(comment) && comment.Equals(Constants.Comment_AmPm, StringComparison.Ordinal)) - { - if (res.ContainsKey(Constants.Resolve)) - { - ResolveAmpm(res, Constants.Resolve); - } - else - { - ResolveAmpm(res, Constants.ResolveToPast); - ResolveAmpm(res, Constants.ResolveToFuture); - } - } - - if (isLunar) - { - res.Add(DateTimeResolutionKey.IsLunar, isLunar); - } - - foreach (var p in res) - { - if (p.Value is Dictionary dictionary) - { - var value = new Dictionary(); - - if (!string.IsNullOrEmpty(timex)) - { - value.Add(DateTimeResolutionKey.Timex, timex); - } - - if (!string.IsNullOrEmpty(mod)) - { - value.Add(DateTimeResolutionKey.Mod, mod); - } - - if (!string.IsNullOrEmpty(type)) - { - value.Add(ResolutionKey.Type, typeOutput); - } - - if (!string.IsNullOrEmpty(sourceEntity)) - { - value.Add(DateTimeResolutionKey.SourceEntity, sourceEntity); - } - - foreach (var q in dictionary) - { - if (value.ContainsKey(q.Key)) - { - value[q.Key] = q.Value; - } - else - { - value.Add(q.Key, q.Value); - } - } - - resolutions.Add(value); - } - } - - if (resolutionPast.Count == 0 && resolutionFuture.Count == 0) - { - var notResolved = new Dictionary - { - { - DateTimeResolutionKey.Timex, timex - }, - { - ResolutionKey.Type, typeOutput - }, - { - ResolutionKey.Value, "not resolved" - }, - }; - - resolutions.Add(notResolved); - } - - return new SortedDictionary { { ResolutionKey.ValueSet, resolutions } }; 
- } - - public List FilterResults(string query, List candidateResults) - { - return candidateResults; - } - - public string DetermineSourceEntityType(string sourceType, string newType, bool hasMod) - { - if (!hasMod) - { - return null; - } - - if (!newType.Equals(sourceType, StringComparison.Ordinal)) - { - return Constants.SYS_DATETIME_DATETIMEPOINT; - } - - if (newType.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal)) - { - return Constants.SYS_DATETIME_DATETIMEPERIOD; - } - - return null; - } - - internal static void ResolveAmpm(Dictionary resolutionDic, string keyName) - { - if (resolutionDic.ContainsKey(keyName)) - { - var resolution = (Dictionary)resolutionDic[keyName]; - if (!resolutionDic.ContainsKey(DateTimeResolutionKey.Timex)) - { - return; - } - - var timex = (string)resolutionDic[DateTimeResolutionKey.Timex]; - resolutionDic.Remove(keyName); - - resolutionDic.Add(keyName + "Am", resolution); - - var resolutionPm = new Dictionary(); - switch ((string)resolutionDic[ResolutionKey.Type]) - { - case Constants.SYS_DATETIME_TIME: - resolutionPm[ResolutionKey.Value] = DateTimeFormatUtil.ToPm(resolution[ResolutionKey.Value]); - resolutionPm[DateTimeResolutionKey.Timex] = DateTimeFormatUtil.ToPm(timex); - break; - case Constants.SYS_DATETIME_DATETIME: - var splited = resolution[ResolutionKey.Value].Split(' '); - resolutionPm[ResolutionKey.Value] = splited[0] + " " + DateTimeFormatUtil.ToPm(splited[1]); - resolutionPm[DateTimeResolutionKey.Timex] = DateTimeFormatUtil.AllStringToPm(timex); - break; - case Constants.SYS_DATETIME_TIMEPERIOD: - if (resolution.ContainsKey(DateTimeResolutionKey.Start)) - { - resolutionPm[DateTimeResolutionKey.Start] = DateTimeFormatUtil.ToPm(resolution[DateTimeResolutionKey.Start]); - } - - if (resolution.ContainsKey(DateTimeResolutionKey.End)) - { - resolutionPm[DateTimeResolutionKey.End] = DateTimeFormatUtil.ToPm(resolution[DateTimeResolutionKey.End]); - } - - resolutionPm[DateTimeResolutionKey.Timex] = 
DateTimeFormatUtil.AllStringToPm(timex); - break; - case Constants.SYS_DATETIME_DATETIMEPERIOD: - splited = resolution[DateTimeResolutionKey.Start].Split(' '); - if (resolution.ContainsKey(DateTimeResolutionKey.Start)) - { - resolutionPm[DateTimeResolutionKey.Start] = splited[0] + " " + DateTimeFormatUtil.ToPm(splited[1]); - } - - splited = resolution[DateTimeResolutionKey.End].Split(' '); - - if (resolution.ContainsKey(DateTimeResolutionKey.End)) - { - resolutionPm[DateTimeResolutionKey.End] = splited[0] + " " + DateTimeFormatUtil.ToPm(splited[1]); - } - - resolutionPm[DateTimeResolutionKey.Timex] = DateTimeFormatUtil.AllStringToPm(timex); - break; - } - - resolutionDic.Add(keyName + "Pm", resolutionPm); - } - } - - internal static Dictionary GenerateResolution(string type, Dictionary resolutionDic, string mod) - { - var res = new Dictionary(); - - if (type.Equals(Constants.SYS_DATETIME_DATETIME, StringComparison.Ordinal)) - { - AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.DATETIME, mod, res); - } - else if (type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) - { - AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.TIME, mod, res); - } - else if (type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal)) - { - AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.DATE, mod, res); - } - else if (type.Equals(Constants.SYS_DATETIME_DURATION, StringComparison.Ordinal)) - { - if (resolutionDic.ContainsKey(TimeTypeConstants.DURATION)) - { - res.Add(ResolutionKey.Value, resolutionDic[TimeTypeConstants.DURATION]); - } - } - else if (type.Equals(Constants.SYS_DATETIME_TIMEPERIOD, StringComparison.Ordinal)) - { - AddPeriodToResolution(resolutionDic, TimeTypeConstants.START_TIME, TimeTypeConstants.END_TIME, mod, res); - } - else if (type.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal)) - { - AddPeriodToResolution(resolutionDic, TimeTypeConstants.START_DATE, TimeTypeConstants.END_DATE, mod, 
res); - } - else if (type.Equals(Constants.SYS_DATETIME_DATETIMEPERIOD, StringComparison.Ordinal)) - { - AddPeriodToResolution(resolutionDic, TimeTypeConstants.START_DATETIME, TimeTypeConstants.END_DATETIME, mod, res); - } - - return res; - } - - private bool IsDurationWithBeforeAndAfter(ExtractResult er) - { - return er.Metadata != null && er.Metadata.IsDurationWithBeforeAndAfter; - } - } -} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ICommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ICommonDateTimeParserConfiguration.cs index ae6c538265..7e1159727a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ICommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ICommonDateTimeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -71,5 +74,10 @@ public interface ICommonDateTimeParserConfiguration : IDateTimeOptionsConfigurat IImmutableDictionary SpecialDecadeCases { get; } IDateTimeUtilityConfiguration UtilityConfiguration { get; } + + IDateTimeExtractor HolidayExtractor { get; } + + IDateTimeParser HolidayTimeParser { get; } + } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateParserConfiguration.cs index d9692f568d..aea52a0768 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -23,6 +26,8 @@ public interface IDateParserConfiguration : IDateTimeOptionsConfiguration IDateTimeParser DurationParser { get; } + IDateTimeParser HolidayParser { get; } + IEnumerable DateRegexes { get; } Regex OnRegex { get; } @@ -69,6 +74,10 @@ public interface IDateParserConfiguration : IDateTimeOptionsConfiguration Regex PreviousPrefixRegex { get; } + Regex BeforeAfterRegex { get; } + + Regex TasksModeDurationToDatePatterns { get; } + IImmutableDictionary UnitMap { get; } IImmutableDictionary DayOfMonth { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDatePeriodParserConfiguration.cs index 6ae21b18ec..69eebbf824 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDatePeriodParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime @@ -101,8 +104,18 @@ public interface IDatePeriodParserConfiguration : ISimpleDatePeriodParserConfigu Regex UnspecificEndOfRangeRegex { get; } + Regex AmbiguousPointRangeRegex { get; } + Regex NowRegex { get; } + Regex SpecialDayRegex { get; } + + Regex TodayNowRegex { get; } + + Regex FirstLastRegex { get; } + + Regex OfYearRegex { get; } + IImmutableDictionary UnitMap { get; } IImmutableDictionary CardinalMap { get; } @@ -131,6 +144,8 @@ public interface IDatePeriodParserConfiguration : ISimpleDatePeriodParserConfigu bool IsWeekOnly(string text); + bool IsFortnight(string text); + bool IsWeekend(string text); bool IsMonthOnly(string text); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimeAltParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimeAltParserConfiguration.cs index a0970259c0..9b1812cf2b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimeAltParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimeAltParserConfiguration.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.DateTime +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.DateTime { public interface IDateTimeAltParserConfiguration { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimeParser.cs index 9cc7b5a322..5ff6b2a153 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimeParser.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using DateObject = System.DateTime; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimeParserConfiguration.cs index 695193b388..9267a2df23 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -10,14 +13,18 @@ public interface IDateTimeParserConfiguration : IDateTimeOptionsConfiguration string TokenBeforeTime { get; } - IDateExtractor DateExtractor { get; } - + IDateExtractor DateExtractor { get; } + + IDateTimeExtractor HolidayExtractor { get; } + IDateTimeExtractor TimeExtractor { get; } IDateTimeParser DateParser { get; } - IDateTimeParser TimeParser { get; } - + IDateTimeParser TimeParser { get; } + + IDateTimeParser HolidayTimeParser { get; } + IExtractor CardinalExtractor { get; } IExtractor IntegerExtractor { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimePeriodParserConfiguration.cs index 0da0ddde7a..abe2f7377b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDateTimePeriodParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime @@ -7,8 +10,12 @@ public interface IDateTimePeriodParserConfiguration : IDateTimeOptionsConfigurat { string TokenBeforeDate { get; } + string TokenBeforeTime { get; } + IDateExtractor DateExtractor { get; } + IDateTimeExtractor HolidayExtractor { get; } + IDateTimeExtractor TimeExtractor { get; } IDateTimeExtractor DateTimeExtractor { get; } @@ -23,6 +30,8 @@ public interface IDateTimePeriodParserConfiguration : IDateTimeOptionsConfigurat IDateTimeParser DateParser { get; } + IDateTimeParser HolidayTimeParser { get; } + IDateTimeParser TimeParser { get; } IDateTimeParser DateTimeParser { get; } @@ -35,6 +44,8 @@ public interface IDateTimePeriodParserConfiguration : IDateTimeOptionsConfigurat Regex PureNumberFromToRegex { get; } + Regex HyphenDateRegex { get; } + Regex PureNumberBetweenAndRegex { get; } Regex SpecificTimeOfDayRegex { get; } @@ -69,13 +80,15 @@ public interface IDateTimePeriodParserConfiguration : IDateTimeOptionsConfigurat Regex AfterRegex { get; } + Regex TasksmodeMealTimeofDayRegex { get; } + bool CheckBothBeforeAfter { get; } IImmutableDictionary UnitMap { get; } IImmutableDictionary Numbers { get; } - bool GetMatchedTimeRange(string text, out string timeStr, out int beginHour, out int endHour, out int endMin); + bool GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMin); int GetSwiftPrefix(string text); } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDurationParserConfiguration.cs index f31f319ebe..18dd34dd18 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDurationParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft 
Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime @@ -7,7 +10,7 @@ public interface IDurationParserConfiguration : IDateTimeOptionsConfiguration { IExtractor CardinalExtractor { get; } - IExtractor DurationExtractor { get; } + IDateTimeExtractor DurationExtractor { get; } IParser NumberParser { get; } @@ -15,6 +18,8 @@ public interface IDurationParserConfiguration : IDateTimeOptionsConfiguration Regex AnUnitRegex { get; } + Regex PrefixArticleRegex { get; } + Regex DuringRegex { get; } Regex AllDateUnitRegex { get; } @@ -35,6 +40,8 @@ public interface IDurationParserConfiguration : IDateTimeOptionsConfiguration Regex SpecialNumberUnitRegex { get; } + bool CheckBothBeforeAfter { get; } + IImmutableDictionary UnitMap { get; } IImmutableDictionary UnitValueMap { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IFullDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IFullDateTimeParserConfiguration.cs deleted file mode 100644 index 0b9cb65f1f..0000000000 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IFullDateTimeParserConfiguration.cs +++ /dev/null @@ -1,80 +0,0 @@ -using System.Collections.Generic; -using System.Collections.Immutable; -using System.Text.RegularExpressions; - -namespace Microsoft.Recognizers.Text.DateTime -{ - public interface IFullDateTimeParserConfiguration : ISimpleDatePeriodParserConfiguration, IDateTimeOptionsConfiguration - { - int TwoNumYear { get; } - - string LastWeekDayToken { get; } - - string NextMonthToken { get; } - - string LastMonthToken { get; } - - string DatePrefix { get; } - - IEnumerable DateRegexList { get; } - - Regex NextRegex { get; } - - Regex ThisRegex { get; } - - Regex LastRegex { get; } - - Regex StrictWeekDayRegex { get; } - - Regex WeekDayOfMonthRegex { get; } - - Regex BeforeRegex { get; } - - Regex AfterRegex { get; } - 
- Regex UntilRegex { get; } - - Regex SincePrefixRegex { get; } - - Regex SinceSuffixRegex { get; } - - Regex EqualRegex { get; } - - ImmutableDictionary UnitMap { get; } - - ImmutableDictionary UnitValueMap { get; } - - ImmutableDictionary SeasonMap { get; } - - ImmutableDictionary SeasonValueMap { get; } - - ImmutableDictionary CardinalMap { get; } - - ImmutableDictionary DayOfMonth { get; } - - ImmutableDictionary DayOfWeek { get; } - - ImmutableDictionary MonthOfYear { get; } - - // TODO we need to use number parser - ImmutableDictionary Numbers { get; } - - IDateTimeParser DateParser { get; } - - IDateTimeParser TimeParser { get; } - - IDateTimeParser DateTimeParser { get; } - - IDateTimeParser DatePeriodParser { get; } - - IDateTimeParser TimePeriodParser { get; } - - IDateTimeParser DateTimePeriodParser { get; } - - IDateTimeParser DurationParser { get; } - - IDateTimeParser GetParser { get; } - - IDateTimeParser HolidayParser { get; } - } -} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IHolidayParserConfiguration.cs index bd0897c3a4..494286e2ef 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IHolidayParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IHolidayParserConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IMergedParserConfiguration.cs index c994e0c550..89886ea1ab 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IMergedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IMergedParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Matcher; @@ -25,5 +28,7 @@ public interface IMergedParserConfiguration : ICommonDateTimeParserConfiguration IDateTimeParser HolidayParser { get; } StringMatcher SuperfluousWordMatcher { get; } + + bool CheckBothBeforeAfter { get; } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ISetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ISetParserConfiguration.cs index 56964d7fd2..9f12a023ab 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ISetParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ISetParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime @@ -52,5 +55,7 @@ public interface ISetParserConfiguration : IDateTimeOptionsConfiguration bool GetMatchedUnitTimex(string text, out string timex); string WeekDayGroupMatchString(Match match); + + string ReplaceValueInTextWithFutTerm(string text, string value); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ISimpleDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ISimpleDatePeriodParserConfiguration.cs index 910d6b57d0..aeb026c403 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ISimpleDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ISimpleDatePeriodParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ITimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ITimeParserConfiguration.cs index 1f746f76b7..bad751930e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ITimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ITimeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.DateTime.Utilities; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ITimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ITimePeriodParserConfiguration.cs index c4c7d8fded..27319231c8 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ITimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ITimePeriodParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -32,6 +35,6 @@ public interface ITimePeriodParserConfiguration : IDateTimeOptionsConfiguration IDateTimeUtilityConfiguration UtilityConfiguration { get; } - bool GetMatchedTimexRange(string text, out string timex, out int beginHour, out int endHour, out int endMin); + bool GetMatchedTimeRange(string text, out string timex, out int beginHour, out int endHour, out int endMin); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ITimeZoneParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ITimeZoneParserConfiguration.cs new file mode 100644 index 0000000000..37072bf278 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/ITimeZoneParserConfiguration.cs @@ -0,0 +1,18 @@ +using System; +using System.Collections.Generic; +using System.Text; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public interface ITimeZoneParserConfiguration : IDateTimeOptionsConfiguration + { + Dictionary AbbrToMinMapping { get; } + + Dictionary FullToMinMapping { get; } + + Regex DirectUtcRegex { get; } + + string TimeZoneEndRegex { get; } + } +} diff --git 
a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateExtractorConfiguration.cs index f12be3acdd..1d222fb441 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; @@ -14,73 +17,73 @@ namespace Microsoft.Recognizers.Text.DateTime.Portuguese public class PortugueseDateExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateExtractorConfiguration { public static readonly Regex MonthRegex = - new Regex(DateTimeDefinitions.MonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex OnRegex = - new Regex(DateTimeDefinitions.OnRegex, RegexFlags); + new Regex(DateTimeDefinitions.OnRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelaxedOnRegex = 
- new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisRegex = - new Regex(DateTimeDefinitions.ThisRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisRegex, RegexFlags, RegexTimeOut); public static readonly Regex LastDateRegex = - new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextDateRegex = - new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDayRegex = - new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayOfMonthRegex = - new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDateRegex = - new Regex(DateTimeDefinitions.SpecialDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDayWithNumRegex = - new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeWeekDayRegex = - new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex ForTheRegex = - new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags); + new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags, RegexTimeOut); public 
static readonly Regex WeekDayAndDayOfMothRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayAndDayRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex StrictRelativeRegex = - new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixArticleRegex = - new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeConnectorSymbolRegex = - new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags); + new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] ImplicitDateList = { @@ -89,34 +92,37 @@ public class PortugueseDateExtractorConfiguration : BaseDateTimeOptionsConfigura }; public static readonly Regex OfMonth = - new Regex(DateTimeDefinitions.OfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.OfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthEnd = - new Regex(DateTimeDefinitions.MonthEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayEnd = - new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayStart = - new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags); + new 
Regex(DateTimeDefinitions.WeekDayStart, RegexFlags, RegexTimeOut); public static readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex BeforeAfterRegex = + new Regex(DateTimeDefinitions.BeforeAfterRegex, RegexFlags, RegexTimeOut); public static readonly ImmutableDictionary DayOfWeek = DateTimeDefinitions.DayOfWeek.ToImmutableDictionary(); @@ -127,46 +133,57 @@ public class PortugueseDateExtractorConfiguration : BaseDateTimeOptionsConfigura public PortugueseDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.Portuguese.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new PortugueseNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = 
NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Portuguese.OrdinalExtractor.GetInstance(numConfig); + NumberParser = new BaseNumberParser(new PortugueseNumberParserConfiguration(numConfig)); + DurationExtractor = new BaseDurationExtractor(new PortugueseDurationExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new PortugueseHolidayExtractorConfiguration(this)); UtilityConfiguration = new PortugueseDatetimeUtilityConfiguration(); // 3-23-2017 - var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags); + var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags, RegexTimeOut); // 23-3-2015 - var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags); + var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags, RegexTimeOut); // no|em 1.3 - var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags); + var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags, RegexTimeOut); // no|em 24-12 - var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags); + var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags, RegexTimeOut); // 7/23 - var dateRegex7 = new Regex(DateTimeDefinitions.DateExtractor7, RegexFlags); + var dateRegex7 = new Regex(DateTimeDefinitions.DateExtractor7, RegexFlags, RegexTimeOut); // 23/7 - var dateRegex9 = new Regex(DateTimeDefinitions.DateExtractor9, RegexFlags); + var dateRegex9 = new Regex(DateTimeDefinitions.DateExtractor9, RegexFlags, RegexTimeOut); // 2015-12-23 - var dateRegex10 = new Regex(DateTimeDefinitions.DateExtractor10, RegexFlags); + var dateRegex10 = new Regex(DateTimeDefinitions.DateExtractor10, RegexFlags, RegexTimeOut); // dia 15 - var dateRegex11 = new Regex(DateTimeDefinitions.DateExtractor11, 
RegexFlags); + var dateRegex11 = new Regex(DateTimeDefinitions.DateExtractor11, RegexFlags, RegexTimeOut); DateRegexList = new List { // (domingo,)? 5 de Abril - new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags, RegexTimeOut), // (domingo,)? 5 de Abril 5, 2016 - new Regex(DateTimeDefinitions.DateExtractor2, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor2, RegexFlags, RegexTimeOut), - // (domingo,)? 6 de Abril - new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags), + // (domingo,)? Abril 6 + new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags, RegexTimeOut), }; var enableDmy = DmyDateFormat || @@ -187,6 +204,8 @@ public PortugueseDateExtractorConfiguration(IDateTimeOptionsConfiguration config public IDateTimeExtractor DurationExtractor { get; } + public IDateTimeExtractor HolidayExtractor { get; } + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } IEnumerable IDateExtractorConfiguration.ImplicitDateList => ImplicitDateList; @@ -234,5 +253,7 @@ public PortugueseDateExtractorConfiguration(IDateTimeOptionsConfiguration config Regex IDateExtractorConfiguration.RangeUnitRegex => RangeUnitRegex; Regex IDateExtractorConfiguration.RangeConnectorSymbolRegex => RangeConnectorSymbolRegex; + + Regex IDateExtractorConfiguration.BeforeAfterRegex => BeforeAfterRegex; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDatePeriodExtractorConfiguration.cs index ea9057717c..ce3c229f84 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDatePeriodExtractorConfiguration.cs @@ -1,9 +1,13 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Portuguese; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Portuguese; +using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Portuguese { @@ -11,169 +15,172 @@ public class PortugueseDatePeriodExtractorConfiguration : BaseDateTimeOptionsCon { // base regexes public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); - - public static readonly Regex AndRegex = - new Regex(DateTimeDefinitions.AndRegex, RegexFlags); + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); public static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex IllegalYearRegex = - new Regex(BaseDateTime.IllegalYearRegex, RegexFlags); + new Regex(BaseDateTime.IllegalYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthRegex = - new Regex(DateTimeDefinitions.MonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthSuffixRegex = - new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthSuffixRegex, 
RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PastRegex = - new Regex(DateTimeDefinitions.PastRegex, RegexFlags); + new Regex(DateTimeDefinitions.PastRegex, RegexFlags, RegexTimeOut); public static readonly Regex FutureRegex = - new Regex(DateTimeDefinitions.FutureRegex, RegexFlags); + new Regex(DateTimeDefinitions.FutureRegex, RegexFlags, RegexTimeOut); public static readonly Regex FutureSuffixRegex = - new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags, RegexTimeOut); // composite regexes public static readonly Regex SimpleCasesRegex = - new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthFrontSimpleCasesRegex = - new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthFrontBetweenRegex = - new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags, RegexTimeOut); public static readonly Regex DayBetweenRegex = - new Regex(DateTimeDefinitions.DayBetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayBetweenRegex, RegexFlags, RegexTimeOut); // TODO: modify it according to the related regex in English public static readonly Regex OneWordPeriodRegex = - new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags, RegexTimeOut); public 
static readonly Regex MonthWithYearRegex = - new Regex(DateTimeDefinitions.MonthWithYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthWithYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumWithYearRegex = - new Regex(DateTimeDefinitions.MonthNumWithYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumWithYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfMonthRegex = - new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfYearRegex = - new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex FollowedDateUnit = - new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithDateUnit = - new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags, RegexTimeOut); public static readonly Regex QuarterRegex = - new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags); + new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags, RegexTimeOut); public static readonly Regex QuarterRegexYearFront = - new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags); + new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags, RegexTimeOut); public static readonly Regex AllHalfYearRegex = - new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex SeasonRegex = - new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags); + new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags, RegexTimeOut); public static readonly Regex WhichWeekRegex = - new 
Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags); + new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfRegex = - new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthOfRegex = - new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterEarlyPeriodRegex = - new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut); // TODO: add this regex, let it correspond to the one in English public static readonly Regex RestOfDateRegex = - new Regex(@"^[.]", RegexFlags); + new Regex(@"^[.]", RegexFlags, RegexTimeOut); // TODO: add this regex, let it correspond to the one in English public static readonly Regex WeekWithWeekDayRangeRegex = - new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearPlusNumberRegex = - new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex DecadeWithCenturyRegex = - new 
Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags); + new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearPeriodRegex = - new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex ComplexDatePeriodRegex = - new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDecadeRegex = - new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags, RegexTimeOut); public static readonly Regex ReferenceDatePeriodRegex = - new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoRegex, RegexFlags); + new Regex(DateTimeDefinitions.AgoRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterRegex, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex CenturySuffixRegex = - new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); + + 
public static readonly Regex FirstLastRegex = + new Regex(DateTimeDefinitions.FirstLastRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex OfYearRegex = + new Regex(DateTimeDefinitions.OfYearRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex FromRegex = - new Regex(DateTimeDefinitions.FromRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); - private static readonly Regex ConnectorAndRegex = - new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags); + private static readonly Regex RangeConnectorRegex = + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); private static readonly Regex BetweenRegex = - new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags, RegexTimeOut); private static readonly Regex[] SimpleCasesRegexes = { @@ -191,6 +198,7 @@ public class PortugueseDatePeriodExtractorConfiguration : BaseDateTimeOptionsCon QuarterRegex, QuarterRegexYearFront, SeasonRegex, + WhichWeekRegex, RestOfDateRegex, LaterEarlyPeriodRegex, WeekWithWeekDayRangeRegex, @@ -203,10 +211,20 @@ public PortugueseDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration : base(config) { DatePointExtractor = new BaseDateExtractor(new PortugueseDateExtractorConfiguration(this)); - CardinalExtractor = Number.Portuguese.CardinalExtractor.GetInstance(); - OrdinalExtractor = Number.Portuguese.OrdinalExtractor.GetInstance(); DurationExtractor = new BaseDurationExtractor(new PortugueseDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new PortugueseNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new 
BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Portuguese.CardinalExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Portuguese.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new PortugueseNumberParserConfiguration(numConfig)); } public IDateExtractor DatePointExtractor { get; } @@ -273,6 +291,10 @@ public PortugueseDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration Regex IDatePeriodExtractorConfiguration.NowRegex => NowRegex; + Regex IDatePeriodExtractorConfiguration.FirstLastRegex => FirstLastRegex; + + Regex IDatePeriodExtractorConfiguration.OfYearRegex => OfYearRegex; + bool IDatePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; string[] IDatePeriodExtractorConfiguration.DurationDateRestrictions => DateTimeDefinitions.DurationDateRestrictions; @@ -305,7 +327,7 @@ public bool GetBetweenTokenIndex(string text, out int index) public bool HasConnectorToken(string text) { - return ConnectorAndRegex.IsMatch(text); + return RangeConnectorRegex.IsExactMatch(text, true); } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimeAltExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimeAltExtractorConfiguration.cs index a3dbb11e0f..3491a60919 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimeAltExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimeAltExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; @@ -7,22 +10,22 @@ namespace Microsoft.Recognizers.Text.DateTime.Portuguese public class PortugueseDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeAltExtractorConfiguration { public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PastPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangePrefixRegex = - new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] RelativePrefixList = { @@ -37,10 +40,10 @@ public class PortugueseDateTimeAltExtractorConfiguration : BaseDateTimeOptionsCo private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex OrRegex = - new Regex(DateTimeDefinitions.OrRegex, RegexFlags); + new Regex(DateTimeDefinitions.OrRegex, RegexFlags, RegexTimeOut); private static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public 
PortugueseDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimeExtractorConfiguration.cs index b901d63e28..074815c634 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimeExtractorConfiguration.cs @@ -1,79 +1,94 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; using Microsoft.Recognizers.Text.DateTime.Portuguese.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Portuguese { public class PortugueseDateTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeExtractorConfiguration { public static readonly Regex PrepositionRegex = - new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixRegex = - new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); // TODO: modify it according to the corresponding English regex public static readonly Regex TimeOfDayRegex = - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex 
SpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificEndOfRegex = - new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificEndOfRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); // TODO: add this for Portuguese public static readonly Regex UnitRegex = - new Regex(DateTimeDefinitions.UnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex 
ConnectorRegex = - new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex NumberAsTimeRegex = - new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateNumberConnectorRegex = - new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public PortugueseDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(numConfig); + DatePointExtractor = new BaseDateExtractor(new PortugueseDateExtractorConfiguration(this)); TimePointExtractor = new BaseTimeExtractor(new PortugueseTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new PortugueseDurationExtractorConfiguration(this)); UtilityConfiguration = new PortugueseDatetimeUtilityConfiguration(); + HolidayExtractor = new BaseHolidayExtractor(new PortugueseHolidayExtractorConfiguration(this)); + } public IExtractor IntegerExtractor { get; } @@ -86,6 +101,8 @@ public PortugueseDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration co public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + 
public IDateTimeExtractor HolidayExtractor { get; } + Regex IDateTimeExtractorConfiguration.NowRegex => NowRegex; Regex IDateTimeExtractorConfiguration.SuffixRegex => SuffixRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimePeriodExtractorConfiguration.cs index b8d5c1fabd..7e807fbac0 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimePeriodExtractorConfiguration.cs @@ -1,73 +1,90 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Portuguese; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Portuguese { public class PortugueseDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimePeriodExtractorConfiguration { public static readonly Regex NumberCombinedWithUnit = - new Regex(DateTimeDefinitions.DateTimePeriodNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.DateTimePeriodNumberCombinedWithUnit, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex RestOfDateTimeRegex = - new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex HyphenDateRegex = + new Regex(BaseDateTime.HyphenDateRegex, 
RegexFlags, RegexTimeOut); public static readonly Regex PeriodTimeOfDayWithDateRegex = - new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeTimeUnitRegex = - new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); public static readonly Regex MiddlePauseRegex = - new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags); + new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixDayRegex = new Regex(DateTimeDefinitions.PrefixDayRegex, RegexFlags | RegexOptions.RightToLeft); public static readonly Regex SuffixRegex = - new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex FromRegex = - new Regex(DateTimeDefinitions.FromRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); - private static readonly Regex ConnectorAndRegex = - new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags); + private static readonly Regex RangeConnectorRegex = + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); private static readonly Regex BetweenRegex = - new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags, RegexTimeOut); public PortugueseDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; - CardinalExtractor = Number.Portuguese.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Portuguese.CardinalExtractor.GetInstance(numConfig); SingleDateExtractor = new BaseDateExtractor(new PortugueseDateExtractorConfiguration(this)); SingleTimeExtractor = new BaseTimeExtractor(new PortugueseTimeExtractorConfiguration(this)); @@ -75,6 +92,8 @@ public PortugueseDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfigurat DurationExtractor = new BaseDurationExtractor(new PortugueseDurationExtractorConfiguration(this)); TimePeriodExtractor = new BaseTimePeriodExtractor(new PortugueseTimePeriodExtractorConfiguration(this)); TimeZoneExtractor = new 
BaseTimeZoneExtractor(new PortugueseTimeZoneExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new PortugueseHolidayExtractorConfiguration(this)); + } public string TokenBeforeDate { get; } @@ -93,6 +112,8 @@ public PortugueseDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfigurat public IDateTimeExtractor TimeZoneExtractor { get; } + public IDateTimeExtractor HolidayExtractor { get; } + public IEnumerable SimpleCasesRegex => new[] { PortugueseTimePeriodExtractorConfiguration.PureNumFromTo, @@ -149,6 +170,8 @@ public PortugueseDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfigurat Regex IDateTimePeriodExtractorConfiguration.AfterRegex => AfterRegex; + Regex IDateTimePeriodExtractorConfiguration.TasksmodeMealTimeofDayRegex => null; + public bool GetFromTokenIndex(string text, out int index) { index = -1; @@ -176,7 +199,7 @@ public bool GetBetweenTokenIndex(string text, out int index) public bool HasConnectorToken(string text) { - return ConnectorAndRegex.IsMatch(text); + return RangeConnectorRegex.IsExactMatch(text, true); } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDurationExtractorConfiguration.cs index fb2c71138e..fb0eeb5ae7 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDurationExtractorConfiguration.cs @@ -1,69 +1,89 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Portuguese { public class PortugueseDurationExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDurationExtractorConfiguration { public static readonly Regex UnitRegex = - new Regex(DateTimeDefinitions.UnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnitRegex, RegexFlags, RegexTimeOut); // TODO: improve Portuguese the SuffixAndRegex public static readonly Regex SuffixAndRegex = - new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags, RegexTimeOut); public static readonly Regex FollowedUnit = - new Regex(DateTimeDefinitions.FollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.FollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithUnit = - new Regex(DateTimeDefinitions.DurationNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.DurationNumberCombinedWithUnit, RegexFlags, RegexTimeOut); // TODO: add half in AnUnitRegex public static readonly Regex AnUnitRegex = - new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex AllRegex = - new Regex(DateTimeDefinitions.AllRegex, RegexFlags); + new Regex(DateTimeDefinitions.AllRegex, RegexFlags, RegexTimeOut); public static readonly Regex DuringRegex = - new Regex(DateTimeDefinitions.DuringRegex, RegexFlags); + new Regex(DateTimeDefinitions.DuringRegex, RegexFlags, RegexTimeOut); public static readonly Regex HalfRegex = - new Regex(DateTimeDefinitions.HalfRegex, RegexFlags); + new Regex(DateTimeDefinitions.HalfRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConjunctionRegex = - new Regex(DateTimeDefinitions.ConjunctionRegex, 
RegexFlags); + new Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags, RegexTimeOut); public static readonly Regex InexactNumberRegex = - new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex InexactNumberUnitRegex = - new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDurationUnitRegex = - new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationUnitRegex = - new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationConnectorRegex = - new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModPrefixRegex = + new Regex(DateTimeDefinitions.ModPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModSuffixRegex = + new Regex(DateTimeDefinitions.ModSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialNumberUnitRegex = null; public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public PortugueseDurationExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - CardinalExtractor = 
Number.Portuguese.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Portuguese.CardinalExtractor.GetInstance(numConfig); + UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary(); UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary(); } @@ -107,5 +127,11 @@ public PortugueseDurationExtractorConfiguration(IDateTimeOptionsConfiguration co Regex IDurationExtractorConfiguration.MoreThanRegex => MoreThanRegex; Regex IDurationExtractorConfiguration.LessThanRegex => LessThanRegex; + + Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex; + + Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex; + + public Dictionary AmbiguityFiltersDict => null; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseHolidayExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseHolidayExtractorConfiguration.cs index 589dac80ac..817ac7aad9 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseHolidayExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseHolidayExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; @@ -9,9 +12,9 @@ public class PortugueseHolidayExtractorConfiguration : BaseDateTimeOptionsConfig { public static readonly Regex[] HolidayRegexList = { - new Regex(DateTimeDefinitions.HolidayRegex1, RegexFlags), - new Regex(DateTimeDefinitions.HolidayRegex2, RegexFlags), - new Regex(DateTimeDefinitions.HolidayRegex3, RegexFlags), + new Regex(DateTimeDefinitions.HolidayRegex1, RegexFlags, RegexTimeOut), + new Regex(DateTimeDefinitions.HolidayRegex2, RegexFlags, RegexTimeOut), + new Regex(DateTimeDefinitions.HolidayRegex3, RegexFlags, RegexTimeOut), }; private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseMergedExtractorConfiguration.cs index c50ba058ec..af18af3127 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseMergedExtractorConfiguration.cs @@ -1,53 +1,62 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; + using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Portuguese; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Matcher; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Portuguese { public class PortugueseMergedExtractorConfiguration : BaseDateTimeOptionsConfiguration, IMergedExtractorConfiguration { public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex SinceRegex = - new Regex(DateTimeDefinitions.SinceRegex, RegexFlags); + new Regex(DateTimeDefinitions.SinceRegex, RegexFlags, RegexTimeOut); public static readonly Regex AroundRegex = - new Regex(DateTimeDefinitions.AroundRegex, RegexFlags); + new Regex(DateTimeDefinitions.AroundRegex, RegexFlags, RegexTimeOut); public static readonly Regex EqualRegex = - new Regex(BaseDateTime.EqualRegex, RegexFlags); + new Regex(BaseDateTime.EqualRegex, RegexFlags, RegexTimeOut); // TODO: change the following three regexes to Portuguese if there are the same requirement of splitting from A to B as two time points public static readonly Regex FromToRegex = - new Regex(DateTimeDefinitions.FromToRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromToRegex, RegexFlags, RegexTimeOut); public static readonly Regex SingleAmbiguousMonthRegex = - new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrepositionSuffixRegex = - new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmbiguousRangeModifierPrefix = - new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags); + new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags, RegexTimeOut); public static readonly Regex NumberEndingPattern = - new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags); + new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificDatePeriodRegex = - new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher(); - public static readonly Regex[] TermFilterRegexes = { }; + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex[] TermFilterRegexes = System.Array.Empty(); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -64,8 +73,19 @@ public PortugueseMergedExtractorConfiguration(IDateTimeOptionsConfiguration conf SetExtractor = new BaseSetExtractor(new PortugueseSetExtractorConfiguration(this)); HolidayExtractor = new BaseHolidayExtractor(new PortugueseHolidayExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new PortugueseTimeZoneExtractorConfiguration(this)); - IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(); DateTimeAltExtractor = new BaseDateTimeAltExtractor(new PortugueseDateTimeAltExtractorConfiguration(this)); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + 
numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(numConfig); + + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict); } public IDateExtractor DateExtractor { get; } @@ -92,7 +112,7 @@ public PortugueseMergedExtractorConfiguration(IDateTimeOptionsConfiguration conf public IDateTimeListExtractor DateTimeAltExtractor { get; } - public Dictionary AmbiguityFiltersDict { get; } = null; + public Dictionary AmbiguityFiltersDict { get; } Regex IMergedExtractorConfiguration.AfterRegex => AfterRegex; @@ -122,10 +142,17 @@ public PortugueseMergedExtractorConfiguration(IDateTimeOptionsConfiguration conf Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + public Regex FailFastRegex { get; } = null; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; StringMatcher IMergedExtractorConfiguration.SuperfluousWordMatcher => SuperfluousWordMatcher; + + bool IMergedExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + public Regex TasksModeMentionFilters { get; } + } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseSetExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseSetExtractorConfiguration.cs index 12608386d1..9cf7f17e4f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseSetExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseSetExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; @@ -11,25 +14,25 @@ public class PortugueseSetExtractorConfiguration : BaseDateTimeOptionsConfigurat public static readonly string ExtractorName = Constants.SYS_DATETIME_SET; public static readonly Regex PeriodicRegex = - new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachUnitRegex = - new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachPrefixRegex = - new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachDayRegex = - new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex BeforeEachDayRegex = - new Regex(DateTimeDefinitions.BeforeEachDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeEachDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetWeekDayRegex = - new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetEachRegex = - new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -59,6 +62,8 @@ public PortugueseSetExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DateTimePeriodExtractor { get; } + bool ISetExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + Regex ISetExtractorConfiguration.LastRegex => 
PortugueseDateExtractorConfiguration.LastDateRegex; Regex ISetExtractorConfiguration.EachPrefixRegex => EachPrefixRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimeExtractorConfiguration.cs index 0cd2fe70f0..a7735953d9 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimeExtractorConfiguration.cs @@ -1,7 +1,11 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; +using Microsoft.Recognizers.Definitions.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Portuguese { @@ -10,47 +14,47 @@ public class PortugueseTimeExtractorConfiguration : BaseDateTimeOptionsConfigura // part 1: smallest component // -------------------------------------- public static readonly Regex DescRegex = - new Regex(DateTimeDefinitions.DescRegex, RegexFlags); + new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut); public static readonly Regex HourNumRegex = - new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex MinuteNumRegex = - new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags, RegexTimeOut); // part 2: middle level component // -------------------------------------- // handle "... en punto" public static readonly Regex OclockRegex = - new Regex(DateTimeDefinitions.OclockRegex, RegexFlags); + new Regex(DateTimeDefinitions.OclockRegex, RegexFlags, RegexTimeOut); // handle "... 
tarde" public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); // handle "... de la mañana" public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); // handle "y media ..." "menos cuarto ..." public static readonly Regex LessThanOneHour = - new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags); + new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags, RegexTimeOut); public static readonly Regex TensTimeRegex = - new Regex(DateTimeDefinitions.TensTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.TensTimeRegex, RegexFlags, RegexTimeOut); // handle "seis treinta", "seis veintiuno", "seis menos diez" public static readonly Regex WrittenTimeRegex = - new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimePrefix = - new Regex(DateTimeDefinitions.TimePrefix, RegexFlags); + new Regex(DateTimeDefinitions.TimePrefix, RegexFlags, RegexTimeOut); public static readonly Regex TimeSuffix = - new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags); + new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags, RegexTimeOut); public static readonly Regex BasicTime = - new Regex(DateTimeDefinitions.BasicTime, RegexFlags); + new Regex(DateTimeDefinitions.BasicTime, RegexFlags, RegexTimeOut); // part 3: regex for time // -------------------------------------- @@ -58,50 +62,48 @@ public class PortugueseTimeExtractorConfiguration : BaseDateTimeOptionsConfigura // TODO: add some new regex which have used in AtRegex // TODO: modify according to corresponding English regex public static readonly Regex AtRegex = - new Regex(DateTimeDefinitions.AtRegex, RegexFlags); + new Regex(DateTimeDefinitions.AtRegex, RegexFlags, RegexTimeOut); public static readonly Regex 
ConnectNumRegex = - new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeBeforeAfterRegex = - new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] TimeRegexList = { // (tres min pasadas las)? siete|7|(siete treinta) pm - new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags, RegexTimeOut), // (tres min pasadas las)? 3:00(:00)? (pm)? - new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags, RegexTimeOut), // (tres min pasadas las)? 3.00 (pm) - new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags, RegexTimeOut), // (tres min pasadas las) (cinco treinta|siete|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags, RegexTimeOut), // (tres min pasadas las) (cinco treinta|siete|7|7:00(:00)?) (pm)? (de la noche) - new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags, RegexTimeOut), // (cinco treinta|siete|7|7:00(:00)?) (pm)? (de la noche) - new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags, RegexTimeOut), // (En la noche) a las (cinco treinta|siete|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags, RegexTimeOut), // (En la noche) (cinco treinta|siete|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex8, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex8, RegexFlags, RegexTimeOut), // once (y)? 
veinticinco - new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags), - - new Regex(DateTimeDefinitions.TimeRegex10, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags, RegexTimeOut), // (tres menos veinte) (pm)? - new Regex(DateTimeDefinitions.TimeRegex11, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex11, RegexFlags, RegexTimeOut), // (tres min pasadas las)? 3h00 (pm)? - new Regex(DateTimeDefinitions.TimeRegex12, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex12, RegexFlags, RegexTimeOut), WrittenTimeRegex, @@ -129,5 +131,9 @@ public PortugueseTimeExtractorConfiguration(IDateTimeOptionsConfiguration config public IDateTimeExtractor DurationExtractor { get; } public IDateTimeExtractor TimeZoneExtractor { get; } + + public string TimeTokenPrefix => DateTimeDefinitions.TimeTokenPrefix; + + public Dictionary AmbiguityFiltersDict => DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityTimeFiltersDict); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimePeriodExtractorConfiguration.cs index 4b052e70fb..188ac6a401 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimePeriodExtractorConfiguration.cs @@ -1,9 +1,14 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; using Microsoft.Recognizers.Text.DateTime.Portuguese.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Portuguese { @@ -12,49 +17,49 @@ public class PortugueseTimePeriodExtractorConfiguration : BaseDateTimeOptionsCon public static readonly string ExtractorName = Constants.SYS_DATETIME_TIMEPERIOD; // "TimePeriod"; public static readonly Regex HourNumRegex = - new Regex(DateTimeDefinitions.TimeHourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeHourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex PureNumFromTo = - new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags); + new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags, RegexTimeOut); public static readonly Regex PureNumBetweenAnd = - new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeFromTo = - new Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeBetweenAnd = - new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex UnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex FollowedUnit = - new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithUnit = - new 
Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); // TODO: add this according to corresponding English regex public static readonly Regex TimeOfDayRegex = - new Regex(string.Empty, RegexFlags); + new Regex(string.Empty, RegexFlags, RegexTimeOut); public static readonly Regex GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex FromRegex = - new Regex(DateTimeDefinitions.FromRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); - private static readonly Regex ConnectorAndRegex = - new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags); + private static readonly Regex RangeConnectorRegex = + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); private static readonly Regex BetweenRegex = - new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags, RegexTimeOut); public PortugueseTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) @@ -62,7 +67,17 @@ public PortugueseTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; SingleTimeExtractor = new BaseTimeExtractor(new PortugueseTimeExtractorConfiguration(this)); UtilityConfiguration = new PortugueseDatetimeUtilityConfiguration(); - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + 
numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.English.IntegerExtractor.GetInstance(numConfig); + TimeZoneExtractor = new BaseTimeZoneExtractor(new PortugueseTimeZoneExtractorConfiguration(this)); } @@ -114,7 +129,9 @@ public bool GetBetweenTokenIndex(string text, out int index) public bool IsConnectorToken(string text) { - return ConnectorAndRegex.IsMatch(text); + return RangeConnectorRegex.IsExactMatch(text, true); } + + public List ApplyPotentialPeriodAmbiguityHotfix(string text, List timePeriodErs) => TimePeriodFunctions.ApplyPotentialPeriodAmbiguityHotfix(text, timePeriodErs); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimeZoneExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimeZoneExtractorConfiguration.cs index 554c7ee0e9..fad9cedc71 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimeZoneExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimeZoneExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Matcher; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/DateTimePeriodParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/DateTimePeriodParser.cs index e278b6816d..fce04784d0 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/DateTimePeriodParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/DateTimePeriodParser.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using Microsoft.Recognizers.Definitions.Portuguese; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseCommonDateTimeParserConfiguration.cs index 1c20072293..4d0946691a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseCommonDateTimeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using Microsoft.Recognizers.Definitions.Portuguese; using Microsoft.Recognizers.Text.DateTime.Portuguese.Utilities; @@ -26,22 +29,35 @@ public PortugueseCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); - CardinalExtractor = Number.Portuguese.CardinalExtractor.GetInstance(); - IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.Portuguese.OrdinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Portuguese.CardinalExtractor.GetInstance(numConfig); + IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Portuguese.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new PortugueseNumberParserConfiguration(numConfig)); - 
NumberParser = new BaseNumberParser(new PortugueseNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + // Do not change order. The order of initialization can lead to side-effects DateExtractor = new BaseDateExtractor(new PortugueseDateExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new PortugueseHolidayExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new PortugueseTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new PortugueseDateTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new PortugueseDurationExtractorConfiguration(this)); DatePeriodExtractor = new BaseDatePeriodExtractor(new PortugueseDatePeriodExtractorConfiguration(this)); TimePeriodExtractor = new BaseTimePeriodExtractor(new PortugueseTimePeriodExtractorConfiguration(this)); DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(new PortugueseDateTimePeriodExtractorConfiguration(this)); + + DurationParser = new BaseDurationParser(new PortugueseDurationParserConfiguration(this)); DateParser = new BaseDateParser(new PortugueseDateParserConfiguration(this)); + HolidayTimeParser = new BaseHolidayParser(new PortugueseHolidayParserConfiguration(this)); TimeParser = new BaseTimeParser(new PortugueseTimeParserConfiguration(this)); DateTimeParser = new BaseDateTimeParser(new PortugueseDateTimeParserConfiguration(this)); - DurationParser = new BaseDurationParser(new PortugueseDurationParserConfiguration(this)); DatePeriodParser = new BaseDatePeriodParser(new PortugueseDatePeriodParserConfiguration(this)); TimePeriodParser = new BaseTimePeriodParser(new PortugueseTimePeriodParserConfiguration(this)); DateTimePeriodParser = new BaseDateTimePeriodParser(new PortugueseDateTimePeriodParserConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDateParserConfiguration.cs 
b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDateParserConfiguration.cs index 92387bb48e..0e33336bbc 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDateParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDateParserConfiguration.cs @@ -1,7 +1,10 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; -using System.Diagnostics.CodeAnalysis; using System.Text.RegularExpressions; + using Microsoft.Recognizers.Definitions.Portuguese; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -35,12 +38,13 @@ public PortugueseDateParserConfiguration(ICommonDateTimeParserConfiguration conf StrictRelativeRegex = PortugueseDateExtractorConfiguration.StrictRelativeRegex; YearSuffix = PortugueseDateExtractorConfiguration.YearSuffix; RelativeWeekDayRegex = PortugueseDateExtractorConfiguration.RelativeWeekDayRegex; + BeforeAfterRegex = PortugueseDateExtractorConfiguration.BeforeAfterRegex; - RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexFlags); - NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); - PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); - UpcomingPrefixRegex = new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags); - PastPrefixRegex = new Regex(DateTimeDefinitions.PastPrefixRegex, RegexFlags); + RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexFlags, RegexTimeOut); + NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + UpcomingPrefixRegex = new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags, RegexTimeOut); + PastPrefixRegex = new 
Regex(DateTimeDefinitions.PastPrefixRegex, RegexFlags, RegexTimeOut); DayOfMonth = config.DayOfMonth; DayOfWeek = config.DayOfWeek; @@ -50,9 +54,10 @@ public PortugueseDateParserConfiguration(ICommonDateTimeParserConfiguration conf OrdinalExtractor = config.OrdinalExtractor; CardinalExtractor = config.CardinalExtractor; NumberParser = config.NumberParser; - DurationExtractor = config.DurationExtractor; DateExtractor = config.DateExtractor; + DurationExtractor = config.DurationExtractor; DurationParser = config.DurationParser; + HolidayParser = new BaseHolidayParser(new PortugueseHolidayParserConfiguration(this)); UnitMap = config.UnitMap; UtilityConfiguration = config.UtilityConfiguration; @@ -79,6 +84,8 @@ public PortugueseDateParserConfiguration(ICommonDateTimeParserConfiguration conf public IDateTimeParser DurationParser { get; } + public IDateTimeParser HolidayParser { get; } + public IImmutableDictionary UnitMap { get; } public IEnumerable DateRegexes { get; } @@ -127,6 +134,10 @@ public PortugueseDateParserConfiguration(ICommonDateTimeParserConfiguration conf public Regex PastPrefixRegex { get; } + public Regex BeforeAfterRegex { get; } + + public Regex TasksModeDurationToDatePatterns { get; } + public IImmutableDictionary DayOfMonth { get; } public IImmutableDictionary DayOfWeek { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDatePeriodParserConfiguration.cs index e5bced8ef1..188cbdd0fc 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDatePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; @@ -11,19 +15,22 @@ public class PortugueseDatePeriodParserConfiguration : BaseDateTimeOptionsConfig { // TODO: config this according to English public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeRegex = - new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificEndOfRangeRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialYearPrefixes = + new Regex(DateTimeDefinitions.SpecialYearPrefixes, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -77,6 +84,10 @@ public PortugueseDatePeriodParserConfiguration(ICommonDateTimeParserConfiguratio MoreThanRegex = PortugueseDatePeriodExtractorConfiguration.MoreThanRegex; CenturySuffixRegex = PortugueseDatePeriodExtractorConfiguration.CenturySuffixRegex; NowRegex = PortugueseDatePeriodExtractorConfiguration.NowRegex; + FirstLastRegex = PortugueseDatePeriodExtractorConfiguration.FirstLastRegex; + OfYearRegex = PortugueseDatePeriodExtractorConfiguration.OfYearRegex; + SpecialDayRegex = PortugueseDateExtractorConfiguration.SpecialDayRegex; + 
TodayNowRegex = new Regex(DateTimeDefinitions.TodayNowRegex, RegexOptions.Singleline); UnitMap = config.UnitMap; CardinalMap = config.CardinalMap; DayOfMonth = config.DayOfMonth; @@ -186,6 +197,14 @@ public PortugueseDatePeriodParserConfiguration(ICommonDateTimeParserConfiguratio public Regex NowRegex { get; } + public Regex SpecialDayRegex { get; } + + public Regex TodayNowRegex { get; } + + public Regex FirstLastRegex { get; } + + public Regex OfYearRegex { get; } + Regex IDatePeriodParserConfiguration.NextPrefixRegex => NextPrefixRegex; Regex IDatePeriodParserConfiguration.PreviousPrefixRegex => PreviousPrefixRegex; @@ -196,6 +215,8 @@ public PortugueseDatePeriodParserConfiguration(ICommonDateTimeParserConfiguratio Regex IDatePeriodParserConfiguration.UnspecificEndOfRangeRegex => UnspecificEndOfRangeRegex; + Regex IDatePeriodParserConfiguration.AmbiguousPointRangeRegex => null; + bool IDatePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; public IImmutableDictionary UnitMap { get; } @@ -270,38 +291,45 @@ public bool IsLastCardinal(string text) public bool IsMonthOnly(string text) { var trimmedText = text.Trim().Normalized(DateTimeDefinitions.SpecialCharactersEquivalent); - return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); } public bool IsMonthToDate(string text) { var trimmedText = text.Trim().Normalized(DateTimeDefinitions.SpecialCharactersEquivalent); - return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } public bool IsWeekend(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); } public bool 
IsWeekOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o)) && - !DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) && + !DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); + } + + public bool IsFortnight(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.FortnightTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); } public bool IsYearOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (DateTimeDefinitions.YearTerms.Any(o => trimmedText.Contains(o)) && SpecialYearPrefixes.IsMatch(trimmedText)); } public bool IsYearToDate(string text) { var trimmedText = text.Trim().Normalized(DateTimeDefinitions.SpecialCharactersEquivalent); - return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDateTimeParserConfiguration.cs index 3bc3458238..0ba928a05e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDateTimeParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; @@ -20,10 +24,12 @@ public PortugueseDateTimeParserConfiguration(ICommonDateTimeParserConfiguration TimeExtractor = config.TimeExtractor; DateParser = config.DateParser; TimeParser = config.TimeParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; NowRegex = PortugueseDateTimeExtractorConfiguration.NowRegex; - AMTimeRegex = new Regex(DateTimeDefinitions.AmTimeRegex, RegexFlags); - PMTimeRegex = new Regex(DateTimeDefinitions.PmTimeRegex, RegexFlags); + AMTimeRegex = new Regex(DateTimeDefinitions.AmTimeRegex, RegexFlags, RegexTimeOut); + PMTimeRegex = new Regex(DateTimeDefinitions.PmTimeRegex, RegexFlags, RegexTimeOut); SimpleTimeOfTodayAfterRegex = PortugueseDateTimeExtractorConfiguration.SimpleTimeOfTodayAfterRegex; SimpleTimeOfTodayBeforeRegex = PortugueseDateTimeExtractorConfiguration.SimpleTimeOfTodayBeforeRegex; SpecificTimeOfDayRegex = PortugueseDateTimeExtractorConfiguration.SpecificTimeOfDayRegex; @@ -93,17 +99,25 @@ public PortugueseDateTimeParserConfiguration(ICommonDateTimeParserConfiguration public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + public int GetHour(string text, int hour) { var trimmedText = text.Trim().Normalized(DateTimeDefinitions.SpecialCharactersEquivalent); int result = hour; - // TODO: Replace with a regex - if ((trimmedText.EndsWith("manha") || trimmedText.EndsWith("madrugada")) && hour >= Constants.HalfDayHourCount) + // @TODO move hardcoded values to resources file + if ((trimmedText.EndsWith("manha", StringComparison.Ordinal) || + trimmedText.EndsWith("madrugada", StringComparison.Ordinal)) && + hour >= Constants.HalfDayHourCount) { result -= Constants.HalfDayHourCount; } - else if (!(trimmedText.EndsWith("manha") || 
trimmedText.EndsWith("madrugada")) && hour < Constants.HalfDayHourCount) + else if (!(trimmedText.EndsWith("manha", StringComparison.Ordinal) || + trimmedText.EndsWith("madrugada", StringComparison.Ordinal)) && + hour < Constants.HalfDayHourCount) { result += Constants.HalfDayHourCount; } @@ -115,16 +129,22 @@ public bool GetMatchedNowTimex(string text, out string timex) { var trimmedText = text.Trim().Normalized(DateTimeDefinitions.SpecialCharactersEquivalent); - if (trimmedText.EndsWith("agora") || trimmedText.EndsWith("mesmo") || trimmedText.EndsWith("momento")) + // @TODO move hardcoded values to resources file + if (trimmedText.EndsWith("agora", StringComparison.Ordinal) || + trimmedText.EndsWith("mesmo", StringComparison.Ordinal) || + trimmedText.EndsWith("momento", StringComparison.Ordinal)) { timex = "PRESENT_REF"; } - else if (trimmedText.EndsWith("possivel") || trimmedText.EndsWith("possa") || - trimmedText.EndsWith("possas") || trimmedText.EndsWith("possamos") || trimmedText.EndsWith("possam")) + else if (trimmedText.EndsWith("possivel", StringComparison.Ordinal) || + trimmedText.EndsWith("possa", StringComparison.Ordinal) || + trimmedText.EndsWith("possas", StringComparison.Ordinal) || + trimmedText.EndsWith("possamos", StringComparison.Ordinal) || + trimmedText.EndsWith("possam", StringComparison.Ordinal)) { timex = "FUTURE_REF"; } - else if (trimmedText.EndsWith("mente")) + else if (trimmedText.EndsWith("mente", StringComparison.Ordinal)) { timex = "PAST_REF"; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDateTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDateTimePeriodParserConfiguration.cs index e333603db3..2269545f71 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDateTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDateTimePeriodParserConfiguration.cs @@ -1,4 
+1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; @@ -11,7 +15,8 @@ public class PortugueseDateTimePeriodParserConfiguration : BaseDateTimeOptionsCo public PortugueseDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { - TokenBeforeDate = Definitions.Portuguese.DateTimeDefinitions.TokenBeforeDate; + TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime; DateExtractor = config.DateExtractor; TimeExtractor = config.TimeExtractor; @@ -26,8 +31,11 @@ public PortugueseDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigur TimePeriodParser = config.TimePeriodParser; DurationParser = config.DurationParser; TimeZoneParser = config.TimeZoneParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; PureNumberFromToRegex = PortugueseTimePeriodExtractorConfiguration.PureNumFromTo; + HyphenDateRegex = PortugueseDateTimePeriodExtractorConfiguration.HyphenDateRegex; PureNumberBetweenAndRegex = PortugueseTimePeriodExtractorConfiguration.PureNumBetweenAnd; SpecificTimeOfDayRegex = PortugueseDateTimeExtractorConfiguration.SpecificTimeOfDayRegex; TimeOfDayRegex = PortugueseDateTimeExtractorConfiguration.TimeOfDayRegex; @@ -51,6 +59,8 @@ public PortugueseDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigur public string TokenBeforeDate { get; } + public string TokenBeforeTime { get; } + public IDateExtractor DateExtractor { get; } public IDateTimeExtractor TimeExtractor { get; } @@ -79,6 +89,8 @@ public PortugueseDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigur public Regex PureNumberFromToRegex { get; } + public Regex HyphenDateRegex { get; } + public Regex 
PureNumberBetweenAndRegex { get; } public Regex SpecificTimeOfDayRegex { get; } @@ -115,55 +127,57 @@ public PortugueseDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigur bool IDateTimePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + Regex IDateTimePeriodParserConfiguration.TasksmodeMealTimeofDayRegex => null; + public IImmutableDictionary UnitMap { get; } public IImmutableDictionary Numbers { get; } - public bool GetMatchedTimeRange(string text, out string timeStr, out int beginHour, out int endHour, out int endMin) + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + + public bool GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMin) { var trimmedText = text.Trim().Normalized(DateTimeDefinitions.SpecialCharactersEquivalent); + beginHour = 0; endHour = 0; endMin = 0; - // TODO: modify it according to the corresponding function in English part - if (trimmedText.EndsWith("madrugada")) + // @TODO move hardcoded values to resources file + if (trimmedText.EndsWith("madrugada", StringComparison.Ordinal)) { - timeStr = "TDA"; - beginHour = 4; - endHour = 8; + todSymbol = Constants.EarlyMorning; } - else if (trimmedText.EndsWith("manha")) + else if (trimmedText.EndsWith("manha", StringComparison.Ordinal)) { - timeStr = "TMO"; - beginHour = 8; - endHour = Constants.HalfDayHourCount; + todSymbol = Constants.Morning; } else if (trimmedText.Contains("passado o meio dia") || trimmedText.Contains("depois do meio dia")) { - timeStr = "TAF"; - beginHour = Constants.HalfDayHourCount; - endHour = 16; + todSymbol = Constants.Afternoon; } - else if (trimmedText.EndsWith("tarde")) + else if (trimmedText.EndsWith("tarde", StringComparison.Ordinal)) { - timeStr = "TEV"; - beginHour = 16; - endHour = 20; + todSymbol = Constants.Evening; } - else if (trimmedText.EndsWith("noite")) + else if (trimmedText.EndsWith("noite", 
StringComparison.Ordinal)) { - timeStr = "TNI"; - beginHour = 20; - endHour = 23; - endMin = 59; + todSymbol = Constants.Night; } else { - timeStr = null; + todSymbol = null; return false; } + var parseResult = TimexUtility.ResolveTimeOfDay(todSymbol); + todSymbol = parseResult.Timex; + beginHour = parseResult.BeginHour; + endHour = parseResult.EndHour; + endMin = parseResult.EndMin; + return true; } @@ -172,9 +186,9 @@ public int GetSwiftPrefix(string text) var trimmedText = text.Trim(); var swift = 0; - // TODO: Replace with a regex + // @TODO move hardcoded values to resources file if (PortugueseDatePeriodParserConfiguration.PreviousPrefixRegex.IsMatch(trimmedText) || - trimmedText.Equals("anoche")) + trimmedText.Equals("anoche", StringComparison.Ordinal)) { swift = -1; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDurationParserConfiguration.cs index 36161620e6..1d60ad88a1 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDurationParserConfiguration.cs @@ -1,10 +1,19 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Portuguese; namespace Microsoft.Recognizers.Text.DateTime.Portuguese { public class PortugueseDurationParserConfiguration : BaseDateTimeOptionsConfiguration, IDurationParserConfiguration { + public static readonly Regex PrefixArticleRegex = + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public PortugueseDurationParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -36,12 +45,14 @@ public PortugueseDurationParserConfiguration(ICommonDateTimeParserConfiguration public IParser NumberParser { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public Regex NumberCombinedWithUnit { get; } public Regex AnUnitRegex { get; } + Regex IDurationParserConfiguration.PrefixArticleRegex => PrefixArticleRegex; + public Regex DuringRegex { get; } public Regex AllDateUnitRegex { get; } @@ -62,6 +73,8 @@ public PortugueseDurationParserConfiguration(ICommonDateTimeParserConfiguration public Regex SpecialNumberUnitRegex { get; } + bool IDurationParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + public IImmutableDictionary UnitMap { get; } public IImmutableDictionary UnitValueMap { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseHolidayParserConfiguration.cs index 66d246cf65..05f6149574 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseHolidayParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseHolidayParserConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Collections.Immutable; using Microsoft.Recognizers.Definitions.Portuguese; @@ -25,8 +28,7 @@ public override int GetSwiftYear(string text) { swift = 1; } - - if (PortugueseDatePeriodParserConfiguration.PreviousPrefixRegex.IsMatch(trimmedText)) + else if (PortugueseDatePeriodParserConfiguration.PreviousPrefixRegex.IsMatch(trimmedText)) { swift = -1; } @@ -62,7 +64,7 @@ protected override IDictionary> InitHolidayFuncs() { "pai", FathersDay }, { "mae", MothersDay }, { "acaodegracas", ThanksgivingDay }, - { "trabalho", LabourDay }, + { "trabalho", InternationalWorkersDay }, { "pascoa", Easter }, { "natal", ChristmasDay }, { "vesperadenatal", ChristmasEve }, @@ -92,6 +94,6 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject TeacherDay(int year) => new DateObject(year, 9, 11); - private static DateObject Easter(int year) => DateObject.MinValue; + private static DateObject Easter(int year) => HolidayFunctions.CalculateHolidayByEaster(year); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseMergedParserConfiguration.cs index ef4ac26526..6f39121846 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseMergedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseMergedParserConfiguration.cs @@ -1,5 +1,8 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Portuguese; using Microsoft.Recognizers.Text.Matcher; namespace Microsoft.Recognizers.Text.DateTime.Portuguese @@ -16,6 +19,7 @@ public PortugueseMergedParserConfiguration(IDateTimeOptionsConfiguration config) EqualRegex = PortugueseMergedExtractorConfiguration.EqualRegex; SuffixAfter = PortugueseMergedExtractorConfiguration.SuffixAfterRegex; YearRegex = PortugueseDatePeriodExtractorConfiguration.YearRegex; + SuperfluousWordMatcher = PortugueseMergedExtractorConfiguration.SuperfluousWordMatcher; DatePeriodParser = new BaseDatePeriodParser(new PortugueseDatePeriodParserConfiguration(this)); @@ -45,5 +49,7 @@ public PortugueseMergedParserConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeParser HolidayParser { get; } public StringMatcher SuperfluousWordMatcher { get; } + + bool IMergedParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseSetParserConfiguration.cs index 8fef28824f..fad33023c8 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseSetParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseSetParserConfiguration.cs @@ -1,4 +1,9 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; @@ -8,6 +13,35 @@ namespace Microsoft.Recognizers.Text.DateTime.Portuguese { public class PortugueseSetParserConfiguration : BaseDateTimeOptionsConfiguration, ISetParserConfiguration { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex DayTypeRegex = + new Regex(DateTimeDefinitions.DayTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex WeekTypeRegex = + new Regex(DateTimeDefinitions.WeekTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex BiWeekTypeRegex = + new Regex(DateTimeDefinitions.BiWeekTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex MonthTypeRegex = + new Regex(DateTimeDefinitions.MonthTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex BiMonthTypeRegex = + new Regex(DateTimeDefinitions.BiMonthTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex QuarterTypeRegex = + new Regex(DateTimeDefinitions.QuarterTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex SemiAnnualTypeRegex = + new Regex(DateTimeDefinitions.SemiAnnualTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex YearTypeRegex = + new Regex(DateTimeDefinitions.YearTypeRegex, RegexFlags, RegexTimeOut); + + // pass FutureTerms as List to ReplaceValueInTextWithFutTerm function + private static readonly List ThisTerms = (List)DateTimeDefinitions.ThisTerms; + public PortugueseSetParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -82,25 +116,45 @@ public bool GetMatchedDailyTimex(string text, out string timex) { var trimmedText = text.Trim().Normalized(DateTimeDefinitions.SpecialCharactersEquivalent); - if (trimmedText.EndsWith("diario") || trimmedText.EndsWith("diaria") || 
trimmedText.EndsWith("diariamente")) + float durationLength = 1; // Default value + float multiplier = 1; + string durationType; + + if (DayTypeRegex.IsMatch(trimmedText)) { - timex = "P1D"; + durationType = Constants.TimexDay; } - else if (trimmedText.Equals("semanalmente")) + else if (WeekTypeRegex.IsMatch(trimmedText)) { - timex = "P1W"; + durationType = Constants.TimexWeek; } - else if (trimmedText.Equals("quinzenalmente")) + else if (BiWeekTypeRegex.IsMatch(trimmedText)) { - timex = "P2W"; + durationType = Constants.TimexWeek; + multiplier = 2; } - else if (trimmedText.Equals("mensalmente")) + else if (MonthTypeRegex.IsMatch(trimmedText)) { - timex = "P1M"; + durationType = Constants.TimexMonth; } - else if (trimmedText.Equals("anualmente")) + else if (BiMonthTypeRegex.IsMatch(trimmedText)) { - timex = "P1Y"; + durationType = Constants.TimexMonth; + multiplier = 2; + } + else if (QuarterTypeRegex.IsMatch(trimmedText)) + { + durationType = Constants.TimexMonth; + multiplier = 3; + } + else if (SemiAnnualTypeRegex.IsMatch(trimmedText)) + { + durationType = Constants.TimexYear; + multiplier = 0.5f; + } + else if (YearTypeRegex.IsMatch(trimmedText)) + { + durationType = Constants.TimexYear; } else { @@ -108,6 +162,8 @@ public bool GetMatchedDailyTimex(string text, out string timex) return false; } + timex = TimexUtility.GenerateSetTimex(durationType, durationLength, multiplier); + return true; } @@ -115,19 +171,20 @@ public bool GetMatchedUnitTimex(string text, out string timex) { var trimmedText = text.Trim().Normalized(DateTimeDefinitions.SpecialCharactersEquivalent); - if (trimmedText.Equals("dia") || trimmedText.Equals("dias")) + // @TODO move hardcoded values to resources file + if (trimmedText.Equals("dia", StringComparison.Ordinal) || trimmedText.Equals("dias", StringComparison.Ordinal)) { timex = "P1D"; } - else if (trimmedText.Equals("semana") || trimmedText.Equals("semanas")) + else if (trimmedText.Equals("semana", StringComparison.Ordinal) || 
trimmedText.Equals("semanas", StringComparison.Ordinal)) { timex = "P1W"; } - else if (trimmedText.Equals("mes") || trimmedText.Equals("meses")) + else if (trimmedText.Equals("mes", StringComparison.Ordinal) || trimmedText.Equals("meses", StringComparison.Ordinal)) { timex = "P1M"; } - else if (trimmedText.Equals("ano") || trimmedText.Equals("anos")) + else if (trimmedText.Equals("ano", StringComparison.Ordinal) || trimmedText.Equals("anos", StringComparison.Ordinal)) { timex = "P1Y"; } @@ -141,5 +198,7 @@ public bool GetMatchedUnitTimex(string text, out string timex) } public string WeekDayGroupMatchString(Match match) => SetHandler.WeekDayGroupMatchString(match); + + public string ReplaceValueInTextWithFutTerm(string text, string value) => TasksModeSetHandler.ReplaceValueInTextWithFutTerm(text, value, ThisTerms); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseTimeParserConfiguration.cs index dac06c962e..d6f8b7ba2c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseTimeParserConfiguration.cs @@ -1,5 +1,10 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; @@ -40,16 +45,17 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha var deltaMin = 0; var trimmedPrefix = prefix.Trim(); - if (trimmedPrefix.StartsWith("quarto") || trimmedPrefix.StartsWith("e um quarto") || - trimmedPrefix.StartsWith("quinze") || trimmedPrefix.StartsWith("e quinze")) + // @TODO move hardcoded values to resources file + if (trimmedPrefix.StartsWith("quarto", StringComparison.Ordinal) || trimmedPrefix.StartsWith("e um quarto", StringComparison.Ordinal) || + trimmedPrefix.StartsWith("quinze", StringComparison.Ordinal) || trimmedPrefix.StartsWith("e quinze", StringComparison.Ordinal)) { deltaMin = 15; } - else if (trimmedPrefix.StartsWith("menos um quarto")) + else if (trimmedPrefix.StartsWith("menos um quarto", StringComparison.Ordinal)) { deltaMin = -15; } - else if (trimmedPrefix.StartsWith("meia") || trimmedPrefix.StartsWith("e meia")) + else if (trimmedPrefix.StartsWith("meia", StringComparison.Ordinal) || trimmedPrefix.StartsWith("e meia", StringComparison.Ordinal)) { deltaMin = 30; } @@ -59,7 +65,7 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha var minStr = match.Groups["deltamin"].Value; if (!string.IsNullOrWhiteSpace(minStr)) { - deltaMin = int.Parse(minStr); + deltaMin = int.Parse(minStr, CultureInfo.InvariantCulture); } else { @@ -68,15 +74,15 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha } } - if (trimmedPrefix.EndsWith("passadas") || trimmedPrefix.EndsWith("pasados") || - trimmedPrefix.EndsWith("depois das") || trimmedPrefix.EndsWith("depois da") || trimmedPrefix.EndsWith("depois do") || - trimmedPrefix.EndsWith("passadas as") || trimmedPrefix.EndsWith("passadas das")) + if (trimmedPrefix.EndsWith("passadas", 
StringComparison.Ordinal) || trimmedPrefix.EndsWith("pasados", StringComparison.Ordinal) || + trimmedPrefix.EndsWith("depois das", StringComparison.Ordinal) || trimmedPrefix.EndsWith("depois da", StringComparison.Ordinal) || trimmedPrefix.EndsWith("depois do", StringComparison.Ordinal) || + trimmedPrefix.EndsWith("passadas as", StringComparison.Ordinal) || trimmedPrefix.EndsWith("passadas das", StringComparison.Ordinal)) { // deltaMin it's positive } - else if (trimmedPrefix.EndsWith("para a") || trimmedPrefix.EndsWith("para as") || - trimmedPrefix.EndsWith("pra") || trimmedPrefix.EndsWith("pras") || - trimmedPrefix.EndsWith("antes da") || trimmedPrefix.EndsWith("antes das")) + else if (trimmedPrefix.EndsWith("para a", StringComparison.Ordinal) || trimmedPrefix.EndsWith("para as", StringComparison.Ordinal) || + trimmedPrefix.EndsWith("pra", StringComparison.Ordinal) || trimmedPrefix.EndsWith("pras", StringComparison.Ordinal) || + trimmedPrefix.EndsWith("antes da", StringComparison.Ordinal) || trimmedPrefix.EndsWith("antes das", StringComparison.Ordinal)) { deltaMin = -deltaMin; } @@ -93,11 +99,11 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha public void AdjustBySuffix(string suffix, ref int hour, ref int min, ref bool hasMin, ref bool hasAm, ref bool hasPm) { - var trimedSuffix = suffix.Trim(); - AdjustByPrefix(trimedSuffix, ref hour, ref min, ref hasMin); + var trimmedSuffix = suffix.Trim(); + AdjustByPrefix(trimmedSuffix, ref hour, ref min, ref hasMin); var deltaHour = 0; - var match = PortugueseTimeExtractorConfiguration.TimeSuffix.MatchExact(trimedSuffix, trim: true); + var match = PortugueseTimeExtractorConfiguration.TimeSuffix.MatchExact(trimmedSuffix, trim: true); if (match.Success) { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseTimePeriodParserConfiguration.cs index 
7fef66a4da..2967e16d95 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseTimePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; @@ -54,7 +58,7 @@ public PortugueseTimePeriodParserConfiguration(ICommonDateTimeParserConfiguratio public IDateTimeUtilityConfiguration UtilityConfiguration { get; } - public bool GetMatchedTimexRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) + public bool GetMatchedTimeRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) { var trimmedText = text.Trim(); @@ -63,23 +67,24 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou endMin = 0; var timeOfDay = string.Empty; - if (DateTimeDefinitions.EarlyMorningTermList.Any(o => trimmedText.EndsWith(o))) + + if (DateTimeDefinitions.EarlyMorningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.EarlyMorning; } - else if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Morning; } - else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Afternoon; } - else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o, 
StringComparison.Ordinal))) { timeOfDay = Constants.Evening; } - else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Night; } @@ -89,7 +94,7 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou return false; } - var parseResult = TimexUtility.ParseTimeOfDay(timeOfDay); + var parseResult = TimexUtility.ResolveTimeOfDay(timeOfDay); timex = parseResult.Timex; beginHour = parseResult.BeginHour; endHour = parseResult.EndHour; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Utilities/PortugueseDatetimeUtilityConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Utilities/PortugueseDatetimeUtilityConfiguration.cs index e03e9e3ea9..e347a5cd74 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Utilities/PortugueseDatetimeUtilityConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Utilities/PortugueseDatetimeUtilityConfiguration.cs @@ -1,74 +1,32 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Portuguese.Utilities { - public class PortugueseDatetimeUtilityConfiguration : IDateTimeUtilityConfiguration + public class PortugueseDatetimeUtilityConfiguration : BaseDatetimeUtilityConfiguration { - public static readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoRegex, RegexFlags); - - public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); - - public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); - - public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); - - public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); - - public static readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); - - public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); - - public static readonly Regex AmPmDescRegex = - new Regex(DateTimeDefinitions.AmPmDescRegex, RegexFlags); - - public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); - - public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); - - public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); - - public static readonly Regex CommonDatePrefixRegex = - new Regex(DateTimeDefinitions.CommonDatePrefixRegex, RegexFlags); - - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - - Regex IDateTimeUtilityConfiguration.LaterRegex => LaterRegex; - - Regex IDateTimeUtilityConfiguration.AgoRegex => AgoRegex; - - Regex 
IDateTimeUtilityConfiguration.InConnectorRegex => InConnectorRegex; - - Regex IDateTimeUtilityConfiguration.SinceYearSuffixRegex => SinceYearSuffixRegex; - - Regex IDateTimeUtilityConfiguration.WithinNextPrefixRegex => WithinNextPrefixRegex; - - Regex IDateTimeUtilityConfiguration.AmDescRegex => AmDescRegex; - - Regex IDateTimeUtilityConfiguration.PmDescRegex => PmDescRegex; - - Regex IDateTimeUtilityConfiguration.AmPmDescRegex => AmPmDescRegex; - - Regex IDateTimeUtilityConfiguration.RangeUnitRegex => RangeUnitRegex; - - Regex IDateTimeUtilityConfiguration.TimeUnitRegex => TimeUnitRegex; - - Regex IDateTimeUtilityConfiguration.DateUnitRegex => DateUnitRegex; - - Regex IDateTimeUtilityConfiguration.CommonDatePrefixRegex => CommonDatePrefixRegex; - - bool IDateTimeUtilityConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + public PortugueseDatetimeUtilityConfiguration() + : base( + DateTimeDefinitions.AgoRegex, + DateTimeDefinitions.LaterRegex, + DateTimeDefinitions.InConnectorRegex, + DateTimeDefinitions.SinceYearSuffixRegex, + DateTimeDefinitions.WithinNextPrefixRegex, + DateTimeDefinitions.AmDescRegex, + DateTimeDefinitions.PmDescRegex, + DateTimeDefinitions.AmPmDescRegex, + DateTimeDefinitions.RangeUnitRegex, + DateTimeDefinitions.TimeUnitRegex, + DateTimeDefinitions.DateUnitRegex, + DateTimeDefinitions.CommonDatePrefixRegex, + DateTimeDefinitions.RangePrefixRegex, + RegexOptions.Singleline | RegexOptions.ExplicitCapture, + DateTimeDefinitions.CheckBothBeforeAfter) + { + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Properties/AssemblyInfo.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Properties/AssemblyInfo.cs index 543c5dbdbc..47acccdab0 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Properties/AssemblyInfo.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Properties/AssemblyInfo.cs @@ -1,4 +1,7 @@ -using System.Reflection; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System.Reflection; using System.Runtime.InteropServices; // General Information about an assembly is controlled through the following diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateExtractorConfiguration.cs index f678c40728..6a5e00c420 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; @@ -14,73 +17,73 @@ namespace Microsoft.Recognizers.Text.DateTime.Spanish public class SpanishDateExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateExtractorConfiguration { public static readonly Regex MonthRegex = - new Regex(DateTimeDefinitions.MonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex OnRegex = - new 
Regex(DateTimeDefinitions.OnRegex, RegexFlags); + new Regex(DateTimeDefinitions.OnRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelaxedOnRegex = - new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisRegex = - new Regex(DateTimeDefinitions.ThisRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisRegex, RegexFlags, RegexTimeOut); public static readonly Regex LastDateRegex = - new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextDateRegex = - new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDayRegex = - new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDayWithNumRegex = - new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayOfMonthRegex = - new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDateRegex = - new Regex(DateTimeDefinitions.SpecialDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeWeekDayRegex = - new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags, RegexTimeOut); public static 
readonly Regex ForTheRegex = - new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags); + new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayAndDayOfMothRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayAndDayRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex StrictRelativeRegex = - new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixArticleRegex = - new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeConnectorSymbolRegex = - new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags); + new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] ImplicitDateList = { @@ -89,34 +92,37 @@ public class SpanishDateExtractorConfiguration : BaseDateTimeOptionsConfiguratio }; public static readonly Regex OfMonth = - new Regex(DateTimeDefinitions.OfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.OfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthEnd = - new Regex(DateTimeDefinitions.MonthEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayEnd = - new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags); + new 
Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayStart = - new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags, RegexTimeOut); public static readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex BeforeAfterRegex = + new Regex(DateTimeDefinitions.BeforeAfterRegex, RegexFlags, RegexTimeOut); public static readonly ImmutableDictionary DayOfWeek = DateTimeDefinitions.DayOfWeek.ToImmutableDictionary(); @@ -127,43 +133,54 @@ public class SpanishDateExtractorConfiguration : BaseDateTimeOptionsConfiguratio public SpanishDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.Spanish.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new SpanishNumberParserConfiguration(new 
BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Spanish.OrdinalExtractor.GetInstance(numConfig); + NumberParser = new BaseNumberParser(new SpanishNumberParserConfiguration(numConfig)); + DurationExtractor = new BaseDurationExtractor(new SpanishDurationExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new SpanishHolidayExtractorConfiguration(this)); UtilityConfiguration = new SpanishDatetimeUtilityConfiguration(); // 3-23-2017 - var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags); + var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags, RegexTimeOut); // 23-3-2015 - var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags); + var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags, RegexTimeOut); // el 1.3 - var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags); + var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags, RegexTimeOut); // el 24-12 - var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags); + var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags, RegexTimeOut); // 7/23 - var dateRegex7 = new Regex(DateTimeDefinitions.DateExtractor7, RegexFlags); + var dateRegex7 = new Regex(DateTimeDefinitions.DateExtractor7, RegexFlags, RegexTimeOut); // 23/7 - var dateRegex9 = new Regex(DateTimeDefinitions.DateExtractor9, RegexFlags); + var dateRegex9 = new Regex(DateTimeDefinitions.DateExtractor9, RegexFlags, RegexTimeOut); // 2015-12-23 - var dateRegex10 = new Regex(DateTimeDefinitions.DateExtractor10, RegexFlags); + var dateRegex10 = new 
Regex(DateTimeDefinitions.DateExtractor10, RegexFlags, RegexTimeOut); DateRegexList = new List { // (domingo,)? 5 de Abril - new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags, RegexTimeOut), // (domingo,)? 5 de Abril 5, 2016 - new Regex(DateTimeDefinitions.DateExtractor2, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor2, RegexFlags, RegexTimeOut), // (domingo,)? 6 de Abril - new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags, RegexTimeOut), }; var enableDmy = DmyDateFormat || @@ -184,6 +201,8 @@ public SpanishDateExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DurationExtractor { get; } + public IDateTimeExtractor HolidayExtractor { get; } + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } IEnumerable IDateExtractorConfiguration.ImplicitDateList => ImplicitDateList; @@ -231,5 +250,7 @@ public SpanishDateExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IDateExtractorConfiguration.RangeUnitRegex => RangeUnitRegex; Regex IDateExtractorConfiguration.RangeConnectorSymbolRegex => RangeConnectorSymbolRegex; + + Regex IDateExtractorConfiguration.BeforeAfterRegex => BeforeAfterRegex; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDatePeriodExtractorConfiguration.cs index 9b84f67af1..270690ccbc 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDatePeriodExtractorConfiguration.cs @@ -1,9 +1,13 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Spanish; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Spanish; +using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Spanish { @@ -11,169 +15,172 @@ public class SpanishDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfig { // base regexes public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); - - public static readonly Regex AndRegex = - new Regex(DateTimeDefinitions.AndRegex, RegexFlags); + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); public static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex IllegalYearRegex = - new Regex(BaseDateTime.IllegalYearRegex, RegexFlags); + new Regex(BaseDateTime.IllegalYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthRegex = - new Regex(DateTimeDefinitions.MonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthSuffixRegex = - new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags, RegexTimeOut); public static 
readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PastRegex = - new Regex(DateTimeDefinitions.PastRegex, RegexFlags); + new Regex(DateTimeDefinitions.PastRegex, RegexFlags, RegexTimeOut); public static readonly Regex FutureRegex = - new Regex(DateTimeDefinitions.FutureRegex, RegexFlags); + new Regex(DateTimeDefinitions.FutureRegex, RegexFlags, RegexTimeOut); public static readonly Regex FutureSuffixRegex = - new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags, RegexTimeOut); // composite regexes public static readonly Regex SimpleCasesRegex = - new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthFrontSimpleCasesRegex = - new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthFrontBetweenRegex = - new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags, RegexTimeOut); public static readonly Regex DayBetweenRegex = - new Regex(DateTimeDefinitions.DayBetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayBetweenRegex, RegexFlags, RegexTimeOut); // TODO: modify it according to the related regex in English public static readonly Regex OneWordPeriodRegex = - new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthWithYearRegex = - 
new Regex(DateTimeDefinitions.MonthWithYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthWithYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumWithYearRegex = - new Regex(DateTimeDefinitions.MonthNumWithYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumWithYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfMonthRegex = - new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfYearRegex = - new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex FollowedDateUnit = - new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithDateUnit = - new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags, RegexTimeOut); public static readonly Regex QuarterRegex = - new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags); + new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags, RegexTimeOut); public static readonly Regex QuarterRegexYearFront = - new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags); + new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags, RegexTimeOut); public static readonly Regex AllHalfYearRegex = - new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex SeasonRegex = - new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags); + new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags, RegexTimeOut); public static readonly Regex WhichWeekRegex = - new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfRegex = - new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthOfRegex = - new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterEarlyPeriodRegex = - new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut); // TODO: add this regex, let it correspond to the one in English public static readonly Regex RestOfDateRegex = - new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags, RegexTimeOut); // TODO: add this regex, let it correspond to the one in English public static readonly Regex WeekWithWeekDayRangeRegex = - new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearPlusNumberRegex = - new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex DecadeWithCenturyRegex = - new 
Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags); + new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearPeriodRegex = - new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex ComplexDatePeriodRegex = - new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDecadeRegex = - new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags, RegexTimeOut); public static readonly Regex ReferenceDatePeriodRegex = - new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoRegex, RegexFlags); + new Regex(DateTimeDefinitions.AgoRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterRegex, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex CenturySuffixRegex = - new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); + + 
public static readonly Regex FirstLastRegex = + new Regex(DateTimeDefinitions.FirstLastRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex OfYearRegex = + new Regex(DateTimeDefinitions.OfYearRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex FromRegex = - new Regex(DateTimeDefinitions.FromRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); - private static readonly Regex ConnectorAndRegex = - new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags); + private static readonly Regex RangeConnectorRegex = + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); private static readonly Regex BetweenRegex = - new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags, RegexTimeOut); private static readonly Regex[] SimpleCasesRegexes = { @@ -190,6 +197,7 @@ public class SpanishDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfig MonthFrontSimpleCasesRegex, QuarterRegex, QuarterRegexYearFront, + AllHalfYearRegex, SeasonRegex, RestOfDateRegex, LaterEarlyPeriodRegex, @@ -205,10 +213,20 @@ public SpanishDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con : base(config) { DatePointExtractor = new BaseDateExtractor(new SpanishDateExtractorConfiguration(this)); - CardinalExtractor = Number.Spanish.CardinalExtractor.GetInstance(); - OrdinalExtractor = Number.Spanish.OrdinalExtractor.GetInstance(); DurationExtractor = new BaseDurationExtractor(new SpanishDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new SpanishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new 
BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Spanish.CardinalExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Spanish.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new SpanishNumberParserConfiguration(numConfig)); } public IDateExtractor DatePointExtractor { get; } @@ -275,6 +293,10 @@ public SpanishDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con Regex IDatePeriodExtractorConfiguration.NowRegex => NowRegex; + Regex IDatePeriodExtractorConfiguration.FirstLastRegex => FirstLastRegex; + + Regex IDatePeriodExtractorConfiguration.OfYearRegex => OfYearRegex; + bool IDatePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; string[] IDatePeriodExtractorConfiguration.DurationDateRestrictions => DateTimeDefinitions.DurationDateRestrictions; @@ -307,7 +329,7 @@ public bool GetBetweenTokenIndex(string text, out int index) public bool HasConnectorToken(string text) { - return ConnectorAndRegex.IsMatch(text); + return RangeConnectorRegex.IsExactMatch(text, true); } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimeAltExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimeAltExtractorConfiguration.cs index f0b71d78c2..e77a6f109a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimeAltExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimeAltExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; @@ -7,22 +10,22 @@ namespace Microsoft.Recognizers.Text.DateTime.Spanish public class SpanishDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeAltExtractorConfiguration { public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangePrefixRegex = - new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] RelativePrefixList = { @@ -37,10 +40,10 @@ public class SpanishDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfi private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex OrRegex = - new Regex(DateTimeDefinitions.OrRegex, RegexFlags); + new Regex(DateTimeDefinitions.OrRegex, RegexFlags, RegexTimeOut); private static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public 
SpanishDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimeExtractorConfiguration.cs index a68d55a162..44c7d26796 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimeExtractorConfiguration.cs @@ -1,79 +1,94 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; using Microsoft.Recognizers.Text.DateTime.Spanish.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Spanish { public class SpanishDateTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeExtractorConfiguration { public static readonly Regex PrepositionRegex = - new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixRegex = - new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); // TODO: modify it according to the corresponding English regex public static readonly Regex TimeOfDayRegex = - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeOfDayRegex = - new 
Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificEndOfRegex = - new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificEndOfRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); // TODO: add this for Spanish public static readonly Regex UnitRegex = - new Regex(DateTimeDefinitions.UnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConnectorRegex = - new 
Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex NumberAsTimeRegex = - new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateNumberConnectorRegex = - new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public SpanishDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(numConfig); + DatePointExtractor = new BaseDateExtractor(new SpanishDateExtractorConfiguration(this)); TimePointExtractor = new BaseTimeExtractor(new SpanishTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new SpanishDurationExtractorConfiguration(this)); UtilityConfiguration = new SpanishDatetimeUtilityConfiguration(); + HolidayExtractor = new BaseHolidayExtractor(new SpanishHolidayExtractorConfiguration(this)); + } public IExtractor IntegerExtractor { get; } @@ -86,6 +101,8 @@ public SpanishDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration confi public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + public IDateTimeExtractor HolidayExtractor { 
get; } + Regex IDateTimeExtractorConfiguration.NowRegex => NowRegex; Regex IDateTimeExtractorConfiguration.SuffixRegex => SuffixRegex; @@ -119,9 +136,9 @@ public SpanishDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration confi public bool IsConnector(string text) { text = text.Trim(); - return string.IsNullOrEmpty(text) - || PrepositionRegex.IsMatch(text) - || ConnectorRegex.IsMatch(text); + return string.IsNullOrEmpty(text) || + PrepositionRegex.IsMatch(text) || + ConnectorRegex.IsMatch(text); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimePeriodExtractorConfiguration.cs index d0878ad429..900765d760 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimePeriodExtractorConfiguration.cs @@ -1,6 +1,12 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Spanish; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Spanish { @@ -8,67 +14,84 @@ public class SpanishDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsCo IDateTimePeriodExtractorConfiguration { public static readonly Regex NumberCombinedWithUnit = - new Regex(DateTimeDefinitions.DateTimePeriodNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.DateTimePeriodNumberCombinedWithUnit, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex RestOfDateTimeRegex = - new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex HyphenDateRegex = + new Regex(BaseDateTime.HyphenDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodTimeOfDayWithDateRegex = - new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeTimeUnitRegex = - new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); public static readonly Regex MiddlePauseRegex = - new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags); + new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags, RegexTimeOut); public static 
readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixDayRegex = new Regex(DateTimeDefinitions.PrefixDayRegex, RegexFlags | RegexOptions.RightToLeft); public static readonly Regex SuffixRegex = - new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PeriodTimeOfDayRegex = + new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PeriodSpecificTimeOfDayRegex = + new Regex(DateTimeDefinitions.PeriodSpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex FromRegex = - new Regex(DateTimeDefinitions.FromRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); - private static readonly Regex ConnectorAndRegex = - new 
Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags); + private static readonly Regex RangeConnectorRegex = + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); private static readonly Regex BetweenRegex = - new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags, RegexTimeOut); public SpanishDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; - CardinalExtractor = Number.Spanish.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Spanish.CardinalExtractor.GetInstance(numConfig); SingleDateExtractor = new BaseDateExtractor(new SpanishDateExtractorConfiguration(this)); SingleTimeExtractor = new BaseTimeExtractor(new SpanishTimeExtractorConfiguration(this)); @@ -76,6 +99,8 @@ public SpanishDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration DurationExtractor = new BaseDurationExtractor(new SpanishDurationExtractorConfiguration(this)); TimePeriodExtractor = new BaseTimePeriodExtractor(new SpanishTimePeriodExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new SpanishTimeZoneExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new SpanishHolidayExtractorConfiguration(this)); + } public string TokenBeforeDate { get; } @@ -92,6 +117,8 @@ public SpanishDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration public IDateTimeExtractor TimePeriodExtractor { get; } + public IDateTimeExtractor HolidayExtractor { get; } + public IDateTimeExtractor TimeZoneExtractor { get; } public IEnumerable SimpleCasesRegex => new[] @@ -104,9 +131,9 @@ public 
SpanishDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration public Regex TillRegex => SpanishDatePeriodExtractorConfiguration.TillRegex; - public Regex SpecificTimeOfDayRegex => SpanishDateTimeExtractorConfiguration.SpecificTimeOfDayRegex; + public Regex TimeOfDayRegex => PeriodTimeOfDayRegex; - public Regex TimeOfDayRegex => SpanishDateTimeExtractorConfiguration.TimeOfDayRegex; + public Regex SpecificTimeOfDayRegex => PeriodSpecificTimeOfDayRegex; public Regex FollowedUnit => SpanishTimePeriodExtractorConfiguration.FollowedUnit; @@ -150,6 +177,8 @@ public SpanishDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration Regex IDateTimePeriodExtractorConfiguration.AfterRegex => AfterRegex; + Regex IDateTimePeriodExtractorConfiguration.TasksmodeMealTimeofDayRegex => null; + public bool GetFromTokenIndex(string text, out int index) { index = -1; @@ -176,7 +205,7 @@ public bool GetBetweenTokenIndex(string text, out int index) public bool HasConnectorToken(string text) { - return ConnectorAndRegex.IsMatch(text); + return RangeConnectorRegex.IsExactMatch(text, true); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDurationExtractorConfiguration.cs index a8dd368966..2f76cea3aa 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDurationExtractorConfiguration.cs @@ -1,69 +1,89 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Spanish { public class SpanishDurationExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDurationExtractorConfiguration { public static readonly Regex UnitRegex = - new Regex(DateTimeDefinitions.UnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnitRegex, RegexFlags, RegexTimeOut); // TODO: improve Spanish the SuffixAndRegex public static readonly Regex SuffixAndRegex = - new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags, RegexTimeOut); public static readonly Regex FollowedUnit = - new Regex(DateTimeDefinitions.FollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.FollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithUnit = - new Regex(DateTimeDefinitions.DurationNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.DurationNumberCombinedWithUnit, RegexFlags, RegexTimeOut); // TODO: add half in AnUnitRegex public static readonly Regex AnUnitRegex = - new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DuringRegex = - new Regex(DateTimeDefinitions.DuringRegex, RegexFlags); + new Regex(DateTimeDefinitions.DuringRegex, RegexFlags, RegexTimeOut); public static readonly Regex AllRegex = - new Regex(DateTimeDefinitions.AllRegex, RegexFlags); + new Regex(DateTimeDefinitions.AllRegex, RegexFlags, RegexTimeOut); public static readonly Regex HalfRegex = - new Regex(DateTimeDefinitions.HalfRegex, RegexFlags); + new Regex(DateTimeDefinitions.HalfRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConjunctionRegex = - new Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags, RegexTimeOut); public static readonly Regex InexactNumberRegex = - new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex InexactNumberUnitRegex = - new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDurationUnitRegex = - new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationUnitRegex = - new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationConnectorRegex = - new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModPrefixRegex = + new Regex(DateTimeDefinitions.ModPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModSuffixRegex = + new Regex(DateTimeDefinitions.ModSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialNumberUnitRegex = null; public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public SpanishDurationExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - CardinalExtractor = 
Number.Spanish.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Spanish.CardinalExtractor.GetInstance(numConfig); + UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary(); UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary(); } @@ -107,5 +127,11 @@ public SpanishDurationExtractorConfiguration(IDateTimeOptionsConfiguration confi Regex IDurationExtractorConfiguration.MoreThanRegex => MoreThanRegex; Regex IDurationExtractorConfiguration.LessThanRegex => LessThanRegex; + + Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex; + + Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex; + + public Dictionary AmbiguityFiltersDict => null; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishHolidayExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishHolidayExtractorConfiguration.cs index ede9d9a105..6b7e3fd861 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishHolidayExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishHolidayExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; @@ -9,9 +12,9 @@ public class SpanishHolidayExtractorConfiguration : BaseDateTimeOptionsConfigura { public static readonly Regex[] HolidayRegexList = { - new Regex(DateTimeDefinitions.HolidayRegex1, RegexFlags), - new Regex(DateTimeDefinitions.HolidayRegex2, RegexFlags), - new Regex(DateTimeDefinitions.HolidayRegex3, RegexFlags), + new Regex(DateTimeDefinitions.HolidayRegex1, RegexFlags, RegexTimeOut), + new Regex(DateTimeDefinitions.HolidayRegex2, RegexFlags, RegexTimeOut), + new Regex(DateTimeDefinitions.HolidayRegex3, RegexFlags, RegexTimeOut), }; private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishMergedExtractorConfiguration.cs index 16dc3008cd..a66d97b66b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishMergedExtractorConfiguration.cs @@ -1,54 +1,58 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Spanish; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Matcher; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Spanish { public class SpanishMergedExtractorConfiguration : BaseDateTimeOptionsConfiguration, IMergedExtractorConfiguration { public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); - - public static readonly Regex SinceRegex = - new Regex(DateTimeDefinitions.SinceRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex AroundRegex = - new Regex(DateTimeDefinitions.SinceRegex, RegexFlags); + new Regex(DateTimeDefinitions.AroundRegex, RegexFlags, RegexTimeOut); public static readonly Regex EqualRegex = - new Regex(BaseDateTime.EqualRegex, RegexFlags); + new Regex(BaseDateTime.EqualRegex, RegexFlags, RegexTimeOut); // TODO: change the following three regexes to Spanish if there is same requirement of split from A to B as two time points public static readonly Regex FromToRegex = - new Regex(DateTimeDefinitions.FromToRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromToRegex, RegexFlags, RegexTimeOut); public static readonly Regex SingleAmbiguousMonthRegex = - new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrepositionSuffixRegex = - new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex 
AmbiguousRangeModifierPrefix = - new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags); + new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags, RegexTimeOut); public static readonly Regex NumberEndingPattern = - new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags); + new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificDatePeriodRegex = - new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags, RegexTimeOut); - public static readonly Regex[] TermFilterRegexes = { }; + public static readonly Regex[] TermFilterRegexes = System.Array.Empty(); public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher(); + public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public SpanishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) @@ -65,9 +69,31 @@ public SpanishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) DateTimeAltExtractor = new BaseDateTimeAltExtractor(new SpanishDateTimeAltExtractorConfiguration(this)); HolidayExtractor = new BaseHolidayExtractor(new SpanishHolidayExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new SpanishTimeZoneExtractorConfiguration(this)); - IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + if ((config.Options & DateTimeOptions.ExperimentalMode) != 0) + { + SinceRegex = 
SinceRegexExp; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(numConfig); + + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict); } + // Used in Standard mode + public static Regex SinceRegex { get; set; } = new Regex(DateTimeDefinitions.SinceRegex, RegexFlags, RegexTimeOut); + + // used in Experimental mode + public static Regex SinceRegexExp { get; } = new Regex(DateTimeDefinitions.SinceRegexExp, RegexFlags, RegexTimeOut); + public IDateExtractor DateExtractor { get; } public IDateTimeExtractor TimeExtractor { get; } @@ -92,7 +118,7 @@ public SpanishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) public IExtractor IntegerExtractor { get; } - public Dictionary AmbiguityFiltersDict { get; } = null; + public Dictionary AmbiguityFiltersDict { get; } Regex IMergedExtractorConfiguration.AfterRegex => AfterRegex; @@ -122,10 +148,17 @@ public SpanishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + public Regex FailFastRegex { get; } = null; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; StringMatcher IMergedExtractorConfiguration.SuperfluousWordMatcher => SuperfluousWordMatcher; + + bool IMergedExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + public Regex TasksModeMentionFilters { get; } + } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishSetExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishSetExtractorConfiguration.cs index 1e11fbf06a..f55b57b531 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishSetExtractorConfiguration.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishSetExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; @@ -11,25 +14,25 @@ public class SpanishSetExtractorConfiguration : BaseDateTimeOptionsConfiguration public static readonly string ExtractorName = Constants.SYS_DATETIME_SET; public static readonly Regex PeriodicRegex = - new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachUnitRegex = - new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachPrefixRegex = - new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachDayRegex = - new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex BeforeEachDayRegex = - new Regex(DateTimeDefinitions.BeforeEachDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeEachDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetWeekDayRegex = - new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetEachRegex = - new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -59,6 +62,8 @@ public SpanishSetExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor 
DateTimePeriodExtractor { get; } + bool ISetExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + Regex ISetExtractorConfiguration.LastRegex => SpanishDateExtractorConfiguration.LastDateRegex; Regex ISetExtractorConfiguration.EachPrefixRegex => EachPrefixRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimeExtractorConfiguration.cs index 50ecf3ac49..b1689b08fe 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; @@ -10,47 +13,47 @@ public class SpanishTimeExtractorConfiguration : BaseDateTimeOptionsConfiguratio // part 1: smallest component // -------------------------------------- public static readonly Regex DescRegex = - new Regex(DateTimeDefinitions.DescRegex, RegexFlags); + new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut); public static readonly Regex HourNumRegex = - new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex MinuteNumRegex = - new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags, RegexTimeOut); // part 2: middle level component // -------------------------------------- // handle "... 
en punto" public static readonly Regex OclockRegex = - new Regex(DateTimeDefinitions.OclockRegex, RegexFlags); + new Regex(DateTimeDefinitions.OclockRegex, RegexFlags, RegexTimeOut); // handle "... tarde" public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); // handle "... de la mañana" public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); // handle "y media ..." "menos cuarto ..." public static readonly Regex LessThanOneHour = - new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags); + new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags, RegexTimeOut); public static readonly Regex TensTimeRegex = - new Regex(DateTimeDefinitions.TensTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.TensTimeRegex, RegexFlags, RegexTimeOut); // handle "seis treinta", "seis veintiuno", "seis menos diez" public static readonly Regex WrittenTimeRegex = - new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimePrefix = - new Regex(DateTimeDefinitions.TimePrefix, RegexFlags); + new Regex(DateTimeDefinitions.TimePrefix, RegexFlags, RegexTimeOut); public static readonly Regex TimeSuffix = - new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags); + new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags, RegexTimeOut); public static readonly Regex BasicTime = - new Regex(DateTimeDefinitions.BasicTime, RegexFlags); + new Regex(DateTimeDefinitions.BasicTime, RegexFlags, RegexTimeOut); // part 3: regex for time // -------------------------------------- @@ -58,50 +61,48 @@ public class SpanishTimeExtractorConfiguration : BaseDateTimeOptionsConfiguratio // TODO: add some new regex which have used in AtRegex // TODO: modify according to corresponding English 
regex public static readonly Regex AtRegex = - new Regex(DateTimeDefinitions.AtRegex, RegexFlags); + new Regex(DateTimeDefinitions.AtRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConnectNumRegex = - new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeBeforeAfterRegex = - new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] TimeRegexList = { // (tres min pasadas las)? siete|7|(siete treinta) pm - new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags, RegexTimeOut), // (tres min pasadas las)? 3:00(:00)? (pm)? - new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags, RegexTimeOut), // (tres min pasadas las)? 3.00 (pm) - new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags, RegexTimeOut), // (tres min pasadas las) (cinco treinta|siete|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags, RegexTimeOut), // (tres min pasadas las) (cinco treinta|siete|7|7:00(:00)?) (pm)? (de la noche) - new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags, RegexTimeOut), // (cinco treinta|siete|7|7:00(:00)?) (pm)? (de la noche) - new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags, RegexTimeOut), // (En la noche) a las (cinco treinta|siete|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags, RegexTimeOut), // (En la noche) (cinco treinta|siete|7|7:00(:00)?) (pm)? 
- new Regex(DateTimeDefinitions.TimeRegex8, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex8, RegexFlags, RegexTimeOut), // once (y)? veinticinco - new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags), - - new Regex(DateTimeDefinitions.TimeRegex10, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags, RegexTimeOut), // (tres menos veinte) (pm)? - new Regex(DateTimeDefinitions.TimeRegex11, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex11, RegexFlags, RegexTimeOut), // (tres min pasadas las)? 3h00 (pm)? - new Regex(DateTimeDefinitions.TimeRegex12, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex12, RegexFlags, RegexTimeOut), // 340pm ConnectNumRegex, @@ -127,5 +128,9 @@ public SpanishTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DurationExtractor { get; } public IDateTimeExtractor TimeZoneExtractor { get; } + + public string TimeTokenPrefix => DateTimeDefinitions.TimeTokenPrefix; + + public Dictionary AmbiguityFiltersDict => null; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimePeriodExtractorConfiguration.cs index 6f8b029f1b..0e4cd13279 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimePeriodExtractorConfiguration.cs @@ -1,9 +1,15 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; using Microsoft.Recognizers.Text.DateTime.Spanish.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Spanish { @@ -12,49 +18,48 @@ public class SpanishTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfig public static readonly string ExtractorName = Constants.SYS_DATETIME_TIMEPERIOD; // "TimePeriod"; public static readonly Regex HourNumRegex = - new Regex(DateTimeDefinitions.TimeHourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeHourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex PureNumFromTo = - new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags); + new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags, RegexTimeOut); public static readonly Regex PureNumBetweenAnd = - new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeFromTo = - new Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeBetweenAnd = - new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex UnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex FollowedUnit = - new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithUnit = - new 
Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); - // TODO: add this according to corresponding English regex public static readonly Regex TimeOfDayRegex = - new Regex(string.Empty, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex FromRegex = - new Regex(DateTimeDefinitions.FromRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); - private static readonly Regex ConnectorAndRegex = - new Regex(DateTimeDefinitions.ConnectorAndRegex, RegexFlags); + private static readonly Regex RangeConnectorRegex = + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); private static readonly Regex BetweenRegex = - new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags, RegexTimeOut); public SpanishTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) @@ -62,7 +67,17 @@ public SpanishTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; SingleTimeExtractor = new BaseTimeExtractor(new SpanishTimeExtractorConfiguration(this)); UtilityConfiguration = new SpanishDatetimeUtilityConfiguration(); - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & 
DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(numConfig); + TimeZoneExtractor = new BaseTimeZoneExtractor(new SpanishTimeZoneExtractorConfiguration(this)); } @@ -84,7 +99,7 @@ public SpanishTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con Regex ITimePeriodExtractorConfiguration.TillRegex => TillRegex; - Regex ITimePeriodExtractorConfiguration.TimeOfDayRegex => SpanishDateTimeExtractorConfiguration.TimeOfDayRegex; + Regex ITimePeriodExtractorConfiguration.TimeOfDayRegex => TimeOfDayRegex; Regex ITimePeriodExtractorConfiguration.GeneralEndingRegex => GeneralEndingRegex; @@ -114,7 +129,34 @@ public bool GetBetweenTokenIndex(string text, out int index) public bool IsConnectorToken(string text) { - return ConnectorAndRegex.IsMatch(text); + return RangeConnectorRegex.IsExactMatch(text, true); + } + + // In Spanish "mañana" can mean both "tomorrow" and "morning". This method filters the isolated occurrences of "mañana" from the + // TimePeriodExtractor results as it is more likely to mean "tomorrow" in these cases (unless it is preceded by "la"). 
+ public List ApplyPotentialPeriodAmbiguityHotfix(string text, List timePeriodErs) + { + { + var tomorrowStr = DateTimeDefinitions.MorningTermList[0]; + var morningStr = DateTimeDefinitions.MorningTermList[1]; + List timePeriodErsResult = new List(); + foreach (var timePeriodEr in timePeriodErs) + { + if (timePeriodEr.Text.Equals(tomorrowStr, StringComparison.Ordinal)) + { + if (text.Substring(0, (int)timePeriodEr.Start + (int)timePeriodEr.Length).EndsWith(morningStr, StringComparison.Ordinal)) + { + timePeriodErsResult.Add(timePeriodEr); + } + } + else + { + timePeriodErsResult.Add(timePeriodEr); + } + } + + return timePeriodErsResult; + } } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimeZoneExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimeZoneExtractorConfiguration.cs index c72cf777d5..9610598c7b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimeZoneExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimeZoneExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Matcher; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/DateTimePeriodParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/DateTimePeriodParser.cs index 5bd6659a28..6be1faab75 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/DateTimePeriodParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/DateTimePeriodParser.cs @@ -1,4 +1,9 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Reflection; +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -7,6 +12,11 @@ namespace Microsoft.Recognizers.Text.DateTime.Spanish { public class DateTimePeriodParser : BaseDateTimePeriodParser { + public static readonly Regex ConnectorRegex = + new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public DateTimePeriodParser(IDateTimePeriodParserConfiguration configuration) : base(configuration) { @@ -25,8 +35,36 @@ protected override DateTimeResolutionResult ParseSpecificTimeOfDay(string text, var exactMatch = this.Config.SpecificTimeOfDayRegex.MatchExact(trimmedText, trim: true); + if (!exactMatch.Success) + { + exactMatch = this.Config.PeriodTimeOfDayWithDateRegex.MatchExact(trimmedText, trim: true); + } + if (exactMatch.Success) { + // Extract early/late prefix from text if any + bool hasEarly = false; + if (!string.IsNullOrEmpty(exactMatch.Groups["early"].Value)) + { + hasEarly = true; + ret.Comment = Constants.Comment_Early; + ret.Mod = Constants.EARLY_MOD; + endHour = beginHour + 2; + + // Handling special case: night ends with 23:59 due to C# issues. + if (endMin == 59) + { + endMin = 0; + } + } + + if (!hasEarly && !string.IsNullOrEmpty(exactMatch.Groups["late"].Value)) + { + ret.Comment = Constants.Comment_Late; + ret.Mod = Constants.LATE_MOD; + beginHour = beginHour + 2; + } + var swift = this.Config.GetSwiftPrefix(trimmedText); var date = referenceTime.AddDays(swift).Date; @@ -44,20 +82,26 @@ protected override DateTimeResolutionResult ParseSpecificTimeOfDay(string text, var startIndex = trimmedText.IndexOf(DateTimeDefinitions.Tomorrow, StringComparison.Ordinal) == 0 ? 
DateTimeDefinitions.Tomorrow.Length : 0; - // handle Date followed by morning, afternoon - // Add handling code to handle morning, afternoon followed by Date - // Add handling code to handle early/late morning, afternoon + // handle Date preceded/followed by morning, afternoon + // @TODO Add handling code to handle early/late morning, afternoon var match = this.Config.TimeOfDayRegex.Match(trimmedText.Substring(startIndex)); if (match.Success) { - var beforeStr = trimmedText.Substring(0, match.Index + startIndex).Trim(); - var ers = this.Config.DateExtractor.Extract(beforeStr, referenceTime); + var subStr = match.Index > 0 ? trimmedText.Substring(0, match.Index + startIndex).Trim() : trimmedText.Substring(match.Index + match.Length).Trim(); + var ers = this.Config.DateExtractor.Extract(subStr, referenceTime); if (ers.Count == 0) { return ret; } + // Check if Date and TimeOfDay are contiguous + var middleStr = match.Index > 0 ? subStr.Substring((int)ers[0].Start + (int)ers[0].Length).Trim() : subStr.Substring(0, (int)ers[0].Start).Trim(); + if (!(string.IsNullOrWhiteSpace(middleStr) || ConnectorRegex.IsMatch(middleStr))) + { + return ret; + } + var pr = this.Config.DateParser.Parse(ers[0], referenceTime); var futureDate = (DateObject)((DateTimeResolutionResult)pr.Value).FutureValue; var pastDate = (DateObject)((DateTimeResolutionResult)pr.Value).PastValue; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishCommonDateTimeParserConfiguration.cs index 5111b7baee..7eead2ffad 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishCommonDateTimeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using Microsoft.Recognizers.Definitions.Spanish; using Microsoft.Recognizers.Text.DateTime.Spanish.Utilities; @@ -26,25 +29,39 @@ public SpanishCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration co WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); - CardinalExtractor = Number.Spanish.CardinalExtractor.GetInstance(); - IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.Spanish.OrdinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Spanish.CardinalExtractor.GetInstance(numConfig); + IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Spanish.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new SpanishNumberParserConfiguration(numConfig)); - NumberParser = new BaseNumberParser(new SpanishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + // Do not change order. 
The order of initialization can lead to side-effects DateExtractor = new BaseDateExtractor(new SpanishDateExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new SpanishHolidayExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new SpanishTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new SpanishDateTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new SpanishDurationExtractorConfiguration(this)); DatePeriodExtractor = new BaseDatePeriodExtractor(new SpanishDatePeriodExtractorConfiguration(this)); TimePeriodExtractor = new BaseTimePeriodExtractor(new SpanishTimePeriodExtractorConfiguration(this)); DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(new SpanishDateTimePeriodExtractorConfiguration(this)); + + DurationParser = new BaseDurationParser(new SpanishDurationParserConfiguration(this)); DateParser = new BaseDateParser(new SpanishDateParserConfiguration(this)); + HolidayTimeParser = new BaseHolidayParser(new SpanishHolidayParserConfiguration(this)); TimeParser = new BaseTimeParser(new SpanishTimeParserConfiguration(this)); DateTimeParser = new BaseDateTimeParser(new SpanishDateTimeParserConfiguration(this)); - DurationParser = new BaseDurationParser(new SpanishDurationParserConfiguration(this)); DatePeriodParser = new BaseDatePeriodParser(new SpanishDatePeriodParserConfiguration(this)); TimePeriodParser = new BaseTimePeriodParser(new SpanishTimePeriodParserConfiguration(this)); DateTimePeriodParser = new BaseDateTimePeriodParser(new SpanishDateTimePeriodParserConfiguration(this)); + DateTimeAltParser = new BaseDateTimeAltParser(new SpanishDateTimeAltParserConfiguration(this)); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateParserConfiguration.cs index 02604b3038..9de8c437df 100644 --- 
a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Diagnostics.CodeAnalysis; using System.Text.RegularExpressions; @@ -35,12 +38,13 @@ public SpanishDateParserConfiguration(ICommonDateTimeParserConfiguration config) StrictRelativeRegex = SpanishDateExtractorConfiguration.StrictRelativeRegex; YearSuffix = SpanishDateExtractorConfiguration.YearSuffix; RelativeWeekDayRegex = SpanishDateExtractorConfiguration.RelativeWeekDayRegex; + BeforeAfterRegex = SpanishDateExtractorConfiguration.BeforeAfterRegex; - RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexFlags); - NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); - PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); - UpcomingPrefixRegex = new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags); - PastPrefixRegex = new Regex(DateTimeDefinitions.PastPrefixRegex, RegexFlags); + RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexFlags, RegexTimeOut); + NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + UpcomingPrefixRegex = new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags, RegexTimeOut); + PastPrefixRegex = new Regex(DateTimeDefinitions.PastPrefixRegex, RegexFlags, RegexTimeOut); DayOfMonth = config.DayOfMonth; DayOfWeek = config.DayOfWeek; @@ -50,11 +54,13 @@ public SpanishDateParserConfiguration(ICommonDateTimeParserConfiguration config) OrdinalExtractor = config.OrdinalExtractor; 
CardinalExtractor = config.CardinalExtractor; NumberParser = config.NumberParser; - DurationExtractor = config.DurationExtractor; DateExtractor = config.DateExtractor; + DurationExtractor = config.DurationExtractor; DurationParser = config.DurationParser; + HolidayParser = new BaseHolidayParser(new SpanishHolidayParserConfiguration(this)); UnitMap = config.UnitMap; UtilityConfiguration = config.UtilityConfiguration; + SameDayTerms = DateTimeDefinitions.SameDayTerms.ToImmutableList(); PlusOneDayTerms = DateTimeDefinitions.PlusOneDayTerms.ToImmutableList(); PlusTwoDayTerms = DateTimeDefinitions.PlusTwoDayTerms.ToImmutableList(); @@ -78,6 +84,8 @@ public SpanishDateParserConfiguration(ICommonDateTimeParserConfiguration config) public IDateTimeParser DurationParser { get; } + public IDateTimeParser HolidayParser { get; } + public IImmutableDictionary UnitMap { get; } public IEnumerable DateRegexes { get; } @@ -126,6 +134,10 @@ public SpanishDateParserConfiguration(ICommonDateTimeParserConfiguration config) public Regex PastPrefixRegex { get; } + public Regex BeforeAfterRegex { get; } + + public Regex TasksModeDurationToDatePatterns { get; } + public IImmutableDictionary DayOfMonth { get; } public IImmutableDictionary DayOfWeek { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDatePeriodParserConfiguration.cs index f855f7d68c..dff6012905 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDatePeriodParserConfiguration.cs @@ -1,6 +1,8 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Immutable; -using System.Diagnostics; using System.Linq; using System.Text.RegularExpressions; @@ -12,25 +14,37 @@ public class SpanishDatePeriodParserConfiguration : BaseDateTimeOptionsConfigura { // TODO: config this according to English public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NextSuffixRegex = + new Regex(DateTimeDefinitions.NextSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousSuffixRegex = - new Regex(DateTimeDefinitions.PreviousSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterNextSuffixRegex = - new Regex(DateTimeDefinitions.AfterNextSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterNextSuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeSuffixRegex = + new Regex(DateTimeDefinitions.RelativeSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeRegex = - new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificEndOfRangeRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AmbiguousPointRangeRegex = + new 
Regex(DateTimeDefinitions.AmbiguousPointRangeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialYearPrefixes = + new Regex(DateTimeDefinitions.SpecialYearPrefixes, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -46,6 +60,7 @@ public SpanishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c DateExtractor = config.DateExtractor; DurationParser = config.DurationParser; DateParser = config.DateParser; + MonthFrontBetweenRegex = SpanishDatePeriodExtractorConfiguration.MonthFrontBetweenRegex; BetweenRegex = SpanishDatePeriodExtractorConfiguration.DayBetweenRegex; MonthFrontSimpleCasesRegex = SpanishDatePeriodExtractorConfiguration.MonthFrontSimpleCasesRegex; @@ -84,6 +99,11 @@ public SpanishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c MoreThanRegex = SpanishDatePeriodExtractorConfiguration.MoreThanRegex; CenturySuffixRegex = SpanishDatePeriodExtractorConfiguration.CenturySuffixRegex; NowRegex = SpanishDatePeriodExtractorConfiguration.NowRegex; + FirstLastRegex = SpanishDatePeriodExtractorConfiguration.FirstLastRegex; + OfYearRegex = SpanishDatePeriodExtractorConfiguration.OfYearRegex; + SpecialDayRegex = SpanishDateExtractorConfiguration.SpecialDayRegex; + TodayNowRegex = new Regex(DateTimeDefinitions.TodayNowRegex, RegexOptions.Singleline); + UnitMap = config.UnitMap; CardinalMap = config.CardinalMap; DayOfMonth = config.DayOfMonth; @@ -193,6 +213,14 @@ public SpanishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c public Regex NowRegex { get; } + public Regex SpecialDayRegex { get; } + + public Regex TodayNowRegex { get; } + + public Regex FirstLastRegex { get; } + + public Regex OfYearRegex { get; } + Regex IDatePeriodParserConfiguration.NextPrefixRegex => NextPrefixRegex; Regex IDatePeriodParserConfiguration.PreviousPrefixRegex => PreviousPrefixRegex; @@ -203,6 +231,8 @@ public 
SpanishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c Regex IDatePeriodParserConfiguration.UnspecificEndOfRangeRegex => UnspecificEndOfRangeRegex; + Regex IDatePeriodParserConfiguration.AmbiguousPointRangeRegex => AmbiguousPointRangeRegex; + bool IDatePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; public IImmutableDictionary UnitMap { get; } @@ -228,12 +258,15 @@ public int GetSwiftDayOrMonth(string text) var trimmedText = text.Trim(); var swift = 0; - if (NextPrefixRegex.IsMatch(trimmedText)) + if (AfterNextSuffixRegex.IsMatch(trimmedText)) + { + swift = 2; + } + else if (NextPrefixRegex.IsMatch(trimmedText) || NextSuffixRegex.IsMatch(trimmedText)) { swift = 1; } - - if (PreviousPrefixRegex.IsMatch(trimmedText) || PreviousSuffixRegex.IsMatch(trimmedText)) + else if (PreviousPrefixRegex.IsMatch(trimmedText) || PreviousSuffixRegex.IsMatch(trimmedText)) { swift = -1; } @@ -245,12 +278,15 @@ public int GetSwiftYear(string text) { var trimmedText = text.Trim(); var swift = -10; - if (NextPrefixRegex.IsMatch(trimmedText)) + if (AfterNextSuffixRegex.IsMatch(trimmedText)) + { + swift = 2; + } + else if (NextPrefixRegex.IsMatch(trimmedText) || NextSuffixRegex.IsMatch(trimmedText)) { swift = 1; } - - if (PreviousPrefixRegex.IsMatch(trimmedText)) + else if (PreviousPrefixRegex.IsMatch(trimmedText) || PreviousSuffixRegex.IsMatch(trimmedText)) { swift = -1; } @@ -277,42 +313,48 @@ public bool IsLastCardinal(string text) public bool IsMonthOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o)) || - (DateTimeDefinitions.MonthTerms.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); + return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (DateTimeDefinitions.MonthTerms.Any(o => trimmedText.Contains(o)) && RelativeSuffixRegex.IsMatch(trimmedText)); } public bool 
IsMonthToDate(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } public bool IsWeekend(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o)) || - (DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); + return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.Contains(o)) && RelativeSuffixRegex.IsMatch(trimmedText)); } public bool IsWeekOnly(string text) { var trimmedText = text.Trim(); - return (DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o)) || - (DateTimeDefinitions.WeekTerms.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText))) && + return (DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (DateTimeDefinitions.WeekTerms.Any(o => trimmedText.Contains(o)) && RelativeSuffixRegex.IsMatch(trimmedText))) && !DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.Contains(o)); } + public bool IsFortnight(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.FortnightTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)); + } + public bool IsYearOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o)) || - (DateTimeDefinitions.YearTerms.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); + return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (DateTimeDefinitions.YearTerms.Any(o => trimmedText.Contains(o)) && (RelativeSuffixRegex.IsMatch(trimmedText) || SpecialYearPrefixes.IsMatch(trimmedText))); } 
public bool IsYearToDate(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateTimeAltParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateTimeAltParserConfiguration.cs index 9eb865598c..200866284e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateTimeAltParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateTimeAltParserConfiguration.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.DateTime.Spanish +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.DateTime.Spanish { public class SpanishDateTimeAltParserConfiguration : IDateTimeAltParserConfiguration { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateTimeParserConfiguration.cs index 1f843fc283..52509db49b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateTimeParserConfiguration.cs @@ -1,167 +1,196 @@ -using System.Collections.Immutable; -using System.Text.RegularExpressions; - -using Microsoft.Recognizers.Definitions.Spanish; -using Microsoft.Recognizers.Text.DateTime.Utilities; - -namespace Microsoft.Recognizers.Text.DateTime.Spanish -{ - public class SpanishDateTimeParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeParserConfiguration - { - - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - - public 
SpanishDateTimeParserConfiguration(ICommonDateTimeParserConfiguration config) - : base(config) - { - TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; - TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime; - DateExtractor = config.DateExtractor; - TimeExtractor = config.TimeExtractor; - DateParser = config.DateParser; - TimeParser = config.TimeParser; - - NowRegex = SpanishDateTimeExtractorConfiguration.NowRegex; - - AMTimeRegex = new Regex(DateTimeDefinitions.AmTimeRegex, RegexFlags); - PMTimeRegex = new Regex(DateTimeDefinitions.PmTimeRegex, RegexFlags); - - SimpleTimeOfTodayAfterRegex = SpanishDateTimeExtractorConfiguration.SimpleTimeOfTodayAfterRegex; - SimpleTimeOfTodayBeforeRegex = SpanishDateTimeExtractorConfiguration.SimpleTimeOfTodayBeforeRegex; - SpecificTimeOfDayRegex = SpanishDateTimeExtractorConfiguration.SpecificTimeOfDayRegex; - SpecificEndOfRegex = SpanishDateTimeExtractorConfiguration.SpecificEndOfRegex; - UnspecificEndOfRegex = SpanishDateTimeExtractorConfiguration.UnspecificEndOfRegex; - UnitRegex = SpanishDateTimeExtractorConfiguration.UnitRegex; - DateNumberConnectorRegex = SpanishDateTimeExtractorConfiguration.DateNumberConnectorRegex; - YearRegex = SpanishDateTimeExtractorConfiguration.YearRegex; - - Numbers = config.Numbers; - CardinalExtractor = config.CardinalExtractor; - IntegerExtractor = config.IntegerExtractor; - NumberParser = config.NumberParser; - DurationExtractor = config.DurationExtractor; - DurationParser = config.DurationParser; - UnitMap = config.UnitMap; - UtilityConfiguration = config.UtilityConfiguration; - } - - public string TokenBeforeDate { get; } - - public string TokenBeforeTime { get; } - - public IDateExtractor DateExtractor { get; } - - public IDateTimeExtractor TimeExtractor { get; } - - public IDateTimeParser DateParser { get; } - - public IDateTimeParser TimeParser { get; } - - public IExtractor CardinalExtractor { get; } - - public IExtractor IntegerExtractor { get; } - - public IParser NumberParser { 
get; } - - public IDateTimeExtractor DurationExtractor { get; } - - public IDateTimeParser DurationParser { get; } - - public IImmutableDictionary UnitMap { get; } - - public Regex NowRegex { get; } - - public Regex AMTimeRegex { get; } - - public Regex PMTimeRegex { get; } - - public Regex SimpleTimeOfTodayAfterRegex { get; } - - public Regex SimpleTimeOfTodayBeforeRegex { get; } - - public Regex SpecificTimeOfDayRegex { get; } - - public Regex SpecificEndOfRegex { get; } - - public Regex UnspecificEndOfRegex { get; } - - public Regex UnitRegex { get; } - - public Regex DateNumberConnectorRegex { get; } - - public Regex PrepositionRegex { get; } - - public Regex ConnectorRegex { get; } - - public Regex YearRegex { get; } - - public IImmutableDictionary Numbers { get; } - - public IDateTimeUtilityConfiguration UtilityConfiguration { get; } - - public int GetHour(string text, int hour) - { - var trimmedText = text.Trim(); - int result = hour; - - // TODO: Replace with a regex - if ((trimmedText.EndsWith("mañana") || trimmedText.EndsWith("madrugada")) && hour >= Constants.HalfDayHourCount) - { - result -= Constants.HalfDayHourCount; - } - else if (!(trimmedText.EndsWith("mañana") || trimmedText.EndsWith("madrugada")) && hour < Constants.HalfDayHourCount) - { - result += Constants.HalfDayHourCount; - } - - return result; - } - - public bool GetMatchedNowTimex(string text, out string timex) - { - var trimmedText = text.Trim(); - if (trimmedText.EndsWith("ahora") || trimmedText.EndsWith("mismo") || trimmedText.EndsWith("momento")) - { - timex = "PRESENT_REF"; - } - else if (trimmedText.EndsWith("posible") || trimmedText.EndsWith("pueda") || - trimmedText.EndsWith("puedas") || trimmedText.EndsWith("podamos") || trimmedText.EndsWith("puedan")) - { - timex = "FUTURE_REF"; - } - else if (trimmedText.EndsWith("mente")) - { - timex = "PAST_REF"; - } - else - { - timex = null; - return false; - } - - return true; - } - - public int GetSwiftDay(string text) - { - var 
trimmedText = text.Trim(); - var swift = 0; - - if (SpanishDatePeriodParserConfiguration.PreviousPrefixRegex.IsMatch(trimmedText)) - { - swift = -1; - } - else if (SpanishDatePeriodParserConfiguration.NextPrefixRegex.IsMatch(trimmedText)) - { - swift = 1; - } - - return swift; - } - - public bool ContainsAmbiguousToken(string text, string matchedText) - { - return text.Contains("esta mañana") && matchedText.Contains("mañana"); - } - } -} +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Spanish; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Spanish +{ + public class SpanishDateTimeParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeParserConfiguration + { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public SpanishDateTimeParserConfiguration(ICommonDateTimeParserConfiguration config) + : base(config) + { + TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime; + DateExtractor = config.DateExtractor; + TimeExtractor = config.TimeExtractor; + DateParser = config.DateParser; + TimeParser = config.TimeParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; + + NowRegex = SpanishDateTimeExtractorConfiguration.NowRegex; + + AMTimeRegex = new Regex(DateTimeDefinitions.AmTimeRegex, RegexFlags, RegexTimeOut); + PMTimeRegex = new Regex(DateTimeDefinitions.PmTimeRegex, RegexFlags, RegexTimeOut); + NightTimeRegex = new Regex(DateTimeDefinitions.NightTimeRegex, RegexFlags, RegexTimeOut); + LastNightTimeRegex = new Regex(DateTimeDefinitions.LastNightTimeRegex, RegexFlags, RegexTimeOut); + NowTimeRegex = new 
Regex(DateTimeDefinitions.NowTimeRegex, RegexFlags, RegexTimeOut); + RecentlyTimeRegex = new Regex(DateTimeDefinitions.RecentlyTimeRegex, RegexFlags, RegexTimeOut); + AsapTimeRegex = new Regex(DateTimeDefinitions.AsapTimeRegex, RegexFlags, RegexTimeOut); + + SimpleTimeOfTodayAfterRegex = SpanishDateTimeExtractorConfiguration.SimpleTimeOfTodayAfterRegex; + SimpleTimeOfTodayBeforeRegex = SpanishDateTimeExtractorConfiguration.SimpleTimeOfTodayBeforeRegex; + SpecificTimeOfDayRegex = SpanishDateTimeExtractorConfiguration.SpecificTimeOfDayRegex; + SpecificEndOfRegex = SpanishDateTimeExtractorConfiguration.SpecificEndOfRegex; + UnspecificEndOfRegex = SpanishDateTimeExtractorConfiguration.UnspecificEndOfRegex; + UnitRegex = SpanishDateTimeExtractorConfiguration.UnitRegex; + DateNumberConnectorRegex = SpanishDateTimeExtractorConfiguration.DateNumberConnectorRegex; + YearRegex = SpanishDateTimeExtractorConfiguration.YearRegex; + + Numbers = config.Numbers; + CardinalExtractor = config.CardinalExtractor; + IntegerExtractor = config.IntegerExtractor; + NumberParser = config.NumberParser; + DurationExtractor = config.DurationExtractor; + DurationParser = config.DurationParser; + UnitMap = config.UnitMap; + UtilityConfiguration = config.UtilityConfiguration; + } + + public string TokenBeforeDate { get; } + + public string TokenBeforeTime { get; } + + public IDateExtractor DateExtractor { get; } + + public IDateTimeExtractor TimeExtractor { get; } + + public IDateTimeParser DateParser { get; } + + public IDateTimeParser TimeParser { get; } + + public IExtractor CardinalExtractor { get; } + + public IExtractor IntegerExtractor { get; } + + public IParser NumberParser { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateTimeParser DurationParser { get; } + + public IImmutableDictionary UnitMap { get; } + + public Regex NowRegex { get; } + + public Regex AMTimeRegex { get; } + + public Regex PMTimeRegex { get; } + + public Regex NightTimeRegex { get; } + 
+ public Regex LastNightTimeRegex { get; } + + public Regex NowTimeRegex { get; } + + public Regex RecentlyTimeRegex { get; } + + public Regex AsapTimeRegex { get; } + + public Regex SimpleTimeOfTodayAfterRegex { get; } + + public Regex SimpleTimeOfTodayBeforeRegex { get; } + + public Regex SpecificTimeOfDayRegex { get; } + + public Regex SpecificEndOfRegex { get; } + + public Regex UnspecificEndOfRegex { get; } + + public Regex UnitRegex { get; } + + public Regex DateNumberConnectorRegex { get; } + + public Regex PrepositionRegex { get; } + + public Regex ConnectorRegex { get; } + + public Regex YearRegex { get; } + + public IImmutableDictionary Numbers { get; } + + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + + public int GetHour(string text, int hour) + { + int result = hour; + + var trimmedText = text.Trim(); + + if (AMTimeRegex.MatchEnd(trimmedText, trim: true).Success && hour >= Constants.HalfDayHourCount) + { + result -= Constants.HalfDayHourCount; + } + else if (!AMTimeRegex.MatchEnd(trimmedText, trim: true).Success && hour < Constants.HalfDayHourCount && + !(NightTimeRegex.MatchEnd(trimmedText, trim: true).Success && hour < Constants.QuarterDayHourCount)) + { + result += Constants.HalfDayHourCount; + } + + return result; + } + + public bool GetMatchedNowTimex(string text, out string timex) + { + var trimmedText = text.Trim(); + + if (NowTimeRegex.MatchEnd(trimmedText, trim: true).Success) + { + timex = "PRESENT_REF"; + } + else if (RecentlyTimeRegex.MatchEnd(trimmedText, trim: true).Success) + { + timex = "PAST_REF"; + } + else if (AsapTimeRegex.MatchEnd(trimmedText, trim: true).Success) + { + timex = "FUTURE_REF"; + } + else + { + timex = null; + return false; + } + + return true; + } + + public int GetSwiftDay(string text) + { + var trimmedText = text.Trim(); + var swift = 0; + + if 
(SpanishDatePeriodParserConfiguration.PreviousPrefixRegex.IsMatch(trimmedText) || + LastNightTimeRegex.IsMatch(trimmedText)) + { + swift = -1; + } + else if (SpanishDatePeriodParserConfiguration.NextPrefixRegex.IsMatch(trimmedText)) + { + swift = 1; + } + + return swift; + } + + public bool ContainsAmbiguousToken(string text, string matchedText) + { + // @TODO move hardcoded values to resources file + return text.Contains("esta mañana") && matchedText.Contains("mañana"); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateTimePeriodParserConfiguration.cs index ba0b5fb068..afdfb9ea9c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDateTimePeriodParserConfiguration.cs @@ -1,4 +1,9 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; @@ -7,10 +12,28 @@ namespace Microsoft.Recognizers.Text.DateTime.Spanish { public class SpanishDateTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimePeriodParserConfiguration { + public static readonly Regex EarlyMorningStartEndRegex = + new Regex(DateTimeDefinitions.EarlyMorningStartEndRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MorningStartEndRegex = + new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AfternoonStartEndRegex = + new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex EveningStartEndRegex = + new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NightStartEndRegex = + new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public SpanishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { - TokenBeforeDate = Definitions.Spanish.DateTimeDefinitions.TokenBeforeDate; + TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime; DateExtractor = config.DateExtractor; TimeExtractor = config.TimeExtractor; @@ -25,10 +48,13 @@ public SpanishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati TimePeriodParser = config.TimePeriodParser; DurationParser = config.DurationParser; TimeZoneParser = config.TimeZoneParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; PureNumberFromToRegex = SpanishTimePeriodExtractorConfiguration.PureNumFromTo; + HyphenDateRegex = 
SpanishDateTimePeriodExtractorConfiguration.HyphenDateRegex; PureNumberBetweenAndRegex = SpanishTimePeriodExtractorConfiguration.PureNumBetweenAnd; - SpecificTimeOfDayRegex = SpanishDateTimeExtractorConfiguration.SpecificTimeOfDayRegex; + SpecificTimeOfDayRegex = SpanishDateTimePeriodExtractorConfiguration.PeriodSpecificTimeOfDayRegex; TimeOfDayRegex = SpanishDateTimeExtractorConfiguration.TimeOfDayRegex; PreviousPrefixRegex = SpanishDatePeriodExtractorConfiguration.PastRegex; FutureRegex = SpanishDatePeriodExtractorConfiguration.FutureRegex; @@ -44,12 +70,15 @@ public SpanishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati PrefixDayRegex = SpanishDateTimePeriodExtractorConfiguration.PrefixDayRegex; BeforeRegex = SpanishDateTimePeriodExtractorConfiguration.BeforeRegex; AfterRegex = SpanishDateTimePeriodExtractorConfiguration.AfterRegex; + UnitMap = config.UnitMap; Numbers = config.Numbers; } public string TokenBeforeDate { get; } + public string TokenBeforeTime { get; } + public IDateExtractor DateExtractor { get; } public IDateTimeExtractor TimeExtractor { get; } @@ -78,6 +107,8 @@ public SpanishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati public Regex PureNumberFromToRegex { get; } + public Regex HyphenDateRegex { get; } + public Regex PureNumberBetweenAndRegex { get; } public Regex SpecificTimeOfDayRegex { get; } @@ -114,55 +145,56 @@ public SpanishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati bool IDateTimePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + Regex IDateTimePeriodParserConfiguration.TasksmodeMealTimeofDayRegex => null; + public IImmutableDictionary UnitMap { get; } public IImmutableDictionary Numbers { get; } - public bool GetMatchedTimeRange(string text, out string timeStr, out int beginHour, out int endHour, out int endMin) + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + + public 
bool GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMin) { var trimmedText = text.Trim(); + beginHour = 0; endHour = 0; endMin = 0; - // TODO: modify it according to the coresponding function in English part - if (trimmedText.EndsWith("madrugada")) + if (EarlyMorningStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TDA"; - beginHour = 4; - endHour = 8; + todSymbol = Constants.EarlyMorning; } - else if (trimmedText.EndsWith("mañana")) + else if (AfternoonStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TMO"; - beginHour = 8; - endHour = Constants.HalfDayHourCount; + todSymbol = Constants.Afternoon; } - else if (trimmedText.Contains("pasado mediodia") || trimmedText.Contains("pasado el mediodia")) + else if (EveningStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TAF"; - beginHour = Constants.HalfDayHourCount; - endHour = 16; + todSymbol = Constants.Evening; } - else if (trimmedText.EndsWith("tarde")) + else if (NightStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TEV"; - beginHour = 16; - endHour = 20; + todSymbol = Constants.Night; } - else if (trimmedText.EndsWith("noche")) + else if (MorningStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TNI"; - beginHour = 20; - endHour = 23; - endMin = 59; + todSymbol = Constants.Morning; } else { - timeStr = null; + todSymbol = null; return false; } + var parseResult = TimexUtility.ResolveTimeOfDay(todSymbol); + todSymbol = parseResult.Timex; + beginHour = parseResult.BeginHour; + endHour = parseResult.EndHour; + endMin = parseResult.EndMin; + return true; } @@ -171,9 +203,9 @@ public int GetSwiftPrefix(string text) var trimmedText = text.Trim(); var swift = 0; - // TODO: Replace with a regex + // @TODO move hardcoded values to resources file if (SpanishDatePeriodParserConfiguration.PreviousPrefixRegex.IsMatch(trimmedText) || - trimmedText.Equals("anoche")) + trimmedText.StartsWith("anoche", StringComparison.Ordinal)) { swift = -1; } diff --git 
a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDurationParserConfiguration.cs index e721400f49..368752bb70 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDurationParserConfiguration.cs @@ -1,10 +1,19 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Spanish; namespace Microsoft.Recognizers.Text.DateTime.Spanish { public class SpanishDurationParserConfiguration : BaseDateTimeOptionsConfiguration, IDurationParserConfiguration { + public static readonly Regex PrefixArticleRegex = + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public SpanishDurationParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -35,12 +44,14 @@ public SpanishDurationParserConfiguration(ICommonDateTimeParserConfiguration con public IParser NumberParser { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public Regex NumberCombinedWithUnit { get; } public Regex AnUnitRegex { get; } + Regex IDurationParserConfiguration.PrefixArticleRegex => PrefixArticleRegex; + public Regex DuringRegex { get; } public Regex AllDateUnitRegex { get; } @@ -61,6 +72,8 @@ public SpanishDurationParserConfiguration(ICommonDateTimeParserConfiguration con public Regex SpecialNumberUnitRegex { get; } + bool IDurationParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + public IImmutableDictionary UnitMap { get; } public 
IImmutableDictionary UnitValueMap { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishHolidayParserConfiguration.cs index de18a733b7..1fbc823d7f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishHolidayParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishHolidayParserConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Collections.Immutable; using Microsoft.Recognizers.Definitions.Spanish; @@ -25,8 +28,7 @@ public override int GetSwiftYear(string text) { swift = 1; } - - if (SpanishDatePeriodParserConfiguration.PreviousPrefixRegex.IsMatch(trimmedText)) + else if (SpanishDatePeriodParserConfiguration.PreviousPrefixRegex.IsMatch(trimmedText)) { swift = -1; } @@ -56,7 +58,7 @@ protected override IDictionary> InitHolidayFuncs() { "padres", FathersDay }, { "madres", MothersDay }, { "acciondegracias", ThanksgivingDay }, - { "trabajador", LabourDay }, + { "trabajador", InternationalWorkersDay }, { "delaraza", ColumbusDay }, { "memoria", MemorialDay }, { "pascuas", Pascuas }, @@ -69,6 +71,11 @@ protected override IDictionary> InitHolidayFuncs() { "todoslossantos", HalloweenDay }, { "niño", ChildrenDay }, { "mujer", FemaleDay }, + { "independencia", UsaIndependenceDay }, + { "earthday", EarthDay }, + { "stpatrickday", StPatrickDay }, + { "valentinesday", ValentinesDay }, + { "goodfriday", GoodFriday }, }; } @@ -88,6 +95,16 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject TeacherDay(int year) => new DateObject(year, 9, 11); - private static DateObject Pascuas(int year) => DateObject.MinValue; + private static DateObject Pascuas(int year) => HolidayFunctions.CalculateHolidayByEaster(year); + + 
private static DateObject GoodFriday(int year) => Pascuas(year).AddDays(-2); + + private static DateObject UsaIndependenceDay(int year) => new DateObject(year, 7, 4); + + private static DateObject EarthDay(int year) => new DateObject(year, 4, 22); + + private static DateObject ValentinesDay(int year) => new DateObject(year, 2, 14); + + private static DateObject StPatrickDay(int year) => new DateObject(year, 3, 17); } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishMergedParserConfiguration.cs index 85f141788d..a5a8e02dce 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishMergedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishMergedParserConfiguration.cs @@ -1,5 +1,8 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Spanish; using Microsoft.Recognizers.Text.Matcher; namespace Microsoft.Recognizers.Text.DateTime.Spanish @@ -11,11 +14,13 @@ public SpanishMergedParserConfiguration(IDateTimeOptionsConfiguration config) { BeforeRegex = SpanishMergedExtractorConfiguration.BeforeRegex; AfterRegex = SpanishMergedExtractorConfiguration.AfterRegex; - SinceRegex = SpanishMergedExtractorConfiguration.SinceRegex; + SinceRegex = (config.Options & DateTimeOptions.ExperimentalMode) != 0 ? 
SpanishMergedExtractorConfiguration.SinceRegexExp : + SpanishMergedExtractorConfiguration.SinceRegex; AroundRegex = SpanishMergedExtractorConfiguration.AroundRegex; EqualRegex = SpanishMergedExtractorConfiguration.EqualRegex; SuffixAfter = SpanishMergedExtractorConfiguration.SuffixAfterRegex; YearRegex = SpanishDatePeriodExtractorConfiguration.YearRegex; + SuperfluousWordMatcher = SpanishMergedExtractorConfiguration.SuperfluousWordMatcher; DatePeriodParser = new BaseDatePeriodParser(new SpanishDatePeriodParserConfiguration(this)); @@ -45,5 +50,7 @@ public SpanishMergedParserConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeParser HolidayParser { get; } public StringMatcher SuperfluousWordMatcher { get; } + + bool IMergedParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishSetParserConfiguration.cs index 93c1dbb4f4..8f21f0a799 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishSetParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishSetParserConfiguration.cs @@ -1,11 +1,49 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Spanish; using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Spanish { public class SpanishSetParserConfiguration : BaseDateTimeOptionsConfiguration, ISetParserConfiguration { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex DoubleMultiplierRegex = + new Regex(DateTimeDefinitions.DoubleMultiplierRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex DayTypeRegex = + new Regex(DateTimeDefinitions.DayTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex WeekTypeRegex = + new Regex(DateTimeDefinitions.WeekTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex BiWeekTypeRegex = + new Regex(DateTimeDefinitions.BiWeekTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex WeekendTypeRegex = + new Regex(DateTimeDefinitions.WeekendTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex MonthTypeRegex = + new Regex(DateTimeDefinitions.MonthTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex QuarterTypeRegex = + new Regex(DateTimeDefinitions.QuarterTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex SemiAnnualTypeRegex = + new Regex(DateTimeDefinitions.SemiAnnualTypeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex YearTypeRegex = + new Regex(DateTimeDefinitions.YearTypeRegex, RegexFlags, RegexTimeOut); + + // pass FutureTerms as List to ReplaceValueInTextWithFutTerm function + private static readonly List ThisTerms = (List)DateTimeDefinitions.ThisTerms; + public SpanishSetParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -80,55 +118,49 @@ public bool GetMatchedDailyTimex(string text, out string timex) { var 
trimmedText = text.Trim(); - if (trimmedText.EndsWith("diario") || trimmedText.EndsWith("diariamente")) + float durationLength = 1; // Default value + float multiplier = 1; + string durationType; + + if (DoubleMultiplierRegex.IsMatch(trimmedText)) { - timex = "P1D"; + multiplier = 2; } - else if (trimmedText.Equals("semanalmente")) + + if (DayTypeRegex.IsMatch(trimmedText)) { - timex = "P1W"; + durationType = Constants.TimexDay; } - else if (trimmedText.Equals("quincenalmente")) + else if (WeekTypeRegex.IsMatch(trimmedText)) { - timex = "P2W"; + durationType = Constants.TimexWeek; } - else if (trimmedText.Equals("mensualmente")) + else if (BiWeekTypeRegex.IsMatch(trimmedText)) { - timex = "P1M"; + durationType = Constants.TimexWeek; + multiplier = 2; } - else if (trimmedText.Equals("anualmente")) + else if (WeekendTypeRegex.IsMatch(trimmedText)) { - timex = "P1Y"; + durationType = Constants.TimexWeekend; } - else + else if (MonthTypeRegex.IsMatch(trimmedText)) { - timex = null; - return false; + durationType = Constants.TimexMonth; } - - return true; - } - - public bool GetMatchedUnitTimex(string text, out string timex) - { - var trimmedText = text.Trim(); - - if (trimmedText.Equals("día") || trimmedText.Equals("dia") || - trimmedText.Equals("días") || trimmedText.Equals("dias")) + else if (QuarterTypeRegex.IsMatch(trimmedText)) { - timex = "P1D"; + multiplier = 3; + durationType = Constants.TimexMonth; } - else if (trimmedText.Equals("semana") || trimmedText.Equals("semanas")) + else if (SemiAnnualTypeRegex.IsMatch(trimmedText)) { - timex = "P1W"; + multiplier = 0.5f; + durationType = Constants.TimexYear; } - else if (trimmedText.Equals("mes") || trimmedText.Equals("meses")) + else if (YearTypeRegex.IsMatch(trimmedText)) { - timex = "P1M"; - } - else if (trimmedText.Equals("año") || trimmedText.Equals("años")) - { - timex = "P1Y"; + durationType = Constants.TimexYear; } else { @@ -136,9 +168,18 @@ public bool GetMatchedUnitTimex(string text, out string timex) 
return false; } + timex = TimexUtility.GenerateSetTimex(durationType, durationLength, multiplier); + return true; } + public bool GetMatchedUnitTimex(string text, out string timex) + { + return GetMatchedDailyTimex(text, out timex); + } + public string WeekDayGroupMatchString(Match match) => SetHandler.WeekDayGroupMatchString(match); + + public string ReplaceValueInTextWithFutTerm(string text, string value) => TasksModeSetHandler.ReplaceValueInTextWithFutTerm(text, value, ThisTerms); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishTimeParserConfiguration.cs index b478e891d8..d261198e8b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishTimeParserConfiguration.cs @@ -1,5 +1,10 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; @@ -10,6 +15,20 @@ namespace Microsoft.Recognizers.Text.DateTime.Spanish { public class SpanishTimeParserConfiguration : BaseDateTimeOptionsConfiguration, ITimeParserConfiguration { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex HalfTokenRegex = + new Regex(DateTimeDefinitions.HalfTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex QuarterTokenRegex = + new Regex(DateTimeDefinitions.QuarterTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex PastTokenRegex = + new Regex(DateTimeDefinitions.PastTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex ToTokenRegex = + new Regex(DateTimeDefinitions.ToTokenRegex, RegexFlags, RegexTimeOut); + public SpanishTimeParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -38,27 +57,31 @@ public SpanishTimeParserConfiguration(ICommonDateTimeParserConfiguration config) public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool hasMin) { var deltaMin = 0; - var trimedPrefix = prefix.Trim(); + var trimmedPrefix = prefix.Trim(); - if (trimedPrefix.StartsWith("cuarto") || trimedPrefix.StartsWith("y cuarto")) - { - deltaMin = 15; - } - else if (trimedPrefix.StartsWith("menos cuarto")) + if (QuarterTokenRegex.IsMatch(trimmedPrefix)) { - deltaMin = -15; + var match = QuarterTokenRegex.Match(trimmedPrefix); + if (match.Groups[Constants.NegativeGroupName].Success) + { + deltaMin = -15; + } + else + { + deltaMin = 15; + } } - else if (trimedPrefix.StartsWith("media") || trimedPrefix.StartsWith("y media")) + else if (HalfTokenRegex.IsMatch(trimmedPrefix)) { deltaMin = 30; } else { - var match = 
SpanishTimeExtractorConfiguration.LessThanOneHour.Match(trimedPrefix); + var match = SpanishTimeExtractorConfiguration.LessThanOneHour.Match(trimmedPrefix); var minStr = match.Groups["deltamin"].Value; if (!string.IsNullOrWhiteSpace(minStr)) { - deltaMin = int.Parse(minStr); + deltaMin = int.Parse(minStr, CultureInfo.InvariantCulture); } else { @@ -67,16 +90,17 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha } } - if (trimedPrefix.EndsWith("pasadas") || trimedPrefix.EndsWith("pasados") || - trimedPrefix.EndsWith("pasadas las") || trimedPrefix.EndsWith("pasados las") || - trimedPrefix.EndsWith("pasadas de las") || trimedPrefix.EndsWith("pasados de las")) - { - // deltaMin it's positive - } - else if (trimedPrefix.EndsWith("para la") || trimedPrefix.EndsWith("para las") || - trimedPrefix.EndsWith("antes de la") || trimedPrefix.EndsWith("antes de las")) + if (ToTokenRegex.IsMatch(trimmedPrefix)) { - deltaMin = -deltaMin; + var match = ToTokenRegex.Match(trimmedPrefix); + if (match.Groups[Constants.NegativeGroupName].Success) + { + min = -min; + } + else + { + deltaMin = -deltaMin; + } } min += deltaMin; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishTimePeriodParserConfiguration.cs index d4ec671236..9d4607336f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishTimePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; @@ -52,7 +56,7 @@ public SpanishTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration c public IDateTimeUtilityConfiguration UtilityConfiguration { get; } - public bool GetMatchedTimexRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) + public bool GetMatchedTimeRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) { var trimmedText = text.Trim(); @@ -61,23 +65,24 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou endMin = 0; var timeOfDay = string.Empty; - if (DateTimeDefinitions.EarlyMorningTermList.Any(o => trimmedText.EndsWith(o))) + + if (DateTimeDefinitions.EarlyMorningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.EarlyMorning; } - else if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Morning; } - else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Afternoon; } - else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Evening; } - else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Night; } @@ -87,7 +92,7 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou return false; } - var 
parseResult = TimexUtility.ParseTimeOfDay(timeOfDay); + var parseResult = TimexUtility.ResolveTimeOfDay(timeOfDay); timex = parseResult.Timex; beginHour = parseResult.BeginHour; endHour = parseResult.EndHour; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Utilities/SpanishDatetimeUtilityConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Utilities/SpanishDatetimeUtilityConfiguration.cs index c3ea858924..1c3f71406e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Utilities/SpanishDatetimeUtilityConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Utilities/SpanishDatetimeUtilityConfiguration.cs @@ -1,73 +1,35 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Spanish.Utilities { - public class SpanishDatetimeUtilityConfiguration : IDateTimeUtilityConfiguration + public class SpanishDatetimeUtilityConfiguration : BaseDatetimeUtilityConfiguration { - public static readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoRegex, RegexFlags); - - public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); - - public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); - - public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); - - public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); - - public static readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); - - public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); - - public static readonly 
Regex AmPmDescRegex = - new Regex(DateTimeDefinitions.AmPmDescRegex, RegexFlags); - - public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); - - public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); - - public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); - - public static readonly Regex CommonDatePrefixRegex = - new Regex(DateTimeDefinitions.CommonDatePrefixRegex, RegexFlags); - - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - - Regex IDateTimeUtilityConfiguration.LaterRegex => LaterRegex; - - Regex IDateTimeUtilityConfiguration.AgoRegex => AgoRegex; - - Regex IDateTimeUtilityConfiguration.InConnectorRegex => InConnectorRegex; - - Regex IDateTimeUtilityConfiguration.SinceYearSuffixRegex => SinceYearSuffixRegex; - - Regex IDateTimeUtilityConfiguration.WithinNextPrefixRegex => WithinNextPrefixRegex; - - Regex IDateTimeUtilityConfiguration.AmDescRegex => AmDescRegex; - - Regex IDateTimeUtilityConfiguration.PmDescRegex => PmDescRegex; - - Regex IDateTimeUtilityConfiguration.AmPmDescRegex => AmPmDescRegex; - - Regex IDateTimeUtilityConfiguration.RangeUnitRegex => RangeUnitRegex; - - Regex IDateTimeUtilityConfiguration.TimeUnitRegex => TimeUnitRegex; - - Regex IDateTimeUtilityConfiguration.DateUnitRegex => DateUnitRegex; - - Regex IDateTimeUtilityConfiguration.CommonDatePrefixRegex => CommonDatePrefixRegex; - - bool IDateTimeUtilityConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + public SpanishDatetimeUtilityConfiguration() + : base( + DateTimeDefinitions.AgoRegex, + DateTimeDefinitions.LaterRegex, + DateTimeDefinitions.InConnectorRegex, + DateTimeDefinitions.SinceYearSuffixRegex, + DateTimeDefinitions.WithinNextPrefixRegex, + DateTimeDefinitions.AmDescRegex, + DateTimeDefinitions.PmDescRegex, + DateTimeDefinitions.AmPmDescRegex, + 
DateTimeDefinitions.RangeUnitRegex, + DateTimeDefinitions.TimeUnitRegex, + DateTimeDefinitions.DateUnitRegex, + DateTimeDefinitions.CommonDatePrefixRegex, + DateTimeDefinitions.RangePrefixRegex, + RegexOptions.Singleline | RegexOptions.ExplicitCapture, + true) + { + // CheckBothBeforeAfter normally gets its value from DateTimeDefinitions.CheckBothBeforeAfter which however for Spanish is false. + // It only needs to be true here to extract 'ago/later' in prefixes (e.g. 'hace 30 minutos'). + + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDateExtractorConfiguration.cs new file mode 100644 index 0000000000..f73af285c9 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDateExtractorConfiguration.cs @@ -0,0 +1,297 @@ +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Swedish; +using Microsoft.Recognizers.Text.DateTime.Swedish.Utilities; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Swedish; + +namespace Microsoft.Recognizers.Text.DateTime.Swedish +{ + public class SwedishDateExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateExtractorConfiguration + { + + public static readonly Regex MonthRegex = + new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthNumRegex = + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayRegex = + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SingleWeekDayRegex = + 
new Regex(DateTimeDefinitions.SingleWeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex OnRegex = + new Regex(DateTimeDefinitions.OnRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelaxedOnRegex = + new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ThisRegex = + new Regex(DateTimeDefinitions.ThisRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LastDateRegex = + new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NextDateRegex = + new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DateUnitRegex = + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialDayRegex = + new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayOfMonthRegex = + new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeWeekDayRegex = + new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialDate = + new Regex(DateTimeDefinitions.SpecialDate, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialDayWithNumRegex = + new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ForTheRegex = + new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayAndDayOfMothRegex = + new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayAndDayRegex = + new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeMonthRegex = + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); + + public 
static readonly Regex StrictRelativeRegex = + new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PrefixArticleRegex = + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex OfMonth = + new Regex(DateTimeDefinitions.OfMonth, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthEnd = + new Regex(DateTimeDefinitions.MonthEnd, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayEnd = + new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayStart = + new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags, RegexTimeOut); + + public static readonly Regex YearSuffix = + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); + + public static readonly Regex LessThanRegex = + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MoreThanRegex = + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex InConnectorRegex = + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SinceYearSuffixRegex = + new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangeUnitRegex = + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangeConnectorSymbolRegex = + new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex BeforeAfterRegex = + new Regex(DateTimeDefinitions.BeforeAfterRegex, RegexFlags, RegexTimeOut); + + public static readonly ImmutableDictionary DayOfWeek = + DateTimeDefinitions.DayOfWeek.ToImmutableDictionary(); + + public static readonly ImmutableDictionary MonthOfYear = + DateTimeDefinitions.MonthOfYear.ToImmutableDictionary(); + + 
private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex DayRegex = + new Regex(DateTimeDefinitions.ImplicitDayRegex, RegexFlags, RegexTimeOut); + + public SwedishDateExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Swedish.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Swedish.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new SwedishNumberParserConfiguration(numConfig)); + DurationExtractor = new BaseDurationExtractor(new SwedishDurationExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new SwedishHolidayExtractorConfiguration(this)); + UtilityConfiguration = new SwedishDatetimeUtilityConfiguration(); + + ImplicitDateList = new List + { + // extract "12" from "on 12" + OnRegex, + + // extract "12th" from "on/at/in 12th" + RelaxedOnRegex, + + // "the day before yesterday", "previous day", "today", "yesterday", "tomorrow" + SpecialDayRegex, + + // "this Monday", "Tuesday of this week" + ThisRegex, + + // "last/previous Monday", "Monday of last week" + LastDateRegex, + + // "next/following Monday", "Monday of next week" + NextDateRegex, + + // "Sunday", "Weds" + SingleWeekDayRegex, + + // "2nd Monday of April" + WeekDayOfMonthRegex, + + // "on the 12th" + SpecialDate, + + // "two days from today", "five days from tomorrow" + SpecialDayWithNumRegex, + + // "three Monday from now" + RelativeWeekDayRegex, + }; + + if ((Options & DateTimeOptions.CalendarMode) != 0) + { + ImplicitDateList = ImplicitDateList.Concat(new[] { DayRegex }); + } + + // 3-23-2017 + var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, 
RegexFlags, RegexTimeOut); + + // 23-3-2015 + var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags, RegexTimeOut); + + // on (Sunday,)? 1.3 + var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags, RegexTimeOut); + + // on (Sunday,)? 24-12 + var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags, RegexTimeOut); + + // "(Sunday,)? 7/23, 2018", year part is required + var dateRegex7L = new Regex(DateTimeDefinitions.DateExtractor7L, RegexFlags, RegexTimeOut); + + // "(Sunday,)? 7/23", year part is not required + var dateRegex7S = new Regex(DateTimeDefinitions.DateExtractor7S, RegexFlags, RegexTimeOut); + + // "(Sunday,)? 23/7, 2018", year part is required + var dateRegex9L = new Regex(DateTimeDefinitions.DateExtractor9L, RegexFlags, RegexTimeOut); + + // "(Sunday,)? 23/7", year part is not required + var dateRegex9S = new Regex(DateTimeDefinitions.DateExtractor9S, RegexFlags, RegexTimeOut); + + // (Sunday,)? 2015-12-23 + var dateRegexA = new Regex(DateTimeDefinitions.DateExtractorA, RegexFlags, RegexTimeOut); + + DateRegexList = new List + { + // (Sunday,)? April 5 or (Sunday,)? April 5, 2016 + new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags, RegexTimeOut), + + // (Sunday,)? 6th of April + new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags, RegexTimeOut), + }; + + var enableDmy = DmyDateFormat || + DateTimeDefinitions.DefaultLanguageFallback == Constants.DefaultLanguageFallback_DMY; + + DateRegexList = DateRegexList.Concat(enableDmy ? 
+ new[] { dateRegex5, dateRegex8, dateRegex9L, dateRegex9S, dateRegex4, dateRegex6, dateRegex7L, dateRegex7S, dateRegexA } : + new[] { dateRegex4, dateRegex6, dateRegex7L, dateRegex7S, dateRegex5, dateRegex8, dateRegex9L, dateRegex9S, dateRegexA }); + } + + public IEnumerable DateRegexList { get; } + + public IExtractor IntegerExtractor { get; } + + public IExtractor OrdinalExtractor { get; } + + public IParser NumberParser { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + + public IEnumerable ImplicitDateList { get; } + + IImmutableDictionary IDateExtractorConfiguration.DayOfWeek => DayOfWeek; + + IImmutableDictionary IDateExtractorConfiguration.MonthOfYear => MonthOfYear; + + bool IDateExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + Regex IDateExtractorConfiguration.OfMonth => OfMonth; + + Regex IDateExtractorConfiguration.MonthEnd => MonthEnd; + + Regex IDateExtractorConfiguration.WeekDayEnd => WeekDayEnd; + + Regex IDateExtractorConfiguration.WeekDayStart => WeekDayStart; + + Regex IDateExtractorConfiguration.DateUnitRegex => DateUnitRegex; + + Regex IDateExtractorConfiguration.ForTheRegex => ForTheRegex; + + Regex IDateExtractorConfiguration.WeekDayAndDayOfMonthRegex => WeekDayAndDayOfMothRegex; + + Regex IDateExtractorConfiguration.WeekDayAndDayRegex => WeekDayAndDayRegex; + + Regex IDateExtractorConfiguration.RelativeMonthRegex => RelativeMonthRegex; + + Regex IDateExtractorConfiguration.StrictRelativeRegex => StrictRelativeRegex; + + Regex IDateExtractorConfiguration.WeekDayRegex => WeekDayRegex; + + Regex IDateExtractorConfiguration.PrefixArticleRegex => PrefixArticleRegex; + + Regex IDateExtractorConfiguration.YearSuffix => YearSuffix; + + Regex IDateExtractorConfiguration.LessThanRegex => LessThanRegex; + + Regex IDateExtractorConfiguration.MoreThanRegex => 
MoreThanRegex; + + Regex IDateExtractorConfiguration.InConnectorRegex => InConnectorRegex; + + Regex IDateExtractorConfiguration.SinceYearSuffixRegex => SinceYearSuffixRegex; + + Regex IDateExtractorConfiguration.RangeUnitRegex => RangeUnitRegex; + + Regex IDateExtractorConfiguration.RangeConnectorSymbolRegex => RangeConnectorSymbolRegex; + + Regex IDateExtractorConfiguration.BeforeAfterRegex => BeforeAfterRegex; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDatePeriodExtractorConfiguration.cs new file mode 100644 index 0000000000..ad74fd13be --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDatePeriodExtractorConfiguration.cs @@ -0,0 +1,375 @@ +using System; +using System.Collections.Generic; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.Swedish; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Swedish; +using Microsoft.Recognizers.Text.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Swedish +{ + public class SwedishDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDatePeriodExtractorConfiguration + { + // Base regexes + public static readonly Regex TillRegex = + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangeConnectorRegex = + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DayRegex = + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthNumRegex = + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex IllegalYearRegex = + new Regex(BaseDateTime.IllegalYearRegex, RegexFlags, RegexTimeOut); + 
+ public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDayRegex = + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeMonthRegex = + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WrittenMonthRegex = + new Regex(DateTimeDefinitions.WrittenMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthSuffixRegex = + new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DateUnitRegex = + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeUnitRegex = + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PreviousPrefixRegex = + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NextPrefixRegex = + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FutureSuffixRegex = + new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NowRegex = + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); + + // composite regexes + public static readonly Regex SimpleCasesRegex = + new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthFrontSimpleCasesRegex = + new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthFrontBetweenRegex = + new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex BetweenRegex = + new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthWithYear = + new 
Regex(DateTimeDefinitions.MonthWithYear, RegexFlags, RegexTimeOut); + + public static readonly Regex OneWordPeriodRegex = + new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthNumWithYear = + new Regex(DateTimeDefinitions.MonthNumWithYear, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekOfMonthRegex = + new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekOfYearRegex = + new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FollowedDateUnit = + new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex NumberCombinedWithDateUnit = + new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex QuarterRegex = + new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex QuarterRegexYearFront = + new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags, RegexTimeOut); + + public static readonly Regex AllHalfYearRegex = + new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SeasonRegex = + new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WhichWeekRegex = + new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekOfRegex = + new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MonthOfRegex = + new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangeUnitRegex = + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex InConnectorRegex = + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); + + 
public static readonly Regex WithinNextPrefixRegex = + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RestOfDateRegex = + new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LaterEarlyPeriodRegex = + new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekWithWeekDayRangeRegex = + new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearPlusNumberRegex = + new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DecadeWithCenturyRegex = + new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearPeriodRegex = + new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ComplexDatePeriodRegex = + new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeDecadeRegex = + new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ReferenceDatePeriodRegex = + new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AgoRegex = + new Regex(DateTimeDefinitions.AgoRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LaterRegex = + new Regex(DateTimeDefinitions.LaterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex LessThanRegex = + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex MoreThanRegex = + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex CenturySuffixRegex = + new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags, RegexTimeOut); + + public static 
readonly Regex FirstLastRegex = + new Regex(DateTimeDefinitions.FirstLastRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex OfYearRegex = + new Regex(DateTimeDefinitions.OfYearRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex FromTokenRegex = + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex BetweenTokenRegex = + new Regex(DateTimeDefinitions.BetweenTokenRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex[] SimpleCasesRegexes = + { + // "3-5 Jan, 2018", + SimpleCasesRegex, + + // "between 3 and 5 Jan, 2018" + BetweenRegex, + + // "next april", "year to date", "previous year" + OneWordPeriodRegex, + + // "January, 2018", "this year Feb" + MonthWithYear, + + // "2018-3", "2018.3", "5-2015", only FourDigitYear is allow in this Regex + MonthNumWithYear, + + // "2018", "two thousand and ten" + YearRegex, + + // "4th week of Feb" + WeekOfMonthRegex, + + // "3rd week of 2018", "4th week last year" + WeekOfYearRegex, + + // "Jan between 8-10" + MonthFrontBetweenRegex, + + // "from Jan 5th-10th", "Feb from 5-10" + MonthFrontSimpleCasesRegex, + + // "Q1 2018", "2nd quarter" + QuarterRegex, + + // "2016 Q1", "last year the 4th quarter" + QuarterRegexYearFront, + + // "2015 the H1", "H2 of 2016", "1st half 2018", "2nd half this year" + AllHalfYearRegex, + + // "last summer", "fall of 2018", "early this summer" + SeasonRegex, + + // "week 25", "week 06" + WhichWeekRegex, + + // "rest of this week", "rest of current year" + RestOfDateRegex, + + // "early this year", "late next April" + LaterEarlyPeriodRegex, + + // "this week between Mon and Wed", "next week from Tuesday to Wednesday" + WeekWithWeekDayRangeRegex, + + // "year 834", "two thousand and nine" + YearPlusNumberRegex, + + // "21st century 30's" + DecadeWithCenturyRegex, + + // "next five decades", "previous 2 decades" 
+ RelativeDecadeRegex, + + // "this week", "same year" + ReferenceDatePeriodRegex, + }; + + public SwedishDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + DatePointExtractor = new BaseDateExtractor(new SwedishDateExtractorConfiguration(this)); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Swedish.CardinalExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Swedish.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new SwedishNumberParserConfiguration(numConfig)); + + DurationExtractor = new BaseDurationExtractor(new SwedishDurationExtractorConfiguration(this)); + } + + public IDateExtractor DatePointExtractor { get; } + + public IExtractor CardinalExtractor { get; } + + public IExtractor OrdinalExtractor { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IParser NumberParser { get; } + + IEnumerable IDatePeriodExtractorConfiguration.SimpleCasesRegexes => SimpleCasesRegexes; + + Regex IDatePeriodExtractorConfiguration.IllegalYearRegex => IllegalYearRegex; + + Regex IDatePeriodExtractorConfiguration.YearRegex => YearRegex; + + Regex IDatePeriodExtractorConfiguration.TillRegex => TillRegex; + + Regex IDatePeriodExtractorConfiguration.FollowedDateUnit => FollowedDateUnit; + + Regex IDatePeriodExtractorConfiguration.DateUnitRegex => DateUnitRegex; + + Regex IDatePeriodExtractorConfiguration.TimeUnitRegex => TimeUnitRegex; + + Regex IDatePeriodExtractorConfiguration.NumberCombinedWithDateUnit => NumberCombinedWithDateUnit; + + Regex IDatePeriodExtractorConfiguration.PreviousPrefixRegex => PreviousPrefixRegex; + + Regex IDatePeriodExtractorConfiguration.FutureRegex => NextPrefixRegex; + + Regex IDatePeriodExtractorConfiguration.FutureSuffixRegex 
=> FutureSuffixRegex; + + Regex IDatePeriodExtractorConfiguration.WeekOfRegex => WeekOfRegex; + + Regex IDatePeriodExtractorConfiguration.MonthOfRegex => MonthOfRegex; + + Regex IDatePeriodExtractorConfiguration.RangeUnitRegex => RangeUnitRegex; + + Regex IDatePeriodExtractorConfiguration.InConnectorRegex => InConnectorRegex; + + Regex IDatePeriodExtractorConfiguration.WithinNextPrefixRegex => WithinNextPrefixRegex; + + Regex IDatePeriodExtractorConfiguration.YearPeriodRegex => YearPeriodRegex; + + Regex IDatePeriodExtractorConfiguration.ComplexDatePeriodRegex => ComplexDatePeriodRegex; + + Regex IDatePeriodExtractorConfiguration.RelativeDecadeRegex => RelativeDecadeRegex; + + Regex IDatePeriodExtractorConfiguration.ReferenceDatePeriodRegex => ReferenceDatePeriodRegex; + + Regex IDatePeriodExtractorConfiguration.AgoRegex => AgoRegex; + + Regex IDatePeriodExtractorConfiguration.LaterRegex => LaterRegex; + + Regex IDatePeriodExtractorConfiguration.LessThanRegex => LessThanRegex; + + Regex IDatePeriodExtractorConfiguration.MoreThanRegex => MoreThanRegex; + + Regex IDatePeriodExtractorConfiguration.CenturySuffixRegex => CenturySuffixRegex; + + Regex IDatePeriodExtractorConfiguration.MonthNumRegex => MonthNumRegex; + + Regex IDatePeriodExtractorConfiguration.NowRegex => NowRegex; + + Regex IDatePeriodExtractorConfiguration.FirstLastRegex => FirstLastRegex; + + Regex IDatePeriodExtractorConfiguration.OfYearRegex => OfYearRegex; + + bool IDatePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + string[] IDatePeriodExtractorConfiguration.DurationDateRestrictions => DateTimeDefinitions.DurationDateRestrictions; + + public bool GetFromTokenIndex(string text, out int index) + { + index = -1; + var fromMatch = FromTokenRegex.Match(text); + if (fromMatch.Success) + { + index = fromMatch.Index; + } + + return fromMatch.Success; + } + + public bool GetBetweenTokenIndex(string text, out int index) + { + index = -1; + var betweenMatch = 
BetweenTokenRegex.Match(text); + if (betweenMatch.Success) + { + index = betweenMatch.Index; + } + + return betweenMatch.Success; + } + + public bool HasConnectorToken(string text) + { + return RangeConnectorRegex.IsExactMatch(text, trim: true); + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDateTimeAltExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDateTimeAltExtractorConfiguration.cs new file mode 100644 index 0000000000..18ad623df0 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDateTimeAltExtractorConfiguration.cs @@ -0,0 +1,72 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.DateTime.Swedish +{ + public class SwedishDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeAltExtractorConfiguration + { + public static readonly Regex ThisPrefixRegex = + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PreviousPrefixRegex = + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NextPrefixRegex = + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AmRegex = + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PmRegex = + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RangePrefixRegex = + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex[] RelativePrefixList = + { + ThisPrefixRegex, PreviousPrefixRegex, NextPrefixRegex, + }; + + public static 
readonly Regex[] AmPmRegexList = + { + AmRegex, PmRegex, + }; + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex OrRegex = + new Regex(DateTimeDefinitions.OrRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex DayRegex = + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); + + public SwedishDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + DateExtractor = new BaseDateExtractor(new SwedishDateExtractorConfiguration(this)); + DatePeriodExtractor = new BaseDatePeriodExtractor(new SwedishDatePeriodExtractorConfiguration(this)); + } + + IEnumerable IDateTimeAltExtractorConfiguration.RelativePrefixList => RelativePrefixList; + + IEnumerable IDateTimeAltExtractorConfiguration.AmPmRegexList => AmPmRegexList; + + Regex IDateTimeAltExtractorConfiguration.OrRegex => OrRegex; + + Regex IDateTimeAltExtractorConfiguration.ThisPrefixRegex => ThisPrefixRegex; + + Regex IDateTimeAltExtractorConfiguration.DayRegex => DayRegex; + + Regex IDateTimeAltExtractorConfiguration.RangePrefixRegex => RangePrefixRegex; + + public IDateExtractor DateExtractor { get; } + + public IDateTimeExtractor DatePeriodExtractor { get; } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDateTimeExtractorConfiguration.cs new file mode 100644 index 0000000000..7c610ae9ff --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDateTimeExtractorConfiguration.cs @@ -0,0 +1,141 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Swedish; +using Microsoft.Recognizers.Text.DateTime.Swedish.Utilities; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; + +namespace Microsoft.Recognizers.Text.DateTime.Swedish +{ + public class SwedishDateTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeExtractorConfiguration + { + public static readonly Regex PrepositionRegex = + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NowRegex = + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SuffixRegex = + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeOfDayRegex = + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecificTimeOfDayRegex = + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeOfTodayAfterRegex = + new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex TimeOfTodayBeforeRegex = + new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SimpleTimeOfTodayAfterRegex = + new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SimpleTimeOfTodayBeforeRegex = + new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecificEndOfRegex = + new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex UnspecificEndOfRegex = + new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex UnitRegex = + new 
Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ConnectorRegex = + new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NumberAsTimeRegex = + new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DateNumberConnectorRegex = + new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex YearSuffix = + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); + + public static readonly Regex SuffixAfterRegex = + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public SwedishDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Swedish.IntegerExtractor.GetInstance(numConfig); + + DatePointExtractor = new BaseDateExtractor(new SwedishDateExtractorConfiguration(this)); + TimePointExtractor = new BaseTimeExtractor(new SwedishTimeExtractorConfiguration(this)); + DurationExtractor = new BaseDurationExtractor(new SwedishDurationExtractorConfiguration(this)); + UtilityConfiguration = new SwedishDatetimeUtilityConfiguration(); + HolidayExtractor = new BaseHolidayExtractor(new SwedishHolidayExtractorConfiguration(this)); + + } + + public IExtractor IntegerExtractor { get; } + + public IDateExtractor DatePointExtractor { get; } + + public IDateTimeExtractor TimePointExtractor { get; } + + public 
IDateTimeUtilityConfiguration UtilityConfiguration { get; } + + public IDateTimeExtractor HolidayExtractor { get; } + + Regex IDateTimeExtractorConfiguration.NowRegex => NowRegex; + + Regex IDateTimeExtractorConfiguration.SuffixRegex => SuffixRegex; + + Regex IDateTimeExtractorConfiguration.TimeOfTodayAfterRegex => TimeOfTodayAfterRegex; + + Regex IDateTimeExtractorConfiguration.SimpleTimeOfTodayAfterRegex => SimpleTimeOfTodayAfterRegex; + + Regex IDateTimeExtractorConfiguration.TimeOfTodayBeforeRegex => TimeOfTodayBeforeRegex; + + Regex IDateTimeExtractorConfiguration.SimpleTimeOfTodayBeforeRegex => SimpleTimeOfTodayBeforeRegex; + + Regex IDateTimeExtractorConfiguration.TimeOfDayRegex => TimeOfDayRegex; + + Regex IDateTimeExtractorConfiguration.SpecificEndOfRegex => SpecificEndOfRegex; + + Regex IDateTimeExtractorConfiguration.UnspecificEndOfRegex => UnspecificEndOfRegex; + + Regex IDateTimeExtractorConfiguration.UnitRegex => UnitRegex; + + Regex IDateTimeExtractorConfiguration.NumberAsTimeRegex => NumberAsTimeRegex; + + Regex IDateTimeExtractorConfiguration.DateNumberConnectorRegex => DateNumberConnectorRegex; + + Regex IDateTimeExtractorConfiguration.YearRegex => YearRegex; + + Regex IDateTimeExtractorConfiguration.YearSuffix => YearSuffix; + + Regex IDateTimeExtractorConfiguration.SuffixAfterRegex => SuffixAfterRegex; + + public IDateTimeExtractor DurationExtractor { get; } + + public bool IsConnector(string text) + { + text = text.Trim(); + return string.IsNullOrEmpty(text) || PrepositionRegex.IsMatch(text) || ConnectorRegex.IsMatch(text); + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDateTimePeriodExtractorConfiguration.cs new file mode 100644 index 0000000000..8b75e03e5e --- /dev/null +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDateTimePeriodExtractorConfiguration.cs @@ -0,0 +1,231 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.Swedish; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Swedish +{ + // Swedish patterns and sub-extractors exposed through IDateTimePeriodExtractorConfiguration. + public class SwedishDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, + IDateTimePeriodExtractorConfiguration + { + public static readonly Regex TimeNumberCombinedWithUnit = + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex HyphenDateRegex = + new Regex(BaseDateTime.HyphenDateRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PeriodTimeOfDayWithDateRegex = + new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeTimeUnitRegex = + new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RestOfDateTimeRegex = + new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AmDescRegex = + new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PmDescRegex = + new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WithinNextPrefixRegex = + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DateUnitRegex = + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); + + // Scanned right-to-left; bounded by RegexTimeOut like every other pattern in this class. + public static readonly Regex PrefixDayRegex = + new Regex(DateTimeDefinitions.PrefixDayRegex, RegexFlags
| RegexOptions.RightToLeft, RegexTimeOut); + + public static readonly Regex SuffixRegex = + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex BeforeRegex = + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AfterRegex = + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex WeekDaysRegex = + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PeriodSpecificTimeOfDayRegex = + new Regex(DateTimeDefinitions.PeriodSpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex[] SimpleCases = + { + SwedishTimePeriodExtractorConfiguration.PureNumFromTo, + SwedishTimePeriodExtractorConfiguration.PureNumBetweenAnd, + }; + + private static readonly Regex PeriodTimeOfDayRegex = + new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex TimeUnitRegex = + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex TimeFollowedUnit = + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); + + private static readonly Regex GeneralEndingRegex = + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex MiddlePauseRegex = + new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex RangeConnectorRegex = + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); + + // Range-opening tokens, taken from the Swedish definitions instead of hard-coded English words. + private static readonly Regex FromTokenRegex = + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex BetweenTokenRegex = + new Regex(DateTimeDefinitions.BetweenTokenRegex, RegexFlags, RegexTimeOut); + + public SwedishDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0)
+ { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Swedish.CardinalExtractor.GetInstance(numConfig); + + SingleDateExtractor = new BaseDateExtractor(new SwedishDateExtractorConfiguration(this)); + SingleTimeExtractor = new BaseTimeExtractor(new SwedishTimeExtractorConfiguration(this)); + SingleDateTimeExtractor = new BaseDateTimeExtractor(new SwedishDateTimeExtractorConfiguration(this)); + DurationExtractor = new BaseDurationExtractor(new SwedishDurationExtractorConfiguration(this)); + TimePeriodExtractor = new BaseTimePeriodExtractor(new SwedishTimePeriodExtractorConfiguration(this)); + TimeZoneExtractor = new BaseTimeZoneExtractor(new SwedishTimeZoneExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new SwedishHolidayExtractorConfiguration(this)); + + } + + public IEnumerable SimpleCasesRegex => SimpleCases; + + public Regex PrepositionRegex => SwedishTimePeriodExtractorConfiguration.PrepositionRegex; + + public Regex TillRegex => SwedishTimePeriodExtractorConfiguration.TillRegex; + + public Regex TimeOfDayRegex => PeriodTimeOfDayRegex; + + public Regex SpecificTimeOfDayRegex => PeriodSpecificTimeOfDayRegex; + + public Regex PreviousPrefixRegex => SwedishDatePeriodExtractorConfiguration.PreviousPrefixRegex; + + public Regex NextPrefixRegex => SwedishDatePeriodExtractorConfiguration.NextPrefixRegex; + + public Regex FutureSuffixRegex => SwedishDatePeriodExtractorConfiguration.FutureSuffixRegex; + + public Regex WeekDayRegex => WeekDaysRegex; + + public Regex FollowedUnit => TimeFollowedUnit; + + bool IDateTimePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + Regex IDateTimePeriodExtractorConfiguration.PrefixDayRegex => PrefixDayRegex; + + Regex IDateTimePeriodExtractorConfiguration.DateUnitRegex => DateUnitRegex; + + Regex
IDateTimePeriodExtractorConfiguration.NumberCombinedWithUnit => TimeNumberCombinedWithUnit; + + Regex IDateTimePeriodExtractorConfiguration.TimeUnitRegex => TimeUnitRegex; + + Regex IDateTimePeriodExtractorConfiguration.RelativeTimeUnitRegex => RelativeTimeUnitRegex; + + Regex IDateTimePeriodExtractorConfiguration.RestOfDateTimeRegex => RestOfDateTimeRegex; + + Regex IDateTimePeriodExtractorConfiguration.GeneralEndingRegex => GeneralEndingRegex; + + Regex IDateTimePeriodExtractorConfiguration.MiddlePauseRegex => MiddlePauseRegex; + + Regex IDateTimePeriodExtractorConfiguration.PeriodTimeOfDayWithDateRegex => PeriodTimeOfDayWithDateRegex; + + Regex IDateTimePeriodExtractorConfiguration.AmDescRegex => AmDescRegex; + + Regex IDateTimePeriodExtractorConfiguration.PmDescRegex => PmDescRegex; + + Regex IDateTimePeriodExtractorConfiguration.WithinNextPrefixRegex => WithinNextPrefixRegex; + + Regex IDateTimePeriodExtractorConfiguration.SuffixRegex => SuffixRegex; + + Regex IDateTimePeriodExtractorConfiguration.BeforeRegex => BeforeRegex; + + Regex IDateTimePeriodExtractorConfiguration.AfterRegex => AfterRegex; + + Regex IDateTimePeriodExtractorConfiguration.TasksmodeMealTimeofDayRegex => null; + + public string TokenBeforeDate { get; } + + public IExtractor CardinalExtractor { get; } + + public IDateTimeExtractor SingleDateExtractor { get; } + + public IDateTimeExtractor SingleTimeExtractor { get; } + + public IDateTimeExtractor SingleDateTimeExtractor { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateTimeExtractor TimePeriodExtractor { get; } + + public IDateTimeExtractor TimeZoneExtractor { get; } + + public IDateTimeExtractor HolidayExtractor { get; } + + // TODO: these three methods are the same in DatePeriod, should be abstracted + // Looks for the range-opening token defined by FromTokenRegex (DateTimeDefinitions.FromRegex). + // On a match, index is the match start and the result is true; otherwise index stays -1. + public bool GetFromTokenIndex(string text, out int index) + { + index = -1; + + var fromMatch = FromTokenRegex.Match(text); + if (fromMatch.Success) + { + index =
fromMatch.Index; + } + + return fromMatch.Success; + } + + // Looks for the range-opening token defined by BetweenTokenRegex (DateTimeDefinitions.BetweenTokenRegex). + // On a match, index is the match start and the result is true; otherwise index stays -1. + public bool GetBetweenTokenIndex(string text, out int index) + { + index = -1; + + var betweenMatch = BetweenTokenRegex.Match(text); + if (betweenMatch.Success) + { + index = betweenMatch.Index; + } + + return betweenMatch.Success; + } + + public bool HasConnectorToken(string text) + { + return RangeConnectorRegex.IsExactMatch(text, trim: true); + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDurationExtractorConfiguration.cs new file mode 100644 index 0000000000..bcc83b6a85 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishDurationExtractorConfiguration.cs @@ -0,0 +1,135 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License.
+ +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Swedish; +using Microsoft.Recognizers.Definitions.Utilities; +using Microsoft.Recognizers.Text.Number; + +namespace Microsoft.Recognizers.Text.DateTime.Swedish +{ + public class SwedishDurationExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDurationExtractorConfiguration + { + public static readonly Regex DurationUnitRegex = + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SuffixAndRegex = + new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DurationFollowedUnit = + new Regex(DateTimeDefinitions.DurationFollowedUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex NumberCombinedWithDurationUnit = + new Regex(DateTimeDefinitions.NumberCombinedWithDurationUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex AnUnitRegex = + new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex DuringRegex = + new Regex(DateTimeDefinitions.DuringRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AllRegex = + new Regex(DateTimeDefinitions.AllRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex HalfRegex = + new Regex(DateTimeDefinitions.HalfRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ConjunctionRegex = + new Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex InexactNumberRegex = + new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex InexactNumberUnitRegex = + new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeDurationUnitRegex = + new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags, 
RegexTimeOut); + + public static readonly Regex DurationConnectorRegex = + new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModPrefixRegex = + new Regex(DateTimeDefinitions.ModPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModSuffixRegex = + new Regex(DateTimeDefinitions.ModSuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SpecialNumberUnitRegex = null; + + public static readonly Regex MoreThanRegex = + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags | RegexOptions.RightToLeft, RegexTimeOut); // right-to-left scan, bounded by RegexTimeOut like the other patterns + + public static readonly Regex LessThanRegex = + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags | RegexOptions.RightToLeft, RegexTimeOut); // right-to-left scan, bounded by RegexTimeOut like the other patterns + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public SwedishDurationExtractorConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Swedish.CardinalExtractor.GetInstance(numConfig); + + UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary(); + UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary(); + } + + public IExtractor CardinalExtractor { get; } + + public IImmutableDictionary UnitMap { get; } + + public IImmutableDictionary UnitValueMap { get; } + + bool IDurationExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + Regex IDurationExtractorConfiguration.FollowedUnit => DurationFollowedUnit; + + Regex IDurationExtractorConfiguration.NumberCombinedWithUnit => NumberCombinedWithDurationUnit; + + Regex IDurationExtractorConfiguration.AnUnitRegex => AnUnitRegex; + + Regex IDurationExtractorConfiguration.DuringRegex => DuringRegex; + +
Regex IDurationExtractorConfiguration.AllRegex => AllRegex; + + Regex IDurationExtractorConfiguration.HalfRegex => HalfRegex; + + Regex IDurationExtractorConfiguration.SuffixAndRegex => SuffixAndRegex; + + Regex IDurationExtractorConfiguration.ConjunctionRegex => ConjunctionRegex; + + Regex IDurationExtractorConfiguration.InexactNumberRegex => InexactNumberRegex; + + Regex IDurationExtractorConfiguration.InexactNumberUnitRegex => InexactNumberUnitRegex; + + Regex IDurationExtractorConfiguration.RelativeDurationUnitRegex => RelativeDurationUnitRegex; + + Regex IDurationExtractorConfiguration.DurationUnitRegex => DurationUnitRegex; + + Regex IDurationExtractorConfiguration.DurationConnectorRegex => DurationConnectorRegex; + + Regex IDurationExtractorConfiguration.SpecialNumberUnitRegex => SpecialNumberUnitRegex; + + Regex IDurationExtractorConfiguration.MoreThanRegex => MoreThanRegex; + + Regex IDurationExtractorConfiguration.LessThanRegex => LessThanRegex; + + Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex; + + Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex; + + public Dictionary AmbiguityFiltersDict => DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityDurationFiltersDict); + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishHolidayExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishHolidayExtractorConfiguration.cs new file mode 100644 index 0000000000..91377f1834 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishHolidayExtractorConfiguration.cs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+
+using Microsoft.Recognizers.Definitions.Swedish;
+
+namespace Microsoft.Recognizers.Text.DateTime.Swedish
+{
+    /// <summary>
+    /// Holiday extractor configuration for Swedish: exposes the regexes built from
+    /// the Swedish <c>DateTimeDefinitions</c> holiday and year patterns.
+    /// </summary>
+    public class SwedishHolidayExtractorConfiguration : BaseDateTimeOptionsConfiguration, IHolidayExtractorConfiguration
+    {
+        /// <summary>Regex compiled from <c>DateTimeDefinitions.YearRegex</c>.</summary>
+        public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut);
+
+        /// <summary>Regex compiled from <c>DateTimeDefinitions.HolidayRegex</c>.</summary>
+        public static readonly Regex H = new Regex(DateTimeDefinitions.HolidayRegex, RegexFlags, RegexTimeOut);
+
+        /// <summary>Every holiday regex handed to the extractor; at present only <see cref="H"/>.</summary>
+        public static readonly Regex[] HolidayRegexList = new[] { H };
+
+        // Options applied to every regex created by this class.
+        private const RegexOptions RegexFlags = RegexOptions.ExplicitCapture | RegexOptions.Singleline;
+
+        /// <summary>Creates the configuration, forwarding <paramref name="config"/> to the base class.</summary>
+        /// <param name="config">Options configuration passed through to the base constructor.</param>
+        public SwedishHolidayExtractorConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+        }
+
+        /// <summary>Gets the holiday regexes; returns <see cref="HolidayRegexList"/>.</summary>
+        public IEnumerable HolidayRegexes
+        {
+            get { return HolidayRegexList; }
+        }
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishMergedExtractorConfiguration.cs
new file mode 100644
index 0000000000..6d45007314
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishMergedExtractorConfiguration.cs
@@ -0,0 +1,183 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+
+using Microsoft.Recognizers.Definitions;
+using Microsoft.Recognizers.Definitions.Swedish;
+using Microsoft.Recognizers.Definitions.Utilities;
+using Microsoft.Recognizers.Text.Matcher;
+using Microsoft.Recognizers.Text.Number;
+
+namespace Microsoft.Recognizers.Text.DateTime.Swedish
+{
+    /// <summary>
+    /// Configuration for the Swedish merged date/time extractor: owns one sub-extractor per
+    /// entity kind plus the modifier regexes exposed through <see cref="IMergedExtractorConfiguration"/>.
+    /// </summary>
+    public class SwedishMergedExtractorConfiguration : BaseDateTimeOptionsConfiguration, IMergedExtractorConfiguration
+    {
+        public static readonly Regex BeforeRegex =
+            new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex AfterRegex =
+            new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex AroundRegex =
+            new Regex(DateTimeDefinitions.AroundRegex, RegexFlags, RegexTimeOut);
+
+        // Taken from the language-independent BaseDateTime definitions, not the Swedish ones.
+        public static readonly Regex EqualRegex =
+            new Regex(BaseDateTime.EqualRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex FromToRegex =
+            new Regex(DateTimeDefinitions.FromToRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SingleAmbiguousMonthRegex =
+            new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PrepositionSuffixRegex =
+            new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex AmbiguousRangeModifierPrefix =
+            new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex NumberEndingPattern =
+            new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SuffixAfterRegex =
+            new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex UnspecificDatePeriodRegex =
+            new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex UnspecificTimePeriodRegex =
+            new Regex(DateTimeDefinitions.UnspecificTimePeriodRegex, RegexFlags, RegexTimeOut);
+
+        // NOTE(review): the only regex here that is compiled and created without RegexTimeOut - confirm this is intended.
+        public static readonly Regex FailFastRegex =
+            new Regex(DateTimeDefinitions.FailFastRegex, RegexFlags | RegexOptions.Compiled);
+
+        public static readonly Regex YearRegex =
+            new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex[] TermFilterRegexes =
+        {
+            // one on one
+            new Regex(DateTimeDefinitions.OneOnOneRegex, RegexFlags, RegexTimeOut),
+
+            // (the)? (day|week|month|year)
+            new Regex(DateTimeDefinitions.SingleAmbiguousTermsRegex, RegexFlags, RegexTimeOut),
+        };
+
+        // Shared by all instances; (re)initialized by the constructor when EnablePreview is set.
+        public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher();
+
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
+
+        // "Since" pattern selected for this instance from its own options (see constructor).
+        private readonly Regex activeSinceRegex;
+
+        /// <summary>
+        /// Builds all sub-extractors and selects option-dependent settings.
+        /// </summary>
+        /// <param name="config">Options and culture; also forwarded to the base class.</param>
+        public SwedishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+            DateExtractor = new BaseDateExtractor(new SwedishDateExtractorConfiguration(this));
+            TimeExtractor = new BaseTimeExtractor(new SwedishTimeExtractorConfiguration(this));
+            DateTimeExtractor = new BaseDateTimeExtractor(new SwedishDateTimeExtractorConfiguration(this));
+            DatePeriodExtractor = new BaseDatePeriodExtractor(new SwedishDatePeriodExtractorConfiguration(this));
+            TimePeriodExtractor = new BaseTimePeriodExtractor(new SwedishTimePeriodExtractorConfiguration(this));
+            DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(new SwedishDateTimePeriodExtractorConfiguration(this));
+            DurationExtractor = new BaseDurationExtractor(new SwedishDurationExtractorConfiguration(this));
+            SetExtractor = new BaseSetExtractor(new SwedishSetExtractorConfiguration(this));
+            HolidayExtractor = new BaseHolidayExtractor(new SwedishHolidayExtractorConfiguration(this));
+            TimeZoneExtractor = new BaseTimeZoneExtractor(new SwedishTimeZoneExtractorConfiguration(this));
+            DateTimeAltExtractor = new BaseDateTimeAltExtractor(new SwedishDateTimeAltExtractorConfiguration(this));
+
+            var numOptions = NumberOptions.None;
+            if ((config.Options & DateTimeOptions.NoProtoCache) != 0)
+            {
+                numOptions = NumberOptions.NoProtoCache;
+            }
+
+            // Choose the "since" pattern per instance. Previously this assigned the static
+            // SinceRegex property, so building a single ExperimentalMode configuration switched
+            // every other instance in the process to the experimental pattern as well.
+            activeSinceRegex = (config.Options & DateTimeOptions.ExperimentalMode) != 0
+                ? SinceRegexExp
+                : SinceRegex;
+
+            var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions);
+
+            IntegerExtractor = Number.Swedish.IntegerExtractor.GetInstance(numConfig);
+
+            AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict);
+
+            if ((Options & DateTimeOptions.EnablePreview) != 0)
+            {
+                SuperfluousWordMatcher.Init(DateTimeDefinitions.SuperfluousWordList);
+            }
+        }
+
+        // Used in Standard mode
+        public static Regex SinceRegex { get; set; } = new Regex(DateTimeDefinitions.SinceRegex, RegexFlags, RegexTimeOut);
+
+        // used in Experimental mode
+        public static Regex SinceRegexExp { get; } = new Regex(DateTimeDefinitions.SinceRegexExp, RegexFlags, RegexTimeOut);
+
+        public IDateExtractor DateExtractor { get; }
+
+        public IDateTimeExtractor TimeExtractor { get; }
+
+        public IDateTimeExtractor DateTimeExtractor { get; }
+
+        public IDateTimeExtractor DatePeriodExtractor { get; }
+
+        public IDateTimeExtractor TimePeriodExtractor { get; }
+
+        public IDateTimeExtractor DateTimePeriodExtractor { get; }
+
+        public IDateTimeExtractor DurationExtractor { get; }
+
+        public IDateTimeExtractor SetExtractor { get; }
+
+        public IDateTimeExtractor HolidayExtractor { get; }
+
+        public IDateTimeZoneExtractor TimeZoneExtractor { get; }
+
+        public IDateTimeListExtractor DateTimeAltExtractor { get; }
+
+        public IExtractor IntegerExtractor { get; }
+
+        // Loaded once in the constructor from DateTimeDefinitions.AmbiguityFiltersDict.
+        public Dictionary AmbiguityFiltersDict { get; }
+
+        Regex IMergedExtractorConfiguration.AfterRegex => AfterRegex;
+
+        Regex IMergedExtractorConfiguration.BeforeRegex => BeforeRegex;
+
+        // Per-instance choice between SinceRegex and SinceRegexExp made in the constructor.
+        Regex IMergedExtractorConfiguration.SinceRegex => activeSinceRegex;
+
+        Regex IMergedExtractorConfiguration.AroundRegex => AroundRegex;
+
+        Regex IMergedExtractorConfiguration.EqualRegex => EqualRegex;
+
+        Regex IMergedExtractorConfiguration.FromToRegex => FromToRegex;
+
+        Regex IMergedExtractorConfiguration.SingleAmbiguousMonthRegex => SingleAmbiguousMonthRegex;
+
+        Regex IMergedExtractorConfiguration.PrepositionSuffixRegex => PrepositionSuffixRegex;
+
+        Regex IMergedExtractorConfiguration.AmbiguousRangeModifierPrefix => AmbiguousRangeModifierPrefix;
+
+        // Reuses FromToRegex; no separate Swedish pattern is defined in this class.
+        Regex IMergedExtractorConfiguration.PotentialAmbiguousRangeRegex => FromToRegex;
+
+        Regex IMergedExtractorConfiguration.NumberEndingPattern => NumberEndingPattern;
+
+        Regex IMergedExtractorConfiguration.SuffixAfterRegex => SuffixAfterRegex;
+
+        Regex IMergedExtractorConfiguration.UnspecificDatePeriodRegex => UnspecificDatePeriodRegex;
+
+        Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => UnspecificTimePeriodRegex;
+
+        Regex IMergedExtractorConfiguration.YearRegex => YearRegex;
+
+        Regex IMergedExtractorConfiguration.FailFastRegex => FailFastRegex;
+
+        IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes;
+
+        StringMatcher IMergedExtractorConfiguration.SuperfluousWordMatcher => SuperfluousWordMatcher;
+
+        bool IMergedExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;
+
+        // Never assigned in this class, so it keeps its default value (null).
+        public Regex TasksModeMentionFilters { get; }
+
+    }
+}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishSetExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishSetExtractorConfiguration.cs
new file mode 100644
index 0000000000..735d22045c
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishSetExtractorConfiguration.cs
@@ -0,0 +1,86 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Text.RegularExpressions;
+
+using Microsoft.Recognizers.Definitions.Swedish;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+
+namespace Microsoft.Recognizers.Text.DateTime.Swedish
+{
+    /// <summary>
+    /// Set (recurrence) extractor configuration for Swedish: bundles the set-related regexes
+    /// and the sub-extractors the set extractor delegates to.
+    /// </summary>
+    public class SwedishSetExtractorConfiguration : BaseDateTimeOptionsConfiguration, ISetExtractorConfiguration
+    {
+        // Built from the duration unit pattern; there is no dedicated set-unit definition here.
+        public static readonly Regex SetUnitRegex = new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PeriodicRegex = new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex EachUnitRegex = new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex EachPrefixRegex = new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SetLastRegex = new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex EachDayRegex = new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SetWeekDayRegex = new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SetEachRegex = new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags, RegexTimeOut);
+
+        // Options applied to every regex created by this class.
+        private const RegexOptions RegexFlags = RegexOptions.ExplicitCapture | RegexOptions.Singleline;
+
+        /// <summary>Creates the configuration and one base extractor per supported entity kind.</summary>
+        /// <param name="config">Options configuration passed through to the base constructor.</param>
+        public SwedishSetExtractorConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+            // Construction order is kept exactly as in the original.
+            var durationConfig = new SwedishDurationExtractorConfiguration(this);
+            DurationExtractor = new BaseDurationExtractor(durationConfig);
+
+            var timeConfig = new SwedishTimeExtractorConfiguration(this);
+            TimeExtractor = new BaseTimeExtractor(timeConfig);
+
+            var dateConfig = new SwedishDateExtractorConfiguration(this);
+            DateExtractor = new BaseDateExtractor(dateConfig);
+
+            var dateTimeConfig = new SwedishDateTimeExtractorConfiguration(this);
+            DateTimeExtractor = new BaseDateTimeExtractor(dateTimeConfig);
+
+            var datePeriodConfig = new SwedishDatePeriodExtractorConfiguration(this);
+            DatePeriodExtractor = new BaseDatePeriodExtractor(datePeriodConfig);
+
+            var timePeriodConfig = new SwedishTimePeriodExtractorConfiguration(this);
+            TimePeriodExtractor = new BaseTimePeriodExtractor(timePeriodConfig);
+
+            var dateTimePeriodConfig = new SwedishDateTimePeriodExtractorConfiguration(this);
+            DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(dateTimePeriodConfig);
+        }
+
+        public IDateTimeExtractor DurationExtractor { get; }
+
+        public IDateTimeExtractor TimeExtractor { get; }
+
+        public IDateExtractor DateExtractor { get; }
+
+        public IDateTimeExtractor DateTimeExtractor { get; }
+
+        public IDateTimeExtractor DatePeriodExtractor { get; }
+
+        public IDateTimeExtractor TimePeriodExtractor { get; }
+
+        public IDateTimeExtractor DateTimePeriodExtractor { get; }
+
+        bool ISetExtractorConfiguration.CheckBothBeforeAfter
+        {
+            get { return DateTimeDefinitions.CheckBothBeforeAfter; }
+        }
+
+        Regex ISetExtractorConfiguration.LastRegex
+        {
+            get { return SetLastRegex; }
+        }
+
+        Regex ISetExtractorConfiguration.EachPrefixRegex
+        {
+            get { return EachPrefixRegex; }
+        }
+
+        Regex ISetExtractorConfiguration.PeriodicRegex
+        {
+            get { return PeriodicRegex; }
+        }
+
+        Regex ISetExtractorConfiguration.EachUnitRegex
+        {
+            get { return EachUnitRegex; }
+        }
+
+        Regex ISetExtractorConfiguration.EachDayRegex
+        {
+            get { return EachDayRegex; }
+        }
+
+        // No Swedish pattern is supplied for this member; it always yields null.
+        Regex ISetExtractorConfiguration.BeforeEachDayRegex
+        {
+            get { return null; }
+        }
+
+        Regex ISetExtractorConfiguration.SetWeekDayRegex
+        {
+            get { return SetWeekDayRegex; }
+        }
+
+        Regex ISetExtractorConfiguration.SetEachRegex
+        {
+            get { return SetEachRegex; }
+        }
+
+        /// <summary>Delegates to <c>SetHandler.WeekDayGroupMatchTuple</c> for the given match.</summary>
+        /// <param name="match">Regex match forwarded unchanged to the shared handler.</param>
+        /// <returns>The tuple produced by the shared handler.</returns>
+        public Tuple WeekDayGroupMatchTuple(Match match)
+        {
+            return SetHandler.WeekDayGroupMatchTuple(match);
+        }
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishTimeExtractorConfiguration.cs
new file mode 100644
index 0000000000..5ee5280052
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishTimeExtractorConfiguration.cs
@@ -0,0 +1,152 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+
+using Microsoft.Recognizers.Definitions.Swedish;
+using Microsoft.Recognizers.Definitions.Utilities;
+
+namespace Microsoft.Recognizers.Text.DateTime.Swedish
+{
+    /// <summary>
+    /// Time extractor configuration for Swedish. Every regex is compiled from the pattern of
+    /// the same name in the Swedish <c>DateTimeDefinitions</c>. The example phrases in the
+    /// comments below are the English glosses carried over from the original comments.
+    /// </summary>
+    public class SwedishTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, ITimeExtractorConfiguration
+    {
+        // ---- Part 1: smallest components ----
+        public static readonly Regex DescRegex = new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex HourNumRegex = new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex MinuteNumRegex = new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags, RegexTimeOut);
+
+        // ---- Part 2: mid-level components ----
+
+        // "... o'clock"
+        public static readonly Regex OclockRegex = new Regex(DateTimeDefinitions.OclockRegex, RegexFlags, RegexTimeOut);
+
+        // "... afternoon"
+        public static readonly Regex PmRegex = new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut);
+
+        // "... in the morning"
+        public static readonly Regex AmRegex = new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut);
+
+        // "half past ...", "a quarter to ..."
+        // (original note: rename 'min' group to 'deltamin')
+        public static readonly Regex LessThanOneHour = new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags, RegexTimeOut);
+
+        // "six thirty", "six twenty one"
+        public static readonly Regex WrittenTimeRegex = new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex TimePrefix = new Regex(DateTimeDefinitions.TimePrefix, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex TimeSuffix = new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex BasicTime = new Regex(DateTimeDefinitions.BasicTime, RegexFlags, RegexTimeOut);
+
+        // Special times such as 'at midnight', 'midnight', 'midday'
+        public static readonly Regex MidnightRegex = new Regex(DateTimeDefinitions.MidnightRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex MidmorningRegex = new Regex(DateTimeDefinitions.MidmorningRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex MidafternoonRegex = new Regex(DateTimeDefinitions.MidafternoonRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex MiddayRegex = new Regex(DateTimeDefinitions.MiddayRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex MidTimeRegex = new Regex(DateTimeDefinitions.MidTimeRegex, RegexFlags, RegexTimeOut);
+
+        // ---- Part 3: full time expressions ----
+
+        // "at four", "at 3"
+        public static readonly Regex AtRegex = new Regex(DateTimeDefinitions.AtRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex IshRegex = new Regex(DateTimeDefinitions.IshRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex TimeUnitRegex = new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut);
+
+        // Declared before TimeRegexList because that list reuses this instance as its last entry.
+        public static readonly Regex ConnectNumRegex = new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex TimeBeforeAfterRegex = new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags, RegexTimeOut);
+
+        /// <summary>Time patterns given to the extractor, in this order.</summary>
+        public static readonly Regex[] TimeRegexList = new[]
+        {
+            // (three min past)? seven|7|(seven thirty) pm
+            new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags, RegexTimeOut),
+
+            // (three min past)? 3:00(:00)? (pm)?
+            new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags, RegexTimeOut),
+
+            // (three min past)? 3.00 (pm)
+            new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags, RegexTimeOut),
+
+            // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? (in the night)
+            new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags, RegexTimeOut),
+
+            // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)?
+            new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags, RegexTimeOut),
+
+            // (five thirty|seven|7|7:00(:00)?) (pm)? (in the night)
+            new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags, RegexTimeOut),
+
+            // (in the night) at? (five thirty|seven|7|7:00(:00)?) (pm)?
+            new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags, RegexTimeOut),
+
+            new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags, RegexTimeOut),
+
+            // (three min past)? 3h00 (pm)?
+            new Regex(DateTimeDefinitions.TimeRegex10, RegexFlags, RegexTimeOut),
+
+            // at 2.30 ("at" prefix required) / 3.30pm ("am/pm" suffix required)
+            new Regex(DateTimeDefinitions.TimeRegex11, RegexFlags, RegexTimeOut),
+
+            // 340pm
+            ConnectNumRegex,
+        };
+
+        // Options applied to every regex created by this class.
+        private const RegexOptions RegexFlags = RegexOptions.ExplicitCapture | RegexOptions.Singleline;
+
+        /// <summary>Creates the configuration together with its duration and time-zone extractors.</summary>
+        /// <param name="config">Options configuration passed through to the base constructor.</param>
+        public SwedishTimeExtractorConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+            var durationConfig = new SwedishDurationExtractorConfiguration(this);
+            DurationExtractor = new BaseDurationExtractor(durationConfig);
+
+            var timeZoneConfig = new SwedishTimeZoneExtractorConfiguration(this);
+            TimeZoneExtractor = new BaseTimeZoneExtractor(timeZoneConfig);
+        }
+
+        IEnumerable ITimeExtractorConfiguration.TimeRegexList
+        {
+            get { return TimeRegexList; }
+        }
+
+        Regex ITimeExtractorConfiguration.AtRegex
+        {
+            get { return AtRegex; }
+        }
+
+        Regex ITimeExtractorConfiguration.IshRegex
+        {
+            get { return IshRegex; }
+        }
+
+        Regex ITimeExtractorConfiguration.TimeBeforeAfterRegex
+        {
+            get { return TimeBeforeAfterRegex; }
+        }
+
+        public IDateTimeExtractor DurationExtractor { get; }
+
+        public IDateTimeExtractor TimeZoneExtractor { get; }
+
+        public string TimeTokenPrefix
+        {
+            get { return DateTimeDefinitions.TimeTokenPrefix; }
+        }
+
+        // The loader is invoked on every read of this property.
+        public Dictionary AmbiguityFiltersDict
+        {
+            get { return DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityTimeFiltersDict); }
+        }
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishTimePeriodExtractorConfiguration.cs
new file mode 100644
index 0000000000..64cc97b3ba
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishTimePeriodExtractorConfiguration.cs
@@ -0,0 +1,152 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+
+using Microsoft.Recognizers.Definitions.Swedish;
+using Microsoft.Recognizers.Text.DateTime.Swedish.Utilities;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+using Microsoft.Recognizers.Text.Number;
+
+namespace Microsoft.Recognizers.Text.DateTime.Swedish
+{
+    /// <summary>
+    /// Time-period extractor configuration for Swedish: range regexes, the helper extractors
+    /// used by the time-period extractor, and the range keyword checks.
+    /// </summary>
+    public class SwedishTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, ITimePeriodExtractorConfiguration
+    {
+        public static readonly Regex TillRegex =
+            new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex HourRegex =
+            new Regex(DateTimeDefinitions.HourRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PeriodHourNumRegex =
+            new Regex(DateTimeDefinitions.PeriodHourNumRegex, RegexFlags, RegexTimeOut);
+
+        // Built from DateTimeDefinitions.DescRegex (there is no separate period pattern).
+        public static readonly Regex PeriodDescRegex =
+            new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PmRegex =
+            new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex AmRegex =
+            new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PureNumFromTo =
+            new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PureNumBetweenAnd =
+            new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SpecificTimeFromTo =
+            new Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SpecificTimeBetweenAnd =
+            new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex PrepositionRegex =
+            new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex TimeOfDayRegex =
+            new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex SpecificTimeOfDayRegex =
+            new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex TimeUnitRegex =
+            new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex TimeFollowedUnit =
+            new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex TimeNumberCombinedWithUnit =
+            new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut);
+
+        public static readonly Regex GeneralEndingRegex =
+            new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut);
+
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
+
+        // @TODO move hardcoded strings to YAML file
+        // Range keywords. The English words are kept for backward compatibility; the Swedish
+        // equivalents are what this Swedish configuration is expected to see in real input.
+        private static readonly string[] FromTokens = { "from", "från" };
+
+        private static readonly string[] BetweenTokens = { "between", "mellan" };
+
+        private static readonly string[] ConnectorTokens = { "and", "och" };
+
+        /// <summary>Creates the configuration and the helper extractors it exposes.</summary>
+        /// <param name="config">Options and culture; also forwarded to the base class.</param>
+        public SwedishTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+            TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate;
+            SingleTimeExtractor = new BaseTimeExtractor(new SwedishTimeExtractorConfiguration(this));
+            UtilityConfiguration = new SwedishDatetimeUtilityConfiguration();
+
+            // Forward the NoProtoCache option to the number recognizer configuration.
+            var numOptions = NumberOptions.None;
+            if ((config.Options & DateTimeOptions.NoProtoCache) != 0)
+            {
+                numOptions = NumberOptions.NoProtoCache;
+            }
+
+            var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions);
+
+            IntegerExtractor = Number.Swedish.IntegerExtractor.GetInstance(numConfig);
+
+            TimeZoneExtractor = new BaseTimeZoneExtractor(new SwedishTimeZoneExtractorConfiguration(this));
+        }
+
+        public string TokenBeforeDate { get; }
+
+        public IDateTimeUtilityConfiguration UtilityConfiguration { get; }
+
+        public IDateTimeExtractor SingleTimeExtractor { get; }
+
+        public IDateTimeExtractor TimeZoneExtractor { get; }
+
+        public IExtractor IntegerExtractor { get; }
+
+        public IEnumerable SimpleCasesRegex => new[]
+        {
+            PureNumFromTo, PureNumBetweenAnd, SpecificTimeFromTo, SpecificTimeBetweenAnd,
+        };
+
+        public IEnumerable PureNumberRegex => new[]
+        {
+            PureNumFromTo, PureNumBetweenAnd,
+        };
+
+        bool ITimePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;
+
+        Regex ITimePeriodExtractorConfiguration.TillRegex => TillRegex;
+
+        Regex ITimePeriodExtractorConfiguration.TimeOfDayRegex => TimeOfDayRegex;
+
+        Regex ITimePeriodExtractorConfiguration.GeneralEndingRegex => GeneralEndingRegex;
+
+        /// <summary>Checks whether <paramref name="text"/> ends with a "from" keyword ("from" or "från").</summary>
+        /// <param name="text">Text preceding a candidate time range.</param>
+        /// <param name="index">Start index of the keyword when found; otherwise -1.</param>
+        /// <returns><c>true</c> when a keyword ends the text.</returns>
+        public bool GetFromTokenIndex(string text, out int index)
+        {
+            return TryGetTrailingTokenIndex(text, FromTokens, out index);
+        }
+
+        /// <summary>Checks whether <paramref name="text"/> ends with a "between" keyword ("between" or "mellan").</summary>
+        /// <param name="text">Text preceding a candidate time range.</param>
+        /// <param name="index">Start index of the keyword when found; otherwise -1.</param>
+        /// <returns><c>true</c> when a keyword ends the text.</returns>
+        public bool GetBetweenTokenIndex(string text, out int index)
+        {
+            return TryGetTrailingTokenIndex(text, BetweenTokens, out index);
+        }
+
+        /// <summary>Tells whether <paramref name="text"/> is exactly a range connector ("and" or "och").</summary>
+        /// <param name="text">Candidate connector text.</param>
+        /// <returns><c>true</c> for an exact, ordinal match.</returns>
+        public bool IsConnectorToken(string text)
+        {
+            foreach (var token in ConnectorTokens)
+            {
+                if (text.Equals(token, StringComparison.Ordinal))
+                {
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        public List ApplyPotentialPeriodAmbiguityHotfix(string text, List timePeriodErs) => TimePeriodFunctions.ApplyPotentialPeriodAmbiguityHotfix(text, timePeriodErs);
+
+        // Shared lookup: the first keyword that ends the text wins, and index is its last occurrence.
+        private static bool TryGetTrailingTokenIndex(string text, string[] tokens, out int index)
+        {
+            foreach (var token in tokens)
+            {
+                if (text.EndsWith(token, StringComparison.Ordinal))
+                {
+                    index = text.LastIndexOf(token, StringComparison.Ordinal);
+                    return true;
+                }
+            }
+
+            index = -1;
+            return false;
+        }
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishTimeZoneExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishTimeZoneExtractorConfiguration.cs
new file mode 100644
index 0000000000..8dc62e5f16
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishTimeZoneExtractorConfiguration.cs
@@ -0,0 +1,54 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Linq;
+using System.Text.RegularExpressions;
+
+using Microsoft.Recognizers.Definitions.Swedish;
+using Microsoft.Recognizers.Text.Matcher;
+using Microsoft.Recognizers.Text.Utilities;
+
+namespace Microsoft.Recognizers.Text.DateTime.Swedish
+{
+    /// <summary>
+    /// Time-zone extractor configuration for Swedish: regexes and string matchers built from
+    /// the Swedish <c>TimeZoneDefinitions</c>.
+    /// </summary>
+    public class SwedishTimeZoneExtractorConfiguration : BaseDateTimeOptionsConfiguration, ITimeZoneExtractorConfiguration
+    {
+        public static readonly Regex DirectUtcRegex = new Regex(TimeZoneDefinitions.DirectUtcRegex, TimeZoneRegexFlags);
+
+        public static readonly List AbbreviationsList = new List(TimeZoneDefinitions.AbbreviationsList);
+
+        public static readonly List FullNameList = new List(TimeZoneDefinitions.FullNameList);
+
+        // Must stay below the two lists above: static initializers run in textual order.
+        public static readonly StringMatcher TimeZoneMatcher = TimeZoneUtility.BuildMatcherFromLists(FullNameList, AbbreviationsList);
+
+        public static readonly Regex LocationTimeSuffixRegex = new Regex(TimeZoneDefinitions.LocationTimeSuffixRegex, TimeZoneRegexFlags);
+
+        // Shared by all instances; filled by the constructor only when EnablePreview is set.
+        public static readonly StringMatcher LocationMatcher = new StringMatcher();
+
+        public static readonly List AmbiguousTimezoneList = TimeZoneDefinitions.AmbiguousTimezoneList.ToList();
+
+        // Options for the two regexes above (case-insensitive, created without a timeout).
+        private const RegexOptions TimeZoneRegexFlags = RegexOptions.IgnoreCase | RegexOptions.Singleline;
+
+        /// <summary>Creates the configuration and, in preview mode, loads the location matcher.</summary>
+        /// <param name="config">Options configuration passed through to the base constructor.</param>
+        public SwedishTimeZoneExtractorConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+            if ((Options & DateTimeOptions.EnablePreview) == 0)
+            {
+                return;
+            }
+
+            // Locations are lower-cased and stripped of diacritics before being indexed.
+            var normalizedLocations = TimeZoneDefinitions.MajorLocations
+                .Select(location => QueryProcessor.RemoveDiacritics(location.ToLowerInvariant()));
+
+            LocationMatcher.Init(normalizedLocations);
+        }
+
+        Regex ITimeZoneExtractorConfiguration.DirectUtcRegex
+        {
+            get { return DirectUtcRegex; }
+        }
+
+        Regex ITimeZoneExtractorConfiguration.LocationTimeSuffixRegex
+        {
+            get { return LocationTimeSuffixRegex; }
+        }
+
+        StringMatcher ITimeZoneExtractorConfiguration.LocationMatcher
+        {
+            get { return LocationMatcher; }
+        }
+
+        StringMatcher ITimeZoneExtractorConfiguration.TimeZoneMatcher
+        {
+            get { return TimeZoneMatcher; }
+        }
+
+        List ITimeZoneExtractorConfiguration.AmbiguousTimezoneList
+        {
+            get { return AmbiguousTimezoneList; }
+        }
+    }
+}
\ No
newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishCommonDateTimeParserConfiguration.cs
new file mode 100644
index 0000000000..67ac6e89ed
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishCommonDateTimeParserConfiguration.cs
@@ -0,0 +1,70 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Immutable;
+using Microsoft.Recognizers.Definitions;
+using Microsoft.Recognizers.Definitions.Swedish;
+using Microsoft.Recognizers.Text.DateTime.Swedish.Utilities;
+using Microsoft.Recognizers.Text.Number;
+using Microsoft.Recognizers.Text.Number.Swedish;
+
+namespace Microsoft.Recognizers.Text.DateTime.Swedish
+{
+    /// <summary>
+    /// Shared Swedish parser configuration: lookup tables, number extractors/parser and the
+    /// date/time extractors and parsers that the individual Swedish parser configurations read.
+    /// </summary>
+    public class SwedishCommonDateTimeParserConfiguration : BaseDateParserConfiguration, ICommonDateTimeParserConfiguration
+    {
+        /// <summary>
+        /// Populates every shared member. Extractors are created before parsers, and the
+        /// statement order below must be preserved (see the note inside the body).
+        /// </summary>
+        /// <param name="config">Options and culture; also forwarded to the base class.</param>
+        public SwedishCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration config)
+            : base(config)
+        {
+            UtilityConfiguration = new SwedishDatetimeUtilityConfiguration();
+
+            // Lookup tables copied from the Swedish definitions into immutable dictionaries.
+            UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary();
+            UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary();
+            SeasonMap = DateTimeDefinitions.SeasonMap.ToImmutableDictionary();
+            SpecialYearPrefixesMap = DateTimeDefinitions.SpecialYearPrefixesMap.ToImmutableDictionary();
+            CardinalMap = DateTimeDefinitions.CardinalMap.ToImmutableDictionary();
+            DayOfWeek = DateTimeDefinitions.DayOfWeek.ToImmutableDictionary();
+            MonthOfYear = DateTimeDefinitions.MonthOfYear.ToImmutableDictionary();
+            Numbers = DateTimeDefinitions.Numbers.ToImmutableDictionary();
+            DoubleNumbers = DateTimeDefinitions.DoubleNumbers.ToImmutableDictionary();
+            WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary();
+            SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary();
+
+            // Forward the NoProtoCache option to the number recognizer configuration.
+            var numOptions = NumberOptions.None;
+            if ((config.Options & DateTimeOptions.NoProtoCache) != 0)
+            {
+                numOptions = NumberOptions.NoProtoCache;
+            }
+
+            var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions);
+
+            CardinalExtractor = Number.Swedish.CardinalExtractor.GetInstance(numConfig);
+            IntegerExtractor = Number.Swedish.IntegerExtractor.GetInstance(numConfig);
+            OrdinalExtractor = Number.Swedish.OrdinalExtractor.GetInstance(numConfig);
+
+            NumberParser = new BaseNumberParser(new SwedishNumberParserConfiguration(numConfig));
+
+            TimeZoneParser = new BaseTimeZoneParser(new SwedishTimeZoneParserConfiguration(this));
+
+            // Do not change order. The order of initialization can lead to side-effects
+            DateExtractor = new BaseDateExtractor(new SwedishDateExtractorConfiguration(this));
+            TimeExtractor = new BaseTimeExtractor(new SwedishTimeExtractorConfiguration(this));
+            DateTimeExtractor = new BaseDateTimeExtractor(new SwedishDateTimeExtractorConfiguration(this));
+            DurationExtractor = new BaseDurationExtractor(new SwedishDurationExtractorConfiguration(this));
+            DatePeriodExtractor = new BaseDatePeriodExtractor(new SwedishDatePeriodExtractorConfiguration(this));
+            TimePeriodExtractor = new BaseTimePeriodExtractor(new SwedishTimePeriodExtractorConfiguration(this));
+            DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(new SwedishDateTimePeriodExtractorConfiguration(this));
+
+            // Parser configurations receive this instance, so they are built after the extractors above.
+            DurationParser = new BaseDurationParser(new SwedishDurationParserConfiguration(this));
+            DateParser = new BaseDateParser(new SwedishDateParserConfiguration(this));
+            TimeParser = new TimeParser(new SwedishTimeParserConfiguration(this));
+            DateTimeParser = new BaseDateTimeParser(new SwedishDateTimeParserConfiguration(this));
+            DatePeriodParser = new BaseDatePeriodParser(new SwedishDatePeriodParserConfiguration(this));
+            TimePeriodParser = new BaseTimePeriodParser(new SwedishTimePeriodParserConfiguration(this));
+            DateTimePeriodParser = new BaseDateTimePeriodParser(new SwedishDateTimePeriodParserConfiguration(this));
+
+            DateTimeAltParser = new BaseDateTimeAltParser(new SwedishDateTimeAltParserConfiguration(this));
+        }
+
+        // Base day-of-month table extended with the Swedish entries; rebuilt on every read.
+        public override IImmutableDictionary DayOfMonth => BaseDateTime.DayOfMonthDictionary.ToImmutableDictionary().AddRange(DateTimeDefinitions.DayOfMonth);
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDateParserConfiguration.cs
new file mode 100644
index 0000000000..497eb437bf
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDateParserConfiguration.cs
@@ -0,0 +1,196 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Text.RegularExpressions;
+using Microsoft.Recognizers.Definitions.Swedish;
+using Microsoft.Recognizers.Text.DateTime.Utilities;
+
+namespace Microsoft.Recognizers.Text.DateTime.Swedish
+{
+    public class SwedishDateParserConfiguration : BaseDateTimeOptionsConfiguration, IDateParserConfiguration
+    {
+        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
+
+        public SwedishDateParserConfiguration(ICommonDateTimeParserConfiguration config)
+            : base(config)
+        {
+            DateTokenPrefix = DateTimeDefinitions.DateTokenPrefix;
+
+            IntegerExtractor = config.IntegerExtractor;
+            OrdinalExtractor = config.OrdinalExtractor;
+            CardinalExtractor = config.CardinalExtractor;
+            NumberParser = config.NumberParser;
+            DateExtractor = config.DateExtractor;
+            DurationExtractor = config.DurationExtractor;
+            DurationParser = config.DurationParser;
+            HolidayParser = new BaseHolidayParser(new SwedishHolidayParserConfiguration(this));
+
+            DateRegexes =
new SwedishDateExtractorConfiguration(this).DateRegexList; + OnRegex = SwedishDateExtractorConfiguration.OnRegex; + SpecialDayRegex = SwedishDateExtractorConfiguration.SpecialDayRegex; + SpecialDayWithNumRegex = SwedishDateExtractorConfiguration.SpecialDayWithNumRegex; + NextRegex = SwedishDateExtractorConfiguration.NextDateRegex; + ThisRegex = SwedishDateExtractorConfiguration.ThisRegex; + LastRegex = SwedishDateExtractorConfiguration.LastDateRegex; + UnitRegex = SwedishDateExtractorConfiguration.DateUnitRegex; + WeekDayRegex = SwedishDateExtractorConfiguration.WeekDayRegex; + MonthRegex = SwedishDateExtractorConfiguration.MonthRegex; + WeekDayOfMonthRegex = SwedishDateExtractorConfiguration.WeekDayOfMonthRegex; + ForTheRegex = SwedishDateExtractorConfiguration.ForTheRegex; + WeekDayAndDayOfMothRegex = SwedishDateExtractorConfiguration.WeekDayAndDayOfMothRegex; + WeekDayAndDayRegex = SwedishDateExtractorConfiguration.WeekDayAndDayRegex; + RelativeMonthRegex = SwedishDateExtractorConfiguration.RelativeMonthRegex; + StrictRelativeRegex = SwedishDateExtractorConfiguration.StrictRelativeRegex; + YearSuffix = SwedishDateExtractorConfiguration.YearSuffix; + RelativeWeekDayRegex = SwedishDateExtractorConfiguration.RelativeWeekDayRegex; + BeforeAfterRegex = SwedishDateExtractorConfiguration.BeforeAfterRegex; + + RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexFlags, RegexTimeOut); + NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + UpcomingPrefixRegex = new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags, RegexTimeOut); + PastPrefixRegex = new Regex(DateTimeDefinitions.PastPrefixRegex, RegexFlags, RegexTimeOut); + + DayOfMonth = config.DayOfMonth; + DayOfWeek = config.DayOfWeek; + MonthOfYear = config.MonthOfYear; + CardinalMap = config.CardinalMap; + UnitMap = config.UnitMap; + 
UtilityConfiguration = config.UtilityConfiguration; + + SameDayTerms = DateTimeDefinitions.SameDayTerms.ToImmutableList(); + PlusOneDayTerms = DateTimeDefinitions.PlusOneDayTerms.ToImmutableList(); + PlusTwoDayTerms = DateTimeDefinitions.PlusTwoDayTerms.ToImmutableList(); + MinusOneDayTerms = DateTimeDefinitions.MinusOneDayTerms.ToImmutableList(); + MinusTwoDayTerms = DateTimeDefinitions.MinusTwoDayTerms.ToImmutableList(); + } + + public string DateTokenPrefix { get; } + + public IExtractor IntegerExtractor { get; } + + public IExtractor OrdinalExtractor { get; } + + public IExtractor CardinalExtractor { get; } + + public IParser NumberParser { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateExtractor DateExtractor { get; } + + public IDateTimeParser DurationParser { get; } + + public IDateTimeParser HolidayParser { get; } + + public IEnumerable DateRegexes { get; } + + public IImmutableDictionary UnitMap { get; } + + public Regex OnRegex { get; } + + public Regex SpecialDayRegex { get; } + + public Regex SpecialDayWithNumRegex { get; } + + public Regex NextRegex { get; } + + public Regex ThisRegex { get; } + + public Regex LastRegex { get; } + + public Regex UnitRegex { get; } + + public Regex WeekDayRegex { get; } + + public Regex MonthRegex { get; } + + public Regex WeekDayOfMonthRegex { get; } + + public Regex ForTheRegex { get; } + + public Regex WeekDayAndDayOfMothRegex { get; } + + public Regex WeekDayAndDayRegex { get; } + + public Regex RelativeMonthRegex { get; } + + public Regex StrictRelativeRegex { get; } + + public Regex YearSuffix { get; } + + public Regex RelativeWeekDayRegex { get; } + + public Regex RelativeDayRegex { get; } + + public Regex NextPrefixRegex { get; } + + public Regex PreviousPrefixRegex { get; } + + public Regex UpcomingPrefixRegex { get; } + + public Regex PastPrefixRegex { get; } + + public Regex BeforeAfterRegex { get; } + + public Regex TasksModeDurationToDatePatterns { get; } + + public 
IImmutableDictionary DayOfMonth { get; } + + public IImmutableDictionary DayOfWeek { get; } + + public IImmutableDictionary MonthOfYear { get; } + + public IImmutableDictionary CardinalMap { get; } + + public IImmutableList SameDayTerms { get; } + + public IImmutableList PlusOneDayTerms { get; } + + public IImmutableList MinusOneDayTerms { get; } + + public IImmutableList PlusTwoDayTerms { get; } + + public IImmutableList MinusTwoDayTerms { get; } + + bool IDateParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + + public int GetSwiftMonthOrYear(string text) + { + var trimmedText = text.Trim(); + var swift = 0; + + if (NextPrefixRegex.IsMatch(trimmedText)) + { + swift = 1; + } + + if (PreviousPrefixRegex.IsMatch(trimmedText)) + { + swift = -1; + } + + return swift; + } + + public bool IsCardinalLast(string text) + { + + // @TODO move hardcoded values to resources file + + var trimmedText = text.Trim(); + + return trimmedText.Equals("last", StringComparison.Ordinal); + } + + public string Normalize(string text) + { + return text; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDatePeriodParserConfiguration.cs new file mode 100644 index 0000000000..2786916a7c --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDatePeriodParserConfiguration.cs @@ -0,0 +1,370 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.DateTime.Swedish +{ + public class SwedishDatePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDatePeriodParserConfiguration + { + public static readonly Regex PreviousPrefixRegex = + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ThisPrefixRegex = + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AfterNextSuffixRegex = + new Regex(DateTimeDefinitions.AfterNextSuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex RelativeRegex = + new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex UnspecificEndOfRangeRegex = + new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NowParseRegex = + new Regex(DateTimeDefinitions.NowParseRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static IList monthTermsPadded = + DateTimeDefinitions.MonthTerms.Select(str => $" {str} ").ToList(); + + private static IList weekendTermsPadded = + DateTimeDefinitions.WeekendTerms.Select(str => $" {str} ").ToList(); + + private static IList weekTermsPadded = + DateTimeDefinitions.WeekTerms.Select(str => $" {str} ").ToList(); + + private static IList fortnightTermsPadded = + DateTimeDefinitions.FortnightTerms.Select(str => $" {str} ").ToList(); + + private static IList yearTermsPadded = + DateTimeDefinitions.YearTerms.Select(str => $" {str} ").ToList(); + + private static readonly Regex NextPrefixRegex = + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + + public 
SwedishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration config) + : base(config) + { + TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + CardinalExtractor = config.CardinalExtractor; + OrdinalExtractor = config.OrdinalExtractor; + IntegerExtractor = config.IntegerExtractor; + NumberParser = config.NumberParser; + DateExtractor = config.DateExtractor; + DurationExtractor = config.DurationExtractor; + DurationParser = config.DurationParser; + DateParser = config.DateParser; + + MonthFrontBetweenRegex = SwedishDatePeriodExtractorConfiguration.MonthFrontBetweenRegex; + BetweenRegex = SwedishDatePeriodExtractorConfiguration.BetweenRegex; + MonthFrontSimpleCasesRegex = SwedishDatePeriodExtractorConfiguration.MonthFrontSimpleCasesRegex; + SimpleCasesRegex = SwedishDatePeriodExtractorConfiguration.SimpleCasesRegex; + OneWordPeriodRegex = SwedishDatePeriodExtractorConfiguration.OneWordPeriodRegex; + MonthWithYear = SwedishDatePeriodExtractorConfiguration.MonthWithYear; + MonthNumWithYear = SwedishDatePeriodExtractorConfiguration.MonthNumWithYear; + YearRegex = SwedishDatePeriodExtractorConfiguration.YearRegex; + PastRegex = SwedishDatePeriodExtractorConfiguration.PreviousPrefixRegex; + FutureRegex = SwedishDatePeriodExtractorConfiguration.NextPrefixRegex; + FutureSuffixRegex = SwedishDatePeriodExtractorConfiguration.FutureSuffixRegex; + NumberCombinedWithUnit = SwedishDurationExtractorConfiguration.NumberCombinedWithDurationUnit; + WeekOfMonthRegex = SwedishDatePeriodExtractorConfiguration.WeekOfMonthRegex; + WeekOfYearRegex = SwedishDatePeriodExtractorConfiguration.WeekOfYearRegex; + QuarterRegex = SwedishDatePeriodExtractorConfiguration.QuarterRegex; + QuarterRegexYearFront = SwedishDatePeriodExtractorConfiguration.QuarterRegexYearFront; + AllHalfYearRegex = SwedishDatePeriodExtractorConfiguration.AllHalfYearRegex; + SeasonRegex = SwedishDatePeriodExtractorConfiguration.SeasonRegex; + WhichWeekRegex = 
SwedishDatePeriodExtractorConfiguration.WhichWeekRegex; + WeekOfRegex = SwedishDatePeriodExtractorConfiguration.WeekOfRegex; + MonthOfRegex = SwedishDatePeriodExtractorConfiguration.MonthOfRegex; + RestOfDateRegex = SwedishDatePeriodExtractorConfiguration.RestOfDateRegex; + LaterEarlyPeriodRegex = SwedishDatePeriodExtractorConfiguration.LaterEarlyPeriodRegex; + WeekWithWeekDayRangeRegex = SwedishDatePeriodExtractorConfiguration.WeekWithWeekDayRangeRegex; + YearPlusNumberRegex = SwedishDatePeriodExtractorConfiguration.YearPlusNumberRegex; + DecadeWithCenturyRegex = SwedishDatePeriodExtractorConfiguration.DecadeWithCenturyRegex; + YearPeriodRegex = SwedishDatePeriodExtractorConfiguration.YearPeriodRegex; + ComplexDatePeriodRegex = SwedishDatePeriodExtractorConfiguration.ComplexDatePeriodRegex; + RelativeDecadeRegex = SwedishDatePeriodExtractorConfiguration.RelativeDecadeRegex; + InConnectorRegex = config.UtilityConfiguration.InConnectorRegex; + WithinNextPrefixRegex = SwedishDatePeriodExtractorConfiguration.WithinNextPrefixRegex; + ReferenceDatePeriodRegex = SwedishDatePeriodExtractorConfiguration.ReferenceDatePeriodRegex; + AgoRegex = SwedishDatePeriodExtractorConfiguration.AgoRegex; + LaterRegex = SwedishDatePeriodExtractorConfiguration.LaterRegex; + LessThanRegex = SwedishDatePeriodExtractorConfiguration.LessThanRegex; + MoreThanRegex = SwedishDatePeriodExtractorConfiguration.MoreThanRegex; + CenturySuffixRegex = SwedishDatePeriodExtractorConfiguration.CenturySuffixRegex; + FirstLastRegex = SwedishDatePeriodExtractorConfiguration.FirstLastRegex; + OfYearRegex = SwedishDatePeriodExtractorConfiguration.OfYearRegex; + NowRegex = NowParseRegex; + SpecialDayRegex = SwedishDateExtractorConfiguration.SpecialDayRegex; + TodayNowRegex = new Regex(DateTimeDefinitions.TodayNowRegex, RegexOptions.Singleline); + + UnitMap = config.UnitMap; + CardinalMap = config.CardinalMap; + DayOfMonth = config.DayOfMonth; + MonthOfYear = config.MonthOfYear; + SeasonMap = config.SeasonMap; + 
SpecialYearPrefixesMap = config.SpecialYearPrefixesMap; + WrittenDecades = config.WrittenDecades; + Numbers = config.Numbers; + SpecialDecadeCases = config.SpecialDecadeCases; + } + + public int MinYearNum { get; } + + public int MaxYearNum { get; } + + public string TokenBeforeDate { get; } + + public IDateExtractor DateExtractor { get; } + + public IExtractor CardinalExtractor { get; } + + public IExtractor OrdinalExtractor { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IExtractor IntegerExtractor { get; } + + public IParser NumberParser { get; } + + public IDateTimeParser DateParser { get; } + + public IDateTimeParser DurationParser { get; } + + public Regex MonthFrontBetweenRegex { get; } + + public Regex BetweenRegex { get; } + + public Regex MonthFrontSimpleCasesRegex { get; } + + public Regex SimpleCasesRegex { get; } + + public Regex OneWordPeriodRegex { get; } + + public Regex MonthWithYear { get; } + + public Regex MonthNumWithYear { get; } + + public Regex YearRegex { get; } + + public Regex PastRegex { get; } + + public Regex FutureRegex { get; } + + public Regex FutureSuffixRegex { get; } + + public Regex NumberCombinedWithUnit { get; } + + public Regex WeekOfMonthRegex { get; } + + public Regex WeekOfYearRegex { get; } + + public Regex QuarterRegex { get; } + + public Regex QuarterRegexYearFront { get; } + + public Regex AllHalfYearRegex { get; } + + public Regex SeasonRegex { get; } + + public Regex WhichWeekRegex { get; } + + public Regex WeekOfRegex { get; } + + public Regex MonthOfRegex { get; } + + public Regex InConnectorRegex { get; } + + public Regex WithinNextPrefixRegex { get; } + + public Regex RestOfDateRegex { get; } + + public Regex LaterEarlyPeriodRegex { get; } + + public Regex WeekWithWeekDayRangeRegex { get; } + + public Regex YearPlusNumberRegex { get; } + + public Regex DecadeWithCenturyRegex { get; } + + public Regex YearPeriodRegex { get; } + + public Regex ComplexDatePeriodRegex { get; } + + public 
Regex RelativeDecadeRegex { get; } + + public Regex ReferenceDatePeriodRegex { get; } + + public Regex AgoRegex { get; } + + public Regex LaterRegex { get; } + + public Regex LessThanRegex { get; } + + public Regex MoreThanRegex { get; } + + public Regex CenturySuffixRegex { get; } + + public Regex NowRegex { get; } + + public Regex SpecialDayRegex { get; } + + public Regex TodayNowRegex { get; } + + public Regex FirstLastRegex { get; } + + public Regex OfYearRegex { get; } + + Regex ISimpleDatePeriodParserConfiguration.RelativeRegex => RelativeRegex; + + Regex IDatePeriodParserConfiguration.NextPrefixRegex => NextPrefixRegex; + + Regex IDatePeriodParserConfiguration.PreviousPrefixRegex => PreviousPrefixRegex; + + Regex IDatePeriodParserConfiguration.ThisPrefixRegex => ThisPrefixRegex; + + Regex IDatePeriodParserConfiguration.UnspecificEndOfRangeRegex => UnspecificEndOfRangeRegex; + + Regex IDatePeriodParserConfiguration.AmbiguousPointRangeRegex => null; + + bool IDatePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + public IImmutableDictionary UnitMap { get; } + + public IImmutableDictionary CardinalMap { get; } + + public IImmutableDictionary DayOfMonth { get; } + + public IImmutableDictionary MonthOfYear { get; } + + public IImmutableDictionary SeasonMap { get; } + + public IImmutableDictionary SpecialYearPrefixesMap { get; } + + public IImmutableDictionary WrittenDecades { get; } + + public IImmutableDictionary Numbers { get; } + + public IImmutableDictionary SpecialDecadeCases { get; } + + public IImmutableList InStringList { get; } + + public int GetSwiftDayOrMonth(string text) + { + var swift = 0; + + var trimmedText = text.Trim(); + + if (AfterNextSuffixRegex.IsMatch(trimmedText)) + { + swift = 2; + } + else if (NextPrefixRegex.IsMatch(trimmedText)) + { + swift = 1; + } + else if (PreviousPrefixRegex.IsMatch(trimmedText)) + { + swift = -1; + } + + return swift; + } + + public int GetSwiftYear(string text) + { + 
var swift = -10; + + var trimmedText = text.Trim(); + + if (AfterNextSuffixRegex.IsMatch(trimmedText)) + { + swift = 2; + } + else if (NextPrefixRegex.IsMatch(trimmedText)) + { + swift = 1; + } + else if (PreviousPrefixRegex.IsMatch(trimmedText)) + { + swift = -1; + } + else if (ThisPrefixRegex.IsMatch(trimmedText)) + { + swift = 0; + } + + return swift; + } + + public bool IsFuture(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.FutureTerms.Any(o => trimmedText.StartsWith(o, StringComparison.Ordinal)); + } + + public bool IsLastCardinal(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.LastCardinalTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } + + public bool IsMonthOnly(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (monthTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); + } + + public bool IsMonthToDate(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } + + public bool IsWeekend(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (weekendTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); + } + + public bool IsWeekOnly(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (weekTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); + } + + public bool IsFortnight(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.FortnightTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (fortnightTermsPadded.Any(o => 
trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); + } + + public bool IsYearOnly(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || + (yearTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)) || + (DateTimeDefinitions.GenericYearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) && + UnspecificEndOfRangeRegex.IsMatch(trimmedText)); + } + + public bool IsYearToDate(string text) + { + var trimmedText = text.Trim(); + return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDateTimeAltParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDateTimeAltParserConfiguration.cs new file mode 100644 index 0000000000..2d35a99910 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDateTimeAltParserConfiguration.cs @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.DateTime.Swedish +{ + public class SwedishDateTimeAltParserConfiguration : IDateTimeAltParserConfiguration + { + public SwedishDateTimeAltParserConfiguration(ICommonDateTimeParserConfiguration config) + { + DateTimeParser = config.DateTimeParser; + DateParser = config.DateParser; + TimeParser = config.TimeParser; + DateTimePeriodParser = config.DateTimePeriodParser; + TimePeriodParser = config.TimePeriodParser; + DatePeriodParser = config.DatePeriodParser; + } + + public IDateTimeParser DateTimeParser { get; } + + public IDateTimeParser DateParser { get; } + + public IDateTimeParser TimeParser { get; } + + public IDateTimeParser DateTimePeriodParser { get; } + + public IDateTimeParser TimePeriodParser { get; } + + public IDateTimeParser DatePeriodParser { get; } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDateTimeParserConfiguration.cs new file mode 100644 index 0000000000..31dcbe41dc --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDateTimeParserConfiguration.cs @@ -0,0 +1,198 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Swedish; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Swedish +{ + public class SwedishDateTimeParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeParserConfiguration + { + public static readonly Regex AmTimeRegex = + new Regex(DateTimeDefinitions.AMTimeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex PmTimeRegex = + new Regex(DateTimeDefinitions.PMTimeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NightTimeRegex = + new Regex(DateTimeDefinitions.NightTimeRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex NowTimeRegex = + new Regex(DateTimeDefinitions.NowTimeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex RecentlyTimeRegex = + new Regex(DateTimeDefinitions.RecentlyTimeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex AsapTimeRegex = + new Regex(DateTimeDefinitions.AsapTimeRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex NextPrefixRegex = + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex PreviousPrefixRegex = + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + + public SwedishDateTimeParserConfiguration(ICommonDateTimeParserConfiguration config) + : base(config) + { + TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime; + + DateExtractor = config.DateExtractor; + TimeExtractor = config.TimeExtractor; + DateParser = config.DateParser; + TimeParser = config.TimeParser; + + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; + + 
NowRegex = SwedishDateTimeExtractorConfiguration.NowRegex; + + SimpleTimeOfTodayAfterRegex = SwedishDateTimeExtractorConfiguration.SimpleTimeOfTodayAfterRegex; + SimpleTimeOfTodayBeforeRegex = SwedishDateTimeExtractorConfiguration.SimpleTimeOfTodayBeforeRegex; + SpecificTimeOfDayRegex = SwedishDateTimeExtractorConfiguration.SpecificTimeOfDayRegex; + SpecificEndOfRegex = SwedishDateTimeExtractorConfiguration.SpecificEndOfRegex; + UnspecificEndOfRegex = SwedishDateTimeExtractorConfiguration.UnspecificEndOfRegex; + UnitRegex = SwedishTimeExtractorConfiguration.TimeUnitRegex; + DateNumberConnectorRegex = SwedishDateTimeExtractorConfiguration.DateNumberConnectorRegex; + YearRegex = SwedishDateTimeExtractorConfiguration.YearRegex; + + Numbers = config.Numbers; + CardinalExtractor = config.CardinalExtractor; + IntegerExtractor = config.IntegerExtractor; + NumberParser = config.NumberParser; + DurationExtractor = config.DurationExtractor; + DurationParser = config.DurationParser; + UnitMap = config.UnitMap; + UtilityConfiguration = config.UtilityConfiguration; + } + + public string TokenBeforeDate { get; } + + public string TokenBeforeTime { get; } + + public IDateExtractor DateExtractor { get; } + + public IDateTimeExtractor TimeExtractor { get; } + + public IDateTimeParser DateParser { get; } + + public IDateTimeParser TimeParser { get; } + + public IExtractor CardinalExtractor { get; } + + public IExtractor IntegerExtractor { get; } + + public IParser NumberParser { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IDateTimeParser DurationParser { get; } + + public IImmutableDictionary UnitMap { get; } + + public Regex NowRegex { get; } + + public Regex AMTimeRegex => AmTimeRegex; + + public Regex PMTimeRegex => PmTimeRegex; + + public Regex SimpleTimeOfTodayAfterRegex { get; } + + public Regex SimpleTimeOfTodayBeforeRegex { get; } + + public Regex SpecificTimeOfDayRegex { get; } + + public Regex SpecificEndOfRegex { get; } + + public Regex 
UnspecificEndOfRegex { get; } + + public Regex UnitRegex { get; } + + public Regex DateNumberConnectorRegex { get; } + + public Regex PrepositionRegex { get; } + + public Regex ConnectorRegex { get; } + + public Regex YearRegex { get; } + + public IImmutableDictionary Numbers { get; } + + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + + public int GetHour(string text, int hour) + { + int result = hour; + + var trimmedText = text.Trim(); + + if (AMTimeRegex.MatchEnd(trimmedText, trim: true).Success && hour >= Constants.HalfDayHourCount) + { + result -= Constants.HalfDayHourCount; + } + else if (!AMTimeRegex.MatchEnd(trimmedText, trim: true).Success && hour < Constants.HalfDayHourCount && + !(NightTimeRegex.MatchEnd(trimmedText, trim: true).Success && hour < Constants.QuarterDayHourCount)) + { + result += Constants.HalfDayHourCount; + } + + return result; + } + + public bool GetMatchedNowTimex(string text, out string timex) + { + var trimmedText = text.Trim(); + + if (NowTimeRegex.MatchEnd(trimmedText, trim: true).Success) + { + timex = "PRESENT_REF"; + } + else if (RecentlyTimeRegex.IsExactMatch(trimmedText, trim: true)) + { + timex = "PAST_REF"; + } + else if (AsapTimeRegex.IsExactMatch(trimmedText, trim: true)) + { + timex = "FUTURE_REF"; + } + else + { + timex = null; + return false; + } + + return true; + } + + public int GetSwiftDay(string text) + { + var trimmedText = text.Trim(); + + var swift = 0; + if (NextPrefixRegex.MatchBegin(trimmedText, trim: true).Success) + { + swift = 1; + } + else if (PreviousPrefixRegex.MatchBegin(trimmedText, trim: true).Success) + { + swift = -1; + } + + return swift; + } + + public bool ContainsAmbiguousToken(string text, string matchedText) => false; + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDateTimePeriodParserConfiguration.cs 
b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDateTimePeriodParserConfiguration.cs new file mode 100644 index 0000000000..a08b346000 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDateTimePeriodParserConfiguration.cs @@ -0,0 +1,216 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.DateTime.Swedish +{ + public class SwedishDateTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimePeriodParserConfiguration + { + public static readonly Regex MorningStartEndRegex = + new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AfternoonStartEndRegex = + new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex EveningStartEndRegex = + new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex NightStartEndRegex = + new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public SwedishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration config) + : base(config) + { + TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime; + + DateExtractor = config.DateExtractor; + TimeExtractor = config.TimeExtractor; + DateTimeExtractor = config.DateTimeExtractor; + TimePeriodExtractor = config.TimePeriodExtractor; + CardinalExtractor = config.CardinalExtractor; + DurationExtractor = config.DurationExtractor; + NumberParser = config.NumberParser; + DateParser = config.DateParser; + TimeParser = config.TimeParser; + 
TimePeriodParser = config.TimePeriodParser; + DurationParser = config.DurationParser; + DateTimeParser = config.DateTimeParser; + TimeZoneParser = config.TimeZoneParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; + + PureNumberFromToRegex = SwedishTimePeriodExtractorConfiguration.PureNumFromTo; + HyphenDateRegex = SwedishDateTimePeriodExtractorConfiguration.HyphenDateRegex; + PureNumberBetweenAndRegex = SwedishTimePeriodExtractorConfiguration.PureNumBetweenAnd; + SpecificTimeOfDayRegex = SwedishDateTimePeriodExtractorConfiguration.PeriodSpecificTimeOfDayRegex; + TimeOfDayRegex = SwedishDateTimeExtractorConfiguration.TimeOfDayRegex; + PreviousPrefixRegex = SwedishDatePeriodExtractorConfiguration.PreviousPrefixRegex; + FutureRegex = SwedishDatePeriodExtractorConfiguration.NextPrefixRegex; + FutureSuffixRegex = SwedishDatePeriodExtractorConfiguration.FutureSuffixRegex; + NumberCombinedWithUnitRegex = SwedishDateTimePeriodExtractorConfiguration.TimeNumberCombinedWithUnit; + UnitRegex = SwedishTimePeriodExtractorConfiguration.TimeUnitRegex; + PeriodTimeOfDayWithDateRegex = SwedishDateTimePeriodExtractorConfiguration.PeriodTimeOfDayWithDateRegex; + RelativeTimeUnitRegex = SwedishDateTimePeriodExtractorConfiguration.RelativeTimeUnitRegex; + RestOfDateTimeRegex = SwedishDateTimePeriodExtractorConfiguration.RestOfDateTimeRegex; + AmDescRegex = SwedishDateTimePeriodExtractorConfiguration.AmDescRegex; + PmDescRegex = SwedishDateTimePeriodExtractorConfiguration.PmDescRegex; + WithinNextPrefixRegex = SwedishDateTimePeriodExtractorConfiguration.WithinNextPrefixRegex; + PrefixDayRegex = SwedishDateTimePeriodExtractorConfiguration.PrefixDayRegex; + BeforeRegex = SwedishDateTimePeriodExtractorConfiguration.BeforeRegex; + AfterRegex = SwedishDateTimePeriodExtractorConfiguration.AfterRegex; + + UnitMap = config.UnitMap; + Numbers = config.Numbers; + } + + public string TokenBeforeDate { get; } + + public string TokenBeforeTime { get; 
} + + public IDateExtractor DateExtractor { get; } + + public IDateTimeExtractor TimeExtractor { get; } + + public IDateTimeExtractor DateTimeExtractor { get; } + + public IDateTimeExtractor TimePeriodExtractor { get; } + + public IExtractor CardinalExtractor { get; } + + public IDateTimeExtractor DurationExtractor { get; } + + public IParser NumberParser { get; } + + public IDateTimeParser DateParser { get; } + + public IDateTimeParser TimeParser { get; } + + public IDateTimeParser DateTimeParser { get; } + + public IDateTimeParser TimePeriodParser { get; } + + public IDateTimeParser DurationParser { get; } + + public IDateTimeParser TimeZoneParser { get; } + + public Regex PureNumberFromToRegex { get; } + + public Regex HyphenDateRegex { get; } + + public Regex PureNumberBetweenAndRegex { get; } + + public Regex SpecificTimeOfDayRegex { get; } + + public Regex TimeOfDayRegex { get; } + + public Regex PreviousPrefixRegex { get; } + + public Regex FutureRegex { get; } + + public Regex FutureSuffixRegex { get; } + + public Regex NumberCombinedWithUnitRegex { get; } + + public Regex UnitRegex { get; } + + public Regex PeriodTimeOfDayWithDateRegex { get; } + + public Regex RelativeTimeUnitRegex { get; } + + public Regex RestOfDateTimeRegex { get; } + + public Regex AmDescRegex { get; } + + public Regex PmDescRegex { get; } + + public Regex WithinNextPrefixRegex { get; } + + public Regex PrefixDayRegex { get; } + + public Regex BeforeRegex { get; } + + public Regex AfterRegex { get; } + + bool IDateTimePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + Regex IDateTimePeriodParserConfiguration.TasksmodeMealTimeofDayRegex => null; + + public IImmutableDictionary UnitMap { get; } + + public IImmutableDictionary Numbers { get; } + + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + + // @TODO Move time range resolution to common policy + + public bool 
GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMin) + { + var trimmedText = text.Trim(); + + beginHour = 0; + endHour = 0; + endMin = 0; + if (MorningStartEndRegex.IsMatch(trimmedText)) + { + todSymbol = "TMO"; + beginHour = 8; + endHour = Constants.HalfDayHourCount; + } + else if (AfternoonStartEndRegex.IsMatch(trimmedText)) + { + todSymbol = "TAF"; + beginHour = Constants.HalfDayHourCount; + endHour = 16; + } + else if (EveningStartEndRegex.IsMatch(trimmedText)) + { + todSymbol = "TEV"; + beginHour = 16; + endHour = 20; + } + else if (NightStartEndRegex.IsMatch(trimmedText)) + { + todSymbol = "TNI"; + beginHour = 20; + endHour = 23; + endMin = 59; + } + else + { + todSymbol = null; + return false; + } + + return true; + } + + public int GetSwiftPrefix(string text) + { + var trimmedText = text.Trim(); + + // @TODO Move hardcoded terms to resource file + + var swift = 0; + if (trimmedText.StartsWith("next", StringComparison.Ordinal)) + { + swift = 1; + } + else if (trimmedText.StartsWith("last", StringComparison.Ordinal)) + { + swift = -1; + } + + return swift; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDurationParserConfiguration.cs new file mode 100644 index 0000000000..85115286f9 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishDurationParserConfiguration.cs @@ -0,0 +1,85 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.Swedish;

namespace Microsoft.Recognizers.Text.DateTime.Swedish
{
    /// <summary>
    /// Swedish configuration for the duration parser. Number components and unit maps are copied from
    /// the shared parser configuration; the regexes are the ones published by
    /// <see cref="SwedishDurationExtractorConfiguration"/>.
    /// </summary>
    public class SwedishDurationParserConfiguration : BaseDateTimeOptionsConfiguration, IDurationParserConfiguration
    {

        public static readonly Regex PrefixArticleRegex =
            new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut);

        // A const is resolved at compile time, so declaring it after the static field above is safe.
        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

        /// <summary>
        /// Builds the configuration from the shared Swedish parser configuration.
        /// </summary>
        /// <param name="config">Common configuration supplying extractors, parsers and unit maps.</param>
        public SwedishDurationParserConfiguration(ICommonDateTimeParserConfiguration config)
            : base(config)
        {
            CardinalExtractor = config.CardinalExtractor;
            NumberParser = config.NumberParser;

            // NOTE(review): `this` is handed to the extractor configuration before the properties
            // below are assigned - confirm it only reads state set by the base constructor.
            DurationExtractor = new BaseDurationExtractor(new SwedishDurationExtractorConfiguration(this), false);

            NumberCombinedWithUnit = SwedishDurationExtractorConfiguration.NumberCombinedWithDurationUnit;

            AnUnitRegex = SwedishDurationExtractorConfiguration.AnUnitRegex;
            DuringRegex = SwedishDurationExtractorConfiguration.DuringRegex;
            AllDateUnitRegex = SwedishDurationExtractorConfiguration.AllRegex;
            HalfDateUnitRegex = SwedishDurationExtractorConfiguration.HalfRegex;
            SuffixAndRegex = SwedishDurationExtractorConfiguration.SuffixAndRegex;
            FollowedUnit = SwedishDurationExtractorConfiguration.DurationFollowedUnit;
            ConjunctionRegex = SwedishDurationExtractorConfiguration.ConjunctionRegex;
            InexactNumberRegex = SwedishDurationExtractorConfiguration.InexactNumberRegex;
            InexactNumberUnitRegex = SwedishDurationExtractorConfiguration.InexactNumberUnitRegex;
            DurationUnitRegex = SwedishDurationExtractorConfiguration.DurationUnitRegex;
            SpecialNumberUnitRegex = SwedishDurationExtractorConfiguration.SpecialNumberUnitRegex;

            UnitMap = config.UnitMap;
            UnitValueMap = config.UnitValueMap;
            DoubleNumbers = config.DoubleNumbers;
        }

        public IExtractor CardinalExtractor { get; }

        public IDateTimeExtractor DurationExtractor { get; }

        public IParser NumberParser { get; }

        public Regex NumberCombinedWithUnit { get; }

        public Regex AnUnitRegex { get; }

        // Explicit implementation: the interface member is served by the static field of the same name.
        Regex IDurationParserConfiguration.PrefixArticleRegex => PrefixArticleRegex;

        public Regex DuringRegex { get; }

        public Regex AllDateUnitRegex { get; }

        public Regex HalfDateUnitRegex { get; }

        public Regex SuffixAndRegex { get; }

        public Regex FollowedUnit { get; }

        public Regex ConjunctionRegex { get; }

        public Regex InexactNumberRegex { get; }

        public Regex InexactNumberUnitRegex { get; }

        public Regex DurationUnitRegex { get; }

        public Regex SpecialNumberUnitRegex { get; }

        bool IDurationParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;

        // Generic arguments follow IDurationParserConfiguration - TODO confirm against the interface.
        public IImmutableDictionary<string, string> UnitMap { get; }

        public IImmutableDictionary<string, long> UnitValueMap { get; }

        public IImmutableDictionary<string, double> DoubleNumbers { get; }
    }
}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishHolidayParserConfiguration.cs
new file mode 100644
index 0000000000..3b42ab41ae
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishHolidayParserConfiguration.cs
@@ -0,0 +1,224 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.Swedish;

using DateObject = System.DateTime;

namespace Microsoft.Recognizers.Text.DateTime.Swedish
{
    /// <summary>
    /// Swedish configuration for the holiday parser: prefix regexes used to shift the year,
    /// token sanitisation, and the table mapping holiday keys to date calculators.
    /// </summary>
    public class SwedishHolidayParserConfiguration : BaseHolidayParserConfiguration
    {
        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

        public SwedishHolidayParserConfiguration(IDateTimeOptionsConfiguration config)
            : base(config)
        {
            ThisPrefixRegex = new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut);
            NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut);
            PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut);

            this.HolidayRegexList = SwedishHolidayExtractorConfiguration.HolidayRegexList;
            this.HolidayNames = DateTimeDefinitions.HolidayNames.ToImmutableDictionary();
        }

        public Regex ThisPrefixRegex { get; }

        public Regex NextPrefixRegex { get; }

        public Regex PreviousPrefixRegex { get; }

        /// <summary>
        /// Maps a year prefix to an offset in years.
        /// </summary>
        /// <param name="text">Text preceding the holiday name.</param>
        /// <returns>
        /// 1 for a "next" prefix, -1 for a "previous" prefix, 0 for a "this" prefix,
        /// and -10 when none of the three prefix regexes match.
        /// </returns>
        public override int GetSwiftYear(string text)
        {
            var trimmedText = text.Trim();
            var swift = -10;

            // The order matters only if the patterns overlap: next wins over previous, previous over this.
            if (NextPrefixRegex.IsMatch(trimmedText))
            {
                swift = 1;
            }
            else if (PreviousPrefixRegex.IsMatch(trimmedText))
            {
                swift = -1;
            }
            else if (ThisPrefixRegex.IsMatch(trimmedText))
            {
                swift = 0;
            }

            return swift;
        }

        /// <summary>
        /// Normalises a holiday token: "saint " becomes "st ", then spaces, apostrophes,
        /// dots and hyphens are removed.
        /// </summary>
        public override string SanitizeHolidayToken(string holiday)
        {
            return holiday
                .Replace("saint ", "st ")
                .Replace(" ", string.Empty)
                .Replace("'", string.Empty)
                .Replace(".", string.Empty)
                .Replace("-", string.Empty);
        }

        // @TODO Change to auto-generate.
        /// <summary>
        /// Extends the base holiday table with the fixed-date and Easter-relative holidays defined below.
        /// </summary>
        /// <returns>A dictionary from holiday key to a function computing its date for a given year.</returns>
        protected override IDictionary<string, Func<int, DateObject>> InitHolidayFuncs()
        {
            return new Dictionary<string, Func<int, DateObject>>(base.InitHolidayFuncs())
            {
                { "maosbirthday", MaoBirthday },
                { "yuandan", NewYear },
                { "teachersday", TeacherDay },
                { "singleday", SinglesDay },

                // NOTE(review): "allsaintsday" resolves to 31 October (HalloweenDay) while AllHallowDay is
                // 1 November - confirm this mapping is intended.
                { "allsaintsday", HalloweenDay },
                { "youthday", YouthDay },
                { "childrenday", ChildrenDay },
                { "femaleday", FemaleDay },
                { "treeplantingday", TreePlantDay },
                { "arborday", TreePlantDay },
                { "girlsday", GirlsDay },
                { "whiteloverday", WhiteLoverDay },
                { "loverday", ValentinesDay },
                { "christmas", ChristmasDay },
                { "xmas", ChristmasDay },
                { "newyear", NewYear },
                { "newyearday", NewYear },
                { "newyearsday", NewYear },
                { "inaugurationday", InaugurationDay },

                // NOTE(review): key is spelled "groundhougday"; left untouched because it presumably has to
                // match the key used in the holiday name definitions - verify before renaming.
                { "groundhougday", GroundhogDay },
                { "valentinesday", ValentinesDay },
                { "stpatrickday", StPatrickDay },
                { "aprilfools", FoolDay },
                { "earthday", EarthDay },
                { "stgeorgeday", StGeorgeDay },
                { "mayday", Mayday },
                { "cincodemayoday", CincoDeMayoDay },
                { "baptisteday", BaptisteDay },
                { "usindependenceday", UsaIndependenceDay },
                { "independenceday", UsaIndependenceDay },
                { "bastilleday", BastilleDay },
                { "halloweenday", HalloweenDay },
                { "allhallowday", AllHallowDay },
                { "allsoulsday", AllSoulsDay },
                { "guyfawkesday", GuyFawkesDay },
                { "veteransday", VeteransDay },
                { "christmaseve", ChristmasEve },
                { "newyeareve", NewYearEve },
                { "easterday", EasterDay },
                { "ashwednesday", AshWednesday },
                { "palmsunday", PalmSunday },
                { "maundythursday", MaundyThursday },
                { "goodfriday", GoodFriday },
                { "eastersaturday", EasterSaturday },
                { "eastermonday", EasterMonday },
                { "ascensionday", AscensionDay },
                { "whitesunday", WhiteSunday },
                { "whitemonday", WhiteMonday },
                { "trinitysunday", TrinitySunday },
                { "corpuschristi", CorpusChristi },
                { "juneteenth", Juneteenth },
                { "ramadan", Ramadan },
                { "sacrifice", Sacrifice },
                { "eidalfitr", EidAlFitr },
                { "islamicnewyear", IslamicNewYear },
            };
        }

        // Fixed-date holidays: each returns the same month/day in the requested year.
        private static DateObject NewYear(int year) => new DateObject(year, 1, 1);

        private static DateObject NewYearEve(int year) => new DateObject(year, 12, 31);

        private static DateObject ChristmasDay(int year) => new DateObject(year, 12, 25);

        private static DateObject ChristmasEve(int year) => new DateObject(year, 12, 24);

        private static DateObject ValentinesDay(int year) => new DateObject(year, 2, 14);

        private static DateObject WhiteLoverDay(int year) => new DateObject(year, 3, 14);

        private static DateObject FoolDay(int year) => new DateObject(year, 4, 1);

        private static DateObject EarthDay(int year) => new DateObject(year, 4, 22);

        private static DateObject GirlsDay(int year) => new DateObject(year, 3, 7);

        private static DateObject TreePlantDay(int year) => new DateObject(year, 3, 12);

        private static DateObject FemaleDay(int year) => new DateObject(year, 3, 8);

        private static DateObject ChildrenDay(int year) => new DateObject(year, 6, 1);

        private static DateObject YouthDay(int year) => new DateObject(year, 5, 4);

        private static DateObject TeacherDay(int year) => new DateObject(year, 9, 10);

        private static DateObject SinglesDay(int year) => new DateObject(year, 11, 11);

        private static DateObject MaoBirthday(int year) => new DateObject(year, 12, 26);

        private static DateObject InaugurationDay(int year) => new DateObject(year, 1, 20);

        private static DateObject GroundhogDay(int year) => new DateObject(year, 2, 2);

        private static DateObject StPatrickDay(int year) => new DateObject(year, 3, 17);

        private static DateObject StGeorgeDay(int year) => new DateObject(year, 4, 23);

        private static DateObject Mayday(int year) => new DateObject(year, 5, 1);

        private static DateObject CincoDeMayoDay(int year) => new DateObject(year, 5, 5);

        private static DateObject BaptisteDay(int year) => new DateObject(year, 6, 24);

        private static DateObject UsaIndependenceDay(int year) => new DateObject(year, 7, 4);

        private static DateObject BastilleDay(int year) => new DateObject(year, 7, 14);

        private static DateObject HalloweenDay(int year) => new DateObject(year, 10, 31);

        private static DateObject AllHallowDay(int year) => new DateObject(year, 11, 1);

        private static DateObject AllSoulsDay(int year) => new DateObject(year, 11, 2);

        private static DateObject GuyFawkesDay(int year) => new DateObject(year, 11, 5);

        private static DateObject VeteransDay(int year) => new DateObject(year, 11, 11);

        private static DateObject Juneteenth(int year) => new DateObject(year, 6, 19);

        // Easter-relative holidays: a day offset applied to the date returned by EasterDay.
        private static DateObject EasterDay(int year) => HolidayFunctions.CalculateHolidayByEaster(year);

        private static DateObject AshWednesday(int year) => EasterDay(year).AddDays(-46);

        private static DateObject PalmSunday(int year) => EasterDay(year).AddDays(-7);

        private static DateObject MaundyThursday(int year) => EasterDay(year).AddDays(-3);

        private static DateObject GoodFriday(int year) => EasterDay(year).AddDays(-2);

        private static DateObject EasterSaturday(int year) => EasterDay(year).AddDays(-1);

        private static DateObject EasterMonday(int year) => EasterDay(year).AddDays(1);

        private static DateObject AscensionDay(int year) => EasterDay(year).AddDays(39);

        private static DateObject WhiteSunday(int year) => EasterDay(year).AddDays(49);

        private static DateObject WhiteMonday(int year) => EasterDay(year).AddDays(50);

        private static DateObject TrinitySunday(int year) => EasterDay(year).AddDays(56);

        private static DateObject CorpusChristi(int year) => EasterDay(year).AddDays(60);

        // Islamic holidays are delegated to HolidayFunctions.IslamicHoliday.
        private static DateObject Ramadan(int year) => HolidayFunctions.IslamicHoliday(year, HolidayFunctions.IslamicHolidayType.Ramadan);

        private static DateObject Sacrifice(int year) => HolidayFunctions.IslamicHoliday(year, HolidayFunctions.IslamicHolidayType.Sacrifice);

        private static DateObject EidAlFitr(int year) => HolidayFunctions.IslamicHoliday(year, HolidayFunctions.IslamicHolidayType.EidAlFitr);

        private static DateObject IslamicNewYear(int year) => HolidayFunctions.IslamicHoliday(year, HolidayFunctions.IslamicHolidayType.NewYear);
    }
}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishMergedParserConfiguration.cs
new file mode 100644
index 0000000000..75c09c79da
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishMergedParserConfiguration.cs
@@ -0,0 +1,56 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.Swedish;
using Microsoft.Recognizers.Text.Matcher;

namespace Microsoft.Recognizers.Text.DateTime.Swedish
{
    /// <summary>
    /// Swedish configuration for the merged parser. Extends the common Swedish parser configuration
    /// with the modifier regexes from <see cref="SwedishMergedExtractorConfiguration"/> and creates the
    /// period, set, holiday and time-zone parsers.
    /// </summary>
    public sealed class SwedishMergedParserConfiguration : SwedishCommonDateTimeParserConfiguration, IMergedParserConfiguration
    {
        public SwedishMergedParserConfiguration(IDateTimeOptionsConfiguration config)
            : base(config)
        {
            BeforeRegex = SwedishMergedExtractorConfiguration.BeforeRegex;
            AfterRegex = SwedishMergedExtractorConfiguration.AfterRegex;

            // The experimental "since" pattern is used only when the ExperimentalMode flag is set.
            SinceRegex = (config.Options & DateTimeOptions.ExperimentalMode) != 0 ? SwedishMergedExtractorConfiguration.SinceRegexExp :
                SwedishMergedExtractorConfiguration.SinceRegex;
            AroundRegex = SwedishMergedExtractorConfiguration.AroundRegex;
            EqualRegex = SwedishMergedExtractorConfiguration.EqualRegex;
            SuffixAfter = SwedishMergedExtractorConfiguration.SuffixAfterRegex;
            YearRegex = SwedishDatePeriodExtractorConfiguration.YearRegex;

            SuperfluousWordMatcher = SwedishMergedExtractorConfiguration.SuperfluousWordMatcher;

            // Each parser configuration is built from this instance, after the regexes above are set.
            DatePeriodParser = new BaseDatePeriodParser(new SwedishDatePeriodParserConfiguration(this));
            TimePeriodParser = new BaseTimePeriodParser(new SwedishTimePeriodParserConfiguration(this));
            DateTimePeriodParser = new BaseDateTimePeriodParser(new SwedishDateTimePeriodParserConfiguration(this));
            SetParser = new BaseSetParser(new SwedishSetParserConfiguration(this));
            HolidayParser = new BaseHolidayParser(new SwedishHolidayParserConfiguration(this));
            TimeZoneParser = new BaseTimeZoneParser(new SwedishTimeZoneParserConfiguration(this));
        }

        public Regex BeforeRegex { get; }

        public Regex AfterRegex { get; }

        public Regex SinceRegex { get; }

        public Regex AroundRegex { get; }

        public Regex EqualRegex { get; }

        public Regex SuffixAfter { get; }

        public Regex YearRegex { get; }

        public IDateTimeParser SetParser { get; }

        public IDateTimeParser HolidayParser { get; }

        public StringMatcher SuperfluousWordMatcher { get; }

        bool IMergedParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter;
    }
}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishSetParserConfiguration.cs
new file mode 100644
index 0000000000..879416b753
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishSetParserConfiguration.cs
@@ -0,0 +1,177 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.Swedish;
using Microsoft.Recognizers.Text.DateTime.Utilities;

namespace Microsoft.Recognizers.Text.DateTime.Swedish
{
    /// <summary>
    /// Swedish configuration for the set (recurrence) parser. Extractors and parsers come from the
    /// shared parser configuration, the "each/every" regexes from
    /// <see cref="SwedishSetExtractorConfiguration"/>.
    /// </summary>
    public class SwedishSetParserConfiguration : BaseDateTimeOptionsConfiguration, ISetParserConfiguration
    {

        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

        private static readonly Regex DoubleMultiplierRegex =
            new Regex(DateTimeDefinitions.DoubleMultiplierRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex HalfMultiplierRegex =
            new Regex(DateTimeDefinitions.HalfMultiplierRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex DayTypeRegex =
            new Regex(DateTimeDefinitions.DayTypeRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex WeekTypeRegex =
            new Regex(DateTimeDefinitions.WeekTypeRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex WeekendTypeRegex =
            new Regex(DateTimeDefinitions.WeekendTypeRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex MonthTypeRegex =
            new Regex(DateTimeDefinitions.MonthTypeRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex QuarterTypeRegex =
            new Regex(DateTimeDefinitions.QuarterTypeRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex YearTypeRegex =
            new Regex(DateTimeDefinitions.YearTypeRegex, RegexFlags, RegexTimeOut);

        // pass FutureTerms as List to ReplaceValueInTextWithFutTerm function
        // (element type assumed to be string - TODO confirm against DateTimeDefinitions.FutureTerms)
        private static readonly List<string> ThisTerms = (List<string>)DateTimeDefinitions.FutureTerms;

        public SwedishSetParserConfiguration(ICommonDateTimeParserConfiguration config)
            : base(config)
        {
            DurationExtractor = config.DurationExtractor;
            TimeExtractor = config.TimeExtractor;
            DateExtractor = config.DateExtractor;
            DateTimeExtractor = config.DateTimeExtractor;
            DatePeriodExtractor = config.DatePeriodExtractor;
            TimePeriodExtractor = config.TimePeriodExtractor;
            DateTimePeriodExtractor = config.DateTimePeriodExtractor;

            DurationParser = config.DurationParser;
            TimeParser = config.TimeParser;
            DateParser = config.DateParser;
            DateTimeParser = config.DateTimeParser;
            DatePeriodParser = config.DatePeriodParser;
            TimePeriodParser = config.TimePeriodParser;
            DateTimePeriodParser = config.DateTimePeriodParser;
            UnitMap = config.UnitMap;

            EachPrefixRegex = SwedishSetExtractorConfiguration.EachPrefixRegex;
            PeriodicRegex = SwedishSetExtractorConfiguration.PeriodicRegex;
            EachUnitRegex = SwedishSetExtractorConfiguration.EachUnitRegex;
            EachDayRegex = SwedishSetExtractorConfiguration.EachDayRegex;
            SetWeekDayRegex = SwedishSetExtractorConfiguration.SetWeekDayRegex;
            SetEachRegex = SwedishSetExtractorConfiguration.SetEachRegex;
        }

        public IDateTimeExtractor DurationExtractor { get; }

        public IDateTimeParser DurationParser { get; }

        public IDateTimeExtractor TimeExtractor { get; }

        public IDateTimeParser TimeParser { get; }

        public IDateExtractor DateExtractor { get; }

        public IDateTimeParser DateParser { get; }

        public IDateTimeExtractor DateTimeExtractor { get; }

        public IDateTimeParser DateTimeParser { get; }

        public IDateTimeExtractor DatePeriodExtractor { get; }

        public IDateTimeParser DatePeriodParser { get; }

        public IDateTimeExtractor TimePeriodExtractor { get; }

        public IDateTimeParser TimePeriodParser { get; }

        public IDateTimeExtractor DateTimePeriodExtractor { get; }

        public IDateTimeParser DateTimePeriodParser { get; }

        // Generic arguments follow ISetParserConfiguration - TODO confirm against the interface.
        public IImmutableDictionary<string, string> UnitMap { get; }

        public Regex EachPrefixRegex { get; }

        public Regex PeriodicRegex { get; }

        public Regex EachUnitRegex { get; }

        public Regex EachDayRegex { get; }

        public Regex SetWeekDayRegex { get; }

        public Regex SetEachRegex { get; }

        /// <summary>
        /// Builds a set timex from a periodic expression such as "daily" or "weekly".
        /// </summary>
        /// <param name="text">The periodic expression.</param>
        /// <param name="timex">The generated timex, or null when no period type matched.</param>
        /// <returns>True when one of the period type regexes matched.</returns>
        public bool GetMatchedDailyTimex(string text, out string timex)
        {
            var trimmedText = text.Trim();

            float durationLength = 1; // Default value
            float multiplier = 1;
            string durationType;

            // Optional multiplier: a "double" term yields 2, a "half" term yields 0.5.
            if (DoubleMultiplierRegex.IsMatch(trimmedText))
            {
                multiplier = 2;
            }
            else if (HalfMultiplierRegex.IsMatch(trimmedText))
            {
                multiplier = 0.5f;
            }

            // First matching period type wins; the order of the checks is significant.
            if (DayTypeRegex.IsMatch(trimmedText))
            {
                durationType = "D";
            }
            else if (WeekTypeRegex.IsMatch(trimmedText))
            {
                durationType = "W";
            }
            else if (WeekendTypeRegex.IsMatch(trimmedText))
            {
                durationType = "WE";
            }
            else if (MonthTypeRegex.IsMatch(trimmedText))
            {
                durationType = "M";
            }
            else if (QuarterTypeRegex.IsMatch(trimmedText))
            {
                // A quarter is expressed as three months.
                durationLength = 3;
                durationType = "M";
            }
            else if (YearTypeRegex.IsMatch(trimmedText))
            {
                durationType = "Y";
            }
            else
            {
                timex = null;
                return false;
            }

            timex = TimexUtility.GenerateSetTimex(durationType, durationLength, multiplier);

            return true;
        }

        /// <summary>
        /// Same resolution as <see cref="GetMatchedDailyTimex"/>; unit expressions share its rules.
        /// </summary>
        public bool GetMatchedUnitTimex(string text, out string timex)
        {
            return GetMatchedDailyTimex(text, out timex);
        }

        public string WeekDayGroupMatchString(Match match) => SetHandler.WeekDayGroupMatchString(match);

        public string ReplaceValueInTextWithFutTerm(string text, string value) => TasksModeSetHandler.ReplaceValueInTextWithFutTerm(text, value, ThisTerms);
    }
}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishTimeParserConfiguration.cs
new file mode 100644
index 0000000000..7d2576e767
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishTimeParserConfiguration.cs
@@ -0,0 +1,191 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Globalization;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.Swedish;
using Microsoft.Recognizers.Text.DateTime.Utilities;
using Microsoft.Recognizers.Text.Utilities;

namespace Microsoft.Recognizers.Text.DateTime.Swedish
{
    /// <summary>
    /// Swedish configuration for the time parser, including the prefix ("half past", "quarter to")
    /// and suffix (am/pm, lunch, night) adjustments applied to a parsed hour and minute.
    /// </summary>
    public class SwedishTimeParserConfiguration : BaseDateTimeOptionsConfiguration, ITimeParserConfiguration
    {
        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

        private static readonly Regex TimeSuffixFull =
            new Regex(DateTimeDefinitions.TimeSuffixFull, RegexFlags, RegexTimeOut);

        private static readonly Regex LunchRegex =
            new Regex(DateTimeDefinitions.LunchRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex NightRegex =
            new Regex(DateTimeDefinitions.NightRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex HalfTokenRegex =
            new Regex(DateTimeDefinitions.HalfTokenRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex QuarterTokenRegex =
            new Regex(DateTimeDefinitions.QuarterTokenRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex ThreeQuarterTokenRegex =
            new Regex(DateTimeDefinitions.ThreeQuarterTokenRegex, RegexFlags, RegexTimeOut);

        private static readonly Regex ToTokenRegex =
            new Regex(DateTimeDefinitions.ToTokenRegex, RegexFlags, RegexTimeOut);

        public SwedishTimeParserConfiguration(ICommonDateTimeParserConfiguration config)
            : base(config)
        {
            TimeTokenPrefix = DateTimeDefinitions.TimeTokenPrefix;
            AtRegex = SwedishTimeExtractorConfiguration.AtRegex;
            TimeRegexes = SwedishTimeExtractorConfiguration.TimeRegexList;
            UtilityConfiguration = config.UtilityConfiguration;
            Numbers = config.Numbers;
            TimeZoneParser = config.TimeZoneParser;
        }

        public string TimeTokenPrefix { get; }

        public Regex AtRegex { get; }

        // NOTE(review): never assigned in the constructor, so this is always null - confirm that
        // consumers tolerate a null MealTimeRegex or assign the Swedish pattern here.
        public Regex MealTimeRegex { get; }

        public IEnumerable<Regex> TimeRegexes { get; }

        public IImmutableDictionary<string, int> Numbers { get; }

        public IDateTimeUtilityConfiguration UtilityConfiguration { get; }

        public IDateTimeParser TimeZoneParser { get; }

        /// <summary>
        /// Shifts the time by the minutes expressed in a prefix such as "half past" or "quarter to".
        /// </summary>
        /// <param name="prefix">The prefix text preceding the hour.</param>
        /// <param name="hour">Hour to adjust; decremented when the minutes wrap below zero.</param>
        /// <param name="min">Minute to adjust.</param>
        /// <param name="hasMin">Always set to true on return.</param>
        public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool hasMin)
        {
            int deltaMin;

            var trimmedPrefix = prefix.Trim();

            if (HalfTokenRegex.IsMatch(trimmedPrefix))
            {
                deltaMin = 30;
            }
            else if (QuarterTokenRegex.IsMatch(trimmedPrefix))
            {
                deltaMin = 15;
            }
            else if (ThreeQuarterTokenRegex.IsMatch(trimmedPrefix))
            {
                deltaMin = 45;
            }
            else
            {
                var match = SwedishTimeExtractorConfiguration.LessThanOneHour.Match(trimmedPrefix);
                var minStr = match.Groups["deltamin"].Value;
                if (!string.IsNullOrWhiteSpace(minStr))
                {
                    deltaMin = int.Parse(minStr, CultureInfo.InvariantCulture);
                }
                else
                {
                    // The written-number group may be empty or unknown when the prefix did not match;
                    // TryGetValue then leaves deltaMin at 0 instead of throwing KeyNotFoundException.
                    minStr = match.Groups["deltaminnum"].Value;
                    Numbers.TryGetValue(minStr, out deltaMin);
                }
            }

            // A "to" prefix counts backwards from the hour.
            if (ToTokenRegex.IsMatch(trimmedPrefix))
            {
                deltaMin = -deltaMin;
            }

            min += deltaMin;
            if (min < 0)
            {
                min += 60;
                hour -= 1;
            }

            hasMin = true;
        }

        /// <summary>
        /// Applies an am/pm style suffix to the hour and reports which half of the day was indicated.
        /// </summary>
        /// <param name="suffix">The suffix text following the time.</param>
        /// <param name="hour">Hour to adjust; normalised modulo 24 on return.</param>
        /// <param name="min">Not modified by this method.</param>
        /// <param name="hasMin">Not modified by this method.</param>
        /// <param name="hasAm">Set to true when the suffix resolves to the morning half.</param>
        /// <param name="hasPm">Set to true when the suffix resolves to the afternoon half.</param>
        public void AdjustBySuffix(string suffix, ref int hour, ref int min, ref bool hasMin, ref bool hasAm, ref bool hasPm)
        {
            var deltaHour = 0;
            var match = TimeSuffixFull.MatchExact(suffix, trim: true);

            if (match.Success)
            {
                // An "o'clock" style suffix carries no am/pm information, so nothing is adjusted.
                var oclockStr = match.Groups["oclock"].Value;
                if (string.IsNullOrEmpty(oclockStr))
                {
                    var matchAmStr = match.Groups[Constants.AmGroupName].Value;
                    if (!string.IsNullOrEmpty(matchAmStr))
                    {
                        if (hour >= Constants.HalfDayHourCount)
                        {
                            deltaHour = -Constants.HalfDayHourCount;
                        }
                        else
                        {
                            hasAm = true;
                        }
                    }

                    var matchPmStr = match.Groups[Constants.PmGroupName].Value;
                    if (!string.IsNullOrEmpty(matchPmStr))
                    {
                        if (hour < Constants.HalfDayHourCount)
                        {
                            deltaHour = Constants.HalfDayHourCount;
                        }

                        if (LunchRegex.IsMatch(matchPmStr))
                        {
                            // Around lunch, hours 10 to 12 are taken literally instead of shifted by twelve.
                            if (hour >= 10 && hour <= Constants.HalfDayHourCount)
                            {
                                deltaHour = 0;
                                if (hour == Constants.HalfDayHourCount)
                                {
                                    hasPm = true;
                                }
                                else
                                {
                                    hasAm = true;
                                }
                            }
                            else
                            {
                                hasPm = true;
                            }
                        }
                        else if (NightRegex.IsMatch(matchPmStr))
                        {
                            // At night, hours up to 3 and the half-day hour are early-morning times.
                            if (hour <= 3 || hour == Constants.HalfDayHourCount)
                            {
                                if (hour == Constants.HalfDayHourCount)
                                {
                                    hour = 0;
                                }

                                deltaHour = 0;
                                hasAm = true;
                            }
                            else
                            {
                                hasPm = true;
                            }
                        }
                        else
                        {
                            hasPm = true;
                        }
                    }
                }
            }

            hour = (hour + deltaHour) % 24;
        }
    }
}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishTimePeriodParserConfiguration.cs
new file mode 100644
index 0000000000..b45d82f524
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishTimePeriodParserConfiguration.cs
@@ -0,0 +1,133 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System;
using System.Collections.Immutable;
using System.Linq;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.Swedish;
using Microsoft.Recognizers.Text.DateTime.Utilities;

namespace Microsoft.Recognizers.Text.DateTime.Swedish
{
    /// <summary>
    /// Swedish configuration for the time period parser, including resolution of named
    /// parts of the day (morning, afternoon, meal times, ...) to hour ranges.
    /// </summary>
    public class SwedishTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, ITimePeriodParserConfiguration
    {
        public SwedishTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration config)
            : base(config)
        {
            TimeExtractor = config.TimeExtractor;
            IntegerExtractor = config.IntegerExtractor;
            TimeParser = config.TimeParser;
            TimeZoneParser = config.TimeZoneParser;

            PureNumberFromToRegex = SwedishTimePeriodExtractorConfiguration.PureNumFromTo;
            PureNumberBetweenAndRegex = SwedishTimePeriodExtractorConfiguration.PureNumBetweenAnd;
            SpecificTimeFromToRegex = SwedishTimePeriodExtractorConfiguration.SpecificTimeFromTo;
            SpecificTimeBetweenAndRegex = SwedishTimePeriodExtractorConfiguration.SpecificTimeBetweenAnd;
            TimeOfDayRegex = SwedishTimePeriodExtractorConfiguration.TimeOfDayRegex;
            GeneralEndingRegex = SwedishTimePeriodExtractorConfiguration.GeneralEndingRegex;
            TillRegex = SwedishTimePeriodExtractorConfiguration.TillRegex;

            Numbers = config.Numbers;
            UtilityConfiguration = config.UtilityConfiguration;
        }

        public IDateTimeExtractor TimeExtractor { get; }

        public IDateTimeParser TimeParser { get; }

        public IExtractor IntegerExtractor { get; }

        public IDateTimeParser TimeZoneParser { get; }

        public Regex SpecificTimeFromToRegex { get; }

        public Regex SpecificTimeBetweenAndRegex { get; }

        public Regex PureNumberFromToRegex { get; }

        public Regex PureNumberBetweenAndRegex { get; }

        public Regex TimeOfDayRegex { get; }

        public Regex GeneralEndingRegex { get; }

        public Regex TillRegex { get; }

        public IImmutableDictionary<string, int> Numbers { get; }

        public IDateTimeUtilityConfiguration UtilityConfiguration { get; }

        /// <summary>
        /// Resolves a named part of the day to its timex and hour range.
        /// </summary>
        /// <param name="text">Text ending with, or containing, a time-of-day term.</param>
        /// <param name="timex">Resolved timex, or null when no term list matched.</param>
        /// <param name="beginHour">First hour of the range (0 when unmatched).</param>
        /// <param name="endHour">Last hour of the range (0 when unmatched).</param>
        /// <param name="endMin">Final minute of the range (0 when unmatched).</param>
        /// <returns>True when a term list matched.</returns>
        public bool GetMatchedTimeRange(string text, out string timex, out int beginHour, out int endHour, out int endMin)
        {
            var trimmedText = text.Trim();

            // NOTE(review): dropping one trailing "s" looks like English plural handling carried
            // over - confirm it is wanted for the Swedish term lists.
            if (trimmedText.EndsWith("s", StringComparison.Ordinal))
            {
                trimmedText = trimmedText.Substring(0, trimmedText.Length - 1);
            }

            beginHour = 0;
            endHour = 0;
            endMin = 0;

            // The first list with a hit wins, so the order of the checks is significant.
            var timeOfDay = string.Empty;
            if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)))
            {
                timeOfDay = Constants.Morning;
            }
            else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)))
            {
                timeOfDay = Constants.Afternoon;
            }
            else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)))
            {
                timeOfDay = Constants.Evening;
            }
            else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)))
            {
                timeOfDay = Constants.Daytime;
            }
            else if (DateTimeDefinitions.NighttimeTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)))
            {
                timeOfDay = Constants.Nighttime;
            }
            else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)))
            {
                timeOfDay = Constants.Night;
            }
            else if (DateTimeDefinitions.BusinessHourSplitStrings.All(o => trimmedText.Contains(o)))
            {
                // Business hours require every split string to be present, not just one of them.
                timeOfDay = Constants.BusinessHour;
            }
            else if (DateTimeDefinitions.MealtimeBreakfastTermList.Any(o => trimmedText.Contains(o)))
            {
                timeOfDay = Constants.MealtimeBreakfast;
            }
            else if (DateTimeDefinitions.MealtimeBrunchTermList.Any(o => trimmedText.Contains(o)))
            {
                timeOfDay = Constants.MealtimeBrunch;
            }
            else if (DateTimeDefinitions.MealtimeLunchTermList.Any(o => trimmedText.Contains(o)))
            {
                timeOfDay = Constants.MealtimeLunch;
            }
            else if (DateTimeDefinitions.MealtimeDinnerTermList.Any(o => trimmedText.Contains(o)))
            {
                timeOfDay = Constants.MealtimeDinner;
            }
            else
            {
                timex = null;
                return false;
            }

            var parseResult = TimexUtility.ResolveTimeOfDay(timeOfDay);
            timex = parseResult.Timex;
            beginHour = parseResult.BeginHour;
            endHour = parseResult.EndHour;
            endMin = parseResult.EndMin;

            return true;
        }
    }
}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishTimeZoneParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishTimeZoneParserConfiguration.cs
new file mode 100644
index 0000000000..ae48a64643
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/SwedishTimeZoneParserConfiguration.cs
@@ -0,0 +1,33 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.Swedish;

namespace Microsoft.Recognizers.Text.DateTime.Swedish
{
    /// <summary>
    /// Swedish configuration for the time zone parser; exposes the values from
    /// <see cref="TimeZoneDefinitions"/> through <see cref="ITimeZoneParserConfiguration"/>.
    /// </summary>
    public class SwedishTimeZoneParserConfiguration : BaseDateTimeOptionsConfiguration, ITimeZoneParserConfiguration
    {
        public static readonly string TimeZoneEndRegex = TimeZoneDefinitions.TimeZoneEndRegex;

        // Value type assumed to be int - TODO confirm against TimeZoneDefinitions.
        public static readonly Dictionary<string, int> FullToMinMapping = TimeZoneDefinitions.FullToMinMapping;

        // Built with the shared match timeout, like the other regexes in the Swedish configurations.
        public static readonly Regex DirectUtcRegex =
            new Regex(TimeZoneDefinitions.DirectUtcRegex, RegexOptions.IgnoreCase | RegexOptions.Singleline, RegexTimeOut);

        public static readonly Dictionary<string, int> AbbrToMinMapping = TimeZoneDefinitions.AbbrToMinMapping;

        public SwedishTimeZoneParserConfiguration(IDateTimeOptionsConfiguration config)
            : base(config)
        {
        }

        string ITimeZoneParserConfiguration.TimeZoneEndRegex => TimeZoneEndRegex;

        Dictionary<string, int> ITimeZoneParserConfiguration.FullToMinMapping => FullToMinMapping;

        Regex ITimeZoneParserConfiguration.DirectUtcRegex => DirectUtcRegex;

        Dictionary<string, int> ITimeZoneParserConfiguration.AbbrToMinMapping => AbbrToMinMapping;
    }
}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/TimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/TimeParser.cs
new file mode 100644
index 0000000000..a116dc34ad
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Parsers/TimeParser.cs
@@ -0,0 +1,58 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
+ +using System.Globalization; + +using Microsoft.Recognizers.Text.Utilities; + +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime.Swedish +{ + public class TimeParser : BaseTimeParser + { + public TimeParser(ITimeParserConfiguration configuration) + : base(configuration) + { + } + + protected override DateTimeResolutionResult InternalParse(string text, DateObject referenceTime) + { + var innerResult = base.InternalParse(text, referenceTime); + if (!innerResult.Success) + { + innerResult = ParseIsh(text, referenceTime); + } + + return innerResult; + } + + // parse "noonish", "11-ish" + private DateTimeResolutionResult ParseIsh(string text, DateObject referenceTime) + { + var ret = new DateTimeResolutionResult(); + var lowerText = text; + + var match = SwedishTimeExtractorConfiguration.IshRegex.MatchExact(lowerText, trim: true); + + if (match.Success) + { + var hourStr = match.Groups[Constants.HourGroupName].Value; + var hour = Constants.HalfDayHourCount; + if (!string.IsNullOrEmpty(hourStr)) + { + hour = int.Parse(hourStr, CultureInfo.InvariantCulture); + } + + ret.Timex = "T" + hour.ToString("D2", CultureInfo.InvariantCulture); + ret.FutureValue = + ret.PastValue = + DateObject.MinValue.SafeCreateFromValue(referenceTime.Year, referenceTime.Month, referenceTime.Day, hour, 0, 0); + ret.Success = true; + return ret; + } + + return ret; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Utilities/SwedishDatetimeUtilityConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Utilities/SwedishDatetimeUtilityConfiguration.cs new file mode 100644 index 0000000000..3fc97c0486 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Utilities/SwedishDatetimeUtilityConfiguration.cs @@ -0,0 +1,32 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Swedish; +using Microsoft.Recognizers.Text.DateTime.Utilities; + +namespace Microsoft.Recognizers.Text.DateTime.Swedish.Utilities +{ + public class SwedishDatetimeUtilityConfiguration : BaseDatetimeUtilityConfiguration + { + public SwedishDatetimeUtilityConfiguration() + : base( + DateTimeDefinitions.AgoRegex, + DateTimeDefinitions.LaterRegex, + DateTimeDefinitions.InConnectorRegex, + DateTimeDefinitions.SinceYearSuffixRegex, + DateTimeDefinitions.WithinNextPrefixRegex, + DateTimeDefinitions.AmDescRegex, + DateTimeDefinitions.PmDescRegex, + DateTimeDefinitions.AmPmDescRegex, + DateTimeDefinitions.RangeUnitRegex, + DateTimeDefinitions.TimeUnitRegex, + DateTimeDefinitions.DateUnitRegex, + DateTimeDefinitions.CommonDatePrefixRegex, + DateTimeDefinitions.RangePrefixRegex, + RegexOptions.Singleline | RegexOptions.ExplicitCapture, + DateTimeDefinitions.CheckBothBeforeAfter) + { + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/TasksModeConstants.cs b/.NET/Microsoft.Recognizers.Text.DateTime/TasksModeConstants.cs new file mode 100644 index 0000000000..312b09ce05 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/TasksModeConstants.cs @@ -0,0 +1,85 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Diagnostics.CodeAnalysis; +using System.Globalization; + +using Microsoft.Recognizers.Definitions; + +namespace Microsoft.Recognizers.Text.DateTime +{ + [SuppressMessage("StyleCop.CSharp.NamingRules", "SA1310: CSharp.Naming : Field names must not contain underscores.", Justification = "Constant names are written in upper case so they can be readily distinguished from camel case variable names.")] + public static class TasksModeConstants + { + // These are some particular values for timezone recognition + public const int WeekDayCount = 7; + + // Hours in a half day + public const int HalfDayHourCount = 12; + + // Default boundaries for time of day resolution under TasksMode + public const int EarlyMorningBeginHour = 6; + public const int EarlyMorningEndHour = 6; + public const int MorningBeginHour = 6; + public const int MorningEndHour = 6; + public const int MidDayBeginHour = 12; + public const int MidDayEndHour = 12; + public const int AfternoonBeginHour = 12; + public const int AfternoonEndHour = 12; + public const int EveningBeginHour = 18; + public const int EveningEndHour = 18; + public const int DaytimeBeginHour = 16; + public const int DaytimeEndHour = 16; + public const int NighttimeBeginHour = 21; + public const int NighttimeEndHour = 21; + public const int BusinessBeginHour = 8; + public const int BusinessEndHour = 18; + public const int NightBeginHour = 21; + public const int NightEndHour = 21; + public const int NightEndMin = 0; + public const int MealtimeBreakfastBeginHour = 8; + public const int MealtimeBreakfastEndHour = 12; + public const int MealtimeBrunchBeginHour = 8; + public const int MealtimeBrunchEndHour = 12; + public const int MealtimeLunchBeginHour = 11; + public const int MealtimeLunchEndHour = 13; + public const int MealtimeDinnerBeginHour = 20; + public const int MealtimeDinnerEndHour = 21; + + // constants + public const string PeriodDaySuffix = "P1D"; + public const string YearlyPeriodSuffix = "P1Y"; + public const 
string WeeklyPeriodSuffix = "P1W"; + public const string DailyPeriodPrefix = "P1"; + public const string AlternatePeriodPrefix = "P2"; + public const string WeekEndPrefix = "WE"; + public const string WeekDayPrefix = "WD"; + + // Fuzzy timex + public const string FuzzyYearAndMonth = "XXXX-XX-"; + public const string FuzzyYearAndWeek = "XXXX-WXX-"; + public const string FuzzyYear = "XXXX-"; + + // Default time in String format + public const string StringMorningHHMMSS = "06:00:00"; + public const string StringAfternoonHHMMSS = "12:00:00"; + public const string StringEveningHHMMSS = "18:00:00"; + public const string StringNightHHMMSS = "21:00:00"; + + // timex pattern to be extracted by SetHandler + public const string PeriodString = "period"; + public const string AmountString = "amount"; + public const string DateUnitString = "dateUnit"; + + // TasksMode Additional units and constants in SetParser + public const string KeyIntSize = "intervalSize"; + public const string KeyIntType = "intervalType"; + public const string KeySetTypeName = "setTypename"; + public const string KeySet = "Set: "; + public const string NextWeekGroupName = "next week"; + + // TasksMode SetParser Date calucation constant from value + public const int IntDateStartIdx = 0; + public const int IntDateEndIdx = 10; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/TimeTypeConstants.cs b/.NET/Microsoft.Recognizers.Text.DateTime/TimeTypeConstants.cs index f9f453a5bb..51e5a469d3 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/TimeTypeConstants.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/TimeTypeConstants.cs @@ -1,4 +1,7 @@ -using System.Diagnostics.CodeAnalysis; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Diagnostics.CodeAnalysis; namespace Microsoft.Recognizers.Text.DateTime { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateExtractorConfiguration.cs index 5e7ad94c6d..cc58f383bd 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; @@ -14,103 +17,103 @@ public class TurkishDateExtractorConfiguration : BaseDateTimeOptionsConfiguratio { public static readonly Regex MonthRegex = - new Regex(DateTimeDefinitions.MonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SingleWeekDayRegex = - new Regex(DateTimeDefinitions.SingleWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SingleWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex OnRegex = - new Regex(DateTimeDefinitions.OnRegex, RegexFlags); + new Regex(DateTimeDefinitions.OnRegex, RegexFlags, RegexTimeOut); public static readonly 
Regex RelaxedOnRegex = - new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelaxedOnRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisRegex = - new Regex(DateTimeDefinitions.ThisRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisRegex, RegexFlags, RegexTimeOut); public static readonly Regex LastDateRegex = - new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.LastDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextDateRegex = - new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDayRegex = - new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayOfMonthRegex = - new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeWeekDayRegex = - new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDate = - new Regex(DateTimeDefinitions.SpecialDate, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDate, RegexFlags, RegexTimeOut); public static readonly Regex SpecialDayWithNumRegex = - new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecialDayWithNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex ForTheRegex = - new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags); + new Regex(DateTimeDefinitions.ForTheRegex, RegexFlags, RegexTimeOut); 
public static readonly Regex WeekDayAndDayOfMothRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayAndDayOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayAndDayRegex = - new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayAndDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex StrictRelativeRegex = - new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.StrictRelativeRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixArticleRegex = - new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); public static readonly Regex OfMonth = - new Regex(DateTimeDefinitions.OfMonth, RegexFlags); + new Regex(DateTimeDefinitions.OfMonth, RegexFlags, RegexTimeOut); public static readonly Regex MonthEnd = - new Regex(DateTimeDefinitions.MonthEnd, RegexFlags); + new Regex(DateTimeDefinitions.MonthEnd, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayEnd = - new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayEnd, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayStart = - new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayStart, RegexFlags, RegexTimeOut); public static readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, 
RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeConnectorSymbolRegex = - new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags); + new Regex(Definitions.BaseDateTime.RangeConnectorSymbolRegex, RegexFlags, RegexTimeOut); public static readonly ImmutableDictionary DayOfWeek = DateTimeDefinitions.DayOfWeek.ToImmutableDictionary(); @@ -118,19 +121,32 @@ public class TurkishDateExtractorConfiguration : BaseDateTimeOptionsConfiguratio public static readonly ImmutableDictionary MonthOfYear = DateTimeDefinitions.MonthOfYear.ToImmutableDictionary(); + public static readonly Regex BeforeAfterRegex = + new Regex(DateTimeDefinitions.BeforeAfterRegex, RegexFlags, RegexTimeOut); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.ImplicitDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.ImplicitDayRegex, RegexFlags, RegexTimeOut); public TurkishDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new 
BaseNumberOptionsConfiguration(config.Culture, numOptions); + IntegerExtractor = Number.Turkish.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.Turkish.OrdinalExtractor.GetInstance(); + OrdinalExtractor = Number.Turkish.OrdinalExtractor.GetInstance(numConfig); - NumberParser = new BaseNumberParser(new TurkishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + NumberParser = new BaseNumberParser(new TurkishNumberParserConfiguration(new BaseNumberOptionsConfiguration(numConfig))); DurationExtractor = new BaseDurationExtractor(new TurkishDurationExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new TurkishHolidayExtractorConfiguration(this)); UtilityConfiguration = new TurkishDatetimeUtilityConfiguration(); ImplicitDateList = new List @@ -175,45 +191,45 @@ public TurkishDateExtractorConfiguration(IDateTimeOptionsConfiguration config) } // Gelecek Pazar (1 Nisan 2016) - var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags); + var dateRegex4 = new Regex(DateTimeDefinitions.DateExtractor4, RegexFlags, RegexTimeOut); // 23-3-2015 (,Pazar|(Pazar))? - var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags); + var dateRegex5 = new Regex(DateTimeDefinitions.DateExtractor5, RegexFlags, RegexTimeOut); // Gelecek Pazar (1-1-2016) - var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags); + var dateRegex6 = new Regex(DateTimeDefinitions.DateExtractor6, RegexFlags, RegexTimeOut); // 6 Nisan'da or Altı Nisan'da - var dateRegex7 = new Regex(DateTimeDefinitions.DateExtractor7, RegexFlags); + var dateRegex7 = new Regex(DateTimeDefinitions.DateExtractor7, RegexFlags, RegexTimeOut); // 2015 yılı Nisan'ın 6'sı(nda)? (Pazar)? 
- var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags); + var dateRegex8 = new Regex(DateTimeDefinitions.DateExtractor8, RegexFlags, RegexTimeOut); // 6'ncı Çarşamba or Altıncı Çarşamba - var dateRegex9 = new Regex(DateTimeDefinitions.DateExtractor9, RegexFlags); + var dateRegex9 = new Regex(DateTimeDefinitions.DateExtractor9, RegexFlags, RegexTimeOut); // "(Sunday,)? 7/23, 2018", year part is required - var dateRegex7L = new Regex(DateTimeDefinitions.DateExtractor7L, RegexFlags); + var dateRegex7L = new Regex(DateTimeDefinitions.DateExtractor7L, RegexFlags, RegexTimeOut); // "(Sunday,)? 7/23", year part is not required - var dateRegex7S = new Regex(DateTimeDefinitions.DateExtractor7S, RegexFlags); + var dateRegex7S = new Regex(DateTimeDefinitions.DateExtractor7S, RegexFlags, RegexTimeOut); // "(Sunday,)? 23/7, 2018", year part is required - var dateRegex9L = new Regex(DateTimeDefinitions.DateExtractor9L, RegexFlags); + var dateRegex9L = new Regex(DateTimeDefinitions.DateExtractor9L, RegexFlags, RegexTimeOut); // "(Sunday,)? 23/7", year part is not required - var dateRegex9S = new Regex(DateTimeDefinitions.DateExtractor9S, RegexFlags); + var dateRegex9S = new Regex(DateTimeDefinitions.DateExtractor9S, RegexFlags, RegexTimeOut); // (Sunday,)? 2015-12-23 - var dateRegexA = new Regex(DateTimeDefinitions.DateExtractorA, RegexFlags); + var dateRegexA = new Regex(DateTimeDefinitions.DateExtractorA, RegexFlags, RegexTimeOut); DateRegexList = new List { // 5 Nisan (Pazar|(Pazar)|,Pazar)? or 5 Nisan 2016 (Pazar|(Pazar)|,Pazar)? - new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor1, RegexFlags, RegexTimeOut), // Gelecek ayın 6'sı(nda)? (Pazar)? or Gelecek ayın altısı(nda)? (Pazar)? 
- new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags), + new Regex(DateTimeDefinitions.DateExtractor3, RegexFlags, RegexTimeOut), }; var enableDmy = DmyDateFormat || @@ -234,6 +250,8 @@ public TurkishDateExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DurationExtractor { get; } + public IDateTimeExtractor HolidayExtractor { get; } + public IDateTimeUtilityConfiguration UtilityConfiguration { get; } public IEnumerable ImplicitDateList { get; } @@ -281,5 +299,7 @@ public TurkishDateExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IDateExtractorConfiguration.RangeUnitRegex => RangeUnitRegex; Regex IDateExtractorConfiguration.RangeConnectorSymbolRegex => RangeConnectorSymbolRegex; + + Regex IDateExtractorConfiguration.BeforeAfterRegex => BeforeAfterRegex; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDatePeriodExtractorConfiguration.cs index e9f7f0fbb3..a3b4c47d64 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDatePeriodExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; @@ -13,169 +16,175 @@ public class TurkishDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfig { // Base regexes public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeConnectorRegex = - new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumRegex = - new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex IllegalYearRegex = - new Regex(BaseDateTime.IllegalYearRegex, RegexFlags); + new Regex(BaseDateTime.IllegalYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDayRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeMonthRegex = - new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WrittenMonthRegex = - new Regex(DateTimeDefinitions.WrittenMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WrittenMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthSuffixRegex = - new Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.MonthSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex FutureSuffixRegex = - new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.FutureSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangePrefixRegex = - new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); // composite regexes public static readonly Regex SimpleCasesRegex = - new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleCasesRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthFrontSimpleCasesRegex = - new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontSimpleCasesRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthFrontBetweenRegex = - new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthFrontBetweenRegex, RegexFlags, RegexTimeOut); public 
static readonly Regex BetweenRegex = - new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags); + new Regex(DateTimeDefinitions.BetweenRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthWithYear = - new Regex(DateTimeDefinitions.MonthWithYear, RegexFlags); + new Regex(DateTimeDefinitions.MonthWithYear, RegexFlags, RegexTimeOut); public static readonly Regex OneWordPeriodRegex = - new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.OneWordPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthNumWithYear = - new Regex(DateTimeDefinitions.MonthNumWithYear, RegexFlags); + new Regex(DateTimeDefinitions.MonthNumWithYear, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfMonthRegex = - new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfYearRegex = - new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex FollowedDateUnit = - new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.FollowedDateUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithDateUnit = - new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags); + new Regex(DateTimeDefinitions.NumberCombinedWithDateUnit, RegexFlags, RegexTimeOut); public static readonly Regex QuarterRegex = - new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags); + new Regex(DateTimeDefinitions.QuarterRegex, RegexFlags, RegexTimeOut); public static readonly Regex QuarterRegexYearFront = - new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags); + new Regex(DateTimeDefinitions.QuarterRegexYearFront, RegexFlags, RegexTimeOut); public static readonly Regex AllHalfYearRegex = - new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags); 
+ new Regex(DateTimeDefinitions.AllHalfYearRegex, RegexFlags, RegexTimeOut); public static readonly Regex SeasonRegex = - new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags); + new Regex(DateTimeDefinitions.SeasonRegex, RegexFlags, RegexTimeOut); public static readonly Regex WhichWeekRegex = - new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags); + new Regex(DateTimeDefinitions.WhichWeekRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekOfRegex = - new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex MonthOfRegex = - new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.MonthOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RestOfDateRegex = - new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterEarlyPeriodRegex = - new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekWithWeekDayRangeRegex = - new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekWithWeekDayRangeRegex, RegexFlags, RegexTimeOut); public static readonly Regex 
YearPlusNumberRegex = - new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPlusNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex DecadeWithCenturyRegex = - new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags); + new Regex(DateTimeDefinitions.DecadeWithCenturyRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearPeriodRegex = - new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearPeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex ComplexDatePeriodRegex = - new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ComplexDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDecadeRegex = - new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDecadeRegex, RegexFlags, RegexTimeOut); public static readonly Regex ReferenceDatePeriodRegex = - new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.ReferenceDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoRegex, RegexFlags); + new Regex(DateTimeDefinitions.AgoRegex, RegexFlags, RegexTimeOut); public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); + new Regex(DateTimeDefinitions.LaterRegex, RegexFlags, RegexTimeOut); public static readonly Regex LessThanRegex = - new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.LessThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex MoreThanRegex = - new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags); + new Regex(DateTimeDefinitions.MoreThanRegex, RegexFlags, RegexTimeOut); public static readonly Regex CenturySuffixRegex = - new Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.CenturySuffixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex FirstLastRegex = + new Regex(DateTimeDefinitions.FirstLastRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex OfYearRegex = + new Regex(DateTimeDefinitions.OfYearRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex ExcludeSuffixRegex = - new Regex(DateTimeDefinitions.ExcludeSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ExcludeSuffixRegex, RegexFlags, RegexTimeOut); private static readonly Regex FromRegex = - new Regex(DateTimeDefinitions.FromRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); private static readonly Regex[] SimpleCasesRegexes = { @@ -250,10 +259,20 @@ public TurkishDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con : base(config) { DatePointExtractor = new BaseDateExtractor(new TurkishDateExtractorConfiguration(this)); - CardinalExtractor = Number.Turkish.CardinalExtractor.GetInstance(); - OrdinalExtractor = Number.Turkish.OrdinalExtractor.GetInstance(); DurationExtractor = new BaseDurationExtractor(new TurkishDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new TurkishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Turkish.CardinalExtractor.GetInstance(); + OrdinalExtractor = Number.Turkish.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new TurkishNumberParserConfiguration(numConfig)); } public IDateExtractor DatePointExtractor { get; } @@ -320,6 +339,10 @@ public 
TurkishDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con Regex IDatePeriodExtractorConfiguration.NowRegex => NowRegex; + Regex IDatePeriodExtractorConfiguration.FirstLastRegex => FirstLastRegex; + + Regex IDatePeriodExtractorConfiguration.OfYearRegex => OfYearRegex; + bool IDatePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; string[] IDatePeriodExtractorConfiguration.DurationDateRestrictions => DateTimeDefinitions.DurationDateRestrictions; @@ -339,14 +362,6 @@ public bool GetFromTokenIndex(string text, out int index) public bool GetBetweenTokenIndex(string text, out int index) { index = -1; - var match = RangePrefixRegex.MatchEnd(text, false); - - if (match.Success) - { - index = match.Index; - return true; - } - string textTrm = text; // do not include the suffix in textTrm @@ -358,7 +373,7 @@ public bool GetBetweenTokenIndex(string text, out int index) textTrm = textTrm.TrimStart(); int diff = text.Length - textTrm.Length; - match = RangePrefixRegex.MatchBegin(textTrm, false); + var match = RangePrefixRegex.MatchBegin(textTrm, false); if (match.Success) { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateTimeAltExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateTimeAltExtractorConfiguration.cs index d723fcb999..1c99c8ea7b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateTimeAltExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateTimeAltExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; @@ -7,22 +10,22 @@ namespace Microsoft.Recognizers.Text.DateTime.Turkish public class TurkishDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeAltExtractorConfiguration { public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); public static readonly Regex RangePrefixRegex = - new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] RelativePrefixList = { @@ -37,10 +40,10 @@ public class TurkishDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfi private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex OrRegex = - new Regex(DateTimeDefinitions.OrRegex, RegexFlags); + new Regex(DateTimeDefinitions.OrRegex, RegexFlags, RegexTimeOut); private static readonly Regex DayRegex = - new Regex(DateTimeDefinitions.DayRegex, RegexFlags); + new Regex(DateTimeDefinitions.DayRegex, RegexFlags, RegexTimeOut); public 
TurkishDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateTimeExtractorConfiguration.cs index 67d45ff7b2..06743b1daf 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateTimeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; using Microsoft.Recognizers.Text.DateTime.Turkish.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -8,63 +11,63 @@ namespace Microsoft.Recognizers.Text.DateTime.Turkish public class TurkishDateTimeExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeExtractorConfiguration { public static readonly Regex PrepositionRegex = - new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); public static readonly Regex NowRegex = - new Regex(DateTimeDefinitions.NowRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixRegex = - new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfDayRegex = - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, 
RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayAfterRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex SimpleTimeOfTodayBeforeRegex = - new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.SimpleTimeOfTodayBeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificEndOfRegex = - new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificEndOfRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificEndOfRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConnectorRegex = - new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex NumberAsTimeRegex = - new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.NumberAsTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateNumberConnectorRegex = - new Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.DateNumberConnectorRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex YearSuffix = - new Regex(DateTimeDefinitions.YearSuffix, RegexFlags); + new Regex(DateTimeDefinitions.YearSuffix, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex ExcludeSuffixRegex = - new Regex(DateTimeDefinitions.ExcludeSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ExcludeSuffixDateTime, RegexFlags, RegexTimeOut); public TurkishDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) @@ -74,6 +77,8 @@ public TurkishDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration confi TimePointExtractor = new BaseTimeExtractor(new TurkishTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new TurkishDurationExtractorConfiguration(this)); UtilityConfiguration = new TurkishDatetimeUtilityConfiguration(); + HolidayExtractor = new BaseHolidayExtractor(new TurkishHolidayExtractorConfiguration(this)); + } public IExtractor IntegerExtractor { get; } @@ -84,6 +89,8 @@ public TurkishDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration confi public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + public IDateTimeExtractor HolidayExtractor { get; } + Regex IDateTimeExtractorConfiguration.NowRegex => NowRegex; Regex IDateTimeExtractorConfiguration.SuffixRegex => SuffixRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateTimePeriodExtractorConfiguration.cs 
b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateTimePeriodExtractorConfiguration.cs index 2ec3e5cf71..fd6d4492c6 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateTimePeriodExtractorConfiguration.cs @@ -1,6 +1,10 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Turkish; using Microsoft.Recognizers.Text.Utilities; @@ -10,54 +14,57 @@ public class TurkishDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsCo IDateTimePeriodExtractorConfiguration { public static readonly Regex TimeNumberCombinedWithUnit = - new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); + + public static readonly Regex HyphenDateRegex = + new Regex(BaseDateTime.HyphenDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodTimeOfDayWithDateRegex = - new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodTimeOfDayWithDateRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeTimeUnitRegex = - new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeTimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RestOfDateTimeRegex = - new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RestOfDateTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags, 
RegexTimeOut); public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags, RegexTimeOut); public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrefixDayRegex = new Regex(DateTimeDefinitions.PrefixDayRegex, RegexFlags | RegexOptions.RightToLeft); public static readonly Regex SuffixRegex = - new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex WeekDaysRegex = - new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.WeekDayRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex FromRegex = - new Regex(DateTimeDefinitions.FromRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); private static readonly Regex ExcludeSuffixRegex = - new Regex(DateTimeDefinitions.ExcludeSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ExcludeSuffixRegex, RegexFlags, RegexTimeOut); private static readonly Regex RangePrefixRegex = - new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags); + new 
Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); private static readonly Regex[] SimpleCases = { @@ -66,30 +73,30 @@ public class TurkishDateTimePeriodExtractorConfiguration : BaseDateTimeOptionsCo }; private static readonly Regex PeriodTimeOfDayRegex = - new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodTimeOfDayRegex, RegexFlags, RegexTimeOut); private static readonly Regex PeriodSpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); private static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); private static readonly Regex TimeFollowedUnit = - new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); private static readonly Regex GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); private static readonly Regex MiddlePauseRegex = - new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags); + new Regex(DateTimeDefinitions.MiddlePauseRegex, RegexFlags, RegexTimeOut); private static readonly Regex RangeConnectorRegex = - new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); public TurkishDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; + TokenBeforeDate = DateTimeDefinitions.TokenListBeforeDate; CardinalExtractor = Number.Turkish.CardinalExtractor.GetInstance(); SingleDateExtractor = new BaseDateExtractor(new TurkishDateExtractorConfiguration(this)); @@ -97,6 +104,8 @@ public 
TurkishDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration SingleDateTimeExtractor = new BaseDateTimeExtractor(new TurkishDateTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new TurkishDurationExtractorConfiguration(this)); TimePeriodExtractor = new BaseTimePeriodExtractor(new TurkishTimePeriodExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new TurkishHolidayExtractorConfiguration(this)); + } public IEnumerable SimpleCasesRegex => SimpleCases; @@ -151,6 +160,8 @@ public TurkishDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration bool IDateTimePeriodExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + Regex IDateTimePeriodExtractorConfiguration.TasksmodeMealTimeofDayRegex => null; + public string TokenBeforeDate { get; } public IExtractor CardinalExtractor { get; } @@ -167,6 +178,8 @@ public TurkishDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration public IDateTimeExtractor TimeZoneExtractor { get; } + public IDateTimeExtractor HolidayExtractor { get; } + // TODO: these three methods are the same in DatePeriod, should be abstracted public bool GetFromTokenIndex(string text, out int index) { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDurationExtractorConfiguration.cs index e19ad8ef3b..a3d33ae438 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDurationExtractorConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; @@ -8,43 +12,49 @@ namespace Microsoft.Recognizers.Text.DateTime.Turkish public class TurkishDurationExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDurationExtractorConfiguration { public static readonly Regex DurationUnitRegex = - new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAndRegex = - new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAndRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationFollowedUnit = - new Regex(DateTimeDefinitions.DurationFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.DurationFollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex NumberCombinedWithDurationUnit = - new Regex(DateTimeDefinitions.NumberCombinedWithDurationUnit, RegexFlags); + new Regex(DateTimeDefinitions.NumberCombinedWithDurationUnit, RegexFlags, RegexTimeOut); public static readonly Regex AnUnitRegex = - new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.AnUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DuringRegex = - new Regex(DateTimeDefinitions.DuringRegex, RegexFlags); + new Regex(DateTimeDefinitions.DuringRegex, RegexFlags, RegexTimeOut); public static readonly Regex AllRegex = - new Regex(DateTimeDefinitions.AllRegex, RegexFlags); + new Regex(DateTimeDefinitions.AllRegex, RegexFlags, RegexTimeOut); public static readonly Regex HalfRegex = - new Regex(DateTimeDefinitions.HalfRegex, RegexFlags); + new Regex(DateTimeDefinitions.HalfRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConjunctionRegex = - new Regex(DateTimeDefinitions.ConjunctionRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConjunctionRegex, 
RegexFlags, RegexTimeOut); public static readonly Regex InexactNumberRegex = - new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberRegex, RegexFlags, RegexTimeOut); public static readonly Regex InexactNumberUnitRegex = - new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.InexactNumberUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeDurationUnitRegex = - new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeDurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex DurationConnectorRegex = - new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationConnectorRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModPrefixRegex = + new Regex(DateTimeDefinitions.ModPrefixRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex ModSuffixRegex = + new Regex(DateTimeDefinitions.ModSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecialNumberUnitRegex = null; @@ -103,5 +113,11 @@ public TurkishDurationExtractorConfiguration(IDateTimeOptionsConfiguration confi Regex IDurationExtractorConfiguration.MoreThanRegex => MoreThanRegex; Regex IDurationExtractorConfiguration.LessThanRegex => LessThanRegex; + + Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex; + + Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex; + + public Dictionary AmbiguityFiltersDict => null; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishHolidayExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishHolidayExtractorConfiguration.cs index 040ccdc092..650b55cfda 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishHolidayExtractorConfiguration.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishHolidayExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; @@ -8,16 +11,16 @@ namespace Microsoft.Recognizers.Text.DateTime.Turkish public class TurkishHolidayExtractorConfiguration : BaseDateTimeOptionsConfiguration, IHolidayExtractorConfiguration { public static readonly Regex YearRegex = - new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); public static readonly Regex H1 = - new Regex(DateTimeDefinitions.HolidayRegex1, RegexFlags); + new Regex(DateTimeDefinitions.HolidayRegex1, RegexFlags, RegexTimeOut); public static readonly Regex H2 = - new Regex(DateTimeDefinitions.HolidayRegex2, RegexFlags); + new Regex(DateTimeDefinitions.HolidayRegex2, RegexFlags, RegexTimeOut); public static readonly Regex H3 = - new Regex(DateTimeDefinitions.HolidayRegex3, RegexFlags); + new Regex(DateTimeDefinitions.HolidayRegex3, RegexFlags, RegexTimeOut); public static readonly Regex[] HolidayRegexList = { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishMergedExtractorConfiguration.cs index 9567234481..eebe262240 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishMergedExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Turkish; @@ -10,51 +13,54 @@ namespace Microsoft.Recognizers.Text.DateTime.Turkish public class TurkishMergedExtractorConfiguration : BaseDateTimeOptionsConfiguration, IMergedExtractorConfiguration { public static readonly Regex BeforeRegex = - new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags); + new Regex(DateTimeDefinitions.BeforeRegexWithAnchor, RegexFlags, RegexTimeOut); public static readonly Regex AfterRegex = - new Regex(DateTimeDefinitions.AfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterRegexWithAnchor, RegexFlags, RegexTimeOut); public static readonly Regex SinceRegex = - new Regex(DateTimeDefinitions.SinceRegex, RegexFlags); + new Regex(DateTimeDefinitions.SinceRegexWithAnchor, RegexFlags, RegexTimeOut); public static readonly Regex AroundRegex = - new Regex(DateTimeDefinitions.AroundRegex, RegexFlags); + new Regex(DateTimeDefinitions.AroundRegex, RegexFlags, RegexTimeOut); public static readonly Regex EqualRegex = - new Regex(BaseDateTime.EqualRegex, RegexFlags); + new Regex(BaseDateTime.EqualRegex, RegexFlags, RegexTimeOut); public static readonly Regex FromToRegex = - new Regex(DateTimeDefinitions.FromToRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromToRegex, RegexFlags, RegexTimeOut); public static readonly Regex SingleAmbiguousMonthRegex = - new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags); + new Regex(DateTimeDefinitions.SingleAmbiguousMonthRegex, RegexFlags, RegexTimeOut); public static readonly Regex PrepositionSuffixRegex = - new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmbiguousRangeModifierPrefix = - new Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags); + new 
Regex(DateTimeDefinitions.AmbiguousRangeModifierPrefix, RegexFlags, RegexTimeOut); public static readonly Regex NumberEndingPattern = - new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags); + new Regex(DateTimeDefinitions.NumberEndingPattern, RegexFlags, RegexTimeOut); public static readonly Regex SuffixAfterRegex = - new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.SuffixAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificDatePeriodRegex = - new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags, RegexTimeOut); public static readonly Regex FailFastRegex = new Regex(DateTimeDefinitions.FailFastRegex, RegexFlags | RegexOptions.Compiled); + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags, RegexTimeOut); + public static readonly Regex[] TermFilterRegexes = { // one on one - new Regex(DateTimeDefinitions.OneOnOneRegex, RegexFlags), + new Regex(DateTimeDefinitions.OneOnOneRegex, RegexFlags, RegexTimeOut), // (the)? 
(day|week|month|year) - new Regex(DateTimeDefinitions.SingleAmbiguousTermsRegex, RegexFlags), + new Regex(DateTimeDefinitions.SingleAmbiguousTermsRegex, RegexFlags, RegexTimeOut), }; public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher(); @@ -138,10 +144,17 @@ public TurkishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + Regex IMergedExtractorConfiguration.FailFastRegex => FailFastRegex; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; StringMatcher IMergedExtractorConfiguration.SuperfluousWordMatcher => SuperfluousWordMatcher; + + bool IMergedExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + + public Regex TasksModeMentionFilters { get; } + } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishSetExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishSetExtractorConfiguration.cs index cb51d51cf0..e0a8007771 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishSetExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishSetExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; @@ -9,28 +12,28 @@ namespace Microsoft.Recognizers.Text.DateTime.Turkish public class TurkishSetExtractorConfiguration : BaseDateTimeOptionsConfiguration, ISetExtractorConfiguration { public static readonly Regex SetUnitRegex = - new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DurationUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodicRegex = - new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodicRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachUnitRegex = - new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachPrefixRegex = - new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetLastRegex = - new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetLastRegex, RegexFlags, RegexTimeOut); public static readonly Regex EachDayRegex = - new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.EachDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetWeekDayRegex = - new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetWeekDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SetEachRegex = - new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags); + new Regex(DateTimeDefinitions.SetEachRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -60,6 +63,8 @@ public TurkishSetExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DateTimePeriodExtractor { get; } + bool 
ISetExtractorConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + Regex ISetExtractorConfiguration.LastRegex => SetLastRegex; Regex ISetExtractorConfiguration.EachPrefixRegex => EachPrefixRegex; @@ -70,7 +75,7 @@ public TurkishSetExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex ISetExtractorConfiguration.EachDayRegex => EachDayRegex; - Regex ISetExtractorConfiguration.BeforeEachDayRegex => null; + Regex ISetExtractorConfiguration.BeforeEachDayRegex => EachDayRegex; Regex ISetExtractorConfiguration.SetWeekDayRegex => SetWeekDayRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishTimeExtractorConfiguration.cs index 1a978945dc..3f2809ebfa 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishTimeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; @@ -10,114 +13,114 @@ public class TurkishTimeExtractorConfiguration : BaseDateTimeOptionsConfiguratio // part 1: smallest component // -------------------------------------- public static readonly Regex DescRegex = - new Regex(DateTimeDefinitions.DescRegex, RegexFlags); + new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut); public static readonly Regex HourNumRegex = - new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.HourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex MinuteNumRegex = - new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.MinuteNumRegex, RegexFlags, RegexTimeOut); // part 2: middle level component // -------------------------------------- // handle "... o'clock" public static readonly Regex OclockRegex = - new Regex(DateTimeDefinitions.OclockRegex, RegexFlags); + new Regex(DateTimeDefinitions.OclockRegex, RegexFlags, RegexTimeOut); // handle "... afternoon" public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); // handle "... in the morning" public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); // handle "half past ..." "a quarter to ..." 
// rename 'min' group to 'deltamin' public static readonly Regex LessThanOneHour = - new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags); + new Regex(DateTimeDefinitions.LessThanOneHour, RegexFlags, RegexTimeOut); // handle "six thirty", "six twenty one" public static readonly Regex WrittenTimeRegex = - new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.WrittenTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimePrefix = - new Regex(DateTimeDefinitions.TimePrefix, RegexFlags); + new Regex(DateTimeDefinitions.TimePrefix, RegexFlags, RegexTimeOut); public static readonly Regex TimeSuffix = - new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags); + new Regex(DateTimeDefinitions.TimeSuffix, RegexFlags, RegexTimeOut); public static readonly Regex BasicTime = - new Regex(DateTimeDefinitions.BasicTime, RegexFlags); + new Regex(DateTimeDefinitions.BasicTime, RegexFlags, RegexTimeOut); // handle special time such as 'at midnight', 'midnight', 'midday' public static readonly Regex MidnightRegex = - new Regex(DateTimeDefinitions.MidnightRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidnightRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidmorningRegex = - new Regex(DateTimeDefinitions.MidmorningRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidmorningRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidafternoonRegex = - new Regex(DateTimeDefinitions.MidafternoonRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidafternoonRegex, RegexFlags, RegexTimeOut); public static readonly Regex MiddayRegex = - new Regex(DateTimeDefinitions.MiddayRegex, RegexFlags); + new Regex(DateTimeDefinitions.MiddayRegex, RegexFlags, RegexTimeOut); public static readonly Regex MidTimeRegex = - new Regex(DateTimeDefinitions.MidTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.MidTimeRegex, RegexFlags, RegexTimeOut); // part 3: regex for time // -------------------------------------- // 
handle "at four" "at 3" public static readonly Regex AtRegex = - new Regex(DateTimeDefinitions.AtRegex, RegexFlags); + new Regex(DateTimeDefinitions.AtRegex, RegexFlags, RegexTimeOut); public static readonly Regex IshRegex = - new Regex(DateTimeDefinitions.IshRegex, RegexFlags); + new Regex(DateTimeDefinitions.IshRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex ConnectNumRegex = - new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.ConnectNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeBeforeAfterRegex = - new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeBeforeAfterRegex, RegexFlags, RegexTimeOut); public static readonly Regex[] TimeRegexList = { // (three min past)? seven|7|(seven thirty) pm - new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex1, RegexFlags, RegexTimeOut), // (three min past)? 3:00(:00)? (pm)? - new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex2, RegexFlags, RegexTimeOut), // (three min past)? 3.00 (pm) - new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex3, RegexFlags, RegexTimeOut), // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? (in the night) - new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex4, RegexFlags, RegexTimeOut), // (three min past) (five thirty|seven|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex5, RegexFlags, RegexTimeOut), // (five thirty|seven|7|7:00(:00)?) (pm)? 
(in the night) - new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex6, RegexFlags, RegexTimeOut), // (in the night) at? (five thirty|seven|7|7:00(:00)?) (pm)? - new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex7, RegexFlags, RegexTimeOut), - new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex9, RegexFlags, RegexTimeOut), // (three min past)? 3h00 (pm)? - new Regex(DateTimeDefinitions.TimeRegex10, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex10, RegexFlags, RegexTimeOut), // at 2.30, "at" prefix is required here // 3.30pm, "am/pm" suffix is required here - new Regex(DateTimeDefinitions.TimeRegex11, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex11, RegexFlags, RegexTimeOut), // saat 12'de öğleden sonra - new Regex(DateTimeDefinitions.TimeRegex12, RegexFlags), + new Regex(DateTimeDefinitions.TimeRegex12, RegexFlags, RegexTimeOut), // 340pm ConnectNumRegex, @@ -142,5 +145,9 @@ public TurkishTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeExtractor DurationExtractor { get; } public IDateTimeExtractor TimeZoneExtractor { get; } + + public string TimeTokenPrefix => DateTimeDefinitions.TimeTokenPrefix; + + public Dictionary AmbiguityFiltersDict => null; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishTimePeriodExtractorConfiguration.cs index 81bb2ec190..2806fdd2e3 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishTimePeriodExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; @@ -11,69 +14,69 @@ namespace Microsoft.Recognizers.Text.DateTime.Turkish public class TurkishTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfiguration, ITimePeriodExtractorConfiguration { public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); + new Regex(DateTimeDefinitions.TillRegex, RegexFlags, RegexTimeOut); public static readonly Regex HourRegex = - new Regex(DateTimeDefinitions.HourRegex, RegexFlags); + new Regex(DateTimeDefinitions.HourRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodHourNumRegex = - new Regex(DateTimeDefinitions.PeriodHourNumRegex, RegexFlags); + new Regex(DateTimeDefinitions.PeriodHourNumRegex, RegexFlags, RegexTimeOut); public static readonly Regex PeriodDescRegex = - new Regex(DateTimeDefinitions.DescRegex, RegexFlags); + new Regex(DateTimeDefinitions.DescRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmRegex = - new Regex(DateTimeDefinitions.PmRegex, RegexFlags); + new Regex(DateTimeDefinitions.PmRegex, RegexFlags, RegexTimeOut); public static readonly Regex AmRegex = - new Regex(DateTimeDefinitions.AmRegex, RegexFlags); + new Regex(DateTimeDefinitions.AmRegex, RegexFlags, RegexTimeOut); public static readonly Regex PureNumFromTo = - new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags); + new Regex(DateTimeDefinitions.PureNumFromTo, RegexFlags, RegexTimeOut); public static readonly Regex PureNumBetweenAnd = - new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.PureNumBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeFromTo = - new Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeFromTo, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeBetweenAnd = - new 
Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeBetweenAnd, RegexFlags, RegexTimeOut); public static readonly Regex PrepositionRegex = - new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags); + new Regex(DateTimeDefinitions.PrepositionRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeOfDayRegex = - new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex SpecificTimeOfDayRegex = - new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags); + new Regex(DateTimeDefinitions.SpecificTimeOfDayRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags, RegexTimeOut); public static readonly Regex TimeFollowedUnit = - new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeFollowedUnit, RegexFlags, RegexTimeOut); public static readonly Regex TimeNumberCombinedWithUnit = - new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags); + new Regex(DateTimeDefinitions.TimeNumberCombinedWithUnit, RegexFlags, RegexTimeOut); public static readonly Regex GeneralEndingRegex = - new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags); + new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex FromRegex = - new Regex(DateTimeDefinitions.FromRegex, RegexFlags); + new Regex(DateTimeDefinitions.FromRegex, RegexFlags, RegexTimeOut); private static readonly Regex ExcludeSuffixRegex = - new Regex(DateTimeDefinitions.ExcludeSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ExcludeSuffixRegex, RegexFlags, RegexTimeOut); private static readonly Regex 
RangePrefixRegex = - new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags, RegexTimeOut); private static readonly Regex RangeConnectorRegex = - new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags); + new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags, RegexTimeOut); public TurkishTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) @@ -153,5 +156,7 @@ public bool IsConnectorToken(string text) { return RangeConnectorRegex.IsExactMatch(text, trim: true); } + + public List ApplyPotentialPeriodAmbiguityHotfix(string text, List timePeriodErs) => TimePeriodFunctions.ApplyPotentialPeriodAmbiguityHotfix(text, timePeriodErs); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TimeParser.cs index d1ee06a1ea..f4e9335adf 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TimeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TimeParser.cs @@ -1,4 +1,10 @@ -using Microsoft.Recognizers.Text.Utilities; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; + +using Microsoft.Recognizers.Text.Utilities; + using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Turkish @@ -35,10 +41,10 @@ private DateTimeResolutionResult ParseIsh(string text, DateObject referenceTime) var hour = Constants.HalfDayHourCount; if (!string.IsNullOrEmpty(hourStr)) { - hour = int.Parse(hourStr); + hour = int.Parse(hourStr, CultureInfo.InvariantCulture); } - ret.Timex = "T" + hour.ToString("D2"); + ret.Timex = "T" + hour.ToString("D2", CultureInfo.InvariantCulture); ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(referenceTime.Year, referenceTime.Month, referenceTime.Day, hour, 0, 0); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishCommonDateTimeParserConfiguration.cs index 7c28345ec0..20f5c74d0e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishCommonDateTimeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Turkish; using Microsoft.Recognizers.Text.DateTime.Turkish.Utilities; @@ -26,13 +29,24 @@ public TurkishCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration co WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + CardinalExtractor = Number.Turkish.CardinalExtractor.GetInstance(); IntegerExtractor = Number.Turkish.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.Turkish.OrdinalExtractor.GetInstance(); + OrdinalExtractor = Number.Turkish.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new TurkishNumberParserConfiguration(numConfig)); + + TimeZoneParser = new BaseTimeZoneParser(new TurkishTimeZoneParserConfiguration(this)); - TimeZoneParser = new BaseTimeZoneParser(); - NumberParser = new BaseNumberParser(new TurkishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); DateExtractor = new BaseDateExtractor(new TurkishDateExtractorConfiguration(this)); + HolidayExtractor = new BaseHolidayExtractor(new TurkishHolidayExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new TurkishTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new TurkishDateTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new TurkishDurationExtractorConfiguration(this)); @@ -41,6 +55,7 @@ public TurkishCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration co DateTimePeriodExtractor = new BaseDateTimePeriodExtractor(new TurkishDateTimePeriodExtractorConfiguration(this)); 
DurationParser = new BaseDurationParser(new TurkishDurationParserConfiguration(this)); DateParser = new BaseDateParser(new TurkishDateParserConfiguration(this)); + HolidayTimeParser = new BaseHolidayParser(new TurkishHolidayParserConfiguration(this)); TimeParser = new TimeParser(new TurkishTimeParserConfiguration(this)); DateTimeParser = new BaseDateTimeParser(new TurkishDateTimeParserConfiguration(this)); DatePeriodParser = new BaseDatePeriodParser(new TurkishDatePeriodParserConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateParserConfiguration.cs index 449317100b..eb3b6d1d64 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateParserConfiguration.cs @@ -1,13 +1,20 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Turkish { public class TurkishDateParserConfiguration : BaseDateTimeOptionsConfiguration, IDateParserConfiguration { + public static readonly Regex LastTokenRegex = + new Regex(DateTimeDefinitions.LastRegex, RegexFlags, RegexTimeOut); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public TurkishDateParserConfiguration(ICommonDateTimeParserConfiguration config) @@ -21,6 +28,7 @@ public TurkishDateParserConfiguration(ICommonDateTimeParserConfiguration config) DurationExtractor = config.DurationExtractor; DateExtractor = config.DateExtractor; DurationParser = config.DurationParser; + HolidayParser = new BaseHolidayParser(new TurkishHolidayParserConfiguration(this)); DateRegexes = new TurkishDateExtractorConfiguration(this).DateRegexList; OnRegex = TurkishDateExtractorConfiguration.OnRegex; SpecialDayRegex = TurkishDateExtractorConfiguration.SpecialDayRegex; @@ -39,12 +47,13 @@ public TurkishDateParserConfiguration(ICommonDateTimeParserConfiguration config) StrictRelativeRegex = TurkishDateExtractorConfiguration.StrictRelativeRegex; YearSuffix = TurkishDateExtractorConfiguration.YearSuffix; RelativeWeekDayRegex = TurkishDateExtractorConfiguration.RelativeWeekDayRegex; + BeforeAfterRegex = TurkishDateExtractorConfiguration.BeforeAfterRegex; - RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexFlags); - NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); - PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); - UpcomingPrefixRegex = new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags); - PastPrefixRegex = new 
Regex(DateTimeDefinitions.PastPrefixRegex, RegexFlags); + RelativeDayRegex = new Regex(DateTimeDefinitions.RelativeDayRegex, RegexFlags, RegexTimeOut); + NextPrefixRegex = new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); + PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); + UpcomingPrefixRegex = new Regex(DateTimeDefinitions.UpcomingPrefixRegex, RegexFlags, RegexTimeOut); + PastPrefixRegex = new Regex(DateTimeDefinitions.PastPrefixRegex, RegexFlags, RegexTimeOut); DayOfMonth = config.DayOfMonth; DayOfWeek = config.DayOfWeek; @@ -76,6 +85,8 @@ public TurkishDateParserConfiguration(ICommonDateTimeParserConfiguration config) public IDateTimeParser DurationParser { get; } + public IDateTimeParser HolidayParser { get; } + public IEnumerable DateRegexes { get; } public IImmutableDictionary UnitMap { get; } @@ -124,6 +135,10 @@ public TurkishDateParserConfiguration(ICommonDateTimeParserConfiguration config) public Regex PastPrefixRegex { get; } + public Regex BeforeAfterRegex { get; } + + public Regex TasksModeDurationToDatePatterns { get; } + public IImmutableDictionary DayOfMonth { get; } public IImmutableDictionary DayOfWeek { get; } @@ -166,8 +181,7 @@ public int GetSwiftMonthOrYear(string text) public bool IsCardinalLast(string text) { - var trimmedText = text.Trim(); - return trimmedText.Equals("last"); + return LastTokenRegex.IsExactMatch(text, trim: true); } public string Normalize(string text) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDatePeriodParserConfiguration.cs index abfbd9d539..9888a1b17c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDatePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// 
Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; @@ -10,19 +14,19 @@ namespace Microsoft.Recognizers.Text.DateTime.Turkish public class TurkishDatePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDatePeriodParserConfiguration { public static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex ThisPrefixRegex = - new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfterNextSuffixRegex = - new Regex(DateTimeDefinitions.AfterNextSuffixRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfterNextSuffixRegex, RegexFlags, RegexTimeOut); public static readonly Regex RelativeRegex = - new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags, RegexTimeOut); public static readonly Regex UnspecificEndOfRangeRegex = - new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags); + new Regex(DateTimeDefinitions.UnspecificEndOfRangeRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -39,7 +43,7 @@ public class TurkishDatePeriodParserConfiguration : BaseDateTimeOptionsConfigura DateTimeDefinitions.YearTerms.Select(str => $" {str} ").ToList(); private static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); public TurkishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) @@ -92,6 +96,10 @@ public 
TurkishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c MoreThanRegex = TurkishDatePeriodExtractorConfiguration.MoreThanRegex; CenturySuffixRegex = TurkishDatePeriodExtractorConfiguration.CenturySuffixRegex; NowRegex = TurkishDatePeriodExtractorConfiguration.NowRegex; + FirstLastRegex = TurkishDatePeriodExtractorConfiguration.FirstLastRegex; + OfYearRegex = TurkishDatePeriodExtractorConfiguration.OfYearRegex; + SpecialDayRegex = TurkishDateExtractorConfiguration.SpecialDayRegex; + TodayNowRegex = new Regex(DateTimeDefinitions.TodayNowRegex, RegexOptions.Singleline); UnitMap = config.UnitMap; CardinalMap = config.CardinalMap; @@ -202,6 +210,14 @@ public TurkishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c public Regex NowRegex { get; } + public Regex SpecialDayRegex { get; } + + public Regex TodayNowRegex { get; } + + public Regex FirstLastRegex { get; } + + public Regex OfYearRegex { get; } + Regex ISimpleDatePeriodParserConfiguration.RelativeRegex => RelativeRegex; Regex IDatePeriodParserConfiguration.NextPrefixRegex => NextPrefixRegex; @@ -212,6 +228,8 @@ public TurkishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c Regex IDatePeriodParserConfiguration.UnspecificEndOfRangeRegex => UnspecificEndOfRangeRegex; + Regex IDatePeriodParserConfiguration.AmbiguousPointRangeRegex => null; + bool IDatePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; public IImmutableDictionary UnitMap { get; } @@ -285,54 +303,59 @@ public int GetSwiftYear(string text) public bool IsFuture(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.FutureTerms.Any(o => trimmedText.StartsWith(o)); + return DateTimeDefinitions.FutureTerms.Any(o => trimmedText.StartsWith(o, StringComparison.Ordinal)); } public bool IsLastCardinal(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.LastCardinalTerms.Any(o => trimmedText.Equals(o)); + return 
DateTimeDefinitions.LastCardinalTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } public bool IsMonthOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || (monthTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); } public bool IsMonthToDate(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.MonthToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } public bool IsWeekend(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || (weekendTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); } public bool IsWeekOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || (weekTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText) && !weekendTermsPadded.Any(o => trimmedText.Contains(o))); } + public bool IsFortnight(string text) + { + return false; + } + public bool IsYearOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o)) || + return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) || (yearTermsPadded.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)) || - (DateTimeDefinitions.GenericYearTerms.Any(o => trimmedText.EndsWith(o)) && 
UnspecificEndOfRangeRegex.IsMatch(trimmedText)); + (DateTimeDefinitions.GenericYearTerms.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal)) && UnspecificEndOfRangeRegex.IsMatch(trimmedText)); } public bool IsYearToDate(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o)); + return DateTimeDefinitions.YearToDateTerms.Any(o => trimmedText.Equals(o, StringComparison.Ordinal)); } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateTimeAltParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateTimeAltParserConfiguration.cs index 6b0fa968d9..323d6ddfa2 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateTimeAltParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateTimeAltParserConfiguration.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.DateTime.Turkish +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.DateTime.Turkish { public class TurkishDateTimeAltParserConfiguration : IDateTimeAltParserConfiguration { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateTimeParserConfiguration.cs index 108a87984e..c0e51b6909 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateTimeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -9,27 +12,27 @@ namespace Microsoft.Recognizers.Text.DateTime.Turkish public class TurkishDateTimeParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeParserConfiguration { public static readonly Regex AmTimeRegex = - new Regex(DateTimeDefinitions.AMTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.AMTimeRegex, RegexFlags, RegexTimeOut); public static readonly Regex PmTimeRegex = - new Regex(DateTimeDefinitions.PMTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.PMTimeRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex NowTimeRegex = - new Regex(DateTimeDefinitions.NowTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.NowTimeRegex, RegexFlags, RegexTimeOut); private static readonly Regex RecentlyTimeRegex = - new Regex(DateTimeDefinitions.RecentlyTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.RecentlyTimeRegex, RegexFlags, RegexTimeOut); private static readonly Regex AsapTimeRegex = - new Regex(DateTimeDefinitions.AsapTimeRegex, RegexFlags); + new Regex(DateTimeDefinitions.AsapTimeRegex, RegexFlags, RegexTimeOut); private static readonly Regex NextPrefixRegex = - new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags, RegexTimeOut); private static readonly Regex PreviousPrefixRegex = - new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags, RegexTimeOut); public TurkishDateTimeParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) @@ -42,6 +45,9 @@ public TurkishDateTimeParserConfiguration(ICommonDateTimeParserConfiguration con DateParser = config.DateParser; TimeParser = 
config.TimeParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; + NowRegex = TurkishDateTimeExtractorConfiguration.NowRegex; SimpleTimeOfTodayAfterRegex = TurkishDateTimeExtractorConfiguration.SimpleTimeOfTodayAfterRegex; @@ -117,6 +123,10 @@ public TurkishDateTimeParserConfiguration(ICommonDateTimeParserConfiguration con public IDateTimeUtilityConfiguration UtilityConfiguration { get; } + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + public int GetHour(string text, int hour) { int result = hour; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateTimePeriodParserConfiguration.cs index 9a9e7b874f..e847a51ec1 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDateTimePeriodParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; @@ -8,16 +11,16 @@ namespace Microsoft.Recognizers.Text.DateTime.Turkish public class TurkishDateTimePeriodParserConfiguration : BaseDateTimeOptionsConfiguration, IDateTimePeriodParserConfiguration { public static readonly Regex MorningStartEndRegex = - new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.MorningStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex AfternoonStartEndRegex = - new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.AfternoonStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex EveningStartEndRegex = - new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.EveningStartEndRegex, RegexFlags, RegexTimeOut); public static readonly Regex NightStartEndRegex = - new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags); + new Regex(DateTimeDefinitions.NightStartEndRegex, RegexFlags, RegexTimeOut); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -25,6 +28,7 @@ public TurkishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati : base(config) { TokenBeforeDate = DateTimeDefinitions.TokenListBeforeDate; + TokenBeforeTime = DateTimeDefinitions.TokenBeforeTime; DateExtractor = config.DateExtractor; TimeExtractor = config.TimeExtractor; @@ -39,8 +43,11 @@ public TurkishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati DurationParser = config.DurationParser; DateTimeParser = config.DateTimeParser; TimeZoneParser = config.TimeZoneParser; + HolidayExtractor = config.HolidayExtractor; + HolidayTimeParser = config.HolidayTimeParser; PureNumberFromToRegex = TurkishTimePeriodExtractorConfiguration.PureNumFromTo; + HyphenDateRegex = 
TurkishDateTimePeriodExtractorConfiguration.HyphenDateRegex; PureNumberBetweenAndRegex = TurkishTimePeriodExtractorConfiguration.PureNumBetweenAnd; SpecificTimeOfDayRegex = TurkishDateTimeExtractorConfiguration.SpecificTimeOfDayRegex; TimeOfDayRegex = TurkishDateTimeExtractorConfiguration.TimeOfDayRegex; @@ -65,6 +72,8 @@ public TurkishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati public string TokenBeforeDate { get; } + public string TokenBeforeTime { get; } + public IDateExtractor DateExtractor { get; } public IDateTimeExtractor TimeExtractor { get; } @@ -93,6 +102,8 @@ public TurkishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati public Regex PureNumberFromToRegex { get; } + public Regex HyphenDateRegex { get; } + public Regex PureNumberBetweenAndRegex { get; } public Regex SpecificTimeOfDayRegex { get; } @@ -129,48 +140,52 @@ public TurkishDateTimePeriodParserConfiguration(ICommonDateTimeParserConfigurati bool IDateTimePeriodParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + Regex IDateTimePeriodParserConfiguration.TasksmodeMealTimeofDayRegex => null; + public IImmutableDictionary UnitMap { get; } public IImmutableDictionary Numbers { get; } - public bool GetMatchedTimeRange(string text, out string timeStr, out int beginHour, out int endHour, out int endMin) + public IDateTimeExtractor HolidayExtractor { get; } + + public IDateTimeParser HolidayTimeParser { get; } + + public bool GetMatchedTimeRange(string text, out string todSymbol, out int beginHour, out int endHour, out int endMin) { var trimmedText = text.Trim(); beginHour = 0; endHour = 0; endMin = 0; + if (MorningStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TMO"; - beginHour = 8; - endHour = Constants.HalfDayHourCount; + todSymbol = Constants.Morning; } else if (AfternoonStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TAF"; - beginHour = Constants.HalfDayHourCount; - endHour = 16; + todSymbol = Constants.Afternoon; } 
else if (EveningStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TEV"; - beginHour = 16; - endHour = 20; + todSymbol = Constants.Evening; } else if (NightStartEndRegex.IsMatch(trimmedText)) { - timeStr = "TNI"; - beginHour = 20; - endHour = 23; - endMin = 59; + todSymbol = Constants.Night; } else { - timeStr = null; + todSymbol = null; return false; } + var parseResult = TimexUtility.ResolveTimeOfDay(todSymbol); + todSymbol = parseResult.Timex; + beginHour = parseResult.BeginHour; + endHour = parseResult.EndHour; + endMin = parseResult.EndMin; + return true; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDurationParserConfiguration.cs index 9ce3005a15..ce048e0d52 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDurationParserConfiguration.cs @@ -1,10 +1,20 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Turkish; namespace Microsoft.Recognizers.Text.DateTime.Turkish { public class TurkishDurationParserConfiguration : BaseDateTimeOptionsConfiguration, IDurationParserConfiguration { + + public static readonly Regex PrefixArticleRegex = + new Regex(DateTimeDefinitions.PrefixArticleRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public TurkishDurationParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -34,7 +44,7 @@ public TurkishDurationParserConfiguration(ICommonDateTimeParserConfiguration con public IExtractor CardinalExtractor { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public IParser NumberParser { get; } @@ -42,6 +52,8 @@ public TurkishDurationParserConfiguration(ICommonDateTimeParserConfiguration con public Regex AnUnitRegex { get; } + Regex IDurationParserConfiguration.PrefixArticleRegex => PrefixArticleRegex; + public Regex DuringRegex { get; } public Regex AllDateUnitRegex { get; } @@ -62,6 +74,8 @@ public TurkishDurationParserConfiguration(ICommonDateTimeParserConfiguration con public Regex SpecialNumberUnitRegex { get; } + bool IDurationParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + public IImmutableDictionary UnitMap { get; } public IImmutableDictionary UnitValueMap { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishHolidayParserConfiguration.cs index 6de5e69e7f..4a6b1f9cd0 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishHolidayParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishHolidayParserConfiguration.cs @@ -1,7 +1,9 
@@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Collections.Immutable; -using System.Globalization; using Microsoft.Recognizers.Definitions.Turkish; using DateObject = System.DateTime; @@ -19,17 +21,20 @@ public TurkishHolidayParserConfiguration(IDateTimeOptionsConfiguration config) public override int GetSwiftYear(string text) { - var trimmedText = text.Trim().ToLowerInvariant(); + var trimmedText = text.Trim(); var swift = -10; - if (trimmedText.StartsWith("gelecek")) + + // @TODO move hardcoded values to resources file + + if (trimmedText.StartsWith("gelecek", StringComparison.Ordinal)) { swift = 1; } - else if (trimmedText.StartsWith("geçen")) + else if (trimmedText.StartsWith("geçen", StringComparison.Ordinal)) { swift = -1; } - else if (trimmedText.StartsWith("bu")) + else if (trimmedText.StartsWith("bu", StringComparison.Ordinal)) { swift = 0; } @@ -75,16 +80,16 @@ protected override IDictionary> InitHolidayFuncs() { "earthday", EarthDay }, { "stgeorgeday", StGeorgeDay }, { "mayday", Mayday }, - { "cincodemayoday", CincoDeMayoday }, + { "cincodemayoday", CincoDeMayoDay }, { "baptisteday", BaptisteDay }, { "usindependenceday", UsaIndependenceDay }, { "independenceday", UsaIndependenceDay }, { "bastilleday", BastilleDay }, { "halloweenday", HalloweenDay }, - { "allhallowday", AllHallowDay }, - { "allsoulsday", AllSoulsday }, + { "allhallowday", AllHallowsDay }, + { "allsoulsday", AllSoulsDay }, { "guyfawkesday", GuyFawkesDay }, - { "veteransday", Veteransday }, + { "veteransday", VeteransDay }, { "christmaseve", ChristmasEve }, { "newyeareve", NewYearEve }, { "easterday", EasterDay }, @@ -151,7 +156,7 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject Mayday(int year) => new DateObject(year, 5, 1); - private static DateObject CincoDeMayoday(int year) => new DateObject(year, 5, 5); + private static 
DateObject CincoDeMayoDay(int year) => new DateObject(year, 5, 5); private static DateObject BaptisteDay(int year) => new DateObject(year, 6, 24); @@ -161,15 +166,15 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject HalloweenDay(int year) => new DateObject(year, 10, 31); - private static DateObject AllHallowDay(int year) => new DateObject(year, 11, 1); + private static DateObject AllHallowsDay(int year) => new DateObject(year, 11, 1); - private static DateObject AllSoulsday(int year) => new DateObject(year, 11, 2); + private static DateObject AllSoulsDay(int year) => new DateObject(year, 11, 2); private static DateObject GuyFawkesDay(int year) => new DateObject(year, 11, 5); - private static DateObject Veteransday(int year) => new DateObject(year, 11, 11); + private static DateObject VeteransDay(int year) => new DateObject(year, 11, 11); - private static DateObject EasterDay(int year) => CalculateHolydaysByEaster(year); + private static DateObject EasterDay(int year) => HolidayFunctions.CalculateHolidayByEaster(year); private static DateObject AshWednesday(int year) => EasterDay(year).AddDays(-46); @@ -193,9 +198,9 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject CorpusChristi(int year) => EasterDay(year).AddDays(60); - private static DateObject Ramadan(int year) => IslamicHoliday(year, "ramadan"); + private static DateObject Ramadan(int year) => HolidayFunctions.IslamicHoliday(year, HolidayFunctions.IslamicHolidayType.Ramadan); - private static DateObject Sacrifice(int year) => IslamicHoliday(year, "sacrifice"); + private static DateObject Sacrifice(int year) => HolidayFunctions.IslamicHoliday(year, HolidayFunctions.IslamicHolidayType.Sacrifice); private static DateObject Republic(int year) => new DateObject(year, 10, 29); @@ -207,67 +212,5 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject Democracy(int year) => new DateObject(year, 7, 15); - // function adopted from German 
implementation - private static DateObject CalculateHolydaysByEaster(int year, int days = 0) - { - int day = 0; - int month = 3; - - int g = year % 19; - int c = year / 100; - int h = (c - (int)(c / 4) - (int)(((8 * c) + 13) / 25) + (19 * g) + 15) % 30; - int i = h - ((int)(h / 28) * (1 - ((int)(h / 28) * (int)(29 / (h + 1)) * (int)((21 - g) / 11)))); - - day = i - ((year + (int)(year / 4) + i + 2 - c + (int)(c / 4)) % 7) + 28; - - if (day > 31) - { - month++; - day -= 31; - } - - return DateObject.MinValue.SafeCreateFromValue(year, month, day).AddDays(days); - } - - // Calculates the exact gregorian date for the given holiday using only gregorian year and exact hijri date - private static DateObject IslamicHoliday(int year, string holidayType) - { - int y = 0; - int m = 0; - int d = 0; - - int hijriDay = 1; - int hijriMonth = 1; - int hijriYear = 1; - - var gregorian = new GregorianCalendar(); - var hijri = new HijriCalendar(); - - if (holidayType == "ramadan") - { - hijriDay = 1; - hijriMonth = 10; - } - else if (holidayType == "sacrifice") - { - hijriDay = 10; - hijriMonth = 12; - } - - for (hijriYear = 1; hijriYear <= 9999; hijriYear++) - { - var hijriDate = new DateObject(hijriYear, hijriMonth, hijriDay, hijri); - y = gregorian.GetYear(hijriDate); - m = gregorian.GetMonth(hijriDate); - d = gregorian.GetDayOfMonth(hijriDate); - - if (y == year) - { - break; - } - } - - return DateObject.MinValue.SafeCreateFromValue(y, m, d); - } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishMergedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishMergedParserConfiguration.cs index 02956a0377..eb7643609b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishMergedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishMergedParserConfiguration.cs @@ -1,16 +1,28 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Turkish; using Microsoft.Recognizers.Text.Matcher; namespace Microsoft.Recognizers.Text.DateTime.Turkish { public sealed class TurkishMergedParserConfiguration : TurkishCommonDateTimeParserConfiguration, IMergedParserConfiguration { + public static readonly Regex BeforeRegex = + new Regex(DateTimeDefinitions.BeforeRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex AfterRegex = + new Regex(DateTimeDefinitions.AfterRegex, RegexFlags, RegexTimeOut); + + public static readonly Regex SinceRegex = + new Regex(DateTimeDefinitions.SinceRegex, RegexFlags, RegexTimeOut); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + public TurkishMergedParserConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - BeforeRegex = TurkishMergedExtractorConfiguration.BeforeRegex; - AfterRegex = TurkishMergedExtractorConfiguration.AfterRegex; - SinceRegex = TurkishMergedExtractorConfiguration.SinceRegex; AroundRegex = TurkishMergedExtractorConfiguration.AroundRegex; EqualRegex = TurkishMergedExtractorConfiguration.EqualRegex; SuffixAfter = TurkishMergedExtractorConfiguration.SuffixAfterRegex; @@ -23,14 +35,14 @@ public TurkishMergedParserConfiguration(IDateTimeOptionsConfiguration config) DateTimePeriodParser = new BaseDateTimePeriodParser(new TurkishDateTimePeriodParserConfiguration(this)); SetParser = new BaseSetParser(new TurkishSetParserConfiguration(this)); HolidayParser = new BaseHolidayParser(new TurkishHolidayParserConfiguration(this)); - TimeZoneParser = new BaseTimeZoneParser(); + TimeZoneParser = new BaseTimeZoneParser(new TurkishTimeZoneParserConfiguration(this)); } - public Regex BeforeRegex { get; } + Regex IMergedParserConfiguration.BeforeRegex => BeforeRegex; - public Regex AfterRegex { get; } + Regex IMergedParserConfiguration.AfterRegex => AfterRegex; - 
public Regex SinceRegex { get; } + Regex IMergedParserConfiguration.SinceRegex => SinceRegex; public Regex AroundRegex { get; } @@ -45,5 +57,7 @@ public TurkishMergedParserConfiguration(IDateTimeOptionsConfiguration config) public IDateTimeParser HolidayParser { get; } public StringMatcher SuperfluousWordMatcher { get; } + + bool IMergedParserConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishSetParserConfiguration.cs index 5c42d8fc86..538b87eb42 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishSetParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishSetParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -11,7 +15,10 @@ public class TurkishSetParserConfiguration : BaseDateTimeOptionsConfiguration, I private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); + new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut); + + // pass FutureTerms as List to ReplaceValueInTextWithFutTerm function + private static readonly List ThisTerms = (List)DateTimeDefinitions.FutureTerms; public TurkishSetParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) @@ -148,5 +155,7 @@ public bool GetMatchedUnitTimex(string text, out string timex) } public string WeekDayGroupMatchString(Match match) => SetHandler.WeekDayGroupMatchString(match); + + public string ReplaceValueInTextWithFutTerm(string text, string value) => TasksModeSetHandler.ReplaceValueInTextWithFutTerm(text, value, ThisTerms); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishTimeParserConfiguration.cs index 9da7e1e357..7b03dd1715 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishTimeParserConfiguration.cs @@ -1,5 +1,9 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; using Microsoft.Recognizers.Text.DateTime.Utilities; @@ -12,22 +16,22 @@ public class TurkishTimeParserConfiguration : BaseDateTimeOptionsConfiguration, private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex TimeSuffixFull = - new Regex(DateTimeDefinitions.TimeSuffixFull, RegexFlags); + new Regex(DateTimeDefinitions.TimeSuffixFull, RegexFlags, RegexTimeOut); private static readonly Regex LunchRegex = - new Regex(DateTimeDefinitions.LunchRegex, RegexFlags); + new Regex(DateTimeDefinitions.LunchRegex, RegexFlags, RegexTimeOut); private static readonly Regex NightRegex = - new Regex(DateTimeDefinitions.NightRegex, RegexFlags); + new Regex(DateTimeDefinitions.NightRegex, RegexFlags, RegexTimeOut); private static readonly Regex HalfTokenRegex = - new Regex(DateTimeDefinitions.HalfTokenRegex, RegexFlags); + new Regex(DateTimeDefinitions.HalfTokenRegex, RegexFlags, RegexTimeOut); private static readonly Regex QuarterTokenRegex = - new Regex(DateTimeDefinitions.QuarterTokenRegex, RegexFlags); + new Regex(DateTimeDefinitions.QuarterTokenRegex, RegexFlags, RegexTimeOut); private static readonly Regex ToTokenRegex = - new Regex(DateTimeDefinitions.ToTokenRegex, RegexFlags); + new Regex(DateTimeDefinitions.ToTokenRegex, RegexFlags, RegexTimeOut); public TurkishTimeParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) @@ -74,7 +78,7 @@ public void AdjustByPrefix(string prefix, ref int hour, ref int min, ref bool ha var minStr = match.Groups["deltamin"].Value; if (!string.IsNullOrWhiteSpace(minStr)) { - deltaMin = int.Parse(minStr); + deltaMin = int.Parse(minStr, CultureInfo.InvariantCulture); } else { diff --git 
a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishTimePeriodParserConfiguration.cs index 20f6795894..a849707c4a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishTimePeriodParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; @@ -11,7 +15,7 @@ public class TurkishTimePeriodParserConfiguration : BaseDateTimeOptionsConfigura private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex PluralTokenRegex = - new Regex(DateTimeDefinitions.PluralTokenRegex, RegexFlags); + new Regex(DateTimeDefinitions.PluralTokenRegex, RegexFlags, RegexTimeOut); public TurkishTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) @@ -59,7 +63,7 @@ public TurkishTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration c public IDateTimeUtilityConfiguration UtilityConfiguration { get; } - public bool GetMatchedTimexRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) + public bool GetMatchedTimeRange(string text, out string timex, out int beginHour, out int endHour, out int endMin) { var trimmedText = text.Trim(); if (PluralTokenRegex.IsMatch(trimmedText)) @@ -72,23 +76,28 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou endMin = 0; var timeOfDay = string.Empty; - if (DateTimeDefinitions.MorningTermList.Any(o => trimmedText.EndsWith(o))) + + if (DateTimeDefinitions.MorningTermList.Any(o => 
trimmedText.EndsWith(o, StringComparison.Ordinal) || + trimmedText.StartsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Morning; } - else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.AfternoonTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal) || + trimmedText.StartsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Afternoon; } - else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.EveningTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal) || + trimmedText.StartsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Evening; } - else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o))) + else if (DateTimeDefinitions.DaytimeTermList.Any(o => trimmedText.Equals(o, StringComparison.Ordinal))) { timeOfDay = Constants.Daytime; } - else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o))) + else if (DateTimeDefinitions.NightTermList.Any(o => trimmedText.EndsWith(o, StringComparison.Ordinal) || + trimmedText.StartsWith(o, StringComparison.Ordinal))) { timeOfDay = Constants.Night; } @@ -102,7 +111,7 @@ public bool GetMatchedTimexRange(string text, out string timex, out int beginHou return false; } - var parseResult = TimexUtility.ParseTimeOfDay(timeOfDay); + var parseResult = TimexUtility.ResolveTimeOfDay(timeOfDay); timex = parseResult.Timex; beginHour = parseResult.BeginHour; endHour = parseResult.EndHour; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishTimeZoneParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishTimeZoneParserConfiguration.cs new file mode 100644 index 0000000000..c58d8483b9 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishTimeZoneParserConfiguration.cs @@ -0,0 +1,33 @@ +using System; +using System.Collections.Generic; +using 
System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Turkish; + +namespace Microsoft.Recognizers.Text.DateTime.Turkish +{ + public class TurkishTimeZoneParserConfiguration : BaseDateTimeOptionsConfiguration, ITimeZoneParserConfiguration + { + public static readonly string TimeZoneEndRegex = TimeZoneDefinitions.TimeZoneEndRegex; + + public static readonly Dictionary FullToMinMapping = TimeZoneDefinitions.FullToMinMapping; + + public static readonly Regex DirectUtcRegex = + new Regex(TimeZoneDefinitions.DirectUtcRegex, RegexOptions.IgnoreCase | RegexOptions.Singleline); + + public static readonly Dictionary AbbrToMinMapping = TimeZoneDefinitions.AbbrToMinMapping; + + public TurkishTimeZoneParserConfiguration(IDateTimeOptionsConfiguration config) + : base(config) + { + } + + string ITimeZoneParserConfiguration.TimeZoneEndRegex => TimeZoneEndRegex; + + Dictionary ITimeZoneParserConfiguration.FullToMinMapping => FullToMinMapping; + + Regex ITimeZoneParserConfiguration.DirectUtcRegex => DirectUtcRegex; + + Dictionary ITimeZoneParserConfiguration.AbbrToMinMapping => AbbrToMinMapping; + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Utilities/TurkishDatetimeUtilityConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Utilities/TurkishDatetimeUtilityConfiguration.cs index 06bc0db687..96fa9d911e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Utilities/TurkishDatetimeUtilityConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Utilities/TurkishDatetimeUtilityConfiguration.cs @@ -1,73 +1,32 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; using Microsoft.Recognizers.Text.DateTime.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Turkish.Utilities { - public class TurkishDatetimeUtilityConfiguration : IDateTimeUtilityConfiguration + public class TurkishDatetimeUtilityConfiguration : BaseDatetimeUtilityConfiguration { - public static readonly Regex AgoRegex = - new Regex(DateTimeDefinitions.AgoRegex, RegexFlags); - - public static readonly Regex LaterRegex = - new Regex(DateTimeDefinitions.LaterRegex, RegexFlags); - - public static readonly Regex InConnectorRegex = - new Regex(DateTimeDefinitions.InConnectorRegex, RegexFlags); - - public static readonly Regex SinceYearSuffixRegex = - new Regex(DateTimeDefinitions.SinceYearSuffixRegex, RegexFlags); - - public static readonly Regex WithinNextPrefixRegex = - new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags); - - public static readonly Regex AmDescRegex = - new Regex(DateTimeDefinitions.AmDescRegex, RegexFlags); - - public static readonly Regex PmDescRegex = - new Regex(DateTimeDefinitions.PmDescRegex, RegexFlags); - - public static readonly Regex AmPmDescRegex = - new Regex(DateTimeDefinitions.AmPmDescRegex, RegexFlags); - - public static readonly Regex RangeUnitRegex = - new Regex(DateTimeDefinitions.RangeUnitRegex, RegexFlags); - - public static readonly Regex TimeUnitRegex = - new Regex(DateTimeDefinitions.TimeUnitRegex, RegexFlags); - - public static readonly Regex DateUnitRegex = - new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); - - public static readonly Regex CommonDatePrefixRegex = - new Regex(DateTimeDefinitions.CommonDatePrefixRegex, RegexFlags); - - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - - Regex IDateTimeUtilityConfiguration.LaterRegex => LaterRegex; - - Regex IDateTimeUtilityConfiguration.AgoRegex => AgoRegex; - - Regex 
IDateTimeUtilityConfiguration.InConnectorRegex => InConnectorRegex; - - Regex IDateTimeUtilityConfiguration.SinceYearSuffixRegex => SinceYearSuffixRegex; - - Regex IDateTimeUtilityConfiguration.WithinNextPrefixRegex => WithinNextPrefixRegex; - - Regex IDateTimeUtilityConfiguration.AmDescRegex => AmDescRegex; - - Regex IDateTimeUtilityConfiguration.PmDescRegex => PmDescRegex; - - Regex IDateTimeUtilityConfiguration.AmPmDescRegex => AmPmDescRegex; - - Regex IDateTimeUtilityConfiguration.RangeUnitRegex => RangeUnitRegex; - - Regex IDateTimeUtilityConfiguration.TimeUnitRegex => TimeUnitRegex; - - Regex IDateTimeUtilityConfiguration.DateUnitRegex => DateUnitRegex; - - Regex IDateTimeUtilityConfiguration.CommonDatePrefixRegex => CommonDatePrefixRegex; - - bool IDateTimeUtilityConfiguration.CheckBothBeforeAfter => DateTimeDefinitions.CheckBothBeforeAfter; + public TurkishDatetimeUtilityConfiguration() + : base( + DateTimeDefinitions.AgoRegex, + DateTimeDefinitions.LaterRegex, + DateTimeDefinitions.InConnectorRegex, + DateTimeDefinitions.SinceYearSuffixRegex, + DateTimeDefinitions.WithinNextPrefixRegex, + DateTimeDefinitions.AmDescRegex, + DateTimeDefinitions.PmDescRegex, + DateTimeDefinitions.AmPmDescRegex, + DateTimeDefinitions.RangeUnitRegex, + DateTimeDefinitions.TimeUnitRegex, + DateTimeDefinitions.DateUnitRegex, + DateTimeDefinitions.CommonDatePrefixRegex, + DateTimeDefinitions.RangePrefixRegex, + RegexOptions.Singleline | RegexOptions.ExplicitCapture, + DateTimeDefinitions.CheckBothBeforeAfter) + { + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/AgoLaterUtil.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/AgoLaterUtil.cs index 12f5553186..1cce5abc2a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/AgoLaterUtil.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/AgoLaterUtil.cs @@ -1,9 +1,13 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -36,70 +40,68 @@ public static List ExtractorDurationWithBeforeAndAfter(string text, Extra var afterString = text.Substring(pos); var beforeString = text.Substring(0, (int)er.Start); var isTimeDuration = utilityConfiguration.TimeUnitRegex.Match(er.Text).Success; - - if (MatchingUtil.GetAgoLaterIndex(afterString, utilityConfiguration.AgoRegex, out var index)) + int index; + bool isMatch = false; + var agoLaterRegexes = new List { - // We don't support cases like "5 minutes from today" for now - // Cases like "5 minutes ago" or "5 minutes from now" are supported - // Cases like "2 days before today" or "2 weeks from today" are also supported - var isDayMatchInAfterString = utilityConfiguration.AgoRegex.Match(afterString).Groups["day"].Success; - - if (!(isTimeDuration && isDayMatchInAfterString)) - { - ret.Add(new Token(er.Start ?? 0, (er.Start + er.Length ?? 0) + index)); - } + utilityConfiguration.AgoRegex, + utilityConfiguration.LaterRegex, + }; - if (utilityConfiguration.CheckBothBeforeAfter && !isDayMatchInAfterString) - { - // check if regex match is split between beforeString and afterString - string beforeAfterStr = beforeString + afterString.Substring(0, index); - if (MatchingUtil.GetAgoLaterIndexInBeforeString(beforeAfterStr, utilityConfiguration.AgoRegex, out var indexStart)) - { - isDayMatchInAfterString = utilityConfiguration.AgoRegex.Match(beforeAfterStr).Groups["day"].Success; - - if (isDayMatchInAfterString && !(isTimeDuration && isDayMatchInAfterString)) - { - ret.Add(new Token(indexStart, (er.Start + er.Length ?? 
0) + index)); - } - } - } - } - else if (utilityConfiguration.CheckBothBeforeAfter && MatchingUtil.GetAgoLaterIndexInBeforeString(beforeString, utilityConfiguration.AgoRegex, out index)) - { - // Check also beforeString - var isDayMatchInBeforeString = utilityConfiguration.AgoRegex.Match(beforeString).Groups["day"].Success; - if (!(isTimeDuration && isDayMatchInBeforeString)) - { - ret.Add(new Token(index, (er.Start + er.Length ?? 0) + index)); - } - } - else if (MatchingUtil.GetAgoLaterIndex(afterString, utilityConfiguration.LaterRegex, out index) || (utilityConfiguration.CheckBothBeforeAfter && - MatchingUtil.GetAgoLaterIndexInBeforeString(beforeString, utilityConfiguration.LaterRegex, out index))) + foreach (var regex in agoLaterRegexes) { Token tokAfter = null, tokBefore = null; - if (MatchingUtil.GetAgoLaterIndex(afterString, utilityConfiguration.LaterRegex, out index)) + bool isDayMatch = false; + + // Check afterString + if (MatchingUtil.GetAgoLaterIndex(afterString, regex, out index, inSuffix: true)) { - var isDayMatchInAfterString = utilityConfiguration.LaterRegex.Match(afterString).Groups["day"].Success; + // We don't support cases like "5 minutes from today" for now + // Cases like "5 minutes ago" or "5 minutes from now" are supported + // Cases like "2 days before today" or "2 weeks from today" are also supported + isDayMatch = regex.Match(afterString).Groups["day"].Success; - if (!(isTimeDuration && isDayMatchInAfterString)) + if (!(isTimeDuration && isDayMatch)) { tokAfter = new Token(er.Start ?? 0, (er.Start + er.Length ?? 
0) + index); + isMatch = true; } } - // Check also beforeString - if (utilityConfiguration.CheckBothBeforeAfter && MatchingUtil.GetAgoLaterIndexInBeforeString(beforeString, utilityConfiguration.LaterRegex, out index)) + if (utilityConfiguration.CheckBothBeforeAfter) { - var isDayMatchInBeforeString = utilityConfiguration.LaterRegex.Match(beforeString).Groups["day"].Success; - if (!(isTimeDuration && isDayMatchInBeforeString)) + // Check if regex match is split between beforeString and afterString + if (!isDayMatch && isMatch) + { + string beforeAfterStr = beforeString + afterString.Substring(0, index); + var isRangeMatch = utilityConfiguration.RangePrefixRegex.MatchBegin(afterString.Substring(index), trim: true).Success; + if (!isRangeMatch && MatchingUtil.GetAgoLaterIndex(beforeAfterStr, regex, out var indexStart, inSuffix: false)) + { + isDayMatch = regex.Match(beforeAfterStr).Groups["day"].Success; + + if (isDayMatch && !(isTimeDuration && isDayMatch)) + { + ret.Add(new Token(indexStart, (er.Start + er.Length ?? 0) + index)); + isMatch = true; + } + } + } + + // Check also beforeString + if (MatchingUtil.GetAgoLaterIndex(beforeString, regex, out index, inSuffix: false)) { - tokBefore = new Token(index, er.Start + er.Length ?? 0); + isDayMatch = regex.Match(beforeString).Groups["day"].Success; + if (!(isTimeDuration && isDayMatch)) + { + tokBefore = new Token(index, er.Start + er.Length ?? 
0); + isMatch = true; + } } } if (tokAfter != null && tokBefore != null && tokBefore.Start + tokBefore.Length > tokAfter.Start) { - // merge overlapping tokens + // Merge overlapping tokens ret.Add(new Token(tokBefore.Start, tokAfter.Start + tokAfter.Length - tokBefore.Start)); } else if (tokAfter != null) @@ -110,50 +112,57 @@ public static List ExtractorDurationWithBeforeAndAfter(string text, Extra { ret.Add(tokBefore); } - } - else if (MatchingUtil.GetTermIndex(beforeString, utilityConfiguration.InConnectorRegex, out index)) - { - // For range unit like "week, month, year", it should output dateRange or datetimeRange - if (!utilityConfiguration.RangeUnitRegex.IsMatch(er.Text)) + + if (isMatch) { - if (er.Start != null && er.Length != null && (int)er.Start >= index) - { - ret.Add(new Token((int)er.Start - index, (int)er.Start + (int)er.Length)); - } + break; } } - else if (utilityConfiguration.CheckBothBeforeAfter && MatchingUtil.GetAgoLaterIndex(afterString, utilityConfiguration.InConnectorRegex, out index)) + + if (!isMatch) { - // Check also afterString - // For range unit like "week, month, year", it should output dateRange or datetimeRange - if (!utilityConfiguration.RangeUnitRegex.IsMatch(er.Text)) + // Item1 is the main regex to be tested + // Item2 is a list of unit regexes used to validate the extraction (in case of match, the extraction is discarded) + var inWithinRegexTuples = new List<(Regex, List)> + { + (utilityConfiguration.InConnectorRegex, new List { utilityConfiguration.RangeUnitRegex }), + (utilityConfiguration.WithinNextPrefixRegex, new List { utilityConfiguration.DateUnitRegex, utilityConfiguration.TimeUnitRegex }), + }; + + foreach (var regex in inWithinRegexTuples) { - if (er.Start != null && er.Length != null) + bool isMatchAfter = false; + if (MatchingUtil.GetTermIndex(beforeString, regex.Item1, out index)) { - ret.Add(new Token((int)er.Start, (int)er.Start + (int)er.Length + index)); + isMatch = true; } - } - } - else if 
(MatchingUtil.GetTermIndex(beforeString, utilityConfiguration.WithinNextPrefixRegex, out index)) - { - // For range unit like "week, month, year, day, second, minute, hour", it should output dateRange or datetimeRange - if (!utilityConfiguration.DateUnitRegex.IsMatch(er.Text) && !utilityConfiguration.TimeUnitRegex.IsMatch(er.Text)) - { - if (er.Start != null && er.Length != null && (int)er.Start >= index) + else if (utilityConfiguration.CheckBothBeforeAfter && MatchingUtil.GetAgoLaterIndex(afterString, regex.Item1, out index, inSuffix: true)) { - ret.Add(new Token((int)er.Start - index, (int)er.Start + (int)er.Length)); + // Check also afterString + isMatch = isMatchAfter = true; } - } - } - else if (utilityConfiguration.CheckBothBeforeAfter && MatchingUtil.GetAgoLaterIndex(afterString, utilityConfiguration.WithinNextPrefixRegex, out index)) - { - // Check also afterString - // For range unit like "week, month, year, day, second, minute, hour", it should output dateRange or datetimeRange - if (!utilityConfiguration.DateUnitRegex.IsMatch(er.Text) && !utilityConfiguration.TimeUnitRegex.IsMatch(er.Text)) - { - if (er.Start != null && er.Length != null) + + if (isMatch) { - ret.Add(new Token((int)er.Start, (int)er.Start + (int)er.Length + index)); + // For InConnectorRegex and range unit like "week, month, year", it should output dateRange or datetimeRange + // For WithinNextPrefixRegex and range unit like "week, month, year, day, second, minute, hour", it should output dateRange or datetimeRange + bool isUnitMatch = false; + foreach (var unitRegex in regex.Item2) + { + isUnitMatch = isUnitMatch || unitRegex.IsMatch(er.Text); + } + + if (!isUnitMatch) + { + if (er.Start != null && er.Length != null && ((int)er.Start >= index || isMatchAfter)) + { + int start = (int)er.Start - (!isMatchAfter ? index : 0); + int end = (int)er.Start + (int)er.Length + (isMatchAfter ? 
index : 0); + ret.Add(new Token(start, end)); + } + } + + break; } } } @@ -225,106 +234,97 @@ private static DateTimeResolutionResult GetAgoLaterResult( ret.Mod = Constants.LESS_THAN_MOD; } - if (MatchingUtil.ContainsAgoLaterIndex(afterStr, utilityConfiguration.AgoRegex)) + int swift = 0; + bool isMatch = false, isLater = false; + string dayStr = null; + + // Item2 is a label identifying the regex defined in Item1 + var agoLaterRegexTuples = new List<(Regex, string)> { - var match = utilityConfiguration.AgoRegex.Match(afterStr); - var swift = 0; + (utilityConfiguration.AgoRegex, Constants.AGO_LABEL), + (utilityConfiguration.LaterRegex, Constants.LATER_LABEL), + }; - // Handle cases like "3 days before yesterday" - if (match.Success && !string.IsNullOrEmpty(match.Groups["day"].Value)) + // AgoRegex and LaterRegex cases + foreach (var regex in agoLaterRegexTuples) + { + // Match in afterStr + if (MatchingUtil.ContainsAgoLaterIndex(afterStr, regex.Item1, inSuffix: true)) { - swift = swiftDay(match.Groups["day"].Value); + isMatch = true; + isLater = regex.Item2 == Constants.LATER_LABEL; + var match = regex.Item1.Match(afterStr); + dayStr = match.Groups["day"].Value; } - else if (utilityConfiguration.CheckBothBeforeAfter && match.Success && !MatchingUtil.ContainsAgoLaterIndexInBeforeString(beforeStr, utilityConfiguration.AgoRegex)) + + if (utilityConfiguration.CheckBothBeforeAfter) { - match = utilityConfiguration.AgoRegex.Match(beforeStr + " " + afterStr); - if (match.Success && !string.IsNullOrEmpty(match.Groups["day"].Value)) + // Match split between beforeStr and afterStr + if (string.IsNullOrEmpty(dayStr) && isMatch) { - swift = swiftDay(match.Groups["day"].Value); + var match = regex.Item1.Match(beforeStr + " " + afterStr); + dayStr = match.Groups["day"].Value; } - } - - resultDateTime = DurationParsingUtil.ShiftDateTime(timex, referenceTime.AddDays(swift), false); - - ((DateTimeResolutionResult)durationParseResult.Value).Mod = Constants.BEFORE_MOD; - } - else if 
(utilityConfiguration.CheckBothBeforeAfter && MatchingUtil.ContainsAgoLaterIndexInBeforeString(beforeStr, utilityConfiguration.AgoRegex)) - { - var match = utilityConfiguration.AgoRegex.Match(beforeStr); - var swift = 0; - // Handle cases like "3 days before yesterday" - if (match.Success && !string.IsNullOrEmpty(match.Groups["day"].Value)) - { - swift = swiftDay(match.Groups["day"].Value); + // Match in beforeStr + if (string.IsNullOrEmpty(dayStr) && MatchingUtil.ContainsAgoLaterIndex(beforeStr, regex.Item1, inSuffix: false)) + { + isMatch = true; + isLater = regex.Item2 == Constants.LATER_LABEL; + var match = regex.Item1.Match(beforeStr); + dayStr = match.Groups["day"].Value; + } } - resultDateTime = DurationParsingUtil.ShiftDateTime(timex, referenceTime.AddDays(swift), false); - - ((DateTimeResolutionResult)durationParseResult.Value).Mod = Constants.BEFORE_MOD; - } - else if (MatchingUtil.ContainsAgoLaterIndex(afterStr, utilityConfiguration.LaterRegex) || - MatchingUtil.ContainsTermIndex(beforeStr, utilityConfiguration.InConnectorRegex) || - (utilityConfiguration.CheckBothBeforeAfter && MatchingUtil.ContainsAgoLaterIndexInBeforeString(beforeStr, utilityConfiguration.LaterRegex))) - { - var match = utilityConfiguration.LaterRegex.Match(afterStr); - var swift = 0; - - if (utilityConfiguration.CheckBothBeforeAfter && MatchingUtil.ContainsAgoLaterIndexInBeforeString(beforeStr, utilityConfiguration.LaterRegex) && string.IsNullOrEmpty(match.Groups["day"].Value)) + if (isMatch) { - match = utilityConfiguration.LaterRegex.Match(beforeStr); + break; } + } - // Handle cases like "3 days after tomorrow" - if (match.Success && !string.IsNullOrEmpty(match.Groups["day"].Value)) + // InConnectorRegex cases + if (!isMatch) + { + if (MatchingUtil.ContainsTermIndex(beforeStr, utilityConfiguration.InConnectorRegex)) { - swift = swiftDay(match.Groups["day"].Value); + // Match in afterStr + isMatch = isLater = true; + var match = utilityConfiguration.LaterRegex.Match(afterStr); + 
dayStr = match.Groups["day"].Value; } - - var yearMatch = utilityConfiguration.SinceYearSuffixRegex.Match(afterStr); - if (yearMatch.Success) + else if (utilityConfiguration.CheckBothBeforeAfter && MatchingUtil.ContainsAgoLaterIndex(afterStr, utilityConfiguration.InConnectorRegex, inSuffix: true)) { - var yearString = yearMatch.Groups[Constants.YearGroupName].Value; - var yearEr = new ExtractResult { Text = yearString }; - var year = Convert.ToInt32((double)(numberParser.Parse(yearEr).Value ?? 0)); - referenceTime = DateObject.MinValue.SafeCreateFromValue(year, 1, 1); + // Match in beforeStr + isMatch = isLater = true; + var match = utilityConfiguration.LaterRegex.Match(beforeStr); + dayStr = match.Groups["day"].Value; } - - resultDateTime = DurationParsingUtil.ShiftDateTime(timex, referenceTime.AddDays(swift), true); - - ((DateTimeResolutionResult)durationParseResult.Value).Mod = Constants.AFTER_MOD; } - else if (utilityConfiguration.CheckBothBeforeAfter && (MatchingUtil.ContainsAgoLaterIndexInBeforeString(beforeStr, utilityConfiguration.LaterRegex) || - MatchingUtil.ContainsAgoLaterIndex(afterStr, utilityConfiguration.InConnectorRegex) || - MatchingUtil.ContainsAgoLaterIndex(afterStr, utilityConfiguration.LaterRegex))) - { - // Check also beforeStr - var match = utilityConfiguration.LaterRegex.Match(beforeStr); - var swift = 0; - if (MatchingUtil.ContainsAgoLaterIndex(afterStr, utilityConfiguration.LaterRegex) && string.IsNullOrEmpty(match.Groups["day"].Value)) - { - match = utilityConfiguration.LaterRegex.Match(beforeStr); - } - - // Handle cases like "3 days after tomorrow" - if (match.Success && !string.IsNullOrEmpty(match.Groups["day"].Value)) + if (isMatch) + { + // Handle cases like "3 days before yesterday", "3 days after tomorrow" + if (!string.IsNullOrEmpty(dayStr)) { - swift = swiftDay(match.Groups["day"].Value); + swift = swiftDay(dayStr); } - var yearMatch = utilityConfiguration.SinceYearSuffixRegex.Match(beforeStr); - if (yearMatch.Success) + if 
(isLater) { - var yearString = yearMatch.Groups[Constants.YearGroupName].Value; - var yearEr = new ExtractResult { Text = yearString }; - var year = Convert.ToInt32((double)(numberParser.Parse(yearEr).Value ?? 0)); - referenceTime = DateObject.MinValue.SafeCreateFromValue(year, 1, 1); + var yearMatch = utilityConfiguration.SinceYearSuffixRegex.Match(afterStr); + if (yearMatch.Success) + { + var yearString = yearMatch.Groups[Constants.YearGroupName].Value; + var yearEr = new ExtractResult { Text = yearString }; + var year = Convert.ToInt32((double)(numberParser.Parse(yearEr).Value ?? 0)); + referenceTime = DateObject.MinValue.SafeCreateFromValue(year, 1, 1); + } } - resultDateTime = DurationParsingUtil.ShiftDateTime(timex, referenceTime.AddDays(swift), true); + var isFuture = isLater; + resultDateTime = DurationParsingUtil.ShiftDateTime(timex, referenceTime.AddDays(swift), future: isFuture); - ((DateTimeResolutionResult)durationParseResult.Value).Mod = Constants.AFTER_MOD; + ((DateTimeResolutionResult)durationParseResult.Value).Mod = isLater ? Constants.AFTER_MOD : Constants.BEFORE_MOD; } if (resultDateTime != referenceTime) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/BaseDatetimeUtilityConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/BaseDatetimeUtilityConfiguration.cs new file mode 100644 index 0000000000..f49a004966 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/BaseDatetimeUtilityConfiguration.cs @@ -0,0 +1,75 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Reflection; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.DateTime.Utilities +{ + public abstract class BaseDatetimeUtilityConfiguration : IDateTimeUtilityConfiguration + { + public BaseDatetimeUtilityConfiguration( + string agoRegex, + string laterRegex, + string inConnectorRegex, + string sinceYearSuffixRegex, + string withinNextPrefixRegex, + string amDescRegex, + string pmDescRegex, + string amPmDescRegex, + string rangeUnitRegex, + string timeUnitRegex, + string dateUnitRegex, + string commonDatePrefixRegex, + string rangePrefixRegex, + RegexOptions options, + bool checkBothBeforeAfter) + { + this.AgoRegex = new Regex(agoRegex, options, RegexTimeOut); + this.LaterRegex = new Regex(laterRegex, options, RegexTimeOut); + this.InConnectorRegex = new Regex(inConnectorRegex, options, RegexTimeOut); + this.SinceYearSuffixRegex = new Regex(sinceYearSuffixRegex, options, RegexTimeOut); + this.WithinNextPrefixRegex = new Regex(withinNextPrefixRegex, options, RegexTimeOut); + this.AmDescRegex = new Regex(amDescRegex, options, RegexTimeOut); + this.PmDescRegex = new Regex(pmDescRegex, options, RegexTimeOut); + this.AmPmDescRegex = new Regex(amPmDescRegex, options, RegexTimeOut); + this.RangeUnitRegex = new Regex(rangeUnitRegex, options, RegexTimeOut); + this.TimeUnitRegex = new Regex(timeUnitRegex, options, RegexTimeOut); + this.DateUnitRegex = new Regex(dateUnitRegex, options, RegexTimeOut); + this.CommonDatePrefixRegex = new Regex(commonDatePrefixRegex, options, RegexTimeOut); + this.RangePrefixRegex = new Regex(rangePrefixRegex, options, RegexTimeOut); + this.CheckBothBeforeAfter = checkBothBeforeAfter; + } + + public static TimeSpan RegexTimeOut => DateTimeRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + + public Regex AgoRegex { get; set; } + + public Regex LaterRegex { get; set; } + + public Regex InConnectorRegex { get; set; } + + public Regex SinceYearSuffixRegex { get; set; } + + 
public Regex WithinNextPrefixRegex { get; set; } + + public Regex RangeUnitRegex { get; set; } + + public Regex TimeUnitRegex { get; set; } + + public Regex DateUnitRegex { get; set; } + + public Regex AmDescRegex { get; set; } + + public Regex PmDescRegex { get; set; } + + public Regex AmPmDescRegex { get; set; } + + public Regex CommonDatePrefixRegex { get; set; } + + public Regex RangePrefixRegex { get; set; } + + public bool CheckBothBeforeAfter { get; set; } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateContext.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateContext.cs index f9470c2664..9aa28d97ff 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateContext.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateContext.cs @@ -1,4 +1,11 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime @@ -8,6 +15,63 @@ public class DateContext { public int Year { get; set; } = Constants.InvalidYear; + // Generate future/past date for cases without specific year like "Feb 29th" + public static (DateObject future, DateObject past) GenerateDates(bool noYear, DateObject referenceDate, int year, int month, int day) + { + var futureDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); + var pastDate = DateObject.MinValue.SafeCreateFromValue(year, month, day); + var futureYear = year; + var pastYear = year; + if (noYear) + { + if (IsFeb29th(year, month, day)) + { + if (DateObject.IsLeapYear(year)) + { + if (futureDate < referenceDate) + { + futureDate = DateObject.MinValue.SafeCreateFromValue(futureYear + 4, month, day); + } + else + { + pastDate = DateObject.MinValue.SafeCreateFromValue(pastYear - 4, month, 
day); + } + } + else + { + pastYear = pastYear >> 2 << 2; + if (!DateObject.IsLeapYear(pastYear)) + { + pastYear -= 4; + } + + futureYear = pastYear + 4; + if (!DateObject.IsLeapYear(futureYear)) + { + futureYear += 4; + } + + futureDate = DateObject.MinValue.SafeCreateFromValue(futureYear, month, day); + pastDate = DateObject.MinValue.SafeCreateFromValue(pastYear, month, day); + } + } + else + { + if (futureDate < referenceDate && !futureDate.IsDefaultValue()) + { + futureDate = DateObject.MinValue.SafeCreateFromValue(year + 1, month, day); + } + + if (pastDate >= referenceDate && !pastDate.IsDefaultValue()) + { + pastDate = DateObject.MinValue.SafeCreateFromValue(year - 1, month, day); + } + } + } + + return (futureDate, pastDate); + } + // This method is to ensure the begin date is less than the end date. // As DateContext only supports common Year as context, so it subtracts one year from beginDate. @TODO problematic in other usages. public static DateObject SwiftDateObject(DateObject beginDate, DateObject endDate) @@ -20,6 +84,102 @@ public static DateObject SwiftDateObject(DateObject beginDate, DateObject endDat return beginDate; } + public static bool IsFeb29th(DateObject date) + { + return date.Month == 2 && date.Day == 29; + } + + public static bool IsFeb29th(int year, int month, int day) + { + return month == 2 && day == 29; + } + + // this method is to validate whether the match is part of date range and is a correct split + // For example: in case "10-1 - 11-7", "10-1 - 11" can be matched by some of the Regexes, but the full text is a date range, so "10-1 - 11" is not a correct split + public static bool ValidateMatch(Match match, string text, IEnumerable dateRegexList, Regex rangeConnectorSymbolRegex) + { + // If the match doesn't contains "year" part, it will not be ambiguous and it's a valid match + var isValidMatch = !match.Groups[Constants.YearGroupName].Success; + + if (!isValidMatch) + { + var yearGroup = match.Groups[Constants.YearGroupName]; + 
+ // If the "year" part is not at the end of the match, it's a valid match + if (yearGroup.Index + yearGroup.Length != match.Index + match.Length) + { + isValidMatch = true; + } + else + { + var subText = text.Substring(yearGroup.Index); + + // If the following text (include the "year" part) doesn't start with a Date entity, it's a valid match + if (!StartsWithBasicDate(subText, dateRegexList)) + { + isValidMatch = true; + } + else + { + // If the following text (include the "year" part) starts with a Date entity, but the following text (doesn't include the "year" part) also starts with a valid Date entity, the current match is still valid + // For example, "10-1-2018-10-2-2018". Match "10-1-2018" is valid because though "2018-10-2" a valid match (indicates the first year "2018" might belongs to the second Date entity), but "10-2-2018" is also a valid match. + subText = text.Substring(yearGroup.Index + yearGroup.Length).Trim(); + subText = TrimStartRangeConnectorSymbols(subText, rangeConnectorSymbolRegex); + isValidMatch = StartsWithBasicDate(subText, dateRegexList); + } + } + + // Expressions with mixed separators are not considered valid dates e.g. "30/4.85" (unless one is a comma "30/4, 2016") + if (match.Groups[Constants.DayGroupName].Success && match.Groups[Constants.MonthGroupName].Success) + { + var noDateText = match.Value.Replace(match.Groups[Constants.YearGroupName].Value, string.Empty) + .Replace(match.Groups[Constants.MonthGroupName].Value, string.Empty) + .Replace(match.Groups[Constants.DayGroupName].Value, string.Empty); + noDateText = match.Groups[Constants.WeekdayGroupName].Success ? noDateText.Replace(match.Groups[Constants.WeekdayGroupName].Value, string.Empty) : noDateText; + var separators = new List { '/', '\\', '-', '.' 
}; + + if (separators.Count(separator => noDateText.Contains(separator)) > 1) + { + isValidMatch = false; + } + } + } + + return isValidMatch; + } + + // This method is to ensure the year of begin date is same with the end date in no year situation. + public (DateTimeParseResult pr1, DateTimeParseResult pr2) SyncYear(DateTimeParseResult pr1, DateTimeParseResult pr2) + { + if (IsEmpty()) + { + int futureYear; + int pastYear; + if (IsFeb29th((DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue)) + { + futureYear = ((DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue).Year; + pastYear = ((DateObject)((DateTimeResolutionResult)pr1.Value).PastValue).Year; + pr2.Value = SyncDateEntityResolutionInFeb29th((DateTimeResolutionResult)pr2.Value, futureYear, pastYear); + } + else if (IsFeb29th((DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue)) + { + futureYear = ((DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue).Year; + pastYear = ((DateObject)((DateTimeResolutionResult)pr2.Value).PastValue).Year; + pr1.Value = SyncDateEntityResolutionInFeb29th((DateTimeResolutionResult)pr1.Value, futureYear, pastYear); + } + } + + return (pr1, pr2); + } + + public DateTimeResolutionResult SyncDateEntityResolutionInFeb29th(DateTimeResolutionResult resolutionResult, int futureYear, int pastYear) + { + resolutionResult.FutureValue = SetDateWithContext((DateObject)resolutionResult.FutureValue, futureYear); + resolutionResult.PastValue = SetDateWithContext((DateObject)resolutionResult.PastValue, pastYear); + + return resolutionResult; + } + public DateTimeParseResult ProcessDateEntityParsingResult(DateTimeParseResult originalResult) { if (!IsEmpty()) @@ -60,9 +220,53 @@ public bool IsEmpty() return this.Year == Constants.InvalidYear; } - private DateObject SetDateWithContext(DateObject originalDate) + // TODO: Simplify this method to improve its performance + private static string TrimStartRangeConnectorSymbols(string text, Regex 
rangeConnectorSymbolRegex) + { + var rangeConnectorSymbolMatches = rangeConnectorSymbolRegex.Matches(text); + + foreach (Match symbolMatch in rangeConnectorSymbolMatches) + { + var startSymbolLength = -1; + + if (symbolMatch.Success && symbolMatch.Index == 0 && symbolMatch.Length > startSymbolLength) + { + startSymbolLength = symbolMatch.Length; + } + + if (startSymbolLength > 0) + { + text = text.Substring(startSymbolLength); + } + } + + return text.Trim(); + } + + // TODO: Simplify this method to improve its performance + private static bool StartsWithBasicDate(string text, IEnumerable dateRegexList) + { + foreach (var regex in dateRegexList) + { + var match = regex.MatchBegin(text, trim: true); + + if (match.Success) + { + return true; + } + } + + return false; + } + + private DateObject SetDateWithContext(DateObject originalDate, int year = -1) { - return new DateObject(Year, originalDate.Month, originalDate.Day); + if (!originalDate.IsDefaultValue()) + { + return DateObject.MinValue.SafeCreateFromValue(year == -1 ? Year : year, originalDate.Month, originalDate.Day); + } + + return originalDate; } private Tuple SetDateRangeWithContext(Tuple originalDateRange) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateObjectExtension.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateObjectExtension.cs index e790e96da1..84cf0847e8 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateObjectExtension.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateObjectExtension.cs @@ -1,10 +1,17 @@ -using System; -using DateObject = System.DateTime; - -namespace Microsoft.Recognizers.Text.DateTime -{ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime +{ public static class DateObjectExtension { + private const short IndexOfLeapMonth = 1; + private static readonly List MonthValidDays = new List { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; + public static DateObject Next(this DateObject from, DayOfWeek dayOfWeek) { var start = (int)from.DayOfWeek; @@ -23,11 +30,11 @@ public static DateObject Next(this DateObject from, DayOfWeek dayOfWeek) return from.AddDays(target - start + 7); } - public static DateObject Upcoming(this DateObject from, DayOfWeek dayOfWeek) - { - var start = (int)from.DayOfWeek; - var target = (int)dayOfWeek; - + public static DateObject Upcoming(this DateObject from, DayOfWeek dayOfWeek) + { + var start = (int)from.DayOfWeek; + var target = (int)dayOfWeek; + if (start == 0) { start = 7; @@ -36,16 +43,16 @@ public static DateObject Upcoming(this DateObject from, DayOfWeek dayOfWeek) if (target == 0) { target = 7; - } - - if (start < target) - { - return This(from, dayOfWeek); - } - else - { - return Next(from, dayOfWeek); - } + } + + if (start < target) + { + return This(from, dayOfWeek); + } + else + { + return Next(from, dayOfWeek); + } } public static DateObject This(this DateObject from, DayOfWeek dayOfWeek) @@ -84,11 +91,11 @@ public static DateObject Last(this DateObject from, DayOfWeek dayOfWeek) return from.AddDays(target - start - 7); } - public static DateObject Past(this DateObject from, DayOfWeek dayOfWeek) - { - var start = (int)from.DayOfWeek; - var target = (int)dayOfWeek; - + public static DateObject Past(this DateObject from, DayOfWeek dayOfWeek) + { + var start = (int)from.DayOfWeek; + var target = (int)dayOfWeek; + if (start == 0) { start = 7; @@ -97,77 +104,139 @@ public static DateObject Past(this DateObject from, DayOfWeek dayOfWeek) if (target == 0) { target = 7; - } - - if (start > target) - { - return This(from, dayOfWeek); - } 
- else - { - return Last(from, dayOfWeek); - } + } + + if (start > target) + { + return This(from, dayOfWeek); + } + else + { + return Last(from, dayOfWeek); + } + } + + public static DateObject GetFirstThursday(int year, int month = Constants.InvalidMonth) + { + var targetMonth = month; + + if (month == Constants.InvalidMonth) + { + targetMonth = 1; + } + + var firstDay = DateObject.MinValue.SafeCreateFromValue(year, targetMonth, 1); + DateObject firstThursday = firstDay.This(DayOfWeek.Thursday); + + // Thursday falls into previous year or previous month: shift that Thursday forward (shifting firstDay would keep firstDay's weekday, not Thursday) + if (firstThursday.Month != targetMonth) + { + firstThursday = firstThursday.AddDays(Constants.WeekDayCount); + } + + return firstThursday; + } + + public static DateObject GetLastThursday(int year, int month = Constants.InvalidMonth) + { + var targetMonth = month; + + if (month == Constants.InvalidMonth) + { + targetMonth = 12; + } + + var lastDay = GetLastDay(year, targetMonth); + DateObject lastThursday = lastDay.This(DayOfWeek.Thursday); + + // Thursday falls into next year or next month + if (lastThursday.Month != targetMonth) + { + lastThursday = lastThursday.AddDays(-Constants.WeekDayCount); + } + + return lastThursday; } - public static DateObject SafeCreateFromValue(this DateObject datetime, int year, int month, int day) - { - if (IsValidDate(year, month, day)) - { - datetime = datetime.AddYears(year - datetime.Year); - datetime = datetime.AddMonths(month - datetime.Month); - datetime = datetime.AddDays(day - datetime.Day); - } - + public static DateObject GetLastDay(int year, int month) + { + month++; + + if (month == 13) + { + year++; + month = 1; + } + + var firstDayOfNextMonth = DateObject.MinValue.SafeCreateFromValue(year, month, 1); + + return firstDayOfNextMonth.AddDays(-1); + } + + public static DateObject SafeCreateFromValue(this DateObject datetime, int year, int month, int day) + { + if (IsValidDate(year, month, day)) + { + datetime = datetime.AddYears(year - datetime.Year); + datetime = 
datetime.AddMonths(month - datetime.Month); + datetime = datetime.AddDays(day - datetime.Day); + } + return datetime; - } - - public static DateObject SafeCreateFromValue(this DateObject datetime, int year, int month, int day, int hour, int minute, int second) - { - if (IsValidDate(year, month, day) && IsValidTime(hour, minute, second)) - { - datetime = datetime.SafeCreateFromValue(year, month, day); - datetime = datetime.AddHours(hour - datetime.Hour); - datetime = datetime.AddMinutes(minute - datetime.Minute); - datetime = datetime.AddSeconds(second - datetime.Second); - } - + } + + public static DateObject SafeCreateFromValue(this DateObject datetime, int year, int month, int day, int hour, int minute, int second) + { + if (IsValidDate(year, month, day) && IsValidTime(hour, minute, second)) + { + datetime = datetime.SafeCreateFromValue(year, month, day); + datetime = datetime.AddHours(hour - datetime.Hour); + datetime = datetime.AddMinutes(minute - datetime.Minute); + datetime = datetime.AddSeconds(second - datetime.Second); + } + return datetime; - } - - public static bool IsValidDate(int year, int month, int day) - { - if (year < 1 || year > 9999) - { - return false; - } - - int[] validDays = - { - 31, - (year % 4 == 0 && year % 100 != 0) || year % 400 == 0 ? 
29 : 28, - 31, - 30, - 31, - 30, - 31, - 31, - 30, - 31, - 30, - 31, - }; - - return month >= 1 && month <= 12 && day >= 1 && day <= validDays[month - 1]; - } - - public static bool IsValidTime(int hour, int minute, int second) - { - return hour >= 0 && hour <= 23 && minute >= 0 && minute <= 59 && second >= 0 && second <= 59; - } - - public static bool IsDefaultValue(this DateObject datetime) - { - return datetime == default(DateObject); - } - } -} \ No newline at end of file + } + + public static bool IsValidDate(int year, int month, int day) + { + // Reject out-of-range years before anything else; the shared MonthValidDays table is never written to. + + if (year < 1 || year > 9999) + { + return false; + } + + return month >= 1 && month <= 12 && day >= 1 && day <= GetMonthMaxDay(year, month); + } + + public static int GetMonthMaxDay(int year, int month) + { + // February depends on the year: compute it per call instead of storing it in the shared static table. + if (month - 1 == IndexOfLeapMonth) + { + return LeapMonthDays(year); + } + + // Every other month has a fixed length, read straight from the table. + var maxDay = MonthValidDays[month - 1]; + + return maxDay; + } + + public static bool IsValidTime(int hour, int minute, int second) + { + return hour >= 0 && hour <= 23 && minute >= 0 && minute <= 59 && second >= 0 && second <= 59; + } + + public static bool IsDefaultValue(this DateObject datetime) + { + return datetime == default(DateObject); + } + + private static int LeapMonthDays(int year) + { + return (year % 4 == 0 && year % 100 != 0) || year % 400 == 0 ? 29 : 28; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeExtra.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeExtra.cs index 24a0f50ef2..acb75f47a0 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeExtra.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeExtra.cs @@ -1,4 +1,7 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime.Utilities { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeFormatUtil.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeFormatUtil.cs index 5182da73d2..c0a4c40415 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeFormatUtil.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeFormatUtil.cs @@ -1,8 +1,13 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; +using System.Collections.Immutable; using System.Globalization; using System.Text; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime @@ -12,6 +17,28 @@ public static class DateTimeFormatUtil private static readonly Regex HourTimexRegex = new Regex(@"(? dynastyYearMap, IExtractor integerExtractor, IParser numberParser) + { + int year = -1; + var regionTitleMatch = dynastyYearRegex.MatchExact(yearStr, trim: true); + if (regionTitleMatch.Success) + { + // handle "康熙元年" refer to https://zh.wikipedia.org/wiki/%E5%B9%B4%E5%8F%B7 + string dynastyYearStr = regionTitleMatch.Groups["dynasty"].Value; + string biasYearStr = regionTitleMatch.Groups["biasYear"].Value; + int basicYear = dynastyYearMap[dynastyYearStr]; + int biasYear = 1; + if (biasYearStr != dynastyStartYear) + { + biasYear = Convert.ToInt32((double)(numberParser.Parse(integerExtractor.Extract(biasYearStr)[0]).Value ?? 
0)); + } + + year = basicYear + biasYear - 1; + } + + return year; + } + public static string LuisDate(int year) { if (year == Constants.InvalidYear) @@ -19,7 +46,7 @@ public static string LuisDate(int year) return Constants.TimexFuzzyYear; } - return year.ToString("D4"); + return year.ToString("D4", CultureInfo.InvariantCulture); } public static string LuisDate(int year, int month) @@ -31,10 +58,10 @@ public static string LuisDate(int year, int month) return string.Join(Constants.DateTimexConnector, Constants.TimexFuzzyYear, Constants.TimexFuzzyMonth); } - return string.Join(Constants.DateTimexConnector, Constants.TimexFuzzyYear, month.ToString("D2")); + return string.Join(Constants.DateTimexConnector, Constants.TimexFuzzyYear, month.ToString("D2", CultureInfo.InvariantCulture)); } - return string.Join(Constants.DateTimexConnector, year.ToString("D4"), month.ToString("D2")); + return string.Join(Constants.DateTimexConnector, year.ToString("D4", CultureInfo.InvariantCulture), month.ToString("D2", CultureInfo.InvariantCulture)); } public static string LuisDate(int year, int month, int day) @@ -48,13 +75,13 @@ public static string LuisDate(int year, int month, int day) return string.Join(Constants.DateTimexConnector, Constants.TimexFuzzyYear, Constants.TimexFuzzyMonth, Constants.TimexFuzzyDay); } - return string.Join(Constants.DateTimexConnector, Constants.TimexFuzzyYear, Constants.TimexFuzzyMonth, day.ToString("D2")); + return string.Join(Constants.DateTimexConnector, Constants.TimexFuzzyYear, Constants.TimexFuzzyMonth, day.ToString("D2", CultureInfo.InvariantCulture)); } - return string.Join(Constants.DateTimexConnector, Constants.TimexFuzzyYear, month.ToString("D2"), day.ToString("D2")); + return string.Join(Constants.DateTimexConnector, Constants.TimexFuzzyYear, month.ToString("D2", CultureInfo.InvariantCulture), day.ToString("D2", CultureInfo.InvariantCulture)); } - return string.Join(Constants.DateTimexConnector, year.ToString("D4"), month.ToString("D2"), 
day.ToString("D2")); + return string.Join(Constants.DateTimexConnector, year.ToString("D4", CultureInfo.InvariantCulture), month.ToString("D2", CultureInfo.InvariantCulture), day.ToString("D2", CultureInfo.InvariantCulture)); } public static string LuisDate(DateObject date, DateObject alternativeDate = default(DateObject)) @@ -110,11 +137,11 @@ public static string LuisTime(int hour, int min, int second = Constants.InvalidS if (second == Constants.InvalidSecond) { - result = string.Join(Constants.TimeTimexConnector, hour.ToString("D2"), min.ToString("D2")); + result = string.Join(Constants.TimeTimexConnector, hour.ToString("D2", CultureInfo.InvariantCulture), min.ToString("D2", CultureInfo.InvariantCulture)); } else { - result = string.Join(Constants.TimeTimexConnector, hour.ToString("D2"), min.ToString("D2"), second.ToString("D2")); + result = string.Join(Constants.TimeTimexConnector, hour.ToString("D2", CultureInfo.InvariantCulture), min.ToString("D2", CultureInfo.InvariantCulture), second.ToString("D2", CultureInfo.InvariantCulture)); } return result; @@ -130,14 +157,24 @@ public static string LuisDateTime(DateObject time) return $"{LuisDate(time)}{Constants.TimeTimexPrefix}{LuisTime(time.Hour, time.Minute, time.Second)}"; } - // Only handle TimeSpan which is less than one day + // If a timex is given and it contains minutes and seconds, the result also includes minutes and seconds. + // Otherwise the result does not include minutes and seconds if they are zero. + public static string LuisDateShortTime(DateObject time, string timex = null) + { + var hasMin = timex != null ? timex.Contains(Constants.TimeTimexConnector) : false; + var hasSec = timex != null ? 
timex.Split(Constants.TimeTimexConnector[0]).Length > 2 : false; + + return $"{LuisDate(time)}{FormatShortTime(time, hasMin, hasSec)}"; + } + + // Also handle TimeSpans which are more than one day public static string LuisTimeSpan(System.TimeSpan timeSpan) { var timexBuilder = new StringBuilder($"{Constants.GeneralPeriodPrefix}{Constants.TimeTimexPrefix}"); - if (timeSpan.Hours > 0) + if (timeSpan.Days > 0 || timeSpan.Hours > 0) { - timexBuilder.Append($"{timeSpan.Hours}H"); + timexBuilder.Append($"{(timeSpan.Days * Constants.DayHourCount) + timeSpan.Hours}H"); } if (timeSpan.Minutes > 0) @@ -155,12 +192,21 @@ public static string LuisTimeSpan(System.TimeSpan timeSpan) public static string FormatDate(DateObject date) { - return string.Join(Constants.DateTimexConnector, date.Year.ToString("D4"), date.Month.ToString("D2"), date.Day.ToString("D2")); + return string.Join(Constants.DateTimexConnector, date.Year.ToString("D4", CultureInfo.InvariantCulture), date.Month.ToString("D2", CultureInfo.InvariantCulture), date.Day.ToString("D2", CultureInfo.InvariantCulture)); } public static string FormatTime(DateObject time) { - return string.Join(Constants.TimeTimexConnector, time.Hour.ToString("D2"), time.Minute.ToString("D2"), time.Second.ToString("D2")); + return string.Join(Constants.TimeTimexConnector, time.Hour.ToString("D2", CultureInfo.InvariantCulture), time.Minute.ToString("D2", CultureInfo.InvariantCulture), time.Second.ToString("D2", CultureInfo.InvariantCulture)); + } + + // Does not return minutes and seconds if they are zero + public static string FormatShortTime(DateObject time, bool keepMin = false, bool keepSec = false) + { + int hour = time.Hour, + min = (keepMin || time.Minute > 0) ? time.Minute : Constants.InvalidMinute, + sec = (keepSec || time.Second > 0) ? 
time.Second : Constants.InvalidSecond; + return ShortTime(hour, min, sec); } public static string FormatDateTime(DateObject datetime) @@ -225,16 +271,16 @@ public static string AllStringToPm(string timeStr) public static string ToPm(string timeStr) { bool hasT = false; - if (timeStr.StartsWith(Constants.TimeTimexPrefix)) + if (timeStr.StartsWith(Constants.TimeTimexPrefix, StringComparison.Ordinal)) { hasT = true; timeStr = timeStr.Substring(1); } var splits = timeStr.Split(new[] { Constants.TimeTimexConnector }, StringSplitOptions.RemoveEmptyEntries); - var hour = int.Parse(splits[0]); + var hour = int.Parse(splits[0], CultureInfo.InvariantCulture); hour = hour >= Constants.HalfDayHourCount ? hour - Constants.HalfDayHourCount : hour + Constants.HalfDayHourCount; - splits[0] = hour.ToString("D2"); + splits[0] = hour.ToString("D2", CultureInfo.InvariantCulture); return hasT ? Constants.TimeTimexPrefix + string.Join(Constants.TimeTimexConnector, splits) : string.Join(Constants.TimeTimexConnector, splits); } @@ -246,5 +292,18 @@ public static string ToIsoWeekTimex(DateObject date) return $"{thursday.Year:D4}-W{cal.GetWeekOfYear(thursday, CalendarWeekRule.FirstFourDayWeek, DayOfWeek.Monday):D2}"; } + + public static DateTimeResolutionResult ResolveEndOfDay(string timexPrefix, DateObject futureDate, DateObject pastDate) + { + var ret = new DateTimeResolutionResult + { + Timex = timexPrefix + "T23:59:59", // Due to .NET framework design + FutureValue = futureDate.Date.AddDays(1).AddSeconds(-1), + PastValue = pastDate.Date.AddDays(1).AddSeconds(-1), + Success = true, + }; + + return ret; + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeResolutionResult.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeResolutionResult.cs index df5e34792d..66ad9e9fd9 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeResolutionResult.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeResolutionResult.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.DateTime { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DurationParsingUtil.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DurationParsingUtil.cs index 9d99c3ef4d..c4d0d519b0 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DurationParsingUtil.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DurationParsingUtil.cs @@ -1,7 +1,11 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; +using System.Text.RegularExpressions; using DateObject = System.DateTime; @@ -64,7 +68,7 @@ public static bool IsDateDuration(string timex) { var dict = ResolveDurationTimex(timex); - return dict.Keys.All(unit => !IsTimeDurationUnit(unit)); + return dict.All(unit => !IsTimeDurationUnit(unit.Item1)); } public static DateObject ShiftDateTime(string timex, DateObject referenceDateTime, bool future) @@ -107,15 +111,80 @@ public static DateObject GetNextBusinessDay(DateObject startDate, bool isFuture return date; } - private static DateObject GetShiftResult(IImmutableDictionary timexUnitMap, DateObject referenceDate, bool future) + public static bool IsLessThanDay(string unit) + { + return unit.Equals("S", StringComparison.Ordinal) || + unit.Equals("M", StringComparison.Ordinal) || + unit.Equals("H", StringComparison.Ordinal); + } + + public static DateTimeResolutionResult ParseInexactNumberUnit(string text, IDurationParserConfiguration config) + { + return ParseInexactNumberUnit(text, config.InexactNumberUnitRegex, config.UnitMap, config.UnitValueMap); + } + + public static 
DateTimeResolutionResult ParseInexactNumberUnit(string text, ICJKDurationParserConfiguration config) + { + return ParseInexactNumberUnit(text, config.SomeRegex, config.UnitMap, config.UnitValueMap, isCJK: true); + } + + private static DateTimeResolutionResult ParseInexactNumberUnit(string text, Regex inexactNumberUnitRegex, IImmutableDictionary unitMap, IImmutableDictionary unitValueMap, bool isCJK = false) + { + var ret = new DateTimeResolutionResult(); + + var match = inexactNumberUnitRegex.Match(text); + if (match.Success) + { + // set the inexact number "few", "some" to 3 for now + double numVal = match.Groups["NumTwoTerm"].Success ? 2 : 3; + var srcUnit = match.Groups["unit"].Value; + + if (unitMap.ContainsKey(srcUnit)) + { + var unitStr = unitMap[srcUnit]; + + if (numVal > 1000 && (unitStr.Equals(Constants.TimexYear, StringComparison.Ordinal) || + unitStr.Equals(Constants.TimexMonthFull, StringComparison.Ordinal) || + unitStr.Equals(Constants.TimexWeek, StringComparison.Ordinal))) + { + return ret; + } + + ret.Timex = TimexUtility.GenerateDurationTimex(numVal, unitStr, IsLessThanDay(unitStr)); + + // In CJK implementation unitValueMap uses the unitMap values as keys while + // in standard implementation unitMap and unitValueMap have the same keys. + var unitValue = isCJK ? unitValueMap[unitStr] : unitValueMap[srcUnit]; + ret.FutureValue = ret.PastValue = numVal * unitValue; + ret.Success = true; + } + else if (match.Groups[Constants.BusinessDayGroupName].Success) + { + ret.Timex = TimexUtility.GenerateDurationTimex(numVal, Constants.TimexBusinessDay, false); + + // The line below was containing this.config.UnitValueMap[srcUnit.Split()[1]] + // it was updated to accommodate single word "business day" expressions. 
+ ret.FutureValue = ret.PastValue = numVal * unitValueMap[srcUnit.Split()[srcUnit.Split().Length - 1]]; + ret.Success = true; + } + } + + return ret; + } + + private static DateObject GetShiftResult(List<(string, double)> timexUnitMap, DateObject referenceDate, bool future) { var result = referenceDate; var futureOrPast = future ? 1 : -1; + // timexUnitMap needs to be an ordered collection because the result depends on the order of the shifts. + // For example "1 month 21 days later" produces different results depending on whether the day or month shift is applied first + // (when the reference month and the following month have different numbers of days). foreach (var pair in timexUnitMap) { - var unitStr = pair.Key; - var number = pair.Value; + var unitStr = pair.Item1; + var number = pair.Item2; + switch (unitStr) { case "H": @@ -150,9 +219,9 @@ private static DateObject GetShiftResult(IImmutableDictionary ti return result; } - private static ImmutableDictionary ResolveDurationTimex(string timexStr) + private static List<(string, double)> ResolveDurationTimex(string timexStr) { - var ret = new Dictionary(); + var ret = new List<(string, double)>(); // Resolve duration timex, such as P21DT2H (21 days 2 hours) var durationStr = timexStr.Replace(Constants.GeneralPeriodPrefix, string.Empty); @@ -160,14 +229,14 @@ private static ImmutableDictionary ResolveDurationTimex(string t var isTime = false; // Resolve business days - if (durationStr.EndsWith(Constants.TimexBusinessDay)) + if (durationStr.EndsWith(Constants.TimexBusinessDay, StringComparison.Ordinal)) { if (double.TryParse(durationStr.Substring(0, durationStr.Length - 2), out var numVal)) { - ret.Add(Constants.TimexBusinessDay, numVal); + ret.Add((Constants.TimexBusinessDay, numVal)); } - return ret.ToImmutableDictionary(); + return ret; } for (var idx = 0; idx < durationStr.Length; idx++) @@ -183,7 +252,7 @@ private static ImmutableDictionary ResolveDurationTimex(string t var numStr = 
durationStr.Substring(numberStart, idx - numberStart); if (!double.TryParse(numStr, out var number)) { - return new Dictionary().ToImmutableDictionary(); + return new List<(string, double)>(); } var srcTimexUnit = durationStr.Substring(idx, 1); @@ -192,14 +261,14 @@ private static ImmutableDictionary ResolveDurationTimex(string t srcTimexUnit = Constants.TimexMonthFull; } - ret.Add(srcTimexUnit, number); + ret.Add((srcTimexUnit, number)); } numberStart = idx + 1; } } - return ret.ToImmutableDictionary(); + return ret; } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/ExtractResultExtension.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/ExtractResultExtension.cs index d60edee212..0ac043f8bb 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/ExtractResultExtension.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/ExtractResultExtension.cs @@ -1,4 +1,11 @@ -namespace Microsoft.Recognizers.Text.DateTime +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.DateTime { public static class ExtractResultExtension { @@ -12,5 +19,77 @@ public static bool IsCover(this ExtractResult er1, ExtractResult er2) return (er2.Start < er1.Start && er2.Start + er2.Length >= er1.Start + er1.Length) || (er2.Start <= er1.Start && er2.Start + er2.Length > er1.Start + er1.Length); } + + public static List MergeAllResults(List results) + { + var ret = new List(); + + results = results.OrderBy(s => s.Start).ThenByDescending(s => s.Length).ToList(); + var mergedResults = new List(); + foreach (var result in results) + { + if (result != null) + { + bool shouldAdd = true; + var resStart = result.Start; + var resEnd = resStart + result.Length; + for (var index = 0; index < mergedResults.Count && shouldAdd; index++) + { + var mergedStart = mergedResults[index].Start; + var mergedEnd = mergedStart + mergedResults[index].Length; + + // It is included in one of the current results + if (resStart >= mergedStart && resEnd <= mergedEnd) + { + shouldAdd = false; + } + + // If it contains overlaps + if (resStart > mergedStart && resStart < mergedEnd) + { + shouldAdd = false; + } + + // It includes one of the results and should replace the included one + if (resStart <= mergedStart && resEnd >= mergedEnd) + { + shouldAdd = false; + mergedResults[index] = result; + } + } + + if (shouldAdd) + { + mergedResults.Add(result); + } + } + } + + return mergedResults; + } + + public static List FilterAmbiguity(List extractResults, string text, Dictionary ambiguityFiltersDict) + { + if (ambiguityFiltersDict != null) + { + foreach (var regex in ambiguityFiltersDict) + { + for (int i = extractResults.Count - 1; i >= 0; i--) + { + var er = extractResults[i]; + if (regex.Key.IsMatch(er.Text)) + { + var matches = regex.Value.Matches(text).Cast(); + if (matches.Any(m => m.Index < er.Start + er.Length && m.Index + m.Length > er.Start)) + { 
+ extractResults.RemoveAt(i); + } + } + } + } + } + + return extractResults; + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/HolidayFunctions.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/HolidayFunctions.cs new file mode 100644 index 0000000000..c4c28264c0 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/HolidayFunctions.cs @@ -0,0 +1,173 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Globalization; + +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime +{ + internal static class HolidayFunctions + { + + // Holi an Diwali dates { year, (holy_month, holy_day, diwali_month, diwali_day) } + // @TODO move declarations to base DateTime or implement lunar calculation + private static readonly IDictionary> HoliDiwaliRakshabandhanBaisakhiDates = + Definitions.Hindi.DateTimeDefinitions.HoliDiwaliRakshabandhanBaisakhiDates.ToImmutableDictionary(); + + public enum IslamicHolidayType + { + /// Ramadan + Ramadan = 0, + + /// Eid al-Adha (Feast of the Sacrifice) + Sacrifice, + + /// Eid al-Fitr (Festival of Breaking the Fast) + EidAlFitr, + + /// Islamic New Year + NewYear, + } + + public static DateObject CalculateHolidayByEaster(int year, int days = 0) + { + int day = 0; + int month = 3; + + int g = year % 19; + int c = year / 100; + int h = (c - (int)(c / 4) - (int)(((8 * c) + 13) / 25) + (19 * g) + 15) % 30; + int i = h - ((int)(h / 28) * (1 - ((int)(h / 28) * (int)(29 / (h + 1)) * (int)((21 - g) / 11)))); + + day = i - ((year + (int)(year / 4) + i + 2 - c + (int)(c / 4)) % 7) + 28; + + if (day > 31) + { + month++; + day -= 31; + } + + return DateObject.MinValue.SafeCreateFromValue(year, month, day).AddDays(days); + } + + public static DateObject CalculateAdventDate(int year, int days = 0) + { + DateObject xmas = new 
DateObject(year, 12, 25); + int weekday = (int)xmas.DayOfWeek; + + DateObject result; + + if (weekday == 0) + { + result = xmas.AddDays(-7 - days); + } + else + { + result = xmas.AddDays(-weekday - days); + } + + return result; + } + + // Holi and Diwali follow the lunar calendar + // their dates have been included in the dictionary HoliDiwaliDates + public static DateObject CalculateHoliDiwaliDate(int year, bool isHoli) + { + int day = 1; + int month = 1; + if (year >= 1900 && year < 2100) + { + var dates = HoliDiwaliRakshabandhanBaisakhiDates[year].ToImmutableList(); + if (isHoli) + { + month = dates[0]; + day = dates[1]; + } + else + { + month = dates[2]; + day = dates[3]; + } + } + + return DateObject.MinValue.SafeCreateFromValue(year, month, day); + } + + // Rakshabandhan and Vaishakhi also follow the lunar calendar + // their dates have been included in the dictionary HoliDiwaliDates + public static DateObject CalculateRakshaBandhanVaishakhiDate(int year, bool isRakshabandhan) + { + int day = 1; + int month = 1; + if (year >= 1900 && year < 2100) + { + var dates = HoliDiwaliRakshabandhanBaisakhiDates[year].ToImmutableList(); + if (isRakshabandhan) + { + month = dates[4]; + day = dates[5]; + } + else + { + month = dates[6]; + day = dates[7]; + } + } + + return DateObject.MinValue.SafeCreateFromValue(year, month, day); + } + + // Calculates the exact gregorian date for the given holiday using only gregorian year and exact hijri date + public static DateObject IslamicHoliday(int year, IslamicHolidayType holidayType) + { + int y = 0; + int m = 0; + int d = 0; + + int hijriDay = 1; + int hijriMonth = 1; + int hijriYear = 1; + + var gregorian = new GregorianCalendar(); + var hijri = new HijriCalendar(); + + switch (holidayType) + { + case IslamicHolidayType.Ramadan: + hijriDay = 1; + hijriMonth = 9; + break; + case IslamicHolidayType.Sacrifice: + hijriDay = 10; + hijriMonth = 12; + break; + case IslamicHolidayType.EidAlFitr: + hijriDay = 1; + hijriMonth = 10; + 
break; + case IslamicHolidayType.NewYear: + hijriDay = 1; + hijriMonth = 1; + break; + } + + for (hijriYear = 1; hijriYear <= 9999; hijriYear++) + { + var hijriDate = new DateObject(hijriYear, hijriMonth, hijriDay, hijri); + y = gregorian.GetYear(hijriDate); + m = gregorian.GetMonth(hijriDate); + d = gregorian.GetDayOfMonth(hijriDate); + + if (y == year) + { + break; + } + } + + return DateObject.MinValue.SafeCreateFromValue(y, m, d); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/IDateTimeUtilityConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/IDateTimeUtilityConfiguration.cs index 7995da742c..31ec6e48b9 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/IDateTimeUtilityConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/IDateTimeUtilityConfiguration.cs @@ -1,4 +1,7 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime.Utilities { @@ -28,6 +31,8 @@ public interface IDateTimeUtilityConfiguration Regex CommonDatePrefixRegex { get; } + Regex RangePrefixRegex { get; } + bool CheckBothBeforeAfter { get; } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/MatchingUtil.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/MatchingUtil.cs index bd8a394fc6..48524ef954 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/MatchingUtil.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/MatchingUtil.cs @@ -1,6 +1,10 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Linq; +using System.Reflection; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Matcher; @@ -10,28 +14,25 @@ namespace Microsoft.Recognizers.Text.DateTime { public static class MatchingUtil { - public static bool GetAgoLaterIndex(string text, Regex regex, out int index) - { - index = -1; - var match = regex.MatchBegin(text, trim: true); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + private static readonly Regex InvalidDayNumberPrefix = + new Regex(Definitions.BaseDateTime.InvalidDayNumberPrefix, RegexFlags, RegexTimeOut); - if (match.Success) - { - index = match.Index + match.Length; - return true; - } + private static TimeSpan RegexTimeOut => DateTimeRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); - return false; + public static bool IsInvalidDayNumberPrefix(string prefix) + { + return InvalidDayNumberPrefix.IsMatch(prefix); } - public static bool GetAgoLaterIndexInBeforeString(string text, Regex regex, out int index) + public static bool GetAgoLaterIndex(string text, Regex regex, out int index, bool inSuffix) { index = -1; - var match = regex.MatchEnd(text, trim: true); + var match = inSuffix ? regex.MatchBegin(text, trim: true) : regex.MatchEnd(text, trim: true); if (match.Success) { - index = match.Index; + index = match.Index + (inSuffix ? 
match.Length : 0); return true; } @@ -51,14 +52,9 @@ public static bool GetTermIndex(string text, Regex regex, out int index) return false; } - public static bool ContainsAgoLaterIndex(string text, Regex regex) - { - return GetAgoLaterIndex(text, regex, out var index); - } - - public static bool ContainsAgoLaterIndexInBeforeString(string text, Regex regex) + public static bool ContainsAgoLaterIndex(string text, Regex regex, bool inSuffix) { - return GetAgoLaterIndexInBeforeString(text, regex, out var index); + return GetAgoLaterIndex(text, regex, out var index, inSuffix); } public static bool ContainsTermIndex(string text, Regex regex) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/MergedParserUtil.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/MergedParserUtil.cs new file mode 100644 index 0000000000..6823212722 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/MergedParserUtil.cs @@ -0,0 +1,714 @@ +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Utilities; +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public static class MergedParserUtil + { + public const string ParserTypeName = "datetimeV2"; + + public static readonly string DateMinString = DateTimeFormatUtil.FormatDate(DateObject.MinValue); + + public static List FilterResults(string query, List candidateResults) + { + return candidateResults; + } + + public static string CombineMod(string originalMod, string newMod) + { + var combinedMod = newMod; + + if (!string.IsNullOrEmpty(originalMod) && !originalMod.Equals(newMod, StringComparison.Ordinal)) + { + combinedMod = $"{newMod}-{originalMod}"; + } + + return combinedMod; + } + + public static bool IsDurationWithAgoAndLater(ExtractResult er) + { + return er.Metadata != null && er.Metadata.IsDurationWithAgoAndLater; + } + + public 
static void AddSingleDateTimeToResolution(Dictionary resolutionDic, string type, string mod, + Dictionary res) + { + + // If an "invalid" Date or DateTime is extracted, it should not have an assigned resolution. + // Only valid entities should pass this condition. + if (resolutionDic.ContainsKey(type) && + !resolutionDic[type].StartsWith(DateMinString, StringComparison.Ordinal)) + { + if (!string.IsNullOrEmpty(mod)) + { + if (mod.StartsWith(Constants.BEFORE_MOD, StringComparison.Ordinal)) + { + res.Add(DateTimeResolutionKey.End, resolutionDic[type]); + return; + } + + if (mod.StartsWith(Constants.AFTER_MOD, StringComparison.Ordinal)) + { + res.Add(DateTimeResolutionKey.Start, resolutionDic[type]); + return; + } + + if (mod.StartsWith(Constants.SINCE_MOD, StringComparison.Ordinal)) + { + res.Add(DateTimeResolutionKey.Start, resolutionDic[type]); + return; + } + + if (mod.StartsWith(Constants.UNTIL_MOD, StringComparison.Ordinal)) + { + res.Add(DateTimeResolutionKey.End, resolutionDic[type]); + return; + } + } + + res.Add(ResolutionKey.Value, resolutionDic[type]); + } + } + + public static void AddPeriodToResolution(Dictionary resolutionDic, string startType, string endType, string mod, + Dictionary res) + { + var start = string.Empty; + var end = string.Empty; + + if (resolutionDic.ContainsKey(startType)) + { + start = resolutionDic[startType]; + if (start.Equals(Constants.InvalidDateString, StringComparison.Ordinal)) + { + return; + } + } + + if (resolutionDic.ContainsKey(endType)) + { + end = resolutionDic[endType]; + if (end.Equals(Constants.InvalidDateString, StringComparison.Ordinal)) + { + return; + } + } + + if (!string.IsNullOrEmpty(mod)) + { + // For the 'before' mod + // 1. Cases like "Before December", the start of the period should be the end of the new period, not the start + // (but not for cases like "Before the end of December") + // 2. 
Cases like "More than 3 days before today", the date point should be the end of the new period + if (mod.StartsWith(Constants.BEFORE_MOD, StringComparison.Ordinal)) + { + if (!string.IsNullOrEmpty(start) && !string.IsNullOrEmpty(end) && !mod.EndsWith(Constants.LATE_MOD, StringComparison.Ordinal)) + { + res.Add(DateTimeResolutionKey.End, start); + } + else + { + res.Add(DateTimeResolutionKey.End, end); + } + + return; + } + + // For the 'after' mod + // 1. Cases like "After January", the end of the period should be the start of the new period, not the end + // (but not for cases like "After the beginning of January") + // 2. Cases like "More than 3 days after today", the date point should be the start of the new period + if (mod.StartsWith(Constants.AFTER_MOD, StringComparison.Ordinal)) + { + if (!string.IsNullOrEmpty(start) && !string.IsNullOrEmpty(end) && !mod.EndsWith(Constants.EARLY_MOD, StringComparison.Ordinal)) + { + res.Add(DateTimeResolutionKey.Start, end); + } + else + { + res.Add(DateTimeResolutionKey.Start, start); + } + + return; + } + + // For the 'since' mod, the start of the period should be the start of the new period, not the end + if (mod.StartsWith(Constants.SINCE_MOD, StringComparison.Ordinal)) + { + res.Add(DateTimeResolutionKey.Start, start); + return; + } + + // For the 'until' mod, the end of the period should be the end of the new period, not the start + if (mod.StartsWith(Constants.UNTIL_MOD, StringComparison.Ordinal)) + { + res.Add(DateTimeResolutionKey.End, end); + return; + } + } + + if (!AreUnresolvedDates(start, end)) + { + res.Add(DateTimeResolutionKey.Start, start); + res.Add(DateTimeResolutionKey.End, end); + + // Preserving any present timex values. Useful for Holiday weekend where the timex is known during parsing. 
+ if (resolutionDic.ContainsKey(DateTimeResolutionKey.Timex)) + { + res.Add(DateTimeResolutionKey.Timex, resolutionDic[DateTimeResolutionKey.Timex]); + } + } + } + + public static void AddAltPeriodToResolution(Dictionary resolutionDic, string mod, Dictionary res) + { + if (resolutionDic.ContainsKey(TimeTypeConstants.START_DATETIME) || resolutionDic.ContainsKey(TimeTypeConstants.END_DATETIME)) + { + AddPeriodToResolution(resolutionDic, TimeTypeConstants.START_DATETIME, TimeTypeConstants.END_DATETIME, mod, res); + } + else if (resolutionDic.ContainsKey(TimeTypeConstants.START_DATE) || resolutionDic.ContainsKey(TimeTypeConstants.END_DATE)) + { + AddPeriodToResolution(resolutionDic, TimeTypeConstants.START_DATE, TimeTypeConstants.END_DATE, mod, res); + } + else if (resolutionDic.ContainsKey(TimeTypeConstants.START_TIME) || resolutionDic.ContainsKey(TimeTypeConstants.END_TIME)) + { + AddPeriodToResolution(resolutionDic, TimeTypeConstants.START_TIME, TimeTypeConstants.END_TIME, mod, res); + } + } + + public static void AddAltSingleDateTimeToResolution(Dictionary resolutionDic, string type, string mod, + Dictionary res) + { + if (resolutionDic.ContainsKey(TimeTypeConstants.DATE)) + { + AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.DATE, mod, res); + } + else if (resolutionDic.ContainsKey(TimeTypeConstants.DATETIME)) + { + AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.DATETIME, mod, res); + } + else if (resolutionDic.ContainsKey(TimeTypeConstants.TIME)) + { + AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.TIME, mod, res); + } + } + + public static bool AreUnresolvedDates(string startDate, string endDate) + { + return string.IsNullOrEmpty(startDate) || string.IsNullOrEmpty(endDate) || + startDate.StartsWith(DateMinString, StringComparison.Ordinal) || + endDate.StartsWith(DateMinString, StringComparison.Ordinal); + } + + public static DateTimeParseResult SetInclusivePeriodEnd(DateTimeParseResult slot) + { + if (slot.Type == 
$"{ParserTypeName}.{Constants.SYS_DATETIME_DATEPERIOD}") + { + var timexComponents = slot.TimexStr.Split(Constants.DatePeriodTimexSplitter, StringSplitOptions.RemoveEmptyEntries); + + // Only handle DatePeriod like "(StartDate,EndDate,Duration)" + if (timexComponents.Length == 3) + { + var value = (SortedDictionary)slot.Value; + var altTimex = string.Empty; + + if (value != null && value.ContainsKey(ResolutionKey.ValueSet)) + { + if (value[ResolutionKey.ValueSet] is IList> valueSet && valueSet.Any()) + { + foreach (var values in valueSet) + { + // This is only a sanity check, as here we only handle DatePeriod like "(StartDate,EndDate,Duration)" + if (values.ContainsKey(DateTimeResolutionKey.Start) && values.ContainsKey(DateTimeResolutionKey.End) && + values.ContainsKey(DateTimeResolutionKey.Timex)) + { + var startDate = DateObject.Parse(values[DateTimeResolutionKey.Start], CultureInfo.InvariantCulture); + var endDate = DateObject.Parse(values[DateTimeResolutionKey.End], CultureInfo.InvariantCulture); + var durationStr = timexComponents[2]; + var datePeriodTimexType = TimexUtility.GetDatePeriodTimexType(durationStr); + + endDate = TimexUtility.OffsetDateObject(endDate, offset: 1, timexType: datePeriodTimexType); + values[DateTimeResolutionKey.End] = DateTimeFormatUtil.LuisDate(endDate); + values[DateTimeResolutionKey.Timex] = + TimexUtility.GenerateEndInclusiveTimex(slot.TimexStr, datePeriodTimexType, startDate, endDate); + + if (string.IsNullOrEmpty(altTimex)) + { + altTimex = values[DateTimeResolutionKey.Timex]; + } + } + } + } + } + + slot.Value = value; + slot.TimexStr = altTimex; + } + } + + return slot; + } + + public static DateTimeParseResult SetParseResult(DateTimeParseResult slot, bool hasMod, IDateTimeOptionsConfiguration config) + { + slot.Value = DateTimeResolution(slot, config); + + // Change the type at last for the after or before modes + slot.Type = $"{ParserTypeName}.{DetermineDateTimeType(slot.Type, hasMod, config)}"; + return slot; + } + + public 
static string DetermineDateTimeType(string type, bool hasMod, IDateTimeOptionsConfiguration config) + { + if ((config.Options & DateTimeOptions.SplitDateAndTime) != 0) + { + if (type.Equals(Constants.SYS_DATETIME_DATETIME, StringComparison.Ordinal)) + { + return Constants.SYS_DATETIME_TIME; + } + } + else + { + if (hasMod) + { + if (type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal)) + { + return Constants.SYS_DATETIME_DATEPERIOD; + } + + if (type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) + { + return Constants.SYS_DATETIME_TIMEPERIOD; + } + + if (type.Equals(Constants.SYS_DATETIME_DATETIME, StringComparison.Ordinal)) + { + return Constants.SYS_DATETIME_DATETIMEPERIOD; + } + } + } + + return type; + } + + public static string DetermineSourceEntityType(string sourceType, string newType, bool hasMod) + { + if (!hasMod) + { + return null; + } + + if (!newType.Equals(sourceType, StringComparison.Ordinal)) + { + return Constants.SYS_DATETIME_DATETIMEPOINT; + } + + if (newType.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal)) + { + return Constants.SYS_DATETIME_DATETIMEPERIOD; + } + + return null; + } + + public static SortedDictionary DateTimeResolution(DateTimeParseResult slot, IDateTimeOptionsConfiguration config) + { + if (slot == null) + { + return null; + } + + var resolutions = new List>(); + var res = new Dictionary(); + + var type = slot.Type; + var timex = slot.TimexStr; + + var val = (DateTimeResolutionResult)slot.Value; + if (val == null) + { + return null; + } + + var isLunar = val.IsLunar; + var mod = val.Mod; + string list = null; + + // Resolve dates list for date periods + if (slot.Type.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal) && val.List != null) + { + list = string.Join(",", val.List.Select(o => DateTimeFormatUtil.LuisDate((DateObject)o)).ToArray()); + } + + // With modifier, output Type might not be the same with type in resolution result + // For example, if the 
resolution type is "date", with modifier the output type should be "daterange" + var typeOutput = DetermineDateTimeType(slot.Type, hasMod: !string.IsNullOrEmpty(mod), config); + + var sourceEntity = DetermineSourceEntityType(slot.Type, typeOutput, val.HasRangeChangingMod); + + var comment = val.Comment; + + // The following should be added to res first, since ResolveAmPm requires these fields. + AddResolutionFields(res, DateTimeResolutionKey.Timex, timex); + AddResolutionFields(res, Constants.Comment, comment); + AddResolutionFields(res, DateTimeResolutionKey.Mod, mod); + AddResolutionFields(res, ResolutionKey.Type, typeOutput); + AddResolutionFields(res, DateTimeResolutionKey.IsLunar, isLunar ? isLunar.ToString(CultureInfo.InvariantCulture) : string.Empty); + + var hasTimeZone = false; + + // For standalone timezone entity recognition, we generate TimeZoneResolution for each entity we extracted. + // We also merge time entity with timezone entity and add the information in TimeZoneResolution to every DateTime resolutions. 
+ if (val.TimeZoneResolution != null) + { + if (slot.Type.Equals(Constants.SYS_DATETIME_TIMEZONE, StringComparison.Ordinal)) + { + // single timezone + AddResolutionFields(res, Constants.ResolveTimeZone, new Dictionary + { + { ResolutionKey.Value, val.TimeZoneResolution.Value }, + { Constants.UtcOffsetMinsKey, val.TimeZoneResolution.UtcOffsetMins.ToString(CultureInfo.InvariantCulture) }, + }); + } + else + { + // timezone as clarification of datetime + hasTimeZone = true; + AddResolutionFields(res, Constants.TimeZone, val.TimeZoneResolution.Value); + AddResolutionFields(res, Constants.TimeZoneText, val.TimeZoneResolution.TimeZoneText); + AddResolutionFields(res, Constants.UtcOffsetMinsKey, + val.TimeZoneResolution.UtcOffsetMins.ToString(CultureInfo.InvariantCulture)); + } + } + + var pastResolutionStr = ((DateTimeResolutionResult)slot.Value).PastResolution; + var futureResolutionStr = ((DateTimeResolutionResult)slot.Value).FutureResolution; + + if (typeOutput == Constants.SYS_DATETIME_DATETIMEALT && pastResolutionStr.Count > 0) + { + typeOutput = DetermineResolutionDateTimeType(pastResolutionStr); + } + + var resolutionPast = GenerateResolution(type, pastResolutionStr, mod); + var resolutionFuture = GenerateResolution(type, futureResolutionStr, mod); + + if ((config.Options & DateTimeOptions.TasksMode) != 0) + { + if (type.Equals(Constants.SYS_DATETIME_SET, StringComparison.Ordinal)) + { + pastResolutionStr = ((DateTimeResolutionResult)slot.Value).PastResolution; + futureResolutionStr = ((DateTimeResolutionResult)slot.Value).FutureResolution; + + resolutionPast = TasksModeSetHandler.TasksModeGenerateResolutionSetParser(pastResolutionStr, mod, timex); + resolutionFuture = TasksModeSetHandler.TasksModeGenerateResolutionSetParser(futureResolutionStr, mod, timex); + } + } + + // If past and future are same, keep only one + if (resolutionFuture.OrderBy(t => t.Key).Select(t => t.Value) + .SequenceEqual(resolutionPast.OrderBy(t => t.Key).Select(t => t.Value))) + { + if 
(resolutionPast.Count > 0) + { + AddResolutionFields(res, Constants.Resolve, resolutionPast); + } + } + else + { + if (resolutionPast.Count > 0) + { + AddResolutionFields(res, Constants.ResolveToPast, resolutionPast); + } + + if (resolutionFuture.Count > 0) + { + AddResolutionFields(res, Constants.ResolveToFuture, resolutionFuture); + } + } + + // If 'ampm', double our resolution accordingly + if (!string.IsNullOrEmpty(comment) && comment.Equals(Constants.Comment_AmPm, StringComparison.Ordinal)) + { + if (res.ContainsKey(Constants.Resolve)) + { + ResolveAmpm(res, Constants.Resolve); + } + else + { + ResolveAmpm(res, Constants.ResolveToPast); + ResolveAmpm(res, Constants.ResolveToFuture); + } + } + + // If WeekOf and in CalendarMode, modify the past part of our resolution + if ((config.Options & DateTimeOptions.CalendarMode) != 0 && + !string.IsNullOrEmpty(comment) && comment.Equals(Constants.Comment_WeekOf, StringComparison.Ordinal)) + { + ResolveWeekOf(res, Constants.ResolveToPast); + } + + if (!string.IsNullOrEmpty(comment) && TimexUtility.HasDoubleTimex(comment)) + { + TimexUtility.ProcessDoubleTimex(res, Constants.ResolveToFuture, Constants.ResolveToPast, timex); + } + + foreach (var p in res) + { + if (p.Value is Dictionary dictionary) + { + var value = new Dictionary(); + + AddResolutionFields(value, DateTimeResolutionKey.Timex, timex); + AddResolutionFields(value, DateTimeResolutionKey.Mod, mod); + AddResolutionFields(value, ResolutionKey.Type, typeOutput); + AddResolutionFields(value, DateTimeResolutionKey.IsLunar, + isLunar ? 
isLunar.ToString(CultureInfo.InvariantCulture) : string.Empty); + AddResolutionFields(value, DateTimeResolutionKey.List, list); + AddResolutionFields(value, DateTimeResolutionKey.SourceEntity, sourceEntity); + + if (hasTimeZone) + { + AddResolutionFields(value, Constants.TimeZone, val.TimeZoneResolution.Value); + AddResolutionFields(value, Constants.TimeZoneText, val.TimeZoneResolution.TimeZoneText); + AddResolutionFields(value, Constants.UtcOffsetMinsKey, + val.TimeZoneResolution.UtcOffsetMins.ToString(CultureInfo.InvariantCulture)); + } + + foreach (var q in dictionary) + { + value[q.Key] = q.Value; + } + + resolutions.Add(value); + } + } + + if (resolutionPast.Count == 0 && resolutionFuture.Count == 0 && val.TimeZoneResolution == null) + { + var notResolved = new Dictionary + { + { + DateTimeResolutionKey.Timex, timex + }, + { + ResolutionKey.Type, typeOutput + }, + { + ResolutionKey.Value, "not resolved" + }, + }; + + resolutions.Add(notResolved); + } + + return new SortedDictionary { { ResolutionKey.ValueSet, resolutions } }; + } + + public static List DateTimeResolutionForSplit(DateTimeParseResult slot, IDateTimeOptionsConfiguration config) + { + var results = new List(); + if (((DateTimeResolutionResult)slot.Value).SubDateTimeEntities != null) + { + var subEntities = ((DateTimeResolutionResult)slot.Value).SubDateTimeEntities; + foreach (var subEntity in subEntities) + { + var result = (DateTimeParseResult)subEntity; + result.Start += slot.Start; + results.AddRange(DateTimeResolutionForSplit(result, config)); + } + } + else + { + slot.Value = DateTimeResolution(slot, config); + slot.Type = $"{ParserTypeName}.{DetermineDateTimeType(slot.Type, hasMod: false, config)}"; + results.Add(slot); + } + + return results; + } + + internal static void AddResolutionFields(Dictionary dic, string key, string value) + { + if (!string.IsNullOrEmpty(value)) + { + dic.Add(key, value); + } + } + + internal static void AddResolutionFields(Dictionary dic, string key, object value) 
+ { + if (value != null) + { + dic.Add(key, value); + } + } + + internal static void ResolveAmpm(Dictionary resolutionDic, string keyName) + { + if (resolutionDic.ContainsKey(keyName)) + { + var resolution = (Dictionary)resolutionDic[keyName]; + var resolutionPm = new Dictionary(); + + if (!resolutionDic.ContainsKey(DateTimeResolutionKey.Timex)) + { + return; + } + + var timex = (string)resolutionDic[DateTimeResolutionKey.Timex]; + + resolutionDic.Remove(keyName); + resolutionDic.Add(keyName + "Am", resolution); + + switch ((string)resolutionDic[ResolutionKey.Type]) + { + case Constants.SYS_DATETIME_TIME: + resolutionPm[ResolutionKey.Value] = DateTimeFormatUtil.ToPm(resolution[ResolutionKey.Value]); + resolutionPm[DateTimeResolutionKey.Timex] = DateTimeFormatUtil.ToPm(timex); + break; + + case Constants.SYS_DATETIME_DATETIME: + var split = resolution[ResolutionKey.Value].Split(' '); + resolutionPm[ResolutionKey.Value] = split[0] + " " + DateTimeFormatUtil.ToPm(split[1]); + resolutionPm[DateTimeResolutionKey.Timex] = DateTimeFormatUtil.AllStringToPm(timex); + break; + + case Constants.SYS_DATETIME_TIMEPERIOD: + if (resolution.ContainsKey(DateTimeResolutionKey.Start)) + { + resolutionPm[DateTimeResolutionKey.Start] = DateTimeFormatUtil.ToPm(resolution[DateTimeResolutionKey.Start]); + } + + if (resolution.ContainsKey(DateTimeResolutionKey.End)) + { + resolutionPm[DateTimeResolutionKey.End] = DateTimeFormatUtil.ToPm(resolution[DateTimeResolutionKey.End]); + } + + if (resolution.ContainsKey(DateTimeResolutionKey.Value)) + { + resolutionPm[ResolutionKey.Value] = DateTimeFormatUtil.ToPm(resolution[ResolutionKey.Value]); + } + + resolutionPm[DateTimeResolutionKey.Timex] = DateTimeFormatUtil.AllStringToPm(timex); + break; + + case Constants.SYS_DATETIME_DATETIMEPERIOD: + if (resolution.ContainsKey(DateTimeResolutionKey.Start)) + { + var start = Convert.ToDateTime(resolution[DateTimeResolutionKey.Start], CultureInfo.InvariantCulture); + start = start.Hour == 
Constants.HalfDayHourCount ? + start.AddHours(-Constants.HalfDayHourCount) : start.AddHours(Constants.HalfDayHourCount); + + resolutionPm[DateTimeResolutionKey.Start] = DateTimeFormatUtil.FormatDateTime(start); + } + + if (resolution.ContainsKey(DateTimeResolutionKey.End)) + { + var end = Convert.ToDateTime(resolution[DateTimeResolutionKey.End], CultureInfo.InvariantCulture); + end = end.Hour == Constants.HalfDayHourCount ? + end.AddHours(-Constants.HalfDayHourCount) : end.AddHours(Constants.HalfDayHourCount); + + resolutionPm[DateTimeResolutionKey.End] = DateTimeFormatUtil.FormatDateTime(end); + } + + resolutionPm[DateTimeResolutionKey.Timex] = DateTimeFormatUtil.AllStringToPm(timex); + break; + } + + resolutionDic.Add(keyName + "Pm", resolutionPm); + } + } + + internal static void ResolveWeekOf(Dictionary resolutionDic, string keyName) + { + if (resolutionDic.ContainsKey(keyName)) + { + var resolution = (Dictionary)resolutionDic[keyName]; + + var monday = DateObject.Parse(resolution[DateTimeResolutionKey.Start], CultureInfo.InvariantCulture); + resolution[DateTimeResolutionKey.Timex] = DateTimeFormatUtil.ToIsoWeekTimex(monday); + + resolutionDic.Remove(keyName); + resolutionDic.Add(keyName, resolution); + } + } + + internal static Dictionary GenerateResolution(string type, Dictionary resolutionDic, string mod) + { + var res = new Dictionary(); + + if (type.Equals(Constants.SYS_DATETIME_DATETIME, StringComparison.Ordinal)) + { + AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.DATETIME, mod, res); + } + else if (type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) + { + AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.TIME, mod, res); + } + else if (type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal)) + { + AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.DATE, mod, res); + } + else if (type.Equals(Constants.SYS_DATETIME_DURATION, StringComparison.Ordinal)) + { + if 
(resolutionDic.ContainsKey(TimeTypeConstants.DURATION)) + { + res.Add(ResolutionKey.Value, resolutionDic[TimeTypeConstants.DURATION]); + } + } + else if (type.Equals(Constants.SYS_DATETIME_TIMEPERIOD, StringComparison.Ordinal)) + { + AddPeriodToResolution(resolutionDic, TimeTypeConstants.START_TIME, TimeTypeConstants.END_TIME, mod, res); + } + else if (type.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal)) + { + AddPeriodToResolution(resolutionDic, TimeTypeConstants.START_DATE, TimeTypeConstants.END_DATE, mod, res); + } + else if (type.Equals(Constants.SYS_DATETIME_DATETIMEPERIOD, StringComparison.Ordinal)) + { + AddPeriodToResolution(resolutionDic, TimeTypeConstants.START_DATETIME, TimeTypeConstants.END_DATETIME, mod, res); + } + else if (type.Equals(Constants.SYS_DATETIME_DATETIMEALT, StringComparison.Ordinal)) + { + // For a period + if (resolutionDic.Count > 2 || !string.IsNullOrEmpty(mod)) + { + AddAltPeriodToResolution(resolutionDic, mod, res); + } + else + { + // For a datetime point + AddAltSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.DATETIMEALT, mod, res); + } + } + + return res; + } + + private static string DetermineResolutionDateTimeType(Dictionary pastResolutionStr) + { + switch (pastResolutionStr.Keys.First()) + { + case TimeTypeConstants.START_DATE: + return Constants.SYS_DATETIME_DATEPERIOD; + + case TimeTypeConstants.START_DATETIME: + return Constants.SYS_DATETIME_DATETIMEPERIOD; + + case TimeTypeConstants.START_TIME: + return Constants.SYS_DATETIME_TIMEPERIOD; + + default: + // ToLowerInvariant needed for legacy reasons with subtype code. + // @TODO remove in future refactoring of test code and double-check there's no impact in output schema. 
+ return pastResolutionStr.Keys.First().ToLowerInvariant(); + } + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/ModAndDateResult.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/ModAndDateResult.cs index 1ede78f8d6..7844eff50f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/ModAndDateResult.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/ModAndDateResult.cs @@ -1,4 +1,9 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Globalization; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime.Utilities @@ -28,5 +33,56 @@ public ModAndDateResult(DateObject beginDate, DateObject endDate) public string Mod { get; set; } public List DateList { get; set; } + + public static ModAndDateResult GetModAndDate(DateObject beginDate, DateObject endDate, DateObject referenceDate, string timex, bool future) + { + DateObject beginDateResult = beginDate; + DateObject endDateResult = endDate; + var isBusinessDay = timex.EndsWith(Constants.TimexBusinessDay, StringComparison.Ordinal); + var businessDayCount = 0; + List dateList = null; + + if (isBusinessDay) + { + businessDayCount = int.Parse(timex.Substring(1, timex.Length - 3), CultureInfo.InvariantCulture); + } + + if (future) + { + string mod = Constants.AFTER_MOD; + + // For future the beginDate should add 1 first + if (isBusinessDay) + { + beginDateResult = DurationParsingUtil.GetNextBusinessDay(referenceDate); + endDateResult = DurationParsingUtil.GetNthBusinessDay(beginDateResult, businessDayCount - 1, true, out dateList); + endDateResult = endDateResult.AddDays(1); + return new ModAndDateResult(beginDateResult, endDateResult, mod, dateList); + } + else + { + beginDateResult = referenceDate.AddDays(1); + endDateResult = DurationParsingUtil.ShiftDateTime(timex, 
beginDateResult, true); + return new ModAndDateResult(beginDateResult, endDateResult, mod, null); + } + } + else + { + const string mod = Constants.BEFORE_MOD; + + if (isBusinessDay) + { + endDateResult = DurationParsingUtil.GetNextBusinessDay(endDateResult, false); + beginDateResult = DurationParsingUtil.GetNthBusinessDay(endDateResult, businessDayCount - 1, false, out dateList); + endDateResult = endDateResult.AddDays(1); + return new ModAndDateResult(beginDateResult, endDateResult, mod, dateList); + } + else + { + beginDateResult = DurationParsingUtil.ShiftDateTime(timex, endDateResult, false); + return new ModAndDateResult(beginDateResult, endDateResult, mod, null); + } + } + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/RangeTimexComponents.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/RangeTimexComponents.cs index f3300e8917..54952895d0 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/RangeTimexComponents.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/RangeTimexComponents.cs @@ -1,4 +1,7 @@ -using DateObject = System.DateTime; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/SetHandler.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/SetHandler.cs index db249b9199..a8c5fd8c5f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/SetHandler.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/SetHandler.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.DateTime.Utilities @@ -21,5 +24,14 @@ public static string WeekDayGroupMatchString(Match match) return weekday; } + // Marks the given result as a resolved set. + // Stores innerTimex in Timex, writes the string "Set: " + innerTimex to both FutureValue and PastValue, + // sets Success to true and returns the same instance that was passed in. + // The parameter is declared ref, but this method never reassigns it. + public static DateTimeResolutionResult ResolveSet(ref DateTimeResolutionResult result, string innerTimex) + { + result.Timex = innerTimex; + result.FutureValue = result.PastValue = "Set: " + innerTimex; + result.Success = true; + + return result; + } + } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/StringExtension.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/StringExtension.cs index eb41f65ef7..870cbb16a8 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/StringExtension.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/StringExtension.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Text; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TasksModeProcessing.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TasksModeProcessing.cs new file mode 100644 index 0000000000..62a39c571d --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TasksModeProcessing.cs @@ -0,0 +1,778 @@ +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using Microsoft.Recognizers.Text.DataTypes.TimexExpression; +using Microsoft.Recognizers.Text.Utilities; +using DateObject = System.DateTime; + +namespace Microsoft.Recognizers.Text.DateTime +{ + public static class TasksModeProcessing + { + public const string ParserTypeName = "datetimeV2"; + + public static readonly string DateMinString = DateTimeFormatUtil.FormatDate(DateObject.MinValue); + + /* + TasksModeModification modifies past datetime references under tasksmode. 
+ E.g. if the input text is "22 june at 9 pm" and the current time is 22 june 2022, 8 am, then + under default mode the past datetime value will be 22 june 2022 9 pm, but since that time has not passed yet, + under tasksmode its value will get mapped to 22 june 2021 9 pm. + The TasksModeModification function modifies the datetime value according to its type and w.r.t. + the reference time. + Under TasksMode + For Input: 22 april at 5 pm. (reference time is 22/04/2022 T17:30:00, output type is datetime) + Expected output : {Past resolution value: 22/04/2022T17, + Future resolution value: 22/04/2023T17 + }, + Under Default Mode + For Input: 22 april at 5 pm. (reference time is 22/04/2022 T17:30:00) + Expected output : {Past resolution value: 22/04/2021T17, + Future resolution value: 22/04/2022T17 + }, + slot: parse result whose Type selects the handler below; referenceTime: passed unchanged to that handler. + Returns the slot produced by the selected handler, or the incoming slot untouched when its type has no case below. + */ + public static DateTimeParseResult TasksModeModification(DateTimeParseResult slot, DateObject referenceTime) + { + // The switch key is slot.Type with its first ParserTypeName.Length + 1 characters removed + // (presumably the "datetimeV2." prefix - TODO confirm against the code that assigns slot.Type). + switch (slot.Type.Substring(ParserTypeName.Length + 1)) + { + + case Constants.SYS_DATETIME_DATE: + slot = TasksModeModifyDateValue(slot, referenceTime); + break; + + case Constants.SYS_DATETIME_DATEPERIOD: + slot = TasksModeModifyDatePeriodValue(slot, referenceTime); + break; + + case Constants.SYS_DATETIME_TIME: + slot = TasksModeModifyTimeValue(slot, referenceTime); + break; + + case Constants.SYS_DATETIME_TIMEPERIOD: + slot = TasksModeTimePeriodValue(slot, referenceTime); + break; + + case Constants.SYS_DATETIME_DATETIME: + slot = TasksModeModifyDateTimeValue(slot, referenceTime); + break; + + case Constants.SYS_DATETIME_DATETIMEPERIOD: + slot = TasksModeModifyDateTimePeriodValue(slot, referenceTime); + break; + } + + return slot; + } + + // Resolves a time-of-day symbol to the Timex and begin/end hours used under tasksmode. 
+ // Builds a TimeOfDayResolutionResult for the given time-of-day symbol: Timex is set to the matched symbol and + // BeginHour/EndHour to the corresponding TasksModeConstants values; only the Night case also sets EndMin. + // A symbol with no case below falls to default and the newly constructed result is returned as-is. + public static TimeOfDayResolutionResult TasksModeResolveTimeOfDay(string tod) + { + var result = new TimeOfDayResolutionResult(); + switch (tod) + { + case Constants.EarlyMorning: + result.Timex = Constants.EarlyMorning; + result.BeginHour = TasksModeConstants.EarlyMorningBeginHour; + result.EndHour = TasksModeConstants.EarlyMorningEndHour; + break; + case Constants.Morning: + result.Timex = Constants.Morning; + result.BeginHour = TasksModeConstants.MorningBeginHour; + result.EndHour = TasksModeConstants.MorningEndHour; + break; + case Constants.MidDay: + result.Timex = Constants.MidDay; + result.BeginHour = TasksModeConstants.MidDayBeginHour; + result.EndHour = TasksModeConstants.MidDayEndHour; + break; + case Constants.Afternoon: + result.Timex = Constants.Afternoon; + result.BeginHour = TasksModeConstants.AfternoonBeginHour; + result.EndHour = TasksModeConstants.AfternoonEndHour; + break; + case Constants.Evening: + result.Timex = Constants.Evening; + result.BeginHour = TasksModeConstants.EveningBeginHour; + result.EndHour = TasksModeConstants.EveningEndHour; + break; + case Constants.Daytime: + result.Timex = Constants.Daytime; + result.BeginHour = TasksModeConstants.DaytimeBeginHour; + result.EndHour = TasksModeConstants.DaytimeEndHour; + break; + case Constants.Nighttime: + result.Timex = Constants.Nighttime; + result.BeginHour = TasksModeConstants.NighttimeBeginHour; + result.EndHour = TasksModeConstants.NighttimeEndHour; + break; + case Constants.BusinessHour: + result.Timex = Constants.BusinessHour; + result.BeginHour = TasksModeConstants.BusinessBeginHour; + result.EndHour = TasksModeConstants.BusinessEndHour; + break; + case Constants.Night: + result.Timex = Constants.Night; + result.BeginHour = TasksModeConstants.NightBeginHour; + result.EndHour = TasksModeConstants.NightEndHour; + result.EndMin = TasksModeConstants.NightEndMin; + break; + case Constants.MealtimeBreakfast: + result.Timex = Constants.MealtimeBreakfast; + result.BeginHour = 
TasksModeConstants.MealtimeBreakfastBeginHour; + result.EndHour = TasksModeConstants.MealtimeBreakfastEndHour; + break; + case Constants.MealtimeBrunch: + result.Timex = Constants.MealtimeBrunch; + result.BeginHour = TasksModeConstants.MealtimeBrunchBeginHour; + result.EndHour = TasksModeConstants.MealtimeBrunchEndHour; + break; + case Constants.MealtimeLunch: + result.Timex = Constants.MealtimeLunch; + result.BeginHour = TasksModeConstants.MealtimeLunchBeginHour; + result.EndHour = TasksModeConstants.MealtimeLunchEndHour; + break; + case Constants.MealtimeDinner: + result.Timex = Constants.MealtimeDinner; + result.BeginHour = TasksModeConstants.MealtimeDinnerBeginHour; + result.EndHour = TasksModeConstants.MealtimeDinnerEndHour; + break; + default: + break; + } + + return result; + } + + /* + Sets beginHour and endHour for subjective time references under TasksMode. + E.g. morning gets mapped to 6:00 am. + Returns true when todSymbol is one of the symbols handled below; otherwise returns false with + beginHour, endHour and endMin all left at 0. endMin is never assigned a non-zero value in this method, + whereas TasksModeResolveTimeOfDay assigns EndMin for Night. + */ + public static bool GetMatchedTimeRangeForTasksMode(string text, string todSymbol, out int beginHour, out int endHour, out int endMin) + { + // NOTE(review): trimmedText is not read again anywhere in this method. + var trimmedText = text.Trim(); + beginHour = 0; + endHour = 0; + endMin = 0; + if (todSymbol == Constants.Morning) + { + beginHour = TasksModeConstants.MorningBeginHour; + // NOTE(review): the end hour comes from EarlyMorningEndHour, not MorningEndHour - confirm this is intended. + endHour = TasksModeConstants.EarlyMorningEndHour; + } + else if (todSymbol == Constants.Afternoon) + { + // NOTE(review): Afternoon and Evening read Constants.*, while every other branch reads TasksModeConstants.* - confirm. + beginHour = Constants.AfternoonBeginHour; + endHour = Constants.AfternoonEndHour; + + } + else if (todSymbol == Constants.Evening) + { + beginHour = Constants.EveningBeginHour; + endHour = Constants.EveningEndHour; + } + else if (todSymbol == Constants.Night) + { + beginHour = TasksModeConstants.NightBeginHour; + endHour = TasksModeConstants.NightEndHour; + } + else if (todSymbol == Constants.MealtimeBreakfast) + { + beginHour = TasksModeConstants.MealtimeBreakfastBeginHour; + endHour = TasksModeConstants.MealtimeBreakfastEndHour; + } + else if (todSymbol == Constants.MealtimeBrunch) + { + beginHour = TasksModeConstants.MealtimeBrunchBeginHour; + 
endHour = TasksModeConstants.MealtimeBrunchEndHour; + } + else if (todSymbol == Constants.MealtimeDinner) + { + beginHour = TasksModeConstants.MealtimeDinnerBeginHour; + endHour = TasksModeConstants.MealtimeDinnerEndHour; + } + else if (todSymbol == Constants.MealtimeLunch) + { + beginHour = TasksModeConstants.MealtimeLunchBeginHour; + endHour = TasksModeConstants.MealtimeLunchEndHour; + } + else + { + return false; + } + + return true; + } + + /* Under TasksMode, if you input today's date, the future date should get mapped to the current date instead of next year. + E.g. if the input is "meet on 7 july" and the reference time is 7 july 2022, + expected future value --> 7 july 2022 && + past value--> 7 july 2021 + The check below is evaluated separately for every entry of the value set; slot.Value is reassigned and slot is returned. + */ + private static DateTimeParseResult TasksModeModifyDateValue(DateTimeParseResult slot, DateObject referenceTime) + { + var value = (SortedDictionary)slot.Value; + if (value != null && value.ContainsKey(ResolutionKey.ValueSet)) + { + if (value[ResolutionKey.ValueSet] is IList> valueSet && valueSet.Any()) + { + foreach (var values in valueSet) + { + // NOTE(review): reads the Value key without a presence check - assumes every entry carries it; TODO confirm. + var inputTime = DateObject.Parse(values[DateTimeResolutionKey.Value], CultureInfo.InvariantCulture); + var inputDay = inputTime.Day; + var inputMonth = inputTime.Month; + + // Text contains NextWeekGroupName and the timex has no fuzzy-year marker: + // both Value and Timex are overwritten with the date given by referenceTime.Upcoming(DayOfWeek.Monday). + if (slot.Text.Contains(TasksModeConstants.NextWeekGroupName) && !slot.TimexStr.Contains(Constants.TimexFuzzyYear)) + { + var tempdate = referenceTime.Upcoming(DayOfWeek.Monday).Date; + var dateTimeToSet = DateObject.MinValue.SafeCreateFromValue(tempdate.Year, tempdate.Month, tempdate.Day); + values[DateTimeResolutionKey.Value] = DateTimeFormatUtil.FormatDate(dateTimeToSet); + values[DateTimeResolutionKey.Timex] = $"{DateTimeFormatUtil.LuisDate(dateTimeToSet)}"; + } + else if (slot.TimexStr.Contains(Constants.TimexFuzzyYear) && inputDay == referenceTime.Day && inputMonth == referenceTime.Month) + { + // ignore for input text like monday, tue etc + // Otherwise the entry's Value is moved back by one year; its Timex is left as it was. + if (!slot.TimexStr.Contains(Constants.TimexFuzzyWeek)) + { + var newDate = inputTime.Date.AddYears(-1); + var 
dateTimeToSet = DateObject.MinValue.SafeCreateFromValue(newDate.Year, newDate.Month, newDate.Day); + + values[DateTimeResolutionKey.Value] = DateTimeFormatUtil.FormatDate(dateTimeToSet); + } + } + + } + } + } + + slot.Value = value; + + return slot; + } + + /* TasksMode specific change. + Under TasksMode, if the input is today's dateperiod, the future value should get mapped to the current dateperiod instead of next year. + E.g. if the input is "meet after 7 july" and the reference time is 7 july 2022, + expected future start value --> 7 july 2022 && + past start value--> 7 july 2021 + Slots whose timex has no fuzzy-year marker are returned unchanged. + */ + private static DateTimeParseResult TasksModeModifyDatePeriodValue(DateTimeParseResult slot, DateObject referenceTime) + { + if (!slot.TimexStr.Contains(Constants.TimexFuzzyYear)) + { + return slot; + } + + var value = (SortedDictionary)slot.Value; + + if (value != null && value.ContainsKey(ResolutionKey.ValueSet)) + { + if (value[ResolutionKey.ValueSet] is IList> valueSet && valueSet.Any()) + { + // Entries are consumed in pairs: element i is treated as the past value and element i + 1 as the future value. + for (int i = 0; i < valueSet.Count - 1; i = i + 2) + { + var pastvalue = valueSet.ElementAt(i); + var futurevalue = valueSet.ElementAt(i + 1); + + DateObject pastdate; + DateObject futuredate; + + bool maptonew = false; + + // Remap only when the future start (or end) has the reference day and month but a different year, + // and the timex has no fuzzy-week marker. + // NOTE(review): the presence checks use the literals "start"/"end" on the past entry, while the reads use + // DateTimeResolutionKey.Start/End on the future entry - presumably the same keys on both entries; confirm. + if (pastvalue.ContainsKey("start")) + { + futuredate = DateObject.Parse(futurevalue[DateTimeResolutionKey.Start], CultureInfo.InvariantCulture); + + if ((futuredate.Day == referenceTime.Day) && (futuredate.Month == referenceTime.Month) + && (futuredate.Year != referenceTime.Year) && (!slot.TimexStr.Contains(Constants.TimexFuzzyWeek))) + { + maptonew = true; + } + + } + + if (pastvalue.ContainsKey("end")) + { + + futuredate = DateObject.Parse(futurevalue[DateTimeResolutionKey.End], CultureInfo.InvariantCulture); + + if ((futuredate.Day == referenceTime.Day) && (futuredate.Month == referenceTime.Month) + && (futuredate.Year != referenceTime.Year) && (!slot.TimexStr.Contains(Constants.TimexFuzzyWeek))) + { + maptonew = true; + } + + } + + // When remapping, both the past and the future entry are moved back by one year. + if (maptonew) + { + { + if 
(pastvalue.ContainsKey("start")) + { + pastdate = DateObject.Parse(pastvalue[DateTimeResolutionKey.Start], + CultureInfo.InvariantCulture); + + futuredate = DateObject.Parse(futurevalue[DateTimeResolutionKey.Start], + CultureInfo.InvariantCulture); + + futurevalue[DateTimeResolutionKey.Start] = DateTimeFormatUtil.FormatDate(futuredate.AddYears(-1)); + + pastvalue[DateTimeResolutionKey.Start] = DateTimeFormatUtil.FormatDate(pastdate.AddYears(-1)); + } + + if (pastvalue.ContainsKey("end")) + { + pastdate = DateObject.Parse(pastvalue[DateTimeResolutionKey.End], + CultureInfo.InvariantCulture); + + futuredate = DateObject.Parse(futurevalue[DateTimeResolutionKey.End], + CultureInfo.InvariantCulture); + + futurevalue[DateTimeResolutionKey.End] = DateTimeFormatUtil.FormatDate(futuredate.AddYears(-1)); + + pastvalue[DateTimeResolutionKey.End] = DateTimeFormatUtil.FormatDate(pastdate.AddYears(-1)); + } + + } + + } + + } + } + } + + slot.Value = value; + + return slot; + } + + /* TasksMode specific change. + If input datetimeperiod string precedes the referenceTime. + ex if input is "meet on 7 july morning" and reference time is 7 july 2022 10pm, + expected future value should get mapped to 7 july 2023, morning && + past value get mapped to 7 july 2022, morning. + ex if input is "meet on thursday morning" and reference time is 7 july 2022 (thursday) 10pm, + expected future value should get mapped to 14 july 2022, morning && + past value get mapped to 7 july 2022, morning. 
*/

        // Re-anchors the past/future resolutions of a fuzzy-year date-time period around referenceTime.
        // Value-set entries are processed as (past, future) pairs; a value set holding a single entry is
        // only moved when the timex is also a fuzzy-week one.
        // Returns the same slot instance with its Value re-assigned.
        private static DateTimeParseResult TasksModeModifyDateTimePeriodValue(DateTimeParseResult slot, DateObject referenceTime)
        {
            // Only timexes carrying the fuzzy-year marker are adjusted.
            if (!slot.TimexStr.Contains(Constants.TimexFuzzyYear))
            {
                return slot;
            }

            // NOTE(review): the resolution is assumed to be a string-keyed map whose value set is a list of
            // string -> string entries (they are indexed by string keys and parsed/re-formatted below) - confirm.
            var value = (SortedDictionary<string, object>)slot.Value;

            if (value != null && value.TryGetValue(ResolutionKey.ValueSet, out var rawValueSet) &&
                rawValueSet is IList<Dictionary<string, string>> valueSet && valueSet.Any())
            {
                bool isFuzzyWeek = slot.TimexStr.Contains(Constants.TimexFuzzyWeek);

                DateObject Parse(string text) => DateObject.Parse(text, CultureInfo.InvariantCulture);

                // Rebuilds the date from its year..second fields through SafeCreateFromValue and formats it.
                string Format(DateObject date) => DateTimeFormatUtil.FormatDateTime(
                    DateObject.MinValue.SafeCreateFromValue(date.Year, date.Month, date.Day, date.Hour, date.Minute, date.Second));

                // The "start" bound decides whether an entry is remapped; "end" is consulted only when "start" is absent.
                string AnchorKey(Dictionary<string, string> entry)
                {
                    if (entry.ContainsKey("start"))
                    {
                        return DateTimeResolutionKey.Start;
                    }

                    if (entry.ContainsKey("end"))
                    {
                        return DateTimeResolutionKey.End;
                    }

                    return null;
                }

                // Fuzzy week: past takes the old future value and future moves 7 days later.
                // Otherwise:  future takes the old past value and past moves one year earlier.
                void ShiftPair(Dictionary<string, string> pastEntry, Dictionary<string, string> futureEntry, string key)
                {
                    if (isFuzzyWeek)
                    {
                        var futureDate = Parse(futureEntry[key]);
                        pastEntry[key] = futureEntry[key];
                        futureEntry[key] = Format(futureDate.AddDays(7));
                    }
                    else
                    {
                        var pastDate = Parse(pastEntry[key]);
                        futureEntry[key] = pastEntry[key];
                        pastEntry[key] = Format(pastDate.AddYears(-1));
                    }
                }

                for (int i = 0; i < valueSet.Count - 1; i += 2)
                {
                    var past = valueSet[i];
                    var future = valueSet[i + 1];

                    var anchor = AnchorKey(past);
                    if (anchor == null)
                    {
                        continue;
                    }

                    var pastAnchor = Parse(past[anchor]);
                    var futureAnchor = Parse(future[anchor]);

                    // Fuzzy week: remap when the future value is already behind the reference time.
                    // Otherwise:  remap when the past value is still ahead of the reference time.
                    bool remap = isFuzzyWeek ? futureAnchor < referenceTime : pastAnchor > referenceTime;
                    if (!remap)
                    {
                        continue;
                    }

                    if (past.ContainsKey("start"))
                    {
                        ShiftPair(past, future, DateTimeResolutionKey.Start);
                    }

                    if (past.ContainsKey("end"))
                    {
                        ShiftPair(past, future, DateTimeResolutionKey.End);
                    }
                }

                // A lone fuzzy-week entry that lies before the reference time is pushed one week ahead.
                if (valueSet.Count == 1 && isFuzzyWeek)
                {
                    var only = valueSet[0];
                    var anchor = AnchorKey(only);

                    if (anchor != null && Parse(only[anchor]) < referenceTime)
                    {
                        if (only.ContainsKey("start"))
                        {
                            only[DateTimeResolutionKey.Start] = Format(Parse(only[DateTimeResolutionKey.Start]).AddDays(7));
                        }

                        if (only.ContainsKey("end"))
                        {
                            only[DateTimeResolutionKey.End] = Format(Parse(only[DateTimeResolutionKey.End]).AddDays(7));
                        }
                    }
                }
            }

            slot.Value = value;
            return slot;
        }

        /*
        If input datetime string precedes the referenceTime.
        ex if input is "meet after 7 july at 9pm" and reference time is 7 july 2022 10pm,
        expected future value should get mapped to 7 july 2023,9pm &&
        past value get mapped to 7 july 2022,9pm.
ex if input is "meet on thursday at 6pm" and reference time is 7 july 2022 (thursday) 10pm,
        expected future value should get mapped to 14 july 2022, 6pm &&
        past value get mapped to 7 july 2022, 6pm.
        */

        // For fuzzy-year timexes, walks the value set as (past, future) pairs. When a pair's future value
        // lies before referenceTime, the past value takes the old future value and the future value is
        // moved 7 days (fuzzy-week timex) or one year (otherwise) later.
        // Returns the same slot instance with its Value re-assigned.
        private static DateTimeParseResult TasksModeModifyDateTimeValue(DateTimeParseResult slot, DateObject referenceTime)
        {
            if (!slot.TimexStr.Contains(Constants.TimexFuzzyYear))
            {
                return slot;
            }

            // NOTE(review): the resolution is assumed to be a string-keyed map whose value set is a list of
            // string -> string entries - confirm against the code that builds the resolution.
            var value = (SortedDictionary<string, object>)slot.Value;

            if (value != null && value.TryGetValue(ResolutionKey.ValueSet, out var rawValueSet) &&
                rawValueSet is IList<Dictionary<string, string>> valueSet && valueSet.Any())
            {
                bool isFuzzyWeek = slot.TimexStr.Contains(Constants.TimexFuzzyWeek);

                for (int i = 0; i < valueSet.Count - 1; i += 2)
                {
                    var past = valueSet[i];
                    var future = valueSet[i + 1];

                    // The past value is not used in the decision; it is still parsed so that a missing
                    // or malformed entry keeps failing the way it did before.
                    _ = DateObject.Parse(past[DateTimeResolutionKey.Value], CultureInfo.InvariantCulture);
                    var futureDateTime = DateObject.Parse(future[DateTimeResolutionKey.Value], CultureInfo.InvariantCulture);

                    if (futureDateTime >= referenceTime)
                    {
                        continue;
                    }

                    past[DateTimeResolutionKey.Value] = future[DateTimeResolutionKey.Value];

                    // Only the calendar date is shifted; the time of day is copied from the original future value.
                    var shiftedDate = isFuzzyWeek ? futureDateTime.Date.AddDays(7) : futureDateTime.AddYears(1);
                    var dateTimeToSet = DateObject.MinValue.SafeCreateFromValue(
                        shiftedDate.Year, shiftedDate.Month, shiftedDate.Day,
                        futureDateTime.Hour, futureDateTime.Minute, futureDateTime.Second);

                    future[DateTimeResolutionKey.Value] = DateTimeFormatUtil.FormatDateTime(dateTimeToSet);
                }
            }

            slot.Value = value;
            return slot;
        }

        /*Tasksmode
specific change.
        If input time string precedes the referenceTime, then the date should be set to the next day,
        and instead of returning time only, both date and time should be returned. Example:
        "Do this at 9 AM" issued when the current time is past 9 AM, e.g., 10 AM. When AM/PM is not explicitly mentioned,
        then this has to be done for both AM and PM but depending on the date, e.g., if we say "Do this at 9" but current time is
        8 PM, then we mean 9 PM on the same day or 9 AM the next day.
        */

        // Returns the same slot instance with its Value re-assigned and its Type set to either the time
        // type (nothing was rewritten) or the datetime type (at least one entry was moved to the next day).
        private static DateTimeParseResult TasksModeModifyTimeValue(DateTimeParseResult slot, DateObject referenceTime)
        {
            // NOTE(review): the resolution is assumed to be a string-keyed map whose value set is a list of
            // string -> string entries - confirm against the code that builds the resolution.
            var value = (SortedDictionary<string, object>)slot.Value;

            var timeType = $"{ParserTypeName}.{Constants.SYS_DATETIME_TIME}";
            var dateTimeType = $"{ParserTypeName}.{Constants.SYS_DATETIME_DATETIME}";
            var newType = timeType;

            if (value != null && value.TryGetValue(ResolutionKey.ValueSet, out var rawValueSet) &&
                rawValueSet is IList<Dictionary<string, string>> valueSet && valueSet.Any())
            {
                foreach (var values in valueSet)
                {
                    var inputTime = DateObject.Parse(values[DateTimeResolutionKey.Value], CultureInfo.InvariantCulture);
                    int inputHour = inputTime.Hour;
                    int inputMinute = inputTime.Minute;

                    // Compared at hour/minute granularity only.
                    bool alreadyPassed = inputHour < referenceTime.Hour ||
                                         (inputHour == referenceTime.Hour && inputMinute < referenceTime.Minute);

                    // An entry that has not passed yet stays a plain time unless an earlier entry was
                    // already promoted to a date-time; in that case it is promoted too, on the reference day.
                    if (!alreadyPassed && newType != dateTimeType)
                    {
                        continue;
                    }

                    var day = alreadyPassed ? referenceTime.Date.AddDays(1) : referenceTime.Date;
                    var dateTimeToSet = DateObject.MinValue.SafeCreateFromValue(day.Year, day.Month, day.Day, inputHour, inputMinute, inputTime.Second);

                    // Timex time part: "HH", or "HH:mm" when the minute is non-zero.
                    var timeStr = inputHour.ToString("D2", CultureInfo.InvariantCulture);
                    if (inputMinute > 0)
                    {
                        timeStr += ":" + inputMinute.ToString("D2", CultureInfo.InvariantCulture);
                    }

                    values[DateTimeResolutionKey.Timex] = $"{DateTimeFormatUtil.LuisDate(dateTimeToSet)}T{timeStr}";
                    values[DateTimeResolutionKey.Value] = DateTimeFormatUtil.FormatDateTime(dateTimeToSet);
                    values[ResolutionKey.Type] = $"{Constants.SYS_DATETIME_DATETIME}";

                    if (alreadyPassed && newType == timeType)
                    {
                        newType = dateTimeType;
                    }
                }
            }

            slot.Value = value;
            slot.Type = newType;

            return slot;
        }

        /*Tasksmode specific change.
        If input timeperiod string precedes the referenceTime, then the date should be set to the next day,
        and instead of returning time only, both date and time should be returned.Example:
        "Do this in morning" issued when the current time is past 9 pm.
*/

        // Returns the same slot instance with its Value re-assigned and its Type set to the time-period
        // type, or to the datetime-period type when at least one entry was moved to the next day.
        private static DateTimeParseResult TasksModeTimePeriodValue(DateTimeParseResult slot, DateObject referenceTime)
        {
            // NOTE(review): the resolution is assumed to be a string-keyed map whose value set is a list of
            // string -> string entries - confirm against the code that builds the resolution.
            var value = (SortedDictionary<string, object>)slot.Value;
            var newType = $"{ParserTypeName}.{Constants.SYS_DATETIME_TIMEPERIOD}";

            if (value != null && value.TryGetValue(ResolutionKey.ValueSet, out var rawValueSet) &&
                rawValueSet is IList<Dictionary<string, string>> valueSet && valueSet.Any())
            {
                // The slot timex does not change while iterating, so it is split once.
                var timexComponents = slot.TimexStr.Split(Constants.DatePeriodTimexSplitter, StringSplitOptions.RemoveEmptyEntries);

                // Hour/minute comparison against the reference time (seconds are ignored).
                bool IsBeforeReference(DateObject time) =>
                    time.Hour < referenceTime.Hour ||
                    (time.Hour == referenceTime.Hour && time.Minute < referenceTime.Minute);

                // Timex time part: "HH", or "HH:mm" when the minute is non-zero.
                string TimexTime(int hour, int minute)
                {
                    var text = hour.ToString("D2", CultureInfo.InvariantCulture);
                    if (minute > 0)
                    {
                        text += ":" + minute.ToString("D2", CultureInfo.InvariantCulture);
                    }

                    return text;
                }

                foreach (var values in valueSet)
                {
                    bool hasStart = values.ContainsKey("start");
                    bool hasEnd = values.ContainsKey("end");

                    // The start bound decides; the end bound is consulted only when there is no start.
                    // The end branch compares the end *minute* with the reference minute (it previously
                    // compared the end hour with the reference minute).
                    bool mapToNextDay = false;
                    if (hasStart)
                    {
                        mapToNextDay = IsBeforeReference(DateObject.Parse(values[DateTimeResolutionKey.Start], CultureInfo.InvariantCulture));
                    }
                    else if (hasEnd)
                    {
                        mapToNextDay = IsBeforeReference(DateObject.Parse(values[DateTimeResolutionKey.End], CultureInfo.InvariantCulture));
                    }

                    if (!mapToNextDay)
                    {
                        continue;
                    }

                    var nextDay = referenceTime.Date.AddDays(1);
                    var timestr = "(";

                    if (hasStart)
                    {
                        var startTime = DateObject.Parse(values[DateTimeResolutionKey.Start], CultureInfo.InvariantCulture);
                        var startDateTimeToSet = DateObject.MinValue.SafeCreateFromValue(nextDay.Year, nextDay.Month, nextDay.Day, startTime.Hour, startTime.Minute, 0);
                        values[DateTimeResolutionKey.Start] = DateTimeFormatUtil.FormatDateTime(startDateTimeToSet);

                        timestr = "(" + $"{DateTimeFormatUtil.LuisDate(startDateTimeToSet)}T{TimexTime(startTime.Hour, startTime.Minute)}";
                    }

                    if (hasEnd)
                    {
                        var endTime = DateObject.Parse(values[DateTimeResolutionKey.End], CultureInfo.InvariantCulture);
                        var endDateTimeToSet = DateObject.MinValue.SafeCreateFromValue(nextDay.Year, nextDay.Month, nextDay.Day, endTime.Hour, endTime.Minute, 0);
                        values[DateTimeResolutionKey.End] = DateTimeFormatUtil.FormatDateTime(endDateTimeToSet);

                        var endTimex = $"{DateTimeFormatUtil.LuisDate(endDateTimeToSet)}T{TimexTime(endTime.Hour, endTime.Minute)}";

                        // Separate from the start part with a comma only when a start part was written.
                        timestr = timestr == "(" ? timestr + endTimex : timestr + "," + endTimex;
                    }

                    newType = $"{ParserTypeName}.{Constants.SYS_DATETIME_DATETIMEPERIOD}";

                    if (timexComponents.Length == 3)
                    {
                        // Three components: re-use the third component of the original timex and close the tuple.
                        timestr = timestr + "," + timexComponents[2] + ")";
                    }
                    else
                    {
                        // handling cases : afternoon, morning, night
                        timestr = $"{DateTimeFormatUtil.LuisDate(nextDay)}{values[DateTimeResolutionKey.Timex]}";
                    }

                    values[DateTimeResolutionKey.Timex] = timestr;
                    values[ResolutionKey.Type] = $"{Constants.SYS_DATETIME_DATETIMEPERIOD}";
                }
            }

            slot.Value = value;
            slot.Type = newType;

            return slot;
        }
    }
}
\ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TasksModeSetHandler.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TasksModeSetHandler.cs new file mode 100644 index
0000000000..e74f9e42a1 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TasksModeSetHandler.cs @@ -0,0 +1,328 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Text.DataTypes.TimexExpression;
using DateObject = System.DateTime;

namespace Microsoft.Recognizers.Text.DateTime.Utilities
{
    /// <summary>
    /// TasksMode helpers that build timexes and resolution dictionaries for "set" date/time expressions
    /// (e.g. "every other day at 2pm", "19th of every month").
    /// </summary>
    public static class TasksModeSetHandler
    {
        /// <summary>
        /// Stores <paramref name="innerTimex"/> on <paramref name="result"/>, sets both its future and past
        /// values to the set key followed by the timex, and marks the result successful. When
        /// <paramref name="pr"/> carries a resolved value and its timex ends with the weekend prefix,
        /// the future/past values are replaced by the first item of the parsed tuples.
        /// </summary>
        /// <returns>The same <paramref name="result"/> instance.</returns>
        public static DateTimeResolutionResult TasksModeResolveSet(ref DateTimeResolutionResult result, string innerTimex, DateTimeParseResult pr = null)
        {
            result.Timex = innerTimex;

            result.FutureValue = result.PastValue = ExtendSetTimex(TasksModeConstants.KeySet, innerTimex);

            if (pr != null)
            {
                var value = (DateTimeResolutionResult)pr.Value;

                // A parse result without a resolved value leaves the set timex in place.
                if (value?.FutureValue != null && pr.TimexStr.EndsWith(TasksModeConstants.WeekEndPrefix, StringComparison.Ordinal))
                {
                    // NOTE(review): the tuples are assumed to be (DateObject, DateObject) pairs - confirm
                    // against the parser that produces the weekend value.
                    result.FutureValue = ((Tuple<DateObject, DateObject>)value.FutureValue).Item1;
                    result.PastValue = ((Tuple<DateObject, DateObject>)value.PastValue).Item1;
                }
            }

            result.Success = true;

            return result;
        }

        /// <summary>
        /// Builds the resolution entries for a set expression: the single date/time entry taken from
        /// <paramref name="resolutionDic"/> plus, depending on the timex prefix, an interval size and type.
        /// </summary>
        /// <returns>A new dictionary; it is empty when no date/time entry could be added.</returns>
        public static Dictionary<string, string> TasksModeGenerateResolutionSetParser(Dictionary<string, string> resolutionDic, string mod, string timex)
        {
            var res = new Dictionary<string, string>();

            TasksModeAddAltSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.DATETIMEALT, mod, res);

            // Interval information is only attached when a date/time entry was added.
            if (res.Count == 0)
            {
                return res;
            }

            // Adds the amount/unit extracted from the timex, falling back to the given defaults.
            void AddExtractedInterval(string defaultSize, string defaultType)
            {
                var extracted = new Dictionary<string, string>();
                TimexRegex.Extract(TasksModeConstants.PeriodString, timex, extracted);
                res.Add(TasksModeConstants.KeyIntSize, extracted.TryGetValue(TasksModeConstants.AmountString, out var intervalSize) ? intervalSize : defaultSize);
                res.Add(TasksModeConstants.KeyIntType, extracted.TryGetValue(TasksModeConstants.DateUnitString, out var intervalType) ? intervalType : defaultType);
            }

            if (timex.StartsWith(Constants.GeneralPeriodPrefix, StringComparison.Ordinal))
            {
                AddExtractedInterval(string.Empty, string.Empty);
            }
            else if (timex.StartsWith(TasksModeConstants.FuzzyYear, StringComparison.Ordinal))
            {
                AddExtractedInterval("1", Constants.TimexWeek);
            }
            else if (timex.StartsWith(Constants.TimeTimexPrefix, StringComparison.Ordinal))
            {
                res.Add(TasksModeConstants.KeyIntSize, "1");
                res.Add(TasksModeConstants.KeyIntType, Constants.TimexDay);
            }

            return res;
        }

        /// <summary>
        /// Appends a periodicity suffix to <paramref name="timex"/>: weekly for fuzzy-week timexes, yearly
        /// for fuzzy-year ones, daily for anything that does not end with the weekend/weekday prefix, and
        /// nothing otherwise.
        /// </summary>
        public static string TasksModeTimexIntervalExt(string timex)
        {
            string periodicity;
            if (timex.Contains(Constants.TimexFuzzyWeek))
            {
                periodicity = TasksModeConstants.WeeklyPeriodSuffix;
            }
            else if (timex.Contains(Constants.TimexFuzzyYear))
            {
                periodicity = TasksModeConstants.YearlyPeriodSuffix;
            }
            else if (!timex.EndsWith(TasksModeConstants.WeekEndPrefix, StringComparison.Ordinal) &&
                     !timex.EndsWith(TasksModeConstants.WeekDayPrefix, StringComparison.Ordinal))
            {
                periodicity = TasksModeConstants.PeriodDaySuffix;
            }
            else
            {
                periodicity = string.Empty;
            }

            return ExtendSetTimex(timex, periodicity);
        }

        /// <summary>
        /// Fills <c>FutureResolution</c> and <c>PastResolution</c> of <paramref name="result"/> from its
        /// timex and <paramref name="refDate"/>. Except for the fallback branch, both dictionaries receive
        /// the same single key/value pair. <paramref name="er"/> is not used.
        /// </summary>
        /// <returns>The same <paramref name="result"/> instance.</returns>
        public static DateTimeResolutionResult TasksModeAddResolution(ref DateTimeResolutionResult result, ExtractResult er, DateObject refDate)
        {
            // Offset of the time part in a resolved "<10-char date> <time>" value.
            // NOTE(review): assumes the resolver emits the date in the first 10 characters - confirm.
            const int TimePartStartIdx = 11;

            var timex = result.Timex;
            string resKey;
            string value;

            if (timex.EndsWith(TasksModeConstants.WeekEndPrefix, StringComparison.Ordinal))
            {
                // Weekend: the reference date itself on a Sunday, otherwise the upcoming Sunday.
                resKey = TimeTypeConstants.DATE;
                if (refDate.DayOfWeek == DayOfWeek.Sunday)
                {
                    value = DateTimeFormatUtil.FormatDate(refDate);
                }
                else
                {
                    var sunday = refDate.Upcoming(DayOfWeek.Sunday).Date;
                    value = DateTimeFormatUtil.FormatDate(DateObject.MinValue.SafeCreateFromValue(sunday.Year, sunday.Month, sunday.Day));
                }
            }
            else if (timex.EndsWith(TasksModeConstants.WeekDayPrefix, StringComparison.Ordinal))
            {
                // Weekday: Saturday and Sunday roll forward to Monday, any other day stays as is.
                resKey = TimeTypeConstants.DATE;
                if (refDate.DayOfWeek == DayOfWeek.Saturday)
                {
                    value = DateTimeFormatUtil.FormatDate(refDate.AddDays(2));
                }
                else if (refDate.DayOfWeek == DayOfWeek.Sunday)
                {
                    value = DateTimeFormatUtil.FormatDate(refDate.AddDays(1));
                }
                else
                {
                    value = DateTimeFormatUtil.FormatDate(refDate);
                }
            }
            else if (timex.StartsWith(Constants.GeneralPeriodPrefix, StringComparison.Ordinal))
            {
                resKey = TimeTypeConstants.DATE;
                value = DateTimeFormatUtil.FormatDate(refDate);
            }
            else if (timex.StartsWith(TasksModeConstants.FuzzyYear, StringComparison.Ordinal))
            {
                var timexRes = TimexResolver.Resolve(new[] { timex }, refDate);

                value = timexRes.Values[1].Value;
                resKey = timex.Contains(Constants.TimeTimexPrefix) ? TimeTypeConstants.DATETIME : TimeTypeConstants.DATE;

                var nextWeek = refDate.AddDays(7);

                // value = "09-04-2022 19:30": only the date substring of the value (value[0:10]) is compared.
                // When the resolved date is exactly one week after the reference date, the reference date
                // itself is used instead (for date-times only if the time has not passed yet).
                if (DateTimeFormatUtil.FormatDate(nextWeek).Equals(value.Substring(TasksModeConstants.IntDateStartIdx, TasksModeConstants.IntDateEndIdx)) &&
                    timex.StartsWith(TasksModeConstants.FuzzyYearAndWeek, StringComparison.Ordinal))
                {
                    if (timex.Contains(Constants.TimeTimexPrefix))
                    {
                        var timePart = value.Substring(TimePartStartIdx);

                        // Ordinal comparison: both operands are expected to be machine-formatted time strings.
                        if (string.CompareOrdinal(DateTimeFormatUtil.FormatTime(refDate), timePart) <= 0)
                        {
                            value = JoinDateWithValue(refDate, timePart);
                        }
                    }
                    else
                    {
                        value = DateTimeFormatUtil.FormatDate(refDate);
                    }
                }
            }
            else if (timex.StartsWith(Constants.TimeTimexPrefix, StringComparison.Ordinal))
            {
                var timexRes = TimexResolver.Resolve(new[] { timex }, refDate);

                var time = GetStartValue(timexRes) ?? timexRes.Values[0].Value;

                // A time that is already behind the reference time is resolved on the next day.
                var resDate = string.CompareOrdinal(DateTimeFormatUtil.FormatTime(refDate), time) > 0 ? refDate.AddDays(1) : refDate;

                resKey = TimeTypeConstants.DATETIME;
                value = JoinDateWithValue(resDate, time);
            }
            else
            {
                // Fallback: expose the set values computed earlier, which may differ for future and past.
                result.FutureResolution = new Dictionary<string, string>
                {
                    { TimeTypeConstants.SET, (string)result.FutureValue },
                };

                result.PastResolution = new Dictionary<string, string>
                {
                    { TimeTypeConstants.SET, (string)result.PastValue },
                };

                return result;
            }

            // Two separate dictionaries so later changes to one do not leak into the other.
            result.FutureResolution = new Dictionary<string, string>
            {
                { resKey, value },
            };

            result.PastResolution = new Dictionary<string, string>
            {
                { resKey, value },
            };

            return result;
        }

        // function used for replacing "every" with "this" in string for parsing text i.e "19th of every month". (only with month duration)
        // Replaces every occurrence of the trimmed value with the first term of thisTermList
        // (the first term must agree with "month").
        public static string ReplaceValueInTextWithFutTerm(string text, string value, List<string> thisTermList)
        {
            return text.Replace(value.Trim(), thisTermList[0]);
        }

        // Formats the date part of resDate and appends value after a single space.
        internal static string JoinDateWithValue(DateObject resDate, string value)
        {
            return string.Join(" ", DateTimeFormatUtil.FormatDate(resDate), value);
        }

        // Copies the first available entry of resolutionDic into res, trying DATE, then DATETIME, then TIME.
        // For DATE and DATETIME the set type name is recorded as well. The type parameter is not used.
        internal static void TasksModeAddAltSingleDateTimeToResolution(Dictionary<string, string> resolutionDic, string type, string mod,
                                                                     Dictionary<string, string> res)
        {
            if (resolutionDic.ContainsKey(TimeTypeConstants.DATE))
            {
                res.Add(TasksModeConstants.KeySetTypeName, TimeTypeConstants.DATE);
                MergedParserUtil.AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.DATE, mod, res);
            }
            else if (resolutionDic.ContainsKey(TimeTypeConstants.DATETIME))
            {
                res.Add(TasksModeConstants.KeySetTypeName, Constants.SYS_DATETIME_DATETIME);
                MergedParserUtil.AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.DATETIME, mod, res);
            }
            else if (resolutionDic.ContainsKey(TimeTypeConstants.TIME))
            {
                MergedParserUtil.AddSingleDateTimeToResolution(resolutionDic, TimeTypeConstants.TIME, mod, res);
            }
        }

        // Maps the part-of-day timexes to their fixed start-time strings; any other timex yields the
        // Start of the first resolved value.
        internal static string GetStartValue(Resolution timexRes)
        {
            var first = timexRes.Values[0];

            switch (first.Timex)
            {
                case Constants.Morning:
                    return TasksModeConstants.StringMorningHHMMSS;
                case Constants.Afternoon:
                    return TasksModeConstants.StringAfternoonHHMMSS;
                case Constants.Evening:
                    return TasksModeConstants.StringEveningHHMMSS;
                case Constants.Night:
                    return TasksModeConstants.StringNightHHMMSS;
                default:
                    return first.Start;
            }
        }

        // function replaces P1 with P2 when parsing values i.e. every other day at 2pm
        internal static string TasksModeTimexIntervalReplace(string timex)
        {
            return timex.Replace(TasksModeConstants.DailyPeriodPrefix, TasksModeConstants.AlternatePeriodPrefix);
        }

        // Plain concatenation of the two timex fragments.
        internal static string ExtendSetTimex(string timex, string extTimex)
        {
            return timex + extTimex;
        }
    }
}
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs index e36344c155..375ef6b034 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Globalization; using System.Linq; using System.Text; using System.Text.RegularExpressions; @@ -75,45 +79,61 @@ public DateTimeResolutionResult PackTimeResult(DateTimeExtra extra, Ti noDesc = false; } - int hour = timeResult.Hour > 0 ? timeResult.Hour : 0, + // Hours > 24 (e.g. 25時 which resolves to the next day) are kept unnormalized in the timex + // to avoid ambiguity in other entities. For example, "on the 30th at 25" is resolved to + // "XXXX-XX-30T25" because with "XXXX-XX-30+1T01" it is not known if the day should be "31" or "01". + int hour = timeResult.Hour > 0 && timeResult.Hour != Constants.DayHourCount ? timeResult.Hour : 0, min = timeResult.Minute > 0 ? timeResult.Minute : 0, - second = timeResult.Second > 0 ? timeResult.Second : 0, - day = referenceTime.Day, - month = referenceTime.Month, - year = referenceTime.Year; + second = timeResult.Second > 0 ?
timeResult.Second : 0; var dateTimeResult = new DateTimeResolutionResult(); var build = new StringBuilder("T"); if (timeResult.Hour >= 0) { - build.Append(timeResult.Hour.ToString("D2")); + build.Append(hour.ToString("D2", CultureInfo.InvariantCulture)); } if (timeResult.Minute >= 0) { - build.Append(":" + timeResult.Minute.ToString("D2")); + build.Append(":" + min.ToString("D2", CultureInfo.InvariantCulture)); } if (timeResult.Second >= 0) { - build.Append(":" + timeResult.Second.ToString("D2")); + if (timeResult.Minute < 0) + { + build.Append(":" + min.ToString("D2", CultureInfo.InvariantCulture)); + } + + build.Append(":" + second.ToString("D2", CultureInfo.InvariantCulture)); } - if (noDesc) + // handle cases with time like 25時 (the hour is normalized in the past/future values) + if (timeResult.Hour > Constants.DayHourCount) + { + hour = timeResult.Hour - Constants.DayHourCount; + referenceTime = referenceTime.AddDays(1); + if (noDesc) + { + dateTimeResult.Comment = Constants.Comment_Am; + noDesc = false; + } + } + + if (noDesc && hour <= Constants.HalfDayHourCount && hour > Constants.DayHourStart) { // build.Append("ampm"); dateTimeResult.Comment = Constants.Comment_AmPm; } dateTimeResult.Timex = build.ToString(); - if (hour == 24) - { - hour = 0; - } - dateTimeResult.FutureValue = dateTimeResult.PastValue = DateObject.MinValue.SafeCreateFromValue(year, month, day, hour, min, second); + int day = referenceTime.Day, month = referenceTime.Month, year = referenceTime.Year; + dateTimeResult.FutureValue = dateTimeResult.PastValue = + DateObject.MinValue.SafeCreateFromValue(year, month, day, hour, min, second); dateTimeResult.Success = true; + return dateTimeResult; } @@ -126,7 +146,7 @@ public int MatchToValue(string text) if (Regex.IsMatch(text, @"\d+")) { - return int.Parse(text); + return int.Parse(text, CultureInfo.InvariantCulture); } if (text.Length == 1) @@ -155,6 +175,7 @@ public int MatchToValue(string text) return tempValue; } + // Handle am/pm modifiers 
(e.g. "1 in the afternoon") and time of day (e.g. "mid-morning") public void AddDesc(TimeResult result, string dayDesc) { if (string.IsNullOrEmpty(dayDesc)) @@ -164,11 +185,19 @@ public void AddDesc(TimeResult result, string dayDesc) dayDesc = NormalizeDayDesc(dayDesc); - if (LowBoundDesc.ContainsKey(dayDesc) && result.Hour < LowBoundDesc[dayDesc]) + if (result.Hour >= 0 && LowBoundDesc.ContainsKey(dayDesc) && (result.Hour < LowBoundDesc[dayDesc] || + (result.Hour == Constants.HalfDayHourCount && LowBoundDesc[dayDesc] == Constants.DayHourStart))) { + // cases like "1 in the afternoon", "12 midnight" result.Hour += Constants.HalfDayHourCount; result.LowBound = LowBoundDesc[dayDesc]; } + else if (result.Hour < 0 && LowBoundDesc.ContainsKey(dayDesc)) + { + // cases like "mid-morning", "mid-afternoon" + result.LowBound = LowBoundDesc[dayDesc]; + result.Hour = result.LowBound; + } else { result.LowBound = 0; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeOfDayResolutionResult.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeOfDayResolutionResult.cs index 5d0980f253..ef0325e0f4 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeOfDayResolutionResult.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeOfDayResolutionResult.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.DateTime +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.DateTime { public class TimeOfDayResolutionResult { @@ -9,5 +12,7 @@ public class TimeOfDayResolutionResult public int EndHour { get; set; } public int EndMin { get; set; } + + public int Swift { get; set; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimePeriodFunctions.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimePeriodFunctions.cs index 5adfa8192d..cedc96452c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimePeriodFunctions.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimePeriodFunctions.cs @@ -1,4 +1,9 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Globalization; using System.Text; using DateObject = System.DateTime; @@ -48,39 +53,93 @@ public static DateTimeResolutionResult Handle(IDateTimeParser timeParser, DateTi Success = true, }; + var spanHour = rightResult.Hour - leftResult.Hour; + if (spanHour < 0 || (spanHour == 0 && leftResult.Minute > rightResult.Minute)) + { + spanHour += Constants.DayHourCount; + } + // the right side doesn't contain desc while the left side does - if (rightResult.LowBound == -1 && leftResult.LowBound != -1 && rightResult.Hour <= leftResult.LowBound) + if (rightResult.LowBound == -1 && leftResult.LowBound != -1 && rightResult.Hour <= Constants.HalfDayHourCount && + spanHour > Constants.HalfDayHourCount) { rightResult.Hour += Constants.HalfDayHourCount; } + // the left side doesn't contain desc while the right side does + if (leftResult.LowBound == -1 && rightResult.LowBound != -1 && leftResult.Hour <= Constants.HalfDayHourCount && + spanHour > Constants.HalfDayHourCount) + { + leftResult.Hour += Constants.HalfDayHourCount; + } + + // No 'am' or 'pm' indicator + if (leftResult.LowBound == -1 && rightResult.LowBound == -1 && leftResult.Hour <= 
Constants.HalfDayHourCount && rightResult.Hour <= Constants.HalfDayHourCount) + { + if (spanHour > Constants.HalfDayHourCount) + { + if (leftResult.Hour > rightResult.Hour) + { + if (leftResult.Hour == Constants.HalfDayHourCount) + { + leftResult.Hour -= Constants.HalfDayHourCount; + } + else + { + rightResult.Hour += Constants.HalfDayHourCount; + } + } + } + + ret.Comment = Constants.Comment_AmPm; + } + int day = refTime.Day, month = refTime.Month, - year = refTime.Year; + year = refTime.Year, + rightSwiftDay = 0, + leftSwiftDay = 0; // determine if the right side time is smaller than the left side, if yes, add one day int hour = leftResult.Hour > 0 ? leftResult.Hour : 0, min = leftResult.Minute > 0 ? leftResult.Minute : 0, second = leftResult.Second > 0 ? leftResult.Second : 0; + // handle cases with time like 25時 which resolve to the next day + if (hour > Constants.DayHourCount) + { + hour -= Constants.DayHourCount; + leftSwiftDay++; + } + var leftTime = DateObject.MinValue.SafeCreateFromValue(year, month, day, hour, min, second); hour = rightResult.Hour > 0 ? rightResult.Hour : 0; min = rightResult.Minute > 0 ? rightResult.Minute : 0; second = rightResult.Second > 0 ? 
rightResult.Second : 0; + // handle cases with time like 25時 which resolve to the next day + if (hour > Constants.DayHourCount) + { + hour -= Constants.DayHourCount; + rightSwiftDay++; + } + var rightTime = DateObject.MinValue.SafeCreateFromValue(year, month, day, hour, min, second); - if (rightTime.Hour < leftTime.Hour) + if (rightResult.Hour < leftResult.Hour) { rightTime = rightTime.AddDays(1); } - ret.FutureValue = ret.PastValue = new Tuple(leftTime, rightTime); - var leftTimex = BuildTimex(leftResult); var rightTimex = BuildTimex(rightResult); ret.Timex = $"({leftTimex},{rightTimex},{BuildSpan(leftResult, rightResult)})"; + + leftTime = leftTime.AddDays(leftSwiftDay); + rightTime = rightTime.AddDays(rightSwiftDay); + + ret.FutureValue = ret.PastValue = new Tuple(leftTime, rightTime); return ret; } @@ -89,17 +148,17 @@ public static string BuildTimex(TimeResult timeResult) var build = new StringBuilder("T"); if (timeResult.Hour >= 0) { - build.Append(timeResult.Hour.ToString("D2")); + build.Append(timeResult.Hour.ToString("D2", CultureInfo.InvariantCulture)); } if (timeResult.Minute >= 0) { - build.Append(":" + timeResult.Minute.ToString("D2")); + build.Append(":" + timeResult.Minute.ToString("D2", CultureInfo.InvariantCulture)); } if (timeResult.Second >= 0) { - build.Append(":" + timeResult.Second.ToString("D2")); + build.Append(":" + timeResult.Second.ToString("D2", CultureInfo.InvariantCulture)); } return build.ToString(); @@ -148,7 +207,11 @@ public static string BuildSpan(TimeResult left, TimeResult right) } var spanTimex = new StringBuilder(); - spanTimex.Append($"PT{spanHour}H"); + spanTimex.Append("PT"); + if (spanHour > 0) + { + spanTimex.Append($"{spanHour}H"); + } if (spanMinute != 0 && spanSecond == 0) { @@ -161,5 +224,11 @@ public static string BuildSpan(TimeResult left, TimeResult right) return spanTimex.ToString(); } + + // used to filter ambiguous extractions e.g. 
'morgen' in German and Dutch + public static List ApplyPotentialPeriodAmbiguityHotfix(string text, List timePeriodErs) + { + return timePeriodErs; + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeResult.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeResult.cs index 1bb3d1c920..f99108f345 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeResult.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeResult.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.DateTime.Utilities +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.DateTime.Utilities { public class TimeResult { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeZoneResolutionResult.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeZoneResolutionResult.cs index 6b9e716fc9..7cc75944d8 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeZoneResolutionResult.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeZoneResolutionResult.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.DateTime +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.DateTime { public class TimeZoneResolutionResult { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeZoneUtility.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeZoneUtility.cs index 1d5df8d614..01f38fe89c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeZoneUtility.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeZoneUtility.cs @@ -1,5 +1,12 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Linq; +using System.Reflection; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Text.Matcher; @@ -7,14 +14,24 @@ namespace Microsoft.Recognizers.Text.DateTime { public static class TimeZoneUtility { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex BracketRegex = + new Regex(BaseDateTime.BracketRegex, RegexFlags, RegexTimeOut); + + private static TimeSpan RegexTimeOut => DateTimeRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + public static List MergeTimeZones(List originalErs, List timeZoneErs, string text) { foreach (var er in originalErs) { foreach (var timeZoneEr in timeZoneErs) { + // Extend timezone extraction to include brackets if any. + var tzEr = ExtendTimeZoneExtraction(timeZoneEr, text); + var begin = er.Start + er.Length; - var end = timeZoneEr.Start; + var end = tzEr.Start; if (begin < end) { @@ -22,7 +39,7 @@ public static List MergeTimeZones(List originalErs if (string.IsNullOrWhiteSpace(gapText)) { - var newLength = (int)(timeZoneEr.Start + timeZoneEr.Length - er.Start); + var newLength = (int)(tzEr.Start + tzEr.Length - er.Start); er.Text = text.Substring((int)er.Start, newLength); er.Length = newLength; @@ -86,5 +103,20 @@ public static StringMatcher BuildMatcherFromLists(params List[] collecti return matcher; } + + private static ExtractResult ExtendTimeZoneExtraction(ExtractResult timeZoneEr, string text) + { + var beforeStr = text.Substring(0, (int)timeZoneEr.Start); + var afterStr = text.Substring((int)timeZoneEr.Start + (int)timeZoneEr.Length); + var matchLeft = BracketRegex.Match(beforeStr); + var matchRight = BracketRegex.Match(afterStr); + if (matchLeft.Success && matchRight.Success) + { + timeZoneEr.Start -= matchLeft.Length; + timeZoneEr.Length += matchLeft.Length + matchRight.Length; + } + + return timeZoneEr; 
+ } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimexUtility.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimexUtility.cs index 90ec50ce21..2b657e5697 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimexUtility.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimexUtility.cs @@ -1,12 +1,41 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; +using System.Linq; using System.Text; +using Microsoft.Recognizers.Text.DataTypes.TimexExpression; using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime { + [Flags] + public enum UnspecificDateTimeTerms + { + /// + /// None + /// + None = 0, + + /// + /// NonspecificYear + /// + NonspecificYear = 1, + + /// + /// NonspecificMonth + /// + NonspecificMonth = 2, + + /// + /// NonspecificDay + /// + NonspecificDay = 4, + } + public static class TimexUtility { private static readonly Calendar Cal = DateTimeFormatInfo.InvariantInfo.Calendar; @@ -20,6 +49,7 @@ public static class TimexUtility { { DatePeriodTimexType.ByDay, Constants.TimexDay }, { DatePeriodTimexType.ByWeek, Constants.TimexWeek }, + { DatePeriodTimexType.ByFortnight, Constants.TimexFortnight }, { DatePeriodTimexType.ByMonth, Constants.TimexMonth }, { DatePeriodTimexType.ByYear, Constants.TimexYear }, }; @@ -28,30 +58,19 @@ public static string GenerateCompoundDurationTimex(Dictionary un { var unitList = new List(unitToTimexComponents.Keys); unitList.Sort((x, y) => (unitValueMap[x] < unitValueMap[y] ? 
1 : -1)); - var isTimeDurationAlreadyExist = false; - var timexBuilder = new StringBuilder(Constants.GeneralPeriodPrefix); + unitList = unitList.Select(t => unitToTimexComponents[t]).ToList(); + return TimexHelpers.GenerateCompoundDurationTimex(unitList); + } - for (int i = 0; i < unitList.Count; i++) + // TODO: Unify this two methods. This one here detect if "begin/end" have same year, month and day with "alter begin/end" and make them nonspecific. + public static string GenerateDatePeriodTimex(DateObject begin, DateObject end, DatePeriodTimexType timexType, DateObject alternativeBegin = default(DateObject), DateObject alternativeEnd = default(DateObject), bool hasYear = true) + { + // If the year is not specified, the combined range timex will use fuzzy years. + if (!hasYear) { - var timexComponent = unitToTimexComponents[unitList[i]]; - - // The Time Duration component occurs first time, - if (!isTimeDurationAlreadyExist && IsTimeDurationTimex(timexComponent)) - { - timexBuilder.Append($"{Constants.TimeTimexPrefix}{GetDurationTimexWithoutPrefix(timexComponent)}"); - isTimeDurationAlreadyExist = true; - } - else - { - timexBuilder.Append($"{GetDurationTimexWithoutPrefix(timexComponent)}"); - } + return GenerateDatePeriodTimex(begin, end, timexType, UnspecificDateTimeTerms.NonspecificYear); } - return timexBuilder.ToString(); - } - - public static string GenerateDatePeriodTimex(DateObject begin, DateObject end, DatePeriodTimexType timexType, DateObject alternativeBegin = default(DateObject), DateObject alternativeEnd = default(DateObject)) - { var equalDurationLength = (end - begin) == (alternativeEnd - alternativeBegin); if (alternativeBegin.IsDefaultValue() || alternativeEnd.IsDefaultValue()) @@ -59,30 +78,50 @@ public static string GenerateCompoundDurationTimex(Dictionary un equalDurationLength = true; } - var unitCount = "XX"; + var unitCount = equalDurationLength ? 
GetDatePeriodTimexUnitCount(begin, end, timexType) : "XX"; - if (equalDurationLength) + var datePeriodTimex = $"P{unitCount}{DatePeriodTimexTypeToTimexSuffix[timexType]}"; + + return $"({DateTimeFormatUtil.LuisDate(begin, alternativeBegin)},{DateTimeFormatUtil.LuisDate(end, alternativeEnd)},{datePeriodTimex})"; + } + + public static string GenerateDatePeriodTimex(DateObject begin, DateObject end, DatePeriodTimexType timexType, UnspecificDateTimeTerms terms) + { + var beginYear = begin.Year; + var endYear = end.Year; + var beginMonth = begin.Month; + var endMonth = end.Month; + var beginDay = begin.Day; + var endDay = end.Day; + + if ((terms & UnspecificDateTimeTerms.NonspecificYear) != 0) { - switch (timexType) - { - case DatePeriodTimexType.ByDay: - unitCount = (end - begin).TotalDays.ToString(CultureInfo.InvariantCulture); - break; - case DatePeriodTimexType.ByWeek: - unitCount = ((end - begin).TotalDays / 7).ToString(CultureInfo.InvariantCulture); - break; - case DatePeriodTimexType.ByMonth: - unitCount = (((end.Year - begin.Year) * 12) + (end.Month - begin.Month)).ToString(CultureInfo.InvariantCulture); - break; - default: - unitCount = ((end.Year - begin.Year) + ((end.Month - begin.Month) / 12.0)).ToString(CultureInfo.InvariantCulture); - break; - } + beginYear = endYear = -1; + } + + if ((terms & UnspecificDateTimeTerms.NonspecificMonth) != 0) + { + beginMonth = endMonth = -1; + } + + if ((terms & UnspecificDateTimeTerms.NonspecificDay) != 0) + { + beginDay = endDay = -1; } + var unitCount = GetDatePeriodTimexUnitCount(begin, end, timexType); + var datePeriodTimex = $"P{unitCount}{DatePeriodTimexTypeToTimexSuffix[timexType]}"; - return $"({DateTimeFormatUtil.LuisDate(begin, alternativeBegin)},{DateTimeFormatUtil.LuisDate(end, alternativeEnd)},{datePeriodTimex})"; + return $"({DateTimeFormatUtil.LuisDate(beginYear, beginMonth, beginDay)},{DateTimeFormatUtil.LuisDate(endYear, endMonth, endDay)},{datePeriodTimex})"; + } + + public static string 
GenerateDatePeriodTimex(DateObject begin, DateObject end, DatePeriodTimexType timexType, string timex1, string timex2) + { + var boundaryValid = !begin.IsDefaultValue() && !end.IsDefaultValue(); + var unitCount = boundaryValid ? GetDatePeriodTimexUnitCount(begin, end, timexType) : "X"; + var datePeriodTimex = $"P{unitCount}{DatePeriodTimexTypeToTimexSuffix[timexType]}"; + return $"({timex1},{timex2},{datePeriodTimex})"; } public static string GenerateWeekTimex(DateObject monday = default(DateObject)) @@ -109,6 +148,11 @@ public static string GenerateCompoundDurationTimex(Dictionary un } } + public static string GenerateWeekdayTimex(int weekday) + { + return $"{Constants.TimexFuzzyYear}{Constants.DateTimexConnector}{Constants.TimexFuzzyWeek}{Constants.DateTimexConnector}{weekday}"; + } + public static string GenerateMonthTimex(DateObject date = default(DateObject)) { if (date.IsDefaultValue()) @@ -133,9 +177,41 @@ public static string GenerateYearTimex(int year, string specialYearPrefixes = nu return specialYearPrefixes == null ? yearTimex : specialYearPrefixes + yearTimex; } + public static string GenerateDatePeriodTimexWithDiff(DateObject beginDate, ref DateObject endDate, string durationUnit) + { + var diff = 0; + switch (durationUnit) + { + case Constants.TimexWeek: + diff = Constants.WeekDayCount - (beginDate.DayOfWeek == 0 ? 
Constants.WeekDayCount : (int)beginDate.DayOfWeek); + endDate = beginDate.AddDays(diff); + break; + + case Constants.TimexMonthFull: + endDate = DateObject.MinValue.SafeCreateFromValue(beginDate.Year, beginDate.Month, 1); + endDate = endDate.AddMonths(1).AddDays(-1); + diff = endDate.Day - beginDate.Day + 1; + break; + + case Constants.TimexYear: + endDate = DateObject.MinValue.SafeCreateFromValue(beginDate.Year, 12, 1); + endDate = endDate.AddMonths(1).AddDays(-1); + diff = endDate.DayOfYear - beginDate.DayOfYear + 1; + break; + } + + var durationTimex = Constants.GeneralPeriodPrefix + diff + Constants.TimexDay; + return $"({DateTimeFormatUtil.LuisDate(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)},{durationTimex})"; + } + + public static string GenerateDatePeriodTimexWithDuration(DateObject beginDate, DateObject endDate, string durationTimex) + { + return $"({DateTimeFormatUtil.LuisDate(beginDate)},{DateTimeFormatUtil.LuisDate(endDate)},{durationTimex})"; + } + public static string GenerateDurationTimex(double number, string unitStr, bool isLessThanDay) { - if (!Constants.TimexBusinessDay.Equals(unitStr)) + if (!Constants.TimexBusinessDay.Equals(unitStr, StringComparison.Ordinal)) { switch (unitStr) { @@ -147,6 +223,9 @@ public static string GenerateDurationTimex(double number, string unitStr, bool i number = number * 2; unitStr = Constants.TimexWeek; break; + case Constants.WEEKEND_UNIT: + unitStr = Constants.TimexWeekend; + break; default: unitStr = unitStr.Substring(0, 1); break; @@ -158,6 +237,22 @@ public static string GenerateDurationTimex(double number, string unitStr, bool i number.ToString(CultureInfo.InvariantCulture) + unitStr; } + public static string GenerateDurationTimex(DateObject beginDateTime, DateObject endDateTime) + { + var duration = endDateTime - beginDateTime; + var days = duration.Days; + var hours = duration.Hours; + var mins = duration.Minutes; + var secs = duration.Seconds; + + return Constants.GeneralPeriodPrefix + + (days > 0 ? 
days.ToString(CultureInfo.InvariantCulture) + Constants.TimexDay : string.Empty) + + (hours > 0 || mins > 0 || secs > 0 ? Constants.TimeTimexPrefix : string.Empty) + + (hours > 0 ? hours.ToString(CultureInfo.InvariantCulture) + Constants.TimexHour : string.Empty) + + (mins > 0 ? mins.ToString(CultureInfo.InvariantCulture) + Constants.TimexMinute : string.Empty) + + (secs > 0 ? secs.ToString(CultureInfo.InvariantCulture) + Constants.TimexSecond : string.Empty); + } + public static DatePeriodTimexType GetDatePeriodTimexType(string durationTimex) { DatePeriodTimexType result; @@ -209,71 +304,106 @@ public static DateObject OffsetDateObject(DateObject date, int offset, DatePerio return result; } - public static TimeOfDayResolutionResult ParseTimeOfDay(string tod) + public static string MergeTimexAlternatives(string timex1, string timex2) + { + if (timex1.Equals(timex2, StringComparison.Ordinal)) + { + return timex1; + } + + return $"{timex1}{Constants.CompositeTimexDelimiter}{timex2}"; + } + + public static void ProcessDoubleTimex(Dictionary resolutionDic, string futureKey, string pastKey, string originTimex) + { + string[] timexes = originTimex.Split(Constants.CompositeTimexDelimiter); + + if (!resolutionDic.ContainsKey(futureKey) || !resolutionDic.ContainsKey(pastKey) || timexes.Length != 2) + { + return; + } + + var futureResolution = (Dictionary)resolutionDic[futureKey]; + var pastResolution = (Dictionary)resolutionDic[pastKey]; + futureResolution[DateTimeResolutionKey.Timex] = timexes[0]; + pastResolution[DateTimeResolutionKey.Timex] = timexes[1]; + } + + public static bool HasDoubleTimex(string comment) + { + return comment.Equals(Constants.Comment_DoubleTimex, StringComparison.Ordinal); + } + + public static TimeOfDayResolutionResult ResolveTimeOfDay(string tod) { var result = new TimeOfDayResolutionResult(); switch (tod) { case Constants.EarlyMorning: result.Timex = Constants.EarlyMorning; - result.BeginHour = 4; - result.EndHour = 8; + result.BeginHour = 
Constants.EarlyMorningBeginHour; + result.EndHour = Constants.EarlyMorningEndHour; break; case Constants.Morning: result.Timex = Constants.Morning; - result.BeginHour = 8; - result.EndHour = 12; + result.BeginHour = Constants.MorningBeginHour; + result.EndHour = Constants.MorningEndHour; break; case Constants.MidDay: result.Timex = Constants.MidDay; - result.BeginHour = 11; - result.EndHour = 13; + result.BeginHour = Constants.MidDayBeginHour; + result.EndHour = Constants.MidDayEndHour; break; case Constants.Afternoon: result.Timex = Constants.Afternoon; - result.BeginHour = 12; - result.EndHour = 16; + result.BeginHour = Constants.AfternoonBeginHour; + result.EndHour = Constants.AfternoonEndHour; break; case Constants.Evening: result.Timex = Constants.Evening; - result.BeginHour = 16; - result.EndHour = 20; + result.BeginHour = Constants.EveningBeginHour; + result.EndHour = Constants.EveningEndHour; break; case Constants.Daytime: result.Timex = Constants.Daytime; - result.BeginHour = 8; - result.EndHour = 18; + result.BeginHour = Constants.DaytimeBeginHour; + result.EndHour = Constants.DaytimeEndHour; + break; + case Constants.Nighttime: + result.Timex = Constants.Nighttime; + result.BeginHour = Constants.NighttimeBeginHour; + result.EndHour = Constants.NighttimeEndHour; break; case Constants.BusinessHour: result.Timex = Constants.BusinessHour; - result.BeginHour = 8; - result.EndHour = 18; + result.BeginHour = Constants.BusinessBeginHour; + result.EndHour = Constants.BusinessEndHour; break; case Constants.Night: result.Timex = Constants.Night; - result.BeginHour = 20; - result.EndHour = 23; - result.EndMin = 59; + result.BeginHour = Constants.NightBeginHour; + result.EndHour = Constants.NightEndHour; + result.EndMin = Constants.NightEndMin; break; case Constants.MealtimeBreakfast: result.Timex = Constants.MealtimeBreakfast; - result.BeginHour = 8; - result.EndHour = 12; + result.BeginHour = Constants.MealtimeBreakfastBeginHour; + result.EndHour = 
Constants.MealtimeBreakfastEndHour; break; case Constants.MealtimeBrunch: result.Timex = Constants.MealtimeBrunch; - result.BeginHour = 8; - result.EndHour = 12; + result.BeginHour = Constants.MealtimeBrunchBeginHour; + result.EndHour = Constants.MealtimeBrunchEndHour; break; case Constants.MealtimeLunch: result.Timex = Constants.MealtimeLunch; - result.BeginHour = 11; - result.EndHour = 13; + result.BeginHour = Constants.MealtimeLunchBeginHour; + result.EndHour = Constants.MealtimeLunchEndHour; break; case Constants.MealtimeDinner: result.Timex = Constants.MealtimeDinner; - result.BeginHour = 16; - result.EndHour = 20; + result.BeginHour = Constants.MealtimeDinnerBeginHour; + result.EndHour = Constants.MealtimeDinnerEndHour; break; default: break; @@ -287,6 +417,58 @@ public static string CombineDateAndTimeTimex(string dateTimex, string timeTimex) return $"{dateTimex}{timeTimex}"; } + public static string GenerateEndInclusiveTimex(string originalTimex, DatePeriodTimexType datePeriodTimexType, + DateObject startDate, DateObject endDate) + { + + var timexEndInclusive = GenerateDatePeriodTimex(startDate, endDate, datePeriodTimexType); + + // Sometimes the original timex contains fuzzy part like "XXXX-05-31" + // The fuzzy part needs to stay the same in the new end-inclusive timex + if (originalTimex.Contains(Constants.TimexFuzzy) && originalTimex.Length == timexEndInclusive.Length) + { + var timexCharSet = new char[timexEndInclusive.Length]; + + for (int i = 0; i < originalTimex.Length; i++) + { + if (originalTimex[i] != Constants.TimexFuzzy) + { + timexCharSet[i] = timexEndInclusive[i]; + } + else + { + timexCharSet[i] = Constants.TimexFuzzy; + } + } + + timexEndInclusive = new string(timexCharSet); + } + + return timexEndInclusive; + } + + public static string GenerateDecadeTimex(int beginYear, int totalLastYear, int decade, bool inputCentury) + { + string beginStr, endStr; + if (inputCentury) + { + beginStr = DateTimeFormatUtil.LuisDate(beginYear, 1, 1); + endStr 
= DateTimeFormatUtil.LuisDate(beginYear + totalLastYear, 1, 1); + } + else + { + var beginYearStr = Constants.TimexFuzzyTwoDigitYear + decade; + beginStr = DateTimeFormatUtil.LuisDate(-1, 1, 1); + beginStr = beginStr.Replace(Constants.TimexFuzzyYear, beginYearStr); + + var endYearStr = Constants.TimexFuzzyTwoDigitYear + ((decade + totalLastYear) % 100).ToString("D2", CultureInfo.InvariantCulture); + endStr = DateTimeFormatUtil.LuisDate(-1, 1, 1); + endStr = endStr.Replace(Constants.TimexFuzzyYear, endYearStr); + } + + return $"({beginStr},{endStr},{Constants.GeneralPeriodPrefix}{totalLastYear}{Constants.TimexYear})"; + } + public static string GenerateWeekOfYearTimex(int year, int weekNum) { var weekTimex = GenerateWeekTimex(weekNum); @@ -305,7 +487,18 @@ public static string GenerateWeekOfMonthTimex(int year, int month, int weekNum) public static string GenerateWeekTimex(int weekNum) { - return $"W{weekNum.ToString("D2")}"; + return $"W{weekNum.ToString("D2", CultureInfo.InvariantCulture)}"; + } + + public static string CombineDateTimeTimex(string timeTimex1, string dateTimeTimex2, DateObject dateTime1) + { + return dateTimeTimex2.Equals(Constants.TimexNow, StringComparison.Ordinal) ? 
DateTimeFormatUtil.LuisDateShortTime(dateTime1) : + dateTimeTimex2.Split(Constants.TimeTimexPrefix[0])[0] + timeTimex1; + } + + public static string GenerateDateTimeTimex(DateObject dateTime) + { + return DateTimeFormatUtil.LuisDateTime(dateTime); } public static string GenerateDateTimePeriodTimex(string beginTimex, string endTimex, string durationTimex) @@ -313,6 +506,75 @@ public static string GenerateDateTimePeriodTimex(string beginTimex, string endTi return $"({beginTimex},{endTimex},{durationTimex})"; } + public static string GenerateDateTimePeriodTimex(string beginTimex, string endTimex, TimeSpan duration) + { + var durationTimex = DateTimeFormatUtil.LuisTimeSpan(duration); + return GenerateDateTimePeriodTimex(beginTimex, endTimex, durationTimex); + } + + public static string GenerateDateTimePeriodTimex(DateObject beginDateTime, DateObject endDateTime, string durationTimex) + { + return GenerateDateTimePeriodTimex(DateTimeFormatUtil.LuisDateTime(beginDateTime), + DateTimeFormatUtil.LuisDateTime(endDateTime), durationTimex); + } + + public static string GenerateDateTimePeriodTimex(DateObject beginDateTime, DateObject endDateTime) + { + var durationTimex = GenerateDurationTimex(beginDateTime, endDateTime); + + return GenerateDateTimePeriodTimex(beginDateTime, endDateTime, durationTimex); + } + + public static string GenerateRelativeUnitDateTimePeriodTimex(ref DateObject beginDateTime, ref DateObject endDateTime, DateObject referenceTime, string unitStr, int swift) + { + string prefix = Constants.GeneralPeriodPrefix + Constants.TimeTimexPrefix; + string durationTimex = string.Empty; + switch (unitStr) + { + case Constants.TimexDay: + endDateTime = DateObject.MinValue.SafeCreateFromValue(beginDateTime.Year, beginDateTime.Month, beginDateTime.Day); + endDateTime = endDateTime.AddDays(1).AddSeconds(-1); + durationTimex = prefix + (endDateTime - beginDateTime).TotalSeconds + Constants.TimexSecond; + break; + case Constants.TimexHour: + beginDateTime = swift > 0 ? 
beginDateTime : referenceTime.AddHours(swift); + endDateTime = swift > 0 ? referenceTime.AddHours(swift) : endDateTime; + durationTimex = prefix + "1" + Constants.TimexHour; + break; + case Constants.TimexMinute: + beginDateTime = swift > 0 ? beginDateTime : referenceTime.AddMinutes(swift); + endDateTime = swift > 0 ? referenceTime.AddMinutes(swift) : endDateTime; + durationTimex = prefix + "1" + Constants.TimexMinute; + break; + case Constants.TimexSecond: + beginDateTime = swift > 0 ? beginDateTime : referenceTime.AddSeconds(swift); + endDateTime = swift > 0 ? referenceTime.AddSeconds(swift) : endDateTime; + durationTimex = prefix + "1" + Constants.TimexSecond; + break; + default: + return string.Empty; + } + + return GenerateDateTimePeriodTimex(beginDateTime, endDateTime, durationTimex); + } + + public static string GenerateSplitDateTimePeriodTimex(string dateTimex, string timeRangeTimex) + { + var split = timeRangeTimex.Split(Constants.TimeTimexPrefix[0]); + string timex = null; + if (split.Length == 4) + { + timex = split[0] + dateTimex + Constants.TimeTimexPrefix + split[1] + dateTimex + + Constants.TimeTimexPrefix + split[2] + Constants.TimeTimexPrefix + split[3]; + } + else if (split.Length == 2) + { + timex = dateTimex + timeRangeTimex; + } + + return timex; + } + public static RangeTimexComponents GetRangeTimexComponents(string rangeTimex) { rangeTimex = rangeTimex.Replace("(", string.Empty).Replace(")", string.Empty); @@ -332,23 +594,84 @@ public static RangeTimexComponents GetRangeTimexComponents(string rangeTimex) public static bool IsRangeTimex(string timex) { - return !string.IsNullOrEmpty(timex) && timex.StartsWith("("); + return !string.IsNullOrEmpty(timex) && timex.StartsWith("(", StringComparison.Ordinal); } public static string SetTimexWithContext(string timex, DateContext context) { - return timex.Replace(Constants.TimexFuzzyYear, context.Year.ToString("D4")); + return timex.Replace(Constants.TimexFuzzyYear, context.Year.ToString("D4", 
CultureInfo.InvariantCulture)); + } + + public static string GenerateSetTimex(string durationType, float durationLength, float multiplier = 1) + { + return $"P{durationLength * multiplier:0.#}{durationType}"; + } + + public static string ModifyAmbiguousCenturyTimex(string timex) + { + return "XX" + timex.Substring(2); + } + + public static float ParseNumberFromDurationTimex(string timex) + { + var numberStr = timex.Substring(timex.IndexOf(Constants.GeneralPeriodPrefix) + 1, timex.IndexOfAny(Constants.DurationUnitChar) - 1); + return float.Parse(numberStr); + } + + public static int ParseHourFromTimeTimex(string timex) + { + var start = timex.IndexOf(Constants.TimeTimexPrefix) + 1; + var end = timex.IndexOf(Constants.TimeTimexConnector); + end = end > 0 ? end : timex.Length; + var hourStr = timex.Substring(start, end - start); + int.TryParse(hourStr, out int hour); + + return hour; + } + + public static Tuple ParseHoursFromTimePeriodTimex(string timex) + { + int hour1 = 0, hour2 = 0; + var timeList = timex.Split(Constants.TimexSeparator[0]); + if (timeList.Length > 2) + { + hour1 = ParseHourFromTimeTimex(timeList[0]); + hour2 = ParseHourFromTimeTimex(timeList[1]); + } + + return new Tuple(hour1, hour2); } private static bool IsTimeDurationTimex(string timex) { - return timex.StartsWith($"{Constants.GeneralPeriodPrefix}{Constants.TimeTimexPrefix}"); + return timex.StartsWith($"{Constants.GeneralPeriodPrefix}{Constants.TimeTimexPrefix}", StringComparison.Ordinal); } - private static string GetDurationTimexWithoutPrefix(string timex) + private static string GetDatePeriodTimexUnitCount(DateObject begin, DateObject end, DatePeriodTimexType timexType) { - // Remove "PT" prefix for TimeDuration, Remove "P" prefix for DateDuration - return timex.Substring(IsTimeDurationTimex(timex) ? 
2 : 1); + string unitCount; + + switch (timexType) + { + case DatePeriodTimexType.ByDay: + unitCount = (end - begin).TotalDays.ToString(CultureInfo.InvariantCulture); + break; + case DatePeriodTimexType.ByWeek: + unitCount = ((end - begin).TotalDays / 7).ToString(CultureInfo.InvariantCulture); + break; + case DatePeriodTimexType.ByFortnight: + unitCount = ((end - begin).TotalDays / 7).ToString(CultureInfo.InvariantCulture); + break; + case DatePeriodTimexType.ByMonth: + unitCount = (((end.Year - begin.Year) * 12) + (end.Month - begin.Month)).ToString(CultureInfo.InvariantCulture); + break; + default: + unitCount = ((end.Year - begin.Year) + ((end.Month - begin.Month) / 12.0)).ToString(CultureInfo.InvariantCulture); + break; + } + + return unitCount; } + } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/Token.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/Token.cs index 861586e776..169b796686 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/Token.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/Token.cs @@ -1,10 +1,15 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.InternalCache; + namespace Microsoft.Recognizers.Text.DateTime { - public class Token + public class Token : ICloneableType { public Token(int s, int e, Metadata metadata = null) @@ -56,7 +61,9 @@ public static List MergeAllTokens(List tokens, string text var ret = new List(); tokens = tokens.OrderBy(s => s.Start).ThenByDescending(s => s.Length).ToList(); + var mergedTokens = new List(); + foreach (var token in tokens) { if (token != null) @@ -95,8 +102,8 @@ public static List MergeAllTokens(List tokens, string text { var start = token.Start; var length = token.Length; - var substr = text.Substring(start, length); - + var substr = text.Substring(start, length); + var er = new ExtractResult { Start = start, @@ -112,5 +119,10 @@ public static List MergeAllTokens(List tokens, string text return ret; } + + public Token Clone() + { + return (Token)MemberwiseClone(); + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/CardinalExtractor.cs new file mode 100644 index 0000000000..d651e4d7e7 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/CardinalExtractor.cs @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.Number.Arabic +{ + public class CardinalExtractor : BaseNumberExtractor + { + private static readonly ConcurrentDictionary Instances = + new ConcurrentDictionary(); + + private readonly string keyPrefix; + + private CardinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) + { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + + var builder = ImmutableDictionary.CreateBuilder(); + + // Add Integer Regexes + var intExtract = IntegerExtractor.GetInstance(config); + builder.AddRange(intExtract.Regexes); + + // Add Double Regexes + var douExtract = DoubleExtractor.GetInstance(config); + builder.AddRange(douExtract.Regexes); + + Regexes = builder.ToImmutable(); + } + + internal sealed override ImmutableDictionary Regexes { get; } + + protected sealed override string ExtractType { get; } = Constants.SYS_NUM_CARDINAL; // "Cardinal"; + + public static CardinalExtractor GetInstance(BaseNumberOptionsConfiguration config) + { + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) + { + var instance = new CardinalExtractor(config); + Instances.TryAdd(extractorKey, instance); + } + + return Instances[extractorKey]; + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/DoubleExtractor.cs new file mode 100644 index 0000000000..67a62c0822 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/DoubleExtractor.cs @@ -0,0 +1,92 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Arabic; + +namespace Microsoft.Recognizers.Text.Number.Arabic +{ + public class DoubleExtractor : BaseNumberExtractor + { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft; + + private static readonly ConcurrentDictionary Instances = + new ConcurrentDictionary(); + + private DoubleExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) + { + var regexes = new Dictionary + { + { + new Regex(NumbersDefinitions.DoubleDecimalPointRegex(config.Placeholder), RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(config.Placeholder), RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.ARABIC) + }, + { + new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) + }, + { + 
GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumCommaDot, config.Placeholder, RegexFlags), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumBlankDot, config.Placeholder, RegexFlags), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceDot, config.Placeholder, RegexFlags), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.DoubleWithThousandMarkRegex(config.Placeholder), RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) + }, + }; + + Regexes = regexes.ToImmutableDictionary(); + } + + internal sealed override ImmutableDictionary Regexes { get; } + + protected sealed override string ExtractType { get; } = Constants.SYS_NUM_DOUBLE; // "Double"; + + public static DoubleExtractor GetInstance(BaseNumberOptionsConfiguration config) + { + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) + { + var instance = new DoubleExtractor(config); + Instances.TryAdd(extractorKey, instance); + } + + return Instances[extractorKey]; + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/FractionExtractor.cs new file mode 100644 index 0000000000..6f8a7c4cda --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/FractionExtractor.cs @@ -0,0 +1,94 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Arabic; + +namespace Microsoft.Recognizers.Text.Number.Arabic +{ + public class FractionExtractor : BaseNumberExtractor + { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft; + + private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor> Instances = + new ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor>(); + + private FractionExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) + { + + var regexes = new Dictionary + { + { + new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex2, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ARABIC) + }, + { + new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ARABIC) + }, + { + new Regex(NumbersDefinitions.FractionWithOrdinalPrefix, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ARABIC) + }, + { + new Regex(NumbersDefinitions.FractionWithPartOfPrefix, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, 
Constants.ARABIC) + }, + }; + + // Not add FractionPrepositionRegex when the mode is Unit to avoid wrong recognize cases like "$1000 over 3" + if (config.Mode != NumberMode.Unit) + { + if ((Options & NumberOptions.PercentageMode) != 0) + { + regexes.Add( + new Regex(NumbersDefinitions.FractionPrepositionWithinPercentModeRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ARABIC)); + } + else + { + regexes.Add( + new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ARABIC)); + } + } + + Regexes = regexes.ToImmutableDictionary(); + } + + internal sealed override ImmutableDictionary Regexes { get; } + + protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; + + public static FractionExtractor GetInstance(BaseNumberOptionsConfiguration config) + { + var cacheKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(cacheKey)) + { + var instance = new FractionExtractor(config); + Instances.TryAdd(cacheKey, instance); + } + + return Instances[cacheKey]; + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/IntegerExtractor.cs new file mode 100644 index 0000000000..c8daf7eaae --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/IntegerExtractor.cs @@ -0,0 +1,93 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Arabic; + +namespace Microsoft.Recognizers.Text.Number.Arabic +{ + public class IntegerExtractor : BaseNumberExtractor + { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft; + + private static readonly ConcurrentDictionary Instances = + new ConcurrentDictionary(); + + private readonly string keyPrefix; + + private IntegerExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) + { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + + var regexes = new Dictionary + { + { + new Regex(NumbersDefinitions.NumbersWithPlaceHolder(config.Placeholder), RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.ARABIC) + }, + { + new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.ARABIC) + }, + { + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, 
config.Placeholder, RegexFlags), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumInvertedComma, config.Placeholder, RegexFlags), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, config.Placeholder, RegexFlags), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, config.Placeholder, RegexFlags), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) + }, + }; + + Regexes = regexes.ToImmutableDictionary(); + } + + internal sealed override ImmutableDictionary Regexes { get; } + + protected sealed override string ExtractType { get; } = Constants.SYS_NUM_INTEGER; // "Integer"; + + public static IntegerExtractor GetInstance(BaseNumberOptionsConfiguration config) + { + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) + { + var instance = new IntegerExtractor(config); + Instances.TryAdd(extractorKey, instance); + } + + return Instances[extractorKey]; + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/NumberExtractor.cs new file mode 100644 index 0000000000..a0310af396 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/NumberExtractor.cs @@ -0,0 +1,106 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Arabic; + +namespace Microsoft.Recognizers.Text.Number.Arabic +{ + public class NumberExtractor : BaseNumberExtractor + { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft; + + private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor> Instances = + new ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor>(); + + private readonly string keyPrefix; + + private NumberExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) + { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Mode + "_" + config.Culture); + + NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags, RegexTimeOut); + + AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); + + RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags, RegexTimeOut); + + var builder = ImmutableDictionary.CreateBuilder(); + + // Add Cardinal + CardinalExtractor cardExtract = null; + switch (config.Mode) + { + case NumberMode.PureNumber: + var purNumConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options, config.Mode, + NumbersDefinitions.PlaceHolderPureNumber); + cardExtract = CardinalExtractor.GetInstance(purNumConfig); + break; + case NumberMode.Currency: + builder.Add(BaseNumberExtractor.CurrencyRegex, + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX)); + break; + case NumberMode.Unit: + break; + case NumberMode.Default: + break; + } + + if (cardExtract == null) + { + cardExtract = CardinalExtractor.GetInstance(config); + } + + builder.AddRange(cardExtract.Regexes); + + // Add Fraction + var fracExtract 
= FractionExtractor.GetInstance(config); + builder.AddRange(fracExtract.Regexes); + + Regexes = builder.ToImmutable(); + + var ambiguityBuilder = ImmutableDictionary.CreateBuilder(); + + // Do not filter the ambiguous number cases like 'that one' in NumberWithUnit, otherwise they can't be resolved. + if (config.Mode != NumberMode.Unit) + { + foreach (var item in NumbersDefinitions.AmbiguityFiltersDict) + { + ambiguityBuilder.Add(new Regex(item.Key, RegexFlags, RegexTimeOut), new Regex(item.Value, RegexFlags, RegexTimeOut)); + } + } + + AmbiguityFiltersDict = ambiguityBuilder.ToImmutable(); + } + + internal sealed override ImmutableDictionary Regexes { get; } + + protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } + + protected sealed override string ExtractType { get; } = Constants.SYS_NUM; // "Number"; + + protected sealed override Regex NegativeNumberTermsRegex { get; } + + protected sealed override Regex AmbiguousFractionConnectorsRegex { get; } + + protected sealed override Regex RelativeReferenceRegex { get; } + + public static NumberExtractor GetInstance(BaseNumberOptionsConfiguration config) + { + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) + { + var instance = new NumberExtractor(config); + Instances.TryAdd(extractorKey, instance); + } + + return Instances[extractorKey]; + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/NumberRangeExtractor.cs new file mode 100644 index 0000000000..3bb0812d1c --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/NumberRangeExtractor.cs @@ -0,0 +1,112 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Arabic; + +namespace Microsoft.Recognizers.Text.Number.Arabic +{ + public class NumberRangeExtractor : BaseNumberRangeExtractor + { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft; + + private readonly BaseNumberExtractor numberExtractor; + + private readonly BaseNumberExtractor ordinalExtractor; + + private readonly BaseNumberParser numberParser; + + public NumberRangeExtractor(INumberOptionsConfiguration config) + : base( + NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + new BaseNumberParser(new ArabicNumberParserConfiguration(config)), + config) + { + + this.numberExtractor = NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)); + this.ordinalExtractor = OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)); + this.numberParser = new BaseNumberParser(new ArabicNumberParserConfiguration(config)); + + var regexes = new Dictionary() + { + { + // between...and... + new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUMBETWEEN + }, + { + // more than ... less than ... + new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUM + }, + { + // less than ... more than ... + new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUM + }, + { + // from ... to/~/- ... + new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUMTILL + }, + { + // more/greater/higher than ... 
+ new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // 30 and/or greater/higher + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // فيه خمس مائة وأكثر منتجات + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex3, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // less/smaller/lower than ... + new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + { + // 30 and/or less/smaller/lower + new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + { + // equal to ... + new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags, RegexTimeOut), + NumberRangeConstants.EQUAL + }, + { + // equal to 30 or more than, larger than 30 or equal to ... + new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // equal to 30 or less, smaller than 30 or equal ... 
+ new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + }; + + Regexes = regexes.ToImmutableDictionary(); + + AmbiguousFractionConnectorsRegex = + new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); + } + + internal sealed override ImmutableDictionary Regexes { get; } + + internal sealed override Regex AmbiguousFractionConnectorsRegex { get; } + + protected sealed override string ExtractType { get; } = Constants.SYS_NUMRANGE; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/OrdinalExtractor.cs new file mode 100644 index 0000000000..bc45618a05 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/OrdinalExtractor.cs @@ -0,0 +1,73 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Arabic; + +namespace Microsoft.Recognizers.Text.Number.Arabic +{ + public class OrdinalExtractor : BaseNumberExtractor + { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft; + + private static readonly ConcurrentDictionary Instances = + new ConcurrentDictionary(); + + private readonly string keyPrefix; + + private OrdinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) + { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options.ToString() + "_" + config.Culture); + + AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); + + RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags, RegexTimeOut); + + var regexes = new Dictionary + { + { + new Regex(NumbersDefinitions.OrdinalNumericRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.OrdinalEnglishRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.ARABIC) + }, + { + new Regex(NumbersDefinitions.OrdinalRoundNumberRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.ARABIC) + }, + }; + + Regexes = regexes.ToImmutableDictionary(); + } + + internal sealed override ImmutableDictionary Regexes { get; } + + protected sealed override string ExtractType { get; } = Constants.SYS_NUM_ORDINAL; // "Ordinal"; + + protected sealed override Regex AmbiguousFractionConnectorsRegex { get; } + + protected sealed override Regex RelativeReferenceRegex { get; } + + public static OrdinalExtractor 
GetInstance(BaseNumberOptionsConfiguration config) + { + var extractorKey = config.Options.ToString(); + + if (!Instances.ContainsKey(extractorKey)) + { + var instance = new OrdinalExtractor(config); + Instances.TryAdd(extractorKey, instance); + } + + return Instances[extractorKey]; + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/PercentageExtractor.cs new file mode 100644 index 0000000000..a770aad7cc --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Extractors/PercentageExtractor.cs @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Arabic; + +namespace Microsoft.Recognizers.Text.Number.Arabic +{ + public sealed class PercentageExtractor : BasePercentageExtractor + { + public PercentageExtractor(BaseNumberOptionsConfiguration config) + : base(NumberExtractor.GetInstance(config)) + { + Options = config.Options; + Regexes = InitRegexes(); + } + + protected override NumberOptions Options { get; } + + protected override ImmutableHashSet InitRegexes() + { + HashSet regexStrings = new HashSet + { + NumbersDefinitions.NumberWithSuffixPercentage, + NumbersDefinitions.NumberWithPrefixPercentage, + }; + + if ((Options & NumberOptions.PercentageMode) != 0) + { + regexStrings.Add(NumbersDefinitions.FractionNumberWithSuffixPercentage); + regexStrings.Add(NumbersDefinitions.NumberWithPrepositionPercentage); + } + + return BuildRegexes(regexStrings); + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Arabic/Parsers/ArabicNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Parsers/ArabicNumberParserConfiguration.cs new file mode 100644 index 
0000000000..6c854a23b3 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Parsers/ArabicNumberParserConfiguration.cs @@ -0,0 +1,81 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Globalization; +using System.Linq; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Arabic; + +namespace Microsoft.Recognizers.Text.Number.Arabic +{ + public class ArabicNumberParserConfiguration : BaseNumberParserConfiguration + { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.RightToLeft; + + public ArabicNumberParserConfiguration(INumberOptionsConfiguration config) + { + + this.Config = config; + this.LanguageMarker = NumbersDefinitions.LangMarker; + + // @TODO Temporary workaround + var culture = config.Culture; + if (culture.IndexOf("*", StringComparison.Ordinal) != -1) + { + culture = config.Culture.Replace("*", "sa"); + } + + this.CultureInfo = new CultureInfo(culture); + this.CultureInfo.NumberFormat.NegativeSign = "-"; + this.CultureInfo.NumberFormat.NumberDecimalSeparator = "."; + + this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; + this.IsMultiDecimalSeparatorCulture = NumbersDefinitions.MultiDecimalSeparatorCulture; + + this.DecimalSeparatorChar = NumbersDefinitions.DecimalSeparatorChar; + this.FractionMarkerToken = NumbersDefinitions.FractionMarkerToken; + this.NonDecimalSeparatorChar = NumbersDefinitions.NonDecimalSeparatorChar; + this.HalfADozenText = NumbersDefinitions.HalfADozenText; + this.WordSeparatorToken = NumbersDefinitions.WordSeparatorToken; + + this.WrittenDecimalSeparatorTexts = NumbersDefinitions.WrittenDecimalSeparatorTexts; + this.WrittenGroupSeparatorTexts = NumbersDefinitions.WrittenGroupSeparatorTexts; + this.WrittenIntegerSeparatorTexts = 
NumbersDefinitions.WrittenIntegerSeparatorTexts; + this.WrittenFractionSeparatorTexts = NumbersDefinitions.WrittenFractionSeparatorTexts; + + this.CardinalNumberMap = NumbersDefinitions.CardinalNumberMap.ToImmutableDictionary(); + this.OrdinalNumberMap = NumbersDefinitions.OrdinalNumberMap.ToImmutableDictionary(); + this.RelativeReferenceOffsetMap = NumbersDefinitions.RelativeReferenceOffsetMap.ToImmutableDictionary(); + this.RelativeReferenceRelativeToMap = NumbersDefinitions.RelativeReferenceRelativeToMap.ToImmutableDictionary(); + this.RoundNumberMap = NumbersDefinitions.RoundNumberMap.ToImmutableDictionary(); + + this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags, RegexTimeOut); + this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags, RegexTimeOut); + this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags, RegexTimeOut); + this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut); + } + + public string NonDecimalSeparatorText { get; private set; } + + public override (bool isRelevant, double value) GetLangSpecificIntValue(List matchStrs) + { + var result = NotApplicable; + + // @TODO "و" should be moved to Arabic YAML file. + + // Workaround to solve "و" which means "and" before rounded number in Arabic. + // ألف و مائة = one thousand and one hundred + // But in Arabic there is no integer before hundred, because it's 100 by default. 
+ if (matchStrs.Count == 1 && matchStrs.First() == "و") + { + result = (true, 1); + } + + return result; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Number/Arabic/Parsers/ArabicNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Parsers/ArabicNumberRangeParserConfiguration.cs new file mode 100644 index 0000000000..370b89dc4c --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Arabic/Parsers/ArabicNumberRangeParserConfiguration.cs @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Arabic; + +namespace Microsoft.Recognizers.Text.Number.Arabic +{ + public class ArabicNumberRangeParserConfiguration : BaseNumberRangeParserConfiguration + { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public ArabicNumberRangeParserConfiguration(INumberOptionsConfiguration config) + { + if (config.Culture == "ar-*") + { + CultureInfo = new CultureInfo("ar"); + } + else + { + CultureInfo = new CultureInfo(config.Culture); + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options); + + NumberExtractor = Arabic.NumberExtractor.GetInstance(numConfig); + OrdinalExtractor = Arabic.OrdinalExtractor.GetInstance(numConfig); + NumberParser = new BaseNumberParser(new ArabicNumberParserConfiguration(config)); + + MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags, RegexTimeOut); + LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags, RegexTimeOut); + MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags, RegexTimeOut); + LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags, RegexTimeOut); + MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut); + 
LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/CardinalExtractor.cs index 9567bc8781..8e534125ce 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/CardinalExtractor.cs @@ -1,21 +1,27 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Number.Config; namespace Microsoft.Recognizers.Text.Number.Chinese { + public class CardinalExtractor : BaseNumberExtractor { + // CardinalExtractor = Int + Double - public CardinalExtractor(CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) + public CardinalExtractor(BaseNumberOptionsConfiguration config, CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) { + var builder = ImmutableDictionary.CreateBuilder(); - var intExtractChs = new IntegerExtractor(mode); + var intExtractChs = new IntegerExtractor(config, mode); builder.AddRange(intExtractChs.Regexes); - var douExtractorChs = new DoubleExtractor(); + var douExtractorChs = new DoubleExtractor(config); builder.AddRange(douExtractorChs.Regexes); Regexes = builder.ToImmutable(); diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/DoubleExtractor.cs index 80bdc35102..a815b15dc1 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/DoubleExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -11,47 +14,47 @@ public class DoubleExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public DoubleExtractor() + public DoubleExtractor(BaseNumberOptionsConfiguration config) { var regexes = new Dictionary { { - new Regex(NumbersDefinitions.DoubleSpecialsChars, RegexFlags), + new Regex(NumbersDefinitions.DoubleSpecialsChars, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { // (-)2.5, can avoid cases like ip address xx.xx.xx.xx - new Regex(NumbersDefinitions.DoubleSpecialsCharsWithNegatives, RegexFlags), + new Regex(NumbersDefinitions.DoubleSpecialsCharsWithNegatives, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { // (-).2 - new Regex(NumbersDefinitions.SimpleDoubleSpecialsChars, RegexFlags), + new Regex(NumbersDefinitions.SimpleDoubleSpecialsChars, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { // 1.0 K - new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { // 15.2万 - new Regex(NumbersDefinitions.DoubleWithThousandsRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithThousandsRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.CHINESE) }, { // 四十五点三三 - new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.CHINESE) 
}, { // 2e6, 21.2e0 - new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { // 2^5 - new Regex(NumbersDefinitions.DoubleScientificNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleScientificNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, }; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/FractionExtractor.cs index 003d62f206..a5926e0718 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/FractionExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -11,23 +14,23 @@ public class FractionExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public FractionExtractor() + public FractionExtractor(BaseNumberOptionsConfiguration config) { var regexes = new Dictionary { { // -4 5/2, 4 6/3 - new Regex(NumbersDefinitions.FractionNotationSpecialsCharsRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationSpecialsCharsRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { // 8/3 - new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { // 四分之六十五 - new Regex(NumbersDefinitions.AllFractionNumber, RegexFlags), + new Regex(NumbersDefinitions.AllFractionNumber, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.CHINESE) }, }; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/IntegerExtractor.cs index 748122bfc8..dabf8c7d6c 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/IntegerExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -11,38 +14,38 @@ public class IntegerExtractor : BaseNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public IntegerExtractor(CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) + public IntegerExtractor(BaseNumberOptionsConfiguration config, CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) { var regexes = new Dictionary() { { // 123456, -123456 - new Regex(NumbersDefinitions.NumbersSpecialsChars, RegexFlags), + new Regex(NumbersDefinitions.NumbersSpecialsChars, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { // 15k, 16 G - new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { // 1,234, 2,332,111 - new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, RegexFlags), + new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { // 半百 半打 - new Regex(NumbersDefinitions.NumbersWithHalfDozen, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithHalfDozen, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE) }, { // 半 - new Regex(NumbersDefinitions.HalfUnitRegex, RegexFlags), + new Regex(NumbersDefinitions.HalfUnitRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.CHINESE) }, { // 一打 五十打 - new Regex(NumbersDefinitions.NumbersWithDozen, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithDozen, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, 
Constants.CHINESE) }, }; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberExtractor.cs index b0daf200e2..314633cc1c 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberExtractor.cs @@ -1,5 +1,9 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Chinese; using Microsoft.Recognizers.Text.Number.Config; @@ -8,23 +12,40 @@ namespace Microsoft.Recognizers.Text.Number.Chinese public class NumberExtractor : BaseNumberExtractor { - public NumberExtractor(CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public NumberExtractor(BaseNumberOptionsConfiguration config, CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) { var builder = ImmutableDictionary.CreateBuilder(); // Add Cardinal - var cardExtractChs = new CardinalExtractor(mode); + var cardExtractChs = new CardinalExtractor(config, mode); builder.AddRange(cardExtractChs.Regexes); // Add Fraction - var fracExtractChs = new FractionExtractor(); + var fracExtractChs = new FractionExtractor(config); builder.AddRange(fracExtractChs.Regexes); Regexes = builder.ToImmutable(); + + var ambiguityBuilder = ImmutableDictionary.CreateBuilder(); + + // Do not filter the ambiguous number cases like 'that one' in NumberWithUnit, otherwise they can't be resolved. 
+ if (config.Mode != NumberMode.Unit) + { + foreach (var item in NumbersDefinitions.AmbiguityFiltersDict) + { + ambiguityBuilder.Add(new Regex(item.Key, RegexFlags, RegexTimeOut), new Regex(item.Value, RegexFlags, RegexTimeOut)); + } + } + + AmbiguityFiltersDict = ambiguityBuilder.ToImmutable(); } internal sealed override ImmutableDictionary Regexes { get; } protected sealed override string ExtractType { get; } = Constants.SYS_NUM; + + protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberRangeExtractor.cs index fd5993e77d..645a276f1a 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberRangeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberRangeExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; @@ -8,63 +11,66 @@ namespace Microsoft.Recognizers.Text.Number.Chinese public class NumberRangeExtractor : BaseNumberRangeExtractor { public NumberRangeExtractor(INumberOptionsConfiguration config) - : base(new NumberExtractor(), new OrdinalExtractor(), new BaseCJKNumberParser(new ChineseNumberParserConfiguration(config)), config) + : base(new NumberExtractor(new BaseNumberOptionsConfiguration(config)), + new OrdinalExtractor(new BaseNumberOptionsConfiguration(config)), + new BaseCJKNumberParser(new ChineseNumberParserConfiguration(config)), + config) { var regexes = new Dictionary() { { // 在...和...之间 - new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexOptions.Singleline), + new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexOptions.Singleline, RegexTimeOut), NumberRangeConstants.TWONUMBETWEEN }, { // 大于...小于... - new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexOptions.Singleline), + new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexOptions.Singleline, RegexTimeOut), NumberRangeConstants.TWONUM }, { // 小于...大于... - new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexOptions.Singleline), + new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexOptions.Singleline, RegexTimeOut), NumberRangeConstants.TWONUM }, { // ...到/至..., 20~30 - new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexOptions.Singleline), + new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexOptions.Singleline, RegexTimeOut), NumberRangeConstants.TWONUMTILL }, { // 大于/多于/高于... 
- new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexOptions.Singleline), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexOptions.Singleline, RegexTimeOut), NumberRangeConstants.MORE }, { // 比...大/高/多 - new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexOptions.Singleline), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexOptions.Singleline, RegexTimeOut), NumberRangeConstants.MORE }, { // ...多/以上/之上 - new Regex(NumbersDefinitions.OneNumberRangeMoreRegex3, RegexOptions.Singleline), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex3, RegexOptions.Singleline, RegexTimeOut), NumberRangeConstants.MORE }, { // 小于/少于/低于... - new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexOptions.Singleline), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexOptions.Singleline, RegexTimeOut), NumberRangeConstants.LESS }, { // 比...小/低/少 - new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexOptions.Singleline), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexOptions.Singleline, RegexTimeOut), NumberRangeConstants.LESS }, { // .../以下/之下 - new Regex(NumbersDefinitions.OneNumberRangeLessRegex3, RegexOptions.Singleline), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex3, RegexOptions.Singleline, RegexTimeOut), NumberRangeConstants.LESS }, { // 等于... 
- new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexOptions.Singleline), + new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexOptions.Singleline, RegexTimeOut), NumberRangeConstants.EQUAL }, }; @@ -72,7 +78,7 @@ public NumberRangeExtractor(INumberOptionsConfiguration config) Regexes = regexes.ToImmutableDictionary(); AmbiguousFractionConnectorsRegex = - new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexOptions.Singleline); + new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexOptions.Singleline, RegexTimeOut); } internal sealed override ImmutableDictionary Regexes { get; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/OrdinalExtractor.cs index 28eaa3f92a..eab57f0620 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/OrdinalExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -11,18 +14,18 @@ public class OrdinalExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public OrdinalExtractor() + public OrdinalExtractor(BaseNumberOptionsConfiguration config) { var regexes = new Dictionary { { // 第一百五十四 - new Regex(NumbersDefinitions.OrdinalRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.CHINESE) }, { // 第2565, 第1234 - new Regex(NumbersDefinitions.OrdinalNumbersRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalNumbersRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.CHINESE) }, }; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/PercentageExtractor.cs index 191d6f038d..bcd0228133 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/PercentageExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -11,118 +14,118 @@ public class PercentageExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public PercentageExtractor() + public PercentageExtractor(BaseNumberOptionsConfiguration config) { var regexes = new Dictionary { { // 二十个百分点, 四点五个百分点 - new Regex(NumbersDefinitions.PercentagePointRegex, RegexFlags), + new Regex(NumbersDefinitions.PercentagePointRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.CHINESE) }, { // 百分之五十 百分之一点五 - new Regex(NumbersDefinitions.SimplePercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.SimplePercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.CHINESE) }, { // 百分之56.2 百分之12 - new Regex(NumbersDefinitions.NumbersPercentagePointRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersPercentagePointRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 百分之3,000 百分之1,123 - new Regex(NumbersDefinitions.NumbersPercentageWithSeparatorRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersPercentageWithSeparatorRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 百分之3.2 k - new Regex(NumbersDefinitions.NumbersPercentageWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersPercentageWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 12.56个百分点 0.4个百分点 - new Regex(NumbersDefinitions.FractionPercentagePointRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionPercentagePointRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, 
Constants.NUMBER_SUFFIX) }, { // 15,123个百分点 111,111个百分点 - new Regex(NumbersDefinitions.FractionPercentageWithSeparatorRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionPercentageWithSeparatorRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 12.1k个百分点 15.1k个百分点 - new Regex(NumbersDefinitions.FractionPercentageWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionPercentageWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 百分之22 百分之120 - new Regex(NumbersDefinitions.SimpleNumbersPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.SimpleNumbersPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 百分之15k - new Regex(NumbersDefinitions.SimpleNumbersPercentageWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.SimpleNumbersPercentageWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 百分之1,111 百分之9,999 - new Regex(NumbersDefinitions.SimpleNumbersPercentagePointRegex, RegexFlags), + new Regex(NumbersDefinitions.SimpleNumbersPercentagePointRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 12个百分点 - new Regex(NumbersDefinitions.IntegerPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.IntegerPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 12k个百分点 - new Regex(NumbersDefinitions.IntegerPercentageWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.IntegerPercentageWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 
2,123个百分点 - new Regex(NumbersDefinitions.NumbersFractionPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersFractionPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 32.5% - new Regex(NumbersDefinitions.SimpleIntegerPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.SimpleIntegerPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 2折 2.5折 - new Regex(NumbersDefinitions.NumbersFoldsPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersFoldsPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.SPECIAL_SUFFIX) }, { // 三折 六点五折 七五折 - new Regex(NumbersDefinitions.FoldsPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.FoldsPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.SPECIAL_SUFFIX) }, { // 5成 6成半 6成4 - new Regex(NumbersDefinitions.SimpleFoldsPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.SimpleFoldsPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.SPECIAL_SUFFIX) }, { // 七成半 七成五 - new Regex(NumbersDefinitions.SpecialsPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.SpecialsPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.SPECIAL_SUFFIX) }, { // 2成 2.5成 - new Regex(NumbersDefinitions.NumbersSpecialsPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersSpecialsPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.SPECIAL_SUFFIX) }, { // 三成 六点五成 - new Regex(NumbersDefinitions.SimpleSpecialsPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.SimpleSpecialsPercentageRegex, RegexFlags, 
RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.SPECIAL_SUFFIX) }, { // 打对折 半成 - new Regex(NumbersDefinitions.SpecialsFoldsPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.SpecialsFoldsPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.SPECIAL_SUFFIX) }, }; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberParserConfiguration.cs index fa55e8af90..e3fe16e8bf 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; using System.Linq; @@ -16,7 +20,7 @@ public class ChineseNumberParserConfiguration : BaseNumberParserConfiguration, I public ChineseNumberParserConfiguration(INumberOptionsConfiguration config) { - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.Config = config; @@ -45,24 +49,25 @@ public ChineseNumberParserConfiguration(INumberOptionsConfiguration config) this.RoundNumberMapChar = NumbersDefinitions.RoundNumberMapChar.ToImmutableDictionary(); this.FullToHalfMap = NumbersDefinitions.FullToHalfMap.ToImmutableDictionary(); this.TratoSimMap = NumbersDefinitions.TratoSimMap.ToImmutableDictionary(); - this.UnitMap = NumbersDefinitions.UnitMap.ToImmutableDictionary(); + this.UnitMap = NumbersDefinitions.UnitMap.ToImmutableSortedDictionary(new StringLengthComparer()); this.RoundDirectList = 
NumbersDefinitions.RoundDirectList.ToImmutableList(); this.TenChars = NumbersDefinitions.TenChars.ToImmutableList(); this.HalfADozenRegex = null; // @TODO Change init to follow design in other languages - this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags); - this.DigitNumRegex = new Regex(NumbersDefinitions.DigitNumRegex, RegexFlags); - this.DozenRegex = new Regex(NumbersDefinitions.DozenRegex, RegexFlags); - this.PercentageRegex = new Regex(NumbersDefinitions.PercentageRegex, RegexFlags); - this.DoubleAndRoundRegex = new Regex(NumbersDefinitions.DoubleAndRoundRegex, RegexFlags); - this.FracSplitRegex = new Regex(NumbersDefinitions.FracSplitRegex, RegexFlags); - this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags); - this.PointRegex = new Regex(NumbersDefinitions.PointRegex, RegexFlags); - this.SpeGetNumberRegex = new Regex(NumbersDefinitions.SpeGetNumberRegex, RegexFlags); - this.PairRegex = new Regex(NumbersDefinitions.PairRegex, RegexFlags); - this.RoundNumberIntegerRegex = new Regex(NumbersDefinitions.RoundNumberIntegerRegex, RegexFlags); + this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags, RegexTimeOut); + this.DigitNumRegex = new Regex(NumbersDefinitions.DigitNumRegex, RegexFlags, RegexTimeOut); + this.DozenRegex = new Regex(NumbersDefinitions.DozenRegex, RegexFlags, RegexTimeOut); + this.PercentageRegex = new Regex(NumbersDefinitions.PercentageRegex, RegexFlags, RegexTimeOut); + this.DoubleAndRoundRegex = new Regex(NumbersDefinitions.DoubleAndRoundRegex, RegexFlags, RegexTimeOut); + this.FracSplitRegex = new Regex(NumbersDefinitions.FracSplitRegex, RegexFlags, RegexTimeOut); + this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags, RegexTimeOut); + this.PointRegex = new Regex(NumbersDefinitions.PointRegex, RegexFlags, RegexTimeOut); + this.SpeGetNumberRegex = new 
Regex(NumbersDefinitions.SpeGetNumberRegex, RegexFlags, RegexTimeOut); + this.PairRegex = new Regex(NumbersDefinitions.PairRegex, RegexFlags, RegexTimeOut); + this.RoundNumberIntegerRegex = new Regex(NumbersDefinitions.RoundNumberIntegerRegex, RegexFlags, RegexTimeOut); + this.PercentageNumRegex = new Regex(NumbersDefinitions.PercentageNumRegex, RegexFlags, RegexTimeOut); this.FractionPrepositionRegex = null; } @@ -78,6 +83,8 @@ public ChineseNumberParserConfiguration(INumberOptionsConfiguration config) public Regex PercentageRegex { get; private set; } + public Regex PercentageNumRegex { get; private set; } + public Regex DoubleAndRoundRegex { get; private set; } public Regex FracSplitRegex { get; private set; } @@ -96,7 +103,7 @@ public ChineseNumberParserConfiguration(INumberOptionsConfiguration config) public ImmutableDictionary FullToHalfMap { get; private set; } - public ImmutableDictionary UnitMap { get; private set; } + public ImmutableSortedDictionary UnitMap { get; private set; } public ImmutableDictionary TratoSimMap { get; private set; } @@ -124,5 +131,18 @@ public override string ResolveSpecificString(string numberStr) return string.Empty; } + + private class StringLengthComparer : IComparer + { + public int Compare(string x, string y) + { + if (x.Length != y.Length) + { + return x.Length - y.Length; + } + + return x.CompareTo(y); + } + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberRangeParserConfiguration.cs index e654f8ca45..bf269df89a 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberRangeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberRangeParserConfiguration.cs @@ -1,10 +1,13 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; namespace Microsoft.Recognizers.Text.Number.Chinese { - public class ChineseNumberRangeParserConfiguration : INumberRangeParserConfiguration + public class ChineseNumberRangeParserConfiguration : BaseNumberRangeParserConfiguration { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -13,36 +16,18 @@ public ChineseNumberRangeParserConfiguration(INumberOptionsConfiguration config) { CultureInfo = new CultureInfo(config.Culture); - NumberExtractor = new NumberExtractor(); - OrdinalExtractor = new OrdinalExtractor(); + var numConfig = new BaseNumberOptionsConfiguration(config); + + NumberExtractor = new NumberExtractor(numConfig); + OrdinalExtractor = new OrdinalExtractor(numConfig); NumberParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration(config)); - MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags); - LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags); - MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags); - LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags); - MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags); - LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags); + MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags, RegexTimeOut); + LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags, RegexTimeOut); + MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags, RegexTimeOut); + LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags, RegexTimeOut); + MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut); + LessOrEqualSeparate = new 
Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut); } - - public CultureInfo CultureInfo { get; private set; } - - public IExtractor NumberExtractor { get; private set; } - - public IExtractor OrdinalExtractor { get; private set; } - - public IParser NumberParser { get; private set; } - - public Regex MoreOrEqual { get; private set; } - - public Regex LessOrEqual { get; private set; } - - public Regex MoreOrEqualSuffix { get; private set; } - - public Regex LessOrEqualSuffix { get; private set; } - - public Regex MoreOrEqualSeparate { get; private set; } - - public Regex LessOrEqualSeparate { get; private set; } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Config/BaseNumberOptionsConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Config/BaseNumberOptionsConfiguration.cs index 1e4a2aed9f..9d7fa7e858 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Config/BaseNumberOptionsConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Config/BaseNumberOptionsConfiguration.cs @@ -1,12 +1,20 @@ -namespace Microsoft.Recognizers.Text.Number +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using Microsoft.Recognizers.Definitions; + +namespace Microsoft.Recognizers.Text.Number { public class BaseNumberOptionsConfiguration : INumberOptionsConfiguration { - public BaseNumberOptionsConfiguration(string culture, NumberOptions options = NumberOptions.None, NumberMode mode = NumberMode.Default) + + public BaseNumberOptionsConfiguration(string culture, NumberOptions options = NumberOptions.None, + NumberMode mode = NumberMode.Default, string placeholder = BaseNumbers.PlaceHolderDefault) { Culture = culture; Options = options; Mode = mode; + Placeholder = placeholder; } public BaseNumberOptionsConfiguration(INumberOptionsConfiguration config) @@ -14,12 +22,15 @@ public BaseNumberOptionsConfiguration(INumberOptionsConfiguration config) Culture = config.Culture; Options = config.Options; Mode = config.Mode; + Placeholder = config.Placeholder; } public NumberOptions Options { get; } public NumberMode Mode { get; } + public string Placeholder { get; } + public string Culture { get; } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Config/CJKNumberExtractorMode.cs b/.NET/Microsoft.Recognizers.Text.Number/Config/CJKNumberExtractorMode.cs index a40ca9be2b..86b5f13520 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Config/CJKNumberExtractorMode.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Config/CJKNumberExtractorMode.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Number.Config +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Number.Config { /// diff --git a/.NET/Microsoft.Recognizers.Text.Number/Config/INumberOptionsConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Config/INumberOptionsConfiguration.cs index c4cde05c99..79e199315c 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Config/INumberOptionsConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Config/INumberOptionsConfiguration.cs @@ -1,4 +1,5 @@ -using Microsoft.Recognizers.Text.Config; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. namespace Microsoft.Recognizers.Text.Number { @@ -7,5 +8,7 @@ public interface INumberOptionsConfiguration : IConfiguration NumberOptions Options { get; } NumberMode Mode { get; } + + string Placeholder { get; } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Constants.cs b/.NET/Microsoft.Recognizers.Text.Number/Constants.cs index 717ba03cfa..86f591d01c 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Constants.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Constants.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; namespace Microsoft.Recognizers.Text.Number @@ -25,20 +28,27 @@ public static class Constants // NARROW NO-BREAK SPACE public const char NO_BREAK_SPACE = '\u202f'; - // Language Markers + // Language Markers - ISO 639-2 B public const string ENGLISH = "Eng"; - public const string CHINESE = "Chs"; - public const string FRENCH = "Fr"; + public const string CHINESE = "Chi"; + public const string FRENCH = "Fre"; public const string GERMAN = "Ger"; public const string JAPANESE = "Jpn"; public const string PORTUGUESE = "Por"; public const string SPANISH = "Spa"; - public const string DUTCH = "Nl"; + public const string DUTCH = "Dut"; public const string KOREAN = "Kor"; public const string ITALIAN = "Ita"; public const string SWEDISH = "Swe"; public const string BULGARIAN = "Bul"; - public const string TURKISH = "Tr"; + public const string TURKISH = "Tur"; + public const string HINDI = "Hin"; + public const string ARABIC = "Ara"; + + /// + /// Substitute for language markers for Japanese. 
+ /// + public const string JAPANESE_SUBS = "Jap"; // Regex Prefixes / Suffixes public const string FRACTION_PREFIX = "Frac"; @@ -57,12 +67,14 @@ public static class Constants // Groups' names for named groups in regexes public const string RelativeOrdinalGroupName = "relativeOrdinal"; + public const string FracMultiplierGroupName = "fracMultiplier"; // Number subtypes public const string INTEGER = "integer"; public const string DECIMAL = "decimal"; public const string FRACTION = "fraction"; public const string POWER = "power"; + public static readonly HashSet ValidSubTypes = new HashSet() { INTEGER, diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/CardinalExtractor.cs index 9674966d12..9d15db2878 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/CardinalExtractor.cs @@ -1,25 +1,33 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions.Dutch; - namespace Microsoft.Recognizers.Text.Number.Dutch { - public class CardinalExtractor : BaseNumberExtractor + public class CardinalExtractor : CachedNumberExtractor { - private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); + private static readonly ConcurrentDictionary Instances = + new ConcurrentDictionary(); + + private readonly string keyPrefix; - private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private CardinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var builder = ImmutableDictionary.CreateBuilder(); // Add Integer Regexes - var intExtract = IntegerExtractor.GetInstance(placeholder); + var intExtract = IntegerExtractor.GetInstance(config); builder.AddRange(intExtract.Regexes); // Add Double Regexes - var douExtract = DoubleExtractor.GetInstance(placeholder); + var douExtract = DoubleExtractor.GetInstance(config); builder.AddRange(douExtract.Regexes); Regexes = builder.ToImmutable(); @@ -29,15 +37,23 @@ private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDef protected sealed override string ExtractType { get; } = Constants.SYS_NUM_CARDINAL; // "Cardinal"; - public static CardinalExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static CardinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new CardinalExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new CardinalExtractor(config); + Instances.TryAdd(extractorKey, 
instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/DoubleExtractor.cs index ccf068caa8..b8fa08ecf3 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/DoubleExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -13,48 +16,49 @@ public class DoubleExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private DoubleExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { var regexes = new Dictionary { { - new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleDecimalPointRegex(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags, RegexTimeOut), 
RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.DUTCH) }, { - new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumBlankComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumBlankComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, }; @@ -66,15 +70,18 @@ private 
DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau protected sealed override string ExtractType { get; } = Constants.SYS_NUM_DOUBLE; // "Double"; - public static DoubleExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static DoubleExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new DoubleExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new DoubleExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/FractionExtractor.cs index d867346088..78e265cc8d 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/FractionExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -11,47 +14,47 @@ public class FractionExtractor : BaseNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor> Instances = - new ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor>(); + private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor> Instances = + new ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor>(); - private FractionExtractor(NumberMode mode, NumberOptions options) + private FractionExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - Options = options; var regexes = new Dictionary { { - new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { new Regex( - NumbersDefinitions.FractionNounRegex, RegexFlags), + NumbersDefinitions.FractionNounRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.DUTCH) }, { - new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.DUTCH) }, }; // Not add FractionPrepositionRegex when the mode is Unit to avoid wrong recognize cases like "$1000 over 3" - if 
(mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { if ((Options & NumberOptions.PercentageMode) != 0) { regexes.Add( - new Regex(NumbersDefinitions.FractionPrepositionWithinPercentModeRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionPrepositionWithinPercentModeRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.DUTCH)); } else { regexes.Add( - new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.DUTCH)); } } @@ -59,22 +62,23 @@ private FractionExtractor(NumberMode mode, NumberOptions options) Regexes = regexes.ToImmutableDictionary(); } - internal sealed override ImmutableDictionary Regexes { get; } + public sealed override NumberOptions Options { get; } - protected sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; - public static FractionExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None, string placeholder = "") + public static FractionExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options, placeholder); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new FractionExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new FractionExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/IntegerExtractor.cs 
b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/IntegerExtractor.cs index abb75af917..59250ed47b 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/IntegerExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -7,54 +10,60 @@ namespace Microsoft.Recognizers.Text.Number.Dutch { - public class IntegerExtractor : BaseNumberExtractor + public class IntegerExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private IntegerExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags), + new Regex(NumbersDefinitions.NumbersWithPlaceHolder(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags, RegexTimeOut), 
RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.DUTCH) }, { - new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.DUTCH) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, }; @@ -66,15 +75,24 @@ private 
IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa protected sealed override string ExtractType { get; } = Constants.SYS_NUM_INTEGER; // "Integer"; - public static IntegerExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static IntegerExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new IntegerExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new IntegerExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } + + protected override object GenKey(string input) + { + return (keyPrefix, input); + } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberExtractor.cs index bbacc4f841..c50338d93f 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberExtractor.cs @@ -1,33 +1,41 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Dutch; namespace Microsoft.Recognizers.Text.Number.Dutch { - public class NumberExtractor : BaseNumberExtractor + public class NumberExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor> Instances = new ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor>(); - private NumberExtractor(NumberMode mode, NumberOptions options) + private readonly string keyPrefix; + + private NumberExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags); - Options = options; + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Mode + "_" + config.Culture); + + NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags, RegexTimeOut); var builder = ImmutableDictionary.CreateBuilder(); // Add Cardinal CardinalExtractor cardExtract = null; - switch (mode) + switch (config.Mode) { case NumberMode.PureNumber: - cardExtract = CardinalExtractor.GetInstance(NumbersDefinitions.PlaceHolderPureNumber); + var purNumConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options, config.Mode, + NumbersDefinitions.PlaceHolderPureNumber); + cardExtract = CardinalExtractor.GetInstance(purNumConfig); break; case NumberMode.Currency: builder.Add( @@ -40,13 +48,13 @@ private NumberExtractor(NumberMode mode, NumberOptions options) if (cardExtract == null) { - cardExtract = CardinalExtractor.GetInstance(); + cardExtract = CardinalExtractor.GetInstance(config); } builder.AddRange(cardExtract.Regexes); // Add Fraction - var fracExtract = 
FractionExtractor.GetInstance(mode, Options); + var fracExtract = FractionExtractor.GetInstance(config); builder.AddRange(fracExtract.Regexes); Regexes = builder.ToImmutable(); @@ -54,11 +62,11 @@ private NumberExtractor(NumberMode mode, NumberOptions options) var ambiguityBuilder = ImmutableDictionary.CreateBuilder(); // Do not filter the ambiguous number cases like '$2000' in NumberWithUnit, otherwise they can't be resolved. - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { foreach (var item in NumbersDefinitions.AmbiguityFiltersDict) { - ambiguityBuilder.Add(new Regex(item.Key, RegexFlags), new Regex(item.Value, RegexFlags)); + ambiguityBuilder.Add(new Regex(item.Key, RegexFlags, RegexTimeOut), new Regex(item.Value, RegexFlags, RegexTimeOut)); } } @@ -69,22 +77,26 @@ private NumberExtractor(NumberMode mode, NumberOptions options) protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM; // "Number"; protected sealed override Regex NegativeNumberTermsRegex { get; } - public static NumberExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) + public static NumberExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new NumberExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new NumberExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git 
a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberRangeExtractor.cs index c0b9a3e75a..17762ceb00 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberRangeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberRangeExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -12,8 +15,8 @@ public class NumberRangeExtractor : BaseNumberRangeExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public NumberRangeExtractor(INumberOptionsConfiguration config) - : base(NumberExtractor.GetInstance(), - OrdinalExtractor.GetInstance(), + : base(NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), new BaseNumberParser(new DutchNumberParserConfiguration(config)), config) { @@ -22,57 +25,62 @@ public NumberRangeExtractor(INumberOptionsConfiguration config) { { // between...and... - new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUMBETWEEN }, { // more than ... less than ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUM }, { // less than ... more than ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUM }, { // from ... to/~/- ... 
- new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUMTILL }, + { + // from ... to/~/- ... + new Regex(NumbersDefinitions.TwoNumberRangeRegex5, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUMCLOSED + }, { // more/greater/higher than ... - new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // 30 and/or greater/higher - new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // less/smaller/lower than ... - new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, { // 30 and/or less/smaller/lower - new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, { // equal to ... - new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.EQUAL }, { // equal to 30 or more than, larger than 30 or equal to ... - new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // equal to 30 or less, smaller than 30 or equal ... 
- new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, }; @@ -80,7 +88,7 @@ public NumberRangeExtractor(INumberOptionsConfiguration config) Regexes = regexes.ToImmutableDictionary(); AmbiguousFractionConnectorsRegex = - new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags); + new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); } internal sealed override ImmutableDictionary Regexes { get; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/OrdinalExtractor.cs index 2abe35fd9e..4742b50413 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/OrdinalExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -7,31 +10,37 @@ namespace Microsoft.Recognizers.Text.Number.Dutch { - public class OrdinalExtractor : BaseNumberExtractor + public class OrdinalExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private OrdinalExtractor() + private readonly string keyPrefix; + + private OrdinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options.ToString() + "_" + config.Culture); + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.OrdinalNumericRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalNumericRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.OrdinalDutchRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalDutchRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.DUTCH) }, { - new Regex(NumbersDefinitions.OrdinalRoundNumberRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalRoundNumberRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.DUTCH) }, }; @@ -43,15 +52,23 @@ private OrdinalExtractor() protected sealed override string ExtractType { get; } = Constants.SYS_NUM_ORDINAL; // "Ordinal"; - public static OrdinalExtractor GetInstance(string placeholder = "") + public static OrdinalExtractor 
GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Options.ToString(); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new OrdinalExtractor(); - Instances.TryAdd(placeholder, instance); + var instance = new OrdinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/PercentageExtractor.cs index 8d3182b0e8..eb7eb377cc 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/PercentageExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -8,10 +11,10 @@ namespace Microsoft.Recognizers.Text.Number.Dutch { public sealed class PercentageExtractor : BasePercentageExtractor { - public PercentageExtractor(NumberOptions options = NumberOptions.None) - : base(NumberExtractor.GetInstance(options: options)) + public PercentageExtractor(BaseNumberOptionsConfiguration config) + : base(NumberExtractor.GetInstance(config)) { - Options = options; + Options = config.Options; Regexes = InitRegexes(); } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberParserConfiguration.cs index c3bde4f6b9..712b38bc68 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberParserConfiguration.cs @@ -1,5 +1,10 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Collections.Immutable; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; @@ -10,10 +15,18 @@ public class DutchNumberParserConfiguration : BaseNumberParserConfiguration { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + private static readonly Regex FractionHalfRegex = + new Regex(NumbersDefinitions.FractionHalfRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex FractionUnitsRegex = + new Regex(NumbersDefinitions.FractionUnitsRegex, RegexFlags, RegexTimeOut); + + private static readonly string[] OneHalfTokens = NumbersDefinitions.OneHalfTokens; + public DutchNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; @@ -37,12 +50,97 @@ public DutchNumberParserConfiguration(INumberOptionsConfiguration config) this.RoundNumberMap = NumbersDefinitions.RoundNumberMap.ToImmutableDictionary(); // @TODO Change init to follow design in other languages - this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags); - this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags); - this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags); - this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags); + this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags, RegexTimeOut); + this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags, RegexTimeOut); + this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags, 
RegexTimeOut); + this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut); + this.RoundMultiplierRegex = new Regex(NumbersDefinitions.RoundMultiplierRegex, RegexFlags, RegexTimeOut); } public string NonDecimalSeparatorText { get; private set; } + + // Same behavior as the base but also handles numbers such as tweeënhalf and tweeëneenhalf + public override IEnumerable NormalizeTokenSet(IEnumerable tokens, ParseResult context) + { + var fracWords = new List(); + var tokenList = tokens.ToList(); + var tokenLen = tokenList.Count; + + for (var i = 0; i < tokenLen; i++) + { + if (tokenList[i].Contains("-")) + { + var splitTokens = tokenList[i].Split('-'); + if (splitTokens.Length == 2 && OrdinalNumberMap.ContainsKey(splitTokens[1])) + { + fracWords.Add(splitTokens[0]); + fracWords.Add(splitTokens[1]); + } + else + { + fracWords.Add(tokenList[i]); + } + } + else if (i < tokenLen - 2 && tokenList[i + 1] == "-") + { + if (OrdinalNumberMap.ContainsKey(tokenList[i + 2])) + { + fracWords.Add(tokenList[i]); + fracWords.Add(tokenList[i + 2]); + } + else + { + fracWords.Add(tokenList[i] + tokenList[i + 1] + tokenList[i + 2]); + } + + i += 2; + } + else + { + fracWords.Add(tokenList[i]); + } + } + + // The following piece of code is needed to compute the fraction pattern number+'ënhalf' + // e.g. 'tweeënhalf' ('two and a half'). Similarly for "ëneenhalf", e.g. tweeëneenhalf. 
+ int len = 2; + fracWords.RemoveAll(item => item == "/"); + for (int i = fracWords.Count - 1; i >= 0; i--) + { + if (FractionHalfRegex.IsMatch(fracWords[i])) + { + fracWords[i] = fracWords[i].Substring(0, fracWords[i].Length - 6); + fracWords.Insert(i + 1, this.WrittenFractionSeparatorTexts.ElementAt(3)); + fracWords.Insert(i + 2, OneHalfTokens[0]); + fracWords.Insert(i + 3, OneHalfTokens[1]); + len = 4; + } + else if (FractionUnitsRegex.Match(fracWords[i]).Groups["onehalf"].Success) + { + fracWords[i] = OneHalfTokens[0]; + fracWords.Insert(i + 1, this.WrittenFractionSeparatorTexts.ElementAt(3)); + fracWords.Insert(i + 2, OneHalfTokens[0]); + fracWords.Insert(i + 3, OneHalfTokens[1]); + len = 4; + } + else if (FractionUnitsRegex.Match(fracWords[i]).Groups["quarter"].Success) + { + var tempWord = fracWords[i]; + fracWords[i] = tempWord.Substring(0, 4); + fracWords.Insert(i + 1, this.WrittenFractionSeparatorTexts.ElementAt(3)); + fracWords.Insert(i + 2, tempWord.Substring(4, 5)); + len = 3; + } + } + + // In Dutch, only the last two numbers in fracWords must be considered as fraction + var fracLen = fracWords.Count; + if (fracLen > len && fracWords[fracLen - len - 1] != NumbersDefinitions.WordSeparatorToken) + { + fracWords.Insert(fracLen - len, NumbersDefinitions.WordSeparatorToken); + } + + return fracWords; + } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberRangeParserConfiguration.cs index 4ed4e4174e..1300ca882d 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberRangeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberRangeParserConfiguration.cs @@ -1,10 +1,13 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; namespace Microsoft.Recognizers.Text.Number.Dutch { - public class DutchNumberRangeParserConfiguration : INumberRangeParserConfiguration + public class DutchNumberRangeParserConfiguration : BaseNumberRangeParserConfiguration { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -13,38 +16,20 @@ public DutchNumberRangeParserConfiguration(INumberOptionsConfiguration config) { CultureInfo = new CultureInfo(config.Culture); - NumberExtractor = Dutch.NumberExtractor.GetInstance(); - OrdinalExtractor = Dutch.OrdinalExtractor.GetInstance(); + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options); + + NumberExtractor = Dutch.NumberExtractor.GetInstance(numConfig); + OrdinalExtractor = Dutch.OrdinalExtractor.GetInstance(numConfig); // @TODO Change init to follow design in other languages NumberParser = new BaseNumberParser(new DutchNumberParserConfiguration(config)); - MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags); - LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags); - MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags); - LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags); - MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags); - LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags); + MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags, RegexTimeOut); + LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags, RegexTimeOut); + MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags, RegexTimeOut); + LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags, RegexTimeOut); + MoreOrEqualSeparate = new 
Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut); + LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut); } - - public CultureInfo CultureInfo { get; private set; } - - public IExtractor NumberExtractor { get; private set; } - - public IExtractor OrdinalExtractor { get; private set; } - - public IParser NumberParser { get; private set; } - - public Regex MoreOrEqual { get; private set; } - - public Regex LessOrEqual { get; private set; } - - public Regex MoreOrEqualSuffix { get; private set; } - - public Regex LessOrEqualSuffix { get; private set; } - - public Regex MoreOrEqualSeparate { get; private set; } - - public Regex LessOrEqualSeparate { get; private set; } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/CardinalExtractor.cs index b3a65c2e4d..4ca7480445 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/CardinalExtractor.cs @@ -1,26 +1,33 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions.English; - namespace Microsoft.Recognizers.Text.Number.English { - public class CardinalExtractor : BaseNumberExtractor + public class CardinalExtractor : CachedNumberExtractor { private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private CardinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var builder = ImmutableDictionary.CreateBuilder(); // Add Integer Regexes - var intExtract = IntegerExtractor.GetInstance(placeholder); + var intExtract = IntegerExtractor.GetInstance(config); builder.AddRange(intExtract.Regexes); // Add Double Regexes - var douExtract = DoubleExtractor.GetInstance(placeholder); + var douExtract = DoubleExtractor.GetInstance(config); builder.AddRange(douExtract.Regexes); Regexes = builder.ToImmutable(); @@ -30,15 +37,24 @@ private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDef protected sealed override string ExtractType { get; } = Constants.SYS_NUM_CARDINAL; // "Cardinal"; - public static CardinalExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static CardinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new CardinalExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new CardinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } + + protected 
override object GenKey(string input) + { + return (keyPrefix, input); + } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/DoubleExtractor.cs index 84fe37ba1c..f49a98efdd 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/DoubleExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -14,48 +17,54 @@ public class DoubleExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private DoubleExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleDecimalPointRegex(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleIndianDecimalPointRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new 
Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.ENGLISH) }, { - new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumCommaDot, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumCommaDot, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumBlankDot, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumBlankDot, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceDot, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceDot, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, 
Constants.NUMBER_SUFFIX) }, }; @@ -67,15 +76,18 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau protected sealed override string ExtractType { get; } = Constants.SYS_NUM_DOUBLE; // "Double"; - public static DoubleExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static DoubleExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new DoubleExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new DoubleExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/FractionExtractor.cs index 4886ae0140..72d489a907 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/FractionExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -9,48 +12,49 @@ namespace Microsoft.Recognizers.Text.Number.English { public class FractionExtractor : BaseNumberExtractor { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor> Instances = - new ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor>(); + private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor> Instances = + new ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor>(); - private FractionExtractor(NumberMode mode, NumberOptions options) + private FractionExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - Options = options; var regexes = new Dictionary { { - new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ENGLISH) }, { - new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ENGLISH) }, }; // Not add FractionPrepositionRegex when the 
mode is Unit to avoid wrong recognize cases like "$1000 over 3" - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { if ((Options & NumberOptions.PercentageMode) != 0) { regexes.Add( - new Regex(NumbersDefinitions.FractionPrepositionWithinPercentModeRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionPrepositionWithinPercentModeRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ENGLISH)); } else { regexes.Add( - new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ENGLISH)); } } @@ -60,20 +64,19 @@ private FractionExtractor(NumberMode mode, NumberOptions options) internal sealed override ImmutableDictionary Regexes { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; - public static FractionExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None, string placeholder = "") + public static FractionExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options, placeholder); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new FractionExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new FractionExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/IntegerExtractor.cs index ccf0498a94..a0fcbd8605 100644 --- 
a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/IntegerExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -7,51 +10,61 @@ namespace Microsoft.Recognizers.Text.Number.English { - public class IntegerExtractor : BaseNumberExtractor + public class IntegerExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private IntegerExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags), + new Regex(NumbersDefinitions.NumbersWithPlaceHolder(config.Placeholder), RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.IndianNumberingSystemRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags), + new 
Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.ENGLISH) }, { - new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.ENGLISH) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, }; @@ -63,15 +76,24 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa protected sealed override string ExtractType { get; } = Constants.SYS_NUM_INTEGER; // "Integer"; - public static IntegerExtractor 
GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static IntegerExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new IntegerExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new IntegerExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/MergedNumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/MergedNumberExtractor.cs index 03e103cceb..35c19e639d 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/MergedNumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/MergedNumberExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; @@ -12,11 +15,11 @@ internal class MergedNumberExtractor : BaseMergedNumberExtractor private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), MergedNumberExtractor> Instances = new ConcurrentDictionary<(NumberMode, NumberOptions), MergedNumberExtractor>(); - public MergedNumberExtractor(NumberMode mode, NumberOptions options) + public MergedNumberExtractor(BaseNumberOptionsConfiguration config) { - NumberExtractor = English.NumberExtractor.GetInstance(mode, options); - RoundNumberIntegerRegexWithLocks = new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags); - ConnectorRegex = new Regex(NumbersDefinitions.ConnectorRegex, RegexFlags); + NumberExtractor = English.NumberExtractor.GetInstance(config); + RoundNumberIntegerRegexWithLocks = new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags, RegexTimeOut); + ConnectorRegex = new Regex(NumbersDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); } public sealed override BaseNumberExtractor NumberExtractor { get; set; } @@ -25,18 +28,18 @@ public MergedNumberExtractor(NumberMode mode, NumberOptions options) public sealed override Regex ConnectorRegex { get; set; } - public static MergedNumberExtractor GetInstance( - NumberMode mode = NumberMode.Default, - NumberOptions options = NumberOptions.None) + public static MergedNumberExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options); - if (!Instances.ContainsKey(cacheKey)) + + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new MergedNumberExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new MergedNumberExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } } } diff --git 
a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberExtractor.cs index 428f745565..ce706ec8c0 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberExtractor.cs @@ -1,42 +1,51 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.English; namespace Microsoft.Recognizers.Text.Number.English { - public class NumberExtractor : BaseNumberExtractor + public class NumberExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor> Instances = new ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor>(); - private NumberExtractor(NumberMode mode, NumberOptions options) + private readonly string keyPrefix; + + private NumberExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags); - AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags); + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Mode + "_" + config.Culture); + + NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags, RegexTimeOut); - RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags); + AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, 
RegexTimeOut); - Options = options; + RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags, RegexTimeOut); + + NumberParser = new BaseNumberParser(new EnglishNumberParserConfiguration(config)); var builder = ImmutableDictionary.CreateBuilder(); // Add Cardinal CardinalExtractor cardExtract = null; - switch (mode) + switch (config.Mode) { case NumberMode.PureNumber: - cardExtract = CardinalExtractor.GetInstance(NumbersDefinitions.PlaceHolderPureNumber); + var purNumConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options, config.Mode, + NumbersDefinitions.PlaceHolderPureNumber); + cardExtract = CardinalExtractor.GetInstance(purNumConfig); break; case NumberMode.Currency: - builder.Add( - BaseNumberExtractor.CurrencyRegex, - RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX)); + builder.Add(BaseNumberExtractor.CurrencyRegex, + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX)); break; case NumberMode.Unit: break; @@ -46,13 +55,13 @@ private NumberExtractor(NumberMode mode, NumberOptions options) if (cardExtract == null) { - cardExtract = CardinalExtractor.GetInstance(); + cardExtract = CardinalExtractor.GetInstance(config); } builder.AddRange(cardExtract.Regexes); // Add Fraction - var fracExtract = FractionExtractor.GetInstance(mode, Options); + var fracExtract = FractionExtractor.GetInstance(config); builder.AddRange(fracExtract.Regexes); Regexes = builder.ToImmutable(); @@ -60,23 +69,23 @@ private NumberExtractor(NumberMode mode, NumberOptions options) var ambiguityBuilder = ImmutableDictionary.CreateBuilder(); // Do not filter the ambiguous number cases like 'that one' in NumberWithUnit, otherwise they can't be resolved. 
- if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { foreach (var item in NumbersDefinitions.AmbiguityFiltersDict) { - ambiguityBuilder.Add(new Regex(item.Key, RegexFlags), new Regex(item.Value, RegexFlags)); + ambiguityBuilder.Add(new Regex(item.Key, RegexFlags, RegexTimeOut), new Regex(item.Value, RegexFlags, RegexTimeOut)); } } AmbiguityFiltersDict = ambiguityBuilder.ToImmutable(); } + public override BaseNumberParser NumberParser { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM; // "Number"; protected sealed override Regex NegativeNumberTermsRegex { get; } @@ -85,16 +94,23 @@ private NumberExtractor(NumberMode mode, NumberOptions options) protected sealed override Regex RelativeReferenceRegex { get; } - public static NumberExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) + public static NumberExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new NumberExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new NumberExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } + + protected override object GenKey(string input) + { + return (keyPrefix, input); + } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberRangeExtractor.cs index 9a65273485..2672872e24 100644 --- 
a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberRangeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberRangeExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -12,66 +15,67 @@ public class NumberRangeExtractor : BaseNumberRangeExtractor public NumberRangeExtractor(INumberOptionsConfiguration config) : base( - NumberExtractor.GetInstance(), - OrdinalExtractor.GetInstance(), - new BaseNumberParser(new EnglishNumberParserConfiguration(config)), - config) + NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + new BaseNumberParser(new EnglishNumberParserConfiguration(config)), + config) { + var regexes = new Dictionary() { { // between...and... - new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUMBETWEEN }, { // more than ... less than ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUM }, { // less than ... more than ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUM }, { // from ... to/~/- ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUMTILL }, { // more/greater/higher than ... 
- new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1LB, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // 30 and/or greater/higher - new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // less/smaller/lower than ... - new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex1LB, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, { // 30 and/or less/smaller/lower - new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, { // equal to ... - new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.EQUAL }, { // equal to 30 or more than, larger than 30 or equal to ... - new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // equal to 30 or less, smaller than 30 or equal ... 
- new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, }; @@ -79,7 +83,7 @@ public NumberRangeExtractor(INumberOptionsConfiguration config) Regexes = regexes.ToImmutableDictionary(); AmbiguousFractionConnectorsRegex = - new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags); + new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); } internal sealed override ImmutableDictionary Regexes { get; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/OrdinalExtractor.cs index abde8c3b84..4f2b55607b 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/OrdinalExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -7,7 +10,7 @@ namespace Microsoft.Recognizers.Text.Number.English { - public class OrdinalExtractor : BaseNumberExtractor + public class OrdinalExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,29 +18,34 @@ public class OrdinalExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private OrdinalExtractor(NumberOptions options) - : base(options) + private readonly string keyPrefix; + + private OrdinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags); - RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags); + keyPrefix = string.Intern(ExtractType + "_" + config.Options.ToString() + "_" + config.Culture); + + AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); + + RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags, RegexTimeOut); var regexes = new Dictionary { { - new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.OrdinalNumericRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalNumericRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.OrdinalEnglishRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalEnglishRegex, RegexFlags, RegexTimeOut), 
RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.ENGLISH) }, { - new Regex(NumbersDefinitions.OrdinalRoundNumberRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalRoundNumberRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.ENGLISH) }, }; @@ -53,16 +61,23 @@ private OrdinalExtractor(NumberOptions options) protected sealed override Regex RelativeReferenceRegex { get; } - public static OrdinalExtractor GetInstance(NumberOptions options = NumberOptions.None) + public static OrdinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = options.ToString(); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = config.Options.ToString(); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new OrdinalExtractor(options); - Instances.TryAdd(cacheKey, instance); + var instance = new OrdinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } + + protected override object GenKey(string input) + { + return (keyPrefix, input); + } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/PercentageExtractor.cs index 779c73fe8a..5f463043ed 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/PercentageExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -8,10 +11,10 @@ namespace Microsoft.Recognizers.Text.Number.English { public sealed class PercentageExtractor : BasePercentageExtractor { - public PercentageExtractor(NumberOptions options = NumberOptions.None) - : base(NumberExtractor.GetInstance(options: options)) + public PercentageExtractor(BaseNumberOptionsConfiguration config) + : base(NumberExtractor.GetInstance(config)) { - Options = options; + Options = config.Options; Regexes = InitRegexes(); } @@ -19,7 +22,7 @@ public PercentageExtractor(NumberOptions options = NumberOptions.None) protected override ImmutableHashSet InitRegexes() { - HashSet regexStrs = new HashSet + HashSet regexStrings = new HashSet { NumbersDefinitions.NumberWithSuffixPercentage, NumbersDefinitions.NumberWithPrefixPercentage, @@ -27,11 +30,11 @@ protected override ImmutableHashSet InitRegexes() if ((Options & NumberOptions.PercentageMode) != 0) { - regexStrs.Add(NumbersDefinitions.FractionNumberWithSuffixPercentage); - regexStrs.Add(NumbersDefinitions.NumberWithPrepositionPercentage); + regexStrings.Add(NumbersDefinitions.FractionNumberWithSuffixPercentage); + regexStrings.Add(NumbersDefinitions.NumberWithPrepositionPercentage); } - return BuildRegexes(regexStrs); + return BuildRegexes(regexStrings); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberParserConfiguration.cs index 2c9494b0ec..1cb37a77ad 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberParserConfiguration.cs @@ -1,4 +1,8 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; @@ -15,7 +19,7 @@ public EnglishNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; // @TODO Temporary workaround var culture = config.Culture; @@ -46,12 +50,15 @@ public EnglishNumberParserConfiguration(INumberOptionsConfiguration config) this.RelativeReferenceRelativeToMap = NumbersDefinitions.RelativeReferenceRelativeToMap.ToImmutableDictionary(); this.RoundNumberMap = NumbersDefinitions.RoundNumberMap.ToImmutableDictionary(); - this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags); - this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags); - this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags); - this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags); + this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags, RegexTimeOut); + this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags, RegexTimeOut); + this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags, RegexTimeOut); + this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut); + this.RoundMultiplierRegex = new Regex(NumbersDefinitions.RoundMultiplierRegex, RegexFlags, RegexTimeOut); } public string NonDecimalSeparatorText { get; private set; } + + public override IEnumerable NonStandardSeparatorVariants => NumbersDefinitions.NonStandardSeparatorVariants; } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberRangeParserConfiguration.cs 
b/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberRangeParserConfiguration.cs index 469402ab5f..653e00b914 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberRangeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberRangeParserConfiguration.cs @@ -1,47 +1,33 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; namespace Microsoft.Recognizers.Text.Number.English { - public class EnglishNumberRangeParserConfiguration : INumberRangeParserConfiguration + public class EnglishNumberRangeParserConfiguration : BaseNumberRangeParserConfiguration { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public EnglishNumberRangeParserConfiguration(INumberOptionsConfiguration config) { CultureInfo = new CultureInfo(config.Culture); - NumberExtractor = English.NumberExtractor.GetInstance(); - OrdinalExtractor = English.OrdinalExtractor.GetInstance(); + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options); + + NumberExtractor = English.NumberExtractor.GetInstance(numConfig); + OrdinalExtractor = English.OrdinalExtractor.GetInstance(numConfig); NumberParser = new BaseNumberParser(new EnglishNumberParserConfiguration(config)); - MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags); - LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags); - MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags); - LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags); - MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags); - LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags); + 
MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags, RegexTimeOut); + LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags, RegexTimeOut); + MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags, RegexTimeOut); + LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags, RegexTimeOut); + MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut); + LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut); } - - public CultureInfo CultureInfo { get; private set; } - - public IExtractor NumberExtractor { get; private set; } - - public IExtractor OrdinalExtractor { get; private set; } - - public IParser NumberParser { get; private set; } - - public Regex MoreOrEqual { get; private set; } - - public Regex LessOrEqual { get; private set; } - - public Regex MoreOrEqualSuffix { get; private set; } - - public Regex LessOrEqualSuffix { get; private set; } - - public Regex MoreOrEqualSeparate { get; private set; } - - public Regex LessOrEqualSeparate { get; private set; } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseMergedNumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseMergedNumberExtractor.cs index 5249bf4a78..b857afbf04 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseMergedNumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseMergedNumberExtractor.cs @@ -1,5 +1,9 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; +using System.Reflection; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Number @@ -12,6 +16,8 @@ internal abstract class BaseMergedNumberExtractor : IExtractor public abstract Regex ConnectorRegex { get; set; } + protected static TimeSpan RegexTimeOut => NumberRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + // Currently, this extractor is only for English number extracting. public List Extract(string source) { @@ -71,7 +77,9 @@ public List Extract(string source) { if (idx == 0 || groups[idx] != groups[idx - 1]) { - var tmpExtractResult = ers[idx]; + + var tmpExtractResult = ers[idx].Clone(); + tmpExtractResult.Data = new List { new ExtractResult diff --git a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs index e26aae4c02..3ff1f0bb6d 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs @@ -1,31 +1,43 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; using System.Linq; +using System.Reflection; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Text.InternalCache; namespace Microsoft.Recognizers.Text.Number { public abstract class BaseNumberExtractor : IExtractor { public static readonly Regex CurrencyRegex = - new Regex(BaseNumbers.CurrencyRegex, RegexOptions.Singleline | RegexOptions.ExplicitCapture); + new Regex(BaseNumbers.CurrencyRegex, RegexOptions.Singleline | RegexOptions.ExplicitCapture, RegexTimeOut); + + protected static readonly ResultsCache ResultsCache = new ResultsCache(4); protected BaseNumberExtractor(NumberOptions options = NumberOptions.None) { Options = options; } + public virtual NumberOptions Options { get; } = NumberOptions.None; + + public virtual BaseNumberParser NumberParser { get; } + internal abstract ImmutableDictionary Regexes { get; } + protected static TimeSpan RegexTimeOut => NumberRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + protected virtual ImmutableDictionary AmbiguityFiltersDict { get; } = null; protected virtual string ExtractType { get; } = string.Empty; - protected virtual NumberOptions Options { get; } = NumberOptions.None; - protected virtual Regex NegativeNumberTermsRegex { get; } = null; protected virtual Regex AmbiguousFractionConnectorsRegex { get; } = null; @@ -34,7 +46,6 @@ protected BaseNumberExtractor(NumberOptions options = NumberOptions.None) public virtual List Extract(string source) { - if (string.IsNullOrEmpty(source)) { return new List(); @@ -61,6 +72,20 @@ public virtual List Extract(string source) continue; } + // Matches containing separators 'in', 'out of' should be considered fractions only when numerator < denominator + if (m.Groups["ambiguousSeparator"].Success) + { + var numerator = m.Groups["numerator"]; + var denominator = m.Groups["denominator"]; + 
int num = ParseNumber(numerator); + int den = ParseNumber(denominator); + + if (num > den) + { + continue; + } + } + for (var j = 0; j < m.Length; j++) { matched[m.Index + j] = true; @@ -144,7 +169,7 @@ protected static Regex GenerateLongFormatNumberRegexes(LongFormatType type, stri BaseNumbers.IntegerRegexDefinition(placeholder, thousandsMark) : BaseNumbers.DoubleRegexDefinition(placeholder, thousandsMark, decimalsMark); - return new Regex(regexDefinition, flags); + return new Regex(regexDefinition, flags, RegexTimeOut); } private List FilterAmbiguity(List extractResults, string text) @@ -158,7 +183,8 @@ private List FilterAmbiguity(List extractResults, if (regex.Key.IsMatch(extractResult.Text)) { var matches = regex.Value.Matches(text).Cast(); - extractResults = extractResults.Where(er => !matches.Any(m => m.Index < er.Start + er.Length && m.Index + m.Length > er.Start)) + extractResults = extractResults.Where(er => !matches.Any(m => m.Index < er.Start + er.Length && + m.Index + m.Length > er.Start)) .ToList(); } } @@ -167,5 +193,25 @@ private List FilterAmbiguity(List extractResults, return extractResults; } + + private int ParseNumber(Group numerator) + { + var isParsed = int.TryParse(numerator.Value, out int num); + if (!isParsed) + { + var er = new ExtractResult + { + Start = numerator.Index, + Length = numerator.Length, + Text = numerator.Value, + Type = "Integer", + Data = null, + }; + var pr = NumberParser.Parse(er); + int.TryParse(pr.ResolutionStr, out num); + } + + return num; + } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberRangeExtractor.cs index 2f8dae045d..91c06298e7 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberRangeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberRangeExtractor.cs @@ -1,6 +1,10 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Linq; +using System.Reflection; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Number @@ -13,8 +17,8 @@ public abstract class BaseNumberRangeExtractor : IExtractor private readonly BaseNumberParser numberParser; - public BaseNumberRangeExtractor(BaseNumberExtractor numberExtractor, BaseNumberExtractor ordinalExtractor, BaseNumberParser numberParser, - INumberOptionsConfiguration config) + protected BaseNumberRangeExtractor(BaseNumberExtractor numberExtractor, BaseNumberExtractor ordinalExtractor, BaseNumberParser numberParser, + INumberOptionsConfiguration config) { this.numberExtractor = numberExtractor; this.ordinalExtractor = ordinalExtractor; @@ -26,6 +30,8 @@ public BaseNumberRangeExtractor(BaseNumberExtractor numberExtractor, BaseNumberE internal abstract Regex AmbiguousFractionConnectorsRegex { get; } + protected static TimeSpan RegexTimeOut => NumberRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + protected virtual INumberOptionsConfiguration Config { get; } protected virtual string ExtractType { get; } = string.Empty; @@ -42,6 +48,7 @@ public virtual List Extract(string source) var matched = new bool[source.Length]; var collections = Regexes.ToDictionary(o => o.Key.Matches(source), p => p.Value); + foreach (var collection in collections) { foreach (Match m in collection.Key) @@ -50,15 +57,68 @@ public virtual List Extract(string source) if (start >= 0 && length > 0) { - for (var j = 0; j < length; j++) + // Add match if not already in matchSource (it can happen that a certain pattern is extracted by more than one regex, + // but if the same tuple is added more than once execution breaks). 
+ if (!matchSource.ContainsKey(Tuple.Create(start, length))) { - matched[start + j] = true; + // Keep Source Data for extra information + matchSource.Add(new Tuple(start, length), collection.Value); } + } + } + } + + foreach (var match in matchSource) + { + var start = match.Key.Item1; + var length = match.Key.Item2; + + // Filter wrong two number ranges such as "more than 20 and less than 10" and "大于20小于10". + if (match.Value.Equals(NumberRangeConstants.TWONUM, StringComparison.Ordinal)) + { + int moreIndex = 0, lessIndex = 0; + + var text = source.Substring(match.Key.Item1, match.Key.Item2); + + var er = numberExtractor.Extract(text); + + if (er.Count != 2) + { + er = ordinalExtractor.Extract(text); - // Keep Source Data for extra information - matchSource.Add(new Tuple(start, length), collection.Value); + if (er.Count != 2) + { + continue; + } } + + var nums = er.Select(r => (double)(numberParser.Parse(r).Value ?? 0)).ToList(); + + // Order matchSource by decreasing match length so that "no less than x" is before "less than x" + var matchList = matchSource.ToList(); + matchList.Sort((pair1, pair2) => pair2.Key.Item2.CompareTo(pair1.Key.Item2)); + + moreIndex = matchList.First(r => + r.Value.Equals(NumberRangeConstants.MORE, StringComparison.Ordinal) && + r.Key.Item1 >= start && r.Key.Item1 + r.Key.Item2 <= start + length).Key.Item1; + + lessIndex = matchList.First(r => + r.Value.Equals(NumberRangeConstants.LESS, StringComparison.Ordinal) && + r.Key.Item1 >= start && r.Key.Item1 + r.Key.Item2 <= start + length).Key.Item1; + + if (!((nums[0] < nums[1] && moreIndex <= lessIndex) || (nums[0] > nums[1] && moreIndex >= lessIndex))) + { + continue; + } + } + + // The entity is longer than 1, so don't mark the last char to represent the end. + // To avoid no connector cases like "大于20小于10" being marked as a whole entity. 
+ for (var j = 0; j < length - 1; j++) + { + matched[start + j] = true; } + } var last = -1; @@ -69,7 +129,7 @@ public virtual List Extract(string source) if (i + 1 == source.Length || !matched[i + 1]) { var start = last + 1; - var length = i - last; + var length = i - last + 1; var substr = source.Substring(start, length); if (matchSource.Keys.Any(o => o.Item1 == start && o.Item2 == length)) @@ -99,8 +159,9 @@ public virtual List Extract(string source) { foreach (var result in results) { - if (result.Data.ToString() == NumberRangeConstants.TWONUMBETWEEN || - result.Data.ToString() == NumberRangeConstants.TWONUMTILL) + var data = result.Data.ToString(); + if (data == NumberRangeConstants.TWONUMBETWEEN || + data == NumberRangeConstants.TWONUMTILL) { result.Data = NumberRangeConstants.TWONUMCLOSED; } @@ -117,13 +178,14 @@ private static bool ValidateMatchAndGetStartAndLength(List extrac foreach (var extractNum in extractNumList) { - if (numberStr.Trim().EndsWith(extractNum.Text) && match.Value.StartsWith(numberStr)) + if (numberStr.Trim().EndsWith(extractNum.Text, StringComparison.Ordinal) && + match.Value.StartsWith(numberStr, StringComparison.Ordinal)) { - start = source.IndexOf(numberStr) + extractNum.Start ?? 0; + start = match.Index + extractNum.Start ?? 0; length = length - extractNum.Start ?? 0; validNum = true; } - else if (extractNum.Start == 0 && match.Value.EndsWith(numberStr)) + else if (extractNum.Start == 0 && match.Value.EndsWith(numberStr, StringComparison.Ordinal)) { length = length - numberStr.Length + extractNum.Length ?? 
0; validNum = true; @@ -146,7 +208,7 @@ private static bool ValidateMatchAndGetStartAndLength(List extrac // For these specific cases, we will not treat "30000 in 2010" as a fraction number private static bool IsAmbiguousRangeOrFraction(Match match, string type, string numberStr) { - return (type == NumberRangeConstants.MORE || type == NumberRangeConstants.LESS) && match.Value.Trim().EndsWith(numberStr); + return (type == NumberRangeConstants.MORE || type == NumberRangeConstants.LESS) && match.Value.Trim().EndsWith(numberStr, StringComparison.Ordinal); } private void GetMatchedStartAndLength(Match match, string type, string source, out int start, out int length) @@ -154,8 +216,8 @@ private void GetMatchedStartAndLength(Match match, string type, string source, o start = NumberRangeConstants.INVALID_NUM; length = NumberRangeConstants.INVALID_NUM; - var numberStr1 = match.Groups["number1"].Value; - var numberStr2 = match.Groups["number2"].Value; + var numberStr1 = match.Groups["number1"].Value?.TrimStart(); + var numberStr2 = match.Groups["number2"].Value?.TrimStart(); if (type.Contains(NumberRangeConstants.TWONUM)) { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BasePercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BasePercentageExtractor.cs index 9354fa7b6c..e23b54ada9 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BasePercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BasePercentageExtractor.cs @@ -1,7 +1,11 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; +using System.Reflection; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Number @@ -16,14 +20,17 @@ public abstract class BasePercentageExtractor : IExtractor private readonly BaseNumberExtractor numberExtractor; - public BasePercentageExtractor(BaseNumberExtractor numberExtractor) + protected BasePercentageExtractor(BaseNumberExtractor numberExtractor) { + this.Options = numberExtractor.Options; this.numberExtractor = numberExtractor; } + protected static TimeSpan RegexTimeOut => NumberRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + protected string ExtractType { get; set; } = Constants.SYS_NUM_PERCENTAGE; - protected virtual NumberOptions Options { get; } = NumberOptions.None; + protected virtual NumberOptions Options { get; } protected ImmutableHashSet Regexes { get; set; } @@ -77,6 +84,7 @@ public List Extract(string source) int start = last + 1; int length = i - last; string substr = source.Substring(start, length); + ExtractResult er = new ExtractResult { Start = start, @@ -84,6 +92,7 @@ public List Extract(string source) Text = substr, Type = ExtractType, }; + result.Add(er); } } @@ -102,23 +111,24 @@ public List Extract(string source) /// /// read the rules. /// - /// rule list. + /// rule list. /// . /// Immutable HashSet of regex. 
- protected static ImmutableHashSet BuildRegexes(HashSet regexStrs, bool ignoreCase = false) + protected static ImmutableHashSet BuildRegexes(HashSet regexStrings, bool ignoreCase = false) { var regexes = new HashSet(); - foreach (var regexStr in regexStrs) + foreach (var regexString in regexStrings) { // var sl = "(?=\\b)(" + regexStr + ")(?=(s?\\b))"; - var options = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + var regexOptions = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + if (ignoreCase) { - options = options | RegexOptions.IgnoreCase; + regexOptions |= RegexOptions.IgnoreCase; } - Regex regex = new Regex(regexStr, options); + Regex regex = new Regex(regexString, regexOptions, RegexTimeOut); regexes.Add(regex); } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Extractors/CachedNumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Extractors/CachedNumberExtractor.cs new file mode 100644 index 0000000000..be28b5903b --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Extractors/CachedNumberExtractor.cs @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; + +namespace Microsoft.Recognizers.Text.Number +{ + public abstract class CachedNumberExtractor : BaseNumberExtractor + { + + protected CachedNumberExtractor(NumberOptions options = NumberOptions.None) + : base(options) + { + } + + public override List Extract(string source) + { + + List results; + + if ((this.Options & NumberOptions.NoProtoCache) != 0) + { + results = base.Extract(source); + } + else + { + var key = GenKey(source); + + results = ResultsCache.GetOrCreate(key, () => base.Extract(source)); + } + + return results; + } + + protected abstract object GenKey(string input); + + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/CardinalExtractor.cs index f2c1f40c6b..b8c91b48e5 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/CardinalExtractor.cs @@ -1,26 +1,33 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions.French; - namespace Microsoft.Recognizers.Text.Number.French { - public class CardinalExtractor : BaseNumberExtractor + public class CardinalExtractor : CachedNumberExtractor { private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private CardinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var builder = ImmutableDictionary.CreateBuilder(); // Add Integer Regexes - var intExtract = IntegerExtractor.GetInstance(placeholder); + var intExtract = IntegerExtractor.GetInstance(config); builder.AddRange(intExtract.Regexes); // Add Double Regexes - var douExtract = DoubleExtractor.GetInstance(placeholder); + var douExtract = DoubleExtractor.GetInstance(config); builder.AddRange(douExtract.Regexes); Regexes = builder.ToImmutable(); @@ -30,15 +37,23 @@ private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDef protected sealed override string ExtractType { get; } = Constants.SYS_NUM_CARDINAL; - public static CardinalExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static CardinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new CardinalExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new CardinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override 
object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/DoubleExtractor.cs index b2d5c50222..d9134400c3 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/DoubleExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -15,44 +18,46 @@ public class DoubleExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private DoubleExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + this.Regexes = new Dictionary { { - new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleDecimalPointRegex(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithRoundNumber, 
RegexFlags), + new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.FRENCH) }, { - new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, }.ToImmutableDictionary(); @@ -62,15 +67,18 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau protected sealed override string ExtractType { get; } = Constants.SYS_NUM_DOUBLE; - public static DoubleExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static DoubleExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = 
config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new DoubleExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new DoubleExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/FractionExtractor.cs index 756e4202df..f42ab613c4 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/FractionExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -12,38 +15,38 @@ public class FractionExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor> Instances = - new ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor>(); + private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor> Instances = + new ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor>(); - private FractionExtractor(NumberMode mode, NumberOptions options) + private FractionExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - Options = options; var regexes = new Dictionary { { - new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags, RegexTimeOut), 
RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.FRENCH) }, { - new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.FRENCH) }, }; // Not add FractionPrepositionRegex when the mode is Unit to avoid wrong recognize cases like "$1000 over 3" - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { regexes.Add( - new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.FRENCH)); } @@ -52,20 +55,19 @@ private FractionExtractor(NumberMode mode, NumberOptions options) internal sealed override ImmutableDictionary Regexes { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; - public static FractionExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None, string placeholder = "") + public static FractionExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options, placeholder); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new 
FractionExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new FractionExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/IntegerExtractor.cs index feb84023e2..fbad41e057 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/IntegerExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -7,7 +10,7 @@ namespace Microsoft.Recognizers.Text.Number.French { - public class IntegerExtractor : BaseNumberExtractor + public class IntegerExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,44 +18,50 @@ public class IntegerExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private IntegerExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + this.Regexes = new Dictionary { { - new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags), + new Regex(NumbersDefinitions.NumbersWithPlaceHolder(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new 
Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, placeholder), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, config.Placeholder), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.FRENCH) }, { - new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.FRENCH) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, config.Placeholder, RegexFlags), 
RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, }.ToImmutableDictionary(); @@ -62,15 +71,23 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa protected sealed override string ExtractType { get; } = Constants.SYS_NUM_INTEGER; // "Integer"; - public static IntegerExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static IntegerExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new IntegerExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new IntegerExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/NumberExtractor.cs index 3601c684c6..63b69019c0 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/NumberExtractor.cs @@ -1,30 +1,45 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.French; namespace Microsoft.Recognizers.Text.Number.French { - public class NumberExtractor : BaseNumberExtractor + public class NumberExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor> Instances = new ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor>(); - private NumberExtractor(NumberMode mode, NumberOptions options) + private readonly string keyPrefix; + + private NumberExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - Options = options; + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Mode + "_" + config.Culture); + + NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags, RegexTimeOut); + + AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); + + // RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags, RegexTimeOut); + + NumberParser = new BaseNumberParser(new FrenchNumberParserConfiguration(config)); var builder = ImmutableDictionary.CreateBuilder(); CardinalExtractor cardExtract = null; - switch (mode) + switch (config.Mode) { case NumberMode.PureNumber: - cardExtract = CardinalExtractor.GetInstance(NumbersDefinitions.PlaceHolderPureNumber); + var purNumConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options, config.Mode, + NumbersDefinitions.PlaceHolderPureNumber); + cardExtract = CardinalExtractor.GetInstance(purNumConfig); break; case NumberMode.Currency: builder.Add( @@ -37,12 +52,12 @@ private NumberExtractor(NumberMode mode, NumberOptions options) if 
(cardExtract == null) { - cardExtract = CardinalExtractor.GetInstance(); + cardExtract = CardinalExtractor.GetInstance(config); } builder.AddRange(cardExtract.Regexes); - var fracExtract = FractionExtractor.GetInstance(mode, Options); + var fracExtract = FractionExtractor.GetInstance(config); builder.AddRange(fracExtract.Regexes); Regexes = builder.ToImmutable(); @@ -50,37 +65,47 @@ private NumberExtractor(NumberMode mode, NumberOptions options) var ambiguityBuilder = ImmutableDictionary.CreateBuilder(); // Do not filter the ambiguous number cases like '$2000' in NumberWithUnit, otherwise they can't be resolved. - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { foreach (var item in NumbersDefinitions.AmbiguityFiltersDict) { - ambiguityBuilder.Add(new Regex(item.Key, RegexFlags), new Regex(item.Value, RegexFlags)); + ambiguityBuilder.Add(new Regex(item.Key, RegexFlags, RegexTimeOut), new Regex(item.Value, RegexFlags, RegexTimeOut)); } } AmbiguityFiltersDict = ambiguityBuilder.ToImmutable(); } + public override BaseNumberParser NumberParser { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM; - public static NumberExtractor GetInstance( - NumberMode mode = NumberMode.Default, - NumberOptions options = NumberOptions.None) + protected sealed override Regex NegativeNumberTermsRegex { get; } + + protected sealed override Regex AmbiguousFractionConnectorsRegex { get; } + + protected sealed override Regex RelativeReferenceRegex { get; } + + public static NumberExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new 
NumberExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new NumberExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/NumberRangeExtractor.cs new file mode 100644 index 0000000000..d323d5ae1f --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/NumberRangeExtractor.cs @@ -0,0 +1,95 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.French; + +namespace Microsoft.Recognizers.Text.Number.French +{ + public class NumberRangeExtractor : BaseNumberRangeExtractor + { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public NumberRangeExtractor(INumberOptionsConfiguration config) + : base( + NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + new BaseNumberParser(new FrenchNumberParserConfiguration(config)), + config) + { + + var regexes = new Dictionary() + { + { + // between...and... + new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUMBETWEEN + }, + { + // more than ... less than ... + new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUM + }, + { + // less than ... more than ... 
+ new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUM + }, + { + // from ... to/~/- ... + new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUMTILL + }, + { + // more/greater/higher than ... + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1LB, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // 30 and/or greater/higher + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // less/smaller/lower than ... + new Regex(NumbersDefinitions.OneNumberRangeLessRegex1LB, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + { + // 30 and/or less/smaller/lower + new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + { + // equal to ... + new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags, RegexTimeOut), + NumberRangeConstants.EQUAL + }, + { + // equal to 30 or more than, larger than 30 or equal to ... + new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // equal to 30 or less, smaller than 30 or equal ... 
+ new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + }; + + Regexes = regexes.ToImmutableDictionary(); + + AmbiguousFractionConnectorsRegex = + new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); + } + + internal sealed override ImmutableDictionary Regexes { get; } + + internal sealed override Regex AmbiguousFractionConnectorsRegex { get; } + + protected sealed override string ExtractType { get; } = Constants.SYS_NUMRANGE; + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/OrdinalExtractor.cs index 931c93a5f2..f36e87b17b 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/OrdinalExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -7,7 +10,7 @@ namespace Microsoft.Recognizers.Text.Number.French { - public class OrdinalExtractor : BaseNumberExtractor + public class OrdinalExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,16 +18,26 @@ public class OrdinalExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private OrdinalExtractor() + private readonly string keyPrefix; + + private OrdinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options.ToString() + "_" + config.Culture); + + AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); + + // RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags, RegexTimeOut); + this.Regexes = new Dictionary { { - new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.OrdinalFrenchRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalFrenchRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.FRENCH) }, }.ToImmutableDictionary(); @@ -34,15 +47,26 @@ private OrdinalExtractor() protected sealed override string ExtractType { get; } = Constants.SYS_NUM_ORDINAL; // "Ordinal"; - public static OrdinalExtractor GetInstance(string placeholder = "") + protected sealed override Regex AmbiguousFractionConnectorsRegex { get; } + + protected sealed override Regex RelativeReferenceRegex { get; } + + public static OrdinalExtractor 
GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + var extractorKey = config.Options.ToString(); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new OrdinalExtractor(); - Instances.TryAdd(placeholder, instance); + var instance = new OrdinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/PercentageExtractor.cs index c874538870..9b3dc1a44f 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/PercentageExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -8,10 +11,10 @@ namespace Microsoft.Recognizers.Text.Number.French { public sealed class PercentageExtractor : BasePercentageExtractor { - public PercentageExtractor(NumberOptions options = NumberOptions.None) - : base(NumberExtractor.GetInstance(options: options)) + public PercentageExtractor(BaseNumberOptionsConfiguration config) + : base(NumberExtractor.GetInstance(config)) { - Options = options; + Options = config.Options; Regexes = InitRegexes(); } diff --git a/.NET/Microsoft.Recognizers.Text.Number/French/Parsers/FrenchNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/French/Parsers/FrenchNumberParserConfiguration.cs index c9385a57d4..ad6c4302c6 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/French/Parsers/FrenchNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/French/Parsers/FrenchNumberParserConfiguration.cs @@ -1,6 +1,10 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; +using System.Linq; using System.Text; using System.Text.RegularExpressions; @@ -17,7 +21,7 @@ public FrenchNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; @@ -41,17 +45,48 @@ public FrenchNumberParserConfiguration(INumberOptionsConfiguration config) this.RoundNumberMap = NumbersDefinitions.RoundNumberMap.ToImmutableDictionary(); // @TODO Change init to follow design in other languages - this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags); - this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags); - this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags); - this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags); + this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags, RegexTimeOut); + this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags, RegexTimeOut); + this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags, RegexTimeOut); + this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut); + this.RoundMultiplierRegex = new Regex(NumbersDefinitions.RoundMultiplierRegex, RegexFlags, RegexTimeOut); } public string NonDecimalSeparatorText { get; private set; } public override IEnumerable NormalizeTokenSet(IEnumerable tokens, ParseResult context) { - return tokens; + var fracWords = new List(); + var tokenList = tokens.ToList(); + var tokenLen = tokenList.Count; + + for (var i = 0; i < tokenLen; i++) + { + if ((i < 
tokenLen - 2) && tokenList[i + 1] == "-") + { + fracWords.Add(tokenList[i] + tokenList[i + 1] + tokenList[i + 2]); + i += 2; + } + else + { + fracWords.Add(tokenList[i]); + } + } + + // The following piece of code is needed to compute the fraction pattern number+'et demi' + // e.g. 'deux et demi' ('two and a half') where the numerator is omitted in French. + // It works by inserting the numerator 'un' ('a') in the list fracWords + // so that the pattern is correctly processed. + var fracLen = fracWords.Count; + if (fracLen > 2) + { + if (fracWords[fracLen - 1] == NumbersDefinitions.OneHalfTokens[1] && fracWords[fracLen - 2] == NumbersDefinitions.WordSeparatorToken) + { + fracWords.Insert(fracLen - 1, NumbersDefinitions.OneHalfTokens[0]); + } + } + + return fracWords; } public override long ResolveCompositeNumber(string numberStr) diff --git a/.NET/Microsoft.Recognizers.Text.Number/French/Parsers/FrenchNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/French/Parsers/FrenchNumberRangeParserConfiguration.cs new file mode 100644 index 0000000000..2d348469d0 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/French/Parsers/FrenchNumberRangeParserConfiguration.cs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.French; + +namespace Microsoft.Recognizers.Text.Number.French +{ + public class FrenchNumberRangeParserConfiguration : BaseNumberRangeParserConfiguration + { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public FrenchNumberRangeParserConfiguration(INumberOptionsConfiguration config) + { + CultureInfo = new CultureInfo(config.Culture); + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options); + + NumberExtractor = French.NumberExtractor.GetInstance(numConfig); + OrdinalExtractor = French.OrdinalExtractor.GetInstance(numConfig); + NumberParser = new BaseNumberParser(new FrenchNumberParserConfiguration(config)); + + MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags, RegexTimeOut); + LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags, RegexTimeOut); + MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags, RegexTimeOut); + LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags, RegexTimeOut); + MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut); + LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/CardinalExtractor.cs index 6ab7679fe7..aec83091cc 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/CardinalExtractor.cs @@ -1,26 +1,33 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions.German; - namespace Microsoft.Recognizers.Text.Number.German { - public class CardinalExtractor : BaseNumberExtractor + public class CardinalExtractor : CachedNumberExtractor { private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private CardinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var builder = ImmutableDictionary.CreateBuilder(); // Add Integer Regexes - var intExtract = IntegerExtractor.GetInstance(placeholder); + var intExtract = IntegerExtractor.GetInstance(config); builder.AddRange(intExtract.Regexes); // Add Double Regexes - var douExtract = DoubleExtractor.GetInstance(placeholder); + var douExtract = DoubleExtractor.GetInstance(config); builder.AddRange(douExtract.Regexes); Regexes = builder.ToImmutable(); @@ -31,15 +38,23 @@ private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDef // "Cardinal"; protected sealed override string ExtractType { get; } = Constants.SYS_NUM_CARDINAL; - public static CardinalExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static CardinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new CardinalExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new CardinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected 
override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/DoubleExtractor.cs index 7fd35c747d..817d47d590 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/DoubleExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -15,49 +18,48 @@ public class DoubleExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private DoubleExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - var regexes = new Dictionary + this.Regexes = new Dictionary { { - new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleDecimalPointRegex(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new 
Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.GERMAN) }, { - new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, - }; - - Regexes = regexes.ToImmutableDictionary(); + }.ToImmutableDictionary(); } internal sealed override ImmutableDictionary Regexes { get; } @@ -65,15 +67,18 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau // "Double"; protected sealed override string ExtractType { get; } = Constants.SYS_NUM_DOUBLE; - public static DoubleExtractor GetInstance(string placeholder = 
NumbersDefinitions.PlaceHolderDefault) + public static DoubleExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new DoubleExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new DoubleExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/FractionExtractor.cs index ec00a41f15..4be1d71534 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/FractionExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -12,36 +15,37 @@ public class FractionExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly ConcurrentDictionary<(NumberMode, string), FractionExtractor> Instances = - new ConcurrentDictionary<(NumberMode, string), FractionExtractor>(); + private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor> Instances = + new ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor>(); - private FractionExtractor(NumberMode mode) + private FractionExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { var regexes = new Dictionary { { - new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.GERMAN) }, { - new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.GERMAN) }, }; // Not add FractionPrepositionRegex when the mode is Unit to avoid wrong recognize cases like "$1000 over 3" - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { 
regexes.Add( - new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.GERMAN)); } @@ -53,16 +57,17 @@ private FractionExtractor(NumberMode mode) // "Fraction"; protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; - public static FractionExtractor GetInstance(NumberMode mode = NumberMode.Default, string placeholder = "") + public static FractionExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, placeholder); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new FractionExtractor(mode); - Instances.TryAdd(cacheKey, instance); + var instance = new FractionExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/IntegerExtractor.cs index 0189cf08c3..1198485b39 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/IntegerExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -7,7 +10,7 @@ namespace Microsoft.Recognizers.Text.Number.German { - public class IntegerExtractor : BaseNumberExtractor + public class IntegerExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,49 +18,54 @@ public class IntegerExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private IntegerExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - var regexes = new Dictionary + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + + this.Regexes = new Dictionary { { - new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags), + new Regex(NumbersDefinitions.NumbersWithPlaceHolder(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - 
new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.GERMAN) }, { - new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.GERMAN) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, - }; + }.ToImmutableDictionary(); - Regexes = regexes.ToImmutableDictionary(); } internal sealed override ImmutableDictionary Regexes { get; } @@ -65,15 +73,23 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa // "Integer"; protected sealed override string ExtractType { get; } = Constants.SYS_NUM_INTEGER; - public static IntegerExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static IntegerExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if 
(!Instances.ContainsKey(extractorKey)) { - var instance = new IntegerExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new IntegerExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/NumberExtractor.cs index 417c103cba..5f77dcd585 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/NumberExtractor.cs @@ -1,29 +1,36 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.German; namespace Microsoft.Recognizers.Text.Number.German { - public class NumberExtractor : BaseNumberExtractor + public class NumberExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor> Instances = new ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor>(); - private NumberExtractor(NumberMode mode = NumberMode.Default) + private readonly string keyPrefix; + + private NumberExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Mode + "_" + config.Culture); + var builder = ImmutableDictionary.CreateBuilder(); // Add Cardinal CardinalExtractor cardExtract = null; - switch 
(mode) + switch (config.Mode) { case NumberMode.PureNumber: - cardExtract = CardinalExtractor.GetInstance(NumbersDefinitions.PlaceHolderPureNumber); + cardExtract = CardinalExtractor.GetInstance(config); break; case NumberMode.Currency: builder.Add( @@ -36,13 +43,13 @@ private NumberExtractor(NumberMode mode = NumberMode.Default) if (cardExtract == null) { - cardExtract = CardinalExtractor.GetInstance(); + cardExtract = CardinalExtractor.GetInstance(config); } builder.AddRange(cardExtract.Regexes); // Add Fraction - var fracExtract = FractionExtractor.GetInstance(mode); + var fracExtract = FractionExtractor.GetInstance(config); builder.AddRange(fracExtract.Regexes); Regexes = builder.ToImmutable(); @@ -50,11 +57,11 @@ private NumberExtractor(NumberMode mode = NumberMode.Default) var ambiguityBuilder = ImmutableDictionary.CreateBuilder(); // Do not filter the ambiguous number cases like '$2000' in NumberWithUnit, otherwise they can't be resolved. - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { foreach (var item in NumbersDefinitions.AmbiguityFiltersDict) { - ambiguityBuilder.Add(new Regex(item.Key, RegexFlags), new Regex(item.Value, RegexFlags)); + ambiguityBuilder.Add(new Regex(item.Key, RegexFlags, RegexTimeOut), new Regex(item.Value, RegexFlags, RegexTimeOut)); } } @@ -67,18 +74,22 @@ private NumberExtractor(NumberMode mode = NumberMode.Default) protected sealed override string ExtractType { get; } = Constants.SYS_NUM; // "Number"; - public static NumberExtractor GetInstance( - NumberMode mode = NumberMode.Default, - NumberOptions options = NumberOptions.None) + public static NumberExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new NumberExtractor(mode); - Instances.TryAdd(cacheKey, instance); + var instance = new 
NumberExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/NumberRangeExtractor.cs new file mode 100644 index 0000000000..d3538c5c7f --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/NumberRangeExtractor.cs @@ -0,0 +1,95 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.German; + +namespace Microsoft.Recognizers.Text.Number.German +{ + public class NumberRangeExtractor : BaseNumberRangeExtractor + { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public NumberRangeExtractor(INumberOptionsConfiguration config) + : base( + NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + new BaseNumberParser(new GermanNumberParserConfiguration(config)), + config) + { + + var regexes = new Dictionary() + { + { + // between...and... + new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUMBETWEEN + }, + { + // more than ... less than ... + new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUM + }, + { + // less than ... more than ... + new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUM + }, + { + // from ... to/~/- ... 
+ new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUMTILL + }, + { + // more/greater/higher than ... + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // 30 and/or greater/higher + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // less/smaller/lower than ... + new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + { + // 30 and/or less/smaller/lower + new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + { + // equal to ... + new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags, RegexTimeOut), + NumberRangeConstants.EQUAL + }, + { + // equal to 30 or more than, larger than 30 or equal to ... + new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // equal to 30 or less, smaller than 30 or equal ... 
+ new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + }; + + Regexes = regexes.ToImmutableDictionary(); + + AmbiguousFractionConnectorsRegex = + new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); + } + + internal sealed override ImmutableDictionary Regexes { get; } + + internal sealed override Regex AmbiguousFractionConnectorsRegex { get; } + + protected sealed override string ExtractType { get; } = Constants.SYS_NUMRANGE; + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/OrdinalExtractor.cs index 7b7770d6ab..19eee3b6cd 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/OrdinalExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -7,7 +10,7 @@ namespace Microsoft.Recognizers.Text.Number.German { - public class OrdinalExtractor : BaseNumberExtractor + public class OrdinalExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,44 +18,55 @@ public class OrdinalExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private OrdinalExtractor() + private readonly string keyPrefix; + + private OrdinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - var regexes = new Dictionary + + keyPrefix = string.Intern(ExtractType + "_" + config.Options.ToString() + "_" + config.Culture); + + this.Regexes = new Dictionary { { - new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.OrdinalNumericRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalNumericRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.OrdinalGermanRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalGermanRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.GERMAN) }, { - new Regex(NumbersDefinitions.OrdinalRoundNumberRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalRoundNumberRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.GERMAN) }, - }; - - Regexes = regexes.ToImmutableDictionary(); + }.ToImmutableDictionary(); } internal sealed override ImmutableDictionary Regexes { get; } protected 
sealed override string ExtractType { get; } = Constants.SYS_NUM_ORDINAL; // "Ordinal"; - public static OrdinalExtractor GetInstance(string placeholder = "") + public static OrdinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + var extractorKey = config.Options.ToString(); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new OrdinalExtractor(); - Instances.TryAdd(placeholder, instance); + var instance = new OrdinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/PercentageExtractor.cs index 2d659e16df..86ba8e692e 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/German/Extractors/PercentageExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -8,10 +11,10 @@ namespace Microsoft.Recognizers.Text.Number.German { public sealed class PercentageExtractor : BasePercentageExtractor { - public PercentageExtractor(NumberOptions options = NumberOptions.None) - : base(NumberExtractor.GetInstance(options: options)) + public PercentageExtractor(BaseNumberOptionsConfiguration config) + : base(NumberExtractor.GetInstance(config)) { - Options = options; + Options = config.Options; Regexes = InitRegexes(); } diff --git a/.NET/Microsoft.Recognizers.Text.Number/German/Parsers/GermanNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/German/Parsers/GermanNumberParserConfiguration.cs index d0cdc00350..3a4fcd59cd 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/German/Parsers/GermanNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/German/Parsers/GermanNumberParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; using System.Linq; @@ -13,11 +16,19 @@ public class GermanNumberParserConfiguration : BaseNumberParserConfiguration private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + private static readonly Regex FractionHalfRegex = + new Regex(NumbersDefinitions.FractionHalfRegex, RegexFlags, RegexTimeOut); + + private static readonly Regex FractionUnitsRegex = + new Regex(NumbersDefinitions.FractionUnitsRegex, RegexFlags, RegexTimeOut); + + private static readonly string[] OneHalfTokens = NumbersDefinitions.OneHalfTokens; + public GermanNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; @@ -40,10 +51,11 @@ public GermanNumberParserConfiguration(INumberOptionsConfiguration config) this.RelativeReferenceRelativeToMap = NumbersDefinitions.RelativeReferenceRelativeToMap.ToImmutableDictionary(); this.RoundNumberMap = NumbersDefinitions.RoundNumberMap.ToImmutableDictionary(); - this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags); - this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags); - this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags); - this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags); + this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags, RegexTimeOut); + this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags, RegexTimeOut); + this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags, RegexTimeOut); + 
this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut); + this.RoundMultiplierRegex = new Regex(NumbersDefinitions.RoundMultiplierRegex, RegexFlags, RegexTimeOut); } public string NonDecimalSeparatorText { get; private set; } @@ -67,6 +79,34 @@ public override IEnumerable NormalizeTokenSet(IEnumerable tokens } } + // The following piece of code is needed to compute the fraction pattern number+'einhalb' + // e.g. 'zweieinhalb' ('two and a half'). + fracWords.RemoveAll(item => item == "/"); + for (int i = fracWords.Count - 1; i >= 0; i--) + { + if (FractionHalfRegex.IsMatch(fracWords[i])) + { + fracWords[i] = fracWords[i].Substring(0, fracWords[i].Length - 7); + fracWords.Insert(i + 1, this.WrittenFractionSeparatorTexts.ElementAt(0)); + fracWords.Insert(i + 2, OneHalfTokens[0]); + fracWords.Insert(i + 3, OneHalfTokens[1]); + } + else if (FractionUnitsRegex.Match(fracWords[i]).Groups["onehalf"].Success) + { + fracWords[i] = OneHalfTokens[0]; + fracWords.Insert(i + 1, this.WrittenFractionSeparatorTexts.ElementAt(0)); + fracWords.Insert(i + 2, OneHalfTokens[0]); + fracWords.Insert(i + 3, OneHalfTokens[1]); + } + else if (FractionUnitsRegex.Match(fracWords[i]).Groups["quarter"].Success) + { + var tempWord = fracWords[i]; + fracWords[i] = tempWord.Substring(0, 4); + fracWords.Insert(i + 1, this.WrittenFractionSeparatorTexts.ElementAt(0)); + fracWords.Insert(i + 2, tempWord.Substring(4, 5)); + } + } + return fracWords; } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/German/Parsers/GermanNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/German/Parsers/GermanNumberRangeParserConfiguration.cs new file mode 100644 index 0000000000..e285709e55 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/German/Parsers/GermanNumberRangeParserConfiguration.cs @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.German; + +namespace Microsoft.Recognizers.Text.Number.German +{ + public class GermanNumberRangeParserConfiguration : BaseNumberRangeParserConfiguration + { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public GermanNumberRangeParserConfiguration(INumberOptionsConfiguration config) + { + CultureInfo = new CultureInfo(config.Culture); + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options); + + NumberExtractor = German.NumberExtractor.GetInstance(numConfig); + OrdinalExtractor = German.OrdinalExtractor.GetInstance(numConfig); + NumberParser = new BaseNumberParser(new GermanNumberParserConfiguration(config)); + + MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags, RegexTimeOut); + LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags, RegexTimeOut); + MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags, RegexTimeOut); + LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags, RegexTimeOut); + MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut); + LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/CardinalExtractor.cs index 202d8d4b71..a1a2987326 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/CardinalExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/DoubleExtractor.cs index 00758ae362..11e4302292 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/DoubleExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -18,31 +21,31 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau var regexes = new Dictionary { { - new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleAllFloatRegex, 
RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.ENGLISH) + new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.HINDI) }, { - new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/FractionExtractor.cs index a754f9d497..94fab54902 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/FractionExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -20,42 +23,49 @@ private FractionExtractor(NumberOptions options) var regexes = new Dictionary { { - new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ENGLISH) + new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.HINDI) + }, + { + new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.HINDI) }, { - new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ENGLISH) + new Regex(NumbersDefinitions.NegativeCompoundNumberOrdinals, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.HINDI) }, }; if ((Options & NumberOptions.PercentageMode) != 0) { regexes.Add( - new Regex(NumbersDefinitions.FractionPrepositionWithinPercentModeRegex, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ENGLISH)); + new Regex(NumbersDefinitions.FractionPrepositionWithinPercentModeRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.HINDI)); } else 
{ regexes.Add( - new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ENGLISH)); + new Regex(NumbersDefinitions.FractionRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.HINDI)); + regexes.Add( + new Regex(NumbersDefinitions.FractionPrepositionInverseRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.HINDI)); } Regexes = regexes.ToImmutableDictionary(); } - internal sealed override ImmutableDictionary Regexes { get; } + public sealed override NumberOptions Options { get; } - protected sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/IntegerExtractor.cs index bf1db4611d..b1256ba5fe 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/IntegerExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -19,28 +22,32 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa var regexes = new Dictionary { { - new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags), + new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags), + new Regex(NumbersDefinitions.IndianNumberingSystemRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags), + new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.ENGLISH) + new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.ENGLISH) + new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.HINDI) + }, + { + new 
Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.HINDI) }, { GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, placeholder, RegexFlags), @@ -54,6 +61,18 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, + { + new Regex(NumbersDefinitions.NegativeHinglishRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.HINDI) + }, + { + new Regex(NumbersDefinitions.CompoundEnglishNumberRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.HINDI) + }, + { + new Regex(NumbersDefinitions.DecimalUnitsWithRoundNumberRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.HINDI) + }, }; Regexes = regexes.ToImmutableDictionary(); diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/MergedNumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/MergedNumberExtractor.cs index 50bcd0c7eb..442dfeaf83 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/MergedNumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/MergedNumberExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Hindi; @@ -13,9 +16,9 @@ internal class MergedNumberExtractor : BaseMergedNumberExtractor public MergedNumberExtractor(NumberMode mode, NumberOptions options) { - NumberExtractor = English.NumberExtractor.GetInstance(mode, options); - RoundNumberIntegerRegexWithLocks = new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags); - ConnectorRegex = new Regex(NumbersDefinitions.ConnectorRegex, RegexFlags); + NumberExtractor = Hindi.NumberExtractor.GetInstance(mode, options); + RoundNumberIntegerRegexWithLocks = new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags, RegexTimeOut); + ConnectorRegex = new Regex(NumbersDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); } public sealed override BaseNumberExtractor NumberExtractor { get; set; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/NumberExtractor.cs index 1704a3632b..8b52caeb12 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/NumberExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Hindi; @@ -14,11 +17,11 @@ public class NumberExtractor : BaseNumberExtractor private NumberExtractor(NumberMode mode, NumberOptions options) { - NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags); + NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags, RegexTimeOut); - AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags); + AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); - RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags); + RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags, RegexTimeOut); Options = options; @@ -62,19 +65,19 @@ private NumberExtractor(NumberMode mode, NumberOptions options) { foreach (var item in NumbersDefinitions.AmbiguityFiltersDict) { - ambiguityBuilder.Add(new Regex(item.Key, RegexFlags), new Regex(item.Value, RegexFlags)); + ambiguityBuilder.Add(new Regex(item.Key, RegexFlags, RegexTimeOut), new Regex(item.Value, RegexFlags, RegexTimeOut)); } } AmbiguityFiltersDict = ambiguityBuilder.ToImmutable(); } + public sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM; // "Number"; protected sealed override Regex NegativeNumberTermsRegex { get; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/NumberRangeExtractor.cs index 
fbeea35014..be3a7bca8c 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/NumberRangeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/NumberRangeExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Hindi; @@ -13,64 +16,74 @@ public NumberRangeExtractor(INumberOptionsConfiguration config) : base( NumberExtractor.GetInstance(), OrdinalExtractor.GetInstance(), - new BaseNumberParser(new HindiNumberParserConfiguration(config)), + new BaseIndianNumberParser(new HindiNumberParserConfiguration(config)), config) { var regexes = new Dictionary() { { // between...and... - new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUMBETWEEN }, { // more than ... less than ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUM }, { // less than ... more than ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUM }, { // from ... to/~/- ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUMTILL }, { // more/greater/higher than ... 
- new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // 30 and/or greater/higher - new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // less/smaller/lower than ... - new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, { // 30 and/or less/smaller/lower - new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, { // equal to ... - new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.EQUAL }, { // equal to 30 or more than, larger than 30 or equal to ... - new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // equal to 30 or less, smaller than 30 or equal ... - new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + { + // 30 and/or greater/higher + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex0, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // less/smaller/lower than ... 
+ new Regex(NumbersDefinitions.OneNumberRangeLessRegex0, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, }; @@ -78,7 +91,7 @@ public NumberRangeExtractor(INumberOptionsConfiguration config) Regexes = regexes.ToImmutableDictionary(); AmbiguousFractionConnectorsRegex = - new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags); + new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); } internal sealed override ImmutableDictionary Regexes { get; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/OrdinalExtractor.cs index 3fe91aeed2..55404eeaaa 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/OrdinalExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -16,27 +19,36 @@ public class OrdinalExtractor : BaseNumberExtractor private OrdinalExtractor(NumberOptions options) : base(options) { - AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags); - RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags); + AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); + + RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags, RegexTimeOut); var regexes = new Dictionary { { - new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) + new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.HINDI) }, { - new Regex(NumbersDefinitions.OrdinalNumericRegex, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) + new Regex(NumbersDefinitions.HinglishOrdinalRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.HINDI) }, { - new Regex(NumbersDefinitions.OrdinalEnglishRegex, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.ENGLISH) + new Regex(NumbersDefinitions.CompoundHindiOrdinalRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.HINDI) }, { - new Regex(NumbersDefinitions.OrdinalRoundNumberRegex, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.ENGLISH) + new Regex(NumbersDefinitions.CompoundNumberOrdinals, RegexFlags, RegexTimeOut), + 
RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.HINDI) + }, + { + new Regex(NumbersDefinitions.CompoundEnglishOrdinalRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.HINDI) + }, + { + new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) }, }; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/PercentageExtractor.cs index 7159c42ee7..7233888f52 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/PercentageExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Hindi; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberParserConfiguration.cs index ba8158a235..2bad80f481 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberParserConfiguration.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; @@ -7,7 +10,7 @@ namespace Microsoft.Recognizers.Text.Number.Hindi { - public class HindiNumberParserConfiguration : BaseNumberParserConfiguration + public class HindiNumberParserConfiguration : BaseIndianNumberParserConfiguration { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,7 +18,7 @@ public HindiNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; @@ -34,14 +37,18 @@ public HindiNumberParserConfiguration(INumberOptionsConfiguration config) this.CardinalNumberMap = NumbersDefinitions.CardinalNumberMap.ToImmutableDictionary(); this.OrdinalNumberMap = NumbersDefinitions.OrdinalNumberMap.ToImmutableDictionary(); + this.DecimalUnitsMap = NumbersDefinitions.DecimalUnitsMap.ToImmutableDictionary(); this.RelativeReferenceOffsetMap = NumbersDefinitions.RelativeReferenceOffsetMap.ToImmutableDictionary(); this.RelativeReferenceRelativeToMap = NumbersDefinitions.RelativeReferenceRelativeToMap.ToImmutableDictionary(); this.RoundNumberMap = NumbersDefinitions.RoundNumberMap.ToImmutableDictionary(); - - this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags); - this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags); - this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags); - this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags); + this.ZeroToNineMap = NumbersDefinitions.ZeroToNineMap.ToImmutableDictionary(); + + this.AdditionTermsRegex = new Regex(NumbersDefinitions.AdditionTermsRegex, RegexFlags, 
RegexTimeOut); + this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags, RegexTimeOut); + this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags, RegexTimeOut); + this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags, RegexTimeOut); + this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut); + this.FractionPrepositionInverseRegex = new Regex(NumbersDefinitions.FractionPrepositionInverseRegex, RegexFlags, RegexTimeOut); } public string NonDecimalSeparatorText { get; private set; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberRangeParserConfiguration.cs index 54a605d66b..fccff63bf6 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberRangeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberRangeParserConfiguration.cs @@ -1,47 +1,33 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Hindi; namespace Microsoft.Recognizers.Text.Number.Hindi { - public class HindiNumberRangeParserConfiguration : INumberRangeParserConfiguration + public class HindiNumberRangeParserConfiguration : BaseNumberRangeParserConfiguration { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public HindiNumberRangeParserConfiguration(INumberOptionsConfiguration config) { CultureInfo = new CultureInfo(config.Culture); - NumberExtractor = English.NumberExtractor.GetInstance(); - OrdinalExtractor = English.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new HindiNumberParserConfiguration(config)); - - MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags); - LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags); - MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags); - LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags); - MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags); - LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags); - } - - public CultureInfo CultureInfo { get; private set; } - - public IExtractor NumberExtractor { get; private set; } - - public IExtractor OrdinalExtractor { get; private set; } + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options); - public IParser NumberParser { get; private set; } + NumberExtractor = Hindi.NumberExtractor.GetInstance(); + OrdinalExtractor = Hindi.OrdinalExtractor.GetInstance(); + NumberParser = new BaseIndianNumberParser(new HindiNumberParserConfiguration(config)); - public Regex MoreOrEqual { get; private set; } - - public Regex LessOrEqual { get; private set; } - - public Regex MoreOrEqualSuffix { get; private set; } - - public Regex LessOrEqualSuffix { 
get; private set; } - - public Regex MoreOrEqualSeparate { get; private set; } - - public Regex LessOrEqualSeparate { get; private set; } + MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags, RegexTimeOut); + LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags, RegexTimeOut); + MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags, RegexTimeOut); + LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags, RegexTimeOut); + MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut); + LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut); + } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/CardinalExtractor.cs index d08cd77d05..dc78de3941 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/CardinalExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/DoubleExtractor.cs index 0e6663b8a0..a7e3625f69 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/DoubleExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -20,31 +23,31 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau this.Regexes = new Dictionary { { - new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.ITALIAN) }, { - new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags, RegexTimeOut), 
RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/FractionExtractor.cs index fb2a5173b3..100ce82696 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/FractionExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -22,19 +25,19 @@ private FractionExtractor(NumberMode mode, NumberOptions options) var regexes = new Dictionary { { - new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ITALIAN) }, { - new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ITALIAN) }, }; @@ -43,16 +46,16 @@ private FractionExtractor(NumberMode mode, NumberOptions options) if (mode != NumberMode.Unit) { regexes.Add( - new 
Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.ITALIAN)); } Regexes = regexes.ToImmutableDictionary(); } - internal sealed override ImmutableDictionary Regexes { get; } + public sealed override NumberOptions Options { get; } - protected sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/IntegerExtractor.cs index 918f64417c..51652f4bc6 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/IntegerExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -20,11 +23,11 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa this.Regexes = new Dictionary { { - new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags), + new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { @@ -32,19 +35,19 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.ITALIAN) }, { - new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.ITALIAN) }, { diff --git 
a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/NumberExtractor.cs index e3dc95a276..222637723f 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/NumberExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -57,19 +60,19 @@ private NumberExtractor(NumberMode mode, NumberOptions options) { foreach (var item in NumbersDefinitions.AmbiguityFiltersDict) { - ambiguityBuilder.Add(new Regex(item.Key, RegexFlags), new Regex(item.Value, RegexFlags)); + ambiguityBuilder.Add(new Regex(item.Key, RegexFlags, RegexTimeOut), new Regex(item.Value, RegexFlags, RegexTimeOut)); } } AmbiguityFiltersDict = ambiguityBuilder.ToImmutable(); } + public sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM; public static NumberExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) diff --git a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/NumberRangeExtractor.cs index 9de0ca8335..531a635680 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/NumberRangeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/NumberRangeExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -13,7 +16,7 @@ public class NumberRangeExtractor : BaseNumberRangeExtractor public NumberRangeExtractor(INumberOptionsConfiguration config) : base(NumberExtractor.GetInstance(), - OrdinalExtractor.GetInstance(), + OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), new BaseNumberParser(new ItalianNumberParserConfiguration(config)), config) { @@ -22,57 +25,57 @@ public NumberRangeExtractor(INumberOptionsConfiguration config) { { // between...and... - new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUMBETWEEN }, { // more than ... less than ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUM }, { // less than ... more than ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUM }, { // from ... to/~/- ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUMTILL }, { // more/greater/higher than ... - new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // 30 and/or greater/higher - new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // less/smaller/lower than ... 
- new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, { // 30 and/or less/smaller/lower - new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, { // equal to ... - new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.EQUAL }, { // equal to 30 or more than, larger than 30 or equal to ... - new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // equal to 30 or less, smaller than 30 or equal ... - new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, }; @@ -80,7 +83,7 @@ public NumberRangeExtractor(INumberOptionsConfiguration config) Regexes = regexes.ToImmutableDictionary(); AmbiguousFractionConnectorsRegex = - new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags); + new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); } internal sealed override ImmutableDictionary Regexes { get; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/OrdinalExtractor.cs index ff36c1ec46..edaa2aeccd 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/OrdinalExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -15,16 +18,17 @@ public class OrdinalExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private OrdinalExtractor() + private OrdinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { this.Regexes = new Dictionary { { - new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.OrdinalItalianRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalItalianRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.ITALIAN) }, }.ToImmutableDictionary(); @@ -34,15 +38,17 @@ private OrdinalExtractor() protected sealed override string ExtractType { get; } = Constants.SYS_NUM_ORDINAL; // "Ordinal"; - public static OrdinalExtractor GetInstance(string placeholder = "") + public static OrdinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + var extractorKey = config.Options.ToString(); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new OrdinalExtractor(); - Instances.TryAdd(placeholder, instance); + var instance = new OrdinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/PercentageExtractor.cs index a427cfe0e1..467373bc46 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/PercentageExtractor.cs +++ 
b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/PercentageExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Italian/Parsers/ItalianNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Italian/Parsers/ItalianNumberParserConfiguration.cs index 5c24aa5940..a9a256b6eb 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Italian/Parsers/ItalianNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Italian/Parsers/ItalianNumberParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; using System.Linq; @@ -18,7 +21,7 @@ public ItalianNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; @@ -41,11 +44,12 @@ public ItalianNumberParserConfiguration(INumberOptionsConfiguration config) this.RelativeReferenceRelativeToMap = NumbersDefinitions.RelativeReferenceRelativeToMap.ToImmutableDictionary(); this.RoundNumberMap = NumbersDefinitions.RoundNumberMap.ToImmutableDictionary(); - this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags); - this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags); - this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags); - this.FractionPrepositionRegex = new 
Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags); - this.OneToNineOrdinalRegex = new Regex(NumbersDefinitions.OneToNineOrdinalRegex, RegexFlags); + this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags, RegexTimeOut); + this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags, RegexTimeOut); + this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags, RegexTimeOut); + this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut); + this.OneToNineOrdinalRegex = new Regex(NumbersDefinitions.OneToNineOrdinalRegex, RegexFlags, RegexTimeOut); + this.RoundMultiplierRegex = new Regex(NumbersDefinitions.RoundMultiplierRegex, RegexFlags, RegexTimeOut); } public string NonDecimalSeparatorText { get; private set; } @@ -79,7 +83,7 @@ public override IEnumerable NormalizeTokenSet(IEnumerable tokens var fracLen = fracWords.Count; if (fracLen > 2 && this.OneToNineOrdinalRegex.Match(fracWords[fracLen - 1]).Success) { - if (fracWords[fracLen - 3] != "e" && fracWords[fracLen - 2] != "e") + if (fracWords[fracLen - 3] != NumbersDefinitions.WordSeparatorToken && fracWords[fracLen - 2] != NumbersDefinitions.WordSeparatorToken) { fracWords[fracLen - 3] += fracWords[fracLen - 2]; fracWords.RemoveAt(fracLen - 2); @@ -93,9 +97,9 @@ public override IEnumerable NormalizeTokenSet(IEnumerable tokens fracLen = fracWords.Count; if (fracLen > 2) { - if (fracWords[fracLen - 1] == "mezzo" && fracWords[fracLen - 2] == "e") + if (fracWords[fracLen - 1] == NumbersDefinitions.OneHalfTokens[1] && fracWords[fracLen - 2] == NumbersDefinitions.WordSeparatorToken) { - fracWords.Insert(fracLen - 1, "un"); + fracWords.Insert(fracLen - 1, NumbersDefinitions.OneHalfTokens[0]); } } @@ -118,6 +122,7 @@ public override long ResolveCompositeNumber(string numberStr) long prevValue = 0; long finalValue = 0; + var strBuilder = new StringBuilder(); int lastGoodChar 
= 0; for (int i = 0; i < numberStr.Length; i++) diff --git a/.NET/Microsoft.Recognizers.Text.Number/Italian/Parsers/ItalianNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Italian/Parsers/ItalianNumberRangeParserConfiguration.cs index 97ae0b6c88..fe9598caf0 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Italian/Parsers/ItalianNumberRangeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Italian/Parsers/ItalianNumberRangeParserConfiguration.cs @@ -1,11 +1,14 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; namespace Microsoft.Recognizers.Text.Number.Italian { - public class ItalianNumberRangeParserConfiguration : INumberRangeParserConfiguration + public class ItalianNumberRangeParserConfiguration : BaseNumberRangeParserConfiguration { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,36 +18,16 @@ public ItalianNumberRangeParserConfiguration(INumberOptionsConfiguration config) CultureInfo = new CultureInfo(config.Culture); NumberExtractor = Italian.NumberExtractor.GetInstance(); - OrdinalExtractor = Italian.OrdinalExtractor.GetInstance(); + OrdinalExtractor = Italian.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)); NumberParser = new BaseNumberParser(new ItalianNumberParserConfiguration(config)); - MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags); - LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags); - MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags); - LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags); - MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags); - LessOrEqualSeparate = new 
Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags); + MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags, RegexTimeOut); + LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags, RegexTimeOut); + MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags, RegexTimeOut); + LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags, RegexTimeOut); + MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut); + LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut); } - - public CultureInfo CultureInfo { get; private set; } - - public IExtractor NumberExtractor { get; private set; } - - public IExtractor OrdinalExtractor { get; private set; } - - public IParser NumberParser { get; private set; } - - public Regex MoreOrEqual { get; private set; } - - public Regex LessOrEqual { get; private set; } - - public Regex MoreOrEqualSuffix { get; private set; } - - public Regex LessOrEqualSuffix { get; private set; } - - public Regex MoreOrEqualSeparate { get; private set; } - - public Regex LessOrEqualSeparate { get; private set; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/CardinalExtractor.cs index 1bf72302e4..24c1dc6d63 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/CardinalExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Number.Config; @@ -8,14 +11,14 @@ namespace Microsoft.Recognizers.Text.Number.Japanese public class CardinalExtractor : BaseNumberExtractor { // CardinalExtractor = Int + Double - public CardinalExtractor(CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) + public CardinalExtractor(BaseNumberOptionsConfiguration config, CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) { var builder = ImmutableDictionary.CreateBuilder(); - var intExtract = new IntegerExtractor(mode); + var intExtract = new IntegerExtractor(config, mode); builder.AddRange(intExtract.Regexes); - var douExtractor = new DoubleExtractor(); + var douExtractor = new DoubleExtractor(config, mode); builder.AddRange(douExtractor.Regexes); Regexes = builder.ToImmutable(); diff --git a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/DoubleExtractor.cs index 728c223c3e..8ed7774aa1 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/DoubleExtractor.cs @@ -1,8 +1,12 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Text.Number.Config; namespace Microsoft.Recognizers.Text.Number.Japanese { @@ -11,46 +15,83 @@ public class DoubleExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public DoubleExtractor() + public DoubleExtractor(BaseNumberOptionsConfiguration config, CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) { var regexes = new Dictionary { - { - new Regex(NumbersDefinitions.DoubleSpecialsChars, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) - }, { // (-)2.5, can avoid cases like ip address xx.xx.xx.xx - new Regex(NumbersDefinitions.DoubleSpecialsCharsWithNegatives, RegexFlags), + new Regex(NumbersDefinitions.DoubleSpecialsCharsWithNegatives, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { // (-).2 - new Regex(NumbersDefinitions.SimpleDoubleSpecialsChars, RegexFlags), + new Regex(NumbersDefinitions.SimpleDoubleSpecialsChars, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - // 1.0 K - new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags), + // えは九・二三二一三一二 + new Regex(NumbersDefinitions.DoubleRoundNumberSpecialsChars, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { // 15.2万 - new Regex(NumbersDefinitions.DoubleWithThousandsRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithThousandsRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.JAPANESE) }, { // 2e6, 21.2e0 - new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags), + new 
Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) + }, + { + new Regex(NumbersDefinitions.DoubleExponentialNotationKanjiRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { // 2^5 - new Regex(NumbersDefinitions.DoubleScientificNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleScientificNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, + { + // 1 234 567.89 + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumFullWidthBlankDot), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + // 1 234 567.89 + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumBlankDot), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) + }, }; + switch (mode) + { + case CJKNumberExtractorMode.Default: + // Uses an allow list to avoid extracting "西九条" from "九" + regexes.Add( + new Regex(NumbersDefinitions.DoubleSpecialsChars, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX)); + + // 1.0 K + regexes.Add( + new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX)); + break; + + case CJKNumberExtractorMode.ExtractAll: + // Uses no allow lists and extracts all potential numbers (useful in Units, for example). 
+ regexes.Add( + new Regex(NumbersDefinitions.DoubleSpecialsCharsAggressive, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX)); + + // 1.0 K + regexes.Add( + new Regex(NumbersDefinitions.DoubleWithMultiplierAggressiveRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX)); + break; + } + Regexes = regexes.ToImmutableDictionary(); } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/FractionExtractor.cs index d60653efb1..0e6feba3b4 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/FractionExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -11,23 +14,23 @@ public class FractionExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public FractionExtractor() + public FractionExtractor(BaseNumberOptionsConfiguration config) { var regexes = new Dictionary { { // -4 5/2, 4 6/3 - new Regex(NumbersDefinitions.FractionNotationSpecialsCharsRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationSpecialsCharsRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { // 8/3 - new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { // 五分の二 七分の三 - new Regex(NumbersDefinitions.AllFractionNumber, 
RegexFlags), + new Regex(NumbersDefinitions.AllFractionNumber, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.JAPANESE) }, }; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/IntegerExtractor.cs index 1d0d3ca57b..60cc6101b6 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/IntegerExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -12,33 +15,28 @@ public class IntegerExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public IntegerExtractor(CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) + public IntegerExtractor(BaseNumberOptionsConfiguration config, CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) { var regexes = new Dictionary { - { - // 123456, -123456 - new Regex(NumbersDefinitions.NumbersSpecialsChars, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) - }, - { - // 15k, 16 G - new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) - }, { // 1,234, 2,332,111 - new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, RegexFlags), + new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { // 半百 半ダース - new Regex(NumbersDefinitions.NumbersWithHalfDozen, RegexFlags), + new 
Regex(NumbersDefinitions.NumbersWithHalfDozen, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE) + }, + { + // 半 + new Regex(NumbersDefinitions.HalfUnitRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE) }, { // 一ダース 五十ダース - new Regex(NumbersDefinitions.NumbersWithDozen, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithDozen, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE) }, }; @@ -49,16 +47,36 @@ public IntegerExtractor(CJKNumberExtractorMode mode = CJKNumberExtractorMode.Def // 一百五十五, 负一亿三百二十二. // Uses an allow list to avoid extracting "西九条" from "九" regexes.Add( - new Regex(NumbersDefinitions.NumbersWithAllowListRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithAllowListRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE)); + + // 123456, -123456 + regexes.Add( + new Regex(NumbersDefinitions.NumbersSpecialsChars, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX)); + + // 15k, 16 G + regexes.Add( + new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX)); break; case CJKNumberExtractorMode.ExtractAll: // 一百五十五, 负一亿三百二十二, "西九条" from "九" // Uses no allow lists and extracts all potential integers (useful in Units, for example). 
regexes.Add( - new Regex(NumbersDefinitions.NumbersAggressiveRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersAggressiveRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.JAPANESE)); + + // 123456, -123456 + regexes.Add( + new Regex(NumbersDefinitions.NumbersSpecialsCharsAggressive, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX)); + + // 15k, 16 G + regexes.Add( + new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffixAggressive, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX)); break; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/NumberExtractor.cs index 6574dd50a8..7c86d543da 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/NumberExtractor.cs @@ -1,6 +1,10 @@ -using System.Collections.Immutable; -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+using System.Collections.Immutable; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Number.Config; namespace Microsoft.Recognizers.Text.Number.Japanese @@ -8,23 +12,27 @@ namespace Microsoft.Recognizers.Text.Number.Japanese public class NumberExtractor : BaseNumberExtractor { - public NumberExtractor(CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) + public NumberExtractor(BaseNumberOptionsConfiguration config, CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) { var builder = ImmutableDictionary.CreateBuilder(); // Add Cardinal - var cardExtract = new CardinalExtractor(mode); + var cardExtract = new CardinalExtractor(config, mode); builder.AddRange(cardExtract.Regexes); // Add Fraction - var fracExtract = new FractionExtractor(); + var fracExtract = new FractionExtractor(config); builder.AddRange(fracExtract.Regexes); Regexes = builder.ToImmutable(); + + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersDefinitions.AmbiguityFiltersDict).ToImmutableDictionary(); } internal sealed override ImmutableDictionary Regexes { get; } + protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } + protected sealed override string ExtractType { get; } = Constants.SYS_NUM; } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/NumberRangeExtractor.cs index d4c496c2cc..2faac817fd 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/NumberRangeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/NumberRangeExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; @@ -11,8 +14,8 @@ public class NumberRangeExtractor : BaseNumberRangeExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public NumberRangeExtractor(INumberOptionsConfiguration config) - : base(new NumberExtractor(), - new OrdinalExtractor(), + : base(new NumberExtractor(new BaseNumberOptionsConfiguration(config)), + OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), new BaseCJKNumberParser(new JapaneseNumberParserConfiguration(config)), config) { @@ -21,57 +24,67 @@ public NumberRangeExtractor(INumberOptionsConfiguration config) { { // ...と...の間 - new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUMBETWEEN }, { // より大きい...より小さい... - new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUM }, { // より小さい...より大きい... - new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUM }, { // ...と/から..., 20~30 - new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUMTILL }, { // 大なり|大きい|高い|大きく... 
- new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // ...以上 + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex3, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { - // ...より大なり|大きい|高い|大きく - new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags), + // 少なくとも|多くて|最大... + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex4, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // ...以上 - new Regex(NumbersDefinitions.OneNumberRangeMoreRegex3, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex5, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // ...以上 + new Regex(NumbersDefinitions.TwoNumberRangeMoreSuffix, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // 小なり|小さい|低い... - new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, { - // ...より小なり|小さい|低い - new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags), + // ...以下 + new Regex(NumbersDefinitions.OneNumberRangeLessRegex3, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, { // ...以下 - new Regex(NumbersDefinitions.OneNumberRangeLessRegex3, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex4, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, { // イコール... 
| ...等しい| - new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.EQUAL }, }; @@ -79,7 +92,7 @@ public NumberRangeExtractor(INumberOptionsConfiguration config) Regexes = regexes.ToImmutableDictionary(); AmbiguousFractionConnectorsRegex = - new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags); + new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); } internal sealed override ImmutableDictionary Regexes { get; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/OrdinalExtractor.cs index 9c8c218786..157d6c1287 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/OrdinalExtractor.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -11,25 +15,20 @@ public class OrdinalExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public OrdinalExtractor() + private static readonly ConcurrentDictionary Instances = + new ConcurrentDictionary(); + + private OrdinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { var regexes = new Dictionary { { // だい一百五十四 - new Regex(NumbersDefinitions.OrdinalRegex, RegexFlags), + new Regex(NumbersDefinitions.AllOrdinalRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.JAPANESE) }, - { - // だい2565 - new Regex(NumbersDefinitions.OrdinalNumbersRegex, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.JAPANESE) - }, - { - // 2折 2.5折 - new Regex(NumbersDefinitions.NumbersFoldsPercentageRegex, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.SPECIAL_SUFFIX) - }, + }; Regexes = regexes.ToImmutableDictionary(); @@ -38,5 +37,18 @@ public OrdinalExtractor() internal sealed override ImmutableDictionary Regexes { get; } protected sealed override string ExtractType { get; } = Constants.SYS_NUM_ORDINAL; + + public static OrdinalExtractor GetInstance(BaseNumberOptionsConfiguration config) + { + var extractorKey = config.Options.ToString(); + + if (!Instances.ContainsKey(extractorKey)) + { + var instance = new OrdinalExtractor(config); + Instances.TryAdd(extractorKey, instance); + } + + return Instances[extractorKey]; + } } -} +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/PercentageExtractor.cs index bcf10eb65f..3bd1dd4d14 100644 --- 
a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Extractors/PercentageExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -11,73 +14,73 @@ public class PercentageExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public PercentageExtractor() + public PercentageExtractor(BaseNumberOptionsConfiguration config) { var regexes = new Dictionary { { // 百パーセント 十五パーセント - new Regex(NumbersDefinitions.SimplePercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.SimplePercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.JAPANESE) }, { // 19パーセント 1パーセント - new Regex(NumbersDefinitions.NumbersPercentagePointRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersPercentagePointRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 3,000パーセント 1,123パーセント - new Regex(NumbersDefinitions.NumbersPercentageWithSeparatorRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersPercentageWithSeparatorRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 3.2 k パーセント - new Regex(NumbersDefinitions.NumbersPercentageWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersPercentageWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 15kパーセント - new Regex(NumbersDefinitions.SimpleNumbersPercentageWithMultiplierRegex, RegexFlags), + new 
Regex(NumbersDefinitions.SimpleNumbersPercentageWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // @TODO Example missing - new Regex(NumbersDefinitions.SimpleIntegerPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.SimpleIntegerPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 2割引 2.5割引 - new Regex(NumbersDefinitions.NumbersFoldsPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersFoldsPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.SPECIAL_SUFFIX) }, { // 三割引 六点五折 七五折 - new Regex(NumbersDefinitions.FoldsPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.FoldsPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.SPECIAL_SUFFIX) }, { // 5割 7割半 - new Regex(NumbersDefinitions.SimpleFoldsPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.SimpleFoldsPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.SPECIAL_SUFFIX) }, { // 七割半 - new Regex(NumbersDefinitions.SpecialsPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.SpecialsPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.SPECIAL_SUFFIX) }, { // 2割 2.5割 - new Regex(NumbersDefinitions.NumbersSpecialsPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersSpecialsPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.SPECIAL_SUFFIX) }, { // 三割 - new Regex(NumbersDefinitions.SimpleSpecialsPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.SimpleSpecialsPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, 
Constants.SPECIAL_SUFFIX) }, { // @TODO Example missing - new Regex(NumbersDefinitions.SpecialsFoldsPercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.SpecialsFoldsPercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.SPECIAL_SUFFIX) }, }; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Parsers/JapaneseNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Parsers/JapaneseNumberParserConfiguration.cs index 476b1de9e9..6f8fa70760 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Parsers/JapaneseNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Parsers/JapaneseNumberParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; using System.Linq; @@ -17,7 +21,7 @@ public JapaneseNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; @@ -44,24 +48,27 @@ public JapaneseNumberParserConfiguration(INumberOptionsConfiguration config) this.ZeroToNineMap = NumbersDefinitions.ZeroToNineMap.ToImmutableDictionary(); this.FullToHalfMap = NumbersDefinitions.FullToHalfMap.ToImmutableDictionary(); this.RoundNumberMapChar = NumbersDefinitions.RoundNumberMapChar.ToImmutableDictionary(); - this.UnitMap = NumbersDefinitions.UnitMap.ToImmutableDictionary(); + + // Sorted by decreasing key length + this.UnitMap = NumbersDefinitions.UnitMap.ToImmutableSortedDictionary(new LengthComparer(true)); this.RoundDirectList = 
NumbersDefinitions.RoundDirectList.ToImmutableList(); this.TenChars = NumbersDefinitions.TenChars.ToImmutableList(); this.HalfADozenRegex = null; // @TODO Change init to follow design in other languages - this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags); - this.DozenRegex = new Regex(NumbersDefinitions.DozenRegex, RegexFlags); - this.PointRegex = new Regex(NumbersDefinitions.PointRegex, RegexFlags); - this.DigitNumRegex = new Regex(NumbersDefinitions.DigitNumRegex, RegexFlags); - this.DoubleAndRoundRegex = new Regex(NumbersDefinitions.DoubleAndRoundRegex, RegexFlags); - this.FracSplitRegex = new Regex(NumbersDefinitions.FracSplitRegex, RegexFlags); - this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags); - this.SpeGetNumberRegex = new Regex(NumbersDefinitions.SpeGetNumberRegex, RegexFlags); - this.PercentageRegex = new Regex(NumbersDefinitions.PercentageRegex, RegexFlags); - this.PairRegex = new Regex(NumbersDefinitions.PairRegex, RegexFlags); - this.RoundNumberIntegerRegex = new Regex(NumbersDefinitions.RoundNumberIntegerRegex, RegexFlags); + this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags, RegexTimeOut); + this.DozenRegex = new Regex(NumbersDefinitions.DozenRegex, RegexFlags, RegexTimeOut); + this.PointRegex = new Regex(NumbersDefinitions.PointRegex, RegexFlags, RegexTimeOut); + this.DigitNumRegex = new Regex(NumbersDefinitions.DigitNumRegex, RegexFlags, RegexTimeOut); + this.DoubleAndRoundRegex = new Regex(NumbersDefinitions.DoubleAndRoundRegex, RegexFlags, RegexTimeOut); + this.FracSplitRegex = new Regex(NumbersDefinitions.FracSplitRegex, RegexFlags, RegexTimeOut); + this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags, RegexTimeOut); + this.SpeGetNumberRegex = new Regex(NumbersDefinitions.SpeGetNumberRegex, RegexFlags, RegexTimeOut); + this.PercentageRegex = new 
Regex(NumbersDefinitions.PercentageRegex, RegexFlags, RegexTimeOut); + this.PairRegex = new Regex(NumbersDefinitions.PairRegex, RegexFlags, RegexTimeOut); + this.RoundNumberIntegerRegex = new Regex(NumbersDefinitions.RoundNumberIntegerRegex, RegexFlags, RegexTimeOut); + this.PercentageNumRegex = null; } public string NonDecimalSeparatorText { get; private set; } @@ -72,6 +79,8 @@ public JapaneseNumberParserConfiguration(INumberOptionsConfiguration config) public Regex PercentageRegex { get; private set; } + public Regex PercentageNumRegex { get; private set; } + public Regex DoubleAndRoundRegex { get; private set; } public Regex FracSplitRegex { get; private set; } @@ -94,7 +103,7 @@ public JapaneseNumberParserConfiguration(INumberOptionsConfiguration config) public ImmutableDictionary FullToHalfMap { get; private set; } - public ImmutableDictionary UnitMap { get; private set; } + public ImmutableSortedDictionary UnitMap { get; private set; } public ImmutableDictionary TratoSimMap { get; private set; } @@ -111,5 +120,21 @@ public override long ResolveCompositeNumber(string numberStr) { return 0; } + + private class LengthComparer : IComparer + { + private bool isReverseOrder; + + public LengthComparer(bool reverseOrder = false) + { + isReverseOrder = reverseOrder; + } + + public int Compare(string x, string y) + { + int comparison = isReverseOrder ? y.Length.CompareTo(x.Length) : x.Length.CompareTo(y.Length); + return comparison == 0 ? 
x.CompareTo(y) : comparison; + } + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Parsers/JapaneseNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Parsers/JapaneseNumberRangeParserConfiguration.cs index c6df6d4cc9..d8a453861e 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Parsers/JapaneseNumberRangeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Parsers/JapaneseNumberRangeParserConfiguration.cs @@ -1,10 +1,13 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Japanese; namespace Microsoft.Recognizers.Text.Number.Japanese { - public class JapaneseNumberRangeParserConfiguration : INumberRangeParserConfiguration + public class JapaneseNumberRangeParserConfiguration : BaseNumberRangeParserConfiguration { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -13,37 +16,18 @@ public JapaneseNumberRangeParserConfiguration(INumberOptionsConfiguration config { CultureInfo = new CultureInfo(config.Culture); - NumberExtractor = new NumberExtractor(); - OrdinalExtractor = new OrdinalExtractor(); + var numConfig = new BaseNumberOptionsConfiguration(config); + NumberExtractor = new NumberExtractor(numConfig); + OrdinalExtractor = Japanese.OrdinalExtractor.GetInstance(numConfig); NumberParser = new BaseCJKNumberParser(new JapaneseNumberParserConfiguration(config)); - MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags); - LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags); - MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags); - LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags); - MoreOrEqualSeparate = new 
Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags); - LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags); + MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags, RegexTimeOut); + LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags, RegexTimeOut); + MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags, RegexTimeOut); + LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags, RegexTimeOut); + MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut); + LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut); } - - public CultureInfo CultureInfo { get; private set; } - - public IExtractor NumberExtractor { get; private set; } - - public IExtractor OrdinalExtractor { get; private set; } - - public IParser NumberParser { get; private set; } - - public Regex MoreOrEqual { get; private set; } - - public Regex LessOrEqual { get; private set; } - - public Regex MoreOrEqualSuffix { get; private set; } - - public Regex LessOrEqualSuffix { get; private set; } - - public Regex MoreOrEqualSeparate { get; private set; } - - public Regex LessOrEqualSeparate { get; private set; } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/CardinalExtractor.cs index a6675e65b0..31a52ec835 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/CardinalExtractor.cs @@ -1,19 +1,23 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.Number.Config; namespace Microsoft.Recognizers.Text.Number.Korean { public class CardinalExtractor : BaseNumberExtractor { // CardinalExtractor = Int + Double - public CardinalExtractor(KoreanNumberExtractorMode mode = KoreanNumberExtractorMode.Default) + public CardinalExtractor(BaseNumberOptionsConfiguration config, CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) { var builder = ImmutableDictionary.CreateBuilder(); - var intExtract = new IntegerExtractor(mode); + var intExtract = new IntegerExtractor(config, mode); builder.AddRange(intExtract.Regexes); - var douExtractor = new DoubleExtractor(); + var douExtractor = new DoubleExtractor(config); builder.AddRange(douExtractor.Regexes); Regexes = builder.ToImmutable(); diff --git a/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/DoubleExtractor.cs index 66fe7f6dc2..35d6836eb9 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/DoubleExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -11,42 +14,46 @@ public class DoubleExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public DoubleExtractor() + public DoubleExtractor(BaseNumberOptionsConfiguration config) { var regexes = new Dictionary { { - new Regex(NumbersDefinitions.DoubleSpecialsChars, RegexFlags), + new Regex(NumbersDefinitions.DoubleSpecialsChars, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { // (-)2.5, can avoid cases like ip address xx.xx.xx.xx - new Regex(NumbersDefinitions.DoubleSpecialsCharsWithNegatives, RegexFlags), + new Regex(NumbersDefinitions.DoubleSpecialsCharsWithNegatives, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, + { + new Regex(NumbersDefinitions.DoubleRoundNumberSpecialsChars, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.KOREAN) + }, { // (-).2 - new Regex(NumbersDefinitions.SimpleDoubleSpecialsChars, RegexFlags), + new Regex(NumbersDefinitions.SimpleDoubleSpecialsChars, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { // 1.0 K - new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { // 15.2만 - new Regex(NumbersDefinitions.DoubleWithThousandsRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithThousandsRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.KOREAN) }, { // 2e6, 21.2e0 - new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags), + new 
Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { // 2^5 - new Regex(NumbersDefinitions.DoubleScientificNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleScientificNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, }; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/FractionExtractor.cs index 47fbc51eea..a9d8272332 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/FractionExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -11,23 +14,23 @@ public class FractionExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public FractionExtractor() + public FractionExtractor(BaseNumberOptionsConfiguration config) { var regexes = new Dictionary { { // -4 5/2, 4 6/3 - new Regex(NumbersDefinitions.FractionNotationSpecialsCharsRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationSpecialsCharsRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { // 8/3 - new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { // 오분의 이 칠분의 삼 - new Regex(NumbersDefinitions.AllFractionNumber, RegexFlags), + new 
Regex(NumbersDefinitions.AllFractionNumber, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.KOREAN) }, }; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/IntegerExtractor.cs index d65b06e9bb..883a056c8f 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/IntegerExtractor.cs @@ -1,8 +1,12 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Korean; +using Microsoft.Recognizers.Text.Number.Config; namespace Microsoft.Recognizers.Text.Number.Korean { @@ -11,46 +15,69 @@ public class IntegerExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public IntegerExtractor(KoreanNumberExtractorMode mode = KoreanNumberExtractorMode.Default) + public IntegerExtractor(BaseNumberOptionsConfiguration config, CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) { var regexes = new Dictionary { { // 123456, -123456 - new Regex(NumbersDefinitions.NumbersSpecialsChars, RegexFlags), + new Regex(NumbersDefinitions.NumbersSpecialsChars, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { // 15k, 16 G - new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersSpecialsCharsWithSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { // 1,234, 2,332,111 - new Regex(NumbersDefinitions.DottedNumbersSpecialsChar, RegexFlags), + new 
Regex(NumbersDefinitions.DottedNumbersSpecialsChar, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { // 마이너스 일, 마이너스 오 - new Regex(NumbersDefinitions.ZeroToNineIntegerSpecialsChars, RegexFlags), + new Regex(NumbersDefinitions.ZeroToNineIntegerSpecialsChars, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.KOREAN) + }, + { + // 마이너스 일, 마이너스 오 + new Regex(NumbersDefinitions.NumbersSpecialsChars, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.KOREAN) + }, + { + // 다스 + new Regex(NumbersDefinitions.NumbersWithDozen, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.KOREAN) + }, + { + // 3백21 + new Regex(NumbersDefinitions.NativeCumKoreanRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) + }, + { + // 스물여섯 + new Regex(NumbersDefinitions.NativeSingleRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.KOREAN) }, }; switch (mode) { - case KoreanNumberExtractorMode.Default: + case CJKNumberExtractorMode.Default: // 일백오십오 regexes.Add( - new Regex(NumbersDefinitions.NumbersWithAllowListRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithAllowListRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.KOREAN)); break; - case KoreanNumberExtractorMode.ExtractAll: + case CJKNumberExtractorMode.ExtractAll: // 일백오십오, 사직구장, "사직구장" from "사(it is homonym, seems like four(4) or other chinese character)" // Uses no allow lists and extracts all potential integers (useful in Units, for example). 
regexes.Add( - new Regex(NumbersDefinitions.NumbersAggressiveRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersAggressiveRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.KOREAN)); + regexes.Add( + new Regex(NumbersDefinitions.InexactNumberUnitRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.KOREAN)); break; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/NumberExtractor.cs index 6c57b50dec..c72fcb3f05 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/NumberExtractor.cs @@ -1,5 +1,9 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.Number.Config; namespace Microsoft.Recognizers.Text.Number.Korean { @@ -24,16 +28,16 @@ public enum KoreanNumberExtractorMode public class NumberExtractor : BaseNumberExtractor { - public NumberExtractor(KoreanNumberExtractorMode mode = KoreanNumberExtractorMode.Default) + public NumberExtractor(BaseNumberOptionsConfiguration config, CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) { var builder = ImmutableDictionary.CreateBuilder(); // Add Cardinal - var cardExtract = new CardinalExtractor(mode); + var cardExtract = new CardinalExtractor(config, mode); builder.AddRange(cardExtract.Regexes); // Add Fraction - var fracExtract = new FractionExtractor(); + var fracExtract = new FractionExtractor(config); builder.AddRange(fracExtract.Regexes); Regexes = builder.ToImmutable(); diff --git a/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/NumberRangeExtractor.cs 
b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/NumberRangeExtractor.cs new file mode 100644 index 0000000000..fd873f711b --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/NumberRangeExtractor.cs @@ -0,0 +1,126 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Korean; + +namespace Microsoft.Recognizers.Text.Number.Korean +{ + public class NumberRangeExtractor : BaseNumberRangeExtractor + { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public NumberRangeExtractor(INumberOptionsConfiguration config) + : base(new NumberExtractor(new BaseNumberOptionsConfiguration(config)), + OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + new BaseCJKNumberParser(new KoreanNumberParserConfiguration(config)), + config) + { + + var regexes = new Dictionary + { + { + // ...과...사이 + new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUMBETWEEN + }, + { + // 이상...이하... + + new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUM + }, + { + // 이하...이상... 
+ new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUM + }, + { + // 이십보다 크고 삼십오보다 작다 + new Regex(NumbersDefinitions.TwoNumberRangeRegex7, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUM + }, + { + // ...에서..., 20~30 + new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUMTILL + }, + { + new Regex(NumbersDefinitions.TwoNumberRangeRegex5, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUMTILL + }, + { + new Regex(NumbersDefinitions.TwoNumberRangeRegex6, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUMTILL + }, + { + // ...이상|초과|많|높|크|더많|더높|더크|> + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // ...이상|초과|많|높|크|더많|더높|더크|> + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // >|≥... + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex3, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex5, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // >|≥... + new Regex(NumbersDefinitions.OneNumberRangeMoreRegexFraction, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + { + new Regex(NumbersDefinitions.TwoNumberRangeRegex, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUMCLOSED + }, + { + // 까지최소|<|≤... + new Regex(NumbersDefinitions.OneNumberRangeLessRegex3, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + { + new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags, RegexTimeOut), + NumberRangeConstants.EQUAL + }, + { + // >|≥... 
+ new Regex(NumbersDefinitions.OneNumberRangeEqualRegex2, RegexFlags, RegexTimeOut), + NumberRangeConstants.EQUAL + }, + { + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex4, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // 700에 달하는 + new Regex(NumbersDefinitions.OneNumberRangeLessRegex4, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + }; + + Regexes = regexes.ToImmutableDictionary(); + + } + + internal sealed override ImmutableDictionary Regexes { get; } + + internal sealed override Regex AmbiguousFractionConnectorsRegex { get; } + + protected sealed override string ExtractType { get; } = Constants.SYS_NUMRANGE; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/OrdinalExtractor.cs new file mode 100644 index 0000000000..074debc960 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/OrdinalExtractor.cs @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Korean; + +namespace Microsoft.Recognizers.Text.Number.Korean +{ + public class OrdinalExtractor : BaseNumberExtractor + { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly ConcurrentDictionary Instances = + new ConcurrentDictionary(); + + private OrdinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) + { + var regexes = new Dictionary + { + { + new Regex(NumbersDefinitions.OrdinalKoreanRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.KOREAN) + }, + + }; + + Regexes = regexes.ToImmutableDictionary(); + } + + internal sealed override ImmutableDictionary Regexes { get; } + + protected sealed override string ExtractType { get; } = Constants.SYS_NUM_ORDINAL; + + public static OrdinalExtractor GetInstance(BaseNumberOptionsConfiguration config) + { + var extractorKey = config.Options.ToString(); + + if (!Instances.ContainsKey(extractorKey)) + { + var instance = new OrdinalExtractor(config); + Instances.TryAdd(extractorKey, instance); + } + + return Instances[extractorKey]; + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/PercentageExtractor.cs index c36fc490d0..9131ce2985 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Korean/Extractors/PercentageExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -11,40 +14,40 @@ public class PercentageExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public PercentageExtractor() + public PercentageExtractor(BaseNumberOptionsConfiguration config) { var regexes = new Dictionary() { { // 백퍼센트 십오퍼센트 - new Regex(NumbersDefinitions.SimplePercentageRegex, RegexFlags), + new Regex(NumbersDefinitions.SimplePercentageRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.KOREAN) }, { // 19퍼센트 1퍼센트 - new Regex(NumbersDefinitions.NumbersPercentagePointRegex, RegexFlags), + new Regex(NumbersDefinitions.NumbersPercentagePointRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) }, { // 3,000퍼센트 1,123퍼센트 - new Regex(NumbersDefinitions.NumbersPercentageWithSeparatorRegex, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) + new Regex(NumbersDefinitions.NumbersPercentageWithSeparatorRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.KOREAN) }, { // 3.2 k 퍼센트 - new Regex(NumbersDefinitions.NumbersPercentageWithMultiplierRegex, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) + new Regex(NumbersDefinitions.NumbersPercentageWithMultiplierRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.KOREAN) }, { // 15k퍼센트 - new Regex(NumbersDefinitions.SimpleNumbersPercentageWithMultiplierRegex, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) + new Regex(NumbersDefinitions.SimpleNumbersPercentageWithMultiplierRegex, RegexFlags, RegexTimeOut), + 
RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.KOREAN) }, { // 마이너스십삼퍼센트 - new Regex(NumbersDefinitions.SimpleIntegerPercentageRegex, RegexFlags), - RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.NUMBER_SUFFIX) + new Regex(NumbersDefinitions.SimpleIntegerPercentageRegex, RegexFlags, RegexTimeOut), + RegexTagGenerator.GenerateRegexTag(Constants.PERCENT_PREFIX, Constants.KOREAN) }, }; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Korean/Parsers/KoreanNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Korean/Parsers/KoreanNumberParserConfiguration.cs index f83a852d27..b070694a42 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Korean/Parsers/KoreanNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Korean/Parsers/KoreanNumberParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; using System.Linq; @@ -16,7 +19,7 @@ public KoreanNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; @@ -43,25 +46,26 @@ public KoreanNumberParserConfiguration(INumberOptionsConfiguration config) this.ZeroToNineMap = NumbersDefinitions.ZeroToNineMap.ToImmutableDictionary(); this.RoundNumberMapChar = NumbersDefinitions.RoundNumberMapChar.ToImmutableDictionary(); this.FullToHalfMap = NumbersDefinitions.FullToHalfMap.ToImmutableDictionary(); - this.UnitMap = NumbersDefinitions.UnitMap.ToImmutableDictionary(); + this.UnitMap = NumbersDefinitions.UnitMap.ToImmutableSortedDictionary(); this.RoundDirectList = NumbersDefinitions.RoundDirectList.ToImmutableList(); this.TenChars = NumbersDefinitions.TenChars.ToImmutableList(); // @TODO Change init to follow design in other languages this.HalfADozenRegex = null; - this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags); - this.DigitNumRegex = new Regex(NumbersDefinitions.DigitNumRegex, RegexFlags); - this.DozenRegex = new Regex(NumbersDefinitions.DozenRegex, RegexFlags); - this.PercentageRegex = new Regex(NumbersDefinitions.PercentageRegex, RegexFlags); - this.DoubleAndRoundRegex = new Regex(NumbersDefinitions.DoubleAndRoundRegex, RegexFlags); - this.FracSplitRegex = new Regex(NumbersDefinitions.FracSplitRegex, RegexFlags); - this.NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex, RegexFlags); - this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags); - this.PointRegex = new Regex(NumbersDefinitions.PointRegex, RegexFlags); - this.SpeGetNumberRegex = new 
Regex(NumbersDefinitions.SpeGetNumberRegex, RegexFlags); - this.PairRegex = new Regex(NumbersDefinitions.PairRegex, RegexFlags); - this.RoundNumberIntegerRegex = new Regex(NumbersDefinitions.RoundNumberIntegerRegex, RegexFlags); + this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags, RegexTimeOut); + this.DigitNumRegex = new Regex(NumbersDefinitions.DigitNumRegex, RegexFlags, RegexTimeOut); + this.DozenRegex = new Regex(NumbersDefinitions.DozenRegex, RegexFlags, RegexTimeOut); + this.PercentageRegex = new Regex(NumbersDefinitions.PercentageRegex, RegexFlags, RegexTimeOut); + this.DoubleAndRoundRegex = new Regex(NumbersDefinitions.DoubleAndRoundRegex, RegexFlags, RegexTimeOut); + this.FracSplitRegex = new Regex(NumbersDefinitions.FracSplitRegex, RegexFlags, RegexTimeOut); + this.NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex, RegexFlags, RegexTimeOut); + this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags, RegexTimeOut); + this.PointRegex = new Regex(NumbersDefinitions.PointRegex, RegexFlags, RegexTimeOut); + this.SpeGetNumberRegex = new Regex(NumbersDefinitions.SpeGetNumberRegex, RegexFlags, RegexTimeOut); + this.PairRegex = new Regex(NumbersDefinitions.PairRegex, RegexFlags, RegexTimeOut); + this.RoundNumberIntegerRegex = new Regex(NumbersDefinitions.RoundNumberIntegerRegex, RegexFlags, RegexTimeOut); this.FractionPrepositionRegex = null; + this.PercentageNumRegex = null; } public string NonDecimalSeparatorText { get; private set; } @@ -72,6 +76,8 @@ public KoreanNumberParserConfiguration(INumberOptionsConfiguration config) public Regex PercentageRegex { get; private set; } + public Regex PercentageNumRegex { get; private set; } + public Regex DoubleAndRoundRegex { get; private set; } public Regex FracSplitRegex { get; private set; } @@ -96,7 +102,7 @@ public KoreanNumberParserConfiguration(INumberOptionsConfiguration config) public ImmutableDictionary 
FullToHalfMap { get; private set; } - public ImmutableDictionary UnitMap { get; private set; } + public ImmutableSortedDictionary UnitMap { get; private set; } public ImmutableDictionary TratoSimMap { get; private set; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Korean/Parsers/KoreanNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Korean/Parsers/KoreanNumberRangeParserConfiguration.cs new file mode 100644 index 0000000000..81dcdfe3b3 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Korean/Parsers/KoreanNumberRangeParserConfiguration.cs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Korean; + +namespace Microsoft.Recognizers.Text.Number.Korean +{ + public class KoreanNumberRangeParserConfiguration : BaseNumberRangeParserConfiguration + { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public KoreanNumberRangeParserConfiguration(INumberOptionsConfiguration config) + { + CultureInfo = new CultureInfo(config.Culture); + + var numConfig = new BaseNumberOptionsConfiguration(config); + + NumberExtractor = new NumberExtractor(numConfig); + OrdinalExtractor = Japanese.OrdinalExtractor.GetInstance(numConfig); + NumberParser = new BaseCJKNumberParser(new KoreanNumberParserConfiguration(config)); + + MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags, RegexTimeOut); + LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags, RegexTimeOut); + MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags, RegexTimeOut); + LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags, RegexTimeOut); + MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut); + LessOrEqualSeparate = new 
Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Number/LongFormatType.cs b/.NET/Microsoft.Recognizers.Text.Number/LongFormatType.cs index dd1f85fd4c..64795e0829 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/LongFormatType.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/LongFormatType.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Number +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Number { public class LongFormatType { @@ -13,6 +16,9 @@ private LongFormatType(char thousandsMark, char decimalsMark) // 1,234,567 public static LongFormatType IntegerNumComma { get; set; } = new LongFormatType(',', '\0'); + // 1،234،567 + public static LongFormatType IntegerNumInvertedComma { get; set; } = new LongFormatType('،', '\0'); + // 1.234.567 public static LongFormatType IntegerNumDot { get; set; } = new LongFormatType('.', '\0'); @@ -49,6 +55,9 @@ private LongFormatType(char thousandsMark, char decimalsMark) // 1'234'567,89 public static LongFormatType DoubleNumQuoteComma { get; set; } = new LongFormatType('\'', ','); + // 1 234 567.89 + public static LongFormatType DoubleNumFullWidthBlankDot { get; set; } = new LongFormatType(' ', '.'); + public char DecimalsMark { get; } public char ThousandsMark { get; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.csproj b/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.csproj index 58e52730a4..6b959c07be 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.csproj +++ b/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.csproj @@ -1,7 +1,8 @@  - netstandard2.0;net462;net452;net45 + netstandard2.1;netstandard2.0;net462;net6.0 + 9 false false @@ -9,6 +10,9 @@ + true + ..\buildtools\35MSSharedLib1024.snk + true 
$(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + Microsoft + nlp, entity-extraction, parser-library, recognizer, numex, numbers, netstandard2.0 + Microsoft.Recognizers.Text.Number provides robust recognition and resolution of numbers expressed in English, Spanish, French, Portuguese, Chinese, + German, Dutch, Japanese, Italian, Turkish, Swedish, and Hindi. + MIT + https://github.com/Microsoft/Recognizers-Text + images\icon.png + © Microsoft Corporation. All rights reserved. + - + all runtime; build; native; contentfiles; analyzers - + all runtime; build; native; contentfiles; analyzers - diff --git a/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.nuspec b/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.nuspec index 7d84f67926..66f8aa0a76 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.nuspec +++ b/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.nuspec @@ -6,22 +6,21 @@ $title$ Microsoft true - Microsoft.Recognizers.Text.Number provides robust recognition and resolution of numbers expressed in English, Spanish, French, Portuguese, Chinese, German, Dutch, and Japanese. + Microsoft.Recognizers.Text.Number provides robust recognition and resolution of numbers expressed in English, Spanish, French, Portuguese, Chinese, + German, Dutch, Japanese, Italian, Turkish, Swedish, and Hindi. MIT https://github.com/Microsoft/Recognizers-Text - http://docs.botframework.com/images/bot_icon.png + images\icon.png © Microsoft Corporation. All rights reserved. 
nlp entity-extraction parser-library recognizer numex numbers netstandard2.0 - - - + diff --git a/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.xml b/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.xml index 1824f6b8fd..4c1a1b5c03 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.xml +++ b/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.xml @@ -21,6 +21,11 @@ Extract all number-related terms aggressively. + + + Substitute for language markers for Japanese. + + extractor the percentage entities from the sentence. @@ -32,7 +37,7 @@ read the rules. - rule list. + rule list. . Immutable HashSet of regex. @@ -100,6 +105,11 @@ PercentageMode + + + NoProtoCache + + SuppressExtendedTypes, mode that skips extraction of extra types not in v1. May be removed later. @@ -157,6 +167,15 @@ input arabic number. parsed result. + + + Get the split index for a fraction word list, split index used to separate the numerator and the denominator. + Ex: A fraction is "three fifth", it will be joined as a list which 1st item is "three" and 2nd item is "fifth", the split index is 1 (index of fifth). + Ex: A fraction is "two and fifty-four hundredths", the split index is 3 (index of hundredths). + + fraction words list. + split index. + Used when requiring to normalize a token to a valid expression supported by the ImmutableDictionaries (language dictionaries). @@ -172,5 +191,12 @@ composite number. value of the string. + + + Used when requiring special processing for number value cases. + + matches. + value of the match. 
+ diff --git a/.NET/Microsoft.Recognizers.Text.Number/Models/AbstractNumberModel.cs b/.NET/Microsoft.Recognizers.Text.Number/Models/AbstractNumberModel.cs index b98ce9987a..13634b92d9 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Models/AbstractNumberModel.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Models/AbstractNumberModel.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Linq; @@ -8,8 +11,26 @@ namespace Microsoft.Recognizers.Text.Number { public abstract class AbstractNumberModel : IModel { + // Languages supporting subtypes in the resolution to be added here - private static readonly List ExtractorsSupportingSubtype = new List { Constants.ENGLISH, Constants.SWEDISH }; + private static readonly List ExtractorsSupportingSubtype = new List + { + Constants.ARABIC, + Constants.ENGLISH, + Constants.PORTUGUESE, + Constants.SPANISH, + Constants.SWEDISH, + Constants.KOREAN, + Constants.FRENCH, + + // TODO: Temporarily disabled as existing TestSpec not supporting + // Constants.JAPANESE_SUBS, + // Constants.KOREAN, + }; + + private string culture; + + private string requestedCulture; protected AbstractNumberModel(IParser parser, IExtractor extractor) { @@ -19,6 +40,10 @@ protected AbstractNumberModel(IParser parser, IExtractor extractor) public abstract string ModelTypeName { get; } + public string Culture => this.culture; + + public string RequestedCulture => this.requestedCulture; + protected IExtractor Extractor { get; private set; } protected IParser Parser { get; private set; } @@ -47,7 +72,9 @@ public List Parse(string query) } } - return parsedNumbers.Select(BuildModelResult).Where(r => r != null).ToList(); + var modelResults = parsedNumbers.Select(BuildModelResult).Where(r => r != null).ToList(); + + return modelResults; } catch (Exception) { @@ -58,6 +85,12 @@ public List Parse(string query) return new 
List(); } + public void SetCultureInfo(string culture, string requestedCulture = null) + { + this.culture = culture; + this.requestedCulture = requestedCulture; + } + private ModelResult BuildModelResult(ParseResult pn) { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Models/NumberModel.cs b/.NET/Microsoft.Recognizers.Text.Number/Models/NumberModel.cs index 32f18f0818..1f6a19be14 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Models/NumberModel.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Models/NumberModel.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Number +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Number { public class NumberModel : AbstractNumberModel { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Models/NumberRangeModel.cs b/.NET/Microsoft.Recognizers.Text.Number/Models/NumberRangeModel.cs index a70e40705b..8451c711b2 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Models/NumberRangeModel.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Models/NumberRangeModel.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Number +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Number { public class NumberRangeModel : AbstractNumberModel { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Models/OrdinalModel.cs b/.NET/Microsoft.Recognizers.Text.Number/Models/OrdinalModel.cs index 1921c8fa93..17bc2ed718 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Models/OrdinalModel.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Models/OrdinalModel.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Number +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Number { public class OrdinalModel : AbstractNumberModel { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Models/PercentModel.cs b/.NET/Microsoft.Recognizers.Text.Number/Models/PercentModel.cs index 5cc6a62f1f..cdc4abf4e0 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Models/PercentModel.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Models/PercentModel.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Number +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Number { public class PercentModel : AbstractNumberModel { diff --git a/.NET/Microsoft.Recognizers.Text.Number/NumberMapGenerator.cs b/.NET/Microsoft.Recognizers.Text.Number/NumberMapGenerator.cs index 157c5de599..f0761d8ef2 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/NumberMapGenerator.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/NumberMapGenerator.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; diff --git a/.NET/Microsoft.Recognizers.Text.Number/NumberMode.cs b/.NET/Microsoft.Recognizers.Text.Number/NumberMode.cs index ed5bd109f6..861145fb91 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/NumberMode.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/NumberMode.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Number +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Number { public enum NumberMode { diff --git a/.NET/Microsoft.Recognizers.Text.Number/NumberOptions.cs b/.NET/Microsoft.Recognizers.Text.Number/NumberOptions.cs index 07ed99c218..6c672c87a8 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/NumberOptions.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/NumberOptions.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; namespace Microsoft.Recognizers.Text.Number { @@ -15,6 +18,11 @@ public enum NumberOptions /// PercentageMode = 1, + /// + /// NoProtoCache + /// + NoProtoCache = 16, + /// /// SuppressExtendedTypes, mode that skips extraction of extra types not in v1. May be removed later. /// diff --git a/.NET/Microsoft.Recognizers.Text.Number/NumberRangeConstants.cs b/.NET/Microsoft.Recognizers.Text.Number/NumberRangeConstants.cs index fd9099d0d9..846a7adf33 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/NumberRangeConstants.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/NumberRangeConstants.cs @@ -1,4 +1,7 @@ -using System.Diagnostics.CodeAnalysis; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Diagnostics.CodeAnalysis; namespace Microsoft.Recognizers.Text.Number { diff --git a/.NET/Microsoft.Recognizers.Text.Number/NumberRecognizer.cs b/.NET/Microsoft.Recognizers.Text.Number/NumberRecognizer.cs index a82c7ddf8c..9b6dfc58c9 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/NumberRecognizer.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/NumberRecognizer.cs @@ -1,5 +1,9 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; +using Microsoft.Recognizers.Text.Number.Arabic; using Microsoft.Recognizers.Text.Number.Chinese; using Microsoft.Recognizers.Text.Number.Dutch; using Microsoft.Recognizers.Text.Number.English; @@ -11,24 +15,30 @@ using Microsoft.Recognizers.Text.Number.Korean; using Microsoft.Recognizers.Text.Number.Portuguese; using Microsoft.Recognizers.Text.Number.Spanish; +using Microsoft.Recognizers.Text.Number.Swedish; using Microsoft.Recognizers.Text.Number.Turkish; namespace Microsoft.Recognizers.Text.Number { public class NumberRecognizer : Recognizer { + public NumberRecognizer(string targetCulture, NumberOptions options, bool lazyInitialization, int timeoutInSeconds) + : base(targetCulture, options, lazyInitialization, timeoutInSeconds) + { + } + public NumberRecognizer(string targetCulture, NumberOptions options = NumberOptions.None, bool lazyInitialization = false) - : base(targetCulture, options, lazyInitialization) + : base(targetCulture, options, lazyInitialization, 0) { } public NumberRecognizer(string targetCulture, int options, bool lazyInitialization = false) - : this(targetCulture, GetOptions(options), lazyInitialization) + : this(targetCulture, GetOptions(options), lazyInitialization, 0) { } public NumberRecognizer(NumberOptions options = NumberOptions.None, bool lazyInitialization = true) - : this(null, options, lazyInitialization) + : this(null, options, lazyInitialization, 0) { } @@ -79,30 +89,59 @@ public NumberRangeModel GetNumberRangeModel(string culture = null, bool fallback protected override void InitializeConfiguration() { + + RegisterModel( + Culture.Arabic, + (options) => new NumberModel( + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new ArabicNumberParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Arabic, options))), + Arabic.NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Arabic, options, NumberMode.PureNumber)))); + + 
RegisterModel( + Culture.Arabic, + (options) => new OrdinalModel( + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new ArabicNumberParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Arabic, options))), + Arabic.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Arabic, options)))); + + RegisterModel( + Culture.Arabic, + (options) => new PercentModel( + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new ArabicNumberParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Arabic, options))), + new Arabic.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.Arabic, options)))); + + RegisterModel( + Culture.Arabic, + (options) => new NumberRangeModel( + new BaseNumberRangeParser(new ArabicNumberRangeParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Arabic, options))), + new Arabic.NumberRangeExtractor(new BaseNumberOptionsConfiguration(Culture.Arabic, options)))); + RegisterModel( Culture.English, - options => new NumberModel( + (options) => new NumberModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new EnglishNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.English, options))), - English.MergedNumberExtractor.GetInstance(NumberMode.PureNumber, options))); + English.MergedNumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.English, options, NumberMode.PureNumber)))); RegisterModel( Culture.English, - options => new OrdinalModel( + (options) => new OrdinalModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new EnglishNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.English, options))), - English.OrdinalExtractor.GetInstance(options))); + English.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.English, options)))); RegisterModel( Culture.English, - options => new PercentModel( + (options) => new PercentModel( 
AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new EnglishNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.English, options))), - new English.PercentageExtractor(options))); + new English.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.English, options)))); RegisterModel( Culture.English, - options => new NumberRangeModel( + (options) => new NumberRangeModel( new BaseNumberRangeParser(new EnglishNumberRangeParserConfiguration( new BaseNumberOptionsConfiguration(Culture.English, options))), new English.NumberRangeExtractor(new BaseNumberOptionsConfiguration(Culture.English, options)))); @@ -112,21 +151,21 @@ protected override void InitializeConfiguration() (options) => new NumberModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new ChineseNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Chinese, options))), - new Chinese.NumberExtractor())); + new Chinese.NumberExtractor(new BaseNumberOptionsConfiguration(Culture.Chinese, options)))); RegisterModel( Culture.Chinese, (options) => new OrdinalModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new ChineseNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Chinese, options))), - new Chinese.OrdinalExtractor())); + new Chinese.OrdinalExtractor(new BaseNumberOptionsConfiguration(Culture.Chinese, options)))); RegisterModel( Culture.Chinese, (options) => new PercentModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new ChineseNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Chinese, options))), - new Chinese.PercentageExtractor())); + new Chinese.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.Chinese, options)))); RegisterModel( Culture.Chinese, @@ -140,21 +179,21 @@ protected override void InitializeConfiguration() (options) => new NumberModel( 
AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new SpanishNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Spanish, options))), - Spanish.NumberExtractor.GetInstance(NumberMode.PureNumber, options))); + Spanish.NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Spanish, options, NumberMode.PureNumber)))); RegisterModel( Culture.Spanish, (options) => new OrdinalModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new SpanishNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Spanish, options))), - Spanish.OrdinalExtractor.GetInstance())); + Spanish.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Spanish, options)))); RegisterModel( Culture.Spanish, (options) => new PercentModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new SpanishNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Spanish, options))), - new Spanish.PercentageExtractor())); + new Spanish.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.Spanish, options)))); RegisterModel( Culture.Spanish, @@ -163,68 +202,117 @@ protected override void InitializeConfiguration() new BaseNumberOptionsConfiguration(Culture.Spanish, options))), new Spanish.NumberRangeExtractor(new BaseNumberOptionsConfiguration(Culture.Spanish, options)))); + RegisterModel( + Culture.SpanishMexican, + (options) => new NumberModel( + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new SpanishNumberParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.SpanishMexican, options))), + Spanish.NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.SpanishMexican, options, NumberMode.PureNumber)))); + + RegisterModel( + Culture.SpanishMexican, + (options) => new OrdinalModel( + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new SpanishNumberParserConfiguration( + new 
BaseNumberOptionsConfiguration(Culture.SpanishMexican, options))), + Spanish.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.SpanishMexican, options)))); + + RegisterModel( + Culture.SpanishMexican, + (options) => new PercentModel( + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new SpanishNumberParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.SpanishMexican, options))), + new Spanish.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.SpanishMexican, options)))); + + RegisterModel( + Culture.SpanishMexican, + (options) => new NumberRangeModel( + new BaseNumberRangeParser(new SpanishNumberRangeParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.SpanishMexican, options))), + new Spanish.NumberRangeExtractor(new BaseNumberOptionsConfiguration(Culture.SpanishMexican, options)))); + RegisterModel( Culture.Portuguese, (options) => new NumberModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new PortugueseNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Portuguese, options))), - Portuguese.NumberExtractor.GetInstance(NumberMode.PureNumber, options))); + Portuguese.NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Portuguese, options, NumberMode.PureNumber)))); RegisterModel( Culture.Portuguese, (options) => new OrdinalModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new PortugueseNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Portuguese, options))), - Portuguese.OrdinalExtractor.GetInstance())); + Portuguese.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Portuguese, options)))); RegisterModel( Culture.Portuguese, (options) => new PercentModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new PortugueseNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Portuguese, options))), - new 
Portuguese.PercentageExtractor())); + new Portuguese.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.Portuguese, options)))); + + RegisterModel( + Culture.Portuguese, + (options) => new NumberRangeModel( + new BaseNumberRangeParser(new PortugueseNumberRangeParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Portuguese, options))), + new Portuguese.NumberRangeExtractor(new BaseNumberOptionsConfiguration(Culture.Portuguese, options)))); RegisterModel( Culture.French, (options) => new NumberModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new FrenchNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.French, options))), - French.NumberExtractor.GetInstance(NumberMode.PureNumber, options))); + French.NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.French, options, NumberMode.PureNumber)))); RegisterModel( Culture.French, (options) => new OrdinalModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new FrenchNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.French, options))), - French.OrdinalExtractor.GetInstance())); + French.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.French, options)))); RegisterModel( Culture.French, (options) => new PercentModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new FrenchNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.French, options))), - new French.PercentageExtractor())); + new French.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.French, options)))); + + RegisterModel( + Culture.French, + (options) => new NumberRangeModel( + new BaseNumberRangeParser(new FrenchNumberRangeParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.French, options))), + new French.NumberRangeExtractor(new BaseNumberOptionsConfiguration(Culture.French, options)))); RegisterModel( Culture.German, (options) => 
new NumberModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new GermanNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.German, options))), - German.NumberExtractor.GetInstance(NumberMode.PureNumber))); + German.NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.German, options, NumberMode.PureNumber)))); RegisterModel( Culture.German, (options) => new OrdinalModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new GermanNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.German, options))), - German.OrdinalExtractor.GetInstance())); + German.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.German, options)))); RegisterModel( Culture.German, (options) => new PercentModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new GermanNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.German, options))), - new German.PercentageExtractor())); + new German.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.German, options)))); + + RegisterModel( + Culture.German, + (options) => new NumberRangeModel( + new BaseNumberRangeParser(new GermanNumberRangeParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.German, options))), + new German.NumberRangeExtractor(new BaseNumberOptionsConfiguration(Culture.German, options)))); RegisterModel( Culture.Italian, @@ -238,7 +326,7 @@ protected override void InitializeConfiguration() (options) => new OrdinalModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new ItalianNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Italian, options))), - Italian.OrdinalExtractor.GetInstance())); + Italian.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Italian, options)))); RegisterModel( Culture.Italian, @@ -254,86 +342,103 @@ protected override void InitializeConfiguration() new 
BaseNumberOptionsConfiguration(Culture.Italian, options))), new Italian.NumberRangeExtractor(new BaseNumberOptionsConfiguration(Culture.Italian, options)))); + RegisterModel( + Culture.Dutch, + (options) => new NumberModel( + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new DutchNumberParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Dutch, options))), + Dutch.NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Dutch, options, NumberMode.PureNumber)))); + + RegisterModel( + Culture.Dutch, + (options) => new OrdinalModel( + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new DutchNumberParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Dutch, options))), + Dutch.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Dutch, options)))); + + RegisterModel( + Culture.Dutch, + (options) => new PercentModel( + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new DutchNumberParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Dutch, options))), + new Dutch.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.Dutch, options)))); + + RegisterModel( + Culture.Dutch, + (options) => new NumberRangeModel( + new BaseNumberRangeParser(new DutchNumberRangeParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Dutch, options))), + new Dutch.NumberRangeExtractor(new BaseNumberOptionsConfiguration(Culture.Dutch, options)))); + RegisterModel( Culture.Japanese, (options) => new NumberModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new JapaneseNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Japanese, options))), - new Japanese.NumberExtractor())); + new Japanese.NumberExtractor(new BaseNumberOptionsConfiguration(Culture.Japanese, options)))); RegisterModel( Culture.Japanese, (options) => new OrdinalModel( 
AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new JapaneseNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Japanese, options))), - new Japanese.OrdinalExtractor())); + Japanese.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Japanese, options)))); RegisterModel( Culture.Japanese, (options) => new PercentModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new JapaneseNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Japanese, options))), - new Japanese.PercentageExtractor())); + new Japanese.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.Japanese, options)))); - /* RegisterModel( Culture.Japanese, (options) => new NumberRangeModel( new BaseNumberRangeParser(new JapaneseNumberRangeParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Japanese, options))), - new Japanese.NumberRangeExtractor(options))); - */ + new Japanese.NumberRangeExtractor(new BaseNumberOptionsConfiguration(Culture.Japanese, options)))); RegisterModel( Culture.Korean, (options) => new NumberModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new KoreanNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Korean, options))), - new Korean.NumberExtractor())); - - RegisterModel( - Culture.Dutch, - (options) => new NumberModel( - AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new DutchNumberParserConfiguration( - new BaseNumberOptionsConfiguration(Culture.Dutch, options))), - Dutch.NumberExtractor.GetInstance(NumberMode.PureNumber))); + new Korean.NumberExtractor(new BaseNumberOptionsConfiguration(Culture.Korean, options)))); RegisterModel( - Culture.Dutch, + Culture.Korean, (options) => new OrdinalModel( - AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new DutchNumberParserConfiguration( - new BaseNumberOptionsConfiguration(Culture.Dutch, options))), - 
Dutch.OrdinalExtractor.GetInstance())); + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new KoreanNumberParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Korean, options))), + Korean.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Korean, options)))); RegisterModel( - Culture.Dutch, + Culture.Korean, (options) => new PercentModel( - AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new DutchNumberParserConfiguration( - new BaseNumberOptionsConfiguration(Culture.Dutch, options))), - new Dutch.PercentageExtractor(options))); + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new KoreanNumberParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Korean, options))), + new Korean.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.Korean, options)))); - // When registering NumberRangeModel, enable TestNumber_Dutch -> NumberRangeModel tests - /* RegisterModel( - Culture.Dutch, + Culture.Korean, (options) => new NumberRangeModel( - new BaseNumberRangeParser(new DutchNumberRangeParserConfiguration()), - new Dutch.NumberRangeExtractor(options))); - */ + new BaseNumberRangeParser(new KoreanNumberRangeParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Korean, options))), + new Korean.NumberRangeExtractor(new BaseNumberOptionsConfiguration(Culture.Korean, options)))); RegisterModel( - Culture.Turkish, - (options) => new NumberModel( - AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new TurkishNumberParserConfiguration( - new BaseNumberOptionsConfiguration(Culture.Turkish, options))), - Turkish.NumberExtractor.GetInstance(NumberMode.PureNumber))); + Culture.Turkish, + (options) => new NumberModel( + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new TurkishNumberParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Turkish, options))), +
Turkish.NumberExtractor.GetInstance(NumberMode.PureNumber))); RegisterModel( Culture.Turkish, (options) => new OrdinalModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new TurkishNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Turkish, options))), - Turkish.OrdinalExtractor.GetInstance())); + Turkish.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Turkish, options)))); RegisterModel( Culture.Turkish, @@ -342,16 +447,14 @@ protected override void InitializeConfiguration() new BaseNumberOptionsConfiguration(Culture.Turkish, options))), new Turkish.PercentageExtractor(options))); - // @TODO Uncomment once the NumberRangeModel test passes - /* RegisterModel( - Culture.Turkish, - options => new NumberRangeModel( + RegisterModel( + Culture.Turkish, + options => new NumberRangeModel( new BaseNumberRangeParser(new TurkishNumberRangeParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Turkish, options))), - new Turkish.NumberRangeExtractor(options)));*/ + new Turkish.NumberRangeExtractor(new BaseNumberOptionsConfiguration(Culture.Turkish, options)))); - // @TODO Uncomment once all the tests pass - /*RegisterModel( + RegisterModel( Culture.Hindi, options => new NumberModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new HindiNumberParserConfiguration( @@ -377,7 +480,49 @@ protected override void InitializeConfiguration() options => new NumberRangeModel( new BaseNumberRangeParser(new HindiNumberRangeParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Hindi, options))), - new Hindi.NumberRangeExtractor(new BaseNumberOptionsConfiguration(Culture.Hindi, options))));*/ + new Hindi.NumberRangeExtractor(new BaseNumberOptionsConfiguration(Culture.Hindi, options)))); + + RegisterModel( + Culture.Swedish, + (options) => new NumberModel( + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new SwedishNumberParserConfiguration( + new
BaseNumberOptionsConfiguration(Culture.Swedish, options))), + Swedish.NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Swedish, options, NumberMode.PureNumber)))); + + RegisterModel( + Culture.Swedish, + (options) => new OrdinalModel( + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new SwedishNumberParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Swedish, options))), + Swedish.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Swedish, options)))); + + RegisterModel( + Culture.Swedish, + (options) => new PercentModel( + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new SwedishNumberParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Swedish, options))), + new Swedish.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.Swedish, options)))); + + // RegisterModel( + // Culture.Swedish, + // (options) => new NumberRangeModel( + // new BaseNumberRangeParser(new SwedishNumberRangeParserConfiguration( + // new BaseNumberOptionsConfiguration(Culture.Swedish, options))), + // new Swedish.NumberRangeExtractor(new BaseNumberOptionsConfiguration(Culture.Swedish, options)))); + } + + protected override List GetRelatedTypes() + { + return new List() + { + typeof(BaseNumberExtractor), + typeof(BaseNumberRangeExtractor), + typeof(BasePercentageExtractor), + typeof(BaseMergedNumberExtractor), + typeof(BaseNumberParser), + typeof(BaseNumberParserConfiguration), + typeof(BaseNumberRangeParserConfiguration), + }; } private static List RecognizeByModel(Func getModelFunc, string query, NumberOptions options) diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/AgnosticNumberParserFactory.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/AgnosticNumberParserFactory.cs index 21faa202a4..b9b40f3899 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Parsers/AgnosticNumberParserFactory.cs +++ 
b/.NET/Microsoft.Recognizers.Text.Number/Parsers/AgnosticNumberParserFactory.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.Number { @@ -48,6 +51,7 @@ public static BaseNumberParser GetParser(AgnosticNumberParserType type, INumberP var isChinese = culture == Culture.Chinese; var isJapanese = culture == Culture.Japanese; var isKorean = culture == Culture.Korean; + var isHindi = culture == Culture.Hindi; BaseNumberParser parser; @@ -55,6 +59,10 @@ public static BaseNumberParser GetParser(AgnosticNumberParserType type, INumberP { parser = new BaseCJKNumberParser(languageConfiguration); } + else if (isHindi) + { + parser = new BaseIndianNumberParser(languageConfiguration); + } else { parser = new BaseNumberParser(languageConfiguration); @@ -78,7 +86,7 @@ public static BaseNumberParser GetParser(AgnosticNumberParserType type, INumberP parser.SupportedTypes = new List { Constants.SYS_NUM_ORDINAL }; break; case AgnosticNumberParserType.Percentage: - if ((!isChinese && !isJapanese) || isKorean) + if (!isChinese && !isJapanese && !isKorean) { parser = new BasePercentageParser(languageConfiguration); } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseCJKNumberParser.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseCJKNumberParser.cs index 205a7154fd..b2c35f7681 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseCJKNumberParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseCJKNumberParser.cs @@ -1,4 +1,8 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Globalization; using System.Linq; using System.Text; using System.Text.RegularExpressions; @@ -37,7 +41,7 @@ public override ParseResult Parse(ExtractResult extResult) Metadata = extResult.Metadata, }; - if (Config.CultureInfo.Name == "zh-CN") + if (Config.CultureInfo.Name.ToLowerInvariant() == Culture.Chinese) { getExtResult.Text = ReplaceTraWithSim(getExtResult.Text); } @@ -60,13 +64,13 @@ public override ParseResult Parse(ExtractResult extResult) ret.Value = -(double)ret.Value; } - ret.ResolutionStr = ret.Value.ToString(); + ret.ResolutionStr = ((double)ret.Value).ToString("G15", CultureInfo.InvariantCulture); } else if (extra.Contains("Pow")) { getExtResult.Text = NormalizeCharWidth(getExtResult.Text); ret = PowerNumberParse(getExtResult); - ret.ResolutionStr = ret.Value.ToString(); + ret.ResolutionStr = ((double)ret.Value).ToString("G15", CultureInfo.InvariantCulture); } else if (extra.Contains("Frac")) { @@ -99,6 +103,11 @@ public override ParseResult Parse(ExtractResult extResult) ret.Metadata.Offset = Config.RelativeReferenceOffsetMap[extResult.Text]; ret.Metadata.RelativeTo = Config.RelativeReferenceRelativeToMap[extResult.Text]; ret.Type = Constants.MODEL_ORDINAL_RELATIVE; + + // Add value for ordinal.relative + string sign = ret.Metadata.Offset[0].Equals('-') ? 
string.Empty : "+"; + ret.Value = string.Concat(ret.Metadata.RelativeTo, sign, ret.Metadata.Offset); + ret.ResolutionStr = GetResolutionStr(ret.Value); } else { @@ -110,6 +119,13 @@ public override ParseResult Parse(ExtractResult extResult) } } + // TODO: @Refactor this check to determine the subtype for JA and KO + if ((Config.CultureInfo.Name.ToLowerInvariant() == Culture.Japanese || Config.CultureInfo.Name.ToLowerInvariant() == Culture.Korean) && ret != null) + { + ret.Type = DetermineType(extResult, ret); + ret.Text = ret.Text.ToLowerInvariant(); + } + return ret; } @@ -127,15 +143,29 @@ protected ParseResult ParseFraction(ExtractResult extResult) var resultText = extResult.Text; var splitResult = Config.FracSplitRegex.Split(resultText); string intPart = string.Empty, demoPart = string.Empty, numPart = string.Empty; - if (splitResult.Length == 3) + + if (splitResult.Length == 4) + { + intPart = splitResult[0] + splitResult[1]; + demoPart = splitResult[2]; + numPart = splitResult[3]; + } + else if (splitResult.Length == 3) { intPart = splitResult[0]; demoPart = splitResult[1]; numPart = splitResult[2]; } + else if (splitResult.Length == 1) + { + // Needed to support "half" (eg. KO: 반, JA: 半) + intPart = Config.ZeroChar.ToString(CultureInfo.InvariantCulture); + demoPart = "2"; + numPart = "1"; + } else { - intPart = Config.ZeroChar.ToString(); + intPart = Config.ZeroChar.ToString(CultureInfo.InvariantCulture); demoPart = splitResult[0]; numPart = splitResult[1]; } @@ -146,7 +176,9 @@ protected ParseResult ParseFraction(ExtractResult extResult) var numValue = Config.DigitNumRegex.IsMatch(numPart) ? GetDigitValue(numPart, 1.0) - : GetIntValue(numPart); + : (Config.PointRegex.IsMatch(numPart) + ? GetIntValue(Config.PointRegex.Split(numPart)[0]) + GetPointValue(Config.PointRegex.Split(numPart)[1]) + : GetIntValue(numPart)); var demoValue = Config.DigitNumRegex.IsMatch(demoPart) ? 
GetDigitValue(demoPart, 1.0) @@ -161,7 +193,7 @@ protected ParseResult ParseFraction(ExtractResult extResult) result.Value = intValue + (numValue / demoValue); } - result.ResolutionStr = result.Value.ToString(); + result.ResolutionStr = ((double)result.Value).ToString("G15", CultureInfo.InvariantCulture); return result; } @@ -187,11 +219,11 @@ protected ParseResult ParsePercentage(ExtractResult extResult) if (resultText == "半額" || resultText == "半値" || resultText == "半折") { - result.Value = 50; + result.Value = 50d; } else if (resultText == "10成" || resultText == "10割" || resultText == "十割") { - result.Value = 100; + result.Value = 100d; } else { @@ -296,9 +328,9 @@ protected ParseResult ParsePercentage(ExtractResult extResult) doubleText = ReplaceUnit(doubleText); var splitResult = Config.PointRegex.Split(doubleText); - if (splitResult[0] == string.Empty) + if (string.IsNullOrEmpty(splitResult[0])) { - splitResult[0] = Config.ZeroChar.ToString(); + splitResult[0] = Config.ZeroChar.ToString(CultureInfo.InvariantCulture); } var doubleValue = GetIntValue(splitResult[0]); @@ -317,7 +349,30 @@ protected ParseResult ParsePercentage(ExtractResult extResult) result.Value = doubleValue; } - result.ResolutionStr = result.Value + @"%"; + if (Config.PercentageNumRegex != null) + { + var percentageNumSearch = Config.PercentageNumRegex.Match(resultText); + if (percentageNumSearch.Length != 0) + { + string demoPart = percentageNumSearch.Value; + var splitResult = Config.FracSplitRegex.Split(demoPart); + demoPart = splitResult[0]; + var demoValue = Config.DigitNumRegex.IsMatch(demoPart) + ? 
GetDigitValue(demoPart, 1.0) + : GetIntValue(demoPart); + + if (demoValue < 100 && demoValue > 0) + { + result.Value = (double)result.Value * (100 / demoValue); + } + else if (demoValue > 100) + { + result.Value = (double)result.Value / (demoValue / 100); + } + } + } + + result.ResolutionStr = ((double)result.Value).ToString("G15", CultureInfo.InvariantCulture) + @"%"; return result; } @@ -334,12 +389,12 @@ protected ParseResult ParseOrdinal(ExtractResult extResult) }; var resultText = extResult.Text; - resultText = resultText.Substring(1); result.Value = (Config.DigitNumRegex.IsMatch(resultText) && !Config.RoundNumberIntegerRegex.IsMatch(resultText)) ? GetDigitValue(resultText, 1) : GetIntValue(resultText); - result.ResolutionStr = result.Value.ToString(); + + result.ResolutionStr = ((double)result.Value).ToString("G15", CultureInfo.InvariantCulture); return result; } @@ -369,9 +424,9 @@ protected ParseResult ParseDouble(ExtractResult extResult) resultText = ReplaceUnit(resultText); var splitResult = Config.PointRegex.Split(resultText); - if (splitResult[0] == string.Empty) + if (string.IsNullOrEmpty(splitResult[0])) { - splitResult[0] = Config.ZeroChar.ToString(); + splitResult[0] = Config.ZeroChar.ToString(CultureInfo.InvariantCulture); } if (Config.NegativeNumberSignRegex.IsMatch(splitResult[0])) @@ -384,7 +439,7 @@ protected ParseResult ParseDouble(ExtractResult extResult) } } - result.ResolutionStr = result.Value.ToString(); + result.ResolutionStr = ((double)result.Value).ToString("G15", CultureInfo.InvariantCulture); return result; } @@ -441,7 +496,7 @@ private double GetDigitValue(string intStr, double power) return intValue; } - // Replace full digtal numbers with half digtal numbers. "4" and "4" are both legal in Japanese, replace "4" with "4", then deal with "4" + // Replace full digit numbers with half digit numbers. 
"4" and "4" are both legal in Japanese, replace "4" with "4", then deal with "4" private string NormalizeCharWidth(string text) { if (string.IsNullOrWhiteSpace(text)) @@ -476,6 +531,9 @@ private double GetIntValue(string intStr) var isRoundBefore = false; long roundBefore = -1, roundDefault = 1; var isNegative = false; + var hasPreviousDigits = false; + var hasRoundDirect = intStr.Any(c => Config.RoundDirectList.Contains(c)); + var hasRoundDirectOrZero = hasRoundDirect || intStr.Any(c => c == Config.ZeroChar); var isDozen = false; var isPair = false; @@ -483,11 +541,11 @@ private double GetIntValue(string intStr) if (Config.DozenRegex.IsMatch(intStr)) { isDozen = true; - if (Config.CultureInfo.Name == "zh-CN") + if (Config.CultureInfo.Name.ToLowerInvariant() == Culture.Chinese) { intStr = intStr.Substring(0, intStr.Length - 1); } - else if (Config.CultureInfo.Name == "ja-JP") + else if (Config.CultureInfo.Name.ToLowerInvariant() == Culture.Japanese) { intStr = intStr.Substring(0, intStr.Length - 3); } @@ -501,7 +559,7 @@ private double GetIntValue(string intStr) if (Config.NegativeNumberSignRegex.IsMatch(intStr)) { isNegative = true; - if (Config.CultureInfo.Name == "ko-KR") + if (Config.CultureInfo.Name.ToLowerInvariant() == Culture.Korean) { intStr = Regex.Replace(intStr, Config.NegativeNumberSignRegex.ToString(), string.Empty); } @@ -513,6 +571,11 @@ private double GetIntValue(string intStr) for (var i = 0; i < intStr.Length; i++) { + if (intStr[i] == Config.NonDecimalSeparatorChar) + { + continue; + } + if (Config.RoundNumberMapChar.ContainsKey(intStr[i])) { var roundRecent = Config.RoundNumberMapChar[intStr[i]]; @@ -546,6 +609,7 @@ private double GetIntValue(string intStr) } roundDefault = roundRecent / 10; + beforeValue = 1; } else if (Config.ZeroToNineMap.ContainsKey(intStr[i])) { @@ -559,22 +623,57 @@ private double GetIntValue(string intStr) } else { - beforeValue = Config.ZeroToNineMap[intStr[i]]; + double currentDigit = Config.ZeroToNineMap[intStr[i]]; + 
if (hasPreviousDigits) + { + beforeValue = (beforeValue * 10) + currentDigit; + } + else + { + beforeValue = currentDigit; + } + isRoundBefore = false; } } else { - if (i == intStr.Length - 1 && (Config.CultureInfo.Name == "ja-JP" || Config.CultureInfo.Name == "ko-KR")) + // In colloquial Chinese, 百 may be omitted from the end of a number, similarly to how 一 can be dropped + // from the beginning. Japanese doesn't have such behaviour. + if ((Config.CultureInfo.Name.ToLowerInvariant() == Culture.Japanese || Config.CultureInfo.Name.ToLowerInvariant() == Culture.Korean) || char.IsDigit(intStr[i])) { roundDefault = 1; } - partValue += Config.ZeroToNineMap[intStr[i]] * roundDefault; + double currentDigit = Config.ZeroToNineMap[intStr[i]]; + if (hasPreviousDigits) + { + beforeValue = (beforeValue * 10) + currentDigit; + } + else + { + beforeValue = currentDigit; + } + + partValue += beforeValue * roundDefault; intValue += partValue; partValue = 0; } } + + hasPreviousDigits = char.IsDigit(intStr[i]); + + // Japanese numbers in the form "一九九九" (1999) must be processed as digit numbers + if (Config.CultureInfo.Name.ToLowerInvariant() == Culture.Japanese && !hasPreviousDigits) + { + hasPreviousDigits = !hasRoundDirect && Config.ZeroToNineMap.ContainsKey(intStr[i]) && intStr[i] != Config.ZeroChar; + } + + if (Config.RoundDirectList.Contains(intStr[i])) + { + intValue += partValue; + partValue = 0; + } } if (isNegative) diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseIndianNumberParser.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseIndianNumberParser.cs new file mode 100644 index 0000000000..c71e32096b --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseIndianNumberParser.cs @@ -0,0 +1,560 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.Number +{ + public class BaseIndianNumberParser : BaseNumberParser + { + private readonly bool isCompoundNumberLanguage = false; + + public BaseIndianNumberParser(INumberParserConfiguration config) + : base(config) + { + this.Config = config as IIndianNumberParserConfiguration; + + this.isCompoundNumberLanguage = config.IsCompoundNumberLanguage; + + TextNumberRegex = BuildTextNumberRegex(); + } + + protected new IIndianNumberParserConfiguration Config { get; private set; } + + protected new Regex TextNumberRegex { get; } + + // Same behavior as base but accounts for peculiarities in Indian languages Fractions + public override ParseResult FracLikeNumberParse(ExtractResult extResult) + { + var result = new ParseResult + { + Start = extResult.Start, + Length = extResult.Length, + Text = extResult.Text, + Type = extResult.Type, + }; + + var resultText = extResult.Text; + if (Config.FractionPrepositionRegex.IsMatch(resultText) && !Config.AdditionTermsRegex.IsMatch(resultText)) + { + // condition includes AdditionTermsRegex in combination with FractionPrepositionRegex + // to account for Behaviour changes of और - In fraction cases where और is used to connect two words and may not be used as addition of two words from its left and right. + // solve cases like: "तीन और एक का पाँचवाँ भाग" + var match = Config.FractionPrepositionRegex.Match(resultText); + var numerator = match.Groups["numerator"].Value; + var denominator = match.Groups["denominator"].Value; + + var smallValue = char.IsDigit(numerator[0]) ? + GetDigitalValue(numerator, 1) : + GetIntValue(Utilities.RegExpUtility.GetMatches(this.TextNumberRegex, numerator)); + + var bigValue = char.IsDigit(denominator[0]) ?
+ GetDigitalValue(denominator, 1) : + GetIntValue(Utilities.RegExpUtility.GetMatches(this.TextNumberRegex, denominator)); + + result.Value = smallValue / bigValue; + } + else if (Config.FractionPrepositionInverseRegex.IsMatch(resultText)) + { + // condition to use FractionPrepositionInverseRegex where denominator and nominator are switched to account for + // में से (out of) - These type of cases are very common in Hindi. It belongs to fraction unit type. Here any word/char + // at left of में से acts as denominator and right of it acts as numerator + var match = Config.FractionPrepositionInverseRegex.Match(resultText); + var numerator = match.Groups["numerator"].Value; + var denominator = match.Groups["denominator"].Value; + + var smallValue = char.IsDigit(numerator[0]) ? + GetDigitalValue(numerator, 1) : + GetIntValue(Utilities.RegExpUtility.GetMatches(this.TextNumberRegex, numerator)); + + var bigValue = char.IsDigit(denominator[0]) ? + GetDigitalValue(denominator, 1) : + GetIntValue(Utilities.RegExpUtility.GetMatches(this.TextNumberRegex, denominator)); + + result.Value = smallValue / bigValue; + } + else + { + var fracWords = Config.NormalizeTokenSet(resultText.Split(null), result).ToList(); + + // Split fraction with integer + var splitIndex = fracWords.Count - 1; + var currentValue = Config.ResolveCompositeNumber(fracWords[splitIndex]); + long roundValue = 1; + + // For case like "half" + if (fracWords.Count == 1) + { + result.Value = 1 / GetIntValue(fracWords); + return result; + } + + for (splitIndex = fracWords.Count - 2; splitIndex >= 0; splitIndex--) + { + if (Config.WrittenFractionSeparatorTexts.Contains(fracWords[splitIndex]) || + Config.WrittenIntegerSeparatorTexts.Contains(fracWords[splitIndex])) + { + continue; + } + + var previousValue = currentValue; + currentValue = Config.ResolveCompositeNumber(fracWords[splitIndex]); + + var hundredsSM = 100; + + // Previous : hundred + // Current : one + if ((previousValue >= hundredsSM && previousValue > 
currentValue) || + (previousValue < hundredsSM && IsComposable(currentValue, previousValue))) + { + if (previousValue < hundredsSM && currentValue >= roundValue) + { + roundValue = currentValue; + } + else if (previousValue < hundredsSM && currentValue < roundValue) + { + splitIndex++; + break; + } + + // Current is the first word + if (splitIndex == 0) + { + // Scan, skip the first word + splitIndex = 1; + while (splitIndex <= fracWords.Count - 2) + { + // e.g. one hundred thousand + // frac[i+1] % 100 && frac[i] % 100 = 0 + if (Config.ResolveCompositeNumber(fracWords[splitIndex]) >= hundredsSM && + !Config.WrittenFractionSeparatorTexts.Contains(fracWords[splitIndex + 1]) && + Config.ResolveCompositeNumber(fracWords[splitIndex + 1]) < hundredsSM) + { + splitIndex++; + break; + } + + splitIndex++; + } + + break; + } + + continue; + } + + splitIndex++; + break; + } + + if (splitIndex < 0) + { + splitIndex = 0; + } + + var fracPart = new List(); + for (var i = splitIndex; i < fracWords.Count; i++) + { + if (fracWords[i].Contains("-")) + { + var split = fracWords[i].Split('-'); + fracPart.Add(split[0]); + fracPart.Add("-"); + fracPart.Add(split[1]); + } + else + { + fracPart.Add(fracWords[i]); + } + } + + fracWords.RemoveRange(splitIndex, fracWords.Count - splitIndex); + + // Split mixed number with fraction + var denominator = GetIntValue(fracPart); + double numerValue = 0; + double intValue = 0; + + var mixedIndex = fracWords.Count; + for (var i = fracWords.Count - 1; i >= 0; i--) + { + if (i < fracWords.Count - 1 && Config.WrittenFractionSeparatorTexts.Contains(fracWords[i])) + { + var numerStr = string.Join(" ", fracWords.GetRange(i + 1, fracWords.Count - 1 - i)); + numerValue = GetIntValue(Utilities.RegExpUtility.GetMatches(this.TextNumberRegex, numerStr)); + mixedIndex = i + 1; + break; + } + } + + var intStr = string.Join(" ", fracWords.GetRange(0, mixedIndex)); + intValue = GetIntValue(Utilities.RegExpUtility.GetMatches(this.TextNumberRegex, intStr)); + + // 
Find mixed number + if (mixedIndex != fracWords.Count && numerValue < denominator) + { + result.Value = intValue + (numerValue / denominator); + } + else + { + result.Value = (intValue + numerValue) / denominator; + } + } + + return result; + } + + // Same behavior as base but accounts uses modified BuildTextNumberRegex + public override ParseResult TextNumberParse(ExtractResult extResult) + { + var result = new ParseResult + { + Start = extResult.Start, + Length = extResult.Length, + Text = extResult.Text, + Type = extResult.Type, + Metadata = extResult.Metadata, + }; + + var handle = extResult.Text; + + handle = Config.HalfADozenRegex.Replace(handle, Config.HalfADozenText); + + // Handling cases like "last", "next one", "previous one" + if ((this.Config.Config.Options & NumberOptions.SuppressExtendedTypes) == 0) + { + if (extResult.Metadata != null && extResult.Metadata.IsOrdinalRelative) + { + return result; + } + } + + var numGroup = handle.Split(Config.WrittenDecimalSeparatorTexts.ToArray(), StringSplitOptions.RemoveEmptyEntries); + + var intPart = numGroup[0]; + var stringMatch = TextNumberRegex.Match(intPart); + + // Store all match str. 
+ var matchStrs = new List(); + + while (stringMatch.Success) + { + var matchStr = stringMatch.Groups[0].Value; + matchStrs.Add(matchStr); + stringMatch = stringMatch.NextMatch(); + } + + // Get the value recursively + var intPartRet = GetIntValue(matchStrs); + + double pointPartRet = 0; + if (numGroup.Length == 2) + { + var pointPart = numGroup[1]; + stringMatch = TextNumberRegex.Match(pointPart); + matchStrs.Clear(); + + while (stringMatch.Success) + { + var matchStr = stringMatch.Groups[0].Value; + matchStrs.Add(matchStr); + stringMatch = stringMatch.NextMatch(); + } + + pointPartRet += GetPointValue(matchStrs); + } + + result.Value = intPartRet + pointPartRet; + + return result; + } + + // Same behavior as base but accounts for Devanagari Numerals in parsing + public override double GetDigitalValue(string digitsStr, double power) + { + double temp = 0; + double scale = 10; + var decimalSeparatorFound = false; + var strLength = digitsStr.Length; + var isNegative = false; + + var isFrac = digitsStr.Contains('/'); + + var hasSingleSeparator = false; + + var calStack = new Stack(); + + for (var i = 0; i < digitsStr.Length; i++) + { + var ch = digitsStr[i]; + var prevCh = (i > 0) ?
digitsStr[i - 1] : '\0'; + + var skippableNonDecimal = SkipNonDecimalSeparator(ch, strLength - i, i, hasSingleSeparator, prevCh, Config.NonDecimalSeparatorChar); + + if (!isFrac && (ch == ' ' || ch == Constants.NO_BREAK_SPACE || skippableNonDecimal)) + { + continue; + } + + if (ch == ' ' || ch == '/') + { + calStack.Push(temp); + temp = 0; + } + else if (ch >= '0' && ch <= '9') + { + if (decimalSeparatorFound) + { + temp += scale * (ch - '0'); + scale *= 0.1; + } + else + { + temp = (temp * scale) + (ch - '0'); + } + } + else if (ch == Config.DecimalSeparatorChar || (!skippableNonDecimal && ch == Config.NonDecimalSeparatorChar)) + { + decimalSeparatorFound = true; + scale = 0.1; + } + else if (ch == '-') + { + isNegative = true; + } + else if (Config.ZeroToNineMap.Any(x => x.Key == ch)) + { + // handle Devanagari numerals defined in ZeroToNineMap + if (char.IsDigit(ch)) + { + if (decimalSeparatorFound) + { + temp += Config.ZeroToNineMap[ch] * scale; + scale *= 0.1; + } + else + { + temp = (temp * scale) + Config.ZeroToNineMap[ch]; + } + } + } + } + + calStack.Push(temp); + + // If the number is a fraction. 
+ double calResult = 0; + if (isFrac) + { + var denominator = calStack.Pop(); + var mole = calStack.Pop(); + calResult += mole / denominator; + } + + while (calStack.Any()) + { + calResult += calStack.Pop(); + } + + calResult *= power; + + if (isNegative) + { + return -calResult; + } + + return calResult; + } + + // Same behavior as base but accounts for regional Hindi cases like डेढ/सवा/ढाई + public override double GetIntValue(List matchStrs) + { + var isEnd = new bool[matchStrs.Count]; + for (var i = 0; i < isEnd.Length; i++) + { + isEnd[i] = false; + } + + double tempValue = 0; + long endFlag = 1; + + // Scan from end to start, find the end word + for (var i = matchStrs.Count - 1; i >= 0; i--) + { + var matchI = matchStrs[i].ToLowerInvariant(); + + if (RoundNumberSet.Contains(matchI)) + { + var mappedValue = Config.RoundNumberMap[matchI]; + + // If false, then continue. Will meet hundred first, then thousand. + if (endFlag > mappedValue) + { + continue; + } + + isEnd[i] = true; + endFlag = mappedValue; + } + } + + // If no multiplier found + if (endFlag == 1) + { + var tempStack = new Stack(); + var oldSym = string.Empty; + + foreach (var matchStr in matchStrs) + { + var isCardinal = Config.CardinalNumberMap.ContainsKey(matchStr); + var isOrdinal = Config.OrdinalNumberMap.ContainsKey(matchStr); + + if (isCardinal || isOrdinal) + { + var matchValue = isCardinal ? + Config.CardinalNumberMap[matchStr] : + Config.OrdinalNumberMap[matchStr]; + + // This is just for ordinal now. Not for fractions. + if (isOrdinal) + { + double fracPart = Config.OrdinalNumberMap[matchStr]; + + if (tempStack.Any()) + { + var intPart = tempStack.Pop(); + + // If intPart >= fracPart, it means it is an ordinal number + // it begins with an integer, ends with an ordinal + // e.g. ninety-ninth + if (intPart >= fracPart) + { + tempStack.Push(intPart + fracPart); + } + else + { + // Another case where the type is ordinal + // e.g. 
three hundredth + while (tempStack.Any()) + { + intPart = intPart + tempStack.Pop(); + } + + tempStack.Push(intPart * fracPart); + } + } + else + { + tempStack.Push(fracPart); + } + } + else if (Config.CardinalNumberMap.ContainsKey(matchStr)) + { + if (oldSym.Equals("-", StringComparison.Ordinal)) + { + var sum = tempStack.Pop() + matchValue; + tempStack.Push(sum); + } + else if (oldSym.Equals(Config.WrittenIntegerSeparatorTexts.First(), StringComparison.Ordinal) || tempStack.Count < 2) + { + tempStack.Push(matchValue); + } + else if (tempStack.Count >= 2) + { + var sum = tempStack.Pop() + matchValue; + sum = tempStack.Pop() + sum; + tempStack.Push(sum); + } + } + } + else + { + // Used to parse regional Hindi cases like डेढ/सवा/ढाई + // They are Indian Language specific cases and holds various meaning when prefixed with Number unit. + var complexVal = Config.ResolveUnitCompositeNumber(matchStr); + if (complexVal != 0) + { + tempStack.Push(complexVal); + } + + var complexValue = Config.ResolveCompositeNumber(matchStr); + if (complexValue != 0) + { + tempStack.Push(complexValue); + } + } + + oldSym = matchStr; + } + + foreach (var stackValue in tempStack) + { + tempValue += stackValue; + } + } + else + { + var lastIndex = 0; + double mulValue = 1; + double partValue = 1; + for (var i = 0; i < isEnd.Length; i++) + { + if (isEnd[i]) + { + mulValue = Config.RoundNumberMap[matchStrs[i]]; + partValue = 1; + + if (i != 0) + { + partValue = GetIntValue(matchStrs.GetRange(lastIndex, i - lastIndex)); + } + + tempValue += mulValue * partValue; + lastIndex = i + 1; + } + } + + // Calculate the part like "thirty-one" + mulValue = 1; + + if (lastIndex != isEnd.Length) + { + partValue = GetIntValue(matchStrs.GetRange(lastIndex, isEnd.Length - lastIndex)); + tempValue += mulValue * partValue; + } + } + + return tempValue; + } + + private Regex BuildTextNumberRegex() + { + // For Hindi, there is a need for another NumberMap of the type double to handle values like 1.5. 
+ // As this cannot be included in either Cardinal or Ordinal NumberMap as they are of the type long, + // DecimalUnitsList (type double) takes care of these entries and it needs to be added to the singleIntFrac + // for extraction + var singleIntFrac = $"{this.Config.WordSeparatorToken}| -|" + + GetKeyRegex(this.Config.OrdinalNumberMap.Keys) + "|" + + GetKeyRegex(this.Config.CardinalNumberMap.Keys) + "|" + + GetKeyRegex(this.Config.DecimalUnitsMap.Keys); + + string textNumberPattern; + + // Checks for languages that use "compound numbers". I.e. written number parts are not separated by whitespaces or special characters (e.g., dreihundert in German). + if (isCompoundNumberLanguage) + { + textNumberPattern = @"(" + singleIntFrac + @")"; + } + else + { + // Default case, like in English. + textNumberPattern = @"(?<=\b)(" + singleIntFrac + @")(?=\b)"; + } + + return new Regex(textNumberPattern, RegexOptions.Singleline | RegexOptions.Compiled, RegexTimeOut); + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseIndianNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseIndianNumberParserConfiguration.cs new file mode 100644 index 0000000000..30aa1cadea --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseIndianNumberParserConfiguration.cs @@ -0,0 +1,32 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.Number +{ + public class BaseIndianNumberParserConfiguration : BaseNumberParserConfiguration, IIndianNumberParserConfiguration + { + + public ImmutableDictionary ZeroToNineMap { get; set; } + + public ImmutableDictionary DecimalUnitsMap { get; set; } + + public Regex FractionPrepositionInverseRegex { get; set; } + + public Regex AdditionTermsRegex { get; set; } + + // Used to parse regional Hindi cases like डेढ/सवा/ढाई + // they are Indian language specific cases and holds various meaning when prefixed with Number units. + public virtual double ResolveUnitCompositeNumber(string numberStr) + { + if (this.DecimalUnitsMap.ContainsKey(numberStr)) + { + return this.DecimalUnitsMap[numberStr]; + } + + return 0; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberParser.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberParser.cs index 9f6b9e20aa..3164897f30 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberParser.cs @@ -1,7 +1,11 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Globalization; using System.Linq; +using System.Reflection; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Number @@ -9,10 +13,12 @@ namespace Microsoft.Recognizers.Text.Number public class BaseNumberParser : IParser { private static readonly Regex LongFormRegex = - new Regex(@"\d+", RegexOptions.Singleline); + new Regex(@"\d+", RegexOptions.Singleline | RegexOptions.Compiled, RegexTimeOut); private readonly bool isMultiDecimalSeparatorCulture = false; + private readonly bool isNonStandardSeparatorVariant = false; + private readonly bool isCompoundNumberLanguage = false; public BaseNumberParser(INumberParserConfiguration config) @@ -29,12 +35,16 @@ public BaseNumberParser(INumberParserConfiguration config) { RoundNumberSet.Add(roundNumber); } + + isNonStandardSeparatorVariant = Config.NonStandardSeparatorVariants.Contains(Config.CultureInfo.Name.ToLowerInvariant()); } internal IEnumerable SupportedTypes { get; set; } protected static Regex LongFormatRegex => LongFormRegex; + protected static TimeSpan RegexTimeOut => NumberRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + protected INumberParserConfiguration Config { get; private set; } protected Regex TextNumberRegex { get; } @@ -53,7 +63,7 @@ public virtual ParseResult Parse(ExtractResult extResult) if (!(extResult.Data is string extra)) { - extra = LongFormatRegex.Match(extResult.Text).Success ? Constants.NUMBER_SUFFIX : Config.LangMarker; + extra = LongFormatRegex.Match(extResult.Text).Success ? 
Constants.NUMBER_SUFFIX : Config.LanguageMarker; } // Resolve symbol prefix @@ -108,12 +118,12 @@ public virtual ParseResult Parse(ExtractResult extResult) { ret = DigitNumberParse(extResult); } - else if (extra.Contains($"{Constants.FRACTION_PREFIX}{Config.LangMarker}")) + else if (extra.Contains($"{Constants.FRACTION_PREFIX}{Config.LanguageMarker}")) { // Such fractions are special cases, parse via another method ret = FracLikeNumberParse(extResult); } - else if (extra.Contains(Config.LangMarker)) + else if (extra.Contains(Config.LanguageMarker)) { ret = TextNumberParse(extResult); } @@ -131,6 +141,7 @@ public virtual ParseResult Parse(ExtractResult extResult) } else if (ret?.Value != null) { + if (isNegative) { // Recover the original extracted Text @@ -168,20 +179,14 @@ public virtual ParseResult Parse(ExtractResult extResult) if (ret != null) { - ret.Type = DetermineType(extResult); + ret.Type = DetermineType(extResult, ret); ret.Text = ret.Text.ToLowerInvariant(); } return ret; } - protected static string GetKeyRegex(IEnumerable keyCollection) - { - var sortKeys = keyCollection.OrderByDescending(key => key.Length); - return string.Join("|", sortKeys); - } - - protected ParseResult PowerNumberParse(ExtractResult extResult) + public virtual ParseResult PowerNumberParse(ExtractResult extResult) { var result = new ParseResult { @@ -192,14 +197,17 @@ protected ParseResult PowerNumberParse(ExtractResult extResult) }; var handle = extResult.Text.ToUpperInvariant(); - var isE = !extResult.Text.Contains("^"); + + // Process cases like '1x10^6' as '1e6' + handle = handle.Replace("X10^", "E"); + var isE = !handle.Contains("^"); // [1] 1e10 // [2] 1.1^-23 var calStack = new Queue(); double scale = 10; - var dot = false; + var decimalSeparatorFound = false; var isNegative = false; double tmp = 0; for (var i = 0; i < handle.Length; i++) @@ -218,14 +226,14 @@ protected ParseResult PowerNumberParse(ExtractResult extResult) tmp = 0; scale = 10; - dot = false; + 
decimalSeparatorFound = false; isNegative = false; } else if (ch >= '0' && ch <= '9') { - if (dot) + if (decimalSeparatorFound) { - tmp = tmp + (scale * (ch - '0')); + tmp += scale * (ch - '0'); scale *= 0.1; } else @@ -235,7 +243,7 @@ protected ParseResult PowerNumberParse(ExtractResult extResult) } else if (ch == Config.DecimalSeparatorChar) { - dot = true; + decimalSeparatorFound = true; scale = 0.1; } else if (ch == '-') @@ -271,12 +279,12 @@ protected ParseResult PowerNumberParse(ExtractResult extResult) } result.Value = ret; - result.ResolutionStr = ret.ToString(CultureInfo.InvariantCulture); + result.ResolutionStr = ret.ToString("G15", CultureInfo.InvariantCulture); return result; } - protected ParseResult TextNumberParse(ExtractResult extResult) + public virtual ParseResult TextNumberParse(ExtractResult extResult) { var result = new ParseResult { @@ -340,7 +348,7 @@ protected ParseResult TextNumberParse(ExtractResult extResult) return result; } - protected ParseResult FracLikeNumberParse(ExtractResult extResult) + public virtual ParseResult FracLikeNumberParse(ExtractResult extResult) { var result = new ParseResult { @@ -358,94 +366,42 @@ protected ParseResult FracLikeNumberParse(ExtractResult extResult) var denominator = match.Groups["denominator"].Value; var smallValue = char.IsDigit(numerator[0]) ? - GetDigitalValue(numerator, 1) : - GetIntValue(Utilities.RegExpUtility.GetMatches(this.TextNumberRegex, numerator)); + GetDigitalValue(numerator, 1) : + GetIntValue(Utilities.RegExpUtility.GetMatches(this.TextNumberRegex, numerator)); var bigValue = char.IsDigit(denominator[0]) ? 
- GetDigitalValue(denominator, 1) : - GetIntValue(Utilities.RegExpUtility.GetMatches(this.TextNumberRegex, denominator)); + GetDigitalValue(denominator, 1) : + GetIntValue(Utilities.RegExpUtility.GetMatches(this.TextNumberRegex, denominator)); result.Value = smallValue / bigValue; } else { - var fracWords = Config.NormalizeTokenSet(resultText.Split(null), result).ToList(); - - // Split fraction with integer - var splitIndex = fracWords.Count - 1; - var currentValue = Config.ResolveCompositeNumber(fracWords[splitIndex]); - long roundValue = 1; - - // For case like "half" - if (fracWords.Count == 1) - { - result.Value = 1 / GetIntValue(fracWords); - return result; - } - - for (splitIndex = fracWords.Count - 2; splitIndex >= 0; splitIndex--) + var isFractionMultiplier = false; + long multiplier = 1; + if (Config.RoundMultiplierRegex != null) { - if (Config.WrittenFractionSeparatorTexts.Contains(fracWords[splitIndex]) || - Config.WrittenIntegerSeparatorTexts.Contains(fracWords[splitIndex])) + var match = Config.RoundMultiplierRegex.Match(resultText); + if (match.Success) { - continue; + resultText = resultText.Replace(match.Value, string.Empty); + multiplier = Config.RoundNumberMap[match.Groups["multiplier"].Value]; + isFractionMultiplier = match.Groups[Constants.FracMultiplierGroupName].Success ? 
true : false; } - - var previousValue = currentValue; - currentValue = Config.ResolveCompositeNumber(fracWords[splitIndex]); - - var hundredsSM = 100; - - // Previous : hundred - // Current : one - if ((previousValue >= hundredsSM && previousValue > currentValue) || - (previousValue < hundredsSM && IsComposable(currentValue, previousValue))) - { - if (previousValue < hundredsSM && currentValue >= roundValue) - { - roundValue = currentValue; - } - else if (previousValue < hundredsSM && currentValue < roundValue) - { - splitIndex++; - break; - } - - // Current is the first word - if (splitIndex == 0) - { - // Scan, skip the first word - splitIndex = 1; - while (splitIndex <= fracWords.Count - 2) - { - // e.g. one hundred thousand - // frac[i+1] % 100 && frac[i] % 100 = 0 - if (Config.ResolveCompositeNumber(fracWords[splitIndex]) >= hundredsSM && - !Config.WrittenFractionSeparatorTexts.Contains(fracWords[splitIndex + 1]) && - Config.ResolveCompositeNumber(fracWords[splitIndex + 1]) < hundredsSM) - { - splitIndex++; - break; - } - - splitIndex++; - } - - break; - } - - continue; - } - - splitIndex++; - break; } - if (splitIndex < 0) + var fracWords = Config.NormalizeTokenSet(resultText.Split(null), result).ToList(); + + // For case like "half" + if (fracWords.Count == 1) { - splitIndex = 0; + result.Value = (1 / GetIntValue(fracWords)) * multiplier; + return result; } + // Split fraction with integer + var splitIndex = this.GetSplitIndex(fracWords); + var fracPart = new List(); for (var i = splitIndex; i < fracWords.Count; i++) { @@ -487,11 +443,12 @@ protected ParseResult FracLikeNumberParse(ExtractResult extResult) // Find mixed number if (mixedIndex != fracWords.Count && numerValue < denominator) { - result.Value = intValue + (numerValue / denominator); + result.Value = isFractionMultiplier ? 
(intValue + (numerValue / denominator)) * multiplier : + intValue + (multiplier * numerValue / denominator); } else { - result.Value = (intValue + numerValue) / denominator; + result.Value = multiplier * (intValue + numerValue) / denominator; } } @@ -503,7 +460,7 @@ protected ParseResult FracLikeNumberParse(ExtractResult extResult) /// /// input arabic number. /// parsed result. - protected ParseResult DigitNumberParse(ExtractResult extResult) + public virtual ParseResult DigitNumberParse(ExtractResult extResult) { var result = new ParseResult { @@ -511,7 +468,7 @@ protected ParseResult DigitNumberParse(ExtractResult extResult) Length = extResult.Length, Text = extResult.Text, Type = extResult.Type, - Metadata = extResult.Metadata, + Metadata = extResult.Metadata != null ? extResult.Metadata : new Metadata(), }; // [1] 24 @@ -546,26 +503,96 @@ protected ParseResult DigitNumberParse(ExtractResult extResult) } // Scale used in calculating double - result.Value = GetDigitalValue(extText, power); + var value = GetDigitalValue(extText, power); + result.Value = value; + result.Metadata.TreatAsInteger = (value % 1) == 0; return result; } - protected double GetDigitalValue(string digitsStr, double power) + public virtual double GetDigitalValue(string digitsStr, double power) { double temp = 0; double scale = 10; - var decimalSeparator = false; - var strLength = digitsStr.Length; + var hasDecimalSeparator = false; var isNegative = false; + + var strLength = digitsStr.Length; var isFrac = digitsStr.Contains('/'); + // As some languages use different separators depending on variant, some pre-processing is required to allow for unified processing. 
+ + // Default separators from general language config + var decimalSeparator = Config.DecimalSeparatorChar; + var nonDecimalSeparator = Config.NonDecimalSeparatorChar; + + var lastDecimalSeparator = -1; + var lastNonDecimalSeparator = -1; + var firstNonDecimalSeparator = int.MaxValue; + var hasSingleSeparator = false; + + if (Config.IsMultiDecimalSeparatorCulture) + { + + if (isNonStandardSeparatorVariant) + { + // Reverse separators + decimalSeparator = Config.NonDecimalSeparatorChar; + nonDecimalSeparator = Config.DecimalSeparatorChar; + } + + for (int i = 0; i < strLength; i++) + { + var ch = digitsStr[i]; + if (ch == decimalSeparator) + { + lastDecimalSeparator = i; + } + else if (ch == nonDecimalSeparator) + { + lastNonDecimalSeparator = i; + if (firstNonDecimalSeparator == int.MaxValue) + { + firstNonDecimalSeparator = i; + } + } + } + + if (((lastDecimalSeparator < 0 && lastNonDecimalSeparator >= 0) || (lastNonDecimalSeparator < 0 && lastDecimalSeparator >= 0)) && + firstNonDecimalSeparator == lastNonDecimalSeparator) + { + hasSingleSeparator = true; + } + else if ((lastDecimalSeparator < lastNonDecimalSeparator) && !(lastDecimalSeparator == -1 || lastNonDecimalSeparator == -1)) + { + // Switch separators + var aux = decimalSeparator; + decimalSeparator = nonDecimalSeparator; + nonDecimalSeparator = aux; + } + + } + + // Try to parse vulgar fraction chars + if (!isFrac && strLength == 1 && !char.IsDigit(digitsStr[0])) + { + double fracResult = char.GetNumericValue(digitsStr, 0); + + if (fracResult != -1.0) + { + return fracResult; + } + } + var calStack = new Stack(); - for (var i = 0; i < digitsStr.Length; i++) + for (var i = 0; i < strLength; i++) { var ch = digitsStr[i]; - var skippableNonDecimal = SkipNonDecimalSeparator(ch, strLength - i); + var prevCh = (i > 0) ? 
digitsStr[i - 1] : '\0'; + + var skippableNonDecimal = SkipNonDecimalSeparator(ch, strLength - i, i, hasSingleSeparator, prevCh, nonDecimalSeparator); + if (!isFrac && (ch == ' ' || ch == Constants.NO_BREAK_SPACE || skippableNonDecimal)) { continue; @@ -578,9 +605,9 @@ protected double GetDigitalValue(string digitsStr, double power) } else if (ch >= '0' && ch <= '9') { - if (decimalSeparator) + if (hasDecimalSeparator) { - temp = temp + (scale * (ch - '0')); + temp += scale * (ch - '0'); scale *= 0.1; } else @@ -588,9 +615,9 @@ protected double GetDigitalValue(string digitsStr, double power) temp = (temp * scale) + (ch - '0'); } } - else if (ch == Config.DecimalSeparatorChar || (!skippableNonDecimal && ch == Config.NonDecimalSeparatorChar)) + else if (ch == decimalSeparator || (!skippableNonDecimal && ch == nonDecimalSeparator)) { - decimalSeparator = true; + hasDecimalSeparator = true; scale = 0.1; } else if (ch == '-') @@ -625,79 +652,15 @@ protected double GetDigitalValue(string digitsStr, double power) return calResult; } - private static string DetermineType(ExtractResult er) + public virtual double GetIntValue(List matchStrs) { - if (!string.IsNullOrEmpty(er.Type) && er.Type.Contains(Constants.MODEL_ORDINAL)) - { - return er.Metadata.IsOrdinalRelative ? 
Constants.MODEL_ORDINAL_RELATIVE : Constants.MODEL_ORDINAL; - } - - var data = er.Data as string; - var subType = string.Empty; + var specialCase = Config.GetLangSpecificIntValue(matchStrs); - if (!string.IsNullOrEmpty(data)) + if (specialCase.isRelevant) { - if (data.StartsWith(Constants.FRACTION_PREFIX, StringComparison.Ordinal)) - { - subType = Constants.FRACTION; - } - else if (data.Contains(Constants.POWER_SUFFIX)) - { - subType = Constants.POWER; - } - else if (data.StartsWith(Constants.INTEGER_PREFIX, StringComparison.Ordinal)) - { - subType = Constants.INTEGER; - } - else if (data.StartsWith(Constants.DOUBLE_PREFIX, StringComparison.Ordinal)) - { - subType = Constants.DECIMAL; - } + return specialCase.value; } - return subType; - } - - private static bool IsMergeable(double former, double later) - { - // The former number is an order of magnitude larger than the later number, and they must be integers - return Math.Abs(former % 1) < double.Epsilon && Math.Abs(later % 1) < double.Epsilon && - former > later && former.ToString(CultureInfo.InvariantCulture).Length > later.ToString(CultureInfo.InvariantCulture).Length && later > 0; - } - - // Test if big and combine with small. - // e.g. "hundred" can combine with "thirty" but "twenty" can't combine with "thirty". - private static bool IsComposable(long big, long small) - { - var baseNumber = small > 10 ? 100 : 10; - - return big % baseNumber == 0 && big / baseNumber >= 1; - } - - private string GetResolutionStr(object value) - { - var resolutionStr = value.ToString(); - - if (Config.CultureInfo != null && value is double) - { - resolutionStr = ((double)value).ToString(Config.CultureInfo); - } - - return resolutionStr; - } - - // Special cases for multi-language countries where decimal separators can be used interchangeably. Mostly informally. - // Ex: South Africa, Namibia; Puerto Rico in ES; or in Canada for EN and FR. 
- // "me pidio $5.00 prestados" and "me pidio $5,00 prestados" -> currency $5 - private bool SkipNonDecimalSeparator(char ch, int distance) - { - const int decimalLength = 3; - - return ch == Config.NonDecimalSeparatorChar && !(distance <= decimalLength && isMultiDecimalSeparatorCulture); - } - - private double GetIntValue(List matchStrs) - { var isEnd = new bool[matchStrs.Count]; for (var i = 0; i < isEnd.Length; i++) { @@ -766,7 +729,7 @@ private double GetIntValue(List matchStrs) // e.g. three hundredth while (tempStack.Any()) { - intPart = intPart + tempStack.Pop(); + intPart += tempStack.Pop(); } tempStack.Push(intPart * fracPart); @@ -784,18 +747,22 @@ private double GetIntValue(List matchStrs) var sum = tempStack.Pop() + matchValue; tempStack.Push(sum); } - else if (oldSym.Equals(Config.WrittenIntegerSeparatorTexts.First(), StringComparison.Ordinal) || tempStack.Count() < 2) + else if (oldSym.Equals(Config.WrittenIntegerSeparatorTexts.First(), StringComparison.Ordinal) || tempStack.Count < 2) { tempStack.Push(matchValue); } - else if (tempStack.Count() >= 2) + else if (tempStack.Count >= 2) { var sum = tempStack.Pop() + matchValue; - sum = tempStack.Pop() + sum; + sum += tempStack.Pop(); tempStack.Push(sum); } } } + else if (int.TryParse(matchStr, out int digitValue)) + { + tempStack.Push(digitValue); + } else { var complexValue = Config.ResolveCompositeNumber(matchStr); @@ -818,6 +785,7 @@ private double GetIntValue(List matchStrs) var lastIndex = 0; double mulValue = 1; double partValue = 1; + for (var i = 0; i < isEnd.Length; i++) { if (isEnd[i]) @@ -848,16 +816,109 @@ private double GetIntValue(List matchStrs) return tempValue; } - private double GetPointValue(List matchStrs) + protected static string GetKeyRegex(IEnumerable keyCollection) + { + var sortKeys = keyCollection.OrderByDescending(key => key.Length); + return string.Join("|", sortKeys); + } + + protected static string DetermineType(ExtractResult er, ParseResult pr) + { + if 
(!string.IsNullOrEmpty(er.Type) && er.Type.Contains(Constants.MODEL_ORDINAL)) + { + return er.Metadata.IsOrdinalRelative ? Constants.MODEL_ORDINAL_RELATIVE : Constants.MODEL_ORDINAL; + } + + var data = er.Data as string; + var subType = string.Empty; + + if (!string.IsNullOrEmpty(data)) + { + if (data.StartsWith(Constants.FRACTION_PREFIX, StringComparison.Ordinal)) + { + subType = Constants.FRACTION; + } + else if (data.Contains(Constants.POWER_SUFFIX)) + { + subType = Constants.POWER; + } + else if (data.StartsWith(Constants.INTEGER_PREFIX, StringComparison.Ordinal)) + { + subType = (pr.Metadata == null || pr.Metadata.TreatAsInteger) ? Constants.INTEGER : Constants.DECIMAL; + } + else if (data.StartsWith(Constants.DOUBLE_PREFIX, StringComparison.Ordinal)) + { + subType = (pr.Metadata == null || !pr.Metadata.TreatAsInteger) ? Constants.DECIMAL : Constants.INTEGER; + } + } + + return subType; + } + + protected static bool IsMergeable(double former, double later) + { + // The former number is an order of magnitude larger than the later number, and they must be integers + return Math.Abs(former % 1) < double.Epsilon && Math.Abs(later % 1) < double.Epsilon && former > later && + former.ToString("G15", CultureInfo.InvariantCulture).Length > later.ToString("G15", CultureInfo.InvariantCulture).Length && + later > 0; + } + + // Test if big and combine with small. + // e.g. "hundred" can combine with "thirty" but "twenty" can't be combined with "thirty". + protected static bool IsComposable(long big, long small) + { + var baseNumber = small > 10 ? 
100 : 10; + + return big % baseNumber == 0 && big / baseNumber >= 1; + } + + protected string GetResolutionStr(object value) + { + var resolutionStr = value.ToString(); + + if (Config.CultureInfo != null && value is double) + { + resolutionStr = ((double)value).ToString("G15", Config.CultureInfo); + } + + return resolutionStr; + } + + // Special cases for multi-language countries where decimal separators can be used interchangeably. Mostly informally. + // Ex: South Africa, Namibia; Puerto Rico in ES; or in Canada for EN and FR. + // "me pidio $5.00 prestados" and "me pidio $5,00 prestados" -> currency $5 + // "1.000" can be ambiguous and should return "1000" by default + // If only one separator and not three digits to the right, interpret as decimal separator + // "100.00" = "100,00" -> "100" + protected bool SkipNonDecimalSeparator(char ch, int distanceEnd, int distanceStart, bool hasSingleSeparator, char prevCh, char nonDecimalSeparator) + { + bool result = false; + + const int decimalLength = 1 + 3; + + if (ch == nonDecimalSeparator) + { + result = true; + + if (isMultiDecimalSeparatorCulture && hasSingleSeparator && + (distanceEnd != decimalLength || (prevCh == '0' && distanceStart == 1) || distanceStart > 3)) + { + result = false; + } + } + + return result; + } + + protected double GetPointValue(List matchStrs) { double ret = 0; var firstMatch = matchStrs.First(); if (Config.CardinalNumberMap.ContainsKey(firstMatch) && Config.CardinalNumberMap[firstMatch] >= 10) { - var prefix = "0."; var tempInt = GetIntValue(matchStrs); - var all = prefix + tempInt; + var all = $"0.{tempInt}"; ret = double.Parse(all, CultureInfo.InvariantCulture); } else @@ -873,11 +934,96 @@ private double GetPointValue(List matchStrs) return ret; } + /// + /// Get the split index for a fraction word list, split index used to separate the numerator and the denominator. 
+ /// Ex: A fraction is "three fifth", it will be joined as a list which 1st item is "three" and 2nd item is "fifth", the split index is 1 (index of fifth). + /// Ex: A fraction is "two and fifty-four hundredths", the split index is 3 (index of hundredths). + /// + /// fraction words list. + /// split index. + private int GetSplitIndex(List fracWords) + { + var splitIndex = fracWords.Count - 1; + var currentValue = Config.ResolveCompositeNumber(fracWords[splitIndex]); + long roundValue = 1; + for (splitIndex = fracWords.Count - 2; splitIndex >= 0; splitIndex--) + { + if (Config.WrittenFractionSeparatorTexts.Contains(fracWords[splitIndex]) || + Config.WrittenIntegerSeparatorTexts.Contains(fracWords[splitIndex])) + { + continue; + } + + var previousValue = currentValue; + currentValue = Config.ResolveCompositeNumber(fracWords[splitIndex]); + + var hundredsSM = 100; + + // Below flag isUncomposobleWithSeparator is used to handle one scenario for handling fraction input like "two and fifty-four hundredths". + // Generally, when two numbers are not compsable, like "two" and "fifty-four", it will return the splitIndex as 1 (index of "fifty-four"). + // But in this scenario, there is a separator "and" between "two" and "fifty-four" which means that the "two" is integer part and "fifty-four hundredths" is the fraction part. + // The splitIndex should be 3 (index of "hundredths") then. 
+ bool isUncomposobleWithSeparator = previousValue < hundredsSM && !IsComposable(currentValue, previousValue) && + Config.WrittenFractionSeparatorTexts.Contains(fracWords[splitIndex + 1]); + + // Previous : hundred + // Current : one + if ((previousValue >= hundredsSM && previousValue > currentValue) || + (previousValue < hundredsSM && IsComposable(currentValue, previousValue)) || isUncomposobleWithSeparator) + { + if (previousValue < hundredsSM && currentValue >= roundValue) + { + roundValue = currentValue; + } + else if (previousValue < hundredsSM && currentValue < roundValue) + { + splitIndex++; + break; + } + + // Current is the first word + if (splitIndex == 0) + { + // Scan, skip the first word + splitIndex = 1; + while (splitIndex <= fracWords.Count - 2) + { + // e.g. one hundred thousand + // frac[i+1] % 100 && frac[i] % 100 = 0 + if (Config.ResolveCompositeNumber(fracWords[splitIndex]) >= hundredsSM && + !Config.WrittenFractionSeparatorTexts.Contains(fracWords[splitIndex + 1]) && + Config.ResolveCompositeNumber(fracWords[splitIndex + 1]) < hundredsSM) + { + splitIndex++; + break; + } + + splitIndex++; + } + + break; + } + + continue; + } + + splitIndex++; + break; + } + + if (splitIndex < 0) + { + splitIndex = 0; + } + + return splitIndex; + } + private Regex BuildTextNumberRegex() { var singleIntFrac = $"{this.Config.WordSeparatorToken}| -|" + GetKeyRegex(this.Config.CardinalNumberMap.Keys) + "|" + - GetKeyRegex(this.Config.OrdinalNumberMap.Keys); + GetKeyRegex(this.Config.OrdinalNumberMap.Keys) + "|\\d+"; // @TODO consider remodeling the creation of this regex // For Italian, we invert the order of Cardinal and Ordinal in singleIntFrac in order to correctly extract @@ -887,12 +1033,13 @@ private Regex BuildTextNumberRegex() { singleIntFrac = $"{this.Config.WordSeparatorToken}| -|" + GetKeyRegex(this.Config.OrdinalNumberMap.Keys) + "|" + - GetKeyRegex(this.Config.CardinalNumberMap.Keys); + GetKeyRegex(this.Config.CardinalNumberMap.Keys) + "|\\d+"; } 
string textNumberPattern; - // Checks for languages that use "compound numbers". I.e. written number parts are not separated by whitespaces or special characters (e.g., dreihundert in German). + // Checks for languages that use "compound numbers". I.e. written number parts are not separated by whitespaces or + // special characters (e.g., dreihundert in German). if (isCompoundNumberLanguage) { textNumberPattern = @"(" + singleIntFrac + @")"; @@ -903,7 +1050,7 @@ private Regex BuildTextNumberRegex() textNumberPattern = @"(?<=\b)(" + singleIntFrac + @")(?=\b)"; } - return new Regex(textNumberPattern, RegexOptions.Singleline | RegexOptions.Compiled); + return new Regex(textNumberPattern, RegexOptions.Singleline | RegexOptions.Compiled, RegexTimeOut); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberRangeParser.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberRangeParser.cs index 4010a8ee23..587d05f7b5 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberRangeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberRangeParser.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Globalization; using System.Linq; @@ -70,8 +73,8 @@ private ParseResult ParseNumberRangeWhichHasTwoNum(ExtractResult extResult) endValue = nums[0]; } - var startValueStr = startValue.ToString(CultureInfo.InvariantCulture); - var endValueStr = endValue.ToString(CultureInfo.InvariantCulture); + var startValueStr = startValue.ToString("G15", CultureInfo.InvariantCulture); + var endValueStr = endValue.ToString("G15", CultureInfo.InvariantCulture); char leftBracket, rightBracket; var type = extResult.Data as string; @@ -163,6 +166,7 @@ private ParseResult ParseNumberRangeWhichHasOneNum(ExtractResult extResult) char leftBracket, rightBracket; string startValueStr = string.Empty, endValueStr = string.Empty; var type = extResult.Data as string; + if (type.Contains(NumberRangeConstants.MORE)) { rightBracket = NumberRangeConstants.RIGHT_OPEN; @@ -188,7 +192,7 @@ private ParseResult ParseNumberRangeWhichHasOneNum(ExtractResult extResult) leftBracket = NumberRangeConstants.LEFT_OPEN; } - startValueStr = num[0].ToString(CultureInfo.InvariantCulture); + startValueStr = num[0].ToString("G15", CultureInfo.InvariantCulture); result.Value = new Dictionary() { @@ -220,7 +224,7 @@ private ParseResult ParseNumberRangeWhichHasOneNum(ExtractResult extResult) rightBracket = NumberRangeConstants.RIGHT_OPEN; } - endValueStr = num[0].ToString(CultureInfo.InvariantCulture); + endValueStr = num[0].ToString("G15", CultureInfo.InvariantCulture); result.Value = new Dictionary() { @@ -232,7 +236,7 @@ private ParseResult ParseNumberRangeWhichHasOneNum(ExtractResult extResult) leftBracket = NumberRangeConstants.LEFT_CLOSED; rightBracket = NumberRangeConstants.RIGHT_CLOSED; - startValueStr = num[0].ToString(CultureInfo.InvariantCulture); + startValueStr = num[0].ToString("G15", CultureInfo.InvariantCulture); endValueStr = startValueStr; result.Value = new Dictionary() diff --git 
a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberRangeParserConfiguration.cs new file mode 100644 index 0000000000..09b18215e3 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberRangeParserConfiguration.cs @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Globalization; +using System.Reflection; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.Number +{ + public abstract class BaseNumberRangeParserConfiguration : INumberRangeParserConfiguration + { + public static TimeSpan RegexTimeOut => NumberRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + + public CultureInfo CultureInfo { get; set; } + + public IExtractor NumberExtractor { get; set; } + + public IExtractor OrdinalExtractor { get; set; } + + public IParser NumberParser { get; set; } + + public Regex MoreOrEqual { get; set; } + + public Regex LessOrEqual { get; set; } + + public Regex MoreOrEqualSuffix { get; set; } + + public Regex LessOrEqualSuffix { get; set; } + + public Regex MoreOrEqualSeparate { get; set; } + + public Regex LessOrEqualSeparate { get; set; } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BasePercentageParser.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BasePercentageParser.cs index e0ead79be8..09e2e83bc7 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BasePercentageParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BasePercentageParser.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.Number { @@ -21,7 +25,7 @@ public override ParseResult Parse(ExtractResult extResult) { // for case like "2 out of 5". extResult.Text = $"{extendedData1[0].Item1} {Config.FractionMarkerToken} {extendedData1[1].Item1}"; - extResult.Data = $"Frac{Config.LangMarker}"; + extResult.Data = $"Frac{Config.LanguageMarker}"; ret = base.Parse(extResult); ret.Value = (double)ret.Value * 100; @@ -34,15 +38,21 @@ public override ParseResult Parse(ExtractResult extResult) ret = base.Parse(extResult); - if (extResult.Data.ToString().StartsWith("Frac")) + if (extResult.Data.ToString().StartsWith("Frac", StringComparison.Ordinal)) { ret.Value = (double)ret.Value * 100; } } - ret.ResolutionStr = Config.CultureInfo != null - ? ((double)ret.Value).ToString(Config.CultureInfo) + "%" - : ret.Value + "%"; + // @TODO make this uniform across cultures. + ret.ResolutionStr = Config.CultureInfo != null ? + ((double)ret.Value).ToString("G15", Config.CultureInfo) + "%" : + ret.Value + "%"; + } + else if (extResult.Data is null) + { + // for case where only symbol is present + ret = new ParseResult(extResult) { Value = "null", ResolutionStr = "null" }; } else { @@ -54,7 +64,7 @@ public override ParseResult Parse(ExtractResult extResult) if (!string.IsNullOrWhiteSpace(ret.ResolutionStr)) { - if (!ret.ResolutionStr.Trim().EndsWith("%")) + if (!ret.ResolutionStr.Trim().EndsWith("%", StringComparison.Ordinal)) { ret.ResolutionStr = ret.ResolutionStr.Trim() + "%"; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/ICJKNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/ICJKNumberParserConfiguration.cs index 0774ecccca..f8c69cf9f9 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Parsers/ICJKNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/ICJKNumberParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) 
Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Number @@ -11,7 +14,8 @@ public interface ICJKNumberParserConfiguration : INumberParserConfiguration ImmutableDictionary FullToHalfMap { get; } - ImmutableDictionary UnitMap { get; } + // A UnitMap sorted by key length is required to ensure the correct replacement order. + ImmutableSortedDictionary UnitMap { get; } ImmutableDictionary TratoSimMap { get; } @@ -27,6 +31,8 @@ public interface ICJKNumberParserConfiguration : INumberParserConfiguration Regex PercentageRegex { get; } + Regex PercentageNumRegex { get; } + Regex PointRegex { get; } Regex DoubleAndRoundRegex { get; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/IIndianNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/IIndianNumberParserConfiguration.cs new file mode 100644 index 0000000000..9eff8c2b53 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/IIndianNumberParserConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.Number +{ + public interface IIndianNumberParserConfiguration : INumberParserConfiguration + { + // Map used for decimal values that are Hindi specific such as डेढ़, सवा and ढाई etc which + // loosely translates as "one and a half" "one and a quarter" "two and a half". + ImmutableDictionary DecimalUnitsMap { get; } + + ImmutableDictionary ZeroToNineMap { get; } + + Regex AdditionTermsRegex { get; } + + Regex FractionPrepositionInverseRegex { get; } + + // Used to parse regional Hindi cases like डेढ/सवा/ढाई which roughly translates to one and a half, one quarters, etc. 
+ // these are Indian Language specific cases and hold various meaning when prefixed with Number units. + double ResolveUnitCompositeNumber(string numberStr); + + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/INumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/INumberParserConfiguration.cs index 0d805b424f..6048dd7398 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Parsers/INumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/INumberParserConfiguration.cs @@ -1,13 +1,20 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; using System.Linq; +using System.Reflection; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Number { public interface INumberParserConfiguration { + string LanguageMarker { get; } + ImmutableDictionary CardinalNumberMap { get; } ImmutableDictionary OrdinalNumberMap { get; } @@ -28,18 +35,22 @@ public interface INumberParserConfiguration Regex FractionPrepositionRegex { get; } + Regex RoundMultiplierRegex { get; } + string FractionMarkerToken { get; } Regex HalfADozenRegex { get; } string HalfADozenText { get; } - string LangMarker { get; } - char NonDecimalSeparatorChar { get; } char DecimalSeparatorChar { get; } + bool IsMultiDecimalSeparatorCulture { get; } + + IEnumerable NonStandardSeparatorVariants { get; } + string WordSeparatorToken { get; } IEnumerable WrittenDecimalSeparatorTexts { get; } @@ -54,8 +65,6 @@ public interface INumberParserConfiguration bool IsCompoundNumberLanguage { get; } - bool IsMultiDecimalSeparatorCulture { get; } - /// /// Used when requiring to normalize a token to a valid expression supported by the ImmutableDictionaries (language dictionaries). 
/// @@ -70,10 +79,21 @@ public interface INumberParserConfiguration /// composite number. /// value of the string. long ResolveCompositeNumber(string numberStr); + + /// + /// Used when requiring special processing for number value cases. + /// + /// matches. + /// value of the match. + (bool isRelevant, double value) GetLangSpecificIntValue(List matchStrs); + } public class BaseNumberParserConfiguration : INumberParserConfiguration { + + protected static readonly (bool, double) NotApplicable = (false, double.MinValue); + public ImmutableDictionary CardinalNumberMap { get; set; } public ImmutableDictionary OrdinalNumberMap { get; set; } @@ -94,18 +114,24 @@ public class BaseNumberParserConfiguration : INumberParserConfiguration public Regex FractionPrepositionRegex { get; set; } + public Regex RoundMultiplierRegex { get; set; } = null; + public string FractionMarkerToken { get; set; } public Regex HalfADozenRegex { get; set; } public string HalfADozenText { get; set; } - public string LangMarker { get; set; } + public string LanguageMarker { get; set; } public char NonDecimalSeparatorChar { get; set; } public char DecimalSeparatorChar { get; set; } + public bool IsMultiDecimalSeparatorCulture { get; set; } + + public virtual IEnumerable NonStandardSeparatorVariants => Enumerable.Empty(); + public string WordSeparatorToken { get; set; } public IEnumerable WrittenDecimalSeparatorTexts { get; set; } @@ -120,7 +146,7 @@ public class BaseNumberParserConfiguration : INumberParserConfiguration public bool IsCompoundNumberLanguage { get; set; } - public bool IsMultiDecimalSeparatorCulture { get; set; } + protected static TimeSpan RegexTimeOut => NumberRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); public virtual long ResolveCompositeNumber(string numberStr) { @@ -156,6 +182,11 @@ public virtual long ResolveCompositeNumber(string numberStr) return 0; } + public virtual (bool isRelevant, double value) GetLangSpecificIntValue(List matchStrs) + { + return 
NotApplicable; + } + public virtual IEnumerable NormalizeTokenSet(IEnumerable tokens, ParseResult context) { var fracWords = new List(); diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/INumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/INumberRangeParserConfiguration.cs index e1b78ebcc1..95f121d5b0 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Parsers/INumberRangeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/INumberRangeParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Globalization; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Number diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/CardinalExtractor.cs index 781328a560..7ae9c859de 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/CardinalExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -6,21 +9,27 @@ namespace Microsoft.Recognizers.Text.Number.Portuguese { - public class CardinalExtractor : BaseNumberExtractor // Same as Spanish. 
+ public class CardinalExtractor : CachedNumberExtractor { private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private CardinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var builder = ImmutableDictionary.CreateBuilder(); // Add Integer Regexes - var intExtract = IntegerExtractor.GetInstance(placeholder); + var intExtract = IntegerExtractor.GetInstance(config); builder.AddRange(intExtract.Regexes); // Add Double Regexes - var douExtract = DoubleExtractor.GetInstance(placeholder); + var douExtract = DoubleExtractor.GetInstance(config); builder.AddRange(douExtract.Regexes); Regexes = builder.ToImmutable(); @@ -30,15 +39,23 @@ private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDef protected sealed override string ExtractType { get; } = Constants.SYS_NUM_CARDINAL; // "Cardinal"; - public static CardinalExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static CardinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new CardinalExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new CardinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/DoubleExtractor.cs 
b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/DoubleExtractor.cs index d244014f01..68e311495e 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/DoubleExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -15,44 +18,46 @@ public class DoubleExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private DoubleExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleDecimalPointRegex(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { 
- new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.PORTUGUESE) }, { - new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, }; @@ -64,15 +69,18 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau protected sealed override string ExtractType { get; } = Constants.SYS_NUM_DOUBLE; // "Double"; - public static DoubleExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static DoubleExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new DoubleExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new DoubleExtractor(config); 
+ Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/FractionExtractor.cs index 2ffb3ff55b..9b881242a7 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/FractionExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -12,38 +15,38 @@ public class FractionExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor> Instances = - new ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor>(); + private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor> Instances = + new ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor>(); - private FractionExtractor(NumberMode mode, NumberOptions options) + private FractionExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - Options = options; var regexes = new Dictionary { { - new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationRegex, 
RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.PORTUGUESE) }, { - new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.PORTUGUESE) }, }; // Not add FractionPrepositionRegex when the mode is Unit to avoid wrong recognize cases like "$1000 over 3" - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { regexes.Add( - new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.PORTUGUESE)); } @@ -52,20 +55,19 @@ private FractionExtractor(NumberMode mode, NumberOptions options) internal sealed override ImmutableDictionary Regexes { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; - public static FractionExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None, string placeholder = "") + public static FractionExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options, placeholder); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new FractionExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new FractionExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return 
Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/IntegerExtractor.cs index 7b55926b7f..702f163f4b 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/IntegerExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -7,7 +10,7 @@ namespace Microsoft.Recognizers.Text.Number.Portuguese { - public class IntegerExtractor : BaseNumberExtractor + public class IntegerExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,48 +18,54 @@ public class IntegerExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private IntegerExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags), + new Regex(NumbersDefinitions.NumbersWithPlaceHolder(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags, RegexTimeOut), 
RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithDozen2Suffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithDozen2Suffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.PORTUGUESE) }, { - new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, 
RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.PORTUGUESE) }, }; @@ -68,15 +77,23 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa protected sealed override string ExtractType { get; } = Constants.SYS_NUM_INTEGER; - public static IntegerExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static IntegerExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new IntegerExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new IntegerExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/NumberExtractor.cs index 3402564f81..9006b5621e 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/NumberExtractor.cs @@ -1,31 +1,39 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Portuguese; namespace Microsoft.Recognizers.Text.Number.Portuguese { - public class NumberExtractor : BaseNumberExtractor + public class NumberExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor> Instances = new ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor>(); - private NumberExtractor(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) + private readonly string keyPrefix; + + private NumberExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - Options = options; + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Mode + "_" + config.Culture); var builder = ImmutableDictionary.CreateBuilder(); // Add Cardinal CardinalExtractor cardExtract = null; - switch (mode) + switch (config.Mode) { case NumberMode.PureNumber: - cardExtract = CardinalExtractor.GetInstance(NumbersDefinitions.PlaceHolderPureNumber); + var purNumConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options, config.Mode, + NumbersDefinitions.PlaceHolderPureNumber); + cardExtract = CardinalExtractor.GetInstance(purNumConfig); break; case NumberMode.Currency: builder.Add( @@ -38,13 +46,13 @@ private NumberExtractor(NumberMode mode = NumberMode.Default, NumberOptions opti if (cardExtract == null) { - cardExtract = CardinalExtractor.GetInstance(); + cardExtract = CardinalExtractor.GetInstance(config); } builder.AddRange(cardExtract.Regexes); // Add Fraction - var fracExtract = FractionExtractor.GetInstance(mode, Options); + var fracExtract = FractionExtractor.GetInstance(config); builder.AddRange(fracExtract.Regexes); this.Regexes 
= builder.ToImmutable(); @@ -52,35 +60,41 @@ private NumberExtractor(NumberMode mode = NumberMode.Default, NumberOptions opti var ambiguityBuilder = ImmutableDictionary.CreateBuilder(); // Do not filter the ambiguous number cases like '$2000' in NumberWithUnit, otherwise they can't be resolved. - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { foreach (var item in NumbersDefinitions.AmbiguityFiltersDict) { - ambiguityBuilder.Add(new Regex(item.Key, RegexFlags), new Regex(item.Value, RegexFlags)); + ambiguityBuilder.Add(new Regex(item.Key, RegexFlags, RegexTimeOut), new Regex(item.Value, RegexFlags, RegexTimeOut)); } } AmbiguityFiltersDict = ambiguityBuilder.ToImmutable(); } + public sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM; // "Number"; - public static NumberExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) + public static NumberExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new NumberExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new NumberExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/NumberRangeExtractor.cs new file 
mode 100644 index 0000000000..9e1f9ca53d --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/NumberRangeExtractor.cs @@ -0,0 +1,95 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Portuguese; + +namespace Microsoft.Recognizers.Text.Number.Portuguese +{ + public class NumberRangeExtractor : BaseNumberRangeExtractor + { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public NumberRangeExtractor(INumberOptionsConfiguration config) + : base(NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + new BaseNumberParser(new PortugueseNumberParserConfiguration(config)), + config) + { + + var regexes = new Dictionary() + { + { + // entre ...e ... + new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUMBETWEEN + }, + { + // mais que ... menos que ... + new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUM + }, + { + // menos que ... mais que ... + new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUM + }, + { + // de ... a ... + new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags, RegexTimeOut), + NumberRangeConstants.TWONUMTILL + }, + { + // mais/maior que ... + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1LB, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // 30 and/or greater/higher + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // less/smaller/lower than ... 
+ new Regex(NumbersDefinitions.OneNumberRangeLessRegex1LB, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + { + // 30 e/ou mais/maior/maiores + new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + { + // igual a ... + new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags, RegexTimeOut), + NumberRangeConstants.EQUAL + }, + { + // igual a 30 ou mais, mais que 30 ou igual ... + new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut), + NumberRangeConstants.MORE + }, + { + // igual a 30 ou menos, menos que 30 ou igual ... + new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut), + NumberRangeConstants.LESS + }, + }; + + Regexes = regexes.ToImmutableDictionary(); + + AmbiguousFractionConnectorsRegex = + new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); + } + + internal sealed override ImmutableDictionary Regexes { get; } + + internal sealed override Regex AmbiguousFractionConnectorsRegex { get; } + + protected sealed override string ExtractType { get; } = Constants.SYS_NUMRANGE; + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/OrdinalExtractor.cs index 5fe31bd3c5..86daf272de 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/OrdinalExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -7,7 +10,7 @@ namespace Microsoft.Recognizers.Text.Number.Portuguese { - public class OrdinalExtractor : BaseNumberExtractor + public class OrdinalExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,16 +18,22 @@ public class OrdinalExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private OrdinalExtractor() + private readonly string keyPrefix; + + private OrdinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options.ToString() + "_" + config.Culture); + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.OrdinalEnglishRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalEnglishRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.PORTUGUESE) }, }; @@ -36,15 +45,22 @@ private OrdinalExtractor() protected sealed override string ExtractType { get; } = Constants.SYS_NUM_ORDINAL; // "Ordinal"; - public static OrdinalExtractor GetInstance(string placeholder = "") + public static OrdinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + var extractorKey = config.Options.ToString(); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new OrdinalExtractor(); - Instances.TryAdd(placeholder, instance); + var instance = new OrdinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; 
+ return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/PercentageExtractor.cs index 6bd2423a83..a3b9b7e702 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/PercentageExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -8,10 +11,10 @@ namespace Microsoft.Recognizers.Text.Number.Portuguese { public sealed class PercentageExtractor : BasePercentageExtractor { - public PercentageExtractor(NumberOptions options = NumberOptions.None) - : base(NumberExtractor.GetInstance(options: options)) + public PercentageExtractor(BaseNumberOptionsConfiguration config) + : base(NumberExtractor.GetInstance(config)) { - Options = options; + Options = config.Options; Regexes = InitRegexes(); } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Parsers/PortugueseNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Parsers/PortugueseNumberParserConfiguration.cs index 1c4005a11a..fb12f586ec 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Parsers/PortugueseNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Parsers/PortugueseNumberParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; using System.Linq; @@ -18,7 +22,7 @@ public PortugueseNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; @@ -44,10 +48,11 @@ public PortugueseNumberParserConfiguration(INumberOptionsConfiguration config) this.RelativeReferenceRelativeToMap = NumbersDefinitions.RelativeReferenceRelativeToMap.ToImmutableDictionary(); this.RoundNumberMap = NumbersDefinitions.RoundNumberMap.ToImmutableDictionary(); - this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags); - this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags); - this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags); - this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags); + this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags, RegexTimeOut); + this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags, RegexTimeOut); + this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags, RegexTimeOut); + this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut); + this.RoundMultiplierRegex = new Regex(NumbersDefinitions.RoundMultiplierRegex, RegexFlags, RegexTimeOut); } public string NonDecimalSeparatorText { get; private set; } @@ -66,7 +71,7 @@ public override IEnumerable NormalizeTokenSet(IEnumerable tokens } // ends with 'avo' or 'ava' - if (NumbersDefinitions.WrittenFractionSuffix.Any(suffix => tempWord.EndsWith(suffix))) + if 
(NumbersDefinitions.WrittenFractionSuffix.Any(suffix => tempWord.EndsWith(suffix, StringComparison.Ordinal))) { var origTempWord = tempWord; var newLength = origTempWord.Length; @@ -96,6 +101,20 @@ public override IEnumerable NormalizeTokenSet(IEnumerable tokens result.Add(token); } + // The following piece of code is needed to compute the fraction pattern number+'e meio' + // e.g. 'cinco e meio' ('five and a half') where the numerator is omitted in Portuguese. + // It works by inserting the numerator 'um' ('a') in the list fracWords + // so that the pattern is correctly processed. + var resLen = result.Count; + if (resLen > 2) + { + if (result[resLen - 1] == NumbersDefinitions.OneHalfTokens[1] && result[resLen - 2] == NumbersDefinitions.WordSeparatorToken) + { + result[resLen - 2] = NumbersDefinitions.WrittenFractionSeparatorTexts[0]; + result.Insert(resLen - 1, NumbersDefinitions.OneHalfTokens[0]); + } + } + return result; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Parsers/PortugueseNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Parsers/PortugueseNumberRangeParserConfiguration.cs new file mode 100644 index 0000000000..a38f6e0632 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Parsers/PortugueseNumberRangeParserConfiguration.cs @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Portuguese; + +namespace Microsoft.Recognizers.Text.Number.Portuguese +{ + public class PortugueseNumberRangeParserConfiguration : BaseNumberRangeParserConfiguration + { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public PortugueseNumberRangeParserConfiguration(INumberOptionsConfiguration config) + { + + CultureInfo = new CultureInfo(config.Culture); + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options); + + NumberExtractor = Portuguese.NumberExtractor.GetInstance(numConfig); + OrdinalExtractor = Portuguese.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new PortugueseNumberParserConfiguration(config)); + + MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags, RegexTimeOut); + LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags, RegexTimeOut); + MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags, RegexTimeOut); + LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags, RegexTimeOut); + MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut); + LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Number/Properties/AssemblyInfo.cs b/.NET/Microsoft.Recognizers.Text.Number/Properties/AssemblyInfo.cs index c1f4921312..e866d7ae7d 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Properties/AssemblyInfo.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Properties/AssemblyInfo.cs @@ -1,4 +1,7 @@ -using System.Reflection; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Reflection; using System.Runtime.InteropServices; // General Information about an assembly is controlled through the following diff --git a/.NET/Microsoft.Recognizers.Text.Number/RegexTagGenerator.cs b/.NET/Microsoft.Recognizers.Text.Number/RegexTagGenerator.cs index e02f28cba7..fe30c85058 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/RegexTagGenerator.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/RegexTagGenerator.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Number +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Number { public static class RegexTagGenerator { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/CardinalExtractor.cs index 2bfafb9715..f084c1cdf7 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/CardinalExtractor.cs @@ -1,26 +1,33 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions.Spanish; - namespace Microsoft.Recognizers.Text.Number.Spanish { - public class CardinalExtractor : BaseNumberExtractor + public class CardinalExtractor : CachedNumberExtractor { private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private CardinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var builder = ImmutableDictionary.CreateBuilder(); // Add Integer Regexes - var intExtract = IntegerExtractor.GetInstance(placeholder); + var intExtract = IntegerExtractor.GetInstance(config); builder.AddRange(intExtract.Regexes); // Add Double Regexes - var douExtract = DoubleExtractor.GetInstance(placeholder); + var douExtract = DoubleExtractor.GetInstance(config); builder.AddRange(douExtract.Regexes); this.Regexes = builder.ToImmutable(); @@ -30,15 +37,22 @@ private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDef protected sealed override string ExtractType { get; } = Constants.SYS_NUM_CARDINAL; // "Cardinal"; - public static CardinalExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static CardinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new CardinalExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new CardinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + 
protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/DoubleExtractor.cs index 67d628243e..eb8850a3ad 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/DoubleExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -15,44 +18,46 @@ public class DoubleExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private DoubleExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleDecimalPointRegex(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new 
Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.SPANISH) }, { - new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, }; @@ -64,15 +69,18 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau protected sealed override string ExtractType { get; } = Constants.SYS_NUM_DOUBLE; // "Double"; - public static DoubleExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static DoubleExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + 
+ var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new DoubleExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new DoubleExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/FractionExtractor.cs index 3caba540fb..252158b7c8 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/FractionExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -12,38 +15,38 @@ public class FractionExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor> Instances = - new ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor>(); + private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor> Instances = + new ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor>(); - private FractionExtractor(NumberMode mode, NumberOptions options) + private FractionExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - Options = options; var regexes = new Dictionary { { - new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags, RegexTimeOut), 
RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.SPANISH) }, { - new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.SPANISH) }, }; // Not add FractionPrepositionRegex when the mode is Unit to avoid wrong recognize cases like "$1000 over 3" - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { regexes.Add( - new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.SPANISH)); } @@ -52,20 +55,19 @@ private FractionExtractor(NumberMode mode, NumberOptions options) internal sealed override ImmutableDictionary Regexes { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; - public static FractionExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None, string placeholder = "") + public static FractionExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options, placeholder); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new 
FractionExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new FractionExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/IntegerExtractor.cs index 19ee186291..02530a94dc 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/IntegerExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -7,7 +10,7 @@ namespace Microsoft.Recognizers.Text.Number.Spanish { - public class IntegerExtractor : BaseNumberExtractor + public class IntegerExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,44 +18,50 @@ public class IntegerExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private IntegerExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags), + new Regex(NumbersDefinitions.NumbersWithPlaceHolder(config.Placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, 
Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.SPANISH) }, { - new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, 
RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.SPANISH) }, }; @@ -64,15 +73,22 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa protected sealed override string ExtractType { get; } = Constants.SYS_NUM_INTEGER; - public static IntegerExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static IntegerExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new IntegerExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new IntegerExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberExtractor.cs index 775cac68ff..b3463ddb2d 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberExtractor.cs @@ -1,13 +1,15 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Spanish; namespace Microsoft.Recognizers.Text.Number.Spanish { - public class NumberExtractor : BaseNumberExtractor + public class NumberExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,20 +17,26 @@ public class NumberExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor> Instances = new ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor>(); - private NumberExtractor(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) + private readonly string keyPrefix; + + private NumberExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags); - Options = options; + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Mode + "_" + config.Culture); + + NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags, RegexTimeOut); var builder = ImmutableDictionary.CreateBuilder(); // Add Cardinal CardinalExtractor cardExtract = null; - switch (mode) + switch (config.Mode) { case NumberMode.PureNumber: - cardExtract = CardinalExtractor.GetInstance(NumbersDefinitions.PlaceHolderPureNumber); + var purNumConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options, config.Mode, + NumbersDefinitions.PlaceHolderPureNumber); + cardExtract = CardinalExtractor.GetInstance(purNumConfig); break; case NumberMode.Currency: builder.Add( @@ -41,13 +49,13 @@ private NumberExtractor(NumberMode mode = NumberMode.Default, NumberOptions opti if (cardExtract == null) { - cardExtract = CardinalExtractor.GetInstance(); + 
cardExtract = CardinalExtractor.GetInstance(config); } builder.AddRange(cardExtract.Regexes); // Add Fraction - var fracExtract = FractionExtractor.GetInstance(mode, Options); + var fracExtract = FractionExtractor.GetInstance(config); builder.AddRange(fracExtract.Regexes); Regexes = builder.ToImmutable(); @@ -55,11 +63,11 @@ private NumberExtractor(NumberMode mode = NumberMode.Default, NumberOptions opti var ambiguityBuilder = ImmutableDictionary.CreateBuilder(); // Do not filter the ambiguous number cases like '$2000' in NumberWithUnit, otherwise they can't be resolved. - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { foreach (var item in NumbersDefinitions.AmbiguityFiltersDict) { - ambiguityBuilder.Add(new Regex(item.Key, RegexFlags), new Regex(item.Value, RegexFlags)); + ambiguityBuilder.Add(new Regex(item.Key, RegexFlags, RegexTimeOut), new Regex(item.Value, RegexFlags, RegexTimeOut)); } } @@ -70,23 +78,27 @@ private NumberExtractor(NumberMode mode = NumberMode.Default, NumberOptions opti protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - // "Number" protected sealed override string ExtractType { get; } = Constants.SYS_NUM; protected sealed override Regex NegativeNumberTermsRegex { get; } - public static NumberExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) + public static NumberExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new NumberExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new NumberExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; + } + + protected override object 
GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberRangeExtractor.cs index fe09b73d34..f18eea587e 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberRangeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberRangeExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -12,8 +15,8 @@ public class NumberRangeExtractor : BaseNumberRangeExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public NumberRangeExtractor(INumberOptionsConfiguration config) - : base(NumberExtractor.GetInstance(), - OrdinalExtractor.GetInstance(), + : base(NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), new BaseNumberParser(new SpanishNumberParserConfiguration(config)), config) { @@ -22,57 +25,57 @@ public NumberRangeExtractor(INumberOptionsConfiguration config) { { // entre ...y ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUMBETWEEN }, { // más que ... monos que ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUM }, { // monos que ... más que ... 
- new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUM }, { // de ... a ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUMTILL }, { // más/mayor que ... - new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1LB, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // 30 and/or greater/higher - new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // less/smaller/lower than ... - new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex1LB, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, { // 30 y/o mas/más/mayor/mayores - new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, { // igual a ... - new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.EQUAL }, { // igual a 30 o más, más que 30 o igual ... - new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // igual a 30 o menos, menos que 30 o igual ... 
- new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, }; @@ -80,7 +83,7 @@ public NumberRangeExtractor(INumberOptionsConfiguration config) Regexes = regexes.ToImmutableDictionary(); AmbiguousFractionConnectorsRegex = - new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags); + new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut); } internal sealed override ImmutableDictionary Regexes { get; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/OrdinalExtractor.cs index f38ff4ae5c..00f3fa2eda 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/OrdinalExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -7,7 +10,7 @@ namespace Microsoft.Recognizers.Text.Number.Spanish { - public class OrdinalExtractor : BaseNumberExtractor + public class OrdinalExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,16 +18,22 @@ public class OrdinalExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private OrdinalExtractor() + private readonly string keyPrefix; + + private OrdinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options.ToString() + "_" + config.Culture); + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.OrdinalNounRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalNounRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.SPANISH) }, }; @@ -36,15 +45,22 @@ private OrdinalExtractor() protected sealed override string ExtractType { get; } = Constants.SYS_NUM_ORDINAL; // "Ordinal"; - public static OrdinalExtractor GetInstance(string placeholder = "") + public static OrdinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + var extractorKey = config.Options.ToString(); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new OrdinalExtractor(); - Instances.TryAdd(placeholder, instance); + var instance = new OrdinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return 
Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/PercentageExtractor.cs index e26924cedc..fedeca121c 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/PercentageExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -8,10 +11,10 @@ namespace Microsoft.Recognizers.Text.Number.Spanish { public sealed class PercentageExtractor : BasePercentageExtractor { - public PercentageExtractor(NumberOptions options = NumberOptions.None) - : base(NumberExtractor.GetInstance(options: options)) + public PercentageExtractor(BaseNumberOptionsConfiguration config) + : base(NumberExtractor.GetInstance(config)) { - Options = options; + Options = config.Options; Regexes = InitRegexes(); } @@ -19,7 +22,7 @@ public PercentageExtractor(NumberOptions options = NumberOptions.None) protected override ImmutableHashSet InitRegexes() { - HashSet regexStrs = new HashSet + var regexStrs = new HashSet { NumbersDefinitions.NumberWithPrefixPercentage, }; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberParserConfiguration.cs index 24d1e6a482..70f19a0a8a 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberParserConfiguration.cs @@ -1,6 +1,11 @@ -using System.Collections.Generic; +// 
Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; +using System.Linq; using System.Text; using System.Text.RegularExpressions; @@ -17,7 +22,7 @@ public SpanishNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; @@ -44,14 +49,17 @@ public SpanishNumberParserConfiguration(INumberOptionsConfiguration config) this.RelativeReferenceRelativeToMap = NumbersDefinitions.RelativeReferenceRelativeToMap.ToImmutableDictionary(); this.RoundNumberMap = NumbersDefinitions.RoundNumberMap.ToImmutableDictionary(); - this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags); - this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags); - this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags); - this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags); + this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags, RegexTimeOut); + this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags, RegexTimeOut); + this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags, RegexTimeOut); + this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut); + this.RoundMultiplierRegex = new Regex(NumbersDefinitions.RoundMultiplierRegex, RegexFlags, RegexTimeOut); } public string NonDecimalSeparatorText { get; private set; } + public override IEnumerable NonStandardSeparatorVariants => 
NumbersDefinitions.NonStandardSeparatorVariants; + public override IEnumerable NormalizeTokenSet(IEnumerable tokens, ParseResult context) { var result = new List(); @@ -65,7 +73,7 @@ public override IEnumerable NormalizeTokenSet(IEnumerable tokens continue; } - if (tempWord.EndsWith("avo") || tempWord.EndsWith("ava")) + if (tempWord.EndsWith("avo", StringComparison.Ordinal) || tempWord.EndsWith("ava", StringComparison.Ordinal)) { var origTempWord = tempWord; var newLength = origTempWord.Length; @@ -89,6 +97,20 @@ public override IEnumerable NormalizeTokenSet(IEnumerable tokens result.Add(token); } + // The following piece of code is needed to compute the fraction pattern number+'y medio' + // e.g. 'cinco y medio' ('five and a half') where the numerator is omitted in Spanish. + // It works by inserting the numerator 'un' ('a') in the list fracWords + // so that the pattern is correctly processed. + var resLen = result.Count; + if (resLen > 2) + { + if (result[resLen - 1] == NumbersDefinitions.OneHalfTokens[1] && result[resLen - 2] == NumbersDefinitions.WordSeparatorToken) + { + result[resLen - 2] = NumbersDefinitions.WrittenFractionSeparatorTexts[0]; + result.Insert(resLen - 1, NumbersDefinitions.OneHalfTokens[0]); + } + } + return result; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberRangeParserConfiguration.cs index 365f782674..e7c78c479f 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberRangeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberRangeParserConfiguration.cs @@ -1,10 +1,13 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; namespace Microsoft.Recognizers.Text.Number.Spanish { - public class SpanishNumberRangeParserConfiguration : INumberRangeParserConfiguration + public class SpanishNumberRangeParserConfiguration : BaseNumberRangeParserConfiguration { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -14,37 +17,19 @@ public SpanishNumberRangeParserConfiguration(INumberOptionsConfiguration config) CultureInfo = new CultureInfo(config.Culture); - NumberExtractor = Spanish.NumberExtractor.GetInstance(); - OrdinalExtractor = Spanish.OrdinalExtractor.GetInstance(); + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options); + + NumberExtractor = Spanish.NumberExtractor.GetInstance(numConfig); + OrdinalExtractor = Spanish.OrdinalExtractor.GetInstance(numConfig); NumberParser = new BaseNumberParser(new SpanishNumberParserConfiguration(config)); - MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags); - LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags); - MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags); - LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags); - MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags); - LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags); + MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags, RegexTimeOut); + LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags, RegexTimeOut); + MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags, RegexTimeOut); + LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags, RegexTimeOut); + MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, 
RegexTimeOut); + LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut); } - - public CultureInfo CultureInfo { get; private set; } - - public IExtractor NumberExtractor { get; private set; } - - public IExtractor OrdinalExtractor { get; private set; } - - public IParser NumberParser { get; private set; } - - public Regex MoreOrEqual { get; private set; } - - public Regex LessOrEqual { get; private set; } - - public Regex MoreOrEqualSuffix { get; private set; } - - public Regex LessOrEqualSuffix { get; private set; } - - public Regex MoreOrEqualSeparate { get; private set; } - - public Regex LessOrEqualSeparate { get; private set; } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Swedish/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Swedish/Extractors/CardinalExtractor.cs new file mode 100644 index 0000000000..9adb86077c --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Swedish/Extractors/CardinalExtractor.cs @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
using System.Collections.Concurrent;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

namespace Microsoft.Recognizers.Text.Number.Swedish
{
    /// <summary>
    /// Swedish cardinal-number extractor. Its pattern table is the union of the
    /// Swedish <see cref="IntegerExtractor"/> and <see cref="DoubleExtractor"/> tables.
    /// </summary>
    public class CardinalExtractor : CachedNumberExtractor
    {
        // Shared instances, keyed by the placeholder string of the configuration.
        // NOTE(review): the key ignores config.Options and config.Culture although both are
        // baked into the instance (base(config.Options), keyPrefix). Two configurations that
        // share a placeholder but differ in options receive the same instance - confirm this
        // is intended before widening the key.
        private static readonly ConcurrentDictionary<string, CardinalExtractor> Instances =
            new ConcurrentDictionary<string, CardinalExtractor>();

        // Interned prefix combined with the input text to build result-cache keys (see GenKey).
        private readonly string keyPrefix;

        private CardinalExtractor(BaseNumberOptionsConfiguration config)
            : base(config.Options)
        {
            keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture);

            var builder = ImmutableDictionary.CreateBuilder<Regex, TypeTag>();

            // Integer patterns first, then double patterns.
            builder.AddRange(IntegerExtractor.GetInstance(config).Regexes);
            builder.AddRange(DoubleExtractor.GetInstance(config).Regexes);

            Regexes = builder.ToImmutable();
        }

        internal sealed override ImmutableDictionary<Regex, TypeTag> Regexes { get; }

        protected sealed override string ExtractType { get; } = Constants.SYS_NUM_CARDINAL; // "Cardinal";

        /// <summary>
        /// Returns the shared extractor for the placeholder of <paramref name="config"/>,
        /// creating it on first use.
        /// </summary>
        /// <param name="config">Options, placeholder and culture used to build the extractor.</param>
        /// <returns>The cached <see cref="CardinalExtractor"/> for that placeholder.</returns>
        public static CardinalExtractor GetInstance(BaseNumberOptionsConfiguration config)
        {
            // GetOrAdd performs the lookup and the insert in one call and always hands back
            // the instance that actually ended up in the dictionary.
            return Instances.GetOrAdd(config.Placeholder, _ => new CardinalExtractor(config));
        }

        /// <summary>Builds the cache key for <paramref name="input"/>.</summary>
        protected override object GenKey(string input)
        {
            return (keyPrefix, input);
        }
    }
}
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.Swedish;

namespace Microsoft.Recognizers.Text.Number.Swedish
{
    /// <summary>
    /// Swedish extractor for floating-point numbers (decimal notation, multipliers,
    /// written-out forms, exponential notation and long grouped formats).
    /// </summary>
    public class DoubleExtractor : BaseNumberExtractor
    {
        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

        // Shared instances, keyed by the placeholder string of the configuration.
        // NOTE(review): config.Options is passed to the base class but is not part of the key.
        private static readonly ConcurrentDictionary<string, DoubleExtractor> Instances =
            new ConcurrentDictionary<string, DoubleExtractor>();

        private DoubleExtractor(BaseNumberOptionsConfiguration config)
            : base(config.Options)
        {
            var numberTag = RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX);
            var swedishTag = RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.SWEDISH);
            var powerTag = RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX);

            var regexes = new Dictionary<Regex, TypeTag>
            {
                {
                    new Regex(NumbersDefinitions.DoubleDecimalPointRegex(config.Placeholder), RegexFlags, RegexTimeOut),
                    numberTag
                },
                {
                    new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(config.Placeholder), RegexFlags, RegexTimeOut),
                    numberTag
                },
                {
                    new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags, RegexTimeOut),
                    numberTag
                },
                {
                    new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags, RegexTimeOut),
                    numberTag
                },
                {
                    new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags, RegexTimeOut),
                    swedishTag
                },
                {
                    new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags, RegexTimeOut),
                    powerTag
                },
                {
                    new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags, RegexTimeOut),
                    powerTag
                },

                // The LongFormatType.DoubleNumDotComma entry is not registered; it was already
                // commented out in the original source:
                /* {
                    GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, placeholder, RegexFlags, RegexTimeOut),
                    RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX)
                },*/
                {
                    GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, config.Placeholder, RegexFlags),
                    numberTag
                },
                {
                    // RegexFlags was missing here; pass it so this pattern is compiled with the
                    // same options as every other entry in the table.
                    GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumBlankComma, config.Placeholder, RegexFlags),
                    numberTag
                },
            };

            Regexes = regexes.ToImmutableDictionary();
        }

        internal sealed override ImmutableDictionary<Regex, TypeTag> Regexes { get; }

        // "Double";
        protected sealed override string ExtractType { get; } = Constants.SYS_NUM_DOUBLE;

        /// <summary>
        /// Returns the shared extractor for the placeholder of <paramref name="config"/>,
        /// creating it on first use.
        /// </summary>
        /// <param name="config">Options and placeholder used to build the extractor.</param>
        /// <returns>The cached <see cref="DoubleExtractor"/> for that placeholder.</returns>
        public static DoubleExtractor GetInstance(BaseNumberOptionsConfiguration config)
        {
            // Single atomic lookup-or-insert instead of ContainsKey + TryAdd + indexer.
            return Instances.GetOrAdd(config.Placeholder, _ => new DoubleExtractor(config));
        }
    }
}
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.Swedish;

namespace Microsoft.Recognizers.Text.Number.Swedish
{
    // ---- Swedish/Extractors/FractionExtractor.cs ----

    /// <summary>
    /// Swedish extractor for fractions (numeric notation, noun forms and, outside
    /// unit mode, prepositional forms).
    /// </summary>
    public class FractionExtractor : BaseNumberExtractor
    {
        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

        // Shared instances, one per (mode, options) pair.
        private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor> Instances =
            new ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor>();

        private FractionExtractor(BaseNumberOptionsConfiguration config)
            : base(config.Options)
        {
            Options = config.Options;

            var numberTag = RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX);
            var swedishTag = RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.SWEDISH);

            var regexes = new Dictionary<Regex, TypeTag>
            {
                {
                    new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags, RegexTimeOut),
                    numberTag
                },
                {
                    new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags, RegexTimeOut),
                    numberTag
                },
                {
                    new Regex(NumbersDefinitions.FractionNounRegex, RegexFlags, RegexTimeOut),
                    swedishTag
                },
                {
                    new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags, RegexTimeOut),
                    swedishTag
                },
            };

            // FractionPrepositionRegex is not added in Unit mode, to avoid wrongly
            // recognizing cases like "$1000 over 3" as a fraction.
            if (config.Mode != NumberMode.Unit)
            {
                regexes.Add(
                    new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut),
                    swedishTag);
            }

            Regexes = regexes.ToImmutableDictionary();
        }

        public sealed override NumberOptions Options { get; }

        internal sealed override ImmutableDictionary<Regex, TypeTag> Regexes { get; }

        // "Fraction";
        protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION;

        /// <summary>
        /// Returns the shared extractor for the (mode, options) pair of
        /// <paramref name="config"/>, creating it on first use.
        /// </summary>
        public static FractionExtractor GetInstance(BaseNumberOptionsConfiguration config)
        {
            // Single atomic lookup-or-insert instead of ContainsKey + TryAdd + indexer.
            return Instances.GetOrAdd((config.Mode, config.Options), _ => new FractionExtractor(config));
        }
    }

    // ---- Swedish/Extractors/IntegerExtractor.cs ----

    /// <summary>
    /// Swedish extractor for integers (digits, suffixed and round numbers, dozens,
    /// written-out forms and long grouped formats).
    /// </summary>
    public class IntegerExtractor : CachedNumberExtractor
    {
        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

        // Shared instances, keyed by the placeholder string of the configuration.
        // NOTE(review): config.Options and config.Culture are baked into the instance
        // (base(config.Options), keyPrefix) but are not part of the key - confirm intended.
        private static readonly ConcurrentDictionary<string, IntegerExtractor> Instances =
            new ConcurrentDictionary<string, IntegerExtractor>();

        // Interned prefix combined with the input text to build result-cache keys (see GenKey).
        private readonly string keyPrefix;

        private IntegerExtractor(BaseNumberOptionsConfiguration config)
            : base(config.Options)
        {
            keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture);

            var numberTag = RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX);
            var swedishTag = RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.SWEDISH);

            var regexes = new Dictionary<Regex, TypeTag>
            {
                {
                    new Regex(NumbersDefinitions.NumbersWithPlaceHolder(config.Placeholder), RegexFlags, RegexTimeOut),
                    numberTag
                },
                {
                    new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags, RegexTimeOut),
                    numberTag
                },
                {
                    new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags, RegexTimeOut),
                    numberTag
                },
                {
                    new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags, RegexTimeOut),
                    numberTag
                },
                {
                    new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags, RegexTimeOut),
                    swedishTag
                },
                {
                    new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags, RegexTimeOut),
                    swedishTag
                },

                // The LongFormatType.IntegerNumComma entry is not registered; it was already
                // commented out in the original source:
                /*{
                    GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, config.Placeholder, RegexFlags),
                    RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX)
                },*/
                {
                    GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, config.Placeholder, RegexFlags),
                    numberTag
                },
                {
                    GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, config.Placeholder, RegexFlags),
                    numberTag
                },
            };

            Regexes = regexes.ToImmutableDictionary();
        }

        internal sealed override ImmutableDictionary<Regex, TypeTag> Regexes { get; }

        protected sealed override string ExtractType { get; } = Constants.SYS_NUM_INTEGER; // "Integer";

        /// <summary>
        /// Returns the shared extractor for the placeholder of <paramref name="config"/>,
        /// creating it on first use.
        /// </summary>
        public static IntegerExtractor GetInstance(BaseNumberOptionsConfiguration config)
        {
            // Single atomic lookup-or-insert instead of ContainsKey + TryAdd + indexer.
            return Instances.GetOrAdd(config.Placeholder, _ => new IntegerExtractor(config));
        }

        /// <summary>Builds the cache key for <paramref name="input"/>.</summary>
        protected override object GenKey(string input)
        {
            return (keyPrefix, input);
        }
    }

    // ---- Swedish/Extractors/NumberExtractor.cs ----

    /// <summary>
    /// Top-level Swedish number extractor: combines the cardinal and fraction pattern
    /// tables (plus the currency pattern in Currency mode) and sets up ambiguity filters.
    /// </summary>
    public class NumberExtractor : CachedNumberExtractor
    {
        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

        // Shared instances, one per (mode, options) pair.
        private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor> Instances =
            new ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor>();

        // Interned prefix combined with the input text to build result-cache keys (see GenKey).
        private readonly string keyPrefix;

        private NumberExtractor(BaseNumberOptionsConfiguration config)
            : base(config.Options)
        {
            keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Mode + "_" + config.Culture);

            // The negative-term pattern is anchored to the end of the text it is matched against.
            NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags, RegexTimeOut);

            AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut);

            RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags, RegexTimeOut);

            var builder = ImmutableDictionary.CreateBuilder<Regex, TypeTag>();

            // Add Cardinal
            CardinalExtractor cardExtract = null;
            switch (config.Mode)
            {
                case NumberMode.PureNumber:
                    // Pure-number mode swaps in the dedicated pure-number placeholder.
                    var purNumConfig = new BaseNumberOptionsConfiguration(
                        config.Culture,
                        config.Options,
                        config.Mode,
                        NumbersDefinitions.PlaceHolderPureNumber);
                    cardExtract = CardinalExtractor.GetInstance(purNumConfig);
                    break;
                case NumberMode.Currency:
                    builder.Add(
                        BaseNumberExtractor.CurrencyRegex,
                        RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX));
                    break;
                case NumberMode.Default:
                    break;
            }

            // Every mode other than PureNumber uses the cardinal extractor of the incoming config.
            if (cardExtract == null)
            {
                cardExtract = CardinalExtractor.GetInstance(config);
            }

            builder.AddRange(cardExtract.Regexes);

            // Add Fraction
            var fracExtract = FractionExtractor.GetInstance(config);
            builder.AddRange(fracExtract.Regexes);

            Regexes = builder.ToImmutable();

            var ambiguityBuilder = ImmutableDictionary.CreateBuilder<Regex, Regex>();

            // Do not filter the ambiguous number cases like '$2000' in NumberWithUnit, otherwise they can't be resolved.
            if (config.Mode != NumberMode.Unit)
            {
                foreach (var item in NumbersDefinitions.AmbiguityFiltersDict)
                {
                    ambiguityBuilder.Add(
                        new Regex(item.Key, RegexFlags, RegexTimeOut),
                        new Regex(item.Value, RegexFlags, RegexTimeOut));
                }
            }

            AmbiguityFiltersDict = ambiguityBuilder.ToImmutable();
        }

        internal sealed override ImmutableDictionary<Regex, TypeTag> Regexes { get; }

        protected sealed override ImmutableDictionary<Regex, Regex> AmbiguityFiltersDict { get; }

        protected sealed override string ExtractType { get; } = Constants.SYS_NUM; // "Number";

        protected sealed override Regex NegativeNumberTermsRegex { get; }

        protected sealed override Regex AmbiguousFractionConnectorsRegex { get; }

        protected sealed override Regex RelativeReferenceRegex { get; }

        /// <summary>
        /// Returns the shared extractor for the (mode, options) pair of
        /// <paramref name="config"/>, creating it on first use.
        /// </summary>
        public static NumberExtractor GetInstance(BaseNumberOptionsConfiguration config)
        {
            // Single atomic lookup-or-insert instead of ContainsKey + TryAdd + indexer.
            return Instances.GetOrAdd((config.Mode, config.Options), _ => new NumberExtractor(config));
        }

        /// <summary>Builds the cache key for <paramref name="input"/>.</summary>
        protected override object GenKey(string input)
        {
            return (keyPrefix, input);
        }
    }
}
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.Swedish;

namespace Microsoft.Recognizers.Text.Number.Swedish
{
    /// <summary>
    /// Swedish extractor for number ranges ("between ... and ...", "more than ...",
    /// "less than ...", "equal to ..." and their combinations).
    /// </summary>
    public class NumberRangeExtractor : BaseNumberRangeExtractor
    {
        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

        /// <summary>
        /// Builds the range extractor on top of the Swedish number extractor, ordinal
        /// extractor and number parser derived from <paramref name="config"/>.
        /// </summary>
        /// <param name="config">Culture and options for the underlying extractors and parser.</param>
        public NumberRangeExtractor(INumberOptionsConfiguration config)
            : base(
                   NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)),
                   OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)),
                   new BaseNumberParser(new SwedishNumberParserConfiguration(config)),
                   config)
        {
            // Every pattern is compiled with RegexTimeOut, matching the other extractors in
            // this change set, so that matching is bounded by the shared timeout.
            var regexes = new Dictionary<Regex, string>()
            {
                {
                    // between...and...
                    new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags, RegexTimeOut),
                    NumberRangeConstants.TWONUMBETWEEN
                },
                {
                    // more than ... less than ...
                    new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags, RegexTimeOut),
                    NumberRangeConstants.TWONUM
                },
                {
                    // less than ... more than ...
                    new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags, RegexTimeOut),
                    NumberRangeConstants.TWONUM
                },
                {
                    // from ... to/~/- ...
                    new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags, RegexTimeOut),
                    NumberRangeConstants.TWONUMTILL
                },
                {
                    // more/greater/higher than ...
                    new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags, RegexTimeOut),
                    NumberRangeConstants.MORE
                },
                {
                    // 30 and/or greater/higher
                    new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags, RegexTimeOut),
                    NumberRangeConstants.MORE
                },
                {
                    // less/smaller/lower than ...
                    new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags, RegexTimeOut),
                    NumberRangeConstants.LESS
                },
                {
                    // 30 and/or less/smaller/lower
                    new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags, RegexTimeOut),
                    NumberRangeConstants.LESS
                },
                {
                    // equal to ...
                    new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags, RegexTimeOut),
                    NumberRangeConstants.EQUAL
                },
                {
                    // equal to 30 or more than, larger than 30 or equal to ...
                    new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut),
                    NumberRangeConstants.MORE
                },
                {
                    // equal to 30 or less, smaller than 30 or equal ...
                    new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut),
                    NumberRangeConstants.LESS
                },
            };

            Regexes = regexes.ToImmutableDictionary();

            AmbiguousFractionConnectorsRegex =
                new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut);
        }

        internal sealed override ImmutableDictionary<Regex, string> Regexes { get; }

        internal sealed override Regex AmbiguousFractionConnectorsRegex { get; }

        protected sealed override string ExtractType { get; } = Constants.SYS_NUMRANGE;
    }
}
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.Swedish;

namespace Microsoft.Recognizers.Text.Number.Swedish
{
    // ---- Swedish/Extractors/OrdinalExtractor.cs ----

    /// <summary>
    /// Swedish extractor for ordinal numbers (suffixed digits, numeric ordinals,
    /// written-out ordinals and round-number ordinals).
    /// </summary>
    public class OrdinalExtractor : CachedNumberExtractor
    {
        private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

        // Shared instances, keyed by the string form of the configured options.
        private static readonly ConcurrentDictionary<string, OrdinalExtractor> Instances =
            new ConcurrentDictionary<string, OrdinalExtractor>();

        // Interned prefix combined with the input text to build result-cache keys (see GenKey).
        private readonly string keyPrefix;

        private OrdinalExtractor(BaseNumberOptionsConfiguration config)
            : base(config.Options)
        {
            keyPrefix = string.Intern(ExtractType + "_" + config.Options.ToString() + "_" + config.Culture);

            // Every pattern is compiled with RegexTimeOut, matching the other extractors in
            // this change set, so that matching is bounded by the shared timeout.
            AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags, RegexTimeOut);

            RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags, RegexTimeOut);

            var numberTag = RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX);
            var swedishTag = RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.SWEDISH);

            var regexes = new Dictionary<Regex, TypeTag>
            {
                {
                    new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags, RegexTimeOut),
                    numberTag
                },
                {
                    new Regex(NumbersDefinitions.OrdinalNumericRegex, RegexFlags, RegexTimeOut),
                    numberTag
                },
                {
                    new Regex(NumbersDefinitions.OrdinalSwedishRegex, RegexFlags, RegexTimeOut),
                    swedishTag
                },
                {
                    new Regex(NumbersDefinitions.OrdinalRoundNumberRegex, RegexFlags, RegexTimeOut),
                    swedishTag
                },
            };

            Regexes = regexes.ToImmutableDictionary();
        }

        internal sealed override ImmutableDictionary<Regex, TypeTag> Regexes { get; }

        protected sealed override string ExtractType { get; } = Constants.SYS_NUM_ORDINAL; // "Ordinal";

        protected sealed override Regex AmbiguousFractionConnectorsRegex { get; }

        protected sealed override Regex RelativeReferenceRegex { get; }

        /// <summary>
        /// Returns the shared extractor for the options of <paramref name="config"/>,
        /// creating it on first use.
        /// </summary>
        public static OrdinalExtractor GetInstance(BaseNumberOptionsConfiguration config)
        {
            // Single atomic lookup-or-insert instead of ContainsKey + TryAdd + indexer.
            return Instances.GetOrAdd(config.Options.ToString(), _ => new OrdinalExtractor(config));
        }

        /// <summary>Builds the cache key for <paramref name="input"/>.</summary>
        protected override object GenKey(string input)
        {
            return (keyPrefix, input);
        }
    }

    // ---- Swedish/Extractors/PercentageExtractor.cs ----

    /// <summary>
    /// Swedish percentage extractor, layered on the Swedish <see cref="NumberExtractor"/>.
    /// </summary>
    public sealed class PercentageExtractor : BasePercentageExtractor
    {
        /// <summary>
        /// Creates the extractor and builds its pattern set from <paramref name="config"/>.
        /// </summary>
        /// <param name="config">Options used for the inner number extractor and pattern selection.</param>
        public PercentageExtractor(BaseNumberOptionsConfiguration config)
            : base(NumberExtractor.GetInstance(config))
        {
            // Options must be assigned before InitRegexes runs, because it reads Options.
            Options = config.Options;
            Regexes = InitRegexes();
        }

        protected override NumberOptions Options { get; }

        /// <summary>
        /// Collects the percentage pattern strings that apply under the current options
        /// and compiles them through BuildRegexes.
        /// </summary>
        protected override ImmutableHashSet<Regex> InitRegexes()
        {
            var regexStrs = new HashSet<string>
            {
                NumbersDefinitions.NumberWithSuffixPercentage,
                NumbersDefinitions.NumberWithPrefixPercentage,
            };

            // The fraction and preposition forms are only enabled in PercentageMode.
            if ((Options & NumberOptions.PercentageMode) != 0)
            {
                regexStrs.Add(NumbersDefinitions.FractionNumberWithSuffixPercentage);
                regexStrs.Add(NumbersDefinitions.NumberWithPrepositionPercentage);
            }

            return BuildRegexes(regexStrs);
        }
    }
}
a/.NET/Microsoft.Recognizers.Text.Number/Swedish/Parsers/SwedishNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Swedish/Parsers/SwedishNumberParserConfiguration.cs new file mode 100644 index 0000000000..3f3d0e79f7 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Swedish/Parsers/SwedishNumberParserConfiguration.cs @@ -0,0 +1,112 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Globalization; +using System.Linq; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.Number.Swedish +{ + public class SwedishNumberParserConfiguration : BaseNumberParserConfiguration + { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public SwedishNumberParserConfiguration(INumberOptionsConfiguration config) + { + + this.Config = config; + this.LanguageMarker = NumbersDefinitions.LangMarker; + this.CultureInfo = new CultureInfo(config.Culture); + this.CultureInfo.NumberFormat.NegativeSign = "-"; + + this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; + this.IsMultiDecimalSeparatorCulture = NumbersDefinitions.MultiDecimalSeparatorCulture; + + this.DecimalSeparatorChar = NumbersDefinitions.DecimalSeparatorChar; + this.FractionMarkerToken = NumbersDefinitions.FractionMarkerToken; + this.NonDecimalSeparatorChar = NumbersDefinitions.NonDecimalSeparatorChar; + this.HalfADozenText = NumbersDefinitions.HalfADozenText; + this.WordSeparatorToken = NumbersDefinitions.WordSeparatorToken; + + this.WrittenDecimalSeparatorTexts = NumbersDefinitions.WrittenDecimalSeparatorTexts; + this.WrittenGroupSeparatorTexts = NumbersDefinitions.WrittenGroupSeparatorTexts; + this.WrittenIntegerSeparatorTexts = NumbersDefinitions.WrittenIntegerSeparatorTexts; + 
this.WrittenFractionSeparatorTexts = NumbersDefinitions.WrittenFractionSeparatorTexts; + + this.CardinalNumberMap = NumbersDefinitions.CardinalNumberMap.ToImmutableDictionary(); + this.OrdinalNumberMap = NumbersDefinitions.OrdinalNumberMap.ToImmutableDictionary(); + this.RelativeReferenceOffsetMap = NumbersDefinitions.RelativeReferenceOffsetMap.ToImmutableDictionary(); + this.RelativeReferenceRelativeToMap = NumbersDefinitions.RelativeReferenceRelativeToMap.ToImmutableDictionary(); + this.RoundNumberMap = NumbersDefinitions.RoundNumberMap.ToImmutableDictionary(); + + this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags, RegexTimeOut); + this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags, RegexTimeOut); + this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags, RegexTimeOut); + this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut); + + SwedishWrittenFractionLookupMap = NumbersDefinitions.SwedishWrittenFractionLookupMap.ToImmutableDictionary(); + } + + public string NonDecimalSeparatorText { get; private set; } + + private static ImmutableDictionary SwedishWrittenFractionLookupMap { get; set; } + + public override IEnumerable NormalizeTokenSet(IEnumerable tokens, ParseResult context) + { + var fracWords = new List(); + var tokenList = tokens.ToList(); + var tokenLen = tokenList.Count; + + for (var i = 0; i < tokenLen; i++) + { + if ((i < tokenLen - 2) && tokenList[i + 1] == "-") + { + fracWords.Add(tokenList[i] + tokenList[i + 1] + tokenList[i + 2]); + i += 2; + } + else + { + fracWords.Add(tokenList[i]); + } + } + + return fracWords; + } + + public override long ResolveCompositeNumber(string numberStr) + { + // Swedish Ordinals can't be used for denoting fractions as in other languages, e.g. English. + // The default method uses the OrdinalNumberMap map to find a fraction expression. 
+ // When parsing swedish fractions, such as "en tjugoförstedel" (1/21) this method + // fails to find the corresponding Ordinal since this doesn't exists in the OrdinalNumberMap. + var resolvedNumber = base.ResolveCompositeNumber(numberStr); + + // So, if resolvedNumber == 0 we test for fractions and thus choose to + // use the fallback swedishWrittenFractionLookupMap map to try to + // find the corresponding value. + if (resolvedNumber == 0) + { + // The swedishWrittenFractionLookupMap map contains the leading parts of all + // tenths fractions, e.g. + // 21: "tjugoförst" -> "tjugoförst(a|e)del(s|ar(na)?s?)" + // 26: "tjugosjätted" -> "tjugosjätted(el(s|ar(na)?s?)" + var tempResult = SwedishWrittenFractionLookupMap.FirstOrDefault(k => + { + // Try to find an entry in the map matching the start of numberStr + // E.g. "tjugoförstedel" starts w/ "tjugoförst" -> return 21 + return numberStr.StartsWith(k.Key, StringComparison.OrdinalIgnoreCase); + }); + + resolvedNumber = tempResult.Value; + } + + return resolvedNumber; + + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Number/Swedish/Parsers/SwedishNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Swedish/Parsers/SwedishNumberRangeParserConfiguration.cs new file mode 100644 index 0000000000..8fa0aa8cb7 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Swedish/Parsers/SwedishNumberRangeParserConfiguration.cs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.Number.Swedish +{ + public class SwedishNumberRangeParserConfiguration : BaseNumberRangeParserConfiguration + { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + public SwedishNumberRangeParserConfiguration(INumberOptionsConfiguration config) + { + CultureInfo = new CultureInfo(config.Culture); + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options); + + NumberExtractor = Swedish.NumberExtractor.GetInstance(numConfig); + OrdinalExtractor = Swedish.OrdinalExtractor.GetInstance(numConfig); + NumberParser = new BaseNumberParser(new SwedishNumberParserConfiguration(config)); + + MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags); + LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags); + MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags); + LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags); + MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags); + LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/CardinalExtractor.cs index 248f1604a7..f9c1946a37 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/CardinalExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/DoubleExtractor.cs index b2a2101caf..1b49b40218 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/DoubleExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -19,31 +22,31 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau var regexes = new Dictionary { { - new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithMultiplierRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags), + new Regex(NumbersDefinitions.DoubleWithRoundNumber, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleAllFloatRegex, 
RegexFlags), + new Regex(NumbersDefinitions.DoubleAllFloatRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.TURKISH) }, { - new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.DoubleCaretExponentialNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/FractionExtractor.cs index 092c6d81a6..267a2f696f 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/FractionExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -21,15 +24,15 @@ private FractionExtractor(NumberMode mode, NumberOptions options) var regexes = new Dictionary { { - new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationWithSpacesRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNotationRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionNounWithArticleRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.TURKISH) }, }; @@ -38,16 +41,16 @@ private FractionExtractor(NumberMode mode, NumberOptions options) if (mode != NumberMode.Unit) { regexes.Add( - new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags), + new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.FRACTION_PREFIX, Constants.TURKISH)); } Regexes = regexes.ToImmutableDictionary(); } - internal sealed override ImmutableDictionary Regexes { get; } + public sealed override NumberOptions Options { get; } - protected sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/IntegerExtractor.cs index 84541b4210..462335e0f1 100644 --- 
a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/IntegerExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -19,31 +22,31 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa var regexes = new Dictionary { { - new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags), + new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags), + new Regex(NumbersDefinitions.NumbersWithDozenSuffix, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.TURKISH) }, { - new Regex(NumbersDefinitions.NegativeAllIntRegexWithLocks, RegexFlags), + new Regex(NumbersDefinitions.NegativeAllIntRegexWithLocks, RegexFlags, RegexTimeOut), 
RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.TURKISH) }, { - new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags), + new Regex(NumbersDefinitions.AllIntRegexWithDozenSuffixLocks, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.TURKISH) }, { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/MergedNumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/MergedNumberExtractor.cs index e7b96c7566..45972d4772 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/MergedNumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/MergedNumberExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; @@ -16,9 +19,9 @@ public MergedNumberExtractor(NumberMode mode, NumberOptions options) { NumberExtractor = Turkish.NumberExtractor.GetInstance(mode, options); RoundNumberIntegerRegexWithLocks = - new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags); + new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags, RegexTimeOut); ConnectorRegex = - new Regex(NumbersDefinitions.ConnectorRegex, RegexFlags); + new Regex(NumbersDefinitions.ConnectorRegex, RegexFlags, RegexTimeOut); } public sealed override BaseNumberExtractor NumberExtractor { get; set; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberExtractor.cs index 776d74c627..3577ce8226 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberExtractor.cs @@ -1,4 +1,7 @@ -using 
System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -16,9 +19,9 @@ public class NumberExtractor : BaseNumberExtractor private NumberExtractor(NumberMode mode, NumberOptions options) { - NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags); + NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags, RegexTimeOut); - RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags); + RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags, RegexTimeOut); Options = options; @@ -62,19 +65,19 @@ private NumberExtractor(NumberMode mode, NumberOptions options) { foreach (var item in NumbersDefinitions.AmbiguityFiltersDict) { - ambiguityBuilder.Add(new Regex(item.Key, RegexFlags), new Regex(item.Value, RegexFlags)); + ambiguityBuilder.Add(new Regex(item.Key, RegexFlags, RegexTimeOut), new Regex(item.Value, RegexFlags, RegexTimeOut)); } } AmbiguityFiltersDict = ambiguityBuilder.ToImmutable(); } + public sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM; // "Number"; protected sealed override Regex NegativeNumberTermsRegex { get; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberRangeExtractor.cs index 20df3d8db8..62537cdb7f 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberRangeExtractor.cs +++ 
b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberRangeExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -11,8 +14,9 @@ public class NumberRangeExtractor : BaseNumberRangeExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public NumberRangeExtractor(INumberOptionsConfiguration config) - : base(NumberExtractor.GetInstance(), - OrdinalExtractor.GetInstance(), + : base( + NumberExtractor.GetInstance(), + OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), new BaseNumberParser(new TurkishNumberParserConfiguration(config)), config) { @@ -21,57 +25,57 @@ public NumberRangeExtractor(INumberOptionsConfiguration config) { { // between...and... - new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUMBETWEEN }, { // more than ... less than ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUM }, { // less than ... more than ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex3, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUM }, { // from ... to/~/- ... - new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags), + new Regex(NumbersDefinitions.TwoNumberRangeRegex4, RegexFlags, RegexTimeOut), NumberRangeConstants.TWONUMTILL }, { // more/greater/higher than ... 
- new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // 30 and/or greater/higher - new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // less/smaller/lower than ... - new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex1, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, { // 30 and/or less/smaller/lower - new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessRegex2, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, { // equal to ... - new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeEqualRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.EQUAL }, { // equal to 30 or more than, larger than 30 or equal to ... - new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.MORE }, { // equal to 30 or less, smaller than 30 or equal ... 
- new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags), + new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut), NumberRangeConstants.LESS }, }; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/OrdinalExtractor.cs index 54e8bafe7d..ae6e1cfa8c 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/OrdinalExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Concurrent; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; @@ -14,27 +17,27 @@ public class OrdinalExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private OrdinalExtractor(NumberOptions options) - : base(options) + private OrdinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags); + RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags, RegexTimeOut); var regexes = new Dictionary { { - new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalSuffixRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.OrdinalNumericRegex, RegexFlags), + new Regex(NumbersDefinitions.OrdinalNumericRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.OrdinalTurkishRegex, RegexFlags), + new 
Regex(NumbersDefinitions.OrdinalTurkishRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.TURKISH) }, { - new Regex(NumbersDefinitions.RoundNumberOrdinalRegex, RegexFlags), + new Regex(NumbersDefinitions.RoundNumberOrdinalRegex, RegexFlags, RegexTimeOut), RegexTagGenerator.GenerateRegexTag(Constants.ORDINAL_PREFIX, Constants.TURKISH) }, }; @@ -50,16 +53,17 @@ private OrdinalExtractor(NumberOptions options) protected sealed override Regex RelativeReferenceRegex { get; } - public static OrdinalExtractor GetInstance(NumberOptions options = NumberOptions.None) + public static OrdinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = options.ToString(); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = config.Options.ToString(); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new OrdinalExtractor(options); - Instances.TryAdd(cacheKey, instance); + var instance = new OrdinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/PercentageExtractor.cs index 4f5596e014..b9d8aea544 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/PercentageExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberParserConfiguration.cs index 0de8c5daee..3907888977 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberParserConfiguration.cs @@ -1,5 +1,10 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Collections.Immutable; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; @@ -14,7 +19,7 @@ public TurkishNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; @@ -36,12 +41,47 @@ public TurkishNumberParserConfiguration(INumberOptionsConfiguration config) this.RelativeReferenceOffsetMap = NumbersDefinitions.RelativeReferenceOffsetMap.ToImmutableDictionary(); this.RelativeReferenceRelativeToMap = NumbersDefinitions.RelativeReferenceRelativeToMap.ToImmutableDictionary(); this.RoundNumberMap = NumbersDefinitions.RoundNumberMap.ToImmutableDictionary(); - this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags); - this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags); - this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags); - this.FractionPrepositionRegex = new 
Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags); + this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexFlags, RegexTimeOut); + this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexFlags, RegexTimeOut); + this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexFlags, RegexTimeOut); + this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags, RegexTimeOut); } public string NonDecimalSeparatorText { get; private set; } + + public override IEnumerable NormalizeTokenSet(IEnumerable tokens, ParseResult context) + { + var fracWords = new List(); + var tokenList = tokens.ToList(); + var tokenLen = tokenList.Count; + + for (var i = 0; i < tokenLen; i++) + { + if ((i < tokenLen - 2) && tokenList[i + 1] == "-") + { + fracWords.Add(tokenList[i] + tokenList[i + 1] + tokenList[i + 2]); + i += 2; + } + else + { + fracWords.Add(tokenList[i]); + } + } + + // The following piece of code is needed to compute the fraction pattern number+'buçuk' + // e.g. 'bir buçuk' ('one and a half') + var fracLen = fracWords.Count; + if (fracLen == 2) + { + if (fracWords.Last() == "buçuk") + { + fracWords.Insert(fracLen - 1, NumbersDefinitions.WrittenFractionSeparatorTexts[0]); + fracWords.Insert(fracLen, "bir"); + fracWords.Insert(fracLen + 1, NumbersDefinitions.FractionMarkerToken); + } + } + + return fracWords; + } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberRangeParserConfiguration.cs index 1d04d3435a..f7df93c982 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberRangeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberRangeParserConfiguration.cs @@ -1,42 +1,33 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Turkish; namespace Microsoft.Recognizers.Text.Number.Turkish { - public class TurkishNumberRangeParserConfiguration : INumberRangeParserConfiguration + public class TurkishNumberRangeParserConfiguration : BaseNumberRangeParserConfiguration { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public TurkishNumberRangeParserConfiguration(INumberOptionsConfiguration config) { CultureInfo = new CultureInfo(config.Culture); - NumberExtractor = Turkish.NumberExtractor.GetInstance(); - OrdinalExtractor = Turkish.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new TurkishNumberParserConfiguration(config)); - MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags); - LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags); - } - - public CultureInfo CultureInfo { get; private set; } - - public IExtractor NumberExtractor { get; private set; } - - public IExtractor OrdinalExtractor { get; private set; } + var numConfig = new BaseNumberOptionsConfiguration(config); - public IParser NumberParser { get; private set; } - - public Regex MoreOrEqual { get; private set; } - - public Regex LessOrEqual { get; private set; } - - public Regex MoreOrEqualSuffix { get; private set; } - - public Regex LessOrEqualSuffix { get; private set; } - - public Regex MoreOrEqualSeparate { get; private set; } + NumberExtractor = Turkish.NumberExtractor.GetInstance(NumberMode.Default, config.Options); + OrdinalExtractor = Turkish.OrdinalExtractor.GetInstance(numConfig); + NumberParser = new BaseNumberParser(new TurkishNumberParserConfiguration(config)); - public Regex LessOrEqualSeparate { get; private set; } + MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags, RegexTimeOut); + LessOrEqual = new 
Regex(NumbersDefinitions.LessOrEqual, RegexFlags, RegexTimeOut); + MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags, RegexTimeOut); + LessOrEqualSuffix = new Regex(NumbersDefinitions.LessOrEqualSuffix, RegexFlags, RegexTimeOut); + MoreOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeMoreSeparateRegex, RegexFlags, RegexTimeOut); + LessOrEqualSeparate = new Regex(NumbersDefinitions.OneNumberRangeLessSeparateRegex, RegexFlags, RegexTimeOut); + } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/TypeTag.cs b/.NET/Microsoft.Recognizers.Text.Number/TypeTag.cs index 8610a4e190..a3e01c6d95 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/TypeTag.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/TypeTag.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Number +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Number { public class TypeTag { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/AgeExtractorConfiguration.cs index 9ba43e517c..b6d8c5c020 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/AgeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/AgeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Chinese; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/ChineseNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/ChineseNumberWithUnitExtractorConfiguration.cs index 33fced4bb6..13f6024c72 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/ChineseNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/ChineseNumberWithUnitExtractorConfiguration.cs @@ -1,61 +1,53 @@ -using System.Collections.Generic; -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Chinese; +using Microsoft.Recognizers.Definitions.Utilities; +using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Chinese; using Microsoft.Recognizers.Text.Number.Config; +using Microsoft.Recognizers.Text.NumberWithUnit.Utilities; namespace Microsoft.Recognizers.Text.NumberWithUnit.Chinese { - public abstract class ChineseNumberWithUnitExtractorConfiguration : INumberWithUnitExtractorConfiguration + public abstract class ChineseNumberWithUnitExtractorConfiguration : BaseNumberWithUnitExtractorConfiguration { - private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly Regex CompoundUnitConnRegex = - new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexFlags); - - private static readonly Regex NonUnitsRegex = - new Regex(BaseUnits.PmNonUnitRegex, RegexFlags); + private static readonly Regex HalfUnitRegex = new 
Regex(NumbersWithUnitDefinitions.HalfUnitRegex, RegexFlags, RegexTimeOut); protected ChineseNumberWithUnitExtractorConfiguration(CultureInfo ci) + : base( + NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, + BaseUnits.PmNonUnitRegex, + string.Empty, + RegexFlags) { this.CultureInfo = ci; - this.UnitNumExtractor = new NumberExtractor(CJKNumberExtractorMode.ExtractAll); - this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; - this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; - this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; - } - - public Regex CompoundUnitConnectorRegex => CompoundUnitConnRegex; - - public Regex NonUnitRegex => NonUnitsRegex; - - public virtual Regex AmbiguousUnitNumberMultiplierRegex => null; - - public abstract string ExtractType { get; } - public CultureInfo CultureInfo { get; } + var numConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None); - public IExtractor UnitNumExtractor { get; } + this.UnitNumExtractor = new NumberExtractor(numConfig, CJKNumberExtractorMode.ExtractAll); - public string BuildPrefix { get; } - - public string BuildSuffix { get; } + this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; + this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; + this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; - public string ConnectorToken { get; } + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict); + } public IExtractor IntegerExtractor { get; } - public Dictionary AmbiguityFiltersDict { get; } = null; - - public abstract ImmutableDictionary SuffixList { get; } - - public abstract ImmutableDictionary PrefixList { get; } - - public abstract ImmutableList AmbiguousUnitList { get; } + public override void ExpandHalfSuffix(string source, ref List result, IOrderedEnumerable numbers) + { + // Expand Chinese phrase to the `half` patterns when it follows closely origin phrase. 
+ CommonUtils.ExpandHalfSuffix(source, ref result, numbers, HalfUnitRegex); + } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/CurrencyExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/CurrencyExtractorConfiguration.cs index 043631b9e7..2daf944b68 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/CurrencyExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/CurrencyExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Chinese; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/DimensionExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/DimensionExtractorConfiguration.cs index 6a78c41f71..8e53e29384 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/DimensionExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/DimensionExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Chinese; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/TemperatureExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/TemperatureExtractorConfiguration.cs index b49e896036..08a4b448eb 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/TemperatureExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/TemperatureExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; @@ -13,7 +16,7 @@ public class TemperatureExtractorConfiguration : ChineseNumberWithUnitExtractorC private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex AmbiguousUnitMultiplierRegex = - new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags); + new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags, RegexTimeOut); public TemperatureExtractorConfiguration() : this(new CultureInfo(Culture.Chinese)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/AgeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/AgeParserConfiguration.cs index 06c74d0bf6..59099f9c73 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/AgeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/AgeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; using Microsoft.Recognizers.Definitions.Chinese; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/ChineseNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/ChineseNumberWithUnitParserConfiguration.cs index 63c5f800a5..683274b2bf 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/ChineseNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/ChineseNumberWithUnitParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Globalization; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Chinese; @@ -9,9 +13,12 @@ public class ChineseNumberWithUnitParserConfiguration : BaseNumberWithUnitParser public ChineseNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { - this.InternalNumberExtractor = new NumberExtractor(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new ChineseNumberParserConfiguration( - new BaseNumberOptionsConfiguration(Culture.Chinese))); + + var numConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None); + + this.InternalNumberExtractor = new NumberExtractor(numConfig); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new ChineseNumberParserConfiguration(numConfig)); this.ConnectorToken = string.Empty; } @@ -20,5 +27,7 @@ public ChineseNumberWithUnitParserConfiguration(CultureInfo ci) public override IExtractor InternalNumberExtractor { get; } public override string ConnectorToken { get; } + + public override IDictionary TypeList { get; set; } = null; } } diff --git 
a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/CurrencyParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/CurrencyParserConfiguration.cs index 83b16c5f21..5d72ef3c57 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/CurrencyParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/CurrencyParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Chinese; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/DimensionParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/DimensionParserConfiguration.cs index 3dcb329453..0bef2e0f7c 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/DimensionParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/DimensionParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; using Microsoft.Recognizers.Definitions.Chinese; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/TemperatureParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/TemperatureParserConfiguration.cs index 1d977296ad..cceb43d9c7 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/TemperatureParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/TemperatureParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; using Microsoft.Recognizers.Definitions.Chinese; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Constants.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Constants.cs index e61f5c366d..eedd4666f3 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Constants.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Constants.cs @@ -1,5 +1,9 @@ -// ReSharper disable InconsistentNaming +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// ReSharper disable InconsistentNaming + +using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using Microsoft.Recognizers.Definitions; @@ -18,13 +22,38 @@ public static class Constants public const string SYS_UNIT_TEMPERATURE = "builtin.unit.temperature"; public const string SYS_UNIT_VOLUME = "builtin.unit.volume"; public const string SYS_UNIT_WEIGHT = "builtin.unit.weight"; + public const string SYS_UNIT_ANGLE = "builtin.unit.angle"; public const string SYS_NUM = "builtin.num"; + public const string INFORMATION = "Information"; + public const string AREA = "Area"; + public const string LENGTH = "Length"; + public const string SPEED = "Speed"; + public const string VOLUME = "Volume"; + public const string WEIGHT = "Weight"; + public const string ANGLE = "Angle"; + // For cases like '2:00 pm', both 'pm' and '00 pm' are not dimension public const string AMBIGUOUS_TIME_TERM = BaseUnits.AmbiguousTimeTerm; + // In certain cultures the unit can be split around the number, + // e.g. in Japanese "秒速100メートル" ('speed per second 100 meters' = 100m/s). + // This flag tells the recognizer to combine prefix and suffix in order to parse the unit correctly. + public const string SPLIT_UNIT = "split_unit"; + // For currencies without ISO codes, we use internal values prefixed by '_'. // These values should never be present in parse output. 
public const string FAKE_ISO_CODE_PREFIX = "_"; + + public static readonly HashSet ValidSubTypes = new HashSet() + { + INFORMATION, + AREA, + LENGTH, + SPEED, + VOLUME, + WEIGHT, + ANGLE, + }; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/AgeExtractorConfiguration.cs index 20e14d038b..c32814a0a2 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/AgeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/AgeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Dutch; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/AngleExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/AngleExtractorConfiguration.cs new file mode 100644 index 0000000000..f71e30a0bf --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/AngleExtractorConfiguration.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Dutch; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Dutch +{ + public class AngleExtractorConfiguration : DutchNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary AngleSuffixList = + NumbersWithUnitDefinitions.AngleSuffixList.ToImmutableDictionary(); + + public static readonly ImmutableList AmbiguousUnits = + NumbersWithUnitDefinitions.AmbiguousAngleUnitList.ToImmutableList(); + + public AngleExtractorConfiguration() + : this(new CultureInfo(Culture.Dutch)) + { + } + + public AngleExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => AngleSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; + + public override string ExtractType => Constants.SYS_UNIT_ANGLE; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/AreaExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/AreaExtractorConfiguration.cs index b89b1aa767..d3bb5a510a 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/AreaExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/AreaExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Dutch; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/CurrencyExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/CurrencyExtractorConfiguration.cs index 7cb765ce93..c223bdc285 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/CurrencyExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/CurrencyExtractorConfiguration.cs @@ -1,5 +1,11 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; using System.Globalization; +using System.Linq; using Microsoft.Recognizers.Definitions.Dutch; @@ -10,8 +16,25 @@ public class CurrencyExtractorConfiguration : DutchNumberWithUnitExtractorConfig public static readonly ImmutableDictionary CurrencySuffixList = NumbersWithUnitDefinitions.CurrencySuffixList.ToImmutableDictionary(); - public static readonly ImmutableDictionary CurrencyPrefixList = - NumbersWithUnitDefinitions.CurrencyPrefixList.ToImmutableDictionary(); + // CurrencyNameToIsoCodeMap dictionary (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary IsoCodeDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture)); + + // CurrencyNameToIsoCodeMap followed by '$' symbol (e.g. 
'AUD$') + public static readonly Dictionary IsoCodeWithSymbolDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture) + "$"); + + // Merge IsoCodeDict and IsoCodeWithSymbolDict + public static readonly Dictionary IsoCodeCombinedDict = IsoCodeDict.Concat(IsoCodeWithSymbolDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + // Merge IsoCodeCombinedDict with CurrencyPrefixList (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary CurrencyPrefixDict = NumbersWithUnitDefinitions.CurrencyPrefixList.Concat(IsoCodeCombinedDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + public static readonly ImmutableDictionary CurrencyPrefixList = CurrencyPrefixDict.ToImmutableDictionary(); public static readonly ImmutableDictionary FractionalUnitNameToCodeMap = NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary(); diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DimensionExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DimensionExtractorConfiguration.cs index 50534d1f2d..0cc305fd03 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DimensionExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DimensionExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using System.Linq; @@ -15,10 +18,27 @@ public class DimensionExtractorConfiguration : DutchNumberWithUnitExtractorConfi .Concat(SpeedExtractorConfiguration.SpeedSuffixList) .Concat(VolumeExtractorConfiguration.VolumeSuffixList) .Concat(WeightExtractorConfiguration.WeightSuffixList) + .Concat(AngleExtractorConfiguration.AngleSuffixList) + .ToImmutableDictionary(x => x.Key, x => x.Value); + + public static readonly ImmutableDictionary DimensionTypeList = + NumbersWithUnitDefinitions.InformationSuffixList.ToDictionary(x => x.Key, x => Constants.INFORMATION) + .Concat(AreaExtractorConfiguration.AreaSuffixList.ToDictionary(x => x.Key, x => Constants.AREA)) + .Concat(LengthExtractorConfiguration.LengthSuffixList.ToDictionary(x => x.Key, x => Constants.LENGTH)) + .Concat(SpeedExtractorConfiguration.SpeedSuffixList.ToDictionary(x => x.Key, x => Constants.SPEED)) + .Concat(VolumeExtractorConfiguration.VolumeSuffixList.ToDictionary(x => x.Key, x => Constants.VOLUME)) + .Concat(WeightExtractorConfiguration.WeightSuffixList.ToDictionary(x => x.Key, x => Constants.WEIGHT)) + .Concat(AngleExtractorConfiguration.AngleSuffixList.ToDictionary(x => x.Key, x => Constants.ANGLE)) .ToImmutableDictionary(x => x.Key, x => x.Value); private static readonly ImmutableList AmbiguousValues = - NumbersWithUnitDefinitions.AmbiguousDimensionUnitList.ToImmutableList(); + NumbersWithUnitDefinitions.AmbiguousDimensionUnitList + .Concat(LengthExtractorConfiguration.AmbiguousValues) + .Concat(VolumeExtractorConfiguration.AmbiguousValues) + .Concat(WeightExtractorConfiguration.AmbiguousValues) + .Concat(AngleExtractorConfiguration.AmbiguousUnits) + .Distinct() + .ToImmutableList(); public DimensionExtractorConfiguration() : base(new CultureInfo(Culture.Dutch)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DutchNumberWithUnitExtractorConfiguration.cs 
b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DutchNumberWithUnitExtractorConfiguration.cs index f067ff37fc..99f1d63e5d 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DutchNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DutchNumberWithUnitExtractorConfiguration.cs @@ -1,57 +1,46 @@ -using System.Collections.Generic; -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; + using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Dutch; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Dutch; namespace Microsoft.Recognizers.Text.NumberWithUnit.Dutch { - public abstract class DutchNumberWithUnitExtractorConfiguration : INumberWithUnitExtractorConfiguration + public abstract class DutchNumberWithUnitExtractorConfiguration : BaseNumberWithUnitExtractorConfiguration { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly Regex CompoundUnitConnRegex = - new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexFlags); - - private static readonly Regex NonUnitsRegex = - new Regex(BaseUnits.PmNonUnitRegex, RegexFlags); - protected DutchNumberWithUnitExtractorConfiguration(CultureInfo ci) + : base( + NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, + BaseUnits.PmNonUnitRegex, + string.Empty, + RegexFlags) { this.CultureInfo = ci; - this.UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit); + + var unitNumConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None, NumberMode.Unit); + this.UnitNumExtractor = NumberExtractor.GetInstance(unitNumConfig); + 
this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = string.Empty; - } - - public abstract string ExtractType { get; } - - public CultureInfo CultureInfo { get; } - - public IExtractor UnitNumExtractor { get; } - - public string BuildPrefix { get; } - public string BuildSuffix { get; } - - public string ConnectorToken { get; } - - public Regex CompoundUnitConnectorRegex => CompoundUnitConnRegex; - - public Regex NonUnitRegex => NonUnitsRegex; - - public virtual Regex AmbiguousUnitNumberMultiplierRegex => null; - - public Dictionary AmbiguityFiltersDict { get; } = null; - - public abstract ImmutableDictionary SuffixList { get; } - - public abstract ImmutableDictionary PrefixList { get; } + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict); + TemperatureAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.TemperatureAmbiguityFiltersDict); + DimensionAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.DimensionAmbiguityFiltersDict); + } - public abstract ImmutableList AmbiguousUnitList { get; } + public override void ExpandHalfSuffix(string source, ref List result, IOrderedEnumerable numbers) + { + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/LengthExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/LengthExtractorConfiguration.cs index e73b9a4489..692bf2720a 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/LengthExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/LengthExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Dutch; @@ -10,7 +13,7 @@ public class LengthExtractorConfiguration : DutchNumberWithUnitExtractorConfigur public static readonly ImmutableDictionary LengthSuffixList = NumbersWithUnitDefinitions.LengthSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousLengthUnitList.ToImmutableList(); public LengthExtractorConfiguration() diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/SpeedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/SpeedExtractorConfiguration.cs index 34c6b398c8..ac4c35601f 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/SpeedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/SpeedExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Dutch; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/TemperatureExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/TemperatureExtractorConfiguration.cs index 4e8e65ce95..cece980a1b 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/TemperatureExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/TemperatureExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; @@ -19,7 +22,7 @@ public class TemperatureExtractorConfiguration : DutchNumberWithUnitExtractorCon NumbersWithUnitDefinitions.AmbiguousTemperatureUnitList.ToImmutableList(); private static readonly Regex AmbiguousUnitMultiplierRegex = - new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags); + new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags, RegexTimeOut); public TemperatureExtractorConfiguration() : this(new CultureInfo(Culture.Dutch)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/VolumeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/VolumeExtractorConfiguration.cs index 78f2f8e545..e7e71af2e2 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/VolumeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/VolumeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Dutch; @@ -10,7 +13,7 @@ public class VolumeExtractorConfiguration : DutchNumberWithUnitExtractorConfigur public static readonly ImmutableDictionary VolumeSuffixList = NumbersWithUnitDefinitions.VolumeSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousVolumeUnitList.ToImmutableList(); public VolumeExtractorConfiguration() diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/WeightExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/WeightExtractorConfiguration.cs index f66a4bb278..a971d8a3ac 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/WeightExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/WeightExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Dutch; @@ -10,7 +13,7 @@ public class WeightExtractorConfiguration : DutchNumberWithUnitExtractorConfigur public static readonly ImmutableDictionary WeightSuffixList = NumbersWithUnitDefinitions.WeightSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousWeightUnitList.ToImmutableList(); public WeightExtractorConfiguration() diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/AgeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/AgeParserConfiguration.cs index 57e91878f2..f1f3ce3c09 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/AgeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/AgeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Dutch { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/AreaParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/AreaParserConfiguration.cs index 33962acf7e..ee07655289 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/AreaParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/AreaParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Dutch { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/CurrencyParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/CurrencyParserConfiguration.cs index abc07cea3d..ef1861c3c9 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/CurrencyParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/CurrencyParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Dutch; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/DimensionParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/DimensionParserConfiguration.cs index 4b8d141772..2238a2ed70 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/DimensionParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/DimensionParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Dutch { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/DutchNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/DutchNumberWithUnitParserConfiguration.cs index b94f468722..34c68be17c 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/DutchNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/DutchNumberWithUnitParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Globalization; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Dutch; @@ -9,10 +13,15 @@ public class DutchNumberWithUnitParserConfiguration : BaseNumberWithUnitParserCo public DutchNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { - this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new DutchNumberParserConfiguration( - new BaseNumberOptionsConfiguration(Culture.Dutch))); + + var config = new BaseNumberOptionsConfiguration(Culture.Dutch, NumberOptions.None); + + this.InternalNumberExtractor = NumberExtractor.GetInstance(config); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new DutchNumberParserConfiguration(config)); this.ConnectorToken = string.Empty; + + this.TypeList = DimensionExtractorConfiguration.DimensionTypeList; } public override IParser InternalNumberParser { get; } @@ -20,5 +29,7 @@ public DutchNumberWithUnitParserConfiguration(CultureInfo ci) public override IExtractor InternalNumberExtractor { get; } public override string ConnectorToken { get; } + + public override IDictionary TypeList { get; 
set; } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/LengthParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/LengthParserConfiguration.cs index 770c7bd84c..d1188c6f32 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/LengthParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/LengthParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Dutch { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/SpeedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/SpeedParserConfiguration.cs index eb37de26d9..02e46e041e 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/SpeedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/SpeedParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Dutch { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/TemperatureParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/TemperatureParserConfiguration.cs index b82f0eea97..4b2532ab7b 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/TemperatureParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/TemperatureParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Dutch { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/VolumeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/VolumeParserConfiguration.cs index 58ec3a5506..0d15a8b2b3 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/VolumeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/VolumeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Dutch { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/WeightParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/WeightParserConfiguration.cs index d2158f33e9..4b9f3d049b 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/WeightParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/WeightParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Dutch { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/AgeExtractorConfiguration.cs index a8842d1ae1..320efe8770 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/AgeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/AgeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.English; @@ -10,6 +13,12 @@ public class AgeExtractorConfiguration : EnglishNumberWithUnitExtractorConfigura public static readonly ImmutableDictionary AgeSuffixList = NumbersWithUnitDefinitions.AgeSuffixList.ToImmutableDictionary(); + public static readonly ImmutableDictionary AgePrefixList = + NumbersWithUnitDefinitions.AgePrefixList.ToImmutableDictionary(); + + private static readonly ImmutableList AmbiguousUnits = + NumbersWithUnitDefinitions.AmbiguousAgeUnitList.ToImmutableList(); + public AgeExtractorConfiguration() : this(new CultureInfo(Culture.English)) { @@ -22,9 +31,9 @@ public AgeExtractorConfiguration(CultureInfo ci) public override ImmutableDictionary SuffixList => AgeSuffixList; - public override ImmutableDictionary PrefixList => null; + public override ImmutableDictionary PrefixList => AgePrefixList; - public override ImmutableList AmbiguousUnitList => null; + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; public override string ExtractType => Constants.SYS_UNIT_AGE; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/AngleExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/AngleExtractorConfiguration.cs new file mode 100644 index 0000000000..4572417f8e --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/AngleExtractorConfiguration.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.English; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.English +{ + public class AngleExtractorConfiguration : EnglishNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary AngleSuffixList = + NumbersWithUnitDefinitions.AngleSuffixList.ToImmutableDictionary(); + + public static readonly ImmutableList AmbiguousUnits = + NumbersWithUnitDefinitions.AmbiguousAngleUnitList.ToImmutableList(); + + public AngleExtractorConfiguration() + : this(new CultureInfo(Culture.English)) + { + } + + public AngleExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => AngleSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; + + public override string ExtractType => Constants.SYS_UNIT_ANGLE; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/AreaExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/AreaExtractorConfiguration.cs index fdf3a08256..2e4869f9f2 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/AreaExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/AreaExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.English; @@ -10,6 +13,9 @@ public class AreaExtractorConfiguration : EnglishNumberWithUnitExtractorConfigur public static readonly ImmutableDictionary AreaSuffixList = NumbersWithUnitDefinitions.AreaSuffixList.ToImmutableDictionary(); + public static readonly ImmutableList AmbiguousUnits = + NumbersWithUnitDefinitions.AmbiguousAreaUnitList.ToImmutableList(); + public AreaExtractorConfiguration() : this(new CultureInfo(Culture.English)) { @@ -24,7 +30,7 @@ public AreaExtractorConfiguration(CultureInfo ci) public override ImmutableDictionary PrefixList => null; - public override ImmutableList AmbiguousUnitList => null; + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; public override string ExtractType => Constants.SYS_UNIT_AREA; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/CurrencyExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/CurrencyExtractorConfiguration.cs index a7238deede..25308851d8 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/CurrencyExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/CurrencyExtractorConfiguration.cs @@ -1,5 +1,11 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; using System.Globalization; +using System.Linq; using Microsoft.Recognizers.Definitions.English; @@ -7,16 +13,45 @@ namespace Microsoft.Recognizers.Text.NumberWithUnit.English { public class CurrencyExtractorConfiguration : EnglishNumberWithUnitExtractorConfiguration { - public static readonly ImmutableDictionary CurrencySuffixList = - NumbersWithUnitDefinitions.CurrencySuffixList.ToImmutableDictionary(); + // CurrencyNameToIsoCodeMap dictionary (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary IsoCodeDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture)); + + // CurrencyNameToIsoCodeMap followed by '$' symbol (e.g. 'AUD$') + public static readonly Dictionary IsoCodeWithSymbolDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture) + "$"); + + // CurrencyNameToIsoCodeMap preceded by 'M' symbol (e.g. 'MUSD') + public static readonly Dictionary IsoCodeWithMutiplierDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => "m" + x.Value.ToLower(CultureInfo.InvariantCulture)); + + // Merge IsoCodeDict and IsoCodeWithSymbolDict + public static readonly Dictionary IsoCodeCombinedDictWithSymbol = IsoCodeDict.Concat(IsoCodeWithSymbolDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? 
string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + // Merge IsoCodeCombinedDictWithSymbol (IsoCodeDict plus IsoCodeWithSymbolDict) and IsoCodeWithMutiplierDict + public static readonly Dictionary IsoCodeCombinedDict = IsoCodeCombinedDictWithSymbol.Concat(IsoCodeWithMutiplierDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + // Merge IsoCodeCombinedDict with CurrencyPrefixList (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary CurrencyPrefixDict = NumbersWithUnitDefinitions.CurrencyPrefixList.Concat(IsoCodeCombinedDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + // Merge IsoCodeCombinedDict with CurrencySuffixList (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary CurrencySuffixDict = NumbersWithUnitDefinitions.CurrencySuffixList.Concat(IsoCodeCombinedDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? 
string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + public static readonly ImmutableDictionary CurrencyPrefixList = CurrencyPrefixDict.ToImmutableDictionary(); - public static readonly ImmutableDictionary CurrencyPrefixList = - NumbersWithUnitDefinitions.CurrencyPrefixList.ToImmutableDictionary(); + public static readonly ImmutableDictionary CurrencySuffixList = CurrencySuffixDict.ToImmutableDictionary(); public static readonly ImmutableDictionary FractionalUnitNameToCodeMap = NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = + private static readonly ImmutableList AmbiguousUnits = NumbersWithUnitDefinitions.AmbiguousCurrencyUnitList.ToImmutableList(); public CurrencyExtractorConfiguration() @@ -33,7 +68,7 @@ public CurrencyExtractorConfiguration(CultureInfo ci) public override ImmutableDictionary PrefixList => CurrencyPrefixList; - public override ImmutableList AmbiguousUnitList => AmbiguousValues; + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; public override string ExtractType => Constants.SYS_UNIT_CURRENCY; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/DimensionExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/DimensionExtractorConfiguration.cs index 8439e92705..d3a21ecbb6 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/DimensionExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/DimensionExtractorConfiguration.cs @@ -1,7 +1,10 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Collections.Immutable; using System.Globalization; using System.Linq; - using Microsoft.Recognizers.Definitions.English; namespace Microsoft.Recognizers.Text.NumberWithUnit.English @@ -15,10 +18,33 @@ public class DimensionExtractorConfiguration : EnglishNumberWithUnitExtractorCon .Concat(SpeedExtractorConfiguration.SpeedSuffixList) .Concat(VolumeExtractorConfiguration.VolumeSuffixList) .Concat(WeightExtractorConfiguration.WeightSuffixList) + .Concat(AngleExtractorConfiguration.AngleSuffixList) + .ToImmutableDictionary(x => x.Key, x => x.Value); + + public static readonly ImmutableDictionary DimensionTypeList = + NumbersWithUnitDefinitions.InformationSuffixList.ToDictionary(x => x.Key, x => Constants.INFORMATION) + .Concat(AreaExtractorConfiguration.AreaSuffixList.ToDictionary(x => x.Key, x => Constants.AREA)) + .Concat(LengthExtractorConfiguration.LengthSuffixList.ToDictionary(x => x.Key, x => Constants.LENGTH)) + .Concat(SpeedExtractorConfiguration.SpeedSuffixList.ToDictionary(x => x.Key, x => Constants.SPEED)) + .Concat(VolumeExtractorConfiguration.VolumeSuffixList.ToDictionary(x => x.Key, x => Constants.VOLUME)) + .Concat(WeightExtractorConfiguration.WeightSuffixList.ToDictionary(x => x.Key, x => Constants.WEIGHT)) + .Concat(AngleExtractorConfiguration.AngleSuffixList.ToDictionary(x => x.Key, x => Constants.ANGLE)) .ToImmutableDictionary(x => x.Key, x => x.Value); - private static readonly ImmutableList AmbiguousValues = - NumbersWithUnitDefinitions.AmbiguousDimensionUnitList.ToImmutableList(); + public static readonly IDictionary LengthUnitToSubUnitMap = NumbersWithUnitDefinitions.LengthUnitToSubUnitMap; + + public static readonly IDictionary LengthSubUnitFractionalRatios = NumbersWithUnitDefinitions.LengthSubUnitFractionalRatios; + + private static readonly ImmutableList AmbiguousUnits = + NumbersWithUnitDefinitions.AmbiguousDimensionUnitList + .Concat(AreaExtractorConfiguration.AmbiguousUnits) + 
.Concat(LengthExtractorConfiguration.AmbiguousUnits) + .Concat(SpeedExtractorConfiguration.AmbiguousUnits) + .Concat(VolumeExtractorConfiguration.AmbiguousUnits) + .Concat(WeightExtractorConfiguration.AmbiguousUnits) + .Concat(AngleExtractorConfiguration.AmbiguousUnits) + .Distinct() + .ToImmutableList(); public DimensionExtractorConfiguration() : base(new CultureInfo(Culture.English)) @@ -34,7 +60,7 @@ public DimensionExtractorConfiguration(CultureInfo ci) public override ImmutableDictionary PrefixList => null; - public override ImmutableList AmbiguousUnitList => AmbiguousValues; + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; public override string ExtractType => Constants.SYS_UNIT_DIMENSION; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/EnglishNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/EnglishNumberWithUnitExtractorConfiguration.cs index de4ccbd0c8..04fb3e6b8d 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/EnglishNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/EnglishNumberWithUnitExtractorConfiguration.cs @@ -1,6 +1,9 @@ -using System.Collections.Generic; -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; @@ -11,52 +14,36 @@ namespace Microsoft.Recognizers.Text.NumberWithUnit.English { - public abstract class EnglishNumberWithUnitExtractorConfiguration : INumberWithUnitExtractorConfiguration + public abstract class EnglishNumberWithUnitExtractorConfiguration : BaseNumberWithUnitExtractorConfiguration { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly Regex CompoundUnitConnRegex = - new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexFlags); - - private static readonly Regex NonUnitsRegex = - new Regex(BaseUnits.PmNonUnitRegex, RegexFlags); - protected EnglishNumberWithUnitExtractorConfiguration(CultureInfo ci) + : base( + NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, + BaseUnits.PmNonUnitRegex, + NumbersWithUnitDefinitions.MultiplierRegex, + RegexFlags) { this.CultureInfo = ci; - this.UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit); + + // PlaceHolderMixed allows extracting numbers from expressions like 'USD15', '15USD' where there is no space between + // alphabetic and numeric characters (PlaceHolderDefault does not extract numbers from expressions like 'USD15'). 
+ var unitNumConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None, NumberMode.Unit, BaseNumbers.PlaceHolderMixed); + this.UnitNumExtractor = NumberExtractor.GetInstance(unitNumConfig); + this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = string.Empty; AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict); + TemperatureAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.TemperatureAmbiguityFiltersDict); + DimensionAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.DimensionAmbiguityFiltersDict); } - public abstract string ExtractType { get; } - - public CultureInfo CultureInfo { get; } - - public IExtractor UnitNumExtractor { get; } - - public string BuildPrefix { get; } - - public string BuildSuffix { get; } - - public string ConnectorToken { get; } - - public Regex CompoundUnitConnectorRegex => CompoundUnitConnRegex; - - public Regex NonUnitRegex => NonUnitsRegex; - - public virtual Regex AmbiguousUnitNumberMultiplierRegex => null; - - public Dictionary AmbiguityFiltersDict { get; } = null; - - public abstract ImmutableDictionary SuffixList { get; } - - public abstract ImmutableDictionary PrefixList { get; } - - public abstract ImmutableList AmbiguousUnitList { get; } + public override void ExpandHalfSuffix(string source, ref List result, IOrderedEnumerable numbers) + { + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/LengthExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/LengthExtractorConfiguration.cs index c073e0cc59..8646dca164 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/LengthExtractorConfiguration.cs +++ 
b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/LengthExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.English; @@ -10,7 +13,7 @@ public class LengthExtractorConfiguration : EnglishNumberWithUnitExtractorConfig public static readonly ImmutableDictionary LengthSuffixList = NumbersWithUnitDefinitions.LengthSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = + public static readonly ImmutableList AmbiguousUnits = NumbersWithUnitDefinitions.AmbiguousLengthUnitList.ToImmutableList(); public LengthExtractorConfiguration() @@ -27,7 +30,7 @@ public LengthExtractorConfiguration(CultureInfo ci) public override ImmutableDictionary PrefixList => null; - public override ImmutableList AmbiguousUnitList => AmbiguousValues; + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; public override string ExtractType => Constants.SYS_UNIT_LENGTH; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/SpeedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/SpeedExtractorConfiguration.cs index 213a9fd9cc..0bf1e06f09 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/SpeedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/SpeedExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.English; @@ -10,6 +13,9 @@ public class SpeedExtractorConfiguration : EnglishNumberWithUnitExtractorConfigu public static readonly ImmutableDictionary SpeedSuffixList = NumbersWithUnitDefinitions.SpeedSuffixList.ToImmutableDictionary(); + public static readonly ImmutableList AmbiguousUnits = + NumbersWithUnitDefinitions.AmbiguousSpeedUnitList.ToImmutableList(); + public SpeedExtractorConfiguration() : base(new CultureInfo(Culture.English)) { @@ -24,7 +30,7 @@ public SpeedExtractorConfiguration(CultureInfo ci) public override ImmutableDictionary PrefixList => null; - public override ImmutableList AmbiguousUnitList => null; + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; public override string ExtractType => Constants.SYS_UNIT_SPEED; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/TemperatureExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/TemperatureExtractorConfiguration.cs index 7da28521c1..ef907da746 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/TemperatureExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/TemperatureExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; @@ -14,11 +17,11 @@ public class TemperatureExtractorConfiguration : EnglishNumberWithUnitExtractorC private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly ImmutableList AmbiguousValues = + private static readonly ImmutableList AmbiguousUnits = NumbersWithUnitDefinitions.AmbiguousTemperatureUnitList.ToImmutableList(); private static readonly Regex AmbiguousUnitMultiplierRegex = - new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags); + new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags, RegexTimeOut); public TemperatureExtractorConfiguration() : this(new CultureInfo(Culture.English)) @@ -34,7 +37,7 @@ public TemperatureExtractorConfiguration(CultureInfo ci) public override ImmutableDictionary PrefixList => null; - public override ImmutableList AmbiguousUnitList => AmbiguousValues; + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; public override string ExtractType => Constants.SYS_UNIT_TEMPERATURE; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/VolumeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/VolumeExtractorConfiguration.cs index da36ea2d77..b21ddd1ccd 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/VolumeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/VolumeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.English; @@ -10,7 +13,7 @@ public class VolumeExtractorConfiguration : EnglishNumberWithUnitExtractorConfig public static readonly ImmutableDictionary VolumeSuffixList = NumbersWithUnitDefinitions.VolumeSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = + public static readonly ImmutableList AmbiguousUnits = NumbersWithUnitDefinitions.AmbiguousVolumeUnitList.ToImmutableList(); public VolumeExtractorConfiguration() @@ -27,7 +30,7 @@ public VolumeExtractorConfiguration(CultureInfo ci) public override ImmutableDictionary PrefixList => null; - public override ImmutableList AmbiguousUnitList => AmbiguousValues; + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; public override string ExtractType => Constants.SYS_UNIT_VOLUME; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/WeightExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/WeightExtractorConfiguration.cs index fa6f447c6c..60708fa1b2 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/WeightExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/WeightExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.English; @@ -10,7 +13,7 @@ public class WeightExtractorConfiguration : EnglishNumberWithUnitExtractorConfig public static readonly ImmutableDictionary WeightSuffixList = NumbersWithUnitDefinitions.WeightSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = + public static readonly ImmutableList AmbiguousUnits = NumbersWithUnitDefinitions.AmbiguousWeightUnitList.ToImmutableList(); public WeightExtractorConfiguration() @@ -27,7 +30,7 @@ public WeightExtractorConfiguration(CultureInfo ci) public override ImmutableDictionary PrefixList => null; - public override ImmutableList AmbiguousUnitList => AmbiguousValues; + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; public override string ExtractType => Constants.SYS_UNIT_WEIGHT; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/AgeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/AgeParserConfiguration.cs index fcff18b1a5..3e80fa6263 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/AgeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/AgeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.English { @@ -13,6 +16,7 @@ public AgeParserConfiguration(CultureInfo ci) : base(ci) { this.BindDictionary(AgeExtractorConfiguration.AgeSuffixList); + this.BindDictionary(AgeExtractorConfiguration.AgePrefixList); } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/AreaParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/AreaParserConfiguration.cs index dfe588c0b2..e9b2105429 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/AreaParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/AreaParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.English { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/CurrencyParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/CurrencyParserConfiguration.cs index 6e6dc71ba1..89f58e806a 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/CurrencyParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/CurrencyParserConfiguration.cs @@ -1,5 +1,9 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; +using System.Linq; using Microsoft.Recognizers.Definitions.English; namespace Microsoft.Recognizers.Text.NumberWithUnit.English @@ -17,6 +21,7 @@ public CurrencyParserConfiguration(CultureInfo ci) this.BindDictionary(CurrencyExtractorConfiguration.CurrencySuffixList); this.BindDictionary(CurrencyExtractorConfiguration.CurrencyPrefixList); this.CurrencyNameToIsoCodeMap = NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.ToImmutableDictionary(); + this.MultiplierIsoCodeList = CurrencyExtractorConfiguration.IsoCodeWithMutiplierDict.Values.ToList(); this.CurrencyFractionCodeList = NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary(); } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/DimensionParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/DimensionParserConfiguration.cs index 29de4704d7..a0034ebfed 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/DimensionParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/DimensionParserConfiguration.cs @@ -1,9 +1,17 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.English { public class DimensionParserConfiguration : EnglishNumberWithUnitParserConfiguration { + public static readonly IDictionary LengthUnitToSubUnitMap = DimensionExtractorConfiguration.LengthUnitToSubUnitMap; + + public static readonly IDictionary LengthSubUnitFractionalRatios = DimensionExtractorConfiguration.LengthSubUnitFractionalRatios; + public DimensionParserConfiguration() : this(new CultureInfo(Culture.English)) { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/EnglishNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/EnglishNumberWithUnitParserConfiguration.cs index 2401b51b74..05bbf47219 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/EnglishNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/EnglishNumberWithUnitParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Globalization; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.English; @@ -9,10 +13,14 @@ public class EnglishNumberWithUnitParserConfiguration : BaseNumberWithUnitParser public EnglishNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { - this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new EnglishNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + var numConfig = new BaseNumberOptionsConfiguration(Culture.English, NumberOptions.None); + + this.InternalNumberExtractor = NumberExtractor.GetInstance(numConfig); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new EnglishNumberParserConfiguration(numConfig)); this.ConnectorToken = string.Empty; + + this.TypeList = DimensionExtractorConfiguration.DimensionTypeList; } public override IParser InternalNumberParser { get; } @@ -20,5 +28,7 @@ public EnglishNumberWithUnitParserConfiguration(CultureInfo ci) public override IExtractor InternalNumberExtractor { get; } public override string ConnectorToken { get; } + + public override IDictionary TypeList { get; set; } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/LengthParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/LengthParserConfiguration.cs index da8429a61b..ae1207ed1b 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/LengthParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/LengthParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.English { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/SpeedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/SpeedParserConfiguration.cs index 42405f4154..628517ab0e 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/SpeedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/SpeedParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.English { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/TemperatureParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/TemperatureParserConfiguration.cs index 82fe6fdc8f..2d139d2a26 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/TemperatureParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/TemperatureParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.English { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/VolumeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/VolumeParserConfiguration.cs index b5073fb759..575a2ebb92 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/VolumeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/VolumeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.English { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/WeightParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/WeightParserConfiguration.cs index c3012b0b2a..54172a1ca0 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/WeightParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/WeightParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.English { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/BaseMergedUnitExtractor.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/BaseMergedUnitExtractor.cs index 435fd240cd..3de3a8b203 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/BaseMergedUnitExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/BaseMergedUnitExtractor.cs @@ -1,5 +1,11 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.Matcher; namespace Microsoft.Recognizers.Text.NumberWithUnit { @@ -57,7 +63,8 @@ private List MergeCompoundUnits(string source) continue; } - if (ers[idx].Data is ExtractResult er && !er.Data.ToString().StartsWith("Integer", StringComparison.Ordinal)) + if (ers[idx].Data is ExtractResult er && + !er.Data.ToString().StartsWith(Number.Constants.INTEGER_PREFIX, StringComparison.Ordinal)) { groups[idx + 1] = groups[idx] + 1; continue; @@ -65,8 +72,14 @@ private List MergeCompoundUnits(string source) var middleBegin = ers[idx].Start + ers[idx].Length ?? 0; var middleEnd = ers[idx + 1].Start ?? 0; + var length = middleEnd - middleBegin; - var middleStr = source.Substring(middleBegin, middleEnd - middleBegin).Trim(); + if (length < 0) + { + continue; + } + + var middleStr = source.Substring(middleBegin, length).Trim(); // Separated by whitespace if (string.IsNullOrEmpty(middleStr)) @@ -91,7 +104,8 @@ private List MergeCompoundUnits(string source) { if (idx == 0 || groups[idx] != groups[idx - 1]) { - var tmpExtractResult = ers[idx]; + var tmpExtractResult = ers[idx].Clone(); + tmpExtractResult.Data = new List { new ExtractResult @@ -103,6 +117,7 @@ private List MergeCompoundUnits(string source) Type = ers[idx].Type, }, }; + result.Add(tmpExtractResult); } @@ -132,12 +147,15 @@ private List MergeCompoundUnits(string source) result.RemoveAll(o => o.Type == Constants.SYS_NUM); + MergeMultiplier(source, result); + return result; } private void MergePureNumber(string source, List ers) { var numErs = config.UnitNumExtractor.Extract(source); + var unitNumbers = new List(); for (int i = 0, j = 0; i < numErs.Count; i++) { @@ -153,10 +171,20 @@ private void MergePureNumber(string source, List ers) continue; } + // Filter cases like "1 dollars 11a", "11" is not the fraction here. 
+ if (source.Length > numErs[i].Start + numErs[i].Length) + { + var endChar = source.Substring(numErs[i].Length + numErs[i].Start ?? 0, 1); + if (char.IsLetter(endChar[0]) && !SimpleTokenizer.IsCjk(endChar[0])) + { + continue; + } + } + var middleBegin = ers[j - 1].Start + ers[j - 1].Length ?? 0; var middleEnd = numErs[i].Start ?? 0; - var middleStr = source.Substring(middleBegin, middleEnd - middleBegin).Trim().ToLowerInvariant(); + var middleStr = source.Substring(middleBegin, middleEnd - middleBegin).Trim(); // Separated by whitespace if (string.IsNullOrEmpty(middleStr)) @@ -192,5 +220,30 @@ private void MergePureNumber(string source, List ers) ers.Sort((x, y) => x.Start - y.Start ?? 0); } + + // Add multiplier to extraction when it follows the unit e.g. "10 USD million" + private void MergeMultiplier(string source, List ers) + { + if (config.MultiplierRegex != null) + { + var multiplierMatches = config.MultiplierRegex.Matches(source); + Match[] multipliers = new Match[multiplierMatches.Count]; + multiplierMatches.CopyTo(multipliers, 0); + if (multipliers.Length > 0) + { + for (int i = 0; i < ers.Count; i++) + { + var afterMatch = multipliers.Where(o => ers[i].Start + ers[i].Length == o.Index).ToList(); + if (afterMatch.Count > 0 && ers[i].Data != null) + { + ers[i].Data = new List { ers[i].Clone() }; + ers[i].Length += afterMatch[0].Length; + ers[i].Text = source.Substring((int)ers[i].Start, (int)ers[i].Length); + ers[i].Metadata = new Metadata { HasMod = true }; + } + } + } + } + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/BaseNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/BaseNumberWithUnitExtractorConfiguration.cs new file mode 100644 index 0000000000..eb8cd75515 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/BaseNumberWithUnitExtractorConfiguration.cs @@ -0,0 +1,66 @@ +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Globalization; +using System.Linq; +using System.Reflection; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.NumberWithUnit +{ + public abstract class BaseNumberWithUnitExtractorConfiguration : INumberWithUnitExtractorConfiguration + { + public BaseNumberWithUnitExtractorConfiguration( + string compoundUnitConnectorRegex, + string pmNonUnitRegex, + string multiplierRegex, + RegexOptions options) + { + this.CompoundUnitConnectorRegex = new Regex(compoundUnitConnectorRegex, options, RegexTimeOut); + this.NonUnitRegex = new Regex(pmNonUnitRegex, options, RegexTimeOut); + if (!string.IsNullOrEmpty(multiplierRegex)) + { + this.MultiplierRegex = new Regex(multiplierRegex, options, RegexTimeOut); + } + } + + public static TimeSpan RegexTimeOut => NumberWithUnitRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + + public virtual ImmutableDictionary SuffixList { get; set; } + + public virtual ImmutableDictionary PrefixList { get; set; } + + public virtual ImmutableList AmbiguousUnitList { get; set; } + + public virtual string ExtractType { get; set; } + + public CultureInfo CultureInfo { get; set; } + + public IExtractor UnitNumExtractor { get; set; } + + public string BuildPrefix { get; set; } + + public string BuildSuffix { get; set; } + + public string ConnectorToken { get; set; } + + public Regex CompoundUnitConnectorRegex { get; set; } + + public Regex NonUnitRegex { get; set; } + + public Regex MultiplierRegex { get; set; } + + public virtual Regex AmbiguousUnitNumberMultiplierRegex { get; } + + public Dictionary AmbiguityFiltersDict { get; set; } + + public virtual Dictionary TemperatureAmbiguityFiltersDict { get; set; } + + public Dictionary DimensionAmbiguityFiltersDict { get; set; } + + public abstract void ExpandHalfSuffix(string source, ref List result, 
IOrderedEnumerable numbers); + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/INumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/INumberWithUnitExtractorConfiguration.cs index 3235e5db71..fa957c430d 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/INumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/INumberWithUnitExtractorConfiguration.cs @@ -1,6 +1,10 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.NumberWithUnit @@ -29,8 +33,16 @@ public interface INumberWithUnitExtractorConfiguration Regex NonUnitRegex { get; } + Regex MultiplierRegex { get; } + Regex AmbiguousUnitNumberMultiplierRegex { get; } Dictionary AmbiguityFiltersDict { get; } + + Dictionary TemperatureAmbiguityFiltersDict { get; } + + Dictionary DimensionAmbiguityFiltersDict { get; } + + void ExpandHalfSuffix(string source, ref List result, IOrderedEnumerable numbers); } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/NumberWithUnitExtractor.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/NumberWithUnitExtractor.cs index 214f65c853..9676901752 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/NumberWithUnitExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/NumberWithUnitExtractor.cs @@ -1,18 +1,28 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Linq; +using System.Reflection; using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Text.Matcher; namespace Microsoft.Recognizers.Text.NumberWithUnit { public class NumberWithUnitExtractor : IExtractor { + private readonly INumberWithUnitExtractorConfiguration config; private readonly StringMatcher suffixMatcher = new StringMatcher(MatchStrategy.TrieTree, new NumberWithUnitTokenizer()); private readonly StringMatcher prefixMatcher = new StringMatcher(MatchStrategy.TrieTree, new NumberWithUnitTokenizer()); + private readonly Regex separateRegex; + private readonly Regex singleCharUnitRegex = new Regex(BaseUnits.SingleCharUnitRegex, + RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.IgnoreCase, RegexTimeOut); private readonly int maxPrefixMatchLen; @@ -46,6 +56,8 @@ public NumberWithUnitExtractor(INumberWithUnitExtractorConfiguration config) separateRegex = BuildSeparateRegexFromSet(); } + protected static TimeSpan RegexTimeOut => NumberWithUnitRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + public static bool ValidateUnit(string source) { return !source.StartsWith("-", StringComparison.Ordinal); @@ -54,6 +66,7 @@ public static bool ValidateUnit(string source) public List Extract(string source) { var result = new List(); + IOrderedEnumerable numbers; if (!PreCheckStr(source)) { @@ -63,14 +76,46 @@ public List Extract(string source) var mappingPrefix = new Dictionary(); var sourceLen = source.Length; var prefixMatched = false; + var unitIsPrefix = new List(); MatchCollection nonUnitMatches = null; - var prefixMatch = prefixMatcher.Find(source).OrderBy(o => o.Start).ToList(); - var suffixMatch = suffixMatcher.Find(source).OrderBy(o => o.Start).ToList(); + var prefixMatches = prefixMatcher.Find(source).OrderBy(o => o.Start).ToList(); + var suffixMatches = suffixMatcher.Find(source).OrderBy(o => o.Start).ToList(); - 
if (prefixMatch.Count > 0 || suffixMatch.Count > 0) + // Remove matches with wrong length, e.g. both 'm2' and 'm 2' are extracted but only 'm2' represents a unit. + for (int i = suffixMatches.Count - 1; i >= 0; i--) { - var numbers = this.config.UnitNumExtractor.Extract(source).OrderBy(o => o.Start); + var m = suffixMatches[i]; + if (m.CanonicalValues.All(l => l.Length != m.Length)) + { + suffixMatches.RemoveAt(i); + } + } + + if (prefixMatches.Count > 0 || suffixMatches.Count > 0) + { + numbers = this.config.UnitNumExtractor.Extract(source).OrderBy(o => o.Start); + + // Checking if there are conflicting interpretations between currency unit as prefix and suffix for each number. + // For example, in Chinese, "$20,300美圆" should be broken into two entities instead of treating 20,300 as one number: "$20" and "300美圆". + if (numbers.Any() && CheckExtractorType(Constants.SYS_UNIT_CURRENCY) && prefixMatches.Any() && suffixMatches.Any()) + { + + foreach (var number in numbers) + { + int start = (int)number.Start, length = (int)number.Length; + var numberPrefix = prefixMatches.Any(o => o.Start + o.Length == number.Start); + var numberSuffix = suffixMatches.Any(o => o.Start == number.Start + number.Length); + + if (numberPrefix != false && numberSuffix != false && number.Text.Contains(",")) + { + int commaIndex = (int)number.Start + number.Text.IndexOf(",", StringComparison.Ordinal); + source = source.Substring(0, commaIndex) + " " + source.Substring(commaIndex + 1); + } + } + + numbers = this.config.UnitNumExtractor.Extract(source).OrderBy(o => o.Start); + } // Special case for cases where number multipliers clash with unit var ambiguousMultiplierRegex = this.config.AmbiguousUnitNumberMultiplierRegex; @@ -99,21 +144,28 @@ public List Extract(string source) var maxFindPref = Math.Min(maxPrefixMatchLen, number.Start.Value); var maxFindSuff = sourceLen - start - length; + var closeMatch = false; if (maxFindPref != 0) { // Scan from left to right, find the longest match var 
lastIndex = start; MatchResult bestMatch = null; - foreach (var m in prefixMatch) + foreach (var m in prefixMatches) { if (m.Length > 0 && m.End > start) { break; } - if (m.Length > 0 && source.Substring(m.Start, lastIndex - m.Start).Trim() == m.Text) + var unitStr = source.Substring(m.Start, lastIndex - m.Start); + if (m.Length > 0 && unitStr.Trim() == m.Text) { + if (unitStr == m.Text) + { + closeMatch = true; + } + bestMatch = m; break; } @@ -123,18 +175,47 @@ public List Extract(string source) { var offSet = lastIndex - bestMatch.Start; var unitStr = source.Substring(bestMatch.Start, offSet); - mappingPrefix.Add(number.Start.Value, new PrefixUnitResult { Offset = offSet, UnitStr = unitStr }); + mappingPrefix[number.Start.Value] = new PrefixUnitResult { Offset = offSet, UnitStr = unitStr }; } } mappingPrefix.TryGetValue(start, out PrefixUnitResult prefixUnit); + + // For currency unit, such as "$ 10 $ 20", get candidate "$ 10" "10 $" "$20" then select to get result. + // So add "$ 10" to result here, then get "10 $" in the suffixMatch. + // But for case like "摄氏温度10度", "摄氏温度10" will skip this and continue to extend the suffix. + if (prefixUnit != null && !prefixMatched && CheckExtractorType(Constants.SYS_UNIT_CURRENCY)) + { + var er = new ExtractResult + { + Start = number.Start - prefixUnit.Offset, + Length = number.Length + prefixUnit.Offset, + Text = prefixUnit.UnitStr + number.Text, + Type = this.config.ExtractType, + }; + + // Relative position will be used in Parser + var numberData = number.Clone(); + numberData.Start = start - er.Start; + er.Data = numberData; + + result.Add(er); + unitIsPrefix.Add(true); + } + if (maxFindSuff > 0) { + // If the number already get close prefix currency unit, skip the suffix match. 
+ if (CheckExtractorType(Constants.SYS_UNIT_CURRENCY) && closeMatch) + { + continue; + } + // find the best suffix unit var maxlen = 0; var firstIndex = start + length; - foreach (var m in suffixMatch) + foreach (var m in suffixMatches) { if (m.Length > 0 && m.Start >= firstIndex) { @@ -142,10 +223,20 @@ public List Extract(string source) if (maxlen < endpos) { var midStr = source.Substring(firstIndex, m.Start - firstIndex); - if (string.IsNullOrWhiteSpace(midStr) || midStr.Trim().Equals(this.config.ConnectorToken)) + if (string.IsNullOrWhiteSpace(midStr) || midStr.Trim().Equals(this.config.ConnectorToken, StringComparison.Ordinal)) { maxlen = endpos; } + + // Check for brackets + if (m.End < source.Length && ( + (midStr.EndsWith("(") && source[m.End] == ')') || + (midStr.EndsWith("[") && source[m.End] == ']') || + (midStr.EndsWith("{") && source[m.End] == '}') || + (midStr.EndsWith("<") && source[m.End] == '>'))) + { + maxlen = m.End - firstIndex + 1; + } } } } @@ -153,6 +244,7 @@ public List Extract(string source) if (maxlen != 0) { var substr = source.Substring(start, length + maxlen); + var er = new ExtractResult { Start = start, @@ -161,7 +253,7 @@ public List Extract(string source) Type = this.config.ExtractType, }; - if (prefixUnit != null) + if (prefixUnit != null && !CheckExtractorType(Constants.SYS_UNIT_CURRENCY)) { prefixMatched = true; er.Start -= prefixUnit.Offset; @@ -170,8 +262,9 @@ public List Extract(string source) } // Relative position will be used in Parser - number.Start = start - er.Start; - er.Data = number; + var numberData = number.Clone(); + numberData.Start = start - er.Start; + er.Data = numberData; // Special treatment, handle cases like '2:00 pm', '00 pm' is not dimension var isNotUnit = false; @@ -198,10 +291,11 @@ public List Extract(string source) } result.Add(er); + unitIsPrefix.Add(false); } } - if (prefixUnit != null && !prefixMatched) + if (prefixUnit != null && !prefixMatched && !CheckExtractorType(Constants.SYS_UNIT_CURRENCY)) 
{ var er = new ExtractResult { @@ -212,12 +306,18 @@ public List Extract(string source) }; // Relative position will be used in Parser - number.Start = start - er.Start; - er.Data = number; + var numberData = number.Clone(); + numberData.Start = start - er.Start; + er.Data = numberData; + result.Add(er); } } } + else + { + numbers = null; + } // Extract Separate unit if (separateRegex != null) @@ -229,10 +329,47 @@ public List Extract(string source) ExtractSeparateUnits(source, result, nonUnitMatches); - // Remove common ambiguous cases - result = FilterAmbiguity(result, source); } + // Remove common ambiguous cases + result = FilterAmbiguity(result, source); + + // Remove entity-specific ambiguous cases + if (CheckExtractorType(Constants.SYS_UNIT_TEMPERATURE)) + { + result = FilterAmbiguity(result, source, this.config.TemperatureAmbiguityFiltersDict); + } + else if (CheckExtractorType(Constants.SYS_UNIT_DIMENSION)) + { + result = FilterAmbiguity(result, source, this.config.DimensionAmbiguityFiltersDict); + + // Only compound those dimensions that set within the LengthUnitToSubUnitMap, for now, it supports compound with foot and inch. + if (this.config as English.DimensionExtractorConfiguration != null + && suffixMatches.Count > 0 + && result != null + && result.Count >= 2) + { + var compoundUnit = English.DimensionExtractorConfiguration.DimensionSuffixList + .Where(kvp => English.DimensionExtractorConfiguration.LengthUnitToSubUnitMap.ContainsKey(kvp.Key)) + .Where(kvp => kvp.Value.Split('|').Contains(suffixMatches[0].Text)) + .Select(kvp => kvp) + .ToList(); + + if (compoundUnit.Any()) + { + result = MergeCompoundUnits(result, source); + } + } + } + + if (CheckExtractorType(Constants.SYS_UNIT_CURRENCY)) + { + result = SelectCandidates(source, result, unitIsPrefix); + } + + // Expand Chinese phrase to the `half` patterns when it follows closely origin phrase. 
+ this.config.ExpandHalfSuffix(source, ref result, numbers); + return result; } @@ -244,6 +381,7 @@ public void ExtractSeparateUnits(string source, List numDependRes { int start = numDependResult.Start.Value; int i = 0; + do { matchResult[start + i++] = true; @@ -331,7 +469,7 @@ protected HashSet BuildRegexFromSet(IEnumerable collection, bool var pattern = $@"{this.config.BuildPrefix}({string.Join("|", regexTokens)}){this.config.BuildSuffix}"; var options = RegexOptions.Singleline | RegexOptions.ExplicitCapture | (ignoreCase ? RegexOptions.IgnoreCase : RegexOptions.None); - var regex = new Regex(pattern, options); + var regex = new Regex(pattern, options, RegexTimeOut); regexes.Add(regex); } @@ -392,7 +530,7 @@ protected Regex BuildSeparateRegexFromSet(bool ignoreCase = false) var pattern = $@"{this.config.BuildPrefix}({string.Join("|", regexTokens)}){this.config.BuildSuffix}"; var options = RegexOptions.Singleline | RegexOptions.ExplicitCapture | (ignoreCase ? RegexOptions.IgnoreCase : RegexOptions.None); - var regex = new Regex(pattern, options); + var regex = new Regex(pattern, options, RegexTimeOut); return regex; } @@ -407,25 +545,323 @@ private static bool IsMatchOverlap(Match match, Match nonUnitMatch) return isSubMatch; } - private List FilterAmbiguity(List extractResults, string text) + private List FilterAmbiguity(List extractResults, string text, Dictionary ambiguityFiltersDict = null) { - if (this.config.AmbiguityFiltersDict != null) + // If no filter is specified, use common AmbiguityFilter + if (ambiguityFiltersDict == null) + { + ambiguityFiltersDict = this.config.AmbiguityFiltersDict; + } + + if (ambiguityFiltersDict != null) { - foreach (var regex in this.config.AmbiguityFiltersDict) + foreach (var regex in ambiguityFiltersDict) { foreach (var extractResult in extractResults) { if (regex.Key.IsMatch(extractResult.Text)) { var matches = regex.Value.Matches(text).Cast(); - extractResults = extractResults.Where(er => !matches.Any(m => m.Index < 
er.Start + er.Length && m.Index + m.Length > er.Start)) - .ToList(); + extractResults = extractResults.Where(er => + !matches.Any(m => m.Index < er.Start + er.Length && + m.Index + m.Length > er.Start)).ToList(); } } } } + // Filter single-char units if not exact match + extractResults = extractResults.Where(er => !(er.Length != text.Length && singleCharUnitRegex.IsMatch(er.Text))).ToList(); + return extractResults; } + + /// + /// Merge compound units when extracting, like compound 5 foot 3 inch as one entity. + /// + /// Extract results. + /// Input text. + /// The compounded units. + private List MergeCompoundUnits(List ers, string source) + { + var result = new List(); + + MergePureNumber(source, ers); + + if (ers.Count == 0) + { + return result; + } + + var groups = new int[ers.Count]; + groups[0] = 0; + + for (var idx = 0; idx < ers.Count - 1; idx++) + { + if (ers[idx].Type != ers[idx + 1].Type && + !ers[idx].Type.Equals(Constants.SYS_NUM, StringComparison.Ordinal) && + !ers[idx + 1].Type.Equals(Constants.SYS_NUM, StringComparison.Ordinal)) + { + continue; + } + + if (ers[idx].Data is ExtractResult er && + !er.Data.ToString().StartsWith(Number.Constants.INTEGER_PREFIX, StringComparison.Ordinal)) + { + groups[idx + 1] = groups[idx] + 1; + continue; + } + + var middleBegin = ers[idx].Start + ers[idx].Length ?? 0; + var middleEnd = ers[idx + 1].Start ?? 
0; + var length = middleEnd - middleBegin; + + if (length < 0) + { + continue; + } + + var middleStr = source.Substring(middleBegin, length).Trim(); + + // Separated by whitespace + if (string.IsNullOrEmpty(middleStr)) + { + groups[idx + 1] = groups[idx]; + continue; + } + + // Separated by connectors + var match = config.CompoundUnitConnectorRegex.Match(middleStr); + if (match.Success && match.Index == 0 && match.Length == middleStr.Length) + { + groups[idx + 1] = groups[idx]; + } + else + { + groups[idx + 1] = groups[idx] + 1; + } + } + + for (var idx = 0; idx < ers.Count; idx++) + { + if (idx == 0 || groups[idx] != groups[idx - 1]) + { + var tmpExtractResult = ers[idx].Clone(); + + tmpExtractResult.Data = new List + { + new ExtractResult + { + Data = ers[idx].Data, + Length = ers[idx].Length, + Start = ers[idx].Start, + Text = ers[idx].Text, + Type = ers[idx].Type, + }, + }; + + result.Add(tmpExtractResult); + } + + // Reduce extract results in same group + if (idx + 1 < ers.Count && groups[idx + 1] == groups[idx]) + { + var group = groups[idx]; + + var periodBegin = result[group].Start ?? 0; + var periodEnd = (ers[idx + 1].Start ?? 0) + (ers[idx + 1].Length ?? 
0); + + result[group].Length = periodEnd - periodBegin; + result[group].Text = source.Substring(periodBegin, periodEnd - periodBegin); + result[group].Type = Constants.SYS_UNIT_CURRENCY; + (result[group].Data as List)?.Add(ers[idx + 1]); + } + } + + for (var idx = 0; idx < result.Count; idx++) + { + var innerData = result[idx].Data as List; + if (innerData?.Count == 1) + { + result[idx] = innerData[0]; + } + } + + result.RemoveAll(o => o.Type == Constants.SYS_NUM); + + return result; + } + + private void MergePureNumber(string source, List ers) + { + var numErs = config.UnitNumExtractor.Extract(source); + + var unitNumbers = new List(); + for (int i = 0, j = 0; i < numErs.Count; i++) + { + bool hasBehindExtraction = false; + while (j < ers.Count && ers[j].Start + ers[j].Length < numErs[i].Start) + { + hasBehindExtraction = true; + j++; + } + + if (!hasBehindExtraction) + { + continue; + } + + // Filter cases like "1 dollars 11a", "11" is not the fraction here. + if (source.Length > numErs[i].Start + numErs[i].Length) + { + var endChar = source.Substring(numErs[i].Length + numErs[i].Start ?? 0, 1); + if (char.IsLetter(endChar[0]) && !SimpleTokenizer.IsCjk(endChar[0])) + { + continue; + } + } + + var middleBegin = ers[j - 1].Start + ers[j - 1].Length ?? 0; + var middleEnd = numErs[i].Start ?? 
0; + + var middleStr = source.Substring(middleBegin, middleEnd - middleBegin).Trim(); + + // Separated by whitespace + if (string.IsNullOrEmpty(middleStr)) + { + unitNumbers.Add(numErs[i]); + continue; + } + + // Separated by connectors + var match = config.CompoundUnitConnectorRegex.Match(middleStr); + if (match.Success && match.Index == 0 && match.Length == middleStr.Length) + { + unitNumbers.Add(numErs[i]); + } + } + + foreach (var extractResult in unitNumbers) + { + var overlap = false; + foreach (var er in ers) + { + if (er.Start <= extractResult.Start && er.Start + er.Length >= extractResult.Start) + { + overlap = true; + } + } + + if (!overlap) + { + ers.Add(extractResult); + } + } + + ers.Sort((x, y) => x.Start - y.Start ?? 0); + } + + private bool CheckExtractorType(string extractorType) + { + return this.config.ExtractType.Equals(extractorType, StringComparison.Ordinal); + } + + private List SelectCandidates(string source, List extractResults, List unitIsPrefix) + { + int totalCandidate = unitIsPrefix.Count; + bool haveConflict = false; + for (var index = 1; index < totalCandidate; index++) + { + if (extractResults[index - 1].Start + extractResults[index - 1].Length > extractResults[index].Start) + { + haveConflict = true; + } + } + + if (!haveConflict) + { + return extractResults; + } + + var prefixResult = new List(); + var suffixResult = new List(); + int currentEnd = -1; + for (var index = 0; index < totalCandidate; index++) + { + if (currentEnd < extractResults[index].Start) + { + currentEnd = (int)(extractResults[index].Start + extractResults[index].Length); + prefixResult.Add(extractResults[index]); + } + else + { + if (unitIsPrefix[index]) + { + prefixResult.RemoveAt(prefixResult.Count - 1); + currentEnd = (int)(extractResults[index].Start + extractResults[index].Length); + prefixResult.Add(extractResults[index]); + } + } + } + + currentEnd = source.Length; + for (var index = totalCandidate - 1; index >= 0; index--) + { + if (currentEnd >= 
extractResults[index].Start + extractResults[index].Length) + { + currentEnd = (int)extractResults[index].Start; + suffixResult.Add(extractResults[index]); + } + else + { + if (!unitIsPrefix[index]) + { + suffixResult.RemoveAt(suffixResult.Count - 1); + currentEnd = (int)extractResults[index].Start; + suffixResult.Add(extractResults[index]); + } + } + } + + // Find prefix units with no space, e.g. '$50'. + var noSpaceUnits = new List(); + foreach (var prefix in prefixResult) + { + if (prefix.Data is ExtractResult numberResult) + { + var unitStr = prefix.Text.Substring(0, (int)numberResult.Start); + if (unitStr.Length > 0 && unitStr.Equals(unitStr.TrimEnd(), StringComparison.Ordinal)) + { + noSpaceUnits.Add(new Token((int)prefix.Start, unitStr.Length)); + } + } + } + + // Remove from suffixResult units that are also prefix units with no space, + // e.g. in '1 $50', '$' should not be considered a suffix unit. + for (var index = suffixResult.Count - 1; index >= 0; index--) + { + var suffix = suffixResult[index]; + if (noSpaceUnits.Any(o => suffix.Start <= o.Start && suffix.Start + suffix.Length >= o.End)) + { + suffixResult.RemoveAt(index); + } + } + + // Add Separate unit + for (var index = totalCandidate; index < extractResults.Count; index++) + { + prefixResult.Add(extractResults[index]); + suffixResult.Add(extractResults[index]); + } + + if (suffixResult.Count >= prefixResult.Count) + { + suffixResult.Sort((x, y) => x.Start - y.Start ?? 
0); + return suffixResult; + } + + return prefixResult; + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/PrefixUnitResult.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/PrefixUnitResult.cs index 858bd899bb..9081837e25 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/PrefixUnitResult.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/PrefixUnitResult.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.NumberWithUnit +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.NumberWithUnit { public class PrefixUnitResult { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/AgeExtractorConfiguration.cs index bbf44fa519..8e6e8eddca 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/AgeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/AgeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.French; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/AngleExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/AngleExtractorConfiguration.cs new file mode 100644 index 0000000000..5b78360c10 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/AngleExtractorConfiguration.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.French; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.French +{ + public class AngleExtractorConfiguration : FrenchNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary AngleSuffixList = + NumbersWithUnitDefinitions.AngleSuffixList.ToImmutableDictionary(); + + public static readonly ImmutableList AmbiguousUnits = + NumbersWithUnitDefinitions.AmbiguousAngleUnitList.ToImmutableList(); + + public AngleExtractorConfiguration() + : this(new CultureInfo(Culture.French)) + { + } + + public AngleExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => AngleSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; + + public override string ExtractType => Constants.SYS_UNIT_ANGLE; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/AreaExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/AreaExtractorConfiguration.cs index f91037515a..fb7927f8ed 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/AreaExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/AreaExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.French; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/CurrencyExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/CurrencyExtractorConfiguration.cs index 59ffb87eaf..034ad0fc5e 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/CurrencyExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/CurrencyExtractorConfiguration.cs @@ -1,5 +1,11 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; using System.Globalization; +using System.Linq; using Microsoft.Recognizers.Definitions.French; @@ -10,8 +16,25 @@ public class CurrencyExtractorConfiguration : FrenchNumberWithUnitExtractorConfi public static readonly ImmutableDictionary CurrencySuffixList = NumbersWithUnitDefinitions.CurrencySuffixList.ToImmutableDictionary(); - public static readonly ImmutableDictionary CurrencyPrefixList = - NumbersWithUnitDefinitions.CurrencyPrefixList.ToImmutableDictionary(); + // CurrencyNameToIsoCodeMap dictionary (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary IsoCodeDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture)); + + // CurrencyNameToIsoCodeMap followed by '$' symbol (e.g. 
'AUD$') + public static readonly Dictionary IsoCodeWithSymbolDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture) + "$"); + + // Merge IsoCodeDict and IsoCodeWithSymbolDict + public static readonly Dictionary IsoCodeCombinedDict = IsoCodeDict.Concat(IsoCodeWithSymbolDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + // Merge IsoCodeCombinedDict with CurrencyPrefixList (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary CurrencyPrefixDict = NumbersWithUnitDefinitions.CurrencyPrefixList.Concat(IsoCodeCombinedDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + public static readonly ImmutableDictionary CurrencyPrefixList = CurrencyPrefixDict.ToImmutableDictionary(); private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousCurrencyUnitList.ToImmutableList(); diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/DimensionExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/DimensionExtractorConfiguration.cs index 9f48e32d35..539b7595f9 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/DimensionExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/DimensionExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using System.Linq; @@ -15,10 +18,26 @@ public class DimensionExtractorConfiguration : FrenchNumberWithUnitExtractorConf .Concat(SpeedExtractorConfiguration.SpeedSuffixList) .Concat(VolumeExtractorConfiguration.VolumeSuffixList) .Concat(WeightExtractorConfiguration.WeightSuffixList) + .Concat(AngleExtractorConfiguration.AngleSuffixList) + .ToImmutableDictionary(x => x.Key, x => x.Value); + + public static readonly ImmutableDictionary DimensionTypeList = + NumbersWithUnitDefinitions.InformationSuffixList.ToDictionary(x => x.Key, x => Constants.INFORMATION) + .Concat(AreaExtractorConfiguration.AreaSuffixList.ToDictionary(x => x.Key, x => Constants.AREA)) + .Concat(LengthExtractorConfiguration.LengthSuffixList.ToDictionary(x => x.Key, x => Constants.LENGTH)) + .Concat(SpeedExtractorConfiguration.SpeedSuffixList.ToDictionary(x => x.Key, x => Constants.SPEED)) + .Concat(VolumeExtractorConfiguration.VolumeSuffixList.ToDictionary(x => x.Key, x => Constants.VOLUME)) + .Concat(WeightExtractorConfiguration.WeightSuffixList.ToDictionary(x => x.Key, x => Constants.WEIGHT)) + .Concat(AngleExtractorConfiguration.AngleSuffixList.ToDictionary(x => x.Key, x => Constants.ANGLE)) .ToImmutableDictionary(x => x.Key, x => x.Value); private static readonly ImmutableList AmbiguousValues = - NumbersWithUnitDefinitions.AmbiguousDimensionUnitList.ToImmutableList(); + NumbersWithUnitDefinitions.AmbiguousDimensionUnitList + .Concat(VolumeExtractorConfiguration.AmbiguousValues) + .Concat(WeightExtractorConfiguration.AmbiguousValues) + .Concat(AngleExtractorConfiguration.AmbiguousUnits) + .Distinct() + .ToImmutableList(); public DimensionExtractorConfiguration() : base(new CultureInfo(Culture.French)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/FrenchNumberWithUnitExtractorConfiguration.cs 
b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/FrenchNumberWithUnitExtractorConfiguration.cs index 93d9009f72..cf85240e7e 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/FrenchNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/FrenchNumberWithUnitExtractorConfiguration.cs @@ -1,6 +1,9 @@ -using System.Collections.Generic; -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; @@ -11,51 +14,33 @@ namespace Microsoft.Recognizers.Text.NumberWithUnit.French { - public abstract class FrenchNumberWithUnitExtractorConfiguration : INumberWithUnitExtractorConfiguration + public abstract class FrenchNumberWithUnitExtractorConfiguration : BaseNumberWithUnitExtractorConfiguration { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly Regex CompoundUnitConnRegex = - new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexFlags); - - private static readonly Regex NonUnitsRegex = - new Regex(BaseUnits.PmNonUnitRegex, RegexFlags); - protected FrenchNumberWithUnitExtractorConfiguration(CultureInfo ci) + : base( + NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, + BaseUnits.PmNonUnitRegex, + string.Empty, + RegexFlags) { this.CultureInfo = ci; - this.UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit); + + var unitNumConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None, NumberMode.Unit); + this.UnitNumExtractor = NumberExtractor.GetInstance(unitNumConfig); + this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = 
NumbersWithUnitDefinitions.ConnectorToken; AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict); + TemperatureAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.TemperatureAmbiguityFiltersDict); + DimensionAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.DimensionAmbiguityFiltersDict); } - public abstract string ExtractType { get; } - - public CultureInfo CultureInfo { get; } - - public IExtractor UnitNumExtractor { get; } - - public string BuildPrefix { get; } - - public string BuildSuffix { get; } - - public string ConnectorToken { get; } - - public Regex CompoundUnitConnectorRegex => CompoundUnitConnRegex; - - public Regex NonUnitRegex => NonUnitsRegex; - - public virtual Regex AmbiguousUnitNumberMultiplierRegex => null; - - public Dictionary AmbiguityFiltersDict { get; } - - public abstract ImmutableDictionary SuffixList { get; } - - public abstract ImmutableDictionary PrefixList { get; } - - public abstract ImmutableList AmbiguousUnitList { get; } + public override void ExpandHalfSuffix(string source, ref List result, IOrderedEnumerable numbers) + { + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/LengthExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/LengthExtractorConfiguration.cs index 600c2418b0..cc5a0f5f4d 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/LengthExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/LengthExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.French; @@ -10,7 +13,7 @@ public class LengthExtractorConfiguration : FrenchNumberWithUnitExtractorConfigu public static readonly ImmutableDictionary LengthSuffixList = NumbersWithUnitDefinitions.LengthSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousLengthUnitList.ToImmutableList(); public LengthExtractorConfiguration() diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/SpeedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/SpeedExtractorConfiguration.cs index 7235493102..a9ccb7f96e 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/SpeedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/SpeedExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.French; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/TemperatureExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/TemperatureExtractorConfiguration.cs index 38ddd9c42e..f8f5aa3e1e 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/TemperatureExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/TemperatureExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; @@ -15,7 +18,7 @@ public class TemperatureExtractorConfiguration : FrenchNumberWithUnitExtractorCo private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex AmbiguousUnitMultiplierRegex = - new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags); + new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags, RegexTimeOut); public TemperatureExtractorConfiguration() : this(new CultureInfo(Culture.French)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/VolumeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/VolumeExtractorConfiguration.cs index 3d50801698..6a8f32d0c0 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/VolumeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/VolumeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.French; @@ -10,7 +13,7 @@ public class VolumeExtractorConfiguration : FrenchNumberWithUnitExtractorConfigu public static readonly ImmutableDictionary VolumeSuffixList = NumbersWithUnitDefinitions.VolumeSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousVolumeUnitList.ToImmutableList(); public VolumeExtractorConfiguration() diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/WeightExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/WeightExtractorConfiguration.cs index a6193d2673..d46f8eadb2 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/WeightExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/WeightExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.French; @@ -10,7 +13,7 @@ public class WeightExtractorConfiguration : FrenchNumberWithUnitExtractorConfigu public static readonly ImmutableDictionary WeightSuffixList = NumbersWithUnitDefinitions.WeightSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousWeightUnitList.ToImmutableList(); public WeightExtractorConfiguration() diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/AgeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/AgeParserConfiguration.cs index 3ba9ddac2e..d355056e57 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/AgeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/AgeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.French { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/AreaParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/AreaParserConfiguration.cs index e176a65be2..4e031754d2 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/AreaParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/AreaParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.French { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/CurrencyParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/CurrencyParserConfiguration.cs index be8f27bf07..aef3cbdc89 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/CurrencyParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/CurrencyParserConfiguration.cs @@ -1,4 +1,9 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; +using System.Globalization; +using Microsoft.Recognizers.Definitions.French; namespace Microsoft.Recognizers.Text.NumberWithUnit.French { @@ -14,6 +19,8 @@ public CurrencyParserConfiguration(CultureInfo ci) { this.BindDictionary(CurrencyExtractorConfiguration.CurrencySuffixList); this.BindDictionary(CurrencyExtractorConfiguration.CurrencyPrefixList); + this.CurrencyNameToIsoCodeMap = NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.ToImmutableDictionary(); + this.CurrencyFractionCodeList = NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary(); } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/DimensionParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/DimensionParserConfiguration.cs index c2eee53410..5e87392d39 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/DimensionParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/DimensionParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.French { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/FrenchNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/FrenchNumberWithUnitParserConfiguration.cs index ff92af09b3..3fa59c8868 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/FrenchNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/FrenchNumberWithUnitParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Globalization; using Microsoft.Recognizers.Definitions.French; using Microsoft.Recognizers.Text.Number; @@ -11,10 +15,17 @@ public class FrenchNumberWithUnitParserConfiguration : BaseNumberWithUnitParserC public FrenchNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { - this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new FrenchNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); - this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; + + var numConfig = new BaseNumberOptionsConfiguration(Culture.French, NumberOptions.None); + + this.InternalNumberExtractor = NumberExtractor.GetInstance(numConfig); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new FrenchNumberParserConfiguration(numConfig)); + + // A space is added to the token to avoid interpreting part of a unit as a connector (e.g. 
'de' in 'degrés') + this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken + " "; + + this.TypeList = DimensionExtractorConfiguration.DimensionTypeList; } public override IParser InternalNumberParser { get; } @@ -22,5 +33,7 @@ public FrenchNumberWithUnitParserConfiguration(CultureInfo ci) public override IExtractor InternalNumberExtractor { get; } public override string ConnectorToken { get; } + + public override IDictionary TypeList { get; set; } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/LengthParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/LengthParserConfiguration.cs index 3563864990..07bc634f94 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/LengthParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/LengthParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.French { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/SpeedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/SpeedParserConfiguration.cs index fc999fd088..27b166b7dc 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/SpeedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/SpeedParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.French { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/TemperatureParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/TemperatureParserConfiguration.cs index cc5ae26200..8934760b70 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/TemperatureParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/TemperatureParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.French { @@ -14,7 +17,5 @@ public TemperatureParserConfiguration(CultureInfo ci) { this.BindDictionary(TemperatureExtractorConfiguration.TemperatureSuffixList); } - - public override string ConnectorToken => null; } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/VolumeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/VolumeParserConfiguration.cs index e878c2ce50..ea40576b75 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/VolumeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/VolumeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.French { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/WeightParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/WeightParserConfiguration.cs index d1050f0066..f473d60168 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/WeightParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/WeightParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.French { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/AgeExtractorConfiguration.cs index 86bdd028b8..e2c88383a5 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/AgeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/AgeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.German; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/AngleExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/AngleExtractorConfiguration.cs new file mode 100644 index 0000000000..83812b7918 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/AngleExtractorConfiguration.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.German; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.German +{ + public class AngleExtractorConfiguration : GermanNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary AngleSuffixList = + NumbersWithUnitDefinitions.AngleSuffixList.ToImmutableDictionary(); + + public static readonly ImmutableList AmbiguousUnits = + NumbersWithUnitDefinitions.AmbiguousAngleUnitList.ToImmutableList(); + + public AngleExtractorConfiguration() + : this(new CultureInfo(Culture.German)) + { + } + + public AngleExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => AngleSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; + + public override string ExtractType => Constants.SYS_UNIT_ANGLE; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/AreaExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/AreaExtractorConfiguration.cs index a1b401712f..1317dca3cd 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/AreaExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/AreaExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.German; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/CurrencyExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/CurrencyExtractorConfiguration.cs index bcea19b071..13f6c9f148 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/CurrencyExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/CurrencyExtractorConfiguration.cs @@ -1,5 +1,11 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; using System.Globalization; +using System.Linq; using Microsoft.Recognizers.Definitions.German; @@ -7,7 +13,25 @@ namespace Microsoft.Recognizers.Text.NumberWithUnit.German { public class CurrencyExtractorConfiguration : GermanNumberWithUnitExtractorConfiguration { - public static readonly ImmutableDictionary CurrencyPrefixList = NumbersWithUnitDefinitions.CurrencyPrefixList.ToImmutableDictionary(); + // CurrencyNameToIsoCodeMap dictionary (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary IsoCodeDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture)); + + // CurrencyNameToIsoCodeMap followed by '$' symbol (e.g. 
'AUD$') + public static readonly Dictionary IsoCodeWithSymbolDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture) + "$"); + + // Merge IsoCodeDict and IsoCodeWithSymbolDict + public static readonly Dictionary IsoCodeCombinedDict = IsoCodeDict.Concat(IsoCodeWithSymbolDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + // Merge IsoCodeCombinedDict with CurrencyPrefixList (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary CurrencyPrefixDict = NumbersWithUnitDefinitions.CurrencyPrefixList.Concat(IsoCodeCombinedDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + public static readonly ImmutableDictionary CurrencyPrefixList = CurrencyPrefixDict.ToImmutableDictionary(); public static readonly ImmutableDictionary CurrencySuffixList = NumbersWithUnitDefinitions.CurrencySuffixList.ToImmutableDictionary(); diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/DimensionExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/DimensionExtractorConfiguration.cs index 667a520bef..b8082d2c23 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/DimensionExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/DimensionExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using System.Linq; @@ -14,9 +17,27 @@ public class DimensionExtractorConfiguration : GermanNumberWithUnitExtractorConf .Concat(SpeedExtractorConfiguration.SpeedSuffixList) .Concat(VolumeExtractorConfiguration.VolumeSuffixList) .Concat(WeightExtractorConfiguration.WeightSuffixList) + .Concat(AngleExtractorConfiguration.AngleSuffixList) + .ToImmutableDictionary(x => x.Key, x => x.Value); + + public static readonly ImmutableDictionary DimensionTypeList = + NumbersWithUnitDefinitions.InformationSuffixList.ToDictionary(x => x.Key, x => Constants.INFORMATION) + .Concat(AreaExtractorConfiguration.AreaSuffixList.ToDictionary(x => x.Key, x => Constants.AREA)) + .Concat(LengthExtractorConfiguration.LengthSuffixList.ToDictionary(x => x.Key, x => Constants.LENGTH)) + .Concat(SpeedExtractorConfiguration.SpeedSuffixList.ToDictionary(x => x.Key, x => Constants.SPEED)) + .Concat(VolumeExtractorConfiguration.VolumeSuffixList.ToDictionary(x => x.Key, x => Constants.VOLUME)) + .Concat(WeightExtractorConfiguration.WeightSuffixList.ToDictionary(x => x.Key, x => Constants.WEIGHT)) + .Concat(AngleExtractorConfiguration.AngleSuffixList.ToDictionary(x => x.Key, x => Constants.ANGLE)) .ToImmutableDictionary(x => x.Key, x => x.Value); - private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousDimensionUnitList.ToImmutableList(); + private static readonly ImmutableList AmbiguousValues = + NumbersWithUnitDefinitions.AmbiguousDimensionUnitList + .Concat(LengthExtractorConfiguration.AmbiguousValues) + .Concat(VolumeExtractorConfiguration.AmbiguousValues) + .Concat(WeightExtractorConfiguration.AmbiguousValues) + .Concat(AngleExtractorConfiguration.AmbiguousUnits) + .Distinct() + .ToImmutableList(); public DimensionExtractorConfiguration() : base(new CultureInfo(Culture.German)) diff --git 
a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/GermanNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/GermanNumberWithUnitExtractorConfiguration.cs index 0659a563ad..2085e0138f 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/GermanNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/GermanNumberWithUnitExtractorConfiguration.cs @@ -1,58 +1,46 @@ -using System.Collections.Generic; -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.German; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.German; namespace Microsoft.Recognizers.Text.NumberWithUnit.German { - public abstract class GermanNumberWithUnitExtractorConfiguration : INumberWithUnitExtractorConfiguration + public abstract class GermanNumberWithUnitExtractorConfiguration : BaseNumberWithUnitExtractorConfiguration { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly Regex CompoundUnitConnRegex = - new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexFlags); - - private static readonly Regex NonUnitsRegex = - new Regex(BaseUnits.PmNonUnitRegex, RegexFlags); - protected GermanNumberWithUnitExtractorConfiguration(CultureInfo ci) + : base( + NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, + BaseUnits.PmNonUnitRegex, + string.Empty, + RegexFlags) { this.CultureInfo = ci; - this.UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit); - this.BuildPrefix = 
NumbersWithUnitDefinitions.BuildPrefix; - this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; - this.ConnectorToken = string.Empty; - } - - public abstract string ExtractType { get; } - - public CultureInfo CultureInfo { get; } - - public IExtractor UnitNumExtractor { get; } - - public string BuildPrefix { get; } - public string BuildSuffix { get; } + var unitNumConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None, NumberMode.Unit); + this.UnitNumExtractor = NumberExtractor.GetInstance(unitNumConfig); - public string ConnectorToken { get; } - - public Regex CompoundUnitConnectorRegex => CompoundUnitConnRegex; - - public Regex NonUnitRegex => NonUnitsRegex; - - public virtual Regex AmbiguousUnitNumberMultiplierRegex => null; - - public Dictionary AmbiguityFiltersDict { get; } = null; - - public abstract ImmutableDictionary SuffixList { get; } + this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; + this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; + this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; - public abstract ImmutableDictionary PrefixList { get; } + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict); + TemperatureAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.TemperatureAmbiguityFiltersDict); + DimensionAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.DimensionAmbiguityFiltersDict); + } - public abstract ImmutableList AmbiguousUnitList { get; } + public override void ExpandHalfSuffix(string source, ref List result, IOrderedEnumerable numbers) + { + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/LengthExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/LengthExtractorConfiguration.cs index 300e06fba1..e380c54fe4 100644 --- 
a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/LengthExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/LengthExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.German; @@ -9,7 +12,7 @@ public class LengthExtractorConfiguration : GermanNumberWithUnitExtractorConfigu { public static readonly ImmutableDictionary LengthSuffixList = NumbersWithUnitDefinitions.LengthSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousLengthUnitList.ToImmutableList(); + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousLengthUnitList.ToImmutableList(); public LengthExtractorConfiguration() : base(new CultureInfo(Culture.German)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/SpeedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/SpeedExtractorConfiguration.cs index 37b368f998..e4bcfe30e1 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/SpeedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/SpeedExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.German; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/TemperatureExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/TemperatureExtractorConfiguration.cs index 821dc69e0e..1f347c9869 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/TemperatureExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/TemperatureExtractorConfiguration.cs @@ -18,7 +18,7 @@ public class TemperatureExtractorConfiguration : GermanNumberWithUnitExtractorCo NumbersWithUnitDefinitions.AmbiguousTemperatureUnitList.ToImmutableList(); private static readonly Regex AmbiguousUnitMultiplierRegex = - new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags); + new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags, RegexTimeOut); public TemperatureExtractorConfiguration() : this(new CultureInfo(Culture.German)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/VolumeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/VolumeExtractorConfiguration.cs index 0f26ae2935..ed49129da3 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/VolumeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/VolumeExtractorConfiguration.cs @@ -9,7 +9,7 @@ public class VolumeExtractorConfiguration : GermanNumberWithUnitExtractorConfigu { public static readonly ImmutableDictionary VolumeSuffixList = NumbersWithUnitDefinitions.VolumeSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousVolumeUnitList.ToImmutableList(); + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousVolumeUnitList.ToImmutableList(); 
public VolumeExtractorConfiguration() : this(new CultureInfo(Culture.German)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/WeightExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/WeightExtractorConfiguration.cs index 414b87f353..bfaf79f1e9 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/WeightExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/WeightExtractorConfiguration.cs @@ -9,7 +9,7 @@ public class WeightExtractorConfiguration : GermanNumberWithUnitExtractorConfigu { public static readonly ImmutableDictionary WeightSuffixList = NumbersWithUnitDefinitions.WeightSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousWeightUnitList.ToImmutableList(); + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousWeightUnitList.ToImmutableList(); public WeightExtractorConfiguration() : this(new CultureInfo(Culture.German)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/CurrencyParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/CurrencyParserConfiguration.cs index ff4b356990..5f0473fb66 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/CurrencyParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/CurrencyParserConfiguration.cs @@ -1,4 +1,9 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; +using Microsoft.Recognizers.Definitions.German; namespace Microsoft.Recognizers.Text.NumberWithUnit.German { @@ -14,6 +19,8 @@ public CurrencyParserConfiguration(CultureInfo ci) { this.BindDictionary(CurrencyExtractorConfiguration.CurrencySuffixList); this.BindDictionary(CurrencyExtractorConfiguration.CurrencyPrefixList); + this.CurrencyNameToIsoCodeMap = NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.ToImmutableDictionary(); + this.CurrencyFractionCodeList = NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary(); } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/DimensionParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/DimensionParserConfiguration.cs index b971c0caba..3a2e88255c 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/DimensionParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/DimensionParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.German { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/GermanNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/GermanNumberWithUnitParserConfiguration.cs index 13c1cbb261..c156f7af07 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/GermanNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/GermanNumberWithUnitParserConfiguration.cs @@ -1,4 +1,9 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Globalization; +using Microsoft.Recognizers.Definitions.German; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.German; @@ -9,10 +14,15 @@ public class GermanNumberWithUnitParserConfiguration : BaseNumberWithUnitParserC public GermanNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { - this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new GermanNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); - this.ConnectorToken = string.Empty; + + var numConfig = new BaseNumberOptionsConfiguration(Culture.German, NumberOptions.None); + + this.InternalNumberExtractor = NumberExtractor.GetInstance(numConfig); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new GermanNumberParserConfiguration(numConfig)); + this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; + + this.TypeList = DimensionExtractorConfiguration.DimensionTypeList; } public override IParser InternalNumberParser { get; } @@ -20,5 +30,7 @@ public GermanNumberWithUnitParserConfiguration(CultureInfo ci) public override IExtractor InternalNumberExtractor { get; } public override string ConnectorToken { get; } + + public override IDictionary TypeList { get; set; } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/LengthParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/LengthParserConfiguration.cs index baf4573b23..2837a61103 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/LengthParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/LengthParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.German { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/SpeedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/SpeedParserConfiguration.cs index 6737a43b5e..ac96c23fad 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/SpeedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/SpeedParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.German { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/TemperatureParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/TemperatureParserConfiguration.cs index 8b5175e015..9cd4462da7 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/TemperatureParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/TemperatureParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.German { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/VolumeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/VolumeParserConfiguration.cs index 23ca901a65..dca86333a4 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/VolumeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/VolumeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.German { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/WeightParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/WeightParserConfiguration.cs index 70c57b78ae..9d11064167 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/WeightParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/WeightParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.German { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/AgeExtractorConfiguration.cs index 0156806b4e..7f80d9af24 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/AgeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/AgeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Hindi; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/AreaExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/AreaExtractorConfiguration.cs index d26eb52596..49ece9d589 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/AreaExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/AreaExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Hindi; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/CurrencyExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/CurrencyExtractorConfiguration.cs index 672ce664a0..0b04947543 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/CurrencyExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/CurrencyExtractorConfiguration.cs @@ -1,5 +1,11 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; using System.Globalization; +using System.Linq; using Microsoft.Recognizers.Definitions.Hindi; @@ -10,8 +16,25 @@ public class CurrencyExtractorConfiguration : HindiNumberWithUnitExtractorConfig public static readonly ImmutableDictionary CurrencySuffixList = NumbersWithUnitDefinitions.CurrencySuffixList.ToImmutableDictionary(); - public static readonly ImmutableDictionary CurrencyPrefixList = - NumbersWithUnitDefinitions.CurrencyPrefixList.ToImmutableDictionary(); + // CurrencyNameToIsoCodeMap dictionary (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary IsoCodeDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture)); + + // CurrencyNameToIsoCodeMap followed by '$' symbol (e.g. 'AUD$') + public static readonly Dictionary IsoCodeWithSymbolDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture) + "$"); + + // Merge IsoCodeDict and IsoCodeWithSymbolDict + public static readonly Dictionary IsoCodeCombinedDict = IsoCodeDict.Concat(IsoCodeWithSymbolDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + // Merge IsoCodeCombinedDict with CurrencyPrefixList (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary CurrencyPrefixDict = NumbersWithUnitDefinitions.CurrencyPrefixList.Concat(IsoCodeCombinedDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? 
string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + public static readonly ImmutableDictionary CurrencyPrefixList = CurrencyPrefixDict.ToImmutableDictionary(); public static readonly ImmutableDictionary FractionalUnitNameToCodeMap = NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary(); diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/DimensionExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/DimensionExtractorConfiguration.cs index 4862ac52f5..b5dcc5ba59 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/DimensionExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/DimensionExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using System.Linq; @@ -17,6 +20,15 @@ public class DimensionExtractorConfiguration : HindiNumberWithUnitExtractorConfi .Concat(WeightExtractorConfiguration.WeightSuffixList) .ToImmutableDictionary(x => x.Key, x => x.Value); + public static readonly ImmutableDictionary DimensionTypeList = + NumbersWithUnitDefinitions.InformationSuffixList.ToDictionary(x => x.Key, x => Constants.INFORMATION) + .Concat(AreaExtractorConfiguration.AreaSuffixList.ToDictionary(x => x.Key, x => Constants.AREA)) + .Concat(LengthExtractorConfiguration.LengthSuffixList.ToDictionary(x => x.Key, x => Constants.LENGTH)) + .Concat(SpeedExtractorConfiguration.SpeedSuffixList.ToDictionary(x => x.Key, x => Constants.SPEED)) + .Concat(VolumeExtractorConfiguration.VolumeSuffixList.ToDictionary(x => x.Key, x => Constants.VOLUME)) + .Concat(WeightExtractorConfiguration.WeightSuffixList.ToDictionary(x => x.Key, x => Constants.WEIGHT)) + .ToImmutableDictionary(x => x.Key, x => x.Value); + private 
static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousDimensionUnitList.ToImmutableList(); diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/HindiNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/HindiNumberWithUnitExtractorConfiguration.cs index 25cc300f07..27c09132a8 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/HindiNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/HindiNumberWithUnitExtractorConfiguration.cs @@ -1,6 +1,9 @@ -using System.Collections.Generic; -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; @@ -11,18 +14,17 @@ namespace Microsoft.Recognizers.Text.NumberWithUnit.Hindi { - public abstract class HindiNumberWithUnitExtractorConfiguration : INumberWithUnitExtractorConfiguration + public abstract class HindiNumberWithUnitExtractorConfiguration : BaseNumberWithUnitExtractorConfiguration { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly Regex CompoundUnitConnRegex = - new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexFlags); - - private static readonly Regex NonUnitsRegex = - new Regex(BaseUnits.PmNonUnitRegex, RegexFlags); - protected HindiNumberWithUnitExtractorConfiguration(CultureInfo ci) + : base( + NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, + BaseUnits.PmNonUnitRegex, + string.Empty, + RegexFlags) { this.CultureInfo = ci; this.UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit); @@ -33,30 +35,8 @@ protected HindiNumberWithUnitExtractorConfiguration(CultureInfo ci) AmbiguityFiltersDict = 
DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict); } - public abstract string ExtractType { get; } - - public CultureInfo CultureInfo { get; } - - public IExtractor UnitNumExtractor { get; } - - public string BuildPrefix { get; } - - public string BuildSuffix { get; } - - public string ConnectorToken { get; } - - public Regex CompoundUnitConnectorRegex => CompoundUnitConnRegex; - - public Regex NonUnitRegex => NonUnitsRegex; - - public virtual Regex AmbiguousUnitNumberMultiplierRegex => null; - - public Dictionary AmbiguityFiltersDict { get; } = null; - - public abstract ImmutableDictionary SuffixList { get; } - - public abstract ImmutableDictionary PrefixList { get; } - - public abstract ImmutableList AmbiguousUnitList { get; } + public override void ExpandHalfSuffix(string source, ref List result, IOrderedEnumerable numbers) + { + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/LengthExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/LengthExtractorConfiguration.cs index 9444fb60c5..74ef767c32 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/LengthExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/LengthExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Hindi; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/SpeedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/SpeedExtractorConfiguration.cs index 74d836ea0c..e5a74998ba 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/SpeedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/SpeedExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Hindi; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/TemperatureExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/TemperatureExtractorConfiguration.cs index 1ca977cd74..75d1d7a2f3 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/TemperatureExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/TemperatureExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; @@ -18,7 +21,7 @@ public class TemperatureExtractorConfiguration : HindiNumberWithUnitExtractorCon NumbersWithUnitDefinitions.AmbiguousTemperatureUnitList.ToImmutableList(); private static readonly Regex AmbiguousUnitMultiplierRegex = - new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags); + new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags, RegexTimeOut); public TemperatureExtractorConfiguration() : this(new CultureInfo(Culture.Hindi)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/VolumeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/VolumeExtractorConfiguration.cs index fc23d5849a..1684805184 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/VolumeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/VolumeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Hindi; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/WeightExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/WeightExtractorConfiguration.cs index 85e6bb7a7b..cb232f874b 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/WeightExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Extractors/WeightExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Hindi; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/AgeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/AgeParserConfiguration.cs index c9bafce1b3..0403e794af 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/AgeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/AgeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Hindi { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/AreaParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/AreaParserConfiguration.cs index 551d295c19..221b3c7601 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/AreaParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/AreaParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Hindi { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/CurrencyParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/CurrencyParserConfiguration.cs index 2e6dc70ec1..332192e97b 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/CurrencyParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/CurrencyParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Hindi; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/DimensionParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/DimensionParserConfiguration.cs index 594a909b27..911b4d4cdc 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/DimensionParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/DimensionParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Hindi { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/HindiNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/HindiNumberWithUnitParserConfiguration.cs index 6faf00a222..4d53b75f38 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/HindiNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/HindiNumberWithUnitParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Globalization; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Hindi; @@ -9,10 +13,14 @@ public class HindiNumberWithUnitParserConfiguration : BaseNumberWithUnitParserCo public HindiNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { + var numConfig = new BaseNumberOptionsConfiguration(Culture.Hindi, NumberOptions.None); + this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new HindiNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new HindiNumberParserConfiguration(numConfig)); this.ConnectorToken = string.Empty; + + this.TypeList = DimensionExtractorConfiguration.DimensionTypeList; } public override IParser InternalNumberParser { get; } @@ -20,5 +28,7 @@ public HindiNumberWithUnitParserConfiguration(CultureInfo ci) public override IExtractor InternalNumberExtractor { get; } public override string ConnectorToken { get; } + + public override IDictionary TypeList { get; set; } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/LengthParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/LengthParserConfiguration.cs index 71e90caeff..195ec5e7c8 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/LengthParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/LengthParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Hindi { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/SpeedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/SpeedParserConfiguration.cs index bbbcdc2d60..c03c7bc9ec 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/SpeedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/SpeedParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Hindi { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/TemperatureParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/TemperatureParserConfiguration.cs index 53eada03d6..b7e5fb7a81 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/TemperatureParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/TemperatureParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Hindi { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/VolumeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/VolumeParserConfiguration.cs index d6278289d3..cc96aa7d71 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/VolumeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/VolumeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Hindi { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/WeightParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/WeightParserConfiguration.cs index bfafb43dab..12840ae2c7 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/WeightParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/WeightParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Hindi { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/AgeExtractorConfiguration.cs index 858c0af0c7..1dece7e7b9 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/AgeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/AgeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Italian; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/AngleExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/AngleExtractorConfiguration.cs new file mode 100644 index 0000000000..75aadacfbc --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/AngleExtractorConfiguration.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Italian; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Italian +{ + public class AngleExtractorConfiguration : ItalianNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary AngleSuffixList = + NumbersWithUnitDefinitions.AngleSuffixList.ToImmutableDictionary(); + + public static readonly ImmutableList AmbiguousUnits = + NumbersWithUnitDefinitions.AmbiguousAngleUnitList.ToImmutableList(); + + public AngleExtractorConfiguration() + : this(new CultureInfo(Culture.Italian)) + { + } + + public AngleExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => AngleSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; + + public override string ExtractType => Constants.SYS_UNIT_ANGLE; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/AreaExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/AreaExtractorConfiguration.cs index 4cf897e358..d137f1ffde 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/AreaExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/AreaExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Italian; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/CurrencyExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/CurrencyExtractorConfiguration.cs index efbfc736d5..effca61b64 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/CurrencyExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/CurrencyExtractorConfiguration.cs @@ -1,5 +1,11 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; using System.Globalization; +using System.Linq; using Microsoft.Recognizers.Definitions.Italian; @@ -9,7 +15,25 @@ public class CurrencyExtractorConfiguration : ItalianNumberWithUnitExtractorConf { public static readonly ImmutableDictionary CurrencySuffixList = NumbersWithUnitDefinitions.CurrencySuffixList.ToImmutableDictionary(); - public static readonly ImmutableDictionary CurrencyPrefixList = NumbersWithUnitDefinitions.CurrencyPrefixList.ToImmutableDictionary(); + // CurrencyNameToIsoCodeMap dictionary (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary IsoCodeDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture)); + + // CurrencyNameToIsoCodeMap followed by '$' symbol (e.g. 
'AUD$') + public static readonly Dictionary IsoCodeWithSymbolDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture) + "$"); + + // Merge IsoCodeDict and IsoCodeWithSymbolDict + public static readonly Dictionary IsoCodeCombinedDict = IsoCodeDict.Concat(IsoCodeWithSymbolDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + // Merge IsoCodeCombinedDict with CurrencyPrefixList (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary CurrencyPrefixDict = NumbersWithUnitDefinitions.CurrencyPrefixList.Concat(IsoCodeCombinedDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + public static readonly ImmutableDictionary CurrencyPrefixList = CurrencyPrefixDict.ToImmutableDictionary(); private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousCurrencyUnitList.ToImmutableList(); diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/DimensionExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/DimensionExtractorConfiguration.cs index 5f95886639..8c3ecb1e16 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/DimensionExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/DimensionExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using System.Linq; @@ -14,9 +17,27 @@ public class DimensionExtractorConfiguration : ItalianNumberWithUnitExtractorCon .Concat(SpeedExtractorConfiguration.SpeedSuffixList) .Concat(VolumeExtractorConfiguration.VolumeSuffixList) .Concat(WeightExtractorConfiguration.WeightSuffixList) + .Concat(AngleExtractorConfiguration.AngleSuffixList) + .ToImmutableDictionary(x => x.Key, x => x.Value); + + public static readonly ImmutableDictionary DimensionTypeList = + NumbersWithUnitDefinitions.InformationSuffixList.ToDictionary(x => x.Key, x => Constants.INFORMATION) + .Concat(AreaExtractorConfiguration.AreaSuffixList.ToDictionary(x => x.Key, x => Constants.AREA)) + .Concat(LengthExtractorConfiguration.LengthSuffixList.ToDictionary(x => x.Key, x => Constants.LENGTH)) + .Concat(SpeedExtractorConfiguration.SpeedSuffixList.ToDictionary(x => x.Key, x => Constants.SPEED)) + .Concat(VolumeExtractorConfiguration.VolumeSuffixList.ToDictionary(x => x.Key, x => Constants.VOLUME)) + .Concat(WeightExtractorConfiguration.WeightSuffixList.ToDictionary(x => x.Key, x => Constants.WEIGHT)) + .Concat(AngleExtractorConfiguration.AngleSuffixList.ToDictionary(x => x.Key, x => Constants.ANGLE)) .ToImmutableDictionary(x => x.Key, x => x.Value); - private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousDimensionUnitList.ToImmutableList(); + private static readonly ImmutableList AmbiguousValues = + NumbersWithUnitDefinitions.AmbiguousDimensionUnitList + .Concat(LengthExtractorConfiguration.AmbiguousValues) + .Concat(VolumeExtractorConfiguration.AmbiguousValues) + .Concat(WeightExtractorConfiguration.AmbiguousValues) + .Concat(AngleExtractorConfiguration.AmbiguousUnits) + .Distinct() + .ToImmutableList(); public DimensionExtractorConfiguration() : base(new CultureInfo(Culture.Italian)) diff --git 
a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/ItalianNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/ItalianNumberWithUnitExtractorConfiguration.cs index 4e90406805..d5c9560406 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/ItalianNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/ItalianNumberWithUnitExtractorConfiguration.cs @@ -1,6 +1,9 @@ -using System.Collections.Generic; -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; @@ -11,18 +14,17 @@ namespace Microsoft.Recognizers.Text.NumberWithUnit.Italian { - public abstract class ItalianNumberWithUnitExtractorConfiguration : INumberWithUnitExtractorConfiguration + public abstract class ItalianNumberWithUnitExtractorConfiguration : BaseNumberWithUnitExtractorConfiguration { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly Regex CompoundUnitConnRegex = - new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexFlags); - - private static readonly Regex NonUnitsRegex = - new Regex(BaseUnits.PmNonUnitRegex, RegexFlags); - protected ItalianNumberWithUnitExtractorConfiguration(CultureInfo ci) + : base( + NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, + BaseUnits.PmNonUnitRegex, + string.Empty, + RegexFlags) { this.CultureInfo = ci; this.UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit); @@ -31,32 +33,12 @@ protected ItalianNumberWithUnitExtractorConfiguration(CultureInfo ci) this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; AmbiguityFiltersDict = 
DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict); + TemperatureAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.TemperatureAmbiguityFiltersDict); + DimensionAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.DimensionAmbiguityFiltersDict); } - public abstract string ExtractType { get; } - - public CultureInfo CultureInfo { get; } - - public IExtractor UnitNumExtractor { get; } - - public string BuildPrefix { get; } - - public string BuildSuffix { get; } - - public string ConnectorToken { get; } - - public Regex CompoundUnitConnectorRegex => CompoundUnitConnRegex; - - public Regex NonUnitRegex => NonUnitsRegex; - - public virtual Regex AmbiguousUnitNumberMultiplierRegex => null; - - public Dictionary AmbiguityFiltersDict { get; } = null; - - public abstract ImmutableDictionary SuffixList { get; } - - public abstract ImmutableDictionary PrefixList { get; } - - public abstract ImmutableList AmbiguousUnitList { get; } + public override void ExpandHalfSuffix(string source, ref List result, IOrderedEnumerable numbers) + { + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/LengthExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/LengthExtractorConfiguration.cs index 3a2fe54c8b..c689889b2a 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/LengthExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/LengthExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Italian; @@ -9,7 +12,7 @@ public class LengthExtractorConfiguration : ItalianNumberWithUnitExtractorConfig { public static readonly ImmutableDictionary LengthSuffixList = NumbersWithUnitDefinitions.LengthSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousLengthUnitList.ToImmutableList(); + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousLengthUnitList.ToImmutableList(); public LengthExtractorConfiguration() : base(new CultureInfo(Culture.Italian)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/SpeedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/SpeedExtractorConfiguration.cs index 7d17bc829e..b9a07155bd 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/SpeedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/SpeedExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Italian; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/TemperatureExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/TemperatureExtractorConfiguration.cs index 0e8a1d271a..0f26ab17f7 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/TemperatureExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/TemperatureExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; @@ -15,7 +18,7 @@ public class TemperatureExtractorConfiguration : ItalianNumberWithUnitExtractorC private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex AmbiguousUnitMultiplierRegex = - new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags); + new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags, RegexTimeOut); public TemperatureExtractorConfiguration() : this(new CultureInfo(Culture.Italian)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/VolumeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/VolumeExtractorConfiguration.cs index b0ba2ec85b..5b3fa2007f 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/VolumeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/VolumeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Italian; @@ -9,7 +12,7 @@ public class VolumeExtractorConfiguration : ItalianNumberWithUnitExtractorConfig { public static readonly ImmutableDictionary VolumeSuffixList = NumbersWithUnitDefinitions.VolumeSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousVolumeUnitList.ToImmutableList(); + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousVolumeUnitList.ToImmutableList(); public VolumeExtractorConfiguration() : this(new CultureInfo(Culture.Italian)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/WeightExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/WeightExtractorConfiguration.cs index 97b6edf7e2..8d4371a555 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/WeightExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/WeightExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Italian; @@ -9,7 +12,7 @@ public class WeightExtractorConfiguration : ItalianNumberWithUnitExtractorConfig { public static readonly ImmutableDictionary WeightSuffixList = NumbersWithUnitDefinitions.WeightSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousWeightUnitList.ToImmutableList(); + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousWeightUnitList.ToImmutableList(); public WeightExtractorConfiguration() : this(new CultureInfo(Culture.Italian)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/AgeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/AgeParserConfiguration.cs index 327a28b2bb..6df7a3fb23 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/AgeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/AgeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Italian { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/AreaParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/AreaParserConfiguration.cs index 71d0fd99a9..c3d08f0c97 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/AreaParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/AreaParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Italian { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/CurrencyParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/CurrencyParserConfiguration.cs index 0398fde853..843253102a 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/CurrencyParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/CurrencyParserConfiguration.cs @@ -1,4 +1,9 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; +using System.Globalization; +using Microsoft.Recognizers.Definitions.Italian; namespace Microsoft.Recognizers.Text.NumberWithUnit.Italian { @@ -14,6 +19,8 @@ public CurrencyParserConfiguration(CultureInfo ci) { this.BindDictionary(CurrencyExtractorConfiguration.CurrencySuffixList); this.BindDictionary(CurrencyExtractorConfiguration.CurrencyPrefixList); + this.CurrencyNameToIsoCodeMap = NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.ToImmutableDictionary(); + this.CurrencyFractionCodeList = NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary(); } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/DimensionParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/DimensionParserConfiguration.cs index f766d46b6d..b0b19cf068 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/DimensionParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/DimensionParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Italian { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/ItalianNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/ItalianNumberWithUnitParserConfiguration.cs index f78ac78286..01799c6b70 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/ItalianNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/ItalianNumberWithUnitParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Globalization; using Microsoft.Recognizers.Definitions.Italian; using Microsoft.Recognizers.Text.Number; @@ -11,10 +15,15 @@ public class ItalianNumberWithUnitParserConfiguration : BaseNumberWithUnitParser public ItalianNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { + + var numConfig = new BaseNumberOptionsConfiguration(Culture.Italian, NumberOptions.None); + this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new ItalianNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new ItalianNumberParserConfiguration(numConfig)); this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; + + this.TypeList = DimensionExtractorConfiguration.DimensionTypeList; } public override IParser InternalNumberParser { get; } @@ -22,5 +31,7 @@ public ItalianNumberWithUnitParserConfiguration(CultureInfo ci) public override IExtractor InternalNumberExtractor { get; } public override string ConnectorToken { get; } + + public override IDictionary TypeList { get; set; } } } 
diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/LengthParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/LengthParserConfiguration.cs index d7bba6642b..385f20e006 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/LengthParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/LengthParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Italian { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/SpeedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/SpeedParserConfiguration.cs index 2cdca9ea0a..d9b94266a9 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/SpeedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/SpeedParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Italian { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/TemperatureParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/TemperatureParserConfiguration.cs index 90a686569a..8d804e573c 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/TemperatureParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/TemperatureParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Italian { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/VolumeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/VolumeParserConfiguration.cs index 3b3e6c2e79..d84b55c2f2 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/VolumeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/VolumeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Italian { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/WeightParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/WeightParserConfiguration.cs index ab1043589d..01a88db247 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/WeightParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/WeightParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Italian { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/AgeExtractorConfiguration.cs index d0c1da9244..14ad6417df 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/AgeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/AgeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Japanese; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/CurrencyExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/CurrencyExtractorConfiguration.cs index 811e5402ce..c7f8fa6f88 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/CurrencyExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/CurrencyExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Japanese; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/DimensionExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/DimensionExtractorConfiguration.cs new file mode 100644 index 0000000000..1e0b31119d --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/DimensionExtractorConfiguration.cs @@ -0,0 +1,31 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Japanese; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Japanese +{ + public class DimensionExtractorConfiguration : JapaneseNumberWithUnitExtractorConfiguration + { + public DimensionExtractorConfiguration() + : this(new CultureInfo(Culture.Japanese)) + { + } + + public DimensionExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => NumbersWithUnitDefinitions.DimensionSuffixList.ToImmutableDictionary(); + + public override ImmutableDictionary PrefixList => NumbersWithUnitDefinitions.DimensionPrefixList.ToImmutableDictionary(); + + public override ImmutableList AmbiguousUnitList => NumbersWithUnitDefinitions.DimensionAmbiguousValues.ToImmutableList(); + + public override string ExtractType => Constants.SYS_UNIT_DIMENSION; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/JapaneseNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/JapaneseNumberWithUnitExtractorConfiguration.cs index a65d88f62a..64a5fb2198 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/JapaneseNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/JapaneseNumberWithUnitExtractorConfiguration.cs @@ -1,12 +1,21 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; +using System.Linq; +using System.Reflection; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Definitions.Utilities; +using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Config; using Microsoft.Recognizers.Text.Number.Japanese; +using Microsoft.Recognizers.Text.NumberWithUnit.Utilities; namespace Microsoft.Recognizers.Text.NumberWithUnit.Japanese { @@ -21,13 +30,21 @@ public abstract class JapaneseNumberWithUnitExtractorConfiguration : INumberWith private static readonly Regex NonUnitsRegex = new Regex(BaseUnits.PmNonUnitRegex, RegexFlags); + private static readonly Regex HalfUnitRegex = new Regex(NumbersWithUnitDefinitions.HalfUnitRegex, RegexFlags); + protected JapaneseNumberWithUnitExtractorConfiguration(CultureInfo ci) { this.CultureInfo = ci; - this.UnitNumExtractor = new NumberExtractor(CJKNumberExtractorMode.ExtractAll); + + var numConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None); + + this.UnitNumExtractor = new NumberExtractor(numConfig, CJKNumberExtractorMode.ExtractAll); + this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; + + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict); } public abstract string ExtractType { get; } @@ -48,14 +65,28 @@ protected JapaneseNumberWithUnitExtractorConfiguration(CultureInfo ci) public virtual Regex AmbiguousUnitNumberMultiplierRegex => null; + public Regex MultiplierRegex => null; + public IExtractor IntegerExtractor { get; } public Dictionary AmbiguityFiltersDict { get; } = null; + public Dictionary TemperatureAmbiguityFiltersDict { get; } = null; + + public 
Dictionary DimensionAmbiguityFiltersDict { get; } = null; + public abstract ImmutableDictionary SuffixList { get; } public abstract ImmutableDictionary PrefixList { get; } public abstract ImmutableList AmbiguousUnitList { get; } + + protected static TimeSpan RegexTimeOut => NumberWithUnitRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + + public void ExpandHalfSuffix(string source, ref List result, IOrderedEnumerable numbers) + { + // Expand Japanese phrase to the `half` patterns when it follows closely origin phrase. + CommonUtils.ExpandHalfSuffix(source, ref result, numbers, HalfUnitRegex); + } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/TemperatureExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/TemperatureExtractorConfiguration.cs new file mode 100644 index 0000000000..cd63352d4c --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/TemperatureExtractorConfiguration.cs @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.Japanese; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Japanese +{ + public class TemperatureExtractorConfiguration : JapaneseNumberWithUnitExtractorConfiguration + { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex AmbiguousUnitMultiplierRegex = + new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags, RegexTimeOut); + + public TemperatureExtractorConfiguration() + : this(new CultureInfo(Culture.Japanese)) + { + } + + public TemperatureExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => + NumbersWithUnitDefinitions.TemperatureSuffixList.ToImmutableDictionary(); + + public override ImmutableDictionary PrefixList => + NumbersWithUnitDefinitions.TemperaturePrefixList.ToImmutableDictionary(); + + public override ImmutableList AmbiguousUnitList => + NumbersWithUnitDefinitions.TemperatureAmbiguousValues.ToImmutableList(); + + public override string ExtractType => Constants.SYS_UNIT_TEMPERATURE; + + public override Regex AmbiguousUnitNumberMultiplierRegex => AmbiguousUnitMultiplierRegex; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/AgeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/AgeParserConfiguration.cs index 3fbee9c090..73c6418cd1 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/AgeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/AgeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; using Microsoft.Recognizers.Definitions.Japanese; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/CurrencyParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/CurrencyParserConfiguration.cs index 4f532c001e..377d559d25 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/CurrencyParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/CurrencyParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Japanese; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/DimensionParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/DimensionParserConfiguration.cs new file mode 100644 index 0000000000..76b2ccd563 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/DimensionParserConfiguration.cs @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Japanese; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Japanese +{ + public class DimensionParserConfiguration : JapaneseNumberWithUnitParserConfiguration + { + public DimensionParserConfiguration() + : this(new CultureInfo(Culture.Japanese)) + { + } + + public DimensionParserConfiguration(CultureInfo ci) + : base(ci) + { + this.BindDictionary(NumbersWithUnitDefinitions.DimensionPrefixList); + this.BindDictionary(NumbersWithUnitDefinitions.DimensionSuffixList); + this.CheckFirstSuffix = NumbersWithUnitDefinitions.CheckFirstSuffix; + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/JapaneseNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/JapaneseNumberWithUnitParserConfiguration.cs index ad5d0a46b3..62dd0c2f1b 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/JapaneseNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/JapaneseNumberWithUnitParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Globalization; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Japanese; @@ -9,9 +13,12 @@ public class JapaneseNumberWithUnitParserConfiguration : BaseNumberWithUnitParse public JapaneseNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { - this.InternalNumberExtractor = new NumberExtractor(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new JapaneseNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + + var numConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None); + + this.InternalNumberExtractor = new NumberExtractor(numConfig); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new JapaneseNumberParserConfiguration(numConfig)); this.ConnectorToken = string.Empty; } @@ -20,5 +27,7 @@ public JapaneseNumberWithUnitParserConfiguration(CultureInfo ci) public override IExtractor InternalNumberExtractor { get; } public override string ConnectorToken { get; } + + public override IDictionary TypeList { get; set; } = null; } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/TemperatureParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/TemperatureParserConfiguration.cs new file mode 100644 index 0000000000..e8b23c4d2e --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/TemperatureParserConfiguration.cs @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Japanese; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Japanese +{ + public class TemperatureParserConfiguration : JapaneseNumberWithUnitParserConfiguration + { + public TemperatureParserConfiguration() + : this(new CultureInfo(Culture.Japanese)) + { + } + + public TemperatureParserConfiguration(CultureInfo ci) + : base(ci) + { + this.BindDictionary(NumbersWithUnitDefinitions.TemperaturePrefixList); + this.BindDictionary(NumbersWithUnitDefinitions.TemperatureSuffixList); + this.CheckFirstSuffix = NumbersWithUnitDefinitions.CheckFirstSuffix; + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Extractors/AgeExtractorConfiguration.cs new file mode 100644 index 0000000000..9d84bca269 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Extractors/AgeExtractorConfiguration.cs @@ -0,0 +1,31 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Korean; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Korean +{ + public class AgeExtractorConfiguration : KoreanNumberWithUnitExtractorConfiguration + { + public AgeExtractorConfiguration() + : this(new CultureInfo(Culture.Korean)) + { + } + + public AgeExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => NumbersWithUnitDefinitions.AgeSuffixList.ToImmutableDictionary(); + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => NumbersWithUnitDefinitions.AgeAmbiguousValues.ToImmutableList(); + + public override string ExtractType => Constants.SYS_UNIT_AGE; + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Extractors/CurrencyExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Extractors/CurrencyExtractorConfiguration.cs new file mode 100644 index 0000000000..6b51f277e1 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Extractors/CurrencyExtractorConfiguration.cs @@ -0,0 +1,31 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Korean; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Korean +{ + public class CurrencyExtractorConfiguration : KoreanNumberWithUnitExtractorConfiguration + { + public CurrencyExtractorConfiguration() + : this(new CultureInfo(Culture.Korean)) + { + } + + public CurrencyExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => NumbersWithUnitDefinitions.CurrencySuffixList.ToImmutableDictionary(); + + public override ImmutableDictionary PrefixList => NumbersWithUnitDefinitions.CurrencyPrefixList.ToImmutableDictionary(); + + public override ImmutableList AmbiguousUnitList => NumbersWithUnitDefinitions.CurrencyAmbiguousValues.ToImmutableList(); + + public override string ExtractType => Constants.SYS_UNIT_CURRENCY; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Extractors/DimensionExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Extractors/DimensionExtractorConfiguration.cs new file mode 100644 index 0000000000..521afb2536 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Extractors/DimensionExtractorConfiguration.cs @@ -0,0 +1,31 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Korean; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Korean +{ + public class DimensionExtractorConfiguration : KoreanNumberWithUnitExtractorConfiguration + { + public DimensionExtractorConfiguration() + : this(new CultureInfo(Culture.Korean)) + { + } + + public DimensionExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => NumbersWithUnitDefinitions.DimensionSuffixList.ToImmutableDictionary(); + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => NumbersWithUnitDefinitions.DimensionAmbiguousValues.ToImmutableList(); + + public override string ExtractType => Constants.SYS_UNIT_DIMENSION; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Extractors/KoreanNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Extractors/KoreanNumberWithUnitExtractorConfiguration.cs new file mode 100644 index 0000000000..5dcbe3fbe1 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Extractors/KoreanNumberWithUnitExtractorConfiguration.cs @@ -0,0 +1,92 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Globalization; +using System.Linq; +using System.Reflection; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.Korean; +using Microsoft.Recognizers.Definitions.Utilities; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Config; +using Microsoft.Recognizers.Text.Number.Korean; +using Microsoft.Recognizers.Text.NumberWithUnit.Utilities; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Korean +{ + public abstract class KoreanNumberWithUnitExtractorConfiguration : INumberWithUnitExtractorConfiguration + { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex CompoundUnitConnRegex = + new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexFlags); + + private static readonly Regex NonUnitsRegex = + new Regex(BaseUnits.PmNonUnitRegex, RegexFlags); + + private static readonly Regex HalfUnitRegex = new Regex(NumbersWithUnitDefinitions.HalfUnitRegex, RegexFlags); + + protected KoreanNumberWithUnitExtractorConfiguration(CultureInfo ci) + { + this.CultureInfo = ci; + + var numConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None); + + this.UnitNumExtractor = new NumberExtractor(numConfig, CJKNumberExtractorMode.ExtractAll); + + this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; + this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; + this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; + + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict); + } + + public Regex CompoundUnitConnectorRegex => CompoundUnitConnRegex; + + public Regex NonUnitRegex => NonUnitsRegex; + + public virtual Regex AmbiguousUnitNumberMultiplierRegex => null; + + public Regex MultiplierRegex => null; 
+ + public abstract string ExtractType { get; } + + public CultureInfo CultureInfo { get; } + + public IExtractor UnitNumExtractor { get; } + + public string BuildPrefix { get; } + + public string BuildSuffix { get; } + + public string ConnectorToken { get; } + + public IExtractor IntegerExtractor { get; } + + public Dictionary AmbiguityFiltersDict { get; } = null; + + public Dictionary TemperatureAmbiguityFiltersDict { get; } = null; + + public Dictionary DimensionAmbiguityFiltersDict { get; } = null; + + public abstract ImmutableDictionary SuffixList { get; } + + public abstract ImmutableDictionary PrefixList { get; } + + public abstract ImmutableList AmbiguousUnitList { get; } + + protected static TimeSpan RegexTimeOut => NumberWithUnitRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + + public void ExpandHalfSuffix(string source, ref List result, IOrderedEnumerable numbers) + { + // Expand Korean phrase to the `half` patterns when it follows closely origin phrase. + CommonUtils.ExpandHalfSuffix(source, ref result, numbers, HalfUnitRegex); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Extractors/TemperatureExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Extractors/TemperatureExtractorConfiguration.cs new file mode 100644 index 0000000000..0b305cb97f --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Extractors/TemperatureExtractorConfiguration.cs @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.Korean; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Korean +{ + public class TemperatureExtractorConfiguration : KoreanNumberWithUnitExtractorConfiguration + { + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly Regex AmbiguousUnitMultiplierRegex = + new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags, RegexTimeOut); + + public TemperatureExtractorConfiguration() + : this(new CultureInfo(Culture.Korean)) + { + } + + public TemperatureExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => + NumbersWithUnitDefinitions.TemperatureSuffixList.ToImmutableDictionary(); + + public override ImmutableDictionary PrefixList => + NumbersWithUnitDefinitions.TemperaturePrefixList.ToImmutableDictionary(); + + public override ImmutableList AmbiguousUnitList => + NumbersWithUnitDefinitions.TemperatureAmbiguousValues.ToImmutableList(); + + public override string ExtractType => Constants.SYS_UNIT_TEMPERATURE; + + public override Regex AmbiguousUnitNumberMultiplierRegex => AmbiguousUnitMultiplierRegex; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Parsers/AgeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Parsers/AgeParserConfiguration.cs new file mode 100644 index 0000000000..01e161e556 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Parsers/AgeParserConfiguration.cs @@ -0,0 +1,23 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Korean; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Korean +{ + public class AgeParserConfiguration : KoreanNumberWithUnitParserConfiguration + { + public AgeParserConfiguration() + : this(new CultureInfo(Culture.Korean)) + { + } + + public AgeParserConfiguration(CultureInfo ci) + : base(ci) + { + this.BindDictionary(NumbersWithUnitDefinitions.AgeSuffixList); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Parsers/CurrencyParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Parsers/CurrencyParserConfiguration.cs new file mode 100644 index 0000000000..d92f8fa061 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Parsers/CurrencyParserConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; +using System.Globalization; +using Microsoft.Recognizers.Definitions.Korean; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Korean +{ + public class CurrencyParserConfiguration : KoreanNumberWithUnitParserConfiguration + { + public CurrencyParserConfiguration() + : this(new CultureInfo(Culture.Korean)) + { + } + + public CurrencyParserConfiguration(CultureInfo ci) + : base(ci) + { + this.BindDictionary(NumbersWithUnitDefinitions.CurrencyPrefixList); + this.BindDictionary(NumbersWithUnitDefinitions.CurrencySuffixList); + this.CurrencyNameToIsoCodeMap = NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.ToImmutableDictionary(); + this.CurrencyFractionCodeList = NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary(); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Parsers/DimensionParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Parsers/DimensionParserConfiguration.cs new file mode 100644 index 0000000000..a1fd0882c8 
--- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Parsers/DimensionParserConfiguration.cs @@ -0,0 +1,23 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Korean; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Korean +{ + public class DimensionParserConfiguration : KoreanNumberWithUnitParserConfiguration + { + public DimensionParserConfiguration() + : this(new CultureInfo(Culture.Korean)) + { + } + + public DimensionParserConfiguration(CultureInfo ci) + : base(ci) + { + this.BindDictionary(NumbersWithUnitDefinitions.DimensionSuffixList); + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Parsers/KoreanNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Parsers/KoreanNumberWithUnitParserConfiguration.cs new file mode 100644 index 0000000000..87fe44132d --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Parsers/KoreanNumberWithUnitParserConfiguration.cs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Globalization; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Korean; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Korean +{ + public class KoreanNumberWithUnitParserConfiguration : BaseNumberWithUnitParserConfiguration + { + public KoreanNumberWithUnitParserConfiguration(CultureInfo ci) + : base(ci) + { + + var numConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None); + + this.InternalNumberExtractor = new NumberExtractor(numConfig); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new KoreanNumberParserConfiguration(numConfig)); + this.ConnectorToken = string.Empty; + } + + public override IParser InternalNumberParser { get; } + + public override IExtractor InternalNumberExtractor { get; } + + public override string ConnectorToken { get; } + + public override IDictionary TypeList { get; set; } = null; + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Parsers/TemperatureParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Parsers/TemperatureParserConfiguration.cs new file mode 100644 index 0000000000..9e3093dd63 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Korean/Parsers/TemperatureParserConfiguration.cs @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Korean; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Korean +{ + public class TemperatureParserConfiguration : KoreanNumberWithUnitParserConfiguration + { + public TemperatureParserConfiguration() + : this(new CultureInfo(Culture.Korean)) + { + } + + public TemperatureParserConfiguration(CultureInfo ci) + : base(ci) + { + this.BindDictionary(NumbersWithUnitDefinitions.TemperaturePrefixList); + this.BindDictionary(NumbersWithUnitDefinitions.TemperatureSuffixList); + this.CheckFirstSuffix = NumbersWithUnitDefinitions.CheckFirstSuffix; + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.csproj b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.csproj index be682a0dbd..9c20a86070 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.csproj +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.csproj @@ -1,13 +1,18 @@  - netstandard2.0;net462;net452;net45 + netstandard2.1;netstandard2.0;net462;net6.0 + 9 false false ../Recognizers-Text.ruleset - + + + true + ..\buildtools\35MSSharedLib1024.snk + true $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + Microsoft + nlp, entity-extraction, parser-library, recognizer, numex, units, netstandard2.0 + Microsoft.Recognizers.Text.NumberWithUnit provides robust recognition and resolution of numbers with units expressed in English, Spanish, French, Portuguese, Chinese, + German, Dutch, Italian, Turkish, and Hindi. + MIT + https://github.com/Microsoft/Recognizers-Text + images\icon.png + © Microsoft Corporation. All rights reserved. 
+ + - + all runtime; build; native; contentfiles; analyzers - + all runtime; build; native; contentfiles; analyzers - diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.nuspec b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.nuspec index 046b7c6db2..bbd3fb24a1 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.nuspec +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.nuspec @@ -6,23 +6,22 @@ $title$ Microsoft true - Microsoft.Recognizers.Text.NumberWithUnit provides robust recognition and resolution of numbers with units expressed in English, Spanish, French, Portuguese, Chinese, German, and Dutch. + Microsoft.Recognizers.Text.NumberWithUnit provides robust recognition and resolution of numbers with units expressed in English, Spanish, French, Portuguese, Chinese, + German, Dutch, Italian, Turkish, and Hindi. MIT https://github.com/Microsoft/Recognizers-Text - http://docs.botframework.com/images/bot_icon.png + images\icon.png © Microsoft Corporation. All rights reserved. nlp entity-extraction parser-library recognizer numex units netstandard2.0 - - - + diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.xml b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.xml index 5f327ef61f..7152866dc7 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.xml +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.xml @@ -4,10 +4,35 @@ Microsoft.Recognizers.Text.NumberWithUnit + + + Merge compound units when extracting, like compound 5 foot 3 inch as one entity. + + Extract results. + Input text. + The compounded units. 
+ Represents None + + + NoProtoCache + + + + + EnablePreview + + + + + Parsing compounded result, like 5 foot 3 inch. + + Extracted compounded result. + Parsed compounded result. + diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/AbstractNumberWithUnitModel.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/AbstractNumberWithUnitModel.cs index 7fd871d9ad..d0bea0b452 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/AbstractNumberWithUnitModel.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/AbstractNumberWithUnitModel.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Linq; @@ -7,7 +10,11 @@ namespace Microsoft.Recognizers.Text.NumberWithUnit { public abstract class AbstractNumberWithUnitModel : IModel - { + { + private string culture; + + private string requestedCulture; + protected AbstractNumberWithUnitModel(Dictionary extractorParserDic) { this.ExtractorParserDic = extractorParserDic; @@ -15,6 +22,10 @@ protected AbstractNumberWithUnitModel(Dictionary extractorP public abstract string ModelTypeName { get; } + public string Culture => this.culture; + + public string RequestedCulture => this.requestedCulture; + protected Dictionary ExtractorParserDic { get; } public List Parse(string query) @@ -58,6 +69,12 @@ public List Parse(string query) { ResolutionKey.Unit, ((CurrencyUnitValue)o.Value).Unit }, { ResolutionKey.IsoCurrency, ((CurrencyUnitValue)o.Value).IsoCurrency }, } + : (o.Value is UnitValue && !string.IsNullOrEmpty(o.Type) && Constants.ValidSubTypes.Contains(o.Type)) ? new SortedDictionary + { + { ResolutionKey.Value, ((UnitValue)o.Value).Number }, + { ResolutionKey.Unit, ((UnitValue)o.Value).Unit }, + { ResolutionKey.SubType, o.Type }, + } : (o.Value is UnitValue) ? 
new SortedDictionary { { ResolutionKey.Value, ((UnitValue)o.Value).Number }, @@ -77,7 +94,7 @@ public List Parse(string query) foreach (var extractionResult in extractionResults) { - if (extractionResult.Start == result.Start && extractionResult.End == result.End) + if (result.Start <= extractionResult.Start && result.End >= extractionResult.End) { shouldAdd = false; } @@ -98,5 +115,11 @@ public List Parse(string query) return extractionResults; } + + public void SetCultureInfo(string culture, string requestedCulture = null) + { + this.culture = culture; + this.requestedCulture = requestedCulture; + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/AgeModel.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/AgeModel.cs index 94a30d7620..c14c9532b3 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/AgeModel.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/AgeModel.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.NumberWithUnit { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/CurrencyModel.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/CurrencyModel.cs index 6e6e3fc0a4..7d9bc7f63a 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/CurrencyModel.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/CurrencyModel.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.NumberWithUnit { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/DimensionModel.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/DimensionModel.cs index 3c9fbccd37..f4c57f9b8c 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/DimensionModel.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/DimensionModel.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.NumberWithUnit { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/TemperatureModel.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/TemperatureModel.cs index 0cb79d6fc6..57aadfd02e 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/TemperatureModel.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Models/TemperatureModel.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.NumberWithUnit { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/NumberWithUnitOptions.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/NumberWithUnitOptions.cs index cce72c53db..a07ddc491b 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/NumberWithUnitOptions.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/NumberWithUnitOptions.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; namespace Microsoft.Recognizers.Text.NumberWithUnit { @@ -9,5 +12,15 @@ public enum NumberWithUnitOptions /// Represents None /// None = 0, + + /// + /// NoProtoCache + /// + NoProtoCache = 16, + + /// + /// EnablePreview + /// + EnablePreview = 8388608, // 2 ^23 } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/NumberWithUnitRecognizer.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/NumberWithUnitRecognizer.cs index 94a6a7bc74..2c0a9e5310 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/NumberWithUnitRecognizer.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/NumberWithUnitRecognizer.cs @@ -1,27 +1,35 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; namespace Microsoft.Recognizers.Text.NumberWithUnit { public class NumberWithUnitRecognizer : Recognizer { + public NumberWithUnitRecognizer(string targetCulture, NumberWithUnitOptions options, bool lazyInitialization, int timeoutInSeconds) + : base(targetCulture, options, lazyInitialization, timeoutInSeconds) + { + } + public NumberWithUnitRecognizer(string targetCulture, NumberWithUnitOptions options = NumberWithUnitOptions.None, bool lazyInitialization = false) - : base(targetCulture, options, lazyInitialization) + : base(targetCulture, options, lazyInitialization, 0) { } public NumberWithUnitRecognizer(string targetCulture, int options, bool lazyInitialization = false) - : this(targetCulture, GetOptions(options), lazyInitialization) + : this(targetCulture, GetOptions(options), lazyInitialization, 0) { } public NumberWithUnitRecognizer(NumberWithUnitOptions options = NumberWithUnitOptions.None, bool lazyInitialization = true) - : this(null, options, lazyInitialization) + : this(null, options, lazyInitialization, 0) { } public NumberWithUnitRecognizer(int options, bool lazyInitialization = true) - : this(null, GetOptions(options), 
lazyInitialization) + : this(null, GetOptions(options), lazyInitialization, 0) { } @@ -67,46 +75,6 @@ public AgeModel GetAgeModel(string culture = null, bool fallbackToDefaultCulture protected override void InitializeConfiguration() { - RegisterModel( - Culture.English, - (options) => new CurrencyModel(new Dictionary - { - { - new BaseMergedUnitExtractor(new English.CurrencyExtractorConfiguration()), - new BaseMergedUnitParser(new English.CurrencyParserConfiguration()) - }, - })); - - RegisterModel( - Culture.English, - (options) => new TemperatureModel(new Dictionary - { - { - new NumberWithUnitExtractor(new English.TemperatureExtractorConfiguration()), - new NumberWithUnitParser(new English.TemperatureParserConfiguration()) - }, - })); - - RegisterModel( - Culture.English, - (options) => new DimensionModel(new Dictionary - { - { - new NumberWithUnitExtractor(new English.DimensionExtractorConfiguration()), - new NumberWithUnitParser(new English.DimensionParserConfiguration()) - }, - })); - - RegisterModel( - Culture.English, - (options) => new AgeModel(new Dictionary - { - { - new NumberWithUnitExtractor(new English.AgeExtractorConfiguration()), - new NumberWithUnitParser(new English.AgeParserConfiguration()) - }, - })); - RegisterModel( Culture.Chinese, (options) => new CurrencyModel(new Dictionary @@ -164,82 +132,82 @@ protected override void InitializeConfiguration() })); RegisterModel( - Culture.Spanish, + Culture.Dutch, (options) => new CurrencyModel(new Dictionary { { - new NumberWithUnitExtractor(new Spanish.CurrencyExtractorConfiguration()), - new NumberWithUnitParser(new Spanish.CurrencyParserConfiguration()) + new BaseMergedUnitExtractor(new Dutch.CurrencyExtractorConfiguration()), + new BaseMergedUnitParser(new Dutch.CurrencyParserConfiguration()) }, })); RegisterModel( - Culture.Spanish, + Culture.Dutch, (options) => new TemperatureModel(new Dictionary { { - new NumberWithUnitExtractor(new Spanish.TemperatureExtractorConfiguration()), - new 
NumberWithUnitParser(new Spanish.TemperatureParserConfiguration()) + new NumberWithUnitExtractor(new Dutch.TemperatureExtractorConfiguration()), + new NumberWithUnitParser(new Dutch.TemperatureParserConfiguration()) }, })); RegisterModel( - Culture.Spanish, + Culture.Dutch, (options) => new DimensionModel(new Dictionary { { - new NumberWithUnitExtractor(new Spanish.DimensionExtractorConfiguration()), - new NumberWithUnitParser(new Spanish.DimensionParserConfiguration()) + new NumberWithUnitExtractor(new Dutch.DimensionExtractorConfiguration()), + new NumberWithUnitParser(new Dutch.DimensionParserConfiguration()) }, })); RegisterModel( - Culture.Spanish, + Culture.Dutch, (options) => new AgeModel(new Dictionary { { - new NumberWithUnitExtractor(new Spanish.AgeExtractorConfiguration()), - new NumberWithUnitParser(new Spanish.AgeParserConfiguration()) + new NumberWithUnitExtractor(new Dutch.AgeExtractorConfiguration()), + new NumberWithUnitParser(new Dutch.AgeParserConfiguration()) }, })); RegisterModel( - Culture.Portuguese, + Culture.English, (options) => new CurrencyModel(new Dictionary { { - new NumberWithUnitExtractor(new Portuguese.CurrencyExtractorConfiguration()), - new NumberWithUnitParser(new Portuguese.CurrencyParserConfiguration()) + new BaseMergedUnitExtractor(new English.CurrencyExtractorConfiguration()), + new BaseMergedUnitParser(new English.CurrencyParserConfiguration()) }, })); RegisterModel( - Culture.Portuguese, + Culture.English, (options) => new TemperatureModel(new Dictionary { { - new NumberWithUnitExtractor(new Portuguese.TemperatureExtractorConfiguration()), - new NumberWithUnitParser(new Portuguese.TemperatureParserConfiguration()) + new NumberWithUnitExtractor(new English.TemperatureExtractorConfiguration()), + new NumberWithUnitParser(new English.TemperatureParserConfiguration()) }, })); RegisterModel( - Culture.Portuguese, + Culture.English, (options) => new DimensionModel(new Dictionary { { - new NumberWithUnitExtractor(new 
Portuguese.DimensionExtractorConfiguration()), - new NumberWithUnitParser(new Portuguese.DimensionParserConfiguration()) + new NumberWithUnitExtractor(new English.DimensionExtractorConfiguration()), + new NumberWithUnitParser(new English.DimensionParserConfiguration()) }, })); RegisterModel( - Culture.Portuguese, + Culture.English, (options) => new AgeModel(new Dictionary { { - new NumberWithUnitExtractor(new Portuguese.AgeExtractorConfiguration()), - new NumberWithUnitParser(new Portuguese.AgeParserConfiguration()) + new NumberWithUnitExtractor(new English.AgeExtractorConfiguration()), + new NumberWithUnitParser(new English.AgeParserConfiguration()) }, })); @@ -248,8 +216,8 @@ protected override void InitializeConfiguration() (options) => new CurrencyModel(new Dictionary { { - new NumberWithUnitExtractor(new French.CurrencyExtractorConfiguration()), - new NumberWithUnitParser(new French.CurrencyParserConfiguration()) + new BaseMergedUnitExtractor(new French.CurrencyExtractorConfiguration()), + new BaseMergedUnitParser(new French.CurrencyParserConfiguration()) }, })); @@ -288,8 +256,8 @@ protected override void InitializeConfiguration() (options) => new CurrencyModel(new Dictionary { { - new NumberWithUnitExtractor(new German.CurrencyExtractorConfiguration()), - new NumberWithUnitParser(new German.CurrencyParserConfiguration()) + new BaseMergedUnitExtractor(new German.CurrencyExtractorConfiguration()), + new BaseMergedUnitParser(new German.CurrencyParserConfiguration()) }, })); @@ -323,13 +291,53 @@ protected override void InitializeConfiguration() }, })); + RegisterModel( + Culture.Hindi, + (options) => new AgeModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Hindi.AgeExtractorConfiguration()), + new NumberWithUnitParser(new Hindi.AgeParserConfiguration()) + }, + })); + + RegisterModel( + Culture.Hindi, + (options) => new TemperatureModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Hindi.TemperatureExtractorConfiguration()), + new 
NumberWithUnitParser(new Hindi.TemperatureParserConfiguration()) + }, + })); + + RegisterModel( + Culture.Hindi, + (options) => new DimensionModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Hindi.DimensionExtractorConfiguration()), + new NumberWithUnitParser(new Hindi.DimensionParserConfiguration()) + }, + })); + + RegisterModel( + Culture.Hindi, + (options) => new CurrencyModel(new Dictionary + { + { + new BaseMergedUnitExtractor(new Hindi.CurrencyExtractorConfiguration()), + new BaseMergedUnitParser(new Hindi.CurrencyParserConfiguration()) + }, + })); + RegisterModel( Culture.Italian, (options) => new CurrencyModel(new Dictionary { { - new NumberWithUnitExtractor(new Italian.CurrencyExtractorConfiguration()), - new NumberWithUnitParser(new Italian.CurrencyParserConfiguration()) + new BaseMergedUnitExtractor(new Italian.CurrencyExtractorConfiguration()), + new BaseMergedUnitParser(new Italian.CurrencyParserConfiguration()) }, })); @@ -351,7 +359,7 @@ protected override void InitializeConfiguration() new NumberWithUnitExtractor(new Italian.DimensionExtractorConfiguration()), new NumberWithUnitParser(new Italian.DimensionParserConfiguration()) }, - })); + })); RegisterModel( Culture.Italian, @@ -391,43 +399,247 @@ protected override void InitializeConfiguration() }, })); + RegisterModel( + Culture.Japanese, + (options) => new DimensionModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Japanese.DimensionExtractorConfiguration()), + new NumberWithUnitParser(new Japanese.DimensionParserConfiguration()) + }, /* + { + new NumberWithUnitExtractor(new English.DimensionExtractorConfiguration()), + new NumberWithUnitParser(new English.DimensionParserConfiguration()) + }, */ + })); + + RegisterModel( + Culture.Japanese, + (options) => new TemperatureModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Japanese.TemperatureExtractorConfiguration()), + new NumberWithUnitParser(new Japanese.TemperatureParserConfiguration()) + }, + { + new 
NumberWithUnitExtractor(new English.TemperatureExtractorConfiguration()), + new NumberWithUnitParser(new English.TemperatureParserConfiguration()) + }, + })); + RegisterModel( - Culture.Dutch, + Culture.Korean, (options) => new CurrencyModel(new Dictionary { { - new BaseMergedUnitExtractor(new Dutch.CurrencyExtractorConfiguration()), - new BaseMergedUnitParser(new Dutch.CurrencyParserConfiguration()) + new BaseMergedUnitExtractor(new Korean.CurrencyExtractorConfiguration()), + new BaseMergedUnitParser(new Korean.CurrencyParserConfiguration()) + }, + { + new NumberWithUnitExtractor(new English.CurrencyExtractorConfiguration()), + new NumberWithUnitParser(new English.CurrencyParserConfiguration()) }, })); RegisterModel( - Culture.Dutch, + Culture.Korean, (options) => new TemperatureModel(new Dictionary { { - new NumberWithUnitExtractor(new Dutch.TemperatureExtractorConfiguration()), - new NumberWithUnitParser(new Dutch.TemperatureParserConfiguration()) + new NumberWithUnitExtractor(new Korean.TemperatureExtractorConfiguration()), + new NumberWithUnitParser(new Korean.TemperatureParserConfiguration()) + }, + { + new NumberWithUnitExtractor(new English.TemperatureExtractorConfiguration()), + new NumberWithUnitParser(new English.TemperatureParserConfiguration()) }, })); RegisterModel( - Culture.Dutch, + Culture.Korean, (options) => new DimensionModel(new Dictionary { { - new NumberWithUnitExtractor(new Dutch.DimensionExtractorConfiguration()), - new NumberWithUnitParser(new Dutch.DimensionParserConfiguration()) + new NumberWithUnitExtractor(new Korean.DimensionExtractorConfiguration()), + new NumberWithUnitParser(new Korean.DimensionParserConfiguration()) + }, + { + new NumberWithUnitExtractor(new English.DimensionExtractorConfiguration()), + new NumberWithUnitParser(new English.DimensionParserConfiguration()) }, })); RegisterModel( - Culture.Dutch, + Culture.Korean, (options) => new AgeModel(new Dictionary { { - new NumberWithUnitExtractor(new 
Dutch.AgeExtractorConfiguration()), - new NumberWithUnitParser(new Dutch.AgeParserConfiguration()) + new NumberWithUnitExtractor(new Korean.AgeExtractorConfiguration()), + new NumberWithUnitParser(new Korean.AgeParserConfiguration()) + }, + { + new NumberWithUnitExtractor(new English.AgeExtractorConfiguration()), + new NumberWithUnitParser(new English.AgeParserConfiguration()) + }, + })); + + RegisterModel( + Culture.Portuguese, + (options) => new CurrencyModel(new Dictionary + { + { + new BaseMergedUnitExtractor(new Portuguese.CurrencyExtractorConfiguration()), + new BaseMergedUnitParser(new Portuguese.CurrencyParserConfiguration()) + }, + })); + + RegisterModel( + Culture.Portuguese, + (options) => new TemperatureModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Portuguese.TemperatureExtractorConfiguration()), + new NumberWithUnitParser(new Portuguese.TemperatureParserConfiguration()) + }, + })); + + RegisterModel( + Culture.Portuguese, + (options) => new DimensionModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Portuguese.DimensionExtractorConfiguration()), + new NumberWithUnitParser(new Portuguese.DimensionParserConfiguration()) + }, + })); + + RegisterModel( + Culture.Portuguese, + (options) => new AgeModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Portuguese.AgeExtractorConfiguration()), + new NumberWithUnitParser(new Portuguese.AgeParserConfiguration()) + }, + })); + + RegisterModel( + Culture.Spanish, + (options) => new CurrencyModel(new Dictionary + { + { + new BaseMergedUnitExtractor(new Spanish.CurrencyExtractorConfiguration()), + new BaseMergedUnitParser(new Spanish.CurrencyParserConfiguration()) + }, + })); + + RegisterModel( + Culture.Spanish, + (options) => new TemperatureModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Spanish.TemperatureExtractorConfiguration()), + new NumberWithUnitParser(new Spanish.TemperatureParserConfiguration()) + }, + })); + + RegisterModel( + Culture.Spanish, + 
(options) => new DimensionModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Spanish.DimensionExtractorConfiguration()), + new NumberWithUnitParser(new Spanish.DimensionParserConfiguration()) + }, + })); + + RegisterModel( + Culture.Spanish, + (options) => new AgeModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Spanish.AgeExtractorConfiguration()), + new NumberWithUnitParser(new Spanish.AgeParserConfiguration()) + }, + })); + + RegisterModel( + Culture.SpanishMexican, + (options) => new CurrencyModel(new Dictionary + { + { + new BaseMergedUnitExtractor(new Spanish.CurrencyExtractorConfiguration()), + new BaseMergedUnitParser(new Spanish.CurrencyParserConfiguration()) + }, + })); + + RegisterModel( + Culture.SpanishMexican, + (options) => new TemperatureModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Spanish.TemperatureExtractorConfiguration()), + new NumberWithUnitParser(new Spanish.TemperatureParserConfiguration()) + }, + })); + + RegisterModel( + Culture.SpanishMexican, + (options) => new DimensionModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Spanish.DimensionExtractorConfiguration()), + new NumberWithUnitParser(new Spanish.DimensionParserConfiguration()) + }, + })); + + RegisterModel( + Culture.SpanishMexican, + (options) => new AgeModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Spanish.AgeExtractorConfiguration()), + new NumberWithUnitParser(new Spanish.AgeParserConfiguration()) + }, + })); + + RegisterModel( + Culture.Swedish, + (options) => new AgeModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Swedish.AgeExtractorConfiguration()), + new NumberWithUnitParser(new Swedish.AgeParserConfiguration()) + }, + })); + + RegisterModel( + Culture.Swedish, + (options) => new TemperatureModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Swedish.TemperatureExtractorConfiguration()), + new NumberWithUnitParser(new Swedish.TemperatureParserConfiguration()) + }, + })); + + 
RegisterModel( + Culture.Swedish, + (options) => new DimensionModel(new Dictionary + { + { + new NumberWithUnitExtractor(new Swedish.DimensionExtractorConfiguration()), + new NumberWithUnitParser(new Swedish.DimensionParserConfiguration()) + }, + })); + + RegisterModel( + Culture.Swedish, + (options) => new CurrencyModel(new Dictionary + { + { + new BaseMergedUnitExtractor(new Swedish.CurrencyExtractorConfiguration()), + new BaseMergedUnitParser(new Swedish.CurrencyParserConfiguration()) }, })); @@ -472,6 +684,14 @@ protected override void InitializeConfiguration() })); } + protected override List GetRelatedTypes() + { + return new List() + { + typeof(NumberWithUnitExtractor), + }; + } + private static List RecognizeByModel(Func getModelFunc, string query, NumberWithUnitOptions options) { var recognizer = new NumberWithUnitRecognizer(options); diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/BaseCurrencyParser.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/BaseCurrencyParser.cs index 661fcfdbe9..60228ba1c1 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/BaseCurrencyParser.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/BaseCurrencyParser.cs @@ -1,12 +1,17 @@ -using System; -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+using System; +using System.Collections.Generic; +using System.Globalization; using Microsoft.Recognizers.Text.NumberWithUnit.Utilities; namespace Microsoft.Recognizers.Text.NumberWithUnit { public class BaseCurrencyParser : IParser { + private const int DefaultFractionalSubunit = 100; + private readonly NumberWithUnitParser numberWithUnitParser; public BaseCurrencyParser(BaseNumberWithUnitParserConfiguration config) @@ -23,13 +28,33 @@ public ParseResult Parse(ExtractResult extResult) if (extResult.Data is List) { - pr = MergeCompoundUnit(extResult); + pr = MergeMultiplier(extResult); + if (pr == null) + { + pr = MergeCompoundUnit(extResult); + } } else { pr = numberWithUnitParser.Parse(extResult); var value = pr.Value as UnitValue; + // Parse unit like "MUSD" that resolves to "1 million USD" + if (extResult.Data is ExtractResult) + { + var data = (ExtractResult)extResult.Data; + var unitStr = extResult.Text.Replace(data.Text, string.Empty).Trim(); + if (this.Config.MultiplierIsoCodeList.Contains(unitStr) && float.TryParse(value?.Number, out var number)) + { + value.Number = (number * 1000000).ToString("G15", CultureInfo.InvariantCulture); + pr.Value = new UnitValue + { + Unit = value?.Unit, + Number = value?.Number, + }; + } + } + Config.CurrencyNameToIsoCodeMap.TryGetValue(value?.Unit, out var mainUnitIsoCode); if (string.IsNullOrEmpty(mainUnitIsoCode) || mainUnitIsoCode.StartsWith(Constants.FAKE_ISO_CODE_PREFIX, StringComparison.Ordinal)) { @@ -148,7 +173,7 @@ private ParseResult MergeCompoundUnit(ExtractResult compoundResult) if (parseResultValue?.Number != null) { - numberValue = double.Parse(parseResultValue.Number); + numberValue = double.Parse(parseResultValue.Number, CultureInfo.InvariantCulture); } result.ResolutionStr = parseResult.ResolutionStr; @@ -175,9 +200,16 @@ private ParseResult MergeCompoundUnit(ExtractResult compoundResult) // Match pure number as fraction unit. 
if (extractResult.Type.Equals(Constants.SYS_NUM, StringComparison.Ordinal)) { - numberValue += (double)parseResult.Value * (1.0 / 100); - result.ResolutionStr += ' ' + parseResult.ResolutionStr; - result.Length = parseResult.Start + parseResult.Length - result.Start; + Config.NonStandardFractionalSubunits.TryGetValue(mainUnitIsoCode, out var fractionMaxValue); + + fractionMaxValue = fractionMaxValue == 0 ? DefaultFractionalSubunit : fractionMaxValue; + if ((double)parseResult.Value < fractionMaxValue) + { + numberValue += (double)parseResult.Value * (1.0 / fractionMaxValue); + result.ResolutionStr += ' ' + parseResult.ResolutionStr; + result.Length = parseResult.Start + parseResult.Length - result.Start; + } + count++; continue; } @@ -188,7 +220,7 @@ private ParseResult MergeCompoundUnit(ExtractResult compoundResult) if (!string.IsNullOrEmpty(fractionUnitCode) && fractionNumValue != 0 && CheckUnitsStringContains(fractionUnitCode, fractionUnitsString)) { - numberValue += double.Parse(parseResultValue?.Number) * + numberValue += double.Parse(parseResultValue?.Number, CultureInfo.InvariantCulture) * (1.0 / fractionNumValue); result.ResolutionStr += ' ' + parseResult.ResolutionStr; result.Length = parseResult.Start + parseResult.Length - result.Start; @@ -226,5 +258,47 @@ private ParseResult MergeCompoundUnit(ExtractResult compoundResult) Value = results, }; } + + // Parse patterns where a multiplier follows the unit e.g. 
"10 USD million" + private ParseResult MergeMultiplier(ExtractResult er) + { + var parseResult = new ParseResult { }; + if (er.Metadata != null && er.Metadata.HasMod) + { + var origResult = ((List)er.Data)[0]; + var origData = origResult.Data as ExtractResult; + + var multiplier = er.Text.Replace(origResult.Text, string.Empty); + var number = origData.Text; + var unit = origResult.Text.Replace(number, string.Empty); + var orderedStr = number + multiplier + unit; + + origData.Length = (number + multiplier).Length; + origData.Text = number + multiplier; + + ExtractResult result = er.Clone(); + result.Text = orderedStr; + result.Data = origData; + + parseResult = numberWithUnitParser.Parse(result); + if (parseResult.Value != null) + { + parseResult.Text = er.Text; + var value = parseResult.Value as UnitValue; + Config.CurrencyNameToIsoCodeMap.TryGetValue(value?.Unit, out var mainUnitIsoCode); + if (!string.IsNullOrEmpty(mainUnitIsoCode) && !mainUnitIsoCode.StartsWith(Constants.FAKE_ISO_CODE_PREFIX, StringComparison.Ordinal)) + { + parseResult.Value = new CurrencyUnitValue + { + Unit = value?.Unit, + Number = value?.Number, + IsoCurrency = mainUnitIsoCode, + }; + } + } + } + + return parseResult.Value != null ? parseResult : null; + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/BaseMergedUnitParser.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/BaseMergedUnitParser.cs index 1db4c6b439..a4dced82f4 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/BaseMergedUnitParser.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/BaseMergedUnitParser.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; namespace Microsoft.Recognizers.Text.NumberWithUnit { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/CurrencyUnitValue.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/CurrencyUnitValue.cs index 1deddc1baf..110b8e2e3d 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/CurrencyUnitValue.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/CurrencyUnitValue.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.NumberWithUnit +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.NumberWithUnit { public class CurrencyUnitValue : UnitValue { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/INumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/INumberWithUnitParserConfiguration.cs index 3bd3d59817..c12f3d65c2 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/INumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/INumberWithUnitParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; @@ -21,6 +24,10 @@ public interface INumberWithUnitParserConfiguration string ConnectorToken { get; } + bool CheckFirstSuffix { get; set; } + + IDictionary TypeList { get; } + void BindDictionary(IDictionary dictionary); } @@ -32,8 +39,12 @@ protected BaseNumberWithUnitParserConfiguration(CultureInfo ci) this.UnitMap = new Dictionary(); this.CurrencyFractionNumMap = BaseCurrency.CurrencyFractionalRatios.ToImmutableDictionary(); this.CurrencyFractionMapping = BaseCurrency.CurrencyFractionMapping.ToImmutableDictionary(); + this.NonStandardFractionalSubunits = BaseCurrency.NonStandardFractionalSubunits.ToImmutableDictionary(); this.CurrencyNameToIsoCodeMap = new Dictionary(); + this.MultiplierIsoCodeList = new List(); this.CurrencyFractionCodeList = new Dictionary(); + this.TypeList = new Dictionary(); + this.CheckFirstSuffix = false; } public IDictionary UnitMap { get; } @@ -42,6 +53,8 @@ protected BaseNumberWithUnitParserConfiguration(CultureInfo ci) public IDictionary CurrencyFractionMapping { get; } + public IDictionary NonStandardFractionalSubunits { get; } + public CultureInfo CultureInfo { get; } public abstract IParser InternalNumberParser { get; } @@ -50,10 +63,16 @@ protected BaseNumberWithUnitParserConfiguration(CultureInfo ci) public abstract string ConnectorToken { get; } + public bool CheckFirstSuffix { get; set; } + public IDictionary CurrencyNameToIsoCodeMap { get; set; } + public List MultiplierIsoCodeList { get; set; } + public IDictionary CurrencyFractionCodeList { get; set; } + public abstract IDictionary TypeList { get; set; } + public void BindDictionary(IDictionary dictionary) { DictionaryUtils.BindDictionary(dictionary, UnitMap); diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/NumberWithUnitParser.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/NumberWithUnitParser.cs index 36f3de8a09..0f766b5fa2 100644 --- 
a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/NumberWithUnitParser.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/NumberWithUnitParser.cs @@ -1,7 +1,12 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; +using System.Globalization; using System.Linq; using System.Text; +using Microsoft.Recognizers.Text.NumberWithUnit.Utilities; namespace Microsoft.Recognizers.Text.NumberWithUnit { @@ -35,21 +40,35 @@ public static void AddIfNotContained(List unitKeys, string unit) public ParseResult Parse(ExtractResult extResult) { var ret = new ParseResult(extResult); + ExtractResult numberResult; + ExtractResult halfResult; + + if (extResult.Data is List && this.Config as English.DimensionParserConfiguration != null) + { + return MergeCompoundUnit(extResult); + } if (extResult.Data is ExtractResult unitResult) { numberResult = unitResult; + halfResult = null; } else if (extResult.Type.Equals(Constants.SYS_NUM, StringComparison.Ordinal)) { ret.Value = Config.InternalNumberParser.Parse(extResult).Value; return ret; } + else if (extResult.Data is System.Collections.IList && ((List)extResult.Data).Count == 2) + { + numberResult = ((List)extResult.Data)[0]; + halfResult = ((List)extResult.Data)[1]; + } else { // If there is no unitResult, means there is just unit - numberResult = new ExtractResult { Start = -1, Length = 0 }; + numberResult = new ExtractResult { Start = -1, Length = 0, Text = string.Empty }; + halfResult = null; } // Key contains units @@ -86,30 +105,75 @@ public ParseResult Parse(ExtractResult extResult) } } - // Unit type depends on last unit in suffix - var lastUnit = unitKeys.Last(); + // By default, unit type depends on last unit in suffix, + // but in certain cultures (e.g. Japanese) it depends on first unit in suffix + var lastUnit = Config.CheckFirstSuffix ? 
unitKeys.First() : unitKeys.Last(); + + if (halfResult != null) + { + lastUnit = lastUnit.Substring(0, lastUnit.Length - halfResult.Text.Length).Trim(); + } + var normalizedLastUnit = lastUnit.ToLowerInvariant(); - if (!string.IsNullOrEmpty(Config.ConnectorToken) && normalizedLastUnit.StartsWith(Config.ConnectorToken)) + if (!string.IsNullOrEmpty(Config.ConnectorToken) && normalizedLastUnit.StartsWith(Config.ConnectorToken, StringComparison.Ordinal)) { normalizedLastUnit = normalizedLastUnit.Substring(Config.ConnectorToken.Length).Trim(); lastUnit = lastUnit.Substring(Config.ConnectorToken.Length).Trim(); } + // Delete brackets + normalizedLastUnit = DeleteBracketsIfExisted(normalizedLastUnit); + lastUnit = DeleteBracketsIfExisted(lastUnit); + if (!string.IsNullOrWhiteSpace(key) && Config.UnitMap != null) { if (Config.UnitMap.TryGetValue(lastUnit, out var unitValue) || Config.UnitMap.TryGetValue(normalizedLastUnit, out unitValue)) { + var numValue = string.IsNullOrEmpty(numberResult.Text) ? - null : - this.Config.InternalNumberParser.Parse(numberResult); + null : + this.Config.InternalNumberParser.Parse(numberResult); + + var resolutionStr = numValue?.ResolutionStr; + + if (halfResult != null) + { + var halfValue = this.Config.InternalNumberParser.Parse(halfResult); + resolutionStr += halfValue?.ResolutionStr.Substring(1); + } + + // In certain cultures the unit can be split around the number, + // e.g. in Japanese "秒速100メートル" ('speed per second 100 meters' = 100m/s). + // Here prefix and suffix are combined in order to parse the unit correctly. 
+ if (unitValue == Constants.SPLIT_UNIT && unitKeys.Count > 1 && this.Config.CheckFirstSuffix) + { + if (Config.UnitMap.TryGetValue(lastUnit + unitKeys[1], out var allUnitValue) || + Config.UnitMap.TryGetValue(unitKeys[1], out allUnitValue) || + Config.UnitMap.TryGetValue(unitKeys[1].ToLowerInvariant(), out allUnitValue)) + { + unitValue = allUnitValue; + } + } + + if (unitValue == Constants.SPLIT_UNIT) + { + return ret; + } ret.Value = new UnitValue { - Number = numValue?.ResolutionStr, + Number = resolutionStr, Unit = unitValue, }; + ret.ResolutionStr = $"{numValue?.ResolutionStr} {unitValue}".Trim(); + + if (extResult.Type.Equals(Constants.SYS_UNIT_DIMENSION, StringComparison.Ordinal) && + this.Config.TypeList.TryGetValue(unitValue, out var unitType)) + { + ret.Type = unitType; + } } } @@ -117,5 +181,194 @@ public ParseResult Parse(ExtractResult extResult) return ret; } + + private static string DeleteBracketsIfExisted(string unit) + { + bool hasBrackets = false; + + if (unit.StartsWith("(", StringComparison.Ordinal) && unit.EndsWith(")", StringComparison.Ordinal)) + { + hasBrackets = true; + } + else if (unit.StartsWith("[", StringComparison.Ordinal) && unit.EndsWith("]", StringComparison.Ordinal)) + { + hasBrackets = true; + } + else if (unit.StartsWith("{", StringComparison.Ordinal) && unit.EndsWith("}", StringComparison.Ordinal)) + { + hasBrackets = true; + } + else if (unit.StartsWith("<", StringComparison.Ordinal) && unit.EndsWith(">", StringComparison.Ordinal)) + { + hasBrackets = true; + } + + if (hasBrackets) + { + unit = unit.Substring(1, unit.Length - 2); + } + + return unit; + } + + private static void ResolveText(List prs, string source, int bias) + { + foreach (var parseResult in prs) + { + if (parseResult.Start != null && parseResult.Length != null) + { + parseResult.Text = source.Substring((int)parseResult.Start - bias, (int)parseResult.Length); + } + } + } + + private static bool CheckUnitsStringContains(string fractionUnitCode, string 
fractionUnitsString) + { + var unitsMap = new Dictionary(); + DictionaryUtils.BindUnitsString(unitsMap, string.Empty, fractionUnitsString); + return unitsMap.ContainsKey(fractionUnitCode); + } + + /// + /// Parsing compounded result, like 5 foot 3 inch. + /// + /// Extracted compounded result. + /// Parsed compounded result. + private ParseResult MergeCompoundUnit(ExtractResult compoundResult) + { + var results = new List(); + var compoundUnit = (List)compoundResult.Data; + + var count = 0; + ParseResult result = null; + double? numberValue = null; + var mainUnitValue = string.Empty; + string mainUnitIsoCode = string.Empty; + string fractionUnitsString = string.Empty; + + for (var idx = 0; idx < compoundUnit.Count; idx++) + { + var extractResult = compoundUnit[idx]; + var parseResult = this.Parse(extractResult); + var parseResultValue = parseResult.Value as UnitValue; + var unitValue = parseResultValue?.Unit; + + // Process a new group + if (count == 0) + { + if (!extractResult.Type.Equals(Constants.SYS_UNIT_DIMENSION, StringComparison.Ordinal)) + { + continue; + } + + // Initialize a new result + result = new ParseResult + { + Start = extractResult.Start, + Length = extractResult.Length, + Text = extractResult.Text, + Type = extractResult.Type, + }; + + mainUnitValue = unitValue; + + if (parseResultValue?.Number != null) + { + numberValue = double.Parse(parseResultValue.Number, CultureInfo.InvariantCulture); + } + + result.ResolutionStr = parseResult.ResolutionStr; + English.DimensionParserConfiguration.LengthUnitToSubUnitMap.TryGetValue(mainUnitValue, out fractionUnitsString); + } + else + { + long fractionNumValue = 0; + string fractionUnit = parseResultValue is null ? 
null : parseResultValue.Unit; + English.DimensionParserConfiguration.LengthSubUnitFractionalRatios.TryGetValue(parseResultValue?.Unit, out fractionNumValue); + + if (!string.IsNullOrEmpty(fractionUnit) && fractionNumValue != 0 && + CheckUnitsStringContains(fractionUnit, fractionUnitsString)) + { + numberValue += double.Parse(parseResultValue?.Number, CultureInfo.InvariantCulture) * + (1.0 / fractionNumValue); + result.ResolutionStr += ' ' + parseResult.ResolutionStr; + result.Length = parseResult.Start + parseResult.Length - result.Start; + } + else + { + // If the fraction unit doesn't match the main unit, finish process this group. + if (result != null) + { + result = CreateResult(result, mainUnitIsoCode, numberValue, mainUnitValue); + results.Add(result); + result = null; + } + + count = 0; + idx -= 1; + numberValue = null; + continue; + } + } + + count++; + } + + if (result != null) + { + result = CreateResult(result, mainUnitIsoCode, numberValue, mainUnitValue); + results.Add(result); + } + + ResolveText(results, compoundResult.Text, (int)compoundResult.Start); + + return new ParseResult + { + Value = results, + }; + } + + private ParseResult CreateResult(ParseResult result, string mainUnitIsoCode, object numberValue, string mainUnitValue) + { + if (string.IsNullOrEmpty(mainUnitIsoCode) || + mainUnitIsoCode.StartsWith(Constants.FAKE_ISO_CODE_PREFIX, StringComparison.Ordinal)) + { + result.Value = new UnitValue + { + Number = GetResolutionStr(numberValue), + Unit = mainUnitValue, + }; + if (result.Type.Equals(Constants.SYS_UNIT_DIMENSION, StringComparison.Ordinal) && + this.Config.TypeList.TryGetValue(mainUnitValue, out var unitType)) + { + result.Type = unitType; + } + } + else + { + result.Value = new CurrencyUnitValue + { + Number = GetResolutionStr(numberValue), + Unit = mainUnitValue, + IsoCurrency = mainUnitIsoCode, + }; + } + + return result; + } + + private string GetResolutionStr(object value) + { + // Nothing to resolve. 
This happens when the entity is a currency name only (no numerical value). + if (value == null) + { + return null; + } + + return Config.CultureInfo != null ? + ((double)value).ToString(Config.CultureInfo) : + value.ToString(); + } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/UnitValue.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/UnitValue.cs index 97f5e7db31..78d4def546 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/UnitValue.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/UnitValue.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.NumberWithUnit +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.NumberWithUnit { public class UnitValue { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/AgeExtractorConfiguration.cs index ba22e831e0..6f4a914066 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/AgeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/AgeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Portuguese; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/AngleExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/AngleExtractorConfiguration.cs new file mode 100644 index 0000000000..b75a925bd1 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/AngleExtractorConfiguration.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Portuguese; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Portuguese +{ + public class AngleExtractorConfiguration : PortugueseNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary AngleSuffixList = + NumbersWithUnitDefinitions.AngleSuffixList.ToImmutableDictionary(); + + public static readonly ImmutableList AmbiguousUnits = + NumbersWithUnitDefinitions.AmbiguousAngleUnitList.ToImmutableList(); + + public AngleExtractorConfiguration() + : this(new CultureInfo(Culture.Portuguese)) + { + } + + public AngleExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => AngleSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; + + public override string ExtractType => Constants.SYS_UNIT_ANGLE; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/AreaExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/AreaExtractorConfiguration.cs index cdf9218cd8..d85f91e648 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/AreaExtractorConfiguration.cs 
+++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/AreaExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Globalization; @@ -10,7 +13,7 @@ public class AreaExtractorConfiguration : PortugueseNumberWithUnitExtractorConfi { public static readonly ImmutableDictionary AreaSuffixList = NumbersWithUnitDefinitions.AreaSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = new List { }.ToImmutableList(); + public static readonly ImmutableList AmbiguousValues = new List { }.ToImmutableList(); public AreaExtractorConfiguration() : this(new CultureInfo(Culture.Portuguese)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/CurrencyExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/CurrencyExtractorConfiguration.cs index 4847fc3450..9b7e0049e4 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/CurrencyExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/CurrencyExtractorConfiguration.cs @@ -1,5 +1,11 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; using System.Globalization; +using System.Linq; using Microsoft.Recognizers.Definitions.Portuguese; @@ -9,7 +15,25 @@ public class CurrencyExtractorConfiguration : PortugueseNumberWithUnitExtractorC { public static readonly ImmutableDictionary CurrencySuffixList = NumbersWithUnitDefinitions.CurrencySuffixList.ToImmutableDictionary(); - public static readonly ImmutableDictionary CurrencyPrefixList = NumbersWithUnitDefinitions.CurrencyPrefixList.ToImmutableDictionary(); + // CurrencyNameToIsoCodeMap dictionary (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary IsoCodeDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture)); + + // CurrencyNameToIsoCodeMap followed by '$' symbol (e.g. 'AUD$') + public static readonly Dictionary IsoCodeWithSymbolDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture) + "$"); + + // Merge IsoCodeDict and IsoCodeWithSymbolDict + public static readonly Dictionary IsoCodeCombinedDict = IsoCodeDict.Concat(IsoCodeWithSymbolDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + // Merge IsoCodeCombinedDict with CurrencyPrefixList (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary CurrencyPrefixDict = NumbersWithUnitDefinitions.CurrencyPrefixList.Concat(IsoCodeCombinedDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? 
string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + public static readonly ImmutableDictionary CurrencyPrefixList = CurrencyPrefixDict.ToImmutableDictionary(); private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousCurrencyUnitList.ToImmutableList(); diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/DimensionExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/DimensionExtractorConfiguration.cs index c78838f388..a579f88d9d 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/DimensionExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/DimensionExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using System.Linq; @@ -14,9 +17,28 @@ public class DimensionExtractorConfiguration : PortugueseNumberWithUnitExtractor .Concat(SpeedExtractorConfiguration.SpeedSuffixList) .Concat(VolumeExtractorConfiguration.VolumeSuffixList) .Concat(WeightExtractorConfiguration.WeightSuffixList) + .Concat(AngleExtractorConfiguration.AngleSuffixList) + .ToImmutableDictionary(x => x.Key, x => x.Value); + + public static readonly ImmutableDictionary DimensionTypeList = + NumbersWithUnitDefinitions.InformationSuffixList.ToDictionary(x => x.Key, x => Constants.INFORMATION) + .Concat(AreaExtractorConfiguration.AreaSuffixList.ToDictionary(x => x.Key, x => Constants.AREA)) + .Concat(LengthExtractorConfiguration.LengthSuffixList.ToDictionary(x => x.Key, x => Constants.LENGTH)) + .Concat(SpeedExtractorConfiguration.SpeedSuffixList.ToDictionary(x => x.Key, x => Constants.SPEED)) + .Concat(VolumeExtractorConfiguration.VolumeSuffixList.ToDictionary(x => x.Key, x => Constants.VOLUME)) + 
.Concat(WeightExtractorConfiguration.WeightSuffixList.ToDictionary(x => x.Key, x => Constants.WEIGHT)) + .Concat(AngleExtractorConfiguration.AngleSuffixList.ToDictionary(x => x.Key, x => Constants.ANGLE)) .ToImmutableDictionary(x => x.Key, x => x.Value); - private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousDimensionUnitList.ToImmutableList(); + private static readonly ImmutableList AmbiguousValues = + NumbersWithUnitDefinitions.AmbiguousDimensionUnitList + .Concat(LengthExtractorConfiguration.AmbiguousValues) + .Concat(AreaExtractorConfiguration.AmbiguousValues) + .Concat(SpeedExtractorConfiguration.AmbiguousValues) + .Concat(AngleExtractorConfiguration.AmbiguousUnits) + .Concat(WeightExtractorConfiguration.AmbiguousUnits) + .Distinct() + .ToImmutableList(); public DimensionExtractorConfiguration() : base(new CultureInfo(Culture.Portuguese)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/LengthExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/LengthExtractorConfiguration.cs index bf2056dc97..98875cd1ed 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/LengthExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/LengthExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Portuguese; @@ -9,7 +12,7 @@ public class LengthExtractorConfiguration : PortugueseNumberWithUnitExtractorCon { public static readonly ImmutableDictionary LengthSuffixList = NumbersWithUnitDefinitions.LengthSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousLengthUnitList.ToImmutableList(); + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousLengthUnitList.ToImmutableList(); public LengthExtractorConfiguration() : base(new CultureInfo(Culture.Portuguese)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/PortugueseNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/PortugueseNumberWithUnitExtractorConfiguration.cs index 4ecfc68ef8..6491c95039 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/PortugueseNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/PortugueseNumberWithUnitExtractorConfiguration.cs @@ -1,58 +1,46 @@ -using System.Collections.Generic; -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Portuguese; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Portuguese; namespace Microsoft.Recognizers.Text.NumberWithUnit.Portuguese { - public abstract class PortugueseNumberWithUnitExtractorConfiguration : INumberWithUnitExtractorConfiguration + public abstract class PortugueseNumberWithUnitExtractorConfiguration : BaseNumberWithUnitExtractorConfiguration { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly Regex CompoundUnitConnRegex = - new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexFlags); - - private static readonly Regex NonUnitsRegex = - new Regex(BaseUnits.PmNonUnitRegex, RegexFlags); - protected PortugueseNumberWithUnitExtractorConfiguration(CultureInfo ci) + : base( + NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, + BaseUnits.PmNonUnitRegex, + NumbersWithUnitDefinitions.MultiplierRegex, + RegexFlags) { this.CultureInfo = ci; - this.UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit); + + var unitNumConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None, NumberMode.Unit); + this.UnitNumExtractor = NumberExtractor.GetInstance(unitNumConfig); + this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; - } - - public abstract string ExtractType { get; } - - public CultureInfo CultureInfo { get; } - - public IExtractor UnitNumExtractor { get; } - - public string BuildPrefix { get; } - public string BuildSuffix { get; } - - public string ConnectorToken { get; } - - public Regex CompoundUnitConnectorRegex => 
CompoundUnitConnRegex; - - public Regex NonUnitRegex => NonUnitsRegex; - - public virtual Regex AmbiguousUnitNumberMultiplierRegex => null; - - public Dictionary AmbiguityFiltersDict { get; } = null; - - public abstract ImmutableDictionary SuffixList { get; } - - public abstract ImmutableDictionary PrefixList { get; } + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict); + TemperatureAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.TemperatureAmbiguityFiltersDict); + DimensionAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.DimensionAmbiguityFiltersDict); + } - public abstract ImmutableList AmbiguousUnitList { get; } + public override void ExpandHalfSuffix(string source, ref List result, IOrderedEnumerable numbers) + { + } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/SpeedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/SpeedExtractorConfiguration.cs index c1579a4ddf..b7280cda8c 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/SpeedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/SpeedExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Portuguese; @@ -9,7 +12,7 @@ public class SpeedExtractorConfiguration : PortugueseNumberWithUnitExtractorConf { public static readonly ImmutableDictionary SpeedSuffixList = NumbersWithUnitDefinitions.SpeedSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousSpeedUnitList.ToImmutableList(); + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousSpeedUnitList.ToImmutableList(); public SpeedExtractorConfiguration() : base(new CultureInfo(Culture.Portuguese)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/TemperatureExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/TemperatureExtractorConfiguration.cs index 10fc4e26c6..cf99d2a377 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/TemperatureExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/TemperatureExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; @@ -15,7 +18,7 @@ public class TemperatureExtractorConfiguration : PortugueseNumberWithUnitExtract private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex AmbiguousUnitMultiplierRegex = - new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags); + new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags, RegexTimeOut); public TemperatureExtractorConfiguration() : this(new CultureInfo(Culture.Portuguese)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/VolumeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/VolumeExtractorConfiguration.cs index 56ba463562..88c6527f7d 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/VolumeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/VolumeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Portuguese; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/WeightExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/WeightExtractorConfiguration.cs index 9efc8d6753..6538198a50 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/WeightExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/WeightExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Portuguese; @@ -9,6 +12,9 @@ public class WeightExtractorConfiguration : PortugueseNumberWithUnitExtractorCon { public static readonly ImmutableDictionary WeightSuffixList = NumbersWithUnitDefinitions.WeightSuffixList.ToImmutableDictionary(); + public static readonly ImmutableList AmbiguousUnits = + NumbersWithUnitDefinitions.AmbiguousWeightUnitList.ToImmutableList(); + public WeightExtractorConfiguration() : this(new CultureInfo(Culture.Portuguese)) { @@ -23,7 +29,7 @@ public WeightExtractorConfiguration(CultureInfo ci) public override ImmutableDictionary PrefixList => null; - public override ImmutableList AmbiguousUnitList => null; + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; public override string ExtractType => Constants.SYS_UNIT_WEIGHT; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/AgeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/AgeParserConfiguration.cs index b741bc3a8d..885e7c65f9 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/AgeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/AgeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Portuguese { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/AreaParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/AreaParserConfiguration.cs index a0e45dca90..d1ff1cc1c7 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/AreaParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/AreaParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Portuguese { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/CurrencyParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/CurrencyParserConfiguration.cs index 76de0b3c9c..021e471873 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/CurrencyParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/CurrencyParserConfiguration.cs @@ -1,4 +1,9 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; +using Microsoft.Recognizers.Definitions.Portuguese; namespace Microsoft.Recognizers.Text.NumberWithUnit.Portuguese { @@ -14,6 +19,8 @@ public CurrencyParserConfiguration(CultureInfo ci) { this.BindDictionary(CurrencyExtractorConfiguration.CurrencySuffixList); this.BindDictionary(CurrencyExtractorConfiguration.CurrencyPrefixList); + this.CurrencyNameToIsoCodeMap = NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.ToImmutableDictionary(); + this.CurrencyFractionCodeList = NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary(); } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/DimensionParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/DimensionParserConfiguration.cs index ad1357f481..cf1006f011 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/DimensionParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/DimensionParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Portuguese { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/LengthParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/LengthParserConfiguration.cs index b940bfc288..5769e890a9 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/LengthParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/LengthParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Portuguese { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/PortugueseNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/PortugueseNumberWithUnitParserConfiguration.cs index fe3683fb4e..4d41d582ee 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/PortugueseNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/PortugueseNumberWithUnitParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Globalization; using Microsoft.Recognizers.Definitions.Portuguese; using Microsoft.Recognizers.Text.Number; @@ -11,10 +15,15 @@ public class PortugueseNumberWithUnitParserConfiguration : BaseNumberWithUnitPar public PortugueseNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { - this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new PortugueseNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + + var numConfig = new BaseNumberOptionsConfiguration(Culture.Portuguese, NumberOptions.None); + + this.InternalNumberExtractor = NumberExtractor.GetInstance(numConfig); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new PortugueseNumberParserConfiguration(numConfig)); this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; + + this.TypeList = DimensionExtractorConfiguration.DimensionTypeList; } public override IParser InternalNumberParser { get; } @@ -22,5 +31,7 @@ public PortugueseNumberWithUnitParserConfiguration(CultureInfo ci) public override IExtractor 
InternalNumberExtractor { get; } public override string ConnectorToken { get; } + + public override IDictionary TypeList { get; set; } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/SpeedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/SpeedParserConfiguration.cs index cb4e705675..57f0cb758e 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/SpeedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/SpeedParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Portuguese { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/TemperatureParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/TemperatureParserConfiguration.cs index 64fe6f7710..317fde887d 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/TemperatureParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/TemperatureParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Portuguese { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/VolumeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/VolumeParserConfiguration.cs index 321c19b845..1031727e3c 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/VolumeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/VolumeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Portuguese { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/WeightParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/WeightParserConfiguration.cs index 9359ced2e0..238076c3db 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/WeightParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/WeightParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Portuguese { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Properties/AssemblyInfo.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Properties/AssemblyInfo.cs index ef6b67494e..043c5d8754 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Properties/AssemblyInfo.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Properties/AssemblyInfo.cs @@ -1,4 +1,7 @@ -using System.Reflection; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Reflection; using System.Runtime.InteropServices; // General Information about an assembly is controlled through the following diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/AgeExtractorConfiguration.cs index abe8cd54e1..da49785b93 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/AgeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/AgeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Spanish; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/AngleExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/AngleExtractorConfiguration.cs new file mode 100644 index 0000000000..a761bea41f --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/AngleExtractorConfiguration.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Spanish; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Spanish +{ + public class AngleExtractorConfiguration : SpanishNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary AngleSuffixList = + NumbersWithUnitDefinitions.AngleSuffixList.ToImmutableDictionary(); + + public static readonly ImmutableList AmbiguousUnits = + NumbersWithUnitDefinitions.AmbiguousAngleUnitList.ToImmutableList(); + + public AngleExtractorConfiguration() + : this(new CultureInfo(Culture.Spanish)) + { + } + + public AngleExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => AngleSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; + + public override string ExtractType => Constants.SYS_UNIT_ANGLE; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/AreaExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/AreaExtractorConfiguration.cs index f1ae09ce62..0ad7baad04 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/AreaExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/AreaExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Spanish; @@ -9,7 +12,7 @@ public class AreaExtractorConfiguration : SpanishNumberWithUnitExtractorConfigur { public static readonly ImmutableDictionary AreaSuffixList = NumbersWithUnitDefinitions.AreaSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AreaAmbiguousValues.ToImmutableList(); + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AreaAmbiguousValues.ToImmutableList(); public AreaExtractorConfiguration() : this(new CultureInfo(Culture.Spanish)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/CurrencyExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/CurrencyExtractorConfiguration.cs index f8d828ad83..f46d4b76f8 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/CurrencyExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/CurrencyExtractorConfiguration.cs @@ -1,5 +1,11 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; using System.Globalization; +using System.Linq; using Microsoft.Recognizers.Definitions.Spanish; @@ -9,7 +15,25 @@ public class CurrencyExtractorConfiguration : SpanishNumberWithUnitExtractorConf { public static readonly ImmutableDictionary CurrencySuffixList = NumbersWithUnitDefinitions.CurrencySuffixList.ToImmutableDictionary(); - public static readonly ImmutableDictionary CurrencyPrefixList = NumbersWithUnitDefinitions.CurrencyPrefixList.ToImmutableDictionary(); + // CurrencyNameToIsoCodeMap dictionary (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary IsoCodeDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture)); + + // CurrencyNameToIsoCodeMap followed by '$' symbol (e.g. 'AUD$') + public static readonly Dictionary IsoCodeWithSymbolDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture) + "$"); + + // Merge IsoCodeDict and IsoCodeWithSymbolDict + public static readonly Dictionary IsoCodeCombinedDict = IsoCodeDict.Concat(IsoCodeWithSymbolDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + // Merge IsoCodeCombinedDict with CurrencyPrefixList (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary CurrencyPrefixDict = NumbersWithUnitDefinitions.CurrencyPrefixList.Concat(IsoCodeCombinedDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? 
string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + public static readonly ImmutableDictionary CurrencyPrefixList = CurrencyPrefixDict.ToImmutableDictionary(); private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousCurrencyUnitList.ToImmutableList(); diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/DimensionExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/DimensionExtractorConfiguration.cs index e2ad02e925..ac998c8d23 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/DimensionExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/DimensionExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using System.Linq; using Microsoft.Recognizers.Definitions.Spanish; @@ -14,9 +17,27 @@ public class DimensionExtractorConfiguration : SpanishNumberWithUnitExtractorCon .Concat(SpeedExtractorConfiguration.SpeedSuffixList) .Concat(VolumeExtractorConfiguration.VolumeSuffixList) .Concat(WeightExtractorConfiguration.WeightSuffixList) + .Concat(AngleExtractorConfiguration.AngleSuffixList) .ToImmutableDictionary(x => x.Key, x => x.Value); - private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousDimensionUnitList.ToImmutableList(); + public static readonly ImmutableDictionary DimensionTypeList = + NumbersWithUnitDefinitions.InformationSuffixList.ToDictionary(x => x.Key, x => Constants.INFORMATION) + .Concat(AreaExtractorConfiguration.AreaSuffixList.ToDictionary(x => x.Key, x => Constants.AREA)) + .Concat(LengthExtractorConfiguration.LengthSuffixList.ToDictionary(x => x.Key, x => Constants.LENGTH)) + 
.Concat(SpeedExtractorConfiguration.SpeedSuffixList.ToDictionary(x => x.Key, x => Constants.SPEED)) + .Concat(VolumeExtractorConfiguration.VolumeSuffixList.ToDictionary(x => x.Key, x => Constants.VOLUME)) + .Concat(WeightExtractorConfiguration.WeightSuffixList.ToDictionary(x => x.Key, x => Constants.WEIGHT)) + .Concat(AngleExtractorConfiguration.AngleSuffixList.ToDictionary(x => x.Key, x => Constants.ANGLE)) + .ToImmutableDictionary(x => x.Key, x => x.Value); + + private static readonly ImmutableList AmbiguousValues = + NumbersWithUnitDefinitions.AmbiguousDimensionUnitList + .Concat(LengthExtractorConfiguration.AmbiguousValues) + .Concat(AreaExtractorConfiguration.AmbiguousValues) + .Concat(SpeedExtractorConfiguration.AmbiguousValues) + .Concat(AngleExtractorConfiguration.AmbiguousUnits) + .Distinct() + .ToImmutableList(); public DimensionExtractorConfiguration() : base(new CultureInfo(Culture.Spanish)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/LengthExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/LengthExtractorConfiguration.cs index 7573f663aa..e3b67dea59 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/LengthExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/LengthExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Spanish; @@ -9,7 +12,7 @@ public class LengthExtractorConfiguration : SpanishNumberWithUnitExtractorConfig { public static readonly ImmutableDictionary LengthSuffixList = NumbersWithUnitDefinitions.LengthSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousLengthUnitList.ToImmutableList(); + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousLengthUnitList.ToImmutableList(); public LengthExtractorConfiguration() : base(new CultureInfo(Culture.Spanish)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs index 9f27801d34..b76eab3a42 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs @@ -1,6 +1,9 @@ -using System.Collections.Generic; -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; @@ -11,50 +14,33 @@ namespace Microsoft.Recognizers.Text.NumberWithUnit.Spanish { - public abstract class SpanishNumberWithUnitExtractorConfiguration : INumberWithUnitExtractorConfiguration + public abstract class SpanishNumberWithUnitExtractorConfiguration : BaseNumberWithUnitExtractorConfiguration { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly Regex CompoundUnitConnRegex = - new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexFlags); - - private static readonly Regex NonUnitsRegex = - new Regex(BaseUnits.PmNonUnitRegex, RegexFlags); - protected SpanishNumberWithUnitExtractorConfiguration(CultureInfo ci) + : base( + NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, + BaseUnits.PmNonUnitRegex, + NumbersWithUnitDefinitions.MultiplierRegex, + RegexFlags) { this.CultureInfo = ci; - this.UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit); + + var unitNumConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None, NumberMode.Unit); + this.UnitNumExtractor = NumberExtractor.GetInstance(unitNumConfig); + this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; - } - - public abstract string ExtractType { get; } - - public CultureInfo CultureInfo { get; } - - public IExtractor UnitNumExtractor { get; } - - public string BuildPrefix { get; } - public string BuildSuffix { get; } - - public string ConnectorToken { get; } - - public Regex CompoundUnitConnectorRegex => CompoundUnitConnRegex; - - public Regex NonUnitRegex => NonUnitsRegex; - - public virtual Regex AmbiguousUnitNumberMultiplierRegex => null; - - public Dictionary AmbiguityFiltersDict { get; } = null; - - 
public abstract ImmutableDictionary SuffixList { get; } - - public abstract ImmutableDictionary PrefixList { get; } + TemperatureAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.TemperatureAmbiguityFiltersDict); + DimensionAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.DimensionAmbiguityFiltersDict); + } - public abstract ImmutableList AmbiguousUnitList { get; } + public override void ExpandHalfSuffix(string source, ref List result, IOrderedEnumerable numbers) + { + } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpeedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpeedExtractorConfiguration.cs index 7b5a878b0b..e26aef01e8 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpeedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpeedExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Spanish; @@ -9,7 +12,7 @@ public class SpeedExtractorConfiguration : SpanishNumberWithUnitExtractorConfigu { public static readonly ImmutableDictionary SpeedSuffixList = NumbersWithUnitDefinitions.SpeedSuffixList.ToImmutableDictionary(); - private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousSpeedUnitList.ToImmutableList(); + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousSpeedUnitList.ToImmutableList(); public SpeedExtractorConfiguration() : base(new CultureInfo(Culture.Spanish)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/TemperatureExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/TemperatureExtractorConfiguration.cs index bacab63d41..d4d2e2e2d7 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/TemperatureExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/TemperatureExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; @@ -15,7 +18,7 @@ public class TemperatureExtractorConfiguration : SpanishNumberWithUnitExtractorC private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly Regex AmbiguousUnitMultiplierRegex = - new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags); + new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags, RegexTimeOut); public TemperatureExtractorConfiguration() : this(new CultureInfo(Culture.Spanish)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/VolumeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/VolumeExtractorConfiguration.cs index f5a9efb389..e886d5aa5e 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/VolumeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/VolumeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Spanish; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/WeightExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/WeightExtractorConfiguration.cs index c941f77188..2dafea9b70 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/WeightExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/WeightExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Spanish; @@ -9,6 +12,8 @@ public class WeightExtractorConfiguration : SpanishNumberWithUnitExtractorConfig { public static readonly ImmutableDictionary WeightSuffixList = NumbersWithUnitDefinitions.WeightSuffixList.ToImmutableDictionary(); + public static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousWeightUnitList.ToImmutableList(); + public WeightExtractorConfiguration() : this(new CultureInfo(Culture.Spanish)) { @@ -23,7 +28,7 @@ public WeightExtractorConfiguration(CultureInfo ci) public override ImmutableDictionary PrefixList => null; - public override ImmutableList AmbiguousUnitList => null; + public override ImmutableList AmbiguousUnitList => AmbiguousValues; public override string ExtractType => Constants.SYS_UNIT_WEIGHT; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/AgeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/AgeParserConfiguration.cs index 6246173e0e..ae74fee138 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/AgeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/AgeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Spanish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/AreaParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/AreaParserConfiguration.cs index 68ac9b5d2d..d8e237f367 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/AreaParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/AreaParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Spanish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/CurrencyParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/CurrencyParserConfiguration.cs index 16991464f9..90d3a62498 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/CurrencyParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/CurrencyParserConfiguration.cs @@ -1,4 +1,9 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; +using Microsoft.Recognizers.Definitions.Spanish; namespace Microsoft.Recognizers.Text.NumberWithUnit.Spanish { @@ -14,6 +19,8 @@ public CurrencyParserConfiguration(CultureInfo ci) { this.BindDictionary(CurrencyExtractorConfiguration.CurrencySuffixList); this.BindDictionary(CurrencyExtractorConfiguration.CurrencyPrefixList); + this.CurrencyNameToIsoCodeMap = NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.ToImmutableDictionary(); + this.CurrencyFractionCodeList = NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary(); } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/DimensionParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/DimensionParserConfiguration.cs index e2d67eaba6..798ccfbef9 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/DimensionParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/DimensionParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Spanish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/LengthParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/LengthParserConfiguration.cs index b6e5650e6f..f0828fffad 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/LengthParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/LengthParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Spanish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/SpanishNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/SpanishNumberWithUnitParserConfiguration.cs index cbda06e203..5e79fb8a0f 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/SpanishNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/SpanishNumberWithUnitParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Globalization; using Microsoft.Recognizers.Definitions.Spanish; using Microsoft.Recognizers.Text.Number; @@ -11,10 +15,15 @@ public class SpanishNumberWithUnitParserConfiguration : BaseNumberWithUnitParser public SpanishNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { - this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new SpanishNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + + var numConfig = new BaseNumberOptionsConfiguration(Culture.Spanish, NumberOptions.None); + + this.InternalNumberExtractor = NumberExtractor.GetInstance(numConfig); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new SpanishNumberParserConfiguration(numConfig)); this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; + + this.TypeList = DimensionExtractorConfiguration.DimensionTypeList; } public override IParser InternalNumberParser { get; } @@ -22,5 +31,7 @@ public SpanishNumberWithUnitParserConfiguration(CultureInfo ci) public override IExtractor InternalNumberExtractor { get; } public override string 
ConnectorToken { get; } + + public override IDictionary TypeList { get; set; } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/SpeedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/SpeedParserConfiguration.cs index b8105106ed..4d0f307b12 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/SpeedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/SpeedParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Spanish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/TemperatureParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/TemperatureParserConfiguration.cs index a13cce2cd3..67038012d5 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/TemperatureParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/TemperatureParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Spanish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/VolumeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/VolumeParserConfiguration.cs index fdd4a328ed..4186271853 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/VolumeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/VolumeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Spanish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/WeightParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/WeightParserConfiguration.cs index a8f774c4a4..0e8e6825e5 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/WeightParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/WeightParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Spanish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/AgeExtractorConfiguration.cs new file mode 100644 index 0000000000..2ad44de47c --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/AgeExtractorConfiguration.cs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class AgeExtractorConfiguration : SwedishNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary AgeSuffixList = NumbersWithUnitDefinitions.AgeSuffixList.ToImmutableDictionary(); + + public AgeExtractorConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public AgeExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => AgeSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => null; + + public override string ExtractType => Constants.SYS_UNIT_AGE; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/AngleExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/AngleExtractorConfiguration.cs new file mode 100644 index 0000000000..6481d45831 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/AngleExtractorConfiguration.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class AngleExtractorConfiguration : SwedishNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary AngleSuffixList = + NumbersWithUnitDefinitions.AngleSuffixList.ToImmutableDictionary(); + + public static readonly ImmutableList AmbiguousUnits = + NumbersWithUnitDefinitions.AmbiguousAngleUnitList.ToImmutableList(); + + public AngleExtractorConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public AngleExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => AngleSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => AmbiguousUnits; + + public override string ExtractType => Constants.SYS_UNIT_ANGLE; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/AreaExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/AreaExtractorConfiguration.cs new file mode 100644 index 0000000000..e7740d1271 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/AreaExtractorConfiguration.cs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class AreaExtractorConfiguration : SwedishNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary AreaSuffixList = NumbersWithUnitDefinitions.AreaSuffixList.ToImmutableDictionary(); + + public AreaExtractorConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public AreaExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => AreaSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => null; + + public override string ExtractType => Constants.SYS_UNIT_AREA; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/CurrencyExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/CurrencyExtractorConfiguration.cs new file mode 100644 index 0000000000..1f60565c16 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/CurrencyExtractorConfiguration.cs @@ -0,0 +1,63 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Globalization; +using System.Linq; + +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class CurrencyExtractorConfiguration : SwedishNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary CurrencySuffixList = + NumbersWithUnitDefinitions.CurrencySuffixList.ToImmutableDictionary(); + + // CurrencyNameToIsoCodeMap dictionary (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary IsoCodeDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture)); + + // CurrencyNameToIsoCodeMap followed by '$' symbol (e.g. 'AUD$') + public static readonly Dictionary IsoCodeWithSymbolDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture) + "$"); + + // Merge IsoCodeDict and IsoCodeWithSymbolDict + public static readonly Dictionary IsoCodeCombinedDict = IsoCodeDict.Concat(IsoCodeWithSymbolDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + // Merge IsoCodeCombinedDict with CurrencyPrefixList (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary CurrencyPrefixDict = NumbersWithUnitDefinitions.CurrencyPrefixList.Concat(IsoCodeCombinedDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? 
string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + public static readonly ImmutableDictionary CurrencyPrefixList = CurrencyPrefixDict.ToImmutableDictionary(); + + public static readonly ImmutableDictionary FractionalUnitNameToCodeMap = + NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary(); + + private static readonly ImmutableList AmbiguousValues = + NumbersWithUnitDefinitions.AmbiguousCurrencyUnitList.ToImmutableList(); + + public CurrencyExtractorConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public CurrencyExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => CurrencySuffixList; + + public override ImmutableDictionary PrefixList => CurrencyPrefixList; + + public override ImmutableList AmbiguousUnitList => AmbiguousValues; + + public override string ExtractType => Constants.SYS_UNIT_CURRENCY; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/DimensionExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/DimensionExtractorConfiguration.cs new file mode 100644 index 0000000000..9ad444b707 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/DimensionExtractorConfiguration.cs @@ -0,0 +1,61 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; +using System.Linq; + +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class DimensionExtractorConfiguration : SwedishNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary DimensionSuffixList = + NumbersWithUnitDefinitions.InformationSuffixList + .Concat(AreaExtractorConfiguration.AreaSuffixList) + .Concat(LengthExtractorConfiguration.LengthSuffixList) + .Concat(SpeedExtractorConfiguration.SpeedSuffixList) + .Concat(VolumeExtractorConfiguration.VolumeSuffixList) + .Concat(WeightExtractorConfiguration.WeightSuffixList) + .Concat(AngleExtractorConfiguration.AngleSuffixList) + .ToImmutableDictionary(x => x.Key, x => x.Value); + + public static readonly ImmutableDictionary DimensionTypeList = + NumbersWithUnitDefinitions.InformationSuffixList.ToDictionary(x => x.Key, x => Constants.INFORMATION) + .Concat(AreaExtractorConfiguration.AreaSuffixList.ToDictionary(x => x.Key, x => Constants.AREA)) + .Concat(LengthExtractorConfiguration.LengthSuffixList.ToDictionary(x => x.Key, x => Constants.LENGTH)) + .Concat(SpeedExtractorConfiguration.SpeedSuffixList.ToDictionary(x => x.Key, x => Constants.SPEED)) + .Concat(VolumeExtractorConfiguration.VolumeSuffixList.ToDictionary(x => x.Key, x => Constants.VOLUME)) + .Concat(WeightExtractorConfiguration.WeightSuffixList.ToDictionary(x => x.Key, x => Constants.WEIGHT)) + .Concat(AngleExtractorConfiguration.AngleSuffixList.ToDictionary(x => x.Key, x => Constants.ANGLE)) + .ToImmutableDictionary(x => x.Key, x => x.Value); + + private static readonly ImmutableList AmbiguousValues = + NumbersWithUnitDefinitions.AmbiguousDimensionUnitList + .Concat(LengthExtractorConfiguration.AmbiguousValues) + .Concat(VolumeExtractorConfiguration.AmbiguousValues) + .Concat(WeightExtractorConfiguration.AmbiguousValues) + .Concat(AngleExtractorConfiguration.AmbiguousUnits) + .Distinct() 
+ .ToImmutableList(); + + public DimensionExtractorConfiguration() + : base(new CultureInfo(Culture.Swedish)) + { + } + + public DimensionExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => DimensionSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => AmbiguousValues; + + public override string ExtractType => Constants.SYS_UNIT_DIMENSION; + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/LengthExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/LengthExtractorConfiguration.cs new file mode 100644 index 0000000000..884f15c38c --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/LengthExtractorConfiguration.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class LengthExtractorConfiguration : SwedishNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary LengthSuffixList = + NumbersWithUnitDefinitions.LengthSuffixList.ToImmutableDictionary(); + + public static readonly ImmutableList AmbiguousValues = + NumbersWithUnitDefinitions.AmbiguousLengthUnitList.ToImmutableList(); + + public LengthExtractorConfiguration() + : base(new CultureInfo(Culture.Swedish)) + { + } + + public LengthExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => LengthSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => AmbiguousValues; + + public override string ExtractType => Constants.SYS_UNIT_LENGTH; + } +} \ No newline at end of 
file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/SpeedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/SpeedExtractorConfiguration.cs new file mode 100644 index 0000000000..7adaf76872 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/SpeedExtractorConfiguration.cs @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class SpeedExtractorConfiguration : SwedishNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary SpeedSuffixList = + NumbersWithUnitDefinitions.SpeedSuffixList.ToImmutableDictionary(); + + public SpeedExtractorConfiguration() + : base(new CultureInfo(Culture.Swedish)) + { + } + + public SpeedExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => SpeedSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => null; + + public override string ExtractType => Constants.SYS_UNIT_SPEED; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/SwedishNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/SwedishNumberWithUnitExtractorConfiguration.cs new file mode 100644 index 0000000000..5af0d693fc --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/SwedishNumberWithUnitExtractorConfiguration.cs @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.Swedish; +using Microsoft.Recognizers.Definitions.Utilities; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Swedish; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public abstract class SwedishNumberWithUnitExtractorConfiguration : BaseNumberWithUnitExtractorConfiguration + { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + protected SwedishNumberWithUnitExtractorConfiguration(CultureInfo ci) + : base( + NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, + BaseUnits.PmNonUnitRegex, + string.Empty, + RegexFlags) + { + this.CultureInfo = ci; + + var unitNumConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None, NumberMode.Unit); + this.UnitNumExtractor = NumberExtractor.GetInstance(unitNumConfig); + + this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; + this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; + this.ConnectorToken = string.Empty; + + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict); + TemperatureAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.TemperatureAmbiguityFiltersDict); + DimensionAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.DimensionAmbiguityFiltersDict); + } + + public override void ExpandHalfSuffix(string source, ref List result, IOrderedEnumerable numbers) + { + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/TemperatureExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/TemperatureExtractorConfiguration.cs new file mode 100644 index 
0000000000..eaeead0682 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/TemperatureExtractorConfiguration.cs @@ -0,0 +1,47 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; +using System.Globalization; +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class TemperatureExtractorConfiguration : SwedishNumberWithUnitExtractorConfiguration + { + + public static readonly ImmutableDictionary TemperatureSuffixList = + NumbersWithUnitDefinitions.TemperatureSuffixList.ToImmutableDictionary(); + + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + + private static readonly ImmutableList AmbiguousValues = + NumbersWithUnitDefinitions.AmbiguousTemperatureUnitList.ToImmutableList(); + + private static readonly Regex AmbiguousUnitMultiplierRegex = + new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexFlags, RegexTimeOut); + + public TemperatureExtractorConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public TemperatureExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => TemperatureSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => AmbiguousValues; + + public override string ExtractType => Constants.SYS_UNIT_TEMPERATURE; + + public override Regex AmbiguousUnitNumberMultiplierRegex => AmbiguousUnitMultiplierRegex; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/VolumeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/VolumeExtractorConfiguration.cs new file mode 100644 index 
0000000000..c0a9a4f4d3 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/VolumeExtractorConfiguration.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class VolumeExtractorConfiguration : SwedishNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary VolumeSuffixList = + NumbersWithUnitDefinitions.VolumeSuffixList.ToImmutableDictionary(); + + public static readonly ImmutableList AmbiguousValues = + NumbersWithUnitDefinitions.AmbiguousVolumeUnitList.ToImmutableList(); + + public VolumeExtractorConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public VolumeExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => VolumeSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => AmbiguousValues; + + public override string ExtractType => Constants.SYS_UNIT_VOLUME; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/WeightExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/WeightExtractorConfiguration.cs new file mode 100644 index 0000000000..400e276b57 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Extractors/WeightExtractorConfiguration.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; + +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class WeightExtractorConfiguration : SwedishNumberWithUnitExtractorConfiguration + { + public static readonly ImmutableDictionary WeightSuffixList = + NumbersWithUnitDefinitions.WeightSuffixList.ToImmutableDictionary(); + + public static readonly ImmutableList AmbiguousValues = + NumbersWithUnitDefinitions.AmbiguousWeightUnitList.ToImmutableList(); + + public WeightExtractorConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public WeightExtractorConfiguration(CultureInfo ci) + : base(ci) + { + } + + public override ImmutableDictionary SuffixList => WeightSuffixList; + + public override ImmutableDictionary PrefixList => null; + + public override ImmutableList AmbiguousUnitList => AmbiguousValues; + + public override string ExtractType => Constants.SYS_UNIT_WEIGHT; + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/AgeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/AgeParserConfiguration.cs new file mode 100644 index 0000000000..0e8a11ea25 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/AgeParserConfiguration.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class AgeParserConfiguration : SwedishNumberWithUnitParserConfiguration + { + public AgeParserConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public AgeParserConfiguration(CultureInfo ci) + : base(ci) + { + this.BindDictionary(AgeExtractorConfiguration.AgeSuffixList); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/AreaParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/AreaParserConfiguration.cs new file mode 100644 index 0000000000..d02db2b3fe --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/AreaParserConfiguration.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class AreaParserConfiguration : SwedishNumberWithUnitParserConfiguration + { + public AreaParserConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public AreaParserConfiguration(CultureInfo ci) + : base(ci) + { + this.BindDictionary(AreaExtractorConfiguration.AreaSuffixList); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/CurrencyParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/CurrencyParserConfiguration.cs new file mode 100644 index 0000000000..416db720c1 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/CurrencyParserConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; +using System.Globalization; +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class CurrencyParserConfiguration : SwedishNumberWithUnitParserConfiguration + { + public CurrencyParserConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public CurrencyParserConfiguration(CultureInfo ci) + : base(ci) + { + this.BindDictionary(CurrencyExtractorConfiguration.CurrencySuffixList); + this.BindDictionary(CurrencyExtractorConfiguration.CurrencyPrefixList); + this.CurrencyNameToIsoCodeMap = NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.ToImmutableDictionary(); + this.CurrencyFractionCodeList = NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary(); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/DimensionParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/DimensionParserConfiguration.cs new file mode 100644 index 0000000000..2b7beceb2a --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/DimensionParserConfiguration.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class DimensionParserConfiguration : SwedishNumberWithUnitParserConfiguration + { + public DimensionParserConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public DimensionParserConfiguration(CultureInfo ci) + : base(ci) + { + this.BindDictionary(DimensionExtractorConfiguration.DimensionSuffixList); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/LengthParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/LengthParserConfiguration.cs new file mode 100644 index 0000000000..475fe677a7 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/LengthParserConfiguration.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class LengthParserConfiguration : SwedishNumberWithUnitParserConfiguration + { + public LengthParserConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public LengthParserConfiguration(CultureInfo ci) + : base(ci) + { + this.BindDictionary(LengthExtractorConfiguration.LengthSuffixList); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/SpeedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/SpeedParserConfiguration.cs new file mode 100644 index 0000000000..f1524738c0 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/SpeedParserConfiguration.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class SpeedParserConfiguration : SwedishNumberWithUnitParserConfiguration + { + public SpeedParserConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public SpeedParserConfiguration(CultureInfo ci) + : base(ci) + { + this.BindDictionary(SpeedExtractorConfiguration.SpeedSuffixList); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/SwedishNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/SwedishNumberWithUnitParserConfiguration.cs new file mode 100644 index 0000000000..f6cb1316fc --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/SwedishNumberWithUnitParserConfiguration.cs @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Globalization; +using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Text.Number.Swedish; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class SwedishNumberWithUnitParserConfiguration : BaseNumberWithUnitParserConfiguration + { + public SwedishNumberWithUnitParserConfiguration(CultureInfo ci) + : base(ci) + { + + var config = new BaseNumberOptionsConfiguration(Culture.Swedish, NumberOptions.None); + + this.InternalNumberExtractor = NumberExtractor.GetInstance(config); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new SwedishNumberParserConfiguration(config)); + this.ConnectorToken = string.Empty; + + this.TypeList = DimensionExtractorConfiguration.DimensionTypeList; + } + + public override IParser InternalNumberParser { get; } + + public override IExtractor InternalNumberExtractor { get; } + + public override string ConnectorToken { get; } + + public override IDictionary TypeList { get; set; } 
+ } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/TemperatureParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/TemperatureParserConfiguration.cs new file mode 100644 index 0000000000..00880ebf3f --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/TemperatureParserConfiguration.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class TemperatureParserConfiguration : SwedishNumberWithUnitParserConfiguration + { + public TemperatureParserConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public TemperatureParserConfiguration(CultureInfo ci) + : base(ci) + { + this.BindDictionary(TemperatureExtractorConfiguration.TemperatureSuffixList); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/VolumeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/VolumeParserConfiguration.cs new file mode 100644 index 0000000000..8a8567369a --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/VolumeParserConfiguration.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class VolumeParserConfiguration : SwedishNumberWithUnitParserConfiguration + { + public VolumeParserConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public VolumeParserConfiguration(CultureInfo ci) + : base(ci) + { + this.BindDictionary(VolumeExtractorConfiguration.VolumeSuffixList); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/WeightParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/WeightParserConfiguration.cs new file mode 100644 index 0000000000..de16c8db07 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Swedish/Parsers/WeightParserConfiguration.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Swedish +{ + public class WeightParserConfiguration : SwedishNumberWithUnitParserConfiguration + { + public WeightParserConfiguration() + : this(new CultureInfo(Culture.Swedish)) + { + } + + public WeightParserConfiguration(CultureInfo ci) + : base(ci) + { + this.BindDictionary(WeightExtractorConfiguration.WeightSuffixList); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/AgeExtractorConfiguration.cs index 19c37c7ea5..f8bed28dd4 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/AgeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/AgeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Turkish; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/AreaExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/AreaExtractorConfiguration.cs index 3d3f3f492e..809495f771 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/AreaExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/AreaExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Turkish; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/CurrencyExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/CurrencyExtractorConfiguration.cs index eef5df861d..13a0e90c52 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/CurrencyExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/CurrencyExtractorConfiguration.cs @@ -1,5 +1,11 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; using System.Globalization; +using System.Linq; using Microsoft.Recognizers.Definitions.Turkish; @@ -9,7 +15,25 @@ public class CurrencyExtractorConfiguration : TurkishNumberWithUnitExtractorConf { public static readonly ImmutableDictionary CurrencySuffixList = NumbersWithUnitDefinitions.CurrencySuffixList.ToImmutableDictionary(); - public static readonly ImmutableDictionary CurrencyPrefixList = NumbersWithUnitDefinitions.CurrencyPrefixList.ToImmutableDictionary(); + // CurrencyNameToIsoCodeMap dictionary (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary IsoCodeDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture)); + + // CurrencyNameToIsoCodeMap followed by '$' symbol (e.g. 'AUD$') + public static readonly Dictionary IsoCodeWithSymbolDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture) + "$"); + + // Merge IsoCodeDict and IsoCodeWithSymbolDict + public static readonly Dictionary IsoCodeCombinedDict = IsoCodeDict.Concat(IsoCodeWithSymbolDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + // Merge IsoCodeCombinedDict with CurrencyPrefixList (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary CurrencyPrefixDict = NumbersWithUnitDefinitions.CurrencyPrefixList.Concat(IsoCodeCombinedDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? 
string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + public static readonly ImmutableDictionary CurrencyPrefixList = CurrencyPrefixDict.ToImmutableDictionary(); public static readonly ImmutableDictionary FractionalUnitNameToCodeMap = NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary(); diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/DimensionExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/DimensionExtractorConfiguration.cs index 98259b806a..2bc1a86437 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/DimensionExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/DimensionExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using System.Linq; @@ -16,6 +19,15 @@ public class DimensionExtractorConfiguration : TurkishNumberWithUnitExtractorCon .Concat(WeightExtractorConfiguration.WeightSuffixList) .ToImmutableDictionary(x => x.Key, x => x.Value); + public static readonly ImmutableDictionary DimensionTypeList = + NumbersWithUnitDefinitions.InformationSuffixList.ToDictionary(x => x.Key, x => Constants.INFORMATION) + .Concat(AreaExtractorConfiguration.AreaSuffixList.ToDictionary(x => x.Key, x => Constants.AREA)) + .Concat(LengthExtractorConfiguration.LengthSuffixList.ToDictionary(x => x.Key, x => Constants.LENGTH)) + .Concat(SpeedExtractorConfiguration.SpeedSuffixList.ToDictionary(x => x.Key, x => Constants.SPEED)) + .Concat(VolumeExtractorConfiguration.VolumeSuffixList.ToDictionary(x => x.Key, x => Constants.VOLUME)) + .Concat(WeightExtractorConfiguration.WeightSuffixList.ToDictionary(x => x.Key, x => Constants.WEIGHT)) + .ToImmutableDictionary(x => x.Key, x => x.Value); + 
private static readonly ImmutableList AmbiguousValues = NumbersWithUnitDefinitions.AmbiguousDimensionUnitList.ToImmutableList(); public DimensionExtractorConfiguration() diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/LengthExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/LengthExtractorConfiguration.cs index f43c77a4c9..57374a13ab 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/LengthExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/LengthExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Turkish; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/SpeedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/SpeedExtractorConfiguration.cs index d55243e7f4..9f1ea86f74 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/SpeedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/SpeedExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Turkish; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/TemperatureExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/TemperatureExtractorConfiguration.cs index ff671d8009..b293758afe 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/TemperatureExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/TemperatureExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; @@ -16,7 +19,7 @@ public class TemperatureExtractorConfiguration : TurkishNumberWithUnitExtractorC NumbersWithUnitDefinitions.AmbiguousTemperatureUnitList.ToImmutableList(); private static readonly Regex AmbiguousUnitMultiplierRegex = - new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexOptions.None); + new Regex(BaseUnits.AmbiguousUnitNumberMultiplierRegex, RegexOptions.None, RegexTimeOut); public TemperatureExtractorConfiguration() : this(new CultureInfo(Culture.Turkish)) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/TurkishNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/TurkishNumberWithUnitExtractorConfiguration.cs index 66189a1724..ffe49bdb31 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/TurkishNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/TurkishNumberWithUnitExtractorConfiguration.cs @@ -1,56 +1,39 @@ -using System.Collections.Generic; -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Globalization; +using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Turkish; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Turkish; namespace Microsoft.Recognizers.Text.NumberWithUnit.Turkish { - public abstract class TurkishNumberWithUnitExtractorConfiguration : INumberWithUnitExtractorConfiguration + public abstract class TurkishNumberWithUnitExtractorConfiguration : BaseNumberWithUnitExtractorConfiguration { - private static readonly Regex CompoundUnitConnRegex = - new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.None); - - private static readonly Regex NonUnitsRegex = - new Regex(BaseUnits.PmNonUnitRegex, RegexOptions.None); - protected TurkishNumberWithUnitExtractorConfiguration(CultureInfo ci) + : base( + NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, + BaseUnits.PmNonUnitRegex, + string.Empty, + RegexOptions.None) { this.CultureInfo = ci; this.UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit); this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = string.Empty; - } - - public abstract string ExtractType { get; } - - public CultureInfo CultureInfo { get; } - - public IExtractor UnitNumExtractor { get; } - - public string BuildPrefix { get; } - - public string BuildSuffix { get; } - public string ConnectorToken { get; } - - public Regex CompoundUnitConnectorRegex => CompoundUnitConnRegex; - - public Regex NonUnitRegex => NonUnitsRegex; - - public virtual Regex AmbiguousUnitNumberMultiplierRegex => null; - - public Dictionary AmbiguityFiltersDict { get; } = null; - - public abstract ImmutableDictionary SuffixList { get; } - - public abstract 
ImmutableDictionary PrefixList { get; } + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict); + } - public abstract ImmutableList AmbiguousUnitList { get; } + public override void ExpandHalfSuffix(string source, ref List result, IOrderedEnumerable numbers) + { + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/VolumeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/VolumeExtractorConfiguration.cs index a1901dba2f..1719412b12 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/VolumeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/VolumeExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Turkish; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/WeightExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/WeightExtractorConfiguration.cs index 0b8876c891..f5006b542f 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/WeightExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Extractors/WeightExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Turkish; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/AgeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/AgeParserConfiguration.cs index f37cfe0f6b..b16e2afa80 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/AgeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/AgeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Turkish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/AreaParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/AreaParserConfiguration.cs index b7fa4bf2f0..20221ae7ef 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/AreaParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/AreaParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Turkish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/CurrencyParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/CurrencyParserConfiguration.cs index 4bc61dcc43..361b95d2ea 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/CurrencyParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/CurrencyParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Collections.Immutable; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System.Collections.Immutable; using System.Globalization; using Microsoft.Recognizers.Definitions.Turkish; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/DimensionParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/DimensionParserConfiguration.cs index b8b0252e57..a9e3788a99 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/DimensionParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/DimensionParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Turkish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/LengthParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/LengthParserConfiguration.cs index d1d5ee337f..d77d592b0f 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/LengthParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/LengthParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Turkish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/SpeedParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/SpeedParserConfiguration.cs index bf0c905638..02ed68f49a 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/SpeedParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/SpeedParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Turkish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/TemperatureParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/TemperatureParserConfiguration.cs index 5c58a8eff8..5cbad9f76b 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/TemperatureParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/TemperatureParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Turkish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/TurkishNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/TurkishNumberWithUnitParserConfiguration.cs index 527568adf7..94bc9b94c7 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/TurkishNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/TurkishNumberWithUnitParserConfiguration.cs @@ -1,4 +1,8 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Globalization; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Turkish; @@ -9,10 +13,15 @@ public class TurkishNumberWithUnitParserConfiguration : BaseNumberWithUnitParser public TurkishNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { + + var numConfig = new BaseNumberOptionsConfiguration(Culture.Turkish, NumberOptions.None); + this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new TurkishNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new TurkishNumberParserConfiguration(numConfig)); this.ConnectorToken = string.Empty; + + this.TypeList = DimensionExtractorConfiguration.DimensionTypeList; } public override IParser InternalNumberParser { get; } @@ -20,5 +29,7 @@ public TurkishNumberWithUnitParserConfiguration(CultureInfo ci) public override IExtractor InternalNumberExtractor { get; } public override string ConnectorToken { get; } + + public override IDictionary TypeList { get; set; } } } diff --git 
a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/VolumeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/VolumeParserConfiguration.cs index b183809e24..5de9ffdb09 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/VolumeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/VolumeParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Turkish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/WeightParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/WeightParserConfiguration.cs index 845cc41558..205e369f55 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/WeightParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/WeightParserConfiguration.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Globalization; namespace Microsoft.Recognizers.Text.NumberWithUnit.Turkish { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Utilities/CommonUtils.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Utilities/CommonUtils.cs new file mode 100644 index 0000000000..daae702af0 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Utilities/CommonUtils.cs @@ -0,0 +1,67 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.NumberWithUnit.Utilities +{ + public static class CommonUtils + { + // Expand patterns with 'half' suffix in CJK implementation. + public static void ExpandHalfSuffix(string source, ref List result, IOrderedEnumerable numbers, Regex halfUnitRegex) + { + if (halfUnitRegex != null && numbers != null) + { + var match = new List(); + foreach (var number in numbers) + { + if (halfUnitRegex.Matches(number.Text).Count == 1) + { + match.Add(number); + } + + } + + if (match.Count > 0) + { + var res = new List(); + foreach (var er in result) + { + int start = (int)er.Start; + int length = (int)er.Length; + var match_suffix = new List(); + foreach (var mr in match) + { + // Take into account possible whitespaces between result and half unit. + var subLength = (int)mr.Start - (start + length) >= 0 ? (int)mr.Start - (start + length) : 0; + var midStr = source.Substring(start + length, subLength); + if (string.IsNullOrWhiteSpace(midStr) && (int)mr.Start - (start + length) >= 0) + { + match_suffix.Add(mr); + } + } + + if (match_suffix.Count == 1) + { + var mr = match_suffix[0]; + var suffixLength = (int)(mr.Start + mr.Length) - (start + length); + er.Length += suffixLength; + er.Text += source.Substring(start + length, suffixLength); + var tmp = new List(); + tmp.Add((ExtractResult)er.Data); + tmp.Add(mr); + er.Data = tmp; + } + + res.Add(er); + } + + result = res; + } + } + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Utilities/DictionaryUtils.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Utilities/DictionaryUtils.cs index 0875d73d35..7f7dd8ea7a 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Utilities/DictionaryUtils.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Utilities/DictionaryUtils.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft 
Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; namespace Microsoft.Recognizers.Text.NumberWithUnit.Utilities @@ -32,14 +35,21 @@ public static void BindUnitsString(IDictionary sourceDictionary, foreach (var token in values) { - if (string.IsNullOrWhiteSpace(token) || (sourceDictionary.ContainsKey(token) && sourceDictionary[token].Equals(key))) + if (string.IsNullOrWhiteSpace(token) || (sourceDictionary.ContainsKey(token) && sourceDictionary[token].Equals(key, StringComparison.Ordinal))) { continue; } // This segment of code is going to break if there're duplicated key-values in the resource files. // Those duplicates should be fixed before committing. - sourceDictionary.Add(token, key); + try + { + sourceDictionary.Add(token, key); + } + catch (ArgumentException ae) + { + throw new ArgumentException(ae.Message + ": " + token, ae); + } } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Utilities/StringComparer.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Utilities/StringComparer.cs index ace0fd300e..3e1f73486b 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Utilities/StringComparer.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Utilities/StringComparer.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; namespace Microsoft.Recognizers.Text.NumberWithUnit diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChineseIpExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChineseIpExtractorConfiguration.cs index 1127d7a866..815bf1af4f 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChineseIpExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChineseIpExtractorConfiguration.cs @@ -1,5 +1,7 @@ -using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; namespace Microsoft.Recognizers.Text.Sequence.Chinese @@ -9,8 +11,8 @@ public class ChineseIpExtractorConfiguration : IpConfiguration public ChineseIpExtractorConfiguration(SequenceOptions options) : base(options) { - Ipv4Regex = new Regex(IpDefinitions.Ipv4Regex, RegexOptions.Compiled); - Ipv6Regex = new Regex(IpDefinitions.Ipv6Regex, RegexOptions.Compiled); + Ipv4Regex = new Regex(IpDefinitions.Ipv4Regex, RegexOptions.Compiled, RegexTimeOut); + Ipv6Regex = new Regex(IpDefinitions.Ipv6Regex, RegexOptions.Compiled, RegexTimeOut); } } } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChinesePhoneNumberExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChinesePhoneNumberExtractorConfiguration.cs index 1e1aea7de4..901d0510c7 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChinesePhoneNumberExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChinesePhoneNumberExtractorConfiguration.cs @@ -1,6 +1,11 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System.Collections.Generic; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Chinese; +using Microsoft.Recognizers.Definitions.Utilities; namespace Microsoft.Recognizers.Text.Sequence.Chinese { @@ -12,8 +17,9 @@ public ChinesePhoneNumberExtractorConfiguration(SequenceOptions options) WordBoundariesRegex = PhoneNumbersDefinitions.WordBoundariesRegex; NonWordBoundariesRegex = PhoneNumbersDefinitions.NonWordBoundariesRegex; EndWordBoundariesRegex = PhoneNumbersDefinitions.EndWordBoundariesRegex; - ColonPrefixCheckRegex = new Regex(PhoneNumbersDefinitions.ColonPrefixCheckRegex); + ColonPrefixCheckRegex = new Regex(PhoneNumbersDefinitions.ColonPrefixCheckRegex, RegexOptions.None, RegexTimeOut); ForbiddenPrefixMarkers = (List)PhoneNumbersDefinitions.ForbiddenPrefixMarkers; + AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(BasePhoneNumbers.AmbiguityFiltersDict); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChineseQuotedTextExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChineseQuotedTextExtractorConfiguration.cs new file mode 100644 index 0000000000..afc18c12f8 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChineseQuotedTextExtractorConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Chinese; + +namespace Microsoft.Recognizers.Text.Sequence.Chinese +{ + public class ChineseQuotedTextExtractorConfiguration : QuotedTextConfiguration + { + public ChineseQuotedTextExtractorConfiguration(SequenceOptions options) + : base(options) + { + QuotedTextRegex1 = new Regex(QuotedTextDefinitions.QuotedTextRegex1, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex2 = new Regex(QuotedTextDefinitions.QuotedTextRegex2, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex3 = new Regex(QuotedTextDefinitions.QuotedTextRegex3, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex4 = new Regex(QuotedTextDefinitions.QuotedTextRegex4, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex5 = new Regex(QuotedTextDefinitions.QuotedTextRegex5, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex6 = new Regex(QuotedTextDefinitions.QuotedTextRegex6, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex7 = new Regex(QuotedTextDefinitions.QuotedTextRegex7, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex8 = new Regex(QuotedTextDefinitions.QuotedTextRegex8, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex9 = new Regex(QuotedTextDefinitions.QuotedTextRegex9, RegexOptions.Compiled, RegexTimeOut); + + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChineseURLExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChineseURLExtractorConfiguration.cs index 9c3f8933c3..b4a6def724 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChineseURLExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChineseURLExtractorConfiguration.cs @@ -1,5 +1,7 @@ -using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; namespace Microsoft.Recognizers.Text.Sequence.Chinese @@ -9,8 +11,8 @@ public class ChineseURLExtractorConfiguration : URLConfiguration public ChineseURLExtractorConfiguration(SequenceOptions options) : base(options) { - UrlRegex = new Regex(URLDefinitions.UrlRegex, RegexOptions.Compiled); - IpUrlRegex = new Regex(URLDefinitions.IpUrlRegex, RegexOptions.Compiled); + UrlRegex = new Regex(URLDefinitions.UrlRegex, RegexOptions.Compiled, RegexTimeOut); + IpUrlRegex = new Regex(URLDefinitions.IpUrlRegex, RegexOptions.Compiled, RegexTimeOut); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Config/BaseSequenceConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Config/BaseSequenceConfiguration.cs new file mode 100644 index 0000000000..e978a79944 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Config/BaseSequenceConfiguration.cs @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence +{ + public class BaseSequenceConfiguration : ISequenceConfiguration + { + public BaseSequenceConfiguration(SequenceOptions options = SequenceOptions.None) + { + Options = options; + } + + public SequenceOptions Options { get; } + + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Config/ISequenceConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Config/ISequenceConfiguration.cs index b32b883d60..8e536182e7 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Config/ISequenceConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Config/ISequenceConfiguration.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Sequence +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Sequence { public interface ISequenceConfiguration { diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Config/IpConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Config/IpConfiguration.cs index f50cacf278..1c195e1ffe 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Config/IpConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Config/IpConfiguration.cs @@ -1,4 +1,9 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Reflection; +using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Sequence { @@ -14,5 +19,7 @@ public IpConfiguration(SequenceOptions options = SequenceOptions.None) public Regex Ipv4Regex { get; set; } public Regex Ipv6Regex { get; set; } + + protected static TimeSpan RegexTimeOut => SequenceRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Config/PhoneNumberConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Config/PhoneNumberConfiguration.cs index 34c161d793..eb781b154e 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Config/PhoneNumberConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Config/PhoneNumberConfiguration.cs @@ -1,4 +1,9 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Reflection; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Sequence @@ -20,11 +25,16 @@ public PhoneNumberConfiguration(SequenceOptions options = SequenceOptions.None) public Regex ColonPrefixCheckRegex { get; set; } + public Regex FalsePositivePrefixRegex { get; set; } + public List ColonMarkers { get; set; } public List ForbiddenPrefixMarkers { get; set; } public List ForbiddenSuffixMarkers { get; set; } + public Dictionary AmbiguityFiltersDict { get; set; } + + protected static TimeSpan RegexTimeOut => SequenceRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); } } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Config/QuotedTextConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Config/QuotedTextConfiguration.cs new file mode 100644 index 0000000000..1d9f2f6d8f --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Config/QuotedTextConfiguration.cs @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Reflection; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.Sequence +{ + public class QuotedTextConfiguration : ISequenceConfiguration + { + public QuotedTextConfiguration(SequenceOptions options = SequenceOptions.None) + { + Options = options; + } + + public SequenceOptions Options { get; } + + public Regex QuotedTextRegex1 { get; set; } + + public Regex QuotedTextRegex2 { get; set; } + + public Regex QuotedTextRegex3 { get; set; } + + public Regex QuotedTextRegex4 { get; set; } + + public Regex QuotedTextRegex5 { get; set; } + + public Regex QuotedTextRegex6 { get; set; } + + public Regex QuotedTextRegex7 { get; set; } + + public Regex QuotedTextRegex8 { get; set; } + + public Regex QuotedTextRegex9 { get; set; } + + protected static TimeSpan RegexTimeOut => SequenceRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Config/URLConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Config/URLConfiguration.cs index 300e666baf..a0fe3dc78b 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Config/URLConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Config/URLConfiguration.cs @@ -1,4 +1,9 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Reflection; +using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Sequence { @@ -15,5 +20,6 @@ public URLConfiguration(SequenceOptions options = SequenceOptions.None) public Regex UrlRegex { get; set; } + protected static TimeSpan RegexTimeOut => SequenceRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); } } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Constants.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Constants.cs index e4619dca61..ea4e55f3e1 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Constants.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Constants.cs @@ -1,4 +1,7 @@ -using System.Diagnostics.CodeAnalysis; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Diagnostics.CodeAnalysis; namespace Microsoft.Recognizers.Text.Sequence { @@ -19,6 +22,8 @@ public static class Constants public const string SYS_GUID = "builtin.guid"; + public const string SYS_QUOTED_TEXT = "builtin.quotedText"; + // Model type name public const string MODEL_PHONE_NUMBER = "phonenumber"; @@ -34,6 +39,8 @@ public static class Constants public const string MODEL_GUID = "guid"; + public const string MODEL_QUOTED_TEXT = "quotedText"; + public const string IP_REGEX_IPV4 = "ipv4"; public const string IP_REGEX_IPV6 = "ipv6"; @@ -69,5 +76,7 @@ public static class Constants public const string URL_REGEX = "Url"; public const string GUID_REGEX = "Guid"; + + public const string QUOTED_TEXT_REGEX = "quotedText"; } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Dutch/DutchPhoneNumberExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Dutch/DutchPhoneNumberExtractorConfiguration.cs new file mode 100644 index 0000000000..51ca8fbe9b --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Dutch/DutchPhoneNumberExtractorConfiguration.cs @@ -0,0 +1,17 @@ +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Dutch; + +namespace Microsoft.Recognizers.Text.Sequence.Dutch +{ + public class DutchPhoneNumberExtractorConfiguration : BasePhoneNumberExtractorConfiguration + { + public DutchPhoneNumberExtractorConfiguration(SequenceOptions options) + : base(options) + { + FalsePositivePrefixRegex = null; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Dutch/DutchQuotedTextExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Dutch/DutchQuotedTextExtractorConfiguration.cs new file mode 100644 index 0000000000..944f7adac0 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Dutch/DutchQuotedTextExtractorConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Dutch; + +namespace Microsoft.Recognizers.Text.Sequence.Dutch +{ + public class DutchQuotedTextExtractorConfiguration : QuotedTextConfiguration + { + public DutchQuotedTextExtractorConfiguration(SequenceOptions options) + : base(options) + { + QuotedTextRegex1 = new Regex(QuotedTextDefinitions.QuotedTextRegex1, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex2 = new Regex(QuotedTextDefinitions.QuotedTextRegex2, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex3 = new Regex(QuotedTextDefinitions.QuotedTextRegex3, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex4 = new Regex(QuotedTextDefinitions.QuotedTextRegex4, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex5 = new Regex(QuotedTextDefinitions.QuotedTextRegex5, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex6 = new Regex(QuotedTextDefinitions.QuotedTextRegex6, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex7 = new Regex(QuotedTextDefinitions.QuotedTextRegex7, RegexOptions.Compiled, RegexTimeOut); + 
QuotedTextRegex8 = new Regex(QuotedTextDefinitions.QuotedTextRegex8, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex9 = new Regex(QuotedTextDefinitions.QuotedTextRegex9, RegexOptions.Compiled, RegexTimeOut); + + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EmailExtractor.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EmailExtractor.cs index 8a43adf8ad..cb1cc2911a 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EmailExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EmailExtractor.cs @@ -1,6 +1,18 @@ -namespace Microsoft.Recognizers.Text.Sequence.English +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence.English { + public class EmailExtractor : BaseEmailExtractor { + + public EmailExtractor(BaseSequenceConfiguration config) + : base(config) + { + + } + } + } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishIpExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishIpExtractorConfiguration.cs index 20449768ad..5256244698 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishIpExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishIpExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; namespace Microsoft.Recognizers.Text.Sequence.English @@ -8,8 +11,8 @@ public class EnglishIpExtractorConfiguration : IpConfiguration public EnglishIpExtractorConfiguration(SequenceOptions options) : base(options) { - Ipv4Regex = new Regex(BaseIp.Ipv4Regex, RegexOptions.Compiled); - Ipv6Regex = new Regex(BaseIp.Ipv6Regex, RegexOptions.Compiled); + Ipv4Regex = new Regex(BaseIp.Ipv4Regex, RegexOptions.Compiled, RegexTimeOut); + Ipv6Regex = new Regex(BaseIp.Ipv6Regex, RegexOptions.Compiled, RegexTimeOut); } } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishPhoneNumberExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishPhoneNumberExtractorConfiguration.cs index 3df3c23ade..5ecc8544e9 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishPhoneNumberExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishPhoneNumberExtractorConfiguration.cs @@ -1,21 +1,17 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.English; namespace Microsoft.Recognizers.Text.Sequence.English { - public class EnglishPhoneNumberExtractorConfiguration : PhoneNumberConfiguration + public class EnglishPhoneNumberExtractorConfiguration : BasePhoneNumberExtractorConfiguration { public EnglishPhoneNumberExtractorConfiguration(SequenceOptions options) : base(options) { - WordBoundariesRegex = BasePhoneNumbers.WordBoundariesRegex; - NonWordBoundariesRegex = BasePhoneNumbers.NonWordBoundariesRegex; - EndWordBoundariesRegex = BasePhoneNumbers.EndWordBoundariesRegex; - ColonPrefixCheckRegex = new Regex(BasePhoneNumbers.ColonPrefixCheckRegex); - ColonMarkers = (List)BasePhoneNumbers.ColonMarkers; - ForbiddenPrefixMarkers = (List)BasePhoneNumbers.ForbiddenPrefixMarkers; - ForbiddenSuffixMarkers = (List)BasePhoneNumbers.ForbiddenSuffixMarkers; + FalsePositivePrefixRegex = new Regex(PhoneNumbersDefinitions.FalsePositivePrefixRegex, RegexOptions.None, RegexTimeOut); } } } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishQuotedTextExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishQuotedTextExtractorConfiguration.cs new file mode 100644 index 0000000000..cafc1320d6 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishQuotedTextExtractorConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.English; + +namespace Microsoft.Recognizers.Text.Sequence.English +{ + public class EnglishQuotedTextExtractorConfiguration : QuotedTextConfiguration + { + public EnglishQuotedTextExtractorConfiguration(SequenceOptions options) + : base(options) + { + QuotedTextRegex1 = new Regex(QuotedTextDefinitions.QuotedTextRegex1, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex2 = new Regex(QuotedTextDefinitions.QuotedTextRegex2, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex3 = new Regex(QuotedTextDefinitions.QuotedTextRegex3, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex4 = new Regex(QuotedTextDefinitions.QuotedTextRegex4, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex5 = new Regex(QuotedTextDefinitions.QuotedTextRegex5, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex6 = new Regex(QuotedTextDefinitions.QuotedTextRegex6, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex7 = new Regex(QuotedTextDefinitions.QuotedTextRegex7, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex8 = new Regex(QuotedTextDefinitions.QuotedTextRegex8, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex9 = new Regex(QuotedTextDefinitions.QuotedTextRegex9, RegexOptions.Compiled, RegexTimeOut); + + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishURLExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishURLExtractorConfiguration.cs index 79771d02fb..e6a2d27f30 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishURLExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishURLExtractorConfiguration.cs @@ -1,4 +1,7 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; namespace Microsoft.Recognizers.Text.Sequence.English @@ -8,8 +11,8 @@ public class EnglishURLExtractorConfiguration : URLConfiguration public EnglishURLExtractorConfiguration(SequenceOptions options) : base(options) { - IpUrlRegex = new Regex(BaseURL.IpUrlRegex, RegexOptions.Compiled); - UrlRegex = new Regex(BaseURL.UrlRegex, RegexOptions.Compiled); + IpUrlRegex = new Regex(BaseURL.IpUrlRegex, RegexOptions.Compiled, RegexTimeOut); + UrlRegex = new Regex(BaseURL.UrlRegex, RegexOptions.Compiled, RegexTimeOut); } } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/GUIDExtractor.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/GUIDExtractor.cs index 0e114636fe..b934e97e9e 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/GUIDExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/GUIDExtractor.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Sequence.English +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence.English { public class GUIDExtractor : BaseGUIDExtractor { diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/HashTagExtractor.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/HashTagExtractor.cs index ac8b4a640b..3ac8880e16 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/HashTagExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/HashTagExtractor.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Sequence.English +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Sequence.English { public class HashtagExtractor : BaseHashtagExtractor { diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/MentionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/MentionExtractor.cs index 499d96d56b..fe83ea94a7 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/MentionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/MentionExtractor.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Sequence.English +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence.English { public class MentionExtractor : BaseMentionExtractor { diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/EmailParser.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/EmailParser.cs index c299b134b3..4925a148c9 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/EmailParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/EmailParser.cs @@ -1,9 +1,16 @@ -namespace Microsoft.Recognizers.Text.Sequence.English +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence.English { public class EmailParser : BaseSequenceParser { - public EmailParser() + + private BaseSequenceConfiguration config; + + public EmailParser(BaseSequenceConfiguration config) { + this.config = config; } } } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/GUIDParser.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/GUIDParser.cs index 15637bef64..9c22020c1b 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/GUIDParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/GUIDParser.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +using System; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; @@ -15,7 +18,7 @@ public class GUIDParser : BaseSequenceParser private static string pureDigitRegex = @"^\d*$"; private static string formatRegex = @"-"; - private static readonly Regex GuidElementRegex = new Regex(BaseGUID.GUIDRegexElement, RegexOptions.Compiled); + private static readonly Regex GuidElementRegex = new Regex(BaseGUID.GUIDRegexElement, RegexOptions.Compiled, RegexTimeOut); public static double ScoreGUID(string textGUID) { diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/HashTagParser.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/HashTagParser.cs index c310b5d27e..4363d93059 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/HashTagParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/HashTagParser.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Sequence.English +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence.English { public class HashtagParser : BaseSequenceParser { diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/IpParser.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/IpParser.cs index 657de85bbb..e8bd883d3c 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/IpParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/IpParser.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Sequence.English +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Sequence.English { public class IpParser : BaseIpParser { diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/MentionParser.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/MentionParser.cs index aa236f19cb..d1857811d7 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/MentionParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/MentionParser.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Sequence.English +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence.English { public class MentionParser : BaseSequenceParser { diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/PhoneNumberParser.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/PhoneNumberParser.cs index 9261476ee6..da3cbe9d2a 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/PhoneNumberParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/PhoneNumberParser.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; @@ -33,10 +36,10 @@ public class PhoneNumberParser : BaseSequenceParser private static string continueDigitRegex = @"\d{5}\d*"; private static string digitRegex = @"\d"; - private static readonly Regex CountryCodeRegex = new Regex(BasePhoneNumbers.CountryCodeRegex); - private static readonly Regex AreaCodeRegex = new Regex(BasePhoneNumbers.AreaCodeIndicatorRegex); - private static readonly Regex FormatIndicatorRegex = new Regex(BasePhoneNumbers.FormatIndicatorRegex); - private static readonly Regex NoAreaCodeUSphonenumbeRegex = new Regex(BasePhoneNumbers.NoAreaCodeUSPhoneNumberRegex); + private static readonly Regex CountryCodeRegex = new Regex(BasePhoneNumbers.CountryCodeRegex, RegexOptions.None, RegexTimeOut); + private static readonly Regex AreaCodeRegex = new Regex(BasePhoneNumbers.AreaCodeIndicatorRegex, RegexOptions.None, RegexTimeOut); + private static readonly Regex FormatIndicatorRegex = new Regex(BasePhoneNumbers.FormatIndicatorRegex, RegexOptions.None, RegexTimeOut); + private static readonly Regex NoAreaCodeUsPhoneNumberRegex = new Regex(BasePhoneNumbers.NoAreaCodeUSPhoneNumberRegex, RegexOptions.None, RegexTimeOut); public static double ScorePhoneNumber(string phoneNumberText) { @@ -82,8 +85,8 @@ public static double ScorePhoneNumber(string phoneNumberText) // Continue digit deduction score -= Math.Max(Regex.Matches(phoneNumberText, continueDigitRegex).Count - 1, 0) * continueDigitDeductionScore; - // Special award for USphonenumber without area code, i.e. 223-4567 or 223 - 4567 - if (NoAreaCodeUSphonenumbeRegex.IsMatch(phoneNumberText)) + // Special award for US phonenumber without area code, i.e. 
223-4567 or 223 - 4567 + if (NoAreaCodeUsPhoneNumberRegex.IsMatch(phoneNumberText)) { score += lengthAward * 1.5; } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/QuotedTextParser.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/QuotedTextParser.cs new file mode 100644 index 0000000000..2010bc8622 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/QuotedTextParser.cs @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence.English +{ + public class QuotedTextParser : BaseSequenceParser + { + public QuotedTextParser() + { + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/URLParser.cs b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/URLParser.cs index 192e2ab097..f77f140d86 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/URLParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/English/Parsers/URLParser.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Sequence.English +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence.English { public class URLParser : BaseSequenceParser { diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseEmailExtractor.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseEmailExtractor.cs index 82f303872a..feb00c832a 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseEmailExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseEmailExtractor.cs @@ -1,5 +1,11 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; +using System.Linq; +using System.Reflection; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; @@ -7,16 +13,24 @@ namespace Microsoft.Recognizers.Text.Sequence { public class BaseEmailExtractor : BaseSequenceExtractor { - public BaseEmailExtractor() + private static readonly Regex Rfc5322ValidationRegex = new Regex(BaseEmail.RFC5322Regex, RegexOptions.Compiled, RegexTimeOut); + + private static readonly char[] TrimmableChars = { '.' }; + + private readonly BaseSequenceConfiguration config; + + public BaseEmailExtractor(BaseSequenceConfiguration config) { + this.config = config; + var regexes = new Dictionary { { - new Regex(BaseEmail.EmailRegex, RegexOptions.Compiled), + new Regex(BaseEmail.EmailRegex, RegexOptions.Compiled, RegexTimeOut), Constants.EMAIL_REGEX }, { - new Regex(BaseEmail.EmailRegex2, RegexOptions.Compiled), + new Regex(BaseEmail.EmailRegex2, RegexOptions.Compiled, RegexTimeOut), Constants.EMAIL_REGEX }, }; @@ -26,6 +40,32 @@ public BaseEmailExtractor() internal override ImmutableDictionary Regexes { get; } + protected static TimeSpan RegexTimeOut => SequenceRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + protected sealed override string ExtractType { get; } = Constants.SYS_EMAIL; + + protected override List PostFilter(List results) + { + // If Relaxed is on, no extra validation is applied + if ((config.Options & SequenceOptions.Relaxed) != 0) + { + return results; + } + else + { + // Not return malformed e-mail addresses and trim ending '.' 
+ foreach (var result in results) + { + if (result.Text.EndsWith(".", StringComparison.Ordinal)) + { + result.Text = result.Text.TrimEnd(TrimmableChars); + result.Length--; + } + } + + return results.Where(r => Rfc5322ValidationRegex.IsMatch(r.Text)).ToList(); + } + } + } } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseGUIDExtractor.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseGUIDExtractor.cs index b75005309a..ac6512d1ff 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseGUIDExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseGUIDExtractor.cs @@ -1,5 +1,10 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; using System.Collections.Immutable; +using System.Reflection; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; @@ -13,7 +18,7 @@ public BaseGUIDExtractor() var regexes = new Dictionary { { - new Regex(BaseGUID.GUIDRegex), + new Regex(BaseGUID.GUIDRegex, RegexOptions.None, RegexTimeOut), Constants.GUID_REGEX }, }; @@ -23,6 +28,8 @@ public BaseGUIDExtractor() internal override ImmutableDictionary Regexes { get; } + protected static TimeSpan RegexTimeOut => SequenceRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + protected sealed override string ExtractType { get; } = Constants.SYS_GUID; } } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseHashTagExtractor.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseHashTagExtractor.cs index 44e6c5866b..cf93b1c6e3 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseHashTagExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseHashTagExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseIpExtractor.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseIpExtractor.cs index f22d2b94b1..e822caa57f 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseIpExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseIpExtractor.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; @@ -13,6 +17,8 @@ public class BaseIpExtractor : BaseSequenceExtractor // The Ipv6 address regexes is written following the Recommendation: https://tools.ietf.org/html/rfc5952 public BaseIpExtractor(IpConfiguration config) { + this.config = config; + var regexes = new Dictionary { { @@ -69,13 +75,13 @@ public override List Extract(string text) var start = lastNotMatched + 1; var length = i - lastNotMatched; var substr = text.Substring(start, length); - if (substr.StartsWith(Constants.IPV6_ELLIPSIS) && + if (substr.StartsWith(Constants.IPV6_ELLIPSIS, StringComparison.Ordinal) && (start > 0 && char.IsLetterOrDigit(text[start - 1]) && !SimpleTokenizer.IsCjk(text[start - 1]))) { continue; } - if (substr.EndsWith(Constants.IPV6_ELLIPSIS) && + if (substr.EndsWith(Constants.IPV6_ELLIPSIS, StringComparison.Ordinal) && (i + 1 < text.Length && char.IsLetterOrDigit(text[i + 1]) && !SimpleTokenizer.IsCjk(text[start + 1]))) { continue; diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseMentionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseMentionExtractor.cs index caa6630866..6d99013712 100644 ---
a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseMentionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseMentionExtractor.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BasePhoneNumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BasePhoneNumberExtractor.cs index f2d71ac7de..806286be3f 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BasePhoneNumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BasePhoneNumberExtractor.cs @@ -1,15 +1,18 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; -using System.Xml; using Microsoft.Recognizers.Definitions; namespace Microsoft.Recognizers.Text.Sequence { public class BasePhoneNumberExtractor : BaseSequenceExtractor { - private static readonly Regex InternationDialingPrefixRegex = new Regex(BasePhoneNumbers.InternationDialingPrefixRegex); + private static readonly Regex InternationalDialingPrefixRegex = new Regex(BasePhoneNumbers.InternationDialingPrefixRegex); private static readonly Regex PreCheckPhoneNumberRegex = new Regex(BasePhoneNumbers.PreCheckPhoneNumberRegex, RegexOptions.Compiled); @@ -97,6 +100,37 @@ public override List Extract(string text) continue; } + if (CountDigits(er.Text) == 16 && !er.Text.StartsWith("+", StringComparison.Ordinal)) + { + ers.Remove(er); + i--; + continue; + } + + if (CountDigits(er.Text) == 15) + { + var flag = false; + foreach (var numSpan in 
er.Text.Split(' ')) + { + if (CountDigits(numSpan) == 4 || CountDigits(numSpan) == 3) + { + flag = false; + } + else + { + flag = true; + break; + } + } + + if (flag == false) + { + ers.Remove(er); + i--; + continue; + } + } + if (er.Start + er.Length < text.Length) { var ch = text[(int)(er.Start + er.Length)]; @@ -111,6 +145,16 @@ public override List Extract(string text) if (er.Start != 0) { var ch = text[(int)(er.Start - 1)]; + var front = text.Substring(0, (int)(er.Start - 1)); + + if (this.config.FalsePositivePrefixRegex != null && + this.config.FalsePositivePrefixRegex.IsMatch(front)) + { + ers.Remove(er); + i--; + continue; + } + if (BasePhoneNumbers.BoundaryMarkers.Contains(ch)) { if (SpecialBoundaryMarkers.Contains(ch) && @@ -120,8 +164,11 @@ public override List Extract(string text) var charGap = text[(int)(er.Start - 2)]; if (!char.IsNumber(charGap) && !char.IsWhiteSpace(charGap)) { + // check if the extracted string has a non-digit string before "-". + var flag = Regex.IsMatch(text.Substring(0, (int)(er.Start - 2)), @"^[^0-9]+$"); + // Handle cases like "91a-677-0060". - if (char.IsLower(charGap)) + if (char.IsLower(charGap) && !flag) { ers.Remove(er); i--; @@ -131,10 +178,9 @@ public override List Extract(string text) } // check the international dialing prefix - var front = text.Substring(0, (int)(er.Start - 1)); - if (InternationDialingPrefixRegex.IsMatch(front)) + if (InternationalDialingPrefixRegex.IsMatch(front)) { - var moveOffset = InternationDialingPrefixRegex.Match(front).Length + 1; + var moveOffset = InternationalDialingPrefixRegex.Match(front).Length + 1; er.Start = er.Start - moveOffset; er.Length = er.Length + moveOffset; er.Text = text.Substring((int)er.Start, (int)er.Length); @@ -152,7 +198,6 @@ public override List Extract(string text) // Handle "tel:123456". 
if (BasePhoneNumbers.ColonMarkers.Contains(ch)) { - var front = text.Substring(0, (int)(er.Start - 1)); if (this.config.ColonPrefixCheckRegex.IsMatch(front)) { continue; @@ -165,7 +210,7 @@ public override List Extract(string text) } } - // filter hexadecimal address like 00 10 00 31 46 D9 E9 11 + // Filter hexadecimal address like 00 10 00 31 46 D9 E9 11 var maskMatchCollection = Regex.Matches(text, BasePhoneNumbers.PhoneNumberMaskRegex); for (var index = ers.Count - 1; index >= 0; --index) @@ -181,15 +226,18 @@ public override List Extract(string text) } } + // Remove common ambiguous cases + ers = FilterAmbiguity(ers, text); + return ers; } - private bool CheckFormattedPhoneNumber(string phoneNumberText) + private static bool CheckFormattedPhoneNumber(string phoneNumberText) { return Regex.IsMatch(phoneNumberText, BasePhoneNumbers.FormatIndicatorRegex); } - private int CountDigits(string candidateString) + private static int CountDigits(string candidateString) { var count = 0; foreach (var t in candidateString) @@ -202,5 +250,26 @@ private int CountDigits(string candidateString) return count; } + + private List FilterAmbiguity(List extractResults, string text) + { + if (this.config.AmbiguityFiltersDict != null) + { + foreach (var regex in this.config.AmbiguityFiltersDict) + { + foreach (var extractResult in extractResults) + { + if (regex.Key.IsMatch(extractResult.Text)) + { + var matches = regex.Value.Matches(text).Cast(); + extractResults = extractResults.Where(er => !matches.Any(m => m.Index < er.Start + er.Length && m.Index + m.Length > er.Start)) + .ToList(); + } + } + } + } + + return extractResults; + } } } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BasePhoneNumberExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BasePhoneNumberExtractorConfiguration.cs new file mode 100644 index 0000000000..fca47e559a --- /dev/null +++ 
b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BasePhoneNumberExtractorConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Definitions.Utilities; + +namespace Microsoft.Recognizers.Text.Sequence +{ + public class BasePhoneNumberExtractorConfiguration : PhoneNumberConfiguration + { + public BasePhoneNumberExtractorConfiguration(SequenceOptions options) + : base(options) + { + WordBoundariesRegex = BasePhoneNumbers.WordBoundariesRegex; + NonWordBoundariesRegex = BasePhoneNumbers.NonWordBoundariesRegex; + EndWordBoundariesRegex = BasePhoneNumbers.EndWordBoundariesRegex; + ColonPrefixCheckRegex = new Regex(BasePhoneNumbers.ColonPrefixCheckRegex, RegexOptions.None, RegexTimeOut); + ColonMarkers = (List)BasePhoneNumbers.ColonMarkers; + ForbiddenPrefixMarkers = (List)BasePhoneNumbers.ForbiddenPrefixMarkers; + ForbiddenSuffixMarkers = (List)BasePhoneNumbers.ForbiddenSuffixMarkers; + AmbiguityFiltersDict = AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(BasePhoneNumbers.AmbiguityFiltersDict); + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseQuotedTextExtractor.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseQuotedTextExtractor.cs new file mode 100644 index 0000000000..ac8cb2b197 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseQuotedTextExtractor.cs @@ -0,0 +1,143 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using System.Text.RegularExpressions; + +namespace Microsoft.Recognizers.Text.Sequence +{ + public class BaseQuotedTextExtractor : BaseSequenceExtractor + { + private QuotedTextConfiguration config; + + public BaseQuotedTextExtractor(QuotedTextConfiguration config) + { + this.config = config; + + var regexes = new Dictionary + { + { + config.QuotedTextRegex1, + Constants.QUOTED_TEXT_REGEX + }, + { + config.QuotedTextRegex2, + Constants.QUOTED_TEXT_REGEX + }, + { + config.QuotedTextRegex3, + Constants.QUOTED_TEXT_REGEX + }, + { + config.QuotedTextRegex4, + Constants.QUOTED_TEXT_REGEX + }, + { + config.QuotedTextRegex5, + Constants.QUOTED_TEXT_REGEX + }, + { + config.QuotedTextRegex6, + Constants.QUOTED_TEXT_REGEX + }, + { + config.QuotedTextRegex7, + Constants.QUOTED_TEXT_REGEX + }, + { + config.QuotedTextRegex8, + Constants.QUOTED_TEXT_REGEX + }, + { + config.QuotedTextRegex9, + Constants.QUOTED_TEXT_REGEX + }, + }; + + Regexes = regexes.ToImmutableDictionary(); + } + + internal override ImmutableDictionary Regexes { get; } + + protected sealed override string ExtractType { get; } = Constants.SYS_QUOTED_TEXT; + + public override List Extract(string text) + { + var result = new List(); + + if (string.IsNullOrEmpty(text)) + { + return result; + } + + var matchSource = new Dictionary(); + var matched = new bool[text.Length]; + + // Traverse every match results to see each position in the text is matched or not. 
+ var collections = Regexes.ToDictionary(o => o.Key.Matches(text), p => p.Value); + foreach (var collection in collections) + { + for (int k = 0; k < text.Length; k++) + { + matched[k] = false; + } + + foreach (Match m in collection.Key) + { + if (IsValidMatch(m)) + { + for (var j = 0; j < m.Length; j++) + { + matched[m.Index + j] = true; + } + + // Keep Source Data for extra information + matchSource.Add(m, collection.Value); + } + } + + GetResult(matched, text, matchSource, result); + } + + // Form the extracted results mark all the matched intervals in the text. + return PostFilter(result); + } + + public void GetResult(bool[] matched, string text, Dictionary matchSource, List result) + { + var lastNotMatched = -1; + for (var i = 0; i < text.Length; i++) + { + if (matched[i]) + { + if (i + 1 == text.Length || !matched[i + 1]) + { + var start = lastNotMatched + 1; + var length = i - lastNotMatched; + var substr = text.Substring(start, length); + bool MatchFunc(Match o) => o.Index == start && o.Length == length; + + if (matchSource.Keys.Any(MatchFunc)) + { + var srcMatch = matchSource.Keys.First(MatchFunc); + result.Add(new ExtractResult + { + Start = start, + Length = length, + Text = substr, + Type = ExtractType, + Data = matchSource.ContainsKey(srcMatch) ? matchSource[srcMatch] : null, + }); + } + } + } + else + { + lastNotMatched = i; + } + } + } + } +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseSequenceExtractor.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseSequenceExtractor.cs index f8ca408609..6728c53bba 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseSequenceExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseSequenceExtractor.cs @@ -1,6 +1,11 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; +using System.Reflection; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Sequence @@ -75,12 +80,17 @@ public virtual List Extract(string text) } } - return result; + return PostFilter(result); } public virtual bool IsValidMatch(Match match) { return true; } + + protected virtual List PostFilter(List results) + { + return results.OrderBy(p => p.Start).ToList(); + } } } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseURLExtractor.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseURLExtractor.cs index ceee5ff7a3..e5b7c2195b 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseURLExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BaseURLExtractor.cs @@ -1,6 +1,11 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; +using System.Reflection; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; @@ -34,7 +39,7 @@ public BaseURLExtractor(URLConfiguration config) }; Regexes = regexes.ToImmutableDictionary(); - AmbiguousTimeTerm = new Regex(BaseURL.AmbiguousTimeTerm, RegexOptions.Compiled); + AmbiguousTimeTerm = new Regex(BaseURL.AmbiguousTimeTerm, RegexOptions.Compiled, RegexTimeOut); TldMatcher = new StringMatcher(); TldMatcher.Init(BaseURL.TldList); @@ -42,6 +47,8 @@ public BaseURLExtractor(URLConfiguration config) internal override ImmutableDictionary Regexes { get; } + protected static TimeSpan RegexTimeOut => SequenceRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + protected sealed override string ExtractType { get; } = Constants.SYS_URL; private StringMatcher TldMatcher { get; } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/French/FrenchPhoneNumberExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/French/FrenchPhoneNumberExtractorConfiguration.cs new file mode 100644 index 0000000000..cbb51a8e25 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/French/FrenchPhoneNumberExtractorConfiguration.cs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Sequence.French +{ + public class FrenchPhoneNumberExtractorConfiguration : BasePhoneNumberExtractorConfiguration + { + public FrenchPhoneNumberExtractorConfiguration(SequenceOptions options) + : base(options) + { + FalsePositivePrefixRegex = null; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/French/FrenchQuotedTextExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/French/FrenchQuotedTextExtractorConfiguration.cs new file mode 100644 index 0000000000..7adf3a4f79 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/French/FrenchQuotedTextExtractorConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.French; + +namespace Microsoft.Recognizers.Text.Sequence.French +{ + public class FrenchQuotedTextExtractorConfiguration : QuotedTextConfiguration + { + public FrenchQuotedTextExtractorConfiguration(SequenceOptions options) + : base(options) + { + QuotedTextRegex1 = new Regex(QuotedTextDefinitions.QuotedTextRegex1, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex2 = new Regex(QuotedTextDefinitions.QuotedTextRegex2, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex3 = new Regex(QuotedTextDefinitions.QuotedTextRegex3, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex4 = new Regex(QuotedTextDefinitions.QuotedTextRegex4, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex5 = new Regex(QuotedTextDefinitions.QuotedTextRegex5, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex6 = new Regex(QuotedTextDefinitions.QuotedTextRegex6, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex7 = new Regex(QuotedTextDefinitions.QuotedTextRegex7, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex8 = new Regex(QuotedTextDefinitions.QuotedTextRegex8, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex9 = new 
Regex(QuotedTextDefinitions.QuotedTextRegex9, RegexOptions.Compiled, RegexTimeOut); + + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/German/GermanPhoneNumberExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/German/GermanPhoneNumberExtractorConfiguration.cs new file mode 100644 index 0000000000..b9aa978d78 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/German/GermanPhoneNumberExtractorConfiguration.cs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence.German +{ + public class GermanPhoneNumberExtractorConfiguration : BasePhoneNumberExtractorConfiguration + { + public GermanPhoneNumberExtractorConfiguration(SequenceOptions options) + : base(options) + { + FalsePositivePrefixRegex = null; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/German/GermanQuotedTextExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/German/GermanQuotedTextExtractorConfiguration.cs new file mode 100644 index 0000000000..6fdabf95b6 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/German/GermanQuotedTextExtractorConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.German; + +namespace Microsoft.Recognizers.Text.Sequence.German +{ + public class GermanQuotedTextExtractorConfiguration : QuotedTextConfiguration + { + public GermanQuotedTextExtractorConfiguration(SequenceOptions options) + : base(options) + { + QuotedTextRegex1 = new Regex(QuotedTextDefinitions.QuotedTextRegex1, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex2 = new Regex(QuotedTextDefinitions.QuotedTextRegex2, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex3 = new Regex(QuotedTextDefinitions.QuotedTextRegex3, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex4 = new Regex(QuotedTextDefinitions.QuotedTextRegex4, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex5 = new Regex(QuotedTextDefinitions.QuotedTextRegex5, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex6 = new Regex(QuotedTextDefinitions.QuotedTextRegex6, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex7 = new Regex(QuotedTextDefinitions.QuotedTextRegex7, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex8 = new Regex(QuotedTextDefinitions.QuotedTextRegex8, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex9 = new Regex(QuotedTextDefinitions.QuotedTextRegex9, RegexOptions.Compiled, RegexTimeOut); + + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Hindi/HindiPhoneNumberExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Hindi/HindiPhoneNumberExtractorConfiguration.cs new file mode 100644 index 0000000000..220e0d9e6a --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Hindi/HindiPhoneNumberExtractorConfiguration.cs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Sequence.Hindi +{ + public class HindiPhoneNumberExtractorConfiguration : BasePhoneNumberExtractorConfiguration + { + public HindiPhoneNumberExtractorConfiguration(SequenceOptions options) + : base(options) + { + FalsePositivePrefixRegex = null; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Hindi/HindiQuotedTextExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Hindi/HindiQuotedTextExtractorConfiguration.cs new file mode 100644 index 0000000000..d1989f905b --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Hindi/HindiQuotedTextExtractorConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Hindi; + +namespace Microsoft.Recognizers.Text.Sequence.Hindi +{ + public class HindiQuotedTextExtractorConfiguration : QuotedTextConfiguration + { + public HindiQuotedTextExtractorConfiguration(SequenceOptions options) + : base(options) + { + QuotedTextRegex1 = new Regex(QuotedTextDefinitions.QuotedTextRegex1, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex2 = new Regex(QuotedTextDefinitions.QuotedTextRegex2, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex3 = new Regex(QuotedTextDefinitions.QuotedTextRegex3, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex4 = new Regex(QuotedTextDefinitions.QuotedTextRegex4, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex5 = new Regex(QuotedTextDefinitions.QuotedTextRegex5, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex6 = new Regex(QuotedTextDefinitions.QuotedTextRegex6, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex7 = new Regex(QuotedTextDefinitions.QuotedTextRegex7, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex8 = new Regex(QuotedTextDefinitions.QuotedTextRegex8, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex9 = new 
Regex(QuotedTextDefinitions.QuotedTextRegex9, RegexOptions.Compiled, RegexTimeOut); + + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Italian/ItalianPhoneNumberExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Italian/ItalianPhoneNumberExtractorConfiguration.cs new file mode 100644 index 0000000000..ee4fd5c708 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Italian/ItalianPhoneNumberExtractorConfiguration.cs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence.Italian +{ + public class ItalianPhoneNumberExtractorConfiguration : BasePhoneNumberExtractorConfiguration + { + public ItalianPhoneNumberExtractorConfiguration(SequenceOptions options) + : base(options) + { + FalsePositivePrefixRegex = null; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Italian/ItalianQuotedTextExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Italian/ItalianQuotedTextExtractorConfiguration.cs new file mode 100644 index 0000000000..f945f73424 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Italian/ItalianQuotedTextExtractorConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Italian; + +namespace Microsoft.Recognizers.Text.Sequence.Italian +{ + public class ItalianQuotedTextExtractorConfiguration : QuotedTextConfiguration + { + public ItalianQuotedTextExtractorConfiguration(SequenceOptions options) + : base(options) + { + QuotedTextRegex1 = new Regex(QuotedTextDefinitions.QuotedTextRegex1, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex2 = new Regex(QuotedTextDefinitions.QuotedTextRegex2, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex3 = new Regex(QuotedTextDefinitions.QuotedTextRegex3, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex4 = new Regex(QuotedTextDefinitions.QuotedTextRegex4, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex5 = new Regex(QuotedTextDefinitions.QuotedTextRegex5, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex6 = new Regex(QuotedTextDefinitions.QuotedTextRegex6, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex7 = new Regex(QuotedTextDefinitions.QuotedTextRegex7, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex8 = new Regex(QuotedTextDefinitions.QuotedTextRegex8, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex9 = new Regex(QuotedTextDefinitions.QuotedTextRegex9, RegexOptions.Compiled, RegexTimeOut); + + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Japanese/JapaneseQuotedTextExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Japanese/JapaneseQuotedTextExtractorConfiguration.cs new file mode 100644 index 0000000000..4ac9086132 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Japanese/JapaneseQuotedTextExtractorConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Japanese; + +namespace Microsoft.Recognizers.Text.Sequence.Japanese +{ + public class JapaneseQuotedTextExtractorConfiguration : QuotedTextConfiguration + { + public JapaneseQuotedTextExtractorConfiguration(SequenceOptions options) + : base(options) + { + QuotedTextRegex1 = new Regex(QuotedTextDefinitions.QuotedTextRegex1, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex2 = new Regex(QuotedTextDefinitions.QuotedTextRegex2, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex3 = new Regex(QuotedTextDefinitions.QuotedTextRegex3, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex4 = new Regex(QuotedTextDefinitions.QuotedTextRegex4, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex5 = new Regex(QuotedTextDefinitions.QuotedTextRegex5, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex6 = new Regex(QuotedTextDefinitions.QuotedTextRegex6, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex7 = new Regex(QuotedTextDefinitions.QuotedTextRegex7, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex8 = new Regex(QuotedTextDefinitions.QuotedTextRegex8, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex9 = new Regex(QuotedTextDefinitions.QuotedTextRegex9, RegexOptions.Compiled, RegexTimeOut); + + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Korean/KoreanPhoneNumberExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Korean/KoreanPhoneNumberExtractorConfiguration.cs new file mode 100644 index 0000000000..e8faafe5dd --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Korean/KoreanPhoneNumberExtractorConfiguration.cs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Sequence.Korean +{ + public class KoreanPhoneNumberExtractorConfiguration : BasePhoneNumberExtractorConfiguration + { + public KoreanPhoneNumberExtractorConfiguration(SequenceOptions options) + : base(options) + { + FalsePositivePrefixRegex = null; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Korean/KoreanQuotedTextExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Korean/KoreanQuotedTextExtractorConfiguration.cs new file mode 100644 index 0000000000..8d35802f8a --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Korean/KoreanQuotedTextExtractorConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Korean; + +namespace Microsoft.Recognizers.Text.Sequence.Korean +{ + public class KoreanQuotedTextExtractorConfiguration : QuotedTextConfiguration + { + public KoreanQuotedTextExtractorConfiguration(SequenceOptions options) + : base(options) + { + QuotedTextRegex1 = new Regex(QuotedTextDefinitions.QuotedTextRegex1, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex2 = new Regex(QuotedTextDefinitions.QuotedTextRegex2, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex3 = new Regex(QuotedTextDefinitions.QuotedTextRegex3, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex4 = new Regex(QuotedTextDefinitions.QuotedTextRegex4, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex5 = new Regex(QuotedTextDefinitions.QuotedTextRegex5, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex6 = new Regex(QuotedTextDefinitions.QuotedTextRegex6, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex7 = new Regex(QuotedTextDefinitions.QuotedTextRegex7, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex8 = new Regex(QuotedTextDefinitions.QuotedTextRegex8, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex9 = new 
Regex(QuotedTextDefinitions.QuotedTextRegex9, RegexOptions.Compiled, RegexTimeOut); + + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.csproj b/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.csproj index 7ab110fe51..fb10de7255 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.csproj +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.csproj @@ -1,13 +1,18 @@  - netstandard2.0;net462;net452;net45 + netstandard2.1;netstandard2.0;net462;net6.0 + 9 false false ../Recognizers-Text.ruleset + + true + ..\buildtools\35MSSharedLib1024.snk + true $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + Microsoft + nlp, entity-extraction, parser-library, recognizer, phonenumber, netstandard2.0 + Microsoft.Recognizers.Text.Sequence provides robust recognition and resolution of series entities like phone numbers, URLs, and e-mail and IP addresses. + MIT + https://github.com/Microsoft/Recognizers-Text + images\icon.png + © Microsoft Corporation. All rights reserved. + + - + all runtime; build; native; contentfiles; analyzers - + all runtime; build; native; contentfiles; analyzers - @@ -42,4 +54,5 @@ + diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.nuspec b/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.nuspec index f45d57c6b0..fac86ed142 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.nuspec +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.nuspec @@ -6,22 +6,20 @@ $title$ Microsoft true - Microsoft.Recognizers.Text.Sequence provides robust recognition and resolution of series entities like phone numbers and IP addresses. 
+ Microsoft.Recognizers.Text.Sequence provides robust recognition and resolution of series entities like phone numbers, URLs, and e-mail and IP addresses. MIT https://github.com/Microsoft/Recognizers-Text - http://docs.botframework.com/images/bot_icon.png + images\icon.png © Microsoft Corporation. All rights reserved. nlp entity-extraction parser-library recognizer phonenumber netstandard2.0 - - - + diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.xml b/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.xml index d5259b64b2..e18095a369 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.xml +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.xml @@ -9,5 +9,10 @@ None + + + Relaxed. Likely match, don't perform extra validation. + + diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Models/AbstractSequenceModel.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Models/AbstractSequenceModel.cs index 7a9b75e246..4fe08864b8 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Models/AbstractSequenceModel.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Models/AbstractSequenceModel.cs @@ -1,11 +1,20 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Linq; +using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.Sequence { public abstract class AbstractSequenceModel : IModel { + + private string culture; + + private string requestedCulture; + protected AbstractSequenceModel(IParser parser, IExtractor extractor) { this.Parser = parser; @@ -14,6 +23,10 @@ protected AbstractSequenceModel(IParser parser, IExtractor extractor) public abstract string ModelTypeName { get; } + public string Culture => this.culture; + + public string RequestedCulture => this.requestedCulture; + protected IExtractor Extractor { get; private set; } protected IParser Parser { get; private set; } @@ -22,6 +35,9 @@ public virtual List Parse(string query) { var parsedSequences = new List(); + // Preprocess the query + query = QueryProcessor.Preprocess(query); + try { var extractResults = Extractor.Extract(query); @@ -46,5 +62,11 @@ public virtual List Parse(string query) TypeName = ModelTypeName, }).ToList(); } + + public void SetCultureInfo(string culture, string requestedCulture = null) + { + this.culture = culture; + this.requestedCulture = requestedCulture; + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Models/EmailModel.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Models/EmailModel.cs index 6e4f275aa1..aeded0f5a7 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Models/EmailModel.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Models/EmailModel.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Sequence +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Sequence { public class EmailModel : AbstractSequenceModel { diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Models/GUIDModel.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Models/GUIDModel.cs index 00716a7fbd..c83bd15be8 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Models/GUIDModel.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Models/GUIDModel.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Linq; using Microsoft.Recognizers.Text.Utilities; diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Models/HashTagModel.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Models/HashTagModel.cs index c7c33c3428..3baa5f8d4e 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Models/HashTagModel.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Models/HashTagModel.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Sequence +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence { public class HashtagModel : AbstractSequenceModel { diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Models/IpAddressModel.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Models/IpAddressModel.cs index 03c57f1888..775c4039b9 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Models/IpAddressModel.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Models/IpAddressModel.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; using System.Linq; diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Models/MentionModel.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Models/MentionModel.cs index e0e75c9452..d3683bfbda 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Models/MentionModel.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Models/MentionModel.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Sequence +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence { public class MentionModel : AbstractSequenceModel { diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Models/PhoneNumberModel.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Models/PhoneNumberModel.cs index 487c3b3c32..43fab1733d 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Models/PhoneNumberModel.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Models/PhoneNumberModel.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Linq; using Microsoft.Recognizers.Text.Utilities; diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Models/QuotedTextModel.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Models/QuotedTextModel.cs new file mode 100644 index 0000000000..d9bb9c0376 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Models/QuotedTextModel.cs @@ -0,0 +1,15 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Sequence +{ + public class QuotedTextModel : AbstractSequenceModel +{ + public QuotedTextModel(IParser parser, IExtractor extractor) + : base(parser, extractor) + { + } + + public override string ModelTypeName => Constants.MODEL_QUOTED_TEXT; +} +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Models/URLModel.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Models/URLModel.cs index 7ec87926eb..d9fb9bc890 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Models/URLModel.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Models/URLModel.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Sequence +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence { public class URLModel : AbstractSequenceModel { diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Parsers/BaseIpParser.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Parsers/BaseIpParser.cs index 7e135641f1..b20c5b516d 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Parsers/BaseIpParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Parsers/BaseIpParser.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Sequence +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence { public class BaseIpParser : BaseSequenceParser { diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Parsers/BaseSequenceParser.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Parsers/BaseSequenceParser.cs index a298fe28b4..9c10a74880 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Parsers/BaseSequenceParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Parsers/BaseSequenceParser.cs @@ -1,7 +1,15 @@ -namespace Microsoft.Recognizers.Text.Sequence +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Reflection; + +namespace Microsoft.Recognizers.Text.Sequence { public class BaseSequenceParser : IParser { + protected static TimeSpan RegexTimeOut => SequenceRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType); + public virtual ParseResult Parse(ExtractResult extResult) { var result = new ParseResult diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Portuguese/PortuguesePhoneNumberExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Portuguese/PortuguesePhoneNumberExtractorConfiguration.cs new file mode 100644 index 0000000000..7dc3838d8b --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Portuguese/PortuguesePhoneNumberExtractorConfiguration.cs @@ -0,0 +1,17 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Portuguese; + +namespace Microsoft.Recognizers.Text.Sequence.Portuguese +{ + public class PortuguesePhoneNumberExtractorConfiguration : BasePhoneNumberExtractorConfiguration + { + public PortuguesePhoneNumberExtractorConfiguration(SequenceOptions options) + : base(options) + { + FalsePositivePrefixRegex = new Regex(PhoneNumbersDefinitions.FalsePositivePrefixRegex, RegexOptions.None, RegexTimeOut); + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Portuguese/PortugueseQuotedTextExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Portuguese/PortugueseQuotedTextExtractorConfiguration.cs new file mode 100644 index 0000000000..1c62c2e942 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Portuguese/PortugueseQuotedTextExtractorConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Portuguese; + +namespace Microsoft.Recognizers.Text.Sequence.Portuguese +{ + public class PortugueseQuotedTextExtractorConfiguration : QuotedTextConfiguration + { + public PortugueseQuotedTextExtractorConfiguration(SequenceOptions options) + : base(options) + { + QuotedTextRegex1 = new Regex(QuotedTextDefinitions.QuotedTextRegex1, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex2 = new Regex(QuotedTextDefinitions.QuotedTextRegex2, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex3 = new Regex(QuotedTextDefinitions.QuotedTextRegex3, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex4 = new Regex(QuotedTextDefinitions.QuotedTextRegex4, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex5 = new Regex(QuotedTextDefinitions.QuotedTextRegex5, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex6 = new Regex(QuotedTextDefinitions.QuotedTextRegex6, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex7 = new Regex(QuotedTextDefinitions.QuotedTextRegex7, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex8 = new Regex(QuotedTextDefinitions.QuotedTextRegex8, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex9 = new Regex(QuotedTextDefinitions.QuotedTextRegex9, RegexOptions.Compiled, RegexTimeOut); + + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/SequenceOptions.cs b/.NET/Microsoft.Recognizers.Text.Sequence/SequenceOptions.cs index 8617aaa6d1..8911891676 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/SequenceOptions.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/SequenceOptions.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; namespace Microsoft.Recognizers.Text.Sequence { @@ -9,5 +12,10 @@ public enum SequenceOptions /// None /// None = 0, + + /// + /// Relaxed. Likely match, don't perform extra validation. 
+ /// + Relaxed = 1, } } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/SequenceRecognizer.cs b/.NET/Microsoft.Recognizers.Text.Sequence/SequenceRecognizer.cs index c75ccf2af5..6b2e591fc5 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/SequenceRecognizer.cs +++ b/.NET/Microsoft.Recognizers.Text.Sequence/SequenceRecognizer.cs @@ -1,15 +1,33 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; -using Microsoft.Recognizers.Text.Sequence; using Microsoft.Recognizers.Text.Sequence.Chinese; +using Microsoft.Recognizers.Text.Sequence.Dutch; using Microsoft.Recognizers.Text.Sequence.English; +using Microsoft.Recognizers.Text.Sequence.French; +using Microsoft.Recognizers.Text.Sequence.German; +using Microsoft.Recognizers.Text.Sequence.Hindi; +using Microsoft.Recognizers.Text.Sequence.Italian; +using Microsoft.Recognizers.Text.Sequence.Japanese; +using Microsoft.Recognizers.Text.Sequence.Korean; +using Microsoft.Recognizers.Text.Sequence.Portuguese; +using Microsoft.Recognizers.Text.Sequence.Spanish; +using Microsoft.Recognizers.Text.Sequence.Swedish; +using Microsoft.Recognizers.Text.Sequence.Turkish; namespace Microsoft.Recognizers.Text.Sequence { public class SequenceRecognizer : Recognizer { + public SequenceRecognizer(string targetCulture, SequenceOptions options, bool lazyInitialization, int timeoutInSeconds) + : base(targetCulture, options, lazyInitialization, timeoutInSeconds) + { + } + public SequenceRecognizer(string targetCulture, SequenceOptions options = SequenceOptions.None, bool lazyInitialization = false) - : base(targetCulture, options, lazyInitialization) + : base(targetCulture, options, lazyInitialization, 0) { } @@ -19,7 +37,7 @@ public SequenceRecognizer(string targetCulture, int options, bool lazyInitializa } public SequenceRecognizer(SequenceOptions options = SequenceOptions.None, bool lazyInitialization = true) - : base(null, 
options, lazyInitialization) + : base(null, options, lazyInitialization, 0) { } @@ -48,6 +66,11 @@ public static List RecognizeHashtag(string query, string culture, S return RecognizeByModel(recognizer => recognizer.GetHashtagModel(culture, fallbackToDefaultCulture), query, options); } + public static List RecognizeQuotedText(string query, string culture, SequenceOptions options = SequenceOptions.None, bool fallbackToDefaultCulture = true) + { + return RecognizeByModel(recognizer => recognizer.GetQuotedTextModel(culture, fallbackToDefaultCulture), query, options); + } + public static List RecognizeEmail(string query, string culture, SequenceOptions options = SequenceOptions.None, bool fallbackToDefaultCulture = true) { return RecognizeByModel(recognizer => recognizer.GetEmailModel(culture, fallbackToDefaultCulture), query, options); @@ -72,7 +95,7 @@ public IModel GetPhoneNumberModel(string culture = null, bool fallbackToDefaultC return GetModel(Culture.Chinese, fallbackToDefaultCulture); } - return GetModel(Culture.English, fallbackToDefaultCulture); + return GetModel(culture, fallbackToDefaultCulture); } public IModel GetIpAddressModel(string culture = null, bool fallbackToDefaultCulture = true) @@ -97,6 +120,16 @@ public IModel GetHashtagModel(string culture = null, bool fallbackToDefaultCultu return GetModel(Culture.English, fallbackToDefaultCulture); } + public IModel GetQuotedTextModel(string culture = null, bool fallbackToDefaultCulture = true) + { + if (culture != null && culture.ToLowerInvariant().StartsWith("zh-", StringComparison.Ordinal)) + { + return GetModel(Culture.Chinese, fallbackToDefaultCulture); + } + + return GetModel(culture, fallbackToDefaultCulture); + } + public IModel GetEmailModel(string culture = null, bool fallbackToDefaultCulture = true) { return GetModel(Culture.English, fallbackToDefaultCulture); @@ -104,8 +137,9 @@ public IModel GetEmailModel(string culture = null, bool fallbackToDefaultCulture public IModel GetURLModel(string 
culture = null, bool fallbackToDefaultCulture = true) { - if (culture.ToLowerInvariant().StartsWith("zh-", StringComparison.Ordinal) || - culture.ToLowerInvariant().StartsWith("ja-", StringComparison.Ordinal)) + if (culture != null && ( + culture.ToLowerInvariant().StartsWith("zh-", StringComparison.Ordinal) || + culture.ToLowerInvariant().StartsWith("ja-", StringComparison.Ordinal))) { return GetModel(Culture.Chinese, fallbackToDefaultCulture); } @@ -132,6 +166,60 @@ protected override void InitializeConfiguration() new PhoneNumberParser(), new BasePhoneNumberExtractor(new ChinesePhoneNumberExtractorConfiguration(options)))); + RegisterModel( + Culture.Portuguese, + (options) => new PhoneNumberModel( + new PhoneNumberParser(), + new BasePhoneNumberExtractor(new PortuguesePhoneNumberExtractorConfiguration(options)))); + + RegisterModel( + Culture.Spanish, + (options) => new PhoneNumberModel( + new PhoneNumberParser(), + new BasePhoneNumberExtractor(new SpanishPhoneNumberExtractorConfiguration(options)))); + + RegisterModel( + Culture.Dutch, + (options) => new PhoneNumberModel( + new PhoneNumberParser(), + new BasePhoneNumberExtractor(new DutchPhoneNumberExtractorConfiguration(options)))); + + RegisterModel( + Culture.French, + (options) => new PhoneNumberModel( + new PhoneNumberParser(), + new BasePhoneNumberExtractor(new FrenchPhoneNumberExtractorConfiguration(options)))); + + RegisterModel( + Culture.German, + (options) => new PhoneNumberModel( + new PhoneNumberParser(), + new BasePhoneNumberExtractor(new GermanPhoneNumberExtractorConfiguration(options)))); + + RegisterModel( + Culture.Hindi, + (options) => new PhoneNumberModel( + new PhoneNumberParser(), + new BasePhoneNumberExtractor(new HindiPhoneNumberExtractorConfiguration(options)))); + + RegisterModel( + Culture.Italian, + (options) => new PhoneNumberModel( + new PhoneNumberParser(), + new BasePhoneNumberExtractor(new ItalianPhoneNumberExtractorConfiguration(options)))); + + RegisterModel( + 
Culture.Korean, + (options) => new PhoneNumberModel( + new PhoneNumberParser(), + new BasePhoneNumberExtractor(new KoreanPhoneNumberExtractorConfiguration(options)))); + + RegisterModel( + Culture.Turkish, + (options) => new PhoneNumberModel( + new PhoneNumberParser(), + new BasePhoneNumberExtractor(new TurkishPhoneNumberExtractorConfiguration(options)))); + RegisterModel( Culture.English, (options) => new IpAddressModel( @@ -152,9 +240,88 @@ protected override void InitializeConfiguration() Culture.English, (options) => new HashtagModel(new HashtagParser(), new HashtagExtractor())); + RegisterModel( + Culture.Chinese, + (options) => new QuotedTextModel( + new QuotedTextParser(), + new BaseQuotedTextExtractor(new ChineseQuotedTextExtractorConfiguration(options)))); + + RegisterModel( + Culture.English, + (options) => new QuotedTextModel( + new QuotedTextParser(), + new BaseQuotedTextExtractor(new EnglishQuotedTextExtractorConfiguration(options)))); + + RegisterModel( + Culture.Dutch, + (options) => new QuotedTextModel( + new QuotedTextParser(), + new BaseQuotedTextExtractor(new DutchQuotedTextExtractorConfiguration(options)))); + + RegisterModel( + Culture.French, + (options) => new QuotedTextModel( + new QuotedTextParser(), + new BaseQuotedTextExtractor(new FrenchQuotedTextExtractorConfiguration(options)))); + + RegisterModel( + Culture.German, + (options) => new QuotedTextModel( + new QuotedTextParser(), + new BaseQuotedTextExtractor(new GermanQuotedTextExtractorConfiguration(options)))); + + RegisterModel( + Culture.Hindi, + (options) => new QuotedTextModel( + new QuotedTextParser(), + new BaseQuotedTextExtractor(new HindiQuotedTextExtractorConfiguration(options)))); + + RegisterModel( + Culture.Italian, + (options) => new QuotedTextModel( + new QuotedTextParser(), + new BaseQuotedTextExtractor(new ItalianQuotedTextExtractorConfiguration(options)))); + + RegisterModel( + Culture.Japanese, + (options) => new QuotedTextModel( + new QuotedTextParser(), + new 
BaseQuotedTextExtractor(new JapaneseQuotedTextExtractorConfiguration(options)))); + + RegisterModel( + Culture.Korean, + (options) => new QuotedTextModel( + new QuotedTextParser(), + new BaseQuotedTextExtractor(new KoreanQuotedTextExtractorConfiguration(options)))); + + RegisterModel( + Culture.Portuguese, + (options) => new QuotedTextModel( + new QuotedTextParser(), + new BaseQuotedTextExtractor(new PortugueseQuotedTextExtractorConfiguration(options)))); + + RegisterModel( + Culture.Spanish, + (options) => new QuotedTextModel( + new QuotedTextParser(), + new BaseQuotedTextExtractor(new SpanishQuotedTextExtractorConfiguration(options)))); + + RegisterModel( + Culture.Swedish, + (options) => new QuotedTextModel( + new QuotedTextParser(), + new BaseQuotedTextExtractor(new SwedishQuotedTextExtractorConfiguration(options)))); + + RegisterModel( + Culture.Turkish, + (options) => new QuotedTextModel( + new QuotedTextParser(), + new BaseQuotedTextExtractor(new TurkishQuotedTextExtractorConfiguration(options)))); + RegisterModel( Culture.English, - (options) => new EmailModel(new EmailParser(), new EmailExtractor())); + (options) => new EmailModel(new EmailParser(new BaseSequenceConfiguration(options)), + new EmailExtractor(new BaseSequenceConfiguration(options)))); RegisterModel( Culture.English, @@ -171,6 +338,22 @@ protected override void InitializeConfiguration() RegisterModel( Culture.English, (options) => new GUIDModel(new GUIDParser(), new GUIDExtractor())); + + } + + protected override List GetRelatedTypes() + { + return new List() + { + typeof(IpConfiguration), + typeof(PhoneNumberConfiguration), + typeof(QuotedTextConfiguration), + typeof(URLConfiguration), + typeof(BaseEmailExtractor), + typeof(BaseGUIDExtractor), + typeof(BaseURLExtractor), + typeof(BaseSequenceParser), + }; } private static List RecognizeByModel(Func getModelFunc, string query, SequenceOptions options) diff --git 
a/.NET/Microsoft.Recognizers.Text.Sequence/Spanish/SpanishPhoneNumberExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Spanish/SpanishPhoneNumberExtractorConfiguration.cs new file mode 100644 index 0000000000..f1b805852b --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Spanish/SpanishPhoneNumberExtractorConfiguration.cs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Sequence.Spanish +{ + public class SpanishPhoneNumberExtractorConfiguration : BasePhoneNumberExtractorConfiguration + { + public SpanishPhoneNumberExtractorConfiguration(SequenceOptions options) + : base(options) + { + FalsePositivePrefixRegex = null; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Spanish/SpanishQuotedTextExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Spanish/SpanishQuotedTextExtractorConfiguration.cs new file mode 100644 index 0000000000..5238901373 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Spanish/SpanishQuotedTextExtractorConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Spanish; + +namespace Microsoft.Recognizers.Text.Sequence.Spanish +{ + public class SpanishQuotedTextExtractorConfiguration : QuotedTextConfiguration + { + public SpanishQuotedTextExtractorConfiguration(SequenceOptions options) + : base(options) + { + QuotedTextRegex1 = new Regex(QuotedTextDefinitions.QuotedTextRegex1, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex2 = new Regex(QuotedTextDefinitions.QuotedTextRegex2, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex3 = new Regex(QuotedTextDefinitions.QuotedTextRegex3, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex4 = new Regex(QuotedTextDefinitions.QuotedTextRegex4, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex5 = new Regex(QuotedTextDefinitions.QuotedTextRegex5, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex6 = new Regex(QuotedTextDefinitions.QuotedTextRegex6, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex7 = new Regex(QuotedTextDefinitions.QuotedTextRegex7, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex8 = new Regex(QuotedTextDefinitions.QuotedTextRegex8, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex9 = new Regex(QuotedTextDefinitions.QuotedTextRegex9, RegexOptions.Compiled, RegexTimeOut); + + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Swedish/SwedishQuotedTextExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Swedish/SwedishQuotedTextExtractorConfiguration.cs new file mode 100644 index 0000000000..22e37692b5 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Swedish/SwedishQuotedTextExtractorConfiguration.cs @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Definitions.Swedish; + +namespace Microsoft.Recognizers.Text.Sequence.Swedish +{ + public class SwedishQuotedTextExtractorConfiguration : QuotedTextConfiguration + { + public SwedishQuotedTextExtractorConfiguration(SequenceOptions options) + : base(options) + { + QuotedTextRegex1 = new Regex(QuotedTextDefinitions.QuotedTextRegex1, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex2 = new Regex(QuotedTextDefinitions.QuotedTextRegex2, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex3 = new Regex(QuotedTextDefinitions.QuotedTextRegex3, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex4 = new Regex(QuotedTextDefinitions.QuotedTextRegex4, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex5 = new Regex(QuotedTextDefinitions.QuotedTextRegex5, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex6 = new Regex(QuotedTextDefinitions.QuotedTextRegex6, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex7 = new Regex(QuotedTextDefinitions.QuotedTextRegex7, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex8 = new Regex(QuotedTextDefinitions.QuotedTextRegex8, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex9 = new Regex(QuotedTextDefinitions.QuotedTextRegex9, RegexOptions.Compiled, RegexTimeOut); + + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Turkish/TurkishPhoneNumberExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Turkish/TurkishPhoneNumberExtractorConfiguration.cs new file mode 100644 index 0000000000..8a1d6e4fa4 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Turkish/TurkishPhoneNumberExtractorConfiguration.cs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Sequence.Turkish +{ + public class TurkishPhoneNumberExtractorConfiguration : BasePhoneNumberExtractorConfiguration + { + public TurkishPhoneNumberExtractorConfiguration(SequenceOptions options) + : base(options) + { + FalsePositivePrefixRegex = null; + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Turkish/TurkishQuotedTextExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Sequence/Turkish/TurkishQuotedTextExtractorConfiguration.cs new file mode 100644 index 0000000000..8684c6126c --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Turkish/TurkishQuotedTextExtractorConfiguration.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions.Turkish; + +namespace Microsoft.Recognizers.Text.Sequence.Turkish +{ + public class TurkishQuotedTextExtractorConfiguration : QuotedTextConfiguration + { + public TurkishQuotedTextExtractorConfiguration(SequenceOptions options) + : base(options) + { + QuotedTextRegex1 = new Regex(QuotedTextDefinitions.QuotedTextRegex1, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex2 = new Regex(QuotedTextDefinitions.QuotedTextRegex2, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex3 = new Regex(QuotedTextDefinitions.QuotedTextRegex3, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex4 = new Regex(QuotedTextDefinitions.QuotedTextRegex4, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex5 = new Regex(QuotedTextDefinitions.QuotedTextRegex5, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex6 = new Regex(QuotedTextDefinitions.QuotedTextRegex6, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex7 = new Regex(QuotedTextDefinitions.QuotedTextRegex7, RegexOptions.Compiled, RegexTimeOut); + QuotedTextRegex8 = new Regex(QuotedTextDefinitions.QuotedTextRegex8, RegexOptions.Compiled, RegexTimeOut); + 
QuotedTextRegex9 = new Regex(QuotedTextDefinitions.QuotedTextRegex9, RegexOptions.Compiled, RegexTimeOut); + + } + } +} diff --git a/.NET/Microsoft.Recognizers.Text.sln b/.NET/Microsoft.Recognizers.Text.sln index 24ff5dfddf..b5d4fc6f8a 100644 --- a/.NET/Microsoft.Recognizers.Text.sln +++ b/.NET/Microsoft.Recognizers.Text.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.27130.2026 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.31402.337 MinimumVisualStudioVersion = 10.0.40219.1 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tests", "Tests", "{ED7B6456-AB0A-48CE-8F85-711FE87F09C2}" EndProject @@ -19,6 +19,9 @@ EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Patterns", "Patterns", "{23084BAE-6EB0-439A-BD71-31A783680711}" ProjectSection(SolutionItems) = preProject ..\Patterns\Arabic\Arabic-Choice.yaml = ..\Patterns\Arabic\Arabic-Choice.yaml + ..\Patterns\Arabic\Arabic-DateTime.yaml = ..\Patterns\Arabic\Arabic-DateTime.yaml + ..\Patterns\Arabic\Arabic-Numbers.yaml = ..\Patterns\Arabic\Arabic-Numbers.yaml + ..\Patterns\Arabic\Arabic-TimeZone.yaml = ..\Patterns\Arabic\Arabic-TimeZone.yaml ..\Patterns\Base-Currency.yaml = ..\Patterns\Base-Currency.yaml ..\Patterns\Base-DateTime.yaml = ..\Patterns\Base-DateTime.yaml ..\Patterns\Base-Email.yaml = ..\Patterns\Base-Email.yaml @@ -34,49 +37,70 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Patterns", "Patterns", "{23 ..\Patterns\Chinese\Chinese-Numbers.yaml = ..\Patterns\Chinese\Chinese-Numbers.yaml ..\Patterns\Chinese\Chinese-NumbersWithUnit.yaml = ..\Patterns\Chinese\Chinese-NumbersWithUnit.yaml ..\Patterns\Chinese\Chinese-PhoneNumbers.yaml = ..\Patterns\Chinese\Chinese-PhoneNumbers.yaml + ..\Patterns\Chinese\Chinese-QuotedText.yaml = ..\Patterns\Chinese\Chinese-QuotedText.yaml ..\Patterns\Dutch\Dutch-Choice.yaml = ..\Patterns\Dutch\Dutch-Choice.yaml ..\Patterns\Dutch\Dutch-DateTime.yaml = 
..\Patterns\Dutch\Dutch-DateTime.yaml ..\Patterns\Dutch\Dutch-Numbers.yaml = ..\Patterns\Dutch\Dutch-Numbers.yaml ..\Patterns\Dutch\Dutch-NumbersWithUnit.yaml = ..\Patterns\Dutch\Dutch-NumbersWithUnit.yaml + ..\Patterns\Dutch\Dutch-QuotedText.yaml = ..\Patterns\Dutch\Dutch-QuotedText.yaml ..\Patterns\Dutch\Dutch-TimeZone.yaml = ..\Patterns\Dutch\Dutch-TimeZone.yaml ..\Patterns\English\English-Choice.yaml = ..\Patterns\English\English-Choice.yaml ..\Patterns\English\English-DateTime.yaml = ..\Patterns\English\English-DateTime.yaml ..\Patterns\English\English-Numbers.yaml = ..\Patterns\English\English-Numbers.yaml ..\Patterns\English\English-NumbersWithUnit.yaml = ..\Patterns\English\English-NumbersWithUnit.yaml + ..\Patterns\English\English-QuotedText.yaml = ..\Patterns\English\English-QuotedText.yaml ..\Patterns\English\English-TimeZone.yaml = ..\Patterns\English\English-TimeZone.yaml ..\Patterns\French\French-Choice.yaml = ..\Patterns\French\French-Choice.yaml ..\Patterns\French\French-DateTime.yaml = ..\Patterns\French\French-DateTime.yaml ..\Patterns\French\French-Numbers.yaml = ..\Patterns\French\French-Numbers.yaml ..\Patterns\French\French-NumbersWithUnit.yaml = ..\Patterns\French\French-NumbersWithUnit.yaml + ..\Patterns\French\French-QuotedText.yaml = ..\Patterns\French\French-QuotedText.yaml ..\Patterns\German\German-Choice.yaml = ..\Patterns\German\German-Choice.yaml ..\Patterns\German\German-DateTime.yaml = ..\Patterns\German\German-DateTime.yaml ..\Patterns\German\German-Numbers.yaml = ..\Patterns\German\German-Numbers.yaml ..\Patterns\German\German-NumbersWithUnit.yaml = ..\Patterns\German\German-NumbersWithUnit.yaml + ..\Patterns\German\German-QuotedText.yaml = ..\Patterns\German\German-QuotedText.yaml + ..\Patterns\Hindi\Hindi-Choice.yaml = ..\Patterns\Hindi\Hindi-Choice.yaml + ..\Patterns\Hindi\Hindi-DateTime.yaml = ..\Patterns\Hindi\Hindi-DateTime.yaml ..\Patterns\Hindi\Hindi-Numbers.yaml = ..\Patterns\Hindi\Hindi-Numbers.yaml 
..\Patterns\Hindi\Hindi-NumbersWithUnit.yaml = ..\Patterns\Hindi\Hindi-NumbersWithUnit.yaml - ..\Patterns\Hindi\Hindi-Choice.yaml = ..\Patterns\Hindi\Hindi-Choice.yaml + ..\Patterns\Hindi\Hindi-QuotedText.yaml = ..\Patterns\Hindi\Hindi-QuotedText.yaml + ..\Patterns\Hindi\Hindi-TimeZone.yaml = ..\Patterns\Hindi\Hindi-TimeZone.yaml ..\Patterns\Italian\Italian-Choice.yaml = ..\Patterns\Italian\Italian-Choice.yaml ..\Patterns\Italian\Italian-DateTime.yaml = ..\Patterns\Italian\Italian-DateTime.yaml ..\Patterns\Italian\Italian-Numbers.yaml = ..\Patterns\Italian\Italian-Numbers.yaml ..\Patterns\Italian\Italian-NumbersWithUnit.yaml = ..\Patterns\Italian\Italian-NumbersWithUnit.yaml + ..\Patterns\Italian\Italian-QuotedText.yaml = ..\Patterns\Italian\Italian-QuotedText.yaml ..\Patterns\Japanese\Japanese-Choice.yaml = ..\Patterns\Japanese\Japanese-Choice.yaml ..\Patterns\Japanese\Japanese-DateTime.yaml = ..\Patterns\Japanese\Japanese-DateTime.yaml ..\Patterns\Japanese\Japanese-Numbers.yaml = ..\Patterns\Japanese\Japanese-Numbers.yaml ..\Patterns\Japanese\Japanese-NumbersWithUnit.yaml = ..\Patterns\Japanese\Japanese-NumbersWithUnit.yaml + ..\Patterns\Japanese\Japanese-QuotedText.yaml = ..\Patterns\Japanese\Japanese-QuotedText.yaml + ..\Patterns\Korean\Korean-DateTime.yaml = ..\Patterns\Korean\Korean-DateTime.yaml ..\Patterns\Korean\Korean-Numbers.yaml = ..\Patterns\Korean\Korean-Numbers.yaml + ..\Patterns\Korean\Korean-NumbersWithUnit.yaml = ..\Patterns\Korean\Korean-NumbersWithUnit.yaml + ..\Patterns\Korean\Korean-QuotedText.yaml = ..\Patterns\Korean\Korean-QuotedText.yaml ..\Patterns\Portuguese\Portuguese-Choice.yaml = ..\Patterns\Portuguese\Portuguese-Choice.yaml ..\Patterns\Portuguese\Portuguese-DateTime.yaml = ..\Patterns\Portuguese\Portuguese-DateTime.yaml ..\Patterns\Portuguese\Portuguese-Numbers.yaml = ..\Patterns\Portuguese\Portuguese-Numbers.yaml ..\Patterns\Portuguese\Portuguese-NumbersWithUnit.yaml = ..\Patterns\Portuguese\Portuguese-NumbersWithUnit.yaml + 
..\Patterns\Portuguese\Portuguese-QuotedText.yaml = ..\Patterns\Portuguese\Portuguese-QuotedText.yaml ..\Patterns\Spanish\Spanish-Choice.yaml = ..\Patterns\Spanish\Spanish-Choice.yaml ..\Patterns\Spanish\Spanish-DateTime.yaml = ..\Patterns\Spanish\Spanish-DateTime.yaml ..\Patterns\Spanish\Spanish-Numbers.yaml = ..\Patterns\Spanish\Spanish-Numbers.yaml ..\Patterns\Spanish\Spanish-NumbersWithUnit.yaml = ..\Patterns\Spanish\Spanish-NumbersWithUnit.yaml + ..\Patterns\Spanish\Spanish-QuotedText.yaml = ..\Patterns\Spanish\Spanish-QuotedText.yaml ..\Patterns\Swedish\Swedish-Choice.yaml = ..\Patterns\Swedish\Swedish-Choice.yaml + ..\Patterns\Swedish\Swedish-DateTime.yaml = ..\Patterns\Swedish\Swedish-DateTime.yaml ..\Patterns\Swedish\Swedish-Numbers.yaml = ..\Patterns\Swedish\Swedish-Numbers.yaml + ..\Patterns\Swedish\Swedish-NumbersWithUnit.yaml = ..\Patterns\Swedish\Swedish-NumbersWithUnit.yaml + ..\Patterns\Swedish\Swedish-QuotedText.yaml = ..\Patterns\Swedish\Swedish-QuotedText.yaml + ..\Patterns\Swedish\Swedish-TimeZone.yaml = ..\Patterns\Swedish\Swedish-TimeZone.yaml ..\Patterns\Turkish\Turkish-DateTime.yaml = ..\Patterns\Turkish\Turkish-DateTime.yaml ..\Patterns\Turkish\Turkish-Numbers.yaml = ..\Patterns\Turkish\Turkish-Numbers.yaml ..\Patterns\Turkish\Turkish-NumbersWithUnit.yaml = ..\Patterns\Turkish\Turkish-NumbersWithUnit.yaml + ..\Patterns\Turkish\Turkish-QuotedText.yaml = ..\Patterns\Turkish\Turkish-QuotedText.yaml + ..\Patterns\Turkish\Turkish-TimeZone.yaml = ..\Patterns\Turkish\Turkish-TimeZone.yaml EndProjectSection EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Recognizers.Text.DataDrivenTests", "Microsoft.Recognizers.Text.DataDrivenTests\Microsoft.Recognizers.Text.DataDrivenTests.csproj", "{2783050D-97E2-4F10-99F0-E2D970FD996A}" @@ -110,6 +134,751 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "RecognizerFunction", "Sampl EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BotBuilderRecognizerBot", 
"Samples\BotBuilder\BotBuilderRecognizerBot.csproj", "{5039FC31-E88A-45EC-811B-3C406162A863}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Specs", "Specs", "{4406756F-4161-46CA-8021-DB26FD0CA2A7}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Choice", "Choice", "{55DDEF35-167D-45A5-B6D0-3747532E8314}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "DateTime", "DateTime", "{5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Number", "Number", "{2DF9886B-9586-4C62-8D1C-A723F8688574}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "NumberWithUnit", "NumberWithUnit", "{72FEAD3F-7C2A-4B85-9942-D9ADA122411A}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Sequence", "Sequence", "{59FC2C0D-BF12-4A99-BC50-A104948DB1DC}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Chinese", "Chinese", "{16B24511-1D2F-47FE-BF6D-5C2B29F45279}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Sequence\Chinese\IpAddressModel.json = ..\Specs\Sequence\Chinese\IpAddressModel.json + ..\Specs\Sequence\Chinese\PhoneNumberModel.json = ..\Specs\Sequence\Chinese\PhoneNumberModel.json + ..\Specs\Sequence\Chinese\QuotedTextModel.json = ..\Specs\Sequence\Chinese\QuotedTextModel.json + ..\Specs\Sequence\Chinese\URLModel.json = ..\Specs\Sequence\Chinese\URLModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Dutch", "Dutch", "{7218D1B7-55C9-43E0-AA97-CD36B6DC2822}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Sequence\Dutch\PhoneNumberModel.json = ..\Specs\Sequence\Dutch\PhoneNumberModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "English", "English", "{D80595E0-D724-466F-9F65-9F254B974C8A}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Sequence\English\EmailModel.json = ..\Specs\Sequence\English\EmailModel.json + 
..\Specs\Sequence\English\GUIDModel.json = ..\Specs\Sequence\English\GUIDModel.json + ..\Specs\Sequence\English\HashtagModel.json = ..\Specs\Sequence\English\HashtagModel.json + ..\Specs\Sequence\English\IpAddressModel.json = ..\Specs\Sequence\English\IpAddressModel.json + ..\Specs\Sequence\English\MentionModel.json = ..\Specs\Sequence\English\MentionModel.json + ..\Specs\Sequence\English\PhoneNumberModel.json = ..\Specs\Sequence\English\PhoneNumberModel.json + ..\Specs\Sequence\English\QuotedTextModel.json = ..\Specs\Sequence\English\QuotedTextModel.json + ..\Specs\Sequence\English\URLModel.json = ..\Specs\Sequence\English\URLModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "French", "French", "{047F38CF-8AAA-4967-BDEF-5E2BC378670D}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Sequence\French\PhoneNumberModel.json = ..\Specs\Sequence\French\PhoneNumberModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "German", "German", "{BCD91F42-F366-4F68-8A3B-7AC4EEF42534}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Sequence\German\PhoneNumberModel.json = ..\Specs\Sequence\German\PhoneNumberModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Hindi", "Hindi", "{6DEA89B3-D580-48A4-BA52-8A077920AF09}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Sequence\Hindi\PhoneNumberModel.json = ..\Specs\Sequence\Hindi\PhoneNumberModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Italian", "Italian", "{04F976D1-D026-4511-A414-FF318BEF3321}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Sequence\Italian\PhoneNumberModel.json = ..\Specs\Sequence\Italian\PhoneNumberModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Japanese", "Japanese", "{81B5C120-8A99-42BF-88DD-0A8F0147142D}" + ProjectSection(SolutionItems) = preProject + 
..\Specs\Sequence\Japanese\IpAddressModel.json = ..\Specs\Sequence\Japanese\IpAddressModel.json + ..\Specs\Sequence\Japanese\PhoneNumberModel.json = ..\Specs\Sequence\Japanese\PhoneNumberModel.json + ..\Specs\Sequence\Japanese\URLModel.json = ..\Specs\Sequence\Japanese\URLModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Korean", "Korean", "{93CE72CE-A624-4075-B2DD-392F4D42DC77}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Sequence\Korean\PhoneNumberModel.json = ..\Specs\Sequence\Korean\PhoneNumberModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Portuguese", "Portuguese", "{C47CD83C-F6DC-4580-A143-A5C11EA9377A}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Sequence\Portuguese\IpAddressModel.json = ..\Specs\Sequence\Portuguese\IpAddressModel.json + ..\Specs\Sequence\Portuguese\PhoneNumberModel.json = ..\Specs\Sequence\Portuguese\PhoneNumberModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Spanish", "Spanish", "{D64B181C-B07E-4531-A016-14E9D827EB87}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Sequence\Spanish\PhoneNumberModel.json = ..\Specs\Sequence\Spanish\PhoneNumberModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Swedish", "Swedish", "{C99691DA-6E5B-4194-972E-7992F4CC4486}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Sequence\Swedish\QuotedTextModel.json = ..\Specs\Sequence\Swedish\QuotedTextModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Turkish", "Turkish", "{18C2DA46-BF61-4F87-B645-443D6525545A}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Sequence\Turkish\PhoneNumberModel.json = ..\Specs\Sequence\Turkish\PhoneNumberModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Arabic", "Arabic", 
"{78A33B3C-6340-4EE8-B802-63618D008AFF}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Choice\Arabic\BooleanModel.json = ..\Specs\Choice\Arabic\BooleanModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Bulgarian", "Bulgarian", "{39E7797E-768E-4170-9C29-5DAEE70D86E2}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Choice\Bulgarian\BooleanModel.json = ..\Specs\Choice\Bulgarian\BooleanModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Chinese", "Chinese", "{39E7797E-768E-4170-9C29-5DAEE70D86E3}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Choice\Chinese\BooleanModel.json = ..\Specs\Choice\Chinese\BooleanModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Dutch", "Dutch", "{39E7797E-768E-4170-9C29-5DAEE70D86E4}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Choice\Dutch\BooleanModel.json = ..\Specs\Choice\Dutch\BooleanModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "English", "English", "{39E7797E-768E-4170-9C29-5DAEE70D86E5}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Choice\English\BooleanModel.json = ..\Specs\Choice\English\BooleanModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "French", "French", "{39E7797E-768E-4170-9C29-5DAEE70D86E6}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Choice\French\BooleanModel.json = ..\Specs\Choice\French\BooleanModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "German", "German", "{39E7797E-768E-4170-9C29-5DAEE70D86E7}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Choice\German\BooleanModel.json = ..\Specs\Choice\German\BooleanModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Hindi", "Hindi", "{39E7797E-768E-4170-9C29-5DAEE70D86E8}" + 
ProjectSection(SolutionItems) = preProject + ..\Specs\Choice\Hindi\BooleanModel.json = ..\Specs\Choice\Hindi\BooleanModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Italian", "Italian", "{39E7797E-768E-4170-9C29-5DAEE70D86E9}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Choice\Italian\BooleanModel.json = ..\Specs\Choice\Italian\BooleanModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Japanese", "Japanese", "{39E7797E-768E-4170-9C29-5DAEE70D86F0}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Choice\Japanese\BooleanModel.json = ..\Specs\Choice\Japanese\BooleanModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Korean", "Korean", "{39E7797E-768E-4170-9C29-5DAEE70D86F1}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Choice\Korean\BooleanModel.json = ..\Specs\Choice\Korean\BooleanModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Portuguese", "Portuguese", "{39E7797E-768E-4170-9C29-5DAEE70D86F2}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Choice\Portuguese\BooleanModel.json = ..\Specs\Choice\Portuguese\BooleanModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Spanish", "Spanish", "{39E7797E-768E-4170-9C29-5DAEE70D86F3}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Choice\Spanish\BooleanModel.json = ..\Specs\Choice\Spanish\BooleanModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Swedish", "Swedish", "{39E7797E-768E-4170-9C29-5DAEE70D86F4}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Choice\Swedish\BooleanModel.json = ..\Specs\Choice\Swedish\BooleanModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Turkish", "Turkish", "{39E7797E-768E-4170-9C29-5DAEE70D86F5}" + 
ProjectSection(SolutionItems) = preProject + ..\Specs\Choice\Turkish\BooleanModel.json = ..\Specs\Choice\Turkish\BooleanModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Arabic", "Arabic", "{19152546-2D99-476A-9DCB-A3D8F70418A3}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Number\Arabic\NumberModel.json = ..\Specs\Number\Arabic\NumberModel.json + ..\Specs\Number\Arabic\NumberRangeModel.json = ..\Specs\Number\Arabic\NumberRangeModel.json + ..\Specs\Number\Arabic\OrdinalModel.json = ..\Specs\Number\Arabic\OrdinalModel.json + ..\Specs\Number\Arabic\PercentModel.json = ..\Specs\Number\Arabic\PercentModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Chinese", "Chinese", "{19152546-2D99-476A-9DCB-A3D8F70418A4}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Number\Chinese\NumberModel.json = ..\Specs\Number\Chinese\NumberModel.json + ..\Specs\Number\Chinese\NumberRangeModel.json = ..\Specs\Number\Chinese\NumberRangeModel.json + ..\Specs\Number\Chinese\NumberRangeModelExperimentalMode.json = ..\Specs\Number\Chinese\NumberRangeModelExperimentalMode.json + ..\Specs\Number\Chinese\OrdinalModel.json = ..\Specs\Number\Chinese\OrdinalModel.json + ..\Specs\Number\Chinese\PercentModel.json = ..\Specs\Number\Chinese\PercentModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Dutch", "Dutch", "{19152546-2D99-476A-9DCB-A3D8F70418A5}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Number\Dutch\NumberModel.json = ..\Specs\Number\Dutch\NumberModel.json + ..\Specs\Number\Dutch\NumberModelPercentMode.json = ..\Specs\Number\Dutch\NumberModelPercentMode.json + ..\Specs\Number\Dutch\NumberRangeModel.json = ..\Specs\Number\Dutch\NumberRangeModel.json + ..\Specs\Number\Dutch\OrdinalModel.json = ..\Specs\Number\Dutch\OrdinalModel.json + ..\Specs\Number\Dutch\PercentModel.json = ..\Specs\Number\Dutch\PercentModel.json + 
..\Specs\Number\Dutch\PercentModelPercentMode.json = ..\Specs\Number\Dutch\PercentModelPercentMode.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "English", "English", "{19152546-2D99-476A-9DCB-A3D8F70418A6}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Number\English\NumberModel.json = ..\Specs\Number\English\NumberModel.json + ..\Specs\Number\English\NumberModelExperimentalMode.json = ..\Specs\Number\English\NumberModelExperimentalMode.json + ..\Specs\Number\English\NumberModelPercentMode.json = ..\Specs\Number\English\NumberModelPercentMode.json + ..\Specs\Number\English\NumberRangeModel.json = ..\Specs\Number\English\NumberRangeModel.json + ..\Specs\Number\English\NumberRangeModelExperimentalMode.json = ..\Specs\Number\English\NumberRangeModelExperimentalMode.json + ..\Specs\Number\English\OrdinalModel.json = ..\Specs\Number\English\OrdinalModel.json + ..\Specs\Number\English\OrdinalModelSuppressExtendedTypes.json = ..\Specs\Number\English\OrdinalModelSuppressExtendedTypes.json + ..\Specs\Number\English\PercentModel.json = ..\Specs\Number\English\PercentModel.json + ..\Specs\Number\English\PercentModelPercentMode.json = ..\Specs\Number\English\PercentModelPercentMode.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "French", "French", "{19152546-2D99-476A-9DCB-A3D8F70418A7}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Number\French\NumberModel.json = ..\Specs\Number\French\NumberModel.json + ..\Specs\Number\French\OrdinalModel.json = ..\Specs\Number\French\OrdinalModel.json + ..\Specs\Number\French\PercentModel.json = ..\Specs\Number\French\PercentModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "German", "German", "{19152546-2D99-476A-9DCB-A3D8F70418A8}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Number\German\NumberModel.json = ..\Specs\Number\German\NumberModel.json + 
..\Specs\Number\German\OrdinalModel.json = ..\Specs\Number\German\OrdinalModel.json + ..\Specs\Number\German\OrdinalModelEnablePreview.json = ..\Specs\Number\German\OrdinalModelEnablePreview.json + ..\Specs\Number\German\PercentModel.json = ..\Specs\Number\German\PercentModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Hindi", "Hindi", "{19152546-2D99-476A-9DCB-A3D8F70418A9}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Number\Hindi\NumberModel.json = ..\Specs\Number\Hindi\NumberModel.json + ..\Specs\Number\Hindi\NumberRangeModel.json = ..\Specs\Number\Hindi\NumberRangeModel.json + ..\Specs\Number\Hindi\OrdinalModel.json = ..\Specs\Number\Hindi\OrdinalModel.json + ..\Specs\Number\Hindi\PercentModel.json = ..\Specs\Number\Hindi\PercentModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Italian", "Italian", "{19152546-2D99-476A-9DCB-A3D8F70418B0}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Number\Italian\NumberModel.json = ..\Specs\Number\Italian\NumberModel.json + ..\Specs\Number\Italian\NumberRangeModel.json = ..\Specs\Number\Italian\NumberRangeModel.json + ..\Specs\Number\Italian\OrdinalModel.json = ..\Specs\Number\Italian\OrdinalModel.json + ..\Specs\Number\Italian\PercentModel.json = ..\Specs\Number\Italian\PercentModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Japanese", "Japanese", "{19152546-2D99-476A-9DCB-A3D8F70418B1}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Number\Japanese\NumberModel.json = ..\Specs\Number\Japanese\NumberModel.json + ..\Specs\Number\Japanese\NumberRangeModel.json = ..\Specs\Number\Japanese\NumberRangeModel.json + ..\Specs\Number\Japanese\NumberRangeModelExperimentalMode.json = ..\Specs\Number\Japanese\NumberRangeModelExperimentalMode.json + ..\Specs\Number\Japanese\OrdinalModel.json = ..\Specs\Number\Japanese\OrdinalModel.json + 
..\Specs\Number\Japanese\PercentModel.json = ..\Specs\Number\Japanese\PercentModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Korean", "Korean", "{19152546-2D99-476A-9DCB-A3D8F70418B2}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Number\Korean\NumberModel.json = ..\Specs\Number\Korean\NumberModel.json + ..\Specs\Number\Korean\NumberModelPercentMode.json = ..\Specs\Number\Korean\NumberModelPercentMode.json + ..\Specs\Number\Korean\NumberRangeModel.json = ..\Specs\Number\Korean\NumberRangeModel.json + ..\Specs\Number\Korean\OrdinalModel.json = ..\Specs\Number\Korean\OrdinalModel.json + ..\Specs\Number\Korean\PercentModel.json = ..\Specs\Number\Korean\PercentModel.json + ..\Specs\Number\Korean\PercentModelPercentMode.json = ..\Specs\Number\Korean\PercentModelPercentMode.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Portuguese", "Portuguese", "{19152546-2D99-476A-9DCB-A3D8F70418B3}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Number\Portuguese\NumberModel.json = ..\Specs\Number\Portuguese\NumberModel.json + ..\Specs\Number\Portuguese\OrdinalModel.json = ..\Specs\Number\Portuguese\OrdinalModel.json + ..\Specs\Number\Portuguese\PercentModel.json = ..\Specs\Number\Portuguese\PercentModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Spanish", "Spanish", "{19152546-2D99-476A-9DCB-A3D8F70418B4}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Number\Spanish\NumberModel.json = ..\Specs\Number\Spanish\NumberModel.json + ..\Specs\Number\Spanish\NumberRangeModel.json = ..\Specs\Number\Spanish\NumberRangeModel.json + ..\Specs\Number\Spanish\OrdinalModel.json = ..\Specs\Number\Spanish\OrdinalModel.json + ..\Specs\Number\Spanish\PercentModel.json = ..\Specs\Number\Spanish\PercentModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Swedish", "Swedish", 
"{19152546-2D99-476A-9DCB-A3D8F70418B5}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Number\Swedish\NumberModel.json = ..\Specs\Number\Swedish\NumberModel.json + ..\Specs\Number\Swedish\OrdinalModel.json = ..\Specs\Number\Swedish\OrdinalModel.json + ..\Specs\Number\Swedish\PercentModel.json = ..\Specs\Number\Swedish\PercentModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Turkish", "Turkish", "{19152546-2D99-476A-9DCB-A3D8F70418B6}" + ProjectSection(SolutionItems) = preProject + ..\Specs\Number\Turkish\NumberModel.json = ..\Specs\Number\Turkish\NumberModel.json + ..\Specs\Number\Turkish\NumberRangeModel.json = ..\Specs\Number\Turkish\NumberRangeModel.json + ..\Specs\Number\Turkish\OrdinalModel.json = ..\Specs\Number\Turkish\OrdinalModel.json + ..\Specs\Number\Turkish\PercentModel.json = ..\Specs\Number\Turkish\PercentModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Chinese", "Chinese", "{97D22337-7EC5-46CF-B619-C54FFD0BD5EB}" + ProjectSection(SolutionItems) = preProject + ..\Specs\NumberWithUnit\Chinese\AgeModel.json = ..\Specs\NumberWithUnit\Chinese\AgeModel.json + ..\Specs\NumberWithUnit\Chinese\CurrencyModel.json = ..\Specs\NumberWithUnit\Chinese\CurrencyModel.json + ..\Specs\NumberWithUnit\Chinese\DimensionModel.json = ..\Specs\NumberWithUnit\Chinese\DimensionModel.json + ..\Specs\NumberWithUnit\Chinese\TemperatureModel.json = ..\Specs\NumberWithUnit\Chinese\TemperatureModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Dutch", "Dutch", "{97D22337-7EC5-46CF-B619-C54FFD0BD5EC}" + ProjectSection(SolutionItems) = preProject + ..\Specs\NumberWithUnit\Dutch\AgeModel.json = ..\Specs\NumberWithUnit\Dutch\AgeModel.json + ..\Specs\NumberWithUnit\Dutch\CurrencyModel.json = ..\Specs\NumberWithUnit\Dutch\CurrencyModel.json + ..\Specs\NumberWithUnit\Dutch\DimensionModel.json = 
..\Specs\NumberWithUnit\Dutch\DimensionModel.json + ..\Specs\NumberWithUnit\Dutch\TemperatureModel.json = ..\Specs\NumberWithUnit\Dutch\TemperatureModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "English", "English", "{97D22337-7EC5-46CF-B619-C54FFD0BD5ED}" + ProjectSection(SolutionItems) = preProject + ..\Specs\NumberWithUnit\English\AgeModel.json = ..\Specs\NumberWithUnit\English\AgeModel.json + ..\Specs\NumberWithUnit\English\CurrencyModel.json = ..\Specs\NumberWithUnit\English\CurrencyModel.json + ..\Specs\NumberWithUnit\English\DimensionModel.json = ..\Specs\NumberWithUnit\English\DimensionModel.json + ..\Specs\NumberWithUnit\English\TemperatureModel.json = ..\Specs\NumberWithUnit\English\TemperatureModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "French", "French", "{97D22337-7EC5-46CF-B619-C54FFD0BD5EE}" + ProjectSection(SolutionItems) = preProject + ..\Specs\NumberWithUnit\French\AgeModel.json = ..\Specs\NumberWithUnit\French\AgeModel.json + ..\Specs\NumberWithUnit\French\CurrencyModel.json = ..\Specs\NumberWithUnit\French\CurrencyModel.json + ..\Specs\NumberWithUnit\French\DimensionModel.json = ..\Specs\NumberWithUnit\French\DimensionModel.json + ..\Specs\NumberWithUnit\French\TemperatureModel.json = ..\Specs\NumberWithUnit\French\TemperatureModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "German", "German", "{97D22337-7EC5-46CF-B619-C54FFD0BD5EF}" + ProjectSection(SolutionItems) = preProject + ..\Specs\NumberWithUnit\German\AgeModel.json = ..\Specs\NumberWithUnit\German\AgeModel.json + ..\Specs\NumberWithUnit\German\CurrencyModel.json = ..\Specs\NumberWithUnit\German\CurrencyModel.json + ..\Specs\NumberWithUnit\German\DimensionModel.json = ..\Specs\NumberWithUnit\German\DimensionModel.json + ..\Specs\NumberWithUnit\German\TemperatureModel.json = ..\Specs\NumberWithUnit\German\TemperatureModel.json + 
EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Hindi", "Hindi", "{97D22337-7EC5-46CF-B619-C54FFD0BD5F0}" + ProjectSection(SolutionItems) = preProject + ..\Specs\NumberWithUnit\Hindi\AgeModel.json = ..\Specs\NumberWithUnit\Hindi\AgeModel.json + ..\Specs\NumberWithUnit\Hindi\CurrencyModel.json = ..\Specs\NumberWithUnit\Hindi\CurrencyModel.json + ..\Specs\NumberWithUnit\Hindi\DimensionModel.json = ..\Specs\NumberWithUnit\Hindi\DimensionModel.json + ..\Specs\NumberWithUnit\Hindi\TemperatureModel.json = ..\Specs\NumberWithUnit\Hindi\TemperatureModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Italian", "Italian", "{97D22337-7EC5-46CF-B619-C54FFD0BD5F1}" + ProjectSection(SolutionItems) = preProject + ..\Specs\NumberWithUnit\Italian\AgeModel.json = ..\Specs\NumberWithUnit\Italian\AgeModel.json + ..\Specs\NumberWithUnit\Italian\CurrencyModel.json = ..\Specs\NumberWithUnit\Italian\CurrencyModel.json + ..\Specs\NumberWithUnit\Italian\DimensionModel.json = ..\Specs\NumberWithUnit\Italian\DimensionModel.json + ..\Specs\NumberWithUnit\Italian\TemperatureModel.json = ..\Specs\NumberWithUnit\Italian\TemperatureModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Japanese", "Japanese", "{97D22337-7EC5-46CF-B619-C54FFD0BD5F2}" + ProjectSection(SolutionItems) = preProject + ..\Specs\NumberWithUnit\Japanese\AgeModel.json = ..\Specs\NumberWithUnit\Japanese\AgeModel.json + ..\Specs\NumberWithUnit\Japanese\CurrencyModel.json = ..\Specs\NumberWithUnit\Japanese\CurrencyModel.json + ..\Specs\NumberWithUnit\Japanese\DimensionModel.json = ..\Specs\NumberWithUnit\Japanese\DimensionModel.json + ..\Specs\NumberWithUnit\Japanese\TemperatureModel.json = ..\Specs\NumberWithUnit\Japanese\TemperatureModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Korean", "Korean", "{97D22337-7EC5-46CF-B619-C54FFD0BD5F3}" + 
ProjectSection(SolutionItems) = preProject + ..\Specs\NumberWithUnit\Korean\AgeModel.json = ..\Specs\NumberWithUnit\Korean\AgeModel.json + ..\Specs\NumberWithUnit\Korean\CurrencyModel.json = ..\Specs\NumberWithUnit\Korean\CurrencyModel.json + ..\Specs\NumberWithUnit\Korean\DimensionModel.json = ..\Specs\NumberWithUnit\Korean\DimensionModel.json + ..\Specs\NumberWithUnit\Korean\TemperatureModel.json = ..\Specs\NumberWithUnit\Korean\TemperatureModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Portuguese", "Portuguese", "{97D22337-7EC5-46CF-B619-C54FFD0BD5F4}" + ProjectSection(SolutionItems) = preProject + ..\Specs\NumberWithUnit\Portuguese\AgeModel.json = ..\Specs\NumberWithUnit\Portuguese\AgeModel.json + ..\Specs\NumberWithUnit\Portuguese\CurrencyModel.json = ..\Specs\NumberWithUnit\Portuguese\CurrencyModel.json + ..\Specs\NumberWithUnit\Portuguese\DimensionModel.json = ..\Specs\NumberWithUnit\Portuguese\DimensionModel.json + ..\Specs\NumberWithUnit\Portuguese\TemperatureModel.json = ..\Specs\NumberWithUnit\Portuguese\TemperatureModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Spanish", "Spanish", "{97D22337-7EC5-46CF-B619-C54FFD0BD5F5}" + ProjectSection(SolutionItems) = preProject + ..\Specs\NumberWithUnit\Spanish\AgeModel.json = ..\Specs\NumberWithUnit\Spanish\AgeModel.json + ..\Specs\NumberWithUnit\Spanish\CurrencyModel.json = ..\Specs\NumberWithUnit\Spanish\CurrencyModel.json + ..\Specs\NumberWithUnit\Spanish\DimensionModel.json = ..\Specs\NumberWithUnit\Spanish\DimensionModel.json + ..\Specs\NumberWithUnit\Spanish\TemperatureModel.json = ..\Specs\NumberWithUnit\Spanish\TemperatureModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Swedish", "Swedish", "{ACCA0A3A-3935-4E4E-BDBA-0BEDDDF8D428}" + ProjectSection(SolutionItems) = preProject + ..\Specs\NumberWithUnit\Swedish\AgeModel.json = 
..\Specs\NumberWithUnit\Swedish\AgeModel.json + ..\Specs\NumberWithUnit\Swedish\CurrencyModel.json = ..\Specs\NumberWithUnit\Swedish\CurrencyModel.json + ..\Specs\NumberWithUnit\Swedish\DimensionModel.json = ..\Specs\NumberWithUnit\Swedish\DimensionModel.json + ..\Specs\NumberWithUnit\Swedish\TemperatureModel.json = ..\Specs\NumberWithUnit\Swedish\TemperatureModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Turkish", "Turkish", "{97D22337-7EC5-46CF-B619-C54FFD0BD5F6}" + ProjectSection(SolutionItems) = preProject + ..\Specs\NumberWithUnit\Turkish\AgeModel.json = ..\Specs\NumberWithUnit\Turkish\AgeModel.json + ..\Specs\NumberWithUnit\Turkish\CurrencyModel.json = ..\Specs\NumberWithUnit\Turkish\CurrencyModel.json + ..\Specs\NumberWithUnit\Turkish\DimensionModel.json = ..\Specs\NumberWithUnit\Turkish\DimensionModel.json + ..\Specs\NumberWithUnit\Turkish\TemperatureModel.json = ..\Specs\NumberWithUnit\Turkish\TemperatureModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Chinese", "Chinese", "{2F41FB40-EA08-469A-A676-138F2E7C6273}" + ProjectSection(SolutionItems) = preProject + ..\Specs\DateTime\Chinese\DateExtractor.json = ..\Specs\DateTime\Chinese\DateExtractor.json + ..\Specs\DateTime\Chinese\DateParser.json = ..\Specs\DateTime\Chinese\DateParser.json + ..\Specs\DateTime\Chinese\DatePeriodExtractor.json = ..\Specs\DateTime\Chinese\DatePeriodExtractor.json + ..\Specs\DateTime\Chinese\DatePeriodParser.json = ..\Specs\DateTime\Chinese\DatePeriodParser.json + ..\Specs\DateTime\Chinese\DateTimeExtractor.json = ..\Specs\DateTime\Chinese\DateTimeExtractor.json + ..\Specs\DateTime\Chinese\DateTimeModel.json = ..\Specs\DateTime\Chinese\DateTimeModel.json + ..\Specs\DateTime\Chinese\DateTimeModelExperimentalMode.json = ..\Specs\DateTime\Chinese\DateTimeModelExperimentalMode.json + ..\Specs\DateTime\Chinese\DateTimeParser.json = ..\Specs\DateTime\Chinese\DateTimeParser.json + 
..\Specs\DateTime\Chinese\DateTimePeriodExtractor.json = ..\Specs\DateTime\Chinese\DateTimePeriodExtractor.json + ..\Specs\DateTime\Chinese\DateTimePeriodParser.json = ..\Specs\DateTime\Chinese\DateTimePeriodParser.json + ..\Specs\DateTime\Chinese\DurationExtractor.json = ..\Specs\DateTime\Chinese\DurationExtractor.json + ..\Specs\DateTime\Chinese\DurationParser.json = ..\Specs\DateTime\Chinese\DurationParser.json + ..\Specs\DateTime\Chinese\HolidayExtractor.json = ..\Specs\DateTime\Chinese\HolidayExtractor.json + ..\Specs\DateTime\Chinese\HolidayParser.json = ..\Specs\DateTime\Chinese\HolidayParser.json + ..\Specs\DateTime\Chinese\MergedExtractor.json = ..\Specs\DateTime\Chinese\MergedExtractor.json + ..\Specs\DateTime\Chinese\MergedParser.json = ..\Specs\DateTime\Chinese\MergedParser.json + ..\Specs\DateTime\Chinese\SetExtractor.json = ..\Specs\DateTime\Chinese\SetExtractor.json + ..\Specs\DateTime\Chinese\SetParser.json = ..\Specs\DateTime\Chinese\SetParser.json + ..\Specs\DateTime\Chinese\TimeExtractor.json = ..\Specs\DateTime\Chinese\TimeExtractor.json + ..\Specs\DateTime\Chinese\TimeParser.json = ..\Specs\DateTime\Chinese\TimeParser.json + ..\Specs\DateTime\Chinese\TimePeriodExtractor.json = ..\Specs\DateTime\Chinese\TimePeriodExtractor.json + ..\Specs\DateTime\Chinese\TimePeriodParser.json = ..\Specs\DateTime\Chinese\TimePeriodParser.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Dutch", "Dutch", "{2F41FB40-EA08-469A-A676-138F2E7C6274}" + ProjectSection(SolutionItems) = preProject + ..\Specs\DateTime\Dutch\DateExtractor.json = ..\Specs\DateTime\Dutch\DateExtractor.json + ..\Specs\DateTime\Dutch\DateParser.json = ..\Specs\DateTime\Dutch\DateParser.json + ..\Specs\DateTime\Dutch\DatePeriodExtractor.json = ..\Specs\DateTime\Dutch\DatePeriodExtractor.json + ..\Specs\DateTime\Dutch\DatePeriodParser.json = ..\Specs\DateTime\Dutch\DatePeriodParser.json + ..\Specs\DateTime\Dutch\DateTimeExtractor.json = 
..\Specs\DateTime\Dutch\DateTimeExtractor.json + ..\Specs\DateTime\Dutch\DateTimeModel.json = ..\Specs\DateTime\Dutch\DateTimeModel.json + ..\Specs\DateTime\Dutch\DateTimeParser.json = ..\Specs\DateTime\Dutch\DateTimeParser.json + ..\Specs\DateTime\Dutch\DateTimePeriodExtractor.json = ..\Specs\DateTime\Dutch\DateTimePeriodExtractor.json + ..\Specs\DateTime\Dutch\DateTimePeriodParser.json = ..\Specs\DateTime\Dutch\DateTimePeriodParser.json + ..\Specs\DateTime\Dutch\DurationExtractor.json = ..\Specs\DateTime\Dutch\DurationExtractor.json + ..\Specs\DateTime\Dutch\DurationParser.json = ..\Specs\DateTime\Dutch\DurationParser.json + ..\Specs\DateTime\Dutch\HolidayExtractor.json = ..\Specs\DateTime\Dutch\HolidayExtractor.json + ..\Specs\DateTime\Dutch\HolidayParser.json = ..\Specs\DateTime\Dutch\HolidayParser.json + ..\Specs\DateTime\Dutch\MergedExtractor.json = ..\Specs\DateTime\Dutch\MergedExtractor.json + ..\Specs\DateTime\Dutch\SetExtractor.json = ..\Specs\DateTime\Dutch\SetExtractor.json + ..\Specs\DateTime\Dutch\TimeExtractor.json = ..\Specs\DateTime\Dutch\TimeExtractor.json + ..\Specs\DateTime\Dutch\TimeParser.json = ..\Specs\DateTime\Dutch\TimeParser.json + ..\Specs\DateTime\Dutch\TimePeriodExtractor.json = ..\Specs\DateTime\Dutch\TimePeriodExtractor.json + ..\Specs\DateTime\Dutch\TimePeriodParser.json = ..\Specs\DateTime\Dutch\TimePeriodParser.json + ..\Specs\DateTime\Dutch\TimeZoneParser.json = ..\Specs\DateTime\Dutch\TimeZoneParser.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "English", "English", "{2F41FB40-EA08-469A-A676-138F2E7C6275}" + ProjectSection(SolutionItems) = preProject + ..\Specs\DateTime\English\DateExtractor.json = ..\Specs\DateTime\English\DateExtractor.json + ..\Specs\DateTime\English\DateParser.json = ..\Specs\DateTime\English\DateParser.json + ..\Specs\DateTime\English\DatePeriodExtractor.json = ..\Specs\DateTime\English\DatePeriodExtractor.json + ..\Specs\DateTime\English\DatePeriodParser.json = 
..\Specs\DateTime\English\DatePeriodParser.json + ..\Specs\DateTime\English\DateTimeExtractor.json = ..\Specs\DateTime\English\DateTimeExtractor.json + ..\Specs\DateTime\English\DateTimeModel.json = ..\Specs\DateTime\English\DateTimeModel.json + ..\Specs\DateTime\English\DateTimeModelCalendarMode.json = ..\Specs\DateTime\English\DateTimeModelCalendarMode.json + ..\Specs\DateTime\English\DateTimeModelComplexCalendar.json = ..\Specs\DateTime\English\DateTimeModelComplexCalendar.json + ..\Specs\DateTime\English\DateTimeModelExperimentalMode.json = ..\Specs\DateTime\English\DateTimeModelExperimentalMode.json + ..\Specs\DateTime\English\DateTimeModelExtendedTypes.json = ..\Specs\DateTime\English\DateTimeModelExtendedTypes.json + ..\Specs\DateTime\English\DateTimeModelSplitDateAndTime.json = ..\Specs\DateTime\English\DateTimeModelSplitDateAndTime.json + ..\Specs\DateTime\English\DateTimeParser.json = ..\Specs\DateTime\English\DateTimeParser.json + ..\Specs\DateTime\English\DateTimePeriodExtractor.json = ..\Specs\DateTime\English\DateTimePeriodExtractor.json + ..\Specs\DateTime\English\DateTimePeriodParser.json = ..\Specs\DateTime\English\DateTimePeriodParser.json + ..\Specs\DateTime\English\DurationExtractor.json = ..\Specs\DateTime\English\DurationExtractor.json + ..\Specs\DateTime\English\DurationParser.json = ..\Specs\DateTime\English\DurationParser.json + ..\Specs\DateTime\English\HolidayExtractor.json = ..\Specs\DateTime\English\HolidayExtractor.json + ..\Specs\DateTime\English\HolidayParser.json = ..\Specs\DateTime\English\HolidayParser.json + ..\Specs\DateTime\English\MergedExtractor.json = ..\Specs\DateTime\English\MergedExtractor.json + ..\Specs\DateTime\English\MergedExtractorSkipFromTo.json = ..\Specs\DateTime\English\MergedExtractorSkipFromTo.json + ..\Specs\DateTime\English\MergedParser.json = ..\Specs\DateTime\English\MergedParser.json + ..\Specs\DateTime\English\SetExtractor.json = ..\Specs\DateTime\English\SetExtractor.json + 
..\Specs\DateTime\English\SetParser.json = ..\Specs\DateTime\English\SetParser.json + ..\Specs\DateTime\English\TimeExtractor.json = ..\Specs\DateTime\English\TimeExtractor.json + ..\Specs\DateTime\English\TimeParser.json = ..\Specs\DateTime\English\TimeParser.json + ..\Specs\DateTime\English\TimePeriodExtractor.json = ..\Specs\DateTime\English\TimePeriodExtractor.json + ..\Specs\DateTime\English\TimePeriodParser.json = ..\Specs\DateTime\English\TimePeriodParser.json + ..\Specs\DateTime\English\TimeZoneExtractor.json = ..\Specs\DateTime\English\TimeZoneExtractor.json + ..\Specs\DateTime\English\TimeZoneParser.json = ..\Specs\DateTime\English\TimeZoneParser.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "EnglishOthers", "EnglishOthers", "{2F41FB40-EA08-469A-A676-138F2E7C6276}" + ProjectSection(SolutionItems) = preProject + ..\Specs\DateTime\EnglishOthers\DateParser.json = ..\Specs\DateTime\EnglishOthers\DateParser.json + ..\Specs\DateTime\EnglishOthers\DateTimeModel.json = ..\Specs\DateTime\EnglishOthers\DateTimeModel.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "French", "French", "{2F41FB40-EA08-469A-A676-138F2E7C6277}" + ProjectSection(SolutionItems) = preProject + ..\Specs\DateTime\French\DateExtractor.json = ..\Specs\DateTime\French\DateExtractor.json + ..\Specs\DateTime\French\DateParser.json = ..\Specs\DateTime\French\DateParser.json + ..\Specs\DateTime\French\DatePeriodExtractor.json = ..\Specs\DateTime\French\DatePeriodExtractor.json + ..\Specs\DateTime\French\DatePeriodParser.json = ..\Specs\DateTime\French\DatePeriodParser.json + ..\Specs\DateTime\French\DateTimeExtractor.json = ..\Specs\DateTime\French\DateTimeExtractor.json + ..\Specs\DateTime\French\DateTimeModel.json = ..\Specs\DateTime\French\DateTimeModel.json + ..\Specs\DateTime\French\DateTimeParser.json = ..\Specs\DateTime\French\DateTimeParser.json + ..\Specs\DateTime\French\DateTimePeriodExtractor.json = 
..\Specs\DateTime\French\DateTimePeriodExtractor.json + ..\Specs\DateTime\French\DateTimePeriodParser.json = ..\Specs\DateTime\French\DateTimePeriodParser.json + ..\Specs\DateTime\French\DurationExtractor.json = ..\Specs\DateTime\French\DurationExtractor.json + ..\Specs\DateTime\French\DurationParser.json = ..\Specs\DateTime\French\DurationParser.json + ..\Specs\DateTime\French\HolidayExtractor.json = ..\Specs\DateTime\French\HolidayExtractor.json + ..\Specs\DateTime\French\HolidayParser.json = ..\Specs\DateTime\French\HolidayParser.json + ..\Specs\DateTime\French\MergedExtractor.json = ..\Specs\DateTime\French\MergedExtractor.json + ..\Specs\DateTime\French\MergedExtractorSkipFromTo.json = ..\Specs\DateTime\French\MergedExtractorSkipFromTo.json + ..\Specs\DateTime\French\MergedParser.json = ..\Specs\DateTime\French\MergedParser.json + ..\Specs\DateTime\French\SetExtractor.json = ..\Specs\DateTime\French\SetExtractor.json + ..\Specs\DateTime\French\SetParser.json = ..\Specs\DateTime\French\SetParser.json + ..\Specs\DateTime\French\TimeExtractor.json = ..\Specs\DateTime\French\TimeExtractor.json + ..\Specs\DateTime\French\TimeParser.json = ..\Specs\DateTime\French\TimeParser.json + ..\Specs\DateTime\French\TimePeriodExtractor.json = ..\Specs\DateTime\French\TimePeriodExtractor.json + ..\Specs\DateTime\French\TimePeriodParser.json = ..\Specs\DateTime\French\TimePeriodParser.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "German", "German", "{2F41FB40-EA08-469A-A676-138F2E7C6278}" + ProjectSection(SolutionItems) = preProject + ..\Specs\DateTime\German\DateExtractor.json = ..\Specs\DateTime\German\DateExtractor.json + ..\Specs\DateTime\German\DateParser.json = ..\Specs\DateTime\German\DateParser.json + ..\Specs\DateTime\German\DatePeriodExtractor.json = ..\Specs\DateTime\German\DatePeriodExtractor.json + ..\Specs\DateTime\German\DatePeriodParser.json = ..\Specs\DateTime\German\DatePeriodParser.json + 
..\Specs\DateTime\German\DateTimeExtractor.json = ..\Specs\DateTime\German\DateTimeExtractor.json + ..\Specs\DateTime\German\DateTimeModel.json = ..\Specs\DateTime\German\DateTimeModel.json + ..\Specs\DateTime\German\DateTimeModelCalendarMode.json = ..\Specs\DateTime\German\DateTimeModelCalendarMode.json + ..\Specs\DateTime\German\DateTimeModelSplitDateAndTime.json = ..\Specs\DateTime\German\DateTimeModelSplitDateAndTime.json + ..\Specs\DateTime\German\DateTimeParser.json = ..\Specs\DateTime\German\DateTimeParser.json + ..\Specs\DateTime\German\DateTimePeriodExtractor.json = ..\Specs\DateTime\German\DateTimePeriodExtractor.json + ..\Specs\DateTime\German\DateTimePeriodParser.json = ..\Specs\DateTime\German\DateTimePeriodParser.json + ..\Specs\DateTime\German\DurationExtractor.json = ..\Specs\DateTime\German\DurationExtractor.json + ..\Specs\DateTime\German\DurationParser.json = ..\Specs\DateTime\German\DurationParser.json + ..\Specs\DateTime\German\HolidayExtractor.json = ..\Specs\DateTime\German\HolidayExtractor.json + ..\Specs\DateTime\German\HolidayParser.json = ..\Specs\DateTime\German\HolidayParser.json + ..\Specs\DateTime\German\MergedExtractor.json = ..\Specs\DateTime\German\MergedExtractor.json + ..\Specs\DateTime\German\MergedExtractorSkipFromTo.json = ..\Specs\DateTime\German\MergedExtractorSkipFromTo.json + ..\Specs\DateTime\German\MergedParser.json = ..\Specs\DateTime\German\MergedParser.json + ..\Specs\DateTime\German\SetExtractor.json = ..\Specs\DateTime\German\SetExtractor.json + ..\Specs\DateTime\German\SetParser.json = ..\Specs\DateTime\German\SetParser.json + ..\Specs\DateTime\German\TimeExtractor.json = ..\Specs\DateTime\German\TimeExtractor.json + ..\Specs\DateTime\German\TimeParser.json = ..\Specs\DateTime\German\TimeParser.json + ..\Specs\DateTime\German\TimePeriodExtractor.json = ..\Specs\DateTime\German\TimePeriodExtractor.json + ..\Specs\DateTime\German\TimePeriodParser.json = ..\Specs\DateTime\German\TimePeriodParser.json + 
EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Hindi", "Hindi", "{2F41FB40-EA08-469A-A676-138F2E7C6279}" + ProjectSection(SolutionItems) = preProject + ..\Specs\DateTime\Hindi\DateExtractor.json = ..\Specs\DateTime\Hindi\DateExtractor.json + ..\Specs\DateTime\Hindi\DateParser.json = ..\Specs\DateTime\Hindi\DateParser.json + ..\Specs\DateTime\Hindi\DatePeriodExtractor.json = ..\Specs\DateTime\Hindi\DatePeriodExtractor.json + ..\Specs\DateTime\Hindi\DatePeriodParser.json = ..\Specs\DateTime\Hindi\DatePeriodParser.json + ..\Specs\DateTime\Hindi\DateTimeExtractor.json = ..\Specs\DateTime\Hindi\DateTimeExtractor.json + ..\Specs\DateTime\Hindi\DateTimeModel.json = ..\Specs\DateTime\Hindi\DateTimeModel.json + ..\Specs\DateTime\Hindi\DateTimeParser.json = ..\Specs\DateTime\Hindi\DateTimeParser.json + ..\Specs\DateTime\Hindi\DateTimePeriodExtractor.json = ..\Specs\DateTime\Hindi\DateTimePeriodExtractor.json + ..\Specs\DateTime\Hindi\DateTimePeriodParser.json = ..\Specs\DateTime\Hindi\DateTimePeriodParser.json + ..\Specs\DateTime\Hindi\DurationExtractor.json = ..\Specs\DateTime\Hindi\DurationExtractor.json + ..\Specs\DateTime\Hindi\DurationParser.json = ..\Specs\DateTime\Hindi\DurationParser.json + ..\Specs\DateTime\Hindi\HolidayExtractor.json = ..\Specs\DateTime\Hindi\HolidayExtractor.json + ..\Specs\DateTime\Hindi\HolidayParser.json = ..\Specs\DateTime\Hindi\HolidayParser.json + ..\Specs\DateTime\Hindi\MergedExtractor.json = ..\Specs\DateTime\Hindi\MergedExtractor.json + ..\Specs\DateTime\Hindi\MergedParser.json = ..\Specs\DateTime\Hindi\MergedParser.json + ..\Specs\DateTime\Hindi\SetExtractor.json = ..\Specs\DateTime\Hindi\SetExtractor.json + ..\Specs\DateTime\Hindi\SetParser.json = ..\Specs\DateTime\Hindi\SetParser.json + ..\Specs\DateTime\Hindi\TimeExtractor.json = ..\Specs\DateTime\Hindi\TimeExtractor.json + ..\Specs\DateTime\Hindi\TimeParser.json = ..\Specs\DateTime\Hindi\TimeParser.json + 
..\Specs\DateTime\Hindi\TimePeriodExtractor.json = ..\Specs\DateTime\Hindi\TimePeriodExtractor.json + ..\Specs\DateTime\Hindi\TimePeriodParser.json = ..\Specs\DateTime\Hindi\TimePeriodParser.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Italian", "Italian", "{2F41FB40-EA08-469A-A676-138F2E7C627A}" + ProjectSection(SolutionItems) = preProject + ..\Specs\DateTime\Italian\DateExtractor.json = ..\Specs\DateTime\Italian\DateExtractor.json + ..\Specs\DateTime\Italian\DateParser.json = ..\Specs\DateTime\Italian\DateParser.json + ..\Specs\DateTime\Italian\DatePeriodExtractor.json = ..\Specs\DateTime\Italian\DatePeriodExtractor.json + ..\Specs\DateTime\Italian\DatePeriodParser.json = ..\Specs\DateTime\Italian\DatePeriodParser.json + ..\Specs\DateTime\Italian\DateTimeExtractor.json = ..\Specs\DateTime\Italian\DateTimeExtractor.json + ..\Specs\DateTime\Italian\DateTimeModel.json = ..\Specs\DateTime\Italian\DateTimeModel.json + ..\Specs\DateTime\Italian\DateTimeModelCalendarMode.json = ..\Specs\DateTime\Italian\DateTimeModelCalendarMode.json + ..\Specs\DateTime\Italian\DateTimeModelExtendedTypes.json = ..\Specs\DateTime\Italian\DateTimeModelExtendedTypes.json + ..\Specs\DateTime\Italian\DateTimeModelSplitDateAndTime.json = ..\Specs\DateTime\Italian\DateTimeModelSplitDateAndTime.json + ..\Specs\DateTime\Italian\DateTimeParser.json = ..\Specs\DateTime\Italian\DateTimeParser.json + ..\Specs\DateTime\Italian\DateTimePeriodExtractor.json = ..\Specs\DateTime\Italian\DateTimePeriodExtractor.json + ..\Specs\DateTime\Italian\DateTimePeriodParser.json = ..\Specs\DateTime\Italian\DateTimePeriodParser.json + ..\Specs\DateTime\Italian\DurationExtractor.json = ..\Specs\DateTime\Italian\DurationExtractor.json + ..\Specs\DateTime\Italian\DurationParser.json = ..\Specs\DateTime\Italian\DurationParser.json + ..\Specs\DateTime\Italian\HolidayExtractor.json = ..\Specs\DateTime\Italian\HolidayExtractor.json + ..\Specs\DateTime\Italian\HolidayParser.json 
= ..\Specs\DateTime\Italian\HolidayParser.json + ..\Specs\DateTime\Italian\MergedExtractor.json = ..\Specs\DateTime\Italian\MergedExtractor.json + ..\Specs\DateTime\Italian\MergedExtractorSkipFromTo.json = ..\Specs\DateTime\Italian\MergedExtractorSkipFromTo.json + ..\Specs\DateTime\Italian\MergedParser.json = ..\Specs\DateTime\Italian\MergedParser.json + ..\Specs\DateTime\Italian\SetExtractor.json = ..\Specs\DateTime\Italian\SetExtractor.json + ..\Specs\DateTime\Italian\SetParser.json = ..\Specs\DateTime\Italian\SetParser.json + ..\Specs\DateTime\Italian\TimeExtractor.json = ..\Specs\DateTime\Italian\TimeExtractor.json + ..\Specs\DateTime\Italian\TimeParser.json = ..\Specs\DateTime\Italian\TimeParser.json + ..\Specs\DateTime\Italian\TimePeriodExtractor.json = ..\Specs\DateTime\Italian\TimePeriodExtractor.json + ..\Specs\DateTime\Italian\TimePeriodParser.json = ..\Specs\DateTime\Italian\TimePeriodParser.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Japanese", "Japanese", "{2F41FB40-EA08-469A-A676-138F2E7C627B}" + ProjectSection(SolutionItems) = preProject + ..\Specs\DateTime\Japanese\DateExtractor.json = ..\Specs\DateTime\Japanese\DateExtractor.json + ..\Specs\DateTime\Japanese\DateParser.json = ..\Specs\DateTime\Japanese\DateParser.json + ..\Specs\DateTime\Japanese\DatePeriodExtractor.json = ..\Specs\DateTime\Japanese\DatePeriodExtractor.json + ..\Specs\DateTime\Japanese\DatePeriodParser.json = ..\Specs\DateTime\Japanese\DatePeriodParser.json + ..\Specs\DateTime\Japanese\DateTimeExtractor.json = ..\Specs\DateTime\Japanese\DateTimeExtractor.json + ..\Specs\DateTime\Japanese\DateTimeModel.json = ..\Specs\DateTime\Japanese\DateTimeModel.json + ..\Specs\DateTime\Japanese\DateTimeParser.json = ..\Specs\DateTime\Japanese\DateTimeParser.json + ..\Specs\DateTime\Japanese\DateTimePeriodExtractor.json = ..\Specs\DateTime\Japanese\DateTimePeriodExtractor.json + ..\Specs\DateTime\Japanese\DateTimePeriodParser.json = 
..\Specs\DateTime\Japanese\DateTimePeriodParser.json + ..\Specs\DateTime\Japanese\DurationExtractor.json = ..\Specs\DateTime\Japanese\DurationExtractor.json + ..\Specs\DateTime\Japanese\DurationParser.json = ..\Specs\DateTime\Japanese\DurationParser.json + ..\Specs\DateTime\Japanese\HolidayExtractor.json = ..\Specs\DateTime\Japanese\HolidayExtractor.json + ..\Specs\DateTime\Japanese\HolidayParser.json = ..\Specs\DateTime\Japanese\HolidayParser.json + ..\Specs\DateTime\Japanese\MergedExtractor.json = ..\Specs\DateTime\Japanese\MergedExtractor.json + ..\Specs\DateTime\Japanese\SetExtractor.json = ..\Specs\DateTime\Japanese\SetExtractor.json + ..\Specs\DateTime\Japanese\SetParser.json = ..\Specs\DateTime\Japanese\SetParser.json + ..\Specs\DateTime\Japanese\TimeExtractor.json = ..\Specs\DateTime\Japanese\TimeExtractor.json + ..\Specs\DateTime\Japanese\TimeParser.json = ..\Specs\DateTime\Japanese\TimeParser.json + ..\Specs\DateTime\Japanese\TimePeriodExtractor.json = ..\Specs\DateTime\Japanese\TimePeriodExtractor.json + ..\Specs\DateTime\Japanese\TimePeriodParser.json = ..\Specs\DateTime\Japanese\TimePeriodParser.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Korean", "Korean", "{2F41FB40-EA08-469A-A676-138F2E7C627C}" + ProjectSection(SolutionItems) = preProject + ..\Specs\DateTime\Korean\DateExtractor.json = ..\Specs\DateTime\Korean\DateExtractor.json + ..\Specs\DateTime\Korean\DateParser.json = ..\Specs\DateTime\Korean\DateParser.json + ..\Specs\DateTime\Korean\DatePeriodExtractor.json = ..\Specs\DateTime\Korean\DatePeriodExtractor.json + ..\Specs\DateTime\Korean\DatePeriodParser.json = ..\Specs\DateTime\Korean\DatePeriodParser.json + ..\Specs\DateTime\Korean\DateTimeExtractor.json = ..\Specs\DateTime\Korean\DateTimeExtractor.json + ..\Specs\DateTime\Korean\DateTimeModel.json = ..\Specs\DateTime\Korean\DateTimeModel.json + ..\Specs\DateTime\Korean\DateTimeParser.json = ..\Specs\DateTime\Korean\DateTimeParser.json + 
..\Specs\DateTime\Korean\DateTimePeriodExtractor.json = ..\Specs\DateTime\Korean\DateTimePeriodExtractor.json + ..\Specs\DateTime\Korean\DateTimePeriodParser.json = ..\Specs\DateTime\Korean\DateTimePeriodParser.json + ..\Specs\DateTime\Korean\DurationExtractor.json = ..\Specs\DateTime\Korean\DurationExtractor.json + ..\Specs\DateTime\Korean\DurationParser.json = ..\Specs\DateTime\Korean\DurationParser.json + ..\Specs\DateTime\Korean\HolidayExtractor.json = ..\Specs\DateTime\Korean\HolidayExtractor.json + ..\Specs\DateTime\Korean\HolidayParser.json = ..\Specs\DateTime\Korean\HolidayParser.json + ..\Specs\DateTime\Korean\MergedExtractor.json = ..\Specs\DateTime\Korean\MergedExtractor.json + ..\Specs\DateTime\Korean\MergedParser.json = ..\Specs\DateTime\Korean\MergedParser.json + ..\Specs\DateTime\Korean\SetExtractor.json = ..\Specs\DateTime\Korean\SetExtractor.json + ..\Specs\DateTime\Korean\SetParser.json = ..\Specs\DateTime\Korean\SetParser.json + ..\Specs\DateTime\Korean\TimeExtractor.json = ..\Specs\DateTime\Korean\TimeExtractor.json + ..\Specs\DateTime\Korean\TimeParser.json = ..\Specs\DateTime\Korean\TimeParser.json + ..\Specs\DateTime\Korean\TimePeriodExtractor.json = ..\Specs\DateTime\Korean\TimePeriodExtractor.json + ..\Specs\DateTime\Korean\TimePeriodParser.json = ..\Specs\DateTime\Korean\TimePeriodParser.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Portuguese", "Portuguese", "{2F41FB40-EA08-469A-A676-138F2E7C627D}" + ProjectSection(SolutionItems) = preProject + ..\Specs\DateTime\Portuguese\DateExtractor.json = ..\Specs\DateTime\Portuguese\DateExtractor.json + ..\Specs\DateTime\Portuguese\DateParser.json = ..\Specs\DateTime\Portuguese\DateParser.json + ..\Specs\DateTime\Portuguese\DatePeriodExtractor.json = ..\Specs\DateTime\Portuguese\DatePeriodExtractor.json + ..\Specs\DateTime\Portuguese\DatePeriodParser.json = ..\Specs\DateTime\Portuguese\DatePeriodParser.json + 
..\Specs\DateTime\Portuguese\DateTimeExtractor.json = ..\Specs\DateTime\Portuguese\DateTimeExtractor.json + ..\Specs\DateTime\Portuguese\DateTimeModel.json = ..\Specs\DateTime\Portuguese\DateTimeModel.json + ..\Specs\DateTime\Portuguese\DateTimeParser.json = ..\Specs\DateTime\Portuguese\DateTimeParser.json + ..\Specs\DateTime\Portuguese\DateTimePeriodExtractor.json = ..\Specs\DateTime\Portuguese\DateTimePeriodExtractor.json + ..\Specs\DateTime\Portuguese\DateTimePeriodParser.json = ..\Specs\DateTime\Portuguese\DateTimePeriodParser.json + ..\Specs\DateTime\Portuguese\DurationExtractor.json = ..\Specs\DateTime\Portuguese\DurationExtractor.json + ..\Specs\DateTime\Portuguese\DurationParser.json = ..\Specs\DateTime\Portuguese\DurationParser.json + ..\Specs\DateTime\Portuguese\HolidayExtractor.json = ..\Specs\DateTime\Portuguese\HolidayExtractor.json + ..\Specs\DateTime\Portuguese\HolidayParser.json = ..\Specs\DateTime\Portuguese\HolidayParser.json + ..\Specs\DateTime\Portuguese\MergedExtractor.json = ..\Specs\DateTime\Portuguese\MergedExtractor.json + ..\Specs\DateTime\Portuguese\SetExtractor.json = ..\Specs\DateTime\Portuguese\SetExtractor.json + ..\Specs\DateTime\Portuguese\SetParser.json = ..\Specs\DateTime\Portuguese\SetParser.json + ..\Specs\DateTime\Portuguese\TimeExtractor.json = ..\Specs\DateTime\Portuguese\TimeExtractor.json + ..\Specs\DateTime\Portuguese\TimeParser.json = ..\Specs\DateTime\Portuguese\TimeParser.json + ..\Specs\DateTime\Portuguese\TimePeriodExtractor.json = ..\Specs\DateTime\Portuguese\TimePeriodExtractor.json + ..\Specs\DateTime\Portuguese\TimePeriodParser.json = ..\Specs\DateTime\Portuguese\TimePeriodParser.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Spanish", "Spanish", "{2F41FB40-EA08-469A-A676-138F2E7C627E}" + ProjectSection(SolutionItems) = preProject + ..\Specs\DateTime\Spanish\DateExtractor.json = ..\Specs\DateTime\Spanish\DateExtractor.json + ..\Specs\DateTime\Spanish\DateParser.json = 
..\Specs\DateTime\Spanish\DateParser.json + ..\Specs\DateTime\Spanish\DatePeriodExtractor.json = ..\Specs\DateTime\Spanish\DatePeriodExtractor.json + ..\Specs\DateTime\Spanish\DatePeriodParser.json = ..\Specs\DateTime\Spanish\DatePeriodParser.json + ..\Specs\DateTime\Spanish\DateTimeExtractor.json = ..\Specs\DateTime\Spanish\DateTimeExtractor.json + ..\Specs\DateTime\Spanish\DateTimeModel.json = ..\Specs\DateTime\Spanish\DateTimeModel.json + ..\Specs\DateTime\Spanish\DateTimeParser.json = ..\Specs\DateTime\Spanish\DateTimeParser.json + ..\Specs\DateTime\Spanish\DateTimePeriodExtractor.json = ..\Specs\DateTime\Spanish\DateTimePeriodExtractor.json + ..\Specs\DateTime\Spanish\DateTimePeriodParser.json = ..\Specs\DateTime\Spanish\DateTimePeriodParser.json + ..\Specs\DateTime\Spanish\DurationExtractor.json = ..\Specs\DateTime\Spanish\DurationExtractor.json + ..\Specs\DateTime\Spanish\DurationParser.json = ..\Specs\DateTime\Spanish\DurationParser.json + ..\Specs\DateTime\Spanish\HolidayExtractor.json = ..\Specs\DateTime\Spanish\HolidayExtractor.json + ..\Specs\DateTime\Spanish\HolidayParser.json = ..\Specs\DateTime\Spanish\HolidayParser.json + ..\Specs\DateTime\Spanish\MergedExtractor.json = ..\Specs\DateTime\Spanish\MergedExtractor.json + ..\Specs\DateTime\Spanish\SetExtractor.json = ..\Specs\DateTime\Spanish\SetExtractor.json + ..\Specs\DateTime\Spanish\SetParser.json = ..\Specs\DateTime\Spanish\SetParser.json + ..\Specs\DateTime\Spanish\TimeExtractor.json = ..\Specs\DateTime\Spanish\TimeExtractor.json + ..\Specs\DateTime\Spanish\TimeParser.json = ..\Specs\DateTime\Spanish\TimeParser.json + ..\Specs\DateTime\Spanish\TimePeriodExtractor.json = ..\Specs\DateTime\Spanish\TimePeriodExtractor.json + ..\Specs\DateTime\Spanish\TimePeriodParser.json = ..\Specs\DateTime\Spanish\TimePeriodParser.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Turkish", "Turkish", "{2F41FB40-EA08-469A-A676-138F2E7C627F}" + ProjectSection(SolutionItems) = 
preProject + ..\Specs\DateTime\Turkish\DateExtractor.json = ..\Specs\DateTime\Turkish\DateExtractor.json + ..\Specs\DateTime\Turkish\DateParser.json = ..\Specs\DateTime\Turkish\DateParser.json + ..\Specs\DateTime\Turkish\DatePeriodExtractor.json = ..\Specs\DateTime\Turkish\DatePeriodExtractor.json + ..\Specs\DateTime\Turkish\DatePeriodParser.json = ..\Specs\DateTime\Turkish\DatePeriodParser.json + ..\Specs\DateTime\Turkish\DateTimeExtractor.json = ..\Specs\DateTime\Turkish\DateTimeExtractor.json + ..\Specs\DateTime\Turkish\DateTimeModel.json = ..\Specs\DateTime\Turkish\DateTimeModel.json + ..\Specs\DateTime\Turkish\DateTimeParser.json = ..\Specs\DateTime\Turkish\DateTimeParser.json + ..\Specs\DateTime\Turkish\DateTimePeriodExtractor.json = ..\Specs\DateTime\Turkish\DateTimePeriodExtractor.json + ..\Specs\DateTime\Turkish\DateTimePeriodParser.json = ..\Specs\DateTime\Turkish\DateTimePeriodParser.json + ..\Specs\DateTime\Turkish\DurationExtractor.json = ..\Specs\DateTime\Turkish\DurationExtractor.json + ..\Specs\DateTime\Turkish\DurationParser.json = ..\Specs\DateTime\Turkish\DurationParser.json + ..\Specs\DateTime\Turkish\HolidayExtractor.json = ..\Specs\DateTime\Turkish\HolidayExtractor.json + ..\Specs\DateTime\Turkish\HolidayParser.json = ..\Specs\DateTime\Turkish\HolidayParser.json + ..\Specs\DateTime\Turkish\MergedExtractor.json = ..\Specs\DateTime\Turkish\MergedExtractor.json + ..\Specs\DateTime\Turkish\MergedParser.json = ..\Specs\DateTime\Turkish\MergedParser.json + ..\Specs\DateTime\Turkish\SetExtractor.json = ..\Specs\DateTime\Turkish\SetExtractor.json + ..\Specs\DateTime\Turkish\SetParser.json = ..\Specs\DateTime\Turkish\SetParser.json + ..\Specs\DateTime\Turkish\TimeExtractor.json = ..\Specs\DateTime\Turkish\TimeExtractor.json + ..\Specs\DateTime\Turkish\TimeParser.json = ..\Specs\DateTime\Turkish\TimeParser.json + ..\Specs\DateTime\Turkish\TimePeriodExtractor.json = ..\Specs\DateTime\Turkish\TimePeriodExtractor.json + 
..\Specs\DateTime\Turkish\TimePeriodParser.json = ..\Specs\DateTime\Turkish\TimePeriodParser.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tools", "Tools", "{21E1D26A-635F-4965-A56E-20263D2A09FF}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ValidationTool", "..\Tools\src\ValidationTool\ValidationTool.csproj", "{28FE2836-58CE-48E6-B657-0A3A836AEC14}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Arabic", "Arabic", "{E93CDEDA-0221-42F2-9408-896CE5FEE69D}" + ProjectSection(SolutionItems) = preProject + ..\Specs\DateTime\Arabic\DateExtractor.json = ..\Specs\DateTime\Arabic\DateExtractor.json + ..\Specs\DateTime\Arabic\DateParser.json = ..\Specs\DateTime\Arabic\DateParser.json + ..\Specs\DateTime\Arabic\DatePeriodExtractor.json = ..\Specs\DateTime\Arabic\DatePeriodExtractor.json + ..\Specs\DateTime\Arabic\DatePeriodParser.json = ..\Specs\DateTime\Arabic\DatePeriodParser.json + ..\Specs\DateTime\Arabic\DateTimeExtractor.json = ..\Specs\DateTime\Arabic\DateTimeExtractor.json + ..\Specs\DateTime\Arabic\DateTimeModel.json = ..\Specs\DateTime\Arabic\DateTimeModel.json + ..\Specs\DateTime\Arabic\DateTimeParser.json = ..\Specs\DateTime\Arabic\DateTimeParser.json + ..\Specs\DateTime\Arabic\DateTimePeriodExtractor.json = ..\Specs\DateTime\Arabic\DateTimePeriodExtractor.json + ..\Specs\DateTime\Arabic\DateTimePeriodParser.json = ..\Specs\DateTime\Arabic\DateTimePeriodParser.json + ..\Specs\DateTime\Arabic\DurationExtractor.json = ..\Specs\DateTime\Arabic\DurationExtractor.json + ..\Specs\DateTime\Arabic\DurationParser.json = ..\Specs\DateTime\Arabic\DurationParser.json + ..\Specs\DateTime\Arabic\HolidayExtractor.json = ..\Specs\DateTime\Arabic\HolidayExtractor.json + ..\Specs\DateTime\Arabic\HolidayParser.json = ..\Specs\DateTime\Arabic\HolidayParser.json + ..\Specs\DateTime\Arabic\MergedExtractor.json = ..\Specs\DateTime\Arabic\MergedExtractor.json + ..\Specs\DateTime\Arabic\MergedParser.json 
= ..\Specs\DateTime\Arabic\MergedParser.json + ..\Specs\DateTime\Arabic\SetExtractor.json = ..\Specs\DateTime\Arabic\SetExtractor.json + ..\Specs\DateTime\Arabic\SetParser.json = ..\Specs\DateTime\Arabic\SetParser.json + ..\Specs\DateTime\Arabic\TimeExtractor.json = ..\Specs\DateTime\Arabic\TimeExtractor.json + ..\Specs\DateTime\Arabic\TimeParser.json = ..\Specs\DateTime\Arabic\TimeParser.json + ..\Specs\DateTime\Arabic\TimePeriodExtractor.json = ..\Specs\DateTime\Arabic\TimePeriodExtractor.json + ..\Specs\DateTime\Arabic\TimePeriodParser.json = ..\Specs\DateTime\Arabic\TimePeriodParser.json + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Swedish", "Swedish", "{A45B902F-DE07-4502-8F8C-2671B987D72C}" + ProjectSection(SolutionItems) = preProject + ..\Specs\DateTime\Swedish\DateExtractor.json = ..\Specs\DateTime\Swedish\DateExtractor.json + ..\Specs\DateTime\Swedish\DateParser.json = ..\Specs\DateTime\Swedish\DateParser.json + ..\Specs\DateTime\Swedish\TimeZoneExtractor.json = ..\Specs\DateTime\Swedish\TimeZoneExtractor.json + ..\Specs\DateTime\Swedish\TimeZoneParser.json = ..\Specs\DateTime\Swedish\TimeZoneParser.json + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -172,6 +941,10 @@ Global {5039FC31-E88A-45EC-811B-3C406162A863}.Debug|Any CPU.Build.0 = Debug|Any CPU {5039FC31-E88A-45EC-811B-3C406162A863}.Release|Any CPU.ActiveCfg = Release|Any CPU {5039FC31-E88A-45EC-811B-3C406162A863}.Release|Any CPU.Build.0 = Release|Any CPU + {28FE2836-58CE-48E6-B657-0A3A836AEC14}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {28FE2836-58CE-48E6-B657-0A3A836AEC14}.Debug|Any CPU.Build.0 = Debug|Any CPU + {28FE2836-58CE-48E6-B657-0A3A836AEC14}.Release|Any CPU.ActiveCfg = Release|Any CPU + {28FE2836-58CE-48E6-B657-0A3A836AEC14}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -185,6 
+958,83 @@ Global {32A4593C-3D2D-412E-8EC5-07346266A358} = {ED7B6456-AB0A-48CE-8F85-711FE87F09C2} {AC7B58DC-209E-47FA-9B03-A1E0FB5E7B05} = {452C724B-66C1-4F67-A718-E7733F79961D} {5039FC31-E88A-45EC-811B-3C406162A863} = {452C724B-66C1-4F67-A718-E7733F79961D} + {4406756F-4161-46CA-8021-DB26FD0CA2A7} = {F070C131-80C0-4B0E-A462-2F6DB796759D} + {55DDEF35-167D-45A5-B6D0-3747532E8314} = {4406756F-4161-46CA-8021-DB26FD0CA2A7} + {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} = {4406756F-4161-46CA-8021-DB26FD0CA2A7} + {2DF9886B-9586-4C62-8D1C-A723F8688574} = {4406756F-4161-46CA-8021-DB26FD0CA2A7} + {72FEAD3F-7C2A-4B85-9942-D9ADA122411A} = {4406756F-4161-46CA-8021-DB26FD0CA2A7} + {59FC2C0D-BF12-4A99-BC50-A104948DB1DC} = {4406756F-4161-46CA-8021-DB26FD0CA2A7} + {16B24511-1D2F-47FE-BF6D-5C2B29F45279} = {59FC2C0D-BF12-4A99-BC50-A104948DB1DC} + {7218D1B7-55C9-43E0-AA97-CD36B6DC2822} = {59FC2C0D-BF12-4A99-BC50-A104948DB1DC} + {D80595E0-D724-466F-9F65-9F254B974C8A} = {59FC2C0D-BF12-4A99-BC50-A104948DB1DC} + {047F38CF-8AAA-4967-BDEF-5E2BC378670D} = {59FC2C0D-BF12-4A99-BC50-A104948DB1DC} + {BCD91F42-F366-4F68-8A3B-7AC4EEF42534} = {59FC2C0D-BF12-4A99-BC50-A104948DB1DC} + {6DEA89B3-D580-48A4-BA52-8A077920AF09} = {59FC2C0D-BF12-4A99-BC50-A104948DB1DC} + {04F976D1-D026-4511-A414-FF318BEF3321} = {59FC2C0D-BF12-4A99-BC50-A104948DB1DC} + {81B5C120-8A99-42BF-88DD-0A8F0147142D} = {59FC2C0D-BF12-4A99-BC50-A104948DB1DC} + {93CE72CE-A624-4075-B2DD-392F4D42DC77} = {59FC2C0D-BF12-4A99-BC50-A104948DB1DC} + {C47CD83C-F6DC-4580-A143-A5C11EA9377A} = {59FC2C0D-BF12-4A99-BC50-A104948DB1DC} + {D64B181C-B07E-4531-A016-14E9D827EB87} = {59FC2C0D-BF12-4A99-BC50-A104948DB1DC} + {C99691DA-6E5B-4194-972E-7992F4CC4486} = {59FC2C0D-BF12-4A99-BC50-A104948DB1DC} + {18C2DA46-BF61-4F87-B645-443D6525545A} = {59FC2C0D-BF12-4A99-BC50-A104948DB1DC} + {78A33B3C-6340-4EE8-B802-63618D008AFF} = {55DDEF35-167D-45A5-B6D0-3747532E8314} + {39E7797E-768E-4170-9C29-5DAEE70D86E2} = {55DDEF35-167D-45A5-B6D0-3747532E8314} + 
{39E7797E-768E-4170-9C29-5DAEE70D86E3} = {55DDEF35-167D-45A5-B6D0-3747532E8314} + {39E7797E-768E-4170-9C29-5DAEE70D86E4} = {55DDEF35-167D-45A5-B6D0-3747532E8314} + {39E7797E-768E-4170-9C29-5DAEE70D86E5} = {55DDEF35-167D-45A5-B6D0-3747532E8314} + {39E7797E-768E-4170-9C29-5DAEE70D86E6} = {55DDEF35-167D-45A5-B6D0-3747532E8314} + {39E7797E-768E-4170-9C29-5DAEE70D86E7} = {55DDEF35-167D-45A5-B6D0-3747532E8314} + {39E7797E-768E-4170-9C29-5DAEE70D86E8} = {55DDEF35-167D-45A5-B6D0-3747532E8314} + {39E7797E-768E-4170-9C29-5DAEE70D86E9} = {55DDEF35-167D-45A5-B6D0-3747532E8314} + {39E7797E-768E-4170-9C29-5DAEE70D86F0} = {55DDEF35-167D-45A5-B6D0-3747532E8314} + {39E7797E-768E-4170-9C29-5DAEE70D86F1} = {55DDEF35-167D-45A5-B6D0-3747532E8314} + {39E7797E-768E-4170-9C29-5DAEE70D86F2} = {55DDEF35-167D-45A5-B6D0-3747532E8314} + {39E7797E-768E-4170-9C29-5DAEE70D86F3} = {55DDEF35-167D-45A5-B6D0-3747532E8314} + {39E7797E-768E-4170-9C29-5DAEE70D86F4} = {55DDEF35-167D-45A5-B6D0-3747532E8314} + {39E7797E-768E-4170-9C29-5DAEE70D86F5} = {55DDEF35-167D-45A5-B6D0-3747532E8314} + {19152546-2D99-476A-9DCB-A3D8F70418A3} = {2DF9886B-9586-4C62-8D1C-A723F8688574} + {19152546-2D99-476A-9DCB-A3D8F70418A4} = {2DF9886B-9586-4C62-8D1C-A723F8688574} + {19152546-2D99-476A-9DCB-A3D8F70418A5} = {2DF9886B-9586-4C62-8D1C-A723F8688574} + {19152546-2D99-476A-9DCB-A3D8F70418A6} = {2DF9886B-9586-4C62-8D1C-A723F8688574} + {19152546-2D99-476A-9DCB-A3D8F70418A7} = {2DF9886B-9586-4C62-8D1C-A723F8688574} + {19152546-2D99-476A-9DCB-A3D8F70418A8} = {2DF9886B-9586-4C62-8D1C-A723F8688574} + {19152546-2D99-476A-9DCB-A3D8F70418A9} = {2DF9886B-9586-4C62-8D1C-A723F8688574} + {19152546-2D99-476A-9DCB-A3D8F70418B0} = {2DF9886B-9586-4C62-8D1C-A723F8688574} + {19152546-2D99-476A-9DCB-A3D8F70418B1} = {2DF9886B-9586-4C62-8D1C-A723F8688574} + {19152546-2D99-476A-9DCB-A3D8F70418B2} = {2DF9886B-9586-4C62-8D1C-A723F8688574} + {19152546-2D99-476A-9DCB-A3D8F70418B3} = {2DF9886B-9586-4C62-8D1C-A723F8688574} + 
{19152546-2D99-476A-9DCB-A3D8F70418B4} = {2DF9886B-9586-4C62-8D1C-A723F8688574} + {19152546-2D99-476A-9DCB-A3D8F70418B5} = {2DF9886B-9586-4C62-8D1C-A723F8688574} + {19152546-2D99-476A-9DCB-A3D8F70418B6} = {2DF9886B-9586-4C62-8D1C-A723F8688574} + {97D22337-7EC5-46CF-B619-C54FFD0BD5EB} = {72FEAD3F-7C2A-4B85-9942-D9ADA122411A} + {97D22337-7EC5-46CF-B619-C54FFD0BD5EC} = {72FEAD3F-7C2A-4B85-9942-D9ADA122411A} + {97D22337-7EC5-46CF-B619-C54FFD0BD5ED} = {72FEAD3F-7C2A-4B85-9942-D9ADA122411A} + {97D22337-7EC5-46CF-B619-C54FFD0BD5EE} = {72FEAD3F-7C2A-4B85-9942-D9ADA122411A} + {97D22337-7EC5-46CF-B619-C54FFD0BD5EF} = {72FEAD3F-7C2A-4B85-9942-D9ADA122411A} + {97D22337-7EC5-46CF-B619-C54FFD0BD5F0} = {72FEAD3F-7C2A-4B85-9942-D9ADA122411A} + {97D22337-7EC5-46CF-B619-C54FFD0BD5F1} = {72FEAD3F-7C2A-4B85-9942-D9ADA122411A} + {97D22337-7EC5-46CF-B619-C54FFD0BD5F2} = {72FEAD3F-7C2A-4B85-9942-D9ADA122411A} + {97D22337-7EC5-46CF-B619-C54FFD0BD5F3} = {72FEAD3F-7C2A-4B85-9942-D9ADA122411A} + {97D22337-7EC5-46CF-B619-C54FFD0BD5F4} = {72FEAD3F-7C2A-4B85-9942-D9ADA122411A} + {97D22337-7EC5-46CF-B619-C54FFD0BD5F5} = {72FEAD3F-7C2A-4B85-9942-D9ADA122411A} + {ACCA0A3A-3935-4E4E-BDBA-0BEDDDF8D428} = {72FEAD3F-7C2A-4B85-9942-D9ADA122411A} + {97D22337-7EC5-46CF-B619-C54FFD0BD5F6} = {72FEAD3F-7C2A-4B85-9942-D9ADA122411A} + {2F41FB40-EA08-469A-A676-138F2E7C6273} = {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} + {2F41FB40-EA08-469A-A676-138F2E7C6274} = {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} + {2F41FB40-EA08-469A-A676-138F2E7C6275} = {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} + {2F41FB40-EA08-469A-A676-138F2E7C6276} = {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} + {2F41FB40-EA08-469A-A676-138F2E7C6277} = {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} + {2F41FB40-EA08-469A-A676-138F2E7C6278} = {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} + {2F41FB40-EA08-469A-A676-138F2E7C6279} = {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} + {2F41FB40-EA08-469A-A676-138F2E7C627A} = {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} + 
{2F41FB40-EA08-469A-A676-138F2E7C627B} = {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} + {2F41FB40-EA08-469A-A676-138F2E7C627C} = {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} + {2F41FB40-EA08-469A-A676-138F2E7C627D} = {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} + {2F41FB40-EA08-469A-A676-138F2E7C627E} = {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} + {2F41FB40-EA08-469A-A676-138F2E7C627F} = {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} + {28FE2836-58CE-48E6-B657-0A3A836AEC14} = {21E1D26A-635F-4965-A56E-20263D2A09FF} + {E93CDEDA-0221-42F2-9408-896CE5FEE69D} = {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} + {A45B902F-DE07-4502-8F8C-2671B987D72C} = {5DB5E281-52B2-4F4B-A316-7F9DAD32FF2E} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {FCFA9AFA-4914-4449-A66D-AE20900F0AA5} diff --git a/.NET/Microsoft.Recognizers.Text.sln.DotSettings b/.NET/Microsoft.Recognizers.Text.sln.DotSettings index d919d7f6b4..dc7609b36c 100644 --- a/.NET/Microsoft.Recognizers.Text.sln.DotSettings +++ b/.NET/Microsoft.Recognizers.Text.sln.DotSettings @@ -1,9 +1,37 @@  + <?xml version="1.0" encoding="utf-16"?><Profile name="Add Headers"><CSCodeStyleAttributes ArrangeTypeAccessModifier="False" ArrangeTypeMemberAccessModifier="False" SortModifiers="False" RemoveRedundantParentheses="False" AddMissingParentheses="False" ArrangeBraces="False" ArrangeAttributes="False" ArrangeArgumentsStyle="False" ArrangeCodeBodyStyle="False" ArrangeVarStyle="False" ArrangeTrailingCommas="False" ArrangeObjectCreation="False" ArrangeDefaultValue="False" /><CSOptimizeUsings><OptimizeUsings>False</OptimizeUsings><EmbraceInRegion>False</EmbraceInRegion><RegionName></RegionName></CSOptimizeUsings><XAMLCollapseEmptyTags>False</XAMLCollapseEmptyTags><CSUpdateFileHeader>True</CSUpdateFileHeader></Profile> + Copyright (c) Microsoft Corporation. All rights reserved. +Licensed under the MIT License. 
+ + True + True + True + True + True True True True + True + True + True + True + True + True + True + True True True True + True + True + True + True True + True + True + True + True + True + True + True True \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text/Config/IConfiguration.cs b/.NET/Microsoft.Recognizers.Text/Config/IConfiguration.cs index 1524e00c33..e44a392f90 100644 --- a/.NET/Microsoft.Recognizers.Text/Config/IConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text/Config/IConfiguration.cs @@ -1,9 +1,14 @@ -namespace Microsoft.Recognizers.Text.Config +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text { public interface IConfiguration { string Culture { get; } + // string RequestedCulture { get; } + } } diff --git a/.NET/Microsoft.Recognizers.Text/Constants.cs b/.NET/Microsoft.Recognizers.Text/Constants.cs index 2f28803c89..4c81688895 100644 --- a/.NET/Microsoft.Recognizers.Text/Constants.cs +++ b/.NET/Microsoft.Recognizers.Text/Constants.cs @@ -1,7 +1,12 @@ -namespace Microsoft.Recognizers.Text +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text { public static class Constants { public const int InvalidIndex = -1; + + public const int MaxRegexTimeoutInSeconds = 7200; // two hours } } diff --git a/.NET/Microsoft.Recognizers.Text/Culture.cs b/.NET/Microsoft.Recognizers.Text/Culture.cs index d9e7461930..385087b47f 100644 --- a/.NET/Microsoft.Recognizers.Text/Culture.cs +++ b/.NET/Microsoft.Recognizers.Text/Culture.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Linq; namespace Microsoft.Recognizers.Text @@ -10,6 +13,7 @@ public sealed class Culture public const string EnglishOthers = "en-*"; public const string Chinese = "zh-cn"; public const string Spanish = "es-es"; + public const string SpanishMexican = "es-mx"; // Temporary workaround for language variant config issue public const string Portuguese = "pt-br"; public const string French = "fr-fr"; public const string German = "de-de"; @@ -29,6 +33,7 @@ public sealed class Culture new Culture("English", English), new Culture("Chinese", Chinese), new Culture("Spanish", Spanish), + new Culture("SpanishMexican", SpanishMexican), new Culture("Portuguese", Portuguese), new Culture("French", French), new Culture("German", German), diff --git a/.NET/Microsoft.Recognizers.Text/Extractors/IExtractor.cs b/.NET/Microsoft.Recognizers.Text/Extractors/IExtractor.cs index 51b678f170..c2337313b5 100644 --- a/.NET/Microsoft.Recognizers.Text/Extractors/IExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text/Extractors/IExtractor.cs @@ -1,4 +1,9 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; + +using Microsoft.Recognizers.Text.InternalCache; namespace Microsoft.Recognizers.Text { @@ -7,7 +12,7 @@ public interface IExtractor List Extract(string input); } - public class ExtractResult + public class ExtractResult : ICloneableType { public int? 
Start { get; set; } = null; @@ -20,5 +25,11 @@ public class ExtractResult public object Data { get; set; } = null; public Metadata Metadata { get; set; } = null; + + public ExtractResult Clone() + { + return (ExtractResult)MemberwiseClone(); + } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text/Extractors/Metadata.cs b/.NET/Microsoft.Recognizers.Text/Extractors/Metadata.cs index b02d8cd1c5..90f62e5640 100644 --- a/.NET/Microsoft.Recognizers.Text/Extractors/Metadata.cs +++ b/.NET/Microsoft.Recognizers.Text/Extractors/Metadata.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text { public class Metadata { @@ -8,11 +11,18 @@ public class Metadata // For cases like "2015年以前" (usually regards as "before 2015" in English), "5天以前" (usually regards as "5 days ago" in English) in Chinese, we need to decide whether this is a "Date with Mode" or "Duration with Before and After". We use this flag to avoid duplicate judgment both in the Extraction step and Parse step. // Currently, this flag is only used in Chinese DateTime as other languages don't have this ambiguity cases. - public bool IsDurationWithBeforeAndAfter { get; set; } = false; + public bool IsDurationWithAgoAndLater { get; set; } = false; // For Holiday cases as they are special cases of Date public bool IsHoliday { get; set; } = false; + // For special cases of holidays entities that should resolve to holiday ranges. + // This includes but is not exclusive to HolidayWeekends. 
+ public bool IsHolidayRange { get; set; } = false; + + // For special cases of holidays entities that include weekend terms such as "Halloween weekend" + public bool IsHolidayWeekend { get; set; } = false; + // For Ordinal.relative, a subtype of Ordinal public bool IsOrdinalRelative { get; set; } = false; @@ -20,8 +30,21 @@ public class Metadata public string Offset { get; set; } = string.Empty; + // For extractions that contain a Holiday substring + public string HolidayName { get; set; } = string.Empty; + public string RelativeTo { get; set; } = string.Empty; public bool IsMealtime { get; set; } = false; + + // For cases where a language has variations in handling decimal separators + public bool TreatAsInteger { get; set; } = false; + + public bool IsDurationDateWithWeekday { get; set; } = false; + + public Metadata Clone() + { + return (Metadata)MemberwiseClone(); + } } } diff --git a/.NET/Microsoft.Recognizers.Text/InternalCache/ICloneableType.cs b/.NET/Microsoft.Recognizers.Text/InternalCache/ICloneableType.cs new file mode 100644 index 0000000000..6e3255ac95 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text/InternalCache/ICloneableType.cs @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.InternalCache +{ + public interface ICloneableType + { + T Clone(); + } +} diff --git a/.NET/Microsoft.Recognizers.Text/InternalCache/ResultsCache.cs b/.NET/Microsoft.Recognizers.Text/InternalCache/ResultsCache.cs new file mode 100644 index 0000000000..b358b66096 --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text/InternalCache/ResultsCache.cs @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +// ReSharper disable StaticMemberInGenericType + +using System; +using System.Collections.Generic; + +using Microsoft.Extensions.Caching.Memory; + +namespace Microsoft.Recognizers.Text.InternalCache +{ + public class ResultsCache + where TItem : ICloneableType + { + + private const long BaseCacheSize = 20000; + + private const double CompactionPercentage = 0.6; + + private static readonly MemoryCacheEntryOptions CacheEntryOptions = new MemoryCacheEntryOptions().SetSize(1); + + private readonly IMemoryCache resultsCache; + + // In recognizers usage, DateTime has 4 cache instances, while Number only has one. + public ResultsCache(int ratioFactor = 1) + { + + var cacheOptions = new MemoryCacheOptions + { + SizeLimit = BaseCacheSize * ratioFactor, + CompactionPercentage = CompactionPercentage, + ExpirationScanFrequency = TimeSpan.FromHours(24), + }; + + resultsCache = new MemoryCache(cacheOptions); + } + + public List GetOrCreate(object key, Func> createItem) + { + + if (!resultsCache.TryGetValue(key, out List results)) + { + results = createItem(); + + resultsCache.Set(key, results, CacheEntryOptions); + } + + return results.ConvertAll(e => e.Clone()); + } + + } +} diff --git a/.NET/Microsoft.Recognizers.Text/Matcher/AaNode.cs b/.NET/Microsoft.Recognizers.Text/Matcher/AaNode.cs index 06892ab764..b1d1679224 100644 --- a/.NET/Microsoft.Recognizers.Text/Matcher/AaNode.cs +++ b/.NET/Microsoft.Recognizers.Text/Matcher/AaNode.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; using System.Linq; namespace Microsoft.Recognizers.Text.Matcher diff --git a/.NET/Microsoft.Recognizers.Text/Matcher/AbstractMatcher.cs b/.NET/Microsoft.Recognizers.Text/Matcher/AbstractMatcher.cs index 7d909cbf2c..8e7b0e9fc7 100644 --- a/.NET/Microsoft.Recognizers.Text/Matcher/AbstractMatcher.cs +++ b/.NET/Microsoft.Recognizers.Text/Matcher/AbstractMatcher.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Linq; diff --git a/.NET/Microsoft.Recognizers.Text/Matcher/AcAutomaton.cs b/.NET/Microsoft.Recognizers.Text/Matcher/AcAutomaton.cs index 7b2c5098f0..b0a74ddb8f 100644 --- a/.NET/Microsoft.Recognizers.Text/Matcher/AcAutomaton.cs +++ b/.NET/Microsoft.Recognizers.Text/Matcher/AcAutomaton.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Linq; namespace Microsoft.Recognizers.Text.Matcher diff --git a/.NET/Microsoft.Recognizers.Text/Matcher/IMatcher.cs b/.NET/Microsoft.Recognizers.Text/Matcher/IMatcher.cs index b493425bcb..a2549d8a9b 100644 --- a/.NET/Microsoft.Recognizers.Text/Matcher/IMatcher.cs +++ b/.NET/Microsoft.Recognizers.Text/Matcher/IMatcher.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.Matcher { diff --git a/.NET/Microsoft.Recognizers.Text/Matcher/ITokenizer.cs b/.NET/Microsoft.Recognizers.Text/Matcher/ITokenizer.cs index 8ceccce280..74190e420f 100644 --- a/.NET/Microsoft.Recognizers.Text/Matcher/ITokenizer.cs +++ b/.NET/Microsoft.Recognizers.Text/Matcher/ITokenizer.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.Matcher { diff --git a/.NET/Microsoft.Recognizers.Text/Matcher/MatchResult.cs b/.NET/Microsoft.Recognizers.Text/Matcher/MatchResult.cs index 85420cae64..88cb54717d 100644 --- a/.NET/Microsoft.Recognizers.Text/Matcher/MatchResult.cs +++ b/.NET/Microsoft.Recognizers.Text/Matcher/MatchResult.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.Matcher { diff --git a/.NET/Microsoft.Recognizers.Text/Matcher/MatchStrategy.cs b/.NET/Microsoft.Recognizers.Text/Matcher/MatchStrategy.cs index 5411d0e19e..6498244795 100644 --- a/.NET/Microsoft.Recognizers.Text/Matcher/MatchStrategy.cs +++ b/.NET/Microsoft.Recognizers.Text/Matcher/MatchStrategy.cs @@ -1,9 +1,12 @@ -namespace Microsoft.Recognizers.Text.Matcher +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text.Matcher { public enum MatchStrategy { /// - /// AcAtomaton + /// AcAutomaton /// AcAutomaton, diff --git a/.NET/Microsoft.Recognizers.Text/Matcher/Node.cs b/.NET/Microsoft.Recognizers.Text/Matcher/Node.cs index 1c21766721..7594f23cd4 100644 --- a/.NET/Microsoft.Recognizers.Text/Matcher/Node.cs +++ b/.NET/Microsoft.Recognizers.Text/Matcher/Node.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Linq; namespace Microsoft.Recognizers.Text.Matcher @@ -21,10 +24,7 @@ public Node this[T c] set { - if (Children == null) - { - Children = new Dictionary>(); - } + Children ??= new Dictionary>(); Children[c] = value; } @@ -37,10 +37,7 @@ public IEnumerator> GetEnumerator() public void AddValue(string value) { - if (Values == null) - { - Values = new HashSet(); - } + Values ??= new HashSet(); Values.Add(value); } diff --git a/.NET/Microsoft.Recognizers.Text/Matcher/NumberWithUnitTokenizer.cs b/.NET/Microsoft.Recognizers.Text/Matcher/NumberWithUnitTokenizer.cs index 70683443b3..a29683860b 100644 --- a/.NET/Microsoft.Recognizers.Text/Matcher/NumberWithUnitTokenizer.cs +++ b/.NET/Microsoft.Recognizers.Text/Matcher/NumberWithUnitTokenizer.cs @@ -1,10 +1,13 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.Matcher { public class NumberWithUnitTokenizer : SimpleTokenizer { - private static HashSet specialTokenCharacters = new HashSet { '$' }; + private static readonly HashSet SpecialTokenCharacters = new HashSet { '$' }; /* The main difference between this strategy and SimpleTokenizer is for cases like * 'Bob's $ 100 cash'. 's' and '$' are independent tokens in SimpleTokenizer. 
@@ -24,7 +27,9 @@ public override List Tokenize(string input) bool inToken = false; int tokenStart = 0; + var chars = input.ToCharArray(); + for (int i = 0; i < chars.Length; i++) { var c = chars[i]; @@ -36,7 +41,7 @@ public override List Tokenize(string input) inToken = false; } } - else if ((!specialTokenCharacters.Contains(c) && !char.IsLetterOrDigit(c)) || IsChinese(c) || IsJapanese(c)) + else if ((!SpecialTokenCharacters.Contains(c) && !char.IsLetterOrDigit(c)) || IsCjk(c)) { // Non-splittable currency units (as "$") are treated as regular letters. For instance, 'us$' should be a single token if (inToken) @@ -85,7 +90,7 @@ private bool IsSplittableUnit(char curChar, char preChar) } // Non-splittable currency units can't be mixed with digits. For example, '$100' or '100$' will be tokenized to '$' and '100', '1$50' will be tokenized to '1', '$', and '50' - if ((char.IsDigit(curChar) && specialTokenCharacters.Contains(preChar)) || (specialTokenCharacters.Contains(curChar) && char.IsDigit(preChar))) + if ((char.IsDigit(curChar) && SpecialTokenCharacters.Contains(preChar)) || (SpecialTokenCharacters.Contains(curChar) && char.IsDigit(preChar))) { return true; } diff --git a/.NET/Microsoft.Recognizers.Text/Matcher/SimpleTokenizer.cs b/.NET/Microsoft.Recognizers.Text/Matcher/SimpleTokenizer.cs index e8a69e64a1..5e83005347 100644 --- a/.NET/Microsoft.Recognizers.Text/Matcher/SimpleTokenizer.cs +++ b/.NET/Microsoft.Recognizers.Text/Matcher/SimpleTokenizer.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Collections.Generic; namespace Microsoft.Recognizers.Text.Matcher { diff --git a/.NET/Microsoft.Recognizers.Text/Matcher/StringMatcher.cs b/.NET/Microsoft.Recognizers.Text/Matcher/StringMatcher.cs index 2f37a33932..57a9e45247 100644 --- a/.NET/Microsoft.Recognizers.Text/Matcher/StringMatcher.cs +++ b/.NET/Microsoft.Recognizers.Text/Matcher/StringMatcher.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Linq; @@ -20,15 +23,15 @@ public StringMatcher(MatchStrategy matchStrategy = MatchStrategy.TrieTree, IToke Matcher = new TrieTree(); break; default: - throw new ArgumentException($"Unsupported match strategy: {matchStrategy.ToString()}"); + throw new ArgumentException($"Unsupported match strategy: {matchStrategy}"); } } - private IMatcher Matcher { get; set; } + private IMatcher Matcher { get; } public void Init(IEnumerable values) { - Init(values, values.Select(v => v.ToString()).ToArray()); + Init(values, values.Select(v => v).ToArray()); } public void Init(IEnumerable values, string[] ids) @@ -77,13 +80,13 @@ public IEnumerable> Find(string queryText) var endToken = queryTokens[r.Start + r.Length - 1]; var start = startToken.Start; var length = endToken.End - startToken.Start; - var rtext = queryText.Substring(start, length); + var resultText = queryText.Substring(start, length); yield return new MatchResult() { Start = start, Length = length, - Text = rtext, + Text = resultText, CanonicalValues = r.CanonicalValues, }; } diff --git a/.NET/Microsoft.Recognizers.Text/Matcher/Token.cs b/.NET/Microsoft.Recognizers.Text/Matcher/Token.cs index 584b1fa8cb..2e8f4ab898 100644 --- a/.NET/Microsoft.Recognizers.Text/Matcher/Token.cs +++ b/.NET/Microsoft.Recognizers.Text/Matcher/Token.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text.Matcher +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text.Matcher { public class Token { diff --git a/.NET/Microsoft.Recognizers.Text/Matcher/TrieTree.cs b/.NET/Microsoft.Recognizers.Text/Matcher/TrieTree.cs index 70d5d23dce..9c68e32cf1 100644 --- a/.NET/Microsoft.Recognizers.Text/Matcher/TrieTree.cs +++ b/.NET/Microsoft.Recognizers.Text/Matcher/TrieTree.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; using System.Linq; namespace Microsoft.Recognizers.Text.Matcher diff --git a/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.csproj b/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.csproj index c5357694d3..644046e8af 100644 --- a/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.csproj +++ b/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.csproj @@ -1,35 +1,50 @@ - + + - netstandard2.0;net462;net452;net45 + netstandard2.1;netstandard2.0;net462;net6.0 + 9 false false ../Recognizers-Text.ruleset - + + + true + ..\buildtools\35MSSharedLib1024.snk + true $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + Microsoft + nlp, entity-extraction, parser-library, recognizer, text, netstandard2.0 + Microsoft.Recognizers.Text provides base classes for robust recognition and resolution of text entities. + MIT + https://github.com/Microsoft/Recognizers-Text + images\icon.png + © Microsoft Corporation. All rights reserved. 
+ + - + all runtime; build; native; contentfiles; analyzers - + all runtime; build; native; contentfiles; analyzers - - - - + + + + + diff --git a/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.nuspec b/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.nuspec index aaef245597..6eab2df00f 100644 --- a/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.nuspec +++ b/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.nuspec @@ -9,24 +9,22 @@ Microsoft.Recognizers.Text provides base classes for robust recognition and resolution of text entities. MIT https://github.com/Microsoft/Recognizers-Text - http://docs.botframework.com/images/bot_icon.png + images\icon.png © Microsoft Corporation. All rights reserved. nlp entity-extraction parser-library recognizer text netstandard2.0 - - + + + - - - - + \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.xml b/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.xml index aac2d96963..19be60ddf1 100644 --- a/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.xml +++ b/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.xml @@ -6,7 +6,7 @@ - AcAtomaton + AcAutomaton diff --git a/.NET/Microsoft.Recognizers.Text/ModelFactory.cs b/.NET/Microsoft.Recognizers.Text/ModelFactory.cs index 4907357682..8d6b12d6a4 100644 --- a/.NET/Microsoft.Recognizers.Text/ModelFactory.cs +++ b/.NET/Microsoft.Recognizers.Text/ModelFactory.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Linq; @@ -35,7 +38,7 @@ public T GetModel(string culture, bool fallbackToDefaultCulture, TModelOption public void InitializeModels(string targetCulture, TModelOptions options) { this.Keys - .Where(key => string.IsNullOrEmpty(targetCulture) || key.culture.Equals(targetCulture)) + .Where(key => string.IsNullOrEmpty(targetCulture) || key.culture.Equals(targetCulture, StringComparison.Ordinal)) .ToList() .ForEach(key => this.InitializeModel(key.modelType, key.culture, options)); } @@ -58,15 +61,15 @@ private bool TryGetModel(string culture, TModelOptions options, out T model) return result; } - private bool TryGetModel(Type modelType, string culture, TModelOptions options, out IModel model) + private bool TryGetModel(Type modelType, string reqCulture, TModelOptions options, out IModel model) { model = default(IModel); - if (string.IsNullOrEmpty(culture)) + if (string.IsNullOrEmpty(reqCulture)) { return false; } - culture = Culture.MapToNearestLanguage(culture); + var culture = Culture.MapToNearestLanguage(reqCulture); // Look in cache var cacheKey = (culture, modelType, options.ToString()); diff --git a/.NET/Microsoft.Recognizers.Text/Models/ExtendedModelResult.cs b/.NET/Microsoft.Recognizers.Text/Models/ExtendedModelResult.cs index 32a0e65d7a..fa5607dcf9 100644 --- a/.NET/Microsoft.Recognizers.Text/Models/ExtendedModelResult.cs +++ b/.NET/Microsoft.Recognizers.Text/Models/ExtendedModelResult.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text { public class ExtendedModelResult : ModelResult { diff --git a/.NET/Microsoft.Recognizers.Text/Models/IModel.cs b/.NET/Microsoft.Recognizers.Text/Models/IModel.cs index 363fa6a32a..0cb7315cb7 100644 --- a/.NET/Microsoft.Recognizers.Text/Models/IModel.cs +++ b/.NET/Microsoft.Recognizers.Text/Models/IModel.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; namespace Microsoft.Recognizers.Text { @@ -6,6 +9,12 @@ public interface IModel { string ModelTypeName { get; } + string Culture { get; } + + string RequestedCulture { get; } + List Parse(string query); + + void SetCultureInfo(string culture, string requestedCulture = null); } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text/Models/ModelResult.cs b/.NET/Microsoft.Recognizers.Text/Models/ModelResult.cs index 48f16ab75b..fee2a85851 100644 --- a/.NET/Microsoft.Recognizers.Text/Models/ModelResult.cs +++ b/.NET/Microsoft.Recognizers.Text/Models/ModelResult.cs @@ -1,4 +1,7 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; namespace Microsoft.Recognizers.Text { diff --git a/.NET/Microsoft.Recognizers.Text/Models/ResolutionKey.cs b/.NET/Microsoft.Recognizers.Text/Models/ResolutionKey.cs index f78a835e74..08870df770 100644 --- a/.NET/Microsoft.Recognizers.Text/Models/ResolutionKey.cs +++ b/.NET/Microsoft.Recognizers.Text/Models/ResolutionKey.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace Microsoft.Recognizers.Text { public static class ResolutionKey { diff --git a/.NET/Microsoft.Recognizers.Text/Parsers/IParser.cs b/.NET/Microsoft.Recognizers.Text/Parsers/IParser.cs index 6f6f0d4dc3..f02eb72409 100644 --- a/.NET/Microsoft.Recognizers.Text/Parsers/IParser.cs +++ b/.NET/Microsoft.Recognizers.Text/Parsers/IParser.cs @@ -1,4 +1,7 @@ -namespace Microsoft.Recognizers.Text +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +namespace Microsoft.Recognizers.Text { public interface IParser { diff --git a/.NET/Microsoft.Recognizers.Text/Recognizer.cs b/.NET/Microsoft.Recognizers.Text/Recognizer.cs index 1f6019bb6b..1660178d5b 100644 --- a/.NET/Microsoft.Recognizers.Text/Recognizer.cs +++ b/.NET/Microsoft.Recognizers.Text/Recognizer.cs @@ -1,4 +1,8 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text @@ -6,14 +10,21 @@ namespace Microsoft.Recognizers.Text public abstract class Recognizer where TRecognizerOptions : struct { + private static readonly IDictionary TimeoutDictionary = new Dictionary(); + + private static readonly object _locker = new object(); + private readonly ModelFactory factory; - protected Recognizer(string targetCulture, TRecognizerOptions options, bool lazyInitialization) + protected Recognizer(string targetCulture, TRecognizerOptions options, bool lazyInitialization, int timeout = 0) { this.Options = options; this.TargetCulture = targetCulture; - + this.TimeoutInSeconds = timeout; this.factory = new ModelFactory(); + + AddRegexTimeoutValuesForType(); + InitializeConfiguration(); if (!lazyInitialization) @@ -26,8 +37,22 @@ protected Recognizer(string targetCulture, TRecognizerOptions options, bool lazy public TRecognizerOptions Options { get; private set; } + protected int 
TimeoutInSeconds { get; } + public static TRecognizerOptions GetOptions(int value) => EnumUtils.Convert(value); + public static TimeSpan GetTimeout(Type type) + { + return TimeoutDictionary.TryGetValue(type, out var timeInSeconds) && timeInSeconds > 0 ? + TimeSpan.FromSeconds(timeInSeconds) : TimeSpan.FromSeconds(Constants.MaxRegexTimeoutInSeconds); + } + + // For each Recognizer type (i.e., NumberRecognizer, DateTimeRecognizer, SequenceRecognizer and so on) + // Find all the types that should use the same timeout value set by that recognizer. + // Refer to the concrete implementation of each recognizer for the list of the types. These are + // the types that have a Regex object created in them and need a Timeout parameter. + protected abstract List GetRelatedTypes(); + protected T GetModel(string culture, bool fallbackToDefaultCulture) where T : IModel { @@ -45,5 +70,20 @@ private void InitializeModels(string targetCulture, TRecognizerOptions options) { this.factory.InitializeModels(targetCulture, options); } + + private void AddRegexTimeoutValuesForType() + { + // Foreach Recognizer type find the subtypes who are supposed to use the same + // Regex timeout value. Children of Recognzier get to have their own timeout value. + lock (_locker) + { + if (!TimeoutDictionary.ContainsKey(this.GetType())) + { + TimeoutDictionary.Add(this.GetType(), TimeoutInSeconds); + var relatedTypes = GetRelatedTypes(); + relatedTypes.ForEach(t => TimeoutDictionary.Add(t, TimeoutInSeconds)); + } + } + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text/Utilities/ConditionalMatch.cs b/.NET/Microsoft.Recognizers.Text/Utilities/ConditionalMatch.cs index f18281a79e..8c3e41c008 100644 --- a/.NET/Microsoft.Recognizers.Text/Utilities/ConditionalMatch.cs +++ b/.NET/Microsoft.Recognizers.Text/Utilities/ConditionalMatch.cs @@ -1,4 +1,7 @@ -using System.Text.RegularExpressions; +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Utilities { diff --git a/.NET/Microsoft.Recognizers.Text/Utilities/EnumUtils.cs b/.NET/Microsoft.Recognizers.Text/Utilities/EnumUtils.cs index 97e8c0f94a..9e5481f29d 100644 --- a/.NET/Microsoft.Recognizers.Text/Utilities/EnumUtils.cs +++ b/.NET/Microsoft.Recognizers.Text/Utilities/EnumUtils.cs @@ -1,4 +1,8 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Globalization; namespace Microsoft.Recognizers.Text.Utilities { @@ -28,7 +32,7 @@ public static T Convert(int value) } else { - throw new ArgumentOutOfRangeException(value.ToString(), "Bad configuration parameter value."); + throw new ArgumentOutOfRangeException(value.ToString(CultureInfo.InvariantCulture), "Bad configuration parameter value."); } } } diff --git a/.NET/Microsoft.Recognizers.Text/Utilities/QueryProcessor.cs b/.NET/Microsoft.Recognizers.Text/Utilities/QueryProcessor.cs index b6b0bbe125..43affbc12d 100644 --- a/.NET/Microsoft.Recognizers.Text/Utilities/QueryProcessor.cs +++ b/.NET/Microsoft.Recognizers.Text/Utilities/QueryProcessor.cs @@ -1,4 +1,7 @@ -using System.Globalization; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System.Globalization; using System.Linq; using System.Text; using System.Text.RegularExpressions; @@ -7,9 +10,11 @@ namespace Microsoft.Recognizers.Text.Utilities { public static class QueryProcessor { - private const string Expression = @"(?<=(\s|\d))(kB|K[Bb]|K|M[Bb]|M|G[Bb]|G|B)\b"; - private static readonly Regex SpecialTokensRegex = new Regex(Expression, RegexOptions.Compiled); + // Must be in sync with Base-Numbers YAML due to inter-dependency issue with different .NET targets + private const string CaseSensitiveTerms = @"(?<=(\s|\d))(kB|K[Bb]?|M[BbM]?|G[Bb]?|B)\b"; + + private static readonly Regex SpecialTokensRegex = new Regex(CaseSensitiveTerms, RegexOptions.Compiled); public static string Preprocess(string query, bool caseSensitive = false, bool recode = true) { @@ -27,6 +32,7 @@ public static string Preprocess(string query, bool caseSensitive = false, bool r query = query.Replace("9", "9"); query = query.Replace(":", ":"); query = query.Replace("-", "-"); + query = query.Replace("−", "-"); query = query.Replace(",", ","); query = query.Replace("/", "/"); query = query.Replace("G", "G"); diff --git a/.NET/Microsoft.Recognizers.Text/Utilities/RegExpUtility.cs b/.NET/Microsoft.Recognizers.Text/Utilities/RegExpUtility.cs index cd36d052cc..1eb6fe546f 100644 --- a/.NET/Microsoft.Recognizers.Text/Utilities/RegExpUtility.cs +++ b/.NET/Microsoft.Recognizers.Text/Utilities/RegExpUtility.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; using System.Text.RegularExpressions; namespace Microsoft.Recognizers.Text.Utilities @@ -43,31 +47,61 @@ public static ConditionalMatch MatchExact(this Regex regex, string text, bool tr return new ConditionalMatch(match, match.Success && match.Length == length); } + public static bool IsNullOrEmpty(ReadOnlySpan span) + { + return span == null || span.IsEmpty; + } + + // @TODO Inefficient. public static ConditionalMatch MatchEnd(this Regex regex, string text, bool trim) { var match = Regex.Match(text, regex.ToString(), RegexOptions.RightToLeft | regex.Options); - var strAfter = text.Substring(match.Index + match.Length); + + var strAfter = text.AsSpan(match.Index + match.Length); if (trim) { strAfter = strAfter.Trim(); } - return new ConditionalMatch(match, match.Success && string.IsNullOrEmpty(strAfter)); + return new ConditionalMatch(match, match.Success && IsNullOrEmpty(strAfter)); } // We can't trim before match as we may use the match index later public static ConditionalMatch MatchBegin(this Regex regex, string text, bool trim) { var match = regex.Match(text); - var strBefore = text.Substring(0, match.Index); + var strBefore = text.AsSpan(0, match.Index); if (trim) { strBefore = strBefore.Trim(); } - return new ConditionalMatch(match, match.Success && string.IsNullOrEmpty(strBefore)); + return new ConditionalMatch(match, match.Success && IsNullOrEmpty(strBefore)); + } + + // MatchBegin can fail if multiple matches are present in text (e.g. regex = "\b(A|B)\b", text = "B ... 
A ...") + public static ConditionalMatch MatchesBegin(this Regex regex, string text, bool trim) + { + var matches = regex.Matches(text); + foreach (Match match in matches) + { + var strBefore = text.AsSpan(0, match.Index); + + if (trim) + { + strBefore = strBefore.Trim(); + } + + bool isMatchBegin = match.Success && IsNullOrEmpty(strBefore); + if (isMatchBegin) + { + return new ConditionalMatch(match, match.Success && IsNullOrEmpty(strBefore)); + } + } + + return new ConditionalMatch(null, false); } public static string[] Split(Regex regex, string source) diff --git a/.NET/Microsoft.Recognizers.Text/Utilities/ResultsProcessor.cs b/.NET/Microsoft.Recognizers.Text/Utilities/ResultsProcessor.cs new file mode 100644 index 0000000000..ac802e42de --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text/Utilities/ResultsProcessor.cs @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Globalization; + +namespace Microsoft.Recognizers.Text.Utilities +{ + public static class ResultsProcessor + { + + public static void UpdateUnicodeOffsets(string query, ref List results) + { + + var origin = query; + + // Save UTF-16 code unit index to Unicode Text Element index + // + // Example : "nai\u0308ve X" will generate a lookup array of [0,1,2,-1,3,4,5,6]. + // When we try to find the word "X", we know the UTF-16 offset of "X" is 7, + // and the lookup[7] is 6, so the text element index of "X" is 6. 
+ var textElementIndex = new int[origin.Length]; + for (int i = 0; i < textElementIndex.Length; i++) + { + textElementIndex[i] = -1; + } + + var enumerator = StringInfo.GetTextElementEnumerator(origin); + int index = 0; + while (enumerator.MoveNext()) + { + textElementIndex[enumerator.ElementIndex] = index; + index++; + } + + foreach (var result in results) + { + var utf16Offset = result.Start; + var utf16End = result.End; + + result.Start = textElementIndex[utf16Offset]; + result.End = result.Start + new StringInfo(result.Text).LengthInTextElements - 1; + } + + Console.WriteLine(); + } + } +} diff --git a/.NET/README.md b/.NET/README.md index 0a8a535804..c0389efa19 100644 --- a/.NET/README.md +++ b/.NET/README.md @@ -4,7 +4,7 @@ Recognizer's are organized into groups and designed to be used in C#, Node.js, Python and Java to help you build great applications! To use the samples clone our GitHub repository using Git. - git clone https://github.com/Microsoft/Recognizers-Tex.git + git clone https://github.com/Microsoft/Recognizers-Text.git cd Recognizers-Text ## Setup @@ -169,7 +169,7 @@ Internally, both methods will cache the instance models to avoid extra costs. This model will find any Ipv4/Ipv6 presented. E.g. "My Ip is 8.8.8.8". 
- `SequenceRecognizer.RecognizeIpAddress"My Ip is 8.8.8.8", Culture.English)` + `SequenceRecognizer.RecognizeIpAddress("My Ip is 8.8.8.8", Culture.English)` Or you can obtain a model instance using: diff --git a/.NET/Recognizers-Text.ruleset b/.NET/Recognizers-Text.ruleset index f71135efbd..0418fc4f9e 100644 --- a/.NET/Recognizers-Text.ruleset +++ b/.NET/Recognizers-Text.ruleset @@ -357,7 +357,14 @@ + + + + + + + @@ -449,7 +456,7 @@ - + diff --git a/.NET/Samples/BotBuilder/BotBuilderRecognizerBot.csproj b/.NET/Samples/BotBuilder/BotBuilderRecognizerBot.csproj index 5cb3b7c282..15401c6aa1 100644 --- a/.NET/Samples/BotBuilder/BotBuilderRecognizerBot.csproj +++ b/.NET/Samples/BotBuilder/BotBuilderRecognizerBot.csproj @@ -18,8 +18,10 @@ - - + + + + @@ -38,7 +40,7 @@ - + all runtime; build; native; contentfiles; analyzers diff --git a/.NET/Samples/BotBuilder/Dialogs/DeliveryDialog.cs b/.NET/Samples/BotBuilder/Dialogs/DeliveryDialog.cs index d296f44e0f..206f27bc2a 100644 --- a/.NET/Samples/BotBuilder/Dialogs/DeliveryDialog.cs +++ b/.NET/Samples/BotBuilder/Dialogs/DeliveryDialog.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; using System.Collections.Generic; using System.Linq; using System.Threading; diff --git a/.NET/Samples/BotBuilder/Dialogs/DeliveryState.cs b/.NET/Samples/BotBuilder/Dialogs/DeliveryState.cs index cdb9fb9335..760a5e0f5f 100644 --- a/.NET/Samples/BotBuilder/Dialogs/DeliveryState.cs +++ b/.NET/Samples/BotBuilder/Dialogs/DeliveryState.cs @@ -1,4 +1,7 @@ -namespace BotBuilderRecognizerBot +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +namespace BotBuilderRecognizerBot { public class DeliveryState { diff --git a/.NET/Samples/RecognizerFunction/AnalyzeEntities.cs b/.NET/Samples/RecognizerFunction/AnalyzeEntities.cs index 6734c35d62..2de545e83c 100644 --- a/.NET/Samples/RecognizerFunction/AnalyzeEntities.cs +++ b/.NET/Samples/RecognizerFunction/AnalyzeEntities.cs @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + using System.Collections.Generic; using System.IO; using System.Threading.Tasks; @@ -122,6 +125,10 @@ private static IEnumerable ParseAll(string query, string culture) // E.g "bing.com" SequenceRecognizer.RecognizeURL(query, culture), + // Quoted text recognizer + // E.g "I meant "no"" + SequenceRecognizer.RecognizeQuotedText(query, culture), + // Add Boolean recognizer - This model will find yes/no like responses, including emoji - // E.g "yup, I need that" will return "True" ChoiceRecognizer.RecognizeBoolean(query, culture) diff --git a/.NET/Samples/RecognizerFunction/RecognizerFunction.csproj b/.NET/Samples/RecognizerFunction/RecognizerFunction.csproj index e514736685..6c0f1159bb 100644 --- a/.NET/Samples/RecognizerFunction/RecognizerFunction.csproj +++ b/.NET/Samples/RecognizerFunction/RecognizerFunction.csproj @@ -1,18 +1,18 @@  - netstandard2.0 - v2 + net6.0 + v3 © Microsoft Corporation. All rights reserved. - + all runtime; build; native; contentfiles; analyzers - + + - diff --git a/.NET/Samples/SimpleConsole/Program.cs b/.NET/Samples/SimpleConsole/Program.cs index 3dd4d06c51..b961bd0437 100644 --- a/.NET/Samples/SimpleConsole/Program.cs +++ b/.NET/Samples/SimpleConsole/Program.cs @@ -1,6 +1,12 @@ -using System; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; using System.Collections.Generic; +using System.Globalization; using System.Linq; +using System.Text; +using System.Text.RegularExpressions; using Microsoft.Recognizers.Text; using Microsoft.Recognizers.Text.Choice; using Microsoft.Recognizers.Text.DateTime; @@ -18,26 +24,46 @@ public static class Program public static void Main(string[] args) { + // Enable support for multiple encodings, especially in .NET Core + Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); + ShowIntro(); + Console.InputEncoding = Encoding.UTF8; + Console.OutputEncoding = Encoding.UTF8; + string culture = DefaultCulture; + bool cultureSet = false; + while (true) { + if (!cultureSet) + { + culture = SetCulture(); + cultureSet = true; + } + // Read the text to recognize Console.WriteLine("Enter the text to recognize:"); var input = Console.ReadLine()?.Trim(); Console.WriteLine(); - if (input?.ToLower() == "exit") + if (input?.ToLower(CultureInfo.InvariantCulture) == "exit") { // Close application if user types "exit" break; } + if (input?.ToLower(CultureInfo.InvariantCulture) == "switch") + { + cultureSet = false; + continue; + } + // Validate input if (input?.Length > 0) { // Retrieve all the parsers and call 'Parse' to recognize all the values from the user input - var results = ParseAll(input, DefaultCulture); + var results = ParseAll(input, culture); // Write output Console.WriteLine(results.Any() ? $"I found the following entities ({results.Count():d}):" : "I found no entities."); @@ -47,6 +73,34 @@ public static void Main(string[] args) } } + private static string SetCulture() + { + string supportedCultures = string.Empty; + for (int i = 0; i < Culture.SupportedCultures.Length; i++) + { + supportedCultures += (i + 1) + ": " + Culture.SupportedCultures[i].CultureName + + ((i == Culture.SupportedCultures.Length - 1) ? 
string.Empty : Environment.NewLine); + } + + Console.WriteLine(supportedCultures + Environment.NewLine + "Please select language: "); + string culture = string.Empty; + if (int.TryParse(Console.ReadLine()?.Trim(), out int num) && num >= 1 && num <= Culture.SupportedCultures.Length) + { + culture = Culture.SupportedCultures[num - 1].CultureCode; + } + else + { + culture = DefaultCulture; + } + + var cultureName = Culture.SupportedCultures + .Where(c => c.CultureCode == culture) + .Select(c => c.CultureName) + .FirstOrDefault(); + Console.WriteLine("Culture {0},{1} is set.", cultureName, culture); + return culture; + } + /// /// Parse query with all recognizers. /// @@ -118,6 +172,10 @@ private static IEnumerable ParseAll(string query, string culture) // E.g "{123e4567-e89b-12d3-a456-426655440000}" SequenceRecognizer.RecognizeGUID(query, culture), + // Quoted text recognizer + // E.g "I meant "no"" + SequenceRecognizer.RecognizeQuotedText(query, culture), + // Add Boolean recognizer - This model will find yes/no like responses, including emoji - // E.g "yup, I need that" will return "True" ChoiceRecognizer.RecognizeBoolean(query, culture), diff --git a/.NET/Samples/SimpleConsole/SimpleConsole.csproj b/.NET/Samples/SimpleConsole/SimpleConsole.csproj index aa2c6ba14d..34f7308b44 100644 --- a/.NET/Samples/SimpleConsole/SimpleConsole.csproj +++ b/.NET/Samples/SimpleConsole/SimpleConsole.csproj @@ -2,7 +2,8 @@ Exe - netcoreapp2.1 + net6.0 + 9 © Microsoft Corporation. All rights reserved. 
../../Recognizers-Text.ruleset @@ -20,12 +21,12 @@ - + all runtime; build; native; contentfiles; analyzers - - + + all runtime; build; native; contentfiles; analyzers diff --git a/.NET/buildtools/checkSpec.ps1 b/.NET/buildtools/checkSpec.ps1 index 6052d349fc..6bd56186bf 100644 --- a/.NET/buildtools/checkSpec.ps1 +++ b/.NET/buildtools/checkSpec.ps1 @@ -11,12 +11,25 @@ $global:duplicateFileDict = New-Object System.Collections.Hashtable function SpecInfo() { + Write-Host("SpecInfo start...") + foreach ($file in $input) { $parentName = $file.FullName | Split-Path -parent | Split-Path -leaf $typeFolder = $file.FullName | Split-Path -parent | Split-Path -parent | Split-Path -leaf - $contents = Get-Content $file.FullName -encoding utf8 | ConvertFrom-Json - CheckSpec -spec $contents -type $typeFolder -parent $parentName -name $file.Name + + try + { + $contents = Get-Content $file.FullName -encoding utf8 | ConvertFrom-Json + CheckSpec -spec $contents -type $typeFolder -parent $parentName -name $file.Name + } + catch + { + Write-Host("Error decoding spec file:`t" + $file.FullName) + Write-Warning(($Error[0] -split '\n')[0]) + exit 2 + } + } Write-Host("Total invalid input test cases:`t" + $global:totalEmpty) diff --git a/.NET/buildtools/nuget.exe b/.NET/buildtools/nuget.exe index fb4eb39f01..feb104d4e8 100644 Binary files a/.NET/buildtools/nuget.exe and b/.NET/buildtools/nuget.exe differ diff --git a/.NET/buildtools/tsComparer.ps1 b/.NET/buildtools/tsComparer.ps1 new file mode 100644 index 0000000000..73e2b8ab00 --- /dev/null +++ b/.NET/buildtools/tsComparer.ps1 @@ -0,0 +1,70 @@ +try +{ + $ttFile = $args[0] + + $result = $true + + $ttExtension = ".tt" + $codeExtension = ".cs" + $patternsExtension = ".yaml" + + $inputFile = Split-Path $ttFile -Leaf + $codeFile = $ttfile.Replace($ttExtension, $codeExtension) + + $isBase = $inputFile.Contains("Base") + + if (-Not $isBase) + { + $language = Split-Path $ttFile -Parent + $language = Split-Path $language -Leaf + $type = 
$inputFile.Replace("Definitions.tt", "") + } + else + { + $language = "Base" + $type = $inputFile.Replace("Base", "").Replace(".tt", "") + } + + #Write-Host $language $type + + $codeSubPath = ".NET\Microsoft.Recognizers.Definitions.Common" + $patternSubPath = "Patterns" + + $rootPath = $ttFile.Substring(0, $ttFile.IndexOf($codeSubPath)) + + if (-not $isBase) + { + $patternFile = [IO.Path]::Combine($rootPath, $patternSubPath, $language, $language + "-" + $type + $patternsExtension) + } + else + { + $patternFile = [IO.Path]::Combine($rootPath, $patternSubPath, $language + "-" + $type + $patternsExtension) + } + + # If code file is older than patterns file (time difference less than 0), we need to re-gen + $val = [datetime](Get-ItemProperty -Path $codeFile -Name LastWriteTime).lastwritetime -[datetime](Get-ItemProperty -Path $patternFile -Name LastWriteTime).lastwritetime + $result = $val -lt 0 + + #Write-Host + #Write-Host "TT:" $ttFile + #Write-Host "YAML:" $patternFile + #Write-Host $result + + if ($result) + { + #Write-Host "Re-gen for:" $patternFile + Write-Host 1 + exit 1 + } + else + { + Write-Host 0 + exit 0 + } +} +catch +{ + #Make sure any error causes a re-gen to happen + Write-Host 1 + exit 1 +} \ No newline at end of file diff --git a/.NET/images/icon.png b/.NET/images/icon.png new file mode 100644 index 0000000000..b42b11ad96 Binary files /dev/null and b/.NET/images/icon.png differ diff --git a/.NET/test-pack.sh b/.NET/test-pack.sh new file mode 100644 index 0000000000..4d1979e577 --- /dev/null +++ b/.NET/test-pack.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +nugetExe=$1 +version=$2 +echo "Version: $version" + +targetDir="./test-pack" + +config="release;basic=$version;number=$version;numberWithUnit=$version" + +$nugetExe pack ./Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.nuspec -NonInteractive -OutputDirectory $targetDir -Properties Configuration=$config -Symbols -version "$version" -Verbosity Detailed +$nugetExe pack 
./Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.nuspec -NonInteractive -OutputDirectory $targetDir -Properties Configuration=$config -Symbols -version "$version" -Verbosity Detailed +$nugetExe pack ./Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.nuspec -NonInteractive -OutputDirectory $targetDir -Properties Configuration=$config -Symbols -version "$version" -Verbosity Detailed +$nugetExe pack ./Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.nuspec -NonInteractive -OutputDirectory $targetDir -Properties Configuration=$config -Symbols -version "$version" -Verbosity Detailed +$nugetExe pack ./Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.nuspec -NonInteractive -OutputDirectory $targetDir -Properties Configuration=$config -Symbols -version "$version" -Verbosity Detailed +$nugetExe pack ./Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.nuspec -NonInteractive -OutputDirectory $targetDir -Properties Configuration=$config -Symbols -version "$version" -Verbosity Detailed +$nugetExe pack ./Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.nuspec -NonInteractive -OutputDirectory $targetDir -Properties Configuration=$config -Symbols -version "$version" -Verbosity Detailed + diff --git a/.NET/tests.ci.cmd b/.NET/tests.ci.cmd index 131cd7505a..bd64c2fb4d 100644 --- a/.NET/tests.ci.cmd +++ b/.NET/tests.ci.cmd @@ -29,7 +29,7 @@ FOR /R %%f IN (*Tests.dll) DO ( ) ECHO "!VsTestDir!\vstest.console" -CALL "!VsTestDir!\vstest.console" /Parallel %testcontainer% +CALL "!VsTestDir!\vstest.console" /Logger:"console;verbosity=minimal" /Parallel %testcontainer% IF %ERRORLEVEL% NEQ 0 GOTO TEST_ERROR ECHO. 
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index e02cdb8177..863845319f 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -21,7 +21,7 @@ A clear and concise description of what you expected to happen. As applicable, add examples of text input and json output to help explain the problem. **Platform (please complete the following information):** - - Platform: [e.g. .NER, JavaScript, Python, Java] + - Platform: [e.g. .NET, JavaScript, Python, Java] - Environment: [e.g. nuget package, npm package, LUIS, console app, in browser] - Version of package [e.g. v1.0.8.1] diff --git a/.gitignore b/.gitignore index 67f7575e48..4583882703 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,10 @@ Python/.cache/v/cache/lastfailed /.vs /.vscode /JavaScript/*.lerna_backup -.DS_Store \ No newline at end of file +.DS_Store +/JavaScript/packages/**/*.tgz +.history +/Tools/src/ValidationTool/bin +/Tools/src/ValidationTool/obj +/Python/venv +/Tools/.vs/Microsoft.Recognizers.Text.Validation/v16 diff --git a/Java/README.md b/Java/README.md index 5d285ade6c..4e9ef56fb4 100644 --- a/Java/README.md +++ b/Java/README.md @@ -18,6 +18,61 @@ Open a terminal and run the following commands: cd Java mvn clean install +### Installation +Install Recognizer's by adding the following dependencies in your `pom.xml`: +- Get core Recognizer's features: + ````xml + + com.microsoft.recognizers.text + recognizers-text + 1.0-SNAPSHOT + + ```` +- Get numbers Recognizer's features: + ````xml + + com.microsoft.recognizers.text.number + recognizers-text-number + 1.0-SNAPSHOT + + ```` + +- Get numbers with units Recognizer's features: + ````xml + + com.microsoft.recognizers.text.numberwithunit + recognizers-text-number-with-unit + 1.0-SNAPSHOT + + ```` + +- Get datetime Recognizer's features: + ````xml + + com.microsoft.recognizers.text.datetime + recognizers-text-date-time + 1.0-SNAPSHOT + + ```` + +- Get sequence 
Recognizer's features: + ````xml + + com.microsoft.recognizers.text.sequence + recognizers-text-sequence + 1.0-SNAPSHOT + + ```` + +- Get choice Recognizer's features: + ````xml + + com.microsoft.recognizers.text.choice + recognizers-text-choice + 1.0-SNAPSHOT + + ```` + ## API Documentation Once the proper modules are installed, you'll need to import the modules: diff --git a/Java/build.ci.cmd b/Java/build.ci.cmd index 10d4f95345..9999dea607 100644 --- a/Java/build.ci.cmd +++ b/Java/build.ci.cmd @@ -10,10 +10,11 @@ DEL /S /Q libraries\recognizers-text-number\src\main\java\com\microsoft\recogniz DEL /S /Q libraries\recognizers-text-number-with-unit\src\main\java\com\microsoft\recognizers\text\numberwithunit\resources\*.java DEL /S /Q libraries\recognizers-text-date-time\src\main\java\com\microsoft\recognizers\text\datetime\resources\*.java DEL /S /Q libraries\recognizers-text-choice\src\main\java\com\microsoft\recognizers\text\choice\resources\*.java +DEL /S /Q libraries\recognizers-text-sequence\src\main\java\com\microsoft\recognizers\text\sequence\resources\*.java ECHO # Generate resources CALL set MAVEN_OPTS=-Dfile.encoding=utf-8 -CALL mvn compile exec:java -pl libraries/resource-generator/ +CALL mvn compile exec:java --batch-mode -pl libraries/resource-generator/ ECHO # Building Java platform CALL mvn clean package --batch-mode -Dmaven.test.skip=true @@ -23,4 +24,4 @@ IF %ERRORLEVEL% NEQ 0 ( EXIT /b %ERRORLEVEL% ) -ECHO ============================== JAVA BUILD END ============================== \ No newline at end of file +ECHO ============================== JAVA BUILD END ============================== diff --git a/Java/libraries/recognizers-text-choice/pom.xml b/Java/libraries/recognizers-text-choice/pom.xml index 7fdb022edb..25025a1bae 100644 --- a/Java/libraries/recognizers-text-choice/pom.xml +++ b/Java/libraries/recognizers-text-choice/pom.xml @@ -47,7 +47,7 @@ com.google.guava guava - 24.1-jre + 29.0-jre diff --git 
a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/ChoiceOptions.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/ChoiceOptions.java index 16470e7079..69cd666d2e 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/ChoiceOptions.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/ChoiceOptions.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + package com.microsoft.recognizers.text.choice; public enum ChoiceOptions { diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/ChoiceRecognizer.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/ChoiceRecognizer.java index 782046b0f0..14d90fd297 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/ChoiceRecognizer.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/ChoiceRecognizer.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + package com.microsoft.recognizers.text.choice; import com.microsoft.recognizers.text.Culture; diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/Constants.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/Constants.java index 3db27c8dc5..b225387f2a 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/Constants.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/Constants.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + package com.microsoft.recognizers.text.choice; public class Constants { diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/config/BooleanParserConfiguration.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/config/BooleanParserConfiguration.java index e19e35402f..84e4cdaa2b 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/config/BooleanParserConfiguration.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/config/BooleanParserConfiguration.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + package com.microsoft.recognizers.text.choice.config; import com.google.common.collect.ImmutableMap; diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/config/IChoiceParserConfiguration.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/config/IChoiceParserConfiguration.java index d56112251c..09435fb2d2 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/config/IChoiceParserConfiguration.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/config/IChoiceParserConfiguration.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ package com.microsoft.recognizers.text.choice.config; import java.util.Map; diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/english/extractors/EnglishBooleanExtractorConfiguration.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/english/extractors/EnglishBooleanExtractorConfiguration.java index 8b2c4ab653..ff889cf238 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/english/extractors/EnglishBooleanExtractorConfiguration.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/english/extractors/EnglishBooleanExtractorConfiguration.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + package com.microsoft.recognizers.text.choice.english.extractors; import com.microsoft.recognizers.text.choice.Constants; diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/BooleanExtractor.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/BooleanExtractor.java index 6f9b8036f7..38e4bfde30 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/BooleanExtractor.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/BooleanExtractor.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ package com.microsoft.recognizers.text.choice.extractors; public class BooleanExtractor extends ChoiceExtractor { diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/ChoiceExtractDataResult.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/ChoiceExtractDataResult.java index 7fd89481a0..7ff985031d 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/ChoiceExtractDataResult.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/ChoiceExtractDataResult.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + package com.microsoft.recognizers.text.choice.extractors; import com.microsoft.recognizers.text.ExtractResult; diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/ChoiceExtractor.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/ChoiceExtractor.java index 55f351bb99..ee2f621b00 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/ChoiceExtractor.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/ChoiceExtractor.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ package com.microsoft.recognizers.text.choice.extractors; import com.microsoft.recognizers.text.ExtractResult; diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/IBooleanExtractorConfiguration.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/IBooleanExtractorConfiguration.java index fbcbe8e9a7..c05849b73c 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/IBooleanExtractorConfiguration.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/IBooleanExtractorConfiguration.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + package com.microsoft.recognizers.text.choice.extractors; import java.util.regex.Pattern; diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/IChoiceExtractorConfiguration.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/IChoiceExtractorConfiguration.java index 609a3b0d44..fc5143cdf7 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/IChoiceExtractorConfiguration.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/extractors/IChoiceExtractorConfiguration.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ package com.microsoft.recognizers.text.choice.extractors; import java.util.Map; diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/models/BooleanModel.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/models/BooleanModel.java index 9c696f4463..67df7f66fa 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/models/BooleanModel.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/models/BooleanModel.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + package com.microsoft.recognizers.text.choice.models; import com.microsoft.recognizers.text.IExtractor; diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/models/ChoiceModel.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/models/ChoiceModel.java index 516bb121c6..558638d10b 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/models/ChoiceModel.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/models/ChoiceModel.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ package com.microsoft.recognizers.text.choice.models; import com.microsoft.recognizers.text.ExtractResult; diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/BooleanParser.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/BooleanParser.java index f0ce823825..9cb1403246 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/BooleanParser.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/BooleanParser.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + package com.microsoft.recognizers.text.choice.parsers; import com.microsoft.recognizers.text.choice.config.BooleanParserConfiguration; diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/ChoiceParser.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/ChoiceParser.java index 91316e36a6..b983cac493 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/ChoiceParser.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/ChoiceParser.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ package com.microsoft.recognizers.text.choice.parsers; import com.microsoft.recognizers.text.ExtractResult; diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/OptionsOtherMatchParseResult.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/OptionsOtherMatchParseResult.java index dd66f24b85..53e1dce7b9 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/OptionsOtherMatchParseResult.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/OptionsOtherMatchParseResult.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + package com.microsoft.recognizers.text.choice.parsers; public class OptionsOtherMatchParseResult { diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/OptionsParseDataResult.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/OptionsParseDataResult.java index bd08beaa80..c36fa74c3b 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/OptionsParseDataResult.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/parsers/OptionsParseDataResult.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ package com.microsoft.recognizers.text.choice.parsers; import java.util.ArrayList; diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/ChineseChoice.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/ChineseChoice.java index 19fa19526f..e7cba0bb05 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/ChineseChoice.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/ChineseChoice.java @@ -17,7 +17,11 @@ public class ChineseChoice { public static final String TokenizerRegex = "[^\\u3040-\\u30ff\\u3400-\\u4dbf\\u4e00-\\u9fff\\uf900-\\ufaff\\uff66-\\uff9f]"; - public static final String TrueRegex = "(好[的啊呀嘞哇]|没问题|可以|中|好|同意|行|是的|是|对)|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)"; + public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)"; - public static final String FalseRegex = "(不行|不好|拒绝|否定|不中|不可以|不是的|不是|不对|不)|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)"; + public static final String TrueRegex = "(好[的啊呀嘞哇]|没问题|可以|中|好|同意|行|是的|是|对)|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?" + .replace("{SkinToneRegex}", SkinToneRegex); + + public static final String FalseRegex = "(不行|不好|拒绝|否定|不中|不可以|不是的|不是|不对|不)|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?" 
+ .replace("{SkinToneRegex}", SkinToneRegex); } diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/EnglishChoice.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/EnglishChoice.java index 440e5a162a..82d06783b2 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/EnglishChoice.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/EnglishChoice.java @@ -17,7 +17,11 @@ public class EnglishChoice { public static final String TokenizerRegex = "[^\\w\\d]"; - public static final String TrueRegex = "\\b(true|yes|yep|yup|yeah|y|sure|ok|agree)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C|\\u0001f44c)"; + public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)"; - public static final String FalseRegex = "\\b(false|nope|nop|no|not\\s+ok|disagree)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90|\\u0001F44E|\\u0001F590)"; + public static final String TrueRegex = "\\b(true|yes|yep|yup|yeah|y|sure|ok|agree)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C|\\u0001f44c){SkinToneRegex}?" + .replace("{SkinToneRegex}", SkinToneRegex); + + public static final String FalseRegex = "\\b(false|nope|nop|no|not\\s+ok|disagree)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90|\\u0001F44E|\\u0001F590){SkinToneRegex}?" 
+ .replace("{SkinToneRegex}", SkinToneRegex); } diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/FrenchChoice.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/FrenchChoice.java index 7dcff1c3a6..2f3e0104d9 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/FrenchChoice.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/FrenchChoice.java @@ -17,7 +17,11 @@ public class FrenchChoice { public static final String TokenizerRegex = "[^\\w\\d\\u00E0-\\u00FC]"; - public static final String TrueRegex = "\\b(s[uû]r|ouais|oui|yep|y|sure|approuver|accepter|consentir|d'accord|ça march[eé])\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)"; + public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)"; - public static final String FalseRegex = "\\b(faux|nan|non|pas\\s+d'accord|pas\\s+concorder|n'est\\s+pas\\s+(correct|ok)|pas)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)"; + public static final String TrueRegex = "\\b(s[uû]r|ouais|oui|yep|y|sure|approuver|accepter|consentir|d'accord|ça march[eé])\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?" + .replace("{SkinToneRegex}", SkinToneRegex); + + public static final String FalseRegex = "\\b(faux|nan|non|pas\\s+d'accord|pas\\s+concorder|n'est\\s+pas\\s+(correct|ok)|pas)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?" 
+ .replace("{SkinToneRegex}", SkinToneRegex); } diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/PortugueseChoice.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/PortugueseChoice.java index 7c6b48ef01..8a33849f30 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/PortugueseChoice.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/PortugueseChoice.java @@ -17,7 +17,11 @@ public class PortugueseChoice { public static final String TokenizerRegex = "[^\\w\\d\\u00E0-\\u00FC]"; - public static final String TrueRegex = "\\b(verdade|verdadeir[oa]|sim|isso|claro|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)"; + public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)"; - public static final String FalseRegex = "\\b(falso|n[aã]o|incorreto|nada disso)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)"; + public static final String TrueRegex = "\\b(verdade|verdadeir[oa]|sim|isso|claro|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?" + .replace("{SkinToneRegex}", SkinToneRegex); + + public static final String FalseRegex = "\\b(falso|n[aã]o|incorreto|nada disso)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?" 
+ .replace("{SkinToneRegex}", SkinToneRegex); } diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/SpanishChoice.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/SpanishChoice.java index d3cd40413b..2752bcb4ac 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/SpanishChoice.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/SpanishChoice.java @@ -17,7 +17,11 @@ public class SpanishChoice { public static final String TokenizerRegex = "[^\\w\\d\\u00E0-\\u00FC]"; - public static final String TrueRegex = "\\b(verdad|verdadero|sí|sip|s|si|cierto|por supuesto|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)"; + public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)"; - public static final String FalseRegex = "\\b(falso|no|nop|n|no)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)"; + public static final String TrueRegex = "\\b(verdad|verdadero|sí|sip|s|si|cierto|por supuesto|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?" + .replace("{SkinToneRegex}", SkinToneRegex); + + public static final String FalseRegex = "\\b(falso|no|nop|n|no)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?" 
+ .replace("{SkinToneRegex}", SkinToneRegex); } diff --git a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/utilities/UnicodeUtils.java b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/utilities/UnicodeUtils.java index fe7a281a47..03f0acca69 100644 --- a/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/utilities/UnicodeUtils.java +++ b/Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/utilities/UnicodeUtils.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + package com.microsoft.recognizers.text.choice.utilities; import java.lang.Character; diff --git a/Java/libraries/recognizers-text-datatypes-timex-expression/pom.xml b/Java/libraries/recognizers-text-datatypes-timex-expression/pom.xml new file mode 100644 index 0000000000..69239c0edf --- /dev/null +++ b/Java/libraries/recognizers-text-datatypes-timex-expression/pom.xml @@ -0,0 +1,78 @@ + + + 4.0.0 + + com.microsoft.recognizers.text.datatypes.timex.expression + recognizers-text-datatypes-timex-expression + 1.0-SNAPSHOT + jar + + ${project.groupId}:${project.artifactId} + Microsoft.Recognizers.Text.DataTypes.TimexExpression provides parsing and evaluation of TIMEX expressions. 
+ https://github.com/Microsoft/Recognizers-Text/ + + + com.microsoft.recognizers.text + recognizers-text-java + 1.0-SNAPSHOT + ../../ + + + + + MIT License + http://www.opensource.org/licenses/mit-license.php + + + + + + Microsoft Recognizers-Text + + Microsoft + https://www.microsoft.com/ + + + + UTF-8 + + + + junit + junit + 4.13.1 + test + + + org.apache.commons + commons-lang3 + 3.7 + + + com.microsoft.recognizers.text + recognizers-text + 1.0-SNAPSHOT + + + com.google.guava + guava + 29.0-jre + compile + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.7.0 + + 1.8 + 1.8 + + + + + \ No newline at end of file diff --git a/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/Constants.java b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/Constants.java new file mode 100644 index 0000000000..b1e1d9ee40 --- /dev/null +++ b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/Constants.java @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.microsoft.recognizers.datatypes.timex.expression; + +public class Constants { + + // Timex + public static final String TIMEX_YEAR = "Y"; + public static final String TIMEX_MONTH = "M"; + public static final String TIMEX_MONTH_FULL = "MON"; + public static final String TIMEX_WEEK = "W"; + public static final String TIMEX_DAY = "D"; + public static final String TIMEX_BUSINESS_DAY = "BD"; + public static final String TIMEX_WEEKEND = "WE"; + public static final String TIMEX_HOUR = "H"; + public static final String TIMEX_MINUTE = "M"; + public static final String TIMEX_SECOND = "S"; + public static final String TIMEX_NIGHT = "NI"; + public static final Character TIMEX_FUZZY = 'X'; + public static final String TIMEX_FUZZY_YEAR = "XXXX"; + public static final String TIMEX_FUZZY_MONTH = "XX"; + public static final String TIMEX_FUZZY_WEEK = "WXX"; + public static final String TIMEX_FUZZY_DAY = "XX"; + public static final String DATE_TIMEX_CONNECTOR = "-"; + public static final String TIME_TIMEX_CONNECTOR = ":"; + public static final String GENERAL_PERIOD_PREFIX = "P"; + public static final String TIME_TIMEX_PREFIX = "T"; + + public static final String YEAR_UNIT = "year"; + public static final String MONTH_UNIT = "month"; + public static final String WEEK_UNIT = "week"; + public static final String DAY_UNIT = "day"; + public static final String HOUR_UNIT = "hour"; + public static final String MINUTE_UNIT = "minute"; + public static final String SECOND_UNIT = "second"; + public static final String TIME_DURATION_UNIT = "s"; + + public static final String AM = "AM"; + public static final String PM = "PM"; + + public static final int INVALID_VALUE = -1; + + public static class TimexTypes { + public static final String PRESENT = "present"; + public static final String DEFINITE = "definite"; + public static final String DATE = "date"; + public static final String DATE_TIME = "datetime"; + public static final String DATE_RANGE = "daterange"; + public static final 
String DURATION = "duration"; + public static final String TIME = "time"; + public static final String TIME_RANGE = "timerange"; + public static final String DATE_TIME_RANGE = "datetimerange"; + } +} diff --git a/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/DateRange.java b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/DateRange.java new file mode 100644 index 0000000000..6a2ecb3627 --- /dev/null +++ b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/DateRange.java @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.microsoft.recognizers.datatypes.timex.expression; + +import java.time.LocalDateTime; + +public class DateRange { + private LocalDateTime start; + private LocalDateTime end; + + public LocalDateTime getStart() { + return start; + } + + public void setStart(LocalDateTime withStart) { + this.start = withStart; + } + + public LocalDateTime getEnd() { + return end; + } + + public void setEnd(LocalDateTime withEnd) { + this.end = withEnd; + } +} diff --git a/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/Resolution.java b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/Resolution.java new file mode 100644 index 0000000000..1e7e23cc70 --- /dev/null +++ b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/Resolution.java @@ -0,0 +1,71 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
/**
 * Holds a mutable list of {@link Entry} objects.
 *
 * <p>The list is created empty by the constructor and exposed directly through
 * {@link #getValues()}; callers add entries to the returned list.
 */
public class Resolution {
    // Element type restored: the list only ever holds Entry objects.
    private final List<Entry> values;

    /** Creates a resolution with an empty value list. */
    public Resolution() {
        this.values = new ArrayList<>();
    }

    /**
     * Returns the live (not copied) list of entries.
     *
     * @return the backing list; modifications are visible to this object
     */
    public List<Entry> getValues() {
        return this.values;
    }

    /**
     * Plain data holder with five independent string properties:
     * timex, type, value, start and end. All are {@code null} until set.
     */
    public static class Entry {
        private String timex;

        private String type;

        private String value;

        private String start;

        private String end;

        public String getTimex() {
            return timex;
        }

        public void setTimex(String withTimex) {
            this.timex = withTimex;
        }

        public String getType() {
            return type;
        }

        public void setType(String withType) {
            this.type = withType;
        }

        public String getValue() {
            return value;
        }

        public void setValue(String withValue) {
            this.value = withValue;
        }

        public String getStart() {
            return start;
        }

        public void setStart(String withStart) {
            this.start = withStart;
        }

        public String getEnd() {
            return end;
        }

        public void setEnd(String withEnd) {
            this.end = withEnd;
        }
    }
}
/**
 * A time of day stored as separate hour, minute and second components.
 */
public class Time {
    private static final int MILLIS_PER_SECOND = 1000;
    private static final int MILLIS_PER_MINUTE = 60000;
    private static final int MILLIS_PER_HOUR = 3600000;

    private Integer hour;

    private Integer minute;

    private Integer second;

    /**
     * Splits a single numeric time value into hour, minute and second.
     *
     * <p>Despite the parameter name, the arithmetic treats the value as
     * milliseconds (one hour = 3600000), matching what {@link #getTime()} returns.
     *
     * @param withSeconds the total time, in milliseconds
     */
    public Time(Integer withSeconds) {
        int total = withSeconds;
        int wholeHours = Math.floorDiv(total, MILLIS_PER_HOUR);
        int afterHours = total - (wholeHours * MILLIS_PER_HOUR);
        int wholeMinutes = Math.floorDiv(afterHours, MILLIS_PER_MINUTE);

        this.hour = wholeHours;
        this.minute = wholeMinutes;
        this.second = (afterHours - (wholeMinutes * MILLIS_PER_MINUTE)) / MILLIS_PER_SECOND;
    }

    /**
     * Creates a time from explicit components; values are stored as given.
     */
    public Time(Integer withHour, Integer withMinute, Integer withSecond) {
        this.hour = withHour;
        this.minute = withMinute;
        this.second = withSecond;
    }

    /**
     * Recombines the components into one number.
     *
     * @return hour, minute and second expressed together in milliseconds
     */
    public Integer getTime() {
        int fromHours = this.hour * MILLIS_PER_HOUR;
        int fromMinutes = this.minute * MILLIS_PER_MINUTE;
        int fromSeconds = this.second * MILLIS_PER_SECOND;
        return fromSeconds + fromMinutes + fromHours;
    }

    public Integer getHour() {
        return this.hour;
    }

    public void setHour(Integer withHour) {
        this.hour = withHour;
    }

    public Integer getMinute() {
        return this.minute;
    }

    public void setMinute(Integer withMinute) {
        this.minute = withMinute;
    }

    public Integer getSecond() {
        return this.second;
    }

    public void setSecond(Integer withSecond) {
        this.second = withSecond;
    }
}
+ +package com.microsoft.recognizers.datatypes.timex.expression; + +public class TimeRange { + private Time start; + + private Time end; + + public Time getStart() { + return start; + } + + public void setStart(Time withStart) { + this.start = withStart; + } + + public Time getEnd() { + return end; + } + + public void setEnd(Time withEnd) { + this.end = withEnd; + } +} diff --git a/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexConstraintsHelper.java b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexConstraintsHelper.java new file mode 100644 index 0000000000..76d1953a66 --- /dev/null +++ b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexConstraintsHelper.java @@ -0,0 +1,102 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.microsoft.recognizers.datatypes.timex.expression; + +import java.util.List; + +public class TimexConstraintsHelper { + public static List collapseTimeRanges(List ranges) { + List r = ranges; + + while (TimexConstraintsHelper.innerCollapseTimeRanges(r)) { + + } + + r.sort((a, b) -> a.getStart().getTime() - b.getStart().getTime()); + + return r; + } + + public static List collapseDateRanges(List ranges) { + List r = ranges; + + while (TimexConstraintsHelper.innerCollapseDateRanges(r)) { + + } + + r.sort((a, b) -> a.getStart().compareTo(b.getStart())); + return r; + } + + public static Boolean isOverlapping(TimeRange r1, TimeRange r2) { + return (r1.getEnd().getTime() > r2.getStart().getTime() && r1.getStart().getTime() <= r2.getStart().getTime()) || + (r1.getStart().getTime() < r2.getEnd().getTime() && + r1.getStart().getTime() >= r2.getStart().getTime()); + } + + private static Boolean isOverlapping(DateRange r1, DateRange r2) { + return (r1.getEnd().isAfter(r2.getStart()) && (r1.getStart().isBefore(r2.getStart()) || r1.getStart().isEqual(r2.getStart()))) || + (r1.getStart().isBefore(r2.getEnd()) && (r1.getStart().isAfter(r2.getStart()) || r1.getStart().isEqual(r2.getStart()))); + } + + private static TimeRange collapseOverlapping(TimeRange r1, TimeRange r2) { + return new TimeRange() { + { + setStart(new Time(Math.max(r1.getStart().getTime(), r2.getStart().getTime()))); + setEnd(new Time(Math.min(r1.getEnd().getTime(), r2.getEnd().getTime()))); + } + }; + } + + private static DateRange collapseOverlapping(DateRange r1, DateRange r2) { + return new DateRange() { + { + setStart(r1.getStart().compareTo(r2.getStart()) > 0 ? r1.getStart() : r2.getStart()); + setEnd(r1.getEnd().compareTo(r2.getEnd()) < 0 ? 
r1.getEnd() : r2.getEnd()); + } + }; + } + + private static Boolean innerCollapseTimeRanges(List ranges) { + if (ranges.size() == 1) { + return false; + } + + for (int i = 0; i < ranges.size(); i++) { + TimeRange r1 = ranges.get(i); + for (int j = i + 1; j < ranges.size(); j++) { + TimeRange r2 = ranges.get(j); + if (TimexConstraintsHelper.isOverlapping(r1, r2)) { + ranges.subList(i, 1).clear(); + ranges.subList(j - 1, 1).clear(); + ranges.add(TimexConstraintsHelper.collapseOverlapping(r1, r2)); + return true; + } + } + } + + return false; + } + + private static Boolean innerCollapseDateRanges(List ranges) { + if (ranges.size() == 1) { + return false; + } + + for (int i = 0; i < ranges.size(); i++) { + DateRange r1 = ranges.get(i); + for (int j = i + 1; j < ranges.size(); j++) { + DateRange r2 = ranges.get(j); + if (TimexConstraintsHelper.isOverlapping(r1, r2)) { + ranges.subList(i, 1).clear(); + ranges.subList(j - 1, 1).clear(); + ranges.add(TimexConstraintsHelper.collapseOverlapping(r1, r2)); + return true; + } + } + } + + return false; + } +} diff --git a/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexConvert.java b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexConvert.java new file mode 100644 index 0000000000..9b78441771 --- /dev/null +++ b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexConvert.java @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.microsoft.recognizers.datatypes.timex.expression; + +import com.microsoft.recognizers.datatypes.timex.expression.english.TimexConvertEnglish; + +public class TimexConvert { + public static String convertTimexToString(TimexProperty timex) { + return TimexConvertEnglish.convertTimexToString(timex); + } + + public static String convertTimexSetToString(TimexSet timexSet) { + return TimexConvertEnglish.convertTimexSetToString(timexSet); + } +} diff --git a/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexCreator.java b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexCreator.java new file mode 100644 index 0000000000..94920fdfd4 --- /dev/null +++ b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexCreator.java @@ -0,0 +1,88 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.microsoft.recognizers.datatypes.timex.expression; + +import java.math.BigDecimal; +import java.time.DayOfWeek; +import java.time.LocalDateTime; + +public class TimexCreator { + // The following constants are consistent with the Recognizer results + public static final String MONDAY = "XXXX-WXX-1"; + public static final String TUESDAY = "XXXX-WXX-2"; + public static final String WEDNESDAY = "XXXX-WXX-3"; + public static final String THURSDAY = "XXXX-WXX-4"; + public static final String FRIDAY = "XXXX-WXX-5"; + public static final String SATURDAY = "XXXX-WXX-6"; + public static final String SUNDAY = "XXXX-WXX-7"; + public static final String MORNING = "(T08,T12,PT4H)"; + public static final String AFTERNOON = "(T12,T16,PT4H)"; + public static final String EVENING = "(T16,T20,PT4H)"; + public static final String DAYTIME = "(T08,T18,PT10H)"; + public static final String NIGHT = "(T20,T24,PT10H)"; + + public static String today(LocalDateTime date) { + return TimexProperty.fromDate(date == null ? LocalDateTime.now() : date).getTimexValue(); + } + + public static String tomorrow(LocalDateTime date) { + LocalDateTime d = (date == null) ? LocalDateTime.now() : date; + d = d.plusDays(1); + return TimexProperty.fromDate(d).getTimexValue(); + } + + public static String yesterday(LocalDateTime date) { + LocalDateTime d = (date == null) ? LocalDateTime.now() : date; + d = d.plusDays(-1); + return TimexProperty.fromDate(d).getTimexValue(); + } + + public static String weekFromToday(LocalDateTime date) { + LocalDateTime d = (date == null) ? LocalDateTime.now() : date; + TimexProperty t = TimexProperty.fromDate(d); + t.setDays(new BigDecimal(7)); + return t.getTimexValue(); + } + + public static String weekBackFromToday(LocalDateTime date) { + LocalDateTime d = (date == null) ? 
LocalDateTime.now() : date; + d = d.plusDays(-7); + TimexProperty t = TimexProperty.fromDate(d); + t.setDays(new BigDecimal(7)); + return t.getTimexValue(); + } + + public static String thisWeek(LocalDateTime date) { + LocalDateTime d = (date == null) ? LocalDateTime.now() : date; + d = d.plusDays(-7); + LocalDateTime start = TimexDateHelpers.dateOfNextDay(DayOfWeek.MONDAY, d); + TimexProperty t = TimexProperty.fromDate(start); + t.setDays(new BigDecimal(7)); + return t.getTimexValue(); + } + + public static String nextWeek(LocalDateTime date) { + LocalDateTime d = (date == null) ? LocalDateTime.now() : date; + LocalDateTime start = TimexDateHelpers.dateOfNextDay(DayOfWeek.MONDAY, d); + TimexProperty t = TimexProperty.fromDate(start); + t.setDays(new BigDecimal(7)); + return t.getTimexValue(); + } + + public static String lastWeek(LocalDateTime date) { + LocalDateTime d = (date == null) ? LocalDateTime.now() : date; + LocalDateTime start = TimexDateHelpers.dateOfLastDay(DayOfWeek.MONDAY, d); + start = start.plusDays(-7); + TimexProperty t = TimexProperty.fromDate(start); + t.setDays(new BigDecimal(7)); + return t.getTimexValue(); + } + + public static String nextWeeksFromToday(Integer n, LocalDateTime date) { + LocalDateTime d = (date == null) ? LocalDateTime.now() : date; + TimexProperty t = TimexProperty.fromDate(d); + t.setDays(new BigDecimal(n * 7)); + return t.getTimexValue(); + } +} diff --git a/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexDateHelpers.java b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexDateHelpers.java new file mode 100644 index 0000000000..cdd5707fba --- /dev/null +++ b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexDateHelpers.java @@ -0,0 +1,126 @@ +// Copyright (c) Microsoft Corporation. 
/**
 * Static date utilities built on {@link LocalDateTime}.
 * Methods that return a date return a new value; arguments are never modified.
 */
public class TimexDateHelpers {

    /** @return the given date plus one day */
    public static LocalDateTime tomorrow(LocalDateTime date) {
        return date.plusDays(1);
    }

    /** @return the given date minus one day */
    public static LocalDateTime yesterday(LocalDateTime date) {
        return date.plusDays(-1);
    }

    /** @return true when both values have the same year, month and day-of-month */
    public static Boolean datePartEquals(LocalDateTime dateX, LocalDateTime dateY) {
        return (dateX.getYear() == dateY.getYear()) &&
            (dateX.getMonthValue() == dateY.getMonthValue()) &&
            (dateX.getDayOfMonth() == dateY.getDayOfMonth());
    }

    /**
     * @return true when {@code date} falls on one of the seven days
     *         beginning at {@code startOfWeek}
     */
    public static boolean isDateInWeek(LocalDateTime date, LocalDateTime startOfWeek) {
        LocalDateTime d = startOfWeek;
        for (int i = 0; i < 7; i++) {
            if (TimexDateHelpers.datePartEquals(date, d)) {
                return true;
            }

            d = d.plusDays(1);
        }

        return false;
    }

    /**
     * Tests whether {@code date} lies in the seven-day window containing
     * {@code referenceDate}.
     *
     * <p>The window start is found by stepping back from the reference date
     * while its US day number (Sunday = 0) is greater than Monday's (1).
     * NOTE(review): a Sunday reference is therefore not stepped back, so its
     * window runs Sunday..Saturday instead of Monday..Sunday. Kept as-is;
     * confirm whether this is intended.
     */
    public static Boolean isThisWeek(LocalDateTime date, LocalDateTime referenceDate) {
        // Note ISO 8601 week starts on a Monday
        LocalDateTime startOfWeek = referenceDate;
        while (TimexDateHelpers.getUSDayOfWeek(startOfWeek.getDayOfWeek()) > TimexDateHelpers.getUSDayOfWeek(DayOfWeek.MONDAY)) {
            startOfWeek = startOfWeek.plusDays(-1);
        }

        return TimexDateHelpers.isDateInWeek(date, startOfWeek);
    }

    /** Same as {@link #isThisWeek} with the reference date moved forward seven days. */
    public static Boolean isNextWeek(LocalDateTime date, LocalDateTime referenceDate) {
        return TimexDateHelpers.isThisWeek(date, referenceDate.plusDays(7));
    }

    /** Same as {@link #isThisWeek} with the reference date moved back seven days. */
    public static Boolean isLastWeek(LocalDateTime date, LocalDateTime referenceDate) {
        return TimexDateHelpers.isThisWeek(date, referenceDate.plusDays(-7));
    }

    /**
     * Computes a week number as one plus the number of Sundays between
     * January 1st (inclusive) and {@code date} (exclusive) of the same year.
     */
    public static Integer weekOfYear(LocalDateTime date) {
        LocalDateTime ds = LocalDateTime.of(date.getYear(), 1, 1, 0, 0);
        LocalDateTime de = LocalDateTime.of(date.getYear(), date.getMonthValue(), date.getDayOfMonth(), 0, 0);
        Integer weeks = 1;

        while (ds.compareTo(de) < 0) {
            Integer dayOfWeek = TimexDateHelpers.getUSDayOfWeek(ds.getDayOfWeek());

            // US Sunday (0) maps to ISO day 7; passing a Sunday starts a new week.
            Integer isoDayOfWeek = (dayOfWeek == 0) ? 7 : dayOfWeek;
            if (isoDayOfWeek == 7) {
                weeks++;
            }

            ds = ds.plusDays(1);
        }

        return weeks;
    }

    /**
     * Left-pads the decimal text of {@code n} with '0' up to {@code size} characters.
     * Longer values are returned unchanged.
     */
    public static String fixedFormatNumber(Integer n, Integer size) {
        // Pad with spaces first, then turn the padding into zeros.
        return String.format("%1$" + size + "s", n.toString()).replace(' ', '0');
    }

    /** @return the closest date strictly before {@code referenceDate} that falls on {@code day} */
    public static LocalDateTime dateOfLastDay(DayOfWeek day, LocalDateTime referenceDate) {
        LocalDateTime result = referenceDate.plusDays(-1);

        while (result.getDayOfWeek() != day) {
            result = result.plusDays(-1);
        }

        return result;
    }

    /** @return the closest date strictly after {@code referenceDate} that falls on {@code day} */
    public static LocalDateTime dateOfNextDay(DayOfWeek day, LocalDateTime referenceDate) {
        LocalDateTime result = referenceDate.plusDays(1);

        while (result.getDayOfWeek() != day) {
            result = result.plusDays(1);
        }

        return result;
    }

    /**
     * Lists every date from {@code start} (inclusive) up to {@code end}'s
     * calendar date (exclusive) that falls on {@code day}.
     *
     * <p>Returned values keep {@code start}'s time of day. When {@code start}
     * is on or after {@code end}'s date the result is empty.
     */
    public static List<LocalDateTime> datesMatchingDay(DayOfWeek day, LocalDateTime start, LocalDateTime end) {
        List<LocalDateTime> result = new ArrayList<>();
        LocalDateTime d = start;

        // Bounded comparison: an equality-only stop never fires when start is after end.
        while (d.toLocalDate().isBefore(end.toLocalDate())) {
            if (d.getDayOfWeek() == day) {
                result.add(d);
            }

            d = d.plusDays(1);
        }

        return result;
    }

    /** Converts an ISO day number (Monday = 1 .. Sunday = 7) to Sunday = 0 .. Saturday = 6. */
    public static Integer getUSDayOfWeek(DayOfWeek dayOfWeek) {
        return dayOfWeek.getValue() % 7;
    }
}
b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexFormat.java @@ -0,0 +1,195 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.microsoft.recognizers.datatypes.timex.expression; + +import java.math.BigDecimal; +import java.text.NumberFormat; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; + +public class TimexFormat { + public static String format(TimexProperty timex) { + HashSet types = timex.getTypes().size() != 0 ? timex.getTypes() : TimexInference.infer(timex); + + if (types.contains(Constants.TimexTypes.PRESENT)) { + return "PRESENT_REF"; + } + + if ((types.contains(Constants.TimexTypes.DATE_TIME_RANGE) || types.contains(Constants.TimexTypes.DATE_RANGE) || + types.contains(Constants.TimexTypes.TIME_RANGE)) && types.contains(Constants.TimexTypes.DURATION)) { + TimexRange range = TimexHelpers.expandDateTimeRange(timex); + return String.format("(%1$s,%2$s,%3$s)", TimexFormat.format(range.getStart()), + TimexFormat.format(range.getEnd()), TimexFormat.format(range.getDuration())); + } + + if (types.contains(Constants.TimexTypes.DATE_TIME_RANGE)) { + return String.format("%1$s%2$s", TimexFormat.formatDate(timex), TimexFormat.formatTimeRange(timex)); + } + + if (types.contains(Constants.TimexTypes.DATE_RANGE)) { + return TimexFormat.formatDateRange(timex); + } + + if (types.contains(Constants.TimexTypes.TIME_RANGE)) { + return TimexFormat.formatTimeRange(timex); + } + + if (types.contains(Constants.TimexTypes.DATE_TIME)) { + return String.format("%1$s%2$s", TimexFormat.formatDate(timex), TimexFormat.formatTime(timex)); + } + + if (types.contains(Constants.TimexTypes.DURATION)) { + return TimexFormat.formatDuration(timex); + } + + if (types.contains(Constants.TimexTypes.DATE)) { + return TimexFormat.formatDate(timex); + } + + if 
(types.contains(Constants.TimexTypes.TIME)) { + return TimexFormat.formatTime(timex); + } + + return new String(); + } + + private static String formatDuration(TimexProperty timex) { + List timexList = new ArrayList(); + NumberFormat nf = NumberFormat.getInstance(Locale.getDefault()); + + if (timex.getYears() != null) { + nf.setMaximumFractionDigits(timex.getYears().scale()); + timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Year, + timex.getYears() != null ? timex.getYears() : BigDecimal.valueOf(Constants.INVALID_VALUE))); + } + + if (timex.getMonths() != null) { + nf.setMaximumFractionDigits(timex.getMonths().scale()); + timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Month, + timex.getMonths() != null ? timex.getMonths() : BigDecimal.valueOf(Constants.INVALID_VALUE))); + } + + if (timex.getWeeks() != null) { + nf.setMaximumFractionDigits(timex.getWeeks().scale()); + timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Week, + timex.getWeeks() != null ? timex.getWeeks() : BigDecimal.valueOf(Constants.INVALID_VALUE))); + } + + if (timex.getDays() != null) { + nf.setMaximumFractionDigits(timex.getDays().scale()); + timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Day, + timex.getDays() != null ? timex.getDays() : BigDecimal.valueOf(Constants.INVALID_VALUE))); + } + + if (timex.getHours() != null) { + nf.setMaximumFractionDigits(timex.getHours().scale()); + timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Hour, + timex.getHours() != null ? timex.getHours() : BigDecimal.valueOf(Constants.INVALID_VALUE))); + } + + if (timex.getMinutes() != null) { + nf.setMaximumFractionDigits(timex.getMinutes().scale()); + timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Minute, + timex.getMinutes() != null ? 
timex.getMinutes() : BigDecimal.valueOf(Constants.INVALID_VALUE))); + } + + if (timex.getSeconds() != null) { + nf.setMaximumFractionDigits(timex.getSeconds().scale()); + timexList.add(TimexHelpers.generateDurationTimex(TimexUnit.Second, + timex.getSeconds() != null ? timex.getSeconds() : BigDecimal.valueOf(Constants.INVALID_VALUE))); + } + + return TimexHelpers.generateCompoundDurationTimex(timexList); + } + + private static String formatTime(TimexProperty timex) { + if (timex.getMinute() == 0 && timex.getSecond() == 0) { + return String.format("T%s", TimexDateHelpers.fixedFormatNumber(timex.getHour(), 2)); + } + + if (timex.getSecond() == 0) { + return String.format("T%1$s:%2$s", TimexDateHelpers.fixedFormatNumber(timex.getHour(), 2), + TimexDateHelpers.fixedFormatNumber(timex.getMinute(), 2)); + } + + return String.format("T%1$s:%2$s:%3$s", TimexDateHelpers.fixedFormatNumber(timex.getHour(), 2), + TimexDateHelpers.fixedFormatNumber(timex.getMinute(), 2), + TimexDateHelpers.fixedFormatNumber(timex.getSecond(), 2)); + } + + private static String formatDate(TimexProperty timex) { + Integer year = timex.getYear() != null ? timex.getYear() : Constants.INVALID_VALUE; + Integer month = timex.getWeekOfYear() != null ? timex.getWeekOfYear() + : (timex.getMonth() != null ? timex.getMonth() : Constants.INVALID_VALUE); + Integer day = timex.getDayOfWeek() != null ? timex.getDayOfWeek() + : timex.getDayOfMonth() != null ? timex.getDayOfMonth() : Constants.INVALID_VALUE; + Integer weekOfMonth = timex.getWeekOfMonth() != null ? 
timex.getWeekOfMonth() : Constants.INVALID_VALUE; + + return TimexHelpers.generateDateTimex(year, month, day, weekOfMonth, timex.getDayOfWeek() != null); + } + + private static String formatDateRange(TimexProperty timex) { + if (timex.getYear() != null && timex.getWeekOfYear() != null && timex.getWeekend() != null) { + return String.format("%1$s-W%2$s-WE", TimexDateHelpers.fixedFormatNumber(timex.getYear(), 4), + TimexDateHelpers.fixedFormatNumber(timex.getWeekOfYear(), 2)); + } + + if (timex.getYear() != null && timex.getWeekOfYear() != null) { + return String.format("%1$s-W%2$s", TimexDateHelpers.fixedFormatNumber(timex.getYear(), 4), + TimexDateHelpers.fixedFormatNumber(timex.getWeekOfYear(), 2)); + } + + if (timex.getYear() != null && timex.getMonth() != null && timex.getWeekOfMonth() != null) { + return String.format("%1$s-%2$s-W%3$s", TimexDateHelpers.fixedFormatNumber(timex.getYear(), 4), + TimexDateHelpers.fixedFormatNumber(timex.getMonth(), 2), + TimexDateHelpers.fixedFormatNumber(timex.getWeekOfMonth(), 2)); + } + + if (timex.getYear() != null && timex.getSeason() != null) { + return String.format("%1$s-%2$s", TimexDateHelpers.fixedFormatNumber(timex.getYear(), 4), + timex.getSeason()); + } + + if (timex.getSeason() != null) { + return timex.getSeason(); + } + + if (timex.getYear() != null && timex.getMonth() != null) { + return String.format("%1$s-%2$s", TimexDateHelpers.fixedFormatNumber(timex.getYear(), 4), + TimexDateHelpers.fixedFormatNumber(timex.getMonth(), 2)); + } + + if (timex.getYear() != null) { + return TimexDateHelpers.fixedFormatNumber(timex.getYear(), 4); + } + + if (timex.getMonth() != null && timex.getWeekOfMonth() != null && timex.getDayOfWeek() != null) { + return String.format("%1$s-%2$s-%3$s-%4$s-%5$s", Constants.TIMEX_FUZZY_YEAR, + TimexDateHelpers.fixedFormatNumber(timex.getMonth(), 2), Constants.TIMEX_FUZZY_WEEK, + timex.getWeekOfMonth(), timex.getDayOfWeek()); + } + + if (timex.getMonth() != null && timex.getWeekOfMonth() != 
null) { + return String.format("%1$s-%2$s-W%3$02d", Constants.TIMEX_FUZZY_YEAR, + TimexDateHelpers.fixedFormatNumber(timex.getMonth(), 2), timex.getWeekOfMonth()); + } + + if (timex.getMonth() != null) { + return String.format("%1$s-%2$s", Constants.TIMEX_FUZZY_YEAR, + TimexDateHelpers.fixedFormatNumber(timex.getMonth(), 2)); + } + + return new String(); + } + + private static String formatTimeRange(TimexProperty timex) { + if (timex.getPartOfDay() != null) { + return String.format("T%s", timex.getPartOfDay()); + } + + return new String(); + } +} diff --git a/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexHelpers.java b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexHelpers.java new file mode 100644 index 0000000000..322c696deb --- /dev/null +++ b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexHelpers.java @@ -0,0 +1,515 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.microsoft.recognizers.datatypes.timex.expression; + +import java.math.BigDecimal; +import java.time.DayOfWeek; +import java.time.LocalDateTime; +import java.time.temporal.TemporalField; +import java.time.temporal.WeekFields; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; + +import org.apache.commons.lang3.tuple.Pair; + +public class TimexHelpers { + public static final HashMap TIMEX_UNIT_TO_STRING_MAP = new HashMap() { + { + put(TimexUnit.Year, Constants.TIMEX_YEAR); + put(TimexUnit.Month, Constants.TIMEX_MONTH); + put(TimexUnit.Week, Constants.TIMEX_WEEK); + put(TimexUnit.Day, Constants.TIMEX_DAY); + put(TimexUnit.Hour, Constants.TIMEX_HOUR); + put(TimexUnit.Minute, Constants.TIMEX_MINUTE); + put(TimexUnit.Second, Constants.TIMEX_SECOND); + } + }; + + public static final List TimeTimexUnitList = Arrays.asList(TimexUnit.Hour, TimexUnit.Minute, + TimexUnit.Second); + + public static TimexRange expandDateTimeRange(TimexProperty timex) { + HashSet types = timex.getTypes().size() != 0 ? 
timex.getTypes() : TimexInference.infer(timex); + + if (types.contains(Constants.TimexTypes.DURATION)) { + TimexProperty start = TimexHelpers.cloneDateTime(timex); + TimexProperty duration = TimexHelpers.cloneDuration(timex); + return new TimexRange() { + { + setStart(start); + setEnd(TimexHelpers.timexDateTimeAdd(start, duration)); + setDuration(duration); + } + }; + } else { + if (timex.getYear() != null) { + Pair dateRange; + if (timex.getMonth() != null && timex.getWeekOfMonth() != null) { + dateRange = TimexHelpers.monthWeekDateRange(timex.getYear(), timex.getMonth(), + timex.getWeekOfMonth()); + } else if (timex.getMonth() != null) { + dateRange = TimexHelpers.monthDateRange(timex.getYear(), timex.getMonth()); + } else if (timex.getWeekOfYear() != null) { + dateRange = TimexHelpers.yearWeekDateRange(timex.getYear(), timex.getWeekOfYear(), + timex.getWeekend()); + } else { + dateRange = TimexHelpers.yearDateRange(timex.getYear()); + } + return new TimexRange() { + { + setStart(dateRange.getLeft()); + setEnd(dateRange.getRight()); + } + }; + } + } + + return new TimexRange() { + { + setStart(new TimexProperty()); + setEnd(new TimexProperty()); + } + }; + } + + public static TimexRange expandTimeRange(TimexProperty timex) { + if (!timex.getTypes().contains(Constants.TimexTypes.TIME_RANGE)) { + throw new IllegalArgumentException("argument must be a timerange: timex"); + } + + if (timex.getPartOfDay() != null) { + switch (timex.getPartOfDay()) { + case "DT": + timex = new TimexProperty(TimexCreator.DAYTIME); + break; + case "MO": + timex = new TimexProperty(TimexCreator.MORNING); + break; + case "AF": + timex = new TimexProperty(TimexCreator.AFTERNOON); + break; + case "EV": + timex = new TimexProperty(TimexCreator.EVENING); + break; + case "NI": + timex = new TimexProperty(TimexCreator.NIGHT); + break; + default: + throw new IllegalArgumentException("unrecognized part of day timerange: timex"); + } + } + + Integer hour = timex.getHour(); + Integer minute = 
timex.getMinute(); + Integer second = timex.getSecond(); + TimexProperty start = new TimexProperty() { + { + setHour(hour); + setMinute(minute); + setSecond(second); + } + }; + TimexProperty duration = TimexHelpers.cloneDuration(timex); + + return new TimexRange() { + { + setStart(start); + setEnd(TimexHelpers.timeAdd(start, duration)); + setDuration(duration); + } + }; + } + + public static TimexProperty timexDateAdd(TimexProperty start, TimexProperty duration) { + if (start.getDayOfWeek() != null) { + TimexProperty end = start.clone(); + if (duration.getDays() != null) { + Integer newDayOfWeek = end.getDayOfWeek() + (int)Math.round(duration.getDays().doubleValue()); + end.setDayOfWeek(newDayOfWeek); + } + + return end; + } + + if (start.getMonth() != null && start.getDayOfMonth() != null) { + Double durationDays = null; + if (duration.getDays() != null) { + durationDays = duration.getDays().doubleValue(); + } + + if (durationDays == null && duration.getWeeks() != null) { + durationDays = 7 * duration.getWeeks().doubleValue(); + } + + if (durationDays != null) { + if (start.getYear() != null) { + LocalDateTime d = LocalDateTime.of(start.getYear(), start.getMonth(), start.getDayOfMonth(), 0, 0, + 0); + LocalDateTime d2 = d.plusDays(durationDays.longValue()); + return new TimexProperty() { + { + setYear(d2.getYear()); + setMonth(d2.getMonthValue()); + setDayOfMonth(d2.getDayOfMonth()); + } + }; + } else { + LocalDateTime d = LocalDateTime.of(2001, start.getMonth(), start.getDayOfMonth(), 0, 0, 0); + LocalDateTime d2 = d.plusDays(durationDays.longValue()); + return new TimexProperty() { + { + setMonth(d2.getMonthValue()); + setDayOfMonth(d2.getDayOfMonth()); + } + }; + } + } + + if (duration.getYears() != null) { + if (start.getYear() != null) { + return new TimexProperty() { + { + setYear(start.getYear() + (int)Math.round(duration.getYears().doubleValue())); + setMonth(start.getMonth()); + setDayOfMonth(start.getDayOfMonth()); + } + }; + } + } + + if 
(duration.getMonths() != null) { + if (start.getMonth() != null) { + return new TimexProperty() { + { + setYear(start.getYear()); + setMonth(start.getMonth() + (int)Math.round(duration.getMonths().doubleValue())); + setDayOfMonth(start.getDayOfMonth()); + } + }; + } + } + } + + return start; + } + + public static String generateCompoundDurationTimex(List timexList) { + Boolean isTimeDurationAlreadyExist = false; + StringBuilder timexBuilder = new StringBuilder(Constants.GENERAL_PERIOD_PREFIX); + + for (String timexComponent : timexList) { + // The Time Duration component occurs first time + if (!isTimeDurationAlreadyExist && isTimeDurationTimex(timexComponent)) { + timexBuilder.append(Constants.TIME_TIMEX_PREFIX.concat(getDurationTimexWithoutPrefix(timexComponent))); + isTimeDurationAlreadyExist = true; + } else { + timexBuilder.append(getDurationTimexWithoutPrefix(timexComponent)); + } + } + + return timexBuilder.toString(); + } + + public static String generateDateTimex(Integer year, Integer monthOrWeekOfYear, Integer day, Integer weekOfMonth, + boolean byWeek) { + String yearString = year == Constants.INVALID_VALUE ? Constants.TIMEX_FUZZY_YEAR + : TimexDateHelpers.fixedFormatNumber(year, 4); + String monthWeekString = monthOrWeekOfYear == Constants.INVALID_VALUE ? Constants.TIMEX_FUZZY_MONTH + : TimexDateHelpers.fixedFormatNumber(monthOrWeekOfYear, 2); + String dayString; + if (byWeek) { + dayString = day.toString(); + if (weekOfMonth != Constants.INVALID_VALUE) { + monthWeekString = monthWeekString + String.format("-%s-", Constants.TIMEX_FUZZY_WEEK) + + weekOfMonth.toString(); + } else { + monthWeekString = Constants.TIMEX_WEEK + monthWeekString; + } + } else { + dayString = day == Constants.INVALID_VALUE ? 
Constants.TIMEX_FUZZY_DAY + : TimexDateHelpers.fixedFormatNumber(day, 2); + } + + return String.join("-", yearString, monthWeekString, dayString); + } + + public static String generateDurationTimex(TimexUnit unit, BigDecimal value) { + if (value.intValue() == Constants.INVALID_VALUE) { + return new String(); + } + + StringBuilder timexBuilder = new StringBuilder(Constants.GENERAL_PERIOD_PREFIX); + if (TimeTimexUnitList.contains(unit)) { + timexBuilder.append(Constants.TIME_TIMEX_PREFIX); + } + + timexBuilder.append(value.toString()); + timexBuilder.append(TIMEX_UNIT_TO_STRING_MAP.get(unit)); + return timexBuilder.toString(); + } + + public static TimexProperty timexTimeAdd(TimexProperty start, TimexProperty duration) { + + TimexProperty result = start.clone(); + if (duration.getMinutes() != null) { + result.setMinute(result.getMinute() + (int)Math.round(duration.getMinutes().doubleValue())); + + if (result.getMinute() > 59) { + result.setHour(((result.getHour() != null) ? result.getHour() : 0) + 1); + result.setMinute(result.getMinute() % 60); + } + } + + if (duration.getHours() != null) { + result.setHour(result.getHour() + (int)Math.round(duration.getHours().doubleValue())); + } + + if (result.getHour() != null && result.getHour() > 23) { + Double days = Math.floor(result.getHour() / 24d); + Integer hour = result.getHour() % 24; + result.setHour(hour); + + if (result.getYear() != null && result.getMonth() != null && result.getDayOfMonth() != null) { + LocalDateTime d = LocalDateTime.of(result.getYear(), result.getMonth(), result.getDayOfMonth(), 0, 0, + 0); + d = d.plusDays(days.longValue()); + + result.setYear(d.getYear()); + result.setMonth(d.getMonthValue()); + result.setDayOfMonth(d.getDayOfMonth()); + + return result; + } + + if (result.getDayOfWeek() != null) { + result.setDayOfWeek(result.getDayOfWeek() + (int)Math.round(days)); + return result; + } + } + + return result; + } + + public static TimexProperty timexDateTimeAdd(TimexProperty start, 
TimexProperty duration) { + return TimexHelpers.timexTimeAdd(TimexHelpers.timexDateAdd(start, duration), duration); + } + + public static LocalDateTime dateFromTimex(TimexProperty timex) { + Integer year = timex.getYear() != null ? timex.getYear() : 2001; + Integer month = timex.getMonth() != null ? timex.getMonth() : 1; + Integer day = timex.getDayOfMonth() != null ? timex.getDayOfMonth() : 1; + Integer hour = timex.getHour() != null ? timex.getHour() : 0; + Integer minute = timex.getMinute() != null ? timex.getMinute() : 0; + Integer second = timex.getSecond() != null ? timex.getSecond() : 0; + LocalDateTime date = LocalDateTime.of(year, month, day, hour, minute, second); + + return date; + } + + public static Time timeFromTimex(TimexProperty timex) { + Integer hour = timex.getHour() != null ? timex.getHour() : 0; + Integer minute = timex.getMinute() != null ? timex.getMinute() : 0; + Integer second = timex.getSecond() != null ? timex.getSecond() : 0; + return new Time(hour, minute, second); + } + + public static DateRange dateRangeFromTimex(TimexProperty timex) { + TimexRange expanded = TimexHelpers.expandDateTimeRange(timex); + return new DateRange() { + { + setStart(TimexHelpers.dateFromTimex(expanded.getStart())); + setEnd(TimexHelpers.dateFromTimex(expanded.getEnd())); + } + }; + } + + public static TimeRange timeRangeFromTimex(TimexProperty timex) { + TimexRange expanded = TimexHelpers.expandTimeRange(timex); + return new TimeRange() { + { + setStart(TimexHelpers.timeFromTimex(expanded.getStart())); + setEnd(TimexHelpers.timeFromTimex(expanded.getEnd())); + } + }; + } + + public static String formatResolvedDateValue(String dateValue, String timeValue) { + return String.format("%1$s %2$s", dateValue, timeValue); + } + + public static Pair monthWeekDateRange(Integer year, Integer month, + Integer weekOfMonth) { + LocalDateTime start = TimexHelpers.generateMonthWeekDateStart(year, month, weekOfMonth); + LocalDateTime end = start.plusDays(7); + TimexProperty 
value1 = new TimexProperty() { + { + setYear(start.getYear()); + setMonth(start.getMonth().getValue()); + setDayOfMonth(start.getDayOfMonth()); + } + }; + TimexProperty value2 = new TimexProperty() { + { + setYear(end.getYear()); + setMonth(end.getMonth().getValue()); + setDayOfMonth(end.getDayOfMonth()); + } + }; + return Pair.of(value1, value2); + } + + public static Pair monthDateRange(Integer year, Integer month) { + TimexProperty value1 = new TimexProperty() { + { + setYear(year); + setMonth(month); + setDayOfMonth(1); + } + }; + TimexProperty value2 = new TimexProperty() { + { + setYear(month == 12 ? year + 1 : year); + setMonth(month == 12 ? 1 : month + 1); + setDayOfMonth(1); + } + }; + return Pair.of(value1, value2); + } + + public static Pair yearDateRange(Integer year) { + TimexProperty value1 = new TimexProperty() { + { + setYear(year); + setMonth(1); + setDayOfMonth(1); + } + }; + TimexProperty value2 = new TimexProperty() { + { + setYear(year + 1); + setMonth(1); + setDayOfMonth(1); + } + }; + return Pair.of(value1, value2); + } + + public static Pair yearWeekDateRange(Integer year, Integer weekOfYear, + Boolean isWeekend) { + LocalDateTime firstMondayInWeek = TimexHelpers.firstDateOfWeek(year, weekOfYear, null); + + LocalDateTime start = (isWeekend == null || !isWeekend) ? 
firstMondayInWeek + : TimexDateHelpers.dateOfNextDay(DayOfWeek.SATURDAY, firstMondayInWeek); + LocalDateTime end = firstMondayInWeek.plusDays(7); + TimexProperty value1 = new TimexProperty() { + { + setYear(start.getYear()); + setMonth(start.getMonth().getValue()); + setDayOfMonth(start.getDayOfMonth()); + } + }; + TimexProperty value2 = new TimexProperty() { + { + setYear(end.getYear()); + setMonth(end.getMonth().getValue()); + setDayOfMonth(end.getDayOfMonth()); + } + }; + return Pair.of(value1, value2); + } + + // this is based on + // https://stackoverflow.com/questions/19901666/get-date-of-first-and-last-day-of-week-knowing-week-number/34727270 + public static LocalDateTime firstDateOfWeek(Integer year, Integer weekOfYear, Locale cultureInfo) { + // ISO uses FirstFourDayWeek, and Monday as first day of week, according to + // https://en.wikipedia.org/wiki/ISO_8601 + LocalDateTime jan1 = LocalDateTime.of(year, 1, 1, 0, 0); + Integer daysOffset = DayOfWeek.MONDAY.getValue() - TimexDateHelpers.getUSDayOfWeek(jan1.getDayOfWeek()); + LocalDateTime firstWeekDay = jan1; + firstWeekDay = firstWeekDay.plusDays(daysOffset); + + TemporalField woy = WeekFields.ISO.weekOfYear(); + Integer firstWeek = jan1.get(woy); + + if ((firstWeek <= 1 || firstWeek >= 52) && daysOffset >= -3) { + weekOfYear -= 1; + } + + firstWeekDay = firstWeekDay.plusDays(weekOfYear * 7); + + return firstWeekDay; + } + + public static LocalDateTime generateMonthWeekDateStart(Integer year, Integer month, Integer weekOfMonth) { + LocalDateTime dateInWeek = LocalDateTime.of(year, month, 1 + ((weekOfMonth - 1) * 7), 0, 0); + + // Align the date of the week according to Thursday, base on ISO 8601, + // https://en.wikipedia.org/wiki/ISO_8601 + if (dateInWeek.getDayOfWeek().getValue() > DayOfWeek.THURSDAY.getValue()) { + dateInWeek = dateInWeek.plusDays(7 - dateInWeek.getDayOfWeek().getValue() + 1); + } else { + dateInWeek = 
dateInWeek.plusDays(1 - dateInWeek.getDayOfWeek().getValue()); + } + + return dateInWeek; + } + + private static TimexProperty timeAdd(TimexProperty start, TimexProperty duration) { + Integer second = start.getSecond() + + (int)(duration.getSeconds() != null ? duration.getSeconds().intValue() : 0); + Integer minute = start.getMinute() + second / 60 + + (duration.getMinutes() != null ? duration.getMinutes().intValue() : 0); + Integer hour = start.getHour() + (minute / 60) + + (duration.getHours() != null ? duration.getHours().intValue() : 0); + + return new TimexProperty() { + { + setHour((hour == 24 && minute % 60 == 0 && second % 60 == 0) ? hour : hour % 24); + setMinute(minute % 60); + setSecond(second % 60); + } + }; + } + + private static TimexProperty cloneDateTime(TimexProperty timex) { + TimexProperty result = timex.clone(); + result.setYears(null); + result.setMonths(null); + result.setWeeks(null); + result.setDays(null); + result.setHours(null); + result.setMinutes(null); + result.setSeconds(null); + return result; + } + + private static TimexProperty cloneDuration(TimexProperty timex) { + TimexProperty result = timex.clone(); + result.setYear(null); + result.setMonth(null); + result.setDayOfMonth(null); + result.setDayOfWeek(null); + result.setWeekOfYear(null); + result.setWeekOfMonth(null); + result.setSeason(null); + result.setHour(null); + result.setMinute(null); + result.setSecond(null); + result.setWeekend(null); + result.setPartOfDay(null); + return result; + } + + private static Boolean isTimeDurationTimex(String timex) { + return timex.startsWith(Constants.GENERAL_PERIOD_PREFIX.concat(Constants.TIME_TIMEX_PREFIX)); + } + + private static String getDurationTimexWithoutPrefix(String timex) { + // Remove "PT" prefix for TimeDuration, Remove "P" prefix for DateDuration + return timex.substring(isTimeDurationTimex(timex) ? 
2 : 1); + } +} diff --git a/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexInference.java b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexInference.java new file mode 100644 index 0000000000..b7e8d11b65 --- /dev/null +++ b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexInference.java @@ -0,0 +1,100 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.microsoft.recognizers.datatypes.timex.expression; + +import java.util.HashSet; + +public class TimexInference { + public static HashSet infer(TimexProperty timexProperty) { + HashSet types = new HashSet(); + + if (TimexInference.isPresent(timexProperty)) { + types.add(Constants.TimexTypes.PRESENT); + } + + if (TimexInference.isDefinite(timexProperty)) { + types.add(Constants.TimexTypes.DEFINITE); + } + + if (TimexInference.isDate(timexProperty)) { + types.add(Constants.TimexTypes.DATE); + } + + if (TimexInference.isDateRange(timexProperty)) { + types.add(Constants.TimexTypes.DATE_RANGE); + } + + if (TimexInference.isDuration(timexProperty)) { + types.add(Constants.TimexTypes.DURATION); + } + + if (TimexInference.isTime(timexProperty)) { + types.add(Constants.TimexTypes.TIME); + } + + if (TimexInference.isTimeRange(timexProperty)) { + types.add(Constants.TimexTypes.TIME_RANGE); + } + + if (types.contains(Constants.TimexTypes.PRESENT)) { + types.add(Constants.TimexTypes.DATE); + types.add(Constants.TimexTypes.TIME); + } + + if (types.contains(Constants.TimexTypes.TIME) && types.contains(Constants.TimexTypes.DURATION)) { + types.add(Constants.TimexTypes.TIME_RANGE); + } + + if (types.contains(Constants.TimexTypes.DATE) && types.contains(Constants.TimexTypes.TIME)) { + types.add(Constants.TimexTypes.DATE_TIME); + 
} + + if (types.contains(Constants.TimexTypes.DATE) && types.contains(Constants.TimexTypes.DURATION)) { + types.add(Constants.TimexTypes.DATE_RANGE); + } + + if (types.contains(Constants.TimexTypes.DATE_TIME) && types.contains(Constants.TimexTypes.DURATION)) { + types.add(Constants.TimexTypes.DATE_TIME_RANGE); + } + + if (types.contains(Constants.TimexTypes.DATE) && types.contains(Constants.TimexTypes.TIME_RANGE)) { + types.add(Constants.TimexTypes.DATE_TIME_RANGE); + } + + return types; + } + + private static Boolean isPresent(TimexProperty timexProperty) { /* true only when the now flag is non-null and true */ + return timexProperty.getNow() != null && timexProperty.getNow() == true; + } + + private static Boolean isDuration(TimexProperty timexProperty) { /* true when any duration amount (years, months, weeks, days, hours, minutes, seconds) is non-null */ + return timexProperty.getYears() != null || timexProperty.getMonths() != null || timexProperty.getWeeks() != null || + timexProperty.getDays() != null || timexProperty.getHours() != null || + timexProperty.getMinutes() != null || timexProperty.getSeconds() != null; + } + + private static Boolean isTime(TimexProperty timexProperty) { /* true when hour, minute and second are all non-null */ + return timexProperty.getHour() != null && timexProperty.getMinute() != null && timexProperty.getSecond() != null; + } + + private static Boolean isDate(TimexProperty timexProperty) { /* true when dayOfMonth or dayOfWeek is non-null */ + return timexProperty.getDayOfMonth() != null || timexProperty.getDayOfWeek() != null; + } + + private static Boolean isTimeRange(TimexProperty timexProperty) { /* true when partOfDay is non-null */ + return timexProperty.getPartOfDay() != null; + } + + private static Boolean isDateRange(TimexProperty timexProperty) { /* true when no specific day is set but at least one of year, month, season, weekOfYear or weekOfMonth is */ + return (timexProperty.getDayOfMonth() == null && timexProperty.getDayOfWeek() == null) && + (timexProperty.getYear() != null || timexProperty.getMonth() != null || + timexProperty.getSeason() != null || timexProperty.getWeekOfYear() != null || + timexProperty.getWeekOfMonth() != null); + } + + private static Boolean isDefinite(TimexProperty timexProperty) { /* true when year, month and dayOfMonth are all non-null; uses short-circuit && throughout */ + return timexProperty.getYear() != null && timexProperty.getMonth() != null && timexProperty.getDayOfMonth() != null; +
} +} diff --git a/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexParsing.java b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexParsing.java new file mode 100644 index 0000000000..6692d755e7 --- /dev/null +++ b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexParsing.java @@ -0,0 +1,56 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.microsoft.recognizers.datatypes.timex.expression; + +import java.util.HashMap; +import java.util.Map; + +public class TimexParsing { + public static void parseString(String timex, TimexProperty timexProperty) { /* dispatches on the shape of timex and fills timexProperty in place */ + // a reference to the present; compared by value because == on String only tests reference identity + if ("PRESENT_REF".equals(timex)) { + timexProperty.setNow(true); + } else if (timex.startsWith("P")) { + // duration + TimexParsing.extractDuration(timex, timexProperty); + } else if (timex.startsWith("(") && timex.endsWith(")")) { + // range indicated with start and end dates and a duration + TimexParsing.extractStartEndRange(timex, timexProperty); + } else { + // date and time and their respective ranges + TimexParsing.extractDateTime(timex, timexProperty); + } + } + + private static void extractDuration(String s, TimexProperty timexProperty) { /* hands s to TimexRegex.extract under the "period" key and assigns whatever it collected */ + Map extracted = new HashMap(); + TimexRegex.extract("period", s, extracted); + timexProperty.assignProperties(extracted); + } + + private static void extractStartEndRange(String s, TimexProperty timexProperty) { /* strips the surrounding parentheses and splits on ","; only the three-part form is handled */ + String[] parts = s.substring(1, s.length() - 1).split(","); + + if (parts.length == 3) { /* part 0 is read as the start date-time and part 2 as the duration; part 1 is not read */ + TimexParsing.extractDateTime(parts[0], timexProperty); + TimexParsing.extractDuration(parts[2], timexProperty); + } + } + + private static void extractDateTime(String s, TimexProperty timexProperty) { + Integer indexOfT = s.indexOf("T"); + + if
(indexOfT == -1) { + Map extracted = new HashMap(); + TimexRegex.extract("date", s, extracted); + timexProperty.assignProperties(extracted); + + } else { + Map extracted = new HashMap(); + TimexRegex.extract("date", s.substring(0, indexOfT), extracted); + TimexRegex.extract("time", s.substring(indexOfT), extracted); + timexProperty.assignProperties(extracted); + } + } +} diff --git a/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexProperty.java b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexProperty.java new file mode 100644 index 0000000000..97ac5691ca --- /dev/null +++ b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexProperty.java @@ -0,0 +1,445 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.microsoft.recognizers.datatypes.timex.expression; + +import java.math.BigDecimal; +import java.time.LocalDateTime; +import java.util.HashSet; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.commons.lang3.StringUtils; + +public class TimexProperty { + private Time time; + + private String timexValue; + + private HashSet types; + + private Boolean now; + + private BigDecimal years; + + private BigDecimal months; + + private BigDecimal weeks; + + private BigDecimal days; + + private BigDecimal hours; + + private BigDecimal minutes; + + private BigDecimal seconds; + + private Integer year; + + private Integer month; + + private Integer dayOfMonth; + + private Integer dayOfWeek; + + private String season; + + private Integer weekOfYear; + + private Boolean weekend; + + public Integer weekOfMonth; + + private Integer hour; + + private Integer minute; + + private Integer second; + + private String partOfDay; + + public TimexProperty() { + + } + + public TimexProperty(String timex) { + TimexParsing.parseString(timex, this); + } + + public String getTimexValue() { + return TimexFormat.format(this); + } + + public void setTimexValue(String withTimexValue) { + this.timexValue = withTimexValue; + } + + public HashSet getTypes() { + return TimexInference.infer(this); + } + + public void setTypes(HashSet withTypes) { + this.types = withTypes; + } + + public Boolean getNow() { + return now; + } + + public void setNow(Boolean withNow) { + this.now = withNow; + } + + public BigDecimal getYears() { + return years; + } + + public void setYears(BigDecimal withYears) { + this.years = withYears; + } + + public BigDecimal getMonths() { + return months; + } + + public void setMonths(BigDecimal withMonths) { + this.months = withMonths; + } + + public BigDecimal getWeeks() { + return weeks; + } + + public void setWeeks(BigDecimal withWeeks) { + this.weeks = withWeeks; + } + + public BigDecimal getDays() { + return days; + } + + public void 
setDays(BigDecimal withDays) { + this.days = withDays; + } + + public BigDecimal getHours() { + return hours; + } + + public void setHours(BigDecimal withHours) { + this.hours = withHours; + } + + public BigDecimal getMinutes() { + return minutes; + } + + public void setMinutes(BigDecimal withMinutes) { + this.minutes = withMinutes; + } + + public BigDecimal getSeconds() { + return seconds; + } + + public void setSeconds(BigDecimal withSeconds) { + this.seconds = withSeconds; + } + + public Integer getYear() { + return year; + } + + public void setYear(Integer withYear) { + this.year = withYear; + } + + public Integer getMonth() { + return month; + } + + public void setMonth(Integer withMonth) { + this.month = withMonth; + } + + public Integer getDayOfMonth() { + return dayOfMonth; + } + + public void setDayOfMonth(Integer withDayOfMonth) { + this.dayOfMonth = withDayOfMonth; + } + + public Integer getDayOfWeek() { + return dayOfWeek; + } + + public void setDayOfWeek(Integer withDayOfWeek) { + this.dayOfWeek = withDayOfWeek; + } + + public String getSeason() { + return season; + } + + public void setSeason(String withSeason) { + this.season = withSeason; + } + + public Integer getWeekOfYear() { + return weekOfYear; + } + + public void setWeekOfYear(Integer withWeekOfYear) { + this.weekOfYear = withWeekOfYear; + } + + public Boolean getWeekend() { + return weekend; + } + + public void setWeekend(Boolean withWeekend) { + this.weekend = withWeekend; + } + + public Integer getWeekOfMonth() { + return weekOfMonth; + } + + public void setWeekOfMonth(Integer withWeekOfMonth) { + this.weekOfMonth = withWeekOfMonth; + } + + public Integer getHour() { + if (this.time != null) { + return this.time.getHour(); + } + + return null; + } + + public void setHour(Integer withHour) { + if (withHour != null) { + if (this.time == null) { + this.time = new Time(withHour, 0, 0); + } else { + this.time.setHour(withHour); + } + } else { + this.time = null; + } + } + + public Integer 
getMinute() { + if (this.time != null) { + return this.time.getMinute(); + } + + return null; + } + + public void setMinute(Integer withMinute) { + if (withMinute != null) { + if (this.time == null) { + time = new Time(0, withMinute, 0); + } else { + time.setMinute(withMinute); + } + } else { + this.time = null; + } + } + + public Integer getSecond() { + if (this.time != null) { + return this.time.getSecond(); + } + + return null; + } + + public void setSecond(Integer withSecond) { + if (withSecond != null) { + if (this.time == null) { + this.time = new Time(0, 0, withSecond); + } else { + this.time.setSecond(withSecond); + } + } else { + this.time = null; + } + } + + public String getPartOfDay() { + return partOfDay; + } + + public void setPartOfDay(String wthPartOfDay) { + this.partOfDay = wthPartOfDay; + } + + public static TimexProperty fromDate(LocalDateTime date) { + TimexProperty timex = new TimexProperty() { + { + setYear(date.getYear()); + setMonth(date.getMonthValue()); + setDayOfMonth(date.getDayOfMonth()); + } + }; + return timex; + } + + public static TimexProperty fromDateTime(LocalDateTime datetime) { + TimexProperty timex = TimexProperty.fromDate(datetime); + timex.setHour(datetime.getHour()); + timex.setMinute(datetime.getMinute()); + timex.setSecond(datetime.getSecond()); + return timex; + } + + public static TimexProperty fromTime(Time time) { + return new TimexProperty() { + { + setHour(time.getHour()); + setMinute(time.getMinute()); + setSecond(time.getSecond()); + } + }; + } + + @Override + public String toString() { + return TimexConvert.convertTimexToString(this); + } + + public String toNaturalLanguage(LocalDateTime referenceDate) { + return TimexRelativeConvert.convertTimexToStringRelative(this, referenceDate); + } + + public TimexProperty clone() { + Boolean now = this.getNow(); + BigDecimal years = this.getYears(); + BigDecimal months = this.getMonths(); + BigDecimal weeks = this.getWeeks(); + BigDecimal days = this.getDays(); + 
BigDecimal hours = this.getHours(); + BigDecimal minutes = this.getMinutes(); + BigDecimal seconds = this.getSeconds(); + Integer year = this.getYear(); + Integer month = this.getMonth(); + Integer dayOfMonth = this.getDayOfMonth(); + Integer dayOfWeek = this.getDayOfWeek(); + String season = this.getSeason(); + Integer weekOfYear = this.getWeekOfYear(); + Boolean weekend = this.getWeekend(); + Integer innerWeekOfMonth = this.getWeekOfMonth(); + Integer hour = this.getHour(); + Integer minute = this.getMinute(); + Integer second = this.getSecond(); + String partOfDay = this.getPartOfDay(); + + return new TimexProperty() { + { + setNow(now); + setYears(years); + setMonths(months); + setWeeks(weeks); + setDays(days); + setHours(hours); + setMinutes(minutes); + setSeconds(seconds); + setYear(year); + setMonth(month); + setDayOfMonth(dayOfMonth); + setDayOfWeek(dayOfWeek); + setSeason(season); + setWeekOfYear(weekOfYear); + setWeekend(weekend); + setWeekOfMonth(innerWeekOfMonth); + setHour(hour); + setMinute(minute); + setSecond(second); + setPartOfDay(partOfDay); + } + }; + } + + public void assignProperties(Map source) { + for (Entry item : source.entrySet()) { + + if (StringUtils.isBlank(item.getValue())) { + continue; + } + + switch (item.getKey()) { + case "year": + setYear(Integer.parseInt(item.getValue())); + break; + case "month": + setMonth(Integer.parseInt(item.getValue())); + break; + case "dayOfMonth": + setDayOfMonth(Integer.parseInt(item.getValue())); + break; + case "dayOfWeek": + setDayOfWeek(Integer.parseInt(item.getValue())); + break; + case "season": + setSeason(item.getValue()); + break; + case "weekOfYear": + setWeekOfYear(Integer.parseInt(item.getValue())); + break; + case "weekend": + setWeekend(true); + break; + case "weekOfMonth": + setWeekOfMonth(Integer.parseInt(item.getValue())); + break; + case "hour": + setHour(Integer.parseInt(item.getValue())); + break; + case "minute": + setMinute(Integer.parseInt(item.getValue())); + break; + case 
"second": + setSecond(Integer.parseInt(item.getValue())); + break; + case "partOfDay": + setPartOfDay(item.getValue()); + break; + case "dateUnit": + this.assignDateDuration(source); + break; + case "hourAmount": + setHours(new BigDecimal(item.getValue())); + break; + case "minuteAmount": + setMinutes(new BigDecimal(item.getValue())); + break; + case "secondAmount": + setSeconds(new BigDecimal(item.getValue())); + break; + default: + } + } + } + + private void assignDateDuration(Map source) { + switch (source.get("dateUnit")) { + case "Y": + this.years = new BigDecimal(source.get("amount")); + break; + case "M": + this.months = new BigDecimal(source.get("amount")); + break; + case "W": + this.weeks = new BigDecimal(source.get("amount")); + break; + case "D": + this.days = new BigDecimal(source.get("amount")); + break; + default: + } + } +} diff --git a/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexRange.java b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexRange.java new file mode 100644 index 0000000000..f15efd61a2 --- /dev/null +++ b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexRange.java @@ -0,0 +1,36 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.microsoft.recognizers.datatypes.timex.expression; + +public class TimexRange { /* Mutable holder for the start, end and duration TimexProperty values of a range; each field stays null until its setter is called. */ + private TimexProperty start; + + private TimexProperty end; + + private TimexProperty duration; + + public TimexProperty getStart() { /* returns the stored start, or null if never set */ + return start; + } + + public void setStart(TimexProperty withStart) { /* stores the reference as-is, no copy */ + this.start = withStart; + } + + public TimexProperty getEnd() { /* returns the stored end, or null if never set */ + return end; + } + + public void setEnd(TimexProperty withEnd) { /* stores the reference as-is, no copy */ + this.end = withEnd; + } + + public TimexProperty getDuration() { /* returns the stored duration, or null if never set */ + return duration; + } + + public void setDuration(TimexProperty withDuration) { /* stores the reference as-is, no copy */ + this.duration = withDuration; + } +} diff --git a/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexRangeResolver.java b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexRangeResolver.java new file mode 100644 index 0000000000..c1f2bc76c9 --- /dev/null +++ b/Java/libraries/recognizers-text-datatypes-timex-expression/src/main/java/com/microsoft/recognizers/datatypes/timex/expression/TimexRangeResolver.java @@ -0,0 +1,266 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License.
+ +package com.microsoft.recognizers.datatypes.timex.expression; + +import java.time.DayOfWeek; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + /** Static helpers that resolve candidate timex strings against a list of constraints. */ +public class TimexRangeResolver { + /** Runs the candidates through four stages in order (resolveDurations, resolveByDateRangeConstraints, resolveByTimeConstraints, resolveByTimeRangeConstraints) and returns each surviving value wrapped in a TimexProperty. Every constraint is first passed to the TimexProperty constructor. */ public static List evaluate(Set candidates, List constraints) { + List timexConstraints = constraints.stream().map(x -> { + return new TimexProperty(x); + }).collect(Collectors.toList()); + Set candidatesWithDurationsResolved = TimexRangeResolver.resolveDurations(candidates, timexConstraints); + Set candidatesAccordingToDate = TimexRangeResolver + .resolveByDateRangeConstraints(candidatesWithDurationsResolved, timexConstraints); + Set candidatesWithAddedTime = TimexRangeResolver.resolveByTimeConstraints(candidatesAccordingToDate, + timexConstraints); + Set candidatesFilteredByTime = TimexRangeResolver.resolveByTimeRangeConstraints(candidatesWithAddedTime, + timexConstraints); + + List timexResults = candidatesFilteredByTime.stream().map(x -> { + return new TimexProperty(x); + }).collect(Collectors.toList()); + + return timexResults; + } + + /** Replaces each candidate whose parsed types contain DURATION with the timex values produced by resolveDuration; every other candidate is kept unchanged. A duration candidate for which resolveDuration returns nothing is dropped. */ public static Set resolveDurations(Set candidates, List constraints) { + Set results = new HashSet(); + for (String candidate : candidates) { + TimexProperty timex = new TimexProperty(candidate); + if (timex.getTypes().contains(Constants.TimexTypes.DURATION)) { + List r = TimexRangeResolver.resolveDuration(timex, constraints); + for (TimexProperty resolved : r) { + results.add(resolved.getTimexValue()); + } + } else { + results.add(candidate); + } + } + + return results; + } + + /** Combines the duration candidate with each constraint: DATE_TIME constraints go through TimexHelpers.timexDateTimeAdd, otherwise TIME constraints go through TimexHelpers.timexTimeAdd. Constraints of neither type contribute nothing. */ private static List resolveDuration(TimexProperty candidate, List constraints) { + List results = new ArrayList(); + for (TimexProperty constraint : constraints) { + if (constraint.getTypes().contains(Constants.TimexTypes.DATE_TIME)) { + results.add(TimexHelpers.timexDateTimeAdd(constraint, candidate)); + } else if 
(constraint.getTypes().contains(Constants.TimexTypes.TIME)) { + results.add(TimexHelpers.timexTimeAdd(constraint, candidate)); + } + } + + return results; + } + + /** Narrows the candidates by the DATE_RANGE constraints. Matching constraints are converted with TimexHelpers.dateRangeFromTimex and passed through TimexConstraintsHelper.collapseDateRanges. When that list is empty the candidates are returned untouched; otherwise each candidate is expanded by resolveDate and duplicates are removed. */ private static Set resolveByDateRangeConstraints(Set candidates, + List timexConstraints) { + List dateRangeconstraints = timexConstraints.stream().filter(timex -> { + return timex.getTypes().contains(Constants.TimexTypes.DATE_RANGE); + }).map(timex -> { + return TimexHelpers.dateRangeFromTimex(timex); + }).collect(Collectors.toList()); + + List collapseDateRanges = TimexConstraintsHelper.collapseDateRanges(dateRangeconstraints); + + if (collapseDateRanges.isEmpty()) { + return candidates; + } + + List resolution = new ArrayList(); + for (String timex : candidates) { + List r = TimexRangeResolver.resolveDate(new TimexProperty(timex), collapseDateRanges); + resolution.addAll(r); + } + + return TimexRangeResolver.removeDuplicates(resolution); + } + + /** Concatenates the results of resolveDateAgainstConstraint for every date range in constraints. */ private static List resolveDate(TimexProperty timex, List constraints) { + List result = new ArrayList(); + for (DateRange constraint : constraints) { + result.addAll(TimexRangeResolver.resolveDateAgainstConstraint(timex, constraint)); + } + + return result; + } + + /** Narrows the candidates by the TIME_RANGE constraints. Matching constraints are converted with TimexHelpers.timeRangeFromTimex and passed through TimexConstraintsHelper.collapseTimeRanges. When that list is empty the candidates are returned untouched. Otherwise TIME_RANGE candidates go through resolveTimeRange, TIME candidates go through resolveTime, candidates of neither type contribute nothing, and duplicates are removed. */ private static Set resolveByTimeRangeConstraints(Set candidates, + List timexConstrainst) { + List timeRangeConstraints = timexConstrainst.stream().filter(timex -> { + return timex.getTypes().contains(Constants.TimexTypes.TIME_RANGE); + }).map(timex -> { + return TimexHelpers.timeRangeFromTimex(timex); + }).collect(Collectors.toList()); + + List collapsedTimeRanges = TimexConstraintsHelper.collapseTimeRanges(timeRangeConstraints); + + if (collapsedTimeRanges.isEmpty()) { + return candidates; + } + + List resolution = new ArrayList(); + for (String timex : candidates) { + TimexProperty t = new TimexProperty(timex); + if (t.getTypes().contains(Constants.TimexTypes.TIME_RANGE)) { + List r = TimexRangeResolver.resolveTimeRange(t, collapsedTimeRanges); + resolution.addAll(r); + } else if 
(t.getTypes().contains(Constants.TimexTypes.TIME)) { + List r = TimexRangeResolver.resolveTime(t, collapsedTimeRanges); + resolution.addAll(r); + } + } + + return TimexRangeResolver.removeDuplicates(resolution); + } + + /** For every constraint that TimexConstraintsHelper.isOverlapping reports against the candidate range, emits the timex value of a clone whose partOfDay, seconds, minutes and hours are cleared and whose hour, minute and second come from the larger of the two start times. */ private static List resolveTimeRange(TimexProperty timex, List constraints) { + TimeRange candidate = TimexHelpers.timeRangeFromTimex(timex); + + List result = new ArrayList(); + for (TimeRange constraint : constraints) { + if (TimexConstraintsHelper.isOverlapping(candidate, constraint)) { + Integer start = Math.max(candidate.getStart().getTime(), constraint.getStart().getTime()); + Time time = new Time(start); + + // TODO: consider a method on TimexProperty to do this clone/overwrite pattern + TimexProperty resolved = timex.clone(); + resolved.setPartOfDay(null); + resolved.setSeconds(null); + resolved.setMinutes(null); + resolved.setHours(null); + resolved.setSecond(time.getSecond()); + resolved.setMinute(time.getMinute()); + resolved.setHour(time.getHour()); + + result.add(resolved.getTimexValue()); + } + } + + return result; + } + + /** Concatenates the results of resolveTimeAgainstConstraint for every time range in constraints. */ private static List resolveTime(TimexProperty timex, List constraints) { + List result = new ArrayList(); + for (TimeRange constraint : constraints) { + result.addAll(TimexRangeResolver.resolveTimeAgainstConstraint(timex, constraint)); + } + + return result; + } + + /** Builds a Time from the hour, minute and second of timex and returns a one-element list holding the timex value when start <= time < end for the constraint (end exclusive); otherwise returns an empty list. */ private static List resolveTimeAgainstConstraint(TimexProperty timex, TimeRange constraint) { + Time t = new Time(timex.getHour(), timex.getMinute(), timex.getSecond()); + if (t.getTime() >= constraint.getStart().getTime() && t.getTime() < constraint.getEnd().getTime()) { + /* anonymous ArrayList subclass; its initializer block adds the single match */ return new ArrayList() { + { + add(timex.getTimexValue()); + } + }; + } + + return new ArrayList(); + } + + /** Copies the list into a HashSet, which discards repeated values. */ private static Set removeDuplicates(List original) { + return new HashSet(original); + } + + /** Converts timex with TimexHelpers.dateFromTimex and returns its value in a one-element list when start <= date < end for the constraint (end exclusive); otherwise returns an empty list. */ private static List resolveDefiniteAgainstConstraint(TimexProperty timex, DateRange constraint) { + LocalDateTime timexDate = TimexHelpers.dateFromTimex(timex); + if 
(timexDate.compareTo(constraint.getStart()) >= 0 && timexDate.compareTo(constraint.getEnd()) < 0) { + /* anonymous ArrayList subclass; its initializer block adds the single match */ return new ArrayList() { + { + add(timex.getTimexValue()); + } + }; + } + + return new ArrayList(); + } + + /** Expands a partially specified timex into concrete values for one date range. Cases are checked in order: (1) month and dayOfMonth set: try each year from the start year through the end year and keep those accepted by resolveDefiniteAgainstConstraint; (2) dayOfWeek set: one result per date returned by TimexDateHelpers.datesMatchingDay, with dayOfWeek cleared and year, month and dayOfMonth filled in; (3) hour set: step one day at a time from start while day <= end, keeping those accepted by resolveDefiniteAgainstConstraint; otherwise an empty list. */ private static List resolveDateAgainstConstraint(TimexProperty timex, DateRange constraint) { + if (timex.getMonth() != null && timex.getDayOfMonth() != null) { + List result = new ArrayList(); + for (int year = constraint.getStart().getYear(); year <= constraint.getEnd() + .getYear(); year++) { + TimexProperty t = timex.clone(); + t.setYear(year); + result.addAll(TimexRangeResolver.resolveDefiniteAgainstConstraint(t, constraint)); + } + + return result; + } + + if (timex.getDayOfWeek() != null) { + /* NOTE(review): java.time.DayOfWeek.of(7) already returns SUNDAY, so the ternary below looks redundant in Java - confirm before simplifying */ // convert between ISO day of week and .NET day of week + DayOfWeek day = timex.getDayOfWeek() == 7 ? DayOfWeek.SUNDAY : DayOfWeek.of(timex.getDayOfWeek()); + List dates = TimexDateHelpers.datesMatchingDay(day, constraint.getStart(), constraint.getEnd()); + List result = new ArrayList(); + + for (LocalDateTime d : dates) { + TimexProperty t = timex.clone(); + t.setDayOfWeek(null); + t.setYear(d.getYear()); + t.setMonth(d.getMonthValue()); + t.setDayOfMonth(d.getDayOfMonth()); + result.add(t.getTimexValue()); + } + + return result; + } + + if (timex.getHour() != null) { + List result = new ArrayList(); + LocalDateTime day = constraint.getStart(); + while (day.compareTo(constraint.getEnd()) <= 0) { + TimexProperty t = timex.clone(); + t.setYear(day.getYear()); + t.setMonth(day.getMonthValue()); + t.setDayOfMonth(day.getDayOfMonth()); + result.addAll(TimexRangeResolver.resolveDefiniteAgainstConstraint(t, constraint)); + day = day.plusDays(1); + } + + return result; + } + + return new ArrayList(); + } + + private static Set resolveByTimeConstraints(Set candidates, List timexConstrainst) { + List