From d98a4f680d7b20dbaa27fc0a7786064e49124732 Mon Sep 17 00:00:00 2001 From: Edmo Vamerlatti Costa <11836452+edmocosta@users.noreply.github.com> Date: Fri, 20 Sep 2024 18:45:52 +0200 Subject: [PATCH] [ottl/pkg] Add support for locale in the Time converter (#35107) **Description:** Added support for locale in the `Time` converter, so it can parse timestamps written in non-english languages. The new `locale` parameter's value is optional, and can be specified as: `Time("Febrero 25 lunes, 2002, 02:03:04 p.m.", "%B %d %A, %Y, %r", "America/New_York", "es-ES")` The value must be a well-formed BCP-47 language tag, and a known [CLDR](https://cldr.unicode.org) v45 locale. **Link to tracking Issue:** https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32978 **Testing:** Unit tests **Documentation:** ottl/README was updated to include the new optional `locale` parameter. --- .chloggen/ottl_time_func_locale_support.yaml | 27 +++++++ internal/coreinternal/timeutils/parser.go | 53 +++++++++---- .../coreinternal/timeutils/parser_test.go | 70 ++++++++++++++++++ pkg/ottl/ottlfuncs/README.md | 15 +++- pkg/ottl/ottlfuncs/func_time.go | 23 +++++- pkg/ottl/ottlfuncs/func_time_test.go | 74 ++++++++++++++++--- 6 files changed, 233 insertions(+), 29 deletions(-) create mode 100644 .chloggen/ottl_time_func_locale_support.yaml diff --git a/.chloggen/ottl_time_func_locale_support.yaml b/.chloggen/ottl_time_func_locale_support.yaml new file mode 100644 index 000000000000..b58cd99ee91e --- /dev/null +++ b/.chloggen/ottl_time_func_locale_support.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: pkg/ottl + +# A brief description of the change. 
Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Added support for locale in the Time converter + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [32978] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/internal/coreinternal/timeutils/parser.go b/internal/coreinternal/timeutils/parser.go index 7b15ca4054f4..82c5523292e1 100644 --- a/internal/coreinternal/timeutils/parser.go +++ b/internal/coreinternal/timeutils/parser.go @@ -4,6 +4,7 @@ package timeutils // import "github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/timeutils" import ( + "errors" "fmt" "regexp" "strings" @@ -28,27 +29,15 @@ func ParseStrptime(layout string, value any, location *time.Location) (time.Time return ParseGotime(goLayout, value, location) } -// ParseLocalizedStrptime is like ParseStrptime, but instead of parsing a formatted time in -// English, it parses a value in foreign language, and returns the [time.Time] it represents. -// The language argument must be a well-formed BCP 47 language tag (e.g.: "en", "en-US"), and -// a known CLDR locale. +// ParseLocalizedStrptime is like ParseLocalizedGotime, but instead of using the native Go time layout, +// it uses the ctime-like format. 
func ParseLocalizedStrptime(layout string, value any, location *time.Location, language string) (time.Time, error) { goLayout, err := strptime.ToNative(layout) if err != nil { return time.Time{}, err } - stringValue, err := convertParsingValue(value) - if err != nil { - return time.Time{}, err - } - - translatedVal, err := lunes.Translate(goLayout, stringValue, language) - if err != nil { - return time.Time{}, err - } - - return ParseGotime(goLayout, translatedVal, location) + return ParseLocalizedGotime(goLayout, value, location, language) } func GetLocation(location *string, layout *string) (*time.Location, error) { @@ -69,6 +58,24 @@ func GetLocation(location *string, layout *string) (*time.Location, error) { return time.Local, nil } +// ParseLocalizedGotime is like ParseGotime, but instead of parsing a formatted time in +// English, it parses a value in foreign language, and returns the [time.Time] it represents. +// The language argument must be a well-formed BCP 47 language tag (e.g.: "en", "en-US"), and +// a known CLDR locale. +func ParseLocalizedGotime(layout string, value any, location *time.Location, language string) (time.Time, error) { + stringValue, err := convertParsingValue(value) + if err != nil { + return time.Time{}, err + } + + translatedVal, err := lunes.Translate(layout, stringValue, language) + if err != nil { + return time.Time{}, err + } + + return ParseGotime(layout, translatedVal, location) +} + func ParseGotime(layout string, value any, location *time.Location) (time.Time, error) { timeValue, err := parseGotime(layout, value, location) if err != nil { @@ -155,5 +162,21 @@ func ValidateGotime(layout string) error { return nil } +// ValidateLocale checks the given locale and returns an error if the language tag +// is not supported by the localized parser functions. 
+func ValidateLocale(locale string) error { + _, err := lunes.NewDefaultLocale(locale) + if err == nil { + return nil + } + + var e *lunes.ErrUnsupportedLocale + if errors.As(err, &e) { + return fmt.Errorf("unsupported locale '%s', value must be a supported BCP 47 language tag", locale) + } + + return fmt.Errorf("invalid locale '%s': %w", locale, err) +} + // Allows tests to override with deterministic value var Now = time.Now diff --git a/internal/coreinternal/timeutils/parser_test.go b/internal/coreinternal/timeutils/parser_test.go index 5d20725561b4..f83133c18589 100644 --- a/internal/coreinternal/timeutils/parser_test.go +++ b/internal/coreinternal/timeutils/parser_test.go @@ -165,3 +165,73 @@ func TestParseLocalizedStrptimeInvalidType(t *testing.T) { require.Error(t, err) require.ErrorContains(t, err, "cannot be parsed as a time") } + +func TestParseLocalizedGotime(t *testing.T) { + tests := []struct { + name string + format string + value any + language string + expected time.Time + location *time.Location + }{ + { + name: "Foreign language", + format: "January 02 Monday, 2006, 03:04:05 pm", + value: "Febrero 25 jueves, 1993, 02:03:04 p.m.", + expected: time.Date(1993, 2, 25, 14, 3, 4, 0, time.Local), + location: time.Local, + language: "es-ES", + }, + { + name: "Foreign language with location", + format: "Monday Jan _2 2006", + value: "mercoledì set 4 2024", + expected: time.Date(2024, 9, 4, 0, 0, 0, 0, time.UTC), + location: time.UTC, + language: "it-IT", + }, + { + name: "String value", + format: "January 02 Monday, 2006, 03:04:05 PM", + value: "March 12 Friday, 2004, 02:03:04 AM", + expected: time.Date(2004, 3, 12, 2, 3, 4, 0, time.Local), + location: time.Local, + language: "en", + }, + { + name: "Bytes value", + format: "Jan 02 Mon, 06, 03:04:05 PM", + value: []byte("Jun 10 Fri, 04, 02:03:04 AM"), + expected: time.Date(2004, 6, 10, 2, 3, 4, 0, time.Local), + location: time.Local, + language: "en-US", + }, + } + + for _, tt := range tests { + 
t.Run(tt.name, func(t *testing.T) { + result, err := ParseLocalizedGotime(tt.format, tt.value, tt.location, tt.language) + require.NoError(t, err) + assert.Equal(t, tt.expected.UnixNano(), result.UnixNano()) + }) + } +} + +func TestParseLocalizedGotimeInvalidType(t *testing.T) { + value := time.Now().UnixNano() + _, err := ParseLocalizedGotime("Mon", value, time.Local, "en") + require.Error(t, err) + require.ErrorContains(t, err, "cannot be parsed as a time") +} + +func TestValidateLocale(t *testing.T) { + require.NoError(t, ValidateLocale("es")) + require.NoError(t, ValidateLocale("en-US")) + require.NoError(t, ValidateLocale("ca-ES-valencia")) +} + +func TestValidateLocaleUnsupported(t *testing.T) { + err := ValidateLocale("foo-bar") + require.ErrorContains(t, err, "unsupported locale 'foo-bar'") +} diff --git a/pkg/ottl/ottlfuncs/README.md b/pkg/ottl/ottlfuncs/README.md index 7c21b4cd6dcc..d818640c2409 100644 --- a/pkg/ottl/ottlfuncs/README.md +++ b/pkg/ottl/ottlfuncs/README.md @@ -1455,11 +1455,11 @@ Examples: ### Time -`Time(target, format, Optional[location])` +`Time(target, format, Optional[location], Optional[locale])` The `Time` Converter takes a string representation of a time and converts it to a Golang `time.Time`. -`target` is a string. `format` is a string, `location` is an optional string. +`target` is a string. `format` is a string, `location` is an optional string, `locale` is an optional string. If either `target` or `format` are nil, an error is returned. The parser used is the parser at [internal/coreinternal/parser](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/internal/coreinternal/timeutils). If the `target` and `format` do not follow the parsing rules used by this parser, an error is returned. 
@@ -1519,6 +1519,17 @@ Examples: - `Time("2012-11-01T22:08:41+0000 EST", "%Y-%m-%dT%H:%M:%S%z %Z")` - `Time("2023-05-26 12:34:56", "%Y-%m-%d %H:%M:%S", "America/New_York")` +`locale` specifies the input language of the `target` value. It is used to interpret timestamp values written in a specific language, +ensuring that the function can correctly parse the localized month names, day names, and periods of the day based on the provided language. + +The value must be a well-formed BCP 47 language tag, and a known [CLDR](https://cldr.unicode.org) v45 locale. +If not supplied, English (`en`) is used. + +Examples: + +- `Time("mercoledì set 4 2024", "%A %h %e %Y", "", "it")` +- `Time("Febrero 25 lunes, 2002, 02:03:04 p.m.", "%B %d %A, %Y, %r", "America/New_York", "es-ES")` + ### TraceID `TraceID(bytes)` diff --git a/pkg/ottl/ottlfuncs/func_time.go b/pkg/ottl/ottlfuncs/func_time.go index 44371a94de6e..af5e37326a4f 100644 --- a/pkg/ottl/ottlfuncs/func_time.go +++ b/pkg/ottl/ottlfuncs/func_time.go @@ -6,6 +6,7 @@ package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-c import ( "context" "fmt" + "time" "github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/timeutils" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" @@ -15,6 +16,7 @@ type TimeArguments[K any] struct { Time ottl.StringGetter[K] Format string Location ottl.Optional[string] + Locale ottl.Optional[string] } func NewTimeFactory[K any]() ottl.Factory[K] { @@ -27,10 +29,10 @@ func createTimeFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ot return nil, fmt.Errorf("TimeFactory args must be of type *TimeArguments[K]") } - return Time(args.Time, args.Format, args.Location) + return Time(args.Time, args.Format, args.Location, args.Locale) } -func Time[K any](inputTime ottl.StringGetter[K], format string, location ottl.Optional[string]) (ottl.ExprFunc[K], error) { +func Time[K any](inputTime ottl.StringGetter[K], format string, location 
ottl.Optional[string], locale ottl.Optional[string]) (ottl.ExprFunc[K], error) { if format == "" { return nil, fmt.Errorf("format cannot be nil") } @@ -49,6 +51,16 @@ func Time[K any](inputTime ottl.StringGetter[K], format string, location ottl.Op if err != nil { return nil, err } + + var inputTimeLocale *string + if !locale.IsEmpty() { + l := locale.Get() + if err = timeutils.ValidateLocale(l); err != nil { + return nil, err + } + inputTimeLocale = &l + } + return func(ctx context.Context, tCtx K) (any, error) { t, err := inputTime.Get(ctx, tCtx) if err != nil { @@ -57,7 +69,12 @@ func Time[K any](inputTime ottl.StringGetter[K], format string, location ottl.Op if t == "" { return nil, fmt.Errorf("time cannot be nil") } - timestamp, err := timeutils.ParseGotime(gotimeFormat, t, loc) + var timestamp time.Time + if inputTimeLocale != nil { + timestamp, err = timeutils.ParseLocalizedGotime(gotimeFormat, t, loc, *inputTimeLocale) + } else { + timestamp, err = timeutils.ParseGotime(gotimeFormat, t, loc) + } if err != nil { return nil, err } diff --git a/pkg/ottl/ottlfuncs/func_time_test.go b/pkg/ottl/ottlfuncs/func_time_test.go index cc9ce2a795f1..c98b094f39c7 100644 --- a/pkg/ottl/ottlfuncs/func_time_test.go +++ b/pkg/ottl/ottlfuncs/func_time_test.go @@ -24,6 +24,7 @@ func Test_Time(t *testing.T) { format string expected time.Time location string + locale string }{ { name: "simple short form", @@ -188,14 +189,52 @@ func Test_Time(t *testing.T) { format: "%Y-%m-%dT%H:%M:%S %Z", expected: time.Date(1986, 10, 01, 00, 17, 33, 00, time.FixedZone("MST", -7*60*60)), }, + { + name: "with locale", + time: &ottl.StandardStringGetter[any]{ + Getter: func(_ context.Context, _ any) (any, error) { + return "Febrero 25 lunes, 2002, 02:03:04 p.m.", nil + }, + }, + format: "%B %d %A, %Y, %r", + locale: "es-ES", + expected: time.Date(2002, 2, 25, 14, 03, 04, 0, time.Local), + }, + { + name: "with locale - date only", + time: &ottl.StandardStringGetter[any]{ + Getter: func(_ 
context.Context, _ any) (any, error) { + return "mercoledì set 4 2024", nil + }, + }, + format: "%A %h %e %Y", + locale: "it", + expected: time.Date(2024, 9, 4, 0, 0, 0, 0, time.Local), + }, + { + name: "with locale and location", + time: &ottl.StandardStringGetter[any]{ + Getter: func(_ context.Context, _ any) (any, error) { + return "Febrero 25 lunes, 2002, 02:03:04 p.m.", nil + }, + }, + format: "%B %d %A, %Y, %r", + location: "America/New_York", + locale: "es-ES", + expected: time.Date(2002, 2, 25, 14, 03, 04, 0, locationAmericaNewYork), + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - var locOptional ottl.Optional[string] + var locationOptional ottl.Optional[string] if tt.location != "" { - locOptional = ottl.NewTestingOptional(tt.location) + locationOptional = ottl.NewTestingOptional(tt.location) } - exprFunc, err := Time(tt.time, tt.format, locOptional) + var localeOptional ottl.Optional[string] + if tt.locale != "" { + localeOptional = ottl.NewTestingOptional(tt.locale) + } + exprFunc, err := Time(tt.time, tt.format, locationOptional, localeOptional) assert.NoError(t, err) result, err := exprFunc(nil, nil) assert.NoError(t, err) @@ -234,8 +273,9 @@ func Test_TimeError(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - var locOptional ottl.Optional[string] - exprFunc, err := Time[any](tt.time, tt.format, locOptional) + var locationOptional ottl.Optional[string] + var localeOptional ottl.Optional[string] + exprFunc, err := Time[any](tt.time, tt.format, locationOptional, localeOptional) require.NoError(t, err) _, err = exprFunc(context.Background(), nil) assert.ErrorContains(t, err, tt.expectedError) @@ -250,6 +290,7 @@ func Test_TimeFormatError(t *testing.T) { format string expectedError string location string + locale string }{ { name: "invalid short with no format", @@ -272,14 +313,29 @@ func Test_TimeFormatError(t *testing.T) { location: "Jupiter/Ganymede", expectedError: "unknown time zone 
Jupiter/Ganymede", }, + { + name: "with unsupported locale", + time: &ottl.StandardStringGetter[any]{ + Getter: func(_ context.Context, _ any) (any, error) { + return "2023-05-26 12:34:56", nil + }, + }, + format: "%Y-%m-%d %H:%M:%S", + locale: "foo-bar", + expectedError: "unsupported locale 'foo-bar'", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - var locOptional ottl.Optional[string] + var locationOptional ottl.Optional[string] if tt.location != "" { - locOptional = ottl.NewTestingOptional(tt.location) + locationOptional = ottl.NewTestingOptional(tt.location) + } + var localeOptional ottl.Optional[string] + if tt.locale != "" { + localeOptional = ottl.NewTestingOptional(tt.locale) } - _, err := Time[any](tt.time, tt.format, locOptional) + _, err := Time[any](tt.time, tt.format, locationOptional, localeOptional) assert.ErrorContains(t, err, tt.expectedError) }) } @@ -465,7 +521,7 @@ func Benchmark_Time(t *testing.B) { if tt.location != "" { locOptional = ottl.NewTestingOptional(tt.location) } - exprFunc, err := Time(tt.time, tt.format, locOptional) + exprFunc, err := Time(tt.time, tt.format, locOptional, ottl.Optional[string]{}) assert.NoError(t, err) t.Run(tt.name, func(t *testing.B) {