diff --git a/changelog/fragments/1764777232-ingest-GZIP-logs-is-GA-and-enabled-by-default.yaml b/changelog/fragments/1764777232-ingest-GZIP-logs-is-GA-and-enabled-by-default.yaml new file mode 100644 index 000000000000..870988ec9c19 --- /dev/null +++ b/changelog/fragments/1764777232-ingest-GZIP-logs-is-GA-and-enabled-by-default.yaml @@ -0,0 +1,47 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: feature + +# Change summary; an 80ish characters long description of the change. +summary: "filestream: GZIP support is now GA and enabled by default" + +# Long description; in case the summary is not enough to describe the change +# this field accommodates a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +description: | + GZIP file support in the filestream input is now generally available and + enabled by default. Previously, this feature was beta and required setting + `gzip_experimental: true`. + + The `gzip_experimental` flag is now deprecated. It is kept for backward + compatibility but its value is ignored. If this flag is present in the + configuration, a warning will be logged pointing to the new `gzip_disabled` flag. + + To disable GZIP support, a new boolean flag `gzip_disabled` has been + introduced. Set it to `true` to revert to the old behavior of not + decompressing GZIP files. 
+ + GZIP support requires the `file_identity` option to be set to `fingerprint`. + If a different `file_identity` is used, GZIP support must be explicitly + disabled by setting `gzip_disabled: true`. Failure to do so will result + in an error, preventing the filestream input from starting. + +component: filebeat + +# PR URL; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: https://github.com/elastic/beats/pull/47893 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +issue: https://github.com/elastic/beats/issues/47880 diff --git a/filebeat/filebeat.reference.yml b/filebeat/filebeat.reference.yml index 8b9040c17580..9e5a1095c63c 100644 --- a/filebeat/filebeat.reference.yml +++ b/filebeat/filebeat.reference.yml @@ -669,6 +669,10 @@ filebeat.inputs: - /var/log/*.log #- c:\programdata\elasticsearch\logs\* + # filestream input supports parsing of GZIP files. By default, GZIP support is + # enabled. If you want to disable it, set `gzip_disabled` to true. + #gzip_disabled: false + # Configure the file encoding for reading files with international characters # following the W3C recommendation for HTML5 (http://www.w3.org/TR/encoding). # Some sample encodings: diff --git a/filebeat/filebeat.yml b/filebeat/filebeat.yml index bcefc53d20d8..c753023330bf 100644 --- a/filebeat/filebeat.yml +++ b/filebeat/filebeat.yml @@ -32,6 +32,10 @@ filebeat.inputs: - /var/log/*.log #- c:\programdata\elasticsearch\logs\* + # filestream input supports parsing of GZIP files. By default, GZIP support is + # enabled. 
If you want to disable it, set `gzip_disabled` to true. + #gzip_disabled: false + # Exclude lines. A list of regular expressions to match. It drops the lines that are # matching any regular expression from the list. # Line filtering happens after the parsers pipeline. If you would like to filter lines diff --git a/filebeat/input/filestream/config.go b/filebeat/input/filestream/config.go index e835251e0395..2f421143af4e 100644 --- a/filebeat/input/filestream/config.go +++ b/filebeat/input/filestream/config.go @@ -26,7 +26,6 @@ import ( "github.com/dustin/go-humanize" loginp "github.com/elastic/beats/v7/filebeat/input/filestream/internal/input-logfile" - "github.com/elastic/beats/v7/libbeat/common/cfgwarn" "github.com/elastic/beats/v7/libbeat/common/match" "github.com/elastic/beats/v7/libbeat/reader/parser" "github.com/elastic/beats/v7/libbeat/reader/readfile" @@ -44,10 +43,13 @@ type config struct { FileWatcher fileWatcherConfig `config:"prospector.scanner"` FileIdentity *conf.Namespace `config:"file_identity"` - // GZIPExperimental enables beta support for ingesting GZIP files. - // When set to true the input will transparently stream-decompress GZIP files. - // This feature is experimental and subject to change. - GZIPExperimental bool `config:"gzip_experimental"` + // GZIPDisabled disables decompressing GZIP at ingestion time. + GZIPDisabled bool `config:"gzip_disabled"` + + // GZIPExperimental is ignored; it is kept only to log a warning when set. + // + // Deprecated: use GZIPDisabled to configure GZIP behaviour. + GZIPExperimental *bool `config:"gzip_experimental"` // -1 means that registry will never be cleaned, disabling clean_inactive. // Setting it to 0 also disables clean_inactive @@ -207,11 +209,11 @@ func (c *config) Validate() error { } } - if c.GZIPExperimental { - // Validate file_identity must be fingerprint when gzip support is enabled. + if !c.GZIPDisabled { + // file_identity must be fingerprint when gzip support is enabled. 
if c.FileIdentity != nil && c.FileIdentity.Name() != fingerprintName { return fmt.Errorf( - "gzip_experimental=true requires file_identity to be 'fingerprint'") + "to use a file identity other than 'fingerprint', disable gzip, set 'gzip_disabled: true'") } } @@ -230,9 +232,10 @@ func (c config) checkUnsupportedParams(logger *logp.Logger) { "duplication and incomplete input metrics, it's use is " + "highly discouraged.") } - if c.GZIPExperimental { - logger.Named("filestream").Warn(cfgwarn.Beta( - "filestream: beta gzip support enabled")) + if c.GZIPExperimental != nil { + logger.Named("filestream").Warn( + "'gzip_experimental' has been removed. GZIP support is now " + + "enabled by default. To disable it, use 'gzip_disabled: true'") } } diff --git a/filebeat/input/filestream/config_test.go b/filebeat/input/filestream/config_test.go index a0cdbe68c72b..054250eba5a7 100644 --- a/filebeat/input/filestream/config_test.go +++ b/filebeat/input/filestream/config_test.go @@ -68,14 +68,13 @@ func TestConfigValidate(t *testing.T) { assert.Error(t, err) }) - t.Run("gzip_experimental works with file_identity.fingerprint", func(t *testing.T) { + t.Run("gzip works by default with file_identity.fingerprint", func(t *testing.T) { c, err := conf.NewConfigFrom(` id: 'some id' paths: [/foo/bar*] -gzip_experimental: true -file_identity.fingerprint: ~ `) require.NoError(t, err, "could not create config from string") + got := defaultConfig() err = c.Unpack(&got) require.NoError(t, err, "could not unpack config") @@ -84,19 +83,74 @@ file_identity.fingerprint: ~ assert.NoError(t, err) }) - t.Run("gzip_experimental requires file_identity.fingerprint", func(t *testing.T) { + t.Run("gzip requires file_identity.fingerprint", func(t *testing.T) { c, err := conf.NewConfigFrom(` id: 'some id' paths: [/foo/bar*] -gzip_experimental: true -file_identity.path: ~ +file_identity.native: ~ `) require.NoError(t, err, "could not create config from string") + got := defaultConfig() err = c.Unpack(&got) + 
assert.ErrorContains(t, err, - "gzip_experimental=true requires file_identity to be 'fingerprint") + "to use a file identity other than 'fingerprint', disable gzip, set 'gzip_disabled: true'") + }) + + t.Run("gzip_disabled allows non-fingerprint file_identity", func(t *testing.T) { + c, err := conf.NewConfigFrom(` +id: 'some id' +paths: [/foo/bar*] +gzip_disabled: true +file_identity.path: ~ +`) + require.NoError(t, err, "could not create config from string") + + got := defaultConfig() + err = c.Unpack(&got) + require.NoError(t, err, "could not unpack config") + + err = got.Validate() + assert.NoError(t, err) + }) + + t.Run("gzip_experimental true is accepted but ignored", func(t *testing.T) { + c, err := conf.NewConfigFrom(` +id: 'some id' +paths: [/foo/bar*] +gzip_experimental: true +file_identity.fingerprint: ~ +`) + require.NoError(t, err, "could not create config from string") + + got := defaultConfig() + err = c.Unpack(&got) + require.NoError(t, err, "could not unpack config") + + err = got.Validate() + assert.NoError(t, err) + // gzip_experimental is ignored, gzip is enabled by default + assert.False(t, got.GZIPDisabled, "gzip should be enabled") + }) + + t.Run("gzip_experimental false is accepted but ignored", func(t *testing.T) { + c, err := conf.NewConfigFrom(` +id: 'some id' +paths: [/foo/bar*] +gzip_experimental: false +file_identity.fingerprint: ~ +`) + require.NoError(t, err, "could not create config from string") + got := defaultConfig() + err = c.Unpack(&got) + require.NoError(t, err, "could not unpack config") + + err = got.Validate() + assert.NoError(t, err) + // gzip_experimental is ignored, gzip is still enabled by default + assert.False(t, got.GZIPDisabled, "gzip should be enabled") }) } diff --git a/filebeat/input/filestream/filestream_test.go b/filebeat/input/filestream/filestream_test.go index ef0aefa186df..92826ae6c074 100644 --- a/filebeat/input/filestream/filestream_test.go +++ b/filebeat/input/filestream/filestream_test.go @@ -72,8 +72,7 
@@ func TestLogFileTimedClosing(t *testing.T) { for _, tc := range testCases { fs := filestream{ - readerConfig: readerConfig{BufferSize: 512}, - gzipExperimental: true} + readerConfig: readerConfig{BufferSize: 512}} f, err := fs.newFile(tc.createFile(t)) require.NoError(t, err, "could not create file for reading") @@ -151,8 +150,7 @@ func TestLogFileTruncated(t *testing.T) { osFile := tc.createFile(t) fs := filestream{ - readerConfig: readerConfig{BufferSize: 512}, - gzipExperimental: true} + readerConfig: readerConfig{BufferSize: 512}} f, err := fs.newFile(osFile) require.NoError(t, err, "could not create file for reading") diff --git a/filebeat/input/filestream/input.go b/filebeat/input/filestream/input.go index bc0b2f19c922..ba860eceed3a 100644 --- a/filebeat/input/filestream/input.go +++ b/filebeat/input/filestream/input.go @@ -68,7 +68,7 @@ type filestream struct { parsers parser.Config takeOver loginp.TakeOverConfig scannerCheckInterval time.Duration - gzipExperimental bool + gzipDisabled bool // Function references for testing waitGracePeriodFn func( @@ -141,7 +141,7 @@ func configure( closerConfig: c.Close, parsers: c.Reader.Parsers, takeOver: c.TakeOver, - gzipExperimental: c.GZIPExperimental, + gzipDisabled: c.GZIPDisabled, deleterConfig: c.Delete, waitGracePeriodFn: waitGracePeriod, tickFn: time.Tick, @@ -186,8 +186,7 @@ func (inp *filestream) Run( log := ctx.Logger.With("path", fs.newPath).With("state-id", src.Name()) state := initState(log, cursor, fs) if state.EOF { - // TODO: change it to debug once GZIP isn't experimental anymore. - log.Infof("GZIP file already read to EOF, not reading it again, file name '%s'", + log.Debugf("GZIP file already read to EOF, not reading it again, file name '%s'", fs.newPath) return nil } @@ -582,19 +581,20 @@ func (inp *filestream) openFile( return f, enc, truncated, nil } -// newFile wraps the given os.File into an appropriate File interface implementation. 
+// newFile wraps the given os.File into an appropriate File interface +// implementation. // -// If the 'gzip_experimental' flag is false, it returns a plain file reader +// If the 'gzip_disabled' config is true, it returns a plain file reader // (plainFile). // -// If the 'gzip_experimental' flag is true, it attempts to detect if the -// underlying file is GZIP compressed. If it is, it returns a GZIP-aware file -// reader (gzipSeekerReader). If the file is not GZIP compressed, it returns a -// plain file reader (plainFile). +// If the 'gzip_disabled' flag is false (the default), it attempts to detect if +// the underlying file is GZIP compressed. If it is, it returns a GZIP-aware +// file reader (gzipSeekerReader). If the file is not GZIP compressed, it +// returns a plain file reader (plainFile). // // It returns an error if any happens. func (inp *filestream) newFile(rawFile *os.File) (File, error) { - if !inp.gzipExperimental { + if inp.gzipDisabled { return newPlainFile(rawFile), nil } diff --git a/filebeat/input/filestream/input_test.go b/filebeat/input/filestream/input_test.go index 97782bb8eaba..3f2fb20c7152 100644 --- a/filebeat/input/filestream/input_test.go +++ b/filebeat/input/filestream/input_test.go @@ -190,7 +190,7 @@ func TestNewFile(t *testing.T) { require.NoError(t, err) testCases := map[string]struct { - gzipEnabled bool + gzipDisabled bool filePath string expectedType interface{} expectError bool @@ -198,23 +198,23 @@ func TestNewFile(t *testing.T) { setup func(t *testing.T, filePath string) *os.File }{ "gzip_disabled_returns_plain_file": { - gzipEnabled: false, + gzipDisabled: true, filePath: plainFilePath, expectedType: &plainFile{}, }, "gzip_enabled_with_plain_file_returns_plain_file": { - gzipEnabled: true, + gzipDisabled: false, filePath: plainFilePath, expectedType: &plainFile{}, }, "gzip_enabled_with_gzip_file_returns_gzip_reader": { - gzipEnabled: true, + gzipDisabled: false, filePath: gzippedFilePath, expectedType: &gzipSeekerReader{}, 
}, "gzip_enabled_with_unreadable_file_returns_error": { - gzipEnabled: true, - filePath: plainFilePath, // content doesn't matter + gzipDisabled: false, + filePath: plainFilePath, // content doesn't matter setup: func(t *testing.T, filePath string) *os.File { // Return a file that is already closed to trigger a read error // in IsGZIP @@ -231,8 +231,8 @@ func TestNewFile(t *testing.T) { for name, tc := range testCases { t.Run(name, func(t *testing.T) { inp := &filestream{ - gzipExperimental: tc.gzipEnabled, - readerConfig: defaultReaderConfig(), + gzipDisabled: tc.gzipDisabled, + readerConfig: defaultReaderConfig(), } var rawFile *os.File @@ -278,33 +278,33 @@ func TestOpenFile_GZIPNeverTruncated(t *testing.T) { require.NoError(t, err, "could not save gzip file") tcs := []struct { - name string - gzipExperimental bool - path string - want bool - errMsg string + name string + gzipDisabled bool + path string + want bool + errMsg string }{ { - name: "plain file is truncated", - gzipExperimental: false, - path: plainPath, - want: true, - errMsg: "plain file should be considered truncated", + name: "plain file is truncated", + gzipDisabled: true, + path: plainPath, + want: true, + errMsg: "plain file should be considered truncated", }, { - name: "GZIP file is never truncated", - gzipExperimental: true, - path: gzPath, - want: false, - errMsg: "GZIP file skips truncated validation", + name: "GZIP file is never truncated", + gzipDisabled: false, + path: gzPath, + want: false, + errMsg: "GZIP file skips truncated validation", }, } for _, tc := range tcs { inp := filestream{ - gzipExperimental: tc.gzipExperimental, - encodingFactory: encoding.Plain, - readerConfig: readerConfig{BufferSize: 32}, + gzipDisabled: tc.gzipDisabled, + encodingFactory: encoding.Plain, + readerConfig: readerConfig{BufferSize: 32}, } f, _, truncated, err := inp.openFile( diff --git a/filebeat/input/filestream/prospector_creator.go b/filebeat/input/filestream/prospector_creator.go index 
a99146305317..d264045cc023 100644 --- a/filebeat/input/filestream/prospector_creator.go +++ b/filebeat/input/filestream/prospector_creator.go @@ -60,7 +60,7 @@ func newProspector( logger, config.Paths, config.FileWatcher, - config.GZIPExperimental, + !config.GZIPDisabled, config.Delete.Enabled, identifier, srci, diff --git a/filebeat/testing/integration/filestream_gzip_test.go b/filebeat/testing/integration/filestream_gzip_test.go index 6b9a45b2dfa0..79d15081a14d 100644 --- a/filebeat/testing/integration/filestream_gzip_test.go +++ b/filebeat/testing/integration/filestream_gzip_test.go @@ -90,7 +90,6 @@ filebeat.inputs: id: "test-filestream" paths: - %s - gzip_experimental: true output.file: enabled: true path: %s @@ -127,7 +126,7 @@ output.file: } }) - t.Run("BetaWarning", func(t *testing.T) { + t.Run("gzip_experimental_deprecation_warning", func(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() @@ -162,7 +161,7 @@ output.console: WithReportOptions(reportOptions). ExpectStart(). ExpectOutput( - "BETA: filestream: beta gzip support enabled"). + "'gzip_experimental' has been removed. GZIP support is now enabled by default. To disable it, use 'gzip_disabled: true'"). Start(ctx). 
Wait() }) diff --git a/filebeat/testing/integration/sample_test.go b/filebeat/testing/integration/sample_test.go index d0bf9110c053..a899327f0bdd 100644 --- a/filebeat/testing/integration/sample_test.go +++ b/filebeat/testing/integration/sample_test.go @@ -61,7 +61,6 @@ filebeat.inputs: id: test-filestream paths: - %s - gzip_experimental: true # we want to check that all messages are ingested # without using an external service, this is an easy way diff --git a/filebeat/tests/integration/filestream_gzip_test.go b/filebeat/tests/integration/filestream_gzip_test.go index 4a45b7275c06..2afb4caa3e7d 100644 --- a/filebeat/tests/integration/filestream_gzip_test.go +++ b/filebeat/tests/integration/filestream_gzip_test.go @@ -149,7 +149,6 @@ filebeat.inputs: paths: - %s prospector.scanner.check_interval: 1s - gzip_experimental: true output.file: enabled: true path: %s @@ -231,7 +230,6 @@ filebeat.inputs: id: "test-gzip-eof" paths: - %s - gzip_experimental: true path.home: %s filebeat.registry.flush: 1s output.discard: @@ -333,7 +331,6 @@ filebeat.inputs: id: "test-filestream" paths: - %s - gzip_experimental: true output.file: enabled: true path: %s @@ -405,7 +402,6 @@ filebeat.inputs: id: "test-filestream" paths: - %s - gzip_experimental: true output.file: enabled: true path: %s @@ -587,7 +583,6 @@ filebeat.inputs: id: "test-filestream" paths: - %s - gzip_experimental: true output.file: enabled: true path: %s @@ -672,7 +667,6 @@ filebeat.inputs: id: "test-filestream" paths: - %s - gzip_experimental: true output.file: enabled: true path: %s @@ -812,8 +806,6 @@ filebeat.inputs: id: "test-filestream" paths: - %s - gzip_experimental: true - #rotation.external.strategy.copytruncate.suffix_regex: \.\d+(\.gz)?$ output.file: enabled: true path: %s @@ -984,7 +976,6 @@ filebeat.inputs: id: "test-filestream" paths: - %s - gzip_experimental: true output.file: enabled: true path: %s @@ -1108,7 +1099,6 @@ filebeat.inputs: id: "test-filestream" paths: - %s - gzip_experimental: true 
output.file: enabled: true path: %s diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml index 675180c8926b..35f844777ee3 100644 --- a/x-pack/filebeat/filebeat.reference.yml +++ b/x-pack/filebeat/filebeat.reference.yml @@ -2353,6 +2353,10 @@ filebeat.inputs: - /var/log/*.log #- c:\programdata\elasticsearch\logs\* + # filestream input supports parsing of GZIP files. By default, GZIP support is + # enabled. If you want to disable it, set `gzip_disabled` to true. + #gzip_disabled: false + # Configure the file encoding for reading files with international characters # following the W3C recommendation for HTML5 (http://www.w3.org/TR/encoding). # Some sample encodings: diff --git a/x-pack/filebeat/filebeat.yml b/x-pack/filebeat/filebeat.yml index bcefc53d20d8..c753023330bf 100644 --- a/x-pack/filebeat/filebeat.yml +++ b/x-pack/filebeat/filebeat.yml @@ -32,6 +32,10 @@ filebeat.inputs: - /var/log/*.log #- c:\programdata\elasticsearch\logs\* + # filestream input supports parsing of GZIP files. By default, GZIP support is + # enabled. If you want to disable it, set `gzip_disabled` to true. + #gzip_disabled: false + # Exclude lines. A list of regular expressions to match. It drops the lines that are # matching any regular expression from the list. # Line filtering happens after the parsers pipeline. If you would like to filter lines