Skip to content

Commit 1a9f0b9

Browse files
committed
filebeat: make GZIP GA and enabled by default
This commit refactors the GZIP support in the filestream from beta to GA and enables it by default. The `GZIPExperimental` config field has been replaced with `GZIPDisabled` to make the zero value (`false`) represent the new default behavior (GZIP enabled). The logic throughout the input has been inverted to reflect this change. For backward compatibility, `GZIPExperimental` is retained in the config struct as a `*bool`. This allows detecting if the field was explicitly set by the user. If it is set, its value is ignored, and a deprecation warning is logged, guiding the user to the new `gzip_disabled` flag. The validation logic in `config.Validate()` is updated to check `!c.GZIPDisabled`. The error message is now more explicit, instructing developers to set `gzip_disabled: true` if a `file_identity` other than `fingerprint` is required for their use case. Unit and integration tests have been updated accordingly to remove dependencies on the old `gzip_experimental` flag and to verify the new default behavior, the disable flag, and the deprecation warning. AI tools used: Cursor.
1 parent fdd2b21 commit 1a9f0b9

File tree

14 files changed

+180
-74
lines changed

14 files changed

+180
-74
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Kind can be one of:
2+
# - breaking-change: a change to previously-documented behavior
3+
# - deprecation: functionality that is being removed in a later release
4+
# - bug-fix: fixes a problem in a previous version
5+
# - enhancement: extends functionality but does not break or fix existing behavior
6+
# - feature: new functionality
7+
# - known-issue: problems that we are aware of in a given version
8+
# - security: impacts on the security of a product or a user’s deployment.
9+
# - upgrade: important information for someone upgrading from a prior version
10+
# - other: does not fit into any of the other categories
11+
kind: feature
12+
13+
# Change summary; a 80ish characters long description of the change.
14+
summary: "filestream: GZIP support is now GA and enabled by default"
15+
16+
# Long description; in case the summary is not enough to describe the change
17+
# this field accommodate a description without length limits.
18+
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
19+
description: |
20+
GZIP file support in the filestream input is now generally available and
21+
enabled by default. Previously, this feature was beta and required setting
22+
`gzip_experimental: true`.
23+
24+
The `gzip_experimental` flag is now deprecated. It is kept for backward
25+
compatibility but its value is ignored. If this flag is present in the
26+
configuration, a warning will be logged suggesting the new flag.
27+
28+
To disable GZIP support, a new boolean flag `gzip_disabled` has been
29+
introduced. Set it to `true` to revert to the old behavior of not
30+
decompressing GZIP files.
31+
32+
GZIP support requires the `file_identity` option to be set to `fingerprint`.
33+
If a different `file_identity` is used, GZIP support must be explicitly
34+
disabled by setting `gzip_disabled: true`. Failure to do so will result
35+
in an error, preventing the filestream input from starting.
36+
37+
component: filebeat
38+
39+
# PR URL; optional; the PR number that added the changeset.
40+
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
41+
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
42+
# Please provide it if you are adding a fragment for a different PR.
43+
pr: https://github.com/elastic/beats/pull/47893
44+
45+
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
46+
# If not present is automatically filled by the tooling with the issue linked to the PR number.
47+
issue: https://github.com/elastic/beats/issues/47880

filebeat/filebeat.reference.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,10 @@ filebeat.inputs:
669669
- /var/log/*.log
670670
#- c:\programdata\elasticsearch\logs\*
671671

672+
# filestream input supports parsing of GZIP files. By default, GZIP support is
673+
# enabled. If you want to disable it, set `gzip_disabled` to true.
674+
#gzip_disabled: false
675+
672676
# Configure the file encoding for reading files with international characters
673677
# following the W3C recommendation for HTML5 (http://www.w3.org/TR/encoding).
674678
# Some sample encodings:

filebeat/filebeat.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ filebeat.inputs:
3232
- /var/log/*.log
3333
#- c:\programdata\elasticsearch\logs\*
3434

35+
# filestream input supports parsing of GZIP files. By default, GZIP support is
36+
# enabled. If you want to disable it, set `gzip_disabled` to true.
37+
#gzip_disabled: false
38+
3539
# Exclude lines. A list of regular expressions to match. It drops the lines that are
3640
# matching any regular expression from the list.
3741
# Line filtering happens after the parsers pipeline. If you would like to filter lines

filebeat/input/filestream/config.go

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ import (
2626
"github.com/dustin/go-humanize"
2727

2828
loginp "github.com/elastic/beats/v7/filebeat/input/filestream/internal/input-logfile"
29-
"github.com/elastic/beats/v7/libbeat/common/cfgwarn"
3029
"github.com/elastic/beats/v7/libbeat/common/match"
3130
"github.com/elastic/beats/v7/libbeat/reader/parser"
3231
"github.com/elastic/beats/v7/libbeat/reader/readfile"
@@ -44,10 +43,13 @@ type config struct {
4443
FileWatcher fileWatcherConfig `config:"prospector.scanner"`
4544
FileIdentity *conf.Namespace `config:"file_identity"`
4645

47-
// GZIPExperimental enables beta support for ingesting GZIP files.
48-
// When set to true the input will transparently stream-decompress GZIP files.
49-
// This feature is experimental and subject to change.
50-
GZIPExperimental bool `config:"gzip_experimental"`
46+
// GZIPDisabled disables decompressing GZIP at ingestion time.
47+
GZIPDisabled bool `config:"gzip_disabled"`
48+
49+
// GZIPExperimental is deprecated and ignored. It is kept to log a warning
50+
// if it's set. Use GZIPDisabled to configure GZIP behaviour.
51+
// Deprecated.
52+
GZIPExperimental *bool `config:"gzip_experimental"`
5153

5254
// -1 means that registry will never be cleaned, disabling clean_inactive.
5355
// Setting it to 0 also disables clean_inactive
@@ -207,11 +209,11 @@ func (c *config) Validate() error {
207209
}
208210
}
209211

210-
if c.GZIPExperimental {
211-
// Validate file_identity must be fingerprint when gzip support is enabled.
212+
if !c.GZIPDisabled {
213+
// file_identity must be fingerprint when gzip support is enabled.
212214
if c.FileIdentity != nil && c.FileIdentity.Name() != fingerprintName {
213215
return fmt.Errorf(
214-
"gzip_experimental=true requires file_identity to be 'fingerprint'")
216+
"to use a file identity other than 'fingerprint', disable gzip, set 'gzip_disabled: true'")
215217
}
216218
}
217219

@@ -230,9 +232,10 @@ func (c config) checkUnsupportedParams(logger *logp.Logger) {
230232
"duplication and incomplete input metrics, it's use is " +
231233
"highly discouraged.")
232234
}
233-
if c.GZIPExperimental {
234-
logger.Named("filestream").Warn(cfgwarn.Beta(
235-
"filestream: beta gzip support enabled"))
235+
if c.GZIPExperimental != nil {
236+
logger.Named("filestream").Warn(
237+
"'gzip_experimental' has been removed. GZIP support is now " +
238+
"enabled by default. To disable it, use 'gzip_disabled: true'")
236239
}
237240
}
238241

filebeat/input/filestream/config_test.go

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,14 +68,13 @@ func TestConfigValidate(t *testing.T) {
6868
assert.Error(t, err)
6969
})
7070

71-
t.Run("gzip_experimental works with file_identity.fingerprint", func(t *testing.T) {
71+
t.Run("gzip works by default with file_identity.fingerprint", func(t *testing.T) {
7272
c, err := conf.NewConfigFrom(`
7373
id: 'some id'
7474
paths: [/foo/bar*]
75-
gzip_experimental: true
76-
file_identity.fingerprint: ~
7775
`)
7876
require.NoError(t, err, "could not create config from string")
77+
7978
got := defaultConfig()
8079
err = c.Unpack(&got)
8180
require.NoError(t, err, "could not unpack config")
@@ -84,19 +83,74 @@ file_identity.fingerprint: ~
8483
assert.NoError(t, err)
8584
})
8685

87-
t.Run("gzip_experimental requires file_identity.fingerprint", func(t *testing.T) {
86+
t.Run("gzip requires file_identity.fingerprint", func(t *testing.T) {
8887
c, err := conf.NewConfigFrom(`
8988
id: 'some id'
9089
paths: [/foo/bar*]
91-
gzip_experimental: true
92-
file_identity.path: ~
90+
file_identity.native: ~
9391
`)
9492
require.NoError(t, err, "could not create config from string")
93+
9594
got := defaultConfig()
9695
err = c.Unpack(&got)
96+
9797
assert.ErrorContains(t,
9898
err,
99-
"gzip_experimental=true requires file_identity to be 'fingerprint")
99+
"to use a file identity other than 'fingerprint', disable gzip, set 'gzip_disabled: true'")
100+
})
101+
102+
t.Run("gzip_disabled allows non-fingerprint file_identity", func(t *testing.T) {
103+
c, err := conf.NewConfigFrom(`
104+
id: 'some id'
105+
paths: [/foo/bar*]
106+
gzip_disabled: true
107+
file_identity.path: ~
108+
`)
109+
require.NoError(t, err, "could not create config from string")
110+
111+
got := defaultConfig()
112+
err = c.Unpack(&got)
113+
require.NoError(t, err, "could not unpack config")
114+
115+
err = got.Validate()
116+
assert.NoError(t, err)
117+
})
118+
119+
t.Run("gzip_experimental true is accepted but ignored", func(t *testing.T) {
120+
c, err := conf.NewConfigFrom(`
121+
id: 'some id'
122+
paths: [/foo/bar*]
123+
gzip_experimental: true
124+
file_identity.fingerprint: ~
125+
`)
126+
require.NoError(t, err, "could not create config from string")
127+
128+
got := defaultConfig()
129+
err = c.Unpack(&got)
130+
require.NoError(t, err, "could not unpack config")
131+
132+
err = got.Validate()
133+
assert.NoError(t, err)
134+
// gzip_experimental is ignored, gzip is enabled by default
135+
assert.False(t, got.GZIPDisabled, "gzip should be enabled")
136+
})
137+
138+
t.Run("gzip_experimental false is accepted but ignored", func(t *testing.T) {
139+
c, err := conf.NewConfigFrom(`
140+
id: 'some id'
141+
paths: [/foo/bar*]
142+
gzip_experimental: false
143+
file_identity.fingerprint: ~
144+
`)
145+
require.NoError(t, err, "could not create config from string")
146+
got := defaultConfig()
147+
err = c.Unpack(&got)
148+
require.NoError(t, err, "could not unpack config")
149+
150+
err = got.Validate()
151+
assert.NoError(t, err)
152+
// gzip_experimental is ignored, gzip is still enabled by default
153+
assert.False(t, got.GZIPDisabled, "gzip should be enabled")
100154
})
101155
}
102156

filebeat/input/filestream/filestream_test.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,7 @@ func TestLogFileTimedClosing(t *testing.T) {
7272

7373
for _, tc := range testCases {
7474
fs := filestream{
75-
readerConfig: readerConfig{BufferSize: 512},
76-
gzipExperimental: true}
75+
readerConfig: readerConfig{BufferSize: 512}}
7776
f, err := fs.newFile(tc.createFile(t))
7877
require.NoError(t, err,
7978
"could not create file for reading")
@@ -151,8 +150,7 @@ func TestLogFileTruncated(t *testing.T) {
151150
osFile := tc.createFile(t)
152151

153152
fs := filestream{
154-
readerConfig: readerConfig{BufferSize: 512},
155-
gzipExperimental: true}
153+
readerConfig: readerConfig{BufferSize: 512}}
156154

157155
f, err := fs.newFile(osFile)
158156
require.NoError(t, err, "could not create file for reading")

filebeat/input/filestream/input.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ type filestream struct {
6868
parsers parser.Config
6969
takeOver loginp.TakeOverConfig
7070
scannerCheckInterval time.Duration
71-
gzipExperimental bool
71+
gzipDisabled bool
7272

7373
// Function references for testing
7474
waitGracePeriodFn func(
@@ -141,7 +141,7 @@ func configure(
141141
closerConfig: c.Close,
142142
parsers: c.Reader.Parsers,
143143
takeOver: c.TakeOver,
144-
gzipExperimental: c.GZIPExperimental,
144+
gzipDisabled: c.GZIPDisabled,
145145
deleterConfig: c.Delete,
146146
waitGracePeriodFn: waitGracePeriod,
147147
tickFn: time.Tick,
@@ -186,8 +186,7 @@ func (inp *filestream) Run(
186186
log := ctx.Logger.With("path", fs.newPath).With("state-id", src.Name())
187187
state := initState(log, cursor, fs)
188188
if state.EOF {
189-
// TODO: change it to debug once GZIP isn't experimental anymore.
190-
log.Infof("GZIP file already read to EOF, not reading it again, file name '%s'",
189+
log.Debugf("GZIP file already read to EOF, not reading it again, file name '%s'",
191190
fs.newPath)
192191
return nil
193192
}
@@ -582,19 +581,20 @@ func (inp *filestream) openFile(
582581
return f, enc, truncated, nil
583582
}
584583

585-
// newFile wraps the given os.File into an appropriate File interface implementation.
584+
// newFile wraps the given os.File into an appropriate File interface
585+
// implementation.
586586
//
587-
// If the 'gzip_experimental' flag is false, it returns a plain file reader
587+
// If the 'gzip_disabled' config is true, it returns a plain file reader
588588
// (plainFile).
589589
//
590-
// If the 'gzip_experimental' flag is true, it attempts to detect if the
591-
// underlying file is GZIP compressed. If it is, it returns a GZIP-aware file
592-
// reader (gzipSeekerReader). If the file is not GZIP compressed, it returns a
593-
// plain file reader (plainFile).
590+
// If the 'gzip_disabled' flag is false (the default), it attempts to detect if
591+
// the underlying file is GZIP compressed. If it is, it returns a GZIP-aware
592+
// file reader (gzipSeekerReader). If the file is not GZIP compressed, it
593+
// returns a plain file reader (plainFile).
594594
//
595595
// It returns an error if any happens.
596596
func (inp *filestream) newFile(rawFile *os.File) (File, error) {
597-
if !inp.gzipExperimental {
597+
if inp.gzipDisabled {
598598
return newPlainFile(rawFile), nil
599599
}
600600

filebeat/input/filestream/input_test.go

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -190,31 +190,31 @@ func TestNewFile(t *testing.T) {
190190
require.NoError(t, err)
191191

192192
testCases := map[string]struct {
193-
gzipEnabled bool
193+
gzipDisabled bool
194194
filePath string
195195
expectedType interface{}
196196
expectError bool
197197
errorContains string
198198
setup func(t *testing.T, filePath string) *os.File
199199
}{
200200
"gzip_disabled_returns_plain_file": {
201-
gzipEnabled: false,
201+
gzipDisabled: true,
202202
filePath: plainFilePath,
203203
expectedType: &plainFile{},
204204
},
205205
"gzip_enabled_with_plain_file_returns_plain_file": {
206-
gzipEnabled: true,
206+
gzipDisabled: false,
207207
filePath: plainFilePath,
208208
expectedType: &plainFile{},
209209
},
210210
"gzip_enabled_with_gzip_file_returns_gzip_reader": {
211-
gzipEnabled: true,
211+
gzipDisabled: false,
212212
filePath: gzippedFilePath,
213213
expectedType: &gzipSeekerReader{},
214214
},
215215
"gzip_enabled_with_unreadable_file_returns_error": {
216-
gzipEnabled: true,
217-
filePath: plainFilePath, // content doesn't matter
216+
gzipDisabled: false,
217+
filePath: plainFilePath, // content doesn't matter
218218
setup: func(t *testing.T, filePath string) *os.File {
219219
// Return a file that is already closed to trigger a read error
220220
// in IsGZIP
@@ -231,8 +231,8 @@ func TestNewFile(t *testing.T) {
231231
for name, tc := range testCases {
232232
t.Run(name, func(t *testing.T) {
233233
inp := &filestream{
234-
gzipExperimental: tc.gzipEnabled,
235-
readerConfig: defaultReaderConfig(),
234+
gzipDisabled: tc.gzipDisabled,
235+
readerConfig: defaultReaderConfig(),
236236
}
237237

238238
var rawFile *os.File
@@ -278,33 +278,33 @@ func TestOpenFile_GZIPNeverTruncated(t *testing.T) {
278278
require.NoError(t, err, "could not save gzip file")
279279

280280
tcs := []struct {
281-
name string
282-
gzipExperimental bool
283-
path string
284-
want bool
285-
errMsg string
281+
name string
282+
gzipDisabled bool
283+
path string
284+
want bool
285+
errMsg string
286286
}{
287287
{
288-
name: "plain file is truncated",
289-
gzipExperimental: false,
290-
path: plainPath,
291-
want: true,
292-
errMsg: "plain file should be considered truncated",
288+
name: "plain file is truncated",
289+
gzipDisabled: true,
290+
path: plainPath,
291+
want: true,
292+
errMsg: "plain file should be considered truncated",
293293
},
294294
{
295-
name: "GZIP file is never truncated",
296-
gzipExperimental: true,
297-
path: gzPath,
298-
want: false,
299-
errMsg: "GZIP file skips truncated validation",
295+
name: "GZIP file is never truncated",
296+
gzipDisabled: false,
297+
path: gzPath,
298+
want: false,
299+
errMsg: "GZIP file skips truncated validation",
300300
},
301301
}
302302

303303
for _, tc := range tcs {
304304
inp := filestream{
305-
gzipExperimental: tc.gzipExperimental,
306-
encodingFactory: encoding.Plain,
307-
readerConfig: readerConfig{BufferSize: 32},
305+
gzipDisabled: tc.gzipDisabled,
306+
encodingFactory: encoding.Plain,
307+
readerConfig: readerConfig{BufferSize: 32},
308308
}
309309

310310
f, _, truncated, err := inp.openFile(

0 commit comments

Comments
 (0)