Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: feature

# Change summary; an 80ish-character description of the change.
summary: "filestream: GZIP support is now GA and enabled by default"

# Long description; in case the summary is not enough to describe the change,
# this field accommodates a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
description: |
GZIP file support in the filestream input is now generally available and
enabled by default. Previously, this feature was beta and required setting
`gzip_experimental: true`.

The `gzip_experimental` flag is now deprecated. It is kept for backward
compatibility but its value is ignored. If this flag is present in the
configuration, a warning is logged pointing to the new `gzip_disabled` flag.

To disable GZIP support, a new boolean flag `gzip_disabled` has been
introduced. Set it to `true` to revert to the old behavior of not
decompressing GZIP files.

GZIP support requires the `file_identity` option to be set to `fingerprint`.
If a different `file_identity` is used, GZIP support must be explicitly
disabled by setting `gzip_disabled: true`. Failure to do so will result
in an error, preventing the filestream input from starting.

component: filebeat

# PR URL; optional; the PR number that added the changeset.
# If not present, it is automatically filled by the tooling, which finds the PR where this changelog fragment was added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
pr: https://github.com/elastic/beats/pull/47893

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present, it is automatically filled by the tooling with the issue linked to the PR number.
issue: https://github.com/elastic/beats/issues/47880
4 changes: 4 additions & 0 deletions filebeat/filebeat.reference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,10 @@ filebeat.inputs:
- /var/log/*.log
#- c:\programdata\elasticsearch\logs\*

# filestream input supports parsing of GZIP files. By default, GZIP support is
# enabled. If you want to disable it, set `gzip_disabled` to true.
#gzip_disabled: false

# Configure the file encoding for reading files with international characters
# following the W3C recommendation for HTML5 (http://www.w3.org/TR/encoding).
# Some sample encodings:
Expand Down
4 changes: 4 additions & 0 deletions filebeat/filebeat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ filebeat.inputs:
- /var/log/*.log
#- c:\programdata\elasticsearch\logs\*

# filestream input supports parsing of GZIP files. By default, GZIP support is
# enabled. If you want to disable it, set `gzip_disabled` to true.
#gzip_disabled: false

# Exclude lines. A list of regular expressions to match. It drops the lines that are
# matching any regular expression from the list.
# Line filtering happens after the parsers pipeline. If you would like to filter lines
Expand Down
25 changes: 14 additions & 11 deletions filebeat/input/filestream/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
"github.com/dustin/go-humanize"

loginp "github.com/elastic/beats/v7/filebeat/input/filestream/internal/input-logfile"
"github.com/elastic/beats/v7/libbeat/common/cfgwarn"
"github.com/elastic/beats/v7/libbeat/common/match"
"github.com/elastic/beats/v7/libbeat/reader/parser"
"github.com/elastic/beats/v7/libbeat/reader/readfile"
Expand All @@ -44,10 +43,13 @@
FileWatcher fileWatcherConfig `config:"prospector.scanner"`
FileIdentity *conf.Namespace `config:"file_identity"`

// GZIPExperimental enables beta support for ingesting GZIP files.
// When set to true the input will transparently stream-decompress GZIP files.
// This feature is experimental and subject to change.
GZIPExperimental bool `config:"gzip_experimental"`
// GZIPDisabled disables decompressing GZIP at ingestion time.
GZIPDisabled bool `config:"gzip_disabled"`

// GZIPExperimental is ignored. It is kept only so that a warning can be
// logged if it's set.
//
// Deprecated: use GZIPDisabled to configure GZIP behaviour.
GZIPExperimental *bool `config:"gzip_experimental"`

// -1 means that registry will never be cleaned, disabling clean_inactive.
// Setting it to 0 also disables clean_inactive
Expand Down Expand Up @@ -207,11 +209,11 @@
}
}

if c.GZIPExperimental {
// Validate file_identity must be fingerprint when gzip support is enabled.
if !c.GZIPDisabled {
// file_identity must be fingerprint when gzip support is enabled.
if c.FileIdentity != nil && c.FileIdentity.Name() != fingerprintName {
return fmt.Errorf(
"gzip_experimental=true requires file_identity to be 'fingerprint'")
"to use a file identity other than 'fingerprint', disable gzip, set 'gzip_disabled: true'")
}
}

Expand All @@ -222,7 +224,7 @@
return nil
}

// checkUnsupportedParams checks if unsupported/deprecated/discouraged parameters are set and logs a warning

Check failure on line 227 in filebeat/input/filestream/config.go

View workflow job for this annotation

GitHub Actions / lint (ubuntu-latest)

`paramaters` is a misspelling of `parameters` (misspell)
func (c config) checkUnsupportedParams(logger *logp.Logger) {
if c.AllowIDDuplication {
logger.Named("filestream").Warn(
Expand All @@ -230,9 +232,10 @@
"duplication and incomplete input metrics, it's use is " +
"highly discouraged.")
}
if c.GZIPExperimental {
logger.Named("filestream").Warn(cfgwarn.Beta(
"filestream: beta gzip support enabled"))
if c.GZIPExperimental != nil {
logger.Named("filestream").Warn(
"'gzip_experimental' has been removed. GZIP support is now " +
"enabled by default. To disable it, use 'gzip_disabled: true'")
}
}

Expand Down
68 changes: 61 additions & 7 deletions filebeat/input/filestream/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,13 @@ func TestConfigValidate(t *testing.T) {
assert.Error(t, err)
})

t.Run("gzip_experimental works with file_identity.fingerprint", func(t *testing.T) {
t.Run("gzip works by default with file_identity.fingerprint", func(t *testing.T) {
c, err := conf.NewConfigFrom(`
id: 'some id'
paths: [/foo/bar*]
gzip_experimental: true
file_identity.fingerprint: ~
`)
require.NoError(t, err, "could not create config from string")

got := defaultConfig()
err = c.Unpack(&got)
require.NoError(t, err, "could not unpack config")
Expand All @@ -84,19 +83,74 @@ file_identity.fingerprint: ~
assert.NoError(t, err)
})

t.Run("gzip_experimental requires file_identity.fingerprint", func(t *testing.T) {
t.Run("gzip requires file_identity.fingerprint", func(t *testing.T) {
c, err := conf.NewConfigFrom(`
id: 'some id'
paths: [/foo/bar*]
gzip_experimental: true
file_identity.path: ~
file_identity.native: ~
`)
require.NoError(t, err, "could not create config from string")

got := defaultConfig()
err = c.Unpack(&got)

assert.ErrorContains(t,
err,
"gzip_experimental=true requires file_identity to be 'fingerprint")
"to use a file identity other than 'fingerprint', disable gzip, set 'gzip_disabled: true'")
})

t.Run("gzip_disabled allows non-fingerprint file_identity", func(t *testing.T) {
c, err := conf.NewConfigFrom(`
id: 'some id'
paths: [/foo/bar*]
gzip_disabled: true
file_identity.path: ~
`)
require.NoError(t, err, "could not create config from string")

got := defaultConfig()
err = c.Unpack(&got)
require.NoError(t, err, "could not unpack config")

err = got.Validate()
assert.NoError(t, err)
})

t.Run("gzip_experimental true is accepted but ignored", func(t *testing.T) {
c, err := conf.NewConfigFrom(`
id: 'some id'
paths: [/foo/bar*]
gzip_experimental: true
file_identity.fingerprint: ~
`)
require.NoError(t, err, "could not create config from string")

got := defaultConfig()
err = c.Unpack(&got)
require.NoError(t, err, "could not unpack config")

err = got.Validate()
assert.NoError(t, err)
// gzip_experimental is ignored, gzip is enabled by default
assert.False(t, got.GZIPDisabled, "gzip should be enabled")
})

t.Run("gzip_experimental false is accepted but ignored", func(t *testing.T) {
c, err := conf.NewConfigFrom(`
id: 'some id'
paths: [/foo/bar*]
gzip_experimental: false
file_identity.fingerprint: ~
`)
require.NoError(t, err, "could not create config from string")
got := defaultConfig()
err = c.Unpack(&got)
require.NoError(t, err, "could not unpack config")

err = got.Validate()
assert.NoError(t, err)
// gzip_experimental is ignored, gzip is still enabled by default
assert.False(t, got.GZIPDisabled, "gzip should be enabled")
})
}

Expand Down
6 changes: 2 additions & 4 deletions filebeat/input/filestream/filestream_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,7 @@ func TestLogFileTimedClosing(t *testing.T) {

for _, tc := range testCases {
fs := filestream{
readerConfig: readerConfig{BufferSize: 512},
gzipExperimental: true}
readerConfig: readerConfig{BufferSize: 512}}
f, err := fs.newFile(tc.createFile(t))
require.NoError(t, err,
"could not create file for reading")
Expand Down Expand Up @@ -151,8 +150,7 @@ func TestLogFileTruncated(t *testing.T) {
osFile := tc.createFile(t)

fs := filestream{
readerConfig: readerConfig{BufferSize: 512},
gzipExperimental: true}
readerConfig: readerConfig{BufferSize: 512}}

f, err := fs.newFile(osFile)
require.NoError(t, err, "could not create file for reading")
Expand Down
22 changes: 11 additions & 11 deletions filebeat/input/filestream/input.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ type filestream struct {
parsers parser.Config
takeOver loginp.TakeOverConfig
scannerCheckInterval time.Duration
gzipExperimental bool
gzipDisabled bool

// Function references for testing
waitGracePeriodFn func(
Expand Down Expand Up @@ -141,7 +141,7 @@ func configure(
closerConfig: c.Close,
parsers: c.Reader.Parsers,
takeOver: c.TakeOver,
gzipExperimental: c.GZIPExperimental,
gzipDisabled: c.GZIPDisabled,
deleterConfig: c.Delete,
waitGracePeriodFn: waitGracePeriod,
tickFn: time.Tick,
Expand Down Expand Up @@ -186,8 +186,7 @@ func (inp *filestream) Run(
log := ctx.Logger.With("path", fs.newPath).With("state-id", src.Name())
state := initState(log, cursor, fs)
if state.EOF {
// TODO: change it to debug once GZIP isn't experimental anymore.
log.Infof("GZIP file already read to EOF, not reading it again, file name '%s'",
log.Debugf("GZIP file already read to EOF, not reading it again, file name '%s'",
fs.newPath)
return nil
}
Expand Down Expand Up @@ -582,19 +581,20 @@ func (inp *filestream) openFile(
return f, enc, truncated, nil
}

// newFile wraps the given os.File into an appropriate File interface implementation.
// newFile wraps the given os.File into an appropriate File interface
// implementation.
//
// If the 'gzip_experimental' flag is false, it returns a plain file reader
// If the 'gzip_disabled' config is true, it returns a plain file reader
// (plainFile).
//
// If the 'gzip_experimental' flag is true, it attempts to detect if the
// underlying file is GZIP compressed. If it is, it returns a GZIP-aware file
// reader (gzipSeekerReader). If the file is not GZIP compressed, it returns a
// plain file reader (plainFile).
// If the 'gzip_disabled' flag is false (the default), it attempts to detect if
// the underlying file is GZIP compressed. If it is, it returns a GZIP-aware
// file reader (gzipSeekerReader). If the file is not GZIP compressed, it
// returns a plain file reader (plainFile).
//
// It returns an error if any happens.
func (inp *filestream) newFile(rawFile *os.File) (File, error) {
if !inp.gzipExperimental {
if inp.gzipDisabled {
return newPlainFile(rawFile), nil
}

Expand Down
52 changes: 26 additions & 26 deletions filebeat/input/filestream/input_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
)

func BenchmarkFilestream(b *testing.B) {
logp.TestingSetup(logp.ToDiscardOutput())

Check failure on line 50 in filebeat/input/filestream/input_test.go

View workflow job for this annotation

GitHub Actions / lint (ubuntu-latest)

SA1019: logp.TestingSetup is deprecated: Prefer using localized loggers. Use logptest.NewTestingLogger. (staticcheck)

b.Run("single file", func(b *testing.B) {
lineCount := 10000
Expand Down Expand Up @@ -190,31 +190,31 @@
require.NoError(t, err)

testCases := map[string]struct {
gzipEnabled bool
gzipDisabled bool
filePath string
expectedType interface{}
expectError bool
errorContains string
setup func(t *testing.T, filePath string) *os.File
}{
"gzip_disabled_returns_plain_file": {
gzipEnabled: false,
gzipDisabled: true,
filePath: plainFilePath,
expectedType: &plainFile{},
},
"gzip_enabled_with_plain_file_returns_plain_file": {
gzipEnabled: true,
gzipDisabled: false,
filePath: plainFilePath,
expectedType: &plainFile{},
},
"gzip_enabled_with_gzip_file_returns_gzip_reader": {
gzipEnabled: true,
gzipDisabled: false,
filePath: gzippedFilePath,
expectedType: &gzipSeekerReader{},
},
"gzip_enabled_with_unreadable_file_returns_error": {
gzipEnabled: true,
filePath: plainFilePath, // content doesn't matter
gzipDisabled: false,
filePath: plainFilePath, // content doesn't matter
setup: func(t *testing.T, filePath string) *os.File {
// Return a file that is already closed to trigger a read error
// in IsGZIP
Expand All @@ -231,8 +231,8 @@
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
inp := &filestream{
gzipExperimental: tc.gzipEnabled,
readerConfig: defaultReaderConfig(),
gzipDisabled: tc.gzipDisabled,
readerConfig: defaultReaderConfig(),
}

var rawFile *os.File
Expand Down Expand Up @@ -278,33 +278,33 @@
require.NoError(t, err, "could not save gzip file")

tcs := []struct {
name string
gzipExperimental bool
path string
want bool
errMsg string
name string
gzipDisabled bool
path string
want bool
errMsg string
}{
{
name: "plain file is truncated",
gzipExperimental: false,
path: plainPath,
want: true,
errMsg: "plain file should be considered truncated",
name: "plain file is truncated",
gzipDisabled: true,
path: plainPath,
want: true,
errMsg: "plain file should be considered truncated",
},
{
name: "GZIP file is never truncated",
gzipExperimental: true,
path: gzPath,
want: false,
errMsg: "GZIP file skips truncated validation",
name: "GZIP file is never truncated",
gzipDisabled: false,
path: gzPath,
want: false,
errMsg: "GZIP file skips truncated validation",
},
}

for _, tc := range tcs {
inp := filestream{
gzipExperimental: tc.gzipExperimental,
encodingFactory: encoding.Plain,
readerConfig: readerConfig{BufferSize: 32},
gzipDisabled: tc.gzipDisabled,
encodingFactory: encoding.Plain,
readerConfig: readerConfig{BufferSize: 32},
}

f, _, truncated, err := inp.openFile(
Expand Down
Loading
Loading