open-telemetry · carsonip · Apr 23, 2024 · Apr 23, 2024 · Apr 23, 2024 · Apr 23, 2024
@@ -0,0 +1,33 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: deprecation
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: elasticsearchexporter
+
+# A brief description of the change.  Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Improve reliability when used with a persistent queue. Deprecate config options `flush.*`; use `batcher.*` instead.
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [32377]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext: |
+  Move buffering from bulk indexer to batch sender to improve reliability.
-  Move buffering from bulk indexer to batch sender to improve reliability.
+  Moves buffering from bulk indexer to batch sender to improve reliability.
-  Move buffering from bulk indexer to batch sender to improve reliability.
+  Moves buffering from bulk indexer to batch sender to improve reliability.
+  With this change, there should be no event loss when a persistent queue is used, even if the collector crashes.
+  Introduce `batcher.*` to configure the batch sender which is now enabled by default.
+  Option `flush.bytes` is deprecated. Use the new `batcher.min_size_items` option to control the minimum number of items (log records, spans) to trigger a flush. `batcher.min_size_items` will be set to the value of `flush.bytes` / 1000 if `flush.bytes` is non-zero.
+  Option `flush.interval` is deprecated. Use the new `batcher.flush_timeout` option to control the maximum age of the buffer. `batcher.flush_timeout` will be set to the value of `flush.interval` if `flush.interval` is non-zero.
+  Queue sender `sending_queue.enabled` defaults to `true`.
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: [user]
@@ -78,7 +78,20 @@ All other defaults are as defined by [confighttp].
 
 ### Queuing
 
-The Elasticsearch exporter supports the common [`sending_queue` settings][exporterhelper]. However, the sending queue is currently disabled by default.
+The Elasticsearch exporter supports the common [`sending_queue` settings][exporterhelper]. The sending queue is enabled by default.
+
+The default `num_consumers` is `100`.
+
+When a persistent queue is used, there should be no event loss even if the collector crashes.
+
+### Batching
+
+The Elasticsearch exporter supports the common `batcher` settings.
+
+- `enabled` (default=true): Enable batching of requests into a single bulk request.
+- `min_size_items` (default=5000): Minimum number of log records / spans in the buffer to trigger a flush immediately.
+- `max_size_items` (default=10000): Maximum number of log records / spans in a request.
+- `flush_timeout` (default=30s): Maximum time the oldest item may spend in the buffer, aka "max age of buffer". Once this time elapses, a flush happens regardless of how much content is in the buffer.
 
 ### Elasticsearch document routing
 
@@ -160,10 +173,10 @@ This can be configured through the following settings:
 The Elasticsearch exporter uses the [Elasticsearch Bulk API] for indexing documents.
 The behaviour of this bulk indexing can be configured with the following settings:
 
-- `num_workers` (default=runtime.NumCPU()): Number of workers publishing bulk requests concurrently.
+- `num_workers` (default=runtime.NumCPU()): Maximum number of concurrent bulk requests.
 - `flush`: Event bulk indexer buffer flush settings
-  - `bytes` (default=5000000): Write buffer flush size limit.
-  - `interval` (default=30s): Write buffer flush time limit.
+  - `bytes` (DEPRECATED, use `batcher.min_size_items` instead): Write buffer flush size limit.
+  - `interval` (DEPRECATED, use `batcher.flush_timeout` instead): Maximum time the oldest item may spend in the buffer, aka "max age of buffer". Once this time elapses, a flush happens regardless of how much content is in the buffer.
 - `retry`: Elasticsearch bulk request retry settings
   - `enabled` (default=true): Enable/Disable request retry on error. Failed requests are retried with exponential backoff.
   - `max_requests` (default=3): Number of HTTP request retries.

@@ -14,13 +14,19 @@ import (
 
 	"go.opentelemetry.io/collector/config/confighttp"
 	"go.opentelemetry.io/collector/config/configopaque"
+	"go.opentelemetry.io/collector/exporter/exporterbatcher"
 	"go.opentelemetry.io/collector/exporter/exporterhelper"
 	"go.uber.org/zap"
 )
 
 // Config defines configuration for Elastic exporter.
 type Config struct {
 	exporterhelper.QueueSettings `mapstructure:"sending_queue"`
+
+	// Experimental: This configuration is at the early stage of development and may change without backward compatibility
+	// until https://github.com/open-telemetry/opentelemetry-collector/issues/8122 is resolved.
+	BatcherConfig exporterbatcher.Config `mapstructure:"batcher"`
+
 	// Endpoints holds the Elasticsearch URLs the exporter should send events to.
 	//
 	// This setting is required if CloudID is not set and if the
@@ -69,7 +75,7 @@ type Config struct {
 	Authentication          AuthenticationSettings `mapstructure:",squash"`
 	Discovery               DiscoverySettings      `mapstructure:"discover"`
 	Retry                   RetrySettings          `mapstructure:"retry"`
-	Flush                   FlushSettings          `mapstructure:"flush"`
+	Flush                   FlushSettings          `mapstructure:"flush"` // Deprecated: use `batcher` instead.
 	Mapping                 MappingsSettings       `mapstructure:"mapping"`
 	LogstashFormat          LogstashFormatSettings `mapstructure:"logstash_format"`
 
@@ -131,9 +137,13 @@ type DiscoverySettings struct {
 // all events already serialized into the send-buffer.
 type FlushSettings struct {
 	// Bytes sets the send buffer flushing limit.
+	//
+	// Deprecated: Use `batcher.min_size_items` instead.
 	Bytes int `mapstructure:"bytes"`
 
 	// Interval configures the max age of a document in the send buffer.
+	//
+	// Deprecated: Use `batcher.flush_timeout` instead.
 	Interval time.Duration `mapstructure:"interval"`
 }
 

@@ -16,7 +16,9 @@ import (
 	"go.opentelemetry.io/collector/config/confighttp"
 	"go.opentelemetry.io/collector/config/configopaque"
 	"go.opentelemetry.io/collector/confmap/confmaptest"
+	"go.opentelemetry.io/collector/exporter/exporterbatcher"
 	"go.opentelemetry.io/collector/exporter/exporterhelper"
+	"go.opentelemetry.io/collector/exporter/exporterqueue"
 
 	"github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/metadata"
 )
@@ -53,9 +55,9 @@ func TestConfig(t *testing.T) {
 			configFile: "config.yaml",
 			expected: &Config{
 				QueueSettings: exporterhelper.QueueSettings{
-					Enabled:      false,
-					NumConsumers: exporterhelper.NewDefaultQueueSettings().NumConsumers,
-					QueueSize:    exporterhelper.NewDefaultQueueSettings().QueueSize,
+					Enabled:      true,
+					NumConsumers: 100,
+					QueueSize:    exporterqueue.NewDefaultConfig().QueueSize,
 				},
 				Endpoints: []string{"https://elastic.example.com:9200"},
 				Index:     "",
@@ -88,8 +90,11 @@ func TestConfig(t *testing.T) {
 				Discovery: DiscoverySettings{
 					OnStart: true,
 				},
-				Flush: FlushSettings{
-					Bytes: 10485760,
+				BatcherConfig: exporterbatcher.Config{
+					Enabled:       true,
+					FlushTimeout:  5 * time.Second,
+					MinSizeConfig: exporterbatcher.MinSizeConfig{MinSizeItems: 100},
+					MaxSizeConfig: exporterbatcher.MaxSizeConfig{MaxSizeItems: 200},
 				},
 				Retry: RetrySettings{
 					Enabled:         true,
@@ -108,16 +113,17 @@ func TestConfig(t *testing.T) {
 					PrefixSeparator: "-",
 					DateFormat:      "%Y.%m.%d",
 				},
+				NumWorkers: 1,
 			},
 		},
 		{
 			id:         component.NewIDWithName(metadata.Type, "log"),
 			configFile: "config.yaml",
 			expected: &Config{
 				QueueSettings: exporterhelper.QueueSettings{
-					Enabled:      true,
-					NumConsumers: exporterhelper.NewDefaultQueueSettings().NumConsumers,
-					QueueSize:    exporterhelper.NewDefaultQueueSettings().QueueSize,
+					Enabled:      false,
+					NumConsumers: 100,
+					QueueSize:    exporterqueue.NewDefaultConfig().QueueSize,
 				},
 				Endpoints: []string{"http://localhost:9200"},
 				Index:     "",
@@ -150,8 +156,11 @@ func TestConfig(t *testing.T) {
 				Discovery: DiscoverySettings{
 					OnStart: true,
 				},
-				Flush: FlushSettings{
-					Bytes: 10485760,
+				BatcherConfig: exporterbatcher.Config{
+					Enabled:       true,
+					FlushTimeout:  5 * time.Second,
+					MinSizeConfig: exporterbatcher.MinSizeConfig{MinSizeItems: 100},
+					MaxSizeConfig: exporterbatcher.MaxSizeConfig{MaxSizeItems: 200},
 				},
 				Retry: RetrySettings{
 					Enabled:         true,
@@ -170,6 +179,7 @@ func TestConfig(t *testing.T) {
 					PrefixSeparator: "-",
 					DateFormat:      "%Y.%m.%d",
 				},
+				NumWorkers: 1,
 			},
 		},
 		{
@@ -178,7 +188,7 @@ func TestConfig(t *testing.T) {
 			expected: &Config{
 				QueueSettings: exporterhelper.QueueSettings{
 					Enabled:      true,
-					NumConsumers: exporterhelper.NewDefaultQueueSettings().NumConsumers,
+					NumConsumers: 100,
 					QueueSize:    exporterhelper.NewDefaultQueueSettings().QueueSize,
 				},
 				Endpoints: []string{"http://localhost:9200"},
@@ -212,8 +222,11 @@ func TestConfig(t *testing.T) {
 				Discovery: DiscoverySettings{
 					OnStart: true,
 				},
-				Flush: FlushSettings{
-					Bytes: 10485760,
+				BatcherConfig: exporterbatcher.Config{
+					Enabled:       true,
+					FlushTimeout:  5 * time.Second,
+					MinSizeConfig: exporterbatcher.MinSizeConfig{MinSizeItems: 100},
+					MaxSizeConfig: exporterbatcher.MaxSizeConfig{MaxSizeItems: 200},
 				},
 				Retry: RetrySettings{
 					Enabled:         true,
@@ -232,6 +245,7 @@ func TestConfig(t *testing.T) {
 					PrefixSeparator: "-",
 					DateFormat:      "%Y.%m.%d",
 				},
+				NumWorkers: 1,
 			},
 		},
 		{