From 3cd390395a820b917b976d448f477c656d22c96d Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 23 Apr 2024 12:42:53 +0800 Subject: [PATCH 001/117] WIP --- exporter/elasticsearchexporter/factory.go | 55 ++++++++++++++++--- .../elasticsearchexporter/logs_exporter.go | 54 ++++++++++++++++++ exporter/elasticsearchexporter/request.go | 38 +++++++++++++ 3 files changed, 140 insertions(+), 7 deletions(-) create mode 100644 exporter/elasticsearchexporter/request.go diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index f50a8e614ecd..2ab9ab075a95 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -14,7 +14,9 @@ import ( "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/exporter" + "go.opentelemetry.io/collector/exporter/exporterbatcher" "go.opentelemetry.io/collector/exporter/exporterhelper" + "go.opentelemetry.io/collector/exporter/exporterqueue" "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/metadata" ) @@ -31,7 +33,7 @@ func NewFactory() exporter.Factory { return exporter.NewFactory( metadata.Type, createDefaultConfig, - exporter.WithLogs(createLogsExporter, metadata.LogsStability), + exporter.WithLogs(createLogsRequestExporter, metadata.LogsStability), exporter.WithTraces(createTracesExporter, metadata.TracesStability), ) } @@ -73,10 +75,10 @@ func createDefaultConfig() component.Config { } } -// createLogsExporter creates a new exporter for logs. +// createLogsRequestExporter creates a new request exporter for logs. // // Logs are directly indexed into Elasticsearch. -func createLogsExporter( +func createLogsRequestExporter( ctx context.Context, set exporter.CreateSettings, cfg component.Config, @@ -93,13 +95,52 @@ func createLogsExporter( return nil, fmt.Errorf("cannot configure Elasticsearch logsExporter: %w", err) } - return exporterhelper.NewLogsExporter( + batchMergeFunc := func(ctx context.Context, r1, r2 exporterhelper.Request) (exporterhelper.Request, error) { + rr1 := r1.(*Request) + rr2 := r2.(*Request) + req := newRequest(logsExporter.bulkIndexer, logsExporter.mu) + req.Items = append(rr1.Items, rr2.Items...) + return req, nil + } + + batchMergeSplitFunc := func(ctx context.Context, conf exporterbatcher.MaxSizeConfig, optReq, req exporterhelper.Request) ([]exporterhelper.Request, error) { + // FIXME: implement merge split func + panic("not implemented") + return nil, nil + } + + marshalRequest := func(req exporterhelper.Request) ([]byte, error) { + b, err := json.Marshal(*req.(*Request)) + return b, err + } + + unmarshalRequest := func(b []byte) (exporterhelper.Request, error) { + var req Request + err := json.Unmarshal(b, &req) + req.bulkIndexer = logsExporter.bulkIndexer + req.mu = logsExporter.mu + return &req, err + } + + batcherCfg := exporterbatcher.NewDefaultConfig() + + // FIXME: is this right? 
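	// The FIXME above: exporterqueue.Config exposes the same three knobs
	// (Enabled, NumConsumers, QueueSize) as the legacy
	// exporterhelper.QueueSettings it is copied from, so the field-by-field
	// copy below is lossless, and the storage ID travels separately into
	// NewPersistentQueueFactory. A later patch in this series drops the
	// manual copy by embedding exporterqueue.PersistentQueueConfig in the
	// exporter Config.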
+ queueCfg := exporterqueue.NewDefaultConfig() + queueCfg.Enabled = cf.QueueSettings.Enabled + queueCfg.NumConsumers = cf.QueueSettings.NumConsumers + queueCfg.QueueSize = cf.QueueSettings.QueueSize + + return exporterhelper.NewLogsRequestExporter( ctx, set, - cfg, - logsExporter.pushLogsData, + logsExporter.logsDataToRequest, + exporterhelper.WithBatcher(batcherCfg, exporterhelper.WithRequestBatchFuncs(batchMergeFunc, batchMergeSplitFunc)), exporterhelper.WithShutdown(logsExporter.Shutdown), - exporterhelper.WithQueue(cf.QueueSettings), + exporterhelper.WithRequestQueue(queueCfg, + exporterqueue.NewPersistentQueueFactory[exporterhelper.Request](cf.QueueSettings.StorageID, exporterqueue.PersistentQueueSettings[exporterhelper.Request]{ + Marshaler: marshalRequest, + Unmarshaler: unmarshalRequest, + })), ) } diff --git a/exporter/elasticsearchexporter/logs_exporter.go b/exporter/elasticsearchexporter/logs_exporter.go index f7ab2ea8a58f..af82f58bca34 100644 --- a/exporter/elasticsearchexporter/logs_exporter.go +++ b/exporter/elasticsearchexporter/logs_exporter.go @@ -11,6 +11,7 @@ import ( "fmt" "time" + "go.opentelemetry.io/collector/exporter/exporterhelper" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/plog" "go.uber.org/zap" @@ -70,6 +71,59 @@ func (e *elasticsearchLogsExporter) Shutdown(ctx context.Context) error { return e.bulkIndexer.Close(ctx) } +func (e *elasticsearchLogsExporter) logsDataToRequest(ctx context.Context, ld plog.Logs) (exporterhelper.Request, error) { + request := newRequest(e.bulkIndexer) + + var errs []error + + rls := ld.ResourceLogs() + for i := 0; i < rls.Len(); i++ { + rl := rls.At(i) + resource := rl.Resource() + ills := rl.ScopeLogs() + for j := 0; j < ills.Len(); j++ { + scope := ills.At(j).Scope() + logs := ills.At(j).LogRecords() + for k := 0; k < logs.Len(); k++ { + if err := e.appendLogRecord(ctx, request, resource, logs.At(k), scope); err != nil { + if cerr := ctx.Err(); cerr != nil { + return request, cerr + } + + errs = append(errs, err) + } + } + } + } + + return request, errors.Join(errs...) 
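	// errors.Join returns nil when errs is empty, so a batch that converts
	// cleanly yields (request, nil); per-record encoding failures are joined
	// into a single error while the records that did encode still travel in
	// the returned request.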
+} + +func (e *elasticsearchLogsExporter) appendLogRecord(ctx context.Context, req *Request, resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) error { + fIndex := e.index + if e.dynamicIndex { + prefix := getFromAttributes(indexPrefix, resource, scope, record) + suffix := getFromAttributes(indexSuffix, resource, scope, record) + + fIndex = fmt.Sprintf("%s%s%s", prefix, fIndex, suffix) + } + + if e.logstashFormat.Enabled { + formattedIndex, err := generateIndexWithLogstashFormat(fIndex, &e.logstashFormat, time.Now()) + if err != nil { + return err + } + fIndex = formattedIndex + } + + document, err := e.model.encodeLog(resource, record, scope) + if err != nil { + return fmt.Errorf("Failed to encode log event: %w", err) + } + req.Add(fIndex, document) + return nil +} + func (e *elasticsearchLogsExporter) pushLogsData(ctx context.Context, ld plog.Logs) error { var errs []error diff --git a/exporter/elasticsearchexporter/request.go b/exporter/elasticsearchexporter/request.go new file mode 100644 index 000000000000..18c5af8a38a4 --- /dev/null +++ b/exporter/elasticsearchexporter/request.go @@ -0,0 +1,38 @@ +package elasticsearchexporter + +import ( + "bytes" + "context" +) + +type Request struct { + bulkIndexer *esBulkIndexerCurrent + Items []BulkIndexerItem +} + +func newRequest(bulkIndexer *esBulkIndexerCurrent) *Request { + return &Request{bulkIndexer: bulkIndexer} +} + +func (r *Request) Export(ctx context.Context) error { + for _, item := range r.Items { + if err := r.bulkIndexer.Add(ctx, item.Index, bytes.NewReader(item.Body)); err != nil { + return err // FIXME: merge errors + } + } + _, err := r.bulkIndexer.Flush(ctx) + return err +} + +func (r *Request) ItemsCount() int { + return len(r.Items) +} + +func (r *Request) Add(index string, body []byte) { + r.Items = append(r.Items, BulkIndexerItem{Index: index, Body: body}) +} + +type BulkIndexerItem struct { + Index string + Body []byte +} From 4341a378d77a72f393ab378b87602bc291d65ca5 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 23 Apr 2024 14:57:22 +0800 Subject: [PATCH 002/117] Pass integration test --- .../elasticsearch_bulk.go | 92 +++++++++---------- exporter/elasticsearchexporter/factory.go | 4 +- .../elasticsearchexporter/logs_exporter.go | 55 +---------- exporter/elasticsearchexporter/request.go | 11 ++- .../elasticsearchexporter/trace_exporter.go | 4 + 5 files changed, 60 insertions(+), 106 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index e52a4cd5d232..486615764335 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -6,11 +6,9 @@ package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter" import ( - "bytes" "context" "crypto/tls" "fmt" - "io" "net/http" "runtime" "sync" @@ -162,10 +160,6 @@ func createElasticsearchBackoffFunc(config *RetrySettings) func(int) time.Durati } } -func pushDocuments(ctx context.Context, index string, document []byte, bulkIndexer *esBulkIndexerCurrent) error { - return bulkIndexer.Add(ctx, index, bytes.NewReader(document)) -} - func newBulkIndexer(logger *zap.Logger, client *elasticsearch7.Client, config *Config) (*esBulkIndexerCurrent, error) { numWorkers := config.NumWorkers if numWorkers == 0 { @@ -190,9 +184,10 @@ func newBulkIndexer(logger *zap.Logger, client *elasticsearch7.Client, config *C } pool := &bulkIndexerPool{ - 
wg: sync.WaitGroup{}, - items: make(chan esBulkIndexerItem, config.NumWorkers), - stats: bulkIndexerStats{}, + wg: sync.WaitGroup{}, + closeCh: make(chan struct{}), + stats: bulkIndexerStats{}, + available: make(chan *worker, numWorkers), } pool.wg.Add(numWorkers) @@ -208,10 +203,9 @@ func newBulkIndexer(logger *zap.Logger, client *elasticsearch7.Client, config *C } w := worker{ indexer: bi, - items: pool.items, + closeCh: pool.closeCh, flushInterval: flushInterval, flushTimeout: config.Timeout, - flushBytes: flushBytes, logger: logger, stats: &pool.stats, } @@ -219,6 +213,7 @@ func newBulkIndexer(logger *zap.Logger, client *elasticsearch7.Client, config *C defer pool.wg.Done() w.run() }() + pool.available <- &w } return pool, nil } @@ -228,30 +223,27 @@ type bulkIndexerStats struct { } type bulkIndexerPool struct { - items chan esBulkIndexerItem - wg sync.WaitGroup - stats bulkIndexerStats + closeCh chan struct{} + wg sync.WaitGroup + stats bulkIndexerStats + available chan *worker } -// Add adds an item to the bulk indexer pool. -// -// Adding an item after a call to Close() will panic. -func (p *bulkIndexerPool) Add(ctx context.Context, index string, document io.WriterTo) error { - item := esBulkIndexerItem{ - Index: index, - Body: document, - } +func (p *bulkIndexerPool) AddBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem) error { select { case <-ctx.Done(): return ctx.Err() - case p.items <- item: - return nil + case worker := <-p.available: + defer func() { + p.available <- worker + }() + return worker.addBatchAndFlush(batch) } } -// Close closes the items channel and waits for the workers to drain it. +// Close closes the closeCh channel and wait for workers to finish. func (p *bulkIndexerPool) Close(ctx context.Context) error { - close(p.items) + close(p.closeCh) doneCh := make(chan struct{}) go func() { p.wg.Wait() @@ -267,46 +259,53 @@ func (p *bulkIndexerPool) Close(ctx context.Context) error { type worker struct { indexer *docappender.BulkIndexer - items <-chan esBulkIndexerItem + closeCh <-chan struct{} flushInterval time.Duration flushTimeout time.Duration - flushBytes int + //flushBytes int + mu sync.Mutex stats *bulkIndexerStats logger *zap.Logger } +func (w *worker) addBatchAndFlush(batch []esBulkIndexerItem) error { + w.mu.Lock() + defer w.mu.Unlock() + for _, item := range batch { + if err := w.indexer.Add(item); err != nil { + w.logger.Error("error adding item to bulk indexer", zap.Error(err)) + } + } + return w.flush() +} + func (w *worker) run() { flushTick := time.NewTicker(w.flushInterval) defer flushTick.Stop() for { select { - case item, ok := <-w.items: - // if channel is closed, flush and return - if !ok { - w.flush() - return - } - - if err := w.indexer.Add(item); err != nil { - w.logger.Error("error adding item to bulk indexer", zap.Error(err)) - } - - // w.indexer.Len() can be either compressed or uncompressed bytes - if w.indexer.Len() >= w.flushBytes { - w.flush() - flushTick.Reset(w.flushInterval) - } case <-flushTick.C: + w.mu.Lock() // bulk indexer needs to be flushed every flush interval because // there may be pending bytes in bulk indexer buffer due to e.g. 
document level 429 - w.flush() + if err := w.flush(); err != nil { + w.logger.Error("bulk indexer background flush error", zap.Error(err)) + } + w.mu.Unlock() + case <-w.closeCh: + w.mu.Lock() + if err := w.flush(); err != nil { + w.logger.Error("bulk indexer background flush error", zap.Error(err)) + } + return + // no need to unlock } } } -func (w *worker) flush() { +func (w *worker) flush() error { ctx, cancel := context.WithTimeout(context.Background(), w.flushTimeout) defer cancel() stat, err := w.indexer.Flush(ctx) @@ -318,4 +317,5 @@ func (w *worker) flush() { w.logger.Error(fmt.Sprintf("Drop docs: failed to index: %#v", resp.Error), zap.Int("status", resp.Status)) } + return err } diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 2ab9ab075a95..21e1e787a90d 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -7,6 +7,7 @@ package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry import ( "context" + "encoding/json" "fmt" "net/http" "runtime" @@ -98,7 +99,7 @@ func createLogsRequestExporter( batchMergeFunc := func(ctx context.Context, r1, r2 exporterhelper.Request) (exporterhelper.Request, error) { rr1 := r1.(*Request) rr2 := r2.(*Request) - req := newRequest(logsExporter.bulkIndexer, logsExporter.mu) + req := newRequest(logsExporter.bulkIndexer) req.Items = append(rr1.Items, rr2.Items...) return req, nil } @@ -118,7 +119,6 @@ func createLogsRequestExporter( var req Request err := json.Unmarshal(b, &req) req.bulkIndexer = logsExporter.bulkIndexer - req.mu = logsExporter.mu return &req, err } diff --git a/exporter/elasticsearchexporter/logs_exporter.go b/exporter/elasticsearchexporter/logs_exporter.go index af82f58bca34..585f78148d6c 100644 --- a/exporter/elasticsearchexporter/logs_exporter.go +++ b/exporter/elasticsearchexporter/logs_exporter.go @@ -85,7 +85,7 @@ func (e *elasticsearchLogsExporter) logsDataToRequest(ctx context.Context, ld pl scope := ills.At(j).Scope() logs := ills.At(j).LogRecords() for k := 0; k < logs.Len(); k++ { - if err := e.appendLogRecord(ctx, request, resource, logs.At(k), scope); err != nil { + if err := e.addLogRecordToRequest(ctx, request, resource, logs.At(k), scope); err != nil { if cerr := ctx.Err(); cerr != nil { return request, cerr } @@ -99,7 +99,7 @@ func (e *elasticsearchLogsExporter) logsDataToRequest(ctx context.Context, ld pl return request, errors.Join(errs...) 
} -func (e *elasticsearchLogsExporter) appendLogRecord(ctx context.Context, req *Request, resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) error { +func (e *elasticsearchLogsExporter) addLogRecordToRequest(ctx context.Context, req *Request, resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) error { fIndex := e.index if e.dynamicIndex { prefix := getFromAttributes(indexPrefix, resource, scope, record) @@ -123,54 +123,3 @@ func (e *elasticsearchLogsExporter) appendLogRecord(ctx context.Context, req *Re req.Add(fIndex, document) return nil } - -func (e *elasticsearchLogsExporter) pushLogsData(ctx context.Context, ld plog.Logs) error { - var errs []error - - rls := ld.ResourceLogs() - for i := 0; i < rls.Len(); i++ { - rl := rls.At(i) - resource := rl.Resource() - ills := rl.ScopeLogs() - for j := 0; j < ills.Len(); j++ { - ill := ills.At(j) - scope := ill.Scope() - logs := ill.LogRecords() - for k := 0; k < logs.Len(); k++ { - if err := e.pushLogRecord(ctx, resource, logs.At(k), scope); err != nil { - if cerr := ctx.Err(); cerr != nil { - return cerr - } - - errs = append(errs, err) - } - } - } - } - - return errors.Join(errs...) -} - -func (e *elasticsearchLogsExporter) pushLogRecord(ctx context.Context, resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) error { - fIndex := e.index - if e.dynamicIndex { - prefix := getFromAttributes(indexPrefix, resource, scope, record) - suffix := getFromAttributes(indexSuffix, resource, scope, record) - - fIndex = fmt.Sprintf("%s%s%s", prefix, fIndex, suffix) - } - - if e.logstashFormat.Enabled { - formattedIndex, err := generateIndexWithLogstashFormat(fIndex, &e.logstashFormat, time.Now()) - if err != nil { - return err - } - fIndex = formattedIndex - } - - document, err := e.model.encodeLog(resource, record, scope) - if err != nil { - return fmt.Errorf("Failed to encode log event: %w", err) - } - return pushDocuments(ctx, fIndex, document, e.bulkIndexer) -} diff --git a/exporter/elasticsearchexporter/request.go b/exporter/elasticsearchexporter/request.go index 18c5af8a38a4..394c96cf3819 100644 --- a/exporter/elasticsearchexporter/request.go +++ b/exporter/elasticsearchexporter/request.go @@ -15,13 +15,14 @@ func newRequest(bulkIndexer *esBulkIndexerCurrent) *Request { } func (r *Request) Export(ctx context.Context) error { - for _, item := range r.Items { - if err := r.bulkIndexer.Add(ctx, item.Index, bytes.NewReader(item.Body)); err != nil { - return err // FIXME: merge errors + batch := make([]esBulkIndexerItem, len(r.Items)) + for i, item := range r.Items { + batch[i] = esBulkIndexerItem{ + Index: item.Index, + Body: bytes.NewReader(item.Body), } } - _, err := r.bulkIndexer.Flush(ctx) - return err + return r.bulkIndexer.AddBatchAndFlush(ctx, batch) } func (r *Request) ItemsCount() int { diff --git a/exporter/elasticsearchexporter/trace_exporter.go b/exporter/elasticsearchexporter/trace_exporter.go index 073bed4d8b6a..4c3ce5bbd3b8 100644 --- a/exporter/elasticsearchexporter/trace_exporter.go +++ b/exporter/elasticsearchexporter/trace_exporter.go @@ -117,3 +117,7 @@ func (e *elasticsearchTracesExporter) pushTraceRecord(ctx context.Context, resou } return pushDocuments(ctx, fIndex, document, e.bulkIndexer) } + +func pushDocuments(ctx context.Context, index string, document []byte, current *esBulkIndexerCurrent) error { + return fmt.Errorf("not implemented") +} From 5c042040fe798a891f1378c35ed21ba234083950 Mon Sep 17 00:00:00 2001 From: Carson Ip 
Date: Tue, 23 Apr 2024 15:02:28 +0800 Subject: [PATCH 003/117] Refactor --- exporter/elasticsearchexporter/logs_exporter.go | 17 +++++++++++------ exporter/elasticsearchexporter/request.go | 4 ++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/exporter/elasticsearchexporter/logs_exporter.go b/exporter/elasticsearchexporter/logs_exporter.go index 585f78148d6c..f075b82b83d7 100644 --- a/exporter/elasticsearchexporter/logs_exporter.go +++ b/exporter/elasticsearchexporter/logs_exporter.go @@ -85,13 +85,16 @@ func (e *elasticsearchLogsExporter) logsDataToRequest(ctx context.Context, ld pl scope := ills.At(j).Scope() logs := ills.At(j).LogRecords() for k := 0; k < logs.Len(); k++ { - if err := e.addLogRecordToRequest(ctx, request, resource, logs.At(k), scope); err != nil { + item, err := e.logRecordToItem(ctx, resource, logs.At(k), scope) + if err != nil { if cerr := ctx.Err(); cerr != nil { return request, cerr } errs = append(errs, err) + continue } + request.Add(item) } } } @@ -99,7 +102,7 @@ func (e *elasticsearchLogsExporter) logsDataToRequest(ctx context.Context, ld pl return request, errors.Join(errs...) } -func (e *elasticsearchLogsExporter) addLogRecordToRequest(ctx context.Context, req *Request, resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) error { +func (e *elasticsearchLogsExporter) logRecordToItem(ctx context.Context, resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) (BulkIndexerItem, error) { fIndex := e.index if e.dynamicIndex { prefix := getFromAttributes(indexPrefix, resource, scope, record) @@ -111,15 +114,17 @@ func (e *elasticsearchLogsExporter) addLogRecordToRequest(ctx context.Context, r if e.logstashFormat.Enabled { formattedIndex, err := generateIndexWithLogstashFormat(fIndex, &e.logstashFormat, time.Now()) if err != nil { - return err + return BulkIndexerItem{}, err } fIndex = formattedIndex } document, err := e.model.encodeLog(resource, record, scope) if err != nil { - return fmt.Errorf("Failed to encode log event: %w", err) + return BulkIndexerItem{}, fmt.Errorf("Failed to encode log event: %w", err) } - req.Add(fIndex, document) - return nil + return BulkIndexerItem{ + Index: fIndex, + Body: document, + }, nil } diff --git a/exporter/elasticsearchexporter/request.go b/exporter/elasticsearchexporter/request.go index 394c96cf3819..2d16e1168405 100644 --- a/exporter/elasticsearchexporter/request.go +++ b/exporter/elasticsearchexporter/request.go @@ -29,8 +29,8 @@ func (r *Request) ItemsCount() int { return len(r.Items) } -func (r *Request) Add(index string, body []byte) { - r.Items = append(r.Items, BulkIndexerItem{Index: index, Body: body}) +func (r *Request) Add(item BulkIndexerItem) { + r.Items = append(r.Items, item) } type BulkIndexerItem struct { From 39f0fa14d679e8566ea0dec44b0768c3c06bbc75 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 23 Apr 2024 15:02:50 +0800 Subject: [PATCH 004/117] Unexport BulkIndexerItem --- exporter/elasticsearchexporter/logs_exporter.go | 8 ++++---- exporter/elasticsearchexporter/request.go | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/exporter/elasticsearchexporter/logs_exporter.go b/exporter/elasticsearchexporter/logs_exporter.go index f075b82b83d7..d3dd5ac86154 100644 --- a/exporter/elasticsearchexporter/logs_exporter.go +++ b/exporter/elasticsearchexporter/logs_exporter.go @@ -102,7 +102,7 @@ func (e *elasticsearchLogsExporter) logsDataToRequest(ctx context.Context, ld pl return request, errors.Join(errs...) 
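	// The hunk below renames BulkIndexerItem to bulkIndexerItem: the item is
	// a transport detail between the conversion step and the bulk indexer,
	// and nothing outside this package needs to construct one.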
} -func (e *elasticsearchLogsExporter) logRecordToItem(ctx context.Context, resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) (BulkIndexerItem, error) { +func (e *elasticsearchLogsExporter) logRecordToItem(ctx context.Context, resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) (bulkIndexerItem, error) { fIndex := e.index if e.dynamicIndex { prefix := getFromAttributes(indexPrefix, resource, scope, record) @@ -114,16 +114,16 @@ func (e *elasticsearchLogsExporter) logRecordToItem(ctx context.Context, resourc if e.logstashFormat.Enabled { formattedIndex, err := generateIndexWithLogstashFormat(fIndex, &e.logstashFormat, time.Now()) if err != nil { - return BulkIndexerItem{}, err + return bulkIndexerItem{}, err } fIndex = formattedIndex } document, err := e.model.encodeLog(resource, record, scope) if err != nil { - return BulkIndexerItem{}, fmt.Errorf("Failed to encode log event: %w", err) + return bulkIndexerItem{}, fmt.Errorf("Failed to encode log event: %w", err) } - return BulkIndexerItem{ + return bulkIndexerItem{ Index: fIndex, Body: document, }, nil diff --git a/exporter/elasticsearchexporter/request.go b/exporter/elasticsearchexporter/request.go index 2d16e1168405..59e62c9772c8 100644 --- a/exporter/elasticsearchexporter/request.go +++ b/exporter/elasticsearchexporter/request.go @@ -7,7 +7,7 @@ import ( type Request struct { bulkIndexer *esBulkIndexerCurrent - Items []BulkIndexerItem + Items []bulkIndexerItem } func newRequest(bulkIndexer *esBulkIndexerCurrent) *Request { @@ -29,11 +29,11 @@ func (r *Request) ItemsCount() int { return len(r.Items) } -func (r *Request) Add(item BulkIndexerItem) { +func (r *Request) Add(item bulkIndexerItem) { r.Items = append(r.Items, item) } -type BulkIndexerItem struct { +type bulkIndexerItem struct { Index string Body []byte } From bc692f07eff25378dbedbe197dd2b3a89ec7fc31 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 23 Apr 2024 15:07:17 +0800 Subject: [PATCH 005/117] Unexport Request --- exporter/elasticsearchexporter/factory.go | 8 ++++---- exporter/elasticsearchexporter/request.go | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 21e1e787a90d..474559423bea 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -97,8 +97,8 @@ func createLogsRequestExporter( } batchMergeFunc := func(ctx context.Context, r1, r2 exporterhelper.Request) (exporterhelper.Request, error) { - rr1 := r1.(*Request) - rr2 := r2.(*Request) + rr1 := r1.(*request) + rr2 := r2.(*request) req := newRequest(logsExporter.bulkIndexer) req.Items = append(rr1.Items, rr2.Items...) 
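		// Each item carries its target index with it, so concatenating the
		// two Items slices is a complete merge; ItemsCount (len(Items)) on
		// the merged request is what the batcher's size thresholds are
		// measured against.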
return req, nil @@ -111,12 +111,12 @@ func createLogsRequestExporter( } marshalRequest := func(req exporterhelper.Request) ([]byte, error) { - b, err := json.Marshal(*req.(*Request)) + b, err := json.Marshal(*req.(*request)) return b, err } unmarshalRequest := func(b []byte) (exporterhelper.Request, error) { - var req Request + var req request err := json.Unmarshal(b, &req) req.bulkIndexer = logsExporter.bulkIndexer return &req, err diff --git a/exporter/elasticsearchexporter/request.go b/exporter/elasticsearchexporter/request.go index 59e62c9772c8..5b489fc70258 100644 --- a/exporter/elasticsearchexporter/request.go +++ b/exporter/elasticsearchexporter/request.go @@ -5,16 +5,16 @@ import ( "context" ) -type Request struct { +type request struct { bulkIndexer *esBulkIndexerCurrent Items []bulkIndexerItem } -func newRequest(bulkIndexer *esBulkIndexerCurrent) *Request { - return &Request{bulkIndexer: bulkIndexer} +func newRequest(bulkIndexer *esBulkIndexerCurrent) *request { + return &request{bulkIndexer: bulkIndexer} } -func (r *Request) Export(ctx context.Context) error { +func (r *request) Export(ctx context.Context) error { batch := make([]esBulkIndexerItem, len(r.Items)) for i, item := range r.Items { batch[i] = esBulkIndexerItem{ @@ -25,11 +25,11 @@ func (r *Request) Export(ctx context.Context) error { return r.bulkIndexer.AddBatchAndFlush(ctx, batch) } -func (r *Request) ItemsCount() int { +func (r *request) ItemsCount() int { return len(r.Items) } -func (r *Request) Add(item bulkIndexerItem) { +func (r *request) Add(item bulkIndexerItem) { r.Items = append(r.Items, item) } From c5fd34c0d8a78477c848185a7b18a6643f84c795 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 23 Apr 2024 16:34:33 +0800 Subject: [PATCH 006/117] Fix es bulk test --- .../elasticsearch_bulk_test.go | 105 +++++++++--------- 1 file changed, 55 insertions(+), 50 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk_test.go b/exporter/elasticsearchexporter/elasticsearch_bulk_test.go index 020d29fae623..5a4a51e74a1c 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk_test.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk_test.go @@ -16,8 +16,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/zap" - "go.uber.org/zap/zapcore" - "go.uber.org/zap/zaptest/observer" ) var defaultRoundTripFunc = func(*http.Request) (*http.Response, error) { @@ -50,7 +48,7 @@ const successResp = `{ ] }` -func TestBulkIndexer_flushOnClose(t *testing.T) { +func TestBulkIndexer_addBatchAndFlush(t *testing.T) { cfg := Config{NumWorkers: 1, Flush: FlushSettings{Interval: time.Hour, Bytes: 2 << 30}} client, err := elasticsearch.NewClient(elasticsearch.Config{Transport: &mockTransport{ RoundTripFunc: func(*http.Request) (*http.Response, error) { @@ -63,51 +61,56 @@ func TestBulkIndexer_flushOnClose(t *testing.T) { require.NoError(t, err) bulkIndexer, err := newBulkIndexer(zap.NewNop(), client, &cfg) require.NoError(t, err) - assert.NoError(t, bulkIndexer.Add(context.Background(), "foo", strings.NewReader(`{"foo": "bar"}`))) - assert.NoError(t, bulkIndexer.Close(context.Background())) + assert.NoError(t, bulkIndexer.AddBatchAndFlush(context.Background(), + []esBulkIndexerItem{ + { + Index: "foo", + Body: strings.NewReader(`{"foo": "bar"}`), + }, + })) assert.Equal(t, int64(1), bulkIndexer.stats.docsIndexed.Load()) } -func TestBulkIndexer_flush(t *testing.T) { - tests := []struct { - name string - config Config - }{ - { - name: "flush.bytes", - config: 
Config{NumWorkers: 1, Flush: FlushSettings{Interval: time.Hour, Bytes: 1}}, - }, - { - name: "flush.interval", - config: Config{NumWorkers: 1, Flush: FlushSettings{Interval: 50 * time.Millisecond, Bytes: 2 << 30}}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - client, err := elasticsearch.NewClient(elasticsearch.Config{Transport: &mockTransport{ - RoundTripFunc: func(*http.Request) (*http.Response, error) { - return &http.Response{ - Header: http.Header{"X-Elastic-Product": []string{"Elasticsearch"}}, - Body: io.NopCloser(strings.NewReader(successResp)), - }, nil - }, - }}) - require.NoError(t, err) - bulkIndexer, err := newBulkIndexer(zap.NewNop(), client, &tt.config) - require.NoError(t, err) - assert.NoError(t, bulkIndexer.Add(context.Background(), "foo", strings.NewReader(`{"foo": "bar"}`))) - // should flush - time.Sleep(100 * time.Millisecond) - assert.Equal(t, int64(1), bulkIndexer.stats.docsIndexed.Load()) - assert.NoError(t, bulkIndexer.Close(context.Background())) - }) - } -} +//func TestBulkIndexer_flush(t *testing.T) { +// tests := []struct { +// name string +// config Config +// }{ +// { +// name: "flush.bytes", +// config: Config{NumWorkers: 1, Flush: FlushSettings{Interval: time.Hour, Bytes: 1}}, +// }, +// { +// name: "flush.interval", +// config: Config{NumWorkers: 1, Flush: FlushSettings{Interval: 50 * time.Millisecond, Bytes: 2 << 30}}, +// }, +// } +// +// for _, tt := range tests { +// tt := tt +// t.Run(tt.name, func(t *testing.T) { +// t.Parallel() +// client, err := elasticsearch.NewClient(elasticsearch.Config{Transport: &mockTransport{ +// RoundTripFunc: func(*http.Request) (*http.Response, error) { +// return &http.Response{ +// Header: http.Header{"X-Elastic-Product": []string{"Elasticsearch"}}, +// Body: io.NopCloser(strings.NewReader(successResp)), +// }, nil +// }, +// }}) +// require.NoError(t, err) +// bulkIndexer, err := newBulkIndexer(zap.NewNop(), client, &tt.config) +// require.NoError(t, err) +// assert.NoError(t, bulkIndexer.Add(context.Background(), "foo", strings.NewReader(`{"foo": "bar"}`))) +// // should flush +// time.Sleep(100 * time.Millisecond) +// assert.Equal(t, int64(1), bulkIndexer.stats.docsIndexed.Load()) +// assert.NoError(t, bulkIndexer.Close(context.Background())) +// }) +// } +//} -func TestBulkIndexer_flush_error(t *testing.T) { +func TestBulkIndexer_addBatchAndFlush_error(t *testing.T) { tests := []struct { name string roundTripFunc func(*http.Request) (*http.Response, error) @@ -149,15 +152,17 @@ func TestBulkIndexer_flush_error(t *testing.T) { RoundTripFunc: tt.roundTripFunc, }}) require.NoError(t, err) - core, observed := observer.New(zap.NewAtomicLevelAt(zapcore.DebugLevel)) - bulkIndexer, err := newBulkIndexer(zap.New(core), client, &cfg) + bulkIndexer, err := newBulkIndexer(zap.NewNop(), client, &cfg) require.NoError(t, err) - assert.NoError(t, bulkIndexer.Add(context.Background(), "foo", strings.NewReader(`{"foo": "bar"}`))) - // should flush - time.Sleep(100 * time.Millisecond) + assert.ErrorContains(t, bulkIndexer.AddBatchAndFlush(context.Background(), + []esBulkIndexerItem{ + { + Index: "foo", + Body: strings.NewReader(`{"foo": "bar"}`), + }, + }), "failed to execute the request") assert.Equal(t, int64(0), bulkIndexer.stats.docsIndexed.Load()) assert.NoError(t, bulkIndexer.Close(context.Background())) - assert.Equal(t, 1, observed.FilterMessage("bulk indexer flush error").Len()) }) } } From 7fdf150cd8b10e9540025cb81b3ae290c7bbc31c Mon Sep 17 00:00:00 2001 From: 
Carson Ip Date: Tue, 23 Apr 2024 17:39:06 +0800 Subject: [PATCH 007/117] Fix logs exporter test --- .../logs_exporter_test.go | 41 +++++++++++++------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/exporter/elasticsearchexporter/logs_exporter_test.go b/exporter/elasticsearchexporter/logs_exporter_test.go index 28ce2fb0f624..18c51ae0dd85 100644 --- a/exporter/elasticsearchexporter/logs_exporter_test.go +++ b/exporter/elasticsearchexporter/logs_exporter_test.go @@ -162,8 +162,7 @@ func TestExporter_PushEvent(t *testing.T) { }) exporter := newTestExporter(t, server.URL) - mustSend(t, exporter, `{"message": "test1"}`) - mustSend(t, exporter, `{"message": "test2"}`) + mustSend(t, exporter, `{"message": "test1"}`, `{"message": "test2"}`) rec.WaitItems(2) }) @@ -392,7 +391,6 @@ func TestExporter_PushEvent(t *testing.T) { exporter := newTestExporter(t, server.URL, func(cfg *Config) { *cfg = *testConfig }) mustSend(t, exporter, `{"message": "test1"}`) - time.Sleep(200 * time.Millisecond) assert.Equal(t, int64(1), attempts.Load()) }) } @@ -408,9 +406,9 @@ func TestExporter_PushEvent(t *testing.T) { }) exporter := newTestExporter(t, server.URL) - mustSend(t, exporter, `{"message": "test1"}`) + err := send(t, exporter, `{"message": "test1"}`) + assert.ErrorContains(t, err, "flush failed") - time.Sleep(200 * time.Millisecond) assert.Equal(t, int64(1), attempts.Load()) }) @@ -444,7 +442,6 @@ func TestExporter_PushEvent(t *testing.T) { exporter := newTestExporter(t, server.URL) mustSend(t, exporter, `{"message": "test1"}`) - time.Sleep(200 * time.Millisecond) assert.Equal(t, int64(1), attempts.Load()) }) @@ -482,9 +479,8 @@ func TestExporter_PushEvent(t *testing.T) { cfg.Retry.InitialInterval = 1 * time.Millisecond cfg.Retry.MaxInterval = 10 * time.Millisecond }) - mustSend(t, exporter, `{"message": "test1", "idx": 0}`) - mustSend(t, exporter, `{"message": "test2", "idx": 1}`) - mustSend(t, exporter, `{"message": "test3", "idx": 2}`) + mustSend(t, exporter, `{"message": "test1", "idx": 0}`, + `{"message": "test2", "idx": 1}`, `{"message": "test3", "idx": 2}`) wg.Wait() // <- this blocks forever if the event is not retried @@ -515,8 +511,22 @@ func withTestExporterConfig(fns ...func(*Config)) func(string) *Config { } } -func mustSend(t *testing.T, exporter *elasticsearchLogsExporter, contents string) { - err := pushDocuments(context.TODO(), exporter.index, []byte(contents), exporter.bulkIndexer) +func send(t *testing.T, exporter *elasticsearchLogsExporter, contents ...string) error { + req := request{ + bulkIndexer: exporter.bulkIndexer, + Items: nil, + } + for _, body := range contents { + req.Add(bulkIndexerItem{ + Index: exporter.index, + Body: []byte(body), + }) + } + return req.Export(context.TODO()) +} + +func mustSend(t *testing.T, exporter *elasticsearchLogsExporter, contents ...string) { + err := send(t, exporter, contents...) 
require.NoError(t, err) } @@ -528,6 +538,13 @@ func mustSendLogsWithAttributes(t *testing.T, exporter *elasticsearchLogsExporte logRecords := scopeLog.LogRecords().At(0) logRecords.Body().SetStr(body) - err := exporter.pushLogRecord(context.TODO(), resSpans.Resource(), logRecords, scopeLog.Scope()) + req := request{ + bulkIndexer: exporter.bulkIndexer, + Items: nil, + } + item, err := exporter.logRecordToItem(context.TODO(), resSpans.Resource(), logRecords, scopeLog.Scope()) + require.NoError(t, err) + req.Add(item) + err = req.Export(context.TODO()) require.NoError(t, err) } From 97d5c192f0080b00d06d7cb008bad30e6d4e0d10 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 23 Apr 2024 18:32:00 +0800 Subject: [PATCH 008/117] Add traces --- exporter/elasticsearchexporter/factory.go | 52 ++++++++++++++++--- .../elasticsearchexporter/logs_exporter.go | 10 ++-- .../logs_exporter_test.go | 2 +- .../elasticsearchexporter/trace_exporter.go | 26 ++++++---- .../traces_exporter_test.go | 37 +++++++++---- 5 files changed, 95 insertions(+), 32 deletions(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 474559423bea..422d59cf6153 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -35,7 +35,7 @@ func NewFactory() exporter.Factory { metadata.Type, createDefaultConfig, exporter.WithLogs(createLogsRequestExporter, metadata.LogsStability), - exporter.WithTraces(createTracesExporter, metadata.TracesStability), + exporter.WithTraces(createTracesRequestExporter, metadata.TracesStability), ) } @@ -144,7 +144,7 @@ func createLogsRequestExporter( ) } -func createTracesExporter(ctx context.Context, +func createTracesRequestExporter(ctx context.Context, set exporter.CreateSettings, cfg component.Config) (exporter.Traces, error) { @@ -156,13 +156,53 @@ func createTracesExporter(ctx context.Context, if err != nil { return nil, fmt.Errorf("cannot configure Elasticsearch tracesExporter: %w", err) } - return exporterhelper.NewTracesExporter( + + batchMergeFunc := func(ctx context.Context, r1, r2 exporterhelper.Request) (exporterhelper.Request, error) { + rr1 := r1.(*request) + rr2 := r2.(*request) + req := newRequest(tracesExporter.bulkIndexer) + req.Items = append(rr1.Items, rr2.Items...) + return req, nil + } + + batchMergeSplitFunc := func(ctx context.Context, conf exporterbatcher.MaxSizeConfig, optReq, req exporterhelper.Request) ([]exporterhelper.Request, error) { + // FIXME: implement merge split func + panic("not implemented") + return nil, nil + } + + marshalRequest := func(req exporterhelper.Request) ([]byte, error) { + b, err := json.Marshal(*req.(*request)) + return b, err + } + + unmarshalRequest := func(b []byte) (exporterhelper.Request, error) { + var req request + err := json.Unmarshal(b, &req) + req.bulkIndexer = tracesExporter.bulkIndexer + return &req, err + } + + batcherCfg := exporterbatcher.NewDefaultConfig() + + // FIXME: is this right? 
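	// Same FIXME as on the logs path: the three assignments below are a
	// faithful copy of the legacy queue settings. The batcher/queue wiring in
	// this constructor duplicates createLogsRequestExporter almost line for
	// line, so the two need to be kept in sync until they share a helper.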
+ queueCfg := exporterqueue.NewDefaultConfig() + queueCfg.Enabled = cf.QueueSettings.Enabled + queueCfg.NumConsumers = cf.QueueSettings.NumConsumers + queueCfg.QueueSize = cf.QueueSettings.QueueSize + + return exporterhelper.NewTracesRequestExporter( ctx, set, - cfg, - tracesExporter.pushTraceData, + tracesExporter.traceDataToRequest, + exporterhelper.WithBatcher(batcherCfg, exporterhelper.WithRequestBatchFuncs(batchMergeFunc, batchMergeSplitFunc)), exporterhelper.WithShutdown(tracesExporter.Shutdown), - exporterhelper.WithQueue(cf.QueueSettings)) + exporterhelper.WithRequestQueue(queueCfg, + exporterqueue.NewPersistentQueueFactory[exporterhelper.Request](cf.QueueSettings.StorageID, exporterqueue.PersistentQueueSettings[exporterhelper.Request]{ + Marshaler: marshalRequest, + Unmarshaler: unmarshalRequest, + })), + ) } // set default User-Agent header with BuildInfo if User-Agent is empty diff --git a/exporter/elasticsearchexporter/logs_exporter.go b/exporter/elasticsearchexporter/logs_exporter.go index d3dd5ac86154..7a66a8e28cd4 100644 --- a/exporter/elasticsearchexporter/logs_exporter.go +++ b/exporter/elasticsearchexporter/logs_exporter.go @@ -72,10 +72,8 @@ func (e *elasticsearchLogsExporter) Shutdown(ctx context.Context) error { } func (e *elasticsearchLogsExporter) logsDataToRequest(ctx context.Context, ld plog.Logs) (exporterhelper.Request, error) { - request := newRequest(e.bulkIndexer) - + req := newRequest(e.bulkIndexer) var errs []error - rls := ld.ResourceLogs() for i := 0; i < rls.Len(); i++ { rl := rls.At(i) @@ -88,18 +86,18 @@ func (e *elasticsearchLogsExporter) logsDataToRequest(ctx context.Context, ld pl item, err := e.logRecordToItem(ctx, resource, logs.At(k), scope) if err != nil { if cerr := ctx.Err(); cerr != nil { - return request, cerr + return req, cerr } errs = append(errs, err) continue } - request.Add(item) + req.Add(item) } } } - return request, errors.Join(errs...) + return req, errors.Join(errs...) 
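	// The local variable drops from `request` to `req` because `request` has
	// been the (unexported) type name since the "Unexport Request" patch
	// earlier in this series; keeping the old name would shadow the type
	// inside this function.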
} func (e *elasticsearchLogsExporter) logRecordToItem(ctx context.Context, resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) (bulkIndexerItem, error) { diff --git a/exporter/elasticsearchexporter/logs_exporter_test.go b/exporter/elasticsearchexporter/logs_exporter_test.go index 18c51ae0dd85..b4516bb2656d 100644 --- a/exporter/elasticsearchexporter/logs_exporter_test.go +++ b/exporter/elasticsearchexporter/logs_exporter_test.go @@ -407,7 +407,7 @@ func TestExporter_PushEvent(t *testing.T) { exporter := newTestExporter(t, server.URL) err := send(t, exporter, `{"message": "test1"}`) - assert.ErrorContains(t, err, "flush failed") + assert.ErrorContains(t, err, "flush failed: [400 Bad Request] oops") assert.Equal(t, int64(1), attempts.Load()) }) diff --git a/exporter/elasticsearchexporter/trace_exporter.go b/exporter/elasticsearchexporter/trace_exporter.go index 4c3ce5bbd3b8..e6281785ca2a 100644 --- a/exporter/elasticsearchexporter/trace_exporter.go +++ b/exporter/elasticsearchexporter/trace_exporter.go @@ -9,6 +9,7 @@ import ( "context" "errors" "fmt" + "go.opentelemetry.io/collector/exporter/exporterhelper" "time" "go.opentelemetry.io/collector/pdata/pcommon" @@ -65,10 +66,11 @@ func (e *elasticsearchTracesExporter) Shutdown(ctx context.Context) error { return e.bulkIndexer.Close(ctx) } -func (e *elasticsearchTracesExporter) pushTraceData( +func (e *elasticsearchTracesExporter) traceDataToRequest( ctx context.Context, td ptrace.Traces, -) error { +) (exporterhelper.Request, error) { + req := newRequest(e.bulkIndexer) var errs []error resourceSpans := td.ResourceSpans() for i := 0; i < resourceSpans.Len(); i++ { @@ -81,20 +83,23 @@ func (e *elasticsearchTracesExporter) pushTraceData( spans := scopeSpan.Spans() for k := 0; k < spans.Len(); k++ { span := spans.At(k) - if err := e.pushTraceRecord(ctx, resource, span, scope); err != nil { + item, err := e.traceRecordToItem(ctx, resource, span, scope) + if err != nil { if cerr := ctx.Err(); cerr != nil { - return cerr + return req, cerr } errs = append(errs, err) + continue } + req.Add(item) } } } - return errors.Join(errs...) + return req, errors.Join(errs...) 
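	// traceDataToRequest mirrors logsDataToRequest: walk resource -> scope ->
	// span, convert each span to a bulkIndexerItem, and join the per-span
	// errors while spans that converted cleanly stay in the returned request.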
} -func (e *elasticsearchTracesExporter) pushTraceRecord(ctx context.Context, resource pcommon.Resource, span ptrace.Span, scope pcommon.InstrumentationScope) error { +func (e *elasticsearchTracesExporter) traceRecordToItem(ctx context.Context, resource pcommon.Resource, span ptrace.Span, scope pcommon.InstrumentationScope) (bulkIndexerItem, error) { fIndex := e.index if e.dynamicIndex { prefix := getFromAttributes(indexPrefix, resource, scope, span) @@ -106,16 +111,19 @@ func (e *elasticsearchTracesExporter) pushTraceRecord(ctx context.Context, resou if e.logstashFormat.Enabled { formattedIndex, err := generateIndexWithLogstashFormat(fIndex, &e.logstashFormat, time.Now()) if err != nil { - return err + return bulkIndexerItem{}, err } fIndex = formattedIndex } document, err := e.model.encodeSpan(resource, span, scope) if err != nil { - return fmt.Errorf("Failed to encode trace record: %w", err) + return bulkIndexerItem{}, fmt.Errorf("Failed to encode trace record: %w", err) } - return pushDocuments(ctx, fIndex, document, e.bulkIndexer) + return bulkIndexerItem{ + Index: fIndex, + Body: document, + }, nil } func pushDocuments(ctx context.Context, index string, document []byte, current *esBulkIndexerCurrent) error { diff --git a/exporter/elasticsearchexporter/traces_exporter_test.go b/exporter/elasticsearchexporter/traces_exporter_test.go index c5490398a56c..d6b1c8dd1c46 100644 --- a/exporter/elasticsearchexporter/traces_exporter_test.go +++ b/exporter/elasticsearchexporter/traces_exporter_test.go @@ -330,7 +330,6 @@ func TestExporter_PushTraceRecord(t *testing.T) { exporter := newTestTracesExporter(t, server.URL, func(cfg *Config) { *cfg = *testConfig }) mustSendTraces(t, exporter, `{"message": "test1"}`) - time.Sleep(200 * time.Millisecond) assert.Equal(t, int64(1), attempts.Load()) }) } @@ -346,9 +345,9 @@ func TestExporter_PushTraceRecord(t *testing.T) { }) exporter := newTestTracesExporter(t, server.URL) - mustSendTraces(t, exporter, `{"message": "test1"}`) + err := sendTraces(t, exporter, `{"message": "test1"}`) + assert.ErrorContains(t, err, "flush failed: [400 Bad Request] oops") - time.Sleep(200 * time.Millisecond) assert.Equal(t, int64(1), attempts.Load()) }) @@ -382,7 +381,6 @@ func TestExporter_PushTraceRecord(t *testing.T) { exporter := newTestTracesExporter(t, server.URL) mustSendTraces(t, exporter, `{"message": "test1"}`) - time.Sleep(200 * time.Millisecond) assert.Equal(t, int64(1), attempts.Load()) }) @@ -420,9 +418,7 @@ func TestExporter_PushTraceRecord(t *testing.T) { cfg.Retry.InitialInterval = 1 * time.Millisecond cfg.Retry.MaxInterval = 10 * time.Millisecond }) - mustSendTraces(t, exporter, `{"message": "test1", "idx": 0}`) - mustSendTraces(t, exporter, `{"message": "test2", "idx": 1}`) - mustSendTraces(t, exporter, `{"message": "test3", "idx": 2}`) + mustSendTraces(t, exporter, `{"message": "test1", "idx": 0}`, `{"message": "test2", "idx": 1}`, `{"message": "test3", "idx": 2}`) wg.Wait() // <- this blocks forever if the trace is not retried @@ -462,8 +458,22 @@ func withTestTracesExporterConfig(fns ...func(*Config)) func(string) *Config { } } -func mustSendTraces(t *testing.T, exporter *elasticsearchTracesExporter, contents string) { - err := pushDocuments(context.TODO(), exporter.index, []byte(contents), exporter.bulkIndexer) +func sendTraces(t *testing.T, exporter *elasticsearchTracesExporter, contents ...string) error { + req := request{ + bulkIndexer: exporter.bulkIndexer, + Items: nil, + } + for _, body := range contents { + req.Add(bulkIndexerItem{ + Index: 
exporter.index, + Body: []byte(body), + }) + } + return req.Export(context.TODO()) +} + +func mustSendTraces(t *testing.T, exporter *elasticsearchTracesExporter, contents ...string) { + err := sendTraces(t, exporter, contents...) require.NoError(t, err) } @@ -474,6 +484,13 @@ func mustSendTracesWithAttributes(t *testing.T, exporter *elasticsearchTracesExp span := resSpans.ScopeSpans().At(0).Spans().At(0) scope := resSpans.ScopeSpans().At(0).Scope() - err := exporter.pushTraceRecord(context.TODO(), resSpans.Resource(), span, scope) + req := request{ + bulkIndexer: exporter.bulkIndexer, + Items: nil, + } + item, err := exporter.traceRecordToItem(context.TODO(), resSpans.Resource(), span, scope) + require.NoError(t, err) + req.Add(item) + err = req.Export(context.TODO()) require.NoError(t, err) } From 5608755654bc60f5fb35185d5b00bafbbe51bcb2 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 9 May 2024 16:46:09 +0100 Subject: [PATCH 009/117] Log once only --- exporter/elasticsearchexporter/elasticsearch_bulk.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 486615764335..36ee41063290 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -290,15 +290,11 @@ func (w *worker) run() { w.mu.Lock() // bulk indexer needs to be flushed every flush interval because // there may be pending bytes in bulk indexer buffer due to e.g. document level 429 - if err := w.flush(); err != nil { - w.logger.Error("bulk indexer background flush error", zap.Error(err)) - } + _ = w.flush() w.mu.Unlock() case <-w.closeCh: w.mu.Lock() - if err := w.flush(); err != nil { - w.logger.Error("bulk indexer background flush error", zap.Error(err)) - } + _ = w.flush() return // no need to unlock } From 889fe7f59f1c58614755890ce72030368b136986 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 13 May 2024 17:46:35 +0100 Subject: [PATCH 010/117] gofmt --- exporter/elasticsearchexporter/trace_exporter.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/trace_exporter.go b/exporter/elasticsearchexporter/trace_exporter.go index e6281785ca2a..7e523a2a8ef1 100644 --- a/exporter/elasticsearchexporter/trace_exporter.go +++ b/exporter/elasticsearchexporter/trace_exporter.go @@ -9,9 +9,9 @@ import ( "context" "errors" "fmt" - "go.opentelemetry.io/collector/exporter/exporterhelper" "time" + "go.opentelemetry.io/collector/exporter/exporterhelper" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/ptrace" "go.uber.org/zap" From f71d3b2d09fc58beefc9c8038a36d46e29558fad Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 13 May 2024 17:58:02 +0100 Subject: [PATCH 011/117] Fix lifecycle tests --- exporter/elasticsearchexporter/generated_component_test.go | 2 -- exporter/elasticsearchexporter/metadata.yaml | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/exporter/elasticsearchexporter/generated_component_test.go b/exporter/elasticsearchexporter/generated_component_test.go index b96fb32029b3..be0acd62fbd1 100644 --- a/exporter/elasticsearchexporter/generated_component_test.go +++ b/exporter/elasticsearchexporter/generated_component_test.go @@ -99,8 +99,6 @@ func TestComponentLifecycle(t *testing.T) { } }) - require.NoError(t, err) - err = c.Shutdown(context.Background()) require.NoError(t, err) }) diff --git 
a/exporter/elasticsearchexporter/metadata.yaml b/exporter/elasticsearchexporter/metadata.yaml index 1d5820e9b746..a8e301cf4dbb 100644 --- a/exporter/elasticsearchexporter/metadata.yaml +++ b/exporter/elasticsearchexporter/metadata.yaml @@ -11,4 +11,5 @@ status: tests: config: - endpoints: [http://localhost:9200] \ No newline at end of file + endpoints: [http://localhost:9200] + expect_consumer_error: true \ No newline at end of file From 709ca0159cc623b8a3d968e9d3fa5b1c59ea43e5 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 13 May 2024 18:00:06 +0100 Subject: [PATCH 012/117] Use PersistentQueueConfig --- exporter/elasticsearchexporter/config.go | 4 +-- exporter/elasticsearchexporter/config_test.go | 32 +++++++++++-------- exporter/elasticsearchexporter/factory.go | 29 ++++++----------- 3 files changed, 31 insertions(+), 34 deletions(-) diff --git a/exporter/elasticsearchexporter/config.go b/exporter/elasticsearchexporter/config.go index fd7d93145943..a9d60b781e51 100644 --- a/exporter/elasticsearchexporter/config.go +++ b/exporter/elasticsearchexporter/config.go @@ -12,12 +12,12 @@ import ( "go.opentelemetry.io/collector/config/configopaque" "go.opentelemetry.io/collector/config/configtls" - "go.opentelemetry.io/collector/exporter/exporterhelper" + "go.opentelemetry.io/collector/exporter/exporterqueue" ) // Config defines configuration for Elastic exporter. type Config struct { - exporterhelper.QueueSettings `mapstructure:"sending_queue"` + PersistentQueueConfig exporterqueue.PersistentQueueConfig `mapstructure:"sending_queue"` // Endpoints holds the Elasticsearch URLs the exporter should send events to. // // This setting is required if CloudID is not set and if the diff --git a/exporter/elasticsearchexporter/config_test.go b/exporter/elasticsearchexporter/config_test.go index 59c6a290692f..cee71e588e84 100644 --- a/exporter/elasticsearchexporter/config_test.go +++ b/exporter/elasticsearchexporter/config_test.go @@ -13,7 +13,7 @@ import ( "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/confmap/confmaptest" - "go.opentelemetry.io/collector/exporter/exporterhelper" + "go.opentelemetry.io/collector/exporter/exporterqueue" "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/metadata" ) @@ -29,10 +29,12 @@ func TestLoad_DeprecatedIndexConfigOption(t *testing.T) { require.NoError(t, component.UnmarshalConfig(sub, cfg)) assert.Equal(t, cfg, &Config{ - QueueSettings: exporterhelper.QueueSettings{ - Enabled: false, - NumConsumers: exporterhelper.NewDefaultQueueSettings().NumConsumers, - QueueSize: exporterhelper.NewDefaultQueueSettings().QueueSize, + PersistentQueueConfig: exporterqueue.PersistentQueueConfig{ + Config: exporterqueue.Config{ + Enabled: false, + NumConsumers: exporterqueue.NewDefaultConfig().NumConsumers, + QueueSize: exporterqueue.NewDefaultConfig().QueueSize, + }, }, Endpoints: []string{"http://localhost:9200"}, CloudID: "TRNMxjXlNJEt", @@ -111,10 +113,12 @@ func TestLoadConfig(t *testing.T) { id: component.NewIDWithName(metadata.Type, "trace"), configFile: "config.yaml", expected: &Config{ - QueueSettings: exporterhelper.QueueSettings{ - Enabled: false, - NumConsumers: exporterhelper.NewDefaultQueueSettings().NumConsumers, - QueueSize: exporterhelper.NewDefaultQueueSettings().QueueSize, + PersistentQueueConfig: exporterqueue.PersistentQueueConfig{ + Config: exporterqueue.Config{ + Enabled: false, + NumConsumers: exporterqueue.NewDefaultConfig().NumConsumers, + 
QueueSize: exporterqueue.NewDefaultConfig().QueueSize, + }, }, Endpoints: []string{"https://elastic.example.com:9200"}, CloudID: "TRNMxjXlNJEt", @@ -162,10 +166,12 @@ func TestLoadConfig(t *testing.T) { id: component.NewIDWithName(metadata.Type, "log"), configFile: "config.yaml", expected: &Config{ - QueueSettings: exporterhelper.QueueSettings{ - Enabled: true, - NumConsumers: exporterhelper.NewDefaultQueueSettings().NumConsumers, - QueueSize: exporterhelper.NewDefaultQueueSettings().QueueSize, + PersistentQueueConfig: exporterqueue.PersistentQueueConfig{ + Config: exporterqueue.Config{ + Enabled: true, + NumConsumers: exporterqueue.NewDefaultConfig().NumConsumers, + QueueSize: exporterqueue.NewDefaultConfig().QueueSize, + }, }, Endpoints: []string{"http://localhost:9200"}, CloudID: "TRNMxjXlNJEt", diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 422d59cf6153..1ed743347bc9 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -40,10 +40,13 @@ func NewFactory() exporter.Factory { } func createDefaultConfig() component.Config { - qs := exporterhelper.NewDefaultQueueSettings() - qs.Enabled = false + qs := exporterqueue.PersistentQueueConfig{ + Config: exporterqueue.NewDefaultConfig(), + StorageID: nil, + } + qs.Enabled = false // FIXME: how does batching without queuing look like? return &Config{ - QueueSettings: qs, + PersistentQueueConfig: qs, ClientConfig: ClientConfig{ Timeout: 90 * time.Second, }, @@ -124,20 +127,14 @@ func createLogsRequestExporter( batcherCfg := exporterbatcher.NewDefaultConfig() - // FIXME: is this right? - queueCfg := exporterqueue.NewDefaultConfig() - queueCfg.Enabled = cf.QueueSettings.Enabled - queueCfg.NumConsumers = cf.QueueSettings.NumConsumers - queueCfg.QueueSize = cf.QueueSettings.QueueSize - return exporterhelper.NewLogsRequestExporter( ctx, set, logsExporter.logsDataToRequest, exporterhelper.WithBatcher(batcherCfg, exporterhelper.WithRequestBatchFuncs(batchMergeFunc, batchMergeSplitFunc)), exporterhelper.WithShutdown(logsExporter.Shutdown), - exporterhelper.WithRequestQueue(queueCfg, - exporterqueue.NewPersistentQueueFactory[exporterhelper.Request](cf.QueueSettings.StorageID, exporterqueue.PersistentQueueSettings[exporterhelper.Request]{ + exporterhelper.WithRequestQueue(cf.PersistentQueueConfig.Config, + exporterqueue.NewPersistentQueueFactory[exporterhelper.Request](cf.PersistentQueueConfig.StorageID, exporterqueue.PersistentQueueSettings[exporterhelper.Request]{ Marshaler: marshalRequest, Unmarshaler: unmarshalRequest, })), @@ -185,20 +182,14 @@ func createTracesRequestExporter(ctx context.Context, batcherCfg := exporterbatcher.NewDefaultConfig() - // FIXME: is this right? 
- queueCfg := exporterqueue.NewDefaultConfig() - queueCfg.Enabled = cf.QueueSettings.Enabled - queueCfg.NumConsumers = cf.QueueSettings.NumConsumers - queueCfg.QueueSize = cf.QueueSettings.QueueSize - return exporterhelper.NewTracesRequestExporter( ctx, set, tracesExporter.traceDataToRequest, exporterhelper.WithBatcher(batcherCfg, exporterhelper.WithRequestBatchFuncs(batchMergeFunc, batchMergeSplitFunc)), exporterhelper.WithShutdown(tracesExporter.Shutdown), - exporterhelper.WithRequestQueue(queueCfg, - exporterqueue.NewPersistentQueueFactory[exporterhelper.Request](cf.QueueSettings.StorageID, exporterqueue.PersistentQueueSettings[exporterhelper.Request]{ + exporterhelper.WithRequestQueue(cf.PersistentQueueConfig.Config, + exporterqueue.NewPersistentQueueFactory[exporterhelper.Request](cf.PersistentQueueConfig.StorageID, exporterqueue.PersistentQueueSettings[exporterhelper.Request]{ Marshaler: marshalRequest, Unmarshaler: unmarshalRequest, })), From 4474b3b15bd7ea832546e499eb69ec6692db292a Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 13 May 2024 18:17:42 +0100 Subject: [PATCH 013/117] Fix failing tests --- exporter/elasticsearchexporter/elasticsearch_bulk_test.go | 1 + exporter/elasticsearchexporter/logs_exporter_test.go | 2 +- exporter/elasticsearchexporter/traces_exporter_test.go | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk_test.go b/exporter/elasticsearchexporter/elasticsearch_bulk_test.go index 5a4a51e74a1c..732883e2142d 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk_test.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk_test.go @@ -69,6 +69,7 @@ func TestBulkIndexer_addBatchAndFlush(t *testing.T) { }, })) assert.Equal(t, int64(1), bulkIndexer.stats.docsIndexed.Load()) + assert.NoError(t, bulkIndexer.Close(context.Background())) } //func TestBulkIndexer_flush(t *testing.T) { diff --git a/exporter/elasticsearchexporter/logs_exporter_test.go b/exporter/elasticsearchexporter/logs_exporter_test.go index b4516bb2656d..01f6ea5afe56 100644 --- a/exporter/elasticsearchexporter/logs_exporter_test.go +++ b/exporter/elasticsearchexporter/logs_exporter_test.go @@ -389,7 +389,7 @@ func TestExporter_PushEvent(t *testing.T) { testConfig := configurer(server.URL) exporter := newTestExporter(t, server.URL, func(cfg *Config) { *cfg = *testConfig }) - mustSend(t, exporter, `{"message": "test1"}`) + _ = send(t, exporter, `{"message": "test1"}`) assert.Equal(t, int64(1), attempts.Load()) }) diff --git a/exporter/elasticsearchexporter/traces_exporter_test.go b/exporter/elasticsearchexporter/traces_exporter_test.go index d6b1c8dd1c46..6e13ceb53f44 100644 --- a/exporter/elasticsearchexporter/traces_exporter_test.go +++ b/exporter/elasticsearchexporter/traces_exporter_test.go @@ -328,7 +328,7 @@ func TestExporter_PushTraceRecord(t *testing.T) { testConfig := configurer(server.URL) exporter := newTestTracesExporter(t, server.URL, func(cfg *Config) { *cfg = *testConfig }) - mustSendTraces(t, exporter, `{"message": "test1"}`) + _ = sendTraces(t, exporter, `{"message": "test1"}`) assert.Equal(t, int64(1), attempts.Load()) }) From 215295e2c6ce3894a3a0f88661c41a0ba287b65f Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 13 May 2024 18:18:33 +0100 Subject: [PATCH 014/117] Missing newline --- exporter/elasticsearchexporter/metadata.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/metadata.yaml b/exporter/elasticsearchexporter/metadata.yaml index 
a8e301cf4dbb..0f47ad411972 100644 --- a/exporter/elasticsearchexporter/metadata.yaml +++ b/exporter/elasticsearchexporter/metadata.yaml @@ -12,4 +12,4 @@ status: tests: config: endpoints: [http://localhost:9200] - expect_consumer_error: true \ No newline at end of file + expect_consumer_error: true From 961ffaba4d05df9da34460f748067ade27b332eb Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 13 May 2024 18:24:49 +0100 Subject: [PATCH 015/117] Add FIXME --- exporter/elasticsearchexporter/factory.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 1ed743347bc9..7f01369a64cb 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -125,7 +125,7 @@ func createLogsRequestExporter( return &req, err } - batcherCfg := exporterbatcher.NewDefaultConfig() + batcherCfg := exporterbatcher.NewDefaultConfig() // FIXME: configurable batcher return exporterhelper.NewLogsRequestExporter( ctx, @@ -180,7 +180,7 @@ func createTracesRequestExporter(ctx context.Context, return &req, err } - batcherCfg := exporterbatcher.NewDefaultConfig() + batcherCfg := exporterbatcher.NewDefaultConfig() // FIXME: configurable batcher return exporterhelper.NewTracesRequestExporter( ctx, From 683dca322b4784bed9b9bb888fb6100f4a56ded0 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 14 May 2024 10:48:05 +0100 Subject: [PATCH 016/117] Configure batcher --- exporter/elasticsearchexporter/factory.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 7f01369a64cb..f1fa84f8cc35 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -125,7 +125,11 @@ func createLogsRequestExporter( return &req, err } - batcherCfg := exporterbatcher.NewDefaultConfig() // FIXME: configurable batcher + batcherCfg := exporterbatcher.NewDefaultConfig() + batcherCfg.Enabled = true + batcherCfg.FlushTimeout = cf.Flush.Interval + batcherCfg.MinSizeItems = 125 + batcherCfg.MaxSizeItems = 0 return exporterhelper.NewLogsRequestExporter( ctx, @@ -180,7 +184,11 @@ func createTracesRequestExporter(ctx context.Context, return &req, err } - batcherCfg := exporterbatcher.NewDefaultConfig() // FIXME: configurable batcher + batcherCfg := exporterbatcher.NewDefaultConfig() + batcherCfg.Enabled = true + batcherCfg.FlushTimeout = cf.Flush.Interval + batcherCfg.MinSizeItems = 125 + batcherCfg.MaxSizeItems = 0 return exporterhelper.NewTracesRequestExporter( ctx, From e46b165d6df597fc2b408190357252eaed9901b9 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 14 May 2024 11:02:57 +0100 Subject: [PATCH 017/117] FIXME --- exporter/elasticsearchexporter/factory.go | 1 + 1 file changed, 1 insertion(+) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index f1fa84f8cc35..a4b8c49d7b45 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -119,6 +119,7 @@ func createLogsRequestExporter( } unmarshalRequest := func(b []byte) (exporterhelper.Request, error) { + // FIXME: back-compat unmarshaling in case of residue in persistent queue var req request err := json.Unmarshal(b, &req) req.bulkIndexer = logsExporter.bulkIndexer From ca810d41ea13d7e098fbfbc11b38038de3e5d030 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 14 May 2024 11:43:48 +0100 Subject: 
[PATCH 018/117] Clean up --- .../elasticsearch_bulk_test.go | 39 ------------------- .../elasticsearchexporter/trace_exporter.go | 4 -- 2 files changed, 43 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk_test.go b/exporter/elasticsearchexporter/elasticsearch_bulk_test.go index 732883e2142d..255d3446bf3b 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk_test.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk_test.go @@ -72,45 +72,6 @@ func TestBulkIndexer_addBatchAndFlush(t *testing.T) { assert.NoError(t, bulkIndexer.Close(context.Background())) } -//func TestBulkIndexer_flush(t *testing.T) { -// tests := []struct { -// name string -// config Config -// }{ -// { -// name: "flush.bytes", -// config: Config{NumWorkers: 1, Flush: FlushSettings{Interval: time.Hour, Bytes: 1}}, -// }, -// { -// name: "flush.interval", -// config: Config{NumWorkers: 1, Flush: FlushSettings{Interval: 50 * time.Millisecond, Bytes: 2 << 30}}, -// }, -// } -// -// for _, tt := range tests { -// tt := tt -// t.Run(tt.name, func(t *testing.T) { -// t.Parallel() -// client, err := elasticsearch.NewClient(elasticsearch.Config{Transport: &mockTransport{ -// RoundTripFunc: func(*http.Request) (*http.Response, error) { -// return &http.Response{ -// Header: http.Header{"X-Elastic-Product": []string{"Elasticsearch"}}, -// Body: io.NopCloser(strings.NewReader(successResp)), -// }, nil -// }, -// }}) -// require.NoError(t, err) -// bulkIndexer, err := newBulkIndexer(zap.NewNop(), client, &tt.config) -// require.NoError(t, err) -// assert.NoError(t, bulkIndexer.Add(context.Background(), "foo", strings.NewReader(`{"foo": "bar"}`))) -// // should flush -// time.Sleep(100 * time.Millisecond) -// assert.Equal(t, int64(1), bulkIndexer.stats.docsIndexed.Load()) -// assert.NoError(t, bulkIndexer.Close(context.Background())) -// }) -// } -//} - func TestBulkIndexer_addBatchAndFlush_error(t *testing.T) { tests := []struct { name string diff --git a/exporter/elasticsearchexporter/trace_exporter.go b/exporter/elasticsearchexporter/trace_exporter.go index 7e523a2a8ef1..bab6bca137b2 100644 --- a/exporter/elasticsearchexporter/trace_exporter.go +++ b/exporter/elasticsearchexporter/trace_exporter.go @@ -125,7 +125,3 @@ func (e *elasticsearchTracesExporter) traceRecordToItem(ctx context.Context, res Body: document, }, nil } - -func pushDocuments(ctx context.Context, index string, document []byte, current *esBulkIndexerCurrent) error { - return fmt.Errorf("not implemented") -} From 364e27dd2ad82d3dd2f097d226e7271a1c6dc071 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 14 May 2024 22:59:35 +0100 Subject: [PATCH 019/117] Enable remaining integration tests --- .../integrationtest/datareceiver.go | 3 +++ .../integrationtest/exporter_test.go | 9 ++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/exporter/elasticsearchexporter/integrationtest/datareceiver.go b/exporter/elasticsearchexporter/integrationtest/datareceiver.go index c35267948f3f..76f25b5b8ca4 100644 --- a/exporter/elasticsearchexporter/integrationtest/datareceiver.go +++ b/exporter/elasticsearchexporter/integrationtest/datareceiver.go @@ -76,6 +76,9 @@ func (es *esDataReceiver) GenConfigYAMLStr() string { interval: 1s sending_queue: enabled: true + storage: file_storage/elasticsearchexporter + num_consumers: 100 + queue_size: 100000 retry: enabled: true max_requests: 10000 diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_test.go 
index ed52e460d692..0874e90d7152 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_test.go @@ -35,10 +35,8 @@ func TestExporter(t *testing.T) { }{ {name: "basic"}, {name: "es_intermittent_failure", mockESFailure: true}, - /* TODO: Below tests should be enabled after https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/30792 is fixed {name: "collector_restarts", restartCollector: true}, {name: "collector_restart_with_es_intermittent_failure", mockESFailure: true, restartCollector: true}, - */ } { t.Run(fmt.Sprintf("%s/%s", eventType, tc.name), func(t *testing.T) { runner(t, eventType, tc.restartCollector, tc.mockESFailure) @@ -71,7 +69,12 @@ func runner(t *testing.T, eventType string, restartCollector, mockESFailure bool } provider := testbed.NewPerfTestDataProvider(loadOpts) - cfg := createConfigYaml(t, sender, receiver, nil, nil, eventType, getDebugFlag(t)) + tempDir := t.TempDir() + extensions := map[string]string{ + "file_storage/elasticsearchexporter": fmt.Sprintf(`file_storage/elasticsearchexporter: + directory: %s`, tempDir), + } + cfg := createConfigYaml(t, sender, receiver, nil, extensions, eventType, getDebugFlag(t)) t.Log("test otel collector configuration:", cfg) collector := newRecreatableOtelCol(t) cleanup, err := collector.PrepareConfig(cfg) From 946bf2f513e369087cf7f1b28bc8dfe39395a14f Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 14 May 2024 23:12:31 +0100 Subject: [PATCH 020/117] Double select to abort early when ctx is done --- exporter/elasticsearchexporter/elasticsearch_bulk.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 36ee41063290..e69a6bebc16f 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -237,6 +237,11 @@ func (p *bulkIndexerPool) AddBatchAndFlush(ctx context.Context, batch []esBulkIn defer func() { p.available <- worker }() + select { + case <-ctx.Done(): + return ctx.Err() + default: + } return worker.addBatchAndFlush(batch) } } From ea3edd389c88caa454e55304e56c492b409df9bd Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 15 May 2024 00:29:18 +0100 Subject: [PATCH 021/117] Ensure all retries are finished when flush returns --- .../elasticsearchexporter/elasticsearch_bulk.go | 14 +++++++++++++- .../integrationtest/datareceiver.go | 2 ++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index e69a6bebc16f..f9dc7986c40f 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -206,6 +206,7 @@ func newBulkIndexer(logger *zap.Logger, client *elasticsearch7.Client, config *C closeCh: pool.closeCh, flushInterval: flushInterval, flushTimeout: config.Timeout, + retryBackoff: createElasticsearchBackoffFunc(&config.Retry), logger: logger, stats: &pool.stats, } @@ -270,6 +271,8 @@ type worker struct { //flushBytes int mu sync.Mutex + retryBackoff func(int) time.Duration + stats *bulkIndexerStats logger *zap.Logger @@ -283,7 +286,15 @@ func (w *worker) addBatchAndFlush(batch []esBulkIndexerItem) error { w.logger.Error("error adding item to bulk indexer", zap.Error(err)) } } - return w.flush() + for attempts := 0; ; attempts++ { + if err := w.flush(); err != nil { + 
return err + } else if w.indexer.Items() == 0 { + return nil + } + backoff := w.retryBackoff(attempts + 1) + time.Sleep(backoff) + } } func (w *worker) run() { @@ -293,6 +304,7 @@ func (w *worker) run() { select { case <-flushTick.C: w.mu.Lock() + // FIXME: this is no longer needed // bulk indexer needs to be flushed every flush interval because // there may be pending bytes in bulk indexer buffer due to e.g. document level 429 _ = w.flush() diff --git a/exporter/elasticsearchexporter/integrationtest/datareceiver.go b/exporter/elasticsearchexporter/integrationtest/datareceiver.go index 76f25b5b8ca4..bbfaaeddd0bc 100644 --- a/exporter/elasticsearchexporter/integrationtest/datareceiver.go +++ b/exporter/elasticsearchexporter/integrationtest/datareceiver.go @@ -82,6 +82,8 @@ func (es *esDataReceiver) GenConfigYAMLStr() string { retry: enabled: true max_requests: 10000 + initial_interval: 100ms + max_interval: 1s ` return fmt.Sprintf(cfgFormat, es.endpoint) } From 16ece1c0f8167a37677e80457d385981eb946c1e Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 16 May 2024 09:56:46 +0100 Subject: [PATCH 022/117] Remove bulk indexer pool background goroutine --- .../elasticsearch_bulk.go | 41 +++++++------------ 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index f9dc7986c40f..3f6ecf67d0ba 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -210,10 +210,6 @@ func newBulkIndexer(logger *zap.Logger, client *elasticsearch7.Client, config *C logger: logger, stats: &pool.stats, } - go func() { - defer pool.wg.Done() - w.run() - }() pool.available <- &w } return pool, nil @@ -234,6 +230,8 @@ func (p *bulkIndexerPool) AddBatchAndFlush(ctx context.Context, batch []esBulkIn select { case <-ctx.Done(): return ctx.Err() + case <-p.closeCh: + return fmt.Errorf("bulk indexer is closed") case worker := <-p.available: defer func() { p.available <- worker @@ -241,9 +239,11 @@ func (p *bulkIndexerPool) AddBatchAndFlush(ctx context.Context, batch []esBulkIn select { case <-ctx.Done(): return ctx.Err() + case <-p.closeCh: + return fmt.Errorf("bulk indexer is closed") default: } - return worker.addBatchAndFlush(batch) + return worker.addBatchAndFlush(ctx, batch) } } @@ -252,7 +252,9 @@ func (p *bulkIndexerPool) Close(ctx context.Context) error { close(p.closeCh) doneCh := make(chan struct{}) go func() { - p.wg.Wait() + for i := 0; i < cap(p.available); i++ { + <-p.available + } close(doneCh) }() select { @@ -278,7 +280,7 @@ type worker struct { logger *zap.Logger } -func (w *worker) addBatchAndFlush(batch []esBulkIndexerItem) error { +func (w *worker) addBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem) error { w.mu.Lock() defer w.mu.Unlock() for _, item := range batch { @@ -293,27 +295,14 @@ func (w *worker) addBatchAndFlush(batch []esBulkIndexerItem) error { return nil } backoff := w.retryBackoff(attempts + 1) - time.Sleep(backoff) - } -} - -func (w *worker) run() { - flushTick := time.NewTicker(w.flushInterval) - defer flushTick.Stop() - for { + timer := time.NewTimer(backoff) + defer timer.Stop() select { - case <-flushTick.C: - w.mu.Lock() - // FIXME: this is no longer needed - // bulk indexer needs to be flushed every flush interval because - // there may be pending bytes in bulk indexer buffer due to e.g. 
document level 429 - _ = w.flush() - w.mu.Unlock() + case <-ctx.Done(): + return ctx.Err() case <-w.closeCh: - w.mu.Lock() - _ = w.flush() - return - // no need to unlock + return fmt.Errorf("bulk indexer is closed") + case <-timer.C: } } } From 9d83826e8eb49d3d681265218c51ab361a650091 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 16 May 2024 09:57:58 +0100 Subject: [PATCH 023/117] Disable failing integration test --- .../elasticsearchexporter/integrationtest/exporter_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_test.go index 0874e90d7152..41e5128354be 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_test.go @@ -36,7 +36,9 @@ func TestExporter(t *testing.T) { {name: "basic"}, {name: "es_intermittent_failure", mockESFailure: true}, {name: "collector_restarts", restartCollector: true}, - {name: "collector_restart_with_es_intermittent_failure", mockESFailure: true, restartCollector: true}, + // Test is failing because exporter does not shut down when in-flight requests block indefinitely. + // See https://github.com/open-telemetry/opentelemetry-collector/issues/10166 + // {name: "collector_restart_with_es_intermittent_failure", mockESFailure: true, restartCollector: true}, } { t.Run(fmt.Sprintf("%s/%s", eventType, tc.name), func(t *testing.T) { runner(t, eventType, tc.restartCollector, tc.mockESFailure) From bf115c2cc1405194dd8748d857cb452bc91bbda7 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 16 May 2024 10:58:22 +0100 Subject: [PATCH 024/117] Add -> add --- exporter/elasticsearchexporter/logs_exporter.go | 2 +- exporter/elasticsearchexporter/logs_exporter_test.go | 4 ++-- exporter/elasticsearchexporter/request.go | 2 +- exporter/elasticsearchexporter/trace_exporter.go | 2 +- exporter/elasticsearchexporter/traces_exporter_test.go | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/exporter/elasticsearchexporter/logs_exporter.go b/exporter/elasticsearchexporter/logs_exporter.go index 7a66a8e28cd4..72981298318c 100644 --- a/exporter/elasticsearchexporter/logs_exporter.go +++ b/exporter/elasticsearchexporter/logs_exporter.go @@ -92,7 +92,7 @@ func (e *elasticsearchLogsExporter) logsDataToRequest(ctx context.Context, ld pl errs = append(errs, err) continue } - req.Add(item) + req.add(item) } } } diff --git a/exporter/elasticsearchexporter/logs_exporter_test.go b/exporter/elasticsearchexporter/logs_exporter_test.go index 01f6ea5afe56..0259dc98e5c5 100644 --- a/exporter/elasticsearchexporter/logs_exporter_test.go +++ b/exporter/elasticsearchexporter/logs_exporter_test.go @@ -517,7 +517,7 @@ func send(t *testing.T, exporter *elasticsearchLogsExporter, contents ...string) Items: nil, } for _, body := range contents { - req.Add(bulkIndexerItem{ + req.add(bulkIndexerItem{ Index: exporter.index, Body: []byte(body), }) @@ -544,7 +544,7 @@ func mustSendLogsWithAttributes(t *testing.T, exporter *elasticsearchLogsExporte } item, err := exporter.logRecordToItem(context.TODO(), resSpans.Resource(), logRecords, scopeLog.Scope()) require.NoError(t, err) - req.Add(item) + req.add(item) err = req.Export(context.TODO()) require.NoError(t, err) } diff --git a/exporter/elasticsearchexporter/request.go b/exporter/elasticsearchexporter/request.go index 5b489fc70258..406a9ccad17d 100644 --- a/exporter/elasticsearchexporter/request.go +++ 
b/exporter/elasticsearchexporter/request.go @@ -29,7 +29,7 @@ func (r *request) ItemsCount() int { return len(r.Items) } -func (r *request) Add(item bulkIndexerItem) { +func (r *request) add(item bulkIndexerItem) { r.Items = append(r.Items, item) } diff --git a/exporter/elasticsearchexporter/trace_exporter.go b/exporter/elasticsearchexporter/trace_exporter.go index bab6bca137b2..35a8fe9dc7e9 100644 --- a/exporter/elasticsearchexporter/trace_exporter.go +++ b/exporter/elasticsearchexporter/trace_exporter.go @@ -91,7 +91,7 @@ func (e *elasticsearchTracesExporter) traceDataToRequest( errs = append(errs, err) continue } - req.Add(item) + req.add(item) } } } diff --git a/exporter/elasticsearchexporter/traces_exporter_test.go b/exporter/elasticsearchexporter/traces_exporter_test.go index 6e13ceb53f44..ea71cdd3a588 100644 --- a/exporter/elasticsearchexporter/traces_exporter_test.go +++ b/exporter/elasticsearchexporter/traces_exporter_test.go @@ -464,7 +464,7 @@ func sendTraces(t *testing.T, exporter *elasticsearchTracesExporter, contents .. Items: nil, } for _, body := range contents { - req.Add(bulkIndexerItem{ + req.add(bulkIndexerItem{ Index: exporter.index, Body: []byte(body), }) @@ -490,7 +490,7 @@ func mustSendTracesWithAttributes(t *testing.T, exporter *elasticsearchTracesExp } item, err := exporter.traceRecordToItem(context.TODO(), resSpans.Resource(), span, scope) require.NoError(t, err) - req.Add(item) + req.add(item) err = req.Export(context.TODO()) require.NoError(t, err) } From 725870420d71f7bb7ccc8ff72f510715f2904bd6 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 16 May 2024 10:58:58 +0100 Subject: [PATCH 025/117] Remove bulkindexerpool wg --- exporter/elasticsearchexporter/elasticsearch_bulk.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 3f6ecf67d0ba..c1bc9fd37ba0 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -184,12 +184,10 @@ func newBulkIndexer(logger *zap.Logger, client *elasticsearch7.Client, config *C } pool := &bulkIndexerPool{ - wg: sync.WaitGroup{}, closeCh: make(chan struct{}), stats: bulkIndexerStats{}, available: make(chan *worker, numWorkers), } - pool.wg.Add(numWorkers) for i := 0; i < numWorkers; i++ { bi, err := docappender.NewBulkIndexer(docappender.BulkIndexerConfig{ @@ -221,7 +219,6 @@ type bulkIndexerStats struct { type bulkIndexerPool struct { closeCh chan struct{} - wg sync.WaitGroup stats bulkIndexerStats available chan *worker } From 2958362d30eb3f5c01c02744fb8967ba49fa761c Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 16 May 2024 11:06:17 +0100 Subject: [PATCH 026/117] Remove flush settings in bulk indexer --- .../elasticsearch_bulk.go | 33 ++++++------------- .../elasticsearch_bulk_test.go | 14 ++++---- exporter/elasticsearchexporter/factory.go | 6 ++++ 3 files changed, 22 insertions(+), 31 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index c1bc9fd37ba0..061314e07f48 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -166,16 +166,6 @@ func newBulkIndexer(logger *zap.Logger, client *elasticsearch7.Client, config *C numWorkers = runtime.NumCPU() } - flushInterval := config.Flush.Interval - if flushInterval == 0 { - flushInterval = 30 * time.Second - } - - flushBytes := 
config.Flush.Bytes - if flushBytes == 0 { - flushBytes = 5e+6 - } - var maxDocRetry int if config.Retry.Enabled { // max_requests includes initial attempt @@ -200,13 +190,12 @@ func newBulkIndexer(logger *zap.Logger, client *elasticsearch7.Client, config *C return nil, err } w := worker{ - indexer: bi, - closeCh: pool.closeCh, - flushInterval: flushInterval, - flushTimeout: config.Timeout, - retryBackoff: createElasticsearchBackoffFunc(&config.Retry), - logger: logger, - stats: &pool.stats, + indexer: bi, + closeCh: pool.closeCh, + flushTimeout: config.Timeout, + retryBackoff: createElasticsearchBackoffFunc(&config.Retry), + logger: logger, + stats: &pool.stats, } pool.available <- &w } @@ -263,12 +252,10 @@ func (p *bulkIndexerPool) Close(ctx context.Context) error { } type worker struct { - indexer *docappender.BulkIndexer - closeCh <-chan struct{} - flushInterval time.Duration - flushTimeout time.Duration - //flushBytes int - mu sync.Mutex + indexer *docappender.BulkIndexer + closeCh <-chan struct{} + flushTimeout time.Duration + mu sync.Mutex retryBackoff func(int) time.Duration diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk_test.go b/exporter/elasticsearchexporter/elasticsearch_bulk_test.go index 255d3446bf3b..7e2937b1ce30 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk_test.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk_test.go @@ -6,16 +6,14 @@ package elasticsearchexporter import ( "context" "errors" - "io" - "net/http" - "strings" - "testing" - "time" - "github.com/elastic/go-elasticsearch/v7" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/zap" + "io" + "net/http" + "strings" + "testing" ) var defaultRoundTripFunc = func(*http.Request) (*http.Response, error) { @@ -49,7 +47,7 @@ const successResp = `{ }` func TestBulkIndexer_addBatchAndFlush(t *testing.T) { - cfg := Config{NumWorkers: 1, Flush: FlushSettings{Interval: time.Hour, Bytes: 2 << 30}} + cfg := Config{NumWorkers: 1} client, err := elasticsearch.NewClient(elasticsearch.Config{Transport: &mockTransport{ RoundTripFunc: func(*http.Request) (*http.Response, error) { return &http.Response{ @@ -109,7 +107,7 @@ func TestBulkIndexer_addBatchAndFlush_error(t *testing.T) { tt := tt t.Run(tt.name, func(t *testing.T) { t.Parallel() - cfg := Config{NumWorkers: 1, Flush: FlushSettings{Interval: time.Hour, Bytes: 1}} + cfg := Config{NumWorkers: 1} client, err := elasticsearch.NewClient(elasticsearch.Config{Transport: &mockTransport{ RoundTripFunc: tt.roundTripFunc, }}) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index a4b8c49d7b45..80ed12914c2e 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -129,6 +129,9 @@ func createLogsRequestExporter( batcherCfg := exporterbatcher.NewDefaultConfig() batcherCfg.Enabled = true batcherCfg.FlushTimeout = cf.Flush.Interval + if batcherCfg.FlushTimeout == 0 { + batcherCfg.FlushTimeout = 30 * time.Second + } batcherCfg.MinSizeItems = 125 batcherCfg.MaxSizeItems = 0 @@ -188,6 +191,9 @@ func createTracesRequestExporter(ctx context.Context, batcherCfg := exporterbatcher.NewDefaultConfig() batcherCfg.Enabled = true batcherCfg.FlushTimeout = cf.Flush.Interval + if batcherCfg.FlushTimeout == 0 { + batcherCfg.FlushTimeout = 30 * time.Second + } batcherCfg.MinSizeItems = 125 batcherCfg.MaxSizeItems = 0 From 6db1c6586d588e1a1882be106d9b62918b40e4a1 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 16 May 2024 11:29:52 
+0100 Subject: [PATCH 027/117] Add flush.documents, ignore flush.bytes --- exporter/elasticsearchexporter/README.md | 5 +-- exporter/elasticsearchexporter/config.go | 4 +++ exporter/elasticsearchexporter/config_test.go | 12 +++++-- exporter/elasticsearchexporter/factory.go | 34 +++++++++---------- .../config-use-deprecated-index_option.yaml | 6 ++-- .../testdata/config.yaml | 6 ++-- 6 files changed, 40 insertions(+), 27 deletions(-) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index 7a6d815475c5..625bcd718497 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -51,8 +51,9 @@ This exporter supports sending OpenTelemetry logs and traces to [Elasticsearch]( - `date_format`(default=`%Y.%m.%d`): Time format (based on strftime) to generate the second part of the Index name. - `pipeline` (optional): Optional [Ingest pipeline](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html) ID used for processing documents published by the exporter. - `flush`: Event bulk indexer buffer flush settings - - `bytes` (default=5000000): Write buffer flush size limit. - - `interval` (default=30s): Write buffer flush time limit. + - `bytes` (default=5000000): Write buffer flush size limit. WARNING: This configuration is ignored. Use `flush.documents` instead. + - `documents` (default=125): Minimum number of documents in the buffer to trigger a flush immediately. + - `interval` (default=30s): Max age of a document in the buffer. A flush will happen regardless of the size of buffer. - `retry`: Elasticsearch bulk request retry settings - `enabled` (default=true): Enable/Disable request retry on error. Failed requests are retried with exponential backoff. - `max_requests` (default=3): Number of HTTP request retries. diff --git a/exporter/elasticsearchexporter/config.go b/exporter/elasticsearchexporter/config.go index a9d60b781e51..d77a50a04bd2 100644 --- a/exporter/elasticsearchexporter/config.go +++ b/exporter/elasticsearchexporter/config.go @@ -131,8 +131,12 @@ type DiscoverySettings struct { // all events already serialized into the send-buffer. type FlushSettings struct { // Bytes sets the send buffer flushing limit. + // WARNING: This configuration is ignored. Use `flush.documents` instead. Bytes int `mapstructure:"bytes"` + // Documents configures the minimum number of documents in the send buffer to trigger a flush. + Documents int `mapstructure:"documents"` + // Interval configures the max age of a document in the send buffer. 
Interval time.Duration `mapstructure:"interval"` } diff --git a/exporter/elasticsearchexporter/config_test.go b/exporter/elasticsearchexporter/config_test.go index cee71e588e84..f466766b5fd9 100644 --- a/exporter/elasticsearchexporter/config_test.go +++ b/exporter/elasticsearchexporter/config_test.go @@ -57,7 +57,9 @@ func TestLoad_DeprecatedIndexConfigOption(t *testing.T) { OnStart: true, }, Flush: FlushSettings{ - Bytes: 10485760, + Bytes: 5000000, + Documents: 100, + Interval: 5 * time.Second, }, Retry: RetrySettings{ Enabled: true, @@ -141,7 +143,9 @@ func TestLoadConfig(t *testing.T) { OnStart: true, }, Flush: FlushSettings{ - Bytes: 10485760, + Bytes: 5000000, + Documents: 100, + Interval: 5 * time.Second, }, Retry: RetrySettings{ Enabled: true, @@ -194,7 +198,9 @@ func TestLoadConfig(t *testing.T) { OnStart: true, }, Flush: FlushSettings{ - Bytes: 10485760, + Bytes: 5000000, + Documents: 100, + Interval: 5 * time.Second, }, Retry: RetrySettings{ Enabled: true, diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 80ed12914c2e..18a06b5d5753 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -66,6 +66,11 @@ func createDefaultConfig() component.Config { http.StatusGatewayTimeout, }, }, + Flush: FlushSettings{ + Bytes: 5000000, + Documents: 125, + Interval: 30 * time.Second, + }, Mapping: MappingsSettings{ Mode: "none", Dedup: true, @@ -126,15 +131,7 @@ func createLogsRequestExporter( return &req, err } - batcherCfg := exporterbatcher.NewDefaultConfig() - batcherCfg.Enabled = true - batcherCfg.FlushTimeout = cf.Flush.Interval - if batcherCfg.FlushTimeout == 0 { - batcherCfg.FlushTimeout = 30 * time.Second - } - batcherCfg.MinSizeItems = 125 - batcherCfg.MaxSizeItems = 0 - + batcherCfg := getBatcherConfig(cf) return exporterhelper.NewLogsRequestExporter( ctx, set, @@ -188,15 +185,7 @@ func createTracesRequestExporter(ctx context.Context, return &req, err } - batcherCfg := exporterbatcher.NewDefaultConfig() - batcherCfg.Enabled = true - batcherCfg.FlushTimeout = cf.Flush.Interval - if batcherCfg.FlushTimeout == 0 { - batcherCfg.FlushTimeout = 30 * time.Second - } - batcherCfg.MinSizeItems = 125 - batcherCfg.MaxSizeItems = 0 - + batcherCfg := getBatcherConfig(cf) return exporterhelper.NewTracesRequestExporter( ctx, set, @@ -221,3 +210,12 @@ func setDefaultUserAgentHeader(cf *Config, info component.BuildInfo) { } cf.Headers[userAgentHeaderKey] = fmt.Sprintf("%s/%s (%s/%s)", info.Description, info.Version, runtime.GOOS, runtime.GOARCH) } + +func getBatcherConfig(cf *Config) exporterbatcher.Config { + batcherCfg := exporterbatcher.NewDefaultConfig() + batcherCfg.Enabled = true + batcherCfg.FlushTimeout = cf.Flush.Interval + batcherCfg.MinSizeItems = cf.Flush.Documents + batcherCfg.MaxSizeItems = 0 + return batcherCfg +} diff --git a/exporter/elasticsearchexporter/testdata/config-use-deprecated-index_option.yaml b/exporter/elasticsearchexporter/testdata/config-use-deprecated-index_option.yaml index 9bf57e686b00..47b3ad14cb3e 100644 --- a/exporter/elasticsearchexporter/testdata/config-use-deprecated-index_option.yaml +++ b/exporter/elasticsearchexporter/testdata/config-use-deprecated-index_option.yaml @@ -14,7 +14,8 @@ elasticsearch/trace: discover: on_start: true flush: - bytes: 10485760 + documents: 100 + interval: 5s retry: max_requests: 5 elasticsearch/log: @@ -33,6 +34,7 @@ elasticsearch/log: discover: on_start: true flush: - bytes: 10485760 + documents: 100 + interval: 5s retry: 
max_requests: 5 diff --git a/exporter/elasticsearchexporter/testdata/config.yaml b/exporter/elasticsearchexporter/testdata/config.yaml index ba323702ea43..672b5efa33f7 100644 --- a/exporter/elasticsearchexporter/testdata/config.yaml +++ b/exporter/elasticsearchexporter/testdata/config.yaml @@ -16,7 +16,8 @@ elasticsearch/trace: discover: on_start: true flush: - bytes: 10485760 + documents: 100 + interval: 5s retry: max_requests: 5 retry_on_status: @@ -38,7 +39,8 @@ elasticsearch/log: discover: on_start: true flush: - bytes: 10485760 + documents: 100 + interval: 5s retry: max_requests: 5 retry_on_status: From 332ee78f542d319b90e590f2fed9e97396709b65 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 16 May 2024 20:29:46 +0100 Subject: [PATCH 028/117] Warn about flush.bytes removal --- exporter/elasticsearchexporter/config_test.go | 6 +++--- exporter/elasticsearchexporter/factory.go | 10 +++++++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/exporter/elasticsearchexporter/config_test.go b/exporter/elasticsearchexporter/config_test.go index f466766b5fd9..582be93dbb45 100644 --- a/exporter/elasticsearchexporter/config_test.go +++ b/exporter/elasticsearchexporter/config_test.go @@ -57,7 +57,7 @@ func TestLoad_DeprecatedIndexConfigOption(t *testing.T) { OnStart: true, }, Flush: FlushSettings{ - Bytes: 5000000, + Bytes: 0, Documents: 100, Interval: 5 * time.Second, }, @@ -143,7 +143,7 @@ func TestLoadConfig(t *testing.T) { OnStart: true, }, Flush: FlushSettings{ - Bytes: 5000000, + Bytes: 0, Documents: 100, Interval: 5 * time.Second, }, @@ -198,7 +198,7 @@ func TestLoadConfig(t *testing.T) { OnStart: true, }, Flush: FlushSettings{ - Bytes: 5000000, + Bytes: 0, Documents: 100, Interval: 5 * time.Second, }, diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 18a06b5d5753..b50aaa809b7c 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -67,7 +67,7 @@ func createDefaultConfig() component.Config { }, }, Flush: FlushSettings{ - Bytes: 5000000, + Bytes: 0, Documents: 125, Interval: 30 * time.Second, }, @@ -97,6 +97,10 @@ func createLogsRequestExporter( set.Logger.Warn("index option are deprecated and replaced with logs_index and traces_index.") } + if cf.Flush.Bytes != 0 { + set.Logger.Warn("flush.bytes option is ignored. Use flush.documents instead.") + } + setDefaultUserAgentHeader(cf, set.BuildInfo) logsExporter, err := newLogsExporter(set.Logger, cf) @@ -152,6 +156,10 @@ func createTracesRequestExporter(ctx context.Context, cf := cfg.(*Config) + if cf.Flush.Bytes != 0 { + set.Logger.Warn("flush.bytes option is ignored. 
Use flush.documents instead.") + } + setDefaultUserAgentHeader(cf, set.BuildInfo) tracesExporter, err := newTracesExporter(set.Logger, cf) From 7978e89143b297533149fd5ace2164e8253e4ae8 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 16 May 2024 21:35:26 +0100 Subject: [PATCH 029/117] WIP: bench with persistent queue --- .../integrationtest/exporter_bench_test.go | 57 ++++++++++++++++++- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index 7350c44add69..fa83954d62d4 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -10,11 +10,15 @@ import ( "time" "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/component/componenttest" "go.opentelemetry.io/collector/consumer" "go.opentelemetry.io/collector/exporter/exportertest" + "go.opentelemetry.io/collector/extension" "go.opentelemetry.io/collector/pdata/plog" "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter" + "github.com/open-telemetry/opentelemetry-collector-contrib/extension/storage/filestorage" "github.com/open-telemetry/opentelemetry-collector-contrib/testbed/testbed" ) @@ -34,16 +38,55 @@ func BenchmarkLogsExporter(b *testing.B) { } } +type mockHost struct { + component.Host + ext map[component.ID]component.Component +} + +func (nh *mockHost) GetExtensions() map[component.ID]component.Component { + return nh.ext +} + func benchmarkLogs(b *testing.B, batchSize int) { var generatedCount, observedCount atomic.Uint64 + storage := filestorage.NewFactory() + storageCfg := storage.CreateDefaultConfig().(*filestorage.Config) + storageCfg.Directory = b.TempDir() + componentID := component.NewIDWithName(storage.Type(), "elasticsearch") + fileExtension, err := storage.CreateExtension(context.Background(), + extension.CreateSettings{ + ID: componentID, + TelemetrySettings: componenttest.NewNopTelemetrySettings(), + BuildInfo: component.NewDefaultBuildInfo(), + }, + storageCfg) + require.NoError(b, err) + + host := &mockHost{ + ext: map[component.ID]component.Component{ + componentID: fileExtension, + }, + } + + require.NoError(b, fileExtension.Start(context.Background(), host)) + defer fileExtension.Shutdown(context.Background()) + receiver := newElasticsearchDataReceiver(b) factory := elasticsearchexporter.NewFactory() cfg := factory.CreateDefaultConfig().(*elasticsearchexporter.Config) + //cfg.QueueSettings.Enabled = true + //cfg.QueueSettings.NumConsumers = 100 + //cfg.QueueSettings.QueueSize = 100000 + cfg.PersistentQueueConfig.Enabled = true + cfg.PersistentQueueConfig.NumConsumers = 100 + cfg.PersistentQueueConfig.QueueSize = 100000 + cfg.PersistentQueueConfig.StorageID = &componentID cfg.Endpoints = []string{receiver.endpoint} - cfg.Flush.Interval = 10 * time.Millisecond - cfg.NumWorkers = 1 + cfg.Flush.Interval = 100 * time.Millisecond + cfg.Flush.Bytes = 125 * 300 + cfg.NumWorkers = 4 exporter, err := factory.CreateLogsExporter( context.Background(), @@ -51,6 +94,7 @@ func benchmarkLogs(b *testing.B, batchSize int) { cfg, ) require.NoError(b, err) + exporter.Start(context.Background(), host) provider := testbed.NewPerfTestDataProvider(testbed.LoadOptions{ItemsPerBatch: batchSize}) provider.SetLoadGeneratorCounters(&generatedCount) @@ -75,6 +119,13 @@ func benchmarkLogs(b 
*testing.B, batchSize int) { b.StartTimer() require.NoError(b, exporter.ConsumeLogs(ctx, logs)) } + // FIXME: persistent queue doesn't drain on shutdown + for { + if observedCount.Load() >= generatedCount.Load() { + break + } + time.Sleep(10 * time.Millisecond) + } require.NoError(b, exporter.Shutdown(ctx)) - require.Equal(b, generatedCount.Load(), observedCount.Load(), "failed to send all logs to backend") + require.Equal(b, int64(generatedCount.Load()), int64(observedCount.Load()), "failed to send all logs to backend") } From 33c4bfe44380f46bd06e5703701e147bd9c42c37 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Fri, 17 May 2024 14:58:43 +0100 Subject: [PATCH 030/117] Bench with ecs, worker=1 --- .../integrationtest/exporter_bench_test.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index fa83954d62d4..74ab5131ac60 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -76,17 +76,15 @@ func benchmarkLogs(b *testing.B, batchSize int) { factory := elasticsearchexporter.NewFactory() cfg := factory.CreateDefaultConfig().(*elasticsearchexporter.Config) - //cfg.QueueSettings.Enabled = true - //cfg.QueueSettings.NumConsumers = 100 - //cfg.QueueSettings.QueueSize = 100000 + cfg.Mapping.Mode = "ecs" cfg.PersistentQueueConfig.Enabled = true - cfg.PersistentQueueConfig.NumConsumers = 100 + cfg.PersistentQueueConfig.NumConsumers = 200 cfg.PersistentQueueConfig.QueueSize = 100000 cfg.PersistentQueueConfig.StorageID = &componentID cfg.Endpoints = []string{receiver.endpoint} cfg.Flush.Interval = 100 * time.Millisecond cfg.Flush.Bytes = 125 * 300 - cfg.NumWorkers = 4 + cfg.NumWorkers = 1 exporter, err := factory.CreateLogsExporter( context.Background(), From 6bcb9998619fd7e2fdbef7e1bf09ceb66d7ae86f Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Fri, 17 May 2024 15:19:49 +0100 Subject: [PATCH 031/117] Rename documents -> min_documents --- exporter/elasticsearchexporter/README.md | 4 ++-- exporter/elasticsearchexporter/config.go | 6 +++--- exporter/elasticsearchexporter/config_test.go | 18 +++++++++--------- exporter/elasticsearchexporter/factory.go | 12 ++++++------ .../config-use-deprecated-index_option.yaml | 4 ++-- .../elasticsearchexporter/testdata/config.yaml | 4 ++-- 6 files changed, 24 insertions(+), 24 deletions(-) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index 625bcd718497..600b91bad361 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -51,8 +51,8 @@ This exporter supports sending OpenTelemetry logs and traces to [Elasticsearch]( - `date_format`(default=`%Y.%m.%d`): Time format (based on strftime) to generate the second part of the Index name. - `pipeline` (optional): Optional [Ingest pipeline](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html) ID used for processing documents published by the exporter. - `flush`: Event bulk indexer buffer flush settings - - `bytes` (default=5000000): Write buffer flush size limit. WARNING: This configuration is ignored. Use `flush.documents` instead. - - `documents` (default=125): Minimum number of documents in the buffer to trigger a flush immediately. + - `bytes` (default=5000000): Write buffer flush size limit. WARNING: This configuration is ignored. 
Use `flush.min_documents` instead. + - `min_documents` (default=125): Minimum number of documents in the buffer to trigger a flush immediately. - `interval` (default=30s): Max age of a document in the buffer. A flush will happen regardless of the size of buffer. - `retry`: Elasticsearch bulk request retry settings - `enabled` (default=true): Enable/Disable request retry on error. Failed requests are retried with exponential backoff. diff --git a/exporter/elasticsearchexporter/config.go b/exporter/elasticsearchexporter/config.go index d77a50a04bd2..112000387432 100644 --- a/exporter/elasticsearchexporter/config.go +++ b/exporter/elasticsearchexporter/config.go @@ -131,11 +131,11 @@ type DiscoverySettings struct { // all events already serialized into the send-buffer. type FlushSettings struct { // Bytes sets the send buffer flushing limit. - // WARNING: This configuration is ignored. Use `flush.documents` instead. + // WARNING: This configuration is ignored. Use `flush.min_documents` instead. Bytes int `mapstructure:"bytes"` - // Documents configures the minimum number of documents in the send buffer to trigger a flush. - Documents int `mapstructure:"documents"` + // MinDocuments configures the minimum number of documents in the send buffer to trigger a flush. + MinDocuments int `mapstructure:"min_documents"` // Interval configures the max age of a document in the send buffer. Interval time.Duration `mapstructure:"interval"` diff --git a/exporter/elasticsearchexporter/config_test.go b/exporter/elasticsearchexporter/config_test.go index 582be93dbb45..c1d310be8289 100644 --- a/exporter/elasticsearchexporter/config_test.go +++ b/exporter/elasticsearchexporter/config_test.go @@ -57,9 +57,9 @@ func TestLoad_DeprecatedIndexConfigOption(t *testing.T) { OnStart: true, }, Flush: FlushSettings{ - Bytes: 0, - Documents: 100, - Interval: 5 * time.Second, + Bytes: 0, + MinDocuments: 100, + Interval: 5 * time.Second, }, Retry: RetrySettings{ Enabled: true, @@ -143,9 +143,9 @@ func TestLoadConfig(t *testing.T) { OnStart: true, }, Flush: FlushSettings{ - Bytes: 0, - Documents: 100, - Interval: 5 * time.Second, + Bytes: 0, + MinDocuments: 100, + Interval: 5 * time.Second, }, Retry: RetrySettings{ Enabled: true, @@ -198,9 +198,9 @@ func TestLoadConfig(t *testing.T) { OnStart: true, }, Flush: FlushSettings{ - Bytes: 0, - Documents: 100, - Interval: 5 * time.Second, + Bytes: 0, + MinDocuments: 100, + Interval: 5 * time.Second, }, Retry: RetrySettings{ Enabled: true, diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index b50aaa809b7c..5b6b770a198b 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -67,9 +67,9 @@ func createDefaultConfig() component.Config { }, }, Flush: FlushSettings{ - Bytes: 0, - Documents: 125, - Interval: 30 * time.Second, + Bytes: 0, + MinDocuments: 125, + Interval: 30 * time.Second, }, Mapping: MappingsSettings{ Mode: "none", @@ -98,7 +98,7 @@ func createLogsRequestExporter( } if cf.Flush.Bytes != 0 { - set.Logger.Warn("flush.bytes option is ignored. Use flush.documents instead.") + set.Logger.Warn("flush.bytes option is ignored. Use flush.min_documents instead.") } setDefaultUserAgentHeader(cf, set.BuildInfo) @@ -157,7 +157,7 @@ func createTracesRequestExporter(ctx context.Context, cf := cfg.(*Config) if cf.Flush.Bytes != 0 { - set.Logger.Warn("flush.bytes option is ignored. Use flush.documents instead.") + set.Logger.Warn("flush.bytes option is ignored. 
Use flush.min_documents instead.") } setDefaultUserAgentHeader(cf, set.BuildInfo) @@ -223,7 +223,7 @@ func getBatcherConfig(cf *Config) exporterbatcher.Config { batcherCfg := exporterbatcher.NewDefaultConfig() batcherCfg.Enabled = true batcherCfg.FlushTimeout = cf.Flush.Interval - batcherCfg.MinSizeItems = cf.Flush.Documents + batcherCfg.MinSizeItems = cf.Flush.MinDocuments batcherCfg.MaxSizeItems = 0 return batcherCfg } diff --git a/exporter/elasticsearchexporter/testdata/config-use-deprecated-index_option.yaml b/exporter/elasticsearchexporter/testdata/config-use-deprecated-index_option.yaml index 47b3ad14cb3e..44d30ec96a20 100644 --- a/exporter/elasticsearchexporter/testdata/config-use-deprecated-index_option.yaml +++ b/exporter/elasticsearchexporter/testdata/config-use-deprecated-index_option.yaml @@ -14,7 +14,7 @@ elasticsearch/trace: discover: on_start: true flush: - documents: 100 + min_documents: 100 interval: 5s retry: max_requests: 5 @@ -34,7 +34,7 @@ elasticsearch/log: discover: on_start: true flush: - documents: 100 + min_documents: 100 interval: 5s retry: max_requests: 5 diff --git a/exporter/elasticsearchexporter/testdata/config.yaml b/exporter/elasticsearchexporter/testdata/config.yaml index 672b5efa33f7..8a9bae3bab70 100644 --- a/exporter/elasticsearchexporter/testdata/config.yaml +++ b/exporter/elasticsearchexporter/testdata/config.yaml @@ -16,7 +16,7 @@ elasticsearch/trace: discover: on_start: true flush: - documents: 100 + min_documents: 100 interval: 5s retry: max_requests: 5 @@ -39,7 +39,7 @@ elasticsearch/log: discover: on_start: true flush: - documents: 100 + min_documents: 100 interval: 5s retry: max_requests: 5 From 8aa720b099cba3b6aa2884d766a7eb97ba037c9a Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Fri, 17 May 2024 15:24:14 +0100 Subject: [PATCH 032/117] Add changelog --- .../elasticsearchexporter_batchsender.yaml | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .chloggen/elasticsearchexporter_batchsender.yaml diff --git a/.chloggen/elasticsearchexporter_batchsender.yaml b/.chloggen/elasticsearchexporter_batchsender.yaml new file mode 100644 index 000000000000..b38a7b018a74 --- /dev/null +++ b/.chloggen/elasticsearchexporter_batchsender.yaml @@ -0,0 +1,31 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: breaking + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: elasticsearchexporter + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Improve reliability when used with persistent queue. Remove support for option `flush.bytes`. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [32377] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: | + Move buffering from bulk indexer to batchsender to improve reliability. + With this change, there should be no event loss when used with persistent queue in the event of a collector crash. + Option `flush.bytes` is now ignored. + Use the new `flush.min_documents` option to control the minimum number of documents to trigger a flush. 
+ +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] From 160de516050f9b8870edd151802a1e762b636cb2 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Fri, 17 May 2024 15:26:01 +0100 Subject: [PATCH 033/117] Clarify flush.interval --- exporter/elasticsearchexporter/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index 600b91bad361..a36740a36eba 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -53,7 +53,7 @@ This exporter supports sending OpenTelemetry logs and traces to [Elasticsearch]( - `flush`: Event bulk indexer buffer flush settings - `bytes` (default=5000000): Write buffer flush size limit. WARNING: This configuration is ignored. Use `flush.min_documents` instead. - `min_documents` (default=125): Minimum number of documents in the buffer to trigger a flush immediately. - - `interval` (default=30s): Max age of a document in the buffer. A flush will happen regardless of the size of buffer. + - `interval` (default=30s): Max age of a document in the buffer. A flush will happen regardless of the size of content in buffer. - `retry`: Elasticsearch bulk request retry settings - `enabled` (default=true): Enable/Disable request retry on error. Failed requests are retried with exponential backoff. - `max_requests` (default=3): Number of HTTP request retries. 
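For reference, a minimal exporter configuration using the flush options documented in the README hunk above might look like the following sketch (the endpoint and the values are illustrative, not defaults):

    exporters:
      elasticsearch:
        endpoints: [http://localhost:9200]
        flush:
          # flush as soon as 500 documents are buffered ...
          min_documents: 500
          # ... or once the oldest buffered document is 10s old
          interval: 10s
        retry:
          enabled: true
          max_requests: 5

With these settings the batch sender issues a bulk request as soon as 500 documents have accumulated, while `interval` bounds how long any single document may wait in the buffer.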
From 686e8ae9af594138c6485febea78e9d659afd172 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Fri, 17 May 2024 15:36:41 +0100 Subject: [PATCH 034/117] Refactor and fix TODOs --- exporter/elasticsearchexporter/factory.go | 94 +++++++++++------------ 1 file changed, 43 insertions(+), 51 deletions(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 5b6b770a198b..5be783913922 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -84,6 +84,45 @@ func createDefaultConfig() component.Config { } } +func makeMarshalUnmarshalRequestFuncs(bulkIndexer *bulkIndexerPool) ( + func(exporterhelper.Request) ([]byte, error), + func([]byte) (exporterhelper.Request, error), +) { + marshalRequest := func(req exporterhelper.Request) ([]byte, error) { + // FIXME: use a better and faster serialization + b, err := json.Marshal(*req.(*request)) + return b, err + } + unmarshalRequest := func(b []byte) (exporterhelper.Request, error) { + // FIXME: possibly handle back-compat or forward-compat in case of residue in persistent queue on upgrades / downgrades + var req request + err := json.Unmarshal(b, &req) + req.bulkIndexer = bulkIndexer + return &req, err + } + return marshalRequest, unmarshalRequest +} + +func makeBatchFuncs(bulkIndexer *bulkIndexerPool) ( + func(ctx context.Context, r1, r2 exporterhelper.Request) (exporterhelper.Request, error), + func(ctx context.Context, conf exporterbatcher.MaxSizeConfig, optReq, req exporterhelper.Request) ([]exporterhelper.Request, error), +) { + batchMergeFunc := func(ctx context.Context, r1, r2 exporterhelper.Request) (exporterhelper.Request, error) { + rr1 := r1.(*request) + rr2 := r2.(*request) + req := newRequest(bulkIndexer) + req.Items = append(rr1.Items, rr2.Items...) + return req, nil + } + + batchMergeSplitFunc := func(ctx context.Context, conf exporterbatcher.MaxSizeConfig, optReq, req exporterhelper.Request) ([]exporterhelper.Request, error) { + // TODO: implement merge split func once max_documents is supported in addition to min_documents + panic("not implemented") + return nil, nil + } + return batchMergeFunc, batchMergeSplitFunc +} + // createLogsRequestExporter creates a new request exporter for logs. // // Logs are directly indexed into Elasticsearch. @@ -108,32 +147,8 @@ func createLogsRequestExporter( return nil, fmt.Errorf("cannot configure Elasticsearch logsExporter: %w", err) } - batchMergeFunc := func(ctx context.Context, r1, r2 exporterhelper.Request) (exporterhelper.Request, error) { - rr1 := r1.(*request) - rr2 := r2.(*request) - req := newRequest(logsExporter.bulkIndexer) - req.Items = append(rr1.Items, rr2.Items...) 
- return req, nil - } - - batchMergeSplitFunc := func(ctx context.Context, conf exporterbatcher.MaxSizeConfig, optReq, req exporterhelper.Request) ([]exporterhelper.Request, error) { - // FIXME: implement merge split func - panic("not implemented") - return nil, nil - } - - marshalRequest := func(req exporterhelper.Request) ([]byte, error) { - b, err := json.Marshal(*req.(*request)) - return b, err - } - - unmarshalRequest := func(b []byte) (exporterhelper.Request, error) { - // FIXME: back-compat unmarshaling in case of residue in persistent queue - var req request - err := json.Unmarshal(b, &req) - req.bulkIndexer = logsExporter.bulkIndexer - return &req, err - } + batchMergeFunc, batchMergeSplitFunc := makeBatchFuncs(logsExporter.bulkIndexer) + marshalRequest, unmarshalRequest := makeMarshalUnmarshalRequestFuncs(logsExporter.bulkIndexer) batcherCfg := getBatcherConfig(cf) return exporterhelper.NewLogsRequestExporter( @@ -167,31 +182,8 @@ func createTracesRequestExporter(ctx context.Context, return nil, fmt.Errorf("cannot configure Elasticsearch tracesExporter: %w", err) } - batchMergeFunc := func(ctx context.Context, r1, r2 exporterhelper.Request) (exporterhelper.Request, error) { - rr1 := r1.(*request) - rr2 := r2.(*request) - req := newRequest(tracesExporter.bulkIndexer) - req.Items = append(rr1.Items, rr2.Items...) - return req, nil - } - - batchMergeSplitFunc := func(ctx context.Context, conf exporterbatcher.MaxSizeConfig, optReq, req exporterhelper.Request) ([]exporterhelper.Request, error) { - // FIXME: implement merge split func - panic("not implemented") - return nil, nil - } - - marshalRequest := func(req exporterhelper.Request) ([]byte, error) { - b, err := json.Marshal(*req.(*request)) - return b, err - } - - unmarshalRequest := func(b []byte) (exporterhelper.Request, error) { - var req request - err := json.Unmarshal(b, &req) - req.bulkIndexer = tracesExporter.bulkIndexer - return &req, err - } + batchMergeFunc, batchMergeSplitFunc := makeBatchFuncs(tracesExporter.bulkIndexer) + marshalRequest, unmarshalRequest := makeMarshalUnmarshalRequestFuncs(tracesExporter.bulkIndexer) batcherCfg := getBatcherConfig(cf) return exporterhelper.NewTracesRequestExporter( From 10a2748aa513d84c7e75b3ea328e802989237cde Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Fri, 17 May 2024 15:38:55 +0100 Subject: [PATCH 035/117] Make hack clear --- .../integrationtest/exporter_bench_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index 74ab5131ac60..fd662c95ef30 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -117,7 +117,8 @@ func benchmarkLogs(b *testing.B, batchSize int) { b.StartTimer() require.NoError(b, exporter.ConsumeLogs(ctx, logs)) } - // FIXME: persistent queue doesn't drain on shutdown + // HACK: persistent queue doesn't drain on shutdown, poll until queue is drained. + // FIXME: is there a better way to do it? 
for { if observedCount.Load() >= generatedCount.Load() { break From d48f75f61d223b11367e7fbf29bd7679817440fc Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Wed, 29 May 2024 14:03:47 +0100 Subject: [PATCH 036/117] Switch to msgpack and add merge split func --- exporter/elasticsearchexporter/factory.go | 88 ++++++++++-- .../elasticsearchexporter/factory_test.go | 128 ++++++++++++++++++ exporter/elasticsearchexporter/go.mod | 2 + exporter/elasticsearchexporter/go.sum | 4 + .../integrationtest/.gitignore | 1 + .../integrationtest/exporter_bench_test.go | 42 +++++- .../integrationtest/go.mod | 3 + .../integrationtest/go.sum | 4 + 8 files changed, 253 insertions(+), 19 deletions(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 5be783913922..8f7717a51bf7 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -7,7 +7,7 @@ package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry import ( "context" - "encoding/json" + "errors" "fmt" "net/http" "runtime" @@ -20,6 +20,7 @@ import ( "go.opentelemetry.io/collector/exporter/exporterqueue" "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/metadata" + "github.com/vmihailenco/msgpack/v5" ) const ( @@ -89,14 +90,13 @@ func makeMarshalUnmarshalRequestFuncs(bulkIndexer *bulkIndexerPool) ( func([]byte) (exporterhelper.Request, error), ) { marshalRequest := func(req exporterhelper.Request) ([]byte, error) { - // FIXME: use a better and faster serialization - b, err := json.Marshal(*req.(*request)) + b, err := msgpack.Marshal(req.(*request)) return b, err } unmarshalRequest := func(b []byte) (exporterhelper.Request, error) { // FIXME: possibly handle back-compat or forward-compat in case of residue in persistent queue on upgrades / downgrades var req request - err := json.Unmarshal(b, &req) + err := msgpack.Unmarshal(b, &req) req.bulkIndexer = bulkIndexer return &req, err } @@ -108,17 +108,70 @@ func makeBatchFuncs(bulkIndexer *bulkIndexerPool) ( func(ctx context.Context, conf exporterbatcher.MaxSizeConfig, optReq, req exporterhelper.Request) ([]exporterhelper.Request, error), ) { batchMergeFunc := func(ctx context.Context, r1, r2 exporterhelper.Request) (exporterhelper.Request, error) { - rr1 := r1.(*request) - rr2 := r2.(*request) + rr1, ok1 := r1.(*request) + rr2, ok2 := r2.(*request) + if !ok1 || !ok2 { + return nil, errors.New("invalid input type") + } + req := newRequest(bulkIndexer) req.Items = append(rr1.Items, rr2.Items...) 
return req, nil } - batchMergeSplitFunc := func(ctx context.Context, conf exporterbatcher.MaxSizeConfig, optReq, req exporterhelper.Request) ([]exporterhelper.Request, error) { - // TODO: implement merge split func once max_documents is supported in addition to min_documents - panic("not implemented") - return nil, nil + batchMergeSplitFunc := func( + ctx context.Context, + cfg exporterbatcher.MaxSizeConfig, + r1, r2 exporterhelper.Request, + ) ([]exporterhelper.Request, error) { + rr1, ok1 := r1.(*request) + rr2, ok2 := r2.(*request) + if !ok1 || !ok2 { + return nil, errors.New("invalid input type") + } + + totalItems := r1.ItemsCount() + r2.ItemsCount() + if cfg.MaxSizeItems == 0 || totalItems <= cfg.MaxSizeItems { + r, err := batchMergeFunc(ctx, r1, r2) + if err != nil { + return nil, err + } + return []exporterhelper.Request{r}, nil + } + + if r1.ItemsCount() <= cfg.MaxSizeItems && r2.ItemsCount() <= cfg.MaxSizeItems { + // no point in merging or splitting, return the requests without changes + return []exporterhelper.Request{r1, r2}, nil + } + + // pack each bucket to the max and put anything remaining in last bucket + reqBuckets := 1 + (totalItems-1)/cfg.MaxSizeItems + requests := make([]exporterhelper.Request, 0, reqBuckets) + rCurr := rr1 + var start int + for i := 0; i < reqBuckets; i++ { + req := newRequest(bulkIndexer) + requests = append(requests, req) + end := start + cfg.MaxSizeItems + if end >= rCurr.ItemsCount() { + req.Items = append(req.Items, rCurr.Items[start:]...) + if rCurr == rr2 { + // Both r1 and r2 are processed + break + } + // Switch over to r2 and add it to the bucket as required + start = 0 + end = end - rCurr.ItemsCount() + rCurr = rr2 + if end > rCurr.ItemsCount() { + end = rCurr.ItemsCount() + } + } + req.Items = append(req.Items, rCurr.Items[start:end]...) 
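+			// advance the cursor: the next bucket starts where this one ended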
+ start = end + } + + return requests, nil } return batchMergeFunc, batchMergeSplitFunc } @@ -157,11 +210,16 @@ func createLogsRequestExporter( logsExporter.logsDataToRequest, exporterhelper.WithBatcher(batcherCfg, exporterhelper.WithRequestBatchFuncs(batchMergeFunc, batchMergeSplitFunc)), exporterhelper.WithShutdown(logsExporter.Shutdown), - exporterhelper.WithRequestQueue(cf.PersistentQueueConfig.Config, - exporterqueue.NewPersistentQueueFactory[exporterhelper.Request](cf.PersistentQueueConfig.StorageID, exporterqueue.PersistentQueueSettings[exporterhelper.Request]{ - Marshaler: marshalRequest, - Unmarshaler: unmarshalRequest, - })), + exporterhelper.WithRequestQueue( + cf.PersistentQueueConfig.Config, + exporterqueue.NewPersistentQueueFactory[exporterhelper.Request]( + cf.PersistentQueueConfig.StorageID, + exporterqueue.PersistentQueueSettings[exporterhelper.Request]{ + Marshaler: marshalRequest, + Unmarshaler: unmarshalRequest, + }, + ), + ), ) } diff --git a/exporter/elasticsearchexporter/factory_test.go b/exporter/elasticsearchexporter/factory_test.go index 86557365b09b..8ef3e3f161bb 100644 --- a/exporter/elasticsearchexporter/factory_test.go +++ b/exporter/elasticsearchexporter/factory_test.go @@ -5,6 +5,7 @@ package elasticsearchexporter import ( "context" + "fmt" "strings" "testing" @@ -12,7 +13,9 @@ import ( "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/component/componenttest" + "go.opentelemetry.io/collector/exporter/exporterbatcher" "go.opentelemetry.io/collector/exporter/exportertest" + "go.uber.org/zap" ) func TestCreateDefaultConfig(t *testing.T) { @@ -89,3 +92,128 @@ func TestSetDefaultUserAgentHeader(t *testing.T) { assert.Equal(t, cfg.Headers[userAgentHeaderKey], "mock user agent header") }) } + +func TestMakeBatchFuncs(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + logger, err := zap.NewDevelopment() + require.NoError(t, err) + esClient, err := newElasticsearchClient(logger, cfg) + require.NoError(t, err) + bi, err := newBulkIndexer(logger, esClient, cfg) + require.NoError(t, err) + + // creates bulk indexer items based on the count and offset. For a + // given count and offset identical output will be generated. This + // allows testing for merge operations assuming each batch is + // identified by a sequence number. 
+ createRandomBulkIndexerItems := func(count, offset int) []bulkIndexerItem { + ret := make([]bulkIndexerItem, 0, count) + for i := offset; i < offset+count; i++ { + tmp := fmt.Sprintf("test-%d", i) + ret = append(ret, bulkIndexerItem{ + Index: tmp, + Body: []byte(tmp), + }) + } + return ret + } + _, batchMergeSplit := makeBatchFuncs(bi) + + for _, tc := range []struct { + name string + maxItems int + input1 []bulkIndexerItem + input2 []bulkIndexerItem + expected [][]bulkIndexerItem + }{ + { + name: "merge_to_one", + maxItems: 10, + input1: createRandomBulkIndexerItems(7, 0), + input2: createRandomBulkIndexerItems(3, 7), + expected: [][]bulkIndexerItem{createRandomBulkIndexerItems(10, 0)}, + }, + { + name: "no_merge", + maxItems: 10, + input1: createRandomBulkIndexerItems(7, 0), + input2: createRandomBulkIndexerItems(5, 0), + expected: [][]bulkIndexerItem{ + createRandomBulkIndexerItems(7, 0), + createRandomBulkIndexerItems(5, 0), + }, + }, + { + name: "large_first_req", + maxItems: 10, + input1: createRandomBulkIndexerItems(101, 0), + input2: createRandomBulkIndexerItems(5, 101), + expected: [][]bulkIndexerItem{ + createRandomBulkIndexerItems(10, 0), + createRandomBulkIndexerItems(10, 10), + createRandomBulkIndexerItems(10, 20), + createRandomBulkIndexerItems(10, 30), + createRandomBulkIndexerItems(10, 40), + createRandomBulkIndexerItems(10, 50), + createRandomBulkIndexerItems(10, 60), + createRandomBulkIndexerItems(10, 70), + createRandomBulkIndexerItems(10, 80), + createRandomBulkIndexerItems(10, 90), + createRandomBulkIndexerItems(6, 100), + }, + }, + { + name: "large_second_req", + maxItems: 10, + input1: createRandomBulkIndexerItems(5, 0), + input2: createRandomBulkIndexerItems(101, 5), + expected: [][]bulkIndexerItem{ + createRandomBulkIndexerItems(10, 0), + createRandomBulkIndexerItems(10, 10), + createRandomBulkIndexerItems(10, 20), + createRandomBulkIndexerItems(10, 30), + createRandomBulkIndexerItems(10, 40), + createRandomBulkIndexerItems(10, 50), + createRandomBulkIndexerItems(10, 60), + createRandomBulkIndexerItems(10, 70), + createRandomBulkIndexerItems(10, 80), + createRandomBulkIndexerItems(10, 90), + createRandomBulkIndexerItems(6, 100), + }, + }, + { + name: "large_reqs", + maxItems: 10, + input1: createRandomBulkIndexerItems(54, 0), + input2: createRandomBulkIndexerItems(55, 54), + expected: [][]bulkIndexerItem{ + createRandomBulkIndexerItems(10, 0), + createRandomBulkIndexerItems(10, 10), + createRandomBulkIndexerItems(10, 20), + createRandomBulkIndexerItems(10, 30), + createRandomBulkIndexerItems(10, 40), + createRandomBulkIndexerItems(10, 50), + createRandomBulkIndexerItems(10, 60), + createRandomBulkIndexerItems(10, 70), + createRandomBulkIndexerItems(10, 80), + createRandomBulkIndexerItems(10, 90), + createRandomBulkIndexerItems(9, 100), + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + out, err := batchMergeSplit( + context.Background(), + exporterbatcher.MaxSizeConfig{MaxSizeItems: tc.maxItems}, + &request{bulkIndexer: bi, Items: tc.input1}, + &request{bulkIndexer: bi, Items: tc.input2}, + ) + require.NoError(t, err) + require.Equal(t, len(tc.expected), len(out)) + for i := 0; i < len(tc.expected); i++ { + assert.Equal(t, tc.expected[i], out[i].(*request).Items) + } + }) + } +} diff --git a/exporter/elasticsearchexporter/go.mod b/exporter/elasticsearchexporter/go.mod index 22d04d50d9d7..5cfb3d47bcb1 100644 --- a/exporter/elasticsearchexporter/go.mod +++ b/exporter/elasticsearchexporter/go.mod @@ -11,6 +11,7 @@ require ( 
github.com/open-telemetry/opentelemetry-collector-contrib/internal/common v0.101.0 github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal v0.101.0 github.com/stretchr/testify v1.9.0 + github.com/vmihailenco/msgpack/v5 v5.4.1 go.opentelemetry.io/collector/component v0.101.0 go.opentelemetry.io/collector/config/configopaque v1.8.0 go.opentelemetry.io/collector/config/configtls v0.101.0 @@ -55,6 +56,7 @@ require ( github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.53.0 // indirect github.com/prometheus/procfs v0.13.0 // indirect + github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect go.elastic.co/apm/module/apmzap/v2 v2.6.0 // indirect go.elastic.co/apm/v2 v2.6.0 // indirect go.elastic.co/fastjson v1.3.0 // indirect diff --git a/exporter/elasticsearchexporter/go.sum b/exporter/elasticsearchexporter/go.sum index 50c8e444615a..30bb66c038f4 100644 --- a/exporter/elasticsearchexporter/go.sum +++ b/exporter/elasticsearchexporter/go.sum @@ -96,6 +96,10 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8= +github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok= +github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= +github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.elastic.co/apm/module/apmelasticsearch/v2 v2.6.0 h1:ukMcwyMaDXsS1dRK2qRYXT2AsfwaUy74TOOYCqkWJow= diff --git a/exporter/elasticsearchexporter/integrationtest/.gitignore b/exporter/elasticsearchexporter/integrationtest/.gitignore index 484ab7e5c61d..a664f66810d6 100644 --- a/exporter/elasticsearchexporter/integrationtest/.gitignore +++ b/exporter/elasticsearchexporter/integrationtest/.gitignore @@ -1 +1,2 @@ results/* +*.test diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index 65c8abbbda1a..b94ba5dd3bcf 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -11,15 +11,19 @@ import ( "time" "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/component/componenttest" "go.opentelemetry.io/collector/consumer" "go.opentelemetry.io/collector/exporter" "go.opentelemetry.io/collector/exporter/exportertest" + "go.opentelemetry.io/collector/extension" "go.opentelemetry.io/collector/pdata/plog" "go.opentelemetry.io/collector/pdata/pmetric" "go.opentelemetry.io/collector/pdata/ptrace" "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter" + "github.com/open-telemetry/opentelemetry-collector-contrib/extension/storage/filestorage" + "github.com/open-telemetry/opentelemetry-collector-contrib/extension/storage/storagetest" "github.com/open-telemetry/opentelemetry-collector-contrib/testbed/testbed" ) @@ -50,12 
+54,13 @@ func benchmarkLogs(b *testing.B, batchSize int) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - runnerCfg := prepareBenchmark(b, batchSize) + host := storagetest.NewStorageHost() + runnerCfg := prepareBenchmark(b, host, batchSize) exporter, err := runnerCfg.factory.CreateLogsExporter( ctx, exportertest.NewNopCreateSettings(), runnerCfg.esCfg, ) require.NoError(b, err) - require.NoError(b, exporter.Start(ctx, componenttest.NewNopHost())) + require.NoError(b, exporter.Start(ctx, host)) b.ReportAllocs() b.ResetTimer() @@ -77,12 +82,13 @@ func benchmarkTraces(b *testing.B, batchSize int) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - runnerCfg := prepareBenchmark(b, batchSize) + host := storagetest.NewStorageHost() + runnerCfg := prepareBenchmark(b, host, batchSize) exporter, err := runnerCfg.factory.CreateTracesExporter( ctx, exportertest.NewNopCreateSettings(), runnerCfg.esCfg, ) require.NoError(b, err) - require.NoError(b, exporter.Start(ctx, componenttest.NewNopHost())) + require.NoError(b, exporter.Start(ctx, host)) b.ReportAllocs() b.ResetTimer() @@ -110,10 +116,14 @@ type benchRunnerCfg struct { func prepareBenchmark( b *testing.B, + host *storagetest.StorageHost, batchSize int, ) *benchRunnerCfg { b.Helper() + fileExtID, fileExt := getFileStorageExtension(b) + host.WithExtension(fileExtID, fileExt) + cfg := &benchRunnerCfg{} // Benchmarks don't decode the bulk requests to avoid allocations to pollute the results. receiver := newElasticsearchDataReceiver(b, false /* DecodeBulkRequest */) @@ -122,6 +132,11 @@ func prepareBenchmark( cfg.factory = elasticsearchexporter.NewFactory() cfg.esCfg = cfg.factory.CreateDefaultConfig().(*elasticsearchexporter.Config) + cfg.esCfg.Mapping.Mode = "ecs" + cfg.esCfg.PersistentQueueConfig.Enabled = true + cfg.esCfg.PersistentQueueConfig.NumConsumers = 200 + cfg.esCfg.PersistentQueueConfig.QueueSize = 100_000 + cfg.esCfg.PersistentQueueConfig.StorageID = &fileExtID cfg.esCfg.Endpoints = []string{receiver.endpoint} cfg.esCfg.LogsIndex = TestLogsIndex cfg.esCfg.TracesIndex = TestTracesIndex @@ -146,3 +161,22 @@ func prepareBenchmark( return cfg } + +func getFileStorageExtension(b testing.TB) (component.ID, extension.Extension) { + storage := filestorage.NewFactory() + componentID := component.NewIDWithName(storage.Type(), "esexporterbench") + + storageCfg := storage.CreateDefaultConfig().(*filestorage.Config) + storageCfg.Directory = b.TempDir() + fileExt, err := storage.CreateExtension( + context.Background(), + extension.CreateSettings{ + ID: componentID, + TelemetrySettings: componenttest.NewNopTelemetrySettings(), + BuildInfo: component.NewDefaultBuildInfo(), + }, + storageCfg, + ) + require.NoError(b, err) + return componentID, fileExt +} diff --git a/exporter/elasticsearchexporter/integrationtest/go.mod b/exporter/elasticsearchexporter/integrationtest/go.mod index c52d44736028..208a2f6f9b40 100644 --- a/exporter/elasticsearchexporter/integrationtest/go.mod +++ b/exporter/elasticsearchexporter/integrationtest/go.mod @@ -6,6 +6,7 @@ require ( github.com/elastic/go-docappender/v2 v2.1.2 github.com/gorilla/mux v1.8.1 github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter v0.101.0 + github.com/open-telemetry/opentelemetry-collector-contrib/extension/storage v0.101.0 github.com/open-telemetry/opentelemetry-collector-contrib/extension/storage/filestorage v0.101.0 github.com/open-telemetry/opentelemetry-collector-contrib/internal/common v0.101.0 
github.com/open-telemetry/opentelemetry-collector-contrib/internal/sharedcomponent v0.101.0 @@ -115,6 +116,8 @@ require ( github.com/tklauser/go-sysconf v0.3.13 // indirect github.com/tklauser/numcpus v0.7.0 // indirect github.com/valyala/fastjson v1.6.4 // indirect + github.com/vmihailenco/msgpack/v5 v5.4.1 // indirect + github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect go.elastic.co/apm/module/apmelasticsearch/v2 v2.6.0 // indirect go.elastic.co/apm/module/apmhttp/v2 v2.6.0 // indirect diff --git a/exporter/elasticsearchexporter/integrationtest/go.sum b/exporter/elasticsearchexporter/integrationtest/go.sum index aa7812e0d362..d59bd0f82d88 100644 --- a/exporter/elasticsearchexporter/integrationtest/go.sum +++ b/exporter/elasticsearchexporter/integrationtest/go.sum @@ -251,6 +251,10 @@ github.com/tklauser/numcpus v0.7.0 h1:yjuerZP127QG9m5Zh/mSO4wqurYil27tHrqwRoRjpr github.com/tklauser/numcpus v0.7.0/go.mod h1:bb6dMVcj8A42tSE7i32fsIUCbQNllK5iDguyOZRUzAY= github.com/valyala/fastjson v1.6.4 h1:uAUNq9Z6ymTgGhcm0UynUAB6tlbakBrz6CQFax3BXVQ= github.com/valyala/fastjson v1.6.4/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY= +github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8= +github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok= +github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= +github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= From 6ecce33a7c547a70b7901d7378767130e4b3b59a Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 30 May 2024 18:17:57 +0100 Subject: [PATCH 037/117] Add FIXME --- exporter/elasticsearchexporter/elasticsearch_bulk.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 061314e07f48..3cfea7ebe442 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -106,7 +106,7 @@ func newElasticsearchClient(logger *zap.Logger, config *Config) (*esClientCurren // configure retry behavior RetryOnStatus: config.Retry.RetryOnStatus, DisableRetry: retryDisabled, - EnableRetryOnTimeout: config.Retry.Enabled, + EnableRetryOnTimeout: config.Retry.Enabled, // FIXME: maybe this needs to be disabled //RetryOnError: retryOnError, // should be used from esclient version 8 onwards MaxRetries: maxRetries, RetryBackoff: createElasticsearchBackoffFunc(&config.Retry), From 8afdc9455cc2f8e046b7fbb7a4eb953c533fbf8e Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 3 Jun 2024 17:59:16 +0100 Subject: [PATCH 038/117] Remove custom request --- exporter/elasticsearchexporter/config.go | 4 +- exporter/elasticsearchexporter/config_test.go | 31 ++-- .../elasticsearch_bulk_test.go | 9 +- exporter/elasticsearchexporter/factory.go | 136 ++---------------- .../elasticsearchexporter/factory_test.go | 128 ----------------- .../integrationtest/exporter_bench_test.go | 8 +- .../elasticsearchexporter/logs_exporter.go | 11 +- .../elasticsearchexporter/trace_exporter.go | 13 +- 8 files changed, 52 insertions(+), 288 deletions(-) diff --git 
a/exporter/elasticsearchexporter/config.go b/exporter/elasticsearchexporter/config.go index 112000387432..1a289ea0b2d8 100644 --- a/exporter/elasticsearchexporter/config.go +++ b/exporter/elasticsearchexporter/config.go @@ -12,12 +12,12 @@ import ( "go.opentelemetry.io/collector/config/configopaque" "go.opentelemetry.io/collector/config/configtls" - "go.opentelemetry.io/collector/exporter/exporterqueue" + "go.opentelemetry.io/collector/exporter/exporterhelper" ) // Config defines configuration for Elastic exporter. type Config struct { - PersistentQueueConfig exporterqueue.PersistentQueueConfig `mapstructure:"sending_queue"` + exporterhelper.QueueSettings `mapstructure:"sending_queue"` // Endpoints holds the Elasticsearch URLs the exporter should send events to. // // This setting is required if CloudID is not set and if the diff --git a/exporter/elasticsearchexporter/config_test.go b/exporter/elasticsearchexporter/config_test.go index c1d310be8289..e600d6e899ff 100644 --- a/exporter/elasticsearchexporter/config_test.go +++ b/exporter/elasticsearchexporter/config_test.go @@ -13,6 +13,7 @@ import ( "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/confmap/confmaptest" + "go.opentelemetry.io/collector/exporter/exporterhelper" "go.opentelemetry.io/collector/exporter/exporterqueue" "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/metadata" @@ -29,12 +30,10 @@ func TestLoad_DeprecatedIndexConfigOption(t *testing.T) { require.NoError(t, component.UnmarshalConfig(sub, cfg)) assert.Equal(t, cfg, &Config{ - PersistentQueueConfig: exporterqueue.PersistentQueueConfig{ - Config: exporterqueue.Config{ - Enabled: false, - NumConsumers: exporterqueue.NewDefaultConfig().NumConsumers, - QueueSize: exporterqueue.NewDefaultConfig().QueueSize, - }, + QueueSettings: exporterhelper.QueueSettings{ + Enabled: false, + NumConsumers: exporterqueue.NewDefaultConfig().NumConsumers, + QueueSize: exporterqueue.NewDefaultConfig().QueueSize, }, Endpoints: []string{"http://localhost:9200"}, CloudID: "TRNMxjXlNJEt", @@ -115,12 +114,10 @@ func TestLoadConfig(t *testing.T) { id: component.NewIDWithName(metadata.Type, "trace"), configFile: "config.yaml", expected: &Config{ - PersistentQueueConfig: exporterqueue.PersistentQueueConfig{ - Config: exporterqueue.Config{ - Enabled: false, - NumConsumers: exporterqueue.NewDefaultConfig().NumConsumers, - QueueSize: exporterqueue.NewDefaultConfig().QueueSize, - }, + QueueSettings: exporterhelper.QueueSettings{ + Enabled: false, + NumConsumers: exporterqueue.NewDefaultConfig().NumConsumers, + QueueSize: exporterqueue.NewDefaultConfig().QueueSize, }, Endpoints: []string{"https://elastic.example.com:9200"}, CloudID: "TRNMxjXlNJEt", @@ -170,12 +167,10 @@ func TestLoadConfig(t *testing.T) { id: component.NewIDWithName(metadata.Type, "log"), configFile: "config.yaml", expected: &Config{ - PersistentQueueConfig: exporterqueue.PersistentQueueConfig{ - Config: exporterqueue.Config{ - Enabled: true, - NumConsumers: exporterqueue.NewDefaultConfig().NumConsumers, - QueueSize: exporterqueue.NewDefaultConfig().QueueSize, - }, + QueueSettings: exporterhelper.QueueSettings{ + Enabled: true, + NumConsumers: exporterqueue.NewDefaultConfig().NumConsumers, + QueueSize: exporterqueue.NewDefaultConfig().QueueSize, }, Endpoints: []string{"http://localhost:9200"}, CloudID: "TRNMxjXlNJEt", diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk_test.go 
b/exporter/elasticsearchexporter/elasticsearch_bulk_test.go index 7e2937b1ce30..4b021ca2a800 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk_test.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk_test.go @@ -6,14 +6,15 @@ package elasticsearchexporter import ( "context" "errors" - "github.com/elastic/go-elasticsearch/v7" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "go.uber.org/zap" "io" "net/http" "strings" "testing" + + "github.com/elastic/go-elasticsearch/v7" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/zap" ) var defaultRoundTripFunc = func(*http.Request) (*http.Response, error) { diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 8f7717a51bf7..7dfa3197ee02 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -7,7 +7,6 @@ package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry import ( "context" - "errors" "fmt" "net/http" "runtime" @@ -20,7 +19,6 @@ import ( "go.opentelemetry.io/collector/exporter/exporterqueue" "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/metadata" - "github.com/vmihailenco/msgpack/v5" ) const ( @@ -47,7 +45,11 @@ func createDefaultConfig() component.Config { } qs.Enabled = false // FIXME: how does batching without queuing look like? return &Config{ - PersistentQueueConfig: qs, + QueueSettings: exporterhelper.QueueSettings{ + Enabled: false, + NumConsumers: exporterhelper.NewDefaultQueueSettings().NumConsumers, + QueueSize: exporterhelper.NewDefaultQueueSettings().QueueSize, + }, ClientConfig: ClientConfig{ Timeout: 90 * time.Second, }, @@ -85,97 +87,6 @@ func createDefaultConfig() component.Config { } } -func makeMarshalUnmarshalRequestFuncs(bulkIndexer *bulkIndexerPool) ( - func(exporterhelper.Request) ([]byte, error), - func([]byte) (exporterhelper.Request, error), -) { - marshalRequest := func(req exporterhelper.Request) ([]byte, error) { - b, err := msgpack.Marshal(req.(*request)) - return b, err - } - unmarshalRequest := func(b []byte) (exporterhelper.Request, error) { - // FIXME: possibly handle back-compat or forward-compat in case of residue in persistent queue on upgrades / downgrades - var req request - err := msgpack.Unmarshal(b, &req) - req.bulkIndexer = bulkIndexer - return &req, err - } - return marshalRequest, unmarshalRequest -} - -func makeBatchFuncs(bulkIndexer *bulkIndexerPool) ( - func(ctx context.Context, r1, r2 exporterhelper.Request) (exporterhelper.Request, error), - func(ctx context.Context, conf exporterbatcher.MaxSizeConfig, optReq, req exporterhelper.Request) ([]exporterhelper.Request, error), -) { - batchMergeFunc := func(ctx context.Context, r1, r2 exporterhelper.Request) (exporterhelper.Request, error) { - rr1, ok1 := r1.(*request) - rr2, ok2 := r2.(*request) - if !ok1 || !ok2 { - return nil, errors.New("invalid input type") - } - - req := newRequest(bulkIndexer) - req.Items = append(rr1.Items, rr2.Items...) 
- return req, nil - } - - batchMergeSplitFunc := func( - ctx context.Context, - cfg exporterbatcher.MaxSizeConfig, - r1, r2 exporterhelper.Request, - ) ([]exporterhelper.Request, error) { - rr1, ok1 := r1.(*request) - rr2, ok2 := r2.(*request) - if !ok1 || !ok2 { - return nil, errors.New("invalid input type") - } - - totalItems := r1.ItemsCount() + r2.ItemsCount() - if cfg.MaxSizeItems == 0 || totalItems <= cfg.MaxSizeItems { - r, err := batchMergeFunc(ctx, r1, r2) - if err != nil { - return nil, err - } - return []exporterhelper.Request{r}, nil - } - - if r1.ItemsCount() <= cfg.MaxSizeItems && r2.ItemsCount() <= cfg.MaxSizeItems { - // no point in merging or splitting, return the requests without changes - return []exporterhelper.Request{r1, r2}, nil - } - - // pack each bucket to the max and put anything remaining in last bucket - reqBuckets := 1 + (totalItems-1)/cfg.MaxSizeItems - requests := make([]exporterhelper.Request, 0, reqBuckets) - rCurr := rr1 - var start int - for i := 0; i < reqBuckets; i++ { - req := newRequest(bulkIndexer) - requests = append(requests, req) - end := start + cfg.MaxSizeItems - if end >= rCurr.ItemsCount() { - req.Items = append(req.Items, rCurr.Items[start:]...) - if rCurr == rr2 { - // Both r1 and r2 are processed - break - } - // Switch over to r2 and add it to the bucket as required - start = 0 - end = end - rCurr.ItemsCount() - rCurr = rr2 - if end > rCurr.ItemsCount() { - end = rCurr.ItemsCount() - } - } - req.Items = append(req.Items, rCurr.Items[start:end]...) - start = end - } - - return requests, nil - } - return batchMergeFunc, batchMergeSplitFunc -} - // createLogsRequestExporter creates a new request exporter for logs. // // Logs are directly indexed into Elasticsearch. @@ -200,26 +111,15 @@ func createLogsRequestExporter( return nil, fmt.Errorf("cannot configure Elasticsearch logsExporter: %w", err) } - batchMergeFunc, batchMergeSplitFunc := makeBatchFuncs(logsExporter.bulkIndexer) - marshalRequest, unmarshalRequest := makeMarshalUnmarshalRequestFuncs(logsExporter.bulkIndexer) - batcherCfg := getBatcherConfig(cf) - return exporterhelper.NewLogsRequestExporter( + return exporterhelper.NewLogsExporter( ctx, set, - logsExporter.logsDataToRequest, - exporterhelper.WithBatcher(batcherCfg, exporterhelper.WithRequestBatchFuncs(batchMergeFunc, batchMergeSplitFunc)), + cfg, + logsExporter.pushLogsData, + exporterhelper.WithBatcher(batcherCfg), exporterhelper.WithShutdown(logsExporter.Shutdown), - exporterhelper.WithRequestQueue( - cf.PersistentQueueConfig.Config, - exporterqueue.NewPersistentQueueFactory[exporterhelper.Request]( - cf.PersistentQueueConfig.StorageID, - exporterqueue.PersistentQueueSettings[exporterhelper.Request]{ - Marshaler: marshalRequest, - Unmarshaler: unmarshalRequest, - }, - ), - ), + exporterhelper.WithQueue(cf.QueueSettings), ) } @@ -240,21 +140,15 @@ func createTracesRequestExporter(ctx context.Context, return nil, fmt.Errorf("cannot configure Elasticsearch tracesExporter: %w", err) } - batchMergeFunc, batchMergeSplitFunc := makeBatchFuncs(tracesExporter.bulkIndexer) - marshalRequest, unmarshalRequest := makeMarshalUnmarshalRequestFuncs(tracesExporter.bulkIndexer) - batcherCfg := getBatcherConfig(cf) - return exporterhelper.NewTracesRequestExporter( + return exporterhelper.NewTracesExporter( ctx, set, - tracesExporter.traceDataToRequest, - exporterhelper.WithBatcher(batcherCfg, exporterhelper.WithRequestBatchFuncs(batchMergeFunc, batchMergeSplitFunc)), + cfg, + tracesExporter.pushTraceData, + 
exporterhelper.WithBatcher(batcherCfg), exporterhelper.WithShutdown(tracesExporter.Shutdown), - exporterhelper.WithRequestQueue(cf.PersistentQueueConfig.Config, - exporterqueue.NewPersistentQueueFactory[exporterhelper.Request](cf.PersistentQueueConfig.StorageID, exporterqueue.PersistentQueueSettings[exporterhelper.Request]{ - Marshaler: marshalRequest, - Unmarshaler: unmarshalRequest, - })), + exporterhelper.WithQueue(cf.QueueSettings), ) } diff --git a/exporter/elasticsearchexporter/factory_test.go b/exporter/elasticsearchexporter/factory_test.go index 8ef3e3f161bb..86557365b09b 100644 --- a/exporter/elasticsearchexporter/factory_test.go +++ b/exporter/elasticsearchexporter/factory_test.go @@ -5,7 +5,6 @@ package elasticsearchexporter import ( "context" - "fmt" "strings" "testing" @@ -13,9 +12,7 @@ import ( "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/component/componenttest" - "go.opentelemetry.io/collector/exporter/exporterbatcher" "go.opentelemetry.io/collector/exporter/exportertest" - "go.uber.org/zap" ) func TestCreateDefaultConfig(t *testing.T) { @@ -92,128 +89,3 @@ func TestSetDefaultUserAgentHeader(t *testing.T) { assert.Equal(t, cfg.Headers[userAgentHeaderKey], "mock user agent header") }) } - -func TestMakeBatchFuncs(t *testing.T) { - factory := NewFactory() - cfg := factory.CreateDefaultConfig().(*Config) - logger, err := zap.NewDevelopment() - require.NoError(t, err) - esClient, err := newElasticsearchClient(logger, cfg) - require.NoError(t, err) - bi, err := newBulkIndexer(logger, esClient, cfg) - require.NoError(t, err) - - // creates bulk indexer items based on the count and offset. For a - // given count and offset identical output will be generated. This - // allows testing for merge operations assuming each batch is - // identified by a sequence number. 
- createRandomBulkIndexerItems := func(count, offset int) []bulkIndexerItem { - ret := make([]bulkIndexerItem, 0, count) - for i := offset; i < offset+count; i++ { - tmp := fmt.Sprintf("test-%d", i) - ret = append(ret, bulkIndexerItem{ - Index: tmp, - Body: []byte(tmp), - }) - } - return ret - } - _, batchMergeSplit := makeBatchFuncs(bi) - - for _, tc := range []struct { - name string - maxItems int - input1 []bulkIndexerItem - input2 []bulkIndexerItem - expected [][]bulkIndexerItem - }{ - { - name: "merge_to_one", - maxItems: 10, - input1: createRandomBulkIndexerItems(7, 0), - input2: createRandomBulkIndexerItems(3, 7), - expected: [][]bulkIndexerItem{createRandomBulkIndexerItems(10, 0)}, - }, - { - name: "no_merge", - maxItems: 10, - input1: createRandomBulkIndexerItems(7, 0), - input2: createRandomBulkIndexerItems(5, 0), - expected: [][]bulkIndexerItem{ - createRandomBulkIndexerItems(7, 0), - createRandomBulkIndexerItems(5, 0), - }, - }, - { - name: "large_first_req", - maxItems: 10, - input1: createRandomBulkIndexerItems(101, 0), - input2: createRandomBulkIndexerItems(5, 101), - expected: [][]bulkIndexerItem{ - createRandomBulkIndexerItems(10, 0), - createRandomBulkIndexerItems(10, 10), - createRandomBulkIndexerItems(10, 20), - createRandomBulkIndexerItems(10, 30), - createRandomBulkIndexerItems(10, 40), - createRandomBulkIndexerItems(10, 50), - createRandomBulkIndexerItems(10, 60), - createRandomBulkIndexerItems(10, 70), - createRandomBulkIndexerItems(10, 80), - createRandomBulkIndexerItems(10, 90), - createRandomBulkIndexerItems(6, 100), - }, - }, - { - name: "large_second_req", - maxItems: 10, - input1: createRandomBulkIndexerItems(5, 0), - input2: createRandomBulkIndexerItems(101, 5), - expected: [][]bulkIndexerItem{ - createRandomBulkIndexerItems(10, 0), - createRandomBulkIndexerItems(10, 10), - createRandomBulkIndexerItems(10, 20), - createRandomBulkIndexerItems(10, 30), - createRandomBulkIndexerItems(10, 40), - createRandomBulkIndexerItems(10, 50), - createRandomBulkIndexerItems(10, 60), - createRandomBulkIndexerItems(10, 70), - createRandomBulkIndexerItems(10, 80), - createRandomBulkIndexerItems(10, 90), - createRandomBulkIndexerItems(6, 100), - }, - }, - { - name: "large_reqs", - maxItems: 10, - input1: createRandomBulkIndexerItems(54, 0), - input2: createRandomBulkIndexerItems(55, 54), - expected: [][]bulkIndexerItem{ - createRandomBulkIndexerItems(10, 0), - createRandomBulkIndexerItems(10, 10), - createRandomBulkIndexerItems(10, 20), - createRandomBulkIndexerItems(10, 30), - createRandomBulkIndexerItems(10, 40), - createRandomBulkIndexerItems(10, 50), - createRandomBulkIndexerItems(10, 60), - createRandomBulkIndexerItems(10, 70), - createRandomBulkIndexerItems(10, 80), - createRandomBulkIndexerItems(10, 90), - createRandomBulkIndexerItems(9, 100), - }, - }, - } { - t.Run(tc.name, func(t *testing.T) { - out, err := batchMergeSplit( - context.Background(), - exporterbatcher.MaxSizeConfig{MaxSizeItems: tc.maxItems}, - &request{bulkIndexer: bi, Items: tc.input1}, - &request{bulkIndexer: bi, Items: tc.input2}, - ) - require.NoError(t, err) - require.Equal(t, len(tc.expected), len(out)) - for i := 0; i < len(tc.expected); i++ { - assert.Equal(t, tc.expected[i], out[i].(*request).Items) - } - }) - } -} diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index b94ba5dd3bcf..0b8ab1b514c9 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ 
b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -133,10 +133,10 @@ func prepareBenchmark( cfg.factory = elasticsearchexporter.NewFactory() cfg.esCfg = cfg.factory.CreateDefaultConfig().(*elasticsearchexporter.Config) cfg.esCfg.Mapping.Mode = "ecs" - cfg.esCfg.PersistentQueueConfig.Enabled = true - cfg.esCfg.PersistentQueueConfig.NumConsumers = 200 - cfg.esCfg.PersistentQueueConfig.QueueSize = 100_000 - cfg.esCfg.PersistentQueueConfig.StorageID = &fileExtID + cfg.esCfg.QueueSettings.Enabled = true + cfg.esCfg.QueueSettings.NumConsumers = 200 + cfg.esCfg.QueueSettings.QueueSize = 100_000 + cfg.esCfg.QueueSettings.StorageID = &fileExtID cfg.esCfg.Endpoints = []string{receiver.endpoint} cfg.esCfg.LogsIndex = TestLogsIndex cfg.esCfg.TracesIndex = TestTracesIndex diff --git a/exporter/elasticsearchexporter/logs_exporter.go b/exporter/elasticsearchexporter/logs_exporter.go index 72981298318c..fd8c8628d7e3 100644 --- a/exporter/elasticsearchexporter/logs_exporter.go +++ b/exporter/elasticsearchexporter/logs_exporter.go @@ -11,7 +11,6 @@ import ( "fmt" "time" - "go.opentelemetry.io/collector/exporter/exporterhelper" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/plog" "go.uber.org/zap" @@ -71,7 +70,7 @@ func (e *elasticsearchLogsExporter) Shutdown(ctx context.Context) error { return e.bulkIndexer.Close(ctx) } -func (e *elasticsearchLogsExporter) logsDataToRequest(ctx context.Context, ld plog.Logs) (exporterhelper.Request, error) { +func (e *elasticsearchLogsExporter) pushLogsData(ctx context.Context, ld plog.Logs) error { req := newRequest(e.bulkIndexer) var errs []error rls := ld.ResourceLogs() @@ -86,7 +85,7 @@ func (e *elasticsearchLogsExporter) logsDataToRequest(ctx context.Context, ld pl item, err := e.logRecordToItem(ctx, resource, logs.At(k), scope) if err != nil { if cerr := ctx.Err(); cerr != nil { - return req, cerr + return cerr } errs = append(errs, err) @@ -96,8 +95,10 @@ func (e *elasticsearchLogsExporter) logsDataToRequest(ctx context.Context, ld pl } } } - - return req, errors.Join(errs...) + if err := req.Export(ctx); err != nil { + errs = append(errs, err) + } + return errors.Join(errs...) 
} func (e *elasticsearchLogsExporter) logRecordToItem(ctx context.Context, resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) (bulkIndexerItem, error) { diff --git a/exporter/elasticsearchexporter/trace_exporter.go b/exporter/elasticsearchexporter/trace_exporter.go index 35a8fe9dc7e9..4e6db666d48f 100644 --- a/exporter/elasticsearchexporter/trace_exporter.go +++ b/exporter/elasticsearchexporter/trace_exporter.go @@ -11,7 +11,6 @@ import ( "fmt" "time" - "go.opentelemetry.io/collector/exporter/exporterhelper" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/ptrace" "go.uber.org/zap" @@ -66,10 +65,10 @@ func (e *elasticsearchTracesExporter) Shutdown(ctx context.Context) error { return e.bulkIndexer.Close(ctx) } -func (e *elasticsearchTracesExporter) traceDataToRequest( +func (e *elasticsearchTracesExporter) pushTraceData( ctx context.Context, td ptrace.Traces, -) (exporterhelper.Request, error) { +) error { req := newRequest(e.bulkIndexer) var errs []error resourceSpans := td.ResourceSpans() @@ -86,7 +85,7 @@ func (e *elasticsearchTracesExporter) traceDataToRequest( item, err := e.traceRecordToItem(ctx, resource, span, scope) if err != nil { if cerr := ctx.Err(); cerr != nil { - return req, cerr + return cerr } errs = append(errs, err) continue @@ -95,8 +94,10 @@ func (e *elasticsearchTracesExporter) traceDataToRequest( } } } - - return req, errors.Join(errs...) + if err := req.Export(ctx); err != nil { + errs = append(errs, err) + } + return errors.Join(errs...) } func (e *elasticsearchTracesExporter) traceRecordToItem(ctx context.Context, resource pcommon.Resource, span ptrace.Span, scope pcommon.InstrumentationScope) (bulkIndexerItem, error) { From 04495ff675efd3fe845c92ccfb618286b83035e7 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 3 Jun 2024 18:18:00 +0100 Subject: [PATCH 039/117] Add FIXME for retry_sender --- exporter/elasticsearchexporter/elasticsearch_bulk.go | 2 +- exporter/elasticsearchexporter/integrationtest/exporter_test.go | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 3cfea7ebe442..4533b3ad28a2 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -278,7 +278,7 @@ func (w *worker) addBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem } else if w.indexer.Items() == 0 { return nil } - backoff := w.retryBackoff(attempts + 1) + backoff := w.retryBackoff(attempts + 1) // FIXME: use retry_sender timer := time.NewTimer(backoff) defer timer.Stop() select { diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_test.go index ed5bf6a542a5..ee8a08c2ead5 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_test.go @@ -31,6 +31,7 @@ func TestExporter(t *testing.T) { {name: "collector_restarts", restartCollector: true}, // Test is failing because exporter does not shut down when in-flight requests block indefinitely. 
// See https://github.com/open-telemetry/opentelemetry-collector/issues/10166 + // FIXME: re-enable test // {name: "collector_restart_with_es_intermittent_failure", mockESFailure: true, restartCollector: true}, } { t.Run(fmt.Sprintf("%s/%s", eventType, tc.name), func(t *testing.T) { From 7c4b5b20d38236425fa882688171465558a5048e Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 4 Jun 2024 13:30:16 +0100 Subject: [PATCH 040/117] Clean up --- exporter/elasticsearchexporter/exporter.go | 5 +++-- .../elasticsearchexporter/exporter_test.go | 2 +- exporter/elasticsearchexporter/factory.go | 19 +++++++------------ 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/exporter/elasticsearchexporter/exporter.go b/exporter/elasticsearchexporter/exporter.go index 0aa32e91595a..070d19a82509 100644 --- a/exporter/elasticsearchexporter/exporter.go +++ b/exporter/elasticsearchexporter/exporter.go @@ -73,8 +73,9 @@ func (e *elasticsearchExporter) pushLogsData(ctx context.Context, ld plog.Logs) resource := rl.Resource() ills := rl.ScopeLogs() for j := 0; j < ills.Len(); j++ { - scope := ills.At(j).Scope() - logs := ills.At(j).LogRecords() + ill := ills.At(j) + scope := ill.Scope() + logs := ill.LogRecords() for k := 0; k < logs.Len(); k++ { item, err := e.logRecordToItem(ctx, resource, logs.At(k), scope) if err != nil { diff --git a/exporter/elasticsearchexporter/exporter_test.go b/exporter/elasticsearchexporter/exporter_test.go index bed67c74a971..9eec54952791 100644 --- a/exporter/elasticsearchexporter/exporter_test.go +++ b/exporter/elasticsearchexporter/exporter_test.go @@ -7,7 +7,6 @@ import ( "context" "encoding/json" "fmt" - "go.opentelemetry.io/collector/component/componenttest" "net/http" "runtime" "strings" @@ -18,6 +17,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" "go.opentelemetry.io/collector/exporter" "go.opentelemetry.io/collector/exporter/exportertest" "go.opentelemetry.io/collector/pdata/plog" diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 133b5d176a98..6df748c27bc4 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -16,7 +16,6 @@ import ( "go.opentelemetry.io/collector/exporter" "go.opentelemetry.io/collector/exporter/exporterbatcher" "go.opentelemetry.io/collector/exporter/exporterhelper" - "go.opentelemetry.io/collector/exporter/exporterqueue" "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/metadata" ) @@ -33,20 +32,15 @@ func NewFactory() exporter.Factory { return exporter.NewFactory( metadata.Type, createDefaultConfig, - exporter.WithLogs(createLogsRequestExporter, metadata.LogsStability), - exporter.WithTraces(createTracesRequestExporter, metadata.TracesStability), + exporter.WithLogs(createLogsExporter, metadata.LogsStability), + exporter.WithTraces(createTracesExporter, metadata.TracesStability), ) } func createDefaultConfig() component.Config { - qs := exporterqueue.PersistentQueueConfig{ - Config: exporterqueue.NewDefaultConfig(), - StorageID: nil, - } - qs.Enabled = false // FIXME: how does batching without queuing look like? return &Config{ QueueSettings: exporterhelper.QueueSettings{ - Enabled: false, + Enabled: false, // FIXME: how does batching without queuing look like? 
 			NumConsumers: exporterhelper.NewDefaultQueueSettings().NumConsumers,
 			QueueSize:    exporterhelper.NewDefaultQueueSettings().QueueSize,
 		},
@@ -87,10 +81,10 @@ func createDefaultConfig() component.Config {
 	}
 }
 
-// createLogsRequestExporter creates a new request exporter for logs.
+// createLogsExporter creates a new exporter for logs.
 //
 // Logs are directly indexed into Elasticsearch.
-func createLogsRequestExporter(
+func createLogsExporter(
 	ctx context.Context,
 	set exporter.CreateSettings,
 	cfg component.Config,
@@ -126,7 +120,8 @@ func createLogsRequestExporter(
 	)
 }
 
-func createTracesRequestExporter(ctx context.Context,
+// createTracesExporter creates a new exporter for traces.
+func createTracesExporter(ctx context.Context,
 	set exporter.CreateSettings,
 	cfg component.Config) (exporter.Traces, error) {

From 91ea93d32ef7085f6b985e6f2a823df9f60918e9 Mon Sep 17 00:00:00 2001
From: Carson Ip
Date: Tue, 4 Jun 2024 15:07:15 +0100
Subject: [PATCH 041/117] Add max_documents

---
 .chloggen/elasticsearchexporter_batchsender.yaml    | 1 +
 exporter/elasticsearchexporter/README.md            | 1 +
 exporter/elasticsearchexporter/config.go            | 4 ++++
 exporter/elasticsearchexporter/config_test.go       | 2 ++
 exporter/elasticsearchexporter/factory.go           | 3 ++-
 exporter/elasticsearchexporter/testdata/config.yaml | 2 ++
 6 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/.chloggen/elasticsearchexporter_batchsender.yaml b/.chloggen/elasticsearchexporter_batchsender.yaml
index b38a7b018a74..4af4f85e2813 100644
--- a/.chloggen/elasticsearchexporter_batchsender.yaml
+++ b/.chloggen/elasticsearchexporter_batchsender.yaml
@@ -20,6 +20,7 @@ subtext: |
   With this change, there should be no event loss when used with persistent queue in the event of a collector crash.
   Option `flush.bytes` is now ignored. Use the new `flush.min_documents` option to control the minimum number of documents to trigger a flush.
+  Introduce option `flush.max_documents` to control the maximum number of documents in a request.
 
 # If your change doesn't affect end users or the exported elements of any package,
 # you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md
index a36740a36eba..f9e939e989e9 100644
--- a/exporter/elasticsearchexporter/README.md
+++ b/exporter/elasticsearchexporter/README.md
@@ -53,6 +53,7 @@ This exporter supports sending OpenTelemetry logs and traces to [Elasticsearch](
 - `flush`: Event bulk indexer buffer flush settings
   - `bytes` (default=5000000): Write buffer flush size limit. WARNING: This configuration is ignored. Use `flush.min_documents` instead.
   - `min_documents` (default=125): Minimum number of documents in the buffer to trigger a flush immediately.
+  - `max_documents` (default=0): Maximum number of documents in a request. In practice, the number of documents in a request may exceed `max_documents` if the request cannot be split into smaller ones.
   - `interval` (default=30s): Max age of a document in the buffer. A flush will happen regardless of the size of content in buffer.
 - `retry`: Elasticsearch bulk request retry settings
   - `enabled` (default=true): Enable/Disable request retry on error. Failed requests are retried with exponential backoff.
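
For illustration, a minimal `flush` configuration exercising the new option might look like the following sketch (the endpoint is a placeholder; the flush values mirror the testdata/config.yaml hunk later in this patch):

elasticsearch:
  endpoints: [http://localhost:9200]
  flush:
    min_documents: 100
    max_documents: 200
    interval: 5s
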
diff --git a/exporter/elasticsearchexporter/config.go b/exporter/elasticsearchexporter/config.go index 5532894b3498..4c515cf39460 100644 --- a/exporter/elasticsearchexporter/config.go +++ b/exporter/elasticsearchexporter/config.go @@ -139,6 +139,10 @@ type FlushSettings struct { // MinDocuments configures the minimum number of documents in the send buffer to trigger a flush. MinDocuments int `mapstructure:"min_documents"` + // MaxDocuments configures the maximum number of documents in a request. + // In practice, the number of documents in a request may exceed MaxDocuments if the request cannot be split into smaller ones. + MaxDocuments int `mapstructure:"max_documents"` + // Interval configures the max age of a document in the send buffer. Interval time.Duration `mapstructure:"interval"` } diff --git a/exporter/elasticsearchexporter/config_test.go b/exporter/elasticsearchexporter/config_test.go index 95eebe39d206..bc54fc8c6967 100644 --- a/exporter/elasticsearchexporter/config_test.go +++ b/exporter/elasticsearchexporter/config_test.go @@ -74,6 +74,7 @@ func TestConfig(t *testing.T) { Flush: FlushSettings{ Bytes: 0, MinDocuments: 100, + MaxDocuments: 200, Interval: 5 * time.Second, }, Retry: RetrySettings{ @@ -126,6 +127,7 @@ func TestConfig(t *testing.T) { Flush: FlushSettings{ Bytes: 0, MinDocuments: 100, + MaxDocuments: 200, Interval: 5 * time.Second, }, Retry: RetrySettings{ diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 6df748c27bc4..5f4e272277c6 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -66,6 +66,7 @@ func createDefaultConfig() component.Config { Flush: FlushSettings{ Bytes: 0, MinDocuments: 125, + MaxDocuments: 0, Interval: 30 * time.Second, }, Mapping: MappingsSettings{ @@ -166,6 +167,6 @@ func getBatcherConfig(cf *Config) exporterbatcher.Config { batcherCfg.Enabled = true batcherCfg.FlushTimeout = cf.Flush.Interval batcherCfg.MinSizeItems = cf.Flush.MinDocuments - batcherCfg.MaxSizeItems = 0 + batcherCfg.MaxSizeItems = cf.Flush.MaxDocuments return batcherCfg } diff --git a/exporter/elasticsearchexporter/testdata/config.yaml b/exporter/elasticsearchexporter/testdata/config.yaml index 70a6ab3c95d2..0e54b81b19a2 100644 --- a/exporter/elasticsearchexporter/testdata/config.yaml +++ b/exporter/elasticsearchexporter/testdata/config.yaml @@ -16,6 +16,7 @@ elasticsearch/trace: on_start: true flush: min_documents: 100 + max_documents: 200 interval: 5s retry: max_requests: 5 @@ -38,6 +39,7 @@ elasticsearch/log: on_start: true flush: min_documents: 100 + max_documents: 200 interval: 5s retry: max_requests: 5 From ec37579ac1629205df1c2c6b208a2784740b8ccb Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 4 Jun 2024 15:11:19 +0100 Subject: [PATCH 042/117] Use errors.New --- exporter/elasticsearchexporter/elasticsearch_bulk.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index b1715c713ec6..b29995902c70 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -6,6 +6,7 @@ package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry import ( "context" "crypto/tls" + "errors" "fmt" "net/http" "runtime" @@ -224,7 +225,7 @@ func (p *bulkIndexerPool) AddBatchAndFlush(ctx context.Context, batch []esBulkIn case <-ctx.Done(): return ctx.Err() case <-p.closeCh: 
- return fmt.Errorf("bulk indexer is closed") + return errors.New("bulk indexer is closed") default: } return worker.addBatchAndFlush(ctx, batch) @@ -283,7 +284,7 @@ func (w *worker) addBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem case <-ctx.Done(): return ctx.Err() case <-w.closeCh: - return fmt.Errorf("bulk indexer is closed") + return errors.New("bulk indexer is closed") case <-timer.C: } } From f59660249cc7b8a656138f1e9154d1ebec372615 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 4 Jun 2024 16:27:11 +0100 Subject: [PATCH 043/117] Respect context in Export --- exporter/elasticsearchexporter/elasticsearch_bulk.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index b29995902c70..b3721a9893d7 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -272,7 +272,7 @@ func (w *worker) addBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem } } for attempts := 0; ; attempts++ { - if err := w.flush(); err != nil { + if err := w.flush(ctx); err != nil { return err } else if w.indexer.Items() == 0 { return nil @@ -290,8 +290,8 @@ func (w *worker) addBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem } } -func (w *worker) flush() error { - ctx, cancel := context.WithTimeout(context.Background(), w.flushTimeout) +func (w *worker) flush(ctx context.Context) error { + ctx, cancel := context.WithTimeout(ctx, w.flushTimeout) defer cancel() stat, err := w.indexer.Flush(ctx) w.stats.docsIndexed.Add(stat.Indexed) From 32029c2de38f07e340c6bcebf8a762110ed94fb5 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 4 Jun 2024 16:47:17 +0100 Subject: [PATCH 044/117] Change FIXME to TODO --- exporter/elasticsearchexporter/elasticsearch_bulk.go | 2 +- exporter/elasticsearchexporter/integrationtest/exporter_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index b3721a9893d7..1db2dcf60734 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -277,7 +277,7 @@ func (w *worker) addBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem } else if w.indexer.Items() == 0 { return nil } - backoff := w.retryBackoff(attempts + 1) // FIXME: use retry_sender + backoff := w.retryBackoff(attempts + 1) // TODO: use exporterhelper retry_sender timer := time.NewTimer(backoff) defer timer.Stop() select { diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_test.go index ee8a08c2ead5..1cf6e6a6c1f6 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_test.go @@ -31,7 +31,7 @@ func TestExporter(t *testing.T) { {name: "collector_restarts", restartCollector: true}, // Test is failing because exporter does not shut down when in-flight requests block indefinitely. 
// See https://github.com/open-telemetry/opentelemetry-collector/issues/10166 - // FIXME: re-enable test + // TODO: re-enable test // {name: "collector_restart_with_es_intermittent_failure", mockESFailure: true, restartCollector: true}, } { t.Run(fmt.Sprintf("%s/%s", eventType, tc.name), func(t *testing.T) { From babf6ad33250952ed68070618483359531a2f3f4 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 4 Jun 2024 16:57:13 +0100 Subject: [PATCH 045/117] Explicitly deprecate flush.bytes --- exporter/elasticsearchexporter/README.md | 2 +- exporter/elasticsearchexporter/config.go | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index f9e939e989e9..1805c64e7d8a 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -51,7 +51,7 @@ This exporter supports sending OpenTelemetry logs and traces to [Elasticsearch]( - `date_format`(default=`%Y.%m.%d`): Time format (based on strftime) to generate the second part of the Index name. - `pipeline` (optional): Optional [Ingest pipeline](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html) ID used for processing documents published by the exporter. - `flush`: Event bulk indexer buffer flush settings - - `bytes` (default=5000000): Write buffer flush size limit. WARNING: This configuration is ignored. Use `flush.min_documents` instead. + - `bytes` (DEPRECATED, use `flush.min_documents` instead): Write buffer flush size limit. WARNING: This configuration is ignored. - `min_documents` (default=125): Minimum number of documents in the buffer to trigger a flush immediately. - `max_documents` (default=0): Maximum number of documents in a request. In practice, the number of documents in a request may exceed MaxDocuments if the request cannot be split into smaller ones. - `interval` (default=30s): Max age of a document in the buffer. A flush will happen regardless of the size of content in buffer. diff --git a/exporter/elasticsearchexporter/config.go b/exporter/elasticsearchexporter/config.go index 4c515cf39460..96a03dd15a4a 100644 --- a/exporter/elasticsearchexporter/config.go +++ b/exporter/elasticsearchexporter/config.go @@ -133,7 +133,8 @@ type DiscoverySettings struct { // all events already serialized into the send-buffer. type FlushSettings struct { // Bytes sets the send buffer flushing limit. - // WARNING: This configuration is ignored. Use `flush.min_documents` instead. + // + // Deprecated: This configuration is ignored. Use `flush.min_documents` instead. Bytes int `mapstructure:"bytes"` // MinDocuments configures the minimum number of documents in the send buffer to trigger a flush. 
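
As a migration sketch for the deprecation above (the values shown are the documented defaults, for illustration only), a configuration that previously tuned the write buffer by size should switch to the document-count threshold, since `flush.bytes` is now ignored:

# before: size-based flushing, now ignored
flush:
  bytes: 5000000

# after: document-count-based flushing
flush:
  min_documents: 125
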
From 454540a5d27a9ae611090bef0fefe1579f637810 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 4 Jun 2024 17:19:19 +0100 Subject: [PATCH 046/117] Comment for EnableRetryOnTimeout --- exporter/elasticsearchexporter/elasticsearch_bulk.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 1db2dcf60734..9bc164f53817 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -105,7 +105,7 @@ func newElasticsearchClient(logger *zap.Logger, config *Config) (*esClientCurren // configure retry behavior RetryOnStatus: config.Retry.RetryOnStatus, DisableRetry: retryDisabled, - EnableRetryOnTimeout: config.Retry.Enabled, // FIXME: maybe this needs to be disabled + EnableRetryOnTimeout: config.Retry.Enabled, // for timeouts in underlying transport layers //RetryOnError: retryOnError, // should be used from esclient version 8 onwards MaxRetries: maxRetries, RetryBackoff: createElasticsearchBackoffFunc(&config.Retry), From 14e1804b545db65ce6d083063f3ed271aef07bed Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 4 Jun 2024 17:58:16 +0100 Subject: [PATCH 047/117] Disable timeout_sender --- exporter/elasticsearchexporter/factory.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 5f4e272277c6..28740606ad36 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -118,6 +118,7 @@ func createLogsExporter( exporterhelper.WithBatcher(batcherCfg), exporterhelper.WithShutdown(exporter.Shutdown), exporterhelper.WithQueue(cf.QueueSettings), + exporterhelper.WithTimeout(getTimeoutConfig()), ) } @@ -148,6 +149,7 @@ func createTracesExporter(ctx context.Context, exporterhelper.WithBatcher(batcherCfg), exporterhelper.WithShutdown(exporter.Shutdown), exporterhelper.WithQueue(cf.QueueSettings), + exporterhelper.WithTimeout(getTimeoutConfig()), ) } @@ -170,3 +172,9 @@ func getBatcherConfig(cf *Config) exporterbatcher.Config { batcherCfg.MaxSizeItems = cf.Flush.MaxDocuments return batcherCfg } + +func getTimeoutConfig() exporterhelper.TimeoutSettings { + return exporterhelper.TimeoutSettings{ + Timeout: time.Duration(0), // effectively disable timeout_sender because timeout is enforced in bulk indexer + } +} From 7dfea92f119dc9bd79bc9d465e26777bf8025e5c Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 4 Jun 2024 19:19:16 +0100 Subject: [PATCH 048/117] Update sending queue default consumers --- exporter/elasticsearchexporter/README.md | 6 +++--- exporter/elasticsearchexporter/factory.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index 1805c64e7d8a..a55ce17801a0 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -80,9 +80,9 @@ This exporter supports sending OpenTelemetry logs and traces to [Elasticsearch]( - `dedot` (default=true): When enabled attributes with `.` will be split into proper json objects. 
- `sending_queue` - - `enabled` (default = false) - - `num_consumers` (default = 10): Number of consumers that dequeue batches; ignored if `enabled` is `false` - - `queue_size` (default = 1000): Maximum number of batches kept in queue; ignored if `enabled` is `false`; + - `enabled` (default=false) + - `num_consumers` (default=100): Number of consumers that dequeue batches. A combined batch cannot contain more batches than the number of consumers. + - `queue_size` (default=1000): Maximum number of batches kept in queue. ### HTTP settings - `read_buffer_size` (default=0): Read buffer size of HTTP client. diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 28740606ad36..cbf856f8699e 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -41,7 +41,7 @@ func createDefaultConfig() component.Config { return &Config{ QueueSettings: exporterhelper.QueueSettings{ Enabled: false, // FIXME: how does batching without queuing look like? - NumConsumers: exporterhelper.NewDefaultQueueSettings().NumConsumers, + NumConsumers: 100, QueueSize: exporterhelper.NewDefaultQueueSettings().QueueSize, }, ClientConfig: ClientConfig{ From 622d6e8a08d179e5f427eb544a5166869ea45468 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 4 Jun 2024 19:25:18 +0100 Subject: [PATCH 049/117] Add persistent queue to readme --- exporter/elasticsearchexporter/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index a55ce17801a0..37c726e725e0 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -83,6 +83,7 @@ This exporter supports sending OpenTelemetry logs and traces to [Elasticsearch]( - `enabled` (default=false) - `num_consumers` (default=100): Number of consumers that dequeue batches. A combined batch cannot contain more batches than the number of consumers. - `queue_size` (default=1000): Maximum number of batches kept in queue. + - `storage` (optional): If not empty, it enables the persistent storage and uses the component specified as a storage extension for the persistent queue. When persistent queue is used, there should be no event loss even on collector crashes. ### HTTP settings - `read_buffer_size` (default=0): Read buffer size of HTTP client. 
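To make the new `storage` option concrete, a hypothetical collector configuration wiring the sending queue to a file storage extension might look like the sketch below; the endpoint and directory are placeholders, and the usual `service` pipeline wiring is omitted:

```yaml
extensions:
  file_storage/elasticsearchexporter:
    directory: /var/lib/otelcol/esexporter

exporters:
  elasticsearch:
    endpoints: [http://localhost:9200]
    sending_queue:
      enabled: true
      num_consumers: 100
      queue_size: 1000
      # References the extension above; omit to keep an in-memory queue.
      storage: file_storage/elasticsearchexporter

service:
  extensions: [file_storage/elasticsearchexporter]
```

The persistent queue is what later patches in this series lean on for crash safety: queued batches survive a collector restart instead of being lost with process memory.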
From 36bcd9c4332403266a51e1ce7edb5a64ff9a4c02 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 5 Jun 2024 12:17:55 +0100 Subject: [PATCH 050/117] Fix num_consumers config test --- exporter/elasticsearchexporter/config_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/config_test.go b/exporter/elasticsearchexporter/config_test.go index bc54fc8c6967..e8dbbba2017f 100644 --- a/exporter/elasticsearchexporter/config_test.go +++ b/exporter/elasticsearchexporter/config_test.go @@ -49,7 +49,7 @@ func TestConfig(t *testing.T) { expected: &Config{ QueueSettings: exporterhelper.QueueSettings{ Enabled: false, - NumConsumers: exporterqueue.NewDefaultConfig().NumConsumers, + NumConsumers: 100, QueueSize: exporterqueue.NewDefaultConfig().QueueSize, }, Endpoints: []string{"https://elastic.example.com:9200"}, @@ -102,7 +102,7 @@ func TestConfig(t *testing.T) { expected: &Config{ QueueSettings: exporterhelper.QueueSettings{ Enabled: true, - NumConsumers: exporterqueue.NewDefaultConfig().NumConsumers, + NumConsumers: 100, QueueSize: exporterqueue.NewDefaultConfig().QueueSize, }, Endpoints: []string{"http://localhost:9200"}, From 18e9ce412912e9c8fc0d75b18c534af2afcd2591 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 6 Jun 2024 19:34:47 +0100 Subject: [PATCH 051/117] Remove use of num_workers --- .../elasticsearch_bulk.go | 103 ++++++++---------- 1 file changed, 44 insertions(+), 59 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 9bc164f53817..6b35d246f44c 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -9,7 +9,6 @@ import ( "errors" "fmt" "net/http" - "runtime" "sync" "sync/atomic" "time" @@ -25,7 +24,7 @@ import ( type esClientCurrent = elasticsearch7.Client type esConfigCurrent = elasticsearch7.Config -type esBulkIndexerCurrent = bulkIndexerPool +type esBulkIndexerCurrent = bulkIndexerManager type esBulkIndexerItem = docappender.BulkIndexerItem @@ -160,43 +159,13 @@ func createElasticsearchBackoffFunc(config *RetrySettings) func(int) time.Durati } func newBulkIndexer(logger *zap.Logger, client *elasticsearch7.Client, config *Config) (*esBulkIndexerCurrent, error) { - numWorkers := config.NumWorkers - if numWorkers == 0 { - numWorkers = runtime.NumCPU() - } - - var maxDocRetry int - if config.Retry.Enabled { - // max_requests includes initial attempt - // See https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32344 - maxDocRetry = config.Retry.MaxRequests - 1 - } - - pool := &bulkIndexerPool{ - closeCh: make(chan struct{}), - stats: bulkIndexerStats{}, - available: make(chan *worker, numWorkers), - } - - for i := 0; i < numWorkers; i++ { - bi, err := docappender.NewBulkIndexer(docappender.BulkIndexerConfig{ - Client: client, - MaxDocumentRetries: maxDocRetry, - Pipeline: config.Pipeline, - RetryOnDocumentStatus: config.Retry.RetryOnStatus, - }) - if err != nil { - return nil, err - } - w := worker{ - indexer: bi, - closeCh: pool.closeCh, - flushTimeout: config.Timeout, - retryBackoff: createElasticsearchBackoffFunc(&config.Retry), - logger: logger, - stats: &pool.stats, - } - pool.available <- &w + pool := &bulkIndexerManager{ + closeCh: make(chan struct{}), + stats: bulkIndexerStats{}, + esClient: client, + logger: logger, + config: config, + wg: &sync.WaitGroup{}, } return pool, nil } @@ -205,41 +174,57 @@ type bulkIndexerStats struct { docsIndexed 
atomic.Int64 } -type bulkIndexerPool struct { - closeCh chan struct{} - stats bulkIndexerStats - available chan *worker +type bulkIndexerManager struct { + closeCh chan struct{} + stats bulkIndexerStats + esClient *elasticsearch7.Client + logger *zap.Logger + config *Config + wg *sync.WaitGroup } -func (p *bulkIndexerPool) AddBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem) error { +func (p *bulkIndexerManager) AddBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem) error { select { case <-ctx.Done(): return ctx.Err() case <-p.closeCh: return fmt.Errorf("bulk indexer is closed") - case worker := <-p.available: - defer func() { - p.available <- worker - }() - select { - case <-ctx.Done(): - return ctx.Err() - case <-p.closeCh: - return errors.New("bulk indexer is closed") - default: + default: + var maxDocRetry int + if p.config.Retry.Enabled { + // max_requests includes initial attempt + // See https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32344 + maxDocRetry = p.config.Retry.MaxRequests - 1 + } + bi, err := docappender.NewBulkIndexer(docappender.BulkIndexerConfig{ + Client: p.esClient, + MaxDocumentRetries: maxDocRetry, + Pipeline: p.config.Pipeline, + RetryOnDocumentStatus: p.config.Retry.RetryOnStatus, + }) + if err != nil { + return err } - return worker.addBatchAndFlush(ctx, batch) + p.wg.Add(1) + defer p.wg.Done() + w := worker{ + indexer: bi, + closeCh: p.closeCh, + flushTimeout: p.config.Timeout, + retryBackoff: createElasticsearchBackoffFunc(&p.config.Retry), + logger: p.logger, + stats: &p.stats, + } + return w.addBatchAndFlush(ctx, batch) } } // Close closes the closeCh channel and wait for workers to finish. -func (p *bulkIndexerPool) Close(ctx context.Context) error { +func (p *bulkIndexerManager) Close(ctx context.Context) error { close(p.closeCh) doneCh := make(chan struct{}) go func() { - for i := 0; i < cap(p.available); i++ { - <-p.available - } + p.wg.Wait() close(doneCh) }() select { From 179430b91b47dbe85d2647519f904f0f7ac0cb66 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Fri, 7 Jun 2024 11:19:33 +0100 Subject: [PATCH 052/117] Deprecate flush.*, use batcher.* --- exporter/elasticsearchexporter/README.md | 8 +++-- exporter/elasticsearchexporter/config.go | 26 ++++++++------- exporter/elasticsearchexporter/config_test.go | 21 ++++++------ .../elasticsearchexporter/exporter_test.go | 6 ++-- exporter/elasticsearchexporter/factory.go | 33 ++++++++----------- .../integrationtest/datareceiver.go | 4 +-- .../integrationtest/exporter_bench_test.go | 2 +- .../testdata/config.yaml | 16 ++++----- 8 files changed, 57 insertions(+), 59 deletions(-) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index 37c726e725e0..953affddd895 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -50,11 +50,13 @@ This exporter supports sending OpenTelemetry logs and traces to [Elasticsearch]( - `prefix_separator`(default=`-`): Set a separator between logstash_prefix and date. - `date_format`(default=`%Y.%m.%d`): Time format (based on strftime) to generate the second part of the Index name. - `pipeline` (optional): Optional [Ingest pipeline](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html) ID used for processing documents published by the exporter. +- `batcher`: Exporter batching settings + - `min_size_items` (default=125): Minimum number of documents in the buffer to trigger a flush immediately. 
+ - `max_size_items` (default=0): Maximum number of documents in a request. In practice, the number of documents in a request may exceed MaxDocuments if the request cannot be split into smaller ones. + - `flush_timeout` (default=30s): Max age of a document in the buffer. A flush will happen regardless of the size of content in buffer. - `flush`: Event bulk indexer buffer flush settings - `bytes` (DEPRECATED, use `flush.min_documents` instead): Write buffer flush size limit. WARNING: This configuration is ignored. - - `min_documents` (default=125): Minimum number of documents in the buffer to trigger a flush immediately. - - `max_documents` (default=0): Maximum number of documents in a request. In practice, the number of documents in a request may exceed MaxDocuments if the request cannot be split into smaller ones. - - `interval` (default=30s): Max age of a document in the buffer. A flush will happen regardless of the size of content in buffer. + - `interval` (DEPRECATED, use `batcher.flush_timeout` instead): Max age of a document in the buffer. A flush will happen regardless of the size of content in buffer. WARNING: This configuration is ignored. - `retry`: Elasticsearch bulk request retry settings - `enabled` (default=true): Enable/Disable request retry on error. Failed requests are retried with exponential backoff. - `max_requests` (default=3): Number of HTTP request retries. diff --git a/exporter/elasticsearchexporter/config.go b/exporter/elasticsearchexporter/config.go index 96a03dd15a4a..d168ca93f0e9 100644 --- a/exporter/elasticsearchexporter/config.go +++ b/exporter/elasticsearchexporter/config.go @@ -14,12 +14,18 @@ import ( "go.opentelemetry.io/collector/config/configopaque" "go.opentelemetry.io/collector/config/configtls" + "go.opentelemetry.io/collector/exporter/exporterbatcher" "go.opentelemetry.io/collector/exporter/exporterhelper" ) // Config defines configuration for Elastic exporter. type Config struct { exporterhelper.QueueSettings `mapstructure:"sending_queue"` + + // Experimental: This configuration is at the early stage of development and may change without backward compatibility + // until https://github.com/open-telemetry/opentelemetry-collector/issues/8122 is resolved. + BatcherConfig exporterbatcher.Config `mapstructure:"batcher"` + // Endpoints holds the Elasticsearch URLs the exporter should send events to. // // This setting is required if CloudID is not set and if the @@ -58,10 +64,11 @@ type Config struct { // https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html Pipeline string `mapstructure:"pipeline"` - ClientConfig `mapstructure:",squash"` - Discovery DiscoverySettings `mapstructure:"discover"` - Retry RetrySettings `mapstructure:"retry"` - Flush FlushSettings `mapstructure:"flush"` + ClientConfig `mapstructure:",squash"` + Discovery DiscoverySettings `mapstructure:"discover"` + Retry RetrySettings `mapstructure:"retry"` + Flush FlushSettings `mapstructure:"flush"` // Deprecated: use `batcher` instead. + Mapping MappingsSettings `mapstructure:"mapping"` LogstashFormat LogstashFormatSettings `mapstructure:"logstash_format"` } @@ -134,17 +141,12 @@ type DiscoverySettings struct { type FlushSettings struct { // Bytes sets the send buffer flushing limit. // - // Deprecated: This configuration is ignored. Use `flush.min_documents` instead. + // Deprecated: This configuration is ignored. Use `batcher.min_size_items` instead. 
Bytes int `mapstructure:"bytes"` - // MinDocuments configures the minimum number of documents in the send buffer to trigger a flush. - MinDocuments int `mapstructure:"min_documents"` - - // MaxDocuments configures the maximum number of documents in a request. - // In practice, the number of documents in a request may exceed MaxDocuments if the request cannot be split into smaller ones. - MaxDocuments int `mapstructure:"max_documents"` - // Interval configures the max age of a document in the send buffer. + // + // Deprecated: This configuration is ignored. Use `batcher.flush_timeout` instead. Interval time.Duration `mapstructure:"interval"` } diff --git a/exporter/elasticsearchexporter/config_test.go b/exporter/elasticsearchexporter/config_test.go index e8dbbba2017f..ab12c3cd3852 100644 --- a/exporter/elasticsearchexporter/config_test.go +++ b/exporter/elasticsearchexporter/config_test.go @@ -13,6 +13,7 @@ import ( "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/confmap/confmaptest" + "go.opentelemetry.io/collector/exporter/exporterbatcher" "go.opentelemetry.io/collector/exporter/exporterhelper" "go.opentelemetry.io/collector/exporter/exporterqueue" @@ -71,11 +72,11 @@ func TestConfig(t *testing.T) { Discovery: DiscoverySettings{ OnStart: true, }, - Flush: FlushSettings{ - Bytes: 0, - MinDocuments: 100, - MaxDocuments: 200, - Interval: 5 * time.Second, + BatcherConfig: exporterbatcher.Config{ + Enabled: true, + FlushTimeout: 5 * time.Second, + MinSizeConfig: exporterbatcher.MinSizeConfig{MinSizeItems: 100}, + MaxSizeConfig: exporterbatcher.MaxSizeConfig{MaxSizeItems: 200}, }, Retry: RetrySettings{ Enabled: true, @@ -124,11 +125,11 @@ func TestConfig(t *testing.T) { Discovery: DiscoverySettings{ OnStart: true, }, - Flush: FlushSettings{ - Bytes: 0, - MinDocuments: 100, - MaxDocuments: 200, - Interval: 5 * time.Second, + BatcherConfig: exporterbatcher.Config{ + Enabled: true, + FlushTimeout: 5 * time.Second, + MinSizeConfig: exporterbatcher.MinSizeConfig{MinSizeItems: 100}, + MaxSizeConfig: exporterbatcher.MaxSizeConfig{MaxSizeItems: 200}, }, Retry: RetrySettings{ Enabled: true, diff --git a/exporter/elasticsearchexporter/exporter_test.go b/exporter/elasticsearchexporter/exporter_test.go index 9eec54952791..1b8f8dc4e835 100644 --- a/exporter/elasticsearchexporter/exporter_test.go +++ b/exporter/elasticsearchexporter/exporter_test.go @@ -437,7 +437,7 @@ func TestExporterLogs(t *testing.T) { }) exporter := newTestLogsExporter(t, server.URL, func(cfg *Config) { - cfg.Flush.Interval = 50 * time.Millisecond + cfg.BatcherConfig.FlushTimeout = 50 * time.Millisecond cfg.Retry.InitialInterval = 1 * time.Millisecond cfg.Retry.MaxInterval = 10 * time.Millisecond }) @@ -598,7 +598,7 @@ func newTestTracesExporter(t *testing.T, url string, fns ...func(*Config)) expor cfg := withDefaultConfig(append([]func(*Config){func(cfg *Config) { cfg.Endpoints = []string{url} cfg.NumWorkers = 1 - cfg.Flush.Interval = 10 * time.Millisecond + cfg.BatcherConfig.FlushTimeout = 10 * time.Millisecond }}, fns...)...) 
exp, err := f.CreateTracesExporter(context.Background(), exportertest.NewNopCreateSettings(), cfg) require.NoError(t, err) @@ -615,7 +615,7 @@ func newTestLogsExporter(t *testing.T, url string, fns ...func(*Config)) exporte cfg := withDefaultConfig(append([]func(*Config){func(cfg *Config) { cfg.Endpoints = []string{url} cfg.NumWorkers = 1 - cfg.Flush.Interval = 10 * time.Millisecond + cfg.BatcherConfig.FlushTimeout = 10 * time.Millisecond }}, fns...)...) exp, err := f.CreateLogsExporter(context.Background(), exportertest.NewNopCreateSettings(), cfg) require.NoError(t, err) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index cbf856f8699e..d20e8fda6e2e 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -63,11 +63,13 @@ func createDefaultConfig() component.Config { http.StatusGatewayTimeout, }, }, - Flush: FlushSettings{ - Bytes: 0, - MinDocuments: 125, - MaxDocuments: 0, - Interval: 30 * time.Second, + BatcherConfig: exporterbatcher.Config{ + Enabled: true, + FlushTimeout: 30 * time.Second, + MinSizeConfig: exporterbatcher.MinSizeConfig{ + MinSizeItems: 125, + }, + MaxSizeConfig: exporterbatcher.MaxSizeConfig{}, }, Mapping: MappingsSettings{ Mode: "none", @@ -99,7 +101,7 @@ func createLogsExporter( } if cf.Flush.Bytes != 0 { - set.Logger.Warn("flush.bytes option is ignored. Use flush.min_documents instead.") + set.Logger.Warn("flush.bytes option is ignored. Use batcher.min_size_items instead.") } setDefaultUserAgentHeader(cf, set.BuildInfo) @@ -109,13 +111,13 @@ func createLogsExporter( return nil, fmt.Errorf("cannot configure Elasticsearch exporter: %w", err) } - batcherCfg := getBatcherConfig(cf) + cf.BatcherConfig.Enabled = true return exporterhelper.NewLogsExporter( ctx, set, cfg, exporter.pushLogsData, - exporterhelper.WithBatcher(batcherCfg), + exporterhelper.WithBatcher(cf.BatcherConfig), exporterhelper.WithShutdown(exporter.Shutdown), exporterhelper.WithQueue(cf.QueueSettings), exporterhelper.WithTimeout(getTimeoutConfig()), @@ -130,7 +132,7 @@ func createTracesExporter(ctx context.Context, cf := cfg.(*Config) if cf.Flush.Bytes != 0 { - set.Logger.Warn("flush.bytes option is ignored. Use flush.min_documents instead.") + set.Logger.Warn("flush.bytes option is ignored. 
Use batcher.min_size_items instead.") } setDefaultUserAgentHeader(cf, set.BuildInfo) @@ -140,13 +142,13 @@ func createTracesExporter(ctx context.Context, return nil, fmt.Errorf("cannot configure Elasticsearch exporter: %w", err) } - batcherCfg := getBatcherConfig(cf) + cf.BatcherConfig.Enabled = true return exporterhelper.NewTracesExporter( ctx, set, cfg, exporter.pushTraceData, - exporterhelper.WithBatcher(batcherCfg), + exporterhelper.WithBatcher(cf.BatcherConfig), exporterhelper.WithShutdown(exporter.Shutdown), exporterhelper.WithQueue(cf.QueueSettings), exporterhelper.WithTimeout(getTimeoutConfig()), @@ -164,15 +166,6 @@ func setDefaultUserAgentHeader(cf *Config, info component.BuildInfo) { cf.Headers[userAgentHeaderKey] = fmt.Sprintf("%s/%s (%s/%s)", info.Description, info.Version, runtime.GOOS, runtime.GOARCH) } -func getBatcherConfig(cf *Config) exporterbatcher.Config { - batcherCfg := exporterbatcher.NewDefaultConfig() - batcherCfg.Enabled = true - batcherCfg.FlushTimeout = cf.Flush.Interval - batcherCfg.MinSizeItems = cf.Flush.MinDocuments - batcherCfg.MaxSizeItems = cf.Flush.MaxDocuments - return batcherCfg -} - func getTimeoutConfig() exporterhelper.TimeoutSettings { return exporterhelper.TimeoutSettings{ Timeout: time.Duration(0), // effectively disable timeout_sender because timeout is enforced in bulk indexer diff --git a/exporter/elasticsearchexporter/integrationtest/datareceiver.go b/exporter/elasticsearchexporter/integrationtest/datareceiver.go index 03e745327d61..7e416d80da92 100644 --- a/exporter/elasticsearchexporter/integrationtest/datareceiver.go +++ b/exporter/elasticsearchexporter/integrationtest/datareceiver.go @@ -107,8 +107,8 @@ func (es *esDataReceiver) GenConfigYAMLStr() string { endpoints: [%s] logs_index: %s traces_index: %s - flush: - interval: 1s + batcher: + flush_timeout: 1s sending_queue: enabled: true storage: file_storage/elasticsearchexporter diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index 0b8ab1b514c9..b8599ce038a5 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -140,7 +140,7 @@ func prepareBenchmark( cfg.esCfg.Endpoints = []string{receiver.endpoint} cfg.esCfg.LogsIndex = TestLogsIndex cfg.esCfg.TracesIndex = TestTracesIndex - cfg.esCfg.Flush.Interval = 10 * time.Millisecond + cfg.esCfg.BatcherConfig.FlushTimeout = 10 * time.Millisecond cfg.esCfg.NumWorkers = 1 tc, err := consumer.NewTraces(func(context.Context, ptrace.Traces) error { diff --git a/exporter/elasticsearchexporter/testdata/config.yaml b/exporter/elasticsearchexporter/testdata/config.yaml index 0e54b81b19a2..03e31e6c6aba 100644 --- a/exporter/elasticsearchexporter/testdata/config.yaml +++ b/exporter/elasticsearchexporter/testdata/config.yaml @@ -14,10 +14,10 @@ elasticsearch/trace: api_key: AvFsEiPs== discover: on_start: true - flush: - min_documents: 100 - max_documents: 200 - interval: 5s + batcher: + min_size_items: 100 + max_size_items: 200 + flush_timeout: 5s retry: max_requests: 5 retry_on_status: @@ -37,10 +37,10 @@ elasticsearch/log: api_key: AvFsEiPs== discover: on_start: true - flush: - min_documents: 100 - max_documents: 200 - interval: 5s + batcher: + min_size_items: 100 + max_size_items: 200 + flush_timeout: 5s retry: max_requests: 5 retry_on_status: From 6fc2df9454271f6893ac104f79d5883cfbd2da0a Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Fri, 7 Jun 
2024 11:23:11 +0100 Subject: [PATCH 053/117] Remove select in AddBatchAndFlush --- .../elasticsearch_bulk.go | 57 ++++++++----------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 6b35d246f44c..9299a7b28c41 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -184,39 +184,32 @@ type bulkIndexerManager struct { } func (p *bulkIndexerManager) AddBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem) error { - select { - case <-ctx.Done(): - return ctx.Err() - case <-p.closeCh: - return fmt.Errorf("bulk indexer is closed") - default: - var maxDocRetry int - if p.config.Retry.Enabled { - // max_requests includes initial attempt - // See https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32344 - maxDocRetry = p.config.Retry.MaxRequests - 1 - } - bi, err := docappender.NewBulkIndexer(docappender.BulkIndexerConfig{ - Client: p.esClient, - MaxDocumentRetries: maxDocRetry, - Pipeline: p.config.Pipeline, - RetryOnDocumentStatus: p.config.Retry.RetryOnStatus, - }) - if err != nil { - return err - } - p.wg.Add(1) - defer p.wg.Done() - w := worker{ - indexer: bi, - closeCh: p.closeCh, - flushTimeout: p.config.Timeout, - retryBackoff: createElasticsearchBackoffFunc(&p.config.Retry), - logger: p.logger, - stats: &p.stats, - } - return w.addBatchAndFlush(ctx, batch) + var maxDocRetry int + if p.config.Retry.Enabled { + // max_requests includes initial attempt + // See https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32344 + maxDocRetry = p.config.Retry.MaxRequests - 1 + } + bi, err := docappender.NewBulkIndexer(docappender.BulkIndexerConfig{ + Client: p.esClient, + MaxDocumentRetries: maxDocRetry, + Pipeline: p.config.Pipeline, + RetryOnDocumentStatus: p.config.Retry.RetryOnStatus, + }) + if err != nil { + return err + } + p.wg.Add(1) + defer p.wg.Done() + w := worker{ + indexer: bi, + closeCh: p.closeCh, + flushTimeout: p.config.Timeout, + retryBackoff: createElasticsearchBackoffFunc(&p.config.Retry), + logger: p.logger, + stats: &p.stats, } + return w.addBatchAndFlush(ctx, batch) } // Close closes the closeCh channel and wait for workers to finish. 
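Patches 051 through 053 replace the fixed worker pool with per-request bulk indexers; what keeps shutdown correct is the WaitGroup-plus-close-channel pattern sketched below (a simplified stand-in for the diff above, not the exporter's exact code):

```go
package esbulk

import (
	"context"
	"sync"
)

// manager is a trimmed stand-in for bulkIndexerManager.
type manager struct {
	closeCh chan struct{}
	wg      sync.WaitGroup
}

// addBatchAndFlush marks itself in-flight for its whole lifetime so that
// Close can wait for it. (Patch 056 later moves the Add ahead of every
// early return for exactly this reason.)
func (m *manager) addBatchAndFlush(ctx context.Context) error {
	m.wg.Add(1)
	defer m.wg.Done()
	// ... create a fresh bulk indexer, add the batch items, flush with retries ...
	return ctx.Err()
}

// Close signals shutdown, then waits for in-flight flushes to drain,
// giving up once the caller's context expires.
func (m *manager) Close(ctx context.Context) error {
	close(m.closeCh)
	done := make(chan struct{})
	go func() {
		m.wg.Wait()
		close(done)
	}()
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-done:
		return nil
	}
}
```

The close channel still matters without the pool: each worker keeps its own `closeCh` reference (visible in the diffs above) so that a shutdown signal can cut a retry backoff short instead of waiting it out.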
From da8a530855ef08fabbe72723d214a250b9afd8a9 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Fri, 7 Jun 2024 11:38:41 +0100 Subject: [PATCH 054/117] Use semaphore in bulkindexer --- exporter/elasticsearchexporter/config_test.go | 2 ++ .../elasticsearchexporter/elasticsearch_bulk.go | 13 ++++++++++--- exporter/elasticsearchexporter/factory.go | 1 + exporter/elasticsearchexporter/go.mod | 2 +- exporter/elasticsearchexporter/testdata/config.yaml | 2 ++ 5 files changed, 16 insertions(+), 4 deletions(-) diff --git a/exporter/elasticsearchexporter/config_test.go b/exporter/elasticsearchexporter/config_test.go index ab12c3cd3852..74ded5812325 100644 --- a/exporter/elasticsearchexporter/config_test.go +++ b/exporter/elasticsearchexporter/config_test.go @@ -95,6 +95,7 @@ func TestConfig(t *testing.T) { PrefixSeparator: "-", DateFormat: "%Y.%m.%d", }, + NumWorkers: 1, }, }, { @@ -148,6 +149,7 @@ func TestConfig(t *testing.T) { PrefixSeparator: "-", DateFormat: "%Y.%m.%d", }, + NumWorkers: 1, }, }, { diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 9299a7b28c41..e8756b819d55 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -17,6 +17,7 @@ import ( "github.com/elastic/go-docappender/v2" elasticsearch7 "github.com/elastic/go-elasticsearch/v7" "go.uber.org/zap" + "golang.org/x/sync/semaphore" "github.com/open-telemetry/opentelemetry-collector-contrib/internal/common/sanitize" ) @@ -166,6 +167,7 @@ func newBulkIndexer(logger *zap.Logger, client *elasticsearch7.Client, config *C logger: logger, config: config, wg: &sync.WaitGroup{}, + sem: semaphore.NewWeighted(int64(config.NumWorkers)), } return pool, nil } @@ -181,6 +183,7 @@ type bulkIndexerManager struct { logger *zap.Logger config *Config wg *sync.WaitGroup + sem *semaphore.Weighted } func (p *bulkIndexerManager) AddBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem) error { @@ -208,6 +211,7 @@ func (p *bulkIndexerManager) AddBatchAndFlush(ctx context.Context, batch []esBul retryBackoff: createElasticsearchBackoffFunc(&p.config.Retry), logger: p.logger, stats: &p.stats, + sem: p.sem, } return w.addBatchAndFlush(ctx, batch) } @@ -232,7 +236,7 @@ type worker struct { indexer *docappender.BulkIndexer closeCh <-chan struct{} flushTimeout time.Duration - mu sync.Mutex + sem *semaphore.Weighted retryBackoff func(int) time.Duration @@ -242,8 +246,11 @@ type worker struct { } func (w *worker) addBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem) error { - w.mu.Lock() - defer w.mu.Unlock() + if err := w.sem.Acquire(ctx, 1); err != nil { + return err + } + defer w.sem.Release(1) + for _, item := range batch { if err := w.indexer.Add(item); err != nil { w.logger.Error("error adding item to bulk indexer", zap.Error(err)) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index d20e8fda6e2e..f13da929b1fe 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -81,6 +81,7 @@ func createDefaultConfig() component.Config { PrefixSeparator: "-", DateFormat: "%Y.%m.%d", }, + NumWorkers: runtime.NumCPU(), } } diff --git a/exporter/elasticsearchexporter/go.mod b/exporter/elasticsearchexporter/go.mod index 67894baffb45..83c679deb6ed 100644 --- a/exporter/elasticsearchexporter/go.mod +++ b/exporter/elasticsearchexporter/go.mod @@ -22,6 +22,7 @@ require ( go.opentelemetry.io/otel/trace v1.27.0 
go.uber.org/goleak v1.3.0 go.uber.org/zap v1.27.0 + golang.org/x/sync v0.7.0 ) require ( @@ -70,7 +71,6 @@ require ( go.opentelemetry.io/otel/sdk/metric v1.27.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/net v0.25.0 // indirect - golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.20.0 // indirect golang.org/x/text v0.15.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240520151616-dc85e6b867a5 // indirect diff --git a/exporter/elasticsearchexporter/testdata/config.yaml b/exporter/elasticsearchexporter/testdata/config.yaml index 03e31e6c6aba..cd8092f95fbf 100644 --- a/exporter/elasticsearchexporter/testdata/config.yaml +++ b/exporter/elasticsearchexporter/testdata/config.yaml @@ -23,6 +23,7 @@ elasticsearch/trace: retry_on_status: - 429 - 500 + num_workers: 1 elasticsearch/log: tls: insecure: false @@ -48,6 +49,7 @@ elasticsearch/log: - 500 sending_queue: enabled: true + num_workers: 1 elasticsearch/logstash_format: endpoints: [http://localhost:9200] logstash_format: From caa68cf30c4e116bc330c5adf95794a84eef3b2d Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Fri, 7 Jun 2024 11:47:40 +0100 Subject: [PATCH 055/117] Push semaphore up from worker to manager --- .../elasticsearchexporter/elasticsearch_bulk.go | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index e8756b819d55..93192a399030 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -160,7 +160,7 @@ func createElasticsearchBackoffFunc(config *RetrySettings) func(int) time.Durati } func newBulkIndexer(logger *zap.Logger, client *elasticsearch7.Client, config *Config) (*esBulkIndexerCurrent, error) { - pool := &bulkIndexerManager{ + manager := &bulkIndexerManager{ closeCh: make(chan struct{}), stats: bulkIndexerStats{}, esClient: client, @@ -169,7 +169,7 @@ func newBulkIndexer(logger *zap.Logger, client *elasticsearch7.Client, config *C wg: &sync.WaitGroup{}, sem: semaphore.NewWeighted(int64(config.NumWorkers)), } - return pool, nil + return manager, nil } type bulkIndexerStats struct { @@ -187,6 +187,11 @@ type bulkIndexerManager struct { } func (p *bulkIndexerManager) AddBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem) error { + if err := p.sem.Acquire(ctx, 1); err != nil { + return err + } + defer p.sem.Release(1) + var maxDocRetry int if p.config.Retry.Enabled { // max_requests includes initial attempt @@ -211,7 +216,6 @@ func (p *bulkIndexerManager) AddBatchAndFlush(ctx context.Context, batch []esBul retryBackoff: createElasticsearchBackoffFunc(&p.config.Retry), logger: p.logger, stats: &p.stats, - sem: p.sem, } return w.addBatchAndFlush(ctx, batch) } @@ -236,7 +240,6 @@ type worker struct { indexer *docappender.BulkIndexer closeCh <-chan struct{} flushTimeout time.Duration - sem *semaphore.Weighted retryBackoff func(int) time.Duration @@ -246,11 +249,6 @@ type worker struct { } func (w *worker) addBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem) error { - if err := w.sem.Acquire(ctx, 1); err != nil { - return err - } - defer w.sem.Release(1) - for _, item := range batch { if err := w.indexer.Add(item); err != nil { w.logger.Error("error adding item to bulk indexer", zap.Error(err)) From 428f7d7d44ebbd8c57fb599aa5efc33c1158f95d Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Fri, 7 Jun 2024 14:58:42 +0100 Subject: [PATCH 056/117] Move wg before sem --- 
 exporter/elasticsearchexporter/elasticsearch_bulk.go | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go
index 93192a399030..eeaeea642d66 100644
--- a/exporter/elasticsearchexporter/elasticsearch_bulk.go
+++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go
@@ -187,6 +187,9 @@ type bulkIndexerManager struct {
 }
 
 func (p *bulkIndexerManager) AddBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem) error {
+	p.wg.Add(1)
+	defer p.wg.Done()
+
 	if err := p.sem.Acquire(ctx, 1); err != nil {
 		return err
 	}
@@ -207,8 +210,6 @@ func (p *bulkIndexerManager) AddBatchAndFlush(ctx context.Context, batch []esBul
 	if err != nil {
 		return err
 	}
-	p.wg.Add(1)
-	defer p.wg.Done()
 	w := worker{
 		indexer: bi,
 		closeCh: p.closeCh,
From 714e26d98e8bf0f4925a7bbb5368b03fffffcf12 Mon Sep 17 00:00:00 2001
From: Carson Ip
Date: Fri, 7 Jun 2024 16:52:17 +0100
Subject: [PATCH 057/117] Handle retry disabled

---
 exporter/elasticsearchexporter/elasticsearch_bulk.go | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go
index eeaeea642d66..06dbbb7ab6d2 100644
--- a/exporter/elasticsearchexporter/elasticsearch_bulk.go
+++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go
@@ -258,9 +258,16 @@ func (w *worker) addBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem
 	for attempts := 0; ; attempts++ {
 		if err := w.flush(ctx); err != nil {
 			return err
-		} else if w.indexer.Items() == 0 {
+		}
+		if w.indexer.Items() == 0 {
 			return nil
 		}
+		if w.retryBackoff == nil {
+			// This should never happen in practice.
+			// When retry is disabled or the document-level retry limit is reached,
+			// documents should go into FailedDocs instead of the indexer buffer.
+			return errors.New("bulk indexer contains documents pending retry but retry is disabled")
+		}
 		backoff := w.retryBackoff(attempts + 1) // TODO: use exporterhelper retry_sender
 		timer := time.NewTimer(backoff)
 		defer timer.Stop()
From 99f2b63ba9f67aeb74dbff26a5d030f006811e91 Mon Sep 17 00:00:00 2001
From: Carson Ip
Date: Fri, 7 Jun 2024 17:59:12 +0100
Subject: [PATCH 058/117] Update changelog

---
 .chloggen/elasticsearchexporter_batchsender.yaml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.chloggen/elasticsearchexporter_batchsender.yaml b/.chloggen/elasticsearchexporter_batchsender.yaml
index 4af4f85e2813..5702961aed00 100644
--- a/.chloggen/elasticsearchexporter_batchsender.yaml
+++ b/.chloggen/elasticsearchexporter_batchsender.yaml
@@ -7,7 +7,7 @@ change_type: breaking
 component: elasticsearchexporter
 
 # A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
-note: Improve reliability when used with persistent queue. Remove support for option `flush.bytes`.
+note: Improve reliability when used with persistent queue. Remove support for `flush.*`, use `batcher.*` instead.
 
 # Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
 issues: [32377]
 
 # (Optional) One or more lines of additional information to render under the primary note.
 # These lines will be padded with 2 spaces and then inserted directly into the document.
 # Use pipe (|) for multiline entries.
 subtext: |
-  Move buffering from bulk indexer to batchsender to improve reliability.
+  Move buffering from bulk indexer to batch sender to improve reliability.
With this change, there should be no event loss when used with persistent queue in the event of a collector crash. - Option `flush.bytes` is now ignored. - Use the new `flush.min_documents` option to control the minimum number of documents to trigger a flush. - Introduce option `flush.max_documents` to control the maximum number of documents in a request. + Option `flush.bytes` is now ignored. Use the new `batcher.min_documents` option to control the minimum number of documents to trigger a flush. + Option `flush.interval` is now ignored. Use the new `batcher.flush_timeout` option to control max age of buffer. + Introduce option `batch.max_documents` to control the maximum number of documents in a request. # If your change doesn't affect end users or the exported elements of any package, # you should instead start your pull request title with [chore] or use the "Skip Changelog" label. From 5f051162e9c9b79a32d2d76b7f4bfa2c6e46cee6 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 10 Jun 2024 16:54:54 +0100 Subject: [PATCH 059/117] Enable queue sender by default --- .chloggen/elasticsearchexporter_batchsender.yaml | 3 ++- exporter/elasticsearchexporter/config_test.go | 4 ++-- exporter/elasticsearchexporter/factory.go | 2 +- exporter/elasticsearchexporter/testdata/config.yaml | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.chloggen/elasticsearchexporter_batchsender.yaml b/.chloggen/elasticsearchexporter_batchsender.yaml index 5702961aed00..25d618f9305e 100644 --- a/.chloggen/elasticsearchexporter_batchsender.yaml +++ b/.chloggen/elasticsearchexporter_batchsender.yaml @@ -18,9 +18,10 @@ issues: [32377] subtext: | Move buffering from bulk indexer to batch sender to improve reliability. With this change, there should be no event loss when used with persistent queue in the event of a collector crash. + Introduce `batcher.*` to configure the batch sender which is now enabled by default. Option `flush.bytes` is now ignored. Use the new `batcher.min_documents` option to control the minimum number of documents to trigger a flush. Option `flush.interval` is now ignored. Use the new `batcher.flush_timeout` option to control max age of buffer. - Introduce option `batch.max_documents` to control the maximum number of documents in a request. + Queue sender `sending_queue.enabled` defaults to `true`. # If your change doesn't affect end users or the exported elements of any package, # you should instead start your pull request title with [chore] or use the "Skip Changelog" label. 
diff --git a/exporter/elasticsearchexporter/config_test.go b/exporter/elasticsearchexporter/config_test.go index bffd9a37cdf8..67bb906db42c 100644 --- a/exporter/elasticsearchexporter/config_test.go +++ b/exporter/elasticsearchexporter/config_test.go @@ -49,7 +49,7 @@ func TestConfig(t *testing.T) { configFile: "config.yaml", expected: &Config{ QueueSettings: exporterhelper.QueueSettings{ - Enabled: false, + Enabled: true, NumConsumers: 100, QueueSize: exporterqueue.NewDefaultConfig().QueueSize, }, @@ -103,7 +103,7 @@ func TestConfig(t *testing.T) { configFile: "config.yaml", expected: &Config{ QueueSettings: exporterhelper.QueueSettings{ - Enabled: true, + Enabled: false, NumConsumers: 100, QueueSize: exporterqueue.NewDefaultConfig().QueueSize, }, diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index ec159cb2d148..7328a59e2076 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -40,7 +40,7 @@ func NewFactory() exporter.Factory { func createDefaultConfig() component.Config { return &Config{ QueueSettings: exporterhelper.QueueSettings{ - Enabled: false, // FIXME: how does batching without queuing look like? + Enabled: exporterhelper.NewDefaultQueueSettings().Enabled, NumConsumers: 100, QueueSize: exporterhelper.NewDefaultQueueSettings().QueueSize, }, diff --git a/exporter/elasticsearchexporter/testdata/config.yaml b/exporter/elasticsearchexporter/testdata/config.yaml index cd8092f95fbf..013e223d36ad 100644 --- a/exporter/elasticsearchexporter/testdata/config.yaml +++ b/exporter/elasticsearchexporter/testdata/config.yaml @@ -48,7 +48,7 @@ elasticsearch/log: - 429 - 500 sending_queue: - enabled: true + enabled: false num_workers: 1 elasticsearch/logstash_format: endpoints: [http://localhost:9200] From b91a2ada1f207c4f0c8382dc08b2f84c57368438 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 10 Jun 2024 17:03:54 +0100 Subject: [PATCH 060/117] Remove force enabled batcher --- exporter/elasticsearchexporter/factory.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 7328a59e2076..8c31ca5a9ef1 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -112,7 +112,6 @@ func createLogsExporter( return nil, fmt.Errorf("cannot configure Elasticsearch exporter: %w", err) } - cf.BatcherConfig.Enabled = true return exporterhelper.NewLogsExporter( ctx, set, @@ -143,7 +142,6 @@ func createTracesExporter(ctx context.Context, return nil, fmt.Errorf("cannot configure Elasticsearch exporter: %w", err) } - cf.BatcherConfig.Enabled = true return exporterhelper.NewTracesExporter( ctx, set, From 3a7c19cfc4317b819ce358a974ea74669ca84500 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 10 Jun 2024 17:27:53 +0100 Subject: [PATCH 061/117] Handle deprecated options properly --- exporter/elasticsearchexporter/factory.go | 46 ++++++++++++++++------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 8c31ca5a9ef1..835fd6103439 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -7,6 +7,7 @@ package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry import ( "context" + "errors" "fmt" "net/http" "runtime" @@ -16,6 +17,7 @@ import ( "go.opentelemetry.io/collector/exporter" 
"go.opentelemetry.io/collector/exporter/exporterbatcher" "go.opentelemetry.io/collector/exporter/exporterhelper" + "go.uber.org/zap" "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/metadata" ) @@ -95,19 +97,13 @@ func createLogsExporter( ) (exporter.Logs, error) { cf := cfg.(*Config) - index := cf.LogsIndex - if cf.Index != "" { - set.Logger.Warn("index option are deprecated and replaced with logs_index and traces_index.") - index = cf.Index - } + setDefaultUserAgentHeader(cf, set.BuildInfo) - if cf.Flush.Bytes != 0 { - set.Logger.Warn("flush.bytes option is ignored. Use batcher.min_size_items instead.") + if err := handleDeprecations(cf, set.Logger); err != nil { + return nil, err } - setDefaultUserAgentHeader(cf, set.BuildInfo) - - exporter, err := newExporter(set.Logger, cf, index, cf.LogsDynamicIndex.Enabled) + exporter, err := newExporter(set.Logger, cf, cf.LogsIndex, cf.LogsDynamicIndex.Enabled) if err != nil { return nil, fmt.Errorf("cannot configure Elasticsearch exporter: %w", err) } @@ -131,12 +127,12 @@ func createTracesExporter(ctx context.Context, cf := cfg.(*Config) - if cf.Flush.Bytes != 0 { - set.Logger.Warn("flush.bytes option is ignored. Use batcher.min_size_items instead.") - } - setDefaultUserAgentHeader(cf, set.BuildInfo) + if err := handleDeprecations(cf, set.Logger); err != nil { + return nil, err + } + exporter, err := newExporter(set.Logger, cf, cf.TracesIndex, cf.TracesDynamicIndex.Enabled) if err != nil { return nil, fmt.Errorf("cannot configure Elasticsearch exporter: %w", err) @@ -170,3 +166,25 @@ func getTimeoutConfig() exporterhelper.TimeoutSettings { Timeout: time.Duration(0), // effectively disable timeout_sender because timeout is enforced in bulk indexer } } + +// handleDeprecations handles deprecated config options. +// If possible, translate deprecated config options to new config options +// Otherwise, return an error so that the user is aware of an unsupported option. +func handleDeprecations(cf *Config, logger *zap.Logger) error { + if cf.Index != "" { + logger.Warn(`"index" option is deprecated and replaced with "logs_index" and "traces_index". "logs_index" is set to the value of "index".`) + cf.LogsIndex = cf.Index + } + + if cf.Flush.Bytes != 0 { + // cannot translate flush.bytes to batcher.min_size_items because they are in different units + return errors.New(`"flush.bytes" option is unsupported, use "batcher.min_size_items" instead`) + } + + if cf.Flush.Interval != 0 { + logger.Warn(`"flush.interval" option is deprecated and replaced with "batcher.flush_timeout". "batcher.flush_timeout" is set to the value of "flush.interval".`) + cf.BatcherConfig.FlushTimeout = cf.Flush.Interval + } + + return nil +} From ca69b649ea72060e1c36845b3812f81efe136971 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 10 Jun 2024 17:30:45 +0100 Subject: [PATCH 062/117] Update changelog --- .chloggen/elasticsearchexporter_batchsender.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.chloggen/elasticsearchexporter_batchsender.yaml b/.chloggen/elasticsearchexporter_batchsender.yaml index 25d618f9305e..a849a39304f8 100644 --- a/.chloggen/elasticsearchexporter_batchsender.yaml +++ b/.chloggen/elasticsearchexporter_batchsender.yaml @@ -19,7 +19,7 @@ subtext: | Move buffering from bulk indexer to batch sender to improve reliability. With this change, there should be no event loss when used with persistent queue in the event of a collector crash. 
Introduce `batcher.*` to configure the batch sender which is now enabled by default. - Option `flush.bytes` is now ignored. Use the new `batcher.min_documents` option to control the minimum number of documents to trigger a flush. + Option `flush.bytes` is now ignored. Exporter will not start if `flush.bytes` is configured. Use the new `batcher.min_documents` option to control the minimum number of documents to trigger a flush. Option `flush.interval` is now ignored. Use the new `batcher.flush_timeout` option to control max age of buffer. Queue sender `sending_queue.enabled` defaults to `true`. From e68b427f1c705dd9b4ec5ee97d3d25d3682bafe6 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 10 Jun 2024 17:32:20 +0100 Subject: [PATCH 063/117] Update readme num_workers --- exporter/elasticsearchexporter/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index 953affddd895..8010caf53f1c 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -22,7 +22,7 @@ This exporter supports sending OpenTelemetry logs and traces to [Elasticsearch]( [ID](https://www.elastic.co/guide/en/cloud/current/ec-cloud-id.html) of the Elastic Cloud Cluster to publish events to. The `cloudid` can be used instead of `endpoints`. -- `num_workers` (default=runtime.NumCPU()): Number of workers publishing bulk requests concurrently. +- `num_workers` (default=runtime.NumCPU()): Maximum number of concurrent bulk requests. - `index` (DEPRECATED, please use `logs_index` for logs, `traces_index` for traces): The [index](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices.html) or [data stream](https://www.elastic.co/guide/en/elasticsearch/reference/current/data-streams.html) From 7312e3638fbe93c75a7090a0be3e6c32d585134d Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 10 Jun 2024 18:16:51 +0100 Subject: [PATCH 064/117] Use sync.Pool for bulk indexer --- .../elasticsearch_bulk.go | 75 ++++++++++++------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 3e7427e6403e..63be3506d77a 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -157,14 +157,40 @@ func createElasticsearchBackoffFunc(config *RetrySettings) func(int) time.Durati func newBulkIndexer(logger *zap.Logger, client *elasticsearch7.Client, config *Config) (*esBulkIndexerCurrent, error) { manager := &bulkIndexerManager{ - closeCh: make(chan struct{}), - stats: bulkIndexerStats{}, - esClient: client, - logger: logger, - config: config, - wg: &sync.WaitGroup{}, - sem: semaphore.NewWeighted(int64(config.NumWorkers)), + closeCh: make(chan struct{}), + stats: bulkIndexerStats{}, + logger: logger, + config: config, + wg: &sync.WaitGroup{}, + sem: semaphore.NewWeighted(int64(config.NumWorkers)), } + + var maxDocRetry int + if config.Retry.Enabled { + // max_requests includes initial attempt + // See https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32344 + maxDocRetry = config.Retry.MaxRequests - 1 + } + manager.pool = &sync.Pool{ + New: func() any { + bi, err := docappender.NewBulkIndexer(docappender.BulkIndexerConfig{ + Client: client, + MaxDocumentRetries: maxDocRetry, + Pipeline: config.Pipeline, + RetryOnDocumentStatus: config.Retry.RetryOnStatus, + }) + if err != nil { + 
return fmt.Errorf("error creating docappender bulk indexer: %w", err) + } + return bi + }, + } + + // Create a bulk indexer once to validate the config options + if err, ok := manager.pool.Get().(error); ok { + return nil, err + } + return manager, nil } @@ -173,13 +199,13 @@ type bulkIndexerStats struct { } type bulkIndexerManager struct { - closeCh chan struct{} - stats bulkIndexerStats - esClient *elasticsearch7.Client - logger *zap.Logger - config *Config - wg *sync.WaitGroup - sem *semaphore.Weighted + closeCh chan struct{} + stats bulkIndexerStats + logger *zap.Logger + config *Config + wg *sync.WaitGroup + sem *semaphore.Weighted + pool *sync.Pool } func (p *bulkIndexerManager) AddBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem) error { @@ -191,21 +217,14 @@ func (p *bulkIndexerManager) AddBatchAndFlush(ctx context.Context, batch []esBul } defer p.sem.Release(1) - var maxDocRetry int - if p.config.Retry.Enabled { - // max_requests includes initial attempt - // See https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32344 - maxDocRetry = p.config.Retry.MaxRequests - 1 - } - bi, err := docappender.NewBulkIndexer(docappender.BulkIndexerConfig{ - Client: p.esClient, - MaxDocumentRetries: maxDocRetry, - Pipeline: p.config.Pipeline, - RetryOnDocumentStatus: p.config.Retry.RetryOnStatus, - }) - if err != nil { - return err + bi := p.pool.Get().(*docappender.BulkIndexer) + // Bulk indexer buffer should never contain any items, but double check for safety. + for bi.Items() != 0 { + p.logger.Error("bug: bulk indexer buffer contains unexpected residue") + bi = p.pool.Get().(*docappender.BulkIndexer) } + defer p.pool.Put(bi) + w := worker{ indexer: bi, closeCh: p.closeCh, From 5d428e1fa89eba61b18c65da7aa1332bd20d5c1c Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 10 Jun 2024 18:33:28 +0100 Subject: [PATCH 065/117] Update description --- .../elasticsearchexporter_batchsender.yaml | 2 +- exporter/elasticsearchexporter/README.md | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.chloggen/elasticsearchexporter_batchsender.yaml b/.chloggen/elasticsearchexporter_batchsender.yaml index a849a39304f8..993db1fa079c 100644 --- a/.chloggen/elasticsearchexporter_batchsender.yaml +++ b/.chloggen/elasticsearchexporter_batchsender.yaml @@ -19,7 +19,7 @@ subtext: | Move buffering from bulk indexer to batch sender to improve reliability. With this change, there should be no event loss when used with persistent queue in the event of a collector crash. Introduce `batcher.*` to configure the batch sender which is now enabled by default. - Option `flush.bytes` is now ignored. Exporter will not start if `flush.bytes` is configured. Use the new `batcher.min_documents` option to control the minimum number of documents to trigger a flush. + Option `flush.bytes` is now ignored. Exporter will not start if `flush.bytes` is configured. Use the new `batcher.min_size_items` option to control the minimum number of items (log records, spans) to trigger a flush. Option `flush.interval` is now ignored. Use the new `batcher.flush_timeout` option to control max age of buffer. Queue sender `sending_queue.enabled` defaults to `true`. 
diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index 8010caf53f1c..5a7c3f3e3bce 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -51,12 +51,13 @@ This exporter supports sending OpenTelemetry logs and traces to [Elasticsearch]( - `date_format`(default=`%Y.%m.%d`): Time format (based on strftime) to generate the second part of the Index name. - `pipeline` (optional): Optional [Ingest pipeline](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html) ID used for processing documents published by the exporter. - `batcher`: Exporter batching settings - - `min_size_items` (default=125): Minimum number of documents in the buffer to trigger a flush immediately. - - `max_size_items` (default=0): Maximum number of documents in a request. In practice, the number of documents in a request may exceed MaxDocuments if the request cannot be split into smaller ones. - - `flush_timeout` (default=30s): Max age of a document in the buffer. A flush will happen regardless of the size of content in buffer. -- `flush`: Event bulk indexer buffer flush settings - - `bytes` (DEPRECATED, use `flush.min_documents` instead): Write buffer flush size limit. WARNING: This configuration is ignored. - - `interval` (DEPRECATED, use `batcher.flush_timeout` instead): Max age of a document in the buffer. A flush will happen regardless of the size of content in buffer. WARNING: This configuration is ignored. + - `enabled` (default=true): Enable batching of requests into a single bulk request. + - `min_size_items` (default=125): Minimum number of log records / spans in the buffer to trigger a flush immediately. + - `max_size_items` (default=0): Maximum number of log records / spans in a request. 0 means there is no limit on the maximum number. + - `flush_timeout` (default=30s): Maximum time of the oldest item spent inside the buffer, aka "max age of buffer". A flush will happen regardless of the size of content in buffer. +- `flush`: (DEPRECATED) Event bulk indexer buffer flush settings + - `bytes` (DEPRECATED, use `batcher.min_size_items` instead): Write buffer flush size limit. WARNING: This configuration is ignored. + - `interval` (DEPRECATED, use `batcher.flush_timeout` instead): Max age of a document in the buffer. A flush will happen regardless of the size of content in buffer. - `retry`: Elasticsearch bulk request retry settings - `enabled` (default=true): Enable/Disable request retry on error. Failed requests are retried with exponential backoff. - `max_requests` (default=3): Number of HTTP request retries. @@ -81,8 +82,8 @@ This exporter supports sending OpenTelemetry logs and traces to [Elasticsearch]( will reject documents that have duplicate fields. - `dedot` (default=true): When enabled attributes with `.` will be split into proper json objects. -- `sending_queue` - - `enabled` (default=false) +- `sending_queue`: Queue sender settings + - `enabled` (default=true) - `num_consumers` (default=100): Number of consumers that dequeue batches. A combined batch cannot contain more batches than the number of consumers. - `queue_size` (default=1000): Maximum number of batches kept in queue. - `storage` (optional): If not empty, it enables the persistent storage and uses the component specified as a storage extension for the persistent queue. When persistent queue is used, there should be no event loss even on collector crashes. 
From 27f055d8c15b1b8244611bcab6dc4e90a052e5d4 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 10 Jun 2024 18:37:41 +0100 Subject: [PATCH 066/117] Better log --- exporter/elasticsearchexporter/factory.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 835fd6103439..78ac1fb09e96 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -172,7 +172,7 @@ func getTimeoutConfig() exporterhelper.TimeoutSettings { // Otherwise, return an error so that the user is aware of an unsupported option. func handleDeprecations(cf *Config, logger *zap.Logger) error { if cf.Index != "" { - logger.Warn(`"index" option is deprecated and replaced with "logs_index" and "traces_index". "logs_index" is set to the value of "index".`) + logger.Warn(`"index" option is deprecated and replaced with "logs_index" and "traces_index". Setting "logs_index" to the value of "index".`) cf.LogsIndex = cf.Index } @@ -182,7 +182,7 @@ func handleDeprecations(cf *Config, logger *zap.Logger) error { } if cf.Flush.Interval != 0 { - logger.Warn(`"flush.interval" option is deprecated and replaced with "batcher.flush_timeout". "batcher.flush_timeout" is set to the value of "flush.interval".`) + logger.Warn(`"flush.interval" option is deprecated and replaced with "batcher.flush_timeout". Setting "batcher.flush_timeout" to the value of "flush.interval".`) cf.BatcherConfig.FlushTimeout = cf.Flush.Interval } From 3a17fa53fb5e4edee945ac3747d31c108dcf7552 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 11 Jun 2024 10:20:42 +0100 Subject: [PATCH 067/117] Update TODO in integrationtest --- .../elasticsearchexporter/integrationtest/exporter_test.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_test.go index 1cf6e6a6c1f6..f78d8e9ae111 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_test.go @@ -29,9 +29,10 @@ func TestExporter(t *testing.T) { {name: "basic"}, {name: "es_intermittent_failure", mockESFailure: true}, {name: "collector_restarts", restartCollector: true}, - // Test is failing because exporter does not shut down when in-flight requests block indefinitely. 
- // See https://github.com/open-telemetry/opentelemetry-collector/issues/10166 - // TODO: re-enable test + // Test is failing due to timeout because in-flight requests are not aware of shutdown + // and will keep retrying up to the configured retry limit + // TODO: re-enable test after moving to use retry sender + // as https://github.com/open-telemetry/opentelemetry-collector/issues/10166 is fixed // {name: "collector_restart_with_es_intermittent_failure", mockESFailure: true, restartCollector: true}, } { t.Run(fmt.Sprintf("%s/%s", eventType, tc.name), func(t *testing.T) { From a354670262dae4d370c45f2946b3a4c6ed9b674f Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 11 Jun 2024 17:04:12 +0100 Subject: [PATCH 068/117] Refactor default config --- exporter/elasticsearchexporter/factory.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 78ac1fb09e96..05c994f84310 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -40,12 +40,10 @@ func NewFactory() exporter.Factory { } func createDefaultConfig() component.Config { + qs := exporterhelper.NewDefaultQueueSettings() + qs.NumConsumers = 100 // default is too small as it also sets batch sender concurrency limit return &Config{ - QueueSettings: exporterhelper.QueueSettings{ - Enabled: exporterhelper.NewDefaultQueueSettings().Enabled, - NumConsumers: 100, - QueueSize: exporterhelper.NewDefaultQueueSettings().QueueSize, - }, + QueueSettings: qs, ClientConfig: ClientConfig{ Timeout: 90 * time.Second, }, From f6910bbcc23eef9e5af0f03166891c1ea6634af7 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 11 Jun 2024 17:08:29 +0100 Subject: [PATCH 069/117] Update changelog --- .chloggen/elasticsearchexporter_batchsender.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.chloggen/elasticsearchexporter_batchsender.yaml b/.chloggen/elasticsearchexporter_batchsender.yaml index 993db1fa079c..53ed16e5e30e 100644 --- a/.chloggen/elasticsearchexporter_batchsender.yaml +++ b/.chloggen/elasticsearchexporter_batchsender.yaml @@ -7,7 +7,7 @@ change_type: breaking component: elasticsearchexporter # A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). -note: Improve reliability when used with persistent queue. Remove support for `flush.*`, use `batcher.*` instead. +note: Improve reliability when used with persistent queue. Deprecate config options `flush.*`, use `batcher.*` instead. # Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. issues: [32377] @@ -19,8 +19,8 @@ subtext: | Move buffering from bulk indexer to batch sender to improve reliability. With this change, there should be no event loss when used with persistent queue in the event of a collector crash. Introduce `batcher.*` to configure the batch sender which is now enabled by default. - Option `flush.bytes` is now ignored. Exporter will not start if `flush.bytes` is configured. Use the new `batcher.min_size_items` option to control the minimum number of items (log records, spans) to trigger a flush. - Option `flush.interval` is now ignored. Use the new `batcher.flush_timeout` option to control max age of buffer. + Option `flush.bytes` is now ignored. Exporter will not start if `flush.bytes` is non-zero. 
Use the new `batcher.min_size_items` option to control the minimum number of items (log records, spans) to trigger a flush. + Option `flush.interval` is deprecated. Use the new `batcher.flush_timeout` option to control max age of buffer. `batcher.flush_timeout` will be set to the value of `flush.interval` if `flush.interval` is non-zero. Queue sender `sending_queue.enabled` defaults to `true`. # If your change doesn't affect end users or the exported elements of any package, From 9313acb05d349fb48eea607b267ac4f4d169f58d Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 11 Jun 2024 17:14:57 +0100 Subject: [PATCH 070/117] Return err on error adding item to bulk indexer --- exporter/elasticsearchexporter/elasticsearch_bulk.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 63be3506d77a..7c4dab116022 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -267,7 +267,7 @@ type worker struct { func (w *worker) addBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem) error { for _, item := range batch { if err := w.indexer.Add(item); err != nil { - w.logger.Error("error adding item to bulk indexer", zap.Error(err)) + return fmt.Errorf("failed to add item to bulk indexer: %w", err) } } for attempts := 0; ; attempts++ { From 96fa3f8e61c308aeafcaea6111c7785fd8670392 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 11 Jun 2024 17:31:21 +0100 Subject: [PATCH 071/117] Inline custom request model to avoid confusion --- exporter/elasticsearchexporter/exporter.go | 33 +++++++++--------- exporter/elasticsearchexporter/request.go | 39 ---------------------- 2 files changed, 17 insertions(+), 55 deletions(-) delete mode 100644 exporter/elasticsearchexporter/request.go diff --git a/exporter/elasticsearchexporter/exporter.go b/exporter/elasticsearchexporter/exporter.go index 070d19a82509..f6412c929a55 100644 --- a/exporter/elasticsearchexporter/exporter.go +++ b/exporter/elasticsearchexporter/exporter.go @@ -4,6 +4,7 @@ package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter" import ( + "bytes" "context" "errors" "fmt" @@ -65,7 +66,7 @@ func (e *elasticsearchExporter) Shutdown(ctx context.Context) error { } func (e *elasticsearchExporter) pushLogsData(ctx context.Context, ld plog.Logs) error { - req := newRequest(e.bulkIndexer) + items := make([]esBulkIndexerItem, 0, ld.LogRecordCount()) var errs []error rls := ld.ResourceLogs() for i := 0; i < rls.Len(); i++ { @@ -86,17 +87,17 @@ func (e *elasticsearchExporter) pushLogsData(ctx context.Context, ld plog.Logs) errs = append(errs, err) continue } - req.add(item) + items = append(items, item) } } } - if err := req.Export(ctx); err != nil { + if err := e.bulkIndexer.AddBatchAndFlush(ctx, items); err != nil { errs = append(errs, err) } return errors.Join(errs...) 
} -func (e *elasticsearchExporter) logRecordToItem(ctx context.Context, resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) (bulkIndexerItem, error) { +func (e *elasticsearchExporter) logRecordToItem(ctx context.Context, resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) (esBulkIndexerItem, error) { fIndex := e.index if e.dynamicIndex { prefix := getFromAttributes(indexPrefix, resource, scope, record) @@ -108,18 +109,18 @@ func (e *elasticsearchExporter) logRecordToItem(ctx context.Context, resource pc if e.logstashFormat.Enabled { formattedIndex, err := generateIndexWithLogstashFormat(fIndex, &e.logstashFormat, time.Now()) if err != nil { - return bulkIndexerItem{}, err + return esBulkIndexerItem{}, err } fIndex = formattedIndex } document, err := e.model.encodeLog(resource, record, scope) if err != nil { - return bulkIndexerItem{}, fmt.Errorf("Failed to encode log event: %w", err) + return esBulkIndexerItem{}, fmt.Errorf("Failed to encode log event: %w", err) } - return bulkIndexerItem{ + return esBulkIndexerItem{ Index: fIndex, - Body: document, + Body: bytes.NewReader(document), }, nil } @@ -127,7 +128,7 @@ func (e *elasticsearchExporter) pushTraceData( ctx context.Context, td ptrace.Traces, ) error { - req := newRequest(e.bulkIndexer) + items := make([]esBulkIndexerItem, 0, td.SpanCount()) var errs []error resourceSpans := td.ResourceSpans() for i := 0; i < resourceSpans.Len(); i++ { @@ -148,17 +149,17 @@ func (e *elasticsearchExporter) pushTraceData( errs = append(errs, err) continue } - req.add(item) + items = append(items, item) } } } - if err := req.Export(ctx); err != nil { + if err := e.bulkIndexer.AddBatchAndFlush(ctx, items); err != nil { errs = append(errs, err) } return errors.Join(errs...) 
} -func (e *elasticsearchExporter) traceRecordToItem(ctx context.Context, resource pcommon.Resource, span ptrace.Span, scope pcommon.InstrumentationScope) (bulkIndexerItem, error) { +func (e *elasticsearchExporter) traceRecordToItem(ctx context.Context, resource pcommon.Resource, span ptrace.Span, scope pcommon.InstrumentationScope) (esBulkIndexerItem, error) { fIndex := e.index if e.dynamicIndex { prefix := getFromAttributes(indexPrefix, resource, scope, span) @@ -170,17 +171,17 @@ func (e *elasticsearchExporter) traceRecordToItem(ctx context.Context, resource if e.logstashFormat.Enabled { formattedIndex, err := generateIndexWithLogstashFormat(fIndex, &e.logstashFormat, time.Now()) if err != nil { - return bulkIndexerItem{}, err + return esBulkIndexerItem{}, err } fIndex = formattedIndex } document, err := e.model.encodeSpan(resource, span, scope) if err != nil { - return bulkIndexerItem{}, fmt.Errorf("Failed to encode trace record: %w", err) + return esBulkIndexerItem{}, fmt.Errorf("Failed to encode trace record: %w", err) } - return bulkIndexerItem{ + return esBulkIndexerItem{ Index: fIndex, - Body: document, + Body: bytes.NewReader(document), }, nil } diff --git a/exporter/elasticsearchexporter/request.go b/exporter/elasticsearchexporter/request.go deleted file mode 100644 index 406a9ccad17d..000000000000 --- a/exporter/elasticsearchexporter/request.go +++ /dev/null @@ -1,39 +0,0 @@ -package elasticsearchexporter - -import ( - "bytes" - "context" -) - -type request struct { - bulkIndexer *esBulkIndexerCurrent - Items []bulkIndexerItem -} - -func newRequest(bulkIndexer *esBulkIndexerCurrent) *request { - return &request{bulkIndexer: bulkIndexer} -} - -func (r *request) Export(ctx context.Context) error { - batch := make([]esBulkIndexerItem, len(r.Items)) - for i, item := range r.Items { - batch[i] = esBulkIndexerItem{ - Index: item.Index, - Body: bytes.NewReader(item.Body), - } - } - return r.bulkIndexer.AddBatchAndFlush(ctx, batch) -} - -func (r *request) ItemsCount() int { - return len(r.Items) -} - -func (r *request) add(item bulkIndexerItem) { - r.Items = append(r.Items, item) -} - -type bulkIndexerItem struct { - Index string - Body []byte -} From 4f591d09693dea119e093815f3b273f2daf7a5c7 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 11 Jun 2024 17:32:16 +0100 Subject: [PATCH 072/117] Refactor functions in exporter --- exporter/elasticsearchexporter/exporter.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/exporter/elasticsearchexporter/exporter.go b/exporter/elasticsearchexporter/exporter.go index f6412c929a55..e140b6bcbd5a 100644 --- a/exporter/elasticsearchexporter/exporter.go +++ b/exporter/elasticsearchexporter/exporter.go @@ -78,7 +78,7 @@ func (e *elasticsearchExporter) pushLogsData(ctx context.Context, ld plog.Logs) scope := ill.Scope() logs := ill.LogRecords() for k := 0; k < logs.Len(); k++ { - item, err := e.logRecordToItem(ctx, resource, logs.At(k), scope) + item, err := e.logRecordToItem(resource, logs.At(k), scope) if err != nil { if cerr := ctx.Err(); cerr != nil { return cerr @@ -97,7 +97,7 @@ func (e *elasticsearchExporter) pushLogsData(ctx context.Context, ld plog.Logs) return errors.Join(errs...) 
} -func (e *elasticsearchExporter) logRecordToItem(ctx context.Context, resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) (esBulkIndexerItem, error) { +func (e *elasticsearchExporter) logRecordToItem(resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) (esBulkIndexerItem, error) { fIndex := e.index if e.dynamicIndex { prefix := getFromAttributes(indexPrefix, resource, scope, record) @@ -116,7 +116,7 @@ func (e *elasticsearchExporter) logRecordToItem(ctx context.Context, resource pc document, err := e.model.encodeLog(resource, record, scope) if err != nil { - return esBulkIndexerItem{}, fmt.Errorf("Failed to encode log event: %w", err) + return esBulkIndexerItem{}, fmt.Errorf("failed to encode log event: %w", err) } return esBulkIndexerItem{ Index: fIndex, @@ -141,7 +141,7 @@ func (e *elasticsearchExporter) pushTraceData( spans := scopeSpan.Spans() for k := 0; k < spans.Len(); k++ { span := spans.At(k) - item, err := e.traceRecordToItem(ctx, resource, span, scope) + item, err := e.traceRecordToItem(resource, span, scope) if err != nil { if cerr := ctx.Err(); cerr != nil { return cerr @@ -159,7 +159,7 @@ func (e *elasticsearchExporter) pushTraceData( return errors.Join(errs...) } -func (e *elasticsearchExporter) traceRecordToItem(ctx context.Context, resource pcommon.Resource, span ptrace.Span, scope pcommon.InstrumentationScope) (esBulkIndexerItem, error) { +func (e *elasticsearchExporter) traceRecordToItem(resource pcommon.Resource, span ptrace.Span, scope pcommon.InstrumentationScope) (esBulkIndexerItem, error) { fIndex := e.index if e.dynamicIndex { prefix := getFromAttributes(indexPrefix, resource, scope, span) @@ -178,7 +178,7 @@ func (e *elasticsearchExporter) traceRecordToItem(ctx context.Context, resource document, err := e.model.encodeSpan(resource, span, scope) if err != nil { - return esBulkIndexerItem{}, fmt.Errorf("Failed to encode trace record: %w", err) + return esBulkIndexerItem{}, fmt.Errorf("failed to encode trace record: %w", err) } return esBulkIndexerItem{ Index: fIndex, From cf6f599793f4c0162f00201970915addaaa4d5c4 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 11 Jun 2024 17:43:20 +0100 Subject: [PATCH 073/117] Remove unused param --- exporter/elasticsearchexporter/exporter_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/exporter_test.go b/exporter/elasticsearchexporter/exporter_test.go index aa5062c6070c..70a16ad0b9be 100644 --- a/exporter/elasticsearchexporter/exporter_test.go +++ b/exporter/elasticsearchexporter/exporter_test.go @@ -626,7 +626,7 @@ func sendLogRecords(t *testing.T, exporter exporter.Logs, records ...plog.LogRec for _, record := range records { record.CopyTo(scopeLogs.LogRecords().AppendEmpty()) } - return sendLogs(t, exporter, logs) + return sendLogs(exporter, logs) } func mustSendLogRecords(t *testing.T, exporter exporter.Logs, records ...plog.LogRecord) { @@ -634,7 +634,7 @@ func mustSendLogRecords(t *testing.T, exporter exporter.Logs, records ...plog.Lo require.NoError(t, err) } -func sendLogs(_ *testing.T, exporter exporter.Logs, logs plog.Logs) error { +func sendLogs(exporter exporter.Logs, logs plog.Logs) error { return exporter.ConsumeLogs(context.Background(), logs) } From 0e5c381796a7eeb65f59ea6087675d3855dff25a Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 12 Jun 2024 23:02:48 +0100 Subject: [PATCH 074/117] Fix merge conflicts --- exporter/elasticsearchexporter/exporter_test.go | 3 --- 
exporter/elasticsearchexporter/factory.go | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/exporter/elasticsearchexporter/exporter_test.go b/exporter/elasticsearchexporter/exporter_test.go index 5f531a2a3ee2..9e9de4bfc807 100644 --- a/exporter/elasticsearchexporter/exporter_test.go +++ b/exporter/elasticsearchexporter/exporter_test.go @@ -623,9 +623,6 @@ func newTestTracesExporter(t *testing.T, url string, fns ...func(*Config)) expor }}, fns...)...) exp, err := f.CreateTracesExporter(context.Background(), exportertest.NewNopSettings(), cfg) require.NoError(t, err) - err = exp.Start(context.Background(), componenttest.NewNopHost()) - require.NoError(t, err) - err = exp.Start(context.Background(), componenttest.NewNopHost()) require.NoError(t, err) t.Cleanup(func() { diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 1c2e9429ad65..8782048f811d 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -10,6 +10,7 @@ import ( "errors" "fmt" "net/http" + "runtime" "time" "go.opentelemetry.io/collector/component" From f6ad0e246a714da16ea1aa3b46c8165dbf2ed5b8 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 12 Jun 2024 23:11:27 +0100 Subject: [PATCH 075/117] Handle 0 flushTimeout --- .../elasticsearchexporter/elasticsearch_bulk.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 0e19ece63aef..58bd1526ba74 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -246,8 +246,10 @@ func (p *bulkIndexerManager) Close(ctx context.Context) error { } type worker struct { - indexer *docappender.BulkIndexer - closeCh <-chan struct{} + indexer *docappender.BulkIndexer + closeCh <-chan struct{} + + // timeout on a single bulk request, not to be confused with `batcher.flush_timeout` option flushTimeout time.Duration retryBackoff func(int) time.Duration @@ -290,8 +292,11 @@ func (w *worker) addBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem } func (w *worker) flush(ctx context.Context) error { - ctx, cancel := context.WithTimeout(ctx, w.flushTimeout) - defer cancel() + if w.flushTimeout > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, w.flushTimeout) + defer cancel() + } stat, err := w.indexer.Flush(ctx) w.stats.docsIndexed.Add(stat.Indexed) if err != nil { From 16b3a79bcb87105dac97053ae529683d7e891918 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 12 Jun 2024 23:25:36 +0100 Subject: [PATCH 076/117] gofmt --- exporter/elasticsearchexporter/config.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exporter/elasticsearchexporter/config.go b/exporter/elasticsearchexporter/config.go index 1f3c3b40d4fa..f92cfdc1593f 100644 --- a/exporter/elasticsearchexporter/config.go +++ b/exporter/elasticsearchexporter/config.go @@ -66,9 +66,9 @@ type Config struct { confighttp.ClientConfig `mapstructure:",squash"` Authentication AuthenticationSettings `mapstructure:",squash"` - Discovery DiscoverySettings `mapstructure:"discover"` - Retry RetrySettings `mapstructure:"retry"` - Flush FlushSettings `mapstructure:"flush"` // Deprecated: use `batcher` instead. 
+ Discovery DiscoverySettings `mapstructure:"discover"` + Retry RetrySettings `mapstructure:"retry"` + Flush FlushSettings `mapstructure:"flush"` // Deprecated: use `batcher` instead. Mapping MappingsSettings `mapstructure:"mapping"` LogstashFormat LogstashFormatSettings `mapstructure:"logstash_format"` From b98f7b2029278cc0f29caa539cd776bac587644c Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 13 Jun 2024 10:00:20 +0100 Subject: [PATCH 077/117] Convert flush.bytes to batcher.min_size_items --- .chloggen/elasticsearchexporter_batchsender.yaml | 2 +- exporter/elasticsearchexporter/README.md | 2 +- exporter/elasticsearchexporter/factory.go | 9 +++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.chloggen/elasticsearchexporter_batchsender.yaml b/.chloggen/elasticsearchexporter_batchsender.yaml index 53ed16e5e30e..68db8369dc9a 100644 --- a/.chloggen/elasticsearchexporter_batchsender.yaml +++ b/.chloggen/elasticsearchexporter_batchsender.yaml @@ -19,7 +19,7 @@ subtext: | Move buffering from bulk indexer to batch sender to improve reliability. With this change, there should be no event loss when used with persistent queue in the event of a collector crash. Introduce `batcher.*` to configure the batch sender which is now enabled by default. - Option `flush.bytes` is now ignored. Exporter will not start if `flush.bytes` is non-zero. Use the new `batcher.min_size_items` option to control the minimum number of items (log records, spans) to trigger a flush. + Option `flush.bytes` is deprecated. Use the new `batcher.min_size_items` option to control the minimum number of items (log records, spans) to trigger a flush. `batcher.min_size_items` will be set to the value of `flush.bytes` / 1000 if `flush.bytes` is non-zero. Option `flush.interval` is deprecated. Use the new `batcher.flush_timeout` option to control max age of buffer. `batcher.flush_timeout` will be set to the value of `flush.interval` if `flush.interval` is non-zero. Queue sender `sending_queue.enabled` defaults to `true`. diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index e80ed05846de..b0df57b52eca 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -87,7 +87,7 @@ When persistent queue is used, there should be no event loss even on collector c The Elasticsearch exporter supports the common `batcher` settings. - `enabled` (default=true): Enable batching of requests into a single bulk request. -- `min_size_items` (default=125): Minimum number of log records / spans in the buffer to trigger a flush immediately. +- `min_size_items` (default=5000): Minimum number of log records / spans in the buffer to trigger a flush immediately. - `max_size_items` (default=0): Maximum number of log records / spans in a request. 0 means there is no limit on the maximum number. - `flush_timeout` (default=30s): Maximum time of the oldest item spent inside the buffer, aka "max age of buffer". A flush will happen regardless of the size of content in buffer. 
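Before the factory change that follows, it helps to spell out what the `flush.bytes` to `batcher.min_size_items` translation described in the changelog note above amounts to. A rough before/after, assuming the factor of 1000 named in that note; the 5 MB figure is made up for illustration:

```yaml
# Deprecated form, still accepted with a deprecation warning:
exporters:
  elasticsearch:
    flush:
      bytes: 5000000   # translated to batcher.min_size_items: 5000000 / 1000 = 5000
      interval: 10s    # translated to batcher.flush_timeout: 10s
---
# Equivalent explicit form after translation:
exporters:
  elasticsearch:
    batcher:
      min_size_items: 5000
      flush_timeout: 10s
```

The `factory.go` hunk below wires up the `flush.bytes` half of this mapping in `handleDeprecations`, logging a warning with the derived value.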
diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 8782048f811d..1b9a6cbf38ed 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -7,7 +7,6 @@ package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry import ( "context" - "errors" "fmt" "net/http" "runtime" @@ -69,7 +68,7 @@ func createDefaultConfig() component.Config { Enabled: true, FlushTimeout: 30 * time.Second, MinSizeConfig: exporterbatcher.MinSizeConfig{ - MinSizeItems: 125, + MinSizeItems: 5000, }, MaxSizeConfig: exporterbatcher.MaxSizeConfig{}, }, @@ -164,8 +163,10 @@ func handleDeprecations(cf *Config, logger *zap.Logger) error { } if cf.Flush.Bytes != 0 { - // cannot translate flush.bytes to batcher.min_size_items because they are in different units - return errors.New(`"flush.bytes" option is unsupported, use "batcher.min_size_items" instead`) + const factor = 1000 + val := cf.Flush.Bytes / factor + logger.Warn(fmt.Sprintf(`"flush.bytes" option is deprecated and replaced with "batcher.min_size_items". Setting "batcher.min_size_items" to the value of "flush.bytes" / %d.`, factor), zap.Int("value", val)) + cf.BatcherConfig.MinSizeItems = val } if cf.Flush.Interval != 0 { From d17554dfb23d9a9ae0505c065f524a194809fef3 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 13 Jun 2024 10:04:34 +0100 Subject: [PATCH 078/117] Set default max_size_items --- exporter/elasticsearchexporter/README.md | 2 +- exporter/elasticsearchexporter/factory.go | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index b0df57b52eca..243f0cb9d9c4 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -88,7 +88,7 @@ The Elasticsearch exporter supports the common `batcher` settings. - `enabled` (default=true): Enable batching of requests into a single bulk request. - `min_size_items` (default=5000): Minimum number of log records / spans in the buffer to trigger a flush immediately. -- `max_size_items` (default=0): Maximum number of log records / spans in a request. 0 means there is no limit on the maximum number. +- `max_size_items` (default=10000): Maximum number of log records / spans in a request. - `flush_timeout` (default=30s): Maximum time of the oldest item spent inside the buffer, aka "max age of buffer". A flush will happen regardless of the size of content in buffer. 
### Elasticsearch document routing diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 1b9a6cbf38ed..165da022eba8 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -70,7 +70,9 @@ func createDefaultConfig() component.Config { MinSizeConfig: exporterbatcher.MinSizeConfig{ MinSizeItems: 5000, }, - MaxSizeConfig: exporterbatcher.MaxSizeConfig{}, + MaxSizeConfig: exporterbatcher.MaxSizeConfig{ + MaxSizeItems: 10000, + }, }, Mapping: MappingsSettings{ Mode: "none", From 77e99c9ff59472e4122e16300bbb8bfd1ca837af Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 13 Jun 2024 10:08:11 +0100 Subject: [PATCH 079/117] changelog: change to deprecation --- .chloggen/elasticsearchexporter_batchsender.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.chloggen/elasticsearchexporter_batchsender.yaml b/.chloggen/elasticsearchexporter_batchsender.yaml index 68db8369dc9a..909a909ae2de 100644 --- a/.chloggen/elasticsearchexporter_batchsender.yaml +++ b/.chloggen/elasticsearchexporter_batchsender.yaml @@ -1,7 +1,7 @@ # Use this changelog template to create an entry for release notes. # One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' -change_type: breaking +change_type: deprecation # The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) component: elasticsearchexporter From 78a88008dc23ce6ad9e3dc2e3a49dcd560513c17 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 13 Jun 2024 10:13:36 +0100 Subject: [PATCH 080/117] Log deprecated value --- exporter/elasticsearchexporter/factory.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 165da022eba8..0ba7d169da7a 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -160,7 +160,7 @@ func getTimeoutConfig() exporterhelper.TimeoutSettings { // Otherwise, return an error so that the user is aware of an unsupported option. func handleDeprecations(cf *Config, logger *zap.Logger) error { if cf.Index != "" { - logger.Warn(`"index" option is deprecated and replaced with "logs_index" and "traces_index". Setting "logs_index" to the value of "index".`) + logger.Warn(`"index" option is deprecated and replaced with "logs_index" and "traces_index". Setting "logs_index" to the value of "index".`, zap.String("value", cf.Index)) cf.LogsIndex = cf.Index } @@ -172,7 +172,7 @@ func handleDeprecations(cf *Config, logger *zap.Logger) error { } if cf.Flush.Interval != 0 { - logger.Warn(`"flush.interval" option is deprecated and replaced with "batcher.flush_timeout". Setting "batcher.flush_timeout" to the value of "flush.interval".`) + logger.Warn(`"flush.interval" option is deprecated and replaced with "batcher.flush_timeout". 
Setting "batcher.flush_timeout" to the value of "flush.interval".`, zap.Duration("value", cf.Flush.Interval)) cf.BatcherConfig.FlushTimeout = cf.Flush.Interval } From 0939b457e4da666a79c83bf3dc8308e12188cc8e Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 13 Jun 2024 10:28:21 +0100 Subject: [PATCH 081/117] Clean up --- exporter/elasticsearchexporter/config.go | 9 ++++----- exporter/elasticsearchexporter/elasticsearch_bulk.go | 8 +++++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/exporter/elasticsearchexporter/config.go b/exporter/elasticsearchexporter/config.go index f92cfdc1593f..03eee03cd8a5 100644 --- a/exporter/elasticsearchexporter/config.go +++ b/exporter/elasticsearchexporter/config.go @@ -69,9 +69,8 @@ type Config struct { Discovery DiscoverySettings `mapstructure:"discover"` Retry RetrySettings `mapstructure:"retry"` Flush FlushSettings `mapstructure:"flush"` // Deprecated: use `batcher` instead. - - Mapping MappingsSettings `mapstructure:"mapping"` - LogstashFormat LogstashFormatSettings `mapstructure:"logstash_format"` + Mapping MappingsSettings `mapstructure:"mapping"` + LogstashFormat LogstashFormatSettings `mapstructure:"logstash_format"` } type LogstashFormatSettings struct { @@ -123,12 +122,12 @@ type DiscoverySettings struct { type FlushSettings struct { // Bytes sets the send buffer flushing limit. // - // Deprecated: This configuration is ignored. Use `batcher.min_size_items` instead. + // Deprecated: Use `batcher.min_size_items` instead. Bytes int `mapstructure:"bytes"` // Interval configures the max age of a document in the send buffer. // - // Deprecated: This configuration is ignored. Use `batcher.flush_timeout` instead. + // Deprecated: Use `batcher.flush_timeout` instead. Interval time.Duration `mapstructure:"interval"` } diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 58bd1526ba74..9fbe5bbab598 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -229,7 +229,7 @@ func (p *bulkIndexerManager) AddBatchAndFlush(ctx context.Context, batch []esBul return w.addBatchAndFlush(ctx, batch) } -// Close closes the closeCh channel and wait for workers to finish. +// Close closes the closeCh channel and wait for all p.AddBatchAndFlush to finish. func (p *bulkIndexerManager) Close(ctx context.Context) error { close(p.closeCh) doneCh := make(chan struct{}) @@ -270,21 +270,23 @@ func (w *worker) addBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem return err } if w.indexer.Items() == 0 { + // No documents in buffer waiting for per-document retry, exit retry loop. return nil } if w.retryBackoff == nil { - // This should never happen in practice. + // BUG: This should never happen in practice. // When retry is disabled / document level retry limit is reached, // documents should go into FailedDocs instead of indexer buffer. 
return errors.New("bulk indexer contains documents pending retry but retry is disabled") } backoff := w.retryBackoff(attempts + 1) // TODO: use exporterhelper retry_sender timer := time.NewTimer(backoff) - defer timer.Stop() select { case <-ctx.Done(): + timer.Stop() return ctx.Err() case <-w.closeCh: + timer.Stop() return errors.New("bulk indexer is closed") case <-timer.C: } From 71d84f6b88736f371554ecd64a8a91f491e2ef3c Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 13 Jun 2024 10:39:49 +0100 Subject: [PATCH 082/117] Bench persistent queue --- .../integrationtest/exporter_bench_test.go | 57 ++++++++++--------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index 9d7282473669..d5fb5a7f1ac0 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -30,34 +30,36 @@ import ( func BenchmarkExporter(b *testing.B) { for _, eventType := range []string{"logs", "traces"} { for _, mappingMode := range []string{"none", "ecs", "raw"} { - for _, tc := range []struct { - name string - batchSize int - }{ - {name: "small_batch", batchSize: 10}, - {name: "medium_batch", batchSize: 100}, - {name: "large_batch", batchSize: 1000}, - {name: "xlarge_batch", batchSize: 10000}, - } { - b.Run(fmt.Sprintf("%s/%s/%s", eventType, mappingMode, tc.name), func(b *testing.B) { - switch eventType { - case "logs": - benchmarkLogs(b, tc.batchSize, mappingMode) - case "traces": - benchmarkTraces(b, tc.batchSize, mappingMode) - } - }) + for _, persistentQueue := range []bool{false, true} { + for _, tc := range []struct { + name string + batchSize int + }{ + {name: "small_batch", batchSize: 10}, + {name: "medium_batch", batchSize: 100}, + {name: "large_batch", batchSize: 1000}, + {name: "xlarge_batch", batchSize: 10000}, + } { + b.Run(fmt.Sprintf("%s/%s/persistentQueue=%v/%s", eventType, mappingMode, persistentQueue, tc.name), func(b *testing.B) { + switch eventType { + case "logs": + benchmarkLogs(b, tc.batchSize, mappingMode, persistentQueue) + case "traces": + benchmarkTraces(b, tc.batchSize, mappingMode, persistentQueue) + } + }) + } } } } } -func benchmarkLogs(b *testing.B, batchSize int, mappingMode string) { +func benchmarkLogs(b *testing.B, batchSize int, mappingMode string, persistentQueue bool) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() host := storagetest.NewStorageHost() - runnerCfg := prepareBenchmark(b, host, batchSize, mappingMode) + runnerCfg := prepareBenchmark(b, host, batchSize, mappingMode, persistentQueue) exporter, err := runnerCfg.factory.CreateLogsExporter( ctx, exportertest.NewNopSettings(), runnerCfg.esCfg, ) @@ -80,12 +82,12 @@ func benchmarkLogs(b *testing.B, batchSize int, mappingMode string) { require.NoError(b, exporter.Shutdown(ctx)) } -func benchmarkTraces(b *testing.B, batchSize int, mappingMode string) { +func benchmarkTraces(b *testing.B, batchSize int, mappingMode string, persistentQueue bool) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() host := storagetest.NewStorageHost() - runnerCfg := prepareBenchmark(b, host, batchSize, mappingMode) + runnerCfg := prepareBenchmark(b, host, batchSize, mappingMode, persistentQueue) exporter, err := runnerCfg.factory.CreateTracesExporter( ctx, exportertest.NewNopSettings(), runnerCfg.esCfg, ) @@ -121,12 +123,10 @@ func 
prepareBenchmark( host *storagetest.StorageHost, batchSize int, mappingMode string, + persistentQueue bool, ) *benchRunnerCfg { b.Helper() - fileExtID, fileExt := getFileStorageExtension(b) - host.WithExtension(fileExtID, fileExt) - cfg := &benchRunnerCfg{} // Benchmarks don't decode the bulk requests to avoid allocations to pollute the results. receiver := newElasticsearchDataReceiver(b, false /* DecodeBulkRequest */) @@ -136,10 +136,11 @@ func prepareBenchmark( cfg.factory = elasticsearchexporter.NewFactory() cfg.esCfg = cfg.factory.CreateDefaultConfig().(*elasticsearchexporter.Config) cfg.esCfg.Mapping.Mode = mappingMode - cfg.esCfg.QueueSettings.Enabled = true - cfg.esCfg.QueueSettings.NumConsumers = 200 - cfg.esCfg.QueueSettings.QueueSize = 100_000 - cfg.esCfg.QueueSettings.StorageID = &fileExtID + if persistentQueue { + fileExtID, fileExt := getFileStorageExtension(b) + host.WithExtension(fileExtID, fileExt) + cfg.esCfg.QueueSettings.StorageID = &fileExtID + } cfg.esCfg.Endpoints = []string{receiver.endpoint} cfg.esCfg.LogsIndex = TestLogsIndex cfg.esCfg.TracesIndex = TestTracesIndex From 6fb2fc321ddc901aa9ccc35f583791ff2bbfcbc0 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 18 Jun 2024 01:12:53 +0100 Subject: [PATCH 083/117] [chore][exporter/elasticsearch] Use RunParallel in bench --- .../integrationtest/exporter_bench_test.go | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index 1fed5da91576..c0c83d59aad8 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -62,17 +62,24 @@ func benchmarkLogs(b *testing.B, batchSize int, mappingMode string) { b.ReportAllocs() b.ResetTimer() b.StopTimer() + logsArr := make([]plog.Logs, b.N) for i := 0; i < b.N; i++ { - logs, _ := runnerCfg.provider.GenerateLogs() - b.StartTimer() - require.NoError(b, exporter.ConsumeLogs(ctx, logs)) - b.StopTimer() + logsArr[i], _ = runnerCfg.provider.GenerateLogs() } + i := atomic.Int64{} + i.Store(-1) + b.SetParallelism(100) + b.StartTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + require.NoError(b, exporter.ConsumeLogs(ctx, logsArr[i.Add(1)])) + } + }) + require.NoError(b, exporter.Shutdown(ctx)) b.ReportMetric( float64(runnerCfg.generatedCount.Load())/b.Elapsed().Seconds(), "events/s", ) - require.NoError(b, exporter.Shutdown(ctx)) } func benchmarkTraces(b *testing.B, batchSize int, mappingMode string) { @@ -89,17 +96,25 @@ func benchmarkTraces(b *testing.B, batchSize int, mappingMode string) { b.ReportAllocs() b.ResetTimer() b.StopTimer() + + tracesArr := make([]ptrace.Traces, b.N) for i := 0; i < b.N; i++ { - traces, _ := runnerCfg.provider.GenerateTraces() - b.StartTimer() - require.NoError(b, exporter.ConsumeTraces(ctx, traces)) - b.StopTimer() + tracesArr[i], _ = runnerCfg.provider.GenerateTraces() } + i := atomic.Int64{} + i.Store(-1) + b.SetParallelism(100) + b.StartTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + require.NoError(b, exporter.ConsumeTraces(ctx, tracesArr[i.Add(1)])) + } + }) + require.NoError(b, exporter.Shutdown(ctx)) b.ReportMetric( float64(runnerCfg.generatedCount.Load())/b.Elapsed().Seconds(), "events/s", ) - require.NoError(b, exporter.Shutdown(ctx)) } type benchRunnerCfg struct { From 4c2ba652c170f4b5b4196a8493bc4e6841f721d3 Mon Sep 17 00:00:00 2001 From: 
Carson Ip Date: Tue, 18 Jun 2024 01:29:13 +0100 Subject: [PATCH 084/117] Use parallel bench --- .../integrationtest/exporter_bench_test.go | 118 ++++++++---------- 1 file changed, 51 insertions(+), 67 deletions(-) diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index d5fb5a7f1ac0..4298f4932ae4 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -11,103 +11,110 @@ import ( "time" "github.com/stretchr/testify/require" - "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/component/componenttest" "go.opentelemetry.io/collector/consumer" "go.opentelemetry.io/collector/exporter" "go.opentelemetry.io/collector/exporter/exportertest" - "go.opentelemetry.io/collector/extension" "go.opentelemetry.io/collector/pdata/plog" "go.opentelemetry.io/collector/pdata/pmetric" "go.opentelemetry.io/collector/pdata/ptrace" "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter" - "github.com/open-telemetry/opentelemetry-collector-contrib/extension/storage/filestorage" - "github.com/open-telemetry/opentelemetry-collector-contrib/extension/storage/storagetest" "github.com/open-telemetry/opentelemetry-collector-contrib/testbed/testbed" ) func BenchmarkExporter(b *testing.B) { for _, eventType := range []string{"logs", "traces"} { for _, mappingMode := range []string{"none", "ecs", "raw"} { - for _, persistentQueue := range []bool{false, true} { - for _, tc := range []struct { - name string - batchSize int - }{ - {name: "small_batch", batchSize: 10}, - {name: "medium_batch", batchSize: 100}, - {name: "large_batch", batchSize: 1000}, - {name: "xlarge_batch", batchSize: 10000}, - } { - b.Run(fmt.Sprintf("%s/%s/persistentQueue=%v/%s", eventType, mappingMode, persistentQueue, tc.name), func(b *testing.B) { - switch eventType { - case "logs": - benchmarkLogs(b, tc.batchSize, mappingMode, persistentQueue) - case "traces": - benchmarkTraces(b, tc.batchSize, mappingMode, persistentQueue) - } - }) - } + for _, tc := range []struct { + name string + batchSize int + }{ + {name: "small_batch", batchSize: 10}, + {name: "medium_batch", batchSize: 100}, + {name: "large_batch", batchSize: 1000}, + {name: "xlarge_batch", batchSize: 10000}, + } { + b.Run(fmt.Sprintf("%s/%s/%s", eventType, mappingMode, tc.name), func(b *testing.B) { + switch eventType { + case "logs": + benchmarkLogs(b, tc.batchSize, mappingMode) + case "traces": + benchmarkTraces(b, tc.batchSize, mappingMode) + } + }) } } } } -func benchmarkLogs(b *testing.B, batchSize int, mappingMode string, persistentQueue bool) { +func benchmarkLogs(b *testing.B, batchSize int, mappingMode string) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - host := storagetest.NewStorageHost() - runnerCfg := prepareBenchmark(b, host, batchSize, mappingMode, persistentQueue) + runnerCfg := prepareBenchmark(b, batchSize, mappingMode) exporter, err := runnerCfg.factory.CreateLogsExporter( ctx, exportertest.NewNopSettings(), runnerCfg.esCfg, ) require.NoError(b, err) - require.NoError(b, exporter.Start(ctx, host)) + require.NoError(b, exporter.Start(ctx, componenttest.NewNopHost())) b.ReportAllocs() b.ResetTimer() b.StopTimer() + logsArr := make([]plog.Logs, b.N) for i := 0; i < b.N; i++ { - logs, _ := runnerCfg.provider.GenerateLogs() - b.StartTimer() - require.NoError(b, exporter.ConsumeLogs(ctx, 
logs)) - b.StopTimer() + logsArr[i], _ = runnerCfg.provider.GenerateLogs() } + i := atomic.Int64{} + i.Store(-1) + b.SetParallelism(100) + b.StartTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + require.NoError(b, exporter.ConsumeLogs(ctx, logsArr[i.Add(1)])) + } + }) + require.NoError(b, exporter.Shutdown(ctx)) b.ReportMetric( float64(runnerCfg.generatedCount.Load())/b.Elapsed().Seconds(), "events/s", ) - require.NoError(b, exporter.Shutdown(ctx)) } -func benchmarkTraces(b *testing.B, batchSize int, mappingMode string, persistentQueue bool) { +func benchmarkTraces(b *testing.B, batchSize int, mappingMode string) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - host := storagetest.NewStorageHost() - runnerCfg := prepareBenchmark(b, host, batchSize, mappingMode, persistentQueue) + runnerCfg := prepareBenchmark(b, batchSize, mappingMode) exporter, err := runnerCfg.factory.CreateTracesExporter( ctx, exportertest.NewNopSettings(), runnerCfg.esCfg, ) require.NoError(b, err) - require.NoError(b, exporter.Start(ctx, host)) + require.NoError(b, exporter.Start(ctx, componenttest.NewNopHost())) b.ReportAllocs() b.ResetTimer() b.StopTimer() + + tracesArr := make([]ptrace.Traces, b.N) for i := 0; i < b.N; i++ { - traces, _ := runnerCfg.provider.GenerateTraces() - b.StartTimer() - require.NoError(b, exporter.ConsumeTraces(ctx, traces)) - b.StopTimer() + tracesArr[i], _ = runnerCfg.provider.GenerateTraces() } + i := atomic.Int64{} + i.Store(-1) + b.SetParallelism(100) + b.StartTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + require.NoError(b, exporter.ConsumeTraces(ctx, tracesArr[i.Add(1)])) + } + }) + require.NoError(b, exporter.Shutdown(ctx)) b.ReportMetric( float64(runnerCfg.generatedCount.Load())/b.Elapsed().Seconds(), "events/s", ) - require.NoError(b, exporter.Shutdown(ctx)) } type benchRunnerCfg struct { @@ -120,10 +127,8 @@ type benchRunnerCfg struct { func prepareBenchmark( b *testing.B, - host *storagetest.StorageHost, batchSize int, mappingMode string, - persistentQueue bool, ) *benchRunnerCfg { b.Helper() @@ -136,15 +141,13 @@ func prepareBenchmark( cfg.factory = elasticsearchexporter.NewFactory() cfg.esCfg = cfg.factory.CreateDefaultConfig().(*elasticsearchexporter.Config) cfg.esCfg.Mapping.Mode = mappingMode - if persistentQueue { - fileExtID, fileExt := getFileStorageExtension(b) - host.WithExtension(fileExtID, fileExt) - cfg.esCfg.QueueSettings.StorageID = &fileExtID - } cfg.esCfg.Endpoints = []string{receiver.endpoint} cfg.esCfg.LogsIndex = TestLogsIndex cfg.esCfg.TracesIndex = TestTracesIndex cfg.esCfg.BatcherConfig.FlushTimeout = 10 * time.Millisecond + cfg.esCfg.BatcherConfig.MinSizeItems = 10000000000 + cfg.esCfg.BatcherConfig.MaxSizeItems = 10000000000 + cfg.esCfg.QueueSettings.Enabled = false cfg.esCfg.NumWorkers = 1 tc, err := consumer.NewTraces(func(context.Context, ptrace.Traces) error { @@ -165,22 +168,3 @@ func prepareBenchmark( return cfg } - -func getFileStorageExtension(b testing.TB) (component.ID, extension.Extension) { - storage := filestorage.NewFactory() - componentID := component.NewIDWithName(storage.Type(), "esexporterbench") - - storageCfg := storage.CreateDefaultConfig().(*filestorage.Config) - storageCfg.Directory = b.TempDir() - fileExt, err := storage.CreateExtension( - context.Background(), - extension.CreateSettings{ - ID: componentID, - TelemetrySettings: componenttest.NewNopTelemetrySettings(), - BuildInfo: component.NewDefaultBuildInfo(), - }, - storageCfg, - ) - require.NoError(b, err) - 
return componentID, fileExt -} From 8d4d4c154dcaabfec9f44b801382fb20d347e6b4 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 18 Jun 2024 11:23:39 +0100 Subject: [PATCH 085/117] Measure docs/s --- .../integrationtest/datareceiver.go | 24 +++++++++++++++++-- .../integrationtest/exporter_bench_test.go | 19 ++++++++++++--- .../integrationtest/exporter_test.go | 2 +- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/exporter/elasticsearchexporter/integrationtest/datareceiver.go b/exporter/elasticsearchexporter/integrationtest/datareceiver.go index e530d864fb35..4327bbd9b5fb 100644 --- a/exporter/elasticsearchexporter/integrationtest/datareceiver.go +++ b/exporter/elasticsearchexporter/integrationtest/datareceiver.go @@ -4,12 +4,14 @@ package integrationtest // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/integrationtest" import ( + "bufio" "context" "encoding/json" "errors" "fmt" "net/http" "net/url" + "sync/atomic" "testing" "github.com/elastic/go-docappender/v2/docappendertest" @@ -47,14 +49,16 @@ type esDataReceiver struct { receiver receiver.Logs endpoint string decodeBulkRequest bool + docCount *atomic.Int64 t testing.TB } -func newElasticsearchDataReceiver(t testing.TB, decodeBulkRequest bool) *esDataReceiver { +func newElasticsearchDataReceiver(t testing.TB, decodeBulkRequest bool, docCount *atomic.Int64) *esDataReceiver { return &esDataReceiver{ DataReceiverBase: testbed.DataReceiverBase{}, endpoint: fmt.Sprintf("http://%s:%d", testbed.DefaultHost, testutil.GetAvailablePort(t)), decodeBulkRequest: decodeBulkRequest, + docCount: docCount, t: t, } } @@ -73,6 +77,7 @@ func (es *esDataReceiver) Start(tc consumer.Traces, _ consumer.Metrics, lc consu cfg := factory.CreateDefaultConfig().(*config) cfg.ServerConfig.Endpoint = esURL.Host cfg.DecodeBulkRequests = es.decodeBulkRequest + cfg.DocCount = es.docCount set := receivertest.NewNopSettings() // Use an actual logger to log errors. @@ -131,6 +136,9 @@ type config struct { // set to false then the consumers will not consume any events and the // bulk request will always return http.StatusOK. DecodeBulkRequests bool + + // DocCount stores the sum of number of events from bulk requests. 
+ DocCount *atomic.Int64 } func createDefaultConfig() component.Config { @@ -217,9 +225,21 @@ func (es *mockESReceiver) Start(ctx context.Context, host component.Host) error r.HandleFunc("/_bulk", func(w http.ResponseWriter, r *http.Request) { if !es.config.DecodeBulkRequests { fmt.Fprintln(w, "{}") + defer r.Body.Close() + s := bufio.NewScanner(r.Body) + var cnt int64 + for s.Scan() { + cnt++ + } + if es.config.DocCount != nil { + es.config.DocCount.Add(cnt / 2) // 1 line for action, 1 line for document + } return } - _, response := docappendertest.DecodeBulkRequest(r) + docs, response := docappendertest.DecodeBulkRequest(r) + if es.config.DocCount != nil { + es.config.DocCount.Add(int64(len(docs))) + } for _, itemMap := range response.Items { for k, item := range itemMap { var consumeErr error diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index d82e9537f913..431598747496 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -54,9 +54,11 @@ func benchmarkLogs(b *testing.B, batchSize int, mappingMode string) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() + var docCount atomic.Int64 + exporterSettings := exportertest.NewNopSettings() exporterSettings.TelemetrySettings.Logger = zaptest.NewLogger(b, zaptest.Level(zap.WarnLevel)) - runnerCfg := prepareBenchmark(b, batchSize, mappingMode) + runnerCfg := prepareBenchmark(b, batchSize, mappingMode, &docCount) exporter, err := runnerCfg.factory.CreateLogsExporter( ctx, exporterSettings, runnerCfg.esCfg, ) @@ -84,15 +86,21 @@ func benchmarkLogs(b *testing.B, batchSize int, mappingMode string) { float64(runnerCfg.generatedCount.Load())/b.Elapsed().Seconds(), "events/s", ) + b.ReportMetric( + float64(docCount.Load())/b.Elapsed().Seconds(), + "docs/s", + ) } func benchmarkTraces(b *testing.B, batchSize int, mappingMode string) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() + var docCount atomic.Int64 + exporterSettings := exportertest.NewNopSettings() exporterSettings.TelemetrySettings.Logger = zaptest.NewLogger(b, zaptest.Level(zap.WarnLevel)) - runnerCfg := prepareBenchmark(b, batchSize, mappingMode) + runnerCfg := prepareBenchmark(b, batchSize, mappingMode, &docCount) exporter, err := runnerCfg.factory.CreateTracesExporter( ctx, exporterSettings, runnerCfg.esCfg, ) @@ -121,6 +129,10 @@ func benchmarkTraces(b *testing.B, batchSize int, mappingMode string) { float64(runnerCfg.generatedCount.Load())/b.Elapsed().Seconds(), "events/s", ) + b.ReportMetric( + float64(docCount.Load())/b.Elapsed().Seconds(), + "docs/s", + ) } type benchRunnerCfg struct { @@ -135,12 +147,13 @@ func prepareBenchmark( b *testing.B, batchSize int, mappingMode string, + docCount *atomic.Int64, ) *benchRunnerCfg { b.Helper() cfg := &benchRunnerCfg{} // Benchmarks don't decode the bulk requests to avoid allocations to pollute the results. 
- receiver := newElasticsearchDataReceiver(b, false /* DecodeBulkRequest */) + receiver := newElasticsearchDataReceiver(b, false /* DecodeBulkRequest */, docCount) cfg.provider = testbed.NewPerfTestDataProvider(testbed.LoadOptions{ItemsPerBatch: batchSize}) cfg.provider.SetLoadGeneratorCounters(&cfg.generatedCount) diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_test.go index c0df3d575308..d8cd47a86483 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_test.go @@ -57,7 +57,7 @@ func runner(t *testing.T, eventType string, restartCollector, mockESFailure bool t.Fatalf("failed to create data sender for type: %s", eventType) } - receiver := newElasticsearchDataReceiver(t, true) + receiver := newElasticsearchDataReceiver(t, true, nil) loadOpts := testbed.LoadOptions{ DataItemsPerSecond: 1_000, ItemsPerBatch: 10, From 97494edfac81620264a5ca9a1eaba8967ea970b0 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 18 Jun 2024 11:23:39 +0100 Subject: [PATCH 086/117] Measure docs/s --- .../integrationtest/datareceiver.go | 24 +++++++++++++++++-- .../integrationtest/exporter_bench_test.go | 19 ++++++++++++--- .../integrationtest/exporter_test.go | 2 +- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/exporter/elasticsearchexporter/integrationtest/datareceiver.go b/exporter/elasticsearchexporter/integrationtest/datareceiver.go index ae27fcc95516..da94b36bce9d 100644 --- a/exporter/elasticsearchexporter/integrationtest/datareceiver.go +++ b/exporter/elasticsearchexporter/integrationtest/datareceiver.go @@ -4,12 +4,14 @@ package integrationtest // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/integrationtest" import ( + "bufio" "context" "encoding/json" "errors" "fmt" "net/http" "net/url" + "sync/atomic" "testing" "github.com/elastic/go-docappender/v2/docappendertest" @@ -47,14 +49,16 @@ type esDataReceiver struct { receiver receiver.Logs endpoint string decodeBulkRequest bool + docCount *atomic.Int64 t testing.TB } -func newElasticsearchDataReceiver(t testing.TB, decodeBulkRequest bool) *esDataReceiver { +func newElasticsearchDataReceiver(t testing.TB, decodeBulkRequest bool, docCount *atomic.Int64) *esDataReceiver { return &esDataReceiver{ DataReceiverBase: testbed.DataReceiverBase{}, endpoint: fmt.Sprintf("http://%s:%d", testbed.DefaultHost, testutil.GetAvailablePort(t)), decodeBulkRequest: decodeBulkRequest, + docCount: docCount, t: t, } } @@ -73,6 +77,7 @@ func (es *esDataReceiver) Start(tc consumer.Traces, _ consumer.Metrics, lc consu cfg := factory.CreateDefaultConfig().(*config) cfg.ServerConfig.Endpoint = esURL.Host cfg.DecodeBulkRequests = es.decodeBulkRequest + cfg.DocCount = es.docCount set := receivertest.NewNopSettings() // Use an actual logger to log errors. @@ -136,6 +141,9 @@ type config struct { // set to false then the consumers will not consume any events and the // bulk request will always return http.StatusOK. DecodeBulkRequests bool + + // DocCount stores the sum of number of events from bulk requests. 
+ DocCount *atomic.Int64 } func createDefaultConfig() component.Config { @@ -222,9 +230,21 @@ func (es *mockESReceiver) Start(ctx context.Context, host component.Host) error r.HandleFunc("/_bulk", func(w http.ResponseWriter, r *http.Request) { if !es.config.DecodeBulkRequests { fmt.Fprintln(w, "{}") + defer r.Body.Close() + s := bufio.NewScanner(r.Body) + var cnt int64 + for s.Scan() { + cnt++ + } + if es.config.DocCount != nil { + es.config.DocCount.Add(cnt / 2) // 1 line for action, 1 line for document + } return } - _, response := docappendertest.DecodeBulkRequest(r) + docs, response := docappendertest.DecodeBulkRequest(r) + if es.config.DocCount != nil { + es.config.DocCount.Add(int64(len(docs))) + } for _, itemMap := range response.Items { for k, item := range itemMap { var consumeErr error diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index 3539dbed12e7..3449fbeb39cb 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -54,9 +54,11 @@ func benchmarkLogs(b *testing.B, batchSize int, mappingMode string) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() + var docCount atomic.Int64 + exporterSettings := exportertest.NewNopSettings() exporterSettings.TelemetrySettings.Logger = zaptest.NewLogger(b, zaptest.Level(zap.WarnLevel)) - runnerCfg := prepareBenchmark(b, batchSize, mappingMode) + runnerCfg := prepareBenchmark(b, batchSize, mappingMode, &docCount) exporter, err := runnerCfg.factory.CreateLogsExporter( ctx, exporterSettings, runnerCfg.esCfg, ) @@ -84,15 +86,21 @@ func benchmarkLogs(b *testing.B, batchSize int, mappingMode string) { float64(runnerCfg.generatedCount.Load())/b.Elapsed().Seconds(), "events/s", ) + b.ReportMetric( + float64(docCount.Load())/b.Elapsed().Seconds(), + "docs/s", + ) } func benchmarkTraces(b *testing.B, batchSize int, mappingMode string) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() + var docCount atomic.Int64 + exporterSettings := exportertest.NewNopSettings() exporterSettings.TelemetrySettings.Logger = zaptest.NewLogger(b, zaptest.Level(zap.WarnLevel)) - runnerCfg := prepareBenchmark(b, batchSize, mappingMode) + runnerCfg := prepareBenchmark(b, batchSize, mappingMode, &docCount) exporter, err := runnerCfg.factory.CreateTracesExporter( ctx, exporterSettings, runnerCfg.esCfg, ) @@ -121,6 +129,10 @@ func benchmarkTraces(b *testing.B, batchSize int, mappingMode string) { float64(runnerCfg.generatedCount.Load())/b.Elapsed().Seconds(), "events/s", ) + b.ReportMetric( + float64(docCount.Load())/b.Elapsed().Seconds(), + "docs/s", + ) } type benchRunnerCfg struct { @@ -135,12 +147,13 @@ func prepareBenchmark( b *testing.B, batchSize int, mappingMode string, + docCount *atomic.Int64, ) *benchRunnerCfg { b.Helper() cfg := &benchRunnerCfg{} // Benchmarks don't decode the bulk requests to avoid allocations to pollute the results. 
- receiver := newElasticsearchDataReceiver(b, false /* DecodeBulkRequest */) + receiver := newElasticsearchDataReceiver(b, false /* DecodeBulkRequest */, docCount) cfg.provider = testbed.NewPerfTestDataProvider(testbed.LoadOptions{ItemsPerBatch: batchSize}) cfg.provider.SetLoadGeneratorCounters(&cfg.generatedCount) diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_test.go index f78d8e9ae111..f1ea974145b0 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_test.go @@ -59,7 +59,7 @@ func runner(t *testing.T, eventType string, restartCollector, mockESFailure bool t.Fatalf("failed to create data sender for type: %s", eventType) } - receiver := newElasticsearchDataReceiver(t, true) + receiver := newElasticsearchDataReceiver(t, true, nil) loadOpts := testbed.LoadOptions{ DataItemsPerSecond: 1_000, ItemsPerBatch: 10, From d9bd55f5b96e96c00e1a81466e6c32fdaa7c0388 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 18 Jun 2024 11:30:33 +0100 Subject: [PATCH 087/117] Explicitly disable queue --- .../elasticsearchexporter/integrationtest/exporter_bench_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index 431598747496..87b312659718 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -165,6 +165,7 @@ func prepareBenchmark( cfg.esCfg.TracesIndex = TestTracesIndex cfg.esCfg.Flush.Interval = 10 * time.Millisecond cfg.esCfg.NumWorkers = 1 + cfg.esCfg.QueueSettings.Enabled = false tc, err := consumer.NewTraces(func(context.Context, ptrace.Traces) error { return nil From 0e75f8b00bd4930874234515c897dadacaf0ac84 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 24 Jun 2024 16:00:44 +0100 Subject: [PATCH 088/117] Remove bulk indexer pooling --- .../elasticsearch_bulk.go | 57 +++++++------------ 1 file changed, 20 insertions(+), 37 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 9fbe5bbab598..2842ce050e6f 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -148,43 +148,16 @@ func createElasticsearchBackoffFunc(config *RetrySettings) func(int) time.Durati } } -func newBulkIndexer(logger *zap.Logger, client *elasticsearch7.Client, config *Config) (*esBulkIndexerCurrent, error) { - manager := &bulkIndexerManager{ +func newBulkIndexer(logger *zap.Logger, client *esClientCurrent, config *Config) (*esBulkIndexerCurrent, error) { + return &bulkIndexerManager{ closeCh: make(chan struct{}), stats: bulkIndexerStats{}, logger: logger, config: config, wg: &sync.WaitGroup{}, sem: semaphore.NewWeighted(int64(config.NumWorkers)), - } - - var maxDocRetry int - if config.Retry.Enabled { - // max_requests includes initial attempt - // See https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32344 - maxDocRetry = config.Retry.MaxRequests - 1 - } - manager.pool = &sync.Pool{ - New: func() any { - bi, err := docappender.NewBulkIndexer(docappender.BulkIndexerConfig{ - Client: client, - MaxDocumentRetries: maxDocRetry, - Pipeline: config.Pipeline, - RetryOnDocumentStatus: config.Retry.RetryOnStatus, - }) - if err != nil 
{
-				return fmt.Errorf("error creating docappender bulk indexer: %w", err)
-			}
-			return bi
-		},
-	}
-
-	// Create a bulk indexer once to validate the config options
-	if err, ok := manager.pool.Get().(error); ok {
-		return nil, err
-	}
-
-	return manager, nil
+		client: client,
+	}, nil
 }
 
 type bulkIndexerStats struct {
@@ -199,6 +172,7 @@ type bulkIndexerManager struct {
 	wg      *sync.WaitGroup
 	sem     *semaphore.Weighted
 	pool    *sync.Pool
+	client  *esClientCurrent
 }
 
func (p *bulkIndexerManager) AddBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem) error {
@@ -210,13 +184,22 @@ func (p *bulkIndexerManager) AddBatchAndFlush(ctx context.Context, batch []esBul
 	}
 	defer p.sem.Release(1)
 
-	bi := p.pool.Get().(*docappender.BulkIndexer)
-	// Bulk indexer buffer should never contain any items, but double check for safety.
-	for bi.Items() != 0 {
-		p.logger.Error("bug: bulk indexer buffer contains unexpected residue")
-		bi = p.pool.Get().(*docappender.BulkIndexer)
+	var maxDocRetry int
+	if p.config.Retry.Enabled {
+		// max_requests includes initial attempt
+		// See https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32344
+		maxDocRetry = p.config.Retry.MaxRequests - 1
+	}
+
+	bi, err := docappender.NewBulkIndexer(docappender.BulkIndexerConfig{
+		Client:                p.client,
+		MaxDocumentRetries:    maxDocRetry,
+		Pipeline:              p.config.Pipeline,
+		RetryOnDocumentStatus: p.config.Retry.RetryOnStatus,
+	})
+	if err != nil {
+		return fmt.Errorf("error creating docappender bulk indexer: %w", err)
 	}
-	defer p.pool.Put(bi)
 
 	w := worker{
 		indexer: bi,

From dae359015a81d0fcf9b222b614b15eca2405ccdb Mon Sep 17 00:00:00 2001
From: Carson Ip
Date: Mon, 24 Jun 2024 23:49:51 +0100
Subject: [PATCH 090/117] Fix http request body too large

---
 .../integrationtest/datareceiver.go | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/exporter/elasticsearchexporter/integrationtest/datareceiver.go
b/exporter/elasticsearchexporter/integrationtest/datareceiver.go index 4327bbd9b5fb..e524197db37e 100644 --- a/exporter/elasticsearchexporter/integrationtest/datareceiver.go +++ b/exporter/elasticsearchexporter/integrationtest/datareceiver.go @@ -9,6 +9,7 @@ import ( "encoding/json" "errors" "fmt" + "math" "net/http" "net/url" "sync/atomic" @@ -144,7 +145,8 @@ type config struct { func createDefaultConfig() component.Config { return &config{ ServerConfig: confighttp.ServerConfig{ - Endpoint: "127.0.0.1:9200", + Endpoint: "127.0.0.1:9200", + MaxRequestBodySize: math.MaxInt64, }, DecodeBulkRequests: true, } @@ -224,7 +226,6 @@ func (es *mockESReceiver) Start(ctx context.Context, host component.Host) error }) r.HandleFunc("/_bulk", func(w http.ResponseWriter, r *http.Request) { if !es.config.DecodeBulkRequests { - fmt.Fprintln(w, "{}") defer r.Body.Close() s := bufio.NewScanner(r.Body) var cnt int64 @@ -234,6 +235,11 @@ func (es *mockESReceiver) Start(ctx context.Context, host component.Host) error if es.config.DocCount != nil { es.config.DocCount.Add(cnt / 2) // 1 line for action, 1 line for document } + if s.Err() != nil { + w.WriteHeader(400) + return + } + fmt.Fprintln(w, "{}") return } docs, response := docappendertest.DecodeBulkRequest(r) From 1d10bbf042c87905937e5c0d436559a207dfbc7f Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 25 Jun 2024 18:07:11 +0100 Subject: [PATCH 091/117] Print error --- exporter/elasticsearchexporter/integrationtest/datareceiver.go | 1 + 1 file changed, 1 insertion(+) diff --git a/exporter/elasticsearchexporter/integrationtest/datareceiver.go b/exporter/elasticsearchexporter/integrationtest/datareceiver.go index e524197db37e..87769d4a2e1c 100644 --- a/exporter/elasticsearchexporter/integrationtest/datareceiver.go +++ b/exporter/elasticsearchexporter/integrationtest/datareceiver.go @@ -237,6 +237,7 @@ func (es *mockESReceiver) Start(ctx context.Context, host component.Host) error } if s.Err() != nil { w.WriteHeader(400) + fmt.Fprintln(w, s.Err()) return } fmt.Fprintln(w, "{}") From ebd99e296365f17ad9e502f8cd97509ba67354eb Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 25 Jun 2024 18:37:11 +0100 Subject: [PATCH 092/117] Record bulkReqs/s --- .../integrationtest/datareceiver.go | 29 +++++++++++-------- .../integrationtest/exporter_bench_test.go | 29 ++++++++++++------- 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/exporter/elasticsearchexporter/integrationtest/datareceiver.go b/exporter/elasticsearchexporter/integrationtest/datareceiver.go index 87769d4a2e1c..0f9417b779df 100644 --- a/exporter/elasticsearchexporter/integrationtest/datareceiver.go +++ b/exporter/elasticsearchexporter/integrationtest/datareceiver.go @@ -45,21 +45,27 @@ const ( TestTracesIndex = "traces-test-idx" ) +type counters struct { + observedDocCount atomic.Int64 + observedBulkRequests atomic.Int64 +} + type esDataReceiver struct { testbed.DataReceiverBase receiver receiver.Logs endpoint string decodeBulkRequest bool - docCount *atomic.Int64 t testing.TB + + *counters } -func newElasticsearchDataReceiver(t testing.TB, decodeBulkRequest bool, docCount *atomic.Int64) *esDataReceiver { +func newElasticsearchDataReceiver(t testing.TB, decodeBulkRequest bool, counts *counters) *esDataReceiver { return &esDataReceiver{ DataReceiverBase: testbed.DataReceiverBase{}, endpoint: fmt.Sprintf("http://%s:%d", testbed.DefaultHost, testutil.GetAvailablePort(t)), decodeBulkRequest: decodeBulkRequest, - docCount: docCount, + counters: counts, t: t, } } @@ -78,7 +84,9 @@ func (es 
*esDataReceiver) Start(tc consumer.Traces, _ consumer.Metrics, lc consu cfg := factory.CreateDefaultConfig().(*config) cfg.ServerConfig.Endpoint = esURL.Host cfg.DecodeBulkRequests = es.decodeBulkRequest - cfg.DocCount = es.docCount + if es.counters != nil { + cfg.counters = es.counters + } set := receivertest.NewNopSettings() // Use an actual logger to log errors. @@ -138,8 +146,7 @@ type config struct { // bulk request will always return http.StatusOK. DecodeBulkRequests bool - // DocCount stores the sum of number of events from bulk requests. - DocCount *atomic.Int64 + *counters } func createDefaultConfig() component.Config { @@ -149,6 +156,7 @@ func createDefaultConfig() component.Config { MaxRequestBodySize: math.MaxInt64, }, DecodeBulkRequests: true, + counters: &counters{}, } } @@ -225,6 +233,7 @@ func (es *mockESReceiver) Start(ctx context.Context, host component.Host) error fmt.Fprintln(w, `{"version":{"number":"1.2.3"}}`) }) r.HandleFunc("/_bulk", func(w http.ResponseWriter, r *http.Request) { + es.config.observedBulkRequests.Add(1) if !es.config.DecodeBulkRequests { defer r.Body.Close() s := bufio.NewScanner(r.Body) @@ -232,9 +241,7 @@ func (es *mockESReceiver) Start(ctx context.Context, host component.Host) error for s.Scan() { cnt++ } - if es.config.DocCount != nil { - es.config.DocCount.Add(cnt / 2) // 1 line for action, 1 line for document - } + es.config.observedDocCount.Add(cnt / 2) // 1 line for action, 1 line for document if s.Err() != nil { w.WriteHeader(400) fmt.Fprintln(w, s.Err()) @@ -244,9 +251,7 @@ func (es *mockESReceiver) Start(ctx context.Context, host component.Host) error return } docs, response := docappendertest.DecodeBulkRequest(r) - if es.config.DocCount != nil { - es.config.DocCount.Add(int64(len(docs))) - } + es.config.observedDocCount.Add(int64(len(docs))) for _, itemMap := range response.Items { for k, item := range itemMap { var consumeErr error diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index 87b312659718..556185851bc0 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -54,11 +54,9 @@ func benchmarkLogs(b *testing.B, batchSize int, mappingMode string) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - var docCount atomic.Int64 - exporterSettings := exportertest.NewNopSettings() exporterSettings.TelemetrySettings.Logger = zaptest.NewLogger(b, zaptest.Level(zap.WarnLevel)) - runnerCfg := prepareBenchmark(b, batchSize, mappingMode, &docCount) + runnerCfg := prepareBenchmark(b, batchSize, mappingMode) exporter, err := runnerCfg.factory.CreateLogsExporter( ctx, exporterSettings, runnerCfg.esCfg, ) @@ -87,20 +85,22 @@ func benchmarkLogs(b *testing.B, batchSize int, mappingMode string) { "events/s", ) b.ReportMetric( - float64(docCount.Load())/b.Elapsed().Seconds(), + float64(runnerCfg.observedDocCount.Load())/b.Elapsed().Seconds(), "docs/s", ) + b.ReportMetric( + float64(runnerCfg.observedBulkRequests.Load())/b.Elapsed().Seconds(), + "bulkReqs/s", + ) } func benchmarkTraces(b *testing.B, batchSize int, mappingMode string) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - var docCount atomic.Int64 - exporterSettings := exportertest.NewNopSettings() exporterSettings.TelemetrySettings.Logger = zaptest.NewLogger(b, zaptest.Level(zap.WarnLevel)) - runnerCfg := prepareBenchmark(b, batchSize, 
mappingMode, &docCount) + runnerCfg := prepareBenchmark(b, batchSize, mappingMode) exporter, err := runnerCfg.factory.CreateTracesExporter( ctx, exporterSettings, runnerCfg.esCfg, ) @@ -130,9 +130,13 @@ func benchmarkTraces(b *testing.B, batchSize int, mappingMode string) { "events/s", ) b.ReportMetric( - float64(docCount.Load())/b.Elapsed().Seconds(), + float64(runnerCfg.observedDocCount.Load())/b.Elapsed().Seconds(), "docs/s", ) + b.ReportMetric( + float64(runnerCfg.observedBulkRequests.Load())/b.Elapsed().Seconds(), + "bulkReqs/s", + ) } type benchRunnerCfg struct { @@ -141,19 +145,22 @@ type benchRunnerCfg struct { esCfg *elasticsearchexporter.Config generatedCount atomic.Uint64 + + *counters } func prepareBenchmark( b *testing.B, batchSize int, mappingMode string, - docCount *atomic.Int64, ) *benchRunnerCfg { b.Helper() - cfg := &benchRunnerCfg{} + cfg := &benchRunnerCfg{ + counters: &counters{}, + } // Benchmarks don't decode the bulk requests to avoid allocations to pollute the results. - receiver := newElasticsearchDataReceiver(b, false /* DecodeBulkRequest */, docCount) + receiver := newElasticsearchDataReceiver(b, false /* DecodeBulkRequest */, cfg.counters) cfg.provider = testbed.NewPerfTestDataProvider(testbed.LoadOptions{ItemsPerBatch: batchSize}) cfg.provider.SetLoadGeneratorCounters(&cfg.generatedCount) From a64b62b4b03baffd2207db3a0b27cfa3d1883ef3 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 25 Jun 2024 18:38:08 +0100 Subject: [PATCH 093/117] Use http.StatusBadRequest instead of 400 --- exporter/elasticsearchexporter/integrationtest/datareceiver.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/integrationtest/datareceiver.go b/exporter/elasticsearchexporter/integrationtest/datareceiver.go index 0f9417b779df..488b9974bc43 100644 --- a/exporter/elasticsearchexporter/integrationtest/datareceiver.go +++ b/exporter/elasticsearchexporter/integrationtest/datareceiver.go @@ -243,7 +243,7 @@ func (es *mockESReceiver) Start(ctx context.Context, host component.Host) error } es.config.observedDocCount.Add(cnt / 2) // 1 line for action, 1 line for document if s.Err() != nil { - w.WriteHeader(400) + w.WriteHeader(http.StatusBadRequest) fmt.Fprintln(w, s.Err()) return } From 0b4acf31a27b6e442294b13e7f69e499fe05312c Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 26 Jun 2024 11:58:14 +0100 Subject: [PATCH 094/117] Call next Consume* --- .../integrationtest/datareceiver.go | 24 ++++++++++++++----- .../integrationtest/exporter_bench_test.go | 12 ++++++---- .../integrationtest/go.mod | 3 +++ .../integrationtest/go.sum | 6 +++++ 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/exporter/elasticsearchexporter/integrationtest/datareceiver.go b/exporter/elasticsearchexporter/integrationtest/datareceiver.go index 488b9974bc43..275b925bd31e 100644 --- a/exporter/elasticsearchexporter/integrationtest/datareceiver.go +++ b/exporter/elasticsearchexporter/integrationtest/datareceiver.go @@ -18,6 +18,7 @@ import ( "github.com/elastic/go-docappender/v2/docappendertest" "github.com/gorilla/mux" "github.com/stretchr/testify/require" + "github.com/tidwall/gjson" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/component/componenttest" "go.opentelemetry.io/collector/config/confighttp" @@ -46,7 +47,6 @@ const ( ) type counters struct { - observedDocCount atomic.Int64 observedBulkRequests atomic.Int64 } @@ -237,11 +237,23 @@ func (es *mockESReceiver) Start(ctx context.Context, host component.Host) error if 
!es.config.DecodeBulkRequests { defer r.Body.Close() s := bufio.NewScanner(r.Body) - var cnt int64 for s.Scan() { - cnt++ + action := gjson.GetBytes(s.Bytes(), "create._index") + if !action.Exists() { + // might be the last newline, skip + continue + } + switch action.Str { + case TestLogsIndex: + _ = es.logsConsumer.ConsumeLogs(context.Background(), emptyLogs) + case TestTracesIndex: + _ = es.tracesConsumer.ConsumeTraces(context.Background(), emptyTrace) + default: + w.WriteHeader(http.StatusBadRequest) + return + } + s.Scan() // skip next line } - es.config.observedDocCount.Add(cnt / 2) // 1 line for action, 1 line for document if s.Err() != nil { w.WriteHeader(http.StatusBadRequest) fmt.Fprintln(w, s.Err()) @@ -250,8 +262,8 @@ func (es *mockESReceiver) Start(ctx context.Context, host component.Host) error fmt.Fprintln(w, "{}") return } - docs, response := docappendertest.DecodeBulkRequest(r) - es.config.observedDocCount.Add(int64(len(docs))) + + _, response := docappendertest.DecodeBulkRequest(r) for _, itemMap := range response.Items { for k, item := range itemMap { var consumeErr error diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index 556185851bc0..9e3b6b41bd60 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -144,7 +144,8 @@ type benchRunnerCfg struct { provider testbed.DataProvider esCfg *elasticsearchexporter.Config - generatedCount atomic.Uint64 + generatedCount atomic.Uint64 + observedDocCount atomic.Int64 *counters } @@ -174,15 +175,18 @@ func prepareBenchmark( cfg.esCfg.NumWorkers = 1 cfg.esCfg.QueueSettings.Enabled = false - tc, err := consumer.NewTraces(func(context.Context, ptrace.Traces) error { + tc, err := consumer.NewTraces(func(_ context.Context, traces ptrace.Traces) error { + cfg.observedDocCount.Add(int64(traces.SpanCount())) return nil }) require.NoError(b, err) - mc, err := consumer.NewMetrics(func(context.Context, pmetric.Metrics) error { + mc, err := consumer.NewMetrics(func(_ context.Context, metrics pmetric.Metrics) error { + cfg.observedDocCount.Add(int64(metrics.DataPointCount())) return nil }) require.NoError(b, err) - lc, err := consumer.NewLogs(func(context.Context, plog.Logs) error { + lc, err := consumer.NewLogs(func(_ context.Context, logs plog.Logs) error { + cfg.observedDocCount.Add(int64(logs.LogRecordCount())) return nil }) require.NoError(b, err) diff --git a/exporter/elasticsearchexporter/integrationtest/go.mod b/exporter/elasticsearchexporter/integrationtest/go.mod index 1714a554e0ce..7d27ae5b9256 100644 --- a/exporter/elasticsearchexporter/integrationtest/go.mod +++ b/exporter/elasticsearchexporter/integrationtest/go.mod @@ -12,6 +12,7 @@ require ( github.com/open-telemetry/opentelemetry-collector-contrib/testbed v0.103.0 github.com/shirou/gopsutil/v4 v4.24.5 github.com/stretchr/testify v1.9.0 + github.com/tidwall/gjson v1.17.1 go.opentelemetry.io/collector/component v0.103.0 go.opentelemetry.io/collector/config/confighttp v0.103.0 go.opentelemetry.io/collector/confmap v0.103.0 @@ -112,6 +113,8 @@ require ( github.com/soheilhy/cmux v0.1.5 // indirect github.com/spf13/cobra v1.8.0 // indirect github.com/spf13/pflag v1.0.5 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.0 // indirect github.com/tilinna/clock v1.1.0 // indirect github.com/tklauser/go-sysconf v0.3.13 // indirect 
github.com/tklauser/numcpus v0.7.0 // indirect diff --git a/exporter/elasticsearchexporter/integrationtest/go.sum b/exporter/elasticsearchexporter/integrationtest/go.sum index ffe9a0f5c3c1..c8236c73e6e8 100644 --- a/exporter/elasticsearchexporter/integrationtest/go.sum +++ b/exporter/elasticsearchexporter/integrationtest/go.sum @@ -232,6 +232,12 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +github.com/tidwall/gjson v1.17.1 h1:wlYEnwqAHgzmhNUFfw7Xalt2JzQvsMx2Se4PcoFCT/U= +github.com/tidwall/gjson v1.17.1/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tilinna/clock v1.1.0 h1:6IQQQCo6KoBxVudv6gwtY8o4eDfhHo8ojA5dP0MfhSs= github.com/tilinna/clock v1.1.0/go.mod h1:ZsP7BcY7sEEz7ktc0IVy8Us6boDrK8VradlKRUGfOao= github.com/tklauser/go-sysconf v0.3.13 h1:GBUpcahXSpR2xN01jhkNAbTLRk2Yzgggk8IM08lq3r4= From 031c3c832605358c0846b40a0787843ae086c5f9 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 26 Jun 2024 14:52:57 +0100 Subject: [PATCH 095/117] Bench with different parallelisms --- .../integrationtest/exporter_bench_test.go | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index 9e3b6b41bd60..14e5dc461e71 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -37,14 +37,17 @@ func BenchmarkExporter(b *testing.B) { {name: "large_batch", batchSize: 1000}, {name: "xlarge_batch", batchSize: 10000}, } { - b.Run(fmt.Sprintf("%s/%s/%s", eventType, mappingMode, tc.name), func(b *testing.B) { - switch eventType { - case "logs": - benchmarkLogs(b, tc.batchSize, mappingMode) - case "traces": - benchmarkTraces(b, tc.batchSize, mappingMode) - } - }) + for _, parallelism := range []int{1, 100} { + b.Run(fmt.Sprintf("%s/%s/%s/parallelism=%d", eventType, mappingMode, tc.name, parallelism), func(b *testing.B) { + b.SetParallelism(parallelism) + switch eventType { + case "logs": + benchmarkLogs(b, tc.batchSize, mappingMode) + case "traces": + benchmarkTraces(b, tc.batchSize, mappingMode) + } + }) + } } } } @@ -72,7 +75,6 @@ func benchmarkLogs(b *testing.B, batchSize int, mappingMode string) { } i := atomic.Int64{} i.Store(-1) - b.SetParallelism(100) b.StartTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { @@ -117,7 +119,6 @@ func benchmarkTraces(b *testing.B, batchSize int, mappingMode string) { } i := atomic.Int64{} i.Store(-1) - b.SetParallelism(100) b.StartTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { From eae71c007e1dcf52c601a7d38304a1834f6f5203 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 1 Jul 2024 18:00:16 +0100 Subject: [PATCH 096/117] Ignore max_size_items for metrics exporter --- exporter/elasticsearchexporter/factory.go | 7 +++++++ 1 file 
changed, 7 insertions(+) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 0c1e4fb53fac..a5750e2e327f 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -143,6 +143,11 @@ func createMetricsExporter( cf := cfg.(*Config) logConfigDeprecationWarnings(cf, set.Logger) + // Workaround to avoid rejections from Elasticsearch + // TSDB does not accept 2 documents with the same timestamp and dimensions + cf.BatcherConfig.MaxSizeConfig.MaxSizeItems = 0 + set.Logger.Warn("batcher.max_size_items is ignored: metrics exporter does not support batch splitting") + exporter, err := newExporter(cf, set, cf.MetricsIndex, cf.MetricsDynamicIndex.Enabled) if err != nil { return nil, fmt.Errorf("cannot configure Elasticsearch exporter: %w", err) @@ -154,8 +159,10 @@ func createMetricsExporter( exporter.pushMetricsData, exporterhelper.WithCapabilities(consumer.Capabilities{MutatesData: true}), exporterhelper.WithStart(exporter.Start), + exporterhelper.WithBatcher(cf.BatcherConfig), exporterhelper.WithShutdown(exporter.Shutdown), exporterhelper.WithQueue(cf.QueueSettings), + exporterhelper.WithTimeout(getTimeoutConfig()), ) } From 0c39b7358aa67bd4fdc45dd6c7aa6e68f176f64e Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 1 Jul 2024 18:18:38 +0100 Subject: [PATCH 097/117] Fix metrics support --- cmd/otelcontribcol/go.mod | 6 +++--- cmd/otelcontribcol/go.sum | 3 +++ exporter/elasticsearchexporter/exporter.go | 18 +++++++++++++----- exporter/elasticsearchexporter/go.mod | 1 + exporter/elasticsearchexporter/go.sum | 2 ++ .../integrationtest/go.mod | 2 +- .../integrationtest/go.sum | 4 ++-- 7 files changed, 25 insertions(+), 11 deletions(-) diff --git a/cmd/otelcontribcol/go.mod b/cmd/otelcontribcol/go.mod index ecf5abc1a00f..f16cdf2047f5 100644 --- a/cmd/otelcontribcol/go.mod +++ b/cmd/otelcontribcol/go.mod @@ -794,15 +794,15 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect golang.org/x/crypto v0.24.0 // indirect - golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect - golang.org/x/mod v0.17.0 // indirect + golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 // indirect + golang.org/x/mod v0.18.0 // indirect golang.org/x/net v0.26.0 // indirect golang.org/x/oauth2 v0.21.0 // indirect golang.org/x/sync v0.7.0 // indirect golang.org/x/term v0.21.0 // indirect golang.org/x/text v0.16.0 // indirect golang.org/x/time v0.5.0 // indirect - golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect + golang.org/x/tools v0.22.0 // indirect golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect gonum.org/v1/gonum v0.15.0 // indirect google.golang.org/api v0.185.0 // indirect diff --git a/cmd/otelcontribcol/go.sum b/cmd/otelcontribcol/go.sum index a991e5cfe1fe..c3cb2b7f3af0 100644 --- a/cmd/otelcontribcol/go.sum +++ b/cmd/otelcontribcol/go.sum @@ -2535,6 +2535,7 @@ golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMk golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= +golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod 
h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= @@ -2580,6 +2581,7 @@ golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.9.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -2958,6 +2960,7 @@ golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s= golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/exporter/elasticsearchexporter/exporter.go b/exporter/elasticsearchexporter/exporter.go index 51cb22c9434b..6c2083adb10c 100644 --- a/exporter/elasticsearchexporter/exporter.go +++ b/exporter/elasticsearchexporter/exporter.go @@ -17,6 +17,7 @@ import ( "go.opentelemetry.io/collector/pdata/plog" "go.opentelemetry.io/collector/pdata/pmetric" "go.opentelemetry.io/collector/pdata/ptrace" + "golang.org/x/exp/slices" "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/objmodel" ) @@ -150,6 +151,9 @@ func (e *elasticsearchExporter) pushMetricsData( ctx context.Context, metrics pmetric.Metrics, ) error { + // Ideally the slice will be preallocated once and for all + // but the actual length is uncertain due to grouping + var items []esBulkIndexerItem var errs []error resourceMetrics := metrics.ResourceMetrics() @@ -192,6 +196,8 @@ func (e *elasticsearchExporter) pushMetricsData( } } + items = slices.Grow(items, len(resourceDocs)) + for fIndex, docs := range resourceDocs { for _, doc := range docs { var ( @@ -204,16 +210,18 @@ func (e *elasticsearchExporter) pushMetricsData( continue } - if err := pushDocuments(ctx, fIndex, docBytes, e.bulkIndexer); err != nil { - if cerr := ctx.Err(); cerr != nil { - return cerr - } - errs = append(errs, err) + item := esBulkIndexerItem{ + Index: fIndex, + Body: bytes.NewReader(docBytes), } + items = append(items, item) } } } + if err := e.bulkIndexer.AddBatchAndFlush(ctx, items); err != nil { + errs = append(errs, err) + } return errors.Join(errs...) 
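	// Accumulating every encoded document into items and issuing a single
	// AddBatchAndFlush per pushMetricsData call (as above) replaces the earlier
	// per-document pushDocuments round trips; encoding failures and the flush
	// error are then surfaced together through errors.Join.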
} diff --git a/exporter/elasticsearchexporter/go.mod b/exporter/elasticsearchexporter/go.mod index 700c6d2d1d01..f2ef505ae40d 100644 --- a/exporter/elasticsearchexporter/go.mod +++ b/exporter/elasticsearchexporter/go.mod @@ -26,6 +26,7 @@ require ( go.opentelemetry.io/otel/trace v1.27.0 go.uber.org/goleak v1.3.0 go.uber.org/zap v1.27.0 + golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 golang.org/x/sync v0.7.0 ) diff --git a/exporter/elasticsearchexporter/go.sum b/exporter/elasticsearchexporter/go.sum index 29d3fbf8760a..96f2469f5ca3 100644 --- a/exporter/elasticsearchexporter/go.sum +++ b/exporter/elasticsearchexporter/go.sum @@ -179,6 +179,8 @@ go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY= +golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= diff --git a/exporter/elasticsearchexporter/integrationtest/go.mod b/exporter/elasticsearchexporter/integrationtest/go.mod index 2188c792d0a7..8f7667555f4c 100644 --- a/exporter/elasticsearchexporter/integrationtest/go.mod +++ b/exporter/elasticsearchexporter/integrationtest/go.mod @@ -172,7 +172,7 @@ require ( go.opentelemetry.io/otel/trace v1.27.0 // indirect go.opentelemetry.io/proto/otlp v1.2.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect + golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 // indirect golang.org/x/net v0.26.0 // indirect golang.org/x/sys v0.21.0 // indirect golang.org/x/text v0.16.0 // indirect diff --git a/exporter/elasticsearchexporter/integrationtest/go.sum b/exporter/elasticsearchexporter/integrationtest/go.sum index a6e12b03311e..f431a3f7bc4e 100644 --- a/exporter/elasticsearchexporter/integrationtest/go.sum +++ b/exporter/elasticsearchexporter/integrationtest/go.sum @@ -390,8 +390,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= -golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= +golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY= +golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod 
h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= From a0c4c064409f302a667bb511f52b485e630040b7 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 1 Jul 2024 18:24:34 +0100 Subject: [PATCH 098/117] Log at info --- exporter/elasticsearchexporter/factory.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index a5750e2e327f..dc418f73e996 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -146,7 +146,7 @@ func createMetricsExporter( // Workaround to avoid rejections from Elasticsearch // TSDB does not accept 2 documents with the same timestamp and dimensions cf.BatcherConfig.MaxSizeConfig.MaxSizeItems = 0 - set.Logger.Warn("batcher.max_size_items is ignored: metrics exporter does not support batch splitting") + set.Logger.Info("batcher.max_size_items is ignored: metrics exporter does not support batch splitting") exporter, err := newExporter(cf, set, cf.MetricsIndex, cf.MetricsDynamicIndex.Enabled) if err != nil { From 887b06db3213f0ec6ca7d8c5d0cbec81f5bc22e4 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 1 Jul 2024 18:43:57 +0100 Subject: [PATCH 099/117] Fix tests --- exporter/elasticsearchexporter/config_test.go | 10 +++++++--- exporter/elasticsearchexporter/testdata/config.yaml | 7 +++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/exporter/elasticsearchexporter/config_test.go b/exporter/elasticsearchexporter/config_test.go index c8ab66d932c3..aff847dcc0b8 100644 --- a/exporter/elasticsearchexporter/config_test.go +++ b/exporter/elasticsearchexporter/config_test.go @@ -156,8 +156,11 @@ func TestConfig(t *testing.T) { Discovery: DiscoverySettings{ OnStart: true, }, - Flush: FlushSettings{ - Bytes: 10485760, + BatcherConfig: exporterbatcher.Config{ + Enabled: true, + FlushTimeout: 5 * time.Second, + MinSizeConfig: exporterbatcher.MinSizeConfig{MinSizeItems: 100}, + MaxSizeConfig: exporterbatcher.MaxSizeConfig{MaxSizeItems: 200}, }, Retry: RetrySettings{ Enabled: true, @@ -176,6 +179,7 @@ func TestConfig(t *testing.T) { PrefixSeparator: "-", DateFormat: "%Y.%m.%d", }, + NumWorkers: 1, }, }, { @@ -184,7 +188,7 @@ func TestConfig(t *testing.T) { expected: &Config{ QueueSettings: exporterhelper.QueueSettings{ Enabled: true, - NumConsumers: exporterhelper.NewDefaultQueueSettings().NumConsumers, + NumConsumers: 100, QueueSize: exporterhelper.NewDefaultQueueSettings().QueueSize, }, Endpoints: []string{"http://localhost:9200"}, diff --git a/exporter/elasticsearchexporter/testdata/config.yaml b/exporter/elasticsearchexporter/testdata/config.yaml index 57708cd77723..ccbd56d47681 100644 --- a/exporter/elasticsearchexporter/testdata/config.yaml +++ b/exporter/elasticsearchexporter/testdata/config.yaml @@ -38,8 +38,10 @@ elasticsearch/metric: api_key: AvFsEiPs== discover: on_start: true - flush: - bytes: 10485760 + batcher: + min_size_items: 100 + max_size_items: 200 + flush_timeout: 5s retry: max_requests: 5 retry_on_status: @@ -47,6 +49,7 @@ elasticsearch/metric: - 500 sending_queue: enabled: true + num_workers: 1 elasticsearch/log: tls: insecure: false From 18ce06ddad336568056536caf39b3076fb3600e4 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 1 Jul 2024 21:36:15 +0100 Subject: [PATCH 100/117] Add BenchmarkExporterFlushItems --- .../integrationtest/exporter_bench_test.go | 144 ++++++++++++++---- 1 file changed, 
114 insertions(+), 30 deletions(-) diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index 14e5dc461e71..5c98bc15605d 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -6,6 +6,7 @@ package integrationtest import ( "context" "fmt" + "math" "sync/atomic" "testing" "time" @@ -25,7 +26,96 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/testbed/testbed" ) -func BenchmarkExporter(b *testing.B) { +// BenchmarkExporterFlushItems benchmarks exporter flush triggered by flush batch size settings, e.g. min_size_items. +func BenchmarkExporterFlushItems(b *testing.B) { + updateESCfg := func(esCfg *elasticsearchexporter.Config) { + esCfg.BatcherConfig.MinSizeItems = 100 // has to be smaller than the smallest batch size, otherwise it will block + esCfg.BatcherConfig.MaxSizeItems = 500 + esCfg.BatcherConfig.FlushTimeout = time.Hour + } + for _, eventType := range []string{"logs", "traces"} { + for _, mappingMode := range []string{"none", "ecs", "raw"} { + for _, tc := range []struct { + name string + batchSize int + }{ + {name: "medium_batch", batchSize: 100}, + {name: "large_batch", batchSize: 1000}, + {name: "xlarge_batch", batchSize: 10000}, + } { + b.Run(fmt.Sprintf("%s/%s/%s", eventType, mappingMode, tc.name), func(b *testing.B) { + switch eventType { + case "logs": + benchmarkLogs(b, tc.batchSize, mappingMode, updateESCfg) + case "traces": + benchmarkTraces(b, tc.batchSize, mappingMode, updateESCfg) + } + }) + } + } + } +} + +func benchmarkLogs(b *testing.B, batchSize int, mappingMode string, updateESCfg func(*elasticsearchexporter.Config)) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + exporterSettings := exportertest.NewNopSettings() + exporterSettings.TelemetrySettings.Logger = zaptest.NewLogger(b, zaptest.Level(zap.WarnLevel)) + runnerCfg := prepareBenchmark(b, batchSize, mappingMode) + updateESCfg(runnerCfg.esCfg) + exporter, err := runnerCfg.factory.CreateLogsExporter( + ctx, exporterSettings, runnerCfg.esCfg, + ) + require.NoError(b, err) + require.NoError(b, exporter.Start(ctx, componenttest.NewNopHost())) + + b.ReportAllocs() + b.ResetTimer() + b.StopTimer() + for i := 0; i < b.N; i++ { + logs, _ := runnerCfg.provider.GenerateLogs() + b.StartTimer() + require.NoError(b, exporter.ConsumeLogs(ctx, logs)) + b.StopTimer() + } + require.NoError(b, exporter.Shutdown(ctx)) + reportMetrics(b, runnerCfg) +} + +func benchmarkTraces(b *testing.B, batchSize int, mappingMode string, updateESCfg func(*elasticsearchexporter.Config)) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + exporterSettings := exportertest.NewNopSettings() + exporterSettings.TelemetrySettings.Logger = zaptest.NewLogger(b, zaptest.Level(zap.WarnLevel)) + runnerCfg := prepareBenchmark(b, batchSize, mappingMode) + updateESCfg(runnerCfg.esCfg) + exporter, err := runnerCfg.factory.CreateTracesExporter( + ctx, exporterSettings, runnerCfg.esCfg, + ) + require.NoError(b, err) + require.NoError(b, exporter.Start(ctx, componenttest.NewNopHost())) + + b.ReportAllocs() + b.ResetTimer() + b.StopTimer() + for i := 0; i < b.N; i++ { + traces, _ := runnerCfg.provider.GenerateTraces() + b.StartTimer() + require.NoError(b, exporter.ConsumeTraces(ctx, traces)) + b.StopTimer() + } + require.NoError(b, exporter.Shutdown(ctx)) + reportMetrics(b, 
runnerCfg) +} + +// BenchmarkExporterFlushTimeout benchmarks exporter flush triggered by "flush timeout" aka flush interval. +func BenchmarkExporterFlushTimeout(b *testing.B) { + updateESCfg := func(esCfg *elasticsearchexporter.Config) { + esCfg.BatcherConfig.MinSizeItems = math.MaxInt + esCfg.BatcherConfig.FlushTimeout = 10 * time.Millisecond + } for _, eventType := range []string{"logs", "traces"} { for _, mappingMode := range []string{"none", "ecs", "raw"} { for _, tc := range []struct { @@ -42,9 +132,9 @@ func BenchmarkExporter(b *testing.B) { b.SetParallelism(parallelism) switch eventType { case "logs": - benchmarkLogs(b, tc.batchSize, mappingMode) + benchmarkLogsParallel(b, tc.batchSize, mappingMode, updateESCfg) case "traces": - benchmarkTraces(b, tc.batchSize, mappingMode) + benchmarkTracesParallel(b, tc.batchSize, mappingMode, updateESCfg) } }) } @@ -53,13 +143,14 @@ func BenchmarkExporter(b *testing.B) { } } -func benchmarkLogs(b *testing.B, batchSize int, mappingMode string) { +func benchmarkLogsParallel(b *testing.B, batchSize int, mappingMode string, updateESCfg func(*elasticsearchexporter.Config)) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() exporterSettings := exportertest.NewNopSettings() exporterSettings.TelemetrySettings.Logger = zaptest.NewLogger(b, zaptest.Level(zap.WarnLevel)) runnerCfg := prepareBenchmark(b, batchSize, mappingMode) + updateESCfg(runnerCfg.esCfg) exporter, err := runnerCfg.factory.CreateLogsExporter( ctx, exporterSettings, runnerCfg.esCfg, ) @@ -82,27 +173,17 @@ func benchmarkLogs(b *testing.B, batchSize int, mappingMode string) { } }) require.NoError(b, exporter.Shutdown(ctx)) - b.ReportMetric( - float64(runnerCfg.generatedCount.Load())/b.Elapsed().Seconds(), - "events/s", - ) - b.ReportMetric( - float64(runnerCfg.observedDocCount.Load())/b.Elapsed().Seconds(), - "docs/s", - ) - b.ReportMetric( - float64(runnerCfg.observedBulkRequests.Load())/b.Elapsed().Seconds(), - "bulkReqs/s", - ) + reportMetrics(b, runnerCfg) } -func benchmarkTraces(b *testing.B, batchSize int, mappingMode string) { +func benchmarkTracesParallel(b *testing.B, batchSize int, mappingMode string, updateESCfg func(*elasticsearchexporter.Config)) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() exporterSettings := exportertest.NewNopSettings() exporterSettings.TelemetrySettings.Logger = zaptest.NewLogger(b, zaptest.Level(zap.WarnLevel)) runnerCfg := prepareBenchmark(b, batchSize, mappingMode) + updateESCfg(runnerCfg.esCfg) exporter, err := runnerCfg.factory.CreateTracesExporter( ctx, exporterSettings, runnerCfg.esCfg, ) @@ -126,18 +207,7 @@ func benchmarkTraces(b *testing.B, batchSize int, mappingMode string) { } }) require.NoError(b, exporter.Shutdown(ctx)) - b.ReportMetric( - float64(runnerCfg.generatedCount.Load())/b.Elapsed().Seconds(), - "events/s", - ) - b.ReportMetric( - float64(runnerCfg.observedDocCount.Load())/b.Elapsed().Seconds(), - "docs/s", - ) - b.ReportMetric( - float64(runnerCfg.observedBulkRequests.Load())/b.Elapsed().Seconds(), - "bulkReqs/s", - ) + reportMetrics(b, runnerCfg) } type benchRunnerCfg struct { @@ -172,7 +242,6 @@ func prepareBenchmark( cfg.esCfg.Endpoints = []string{receiver.endpoint} cfg.esCfg.LogsIndex = TestLogsIndex cfg.esCfg.TracesIndex = TestTracesIndex - cfg.esCfg.Flush.Interval = 10 * time.Millisecond cfg.esCfg.NumWorkers = 1 cfg.esCfg.QueueSettings.Enabled = false @@ -197,3 +266,18 @@ func prepareBenchmark( return cfg } + +func reportMetrics(b *testing.B, runnerCfg *benchRunnerCfg) { + 
b.ReportMetric( + float64(runnerCfg.generatedCount.Load())/b.Elapsed().Seconds(), + "events/s", + ) + b.ReportMetric( + float64(runnerCfg.observedDocCount.Load())/b.Elapsed().Seconds(), + "docs/s", + ) + b.ReportMetric( + float64(runnerCfg.observedBulkRequests.Load())/b.Elapsed().Seconds(), + "bulkReqs/s", + ) +} From 5652cbf1fa9864b1d635a08ced0f511f35cd35a5 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 2 Jul 2024 09:40:25 +0100 Subject: [PATCH 101/117] Use slices instead of x/exp/slices --- cmd/otelcontribcol/go.sum | 9 +++------ exporter/elasticsearchexporter/exporter.go | 2 +- exporter/elasticsearchexporter/go.mod | 1 - exporter/elasticsearchexporter/go.sum | 2 -- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/cmd/otelcontribcol/go.sum b/cmd/otelcontribcol/go.sum index c3cb2b7f3af0..b321d1760537 100644 --- a/cmd/otelcontribcol/go.sum +++ b/cmd/otelcontribcol/go.sum @@ -2533,8 +2533,7 @@ golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= -golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= -golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= +golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY= golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= @@ -2579,8 +2578,7 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91 golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.9.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= -golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -2958,8 +2956,7 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc golang.org/x/tools v0.3.0/go.mod h1:/rWhSS2+zyEVwoJf8YAX6L2f0ntZ7Kn/mGgAWcipA5k= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s= -golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= -golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= 
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/exporter/elasticsearchexporter/exporter.go b/exporter/elasticsearchexporter/exporter.go index 6c2083adb10c..6a193e85347c 100644 --- a/exporter/elasticsearchexporter/exporter.go +++ b/exporter/elasticsearchexporter/exporter.go @@ -9,6 +9,7 @@ import ( "errors" "fmt" "runtime" + "slices" "time" "go.opentelemetry.io/collector/component" @@ -17,7 +18,6 @@ import ( "go.opentelemetry.io/collector/pdata/plog" "go.opentelemetry.io/collector/pdata/pmetric" "go.opentelemetry.io/collector/pdata/ptrace" - "golang.org/x/exp/slices" "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/objmodel" ) diff --git a/exporter/elasticsearchexporter/go.mod b/exporter/elasticsearchexporter/go.mod index f2ef505ae40d..700c6d2d1d01 100644 --- a/exporter/elasticsearchexporter/go.mod +++ b/exporter/elasticsearchexporter/go.mod @@ -26,7 +26,6 @@ require ( go.opentelemetry.io/otel/trace v1.27.0 go.uber.org/goleak v1.3.0 go.uber.org/zap v1.27.0 - golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 golang.org/x/sync v0.7.0 ) diff --git a/exporter/elasticsearchexporter/go.sum b/exporter/elasticsearchexporter/go.sum index 96f2469f5ca3..29d3fbf8760a 100644 --- a/exporter/elasticsearchexporter/go.sum +++ b/exporter/elasticsearchexporter/go.sum @@ -179,8 +179,6 @@ go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY= -golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= From 6140933723c926c6515d342054b8e7f49aa26d87 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 2 Jul 2024 10:16:35 +0100 Subject: [PATCH 102/117] Update exporter/elasticsearchexporter/factory.go Co-authored-by: Andrew Wilkins --- exporter/elasticsearchexporter/factory.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index dc418f73e996..e12d1bd1a82d 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -143,8 +143,12 @@ func createMetricsExporter( cf := cfg.(*Config) logConfigDeprecationWarnings(cf, set.Logger) - // Workaround to avoid rejections from Elasticsearch - // TSDB does not accept 2 documents with the same timestamp and dimensions + // Workaround to avoid rejections from Elasticsearch. + // TSDB does not accept 2 documents with the same timestamp and dimensions. 
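	// (A MaxSizeItems of 0 is read by the batcher as "no maximum", so batches
	// are never split on size.)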
+ // + // Setting MaxSizeItems ensures that the batcher will not split a set of + // metrics into multiple batches, potentially sending two metric data points + // with the same timestamp and dimensions as separate documents. cf.BatcherConfig.MaxSizeConfig.MaxSizeItems = 0 set.Logger.Info("batcher.max_size_items is ignored: metrics exporter does not support batch splitting") From 5b25bba19d4a392804115f7b798c1c416b19d14e Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 2 Jul 2024 10:57:01 +0100 Subject: [PATCH 103/117] Use warn --- exporter/elasticsearchexporter/factory.go | 2 +- exporter/elasticsearchexporter/factory_test.go | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index dc418f73e996..a5750e2e327f 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -146,7 +146,7 @@ func createMetricsExporter( // Workaround to avoid rejections from Elasticsearch // TSDB does not accept 2 documents with the same timestamp and dimensions cf.BatcherConfig.MaxSizeConfig.MaxSizeItems = 0 - set.Logger.Info("batcher.max_size_items is ignored: metrics exporter does not support batch splitting") + set.Logger.Warn("batcher.max_size_items is ignored: metrics exporter does not support batch splitting") exporter, err := newExporter(cf, set, cf.MetricsIndex, cf.MetricsDynamicIndex.Enabled) if err != nil { diff --git a/exporter/elasticsearchexporter/factory_test.go b/exporter/elasticsearchexporter/factory_test.go index edce90da056d..9b4923f33587 100644 --- a/exporter/elasticsearchexporter/factory_test.go +++ b/exporter/elasticsearchexporter/factory_test.go @@ -126,10 +126,13 @@ func TestFactory_DedupDeprecated(t *testing.T) { require.NoError(t, metricsExporter.Shutdown(context.Background())) records := logObserver.AllUntimed() - assert.Len(t, records, 3) - assert.Equal(t, "dedup has been deprecated, and will always be enabled in future", records[0].Message) - assert.Equal(t, "dedup has been deprecated, and will always be enabled in future", records[1].Message) - assert.Equal(t, "dedup has been deprecated, and will always be enabled in future", records[2].Message) + var cnt int + for _, record := range records { + if record.Message == "dedup has been deprecated, and will always be enabled in future" { + cnt++ + } + } + assert.Equal(t, 3, cnt) } func TestFactory_DedotDeprecated(t *testing.T) { @@ -165,8 +168,11 @@ func TestFactory_DedotDeprecated(t *testing.T) { } records := logObserver.AllUntimed() - assert.Len(t, records, 6) + var cnt int for _, record := range records { - assert.Equal(t, "dedot has been deprecated: in the future, dedotting will always be performed in ECS mode only", record.Message) + if record.Message == "dedot has been deprecated: in the future, dedotting will always be performed in ECS mode only" { + cnt++ + } } + assert.Equal(t, 6, cnt) } From ac900bd9ca626dbdc30a3773175c8e8b4bea6229 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 2 Jul 2024 15:05:23 +0100 Subject: [PATCH 104/117] Remove flush_timeout-based bench --- .../integrationtest/exporter_bench_test.go | 101 ------------------ 1 file changed, 101 deletions(-) diff --git a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go index 5c98bc15605d..8008ea72691c 100644 --- a/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go +++ 
b/exporter/elasticsearchexporter/integrationtest/exporter_bench_test.go @@ -6,7 +6,6 @@ package integrationtest import ( "context" "fmt" - "math" "sync/atomic" "testing" "time" @@ -110,106 +109,6 @@ func benchmarkTraces(b *testing.B, batchSize int, mappingMode string, updateESCf reportMetrics(b, runnerCfg) } -// BenchmarkExporterFlushTimeout benchmarks exporter flush triggered by "flush timeout" aka flush interval. -func BenchmarkExporterFlushTimeout(b *testing.B) { - updateESCfg := func(esCfg *elasticsearchexporter.Config) { - esCfg.BatcherConfig.MinSizeItems = math.MaxInt - esCfg.BatcherConfig.FlushTimeout = 10 * time.Millisecond - } - for _, eventType := range []string{"logs", "traces"} { - for _, mappingMode := range []string{"none", "ecs", "raw"} { - for _, tc := range []struct { - name string - batchSize int - }{ - {name: "small_batch", batchSize: 10}, - {name: "medium_batch", batchSize: 100}, - {name: "large_batch", batchSize: 1000}, - {name: "xlarge_batch", batchSize: 10000}, - } { - for _, parallelism := range []int{1, 100} { - b.Run(fmt.Sprintf("%s/%s/%s/parallelism=%d", eventType, mappingMode, tc.name, parallelism), func(b *testing.B) { - b.SetParallelism(parallelism) - switch eventType { - case "logs": - benchmarkLogsParallel(b, tc.batchSize, mappingMode, updateESCfg) - case "traces": - benchmarkTracesParallel(b, tc.batchSize, mappingMode, updateESCfg) - } - }) - } - } - } - } -} - -func benchmarkLogsParallel(b *testing.B, batchSize int, mappingMode string, updateESCfg func(*elasticsearchexporter.Config)) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - exporterSettings := exportertest.NewNopSettings() - exporterSettings.TelemetrySettings.Logger = zaptest.NewLogger(b, zaptest.Level(zap.WarnLevel)) - runnerCfg := prepareBenchmark(b, batchSize, mappingMode) - updateESCfg(runnerCfg.esCfg) - exporter, err := runnerCfg.factory.CreateLogsExporter( - ctx, exporterSettings, runnerCfg.esCfg, - ) - require.NoError(b, err) - require.NoError(b, exporter.Start(ctx, componenttest.NewNopHost())) - - b.ReportAllocs() - b.ResetTimer() - b.StopTimer() - logsArr := make([]plog.Logs, b.N) - for i := 0; i < b.N; i++ { - logsArr[i], _ = runnerCfg.provider.GenerateLogs() - } - i := atomic.Int64{} - i.Store(-1) - b.StartTimer() - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - require.NoError(b, exporter.ConsumeLogs(ctx, logsArr[i.Add(1)])) - } - }) - require.NoError(b, exporter.Shutdown(ctx)) - reportMetrics(b, runnerCfg) -} - -func benchmarkTracesParallel(b *testing.B, batchSize int, mappingMode string, updateESCfg func(*elasticsearchexporter.Config)) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - exporterSettings := exportertest.NewNopSettings() - exporterSettings.TelemetrySettings.Logger = zaptest.NewLogger(b, zaptest.Level(zap.WarnLevel)) - runnerCfg := prepareBenchmark(b, batchSize, mappingMode) - updateESCfg(runnerCfg.esCfg) - exporter, err := runnerCfg.factory.CreateTracesExporter( - ctx, exporterSettings, runnerCfg.esCfg, - ) - require.NoError(b, err) - require.NoError(b, exporter.Start(ctx, componenttest.NewNopHost())) - - b.ReportAllocs() - b.ResetTimer() - b.StopTimer() - - tracesArr := make([]ptrace.Traces, b.N) - for i := 0; i < b.N; i++ { - tracesArr[i], _ = runnerCfg.provider.GenerateTraces() - } - i := atomic.Int64{} - i.Store(-1) - b.StartTimer() - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - require.NoError(b, exporter.ConsumeTraces(ctx, tracesArr[i.Add(1)])) - } - }) - require.NoError(b, 
exporter.Shutdown(ctx)) - reportMetrics(b, runnerCfg) -} - type benchRunnerCfg struct { factory exporter.Factory provider testbed.DataProvider From fd56e5fbed49ff1bab0029acc978f5f1807eac39 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 2 Jul 2024 15:38:35 +0100 Subject: [PATCH 105/117] Make linter happy --- exporter/elasticsearchexporter/elasticsearch_bulk.go | 3 +-- exporter/elasticsearchexporter/exporter_test.go | 2 +- exporter/elasticsearchexporter/factory.go | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 2842ce050e6f..92706d2f5206 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -148,7 +148,7 @@ func createElasticsearchBackoffFunc(config *RetrySettings) func(int) time.Durati } } -func newBulkIndexer(logger *zap.Logger, client *esClientCurrent, config *Config) (*esBulkIndexerCurrent, error) { +func newBulkIndexer(logger *zap.Logger, client *esClientCurrent, config *Config) (*esBulkIndexerCurrent, error) { //nolint:unparam return &bulkIndexerManager{ closeCh: make(chan struct{}), stats: bulkIndexerStats{}, @@ -171,7 +171,6 @@ type bulkIndexerManager struct { config *Config wg *sync.WaitGroup sem *semaphore.Weighted - pool *sync.Pool client *esClientCurrent } diff --git a/exporter/elasticsearchexporter/exporter_test.go b/exporter/elasticsearchexporter/exporter_test.go index ebebdca6b855..26bacf977817 100644 --- a/exporter/elasticsearchexporter/exporter_test.go +++ b/exporter/elasticsearchexporter/exporter_test.go @@ -868,7 +868,7 @@ func newUnstartedTestLogsExporter(t *testing.T, url string, fns ...func(*Config) return exp } -func sendLogRecords(t *testing.T, exporter exporter.Logs, records ...plog.LogRecord) error { +func sendLogRecords(_ *testing.T, exporter exporter.Logs, records ...plog.LogRecord) error { logs := plog.NewLogs() resourceLogs := logs.ResourceLogs().AppendEmpty() scopeLogs := resourceLogs.ScopeLogs().AppendEmpty() diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index a528a2edf853..4bd661b24f14 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -210,7 +210,7 @@ func getTimeoutConfig() exporterhelper.TimeoutSettings { // handleDeprecations handles deprecated config options. // If possible, translate deprecated config options to new config options // Otherwise, return an error so that the user is aware of an unsupported option. -func handleDeprecations(cf *Config, logger *zap.Logger) error { +func handleDeprecations(cf *Config, logger *zap.Logger) error { //nolint:unparam if cf.Index != "" { logger.Warn(`"index" option is deprecated and replaced with "logs_index" and "traces_index". 
Setting "logs_index" to the value of "index".`, zap.String("value", cf.Index)) cf.LogsIndex = cf.Index From 96ce9d67ed74427023f2128053d78f370daadae2 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 2 Jul 2024 16:53:22 +0100 Subject: [PATCH 106/117] Fix integration test --- .../elasticsearchexporter/integrationtest/datareceiver.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/exporter/elasticsearchexporter/integrationtest/datareceiver.go b/exporter/elasticsearchexporter/integrationtest/datareceiver.go index 1497633a7a3e..81bfcf7079d5 100644 --- a/exporter/elasticsearchexporter/integrationtest/datareceiver.go +++ b/exporter/elasticsearchexporter/integrationtest/datareceiver.go @@ -121,11 +121,18 @@ func (es *esDataReceiver) GenConfigYAMLStr() string { endpoints: [%s] logs_index: %s traces_index: %s + batcher: + flush_timeout: 1s sending_queue: enabled: true + storage: file_storage/elasticsearchexporter + num_consumers: 100 + queue_size: 100000 retry: enabled: true max_requests: 10000 + initial_interval: 100ms + max_interval: 1s ` return fmt.Sprintf(cfgFormat, es.endpoint, TestLogsIndex, TestTracesIndex) } From 7c24c27eb1b3c415c3f5d692ff7e2d13b6eae466 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 2 Jul 2024 17:29:59 +0100 Subject: [PATCH 107/117] Revert otelcontribcol --- cmd/otelcontribcol/go.mod | 6 +++--- cmd/otelcontribcol/go.sum | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cmd/otelcontribcol/go.mod b/cmd/otelcontribcol/go.mod index ec07a2c76f8e..65caae345473 100644 --- a/cmd/otelcontribcol/go.mod +++ b/cmd/otelcontribcol/go.mod @@ -794,15 +794,15 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect golang.org/x/crypto v0.24.0 // indirect - golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 // indirect - golang.org/x/mod v0.18.0 // indirect + golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect + golang.org/x/mod v0.17.0 // indirect golang.org/x/net v0.26.0 // indirect golang.org/x/oauth2 v0.21.0 // indirect golang.org/x/sync v0.7.0 // indirect golang.org/x/term v0.21.0 // indirect golang.org/x/text v0.16.0 // indirect golang.org/x/time v0.5.0 // indirect - golang.org/x/tools v0.22.0 // indirect + golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect gonum.org/v1/gonum v0.15.0 // indirect google.golang.org/api v0.185.0 // indirect diff --git a/cmd/otelcontribcol/go.sum b/cmd/otelcontribcol/go.sum index cd9c2b561f88..c3211d47f005 100644 --- a/cmd/otelcontribcol/go.sum +++ b/cmd/otelcontribcol/go.sum @@ -2537,8 +2537,8 @@ golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= -golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY= -golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod 
h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= @@ -2582,8 +2582,8 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91 golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.9.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= -golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -2960,8 +2960,8 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc golang.org/x/tools v0.3.0/go.mod h1:/rWhSS2+zyEVwoJf8YAX6L2f0ntZ7Kn/mGgAWcipA5k= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s= -golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= -golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= From fb6fa029ff12dc92e14b99c756f3d35ac34c6361 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 3 Jul 2024 19:51:40 +0100 Subject: [PATCH 108/117] Make a copy of batcherConfig --- exporter/elasticsearchexporter/factory.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 1e20296a6468..33c0ab121a4b 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -153,7 +153,8 @@ func createMetricsExporter( // Setting MaxSizeItems ensures that the batcher will not split a set of // metrics into multiple batches, potentially sending two metric data points // with the same timestamp and dimensions as separate documents. 
- cf.BatcherConfig.MaxSizeConfig.MaxSizeItems = 0 + batcherCfg := cf.BatcherConfig + batcherCfg.MaxSizeConfig.MaxSizeItems = 0 set.Logger.Warn("batcher.max_size_items is ignored: metrics exporter does not support batch splitting") exporter, err := newExporter(cf, set, cf.MetricsIndex, cf.MetricsDynamicIndex.Enabled) @@ -167,7 +168,7 @@ func createMetricsExporter( exporter.pushMetricsData, exporterhelper.WithCapabilities(consumer.Capabilities{MutatesData: true}), exporterhelper.WithStart(exporter.Start), - exporterhelper.WithBatcher(cf.BatcherConfig), + exporterhelper.WithBatcher(batcherCfg), exporterhelper.WithShutdown(exporter.Shutdown), exporterhelper.WithQueue(cf.QueueSettings), exporterhelper.WithTimeout(getTimeoutConfig()), From b860dc0646d422dda04d3601421d6a9eaac82e36 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 3 Jul 2024 19:53:59 +0100 Subject: [PATCH 109/117] Add handleDeprecations to metrics --- exporter/elasticsearchexporter/factory.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 33c0ab121a4b..02e8025bee9a 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -145,6 +145,10 @@ func createMetricsExporter( cfg component.Config, ) (exporter.Metrics, error) { cf := cfg.(*Config) + + if err := handleDeprecations(cf, set.Logger); err != nil { + return nil, err + } logConfigDeprecationWarnings(cf, set.Logger) // Workaround to avoid rejections from Elasticsearch. From 6a5a2e3c0e139a320c07d895b5a33027b039016a Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 10 Jul 2024 13:45:15 +0100 Subject: [PATCH 110/117] Add link to batcher settings and mention experimental --- exporter/elasticsearchexporter/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index f03f91eeffea..602c9b412652 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -86,7 +86,7 @@ When persistent queue is used, there should be no event loss even on collector c ### Batching -The Elasticsearch exporter supports the common `batcher` settings. +The Elasticsearch exporter supports the [common `batcher` settings](https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/exporterbatcher/config.go). The `batcher` config is experimental and may change without notice. - `enabled` (default=true): Enable batching of requests into a single bulk request. - `min_size_items` (default=5000): Minimum number of log records / spans in the buffer to trigger a flush immediately. From 50e9353db1c83ce222988ee1a9c4be8c7a0007fb Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 10 Jul 2024 13:48:28 +0100 Subject: [PATCH 111/117] sending_queue.num_consumers vs num_workers --- exporter/elasticsearchexporter/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index 602c9b412652..2c960fb745ee 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -80,10 +80,10 @@ All other defaults are as defined by [confighttp]. The Elasticsearch exporter supports the common [`sending_queue` settings][exporterhelper]. The sending queue is enabled by default. -Default `num_consumers` is `100`. 
- When persistent queue is used, there should be no event loss even on collector crashes. +`num_consumers` (default=100) controls the number of concurrent requests being fetched from the queue to the batcher, or directly to the bulk indexer if the batcher is disabled. However, the actual number of concurrent bulk requests is controlled by `num_workers`. + ### Batching The Elasticsearch exporter supports the [common `batcher` settings](https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/exporterbatcher/config.go). The `batcher` config is experimental and may change without notice. From e87e797845ef5151403b151b55660f96c7f6abdd Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 10 Jul 2024 13:52:43 +0100 Subject: [PATCH 112/117] Link to persistent queue --- exporter/elasticsearchexporter/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index 2c960fb745ee..a2387510062d 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -80,7 +80,7 @@ All other defaults are as defined by [confighttp]. The Elasticsearch exporter supports the common [`sending_queue` settings][exporterhelper]. The sending queue is enabled by default. -When persistent queue is used, there should be no event loss even on collector crashes. +When [persistent queue](https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/exporterhelper/README.md#persistent-queue) is used, there should be no event loss even on collector crashes. `num_consumers` (default=100) controls the number of concurrent requests being fetched from the queue to the batcher, or directly to the bulk indexer if the batcher is disabled. However, the actual number of concurrent bulk requests is controlled by `num_workers`. From 38c01eb9c8c8c1312937b11f69bf5f0080bd74fe Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 10 Jul 2024 13:58:22 +0100 Subject: [PATCH 113/117] Mention flush.bytes to batcher.min_size_items translation --- exporter/elasticsearchexporter/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index a2387510062d..fe570e39589d 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -173,7 +173,7 @@ The behaviour of this bulk indexing can be configured with the following setting - `num_workers` (default=runtime.NumCPU()): Maximum number of concurrent bulk requests. - `flush`: Event bulk indexer buffer flush settings - `bytes` (DEPRECATED, use `batcher.min_size_items` instead): Write buffer flush size limit. When specified, it is translated to `batcher.min_size_items` using an estimated average item size of 1000 bytes. - `interval` (DEPRECATED, use `batcher.flush_timeout` instead): Maximum time the oldest item may spend inside the buffer, aka "max age of buffer". A flush will happen regardless of the size of the content in the buffer. - `retry`: Elasticsearch bulk request retry settings - `enabled` (default=true): Enable/Disable request retry on error. Failed requests are retried with exponential backoff. 
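The `flush.bytes` deprecation documented in the patch above maps a byte-based limit onto an item count using an assumed average item size of 1000 bytes. Below is a minimal sketch of that translation, under the stated 1000-byte assumption and with a hypothetical helper name (the exporter's real logic lives in `handleDeprecations`, shown in the following patches):

```go
package main

import "fmt"

// avgItemSizeBytes is the average bulk item size assumed by the README
// text above; it is a documentation assumption, not a measured value.
const avgItemSizeBytes = 1000

// flushBytesToMinSizeItems is a hypothetical helper illustrating how a
// deprecated flush.bytes limit can be converted into an approximate
// batcher.min_size_items value.
func flushBytesToMinSizeItems(flushBytes int) int {
	if flushBytes <= 0 {
		return 0 // option unset: nothing to translate
	}
	items := flushBytes / avgItemSizeBytes
	if items < 1 {
		items = 1 // round very small byte limits up to one item
	}
	return items
}

func main() {
	// A 5 MB flush limit becomes a 5000-item minimum batch size.
	fmt.Println(flushBytesToMinSizeItems(5_000_000))
}
```

Dividing by a fixed average keeps the deprecated option working without having to measure real document sizes at configuration time.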
From 9113d1c5ea6d80dc7e1af6efbe2ec173547c861b Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 10 Jul 2024 13:59:38 +0100 Subject: [PATCH 114/117] Refactor handleDeprecations --- exporter/elasticsearchexporter/config.go | 10 ---------- exporter/elasticsearchexporter/factory.go | 10 +++++++--- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/exporter/elasticsearchexporter/config.go b/exporter/elasticsearchexporter/config.go index c343b7473646..7098410b38f7 100644 --- a/exporter/elasticsearchexporter/config.go +++ b/exporter/elasticsearchexporter/config.go @@ -16,7 +16,6 @@ import ( "go.opentelemetry.io/collector/config/configopaque" "go.opentelemetry.io/collector/exporter/exporterbatcher" "go.opentelemetry.io/collector/exporter/exporterhelper" - "go.uber.org/zap" ) // Config defines configuration for Elastic exporter. @@ -330,12 +329,3 @@ func parseCloudID(input string) (*url.URL, error) { func (cfg *Config) MappingMode() MappingMode { return mappingModes[cfg.Mapping.Mode] } - -func logConfigDeprecationWarnings(cfg *Config, logger *zap.Logger) { - if !cfg.Mapping.Dedup { - logger.Warn("dedup has been deprecated, and will always be enabled in future") - } - if cfg.Mapping.Dedot && cfg.MappingMode() != MappingECS || !cfg.Mapping.Dedot && cfg.MappingMode() == MappingECS { - logger.Warn("dedot has been deprecated: in the future, dedotting will always be performed in ECS mode only") - } -} diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 02e8025bee9a..635fc49a3c94 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -118,7 +118,6 @@ func createLogsExporter( if err := handleDeprecations(cf, set.Logger); err != nil { return nil, err } - logConfigDeprecationWarnings(cf, set.Logger) exporter, err := newExporter(cf, set, cf.LogsIndex, cf.LogsDynamicIndex.Enabled) if err != nil { @@ -149,7 +148,6 @@ func createMetricsExporter( if err := handleDeprecations(cf, set.Logger); err != nil { return nil, err } - logConfigDeprecationWarnings(cf, set.Logger) // Workaround to avoid rejections from Elasticsearch. // TSDB does not accept 2 documents with the same timestamp and dimensions. 
@@ -185,7 +183,6 @@ func createTracesExporter(ctx context.Context, cfg component.Config) (exporter.Traces, error) { cf := cfg.(*Config) - logConfigDeprecationWarnings(cf, set.Logger) if err := handleDeprecations(cf, set.Logger); err != nil { return nil, err @@ -225,6 +222,13 @@ func handleDeprecations(cf *Config, logger *zap.Logger) error { //nolint:unparam cf.LogsIndex = cf.Index } + if !cf.Mapping.Dedup { + logger.Warn("dedup has been deprecated, and will always be enabled in future") + } + if cf.Mapping.Dedot && cf.MappingMode() != MappingECS || !cf.Mapping.Dedot && cf.MappingMode() == MappingECS { + logger.Warn("dedot has been deprecated: in the future, dedotting will always be performed in ECS mode only") + } + if cf.Flush.Bytes != 0 { const factor = 1000 val := cf.Flush.Bytes / factor From 1ad29d4dc91a305738b3e44ed1ef73edb3a59dff Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 10 Jul 2024 14:00:44 +0100 Subject: [PATCH 115/117] Remove getTimeoutConfig --- exporter/elasticsearchexporter/factory.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 635fc49a3c94..89a5a4d6e360 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -134,7 +134,8 @@ func createLogsExporter( exporterhelper.WithBatcher(cf.BatcherConfig), exporterhelper.WithShutdown(exporter.Shutdown), exporterhelper.WithQueue(cf.QueueSettings), - exporterhelper.WithTimeout(getTimeoutConfig()), + // effectively disable timeout_sender because timeout is enforced in bulk indexer + exporterhelper.WithTimeout(exporterhelper.TimeoutSettings{Timeout: 0}), ) } @@ -173,7 +174,8 @@ func createMetricsExporter( exporterhelper.WithBatcher(batcherCfg), exporterhelper.WithShutdown(exporter.Shutdown), exporterhelper.WithQueue(cf.QueueSettings), - exporterhelper.WithTimeout(getTimeoutConfig()), + // effectively disable timeout_sender because timeout is enforced in bulk indexer + exporterhelper.WithTimeout(exporterhelper.TimeoutSettings{Timeout: 0}), ) } @@ -203,16 +205,11 @@ func createTracesExporter(ctx context.Context, exporterhelper.WithBatcher(cf.BatcherConfig), exporterhelper.WithShutdown(exporter.Shutdown), exporterhelper.WithQueue(cf.QueueSettings), - exporterhelper.WithTimeout(getTimeoutConfig()), + // effectively disable timeout_sender because timeout is enforced in bulk indexer + exporterhelper.WithTimeout(exporterhelper.TimeoutSettings{Timeout: 0}), ) } -func getTimeoutConfig() exporterhelper.TimeoutSettings { - return exporterhelper.TimeoutSettings{ - Timeout: time.Duration(0), // effectively disable timeout_sender because timeout is enforced in bulk indexer - } -} - // handleDeprecations handles deprecated config options. // If possible, translate deprecated config options to new config options // Otherwise, return an error so that the user is aware of an unsupported option. 
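The patch above inlines `exporterhelper.WithTimeout(exporterhelper.TimeoutSettings{Timeout: 0})`, disabling the timeout sender because the bulk indexer is expected to bound each request itself. A minimal sketch of that idea, assuming an illustrative `flushWithDeadline` helper (not the exporter's actual API):

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// flushWithDeadline bounds a single bulk request with its own deadline,
// which is where the timeout lives once the exporterhelper timeout
// sender is disabled with TimeoutSettings{Timeout: 0}.
func flushWithDeadline(ctx context.Context, timeout time.Duration, flush func(context.Context) error) error {
	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()
	return flush(ctx)
}

func main() {
	err := flushWithDeadline(context.Background(), 50*time.Millisecond, func(ctx context.Context) error {
		select {
		case <-time.After(10 * time.Millisecond): // simulated fast bulk request
			return nil
		case <-ctx.Done(): // deadline expired before the request finished
			return ctx.Err()
		}
	})
	fmt.Println(err) // <nil>: the simulated request beat the deadline
}
```

Keeping the deadline next to the bulk request avoids double timeouts: the exporterhelper layer no longer cancels work that the indexer is already pacing.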
From 862876e2516bade911596671607a627929e11727 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 10 Jul 2024 14:47:01 +0100 Subject: [PATCH 116/117] Stop using type alias --- .../elasticsearch_bulk.go | 10 +++---- .../elasticsearch_bulk_test.go | 5 ++-- exporter/elasticsearchexporter/exporter.go | 27 ++++++++++--------- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk.go b/exporter/elasticsearchexporter/elasticsearch_bulk.go index 4ede2925d73e..405c7d2cc332 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk.go @@ -26,10 +26,6 @@ import ( type esClientCurrent = elasticsearch7.Client type esConfigCurrent = elasticsearch7.Config -type esBulkIndexerCurrent = bulkIndexerManager - -type esBulkIndexerItem = docappender.BulkIndexerItem - // clientLogger implements the estransport.Logger interface // that is required by the Elasticsearch client for logging. type clientLogger struct { @@ -177,7 +173,7 @@ func createElasticsearchBackoffFunc(config *RetrySettings) func(int) time.Durati } } -func newBulkIndexer(logger *zap.Logger, client *esClientCurrent, config *Config) (*esBulkIndexerCurrent, error) { //nolint:unparam +func newBulkIndexer(logger *zap.Logger, client *esClientCurrent, config *Config) (*bulkIndexerManager, error) { //nolint:unparam return &bulkIndexerManager{ closeCh: make(chan struct{}), stats: bulkIndexerStats{}, @@ -203,7 +199,7 @@ type bulkIndexerManager struct { client *esClientCurrent } -func (p *bulkIndexerManager) AddBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem) error { +func (p *bulkIndexerManager) AddBatchAndFlush(ctx context.Context, batch []docappender.BulkIndexerItem) error { p.wg.Add(1) defer p.wg.Done() @@ -270,7 +266,7 @@ type worker struct { logger *zap.Logger } -func (w *worker) addBatchAndFlush(ctx context.Context, batch []esBulkIndexerItem) error { +func (w *worker) addBatchAndFlush(ctx context.Context, batch []docappender.BulkIndexerItem) error { for _, item := range batch { if err := w.indexer.Add(item); err != nil { return fmt.Errorf("failed to add item to bulk indexer: %w", err) diff --git a/exporter/elasticsearchexporter/elasticsearch_bulk_test.go b/exporter/elasticsearchexporter/elasticsearch_bulk_test.go index 4b021ca2a800..e999737ccba1 100644 --- a/exporter/elasticsearchexporter/elasticsearch_bulk_test.go +++ b/exporter/elasticsearchexporter/elasticsearch_bulk_test.go @@ -11,6 +11,7 @@ import ( "strings" "testing" + "github.com/elastic/go-docappender/v2" "github.com/elastic/go-elasticsearch/v7" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -61,7 +62,7 @@ func TestBulkIndexer_addBatchAndFlush(t *testing.T) { bulkIndexer, err := newBulkIndexer(zap.NewNop(), client, &cfg) require.NoError(t, err) assert.NoError(t, bulkIndexer.AddBatchAndFlush(context.Background(), - []esBulkIndexerItem{ + []docappender.BulkIndexerItem{ { Index: "foo", Body: strings.NewReader(`{"foo": "bar"}`), @@ -116,7 +117,7 @@ func TestBulkIndexer_addBatchAndFlush_error(t *testing.T) { bulkIndexer, err := newBulkIndexer(zap.NewNop(), client, &cfg) require.NoError(t, err) assert.ErrorContains(t, bulkIndexer.AddBatchAndFlush(context.Background(), - []esBulkIndexerItem{ + []docappender.BulkIndexerItem{ { Index: "foo", Body: strings.NewReader(`{"foo": "bar"}`), diff --git a/exporter/elasticsearchexporter/exporter.go b/exporter/elasticsearchexporter/exporter.go index 6a193e85347c..b3d129903409 100644 --- 
a/exporter/elasticsearchexporter/exporter.go +++ b/exporter/elasticsearchexporter/exporter.go @@ -12,6 +12,7 @@ import ( "slices" "time" + "github.com/elastic/go-docappender/v2" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/exporter" "go.opentelemetry.io/collector/pdata/pcommon" @@ -32,7 +33,7 @@ type elasticsearchExporter struct { dynamicIndex bool model mappingModel - bulkIndexer *esBulkIndexerCurrent + bulkIndexer *bulkIndexerManager } func newExporter( @@ -92,7 +93,7 @@ func (e *elasticsearchExporter) Shutdown(ctx context.Context) error { } func (e *elasticsearchExporter) pushLogsData(ctx context.Context, ld plog.Logs) error { - items := make([]esBulkIndexerItem, 0, ld.LogRecordCount()) + items := make([]docappender.BulkIndexerItem, 0, ld.LogRecordCount()) var errs []error rls := ld.ResourceLogs() for i := 0; i < rls.Len(); i++ { @@ -123,7 +124,7 @@ func (e *elasticsearchExporter) pushLogsData(ctx context.Context, ld plog.Logs) return errors.Join(errs...) } -func (e *elasticsearchExporter) logRecordToItem(resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) (esBulkIndexerItem, error) { +func (e *elasticsearchExporter) logRecordToItem(resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) (docappender.BulkIndexerItem, error) { fIndex := e.index if e.dynamicIndex { fIndex = routeLogRecord(record, scope, resource, fIndex) @@ -132,16 +133,16 @@ func (e *elasticsearchExporter) logRecordToItem(resource pcommon.Resource, recor if e.logstashFormat.Enabled { formattedIndex, err := generateIndexWithLogstashFormat(fIndex, &e.logstashFormat, time.Now()) if err != nil { - return esBulkIndexerItem{}, err + return docappender.BulkIndexerItem{}, err } fIndex = formattedIndex } document, err := e.model.encodeLog(resource, record, scope) if err != nil { - return esBulkIndexerItem{}, fmt.Errorf("failed to encode log event: %w", err) + return docappender.BulkIndexerItem{}, fmt.Errorf("failed to encode log event: %w", err) } - return esBulkIndexerItem{ + return docappender.BulkIndexerItem{ Index: fIndex, Body: bytes.NewReader(document), }, nil @@ -153,7 +154,7 @@ func (e *elasticsearchExporter) pushMetricsData( ) error { // Ideally the slice will be preallocated once and for all // but the actual length is uncertain due to grouping - var items []esBulkIndexerItem + var items []docappender.BulkIndexerItem var errs []error resourceMetrics := metrics.ResourceMetrics() @@ -210,7 +211,7 @@ func (e *elasticsearchExporter) pushMetricsData( continue } - item := esBulkIndexerItem{ + item := docappender.BulkIndexerItem{ Index: fIndex, Body: bytes.NewReader(docBytes), } @@ -249,7 +250,7 @@ func (e *elasticsearchExporter) pushTraceData( ctx context.Context, td ptrace.Traces, ) error { - items := make([]esBulkIndexerItem, 0, td.SpanCount()) + items := make([]docappender.BulkIndexerItem, 0, td.SpanCount()) var errs []error resourceSpans := td.ResourceSpans() for i := 0; i < resourceSpans.Len(); i++ { @@ -280,7 +281,7 @@ func (e *elasticsearchExporter) pushTraceData( return errors.Join(errs...) 
} -func (e *elasticsearchExporter) traceRecordToItem(resource pcommon.Resource, span ptrace.Span, scope pcommon.InstrumentationScope) (esBulkIndexerItem, error) { +func (e *elasticsearchExporter) traceRecordToItem(resource pcommon.Resource, span ptrace.Span, scope pcommon.InstrumentationScope) (docappender.BulkIndexerItem, error) { fIndex := e.index if e.dynamicIndex { fIndex = routeSpan(span, scope, resource, fIndex) @@ -289,16 +290,16 @@ func (e *elasticsearchExporter) traceRecordToItem(resource pcommon.Resource, spa if e.logstashFormat.Enabled { formattedIndex, err := generateIndexWithLogstashFormat(fIndex, &e.logstashFormat, time.Now()) if err != nil { - return esBulkIndexerItem{}, err + return docappender.BulkIndexerItem{}, err } fIndex = formattedIndex } document, err := e.model.encodeSpan(resource, span, scope) if err != nil { - return esBulkIndexerItem{}, fmt.Errorf("failed to encode trace record: %w", err) + return docappender.BulkIndexerItem{}, fmt.Errorf("failed to encode trace record: %w", err) } - return esBulkIndexerItem{ + return docappender.BulkIndexerItem{ Index: fIndex, Body: bytes.NewReader(document), }, nil From be1a7f77c09df6f1e9ba2854db2d4f5ff23b9f70 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 16 Jul 2024 12:03:04 +0100 Subject: [PATCH 117/117] Update exporter/elasticsearchexporter/README.md Co-authored-by: Andrzej Stencel --- exporter/elasticsearchexporter/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index fe570e39589d..42c451e193c6 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -82,7 +82,7 @@ The Elasticsearch exporter supports the common [`sending_queue` settings][export When [persistent queue](https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/exporterhelper/README.md#persistent-queue) is used, there should be no event loss even on collector crashes. -`num_consumers` (default=100) controls the number of concurrent requests being fetched from the queue to the batcher, or directly to the bulk indexer if the batcher is disabled. However, the actual number of concurrent bulk requests is controlled by `num_workers`. +`sending_queue::num_consumers` (default=100) controls the number of concurrent requests being fetched from the queue to the batcher, or directly to the bulk indexer if the batcher is disabled. However, the actual number of concurrent bulk requests is controlled by `num_workers`. ### Batching
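The `num_consumers` vs `num_workers` distinction in the final patch mirrors the weighted semaphore held by `bulkIndexerManager` earlier in this series: any number of queue consumers may submit batches concurrently, but only `num_workers` bulk requests are in flight at once. A sketch under those assumptions (illustrative code, not the exporter's):

```go
package main

import (
	"context"
	"fmt"
	"sync"
	"sync/atomic"

	"golang.org/x/sync/semaphore"
)

func main() {
	const numWorkers = 4 // cap on concurrent bulk requests (num_workers)
	sem := semaphore.NewWeighted(numWorkers)

	var inFlight, peak atomic.Int64
	var wg sync.WaitGroup
	for i := 0; i < 100; i++ { // think: 100 queue consumers (num_consumers)
		wg.Add(1)
		go func() {
			defer wg.Done()
			if err := sem.Acquire(context.Background(), 1); err != nil {
				return
			}
			defer sem.Release(1)
			n := inFlight.Add(1)
			defer inFlight.Add(-1)
			for { // record the highest concurrency observed
				p := peak.Load()
				if n <= p || peak.CompareAndSwap(p, n) {
					break
				}
			}
		}()
	}
	wg.Wait()
	fmt.Println("peak concurrent bulk requests:", peak.Load()) // never exceeds numWorkers
}
```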