Skip to content

Commit ee40f69

Browse files
authored
Handle multiline processors (#2063)
1 parent 1801fd1 commit ee40f69

File tree

3 files changed

+563
-62
lines changed

3 files changed

+563
-62
lines changed

internal/elasticsearch/ingest/processors.go

Lines changed: 80 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55
package ingest
66

77
import (
8+
"bufio"
9+
"bytes"
810
"fmt"
911

1012
"gopkg.in/yaml.v3"
@@ -50,14 +52,15 @@ func (p Pipeline) OriginalProcessors() (procs []Processor, err error) {
5052
return procs, nil
5153
}
5254

53-
// extract a list of processors from a pipeline definition in YAML format.
55+
// processorsFromYAML extracts a list of processors from a pipeline definition in YAML format.
5456
func processorsFromYAML(content []byte) (procs []Processor, err error) {
5557
var p struct {
5658
Processors []yaml.Node
5759
}
5860
if err = yaml.Unmarshal(content, &p); err != nil {
5961
return nil, err
6062
}
63+
6164
for idx, entry := range p.Processors {
6265
if entry.Kind != yaml.MappingNode || len(entry.Content) != 2 {
6366
return nil, fmt.Errorf("processor#%d is not a single-key map (kind:%v content:%d)", idx, entry.Kind, len(entry.Content))
@@ -70,22 +73,88 @@ func processorsFromYAML(content []byte) (procs []Processor, err error) {
7073
return nil, fmt.Errorf("error decoding processor#%d type: %w", idx, err)
7174
}
7275
proc.FirstLine = entry.Line
73-
proc.LastLine = lastLine(&entry)
76+
lastLine, err := getProcessorLastLine(idx, p.Processors, proc, content)
77+
if err != nil {
78+
return nil, err
79+
}
80+
proc.LastLine = lastLine
81+
7482
procs = append(procs, proc)
7583
}
76-
return procs, nil
84+
return procs, err
7785
}
7886

79-
// returns the last (greater) line number used by a yaml.Node.
80-
func lastLine(node *yaml.Node) int {
87+
// getProcessorLastLine determines the last line number for the given processor.
88+
func getProcessorLastLine(idx int, processors []yaml.Node, currentProcessor Processor, content []byte) (int, error) {
89+
if idx < len(processors)-1 {
90+
var endProcessor = processors[idx+1].Line - 1
91+
if endProcessor < currentProcessor.FirstLine {
92+
return currentProcessor.FirstLine, nil
93+
} else {
94+
return processors[idx+1].Line - 1, nil
95+
}
96+
}
97+
98+
return nextProcessorOrEndOfPipeline(content)
99+
}
100+
101+
// nextProcessorOrEndOfPipeline get the line before the node after the processors node. If there is none, it returns the end of file line
102+
func nextProcessorOrEndOfPipeline(content []byte) (int, error) {
103+
var root yaml.Node
104+
if err := yaml.Unmarshal(content, &root); err != nil {
105+
return 0, fmt.Errorf("error unmarshaling YAML: %v", err)
106+
}
107+
108+
var nodes []*yaml.Node
109+
extractNodesFromMapping(&root, &nodes)
110+
for i, node := range nodes {
111+
112+
if node.Value == "processors" {
113+
if i < len(nodes)-1 {
114+
115+
return nodes[i+1].Line - 1, nil
116+
}
117+
}
118+
119+
}
120+
return countLinesInBytes(content)
121+
}
122+
123+
// extractNodesFromMapping recursively extracts all nodes from MappingNodes within DocumentNodes.
124+
func extractNodesFromMapping(node *yaml.Node, nodes *[]*yaml.Node) {
81125
if node == nil {
82-
return 0
126+
return
83127
}
84-
last := node.Line
85-
for _, inner := range node.Content {
86-
if line := lastLine(inner); line > last {
87-
last = line
128+
129+
if node.Kind == yaml.DocumentNode {
130+
for _, child := range node.Content {
131+
extractNodesFromMapping(child, nodes)
132+
}
133+
return
134+
}
135+
136+
if node.Kind == yaml.MappingNode {
137+
for _, child := range node.Content {
138+
if child.Kind == yaml.MappingNode || child.Kind == yaml.ScalarNode {
139+
*nodes = append(*nodes, child)
140+
}
141+
extractNodesFromMapping(child, nodes)
88142
}
89143
}
90-
return last
144+
}
145+
146+
// countLinesInBytes counts the number of lines in the given byte slice.
//
// Lines are delimited by "\n"; a final line without a trailing newline is
// counted, and empty input yields 0. Lines of any length are accepted.
//
// It returns 0 and a wrapped error if the underlying scanner reports one.
func countLinesInBytes(data []byte) (int, error) {
	scanner := bufio.NewScanner(bytes.NewReader(data))
	// By default a Scanner fails with bufio.ErrTooLong on lines longer than
	// bufio.MaxScanTokenSize (64 KiB). No line can be longer than the input
	// itself, so allow tokens up to that size.
	scanner.Buffer(nil, len(data)+1)

	lineCount := 0
	for scanner.Scan() {
		lineCount++
	}

	if err := scanner.Err(); err != nil {
		return 0, fmt.Errorf("error reading data: %w", err)
	}

	return lineCount, nil
}

0 commit comments

Comments
 (0)