55package ingest
66
77import (
8+ "bufio"
9+ "bytes"
810 "fmt"
911
1012 "gopkg.in/yaml.v3"
@@ -50,14 +52,15 @@ func (p Pipeline) OriginalProcessors() (procs []Processor, err error) {
5052 return procs , nil
5153}
5254
53- // extract a list of processors from a pipeline definition in YAML format.
55+ // processorsFromYAML extracts a list of processors from a pipeline definition in YAML format.
5456func processorsFromYAML (content []byte ) (procs []Processor , err error ) {
5557 var p struct {
5658 Processors []yaml.Node
5759 }
5860 if err = yaml .Unmarshal (content , & p ); err != nil {
5961 return nil , err
6062 }
63+
6164 for idx , entry := range p .Processors {
6265 if entry .Kind != yaml .MappingNode || len (entry .Content ) != 2 {
6366 return nil , fmt .Errorf ("processor#%d is not a single-key map (kind:%v content:%d)" , idx , entry .Kind , len (entry .Content ))
@@ -70,22 +73,88 @@ func processorsFromYAML(content []byte) (procs []Processor, err error) {
7073 return nil , fmt .Errorf ("error decoding processor#%d type: %w" , idx , err )
7174 }
7275 proc .FirstLine = entry .Line
73- proc .LastLine = lastLine (& entry )
76+ lastLine , err := getProcessorLastLine (idx , p .Processors , proc , content )
77+ if err != nil {
78+ return nil , err
79+ }
80+ proc .LastLine = lastLine
81+
7482 procs = append (procs , proc )
7583 }
76- return procs , nil
84+ return procs , err
7785}
7886
79- // returns the last (greater) line number used by a yaml.Node.
80- func lastLine (node * yaml.Node ) int {
87+ // getProcessorLastLine determines the last line number for the given processor.
88+ func getProcessorLastLine (idx int , processors []yaml.Node , currentProcessor Processor , content []byte ) (int , error ) {
89+ if idx < len (processors )- 1 {
90+ var endProcessor = processors [idx + 1 ].Line - 1
91+ if endProcessor < currentProcessor .FirstLine {
92+ return currentProcessor .FirstLine , nil
93+ } else {
94+ return processors [idx + 1 ].Line - 1 , nil
95+ }
96+ }
97+
98+ return nextProcessorOrEndOfPipeline (content )
99+ }
100+
101+ // nextProcessorOrEndOfPipeline get the line before the node after the processors node. If there is none, it returns the end of file line
102+ func nextProcessorOrEndOfPipeline (content []byte ) (int , error ) {
103+ var root yaml.Node
104+ if err := yaml .Unmarshal (content , & root ); err != nil {
105+ return 0 , fmt .Errorf ("error unmarshaling YAML: %v" , err )
106+ }
107+
108+ var nodes []* yaml.Node
109+ extractNodesFromMapping (& root , & nodes )
110+ for i , node := range nodes {
111+
112+ if node .Value == "processors" {
113+ if i < len (nodes )- 1 {
114+
115+ return nodes [i + 1 ].Line - 1 , nil
116+ }
117+ }
118+
119+ }
120+ return countLinesInBytes (content )
121+ }
122+
123+ // extractNodesFromMapping recursively extracts all nodes from MappingNodes within DocumentNodes.
124+ func extractNodesFromMapping (node * yaml.Node , nodes * []* yaml.Node ) {
81125 if node == nil {
82- return 0
126+ return
83127 }
84- last := node .Line
85- for _ , inner := range node .Content {
86- if line := lastLine (inner ); line > last {
87- last = line
128+
129+ if node .Kind == yaml .DocumentNode {
130+ for _ , child := range node .Content {
131+ extractNodesFromMapping (child , nodes )
132+ }
133+ return
134+ }
135+
136+ if node .Kind == yaml .MappingNode {
137+ for _ , child := range node .Content {
138+ if child .Kind == yaml .MappingNode || child .Kind == yaml .ScalarNode {
139+ * nodes = append (* nodes , child )
140+ }
141+ extractNodesFromMapping (child , nodes )
88142 }
89143 }
90- return last
144+ }
145+
// countLinesInBytes counts the number of lines in the given byte slice.
//
// Lines are split the way bufio.ScanLines splits them: each "\n" ends a line,
// a final line without a trailing newline still counts, and empty input
// yields 0. Lines of any length are supported.
//
// An error is returned only if the underlying scanner reports one.
func countLinesInBytes(data []byte) (int, error) {
	scanner := bufio.NewScanner(bytes.NewReader(data))
	// By default bufio.Scanner fails with bufio.ErrTooLong on any line longer
	// than bufio.MaxScanTokenSize (64 KiB). No line can be longer than the
	// input itself, so allow the buffer to grow up to that size.
	scanner.Buffer(nil, len(data)+1)

	lineCount := 0
	for scanner.Scan() {
		lineCount++
	}
	if err := scanner.Err(); err != nil {
		return 0, fmt.Errorf("error reading data: %w", err)
	}
	return lineCount, nil
}
0 commit comments