@@ -53,9 +53,11 @@ type options struct {
 	// Delimiter that separates CSV fields.
 	Delimiter string
 	// FlushEvery flushes the ingestion buffer after the specified duration. It
-	// is only valid when ingesting a stream of newline delimited JSON objects
-	// of unknown length.
+	// is only valid when ingesting batchable data, e.g. newline delimited JSON
+	// and CSV (with field names explicitly set) data that is not encoded.
 	FlushEvery time.Duration
+	// BatchSize to aim for when ingesting batchable data.
+	BatchSize uint
 	// ContentType of the data to ingest.
 	ContentType axiom.ContentType
 	contentType string // for the flag value
@@ -81,7 +83,7 @@ func NewCmd(f *cmdutil.Factory) *cobra.Command {
 	}

 	cmd := &cobra.Command{
-		Use: "ingest <dataset-name> [(-f|--file) <filename> [ ...]] [--timestamp-field <timestamp-field>] [--timestamp-format <timestamp-format>] [--flush-every <duration>] [(-t|--content-type) <content-type>] [(-e|--content-encoding) <content-encoding>] [(-l|--label) <key>:<value> [ ...]]",
+		Use: "ingest <dataset-name> [(-f|--file) <filename> [ ...]] [--timestamp-field <timestamp-field>] [--timestamp-format <timestamp-format>] [(-d|--delimiter) <delimiter>] [--flush-every <duration>] [(-b|--batch-size) <batch-size>] [(-t|--content-type) <content-type>] [(-e|--content-encoding) <content-encoding>] [(-l|--label) <key>:<value> [ ...]] [--csv-fields <field> [ ...]] [--continue-on-error <TRUE|FALSE>]",
 		Short: "Ingest structured data",
 		Long: heredoc.Doc(`
 			Ingest structured data into an Axiom dataset.
@@ -193,15 +195,22 @@ func NewCmd(f *cmdutil.Factory) *cobra.Command {
 			if err := complete(cmd.Context(), opts); err != nil {
 				return err
 			}
-			return run(cmd.Context(), opts, cmd.Flag("flush-every").Changed, cmd.Flag("csv-fields").Changed)
+			return run(
+				cmd.Context(),
+				opts,
+				cmd.Flag("flush-every").Changed,
+				cmd.Flag("batch-size").Changed,
+				cmd.Flag("csv-fields").Changed,
+			)
 		},
 	}

 	cmd.Flags().StringSliceVarP(&opts.Filenames, "file", "f", nil, "File(s) to ingest (- to read from stdin). If stdin is a pipe the default value is -, otherwise this is a required parameter")
 	cmd.Flags().StringVar(&opts.TimestampField, "timestamp-field", "", "Field to take the ingestion time from (defaults to _time)")
 	cmd.Flags().StringVar(&opts.TimestampFormat, "timestamp-format", "", "Format used in the timestamp field. Default uses a heuristic parser. Must be expressed using the reference time 'Mon Jan 2 15:04:05 -0700 MST 2006'")
 	cmd.Flags().StringVarP(&opts.Delimiter, "delimiter", "d", "", "Delimiter that separates CSV fields (only valid when input is CSV)")
-	cmd.Flags().DurationVar(&opts.FlushEvery, "flush-every", time.Second, "Buffer flush interval for newline delimited JSON streams of unknown length")
+	cmd.Flags().DurationVar(&opts.FlushEvery, "flush-every", time.Second*5, "Buffer flush interval for batchable data")
+	cmd.Flags().UintVarP(&opts.BatchSize, "batch-size", "b", 10_000, "Batch size to aim for")
 	cmd.Flags().StringVarP(&opts.contentType, "content-type", "t", "", "Content type of the data to ingest (will auto-detect if not set, must be set if content encoding is set and content type is not identity)")
 	cmd.Flags().StringVarP(&opts.contentEncoding, "content-encoding", "e", axiom.Identity.String(), "Content encoding of the data to ingest")
 	cmd.Flags().StringSliceVarP(&opts.labels, "label", "l", nil, "Labels to attach to the ingested events, server side")
@@ -212,9 +221,11 @@ func NewCmd(f *cmdutil.Factory) *cobra.Command {
 	_ = cmd.RegisterFlagCompletionFunc("timestamp-format", cmdutil.NoCompletion)
 	_ = cmd.RegisterFlagCompletionFunc("delimiter", cmdutil.NoCompletion)
 	_ = cmd.RegisterFlagCompletionFunc("flush-every", cmdutil.NoCompletion)
+	_ = cmd.RegisterFlagCompletionFunc("batch-size", cmdutil.NoCompletion)
 	_ = cmd.RegisterFlagCompletionFunc("content-type", contentTypeCompletion)
 	_ = cmd.RegisterFlagCompletionFunc("content-encoding", contentEncodingCompletion)
 	_ = cmd.RegisterFlagCompletionFunc("label", cmdutil.NoCompletion)
+	_ = cmd.RegisterFlagCompletionFunc("csv-fields", cmdutil.NoCompletion)
 	_ = cmd.RegisterFlagCompletionFunc("continue-on-error", cmdutil.NoCompletion)

 	if opts.IO.IsStdinTTY() {
@@ -265,7 +276,7 @@ func complete(ctx context.Context, opts *options) error {
 	}, &opts.Dataset, opts.IO.SurveyIO())
 }

-func run(ctx context.Context, opts *options, flushEverySet, csvFieldsSet bool) error {
+func run(ctx context.Context, opts *options, flushEverySet, batchSizeSet, csvFieldsSet bool) error {
 	client, err := opts.Client(ctx)
 	if err != nil {
 		return err
@@ -305,20 +316,23 @@ func run(ctx context.Context, opts *options, flushEverySet, csvFieldsSet bool) e
 		typ = opts.ContentType
 	}

-	if flushEverySet && typ != axiom.NDJSON {
-		return cmdutil.NewFlagErrorf("--flush-every not valid when content type is not newline delimited JSON")
-	}
 	if opts.Delimiter != "" && typ != axiom.CSV {
 		return cmdutil.NewFlagErrorf("--delimiter/-d not valid when content type is not CSV")
 	}

 	var (
-		batchable = typ == axiom.NDJSON || (typ == axiom.CSV && csvFieldsSet)
+		batchable = (typ == axiom.NDJSON || (typ == axiom.CSV && csvFieldsSet)) &&
+			opts.ContentEncoding == axiom.Identity
 		ingestRes *ingest.Status
 	)
-	if filename == "stdin" && batchable && opts.ContentEncoding == axiom.Identity {
+	if batchable {
 		ingestRes, err = ingestEvery(ctx, client, r, typ, opts)
 	} else {
+		if flushEverySet {
+			return cmdutil.NewFlagErrorf("--flush-every not valid when data is not batchable")
+		} else if batchSizeSet {
+			return cmdutil.NewFlagErrorf("--batch-size not valid when data is not batchable")
+		}
 		ingestRes, err = ingestReader(ctx, client, r, typ, opts)
 	}

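Aside: the new gating logic reduces to a single predicate. A standalone sketch of it, assuming axiom-go's `ContentType` and `ContentEncoding` types (the `isBatchable` helper name is hypothetical; the command inlines the same expression):

```go
// NDJSON always splits on newlines; CSV only when field names were passed
// explicitly (there is no header line to re-send with each batch); encoded
// payloads (e.g. gzip) cannot be split mid-stream at all.
func isBatchable(typ axiom.ContentType, enc axiom.ContentEncoding, csvFieldsSet bool) bool {
	return (typ == axiom.NDJSON || (typ == axiom.CSV && csvFieldsSet)) &&
		enc == axiom.Identity
}
```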
@@ -375,17 +389,16 @@ func ingestEvery(ctx context.Context, client *axiom.Client, r io.Reader, typ axi
 	defer t.Stop()

 	readers := make(chan io.Reader)
-
 	go func() {
 		defer close(readers)

 		// Add first reader.
 		pr, pw := io.Pipe()
 		readers <- pr

-		// Start with a 64 byte buffer, grow up to 1 MB per line.
+		// Start with a 1 KB buffer, grow up to 1 MB per line.
 		scanner := bufio.NewScanner(r)
-		scanner.Buffer(make([]byte, 64), 1024*1024)
+		scanner.Buffer(make([]byte, 1024), 1024*1024)
 		scanner.Split(splitLinesMulti)

 		// We need to scan in a go func to make sure we don't block on
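Worth noting on the buffer bump above: `bufio.Scanner.Buffer(buf, max)` takes an initial buffer and a hard cap, growing on demand, so the change from 64 B to 1 KB only trims reallocations for typical line lengths; the 1 MB per-line limit is unchanged. A minimal standalone illustration:

```go
package main

import (
	"bufio"
	"fmt"
	"strings"
)

func main() {
	// A single 4 KiB line, larger than the 1 KB initial buffer.
	sc := bufio.NewScanner(strings.NewReader(strings.Repeat("x", 4096) + "\n"))
	sc.Buffer(make([]byte, 1024), 1024*1024) // grows on demand, capped at 1 MB
	for sc.Scan() {
		fmt.Println(len(sc.Bytes())) // 4096: the scanner grew past 1 KB
	}
}
```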
@@ -413,23 +426,35 @@ func ingestEvery(ctx context.Context, client *axiom.Client, r io.Reader, typ axi
 		}
 	}()

+	var lineCount uint
+	flushBatch := func() {
+		if err := pw.Close(); err != nil {
+			return
+		}
+
+		pr, pw = io.Pipe()
+		readers <- pr
+
+		lineCount = 0
+		t.Reset(opts.FlushEvery)
+	}
 	for {
 		select {
 		case <-ctx.Done():
 			_ = pw.CloseWithError(ctx.Err())
 			return
 		case <-t.C:
-			if err := pw.Close(); err != nil {
-				return
+			flushBatch()
+		case line := <-lines:
+			if lineCount >= opts.BatchSize {
+				flushBatch()
 			}

-			pr, pw = io.Pipe()
-			readers <- pr
-		case line := <-lines:
 			if _, err := pw.Write(line); err != nil {
 				_ = pw.CloseWithError(err)
 				return
 			}
+			lineCount++
 		case <-done:
 			_ = pw.Close()
 			return
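The reworked select loop flushes on whichever trigger fires first: the FlushEvery timer (slow or bursty streams) or the BatchSize line cap (fast streams), with flushBatch rotating the io.Pipe so the previous batch's reader sees EOF and its in-flight request completes. A condensed, runnable sketch of that pattern, with names and error handling simplified from the actual command:

```go
package main

import (
	"fmt"
	"io"
	"time"
)

func main() {
	lines := make(chan []byte)
	go func() {
		defer close(lines)
		for i := 0; i < 25; i++ {
			lines <- []byte("line\n")
		}
	}()

	const batchSize = 10
	flushEvery := 100 * time.Millisecond

	// Each batch is one pipe; the reader side stands in for the ingest client.
	pr, pw := io.Pipe()
	go io.Copy(io.Discard, pr)

	t := time.NewTimer(flushEvery)
	defer t.Stop()

	var lineCount int
	flushBatch := func() {
		_ = pw.Close() // EOF ends the current batch's reader
		pr, pw = io.Pipe()
		go io.Copy(io.Discard, pr)
		lineCount = 0
		t.Reset(flushEvery)
		fmt.Println("flushed a batch")
	}

	for {
		select {
		case <-t.C:
			flushBatch() // time-based flush for slow streams
		case line, ok := <-lines:
			if !ok {
				_ = pw.Close()
				return
			}
			if lineCount >= batchSize {
				flushBatch() // size-based flush for fast streams
			}
			_, _ = pw.Write(line)
			lineCount++
		}
	}
}
```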
@@ -480,7 +505,7 @@ func ingestReader(ctx context.Context, client *axiom.Client, r io.Reader, typ ax
 	ingestOptions = append(ingestOptions, opts.Labels...)
 	ingestOptions = append(ingestOptions, opts.CSVFields...)

-	res, err := client.Datasets.Ingest(ctx, opts.Dataset, r, typ, enc, ingestOptions...)
+	res, err := client.Ingest(ctx, opts.Dataset, r, typ, enc, ingestOptions...)
 	if err != nil {
 		return nil, err
 	}