Skip to content

Commit 26bff48

Browse files
author
Andrea Spacca
authored
Refactor date period last event now (#116)
* call m.Run() in TestMain * TDD refactor date period and bindTime * refactor date period and bindTime * update docs * fix typo in TestMain * better docs on period * extract nearTime logic * extract generation logic
1 parent 94109a4 commit 26bff48

File tree

6 files changed

+180
-99
lines changed

6 files changed

+180
-99
lines changed

docs/fields-configuration.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ For each config entry the following fields are available:
1313
- `fuzziness` *optional (`long` and `double` type only)*: when generating data you may want generated values to change within a known interval. Fuzziness allows you to specify the maximum delta a generated value can have from the previous value (for the same field), as a delta percentage; the value must be between 0.0 and 1.0, where 0 is 0% and 1 is 100%. When not specified there is no constraint on the generated values; boundaries will be defined by the underlying field type
1414
- `range` *optional (`long` and `double` type only)*: value will be generated between `min` and `max`
1515
- `cardinality` *optional*: number of different values for the field; note that this value may not be respected if not enough events are generated. E.g. `cardinality: 1000` with `100` generated events would produce `100` different values, not `1000`.
16-
- `period` *optional (`date` type only)*: values will be evenly generated between `time.Now()` and `time.Now().Add(period)`, where period is expressed as `time.Duration`
16+
- `period` *optional (`date` type only)*: values will be evenly generated between `time.Now()` and `time.Now().Add(period)`, where period is expressed as `time.Duration`. It also accepts a negative duration; in this case values will be evenly generated between `time.Now().Add(period)` and `time.Now()`.
1717
- `object_keys` *optional (`object` type only)*: list of field names to generate in an object field type; if not specified a random number of field names will be generated in the object field type
1818
- `value` *optional*: hardcoded value to set for the field (any `cardinality` will be ignored)
1919
- `enum` *optional (`keyword` type only)*: list of strings from which the value to set for the field is randomly chosen (any `cardinality` will be applied, limited to the size of the `enum` values)

docs/performances.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ _GeneratorTextTemplateVPCFlowLogs-16 95.0 ± 0%
5656
5757
```
5858

59-
If you are curios how those benchmarks translate to time needed for generating dataset, we ran some test runs monitoring the execution times.
59+
If you are curious how those benchmarks translate into the time needed to generate a dataset, we ran some test runs monitoring the execution times.
6060
We generated directly from the built binaries 20GB of "aws dynamodb 1.28.3" Schema C data.
6161

6262
```

pkg/genlib/generator_interface.go

Lines changed: 58 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ const (
4848
FieldTypeFlattened = "flattened"
4949
FieldTypeGeoPoint = "geo_point"
5050

51-
FieldTypeTimeRange = 3600 // seconds
52-
FieldTypeTimeLayout = "2006-01-02T15:04:05.999999Z07:00"
51+
FieldTypeDurationSpan = 1000 // milliseconds
52+
FieldTypeTimeLayout = "2006-01-02T15:04:05.999999Z07:00"
5353
)
5454

5555
var (
@@ -338,7 +338,7 @@ func genNounsNWithReturn(n int) string {
338338
return value
339339
}
340340

341-
func randGeoPoint(buf *bytes.Buffer) error {
341+
func randGeoPoint() (int, int, int, int) {
342342
lat := customRand.Intn(181) - 90
343343
var latD int
344344
if lat != -90 && lat != 90 {
@@ -349,23 +349,8 @@ func randGeoPoint(buf *bytes.Buffer) error {
349349
if long != -180 && long != 180 {
350350
longD = customRand.Intn(100)
351351
}
352-
_, err := fmt.Fprintf(buf, "%d.%d,%d.%d", lat, latD, long, longD)
353-
return err
354-
}
355352

356-
func randGeoPointWithReturn() string {
357-
lat := customRand.Intn(181) - 90
358-
var latD int
359-
if lat != -90 && lat != 90 {
360-
latD = customRand.Intn(100)
361-
}
362-
var longD int
363-
long := customRand.Intn(361) - 180
364-
if long != -180 && long != 180 {
365-
longD = customRand.Intn(100)
366-
}
367-
368-
return fmt.Sprintf("%d.%d,%d.%d", lat, latD, long, longD)
353+
return lat, latD, long, longD
369354
}
370355

371356
func bindConstantKeyword(field Field, fieldMap map[string]any) error {
@@ -396,19 +381,7 @@ func bindKeyword(fieldCfg ConfigField, field Field, fieldMap map[string]any) err
396381

397382
fieldMap[field.Name] = emitFNotReturn
398383
} else if len(field.Example) > 0 {
399-
400-
totWords := len(keywordRegex.Split(field.Example, -1))
401-
402-
var joiner string
403-
if strings.Contains(field.Example, "\\.") {
404-
joiner = "\\."
405-
} else if strings.Contains(field.Example, "-") {
406-
joiner = "-"
407-
} else if strings.Contains(field.Example, "_") {
408-
joiner = "_"
409-
} else if strings.Contains(field.Example, " ") {
410-
joiner = " "
411-
}
384+
totWords, joiner := totWordsAndJoiner(field.Example)
412385

413386
return bindJoinRand(field, totWords, joiner, fieldMap)
414387
} else {
@@ -424,6 +397,22 @@ func bindKeyword(fieldCfg ConfigField, field Field, fieldMap map[string]any) err
424397
return nil
425398
}
426399

400+
func totWordsAndJoiner(fieldExample string) (int, string) {
401+
totWords := len(keywordRegex.Split(fieldExample, -1))
402+
403+
var joiner string
404+
if strings.Contains(fieldExample, "\\.") {
405+
joiner = "\\."
406+
} else if strings.Contains(fieldExample, "-") {
407+
joiner = "-"
408+
} else if strings.Contains(fieldExample, "_") {
409+
joiner = "_"
410+
} else if strings.Contains(fieldExample, " ") {
411+
joiner = " "
412+
}
413+
414+
return totWords, joiner
415+
}
427416
func bindJoinRand(field Field, N int, joiner string, fieldMap map[string]any) error {
428417
var emitFNotReturn emitFNotReturn
429418
emitFNotReturn = func(state *genState, buf *bytes.Buffer) error {
@@ -477,7 +466,9 @@ func bindBool(field Field, fieldMap map[string]any) error {
477466
func bindGeoPoint(field Field, fieldMap map[string]any) error {
478467
var emitFNotReturn emitFNotReturn
479468
emitFNotReturn = func(state *genState, buf *bytes.Buffer) error {
480-
return randGeoPoint(buf)
469+
lat, latD, long, longD := randGeoPoint()
470+
_, err := fmt.Fprintf(buf, "%d.%d,%d.%d", lat, latD, long, longD)
471+
return err
481472
}
482473

483474
fieldMap[field.Name] = emitFNotReturn
@@ -498,14 +489,7 @@ func bindWordN(field Field, n int, fieldMap map[string]any) error {
498489
func bindNearTime(fieldCfg ConfigField, field Field, fieldMap map[string]any) error {
499490
var emitFNotReturn emitFNotReturn
500491
emitFNotReturn = func(state *genState, buf *bytes.Buffer) error {
501-
var offset time.Duration
502-
if fieldCfg.Period > 0 && state.totEvents > 0 {
503-
offset = time.Duration((fieldCfg.Period.Nanoseconds() / int64(state.totEvents)) * int64(state.counter))
504-
} else {
505-
offset = time.Duration(customRand.Intn(FieldTypeTimeRange)*-1) * time.Second
506-
}
507-
508-
newTime := timeNowToBind.Add(offset)
492+
newTime := nearTime(fieldCfg, state)
509493

510494
buf.WriteString(newTime.Format(FieldTypeTimeLayout))
511495
return nil
@@ -514,13 +498,29 @@ func bindNearTime(fieldCfg ConfigField, field Field, fieldMap map[string]any) er
514498
return nil
515499
}
516500

501+
func nearTime(fieldCfg ConfigField, state *genState) time.Time {
502+
var offset time.Duration
503+
if fieldCfg.Period > 0 && state.totEvents > 0 {
504+
offset = time.Duration((fieldCfg.Period.Nanoseconds() / int64(state.totEvents)) * int64(state.counter))
505+
} else if fieldCfg.Period < 0 && state.totEvents > 0 {
506+
offset = time.Duration((fieldCfg.Period.Nanoseconds() / int64(state.totEvents)) * (int64(state.totEvents - state.counter)))
507+
} else {
508+
offset = time.Duration(customRand.Intn(FieldTypeDurationSpan)) * time.Millisecond
509+
}
510+
511+
newTime := timeNowToBind.Add(offset)
512+
513+
if state.totEvents <= 0 {
514+
timeNowToBind = newTime
515+
}
516+
517+
return newTime
518+
}
519+
517520
func bindIP(field Field, fieldMap map[string]any) error {
518521
var emitFNotReturn emitFNotReturn
519522
emitFNotReturn = func(state *genState, buf *bytes.Buffer) error {
520-
i0 := customRand.Intn(255)
521-
i1 := customRand.Intn(255)
522-
i2 := customRand.Intn(255)
523-
i3 := customRand.Intn(255)
523+
i0, i1, i2, i3 := randIP()
524524

525525
_, err := fmt.Fprintf(buf, "%d.%d.%d.%d", i0, i1, i2, i3)
526526
return err
@@ -744,19 +744,7 @@ func bindKeywordWithReturn(fieldCfg ConfigField, field Field, fieldMap map[strin
744744

745745
fieldMap[field.Name] = emitF
746746
} else if len(field.Example) > 0 {
747-
748-
totWords := len(keywordRegex.Split(field.Example, -1))
749-
750-
var joiner string
751-
if strings.Contains(field.Example, "\\.") {
752-
joiner = "\\."
753-
} else if strings.Contains(field.Example, "-") {
754-
joiner = "-"
755-
} else if strings.Contains(field.Example, "_") {
756-
joiner = "_"
757-
} else if strings.Contains(field.Example, " ") {
758-
joiner = " "
759-
}
747+
totWords, joiner := totWordsAndJoiner(field.Example)
760748

761749
return bindJoinRandWithReturn(field, totWords, joiner, fieldMap)
762750
} else {
@@ -818,7 +806,8 @@ func bindBoolWithReturn(field Field, fieldMap map[string]any) error {
818806
func bindGeoPointWithReturn(field Field, fieldMap map[string]any) error {
819807
var emitF EmitF
820808
emitF = func(state *genState) any {
821-
return randGeoPointWithReturn()
809+
lat, latD, long, longD := randGeoPoint()
810+
return fmt.Sprintf("%d.%d,%d.%d", lat, latD, long, longD)
822811
}
823812

824813
fieldMap[field.Name] = emitF
@@ -838,36 +827,32 @@ func bindWordNWithReturn(field Field, n int, fieldMap map[string]any) error {
838827
func bindNearTimeWithReturn(fieldCfg ConfigField, field Field, fieldMap map[string]any) error {
839828
var emitF EmitF
840829
emitF = func(state *genState) any {
841-
var offset time.Duration
842-
if fieldCfg.Period > 0 {
843-
offset = time.Duration((fieldCfg.Period.Nanoseconds() / int64(state.totEvents)) * int64(state.counter))
844-
} else {
845-
offset = time.Duration(customRand.Intn(FieldTypeTimeRange)*-1) * time.Second
846-
}
847-
848-
newTime := timeNowToBind.Add(offset)
849-
850-
return newTime
830+
return nearTime(fieldCfg, state)
851831
}
832+
852833
fieldMap[field.Name] = emitF
853834
return nil
854835
}
855836

856837
func bindIPWithReturn(field Field, fieldMap map[string]any) error {
857838
var emitF EmitF
858839
emitF = func(state *genState) any {
859-
i0 := customRand.Intn(255)
860-
i1 := customRand.Intn(255)
861-
i2 := customRand.Intn(255)
862-
i3 := customRand.Intn(255)
840+
i0, i1, i2, i3 := randIP()
863841

864842
return fmt.Sprintf("%d.%d.%d.%d", i0, i1, i2, i3)
865843
}
866844

867845
fieldMap[field.Name] = emitF
868846
return nil
869847
}
848+
func randIP() (int, int, int, int) {
849+
i0 := customRand.Intn(255)
850+
i1 := customRand.Intn(255)
851+
i2 := customRand.Intn(255)
852+
i3 := customRand.Intn(255)
870853

854+
return i0, i1, i2, i3
855+
}
871856
func bindLongWithReturn(fieldCfg ConfigField, field Field, fieldMap map[string]any) error {
872857
dummyFunc := makeIntFunc(fieldCfg, field)
873858

pkg/genlib/generator_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"github.com/elastic/elastic-integration-corpus-generator-tool/pkg/genlib/fields"
88
"log"
99
"math/rand"
10+
"os"
1011
"testing"
1112
"time"
1213
)
@@ -21,6 +22,7 @@ func TestMain(m *testing.M) {
2122
InitGeneratorRandSeed(randSeed)
2223
InitGeneratorTimeNow(timeNow)
2324

25+
os.Exit(m.Run())
2426
}
2527

2628
func Benchmark_GeneratorCustomTemplateJSONContent(b *testing.B) {

pkg/genlib/generator_with_custom_template_test.go

Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -442,30 +442,25 @@ func Test_FieldDateWithCustomTemplate(t *testing.T) {
442442
t.Logf("with template: %s", string(template))
443443
nSpins := rand.Intn(1024) + 1
444444
for i := 0; i < nSpins; i++ {
445-
now := time.Now()
445+
previous := timeNowToBind
446446

447447
b := testSingleTWithCustomTemplate[string](t, fld, nil, template)
448448

449449
if ts, err := time.Parse(FieldTypeTimeLayout, b); err != nil {
450450
t.Errorf("Fail parse timestamp %v", err)
451451
} else {
452-
// Timestamp should be +- FieldTypeDurationSpan from now within a second of slop
453-
ts.Add(time.Second * -1)
454-
ts.Add(time.Second)
455-
456-
diff := ts.Sub(now)
457-
if diff < 0 {
458-
diff = -diff
452+
// Timestamp should be from now within a FieldTypeDurationSpan milliseconds of slop
453+
diff := ts.Sub(previous)
454+
if diff < 0 || diff > FieldTypeDurationSpan*time.Millisecond {
455+
t.Errorf("Data generated before now, diff: %v", diff)
459456
}
460457

461-
if diff >= FieldTypeTimeRange*time.Second {
462-
t.Errorf("Date generated out of span range %v", diff)
463-
}
458+
previous = ts
464459
}
465460
}
466461
}
467462

468-
func Test_FieldDateAndPeriodWithCustomTemplate(t *testing.T) {
463+
func Test_FieldDateAndPeriodPositiveWithCustomTemplate(t *testing.T) {
469464
fld := Field{
470465
Name: "alpha",
471466
Type: FieldTypeDate,
@@ -517,6 +512,58 @@ func Test_FieldDateAndPeriodWithCustomTemplate(t *testing.T) {
517512
}
518513
}
519514

515+
func Test_FieldDateAndPeriodNegativeWithCustomTemplate(t *testing.T) {
516+
fld := Field{
517+
Name: "alpha",
518+
Type: FieldTypeDate,
519+
}
520+
521+
template := []byte(`{"alpha":"{{.alpha}}"}`)
522+
configYaml := []byte("fields:\n - name: alpha\n period: -10s")
523+
t.Logf("with template: %s", string(template))
524+
525+
cfg, err := config.LoadConfigFromYaml(configYaml)
526+
if err != nil {
527+
t.Fatal(err)
528+
}
529+
530+
g := makeGeneratorWithCustomTemplate(t, cfg, []Field{fld}, template, 10)
531+
532+
var buf bytes.Buffer
533+
534+
nSpins := 10
535+
for i := 0; i < nSpins; i++ {
536+
if err := g.Emit(&buf); err != nil {
537+
t.Fatal(err)
538+
}
539+
540+
m := unmarshalJSONT[string](t, buf.Bytes())
541+
buf.Reset()
542+
543+
if len(m) != 1 {
544+
t.Errorf("Expected map size 1, got %d", len(m))
545+
}
546+
547+
v, ok := m[fld.Name]
548+
549+
if !ok {
550+
t.Errorf("Missing key %v", fld.Name)
551+
}
552+
553+
if ts, err := time.Parse(FieldTypeTimeLayout, v); err != nil {
554+
t.Errorf("Fail parse timestamp %v", err)
555+
} else {
556+
// Timestamp should be +1s for every iteration
557+
expectedTime := timeNowToBind.Truncate(time.Millisecond).Add(-10*time.Second + time.Second*time.Duration(i))
558+
559+
diff := expectedTime.Sub(ts.Truncate(time.Millisecond))
560+
if diff != 0 {
561+
t.Errorf("Date generated out of period range %v", diff)
562+
}
563+
}
564+
}
565+
}
566+
520567
func Test_FieldIPWithCustomTemplate(t *testing.T) {
521568
fld := Field{
522569
Name: "alpha",

0 commit comments

Comments
 (0)