diff --git a/cli/azd/.vscode/cspell-azd-dictionary.txt b/cli/azd/.vscode/cspell-azd-dictionary.txt index a1da38a9dbd..77d77e40c6e 100644 --- a/cli/azd/.vscode/cspell-azd-dictionary.txt +++ b/cli/azd/.vscode/cspell-azd-dictionary.txt @@ -228,5 +228,6 @@ webfrontend westus2 wireinject yacspin +yamlnode ymlt zerr diff --git a/cli/azd/pkg/yamlnode/yamlnode.go b/cli/azd/pkg/yamlnode/yamlnode.go index 03ac28a8551..32d0fb51eac 100644 --- a/cli/azd/pkg/yamlnode/yamlnode.go +++ b/cli/azd/pkg/yamlnode/yamlnode.go @@ -1,11 +1,35 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -// yamlnode allows for manipulation of YAML nodes using a dotted-path syntax. +// Package yamlnode allows for manipulation of YAML nodes using a dotted-path syntax. // // Examples of dotted-path syntax: -// - a.object.key -// - b.item_list[1] +// - a.map.key +// - b.items[1] +// +// When using [Set] or [Append], an optional qualifier `?` can be used in a path element +// to indicate that the node is conditionally present, and should be created if not present. +// A preceding bracket-pair, `[]`, can be used to indicate that a sequence node should be created. +// +// Optional qualifier examples: +// - a?.map.key - if 'a' is not present, it will be created +// - a.map?.key - if 'map' is not present, it will be created +// - b.items[]? - if 'items' is not present, it will be created as a sequence +// +// The special characters in a dotted-path syntax are: +// - `.` (dot) - separates key elements +// - `[` (open bracket) - used in sequences +// - `]` (close bracket) - used in sequences +// - `?` (question mark) - optional qualifier +// - `"` (double quote) - used to indicate a quoted-string +// +// If these special characters are part of a key, the key can be surrounded as a quoted-string using `"` (double quotes) +// to indicate their literalness. 
+// If a `"` (double-quote) character is also part of the key, a preceding backslash `\"` may be used to escape it. +// +// Quoted-string examples: +// - "esc.ape.d" -> esc.ape.d +// - "\"hello\"..[world]" -> "hello"..[world] package yamlnode import ( @@ -20,18 +44,24 @@ import ( var ErrNodeNotFound = errors.New("node not found") // ErrNodeWrongKind is returned when the node kind is not as expected. -// -// This error may be useful for nodes that have multiple possible kinds. +// This error may be useful to detect nodes that have multiple possible kinds and need to be handled specially. var ErrNodeWrongKind = errors.New("unexpected node kind") // Find retrieves a node at the given path. +// +// Examples of dotted-paths: +// - a.map.key +// - b.items[1] func Find(root *yaml.Node, path string) (*yaml.Node, error) { parts, err := parsePath(path) if err != nil { return nil, err } - found := find(root, parts) + found, err := find(root, parts, true) + if err != nil { + return nil, err + } if found == nil { return nil, fmt.Errorf("%w: %s", ErrNodeNotFound, path) } @@ -39,55 +69,67 @@ func Find(root *yaml.Node, path string) (*yaml.Node, error) { return found, nil } -// Set sets the node at the given path to the provided value. +// Set sets the node at the path to the provided value. +// +// An optional qualifier `?` can be used to automatically create node(s) that are conditionally present. 
+// +// Examples: +// - a?.map.key - if 'a' is not present, it will be created +// - a.map?.key - if 'map' is not present, it will be created func Set(root *yaml.Node, path string, value *yaml.Node) error { parts, err := parsePath(path) if err != nil { return err } - // find the anchor node - anchor := find(root, parts[:len(parts)-1]) + anchor, err := find(root, parts[:len(parts)-1], false) + if err != nil { + return err + } if anchor == nil { return fmt.Errorf("%w: %s", ErrNodeNotFound, path) } - // set the node - seek, isKey := parts[len(parts)-1].(string) - idx, isSequence := parts[len(parts)-1].(int) - - if isKey { + part := parts[len(parts)-1] + switch part.kind { + case keyElem: if anchor.Kind != yaml.MappingNode { - return fmt.Errorf("%w: %s is not a mapping node", ErrNodeWrongKind, parts[len(parts)-1]) + return fmt.Errorf("%w: %s is not a mapping node", ErrNodeWrongKind, parts[len(parts)-1].key) } for i := 0; i < len(anchor.Content); i += 2 { - if anchor.Content[i].Value == seek { + if anchor.Content[i].Value == part.key { anchor.Content[i+1] = value return nil } } - anchor.Content = append(anchor.Content, &yaml.Node{Kind: yaml.ScalarNode, Value: seek}) + anchor.Content = append(anchor.Content, &yaml.Node{Kind: yaml.ScalarNode, Value: part.key}) anchor.Content = append(anchor.Content, value) - } else if isSequence { + case indexElem: if anchor.Kind != yaml.SequenceNode { - return fmt.Errorf("%w: %s is not a sequence node", ErrNodeWrongKind, parts[len(parts)-1]) + return fmt.Errorf("%w: %s is not a sequence node", ErrNodeWrongKind, parts[len(parts)-2].key) } - if idx < 0 || idx > len(anchor.Content) { - return fmt.Errorf("array index out of bounds: %d", idx) + if part.idx < 0 || part.idx > len(anchor.Content) { + return fmt.Errorf("sequence index out of bounds: %d", part.idx) } - anchor.Content[idx] = value + anchor.Content[part.idx] = value } return nil } -// Append appends a node to the sequence (array) node at the given path. 
-// +// Append appends a node to the sequence node at the given path. // If the node at the path is not a sequence node, ErrNodeWrongKind is returned. +// +// An optional qualifier `?` can be used to automatically create node(s) that are conditionally present; +// a preceding bracket-pair, `[]`, is used to indicate sequences. +// +// Examples: +// - a?.map.items - if 'a' is not present, it will be created +// - b.items[]? - if 'items' is not present, it will be created as a sequence func Append(root *yaml.Node, path string, node *yaml.Node) error { parts, err := parsePath(path) if err != nil { @@ -95,13 +137,17 @@ func Append(root *yaml.Node, path string, node *yaml.Node) error { } // find the anchor node - found := find(root, parts) + found, err := find(root, parts, false) + if err != nil { + return err + } + if found == nil { return fmt.Errorf("%w: %s", ErrNodeNotFound, path) } if found.Kind != yaml.SequenceNode { - return fmt.Errorf("%w %d for append", ErrNodeWrongKind, found.Kind) + return fmt.Errorf("append to a non-sequence node: %w", ErrNodeWrongKind) } found.Content = append(found.Content, node) @@ -119,85 +165,264 @@ func Encode(value interface{}) (*yaml.Node, error) { return &node, nil } -func find(current *yaml.Node, parts []any) *yaml.Node { +// find retrieves a node at the given path. 
+func find(current *yaml.Node, parts []pathElem, findOnly bool) (*yaml.Node, error) { if len(parts) == 0 { // we automatically skip the document node to avoid having to specify it in the path if current.Kind == yaml.DocumentNode { - return current.Content[0] + return current.Content[0], nil } - return current + return current, nil } - seek, _ := parts[0].(string) - idx, isArray := parts[0].(int) + part := parts[0] switch current.Kind { case yaml.DocumentNode: // we automatically skip the document node to avoid having to specify it in the path - return find(current.Content[0], parts) + return find(current.Content[0], parts, findOnly) case yaml.MappingNode: + if part.kind != keyElem { + return nil, fmt.Errorf("%w: unexpected %s as a mapping node", ErrNodeWrongKind, part.key) + } + for i := 0; i < len(current.Content); i += 2 { - if current.Content[i].Value == seek { - return find(current.Content[i+1], parts[1:]) + if current.Content[i].Value == part.key { + return find(current.Content[i+1], parts[1:], findOnly) } } case yaml.SequenceNode: - if isArray && idx < len(current.Content) { - return find(current.Content[idx], parts[1:]) + if part.kind != indexElem { + return nil, fmt.Errorf("%w: unexpected %s as a sequence node", ErrNodeWrongKind, part.key) + } + + if part.idx < len(current.Content) { + return find(current.Content[part.idx], parts[1:], findOnly) } } - return nil -} + if findOnly { // if we are only looking for the node, we won't honor optional + return nil, nil + } -// parsePath parses a dotted path into a slice of parts, where each part is either a string or an integer. -// The integer parts represent array indexes, and the string parts represent keys. 
-func parsePath(path string) ([]any, error) { - if path == "" { - return nil, errors.New("empty path") + if part.optionalKind == 0 { + return nil, nil } - // future: support escaping dots - parts := strings.Split(path, ".") - expanded, err := expandArrays(parts) - if err != nil { - return nil, err + node := &yaml.Node{Kind: part.optionalKind} + + switch current.Kind { + case yaml.MappingNode: + current.Content = append(current.Content, &yaml.Node{Kind: yaml.ScalarNode, Value: part.key}) + current.Content = append(current.Content, node) + case yaml.SequenceNode: + current.Content[part.idx] = node } - return expanded, nil + return node, nil } -// expandArrays expands array indexing into individual elements. -func expandArrays(parts []string) (expanded []any, err error) { - expanded = make([]interface{}, 0, len(parts)) - for _, s := range parts { - before, after := cutBrackets(s) - expanded = append(expanded, before) +// parsePath parses a dotted-path into a slice of yaml path elements. +func parsePath(s string) ([]pathElem, error) { + elem := strings.Builder{} + parsed := []pathElem{} + + for i := 0; i < len(s); i++ { + c := s[i] + switch c { + case '.': + if elem.Len() == 0 { + if i == 0 { + return nil, fmt.Errorf("unexpected dot '.' at the beginning of the path") + } + + return nil, fmt.Errorf("unexpected dot '.' at the beginning of the path near %s", s[i-1:]) + } - if len(after) > 0 { - content := after[1 : len(after)-1] - idx, err := strconv.Atoi(content) + yamlPaths, err := parseElem(elem.String()) if err != nil { - return nil, fmt.Errorf("invalid array index: %s in %s", content, after) + return nil, fmt.Errorf("parsing %s: %w", elem.String(), err) } - expanded = append(expanded, idx) + parsed = append(parsed, yamlPaths...) + elem.Reset() + case '"': + j := i + 1 + elem.WriteByte('"') + + // find the unescaped, closing quote + // note that we just want to preserve the quoted string as-is to avoid treating '.' 
as a separator, + // since a second pass will parse the quoted string + for j < len(s) { + if s[j] == '"' && s[j-1] != '\\' { + elem.WriteByte('"') + break + } + elem.WriteByte(s[j]) + j++ + } + i = j + default: + elem.WriteByte(c) } } - return expanded, nil + if elem.Len() > 0 { + yamlPaths, err := parseElem(elem.String()) + if err != nil { + return nil, fmt.Errorf("parsing %s: %w", elem.String(), err) + } + parsed = append(parsed, yamlPaths...) + } + + if len(parsed) == 0 { + return nil, fmt.Errorf("empty path") + } + + return parsed, nil } -// cutBrackets splits a string into two parts, before the brackets, and after the brackets. -func cutBrackets(s string) (before string, after string) { - if len(s) > 0 && s[len(s)-1] == ']' { // reverse check for faster exit - for i := len(s) - 1; i >= 0; i-- { - if s[i] == '[' { - return s[:i], s[i:] +// parseElem parses a dotted-path element into the corresponding yaml path element(s). +func parseElem(s string) ([]pathElem, error) { + result := []pathElem{} + elem := pathElem{kind: keyElem} + key := strings.Builder{} + + inKey := true // whether we are currently parsing a key part of the element + for i := 0; i < len(s); i++ { + c := s[i] + + switch c { + case '[': + inKey = false + + // find the closing bracket + j := i + 1 + for j < len(s) { + if s[j] == ']' { + break + } + j++ + } + + if j == len(s) { + return nil, fmt.Errorf("missing closing bracket ']' after '[': %s", s[i:]) + } + + // contents is the string between the brackets + contents := s[i+1 : j] + if contents == "" && j+1 < len(s) && s[j+1] == '?' { // empty brackets followed by '?' 
+ elem.optionalKind = yaml.SequenceNode + i = j + 1 + continue + } + idx, err := strconv.Atoi(contents) + if err != nil || idx < 0 { + return nil, fmt.Errorf("invalid sequence index: %s in %s", contents, s[i:j+1]) + } + + switch elem.kind { + case keyElem: + elem.key = key.String() + key.Reset() + + if elem.key == "" { + return nil, fmt.Errorf("empty key in %s", s) + } + case indexElem: + // do nothing } + + result = append(result, elem) + elem = pathElem{kind: indexElem, idx: idx} + + i = j + case ']': + return nil, fmt.Errorf("unexpected closing bracket '[' before ']': %s", s[i:]) + case '?': + elem.optionalKind = yaml.MappingNode + if i != len(s)-1 { + return nil, fmt.Errorf( + "unexpected characters after optional qualifier `?`: %s: "+ + "'?' is a special character; to escape using double quotes, try \"%s\"", + s[i+1:], + s[:i]) + } + case '\\': + if i+1 < len(s) && s[i+1] == '"' { + key.WriteByte('"') + i++ + } + case '"': + // find the closing quote + j := i + 1 + for j < len(s) { + if s[j] == '\\' && j+1 < len(s) && s[j+1] == '"' { + key.WriteByte('"') + j += 2 + continue + } + + if s[j] == '"' { + break + } + key.WriteByte(s[j]) + j++ + } + + if j == len(s) { + return nil, fmt.Errorf( + "missing closing quote '\"' near %s; to escape double quotes, try adding a preceding backslash", s[i:]) + } + i = j + default: + if !inKey { + return nil, fmt.Errorf( + "unexpected characters after character `%s`: %s: "+ + "'[', ']' are special characters; to escape using double quotes, try \"%s\"", + string(s[i-1]), + s[i:], + s[:i-1]) + } + key.WriteByte(c) } } - return s, "" + if key.Len() > 0 { + elem.key = key.String() + elem.kind = keyElem + } + + if len(result) == 0 && elem.key == "" { + return nil, fmt.Errorf("empty") + } + + result = append(result, elem) + return result, nil +} + +type kind int + +const ( + keyElem kind = 1 << iota + indexElem +) + +// pathElem represents a single element in a YAML syntax tree. 
+// +// Each element is either a key (for a mapping node) or an index (for a sequence node). +type pathElem struct { + // kind is the kind of the current path element. + // either key or idx will be set, but not both. + kind kind + + // key is the name of the current node. + key string + // idx is index of the current sequence node. + idx int + + // optionalKind is the kind of node that will be created if not present. + // It is only used in set or append operations. + optionalKind yaml.Kind } diff --git a/cli/azd/pkg/yamlnode/yamlnode_test.go b/cli/azd/pkg/yamlnode/yamlnode_test.go index 6aeed2cf6bd..2fdcfa5a271 100644 --- a/cli/azd/pkg/yamlnode/yamlnode_test.go +++ b/cli/azd/pkg/yamlnode/yamlnode_test.go @@ -4,6 +4,7 @@ package yamlnode import ( + "reflect" "testing" "github.com/braydonk/yaml" @@ -86,6 +87,7 @@ func TestSet(t *testing.T) { {"Create array", "root.new_array", []string{"first_item"}, false}, {"Create object", "root.nested.new_object", map[string]string{"key": "value"}, false}, {"Create nested array object", "root.mixedArray[1].nestedObj.newKey", "new_deep_value", false}, + {"Create missing key", "root.nonexistent?.key", "value", false}, {"Invalid path", "root.nonexistent.key", "value", true}, {"Invalid array index", "root.array[10]", "value", true}, @@ -130,7 +132,7 @@ func TestAppend(t *testing.T) { tests := []struct { name string path string - value interface{} + value interface{} // Value to append wantErr bool checkLen int // Expected length after append }{ @@ -138,7 +140,7 @@ func TestAppend(t *testing.T) { {"Append to empty array", "root.empty", "item1", false, 1}, {"Append object to mixed array", "root.mixedArray", map[string]string{"key": "value"}, false, 4}, {"Append to nested array", "root.mixedArray[2].nestedArr", "item3", false, 3}, - + {"Append to non-existent array", "root.nonexistent[]?", "item1", false, 1}, {"Invalid path (not an array)", "root.nested.key", "invalid", true, 0}, {"Non-existent path", "root.nonexistent", "value", true, 
0}, {"Invalid path format", "root.array.[1]", "invalid", true, 0}, @@ -179,6 +181,7 @@ func TestAppend(t *testing.T) { t.Errorf("Append() resulted in wrong length = %d, want %d", len(node.Content), tt.checkLen) return } + // we verify the last node matches the appended value lastNode := node.Content[len(node.Content)-1] assertNodeEquals(t, "Append()", lastNode, tt.value) } @@ -202,3 +205,137 @@ func assertNodeEquals(t *testing.T, funcName string, node *yaml.Node, expected i t.Errorf("%s = %v, want %v", funcName, string(gotStr), string(wantStr)) } } + +func TestParseElem(t *testing.T) { + tests := []struct { + name, input string + expected []pathElem + wantErr bool + }{ + {name: "key", input: "simple_key", expected: []pathElem{{kind: keyElem, key: "simple_key"}}}, + {name: "key with optional", input: "simple_key?", + expected: []pathElem{ + {kind: keyElem, key: "simple_key", optionalKind: yaml.MappingNode}, + }}, + { + name: "index", input: "key[1]", + expected: []pathElem{ + {kind: keyElem, key: "key"}, + {kind: indexElem, idx: 1}, + }, + }, + { + name: "optional sequence", input: "key[]?", + expected: []pathElem{ + {kind: keyElem, key: "key", optionalKind: yaml.SequenceNode}, + }, + }, + { + name: "multiple indices", input: "nested[1][2]", + expected: []pathElem{ + {kind: keyElem, key: "nested"}, + {kind: indexElem, idx: 1}, + {kind: indexElem, idx: 2}, + }, + }, + { + name: "quoted key", input: "\"special.[]\\\"-characters?\"", + expected: []pathElem{ + {kind: keyElem, key: "special.[]\"-characters?"}, + }, + }, + { + name: "quoted index", input: "\"special.[]\\\"-characters?\"[1]", + expected: []pathElem{ + {kind: keyElem, key: "special.[]\"-characters?"}, + {kind: indexElem, idx: 1}, + }, + }, + { + name: "quote escaping", input: "a \\\"nice\\\" key", + expected: []pathElem{ + {kind: keyElem, key: "a \"nice\" key"}, + }, + }, + // Error cases + {name: "invalid index format", input: "hello[a]", wantErr: true}, + {name: "missing closing bracket", input: 
"hello[1", wantErr: true}, + {name: "missing opening bracket", input: "hello]22", wantErr: true}, + {name: "empty key", input: "[1]", wantErr: true}, + {name: "empty input", input: "", wantErr: true}, + {name: "invalid characters in key", input: "hello]world[1]", wantErr: true}, + {name: "invalid characters in key", input: "hello?world[1]", wantErr: true}, + {name: "negative index", input: "array[-1]", wantErr: true}, + {name: "missing closed quotes", input: "ar\"ray[-1]", wantErr: true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := parseElem(tt.input) + + if tt.wantErr { + if err == nil { + t.Errorf("parseElem(%q) error = nil, wantErr = true", tt.input) + } + return + } + + if err != nil { + t.Errorf("parseElem(%q) unexpected error = %v", tt.input, err) + return + } + + if !reflect.DeepEqual(result, tt.expected) { + t.Errorf("parseElem(%q) = %v, want %v", tt.input, result, tt.expected) + } + }) + } +} + +func TestParsePath(t *testing.T) { + tests := []struct { + name, input string + expected []pathElem + wantErr bool + }{ + {name: "key", input: "simple_key", expected: []pathElem{{kind: keyElem, key: "simple_key"}}}, + {name: "nested key", input: "parent.nested", expected: []pathElem{ + {kind: keyElem, key: "parent"}, + {kind: keyElem, key: "nested"}, + }}, + {name: "nested array", input: "parent.nested[1].under", expected: []pathElem{ + {kind: keyElem, key: "parent"}, + {kind: keyElem, key: "nested"}, + {kind: indexElem, idx: 1}, + {kind: keyElem, key: "under"}, + }}, + {name: "more nested", input: "parent.nested[1].under?.array[]?.next", expected: []pathElem{ + {kind: keyElem, key: "parent"}, + {kind: keyElem, key: "nested"}, + {kind: indexElem, idx: 1}, + {kind: keyElem, key: "under", optionalKind: yaml.MappingNode}, + {kind: keyElem, key: "array", optionalKind: yaml.SequenceNode}, + {kind: keyElem, key: "next"}, + }}, + {name: "escaping", input: "\"start.\\\".end\"", expected: []pathElem{ + {kind: keyElem, key: 
"start.\".end"}, + }}, + {name: "escaping middle", input: "start.\"start.\\\".[]?end[]\".end", expected: []pathElem{ + {kind: keyElem, key: "start"}, + {kind: keyElem, key: "start.\".[]?end[]"}, + {kind: keyElem, key: "end"}, + }}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parsePath(tt.input) + if (err != nil) != tt.wantErr { + t.Errorf("parsePath() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.expected) { + t.Errorf("parsePath() = %v, want %v", got, tt.expected) + } + }) + } +}