Skip to content

Commit 7551c7e

Browse files
committed
sql: add jsonpath parser
This is a prototype of a minimal jsonpath parser. It currently supports a small set of features: setting the jsonpath mode (strict/lax), the root accessor ($), key accessors (.key_name), and array wildcards ([*]). The parser is standalone and is not yet integrated with the database. Part of: #22513. Release note: None
1 parent a72aab1 commit 7551c7e

25 files changed

+938
-13
lines changed

BUILD.bazel

+2
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ exports_files([
124124
# gazelle:exclude pkg/sql/plpgsql/parser/lexbase/keywords.go
125125
# gazelle:exclude pkg/sql/plpgsql/parser/lexbase/tokens.go
126126
# gazelle:exclude pkg/sql/plpgsql/parser/lexbase/reserved_keywords.go
127+
# gazelle:exclude pkg/util/jsonpath/parser/lexbase/keywords.go
128+
# gazelle:exclude pkg/util/jsonpath/parser/lexbase/tokens.go
127129
# gazelle:exclude pkg/sql/scanner/token_names_test.go
128130
# gazelle:exclude pkg/sql/schemachanger/scexec/mocks_generated_test.go
129131
# gazelle:exclude pkg/cmd/prereqs/testdata

pkg/BUILD.bazel

+5
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,7 @@ ALL_TESTS = [
741741
"//pkg/util/json/tokenizer:tokenizer_test",
742742
"//pkg/util/json:json_disallowed_imports_test",
743743
"//pkg/util/json:json_test",
744+
"//pkg/util/jsonpath/parser:parser_test",
744745
"//pkg/util/limit:limit_test",
745746
"//pkg/util/log/eventpb:eventpb_test",
746747
"//pkg/util/log/logconfig:logconfig_test",
@@ -2581,6 +2582,10 @@ GO_TARGETS = [
25812582
"//pkg/util/json:json",
25822583
"//pkg/util/json:json_test",
25832584
"//pkg/util/jsonbytes:jsonbytes",
2585+
"//pkg/util/jsonpath/parser/lexbase:lexbase",
2586+
"//pkg/util/jsonpath/parser:parser",
2587+
"//pkg/util/jsonpath/parser:parser_test",
2588+
"//pkg/util/jsonpath:jsonpath",
25842589
"//pkg/util/keysutil:keysutil",
25852590
"//pkg/util/leaktest:leaktest",
25862591
"//pkg/util/limit:limit",

pkg/gen/misc.bzl

+3
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ MISC_SRCS = [
2727
"//pkg/testutils/serverutils:ts_control_forwarder_generated.go",
2828
"//pkg/util/interval/generic:example_interval_btree.go",
2929
"//pkg/util/interval/generic:example_interval_btree_test.go",
30+
"//pkg/util/jsonpath/parser/lexbase:keywords.go",
31+
"//pkg/util/jsonpath/parser/lexbase:tokens.go",
32+
"//pkg/util/jsonpath/parser:jsonpath.go",
3033
"//pkg/util/log/channel:channel_generated.go",
3134
"//pkg/util/log/eventpb/eventpbgen:log_channels_generated.go",
3235
"//pkg/util/log/eventpb:eventlog_channels_generated.go",

pkg/sql/lexbase/sql-gen.sh

+4-4
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
# included in the /LICENSE file.
77

88

9-
# This is used through bazel when generating sql.go and plpgsql.go.
10-
# Look at BUILD.bazel in pkg/sql/parser or pkg/plpgsql/parser for
11-
# usage.
9+
# This is used through bazel when generating sql.go, plpgsql.go, and jsonpath.go.
10+
# Look at BUILD.bazel in pkg/sql/parser, pkg/sql/plpgsql/parser, or
11+
# pkg/util/jsonpath/parser for usage.
1212

1313
set -euo pipefail
1414

@@ -23,7 +23,7 @@ GENYACC=$LANG-gen.y
2323
awk '{print $0")>_\\1 <union> /* <\\2> */_"}' > types_regex.tmp
2424

2525
sed -E -f types_regex.tmp < $1 | \
26-
if [ $LANG != plpgsql ] && [ $LANG != pgrepl ]; then \
26+
if [ $LANG != plpgsql ] && [ $LANG != pgrepl ] && [ $LANG != jsonpath ]; then \
2727
awk -f $3 | \
2828
sed -Ee 's,//.*$$,,g;s,/[*]([^*]|[*][^/])*[*]/, ,g;s/ +$$//g' > $GENYACC
2929
else

pkg/sql/parser/statements/BUILD.bazel

+1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@ go_library(
88
deps = [
99
"//pkg/sql/sem/plpgsqltree",
1010
"//pkg/sql/sem/tree",
11+
"//pkg/util/jsonpath",
1112
],
1213
)

pkg/sql/parser/statements/statement.go

+15
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package statements
88
import (
99
"github.com/cockroachdb/cockroach/pkg/sql/sem/plpgsqltree"
1010
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
11+
"github.com/cockroachdb/cockroach/pkg/util/jsonpath"
1112
)
1213

1314
// Statement is the result of parsing a single statement. It contains the AST
@@ -60,6 +61,8 @@ type Statements []Statement[tree.Statement]
6061

6162
type PLpgStatement Statement[*plpgsqltree.Block]
6263

64+
// JsonpathStatement is the Statement instantiation whose AST is a parsed
// jsonpath expression (*jsonpath.Jsonpath).
type JsonpathStatement Statement[*jsonpath.Jsonpath]
65+
6366
// String returns the AST formatted as a string.
6467
func (stmts Statements) String() string {
6568
return stmts.StringWithFlags(tree.FmtSimple)
@@ -88,10 +91,22 @@ func (stmt PLpgStatement) StringWithFlags(flags tree.FmtFlags) string {
8891
return ctx.CloseAndGetString()
8992
}
9093

94+
// String returns the AST formatted as a string, using tree.FmtSimple flags.
func (stmt JsonpathStatement) String() string {
95+
return stmt.StringWithFlags(tree.FmtSimple)
96+
}
97+
98+
// StringWithFlags returns the AST formatted as a string (with the given flags).
99+
func (stmt JsonpathStatement) StringWithFlags(flags tree.FmtFlags) string {
100+
ctx := tree.NewFmtCtx(flags)
101+
stmt.AST.Format(ctx)
102+
return ctx.CloseAndGetString()
103+
}
104+
91105
type ParsedStmts interface {
92106
String() string
93107
StringWithFlags(flags tree.FmtFlags) string
94108
}
95109

96110
var _ ParsedStmts = Statements{}
97111
var _ ParsedStmts = PLpgStatement{}
112+
var _ ParsedStmts = JsonpathStatement{}

pkg/sql/plpgsql/parser/BUILD.bazel

+1-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ sh_binary(
55
srcs = ["//pkg/sql/lexbase:sql-gen.sh"],
66
)
77

8-
# Define the target to auto-generate sql.go from the grammar file.
8+
# Define the target to auto-generate plpgsql.go from the grammar file.
99
genrule(
1010
name = "plpgsql-goyacc",
1111
srcs = [
@@ -16,7 +16,6 @@ genrule(
1616
export GOPATH=/nonexist-gopath
1717
$(location :plpgsql-gen) $(location plpgsql.y) plpgsql ""\
1818
$(location plpgsql.go) $(location @org_golang_x_tools//cmd/goyacc)
19-
2019
""",
2120
tools = [
2221
":plpgsql-gen",

pkg/sql/scanner/BUILD.bazel

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
33
go_library(
44
name = "scanner",
55
srcs = [
6+
"jsonpath_scan.go",
67
"plpgsql_scan.go",
78
"scan.go",
89
],
@@ -11,6 +12,7 @@ go_library(
1112
deps = [
1213
"//pkg/sql/lexbase",
1314
"//pkg/sql/plpgsql/parser/lexbase",
15+
"//pkg/util/jsonpath/parser/lexbase",
1416
],
1517
)
1618

pkg/sql/scanner/jsonpath_scan.go

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package scanner
7+
8+
import (
9+
sqllexbase "github.com/cockroachdb/cockroach/pkg/sql/lexbase"
10+
"github.com/cockroachdb/cockroach/pkg/util/jsonpath/parser/lexbase"
11+
)
12+
13+
// JSONPathScanner is a scanner with a jsonpath-specific scan function.
14+
type JSONPathScanner struct {
15+
Scanner
16+
}
17+
18+
// Scan scans the next token and populates its information into lval.
19+
// This scan function contains rules for jsonpath.
20+
func (s *JSONPathScanner) Scan(lval ScanSymType) {
21+
ch, skipWhiteSpace := s.scanSetup(lval)
22+
if skipWhiteSpace {
23+
return
24+
}
25+
26+
// TODO(normanchenn): This check will not work for valid JSONPath expressions
27+
// like '$.1key'. We don't support this case yet since expressions like
28+
// '$.1e' should fail due to being interpreted as a numeric literal.
29+
if sqllexbase.IsIdentStart(ch) {
30+
s.scanIdent(lval)
31+
return
32+
}
33+
// Everything else is a single character token which we already initialized
34+
// lval for above.
35+
}
36+
37+
// isIdentMiddle returns true if the character is valid inside an identifier.
38+
func isIdentMiddle(ch int) bool {
39+
return sqllexbase.IsIdentStart(ch) || sqllexbase.IsDigit(ch)
40+
}
41+
42+
// scanIdent is similar to Scanner.scanIdent, but uses Jsonpath tokens.
43+
func (s *JSONPathScanner) scanIdent(lval ScanSymType) {
44+
s.lowerCaseAndNormalizeIdent(lval, isIdentMiddle)
45+
lval.SetID(lexbase.GetKeywordID(lval.Str()))
46+
}

pkg/sql/scanner/plpgsql_scan.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,6 @@ func (s *PLpgSQLScanner) scanNumber(lval ScanSymType, ch int) {
433433

434434
// scanIdent is similar to Scanner.scanIdent, but uses PL/pgSQL tokens.
435435
func (s *PLpgSQLScanner) scanIdent(lval ScanSymType) {
436-
s.lowerCaseAndNormalizeIdent(lval)
436+
s.lowerCaseAndNormalizeIdent(lval, sqllex.IsIdentMiddle)
437437
lval.SetID(lexbase.GetKeywordID(lval.Str()))
438438
}

pkg/sql/scanner/scan.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,13 @@ type ScanSymType interface {
4747
SetUnionVal(interface{})
4848
}
4949

50-
// Scanner lexes SQL statements.
50+
// Scanner lexes statements.
5151
type Scanner struct {
5252
in string
5353
pos int
5454
bytesPrealloc []byte
5555

56-
// Comments is the list of parsed comments from the SQL statement.
56+
// Comments is the list of parsed comments from the statement.
5757
Comments []string
5858

5959
// lastAttemptedID indicates the ID of the last attempted
@@ -606,7 +606,7 @@ func (s *Scanner) ScanComment(lval ScanSymType) (present, ok bool) {
606606
return false, true
607607
}
608608

609-
func (s *Scanner) lowerCaseAndNormalizeIdent(lval ScanSymType) {
609+
func (s *Scanner) lowerCaseAndNormalizeIdent(lval ScanSymType, isIdentMiddle func(int) bool) {
610610
s.lastAttemptedID = int32(lexbase.IDENT)
611611
s.pos--
612612
start := s.pos
@@ -627,7 +627,7 @@ func (s *Scanner) lowerCaseAndNormalizeIdent(lval ScanSymType) {
627627
isLower = false
628628
}
629629

630-
if !lexbase.IsIdentMiddle(ch) {
630+
if !isIdentMiddle(ch) {
631631
break
632632
}
633633

@@ -656,7 +656,7 @@ func (s *Scanner) lowerCaseAndNormalizeIdent(lval ScanSymType) {
656656
}
657657

658658
func (s *Scanner) scanIdent(lval ScanSymType) {
659-
s.lowerCaseAndNormalizeIdent(lval)
659+
s.lowerCaseAndNormalizeIdent(lval, lexbase.IsIdentMiddle)
660660

661661
isExperimental := false
662662
kw := lval.Str()

pkg/sql/sem/tree/datum.go

+3
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ var (
9393
// deriving the arguments to construct a specific time.Time.
9494
MinSupportedTime = timeutil.Unix(-210866803200, 0) // 4714-11-24 00:00:00+00 BC
9595
MinSupportedTimeSec = float64(MinSupportedTime.Unix())
96+
97+
// ValidateJSONPath is injected from pkg/util/jsonpath/parser/parse.go.
98+
ValidateJSONPath func(string) (string, error)
9699
)
97100

98101
// CompareContext represents the dependencies used to evaluate comparisons

pkg/testutils/lint/lint_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1767,7 +1767,7 @@ func TestLint(t *testing.T) {
17671767
}
17681768
}
17691769

1770-
ignore := `zcgo*|\.(pb(\.gw)?)|(\.[eo]g)\.go|/testdata/|^sql/parser/sql\.go$|(_)?generated(_test)?\.go$|^sql/pgrepl/pgreplparser/pgrepl\.go$|^sql/plpgsql/parser/plpgsql\.go$`
1770+
ignore := `zcgo*|\.(pb(\.gw)?)|(\.[eo]g)\.go|/testdata/|^sql/parser/sql\.go$|(_)?generated(_test)?\.go$|^sql/pgrepl/pgreplparser/pgrepl\.go$|^sql/plpgsql/parser/plpgsql\.go$|^util/jsonpath/parser/jsonpath\.go$`
17711771
cmd, stderr, filter, err := dirCmd(pkgDir, crlfmt, "-fast", "-ignore", ignore, "-tab", "2", ".")
17721772
if err != nil {
17731773
t.Fatal(err)

pkg/util/jsonpath/BUILD.bazel

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
load("@io_bazel_rules_go//go:def.bzl", "go_library")
2+
3+
go_library(
4+
name = "jsonpath",
5+
srcs = ["expr.go"],
6+
importpath = "github.com/cockroachdb/cockroach/pkg/util/jsonpath",
7+
visibility = ["//visibility:public"],
8+
deps = ["//pkg/sql/sem/tree"],
9+
)

pkg/util/jsonpath/expr.go

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package jsonpath
7+
8+
import (
9+
"fmt"
10+
"strings"
11+
12+
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
13+
)
14+
15+
// Expr is the interface satisfied by the jsonpath AST node types declared in
// this file. A node must be printable as a plain string (fmt.Stringer) and
// formattable as a tree.NodeFormatter.
type Expr interface {
16+
fmt.Stringer
17+
tree.NodeFormatter
18+
}
19+
20+
// Accessor is a single step of a jsonpath query. It is identical to Expr for now.
21+
type Accessor interface {
22+
Expr
23+
}
24+
25+
// Jsonpath is the top-level node of a parsed jsonpath expression. It pairs
// the Query with a Strict flag; when Strict is set, String prefixes the
// output with "strict ", otherwise no mode prefix is printed.
type Jsonpath struct {
26+
Query Query
27+
Strict bool
28+
}
29+
30+
var _ Expr = Jsonpath{}
31+
32+
// String returns the string form of the query, prefixed with "strict " when
// j.Strict is true. When j.Strict is false the query is returned with no mode
// prefix.
func (j Jsonpath) String() string {
33+
var mode string
34+
if j.Strict {
35+
mode = "strict "
36+
}
37+
return mode + j.Query.String()
38+
}
39+
40+
// Format writes the result of j.String() to ctx.
func (j Jsonpath) Format(ctx *tree.FmtCtx) {
41+
ctx.WriteString(j.String())
42+
}
43+
44+
// Query is a sequence of accessors. String renders them in slice order with
// no separator between them.
type Query struct {
45+
Accessors []Accessor
46+
}
47+
48+
var _ Expr = Query{}
49+
50+
// String concatenates the String() form of every accessor in q.Accessors, in
// order and without separators. An empty accessor list yields "".
func (q Query) String() string {
51+
var sb strings.Builder
52+
for _, accessor := range q.Accessors {
53+
sb.WriteString(accessor.String())
54+
}
55+
return sb.String()
56+
}
57+
58+
// Format writes the result of q.String() to ctx.
func (q Query) Format(ctx *tree.FmtCtx) {
59+
ctx.WriteString(q.String())
60+
}
61+
62+
// Root is the accessor that is printed as "$". It carries no state.
type Root struct{}
63+
64+
var _ Accessor = Root{}
65+
66+
// String returns "$", the jsonpath root symbol.
func (r Root) String() string {
	const rootSymbol = "$"
	return rootSymbol
}
67+
68+
// Format writes the result of r.String() to ctx.
func (r Root) Format(ctx *tree.FmtCtx) {
69+
ctx.WriteString(r.String())
70+
}
71+
72+
// Key is the accessor for a named key. It is printed as "." followed by the
// Key field.
type Key struct {
73+
Key string
74+
}
75+
76+
var _ Accessor = Key{}
77+
78+
// String returns the key accessor in jsonpath syntax: a "." immediately
// followed by the key name.
func (k Key) String() string {
	name := k.Key
	return "." + name
}
79+
80+
// Format writes the result of k.String() to ctx.
func (k Key) Format(ctx *tree.FmtCtx) {
81+
ctx.WriteString(k.String())
82+
}
83+
84+
// Wildcard is the accessor that is printed as "[*]". It carries no state.
type Wildcard struct{}
85+
86+
var _ Accessor = Wildcard{}
87+
88+
// String returns "[*]", the jsonpath array wildcard syntax.
func (w Wildcard) String() string {
	const wildcardSymbol = "[*]"
	return wildcardSymbol
}
89+
90+
// Format writes the result of w.String() to ctx.
func (w Wildcard) Format(ctx *tree.FmtCtx) {
91+
ctx.WriteString(w.String())
92+
}

pkg/util/jsonpath/parser/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
jsonpath.go

0 commit comments

Comments
 (0)