diff --git a/go.mod b/go.mod
index 81e6db2aff..42d77332c0 100644
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,6 @@
 module github.com/quay/clair/v4
 
-go 1.21.9
+go 1.22.0
 
 require (
 	github.com/Masterminds/semver v1.5.0
diff --git a/httptransport/error.go b/httptransport/error.go
index f24a1bb110..dc2e1cfa1e 100644
--- a/httptransport/error.go
+++ b/httptransport/error.go
@@ -1,89 +1,46 @@
 package httptransport
 
 import (
-	"bytes"
 	"context"
-	"encoding/json"
-	"errors"
 	"fmt"
 	"net/http"
 
-	"github.com/quay/zlog"
+	"github.com/quay/clair/v4/httptransport/internal/details"
 )
 
-// StatusClientClosedRequest is a nonstandard HTTP status code used when the
-// client has gone away.
-//
-// This convention is cribbed from Nginx.
-const statusClientClosedRequest = 499
-
-// ApiError writes an untyped (that is, "application/json") error with the
-// provided HTTP status code and message.
+// apiError writes an error with the provided HTTP status code and message.
+// The message is built from f and v as by [fmt.Errorf].
 //
-// ApiError does not return, but instead causes the goroutine to exit.
-func apiError(ctx context.Context, w http.ResponseWriter, code int, f string, v ...interface{}) {
-	const errheader = `Clair-Error`
-	disconnect := false
-	select {
-	case <-ctx.Done():
-		disconnect = true
-	default:
-	}
-	if ev := zlog.Debug(ctx); ev.Enabled() {
-		ev.
-			Bool("disconnect", disconnect).
-			Int("code", code).
-			Str("error", fmt.Sprintf(f, v...)).
-			Msg("http error response")
-	} else {
-		ev.Send()
-	}
-	if disconnect {
-		// Exit immediately if there's no client to read the response, anyway.
-		w.WriteHeader(statusClientClosedRequest)
-		panic(http.ErrAbortHandler)
+// apiError does not return, but instead causes the goroutine to exit.
+//
+// Deprecated: This is implemented via [details.Error], which provides a
+// richer API.
+func apiError(ctx context.Context, w http.ResponseWriter, code int, f string, v ...any) {
+	err := genericError{
+		status: code,
+		err:    fmt.Errorf(f, v...),
 	}
+	details.Error(ctx, w, &err)
+}
+
+// genericError adapts a formatted message and an HTTP status code to the
+// optional-method protocol understood by [details.Error].
+type genericError struct {
+	status int
+	err    error
+}
 
-	h := w.Header()
-	h.Del("link")
-	h.Set("content-type", "application/json")
-	h.Set("x-content-type-options", "nosniff")
-	h.Set("trailer", errheader)
-	w.WriteHeader(code)
+// Error implements error.
+func (e *genericError) Error() string {
+	return e.err.Error()
+}
 
-	var buf bytes.Buffer
-	buf.WriteString(`{"code":"`)
-	switch code {
-	case http.StatusBadRequest:
-		buf.WriteString("bad-request")
-	case http.StatusMethodNotAllowed:
-		buf.WriteString("method-not-allowed")
-	case http.StatusNotFound:
-		buf.WriteString("not-found")
-	case http.StatusTooManyRequests:
-		buf.WriteString("too-many-requests")
-	default:
-		buf.WriteString("internal-error")
-	}
-	buf.WriteByte('"')
-	if f != "" {
-		buf.WriteString(`,"message":`)
-		b, _ := json.Marshal(fmt.Sprintf(f, v...)) // OK use of encoding/json.
-		buf.Write(b)
-	}
-	buf.WriteByte('}')
+// Unwrap exposes the underlying error to [errors.Is] and [errors.As].
+func (e *genericError) Unwrap() error {
+	return e.err
+}
 
-	if _, err := buf.WriteTo(w); err != nil {
-		h.Set(errheader, err.Error())
-	}
-	switch err := http.NewResponseController(w).Flush(); {
-	case errors.Is(err, nil):
-	case errors.Is(err, http.ErrNotSupported):
-		// Skip
-	default:
-		zlog.Warn(ctx).
-			Err(err).
-			Msg("unable to flush http response")
-	}
-	panic(http.ErrAbortHandler)
+// ErrorStatus reports the HTTP status code to use for the response.
+func (e *genericError) ErrorStatus() int {
+	return e.status
 }
diff --git a/httptransport/error_test.go b/httptransport/error_test.go
index 3442baff88..233ed79a57 100644
--- a/httptransport/error_test.go
+++ b/httptransport/error_test.go
@@ -52,7 +52,7 @@ func TestClientDisconnect(t *testing.T) {
 	}
 	<-handlerDone
 
-	if got, want := status, statusClientClosedRequest; got != want {
+	if got, want := status, 499; got != want {
 		t.Errorf("bad status code recorded: got: %d, want: %d", got, want)
 	}
 }
diff --git a/httptransport/internal/details/details.go b/httptransport/internal/details/details.go
new file mode 100644
index 0000000000..0381770ec1
--- /dev/null
+++ b/httptransport/internal/details/details.go
@@ -0,0 +1,198 @@
+// Package details contains helpers for implementing [RFC 9457], "Problem Details
+// for HTTP APIs."
+//
+// See the documentation on [Error] for how keys in the response are
+// constructed.
+//
+// [RFC 9457]: https://datatracker.ietf.org/doc/html/rfc9457
+package details
+
+import (
+	"context"
+	"errors"
+	"io"
+	"net/http"
+	"sync"
+
+	"github.com/quay/zlog"
+
+	"github.com/quay/clair/v4/internal/json"
+	"github.com/quay/clair/v4/internal/json/jsontext"
+)
+
+// StatusClientClosedRequest is a nonstandard HTTP status code used when the
+// client has gone away.
+//
+// This convention is cribbed from Nginx.
+const StatusClientClosedRequest = 499
+
+// ErrorTrailer is the name of the HTTP trailer that reports any error
+// encountered while writing the error response.
+const ErrorTrailer = `Clair-Error`
+
+// Default JSON encoding options.
+var opts = json.JoinOptions(json.DefaultOptionsV2())
+
+// Pool of JSON encoders.
+var encPool = sync.Pool{
+	New: func() any { return jsontext.NewEncoder(io.Discard) },
+}
+
+// Error constructs and sends a problem detail response, then causes the
+// goroutine to panic with [http.ErrAbortHandler].
+// Well-written handlers should be structured to clean up or record events
+// correctly in this instance.
+//
+// To customize the returned problem detail response, the error provided to this
+// function can provide any combination of the following methods:
+//
+//   - ErrorStatus() int
+//   - ErrorType() string
+//   - ErrorTitle() string
+//   - ErrorDetail() string
+//   - ErrorInstance() string
+//   - ErrorExtension() map[string]any
+//
+// Every method is looked up with [errors.As], so it is found even when the
+// implementing error is wrapped. ErrorStatus supplies the HTTP response code,
+// defaulting to [http.StatusInternalServerError]; if ErrorDetail is absent, the
+// value of the Error method is used instead.
+//
+// These methods correspond to the keys defined in RFC 9457, and so should
+// follow the guidance there. This means that the values returned by ErrorType
+// and ErrorInstance should be URIs if possible.
+func Error(ctx context.Context, w http.ResponseWriter, err error) {
+	disconnect := false
+	code := http.StatusInternalServerError
+	select {
+	case <-ctx.Done():
+		disconnect = true
+	default:
+	}
+	// Emit the log line in the defer path.
+	defer func() {
+		// A disconnected client shows up as `disconnect=true, code=NNN` here
+		// and as `code=499` in other HTTP metrics.
+		zlog.Debug(ctx).
+			Bool("disconnect", disconnect).
+			Int("code", code).
+			AnErr("error", err).
+			Msg("http error response")
+	}()
+
+	// Always check for the status code, looking through wrapped errors.
+	var es errStatus
+	if errors.As(err, &es) {
+		code = es.ErrorStatus()
+	}
+	// Exit immediately if there's no client to read the response.
+	if disconnect {
+		w.WriteHeader(StatusClientClosedRequest)
+		panic(http.ErrAbortHandler)
+	}
+
+	// The client is still connected; configure the response headers.
+	h := w.Header()
+	h.Del("link")
+	h.Set("content-type", "application/problem+json")
+	h.Set("x-content-type-options", "nosniff")
+	h.Set("trailer", ErrorTrailer)
+	w.WriteHeader(code)
+
+	enc := encPool.Get().(*jsontext.Encoder)
+	// Detach from w before pooling so the pool never retains the ResponseWriter.
+	defer func() { enc.Reset(io.Discard, opts); encPool.Put(enc) }()
+	enc.Reset(w, opts)
+
+	// Construct and write the details object in one pass.
+	wErr := func() error {
+		if err := enc.WriteToken(jsontext.ObjectStart); err != nil {
+			return err
+		}
+
+		var et errType
+		if errors.As(err, &et) {
+			if err := errors.Join(
+				enc.WriteValue(jsontext.Value(`"type"`)), enc.WriteToken(jsontext.String(et.ErrorType())),
+			); err != nil {
+				return err
+			}
+		}
+		var eti errTitle
+		if errors.As(err, &eti) {
+			if err := errors.Join(
+				enc.WriteValue(jsontext.Value(`"title"`)), enc.WriteToken(jsontext.String(eti.ErrorTitle())),
+			); err != nil {
+				return err
+			}
+		}
+
+		var detail string
+		var ed errDetail
+		if errors.As(err, &ed) {
+			detail = ed.ErrorDetail()
+		} else {
+			detail = err.Error()
+		}
+		if err := errors.Join(
+			enc.WriteValue(jsontext.Value(`"detail"`)), enc.WriteToken(jsontext.String(detail)),
+		); err != nil {
+			return err
+		}
+
+		var ei errInstance
+		if errors.As(err, &ei) {
+			if err := errors.Join(
+				enc.WriteValue(jsontext.Value(`"instance"`)), enc.WriteToken(jsontext.String(ei.ErrorInstance())),
+			); err != nil {
+				return err
+			}
+		}
+
+		var ee errExtension
+		if errors.As(err, &ee) {
+			for k, v := range ee.ErrorExtension() {
+				if err := errors.Join(
+					enc.WriteToken(jsontext.String(k)), json.MarshalEncode(enc, v, opts),
+				); err != nil {
+					return err
+				}
+			}
+		}
+
+		return enc.WriteToken(jsontext.ObjectEnd)
+	}()
+	if wErr != nil {
+		h.Set(ErrorTrailer, wErr.Error())
+	}
+
+	switch err := http.NewResponseController(w).Flush(); {
+	case errors.Is(err, nil):
+	case errors.Is(err, http.ErrNotSupported): // Flushing unsupported; skip.
+	default:
+		zlog.Warn(ctx).
+			Err(err).
+			Msg("unable to flush http response")
+	}
+
+	panic(http.ErrAbortHandler)
+}
+
+type errType interface {
+	ErrorType() string
+}
+type errStatus interface {
+	ErrorStatus() int
+}
+type errTitle interface {
+	ErrorTitle() string
+}
+type errDetail interface {
+	ErrorDetail() string
+}
+type errInstance interface {
+	ErrorInstance() string
+}
+type errExtension interface {
+	ErrorExtension() map[string]any
+}
diff --git a/internal/json/bundle.go b/internal/json/bundle.go
new file mode 100644
index 0000000000..0e2dfa382b
--- /dev/null
+++ b/internal/json/bundle.go
@@ -0,0 +1,5077 @@
+// Code generated by golang.org/x/tools/cmd/bundle. DO NOT EDIT.
+// $ bundle -prefix -import=github.com/go-json-experiment/json/internal/jsonflags=github.com/quay/clair/v4/internal/json/internal/jsonflags -import=github.com/go-json-experiment/json/internal/jsonopts=github.com/quay/clair/v4/internal/json/internal/jsonopts -import=github.com/go-json-experiment/json/internal/jsonwire=github.com/quay/clair/v4/internal/json/internal/jsonwire -import=github.com/go-json-experiment/json/internal=github.com/quay/clair/v4/internal/json/internal -import=github.com/go-json-experiment/json/jsontext=github.com/quay/clair/v4/internal/json/jsontext -import=github.com/go-json-experiment/json=github.com/quay/clair/v4/internal/json -dst github.com/quay/clair/v4/internal/json github.com/go-json-experiment/json
+
+// Package json implements semantic processing of JSON as specified in RFC 8259.
+// JSON is a simple data interchange format that can represent
+// primitive data types such as booleans, strings, and numbers,
+// in addition to structured data types such as objects and arrays.
+//
+// [Marshal] and [Unmarshal] encode and decode Go values
+// to/from JSON text contained within a []byte.
+// [MarshalWrite] and [UnmarshalRead] operate on JSON text
+// by writing to or reading from an [io.Writer] or [io.Reader].
+// [MarshalEncode] and [UnmarshalDecode] operate on JSON text +// by encoding to or decoding from a [jsontext.Encoder] or [jsontext.Decoder]. +// [Options] may be passed to each of the marshal or unmarshal functions +// to configure the semantic behavior of marshaling and unmarshaling +// (i.e., alter how JSON data is understood as Go data and vice versa). +// [jsontext.Options] may also be passed to the marshal or unmarshal functions +// to configure the syntactic behavior of encoding or decoding. +// +// The data types of JSON are mapped to/from the data types of Go based on +// the closest logical equivalent between the two type systems. For example, +// a JSON boolean corresponds with a Go bool, +// a JSON string corresponds with a Go string, +// a JSON number corresponds with a Go int, uint or float, +// a JSON array corresponds with a Go slice or array, and +// a JSON object corresponds with a Go struct or map. +// See the documentation on [Marshal] and [Unmarshal] for a comprehensive list +// of how the JSON and Go type systems correspond. +// +// Arbitrary Go types can customize their JSON representation by implementing +// [MarshalerV1], [MarshalerV2], [UnmarshalerV1], or [UnmarshalerV2]. +// This provides authors of Go types with control over how their types are +// serialized as JSON. Alternatively, users can implement functions that match +// [MarshalFuncV1], [MarshalFuncV2], [UnmarshalFuncV1], or [UnmarshalFuncV2] +// to specify the JSON representation for arbitrary types. +// This provides callers of JSON functionality with control over +// how any arbitrary type is serialized as JSON. +// +// # JSON Representation of Go structs +// +// A Go struct is naturally represented as a JSON object, +// where each Go struct field corresponds with a JSON object member. +// When marshaling, all Go struct fields are recursively encoded in depth-first +// order as JSON object members except those that are ignored or omitted. 
+// When unmarshaling, JSON object members are recursively decoded +// into the corresponding Go struct fields. +// Object members that do not match any struct fields, +// also known as “unknown members”, are ignored by default or rejected +// if [RejectUnknownMembers] is specified. +// +// The representation of each struct field can be customized in the +// "json" struct field tag, where the tag is a comma separated list of options. +// As a special case, if the entire tag is `json:"-"`, +// then the field is ignored with regard to its JSON representation. +// +// The first option is the JSON object name override for the Go struct field. +// If the name is not specified, then the Go struct field name +// is used as the JSON object name. JSON names containing commas or quotes, +// or names identical to "" or "-", can be specified using +// a single-quoted string literal, where the syntax is identical to +// the Go grammar for a double-quoted string literal, +// but instead uses single quotes as the delimiters. +// By default, unmarshaling uses case-sensitive matching to identify +// the Go struct field associated with a JSON object name. +// +// After the name, the following tag options are supported: +// +// - omitzero: When marshaling, the "omitzero" option specifies that +// the struct field should be omitted if the field value is zero +// as determined by the "IsZero() bool" method if present, +// otherwise based on whether the field is the zero Go value. +// This option has no effect when unmarshaling. +// +// - omitempty: When marshaling, the "omitempty" option specifies that +// the struct field should be omitted if the field value would have been +// encoded as a JSON null, empty string, empty object, or empty array. +// This option has no effect when unmarshaling. +// +// - string: The "string" option specifies that [StringifyNumbers] +// be set when marshaling or unmarshaling a struct field value. 
+// This causes numeric types to be encoded as a JSON number +// within a JSON string, and to be decoded from either a JSON number or +// a JSON string containing a JSON number. +// This extra level of encoding is often necessary since +// many JSON parsers cannot precisely represent 64-bit integers. +// +// - nocase: When unmarshaling, the "nocase" option specifies that +// if the JSON object name does not exactly match the JSON name +// for any of the struct fields, then it attempts to match the struct field +// using a case-insensitive match that also ignores dashes and underscores. +// If multiple fields match, +// the first declared field in breadth-first order takes precedence. +// This takes precedence even if [MatchCaseInsensitiveNames] is set to false. +// This cannot be specified together with the "strictcase" option. +// +// - strictcase: When unmarshaling, the "strictcase" option specifies that the +// JSON object name must exactly match the JSON name for the struct field. +// This takes precedence even if [MatchCaseInsensitiveNames] is set to true. +// This cannot be specified together with the "nocase" option. +// +// - inline: The "inline" option specifies that +// the JSON representable content of this field type is to be promoted +// as if they were specified in the parent struct. +// It is the JSON equivalent of Go struct embedding. +// A Go embedded field is implicitly inlined unless an explicit JSON name +// is specified. The inlined field must be a Go struct +// (that does not implement any JSON methods), [jsontext.Value], +// map[string]T, or an unnamed pointer to such types. When marshaling, +// inlined fields from a pointer type are omitted if it is nil. +// Inlined fields of type [jsontext.Value] and map[string]T are called +// “inlined fallbacks” as they can represent all possible +// JSON object members not directly handled by the parent struct. 
+// Only one inlined fallback field may be specified in a struct, +// while many non-fallback fields may be specified. This option +// must not be specified with any other option (including the JSON name). +// +// - unknown: The "unknown" option is a specialized variant +// of the inlined fallback to indicate that this Go struct field +// contains any number of unknown JSON object members. The field type must +// be a [jsontext.Value], map[string]T, or an unnamed pointer to such types. +// If [DiscardUnknownMembers] is specified when marshaling, +// the contents of this field are ignored. +// If [RejectUnknownMembers] is specified when unmarshaling, +// any unknown object members are rejected regardless of whether +// an inlined fallback with the "unknown" option exists. This option +// must not be specified with any other option (including the JSON name). +// +// - format: The "format" option specifies a format flag +// used to specialize the formatting of the field value. +// The option is a key-value pair specified as "format:value" where +// the value must be either a literal consisting of letters and numbers +// (e.g., "format:RFC3339") or a single-quoted string literal +// (e.g., "format:'2006-01-02'"). The interpretation of the format flag +// is determined by the struct field type. +// +// The "omitzero" and "omitempty" options are mostly semantically identical. +// The former is defined in terms of the Go type system, +// while the latter in terms of the JSON type system. +// Consequently they behave differently in some circumstances. +// For example, only a nil slice or map is omitted under "omitzero", while +// an empty slice or map is omitted under "omitempty" regardless of nilness. +// The "omitzero" option is useful for types with a well-defined zero value +// (e.g., [net/netip.Addr]) or have an IsZero method (e.g., [time.Time.IsZero]). 
+// +// Every Go struct corresponds to a list of JSON representable fields +// which is constructed by performing a breadth-first search over +// all struct fields (excluding unexported or ignored fields), +// where the search recursively descends into inlined structs. +// The set of non-inlined fields in a struct must have unique JSON names. +// If multiple fields all have the same JSON name, then the one +// at shallowest depth takes precedence and the other fields at deeper depths +// are excluded from the list of JSON representable fields. +// If multiple fields at the shallowest depth have the same JSON name, +// but exactly one is explicitly tagged with a JSON name, +// then that field takes precedence and all others are excluded from the list. +// This is analogous to Go visibility rules for struct field selection +// with embedded struct types. +// +// Marshaling or unmarshaling a non-empty struct +// without any JSON representable fields results in a [SemanticError]. +// Unexported fields must not have any `json` tags except for `json:"-"`. +// + +package json + +import ( + "bytes" + "cmp" + "encoding" + "encoding/base32" + "encoding/base64" + "encoding/binary" + "encoding/hex" + "errors" + "fmt" + "io" + "math" + "math/bits" + "reflect" + "slices" + "strconv" + "strings" + "sync" + "time" + "unicode" + "unicode/utf8" + + "github.com/quay/clair/v4/internal/json/internal" + "github.com/quay/clair/v4/internal/json/internal/jsonflags" + "github.com/quay/clair/v4/internal/json/internal/jsonopts" + "github.com/quay/clair/v4/internal/json/internal/jsonwire" + "github.com/quay/clair/v4/internal/json/jsontext" +) + +// export exposes internal functionality of the "jsontext" package. 
+var export = jsontext.Internal.Export(&internal.AllowInternalUse) + +var structOptionsPool = &sync.Pool{New: func() any { return new(jsonopts.Struct) }} + +func getStructOptions() *jsonopts.Struct { + return structOptionsPool.Get().(*jsonopts.Struct) +} + +func putStructOptions(o *jsonopts.Struct) { + *o = jsonopts.Struct{} + structOptionsPool.Put(o) +} + +// Marshal serializes a Go value as a []byte according to the provided +// marshal and encode options (while ignoring unmarshal or decode options). +// It does not terminate the output with a newline. +// +// Type-specific marshal functions and methods take precedence +// over the default representation of a value. +// Functions or methods that operate on *T are only called when encoding +// a value of type T (by taking its address) or a non-nil value of *T. +// Marshal ensures that a value is always addressable +// (by boxing it on the heap if necessary) so that +// these functions and methods can be consistently called. For performance, +// it is recommended that Marshal be passed a non-nil pointer to the value. +// +// The input value is encoded as JSON according the following rules: +// +// - If any type-specific functions in a [WithMarshalers] option match +// the value type, then those functions are called to encode the value. +// If all applicable functions return [SkipFunc], +// then the value is encoded according to subsequent rules. +// +// - If the value type implements [MarshalerV2], +// then the MarshalJSONV2 method is called to encode the value. +// +// - If the value type implements [MarshalerV1], +// then the MarshalJSON method is called to encode the value. +// +// - If the value type implements [encoding.TextMarshaler], +// then the MarshalText method is called to encode the value and +// subsequently encode its result as a JSON string. +// +// - Otherwise, the value is encoded according to the value's type +// as described in detail below. 
+// +// Most Go types have a default JSON representation. +// Certain types support specialized formatting according to +// a format flag optionally specified in the Go struct tag +// for the struct field that contains the current value +// (see the “JSON Representation of Go structs” section for more details). +// +// The representation of each type is as follows: +// +// - A Go boolean is encoded as a JSON boolean (e.g., true or false). +// It does not support any custom format flags. +// +// - A Go string is encoded as a JSON string. +// It does not support any custom format flags. +// +// - A Go []byte or [N]byte is encoded as a JSON string containing +// the binary value encoded using RFC 4648. +// If the format is "base64" or unspecified, then this uses RFC 4648, section 4. +// If the format is "base64url", then this uses RFC 4648, section 5. +// If the format is "base32", then this uses RFC 4648, section 6. +// If the format is "base32hex", then this uses RFC 4648, section 7. +// If the format is "base16" or "hex", then this uses RFC 4648, section 8. +// If the format is "array", then the bytes value is encoded as a JSON array +// where each byte is recursively JSON-encoded as each JSON array element. +// +// - A Go integer is encoded as a JSON number without fractions or exponents. +// If [StringifyNumbers] is specified, then the JSON number is +// encoded within a JSON string. It does not support any custom format flags. +// +// - A Go float is encoded as a JSON number. +// If [StringifyNumbers] is specified, +// then the JSON number is encoded within a JSON string. +// If the format is "nonfinite", then NaN, +Inf, and -Inf are encoded as +// the JSON strings "NaN", "Infinity", and "-Infinity", respectively. +// Otherwise, the presence of non-finite numbers results in a [SemanticError]. +// +// - A Go map is encoded as a JSON object, where each Go map key and value +// is recursively encoded as a name and value pair in the JSON object. 
+// The Go map key must encode as a JSON string, otherwise this results +// in a [SemanticError]. When encoding keys, [StringifyNumbers] +// is automatically applied so that numeric keys encode as JSON strings. +// The Go map is traversed in a non-deterministic order. +// For deterministic encoding, consider using [jsontext.Value.Canonicalize]. +// If the format is "emitnull", then a nil map is encoded as a JSON null. +// If the format is "emitempty", then a nil map is encoded as an empty JSON object, +// regardless of whether [FormatNilMapAsNull] is specified. +// Otherwise by default, a nil map is encoded as an empty JSON object. +// +// - A Go struct is encoded as a JSON object. +// See the “JSON Representation of Go structs” section +// in the package-level documentation for more details. +// +// - A Go slice is encoded as a JSON array, where each Go slice element +// is recursively JSON-encoded as the elements of the JSON array. +// If the format is "emitnull", then a nil slice is encoded as a JSON null. +// If the format is "emitempty", then a nil slice is encoded as an empty JSON array, +// regardless of whether [FormatNilSliceAsNull] is specified. +// Otherwise by default, a nil slice is encoded as an empty JSON array. +// +// - A Go array is encoded as a JSON array, where each Go array element +// is recursively JSON-encoded as the elements of the JSON array. +// The JSON array length is always identical to the Go array length. +// It does not support any custom format flags. +// +// - A Go pointer is encoded as a JSON null if nil, otherwise it is +// the recursively JSON-encoded representation of the underlying value. +// Format flags are forwarded to the encoding of the underlying value. +// +// - A Go interface is encoded as a JSON null if nil, otherwise it is +// the recursively JSON-encoded representation of the underlying value. +// It does not support any custom format flags. 
+// +// - A Go [time.Time] is encoded as a JSON string containing the timestamp +// formatted in RFC 3339 with nanosecond precision. +// If the format matches one of the format constants declared +// in the time package (e.g., RFC1123), then that format is used. +// If the format is "unix", "unixmilli", "unixmicro", or "unixnano", +// then the timestamp is encoded as a JSON number of the number of seconds +// (or milliseconds, microseconds, or nanoseconds) since the Unix epoch, +// which is January 1st, 1970 at 00:00:00 UTC. +// Otherwise, the format is used as-is with [time.Time.Format] if non-empty. +// +// - A Go [time.Duration] is encoded as a JSON string containing the duration +// formatted according to [time.Duration.String]. +// If the format is "sec", "milli", "micro", or "nano", +// then the duration is encoded as a JSON number of the number of seconds +// (or milliseconds, microseconds, or nanoseconds) in the duration. +// If the format is "base60", it is encoded as a JSON string +// using the "H:MM:SS.SSSSSSSSS" representation. +// If the format is "units", it uses [time.Duration.String]. +// +// - All other Go types (e.g., complex numbers, channels, and functions) +// have no default representation and result in a [SemanticError]. +// +// JSON cannot represent cyclic data structures and Marshal does not handle them. +// Passing cyclic structures will result in an error. +func Marshal(in any, opts ...Options) (out []byte, err error) { + enc := export.GetBufferedEncoder(opts...) + defer export.PutBufferedEncoder(enc) + xe := export.Encoder(enc) + xe.Flags.Set(jsonflags.OmitTopLevelNewline | 1) + err = marshalEncode(enc, in, &xe.Struct) + return bytes.Clone(xe.Buf), err +} + +// MarshalWrite serializes a Go value into an [io.Writer] according to the provided +// marshal and encode options (while ignoring unmarshal or decode options). +// It does not terminate the output with a newline. 
+// See [Marshal] for details about the conversion of a Go value into JSON. +func MarshalWrite(out io.Writer, in any, opts ...Options) (err error) { + enc := export.GetStreamingEncoder(out, opts...) + defer export.PutStreamingEncoder(enc) + xe := export.Encoder(enc) + xe.Flags.Set(jsonflags.OmitTopLevelNewline | 1) + return marshalEncode(enc, in, &xe.Struct) +} + +// MarshalEncode serializes a Go value into an [jsontext.Encoder] according to +// the provided marshal options (while ignoring unmarshal, encode, or decode options). +// Unlike [Marshal] and [MarshalWrite], encode options are ignored because +// they must have already been specified on the provided [jsontext.Encoder]. +// See [Marshal] for details about the conversion of a Go value into JSON. +func MarshalEncode(out *jsontext.Encoder, in any, opts ...Options) (err error) { + mo := getStructOptions() + defer putStructOptions(mo) + mo.Join(opts...) + xe := export.Encoder(out) + mo.CopyCoderOptions(&xe.Struct) + return marshalEncode(out, in, mo) +} + +func marshalEncode(out *jsontext.Encoder, in any, mo *jsonopts.Struct) (err error) { + v := reflect.ValueOf(in) + if !v.IsValid() || (v.Kind() == reflect.Pointer && v.IsNil()) { + return out.WriteToken(jsontext.Null) + } + // Shallow copy non-pointer values to obtain an addressable value. + // It is beneficial to performance to always pass pointers to avoid this. + if v.Kind() != reflect.Pointer { + v2 := reflect.New(v.Type()) + v2.Elem().Set(v) + v = v2 + } + va := addressableValue{v.Elem()} // dereferenced pointer is always addressable + t := va.Type() + + // Lookup and call the marshal function for this type. 
+ marshal := lookupArshaler(t).marshal + if mo.Marshalers != nil { + marshal, _ = mo.Marshalers.(*Marshalers).lookup(marshal, t) + } + if err := marshal(out, va, mo); err != nil { + xe := export.Encoder(out) + if !xe.Flags.Get(jsonflags.AllowDuplicateNames) { + xe.Tokens.InvalidateDisabledNamespaces() + } + return err + } + return nil +} + +// Unmarshal decodes a []byte input into a Go value according to the provided +// unmarshal and decode options (while ignoring marshal or encode options). +// The input must be a single JSON value with optional whitespace interspersed. +// The output must be a non-nil pointer. +// +// Type-specific unmarshal functions and methods take precedence +// over the default representation of a value. +// Functions or methods that operate on *T are only called when decoding +// a value of type T (by taking its address) or a non-nil value of *T. +// Unmarshal ensures that a value is always addressable +// (by boxing it on the heap if necessary) so that +// these functions and methods can be consistently called. +// +// The input is decoded into the output according the following rules: +// +// - If any type-specific functions in a [WithUnmarshalers] option match +// the value type, then those functions are called to decode the JSON +// value. If all applicable functions return [SkipFunc], +// then the input is decoded according to subsequent rules. +// +// - If the value type implements [UnmarshalerV2], +// then the UnmarshalJSONV2 method is called to decode the JSON value. +// +// - If the value type implements [UnmarshalerV1], +// then the UnmarshalJSON method is called to decode the JSON value. +// +// - If the value type implements [encoding.TextUnmarshaler], +// then the input is decoded as a JSON string and +// the UnmarshalText method is called with the decoded string value. +// This fails with a [SemanticError] if the input is not a JSON string. 
+// +// - Otherwise, the JSON value is decoded according to the value's type +// as described in detail below. +// +// Most Go types have a default JSON representation. +// Certain types support specialized formatting according to +// a format flag optionally specified in the Go struct tag +// for the struct field that contains the current value +// (see the “JSON Representation of Go structs” section for more details). +// A JSON null may be decoded into every supported Go value where +// it is equivalent to storing the zero value of the Go value. +// If the input JSON kind is not handled by the current Go value type, +// then this fails with a [SemanticError]. Unless otherwise specified, +// the decoded value replaces any pre-existing value. +// +// The representation of each type is as follows: +// +// - A Go boolean is decoded from a JSON boolean (e.g., true or false). +// It does not support any custom format flags. +// +// - A Go string is decoded from a JSON string. +// It does not support any custom format flags. +// +// - A Go []byte or [N]byte is decoded from a JSON string +// containing the binary value encoded using RFC 4648. +// If the format is "base64" or unspecified, then this uses RFC 4648, section 4. +// If the format is "base64url", then this uses RFC 4648, section 5. +// If the format is "base32", then this uses RFC 4648, section 6. +// If the format is "base32hex", then this uses RFC 4648, section 7. +// If the format is "base16" or "hex", then this uses RFC 4648, section 8. +// If the format is "array", then the Go slice or array is decoded from a +// JSON array where each JSON element is recursively decoded for each byte. +// When decoding into a non-nil []byte, the slice length is reset to zero +// and the decoded input is appended to it. +// When decoding into a [N]byte, the input must decode to exactly N bytes, +// otherwise it fails with a [SemanticError]. +// +// - A Go integer is decoded from a JSON number. 
+// It may also be decoded from a JSON string containing a JSON number +// if [StringifyNumbers] is specified. +// It fails with a [SemanticError] if the JSON number +// has a fractional or exponent component. +// It also fails if it overflows the representation of the Go integer type. +// It does not support any custom format flags. +// +// - A Go float is decoded from a JSON number. +// It may also be decoded from a JSON string containing a JSON number +// if [StringifyNumbers] is specified. +// The JSON number is parsed as the closest representable Go float value. +// If the format is "nonfinite", then the JSON strings +// "NaN", "Infinity", and "-Infinity" are decoded as NaN, +Inf, and -Inf. +// Otherwise, the presence of such strings results in a [SemanticError]. +// +// - A Go map is decoded from a JSON object, +// where each JSON object name and value pair is recursively decoded +// as the Go map key and value. When decoding keys, +// [StringifyNumbers] is automatically applied so that +// numeric keys can decode from JSON strings. Maps are not cleared. +// If the Go map is nil, then a new map is allocated to decode into. +// If the decoded key matches an existing Go map entry, the entry value +// is reused by decoding the JSON object value into it. +// The formats "emitnull" and "emitempty" have no effect when decoding. +// +// - A Go struct is decoded from a JSON object. +// See the “JSON Representation of Go structs” section +// in the package-level documentation for more details. +// +// - A Go slice is decoded from a JSON array, where each JSON element +// is recursively decoded and appended to the Go slice. +// Before appending into a Go slice, a new slice is allocated if it is nil, +// otherwise the slice length is reset to zero. +// The formats "emitnull" and "emitempty" have no effect when decoding. +// +// - A Go array is decoded from a JSON array, where each JSON array element +// is recursively decoded as each corresponding Go array element. 
+// Each Go array element is zeroed before decoding into it. +// It fails with a [SemanticError] if the JSON array does not contain +// the exact same number of elements as the Go array. +// It does not support any custom format flags. +// +// - A Go pointer is decoded based on the JSON kind and underlying Go type. +// If the input is a JSON null, then this stores a nil pointer. +// Otherwise, it allocates a new underlying value if the pointer is nil, +// and recursively JSON decodes into the underlying value. +// Format flags are forwarded to the decoding of the underlying type. +// +// - A Go interface is decoded based on the JSON kind and underlying Go type. +// If the input is a JSON null, then this stores a nil interface value. +// Otherwise, a nil interface value of an empty interface type is initialized +// with a zero Go bool, string, float64, map[string]any, or []any if the +// input is a JSON boolean, string, number, object, or array, respectively. +// If the interface value is still nil, then this fails with a [SemanticError] +// since decoding could not determine an appropriate Go type to decode into. +// For example, unmarshaling into a nil io.Reader fails since +// there is no concrete type to populate the interface value with. +// Otherwise an underlying value exists and it recursively decodes +// the JSON input into it. It does not support any custom format flags. +// +// - A Go [time.Time] is decoded from a JSON string containing the time +// formatted in RFC 3339 with nanosecond precision. +// If the format matches one of the format constants declared in +// the time package (e.g., RFC1123), then that format is used for parsing. +// If the format is "unix", "unixmilli", "unixmicro", or "unixnano", +// then the timestamp is decoded from a JSON number of the number of seconds +// (or milliseconds, microseconds, or nanoseconds) since the Unix epoch, +// which is January 1st, 1970 at 00:00:00 UTC. 
+// Otherwise, the format is used as-is with [time.Parse] if non-empty. +// +// - A Go [time.Duration] is decoded from a JSON string by +// passing the decoded string to [time.ParseDuration]. +// If the format is "sec", "milli", "micro", or "nano", +// then the duration is decoded from a JSON number of the number of seconds +// (or milliseconds, microseconds, or nanoseconds) in the duration. +// If the format is "base60", it is decoded from a JSON string +// using the "H:MM:SS.SSSSSSSSS" representation. +// If the format is "units", it uses [time.ParseDuration]. +// +// - All other Go types (e.g., complex numbers, channels, and functions) +// have no default representation and result in a [SemanticError]. +// +// In general, unmarshaling follows merge semantics (similar to RFC 7396) +// where the decoded Go value replaces the destination value +// for any JSON kind other than an object. +// For JSON objects, the input object is merged into the destination value +// where matching object members recursively apply merge semantics. +func Unmarshal(in []byte, out any, opts ...Options) (err error) { + dec := export.GetBufferedDecoder(in, opts...) + defer export.PutBufferedDecoder(dec) + xd := export.Decoder(dec) + return unmarshalFull(dec, out, &xd.Struct) +} + +// UnmarshalRead deserializes a Go value from an [io.Reader] according to the +// provided unmarshal and decode options (while ignoring marshal or encode options). +// The input must be a single JSON value with optional whitespace interspersed. +// It consumes the entirety of [io.Reader] until [io.EOF] is encountered, +// without reporting an error for EOF. The output must be a non-nil pointer. +// See [Unmarshal] for details about the conversion of JSON into a Go value. +func UnmarshalRead(in io.Reader, out any, opts ...Options) (err error) { + dec := export.GetStreamingDecoder(in, opts...)
+ defer export.PutStreamingDecoder(dec) + xd := export.Decoder(dec) + return unmarshalFull(dec, out, &xd.Struct) +} + +func unmarshalFull(in *jsontext.Decoder, out any, uo *jsonopts.Struct) error { + switch err := unmarshalDecode(in, out, uo); err { + case nil: + return export.Decoder(in).CheckEOF() + case io.EOF: + return io.ErrUnexpectedEOF + default: + return err + } +} + +// UnmarshalDecode deserializes a Go value from a [jsontext.Decoder] according to +// the provided unmarshal options (while ignoring marshal, encode, or decode options). +// Unlike [Unmarshal] and [UnmarshalRead], decode options are ignored because +// they must have already been specified on the provided [jsontext.Decoder]. +// The input may be a stream of one or more JSON values, +// where this only unmarshals the next JSON value in the stream. +// The output must be a non-nil pointer. +// See [Unmarshal] for details about the conversion of JSON into a Go value. +func UnmarshalDecode(in *jsontext.Decoder, out any, opts ...Options) (err error) { + uo := getStructOptions() + defer putStructOptions(uo) + uo.Join(opts...) + xd := export.Decoder(in) + uo.CopyCoderOptions(&xd.Struct) + return unmarshalDecode(in, out, uo) +} + +func unmarshalDecode(in *jsontext.Decoder, out any, uo *jsonopts.Struct) (err error) { + v := reflect.ValueOf(out) + if !v.IsValid() || v.Kind() != reflect.Pointer || v.IsNil() { + var t reflect.Type + if v.IsValid() { + t = v.Type() + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + } + err := errors.New("value must be passed as a non-nil pointer reference") + return &SemanticError{action: "unmarshal", GoType: t, Err: err} + } + va := addressableValue{v.Elem()} // dereferenced pointer is always addressable + t := va.Type() + + // Lookup and call the unmarshal function for this type. 
+ unmarshal := lookupArshaler(t).unmarshal + if uo.Unmarshalers != nil { + unmarshal, _ = uo.Unmarshalers.(*Unmarshalers).lookup(unmarshal, t) + } + if err := unmarshal(in, va, uo); err != nil { + xd := export.Decoder(in) + if !xd.Flags.Get(jsonflags.AllowDuplicateNames) { + xd.Tokens.InvalidateDisabledNamespaces() + } + return err + } + return nil +} + +// addressableValue is a reflect.Value that is guaranteed to be addressable +// such that calling the Addr and Set methods do not panic. +// +// There is no compiler magic that enforces this property, +// but rather the need to construct this type makes it easier to examine each +// construction site to ensure that this property is upheld. +type addressableValue struct{ reflect.Value } + +// newAddressableValue constructs a new addressable value of type t. +func newAddressableValue(t reflect.Type) addressableValue { + return addressableValue{reflect.New(t).Elem()} +} + +// All marshal and unmarshal behavior is implemented using these signatures. +// The *jsonopts.Struct argument is guaranteed to be identical to or at least +// a strict super-set of the options in Encoder.Struct or Decoder.Struct. +// It is identical for Marshal, Unmarshal, MarshalWrite, and UnmarshalRead. +// It is a super-set for MarshalEncode and UnmarshalDecode. +type ( + marshaler = func(*jsontext.Encoder, addressableValue, *jsonopts.Struct) error + unmarshaler = func(*jsontext.Decoder, addressableValue, *jsonopts.Struct) error +) + +type arshaler struct { + marshal marshaler + unmarshal unmarshaler + nonDefault bool +} + +var lookupArshalerCache sync.Map // map[reflect.Type]*arshaler + +func lookupArshaler(t reflect.Type) *arshaler { + if v, ok := lookupArshalerCache.Load(t); ok { + return v.(*arshaler) + } + + fncs := makeDefaultArshaler(t) + fncs = makeMethodArshaler(fncs, t) + fncs = makeTimeArshaler(fncs, t) + + // Use the last stored so that duplicate arshalers can be garbage collected.
+ v, _ := lookupArshalerCache.LoadOrStore(t, fncs) + return v.(*arshaler) +} + +var stringsPools = &sync.Pool{New: func() any { return new(stringSlice) }} + +type stringSlice []string + +// getStrings returns a non-nil pointer to a slice with length n. +func getStrings(n int) *stringSlice { + s := stringsPools.Get().(*stringSlice) + if cap(*s) < n { + *s = make([]string, n) + } + *s = (*s)[:n] + return s +} + +func putStrings(s *stringSlice) { + if cap(*s) > 1<<10 { + *s = nil // avoid pinning arbitrarily large amounts of memory + } + stringsPools.Put(s) +} + +// Sort sorts the string slice according to RFC 8785, section 3.2.3. +func (ss *stringSlice) Sort() { + slices.SortFunc(*ss, func(x, y string) int { return jsonwire.CompareUTF16(x, y) }) +} + +// This file contains an optimized marshal and unmarshal implementation +// for the any type. This type is often used when the Go program has +// no knowledge of the JSON schema. This is a common enough occurrence +// to justify the complexity of adding logic for this. 
+ +func marshalValueAny(enc *jsontext.Encoder, val any, mo *jsonopts.Struct) error { + switch val := val.(type) { + case nil: + return enc.WriteToken(jsontext.Null) + case bool: + return enc.WriteToken(jsontext.Bool(val)) + case string: + return enc.WriteToken(jsontext.String(val)) + case float64: + return enc.WriteToken(jsontext.Float(val)) + case map[string]any: + return marshalObjectAny(enc, val, mo) + case []any: + return marshalArrayAny(enc, val, mo) + default: + v := newAddressableValue(reflect.TypeOf(val)) + v.Set(reflect.ValueOf(val)) + marshal := lookupArshaler(v.Type()).marshal + if mo.Marshalers != nil { + marshal, _ = mo.Marshalers.(*Marshalers).lookup(marshal, v.Type()) + } + return marshal(enc, v, mo) + } +} + +func unmarshalValueAny(dec *jsontext.Decoder, uo *jsonopts.Struct) (any, error) { + switch k := dec.PeekKind(); k { + case '{': + return unmarshalObjectAny(dec, uo) + case '[': + return unmarshalArrayAny(dec, uo) + default: + xd := export.Decoder(dec) + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) + if err != nil { + return nil, err + } + switch val.Kind() { + case 'n': + return nil, nil + case 'f': + return false, nil + case 't': + return true, nil + case '"': + val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) + if xd.StringCache == nil { + xd.StringCache = new(stringCache) + } + return makeString(xd.StringCache, val), nil + case '0': + fv, ok := jsonwire.ParseFloat(val, 64) + if !ok && uo.Flags.Get(jsonflags.RejectFloatOverflow) { + return nil, &SemanticError{action: "unmarshal", JSONKind: k, GoType: float64Type, Err: strconv.ErrRange} + } + return fv, nil + default: + panic("BUG: invalid kind: " + k.String()) + } + } +} + +func marshalObjectAny(enc *jsontext.Encoder, obj map[string]any, mo *jsonopts.Struct) error { + // Check for cycles. 
+ xe := export.Encoder(enc) + if xe.Tokens.Depth() > startDetectingCyclesAfter { + v := reflect.ValueOf(obj) + if err := visitPointer(&xe.SeenPointers, v); err != nil { + return err + } + defer leavePointer(&xe.SeenPointers, v) + } + + // Handle empty maps. + if len(obj) == 0 { + if mo.Flags.Get(jsonflags.FormatNilMapAsNull) && obj == nil { + return enc.WriteToken(jsontext.Null) + } + // Optimize for marshaling an empty map without any preceding whitespace. + if !xe.Flags.Get(jsonflags.AnyWhitespace) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = append(xe.Tokens.MayAppendDelim(xe.Buf, '{'), "{}"...) + xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() + } + return nil + } + } + + if err := enc.WriteToken(jsontext.ObjectStart); err != nil { + return err + } + // A Go map guarantees that each entry has a unique key + // The only possibility of duplicates is due to invalid UTF-8. + if !xe.Flags.Get(jsonflags.AllowInvalidUTF8) { + xe.Tokens.Last.DisableNamespace() + } + if !mo.Flags.Get(jsonflags.Deterministic) || len(obj) <= 1 { + for name, val := range obj { + if err := enc.WriteToken(jsontext.String(name)); err != nil { + return err + } + if err := marshalValueAny(enc, val, mo); err != nil { + return err + } + } + } else { + names := getStrings(len(obj)) + var i int + for name := range obj { + (*names)[i] = name + i++ + } + names.Sort() + for _, name := range *names { + if err := enc.WriteToken(jsontext.String(name)); err != nil { + return err + } + if err := marshalValueAny(enc, obj[name], mo); err != nil { + return err + } + } + putStrings(names) + } + if err := enc.WriteToken(jsontext.ObjectEnd); err != nil { + return err + } + return nil +} + +func unmarshalObjectAny(dec *jsontext.Decoder, uo *jsonopts.Struct) (map[string]any, error) { + tok, err := dec.ReadToken() + if err != nil { + return nil, err + } + k := tok.Kind() + switch k { + case 'n': + return nil, nil + case '{': + xd := export.Decoder(dec) + obj := make(map[string]any) + // A 
Go map guarantees that each entry has a unique key + // The only possibility of duplicates is due to invalid UTF-8. + if !xd.Flags.Get(jsonflags.AllowInvalidUTF8) { + xd.Tokens.Last.DisableNamespace() + } + for dec.PeekKind() != '}' { + tok, err := dec.ReadToken() + if err != nil { + return obj, err + } + name := tok.String() + + // Manually check for duplicate names. + if _, ok := obj[name]; ok { + name := xd.PreviousBuffer() + err := export.NewDuplicateNameError(name, dec.InputOffset()-len64(name)) + return obj, err + } + + val, err := unmarshalValueAny(dec, uo) + obj[name] = val + if err != nil { + return obj, err + } + } + if _, err := dec.ReadToken(); err != nil { + return obj, err + } + return obj, nil + } + return nil, &SemanticError{action: "unmarshal", JSONKind: k, GoType: mapStringAnyType} +} + +func marshalArrayAny(enc *jsontext.Encoder, arr []any, mo *jsonopts.Struct) error { + // Check for cycles. + xe := export.Encoder(enc) + if xe.Tokens.Depth() > startDetectingCyclesAfter { + v := reflect.ValueOf(arr) + if err := visitPointer(&xe.SeenPointers, v); err != nil { + return err + } + defer leavePointer(&xe.SeenPointers, v) + } + + // Handle empty slices. + if len(arr) == 0 { + if mo.Flags.Get(jsonflags.FormatNilSliceAsNull) && arr == nil { + return enc.WriteToken(jsontext.Null) + } + // Optimize for marshaling an empty slice without any preceding whitespace. + if !xe.Flags.Get(jsonflags.AnyWhitespace) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = append(xe.Tokens.MayAppendDelim(xe.Buf, '['), "[]"...) 
+ xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() + } + return nil + } + } + + if err := enc.WriteToken(jsontext.ArrayStart); err != nil { + return err + } + for _, val := range arr { + if err := marshalValueAny(enc, val, mo); err != nil { + return err + } + } + if err := enc.WriteToken(jsontext.ArrayEnd); err != nil { + return err + } + return nil +} + +func unmarshalArrayAny(dec *jsontext.Decoder, uo *jsonopts.Struct) ([]any, error) { + tok, err := dec.ReadToken() + if err != nil { + return nil, err + } + k := tok.Kind() + switch k { + case 'n': + return nil, nil + case '[': + arr := []any{} + for dec.PeekKind() != ']' { + val, err := unmarshalValueAny(dec, uo) + arr = append(arr, val) + if err != nil { + return arr, err + } + } + if _, err := dec.ReadToken(); err != nil { + return arr, err + } + return arr, nil + } + return nil, &SemanticError{action: "unmarshal", JSONKind: k, GoType: sliceAnyType} +} + +// optimizeCommon specifies whether to use optimizations targeted for certain +// common patterns, rather than using the slower, but more general logic. +// All tests should pass regardless of whether this is true or not. +const optimizeCommon = true + +var ( + // Most natural Go types that correspond with each JSON type.
+ anyType = reflect.TypeFor[any]() // JSON value + boolType = reflect.TypeFor[bool]() // JSON bool + stringType = reflect.TypeFor[string]() // JSON string + float64Type = reflect.TypeFor[float64]() // JSON number + mapStringAnyType = reflect.TypeFor[map[string]any]() // JSON object + sliceAnyType = reflect.TypeFor[[]any]() // JSON array + + bytesType = reflect.TypeFor[[]byte]() + emptyStructType = reflect.TypeFor[struct{}]() +) + +const startDetectingCyclesAfter = 1000 + +type seenPointers = map[any]struct{} + +type typedPointer struct { + typ reflect.Type + ptr any // always stores unsafe.Pointer, but avoids depending on unsafe + len int // remember slice length to avoid false positives +} + +// visitPointer records the pointer underlying v in m, reporting an error if it was seen before. +// If successfully visited, then the caller must eventually call leavePointer. +func visitPointer(m *seenPointers, v reflect.Value) error { + p := typedPointer{v.Type(), v.UnsafePointer(), sliceLen(v)} + if _, ok := (*m)[p]; ok { + return &SemanticError{action: "marshal", GoType: p.typ, Err: errors.New("encountered a cycle")} + } + if *m == nil { + *m = make(seenPointers) + } + (*m)[p] = struct{}{} + return nil +} + +func leavePointer(m *seenPointers, v reflect.Value) { + p := typedPointer{v.Type(), v.UnsafePointer(), sliceLen(v)} + delete(*m, p) +} + +func sliceLen(v reflect.Value) int { + if v.Kind() == reflect.Slice { + return v.Len() + } + return 0 +} + +func len64[Bytes ~[]byte | ~string](in Bytes) int64 { + return int64(len(in)) +} + +func makeDefaultArshaler(t reflect.Type) *arshaler { + switch t.Kind() { + case reflect.Bool: + return makeBoolArshaler(t) + case reflect.String: + return makeStringArshaler(t) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return makeIntArshaler(t) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return makeUintArshaler(t) + case reflect.Float32, reflect.Float64: + return
makeFloatArshaler(t) + case reflect.Map: + return makeMapArshaler(t) + case reflect.Struct: + return makeStructArshaler(t) + case reflect.Slice: + fncs := makeSliceArshaler(t) + if t.AssignableTo(bytesType) { + return makeBytesArshaler(t, fncs) + } + return fncs + case reflect.Array: + fncs := makeArrayArshaler(t) + if reflect.SliceOf(t.Elem()).AssignableTo(bytesType) { + return makeBytesArshaler(t, fncs) + } + return fncs + case reflect.Pointer: + return makePointerArshaler(t) + case reflect.Interface: + return makeInterfaceArshaler(t) + default: + return makeInvalidArshaler(t) + } +} + +func makeBoolArshaler(t reflect.Type) *arshaler { + var fncs arshaler + fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error { + xe := export.Encoder(enc) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { + return newInvalidFormatError("marshal", t, mo.Format) + } + + // Optimize for marshaling without preceding whitespace. + if optimizeCommon && !xe.Flags.Get(jsonflags.AnyWhitespace) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = strconv.AppendBool(xe.Tokens.MayAppendDelim(xe.Buf, 't'), va.Bool()) + xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() + } + return nil + } + + return enc.WriteToken(jsontext.Bool(va.Bool())) + } + fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { + return newInvalidFormatError("unmarshal", t, uo.Format) + } + tok, err := dec.ReadToken() + if err != nil { + return err + } + k := tok.Kind() + switch k { + case 'n': + va.SetBool(false) + return nil + case 't', 'f': + va.SetBool(tok.Bool()) + return nil + } + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + return &fncs +} + +func makeStringArshaler(t reflect.Type) *arshaler { + var fncs arshaler + fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo 
*jsonopts.Struct) error { + xe := export.Encoder(enc) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { + return newInvalidFormatError("marshal", t, mo.Format) + } + + // Optimize for marshaling without preceding whitespace or string escaping. + s := va.String() + if optimizeCommon && !xe.Flags.Get(jsonflags.AnyWhitespace) && !xe.Tokens.Last.NeedObjectName() && !jsonwire.NeedEscape(s) { + b := xe.Buf + b = xe.Tokens.MayAppendDelim(b, '"') + b = append(b, '"') + b = append(b, s...) + b = append(b, '"') + xe.Buf = b + xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() + } + return nil + } + + return enc.WriteToken(jsontext.String(s)) + } + fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { + return newInvalidFormatError("unmarshal", t, uo.Format) + } + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) + if err != nil { + return err + } + k := val.Kind() + switch k { + case 'n': + va.SetString("") + return nil + case '"': + val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) + if xd.StringCache == nil { + xd.StringCache = new(stringCache) + } + str := makeString(xd.StringCache, val) + va.SetString(str) + return nil + } + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + return &fncs +} + +var ( + encodeBase16 = func(dst, src []byte) { hex.Encode(dst, src) } + encodeBase32 = base32.StdEncoding.Encode + encodeBase32Hex = base32.HexEncoding.Encode + encodeBase64 = base64.StdEncoding.Encode + encodeBase64URL = base64.URLEncoding.Encode + encodedLenBase16 = hex.EncodedLen + encodedLenBase32 = base32.StdEncoding.EncodedLen + encodedLenBase32Hex = base32.HexEncoding.EncodedLen + encodedLenBase64 = base64.StdEncoding.EncodedLen + encodedLenBase64URL = base64.URLEncoding.EncodedLen + decodeBase16 = hex.Decode + decodeBase32 = base32.StdEncoding.Decode + decodeBase32Hex = 
base32.HexEncoding.Decode + decodeBase64 = base64.StdEncoding.Decode + decodeBase64URL = base64.URLEncoding.Decode + decodedLenBase16 = hex.DecodedLen + decodedLenBase32 = base32.StdEncoding.WithPadding(base32.NoPadding).DecodedLen + decodedLenBase32Hex = base32.HexEncoding.WithPadding(base32.NoPadding).DecodedLen + decodedLenBase64 = base64.StdEncoding.WithPadding(base64.NoPadding).DecodedLen + decodedLenBase64URL = base64.URLEncoding.WithPadding(base64.NoPadding).DecodedLen +) + +func makeBytesArshaler(t reflect.Type, fncs *arshaler) *arshaler { + // NOTE: This handles both []byte and [N]byte. + marshalArray := fncs.marshal + fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error { + xe := export.Encoder(enc) + encode, encodedLen := encodeBase64, encodedLenBase64 + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { + switch mo.Format { + case "base64": + encode, encodedLen = encodeBase64, encodedLenBase64 + case "base64url": + encode, encodedLen = encodeBase64URL, encodedLenBase64URL + case "base32": + encode, encodedLen = encodeBase32, encodedLenBase32 + case "base32hex": + encode, encodedLen = encodeBase32Hex, encodedLenBase32Hex + case "base16", "hex": + encode, encodedLen = encodeBase16, encodedLenBase16 + case "array": + mo.Format = "" + return marshalArray(enc, va, mo) + default: + return newInvalidFormatError("marshal", t, mo.Format) + } + } else if mo.Flags.Get(jsonflags.FormatByteArrayAsArray) && va.Kind() == reflect.Array { + return marshalArray(enc, va, mo) + } + if mo.Flags.Get(jsonflags.FormatNilSliceAsNull) && va.Kind() == reflect.Slice && va.IsNil() { + // TODO: Provide a "emitempty" format override? 
+ return enc.WriteToken(jsontext.Null) + } + val := enc.UnusedBuffer() + b := va.Bytes() + n := len(`"`) + encodedLen(len(b)) + len(`"`) + if cap(val) < n { + val = make([]byte, n) + } else { + val = val[:n] + } + val[0] = '"' + encode(val[len(`"`):len(val)-len(`"`)], b) + val[len(val)-1] = '"' + return enc.WriteValue(val) + } + unmarshalArray := fncs.unmarshal + fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error { + xd := export.Decoder(dec) + decode, decodedLen, encodedLen := decodeBase64, decodedLenBase64, encodedLenBase64 + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { + switch uo.Format { + case "base64": + decode, decodedLen, encodedLen = decodeBase64, decodedLenBase64, encodedLenBase64 + case "base64url": + decode, decodedLen, encodedLen = decodeBase64URL, decodedLenBase64URL, encodedLenBase64URL + case "base32": + decode, decodedLen, encodedLen = decodeBase32, decodedLenBase32, encodedLenBase32 + case "base32hex": + decode, decodedLen, encodedLen = decodeBase32Hex, decodedLenBase32Hex, encodedLenBase32Hex + case "base16", "hex": + decode, decodedLen, encodedLen = decodeBase16, decodedLenBase16, encodedLenBase16 + case "array": + uo.Format = "" + return unmarshalArray(dec, va, uo) + default: + return newInvalidFormatError("unmarshal", t, uo.Format) + } + } else if uo.Flags.Get(jsonflags.FormatByteArrayAsArray) && va.Kind() == reflect.Array { + return unmarshalArray(dec, va, uo) + } + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) + if err != nil { + return err + } + k := val.Kind() + switch k { + case 'n': + va.SetZero() + return nil + case '"': + val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) + + // For base64 and base32, decodedLen computes the maximum output size + // when given the original input size. To compute the exact size, + // adjust the input size by excluding trailing padding characters. + // This is unnecessary for base16, but also harmless. 
+ n := len(val) + for n > 0 && val[n-1] == '=' { + n-- + } + n = decodedLen(n) + b := va.Bytes() + if va.Kind() == reflect.Array { + if n != len(b) { + err := fmt.Errorf("decoded base64 length of %d mismatches array length of %d", n, len(b)) + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t, Err: err} + } + } else { + if b == nil || cap(b) < n { + b = make([]byte, n) + } else { + b = b[:n] + } + } + n2, err := decode(b, val) + if err == nil && len(val) != encodedLen(n2) { + // TODO(https://go.dev/issue/53845): RFC 4648, section 3.3, + // specifies that non-alphabet characters must be rejected. + // Unfortunately, the "base32" and "base64" packages allow + // '\r' and '\n' characters by default. + err = errors.New("illegal data at input byte " + strconv.Itoa(bytes.IndexAny(val, "\r\n"))) + } + if err != nil { + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t, Err: err} + } + if va.Kind() == reflect.Slice { + va.SetBytes(b) + } + return nil + } + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + return fncs +} + +func makeIntArshaler(t reflect.Type) *arshaler { + var fncs arshaler + bits := t.Bits() + fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error { + xe := export.Encoder(enc) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { + return newInvalidFormatError("marshal", t, mo.Format) + } + + // Optimize for marshaling without preceding whitespace or string escaping. 
+ if optimizeCommon && !xe.Flags.Get(jsonflags.AnyWhitespace) && !mo.Flags.Get(jsonflags.StringifyNumbers) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = strconv.AppendInt(xe.Tokens.MayAppendDelim(xe.Buf, '0'), va.Int(), 10) + xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() + } + return nil + } + + k := stringOrNumberKind(mo.Flags.Get(jsonflags.StringifyNumbers)) + return xe.AppendRaw(k, true, func(b []byte) ([]byte, error) { + return strconv.AppendInt(b, va.Int(), 10), nil + }) + } + fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { + return newInvalidFormatError("unmarshal", t, uo.Format) + } + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) + if err != nil { + return err + } + k := val.Kind() + switch k { + case 'n': + va.SetInt(0) + return nil + case '"': + if !uo.Flags.Get(jsonflags.StringifyNumbers) { + break + } + val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) + fallthrough + case '0': + var negOffset int + neg := len(val) > 0 && val[0] == '-' + if neg { + negOffset = 1 + } + n, ok := jsonwire.ParseUint(val[negOffset:]) + maxInt := uint64(1) << (bits - 1) + overflow := (neg && n > maxInt) || (!neg && n > maxInt-1) + if !ok { + if n != math.MaxUint64 { + err := fmt.Errorf("cannot parse %q as signed integer: %w", val, strconv.ErrSyntax) + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t, Err: err} + } + overflow = true + } + if overflow { + err := fmt.Errorf("cannot parse %q as signed integer: %w", val, strconv.ErrRange) + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t, Err: err} + } + if neg { + va.SetInt(int64(-n)) + } else { + va.SetInt(int64(+n)) + } + return nil + } + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + return &fncs +} + +func makeUintArshaler(t reflect.Type) *arshaler { + var fncs arshaler + bits := 
t.Bits() + fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error { + xe := export.Encoder(enc) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { + return newInvalidFormatError("marshal", t, mo.Format) + } + + // Optimize for marshaling without preceding whitespace or string escaping. + if optimizeCommon && !xe.Flags.Get(jsonflags.AnyWhitespace) && !mo.Flags.Get(jsonflags.StringifyNumbers) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = strconv.AppendUint(xe.Tokens.MayAppendDelim(xe.Buf, '0'), va.Uint(), 10) + xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() + } + return nil + } + + k := stringOrNumberKind(mo.Flags.Get(jsonflags.StringifyNumbers)) + return xe.AppendRaw(k, true, func(b []byte) ([]byte, error) { + return strconv.AppendUint(b, va.Uint(), 10), nil + }) + } + fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { + return newInvalidFormatError("unmarshal", t, uo.Format) + } + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) + if err != nil { + return err + } + k := val.Kind() + switch k { + case 'n': + va.SetUint(0) + return nil + case '"': + if !uo.Flags.Get(jsonflags.StringifyNumbers) { + break + } + val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) + fallthrough + case '0': + n, ok := jsonwire.ParseUint(val) + maxUint := uint64(1) << bits + overflow := n > maxUint-1 + if !ok { + if n != math.MaxUint64 { + err := fmt.Errorf("cannot parse %q as unsigned integer: %w", val, strconv.ErrSyntax) + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t, Err: err} + } + overflow = true + } + if overflow { + err := fmt.Errorf("cannot parse %q as unsigned integer: %w", val, strconv.ErrRange) + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t, Err: err} + } + va.SetUint(n) + return nil + } + return 
&SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + return &fncs +} + +func makeFloatArshaler(t reflect.Type) *arshaler { + var fncs arshaler + bits := t.Bits() + fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error { + xe := export.Encoder(enc) + var allowNonFinite bool + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { + if mo.Format == "nonfinite" { + allowNonFinite = true + } else { + return newInvalidFormatError("marshal", t, mo.Format) + } + } + + fv := va.Float() + if math.IsNaN(fv) || math.IsInf(fv, 0) { + if !allowNonFinite { + err := fmt.Errorf("invalid value: %v", fv) + return &SemanticError{action: "marshal", GoType: t, Err: err} + } + return enc.WriteToken(jsontext.Float(fv)) + } + + // Optimize for marshaling without preceding whitespace or string escaping. + if optimizeCommon && !xe.Flags.Get(jsonflags.AnyWhitespace) && !mo.Flags.Get(jsonflags.StringifyNumbers) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = jsonwire.AppendFloat(xe.Tokens.MayAppendDelim(xe.Buf, '0'), fv, bits) + xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() + } + return nil + } + + k := stringOrNumberKind(mo.Flags.Get(jsonflags.StringifyNumbers)) + return xe.AppendRaw(k, true, func(b []byte) ([]byte, error) { + return jsonwire.AppendFloat(b, va.Float(), bits), nil + }) + } + fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error { + xd := export.Decoder(dec) + var allowNonFinite bool + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { + if uo.Format == "nonfinite" { + allowNonFinite = true + } else { + return newInvalidFormatError("unmarshal", t, uo.Format) + } + } + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) + if err != nil { + return err + } + k := val.Kind() + switch k { + case 'n': + va.SetFloat(0) + return nil + case '"': + val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) + if allowNonFinite { + switch string(val) { + 
case "NaN": + va.SetFloat(math.NaN()) + return nil + case "Infinity": + va.SetFloat(math.Inf(+1)) + return nil + case "-Infinity": + va.SetFloat(math.Inf(-1)) + return nil + } + } + if !uo.Flags.Get(jsonflags.StringifyNumbers) { + break + } + if n, err := jsonwire.ConsumeNumber(val); n != len(val) || err != nil { + err := fmt.Errorf("cannot parse %q as JSON number: %w", val, strconv.ErrSyntax) + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t, Err: err} + } + fallthrough + case '0': + fv, ok := jsonwire.ParseFloat(val, bits) + if !ok && uo.Flags.Get(jsonflags.RejectFloatOverflow) { + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t, Err: strconv.ErrRange} + } + va.SetFloat(fv) + return nil + } + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + return &fncs +} + +func makeMapArshaler(t reflect.Type) *arshaler { + // NOTE: The logic below disables namespaces for tracking duplicate names + // when handling map keys with a unique representation. + + // NOTE: Values retrieved from a map are not addressable, + // so we shallow copy the values to make them addressable and + // store them back into the map afterwards. + + var fncs arshaler + var ( + once sync.Once + keyFncs *arshaler + valFncs *arshaler + ) + init := func() { + keyFncs = lookupArshaler(t.Key()) + valFncs = lookupArshaler(t.Elem()) + } + fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error { + // Check for cycles. 
+ xe := export.Encoder(enc) + if xe.Tokens.Depth() > startDetectingCyclesAfter { + if err := visitPointer(&xe.SeenPointers, va.Value); err != nil { + return err + } + defer leavePointer(&xe.SeenPointers, va.Value) + } + + emitNull := mo.Flags.Get(jsonflags.FormatNilMapAsNull) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { + switch mo.Format { + case "emitnull": + emitNull = true + mo.Format = "" + case "emitempty": + emitNull = false + mo.Format = "" + default: + return newInvalidFormatError("marshal", t, mo.Format) + } + } + + // Handle empty maps. + n := va.Len() + if n == 0 { + if emitNull && va.IsNil() { + return enc.WriteToken(jsontext.Null) + } + // Optimize for marshaling an empty map without any preceding whitespace. + if optimizeCommon && !xe.Flags.Get(jsonflags.AnyWhitespace) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = append(xe.Tokens.MayAppendDelim(xe.Buf, '{'), "{}"...) + xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() + } + return nil + } + } + + once.Do(init) + if err := enc.WriteToken(jsontext.ObjectStart); err != nil { + return err + } + if n > 0 { + nonDefaultKey := keyFncs.nonDefault + marshalKey := keyFncs.marshal + marshalVal := valFncs.marshal + if mo.Marshalers != nil { + var ok bool + marshalKey, ok = mo.Marshalers.(*Marshalers).lookup(marshalKey, t.Key()) + marshalVal, _ = mo.Marshalers.(*Marshalers).lookup(marshalVal, t.Elem()) + nonDefaultKey = nonDefaultKey || ok + } + k := newAddressableValue(t.Key()) + v := newAddressableValue(t.Elem()) + + // A Go map guarantees that each entry has a unique key. + // As such, disable the expensive duplicate name check if we know + // that every Go key will serialize as a unique JSON string. 
+ if !nonDefaultKey && mapKeyWithUniqueRepresentation(k.Kind(), xe.Flags.Get(jsonflags.AllowInvalidUTF8)) { + xe.Tokens.Last.DisableNamespace() + } + + switch { + case !mo.Flags.Get(jsonflags.Deterministic) || n <= 1: + for iter := va.Value.MapRange(); iter.Next(); { + k.SetIterKey(iter) + flagsOriginal := mo.Flags + mo.Flags.Set(jsonflags.StringifyNumbers | 1) // stringify for numeric keys + err := marshalKey(enc, k, mo) + mo.Flags = flagsOriginal + if err != nil { + // TODO: If err is errMissingName, then wrap it as a + // SemanticError since this key type cannot be serialized + // as a JSON string. + return err + } + v.SetIterValue(iter) + if err := marshalVal(enc, v, mo); err != nil { + return err + } + } + case !nonDefaultKey && t.Key().Kind() == reflect.String: + names := getStrings(n) + for i, iter := 0, va.Value.MapRange(); i < n && iter.Next(); i++ { + k.SetIterKey(iter) + (*names)[i] = k.String() + } + names.Sort() + for _, name := range *names { + if err := enc.WriteToken(jsontext.String(name)); err != nil { + return err + } + // TODO(https://go.dev/issue/57061): Use v.SetMapIndexOf. + k.SetString(name) + v.Set(va.MapIndex(k.Value)) + if err := marshalVal(enc, v, mo); err != nil { + return err + } + } + putStrings(names) + default: + type member struct { + name string // unquoted name + key addressableValue + val addressableValue + } + members := make([]member, n) + keys := reflect.MakeSlice(reflect.SliceOf(t.Key()), n, n) + vals := reflect.MakeSlice(reflect.SliceOf(t.Elem()), n, n) + for i, iter := 0, va.Value.MapRange(); i < n && iter.Next(); i++ { + // Marshal the member name. 
+ k := addressableValue{keys.Index(i)} // indexed slice element is always addressable + k.SetIterKey(iter) + v := addressableValue{vals.Index(i)} // indexed slice element is always addressable + v.SetIterValue(iter) + flagsOriginal := mo.Flags + mo.Flags.Set(jsonflags.StringifyNumbers | 1) // stringify for numeric keys + err := marshalKey(enc, k, mo) + mo.Flags = flagsOriginal + if err != nil { + // TODO: If err is errMissingName, then wrap it as a + // SemanticError since this key type cannot be serialized + // as a JSON string. + return err + } + name := xe.UnwriteOnlyObjectMemberName() + members[i] = member{name, k, v} + } + // TODO: If AllowDuplicateNames is enabled, then sort according + // to reflect.Value as well if the names are equal. + // See internal/fmtsort. + slices.SortFunc(members, func(x, y member) int { + return jsonwire.CompareUTF16(x.name, y.name) + }) + for _, member := range members { + if err := enc.WriteToken(jsontext.String(member.name)); err != nil { + return err + } + if err := marshalVal(enc, member.val, mo); err != nil { + return err + } + } + } + } + if err := enc.WriteToken(jsontext.ObjectEnd); err != nil { + return err + } + return nil + } + fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { + switch uo.Format { + case "emitnull", "emitempty": + uo.Format = "" // only relevant for marshaling + default: + return newInvalidFormatError("unmarshal", t, uo.Format) + } + } + tok, err := dec.ReadToken() + if err != nil { + return err + } + k := tok.Kind() + switch k { + case 'n': + va.SetZero() + return nil + case '{': + once.Do(init) + if va.IsNil() { + va.Set(reflect.MakeMap(t)) + } + + nonDefaultKey := keyFncs.nonDefault + unmarshalKey := keyFncs.unmarshal + unmarshalVal := valFncs.unmarshal + if uo.Unmarshalers != nil { + var ok bool + unmarshalKey, ok = uo.Unmarshalers.(*Unmarshalers).lookup(unmarshalKey, 
t.Key()) + unmarshalVal, _ = uo.Unmarshalers.(*Unmarshalers).lookup(unmarshalVal, t.Elem()) + nonDefaultKey = nonDefaultKey || ok + } + k := newAddressableValue(t.Key()) + v := newAddressableValue(t.Elem()) + + // Manually check for duplicate entries by virtue of whether the + // unmarshaled key already exists in the destination Go map. + // Consequently, syntactically different names (e.g., "0" and "-0") + // will be rejected as duplicates since they semantically refer + // to the same Go value. This is an unusual interaction + // between syntax and semantics, but is more correct. + if !nonDefaultKey && mapKeyWithUniqueRepresentation(k.Kind(), xd.Flags.Get(jsonflags.AllowInvalidUTF8)) { + xd.Tokens.Last.DisableNamespace() + } + + // In the rare case where the map is not already empty, + // then we need to manually track which keys we already saw + // since existing presence alone is insufficient to indicate + // whether the input had a duplicate name. + var seen reflect.Value + if !xd.Flags.Get(jsonflags.AllowDuplicateNames) && va.Len() > 0 { + seen = reflect.MakeMap(reflect.MapOf(k.Type(), emptyStructType)) + } + + for dec.PeekKind() != '}' { + k.SetZero() + flagsOriginal := uo.Flags + uo.Flags.Set(jsonflags.StringifyNumbers | 1) // stringify for numeric keys + err := unmarshalKey(dec, k, uo) + uo.Flags = flagsOriginal + if err != nil { + return err + } + if k.Kind() == reflect.Interface && !k.IsNil() && !k.Elem().Type().Comparable() { + err := fmt.Errorf("invalid incomparable key type %v", k.Elem().Type()) + return &SemanticError{action: "unmarshal", GoType: t, Err: err} + } + + if v2 := va.MapIndex(k.Value); v2.IsValid() { + if !xd.Flags.Get(jsonflags.AllowDuplicateNames) && (!seen.IsValid() || seen.MapIndex(k.Value).IsValid()) { + // TODO: Unread the object name. 
+ name := xd.PreviousBuffer() + err := export.NewDuplicateNameError(name, dec.InputOffset()-len64(name)) + return err + } + v.Set(v2) + } else { + v.SetZero() + } + err = unmarshalVal(dec, v, uo) + va.SetMapIndex(k.Value, v.Value) + if seen.IsValid() { + seen.SetMapIndex(k.Value, reflect.Zero(emptyStructType)) + } + if err != nil { + return err + } + } + if _, err := dec.ReadToken(); err != nil { + return err + } + return nil + } + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + return &fncs +} + +// mapKeyWithUniqueRepresentation reports whether all possible values of k +// marshal to a different JSON value, and whether all possible JSON values +// that can unmarshal into k unmarshal to different Go values. +// In other words, the representation must be bijective. +func mapKeyWithUniqueRepresentation(k reflect.Kind, allowInvalidUTF8 bool) bool { + switch k { + case reflect.Bool, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return true + case reflect.String: + // For strings, we have to be careful since names with invalid UTF-8 + // may unescape to the same Go string value. + return !allowInvalidUTF8 + default: + // Floating-point kinds are not listed above since NaNs + // can appear multiple times and all serialize as "NaN". + return false + } +} + +func makeStructArshaler(t reflect.Type) *arshaler { + // NOTE: The logic below disables namespaces for tracking duplicate names + // and does the tracking locally with an efficient bit-set based on which + // Go struct fields were seen.
+ + var fncs arshaler + var ( + once sync.Once + fields structFields + errInit *SemanticError + ) + init := func() { + fields, errInit = makeStructFields(t) + } + fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error { + xe := export.Encoder(enc) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { + return newInvalidFormatError("marshal", t, mo.Format) + } + once.Do(init) + if errInit != nil { + err := *errInit // shallow copy SemanticError + err.action = "marshal" + return &err + } + if err := enc.WriteToken(jsontext.ObjectStart); err != nil { + return err + } + var seenIdxs uintSet + prevIdx := -1 + xe.Tokens.Last.DisableNamespace() // we manually ensure unique names below + for i := range fields.flattened { + f := &fields.flattened[i] + v := addressableValue{va.Field(f.index[0])} // addressable if struct value is addressable + if len(f.index) > 1 { + v = v.fieldByIndex(f.index[1:], false) + if !v.IsValid() { + continue // implies a nil inlined field + } + } + + // OmitZero skips the field if the Go value is zero, + // which we can determine up front without calling the marshaler. + if f.omitzero && ((f.isZero == nil && v.IsZero()) || (f.isZero != nil && f.isZero(v))) { + continue + } + + // Check for the legacy definition of omitempty. + if f.omitempty && mo.Flags.Get(jsonflags.OmitEmptyWithLegacyDefinition) && isLegacyEmpty(v) { + continue + } + + marshal := f.fncs.marshal + nonDefault := f.fncs.nonDefault + if mo.Marshalers != nil { + var ok bool + marshal, ok = mo.Marshalers.(*Marshalers).lookup(marshal, f.typ) + nonDefault = nonDefault || ok + } + + // OmitEmpty skips the field if the marshaled JSON value is empty, + // which we can know up front if there are no custom marshalers, + // otherwise we must marshal the value and unwrite it if empty. 
+ if f.omitempty && !mo.Flags.Get(jsonflags.OmitEmptyWithLegacyDefinition) && + !nonDefault && f.isEmpty != nil && f.isEmpty(v) { + continue // fast path for omitempty + } + + // Write the object member name. + // + // The logic below is semantically equivalent to: + // enc.WriteToken(String(f.name)) + // but specialized and simplified because: + // 1. The Encoder must be expecting an object name. + // 2. The object namespace is guaranteed to be disabled. + // 3. The object name is guaranteed to be valid and pre-escaped. + // 4. There is no need to flush the buffer (for unwrite purposes). + // 5. There is no possibility of an error occurring. + if optimizeCommon { + // Append any delimiters or optional whitespace. + b := xe.Buf + if xe.Tokens.Last.Length() > 0 { + b = append(b, ',') + if xe.Flags.Get(jsonflags.SpaceAfterComma) { + b = append(b, ' ') + } + } + if xe.Flags.Get(jsonflags.Multiline) { + b = xe.AppendIndent(b, xe.Tokens.NeedIndent('"')) + } + + // Append the token to the output and to the state machine. + n0 := len(b) // offset before calling AppendQuote + if !xe.Flags.Get(jsonflags.EscapeForHTML | jsonflags.EscapeForJS) { + b = append(b, f.quotedName...) + } else { + b, _ = jsonwire.AppendQuote(b, f.name, &xe.Flags) + } + xe.Buf = b + if !xe.Flags.Get(jsonflags.AllowDuplicateNames) { + xe.Names.ReplaceLastQuotedOffset(n0) + } + xe.Tokens.Last.Increment() + } else { + if err := enc.WriteToken(jsontext.String(f.name)); err != nil { + return err + } + } + + // Write the object member value. + flagsOriginal := mo.Flags + if f.string { + mo.Flags.Set(jsonflags.StringifyNumbers | 1) + } + if f.format != "" { + mo.FormatDepth = xe.Tokens.Depth() + mo.Format = f.format + } + err := marshal(enc, v, mo) + mo.Flags = flagsOriginal + mo.Format = "" + if err != nil { + return err + } + + // Try unwriting the member if empty (slow path for omitempty). 
+ if f.omitempty && !mo.Flags.Get(jsonflags.OmitEmptyWithLegacyDefinition) { + var prevName *string + if prevIdx >= 0 { + prevName = &fields.flattened[prevIdx].name + } + if xe.UnwriteEmptyObjectMember(prevName) { + continue + } + } + + // Remember the previous written object member. + // The set of seen fields only needs to be updated to detect + // duplicate names with those from the inlined fallback. + if !xe.Flags.Get(jsonflags.AllowDuplicateNames) && fields.inlinedFallback != nil { + seenIdxs.insert(uint(f.id)) + } + prevIdx = f.id + } + if fields.inlinedFallback != nil && !(mo.Flags.Get(jsonflags.DiscardUnknownMembers) && fields.inlinedFallback.unknown) { + var insertUnquotedName func([]byte) bool + if !xe.Flags.Get(jsonflags.AllowDuplicateNames) { + insertUnquotedName = func(name []byte) bool { + // Check that the name from inlined fallback does not match + // one of the previously marshaled names from known fields. + if foldedFields := fields.lookupByFoldedName(name); len(foldedFields) > 0 { + if f := fields.byActualName[string(name)]; f != nil { + return seenIdxs.insert(uint(f.id)) + } + for _, f := range foldedFields { + if f.matchFoldedName(name, &mo.Flags) { + return seenIdxs.insert(uint(f.id)) + } + } + } + + // Check that the name does not match any other name + // previously marshaled from the inlined fallback. 
+ return xe.Namespaces.Last().InsertUnquoted(name) + } + } + if err := marshalInlinedFallbackAll(enc, va, mo, fields.inlinedFallback, insertUnquotedName); err != nil { + return err + } + } + if err := enc.WriteToken(jsontext.ObjectEnd); err != nil { + return err + } + return nil + } + fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { + return newInvalidFormatError("unmarshal", t, uo.Format) + } + tok, err := dec.ReadToken() + if err != nil { + return err + } + k := tok.Kind() + switch k { + case 'n': + va.SetZero() + return nil + case '{': + once.Do(init) + if errInit != nil { + err := *errInit // shallow copy SemanticError + err.action = "unmarshal" + return &err + } + var seenIdxs uintSet + xd.Tokens.Last.DisableNamespace() + for dec.PeekKind() != '}' { + // Process the object member name. + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) + if err != nil { + return err + } + name := jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) + f := fields.byActualName[string(name)] + if f == nil { + for _, f2 := range fields.lookupByFoldedName(name) { + if f2.matchFoldedName(name, &uo.Flags) { + f = f2 + break + } + } + if f == nil { + if uo.Flags.Get(jsonflags.RejectUnknownMembers) && (fields.inlinedFallback == nil || fields.inlinedFallback.unknown) { + return &SemanticError{action: "unmarshal", GoType: t, Err: fmt.Errorf("unknown name %s", val)} + } + if !xd.Flags.Get(jsonflags.AllowDuplicateNames) && !xd.Namespaces.Last().InsertUnquoted(name) { + // TODO: Unread the object name. + err := export.NewDuplicateNameError(val, dec.InputOffset()-len64(val)) + return err + } + + if fields.inlinedFallback == nil { + // Skip unknown value since we have no place to store it. + if err := dec.SkipValue(); err != nil { + return err + } + } else { + // Unmarshal into a value capable of storing arbitrary object members.
+ if err := unmarshalInlinedFallbackNext(dec, va, uo, fields.inlinedFallback, val, name); err != nil { + return err + } + } + continue + } + } + if !xd.Flags.Get(jsonflags.AllowDuplicateNames) && !seenIdxs.insert(uint(f.id)) { + // TODO: Unread the object name. + err := export.NewDuplicateNameError(val, dec.InputOffset()-len64(val)) + return err + } + + // Process the object member value. + unmarshal := f.fncs.unmarshal + if uo.Unmarshalers != nil { + unmarshal, _ = uo.Unmarshalers.(*Unmarshalers).lookup(unmarshal, f.typ) + } + flagsOriginal := uo.Flags + if f.string { + uo.Flags.Set(jsonflags.StringifyNumbers | 1) + } + if f.format != "" { + uo.FormatDepth = xd.Tokens.Depth() + uo.Format = f.format + } + v := addressableValue{va.Field(f.index[0])} // addressable if struct value is addressable + if len(f.index) > 1 { + v = v.fieldByIndex(f.index[1:], true) + } + err = unmarshal(dec, v, uo) + uo.Flags = flagsOriginal + uo.Format = "" + if err != nil { + return err + } + } + if _, err := dec.ReadToken(); err != nil { + return err + } + return nil + } + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + return &fncs +} + +func (va addressableValue) fieldByIndex(index []int, mayAlloc bool) addressableValue { + for _, i := range index { + va = va.indirect(mayAlloc) + if !va.IsValid() { + return va + } + va = addressableValue{va.Field(i)} // addressable if struct value is addressable + } + return va +} + +func (va addressableValue) indirect(mayAlloc bool) addressableValue { + if va.Kind() == reflect.Pointer { + if va.IsNil() { + if !mayAlloc { + return addressableValue{} + } + va.Set(reflect.New(va.Type().Elem())) + } + va = addressableValue{va.Elem()} // dereferenced pointer is always addressable + } + return va +} + +// isLegacyEmpty reports whether a value is empty according to the v1 definition. +func isLegacyEmpty(v addressableValue) bool { + // Equivalent to encoding/json.isEmptyValue@v1.21.0. 
+ switch v.Kind() { + case reflect.Bool: + return v.Bool() == false + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return v.Int() == 0 + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return v.Uint() == 0 + case reflect.Float32, reflect.Float64: + return v.Float() == 0 + case reflect.String, reflect.Map, reflect.Slice, reflect.Array: + return v.Len() == 0 + case reflect.Pointer, reflect.Interface: + return v.IsNil() + } + return false +} + +func makeSliceArshaler(t reflect.Type) *arshaler { + var fncs arshaler + var ( + once sync.Once + valFncs *arshaler + ) + init := func() { + valFncs = lookupArshaler(t.Elem()) + } + fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error { + // Check for cycles. + xe := export.Encoder(enc) + if xe.Tokens.Depth() > startDetectingCyclesAfter { + if err := visitPointer(&xe.SeenPointers, va.Value); err != nil { + return err + } + defer leavePointer(&xe.SeenPointers, va.Value) + } + + emitNull := mo.Flags.Get(jsonflags.FormatNilSliceAsNull) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { + switch mo.Format { + case "emitnull": + emitNull = true + mo.Format = "" + case "emitempty": + emitNull = false + mo.Format = "" + default: + return newInvalidFormatError("marshal", t, mo.Format) + } + } + + // Handle empty slices. + n := va.Len() + if n == 0 { + if emitNull && va.IsNil() { + return enc.WriteToken(jsontext.Null) + } + // Optimize for marshaling an empty slice without any preceding whitespace. + if optimizeCommon && !xe.Flags.Get(jsonflags.AnyWhitespace) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = append(xe.Tokens.MayAppendDelim(xe.Buf, '['), "[]"...) 
+ xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() + } + return nil + } + } + + once.Do(init) + if err := enc.WriteToken(jsontext.ArrayStart); err != nil { + return err + } + marshal := valFncs.marshal + if mo.Marshalers != nil { + marshal, _ = mo.Marshalers.(*Marshalers).lookup(marshal, t.Elem()) + } + for i := range n { + v := addressableValue{va.Index(i)} // indexed slice element is always addressable + if err := marshal(enc, v, mo); err != nil { + return err + } + } + if err := enc.WriteToken(jsontext.ArrayEnd); err != nil { + return err + } + return nil + } + emptySlice := reflect.MakeSlice(t, 0, 0) + fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { + switch uo.Format { + case "emitnull", "emitempty": + uo.Format = "" // only relevant for marshaling + default: + return newInvalidFormatError("unmarshal", t, uo.Format) + } + } + + tok, err := dec.ReadToken() + if err != nil { + return err + } + k := tok.Kind() + switch k { + case 'n': + va.SetZero() + return nil + case '[': + once.Do(init) + unmarshal := valFncs.unmarshal + if uo.Unmarshalers != nil { + unmarshal, _ = uo.Unmarshalers.(*Unmarshalers).lookup(unmarshal, t.Elem()) + } + mustZero := true // we do not know the cleanliness of unused capacity + cap := va.Cap() + if cap > 0 { + va.SetLen(cap) + } + var i int + for dec.PeekKind() != ']' { + if i == cap { + va.Value.Grow(1) + cap = va.Cap() + va.SetLen(cap) + mustZero = false // reflect.Value.Grow ensures new capacity is zero-initialized + } + v := addressableValue{va.Index(i)} // indexed slice element is always addressable + i++ + if mustZero { + v.SetZero() + } + if err := unmarshal(dec, v, uo); err != nil { + va.SetLen(i) + return err + } + } + if i == 0 { + va.Set(emptySlice) + } else { + va.SetLen(i) + } + if _, err := dec.ReadToken(); err != nil { + return err + } + return nil + } + return 
&SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + return &fncs +} + +func makeArrayArshaler(t reflect.Type) *arshaler { + var fncs arshaler + var ( + once sync.Once + valFncs *arshaler + ) + init := func() { + valFncs = lookupArshaler(t.Elem()) + } + n := t.Len() + fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error { + xe := export.Encoder(enc) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { + return newInvalidFormatError("marshal", t, mo.Format) + } + once.Do(init) + if err := enc.WriteToken(jsontext.ArrayStart); err != nil { + return err + } + marshal := valFncs.marshal + if mo.Marshalers != nil { + marshal, _ = mo.Marshalers.(*Marshalers).lookup(marshal, t.Elem()) + } + for i := range n { + v := addressableValue{va.Index(i)} // indexed array element is addressable if array is addressable + if err := marshal(enc, v, mo); err != nil { + return err + } + } + if err := enc.WriteToken(jsontext.ArrayEnd); err != nil { + return err + } + return nil + } + fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { + return newInvalidFormatError("unmarshal", t, uo.Format) + } + tok, err := dec.ReadToken() + if err != nil { + return err + } + k := tok.Kind() + switch k { + case 'n': + va.SetZero() + return nil + case '[': + once.Do(init) + unmarshal := valFncs.unmarshal + if uo.Unmarshalers != nil { + unmarshal, _ = uo.Unmarshalers.(*Unmarshalers).lookup(unmarshal, t.Elem()) + } + var i int + for dec.PeekKind() != ']' { + if i >= n { + if uo.Flags.Get(jsonflags.UnmarshalArrayFromAnyLength) { + if err := dec.SkipValue(); err != nil { + return err + } + continue + } + err := errors.New("too many array elements") + return &SemanticError{action: "unmarshal", GoType: t, Err: err} + } + v := addressableValue{va.Index(i)} // indexed array element is addressable if array is addressable + 
v.SetZero() + if err := unmarshal(dec, v, uo); err != nil { + return err + } + i++ + } + if _, err := dec.ReadToken(); err != nil { + return err + } + if i < n { + if uo.Flags.Get(jsonflags.UnmarshalArrayFromAnyLength) { + for ; i < n; i++ { + va.Index(i).SetZero() + } + return nil + } + err := errors.New("too few array elements") + return &SemanticError{action: "unmarshal", GoType: t, Err: err} + } + return nil + } + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + return &fncs +} + +func makePointerArshaler(t reflect.Type) *arshaler { + var fncs arshaler + var ( + once sync.Once + valFncs *arshaler + ) + init := func() { + valFncs = lookupArshaler(t.Elem()) + } + fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error { + // Check for cycles. + xe := export.Encoder(enc) + if xe.Tokens.Depth() > startDetectingCyclesAfter { + if err := visitPointer(&xe.SeenPointers, va.Value); err != nil { + return err + } + defer leavePointer(&xe.SeenPointers, va.Value) + } + + // NOTE: Struct.Format is forwarded to underlying marshal. + if va.IsNil() { + return enc.WriteToken(jsontext.Null) + } + once.Do(init) + marshal := valFncs.marshal + if mo.Marshalers != nil { + marshal, _ = mo.Marshalers.(*Marshalers).lookup(marshal, t.Elem()) + } + v := addressableValue{va.Elem()} // dereferenced pointer is always addressable + return marshal(enc, v, mo) + } + fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error { + // NOTE: Struct.Format is forwarded to underlying unmarshal. 
+ if dec.PeekKind() == 'n' { + if _, err := dec.ReadToken(); err != nil { + return err + } + va.SetZero() + return nil + } + once.Do(init) + unmarshal := valFncs.unmarshal + if uo.Unmarshalers != nil { + unmarshal, _ = uo.Unmarshalers.(*Unmarshalers).lookup(unmarshal, t.Elem()) + } + if va.IsNil() { + va.Set(reflect.New(t.Elem())) + } + v := addressableValue{va.Elem()} // dereferenced pointer is always addressable + return unmarshal(dec, v, uo) + } + return &fncs +} + +func makeInterfaceArshaler(t reflect.Type) *arshaler { + // NOTE: Values retrieved from an interface are not addressable, + // so we shallow copy the values to make them addressable and + // store them back into the interface afterwards. + + var fncs arshaler + fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error { + xe := export.Encoder(enc) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { + return newInvalidFormatError("marshal", t, mo.Format) + } + if va.IsNil() { + return enc.WriteToken(jsontext.Null) + } + v := newAddressableValue(va.Elem().Type()) + v.Set(va.Elem()) + marshal := lookupArshaler(v.Type()).marshal + if mo.Marshalers != nil { + marshal, _ = mo.Marshalers.(*Marshalers).lookup(marshal, v.Type()) + } + // Optimize for the any type if there are no special options. 
+ if optimizeCommon && + t == anyType && !mo.Flags.Get(jsonflags.StringifyNumbers) && mo.Format == "" && + (mo.Marshalers == nil || !mo.Marshalers.(*Marshalers).fromAny) { + return marshalValueAny(enc, va.Elem().Interface(), mo) + } + return marshal(enc, v, mo) + } + fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { + return newInvalidFormatError("unmarshal", t, uo.Format) + } + if dec.PeekKind() == 'n' { + if _, err := dec.ReadToken(); err != nil { + return err + } + va.SetZero() + return nil + } + var v addressableValue + if va.IsNil() { + // Optimize for the any type if there are no special options. + // We do not care about stringified numbers since JSON strings + // are always unmarshaled into an any value as Go strings. + // Duplicate name check must be enforced since unmarshalValueAny + // does not implement merge semantics. + if optimizeCommon && + t == anyType && !xd.Flags.Get(jsonflags.AllowDuplicateNames) && uo.Format == "" && + (uo.Unmarshalers == nil || !uo.Unmarshalers.(*Unmarshalers).fromAny) { + v, err := unmarshalValueAny(dec, uo) + // We must check for nil interface values up front. + // See https://go.dev/issue/52310. + if v != nil { + va.Set(reflect.ValueOf(v)) + } + return err + } + + k := dec.PeekKind() + if !isAnyType(t) { + err := errors.New("cannot derive concrete type for non-empty interface") + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t, Err: err} + } + switch k { + case 'f', 't': + v = newAddressableValue(boolType) + case '"': + v = newAddressableValue(stringType) + case '0': + v = newAddressableValue(float64Type) + case '{': + v = newAddressableValue(mapStringAnyType) + case '[': + v = newAddressableValue(sliceAnyType) + default: + // If k is invalid (e.g., due to an I/O or syntax error), then + // that will be cached by PeekKind and returned by ReadValue. 
+ // If k is '}' or ']', then ReadValue must error since + // those are invalid kinds at the start of a JSON value. + _, err := dec.ReadValue() + return err + } + } else { + // Shallow copy the existing value to keep it addressable. + // Any mutations at the top-level of the value will be observable + // since we always store this value back into the interface value. + v = newAddressableValue(va.Elem().Type()) + v.Set(va.Elem()) + } + unmarshal := lookupArshaler(v.Type()).unmarshal + if uo.Unmarshalers != nil { + unmarshal, _ = uo.Unmarshalers.(*Unmarshalers).lookup(unmarshal, v.Type()) + } + err := unmarshal(dec, v, uo) + va.Set(v.Value) + return err + } + return &fncs +} + +// isAnyType reports whether t is equivalent to the any interface type. +func isAnyType(t reflect.Type) bool { + // This is forward compatible if the Go language permits type sets within + // ordinary interfaces where an interface with zero methods does not + // necessarily mean it can hold every possible Go type. + // See https://go.dev/issue/45346. + return t == anyType || anyType.Implements(t) +} + +func makeInvalidArshaler(t reflect.Type) *arshaler { + var fncs arshaler + fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error { + return &SemanticError{action: "marshal", GoType: t} + } + fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error { + return &SemanticError{action: "unmarshal", GoType: t} + } + return &fncs +} + +func newInvalidFormatError(action string, t reflect.Type, format string) error { + err := fmt.Errorf("invalid format flag: %q", format) + return &SemanticError{action: action, GoType: t, Err: err} +} + +func stringOrNumberKind(isString bool) jsontext.Kind { + if isString { + return '"' + } else { + return '0' + } +} + +type uintSet64 uint64 + +func (s uintSet64) has(i uint) bool { return s&(1<<i) > 0 } + +func (s *uintSet64) set(i uint) { *s |= 1 << i } + +// uintSet is a set of unsigned integers.
+// It is optimized for most integers being close to zero. +type uintSet struct { + lo uintSet64 + hi []uintSet64 +} + +// has reports whether i is in the set. +func (s *uintSet) has(i uint) bool { + if i < 64 { + return s.lo.has(i) + } else { + i -= 64 + iHi, iLo := int(i/64), i%64 + return iHi < len(s.hi) && s.hi[iHi].has(iLo) + } +} + +// insert inserts i into the set and reports whether it was the first insertion. +func (s *uintSet) insert(i uint) bool { + // TODO: Make this inlinable at least for the lower 64-bit case. + if i < 64 { + has := s.lo.has(i) + s.lo.set(i) + return !has + } else { + i -= 64 + iHi, iLo := int(i/64), i%64 + if iHi >= len(s.hi) { + s.hi = append(s.hi, make([]uintSet64, iHi+1-len(s.hi))...) + s.hi = s.hi[:cap(s.hi)] + } + has := s.hi[iHi].has(iLo) + s.hi[iHi].set(iLo) + return !has + } +} + +// SkipFunc may be returned by [MarshalFuncV2] and [UnmarshalFuncV2] functions. +// +// Any function that returns SkipFunc must not cause observable side effects +// on the provided [jsontext.Encoder] or [jsontext.Decoder]. +// For example, it is permissible to call [jsontext.Decoder.PeekKind], +// but not permissible to call [jsontext.Decoder.ReadToken] or +// [jsontext.Encoder.WriteToken] since such methods mutate the state. +var SkipFunc = errors.New("json: skip function") + +// Marshalers is a list of functions that may override the marshal behavior +// of specific types. Populate [WithMarshalers] to use it with +// [Marshal], [MarshalWrite], or [MarshalEncode]. +// A nil *Marshalers is equivalent to an empty list. +// There are no exported fields or methods on Marshalers. +type Marshalers = typedMarshalers + +// NewMarshalers constructs a flattened list of marshal functions. +// If multiple functions in the list are applicable for a value of a given type, +// then those earlier in the list take precedence over those that come later. 
// If a function returns [SkipFunc], then the next applicable function is called,
// otherwise the default marshaling behavior is used.
//
// For example:
//
//	m1 := NewMarshalers(f1, f2)
//	m2 := NewMarshalers(f0, m1, f3)     // equivalent to m3
//	m3 := NewMarshalers(f0, f1, f2, f3) // equivalent to m2
func NewMarshalers(ms ...*Marshalers) *Marshalers {
	return newMarshalers(ms...)
}

// Unmarshalers is a list of functions that may override the unmarshal behavior
// of specific types. Populate [WithUnmarshalers] to use it with
// [Unmarshal], [UnmarshalRead], or [UnmarshalDecode].
// A nil *Unmarshalers is equivalent to an empty list.
// There are no exported fields or methods on Unmarshalers.
type Unmarshalers = typedUnmarshalers

// NewUnmarshalers constructs a flattened list of unmarshal functions.
// If multiple functions in the list are applicable for a value of a given type,
// then those earlier in the list take precedence over those that come later.
// If a function returns [SkipFunc], then the next applicable function is called,
// otherwise the default unmarshaling behavior is used.
//
// For example:
//
//	u1 := NewUnmarshalers(f1, f2)
//	u2 := NewUnmarshalers(f0, u1, f3)     // equivalent to u3
//	u3 := NewUnmarshalers(f0, f1, f2, f3) // equivalent to u2
func NewUnmarshalers(us ...*Unmarshalers) *Unmarshalers {
	return newUnmarshalers(us...)
}

// typedMarshalers is typedArshalers specialized for [jsontext.Encoder].
type typedMarshalers = typedArshalers[jsontext.Encoder]

// typedUnmarshalers is typedArshalers specialized for [jsontext.Decoder].
type typedUnmarshalers = typedArshalers[jsontext.Decoder]

// typedArshalers is the shared implementation behind Marshalers and
// Unmarshalers: an ordered list of type-specific functions together with
// a cache of the composed function that lookup resolved for each type.
type typedArshalers[Coder any] struct {
	nonComparable

	// fncVals holds the functions in precedence order (earlier entries win).
	fncVals []typedArshaler[Coder]
	// fncCache memoizes lookup results per reflect.Type. lookup stores either
	// the composed function or nil when no function applies to the type.
	fncCache sync.Map // map[reflect.Type]arshaler

	// fromAny reports whether any of Go types used to represent arbitrary JSON
	// (i.e., any, bool, string, float64, map[string]any, or []any) matches
	// any of the provided type-specific arshalers.
	//
	// This bit of information is needed in arshal_default.go to determine
	// whether to use the specialized logic in arshal_any.go to handle
	// the any interface type. The logic in arshal_any.go does not support
	// type-specific arshal functions, so we must avoid using that logic
	// if this is true.
	fromAny bool
}

// typedMarshaler is typedArshaler specialized for [jsontext.Encoder].
type typedMarshaler = typedArshaler[jsontext.Encoder]

// typedUnmarshaler is typedArshaler specialized for [jsontext.Decoder].
type typedUnmarshaler = typedArshaler[jsontext.Decoder]

// typedArshaler is a single type-specific marshal or unmarshal function.
type typedArshaler[Coder any] struct {
	typ     reflect.Type                                           // type the function was declared for; matched via castableTo
	fnc     func(*Coder, addressableValue, *jsonopts.Struct) error // the wrapped user function
	maySkip bool                                                   // whether fnc may return SkipFunc
}

// newMarshalers is the unexported implementation of NewMarshalers.
func newMarshalers(ms ...*Marshalers) *Marshalers { return newTypedArshalers(ms...) }

// newUnmarshalers is the unexported implementation of NewUnmarshalers.
func newUnmarshalers(us ...*Unmarshalers) *Unmarshalers { return newTypedArshalers(us...) }

// newTypedArshalers concatenates the function lists of all non-nil inputs,
// preserving argument order, and ORs together their fromAny bits.
// It returns nil if the combined list is empty.
func newTypedArshalers[Coder any](as ...*typedArshalers[Coder]) *typedArshalers[Coder] {
	var a typedArshalers[Coder]
	for _, a2 := range as {
		if a2 != nil {
			a.fncVals = append(a.fncVals, a2.fncVals...)
			a.fromAny = a.fromAny || a2.fromAny
		}
	}
	if len(a.fncVals) == 0 {
		return nil
	}
	return &a
}

// lookup returns the function to use for values of type t.
// If no type-specific function applies (or a is nil), it returns the
// provided default fnc and false. Otherwise it returns a function that
// tries each applicable type-specific function in order, falling back to
// fnc if all of them return SkipFunc, and true.
//
// NOTE(review): the composed function captures the default passed on the
// first cache miss for t and is then cached per type; this presumably relies
// on callers always passing the same default for a given type — confirm.
func (a *typedArshalers[Coder]) lookup(fnc func(*Coder, addressableValue, *jsonopts.Struct) error, t reflect.Type) (func(*Coder, addressableValue, *jsonopts.Struct) error, bool) {
	if a == nil {
		return fnc, false
	}
	if v, ok := a.fncCache.Load(t); ok {
		// A cached nil records that no type-specific function applies.
		if v == nil {
			return fnc, false
		}
		return v.(func(*Coder, addressableValue, *jsonopts.Struct) error), true
	}

	// Collect a list of arshalers that can be called for this type.
	// This list may be longer than 1 since some arshalers can be skipped.
	var fncs []func(*Coder, addressableValue, *jsonopts.Struct) error
	for _, fncVal := range a.fncVals {
		if !castableTo(t, fncVal.typ) {
			continue
		}
		fncs = append(fncs, fncVal.fnc)
		if !fncVal.maySkip {
			break // subsequent arshalers will never be called
		}
	}

	if len(fncs) == 0 {
		a.fncCache.Store(t, nil) // nil to indicate that no funcs found
		return fnc, false
	}

	// Construct an arshaler that may call every applicable arshaler.
	fncDefault := fnc
	fnc = func(c *Coder, v addressableValue, o *jsonopts.Struct) error {
		for _, fnc := range fncs {
			if err := fnc(c, v, o); err != SkipFunc {
				return err // may be nil or non-nil
			}
		}
		return fncDefault(c, v, o)
	}

	// Use the first stored so duplicate work can be garbage collected.
	v, _ := a.fncCache.LoadOrStore(t, fnc)
	return v.(func(*Coder, addressableValue, *jsonopts.Struct) error), true
}

// MarshalFuncV1 constructs a type-specific marshaler that
// specifies how to marshal values of type T.
// T can be any type except a named pointer.
// The function is always provided with a non-nil pointer value
// if T is an interface or pointer type.
//
// The function must marshal exactly one JSON value.
// The value of T must not be retained outside the function call.
// It may not return [SkipFunc].
//
// MarshalFuncV1 panics (via assertCastableTo) if T is a named pointer type.
func MarshalFuncV1[T any](fn func(T) ([]byte, error)) *Marshalers {
	t := reflect.TypeFor[T]()
	assertCastableTo(t, true)
	typFnc := typedMarshaler{
		typ: t,
		fnc: func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error {
			val, err := fn(va.castTo(t).Interface().(T))
			if err != nil {
				// A V1 function cannot skip, so SkipFunc is turned into
				// an ordinary error before being reported.
				err = wrapSkipFunc(err, "marshal function of type func(T) ([]byte, error)")
				// TODO: Avoid wrapping semantic errors.
				return &SemanticError{action: "marshal", GoType: t, Err: err}
			}
			if err := enc.WriteValue(val); err != nil {
				// TODO: Avoid wrapping semantic or I/O errors.
				return &SemanticError{action: "marshal", JSONKind: jsontext.Value(val).Kind(), GoType: t, Err: err}
			}
			return nil
		},
	}
	return &Marshalers{fncVals: []typedMarshaler{typFnc}, fromAny: castableToFromAny(t)}
}

// MarshalFuncV2 constructs a type-specific marshaler that
// specifies how to marshal values of type T.
// T can be any type except a named pointer.
// The function is always provided with a non-nil pointer value
// if T is an interface or pointer type.
//
// The function must marshal exactly one JSON value by calling write methods
// on the provided encoder. It may return [SkipFunc] such that marshaling can
// move on to the next marshal function. However, no mutable method calls may
// be called on the encoder if [SkipFunc] is returned.
// The pointer to [jsontext.Encoder], the value of T, and the [Options] value
// must not be retained outside the function call.
//
// MarshalFuncV2 panics (via assertCastableTo) if T is a named pointer type.
func MarshalFuncV2[T any](fn func(*jsontext.Encoder, T, Options) error) *Marshalers {
	t := reflect.TypeFor[T]()
	assertCastableTo(t, true)
	typFnc := typedMarshaler{
		typ: t,
		fnc: func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error {
			xe := export.Encoder(enc)
			// Snapshot the encoder position so that we can verify afterwards
			// how much the user function wrote.
			prevDepth, prevLength := xe.Tokens.DepthLength()
			xe.Flags.Set(jsonflags.WithinArshalCall | 1)
			err := fn(enc, va.castTo(t).Interface().(T), mo)
			xe.Flags.Set(jsonflags.WithinArshalCall | 0)
			currDepth, currLength := xe.Tokens.DepthLength()
			// Success requires the same depth and exactly one more value.
			if err == nil && (prevDepth != currDepth || prevLength+1 != currLength) {
				err = errors.New("must write exactly one JSON value")
			}
			if err != nil {
				if err == SkipFunc {
					// Skipping is only honored if nothing was written.
					if prevDepth == currDepth && prevLength == currLength {
						return SkipFunc
					}
					err = errors.New("must not write any JSON tokens when skipping")
				}
				// TODO: Avoid wrapping semantic or I/O errors.
				return &SemanticError{action: "marshal", GoType: t, Err: err}
			}
			return nil
		},
		maySkip: true,
	}
	return &Marshalers{fncVals: []typedMarshaler{typFnc}, fromAny: castableToFromAny(t)}
}

// UnmarshalFuncV1 constructs a type-specific unmarshaler that
// specifies how to unmarshal values of type T.
// T must be an unnamed pointer or an interface type.
// The function is always provided with a non-nil pointer value.
//
// The function must unmarshal exactly one JSON value.
// The input []byte must not be mutated.
// The input []byte and value T must not be retained outside the function call.
// It may not return [SkipFunc].
//
// UnmarshalFuncV1 panics (via assertCastableTo) if T is neither an
// interface type nor an unnamed pointer type.
func UnmarshalFuncV1[T any](fn func([]byte, T) error) *Unmarshalers {
	t := reflect.TypeFor[T]()
	assertCastableTo(t, false)
	typFnc := typedUnmarshaler{
		typ: t,
		fnc: func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error {
			// The whole JSON value is consumed before the user function runs.
			val, err := dec.ReadValue()
			if err != nil {
				return err // must be a syntactic or I/O error
			}
			err = fn(val, va.castTo(t).Interface().(T))
			if err != nil {
				// A V1 function cannot skip, so SkipFunc is turned into
				// an ordinary error before being reported.
				err = wrapSkipFunc(err, "unmarshal function of type func([]byte, T) error")
				// TODO: Avoid wrapping semantic, syntactic, or I/O errors.
				return &SemanticError{action: "unmarshal", JSONKind: val.Kind(), GoType: t, Err: err}
			}
			return nil
		},
	}
	return &Unmarshalers{fncVals: []typedUnmarshaler{typFnc}, fromAny: castableToFromAny(t)}
}

// UnmarshalFuncV2 constructs a type-specific unmarshaler that
// specifies how to unmarshal values of type T.
// T must be an unnamed pointer or an interface type.
// The function is always provided with a non-nil pointer value.
//
// The function must unmarshal exactly one JSON value by calling read methods
// on the provided decoder. It may return [SkipFunc] such that unmarshaling can
// move on to the next unmarshal function. However, no mutable method calls may
// be called on the decoder if [SkipFunc] is returned.
// The pointer to [jsontext.Decoder], the value of T, and [Options] value
// must not be retained outside the function call.
//
// UnmarshalFuncV2 panics (via assertCastableTo) if T is neither an
// interface type nor an unnamed pointer type.
func UnmarshalFuncV2[T any](fn func(*jsontext.Decoder, T, Options) error) *Unmarshalers {
	t := reflect.TypeFor[T]()
	assertCastableTo(t, false)
	typFnc := typedUnmarshaler{
		typ: t,
		fnc: func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error {
			xd := export.Decoder(dec)
			// Snapshot the decoder position so that we can verify afterwards
			// how much the user function consumed.
			prevDepth, prevLength := xd.Tokens.DepthLength()
			xd.Flags.Set(jsonflags.WithinArshalCall | 1)
			err := fn(dec, va.castTo(t).Interface().(T), uo)
			xd.Flags.Set(jsonflags.WithinArshalCall | 0)
			currDepth, currLength := xd.Tokens.DepthLength()
			// Success requires the same depth and exactly one more value.
			if err == nil && (prevDepth != currDepth || prevLength+1 != currLength) {
				err = errors.New("must read exactly one JSON value")
			}
			if err != nil {
				if err == SkipFunc {
					// Skipping is only honored if nothing was read.
					if prevDepth == currDepth && prevLength == currLength {
						return SkipFunc
					}
					err = errors.New("must not read any JSON tokens when skipping")
				}
				// TODO: Avoid wrapping semantic, syntactic, or I/O errors.
				return &SemanticError{action: "unmarshal", GoType: t, Err: err}
			}
			return nil
		},
		maySkip: true,
	}
	return &Unmarshalers{fncVals: []typedUnmarshaler{typFnc}, fromAny: castableToFromAny(t)}
}

// assertCastableTo asserts that "to" is a valid type to be casted to.
// These are the Go types that type-specific arshalers may operate upon.
//
// Let AllTypes be the universal set of all possible Go types.
// This function generally asserts that:
//
//	len([from for from in AllTypes if castableTo(from, to)]) > 0
//
// otherwise it panics.
//
// As a special-case if marshal is false, then we forbid any non-pointer or
// non-interface type since it is almost always a bug trying to unmarshal
// into something where the end-user caller did not pass in an addressable value
// since they will not observe the mutations.
func assertCastableTo(to reflect.Type, marshal bool) {
	var permitted bool
	switch to.Kind() {
	case reflect.Interface:
		permitted = true
	case reflect.Pointer:
		// Only unnamed pointers are accepted, mirroring the fact that
		// taking the address of a value yields an unnamed pointer type.
		permitted = to.Name() == ""
	default:
		// Non-pointer types would technically work for unmarshal too,
		// but the receiver would be immutable, which is usually a bug.
		// They are therefore accepted for marshaling only.
		permitted = marshal
	}
	if permitted {
		return
	}

	format := "input type %v must be an interface type or an unnamed pointer type"
	if marshal {
		format = "input type %v must be an interface type, an unnamed pointer type, or a non-pointer type"
	}
	panic(fmt.Sprintf(format, to))
}

// castableTo checks whether values of type "from" can be casted to type "to".
// Nil pointer or interface "from" values are never considered castable.
//
// This function must be kept in sync with addressableValue.castTo.
func castableTo(from, to reflect.Type) bool {
	kind := to.Kind()
	if kind != reflect.Interface && kind != reflect.Pointer {
		// Common case for marshaling.
		// From must be a concrete type.
		return from == to
	}

	ptr := reflect.PointerTo(from)
	if kind == reflect.Interface {
		// TODO: This breaks when ordinary interfaces can have type sets
		// since interfaces now exist where only the value form of a type (T)
		// implements the interface, but not the pointer variant (*T).
		// See https://go.dev/issue/45346.
		return ptr.Implements(to)
	}
	// Common case for unmarshaling.
	// From must be a concrete or interface type.
	return ptr == to
}

// castTo casts va to the specified type.
// If the type is an interface, then the underlying type will always
// be a non-nil pointer to a concrete type.
//
// Requirement: castableTo(va.Type(), to) must hold.
+func (va addressableValue) castTo(to reflect.Type) reflect.Value { + switch to.Kind() { + case reflect.Interface: + return va.Addr().Convert(to) + case reflect.Pointer: + return va.Addr() + default: + return va.Value + } +} + +// castableToFromAny reports whether "to" can be casted to from any +// of the dynamic types used to represent arbitrary JSON. +func castableToFromAny(to reflect.Type) bool { + for _, from := range []reflect.Type{anyType, boolType, stringType, float64Type, mapStringAnyType, sliceAnyType} { + if castableTo(from, to) { + return true + } + } + return false +} + +func wrapSkipFunc(err error, what string) error { + if err == SkipFunc { + return errors.New(what + " cannot be skipped") + } + return err +} + +// This package supports "inlining" a Go struct field, where the contents +// of the serialized field (which must be a JSON object) are treated as if +// they are part of the parent Go struct (which represents a JSON object). +// +// Generally, inlined fields are of a Go struct type, where the fields of the +// nested struct are virtually hoisted up to the parent struct using rules +// similar to how Go embedding works (but operating within the JSON namespace). +// +// However, inlined fields may also be of a Go map type with a string key or +// a jsontext.Value. Such inlined fields are called "fallback" fields since they +// represent any arbitrary JSON object member. Explicitly named fields take +// precedence over the inlined fallback. Only one inlined fallback is allowed. + +var jsontextValueType = reflect.TypeFor[jsontext.Value]() + +// marshalInlinedFallbackAll marshals all the members in an inlined fallback. 
// The fallback field f of the struct va is either a jsontext.Value holding
// a raw JSON object or a map with string keys. Each member is written to enc
// as a name followed by a value, without the surrounding object delimiters.
// If insertUnquotedName is non-nil, it is called with every unquoted member
// name, and a duplicate-name error is returned as soon as it reports false.
// A nil or empty fallback writes nothing.
func marshalInlinedFallbackAll(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct, f *structField, insertUnquotedName func([]byte) bool) error {
	v := addressableValue{va.Field(f.index[0])} // addressable if struct value is addressable
	if len(f.index) > 1 {
		v = v.fieldByIndex(f.index[1:], false)
		if !v.IsValid() {
			return nil // implies a nil inlined field
		}
	}
	v = v.indirect(false)
	if !v.IsValid() {
		return nil
	}

	if v.Type() == jsontextValueType {
		// TODO(https://go.dev/issue/62121): Use reflect.Value.AssertTo.
		b := *v.Addr().Interface().(*jsontext.Value)
		if len(b) == 0 { // TODO: Should this be nil? What if it were all whitespace?
			return nil
		}

		// Re-parse the raw value with a pooled decoder so that its members
		// can be copied to enc one at a time.
		dec := export.GetBufferedDecoder(b)
		defer export.PutBufferedDecoder(dec)
		xd := export.Decoder(dec)
		xd.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1)

		tok, err := dec.ReadToken()
		if err != nil {
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			return &SemanticError{action: "marshal", GoType: jsontextValueType, Err: err}
		}
		if tok.Kind() != '{' {
			err := errors.New("inlined raw value must be a JSON object")
			return &SemanticError{action: "marshal", JSONKind: tok.Kind(), GoType: jsontextValueType, Err: err}
		}
		for dec.PeekKind() != '}' {
			// Parse the JSON object name.
			var flags jsonwire.ValueFlags
			val, err := xd.ReadValue(&flags)
			if err != nil {
				return &SemanticError{action: "marshal", GoType: jsontextValueType, Err: err}
			}
			if insertUnquotedName != nil {
				name := jsonwire.UnquoteMayCopy(val, flags.IsVerbatim())
				if !insertUnquotedName(name) {
					return export.NewDuplicateNameError(val, 0)
				}
			}
			if err := enc.WriteValue(val); err != nil {
				return err
			}

			// Parse the JSON object value.
			val, err = xd.ReadValue(&flags)
			if err != nil {
				return &SemanticError{action: "marshal", GoType: jsontextValueType, Err: err}
			}
			if err := enc.WriteValue(val); err != nil {
				return err
			}
		}
		// Consume the closing '}' and make sure nothing follows the object.
		if _, err := dec.ReadToken(); err != nil {
			return &SemanticError{action: "marshal", GoType: jsontextValueType, Err: err}
		}
		if err := xd.CheckEOF(); err != nil {
			return &SemanticError{action: "marshal", GoType: jsontextValueType, Err: err}
		}
		return nil
	} else {
		m := v // must be a map[string]V
		n := m.Len()
		if n == 0 {
			return nil
		}
		mk := newAddressableValue(stringType)
		mv := newAddressableValue(m.Type().Elem())
		// marshalKey quotes the key, runs the duplicate-name check on it,
		// and writes it to enc as the member name.
		marshalKey := func(mk addressableValue) error {
			xe := export.Encoder(enc)
			b, err := jsonwire.AppendQuote(enc.UnusedBuffer(), mk.String(), &xe.Flags)
			if err != nil {
				return err
			}
			if insertUnquotedName != nil {
				// Without any backslash the quoted form has no escapes.
				isVerbatim := bytes.IndexByte(b, '\\') < 0
				name := jsonwire.UnquoteMayCopy(b, isVerbatim)
				if !insertUnquotedName(name) {
					return export.NewDuplicateNameError(b, 0)
				}
			}
			return enc.WriteValue(b)
		}
		marshalVal := f.fncs.marshal
		if mo.Marshalers != nil {
			marshalVal, _ = mo.Marshalers.(*Marshalers).lookup(marshalVal, mv.Type())
		}
		if !mo.Flags.Get(jsonflags.Deterministic) || n <= 1 {
			// Map iteration order is acceptable: either determinism was
			// not requested or there is at most one entry.
			for iter := m.MapRange(); iter.Next(); {
				mk.SetIterKey(iter)
				if err := marshalKey(mk); err != nil {
					return err
				}
				mv.Set(iter.Value())
				if err := marshalVal(enc, mv, mo); err != nil {
					return err
				}
			}
		} else {
			// Deterministic output: collect the keys, sort them,
			// and emit the members in sorted key order.
			// NOTE(review): names is handed back via putStrings only on the
			// success path; the early error returns below skip it.
			names := getStrings(n)
			for i, iter := 0, m.Value.MapRange(); i < n && iter.Next(); i++ {
				mk.SetIterKey(iter)
				(*names)[i] = mk.String()
			}
			names.Sort()
			for _, name := range *names {
				mk.SetString(name)
				if err := marshalKey(mk); err != nil {
					return err
				}
				// TODO(https://go.dev/issue/57061): Use mv.SetMapIndexOf.
				mv.Set(m.MapIndex(mk.Value))
				if err := marshalVal(enc, mv, mo); err != nil {
					return err
				}
			}
			putStrings(names)
		}
		return nil
	}
}

// unmarshalInlinedFallbackNext unmarshals only the next member in an inlined fallback.
//
// The member name has already been read by the caller and is supplied as
// quotedName and unquotedName; this function reads the member value from dec
// and stores the member in the fallback field f of the struct va, which is
// either a jsontext.Value (the member is appended to the raw JSON object)
// or a map with string keys (the entry for the name is set).
func unmarshalInlinedFallbackNext(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct, f *structField, quotedName, unquotedName []byte) error {
	v := addressableValue{va.Field(f.index[0])} // addressable if struct value is addressable
	if len(f.index) > 1 {
		v = v.fieldByIndex(f.index[1:], true)
	}
	v = v.indirect(true)

	if v.Type() == jsontextValueType {
		b := v.Addr().Interface().(*jsontext.Value)
		if len(*b) == 0 { // TODO: Should this be nil? What if it were all whitespace?
			*b = append(*b, '{')
		} else {
			// Reopen the existing object: drop its closing '}' and add a
			// separating comma unless the object is empty or already ends in one.
			*b = jsonwire.TrimSuffixWhitespace(*b)
			if jsonwire.HasSuffixByte(*b, '}') {
				// TODO: When merging into an object for the first time,
				// should we verify that it is valid?
				*b = jsonwire.TrimSuffixByte(*b, '}')
				*b = jsonwire.TrimSuffixWhitespace(*b)
				if !jsonwire.HasSuffixByte(*b, ',') && !jsonwire.HasSuffixByte(*b, '{') {
					*b = append(*b, ',')
				}
			} else {
				err := errors.New("inlined raw value must be a JSON object")
				return &SemanticError{action: "unmarshal", GoType: jsontextValueType, Err: err}
			}
		}
		*b = append(*b, quotedName...)
		*b = append(*b, ':')
		// NOTE(review): if ReadValue fails, *b is left ending in `name:`
		// without a value or closing brace.
		val, err := dec.ReadValue()
		if err != nil {
			return err
		}
		*b = append(*b, val...)
		*b = append(*b, '}')
		return nil
	} else {
		name := string(unquotedName) // TODO: Intern this?

		m := v // must be a map[string]V
		if m.IsNil() {
			m.Set(reflect.MakeMap(m.Type()))
		}
		mk := reflect.ValueOf(name)
		mv := newAddressableValue(v.Type().Elem()) // TODO: Cache across calls?
		// Start from the existing entry, if any, before unmarshaling into it.
		if v2 := m.MapIndex(mk); v2.IsValid() {
			mv.Set(v2)
		}

		unmarshal := f.fncs.unmarshal
		if uo.Unmarshalers != nil {
			unmarshal, _ = uo.Unmarshalers.(*Unmarshalers).lookup(unmarshal, mv.Type())
		}
		err := unmarshal(dec, mv, uo)
		// The entry is stored back even if unmarshal reported an error.
		m.SetMapIndex(mk, mv.Value)
		if err != nil {
			return err
		}
		return nil
	}
}

// Interfaces for custom serialization.
var (
	jsonMarshalerV1Type   = reflect.TypeFor[MarshalerV1]()
	jsonMarshalerV2Type   = reflect.TypeFor[MarshalerV2]()
	jsonUnmarshalerV1Type = reflect.TypeFor[UnmarshalerV1]()
	jsonUnmarshalerV2Type = reflect.TypeFor[UnmarshalerV2]()
	textAppenderType      = reflect.TypeFor[encodingTextAppender]()
	textMarshalerType     = reflect.TypeFor[encoding.TextMarshaler]()
	textUnmarshalerType   = reflect.TypeFor[encoding.TextUnmarshaler]()

	// TODO(https://go.dev/issue/62384): Use encoding.TextAppender instead of this hack.
	// This exists for now to provide performance benefits to netip types.
	// There is no semantic difference with this change.
	appenderToType = reflect.TypeFor[interface{ AppendTo([]byte) []byte }]()
)

// TODO(https://go.dev/issue/62384): Use encoding.TextAppender instead
// and document public support for this method in json.Marshal.
type encodingTextAppender interface {
	AppendText(b []byte) ([]byte, error)
}

// MarshalerV1 is implemented by types that can marshal themselves.
// It is recommended that types implement [MarshalerV2] unless the implementation
// is trying to avoid a hard dependency on the "jsontext" package.
//
// It is recommended that implementations return a buffer that is safe
// for the caller to retain and potentially mutate.
type MarshalerV1 interface {
	MarshalJSON() ([]byte, error)
}

// MarshalerV2 is implemented by types that can marshal themselves.
// It is recommended that types implement MarshalerV2 instead of [MarshalerV1]
// since this is both more performant and flexible.
// If a type implements both MarshalerV1 and MarshalerV2,
// then MarshalerV2 takes precedence. In such a case, both implementations
// should aim to have equivalent behavior for the default marshal options.
//
// The implementation must write only one JSON value to the Encoder and
// must not retain the pointer to [jsontext.Encoder] or the [Options] value.
type MarshalerV2 interface {
	// MarshalJSONV2 writes the JSON representation of the receiver
	// to the provided encoder.
	MarshalJSONV2(*jsontext.Encoder, Options) error

	// TODO: Should users call the MarshalEncode function or
	// should/can they call this method directly? Does it matter?
}

// UnmarshalerV1 is implemented by types that can unmarshal themselves.
// It is recommended that types implement [UnmarshalerV2] unless the implementation
// is trying to avoid a hard dependency on the "jsontext" package.
//
// The input can be assumed to be a valid encoding of a JSON value
// if called from unmarshal functionality in this package.
// UnmarshalJSON must copy the JSON data if it is retained after returning.
// It is recommended that UnmarshalJSON implement merge semantics when
// unmarshaling into a pre-populated value.
//
// Implementations must not retain or mutate the input []byte.
type UnmarshalerV1 interface {
	// UnmarshalJSON populates the receiver from the provided JSON value.
	UnmarshalJSON([]byte) error
}

// UnmarshalerV2 is implemented by types that can unmarshal themselves.
// It is recommended that types implement UnmarshalerV2 instead of [UnmarshalerV1]
// since this is both more performant and flexible.
// If a type implements both UnmarshalerV1 and UnmarshalerV2,
// then UnmarshalerV2 takes precedence. In such a case, both implementations
// should aim to have equivalent behavior for the default unmarshal options.
//
// The implementation must read only one JSON value from the Decoder.
// It is recommended that UnmarshalJSONV2 implement merge semantics when
// unmarshaling into a pre-populated value.
//
// Implementations must not retain the pointer to [jsontext.Decoder] or
// the [Options] value.
type UnmarshalerV2 interface {
	// UnmarshalJSONV2 populates the receiver by reading from
	// the provided decoder.
	UnmarshalJSONV2(*jsontext.Decoder, Options) error

	// TODO: Should users call the UnmarshalDecode function or
	// should/can they call this method directly? Does it matter?
}

// makeMethodArshaler overrides the marshal and/or unmarshal functions in fncs
// when t (or a pointer to t) implements one of the custom serialization
// interfaces, and returns fncs. For marshaling the order of preference is
// MarshalerV2, MarshalerV1, the text appender interface, then
// encoding.TextMarshaler; for unmarshaling it is UnmarshalerV2,
// UnmarshalerV1, then encoding.TextUnmarshaler.
// Pointer and interface types are returned unchanged.
func makeMethodArshaler(fncs *arshaler, t reflect.Type) *arshaler {
	// Avoid injecting method arshaler on the pointer or interface version
	// to avoid ever calling the method on a nil pointer or interface receiver.
	// Let it be injected on the value receiver (which is always addressable).
	if t.Kind() == reflect.Pointer || t.Kind() == reflect.Interface {
		return fncs
	}

	// Handle custom marshaler.
	switch which := implementsWhich(t, jsonMarshalerV2Type, jsonMarshalerV1Type, textAppenderType, textMarshalerType); which {
	case jsonMarshalerV2Type:
		fncs.nonDefault = true
		fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error {
			xe := export.Encoder(enc)
			// Snapshot the encoder position to verify that the method
			// wrote exactly one JSON value.
			prevDepth, prevLength := xe.Tokens.DepthLength()
			xe.Flags.Set(jsonflags.WithinArshalCall | 1)
			err := va.Addr().Interface().(MarshalerV2).MarshalJSONV2(enc, mo)
			xe.Flags.Set(jsonflags.WithinArshalCall | 0)
			currDepth, currLength := xe.Tokens.DepthLength()
			if (prevDepth != currDepth || prevLength+1 != currLength) && err == nil {
				err = errors.New("must write exactly one JSON value")
			}
			if err != nil {
				// Methods cannot skip; SkipFunc becomes an ordinary error.
				err = wrapSkipFunc(err, "marshal method")
				// TODO: Avoid wrapping semantic or I/O errors.
				return &SemanticError{action: "marshal", GoType: t, Err: err}
			}
			return nil
		}
	case jsonMarshalerV1Type:
		fncs.nonDefault = true
		fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error {
			marshaler := va.Addr().Interface().(MarshalerV1)
			val, err := marshaler.MarshalJSON()
			if err != nil {
				err = wrapSkipFunc(err, "marshal method")
				// TODO: Avoid wrapping semantic errors.
				return &SemanticError{action: "marshal", GoType: t, Err: err}
			}
			if err := enc.WriteValue(val); err != nil {
				// TODO: Avoid wrapping semantic or I/O errors.
				return &SemanticError{action: "marshal", JSONKind: jsontext.Value(val).Kind(), GoType: t, Err: err}
			}
			return nil
		}
	case textAppenderType:
		fncs.nonDefault = true
		fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) (err error) {
			appender := va.Addr().Interface().(encodingTextAppender)
			// The appended text is emitted as a JSON string.
			if err := export.Encoder(enc).AppendRaw('"', false, appender.AppendText); err != nil {
				// TODO: Avoid wrapping semantic, syntactic, or I/O errors.
				err = wrapSkipFunc(err, "append method")
				return &SemanticError{action: "marshal", JSONKind: '"', GoType: t, Err: err}
			}
			return nil
		}
	case textMarshalerType:
		fncs.nonDefault = true
		fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error {
			marshaler := va.Addr().Interface().(encoding.TextMarshaler)
			// The marshaled text is emitted as a JSON string.
			if err := export.Encoder(enc).AppendRaw('"', false, func(b []byte) ([]byte, error) {
				b2, err := marshaler.MarshalText()
				return append(b, b2...), err
			}); err != nil {
				// TODO: Avoid wrapping semantic, syntactic, or I/O errors.
				err = wrapSkipFunc(err, "marshal method")
				return &SemanticError{action: "marshal", JSONKind: '"', GoType: t, Err: err}
			}
			return nil
		}
		// TODO(https://go.dev/issue/62384): Rely on encoding.TextAppender instead.
		// For net/netip types with an AppendTo method, replace the
		// MarshalText-based function above with one that calls AppendTo.
		if implementsWhich(t, appenderToType) != nil && t.PkgPath() == "net/netip" {
			fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error {
				appender := va.Addr().Interface().(interface{ AppendTo([]byte) []byte })
				if err := export.Encoder(enc).AppendRaw('"', false, func(b []byte) ([]byte, error) {
					return appender.AppendTo(b), nil
				}); err != nil {
					// TODO: Avoid wrapping semantic, syntactic, or I/O errors.
					err = wrapSkipFunc(err, "append method")
					return &SemanticError{action: "marshal", JSONKind: '"', GoType: t, Err: err}
				}
				return nil
			}
		}
	}

	// Handle custom unmarshaler.
	switch which := implementsWhich(t, jsonUnmarshalerV2Type, jsonUnmarshalerV1Type, textUnmarshalerType); which {
	case jsonUnmarshalerV2Type:
		fncs.nonDefault = true
		fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error {
			xd := export.Decoder(dec)
			// Snapshot the decoder position to verify that the method
			// read exactly one JSON value.
			prevDepth, prevLength := xd.Tokens.DepthLength()
			xd.Flags.Set(jsonflags.WithinArshalCall | 1)
			err := va.Addr().Interface().(UnmarshalerV2).UnmarshalJSONV2(dec, uo)
			xd.Flags.Set(jsonflags.WithinArshalCall | 0)
			currDepth, currLength := xd.Tokens.DepthLength()
			if (prevDepth != currDepth || prevLength+1 != currLength) && err == nil {
				err = errors.New("must read exactly one JSON value")
			}
			if err != nil {
				// Methods cannot skip; SkipFunc becomes an ordinary error.
				err = wrapSkipFunc(err, "unmarshal method")
				// TODO: Avoid wrapping semantic, syntactic, or I/O errors.
				return &SemanticError{action: "unmarshal", GoType: t, Err: err}
			}
			return nil
		}
	case jsonUnmarshalerV1Type:
		fncs.nonDefault = true
		fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error {
			val, err := dec.ReadValue()
			if err != nil {
				return err // must be a syntactic or I/O error
			}
			unmarshaler := va.Addr().Interface().(UnmarshalerV1)
			if err := unmarshaler.UnmarshalJSON(val); err != nil {
				err = wrapSkipFunc(err, "unmarshal method")
				// TODO: Avoid wrapping semantic, syntactic, or I/O errors.
				return &SemanticError{action: "unmarshal", JSONKind: val.Kind(), GoType: t, Err: err}
			}
			return nil
		}
	case textUnmarshalerType:
		fncs.nonDefault = true
		fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error {
			xd := export.Decoder(dec)
			var flags jsonwire.ValueFlags
			val, err := xd.ReadValue(&flags)
			if err != nil {
				return err // must be a syntactic or I/O error
			}
			// Text unmarshalers only accept JSON strings.
			if val.Kind() != '"' {
				err = errors.New("JSON value must be string type")
				return &SemanticError{action: "unmarshal", JSONKind: val.Kind(), GoType: t, Err: err}
			}
			s := jsonwire.UnquoteMayCopy(val, flags.IsVerbatim())
			unmarshaler := va.Addr().Interface().(encoding.TextUnmarshaler)
			if err := unmarshaler.UnmarshalText(s); err != nil {
				err = wrapSkipFunc(err, "unmarshal method")
				// TODO: Avoid wrapping semantic, syntactic, or I/O errors.
				return &SemanticError{action: "unmarshal", JSONKind: val.Kind(), GoType: t, Err: err}
			}
			return nil
		}
	}

	return fncs
}

// implementsWhich is like t.Implements(ifaceType) for a list of interfaces,
// but checks whether either t or reflect.PointerTo(t) implements the interface.
// It returns the first interface type in the list that is implemented,
// or nil if none is.
func implementsWhich(t reflect.Type, ifaceTypes ...reflect.Type) (which reflect.Type) {
	for _, ifaceType := range ifaceTypes {
		if t.Implements(ifaceType) || reflect.PointerTo(t).Implements(ifaceType) {
			return ifaceType
		}
	}
	return nil
}

var (
	timeDurationType = reflect.TypeFor[time.Duration]()
	timeTimeType     = reflect.TypeFor[time.Time]()
)

func makeTimeArshaler(fncs *arshaler, t reflect.Type) *arshaler {
	// Ideally, time types would implement MarshalerV2 and UnmarshalerV2,
	// but that would incur a dependency on package json from package time.
	// Given how widely used time is, it is more acceptable that we incur a
	// dependency on time from json.
+ // + // Injecting the arshaling functionality like this will not be identical + // to actually declaring methods on the time types since embedding of the + // time types will not be able to forward this functionality. + switch t { + case timeDurationType: + fncs.nonDefault = true + marshalNano := fncs.marshal + fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error { + xe := export.Encoder(enc) + var m durationArshaler + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { + if !m.initFormat(mo.Format) { + return newInvalidFormatError("marshal", t, mo.Format) + } + } else if mo.Flags.Get(jsonflags.FormatTimeDurationAsNanosecond) { + return marshalNano(enc, va, mo) + } + + // TODO(https://go.dev/issue/62121): Use reflect.Value.AssertTo. + m.td = *va.Addr().Interface().(*time.Duration) + k := stringOrNumberKind(!m.isNumeric() || mo.Flags.Get(jsonflags.StringifyNumbers)) + if err := xe.AppendRaw(k, true, m.appendMarshal); err != nil { + return &SemanticError{action: "marshal", GoType: t, Err: err} + } + return nil + } + unmarshalNano := fncs.unmarshal + fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error { + xd := export.Decoder(dec) + var u durationArshaler + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { + if !u.initFormat(uo.Format) { + return newInvalidFormatError("unmarshal", t, uo.Format) + } + } else if uo.Flags.Get(jsonflags.FormatTimeDurationAsNanosecond) { + return unmarshalNano(dec, va, uo) + } + + var flags jsonwire.ValueFlags + td := va.Addr().Interface().(*time.Duration) + val, err := xd.ReadValue(&flags) + if err != nil { + return err + } + switch k := val.Kind(); k { + case 'n': + *td = time.Duration(0) + return nil + case '"': + if u.isNumeric() && !uo.Flags.Get(jsonflags.StringifyNumbers) { + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) + if err := u.unmarshal(val); err 
!= nil { + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t, Err: err} + } + *td = u.td + return nil + case '0': + if !u.isNumeric() { + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + if err := u.unmarshal(val); err != nil { + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t, Err: err} + } + *td = u.td + return nil + default: + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + } + case timeTimeType: + fncs.nonDefault = true + fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) (err error) { + xe := export.Encoder(enc) + var m timeArshaler + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { + if !m.initFormat(mo.Format) { + return newInvalidFormatError("marshal", t, mo.Format) + } + } + + // TODO(https://go.dev/issue/62121): Use reflect.Value.AssertTo. + m.tt = *va.Addr().Interface().(*time.Time) + k := stringOrNumberKind(!m.isNumeric() || mo.Flags.Get(jsonflags.StringifyNumbers)) + if err := xe.AppendRaw(k, !m.hasCustomFormat(), m.appendMarshal); err != nil { + return &SemanticError{action: "marshal", GoType: t, Err: err} + } + return nil + } + fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) (err error) { + xd := export.Decoder(dec) + var u timeArshaler + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { + if !u.initFormat(uo.Format) { + return newInvalidFormatError("unmarshal", t, uo.Format) + } + } + + var flags jsonwire.ValueFlags + tt := va.Addr().Interface().(*time.Time) + val, err := xd.ReadValue(&flags) + if err != nil { + return err + } + switch k := val.Kind(); k { + case 'n': + *tt = time.Time{} + return nil + case '"': + if u.isNumeric() && !uo.Flags.Get(jsonflags.StringifyNumbers) { + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) + if err := u.unmarshal(val); err != nil { + return 
&SemanticError{action: "unmarshal", JSONKind: k, GoType: t, Err: err} + } + *tt = u.tt + return nil + case '0': + if !u.isNumeric() { + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + if err := u.unmarshal(val); err != nil { + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t, Err: err} + } + *tt = u.tt + return nil + default: + return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t} + } + } + } + return fncs +} + +type durationArshaler struct { + td time.Duration + + // base records the representation where: + // - 0 uses time.Duration.String + // - 1e0, 1e3, 1e6, or 1e9 use a decimal encoding of the duration as + // nanoseconds, microseconds, milliseconds, or seconds. + // - 60 uses a "H:MM:SS.SSSSSSSSS" encoding + base uint +} + +func (a *durationArshaler) initFormat(format string) (ok bool) { + switch format { + case "units": + a.base = 0 + case "sec": + a.base = 1e9 + case "milli": + a.base = 1e6 + case "micro": + a.base = 1e3 + case "nano": + a.base = 1e0 + case "base60": // see https://en.wikipedia.org/wiki/Sexagesimal#Modern_usage + a.base = 60 + default: + return false + } + return true +} + +func (a *durationArshaler) isNumeric() bool { + return a.base != 0 && a.base != 60 +} + +func (a *durationArshaler) appendMarshal(b []byte) ([]byte, error) { + switch a.base { + case 0: + return append(b, a.td.String()...), nil + case 60: + return appendDurationBase60(b, a.td), nil + default: + return appendDurationBase10(b, a.td, a.base), nil + } +} + +func (a *durationArshaler) unmarshal(b []byte) (err error) { + switch a.base { + case 0: + a.td, err = time.ParseDuration(string(b)) + case 60: + a.td, err = parseDurationBase60(b) + default: + a.td, err = parseDurationBase10(b, a.base) + } + return err +} + +type timeArshaler struct { + tt time.Time + + // base records the representation where: + // - 0 uses RFC 3339 encoding of the timestamp + // - 1e0, 1e3, 1e6, or 1e9 use a decimal encoding of the timestamp as + 
// seconds, milliseconds, microseconds, or nanoseconds since Unix epoch. + // - math.MaxUint uses time.Time.Format to encode the timestamp + base uint + format string // time format passed to time.Parse +} + +func (a *timeArshaler) initFormat(format string) bool { + // We assume that an exported constant in the time package will + // always start with an uppercase ASCII letter. + if len(format) == 0 { + return false + } + a.base = math.MaxUint // implies custom format + if c := format[0]; !('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z') { + a.format = format + return true + } + switch format { + case "ANSIC": + a.format = time.ANSIC + case "UnixDate": + a.format = time.UnixDate + case "RubyDate": + a.format = time.RubyDate + case "RFC822": + a.format = time.RFC822 + case "RFC822Z": + a.format = time.RFC822Z + case "RFC850": + a.format = time.RFC850 + case "RFC1123": + a.format = time.RFC1123 + case "RFC1123Z": + a.format = time.RFC1123Z + case "RFC3339": + a.base = 0 + a.format = time.RFC3339 + case "RFC3339Nano": + a.base = 0 + a.format = time.RFC3339Nano + case "Kitchen": + a.format = time.Kitchen + case "Stamp": + a.format = time.Stamp + case "StampMilli": + a.format = time.StampMilli + case "StampMicro": + a.format = time.StampMicro + case "StampNano": + a.format = time.StampNano + case "DateTime": + a.format = time.DateTime + case "DateOnly": + a.format = time.DateOnly + case "TimeOnly": + a.format = time.TimeOnly + case "unix": + a.base = 1e0 + case "unixmilli": + a.base = 1e3 + case "unixmicro": + a.base = 1e6 + case "unixnano": + a.base = 1e9 + default: + // Reject any Go identifier in case new constants are supported. 
+ if strings.TrimFunc(format, isLetterOrDigit) == "" { + return false + } + a.format = format + } + return true +} + +func (a *timeArshaler) isNumeric() bool { + return int(a.base) > 0 +} + +func (a *timeArshaler) hasCustomFormat() bool { + return a.base == math.MaxUint +} + +func (a *timeArshaler) appendMarshal(b []byte) ([]byte, error) { + switch a.base { + case 0: + // TODO(https://go.dev/issue/60204): Use cmp.Or(a.format, time.RFC3339Nano). + format := a.format + if format == "" { + format = time.RFC3339Nano + } + n0 := len(b) + b = a.tt.AppendFormat(b, format) + // Not all Go timestamps can be represented as valid RFC 3339. + // Explicitly check for these edge cases. + // See https://go.dev/issue/4556 and https://go.dev/issue/54580. + switch b := b[n0:]; { + case b[len("9999")] != '-': // year must be exactly 4 digits wide + return b, errors.New("year outside of range [0,9999]") + case b[len(b)-1] != 'Z': + c := b[len(b)-len("Z07:00")] + if ('0' <= c && c <= '9') || parseDec2(b[len(b)-len("07:00"):]) >= 24 { + return b, errors.New("timezone hour outside of range [0,23]") + } + } + return b, nil + case math.MaxUint: + return a.tt.AppendFormat(b, a.format), nil + default: + return appendTimeUnix(b, a.tt, a.base), nil + } +} + +func (a *timeArshaler) unmarshal(b []byte) (err error) { + switch a.base { + case 0: + // Use time.Time.UnmarshalText to avoid possible string allocation. + if err := a.tt.UnmarshalText(b); err != nil { + return err + } + // TODO(https://go.dev/issue/57912): + // RFC 3339 specifies the grammar for a valid timestamp. + // However, the parsing functionality in "time" is too loose and + // incorrectly accepts invalid timestamps as valid. + // Remove these manual checks when "time" checks it for us. 
+ newParseError := func(layout, value, layoutElem, valueElem, message string) error { + return &time.ParseError{Layout: layout, Value: value, LayoutElem: layoutElem, ValueElem: valueElem, Message: message} + } + switch { + case b[len("2006-01-02T")+1] == ':': // hour must be two digits + return newParseError(time.RFC3339, string(b), "15", string(b[len("2006-01-02T"):][:1]), "") + case b[len("2006-01-02T15:04:05")] == ',': // sub-second separator must be a period + return newParseError(time.RFC3339, string(b), ".", ",", "") + case b[len(b)-1] != 'Z': + switch { + case parseDec2(b[len(b)-len("07:00"):]) >= 24: // timezone hour must be in range + return newParseError(time.RFC3339, string(b), "Z07:00", string(b[len(b)-len("Z07:00"):]), ": timezone hour out of range") + case parseDec2(b[len(b)-len("00"):]) >= 60: // timezone minute must be in range + return newParseError(time.RFC3339, string(b), "Z07:00", string(b[len(b)-len("Z07:00"):]), ": timezone minute out of range") + } + } + return nil + case math.MaxUint: + a.tt, err = time.Parse(a.format, string(b)) + return err + default: + a.tt, err = parseTimeUnix(b, a.base) + return err + } +} + +// appendDurationBase10 appends d formatted as a decimal fractional number, +// where pow10 is a power-of-10 used to scale down the number. +func appendDurationBase10(b []byte, d time.Duration, pow10 uint) []byte { + b, n := mayAppendDurationSign(b, d) // append sign + whole, frac := bits.Div64(0, n, uint64(pow10)) // compute whole and frac fields + b = strconv.AppendUint(b, whole, 10) // append whole field + return appendFracBase10(b, uint(frac), pow10) // append frac field +} + +// parseDurationBase10 parses d from a decimal fractional number, +// where pow10 is a power-of-10 used to scale up the number. 
+func parseDurationBase10(b []byte, pow10 uint) (time.Duration, error) { + suffix, neg := consumeSign(b) // consume sign + wholeBytes, fracBytes := bytesCutByte(suffix, '.', true) // consume whole and frac fields + whole, okWhole := jsonwire.ParseUint(wholeBytes) // parse whole field; may overflow + frac, okFrac := parseFracBase10(fracBytes, pow10) // parse frac field + hi, lo := bits.Mul64(whole, uint64(pow10)) // overflow if hi > 0 + sum, co := bits.Add64(lo, uint64(frac), 0) // overflow if co > 0 + switch d := mayApplyDurationSign(sum, neg); { // overflow if neg != (d < 0) + case (!okWhole && whole != math.MaxUint64) || !okFrac: + return 0, fmt.Errorf("invalid duration %q: %w", b, strconv.ErrSyntax) + case !okWhole || hi > 0 || co > 0 || neg != (d < 0): + return 0, fmt.Errorf("invalid duration %q: %w", b, strconv.ErrRange) + default: + return d, nil + } +} + +// appendDurationBase60 appends d formatted with H:MM:SS.SSS notation. +func appendDurationBase60(b []byte, d time.Duration) []byte { + b, n := mayAppendDurationSign(b, d) // append sign + n, nsec := bits.Div64(0, n, 1e9) // compute nsec field + n, sec := bits.Div64(0, n, 60) // compute sec field + hour, min := bits.Div64(0, n, 60) // compute hour and min fields + b = strconv.AppendUint(b, hour, 10) // append hour field + b = append(b, ':', '0'+byte(min/10), '0'+byte(min%10)) // append min field + b = append(b, ':', '0'+byte(sec/10), '0'+byte(sec%10)) // append sec field + return appendFracBase10(b, uint(nsec), 1e9) // append nsec field +} + +// parseDurationBase60 parses d formatted with H:MM:SS.SSS notation. +// The exact grammar is `-?(0|[1-9][0-9]*):[0-5][0-9]:[0-5][0-9]([.][0-9]+)?`. 
+func parseDurationBase60(b []byte) (time.Duration, error) { + checkBase60 := func(b []byte) bool { + return len(b) == 2 && ('0' <= b[0] && b[0] <= '5') && '0' <= b[1] && b[1] <= '9' + } + suffix, neg := consumeSign(b) // consume sign + hourBytes, suffix := bytesCutByte(suffix, ':', false) // consume hour field + minBytes, suffix := bytesCutByte(suffix, ':', false) // consume min field + secBytes, nsecBytes := bytesCutByte(suffix, '.', true) // consume sec and nsec fields + hour, okHour := jsonwire.ParseUint(hourBytes) // parse hour field; may overflow + min := parseDec2(minBytes) // parse min field + sec := parseDec2(secBytes) // parse sec field + nsec, okNsec := parseFracBase10(nsecBytes, 1e9) // parse nsec field + n := uint64(min)*60*1e9 + uint64(sec)*1e9 + uint64(nsec) // cannot overflow + hi, lo := bits.Mul64(hour, 60*60*1e9) // overflow if hi > 0 + sum, co := bits.Add64(lo, n, 0) // overflow if co > 0 + switch d := mayApplyDurationSign(sum, neg); { // overflow if neg != (d < 0) + case (!okHour && hour != math.MaxUint64) || !checkBase60(minBytes) || !checkBase60(secBytes) || !okNsec: + return 0, fmt.Errorf("invalid duration %q: %w", b, strconv.ErrSyntax) + case !okHour || hi > 0 || co > 0 || neg != (d < 0): + return 0, fmt.Errorf("invalid duration %q: %w", b, strconv.ErrRange) + default: + return d, nil + } +} + +// mayAppendDurationSign appends a negative sign if n is negative. +func mayAppendDurationSign(b []byte, d time.Duration) ([]byte, uint64) { + if d < 0 { + b = append(b, '-') + d *= -1 + } + return b, uint64(d) +} + +// mayApplyDurationSign inverts n if neg is specified. +func mayApplyDurationSign(n uint64, neg bool) time.Duration { + if neg { + return -1 * time.Duration(n) + } else { + return +1 * time.Duration(n) + } +} + +// appendTimeUnix appends t formatted as a decimal fractional number, +// where pow10 is a power-of-10 used to scale up the number. 
+func appendTimeUnix(b []byte, t time.Time, pow10 uint) []byte { + sec, nsec := t.Unix(), int64(t.Nanosecond()) + if sec < 0 { + b = append(b, '-') + sec, nsec = negateSecNano(sec, nsec) + } + switch { + case pow10 == 1e0: // fast case where units is in seconds + b = strconv.AppendUint(b, uint64(sec), 10) + return appendFracBase10(b, uint(nsec), 1e9) + case uint64(sec) < 1e9: // intermediate case where units is not seconds, but no overflow + b = strconv.AppendUint(b, uint64(sec)*uint64(pow10)+uint64(uint(nsec)/(1e9/pow10)), 10) + return appendFracBase10(b, (uint(nsec)*pow10)%1e9, 1e9) + default: // slow case where units is not seconds and overflow would occur + b = strconv.AppendUint(b, uint64(sec), 10) + b = appendPaddedBase10(b, uint(uint(nsec)/(1e9/pow10)), pow10) + return appendFracBase10(b, (uint(nsec)*pow10)%1e9, 1e9) + } +} + +// parseTimeUnix parses t formatted as a decimal fractional number, +// where pow10 is a power-of-10 used to scale down the number. +func parseTimeUnix(b []byte, pow10 uint) (time.Time, error) { + suffix, neg := consumeSign(b) // consume sign + wholeBytes, fracBytes := bytesCutByte(suffix, '.', true) // consume whole and frac fields + whole, okWhole := jsonwire.ParseUint(wholeBytes) // parse whole field; may overflow + frac, okFrac := parseFracBase10(fracBytes, 1e9/pow10) // parse frac field + var sec, nsec int64 + switch { + case pow10 == 1e0: // fast case where units is in seconds + sec = int64(whole) // check overflow later after negation + nsec = int64(frac) // cannot overflow + case okWhole: // intermediate case where units is not seconds, but no overflow + sec = int64(whole / uint64(pow10)) // check overflow later after negation + nsec = int64((uint(whole)%pow10)*(1e9/pow10) + uint(frac)) // cannot overflow + case !okWhole && whole == math.MaxUint64: // slow case where units is not seconds and overflow occurred + width := int(math.Log10(float64(pow10))) // compute len(strconv.Itoa(pow10-1)) + whole, okWhole = 
jsonwire.ParseUint(wholeBytes[:len(wholeBytes)-width]) // parse the upper whole field + mid, _ := parsePaddedBase10(wholeBytes[len(wholeBytes)-width:], pow10) // parse the lower whole field + sec = int64(whole) // check overflow later after negation + nsec = int64(uint(mid)*(1e9/pow10) + frac) // cannot overflow + } + if neg { + sec, nsec = negateSecNano(sec, nsec) + } + switch t := time.Unix(sec, nsec).UTC(); { + case (!okWhole && whole != math.MaxUint64) || !okFrac: + return time.Time{}, fmt.Errorf("invalid time %q: %w", b, strconv.ErrSyntax) + case !okWhole || neg != (t.Unix() < 0): + return time.Time{}, fmt.Errorf("invalid time %q: %w", b, strconv.ErrRange) + default: + return t, nil + } +} + +// negateSecNano negates a Unix timestamp, where nsec must be within [0, 1e9). +func negateSecNano(sec, nsec int64) (int64, int64) { + sec = ^sec // twos-complement negation (i.e., -1*sec + 1) + nsec = -nsec + 1e9 // negate nsec and add 1e9 (which is the extra +1 from sec negation) + sec += int64(nsec / 1e9) // handle possible overflow of nsec if it started as zero + nsec %= 1e9 // ensure nsec stays within [0, 1e9) + return sec, nsec +} + +// appendFracBase10 appends the fraction of n/max10, +// where max10 is a power-of-10 that is larger than n. +func appendFracBase10(b []byte, n, max10 uint) []byte { + if n == 0 { + return b + } + return bytes.TrimRight(appendPaddedBase10(append(b, '.'), n, max10), "0") +} + +// parseFracBase10 parses the fraction of n/max10, +// where max10 is a power-of-10 that is larger than n. +func parseFracBase10(b []byte, max10 uint) (n uint, ok bool) { + switch { + case len(b) == 0: + return 0, true + case len(b) < len(".0") || b[0] != '.': + return 0, false + } + return parsePaddedBase10(b[len("."):], max10) +} + +// appendPaddedBase10 appends a zero-padded encoding of n, +// where max10 is a power-of-10 that is larger than n. 
func appendPaddedBase10(b []byte, n, max10 uint) []byte {
	// Values at or above max10/10 already occupy the full width.
	lead := max10 / 10
	if n >= lead {
		return strconv.AppendUint(b, uint64(n), 10)
	}
	// Otherwise format n+lead, which has exactly the desired width and a
	// leading '1', and then turn that leading digit back into '0'.
	start := len(b)
	b = strconv.AppendUint(b, uint64(n+lead), 10)
	b[start]--
	return b
}

// parsePaddedBase10 parses b as the zero-padded encoding of n,
// where max10 is a power-of-10 that is larger than n.
// Input shorter than the expected width is treated as if padded on the
// right with zeros. Input longer than the expected width has its extra
// bytes ignored, provided that they are all digits.
func parsePaddedBase10(b []byte, max10 uint) (n uint, ok bool) {
	for scale := uint(1); scale < max10; scale *= 10 {
		n *= 10
		if len(b) == 0 {
			continue // truncated input: the remaining digits are implicit zeros
		}
		digit := b[0]
		if digit < '0' || digit > '9' {
			return n, false
		}
		n += uint(digit - '0')
		b = b[1:]
	}
	// Whatever is left must consist solely of digits.
	leftover := bytes.TrimRight(b, "0123456789")
	return n, len(leftover) == 0
}

// consumeSign strips an optional leading negative sign from b and
// reports whether one was present.
func consumeSign(b []byte) ([]byte, bool) {
	if len(b) == 0 || b[0] != '-' {
		return b, false
	}
	return b[1:], true
}

// bytesCutByte is similar to bytes.Cut(b, []byte{c}),
// except c may optionally be included as part of the suffix.
// If c does not occur in b, it returns b and a nil suffix.
func bytesCutByte(b []byte, c byte, include bool) ([]byte, []byte) {
	at := bytes.IndexByte(b, c)
	if at < 0 {
		return b, nil
	}
	tail := at + 1 // by default the separator is dropped
	if include {
		tail = at
	}
	return b[:at], b[tail:]
}

// parseDec2 parses the first two bytes of b as an unsigned, base-10,
// 2-digit number. It returns 0 if b holds fewer than two bytes.
// The result is undefined if digits are not base-10.
func parseDec2(b []byte) byte {
	if len(b) >= 2 {
		tens, ones := b[0]-'0', b[1]-'0'
		return 10*tens + ones
	}
	return 0
}

// requireKeyedLiterals can be embedded in a struct to require keyed literals.
type requireKeyedLiterals struct{}

// nonComparable can be embedded in a struct to prevent comparability.
+type nonComparable [0]func() + +const errorPrefix = "json: " + +// SemanticError describes an error determining the meaning +// of JSON data as Go data or vice-versa. +// +// The contents of this error as produced by this package may change over time. +type SemanticError struct { + requireKeyedLiterals + nonComparable + + action string // either "marshal" or "unmarshal" + + // ByteOffset indicates that an error occurred after this byte offset. + ByteOffset int64 + // JSONPointer indicates that an error occurred within this JSON value + // as indicated using the JSON Pointer notation (see RFC 6901). + JSONPointer jsontext.Pointer + + // JSONKind is the JSON kind that could not be handled. + JSONKind jsontext.Kind // may be zero if unknown + // GoType is the Go type that could not be handled. + GoType reflect.Type // may be nil if unknown + + // Err is the underlying error. + Err error // may be nil +} + +func (e *SemanticError) Error() string { + var sb strings.Builder + sb.WriteString(errorPrefix) + + // Hyrum-proof the error message by deliberately switching between + // two equivalent renderings of the same error message. + // The randomization is tied to the Hyrum-proofing already applied + // on map iteration in Go. + for phrase := range map[string]struct{}{"cannot": {}, "unable to": {}} { + sb.WriteString(phrase) + break // use whichever phrase we get in the first iteration + } + + // Format action. + var preposition string + switch e.action { + case "marshal": + sb.WriteString(" marshal") + preposition = " from" + case "unmarshal": + sb.WriteString(" unmarshal") + preposition = " into" + default: + sb.WriteString(" handle") + preposition = " with" + } + + // Format JSON kind. 
+ var omitPreposition bool + switch e.JSONKind { + case 'n': + sb.WriteString(" JSON null") + case 'f', 't': + sb.WriteString(" JSON boolean") + case '"': + sb.WriteString(" JSON string") + case '0': + sb.WriteString(" JSON number") + case '{', '}': + sb.WriteString(" JSON object") + case '[', ']': + sb.WriteString(" JSON array") + default: + omitPreposition = true + } + + // Format Go type. + if e.GoType != nil { + if !omitPreposition { + sb.WriteString(preposition) + } + sb.WriteString(" Go value of type ") + sb.WriteString(e.GoType.String()) + } + + // Format where. + switch { + case e.JSONPointer != "": + sb.WriteString(" within JSON value at ") + sb.WriteString(strconv.Quote(string(e.JSONPointer))) + case e.ByteOffset > 0: + sb.WriteString(" after byte offset ") + sb.WriteString(strconv.FormatInt(e.ByteOffset, 10)) + } + + // Format underlying error. + if e.Err != nil { + sb.WriteString(": ") + sb.WriteString(e.Err.Error()) + } + + return sb.String() +} + +func (e *SemanticError) Unwrap() error { + return e.Err +} + +func firstError(errs ...error) error { + for _, err := range errs { + if err != nil { + return err + } + } + return nil +} + +type isZeroer interface { + IsZero() bool +} + +var isZeroerType = reflect.TypeFor[isZeroer]() + +type structFields struct { + flattened []structField // listed in depth-first ordering + byActualName map[string]*structField + byFoldedName map[string][]*structField + inlinedFallback *structField +} + +// lookupByFoldedName looks up name by a case-insensitive match +// that also ignores the presence of dashes and underscores. 
func (fs *structFields) lookupByFoldedName(name []byte) []*structField {
	return fs.byFoldedName[string(foldName(name))]
}

// structField describes a single serializable Go struct field together
// with the options parsed from its `json` tag.
type structField struct {
	id      int   // unique numeric ID in breadth-first ordering
	index   []int // index into a struct according to reflect.Type.FieldByIndex
	typ     reflect.Type
	fncs    *arshaler
	isZero  func(addressableValue) bool
	isEmpty func(addressableValue) bool
	fieldOptions
}

// makeStructFields computes the serializable fields of the Go struct type
// root, descending into inlined struct fields in breadth-first order.
//
// For every set of fields sharing a JSON name only the dominant field,
// if one exists, is kept. The resulting fields are listed in depth-first
// order, indexed by exact and by folded name, and accompanied by the
// dominant inlined fallback field (a Go map or jsontext.Value), if any.
//
// A *SemanticError is returned if a field tag cannot be parsed, if the
// `inline` and `unknown` options are misused, if an inlined field has an
// unsupported type, if a name is invalid UTF-8 or duplicated within one
// struct, or if a non-empty struct has neither `json` tags nor any
// serializable fields.
func makeStructFields(root reflect.Type) (structFields, *SemanticError) {
	// Setup a queue for a breadth-first search.
	var queueIndex int
	type queueEntry struct {
		typ           reflect.Type
		index         []int
		visitChildren bool // whether to recursively visit inlined field in this struct
	}
	queue := []queueEntry{{root, nil, true}}
	seen := map[reflect.Type]bool{root: true}

	// Perform a breadth-first search over all reachable fields.
	// This ensures that len(f.index) will be monotonically increasing.
	var allFields, inlinedFallbacks []structField
	for queueIndex < len(queue) {
		qe := queue[queueIndex]
		queueIndex++

		t := qe.typ
		inlinedFallbackIndex := -1         // index of last inlined fallback field in current struct
		namesIndex := make(map[string]int) // index of each field with a given JSON object name in current struct
		var hasAnyJSONTag bool             // whether any Go struct field has a `json` tag
		var hasAnyJSONField bool           // whether any JSON serializable fields exist in current struct
		for i := range t.NumField() {
			sf := t.Field(i)
			_, hasTag := sf.Tag.Lookup("json")
			hasAnyJSONTag = hasAnyJSONTag || hasTag
			options, ignored, err := parseFieldOptions(sf)
			if err != nil {
				return structFields{}, &SemanticError{GoType: t, Err: err}
			} else if ignored {
				continue
			}
			hasAnyJSONField = true
			f := structField{
				// Allocate a new slice (len=N+1) to hold both
				// the parent index (len=N) and the current index (len=1).
				// Do this to avoid clobbering the memory of the parent index.
				index:        append(append(make([]int, 0, len(qe.index)+1), qe.index...), i),
				typ:          sf.Type,
				fieldOptions: options,
			}
			if sf.Anonymous && !f.hasName {
				f.inline = true // implied by use of Go embedding without an explicit name
			}
			if f.inline || f.unknown {
				// Handle an inlined field that serializes to/from
				// zero or more JSON object members.

				if f.inline && f.unknown {
					err := fmt.Errorf("Go struct field %s cannot have both `inline` and `unknown` specified", sf.Name)
					return structFields{}, &SemanticError{GoType: t, Err: err}
				}
				// The options must equal a value that has nothing set
				// besides the name fields and exactly one of the two flags.
				switch f.fieldOptions {
				case fieldOptions{name: f.name, quotedName: f.quotedName, inline: true}:
				case fieldOptions{name: f.name, quotedName: f.quotedName, unknown: true}:
				default:
					err := fmt.Errorf("Go struct field %s cannot have any options other than `inline` or `unknown` specified", sf.Name)
					return structFields{}, &SemanticError{GoType: t, Err: err}
				}

				// Unwrap one level of pointer indirection similar to how Go
				// only allows embedding either T or *T, but not **T.
				tf := f.typ
				if tf.Kind() == reflect.Pointer && tf.Name() == "" {
					tf = tf.Elem()
				}
				// Reject any types with custom serialization otherwise
				// it becomes impossible to know what sub-fields to inline.
				if which := implementsWhich(tf,
					jsonMarshalerV2Type, jsonMarshalerV1Type, textMarshalerType,
					jsonUnmarshalerV2Type, jsonUnmarshalerV1Type, textUnmarshalerType,
				); which != nil && tf != jsontextValueType {
					err := fmt.Errorf("inlined Go struct field %s of type %s must not implement JSON marshal or unmarshal methods", sf.Name, tf)
					return structFields{}, &SemanticError{GoType: t, Err: err}
				}

				// Handle an inlined field that serializes to/from
				// a finite number of JSON object members backed by a Go struct.
				if tf.Kind() == reflect.Struct {
					if f.unknown {
						err := fmt.Errorf("inlined Go struct field %s of type %s with `unknown` tag must be a Go map of string key or a jsontext.Value", sf.Name, tf)
						return structFields{}, &SemanticError{GoType: t, Err: err}
					}
					// Queue the struct for a later visit. A type that was
					// already seen is still queued, but with visitChildren
					// cleared so that its own inlined structs are not queued.
					if qe.visitChildren {
						queue = append(queue, queueEntry{tf, f.index, !seen[tf]})
					}
					seen[tf] = true
					continue
				}

				// Handle an inlined field that serializes to/from any number of
				// JSON object members backed by a Go map or jsontext.Value.
				switch {
				case tf == jsontextValueType:
					f.fncs = nil // specially handled in arshal_inlined.go
				case tf.Kind() == reflect.Map && tf.Key() == stringType:
					f.fncs = lookupArshaler(tf.Elem())
				default:
					err := fmt.Errorf("inlined Go struct field %s of type %s must be a Go struct, Go map of string key, or jsontext.Value", sf.Name, tf)
					return structFields{}, &SemanticError{GoType: t, Err: err}
				}

				// Reject multiple inlined fallback fields within the same struct.
				if inlinedFallbackIndex >= 0 {
					err := fmt.Errorf("inlined Go struct fields %s and %s cannot both be a Go map or jsontext.Value", t.Field(inlinedFallbackIndex).Name, sf.Name)
					return structFields{}, &SemanticError{GoType: t, Err: err}
				}
				inlinedFallbackIndex = i

				inlinedFallbacks = append(inlinedFallbacks, f)
			} else {
				// Handle normal Go struct field that serializes to/from
				// a single JSON object member.

				// Provide a function that uses a type's IsZero method.
				switch {
				case sf.Type.Kind() == reflect.Interface && sf.Type.Implements(isZeroerType):
					f.isZero = func(va addressableValue) bool {
						// Avoid panics calling IsZero on a nil interface or
						// non-nil interface with nil pointer.
						return va.IsNil() || (va.Elem().Kind() == reflect.Pointer && va.Elem().IsNil()) || va.Interface().(isZeroer).IsZero()
					}
				case sf.Type.Kind() == reflect.Pointer && sf.Type.Implements(isZeroerType):
					f.isZero = func(va addressableValue) bool {
						// Avoid panics calling IsZero on nil pointer.
						return va.IsNil() || va.Interface().(isZeroer).IsZero()
					}
				case sf.Type.Implements(isZeroerType):
					f.isZero = func(va addressableValue) bool { return va.Interface().(isZeroer).IsZero() }
				case reflect.PointerTo(sf.Type).Implements(isZeroerType):
					f.isZero = func(va addressableValue) bool { return va.Addr().Interface().(isZeroer).IsZero() }
				}

				// Provide a function that can determine whether the value would
				// serialize as an empty JSON value.
				switch sf.Type.Kind() {
				case reflect.String, reflect.Map, reflect.Array, reflect.Slice:
					f.isEmpty = func(va addressableValue) bool { return va.Len() == 0 }
				case reflect.Pointer, reflect.Interface:
					f.isEmpty = func(va addressableValue) bool { return va.IsNil() }
				}

				// Reject user-specified names with invalid UTF-8.
				if !utf8.ValidString(f.name) {
					err := fmt.Errorf("Go struct field %s has JSON object name %q with invalid UTF-8", sf.Name, f.name)
					return structFields{}, &SemanticError{GoType: t, Err: err}
				}
				// Reject multiple fields with same name within the same struct.
				if j, ok := namesIndex[f.name]; ok {
					err := fmt.Errorf("Go struct fields %s and %s conflict over JSON object name %q", t.Field(j).Name, sf.Name, f.name)
					return structFields{}, &SemanticError{GoType: t, Err: err}
				}
				namesIndex[f.name] = i

				// IDs are handed out in discovery order, which is breadth-first.
				f.id = len(allFields)
				f.fncs = lookupArshaler(sf.Type)
				allFields = append(allFields, f)
			}
		}

		// NOTE: New users to the json package are occasionally surprised that
		// unexported fields are ignored. This occurs by necessity due to our
		// inability to directly introspect such fields with Go reflection
		// without the use of unsafe.
		//
		// To reduce friction here, refuse to serialize any Go struct that
		// has no JSON serializable fields, has at least one Go struct field,
		// and does not have any `json` tags present. For example,
		// errors returned by errors.New would fail to serialize.
		isEmptyStruct := t.NumField() == 0
		if !isEmptyStruct && !hasAnyJSONTag && !hasAnyJSONField {
			err := errors.New("Go struct has no exported fields")
			return structFields{}, &SemanticError{GoType: t, Err: err}
		}
	}

	// Sort the fields by exact name (breaking ties by depth and
	// then by presence of an explicitly provided JSON name).
	// Select the dominant field from each set of fields with the same name.
	// If multiple fields have the same name, then the dominant field
	// is the one that exists alone at the shallowest depth,
	// or the one that is uniquely tagged with a JSON name.
	// Otherwise, no dominant field exists for the set.
	//
	// The flattened slice reuses the backing array of allFields; this is an
	// in-place filter, since each kept field is written at or before the
	// position that is currently being read.
	flattened := allFields[:0]
	slices.SortFunc(allFields, func(x, y structField) int {
		switch {
		case x.name != y.name:
			return strings.Compare(x.name, y.name)
		case len(x.index) != len(y.index):
			return cmp.Compare(len(x.index), len(y.index))
		case x.hasName && !y.hasName:
			return -1
		case !x.hasName && y.hasName:
			return +1
		default:
			return 0 // TODO(https://go.dev/issue/61643): Compare bools better.
		}
	})
	for len(allFields) > 0 {
		n := 1 // number of fields with the same exact name
		for n < len(allFields) && allFields[n-1].name == allFields[n].name {
			n++
		}
		if n == 1 || len(allFields[0].index) != len(allFields[1].index) || allFields[0].hasName != allFields[1].hasName {
			flattened = append(flattened, allFields[0]) // only keep field if there is a dominant field
		}
		allFields = allFields[n:]
	}

	// Sort the fields according to a breadth-first ordering
	// so that we can re-number IDs with the smallest possible values.
	// This optimizes use of uintSet such that it fits in the 64-entry bit set.
	slices.SortFunc(flattened, func(x, y structField) int {
		return cmp.Compare(x.id, y.id)
	})
	for i := range flattened {
		flattened[i].id = i
	}

	// Sort the fields according to a depth-first ordering
	// as the typical order that fields are marshaled.
	slices.SortFunc(flattened, func(x, y structField) int {
		return slices.Compare(x.index, y.index)
	})

	// Compute the mapping of fields in the byActualName map.
	// Pre-fold all names so that we can lookup folded names quickly.
	fs := structFields{
		flattened:    flattened,
		byActualName: make(map[string]*structField, len(flattened)),
		byFoldedName: make(map[string][]*structField, len(flattened)),
	}
	for i, f := range fs.flattened {
		// Store pointers into fs.flattened rather than the address of the
		// loop variable so that every map refers to the same element.
		foldedName := string(foldName([]byte(f.name)))
		fs.byActualName[f.name] = &fs.flattened[i]
		fs.byFoldedName[foldedName] = append(fs.byFoldedName[foldedName], &fs.flattened[i])
	}
	for foldedName, fields := range fs.byFoldedName {
		if len(fields) > 1 {
			// The precedence order for conflicting nocase names
			// is by breadth-first order, rather than depth-first order.
			slices.SortFunc(fields, func(x, y *structField) int {
				return cmp.Compare(x.id, y.id)
			})
			fs.byFoldedName[foldedName] = fields
		}
	}
	// An inlined fallback is dominant if it is the only one, or if the first
	// one discovered sits at a different depth than the second.
	if n := len(inlinedFallbacks); n == 1 || (n > 1 && len(inlinedFallbacks[0].index) != len(inlinedFallbacks[1].index)) {
		fs.inlinedFallback = &inlinedFallbacks[0] // dominant inlined fallback field
	}

	return fs, nil
}

// matchFoldedName matches a case-insensitive name depending on the options.
// It assumes that foldName(f.name) == foldName(name).
//
// Case-insensitive matching is used if the `nocase` tag option is specified
// or the MatchCaseInsensitiveNames call option is specified
// (and the `strictcase` tag option is not specified).
// Functionally, the `nocase` and `strictcase` tag options take precedence.
+// +// The v1 definition of case-insensitivity operated under strings.EqualFold +// and would strictly compare dashes and underscores, +// while the v2 definition would ignore the presence of dashes and underscores. +// Thus, if the MatchCaseSensitiveDelimiter call option is specified, +// the match is further restricted to using strings.EqualFold. +func (f *structField) matchFoldedName(name []byte, flags *jsonflags.Flags) bool { + if f.casing == nocase || (flags.Get(jsonflags.MatchCaseInsensitiveNames) && f.casing != strictcase) { + if !flags.Get(jsonflags.MatchCaseSensitiveDelimiter) || strings.EqualFold(string(name), f.name) { + return true + } + } + return false +} + +const ( + nocase = 1 + strictcase = 2 +) + +type fieldOptions struct { + name string + quotedName string // quoted name per RFC 8785, section 3.2.2.2. + hasName bool + casing int8 // either 0, nocase, or strictcase + inline bool + unknown bool + omitzero bool + omitempty bool + string bool + format string +} + +// parseFieldOptions parses the `json` tag in a Go struct field as +// a structured set of options configuring parameters such as +// the JSON member name and other features. +func parseFieldOptions(sf reflect.StructField) (out fieldOptions, ignored bool, err error) { + tag, hasTag := sf.Tag.Lookup("json") + + // Check whether this field is explicitly ignored. + if tag == "-" { + return fieldOptions{}, true, nil + } + + // Check whether this field is unexported. + if !sf.IsExported() { + // In contrast to v1, v2 no longer forwards exported fields from + // embedded fields of unexported types since Go reflection does not + // allow the same set of operations that are available in normal cases + // of purely exported fields. + // See https://go.dev/issue/21357 and https://go.dev/issue/24153. 
+ if sf.Anonymous { + err = firstError(err, fmt.Errorf("embedded Go struct field %s of an unexported type must be explicitly ignored with a `json:\"-\"` tag", sf.Type.Name())) + } + // Tag options specified on an unexported field suggests user error. + if hasTag { + err = firstError(err, fmt.Errorf("unexported Go struct field %s cannot have non-ignored `json:%q` tag", sf.Name, tag)) + } + return fieldOptions{}, true, err + } + + // Determine the JSON member name for this Go field. A user-specified name + // may be provided as either an identifier or a single-quoted string. + // The single-quoted string allows arbitrary characters in the name. + // See https://go.dev/issue/2718 and https://go.dev/issue/3546. + out.name = sf.Name // always starts with an uppercase character + if len(tag) > 0 && !strings.HasPrefix(tag, ",") { + // For better compatibility with v1, accept almost any unescaped name. + n := len(tag) - len(strings.TrimLeftFunc(tag, func(r rune) bool { + return !strings.ContainsRune(",\\'\"`", r) // reserve comma, backslash, and quotes + })) + opt := tag[:n] + if n == 0 { + // Allow a single quoted string for arbitrary names. + var err2 error + opt, n, err2 = consumeTagOption(tag) + if err2 != nil { + err = firstError(err, fmt.Errorf("Go struct field %s has malformed `json` tag: %v", sf.Name, err2)) + } + } + out.hasName = true + out.name = opt + tag = tag[n:] + } + b, _ := jsonwire.AppendQuote(nil, out.name, &jsonflags.Flags{}) + out.quotedName = string(b) + + // Handle any additional tag options (if any). + var wasFormat bool + seenOpts := make(map[string]bool) + for len(tag) > 0 { + // Consume comma delimiter. 
+ if tag[0] != ',' { + err = firstError(err, fmt.Errorf("Go struct field %s has malformed `json` tag: invalid character %q before next option (expecting ',')", sf.Name, tag[0])) + } else { + tag = tag[len(","):] + if len(tag) == 0 { + err = firstError(err, fmt.Errorf("Go struct field %s has malformed `json` tag: invalid trailing ',' character", sf.Name)) + break + } + } + + // Consume and process the tag option. + opt, n, err2 := consumeTagOption(tag) + if err2 != nil { + err = firstError(err, fmt.Errorf("Go struct field %s has malformed `json` tag: %v", sf.Name, err2)) + } + rawOpt := tag[:n] + tag = tag[n:] + switch { + case wasFormat: + err = firstError(err, fmt.Errorf("Go struct field %s has `format` tag option that was not specified last", sf.Name)) + case strings.HasPrefix(rawOpt, "'") && strings.TrimFunc(opt, isLetterOrDigit) == "": + err = firstError(err, fmt.Errorf("Go struct field %s has unnecessarily quoted appearance of `%s` tag option; specify `%s` instead", sf.Name, rawOpt, opt)) + } + switch opt { + case "nocase": + out.casing |= nocase + case "strictcase": + out.casing |= strictcase + case "inline": + out.inline = true + case "unknown": + out.unknown = true + case "omitzero": + out.omitzero = true + case "omitempty": + out.omitempty = true + case "string": + out.string = true + case "format": + if !strings.HasPrefix(tag, ":") { + err = firstError(err, fmt.Errorf("Go struct field %s is missing value for `format` tag option", sf.Name)) + break + } + tag = tag[len(":"):] + opt, n, err2 := consumeTagOption(tag) + if err2 != nil { + err = firstError(err, fmt.Errorf("Go struct field %s has malformed value for `format` tag option: %v", sf.Name, err2)) + break + } + tag = tag[n:] + out.format = opt + wasFormat = true + default: + // Reject keys that resemble one of the supported options. + // This catches invalid mutants such as "omitEmpty" or "omit_empty". 
+ normOpt := strings.ReplaceAll(strings.ToLower(opt), "_", "") + switch normOpt { + case "nocase", "strictcase", "inline", "unknown", "omitzero", "omitempty", "string", "format": + err = firstError(err, fmt.Errorf("Go struct field %s has invalid appearance of `%s` tag option; specify `%s` instead", sf.Name, opt, normOpt)) + } + + // NOTE: Everything else is ignored. This does not mean it is + // forward compatible to insert arbitrary tag options since + // a future version of this package may understand that tag. + } + + // Reject duplicates. + switch { + case out.casing == nocase|strictcase: + err = firstError(err, fmt.Errorf("Go struct field %s cannot have both `nocase` and `strictcase` tag options", sf.Name)) + case seenOpts[opt]: + err = firstError(err, fmt.Errorf("Go struct field %s has duplicate appearance of `%s` tag option", sf.Name, rawOpt)) + } + seenOpts[opt] = true + } + return out, false, err +} + +func consumeTagOption(in string) (string, int, error) { + // For legacy compatibility with v1, assume options are comma-separated. + i := strings.IndexByte(in, ',') + if i < 0 { + i = len(in) + } + + switch r, _ := utf8.DecodeRuneInString(in); { + // Option as a Go identifier. + case r == '_' || unicode.IsLetter(r): + n := len(in) - len(strings.TrimLeftFunc(in, isLetterOrDigit)) + return in[:n], n, nil + // Option as a single-quoted string. + case r == '\'': + // The grammar is nearly identical to a double-quoted Go string literal, + // but uses single quotes as the terminators. The reason for a custom + // grammar is because both backtick and double quotes cannot be used + // verbatim in a struct tag. + // + // Convert a single-quoted string to a double-quote string and rely on + // strconv.Unquote to handle the rest. 
+ var inEscape bool + b := []byte{'"'} + n := len(`'`) + for len(in) > n { + r, rn := utf8.DecodeRuneInString(in[n:]) + switch { + case inEscape: + if r == '\'' { + b = b[:len(b)-1] // remove escape character: `\'` => `'` + } + inEscape = false + case r == '\\': + inEscape = true + case r == '"': + b = append(b, '\\') // insert escape character: `"` => `\"` + case r == '\'': + b = append(b, '"') + n += len(`'`) + out, err := strconv.Unquote(string(b)) + if err != nil { + return in[:i], i, fmt.Errorf("invalid single-quoted string: %s", in[:n]) + } + return out, n, nil + } + b = append(b, in[n:][:rn]...) + n += rn + } + if n > 10 { + n = 10 // limit the amount of context printed in the error + } + return in[:i], i, fmt.Errorf("single-quoted string not terminated: %s...", in[:n]) + case len(in) == 0: + return in[:i], i, io.ErrUnexpectedEOF + default: + return in[:i], i, fmt.Errorf("invalid character %q at start of option (expecting Unicode letter or single quote)", r) + } +} + +func isLetterOrDigit(r rune) bool { + return r == '_' || unicode.IsLetter(r) || unicode.IsNumber(r) +} + +// foldName returns a folded string such that foldName(x) == foldName(y) +// is similar to strings.EqualFold(x, y), but ignores underscore and dashes. +// This allows foldName to match common naming conventions. +func foldName(in []byte) []byte { + // This is inlinable to take advantage of "function outlining". + // See https://blog.filippo.io/efficient-go-apis-with-the-inliner/ + var arr [32]byte // large enough for most JSON names + return appendFoldedName(arr[:0], in) +} + +func appendFoldedName(out, in []byte) []byte { + for i := 0; i < len(in); { + // Handle single-byte ASCII. + if c := in[i]; c < utf8.RuneSelf { + if c != '_' && c != '-' { + if 'a' <= c && c <= 'z' { + c -= 'a' - 'A' + } + out = append(out, c) + } + i++ + continue + } + // Handle multi-byte Unicode. 
+ r, n := utf8.DecodeRune(in[i:]) + out = utf8.AppendRune(out, foldRune(r)) + i += n + } + return out +} + +// foldRune is a variation on unicode.SimpleFold that returns the same rune +// for all runes in the same fold set. +// +// Invariant: +// +// foldRune(x) == foldRune(y) ⇔ strings.EqualFold(string(x), string(y)) +func foldRune(r rune) rune { + for { + r2 := unicode.SimpleFold(r) + if r2 <= r { + return r2 // smallest character in the fold set + } + r = r2 + } +} + +// stringCache is a cache for strings converted from a []byte. +type stringCache = [256]string // 256*unsafe.Sizeof(string("")) => 4KiB + +// makeString returns the string form of b. +// It returns a pre-allocated string from c if present, otherwise +// it allocates a new string, inserts it into the cache, and returns it. +func makeString(c *stringCache, b []byte) string { + const ( + minCachedLen = 2 // single byte strings are already interned by the runtime + maxCachedLen = 256 // large enough for UUIDs, IPv6 addresses, SHA-256 checksums, etc. + ) + if c == nil || len(b) < minCachedLen || len(b) > maxCachedLen { + return string(b) + } + + // Compute a hash from the fixed-width prefix and suffix of the string. + // This ensures hashing a string is a constant time operation. + var h uint32 + switch { + case len(b) >= 8: + lo := binary.LittleEndian.Uint64(b[:8]) + hi := binary.LittleEndian.Uint64(b[len(b)-8:]) + h = hash64(uint32(lo), uint32(lo>>32)) ^ hash64(uint32(hi), uint32(hi>>32)) + case len(b) >= 4: + lo := binary.LittleEndian.Uint32(b[:4]) + hi := binary.LittleEndian.Uint32(b[len(b)-4:]) + h = hash64(lo, hi) + case len(b) >= 2: + lo := binary.LittleEndian.Uint16(b[:2]) + hi := binary.LittleEndian.Uint16(b[len(b)-2:]) + h = hash64(uint32(lo), uint32(hi)) + } + + // Check the cache for the string. + i := h % uint32(len(*c)) + if s := (*c)[i]; s == string(b) { + return s + } + s := string(b) + (*c)[i] = s + return s +} + +// hash64 returns the hash of two uint32s as a single uint32. 
+func hash64(lo, hi uint32) uint32 { + // If avalanche=true, this is identical to XXH32 hash on an 8B string: + // var b [8]byte + // binary.LittleEndian.PutUint32(b[:4], lo) + // binary.LittleEndian.PutUint32(b[4:], hi) + // return xxhash.Sum32(b[:]) + const ( + prime1 = 0x9e3779b1 + prime2 = 0x85ebca77 + prime3 = 0xc2b2ae3d + prime4 = 0x27d4eb2f + prime5 = 0x165667b1 + ) + h := prime5 + uint32(8) + h += lo * prime3 + h = bits.RotateLeft32(h, 17) * prime4 + h += hi * prime3 + h = bits.RotateLeft32(h, 17) * prime4 + // Skip final mix (avalanche) step of XXH32 for performance reasons. + // Empirical testing shows that the improvements in unbiased distribution + // do not outweigh the extra cost in computational complexity. + const avalanche = false + if avalanche { + h ^= h >> 15 + h *= prime2 + h ^= h >> 13 + h *= prime3 + h ^= h >> 16 + } + return h +} + +// Options configure [Marshal], [MarshalWrite], [MarshalEncode], +// [Unmarshal], [UnmarshalRead], and [UnmarshalDecode] with specific features. +// Each function takes in a variadic list of options, where properties +// set in later options override the value of previously set properties. +// +// The Options type is identical to [encoding/json.Options] and +// [encoding/json/jsontext.Options]. Options from the other packages can +// be used interchangeably with functionality in this package. +// +// Options represent either a singular option or a set of options. +// It can be functionally thought of as a Go map of option properties +// (even though the underlying implementation avoids Go maps for performance). +// +// The constructors (e.g., [Deterministic]) return a singular option value: +// +// opt := Deterministic(true) +// +// which is analogous to creating a single entry map: +// +// opt := Options{"Deterministic": true} +// +// [JoinOptions] composes multiple option values together: +// +// out := JoinOptions(opts...) 
+// +// which is analogous to making a new map and copying the options over: +// +// out := make(Options) +// for _, m := range opts { +// for k, v := range m { +// out[k] = v +// } +// } +// +// [GetOption] looks up the value of an option parameter: +// +// v, ok := GetOption(opts, Deterministic) +// +// which is analogous to a Go map lookup: +// +// v, ok := Options["Deterministic"] +// +// There is a single Options type, which is used with both marshal and unmarshal. +// Some options affect both operations, while others only affect one operation: +// +// - [StringifyNumbers] affects marshaling and unmarshaling +// - [Deterministic] affects marshaling only +// - [FormatNilSliceAsNull] affects marshaling only +// - [FormatNilMapAsNull] affects marshaling only +// - [MatchCaseInsensitiveNames] affects marshaling and unmarshaling +// - [DiscardUnknownMembers] affects marshaling only +// - [RejectUnknownMembers] affects unmarshaling only +// - [WithMarshalers] affects marshaling only +// - [WithUnmarshalers] affects unmarshaling only +// +// Options that do not affect a particular operation are ignored. +type Options = jsonopts.Options + +// JoinOptions coalesces the provided list of options into a single Options. +// Properties set in later options override the value of previously set properties. +func JoinOptions(srcs ...Options) Options { + var dst jsonopts.Struct + for _, src := range srcs { + dst.Join(src) + } + return &dst +} + +// GetOption returns the value stored in opts with the provided setter, +// reporting whether the value is present. +// +// Example usage: +// +// v, ok := json.GetOption(opts, json.Deterministic) +// +// Options are most commonly introspected to alter the JSON representation of +// [MarshalerV2.MarshalJSONV2] and [UnmarshalerV2.UnmarshalJSONV2] methods, and +// [MarshalFuncV2] and [UnmarshalFuncV2] functions. +// In such cases, the presence bit should generally be ignored. 
+func GetOption[T any](opts Options, setter func(T) Options) (T, bool) { + return jsonopts.GetOption(opts, setter) +} + +// DefaultOptionsV2 is the full set of all options that define v2 semantics. +// It is equivalent to all options under [Options], [encoding/json.Options], +// and [encoding/json/jsontext.Options] being set to false or the zero value, +// except for the options related to whitespace formatting. +func DefaultOptionsV2() Options { + return &jsonopts.DefaultOptionsV2 +} + +// StringifyNumbers specifies that numeric Go types should be marshaled +// as a JSON string containing the equivalent JSON number value. +// When unmarshaling, numeric Go types can be parsed from either +// a JSON number or a JSON string containing the JSON number +// without any surrounding whitespace. +// +// According to RFC 8259, section 6, a JSON implementation may choose to +// limit the representation of a JSON number to an IEEE 754 binary64 value. +// This may cause decoders to lose precision for int64 and uint64 types. +// Quoting JSON numbers as a JSON string preserves the exact precision. +// +// This affects either marshaling or unmarshaling. +func StringifyNumbers(v bool) Options { + if v { + return jsonflags.StringifyNumbers | 1 + } else { + return jsonflags.StringifyNumbers | 0 + } +} + +// Deterministic specifies that the same input value will be serialized +// as the exact same output bytes. Different processes of +// the same program will serialize equal values to the same bytes, +// but different versions of the same program are not guaranteed +// to produce the exact same sequence of bytes. +// +// This only affects marshaling and is ignored when unmarshaling. 
+func Deterministic(v bool) Options { + if v { + return jsonflags.Deterministic | 1 + } else { + return jsonflags.Deterministic | 0 + } +} + +// FormatNilSliceAsNull specifies that a nil Go slice should marshal as a +// JSON null instead of the default representation as an empty JSON array +// (or an empty JSON string in the case of ~[]byte). +// Slice fields explicitly marked with `format:emitempty` still marshal +// as an empty JSON array. +// +// This only affects marshaling and is ignored when unmarshaling. +func FormatNilSliceAsNull(v bool) Options { + if v { + return jsonflags.FormatNilSliceAsNull | 1 + } else { + return jsonflags.FormatNilSliceAsNull | 0 + } +} + +// FormatNilMapAsNull specifies that a nil Go map should marshal as a +// JSON null instead of the default representation as an empty JSON object. +// Map fields explicitly marked with `format:emitempty` still marshal +// as an empty JSON object. +// +// This only affects marshaling and is ignored when unmarshaling. +func FormatNilMapAsNull(v bool) Options { + if v { + return jsonflags.FormatNilMapAsNull | 1 + } else { + return jsonflags.FormatNilMapAsNull | 0 + } +} + +// MatchCaseInsensitiveNames specifies that JSON object members are matched +// against Go struct fields using a case-insensitive match of the name. +// Go struct fields explicitly marked with `strictcase` or `nocase` +// always use case-sensitive (or case-insensitive) name matching, +// regardless of the value of this option. +// +// This affects either marshaling or unmarshaling. +// For marshaling, this option may alter the detection of duplicate names +// (assuming [jsontext.AllowDuplicateNames] is false) from inlined fields +// if it matches one of the declared fields in the Go struct. 
+func MatchCaseInsensitiveNames(v bool) Options { + if v { + return jsonflags.MatchCaseInsensitiveNames | 1 + } else { + return jsonflags.MatchCaseInsensitiveNames | 0 + } +} + +// DiscardUnknownMembers specifies that marshaling should ignore any +// JSON object members stored in Go struct fields dedicated to storing +// unknown JSON object members. +// +// This only affects marshaling and is ignored when unmarshaling. +func DiscardUnknownMembers(v bool) Options { + if v { + return jsonflags.DiscardUnknownMembers | 1 + } else { + return jsonflags.DiscardUnknownMembers | 0 + } +} + +// RejectUnknownMembers specifies that unknown members should be rejected +// when unmarshaling a JSON object, regardless of whether there is a field +// to store unknown members. +// +// This only affects unmarshaling and is ignored when marshaling. +func RejectUnknownMembers(v bool) Options { + if v { + return jsonflags.RejectUnknownMembers | 1 + } else { + return jsonflags.RejectUnknownMembers | 0 + } +} + +// WithMarshalers specifies a list of type-specific marshalers to use, +// which can be used to override the default marshal behavior for values +// of particular types. +// +// This only affects marshaling and is ignored when unmarshaling. +func WithMarshalers(v *Marshalers) Options { + return (*marshalersOption)(v) +} + +// WithUnmarshalers specifies a list of type-specific unmarshalers to use, +// which can be used to override the default unmarshal behavior for values +// of particular types. +// +// This only affects unmarshaling and is ignored when marshaling. +func WithUnmarshalers(v *Unmarshalers) Options { + return (*unmarshalersOption)(v) +} + +// These option types are declared here instead of "jsonopts" +// to avoid a dependency on "reflect" from "jsonopts". 
+type ( + marshalersOption Marshalers + unmarshalersOption Unmarshalers +) + +func (*marshalersOption) JSONOptions(internal.NotForPublicUse) {} + +func (*unmarshalersOption) JSONOptions(internal.NotForPublicUse) {} + +// Inject support into "jsonopts" to handle these types. +func init() { + jsonopts.GetUnknownOption = func(src *jsonopts.Struct, zero jsonopts.Options) (any, bool) { + switch zero.(type) { + case *marshalersOption: + if !src.Flags.Has(jsonflags.Marshalers) { + return (*Marshalers)(nil), false + } + return src.Marshalers.(*Marshalers), true + case *unmarshalersOption: + if !src.Flags.Has(jsonflags.Unmarshalers) { + return (*Unmarshalers)(nil), false + } + return src.Unmarshalers.(*Unmarshalers), true + default: + panic(fmt.Sprintf("unknown option %T", zero)) + } + } + jsonopts.JoinUnknownOption = func(dst *jsonopts.Struct, src jsonopts.Options) { + switch src := src.(type) { + case *marshalersOption: + dst.Flags.Set(jsonflags.Marshalers | 1) + dst.Marshalers = (*Marshalers)(src) + case *unmarshalersOption: + dst.Flags.Set(jsonflags.Unmarshalers | 1) + dst.Unmarshalers = (*Unmarshalers)(src) + default: + panic(fmt.Sprintf("unknown option %T", src)) + } + } +} diff --git a/internal/json/bundle.sh b/internal/json/bundle.sh new file mode 100644 index 0000000000..e09c4d259d --- /dev/null +++ b/internal/json/bundle.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# Bundle the go-json-experiment package(s) +set -e +find . 
-name bundle.go -delete + +upstream='github.com/go-json-experiment' +root="$(go list -m)/internal" +pkglist='json/internal/jsonflags json/internal/jsonopts json/internal/jsonwire json/internal json/jsontext json' +for pkg in $pkglist; do + maparg="${maparg}${maparg+ }-import=${upstream}/${pkg}=${root}/${pkg}" +done + +for pkg in $pkglist; do + dir=$(echo "$pkg" | sed 's,^json,.,') + mkdir -p "$dir" + echo "package $(basename "$pkg")" > "${dir}/temp.go" + eval "go run golang.org/x/tools/cmd/bundle -prefix '' $maparg -dst ${root}/${pkg} ${upstream}/${pkg} > _bundle.go" + mv _bundle.go "${dir}/bundle.go" +done + +find . -name temp.go -delete diff --git a/internal/json/generate.go b/internal/json/generate.go new file mode 100644 index 0000000000..4fa391da69 --- /dev/null +++ b/internal/json/generate.go @@ -0,0 +1,3 @@ +package json + +//go:generate sh bundle.sh diff --git a/internal/json/internal/bundle.go b/internal/json/internal/bundle.go new file mode 100644 index 0000000000..05e0782ac8 --- /dev/null +++ b/internal/json/internal/bundle.go @@ -0,0 +1,15 @@ +// Code generated by golang.org/x/tools/cmd/bundle. DO NOT EDIT. 
+// $ bundle -prefix -import=github.com/go-json-experiment/json/internal/jsonflags=github.com/quay/clair/v4/internal/json/internal/jsonflags -import=github.com/go-json-experiment/json/internal/jsonopts=github.com/quay/clair/v4/internal/json/internal/jsonopts -import=github.com/go-json-experiment/json/internal/jsonwire=github.com/quay/clair/v4/internal/json/internal/jsonwire -import=github.com/go-json-experiment/json/internal=github.com/quay/clair/v4/internal/json/internal -import=github.com/go-json-experiment/json/jsontext=github.com/quay/clair/v4/internal/json/jsontext -import=github.com/go-json-experiment/json=github.com/quay/clair/v4/internal/json -dst github.com/quay/clair/v4/internal/json/internal github.com/go-json-experiment/json/internal + +package internal + +import () + +// NotForPublicUse is a marker type that an API is for internal use only. +// It does not perfectly prevent usage of that API, but helps to restrict usage. +// Anything with this marker is not covered by the Go compatibility agreement. +type NotForPublicUse struct{} + +// AllowInternalUse is passed from "json" to "jsontext" to authenticate +// that the caller can have access to internal functionality. +var AllowInternalUse NotForPublicUse diff --git a/internal/json/internal/jsonflags/bundle.go b/internal/json/internal/jsonflags/bundle.go new file mode 100644 index 0000000000..e2f858707e --- /dev/null +++ b/internal/json/internal/jsonflags/bundle.go @@ -0,0 +1,197 @@ +// Code generated by golang.org/x/tools/cmd/bundle. DO NOT EDIT. 
+// $ bundle -prefix -import=github.com/go-json-experiment/json/internal/jsonflags=github.com/quay/clair/v4/internal/json/internal/jsonflags -import=github.com/go-json-experiment/json/internal/jsonopts=github.com/quay/clair/v4/internal/json/internal/jsonopts -import=github.com/go-json-experiment/json/internal/jsonwire=github.com/quay/clair/v4/internal/json/internal/jsonwire -import=github.com/go-json-experiment/json/internal=github.com/quay/clair/v4/internal/json/internal -import=github.com/go-json-experiment/json/jsontext=github.com/quay/clair/v4/internal/json/jsontext -import=github.com/go-json-experiment/json=github.com/quay/clair/v4/internal/json -dst github.com/quay/clair/v4/internal/json/internal/jsonflags github.com/go-json-experiment/json/internal/jsonflags + +// jsonflags implements all the optional boolean flags. +// These flags are shared across both "json", "jsontext", and "jsonopts". +// + +package jsonflags + +import ( + "github.com/quay/clair/v4/internal/json/internal" +) + +// Bools represents zero or more boolean flags, all set to true or false. +// The least-significant bit is the boolean value of all flags in the set. +// The remaining bits identify which particular flags. +// +// In common usage, this is OR'd with 0 or 1. For example: +// - (AllowInvalidUTF8 | 0) means "AllowInvalidUTF8 is false" +// - (Multiline | Indent | 1) means "Multiline and Indent are true" +type Bools uint64 + +func (Bools) JSONOptions(internal.NotForPublicUse) {} + +const ( + // AllFlags is the set of all flags. + AllFlags = AllCoderFlags | AllArshalV2Flags | AllArshalV1Flags + + // AllCoderFlags is the set of all encoder/decoder flags. + AllCoderFlags = (maxCoderFlag - 1) - initFlag + + // AllArshalV2Flags is the set of all v2 marshal/unmarshal flags. + AllArshalV2Flags = (maxArshalV2Flag - 1) - (maxCoderFlag - 1) + + // AllArshalV1Flags is the set of all v1 marshal/unmarshal flags. 
+ AllArshalV1Flags = (maxArshalV1Flag - 1) - (maxArshalV2Flag - 1) + + // NonBooleanFlags is the set of non-boolean flags, + // where the value is some other concrete Go type. + // The value of the flag is stored within jsonopts.Struct. + NonBooleanFlags = 0 | + Indent | + IndentPrefix | + ByteLimit | + DepthLimit | + Marshalers | + Unmarshalers + + // DefaultV1Flags is the set of boolean flags that default to true under + // v1 semantics. None of the non-boolean flags differ between v1 and v2. + DefaultV1Flags = 0 | + AllowDuplicateNames | + AllowInvalidUTF8 | + EscapeForHTML | + EscapeForJS | + Deterministic | + FormatNilMapAsNull | + FormatNilSliceAsNull | + MatchCaseInsensitiveNames | + FormatByteArrayAsArray | + FormatTimeDurationAsNanosecond | + IgnoreStructErrors | + MatchCaseSensitiveDelimiter | + MergeWithLegacySemantics | + OmitEmptyWithLegacyDefinition | + RejectFloatOverflow | + ReportLegacyErrorValues | + SkipUnaddressableMethods | + StringifyWithLegacySemantics | + UnmarshalArrayFromAnyLength + + // AnyWhitespace reports whether the encoded output might have any whitespace. + AnyWhitespace = Multiline | SpaceAfterColon | SpaceAfterComma + + // WhitespaceFlags is the set of flags related to whitespace formatting. + // In contrast to AnyWhitespace, this includes Indent and IndentPrefix + // as those settings take no effect if Multiline is false. + WhitespaceFlags = AnyWhitespace | Indent | IndentPrefix +) + +// Encoder and decoder flags. 
+const ( + initFlag Bools = 1 << iota // reserved for the boolean value itself + + AllowDuplicateNames // encode or decode + AllowInvalidUTF8 // encode or decode + WithinArshalCall // encode or decode; for internal use by json.Marshal and json.Unmarshal + OmitTopLevelNewline // encode only; for internal use by json.Marshal and json.MarshalWrite + PreserveRawStrings // encode only; for internal use by jsontext.Value.Canonicalize + CanonicalizeNumbers // encode only; for internal use by jsontext.Value.Canonicalize + EscapeForHTML // encode only + EscapeForJS // encode only + Multiline // encode only + SpaceAfterColon // encode only + SpaceAfterComma // encode only + Indent // encode only; non-boolean flag + IndentPrefix // encode only; non-boolean flag + ByteLimit // encode or decode; non-boolean flag + DepthLimit // encode or decode; non-boolean flag + + maxCoderFlag +) + +// Marshal and Unmarshal flags (for v2). +const ( + _ Bools = (maxCoderFlag >> 1) << iota + + StringifyNumbers // marshal or unmarshal + Deterministic // marshal only + FormatNilMapAsNull // marshal only + FormatNilSliceAsNull // marshal only + MatchCaseInsensitiveNames // marshal or unmarshal + DiscardUnknownMembers // marshal only + RejectUnknownMembers // unmarshal only + Marshalers // marshal only; non-boolean flag + Unmarshalers // unmarshal only; non-boolean flag + + maxArshalV2Flag +) + +// Marshal and Unmarshal flags (for v1). 
+const ( + _ Bools = (maxArshalV2Flag >> 1) << iota + + FormatByteArrayAsArray // marshal or unmarshal + FormatTimeDurationAsNanosecond // marshal or unmarshal + IgnoreStructErrors // marshal or unmarshal + MatchCaseSensitiveDelimiter // marshal or unmarshal + MergeWithLegacySemantics // unmarshal + OmitEmptyWithLegacyDefinition // marshal + RejectFloatOverflow // unmarshal + ReportLegacyErrorValues // marshal or unmarshal + SkipUnaddressableMethods // marshal or unmarshal + StringifyWithLegacySemantics // marshal or unmarshal + UnmarshalAnyWithRawNumber // unmarshal; for internal use by jsonv1.Decoder.UseNumber + UnmarshalArrayFromAnyLength // unmarshal + + maxArshalV1Flag +) + +// Flags is a set of boolean flags. +// If the presence bit is zero, then the value bit must also be zero. +// The least-significant bit of both fields is always zero. +// +// Unlike Bools, which can represent a set of bools that are all true or false, +// Flags represents a set of bools, each individually may be true or false. +type Flags struct{ Presence, Values uint64 } + +// Join joins two sets of flags such that the latter takes precedence. +func (dst *Flags) Join(src Flags) { + // Copy over all source presence bits over to the destination (using OR), + // then invert the source presence bits to clear out source value (using AND-NOT), + // then copy over source value bits over to the destination (using OR). + // e.g., dst := Flags{Presence: 0b_1100_0011, Value: 0b_1000_0011} + // e.g., src := Flags{Presence: 0b_0101_1010, Value: 0b_1001_0010} + dst.Presence |= src.Presence // e.g., 0b_1100_0011 | 0b_0101_1010 -> 0b_110_11011 + dst.Values &= ^src.Presence // e.g., 0b_1000_0011 & 0b_1010_0101 -> 0b_100_00001 + dst.Values |= src.Values // e.g., 0b_1000_0001 | 0b_1001_0010 -> 0b_100_10011 +} + +// Set sets both the presence and value for the provided bool (or set of bools). 
+func (fs *Flags) Set(f Bools) { + // Select out the bits for the flag identifiers (everything except LSB), + // then set the presence for all the identifier bits (using OR), + // then invert the identifier bits to clear out the values (using AND-NOT), + // then copy over all the identifier bits to the value if LSB is 1. + // e.g., fs := Flags{Presence: 0b_0101_0010, Value: 0b_0001_0010} + // e.g., f := 0b_1001_0001 + id := uint64(f) &^ uint64(1) // e.g., 0b_1001_0001 & 0b_1111_1110 -> 0b_1001_0000 + fs.Presence |= id // e.g., 0b_0101_0010 | 0b_1001_0000 -> 0b_1101_0011 + fs.Values &= ^id // e.g., 0b_0001_0010 & 0b_0110_1111 -> 0b_0000_0010 + fs.Values |= uint64(f&1) * id // e.g., 0b_0000_0010 | 0b_1001_0000 -> 0b_1001_0010 +} + +// Get reports whether the bool (or any of the bools) is true. +// This is generally only used with a singular bool. +// The value bit of f (i.e., the LSB) is ignored. +func (fs Flags) Get(f Bools) bool { + return fs.Values&uint64(f) > 0 +} + +// Has reports whether the bool (or any of the bools) is set. +// The value bit of f (i.e., the LSB) is ignored. +func (fs Flags) Has(f Bools) bool { + return fs.Presence&uint64(f) > 0 +} + +// Clear clears both the presence and value for the provided bool or bools. +// The value bit of f (i.e., the LSB) is ignored. +func (fs *Flags) Clear(f Bools) { + // Invert f to produce a mask to clear all bits in f (using AND). + // e.g., fs := Flags{Presence: 0b_0101_0010, Value: 0b_0001_0010} + // e.g., f := 0b_0001_1000 + mask := uint64(^f) // e.g., 0b_0001_1000 -> 0b_1110_0111 + fs.Presence &= mask // e.g., 0b_0101_0010 & 0b_1110_0111 -> 0b_0100_0010 + fs.Values &= mask // e.g., 0b_0001_0010 & 0b_1110_0111 -> 0b_0000_0010 +} diff --git a/internal/json/internal/jsonopts/bundle.go b/internal/json/internal/jsonopts/bundle.go new file mode 100644 index 0000000000..b04b415590 --- /dev/null +++ b/internal/json/internal/jsonopts/bundle.go @@ -0,0 +1,190 @@ +// Code generated by golang.org/x/tools/cmd/bundle. 
DO NOT EDIT. +// $ bundle -prefix -import=github.com/go-json-experiment/json/internal/jsonflags=github.com/quay/clair/v4/internal/json/internal/jsonflags -import=github.com/go-json-experiment/json/internal/jsonopts=github.com/quay/clair/v4/internal/json/internal/jsonopts -import=github.com/go-json-experiment/json/internal/jsonwire=github.com/quay/clair/v4/internal/json/internal/jsonwire -import=github.com/go-json-experiment/json/internal=github.com/quay/clair/v4/internal/json/internal -import=github.com/go-json-experiment/json/jsontext=github.com/quay/clair/v4/internal/json/jsontext -import=github.com/go-json-experiment/json=github.com/quay/clair/v4/internal/json -dst github.com/quay/clair/v4/internal/json/internal/jsonopts github.com/go-json-experiment/json/internal/jsonopts + +package jsonopts + +import ( + "github.com/quay/clair/v4/internal/json/internal" + "github.com/quay/clair/v4/internal/json/internal/jsonflags" +) + +// Options is the common options type shared across json packages. +type Options interface { + // JSONOptions is exported so related json packages can implement Options. + JSONOptions(internal.NotForPublicUse) +} + +// Struct is the combination of all options in struct form. +// This is efficient to pass down the call stack and to query. +type Struct struct { + Flags jsonflags.Flags + + CoderValues + ArshalValues +} + +type CoderValues struct { + Indent string // jsonflags.Indent + IndentPrefix string // jsonflags.IndentPrefix + ByteLimit int64 // jsonflags.ByteLimit + DepthLimit int // jsonflags.DepthLimit +} + +type ArshalValues struct { + // The Marshalers and Unmarshalers fields use the any type to avoid a + // concrete dependency on *json.Marshalers and *json.Unmarshalers, + // which would in turn create a dependency on the "reflect" package. 
+ + Marshalers any // jsonflags.Marshalers + Unmarshalers any // jsonflags.Unmarshalers + + Format string + FormatDepth int +} + +// DefaultOptionsV2 is the set of all options that define default v2 behavior. +var DefaultOptionsV2 = Struct{ + Flags: jsonflags.Flags{ + Presence: uint64(jsonflags.AllFlags & ^jsonflags.WhitespaceFlags), + Values: uint64(0), + }, +} + +// DefaultOptionsV1 is the set of all options that define default v1 behavior. +var DefaultOptionsV1 = Struct{ + Flags: jsonflags.Flags{ + Presence: uint64(jsonflags.AllFlags & ^jsonflags.WhitespaceFlags), + Values: uint64(jsonflags.DefaultV1Flags), + }, +} + +// CopyCoderOptions copies coder-specific options from src to dst. +// This is used by json.MarshalEncode and json.UnmarshalDecode since those +// functions ignore any coder-specific options and uses the options from the +// Encoder or Decoder that is passed in. +func (dst *Struct) CopyCoderOptions(src *Struct) { + srcFlags := src.Flags + srcFlags.Clear(^jsonflags.AllCoderFlags) + dst.Flags.Join(srcFlags) + dst.CoderValues = src.CoderValues +} + +func (*Struct) JSONOptions(internal.NotForPublicUse) {} + +// GetUnknownOption is injected by the "json" package to handle Options +// declared in that package so that "jsonopts" can handle them. +var GetUnknownOption = func(*Struct, Options) (any, bool) { panic("unknown option") } + +func GetOption[T any](opts Options, setter func(T) Options) (T, bool) { + // Collapse the options to *Struct to simplify lookup. + structOpts, ok := opts.(*Struct) + if !ok { + var structOpts2 Struct + structOpts2.Join(opts) + structOpts = &structOpts2 + } + + // Lookup the option based on the return value of the setter. 
+ var zero T + switch opt := setter(zero).(type) { + case jsonflags.Bools: + v := structOpts.Flags.Get(opt) + ok := structOpts.Flags.Has(opt) + return any(v).(T), ok + case Indent: + if !structOpts.Flags.Has(jsonflags.Indent) { + return zero, false + } + return any(structOpts.Indent).(T), true + case IndentPrefix: + if !structOpts.Flags.Has(jsonflags.IndentPrefix) { + return zero, false + } + return any(structOpts.IndentPrefix).(T), true + case ByteLimit: + if !structOpts.Flags.Has(jsonflags.ByteLimit) { + return zero, false + } + return any(structOpts.ByteLimit).(T), true + case DepthLimit: + if !structOpts.Flags.Has(jsonflags.DepthLimit) { + return zero, false + } + return any(structOpts.DepthLimit).(T), true + default: + v, ok := GetUnknownOption(structOpts, opt) + return v.(T), ok + } +} + +// JoinUnknownOption is injected by the "json" package to handle Options +// declared in that package so that "jsonopts" can handle them. +var JoinUnknownOption = func(*Struct, Options) { panic("unknown option") } + +func (dst *Struct) Join(srcs ...Options) { + for _, src := range srcs { + switch src := src.(type) { + case nil: + continue + case jsonflags.Bools: + dst.Flags.Set(src) + case Indent: + dst.Flags.Set(jsonflags.Multiline | jsonflags.Indent | 1) + dst.Indent = string(src) + case IndentPrefix: + dst.Flags.Set(jsonflags.Multiline | jsonflags.IndentPrefix | 1) + dst.IndentPrefix = string(src) + case ByteLimit: + dst.Flags.Set(jsonflags.ByteLimit | 1) + dst.ByteLimit = int64(src) + case DepthLimit: + dst.Flags.Set(jsonflags.DepthLimit | 1) + dst.DepthLimit = int(src) + case *Struct: + dst.Flags.Join(src.Flags) + if src.Flags.Has(jsonflags.NonBooleanFlags) { + if src.Flags.Has(jsonflags.Indent) { + dst.Indent = src.Indent + } + if src.Flags.Has(jsonflags.IndentPrefix) { + dst.IndentPrefix = src.IndentPrefix + } + if src.Flags.Has(jsonflags.ByteLimit) { + dst.ByteLimit = src.ByteLimit + } + if src.Flags.Has(jsonflags.DepthLimit) { + dst.DepthLimit = src.DepthLimit + } + 
if src.Flags.Has(jsonflags.Marshalers) { + dst.Marshalers = src.Marshalers + } + if src.Flags.Has(jsonflags.Unmarshalers) { + dst.Unmarshalers = src.Unmarshalers + } + } + if src.Format != "" { + dst.Format = src.Format + dst.FormatDepth = src.FormatDepth + } + default: + JoinUnknownOption(dst, src) + } + } +} + +type ( + Indent string // jsontext.WithIndent + IndentPrefix string // jsontext.WithIndentPrefix + ByteLimit int64 // jsontext.WithByteLimit + DepthLimit int // jsontext.WithDepthLimit + // type for jsonflags.Marshalers declared in "json" package + // type for jsonflags.Unmarshalers declared in "json" package +) + +func (Indent) JSONOptions(internal.NotForPublicUse) {} + +func (IndentPrefix) JSONOptions(internal.NotForPublicUse) {} + +func (ByteLimit) JSONOptions(internal.NotForPublicUse) {} + +func (DepthLimit) JSONOptions(internal.NotForPublicUse) {} diff --git a/internal/json/internal/jsonwire/bundle.go b/internal/json/internal/jsonwire/bundle.go new file mode 100644 index 0000000000..c41fc69a11 --- /dev/null +++ b/internal/json/internal/jsonwire/bundle.go @@ -0,0 +1,1022 @@ +// Code generated by golang.org/x/tools/cmd/bundle. DO NOT EDIT. 
// $ bundle -prefix -import=github.com/go-json-experiment/json/internal/jsonflags=github.com/quay/clair/v4/internal/json/internal/jsonflags -import=github.com/go-json-experiment/json/internal/jsonopts=github.com/quay/clair/v4/internal/json/internal/jsonopts -import=github.com/go-json-experiment/json/internal/jsonwire=github.com/quay/clair/v4/internal/json/internal/jsonwire -import=github.com/go-json-experiment/json/internal=github.com/quay/clair/v4/internal/json/internal -import=github.com/go-json-experiment/json/jsontext=github.com/quay/clair/v4/internal/json/jsontext -import=github.com/go-json-experiment/json=github.com/quay/clair/v4/internal/json -dst github.com/quay/clair/v4/internal/json/internal/jsonwire github.com/go-json-experiment/json/internal/jsonwire

// Package jsonwire implements stateless functionality for handling JSON text.
//

package jsonwire

import (
	"cmp"
	"errors"
	"io"
	"math"
	"slices"
	"strconv"
	"strings"
	"unicode"
	"unicode/utf16"
	"unicode/utf8"

	"github.com/quay/clair/v4/internal/json/internal/jsonflags"
)

// ValueFlags is a bit set of properties observed while scanning a JSON value.
// The zero value means nothing non-verbatim or non-canonical has been seen.
type ValueFlags uint

const (
	// (1 << iota) / 2 evaluates to 0, 1, 2, ... so the blank identifier takes
	// zero and the named flags that follow take 1 and 2.
	_ ValueFlags = (1 << iota) / 2 // powers of two starting with zero

	stringNonVerbatim  // string cannot be naively treated as valid UTF-8
	stringNonCanonical // string not formatted according to RFC 8785, section 3.2.2.2.
	// TODO: Track whether a number is a non-integer?
)

// Join adds every flag set in f2 to f.
func (f *ValueFlags) Join(f2 ValueFlags) { *f |= f2 }

// IsVerbatim reports whether stringNonVerbatim has not been recorded.
func (f ValueFlags) IsVerbatim() bool { return f&stringNonVerbatim == 0 }

// IsCanonical reports whether stringNonCanonical has not been recorded.
func (f ValueFlags) IsCanonical() bool { return f&stringNonCanonical == 0 }

// ConsumeWhitespace consumes leading JSON whitespace per RFC 7159, section 2.
// It returns the number of leading bytes of b that are a space, tab,
// carriage return, or newline.
func ConsumeWhitespace(b []byte) (n int) {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	for len(b) > n && (b[n] == ' ' || b[n] == '\t' || b[n] == '\r' || b[n] == '\n') {
		n++
	}
	return n
}

// ConsumeNull consumes the next JSON null literal per RFC 7159, section 3.
// It returns 0 if it is invalid, in which case ConsumeLiteral should be used.
func ConsumeNull(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const literal = "null"
	if len(b) >= len(literal) && string(b[:len(literal)]) == literal {
		return len(literal)
	}
	return 0
}

// ConsumeFalse consumes the next JSON false literal per RFC 7159, section 3.
// It returns 0 if it is invalid, in which case ConsumeLiteral should be used.
func ConsumeFalse(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const literal = "false"
	if len(b) >= len(literal) && string(b[:len(literal)]) == literal {
		return len(literal)
	}
	return 0
}

// ConsumeTrue consumes the next JSON true literal per RFC 7159, section 3.
// It returns 0 if it is invalid, in which case ConsumeLiteral should be used.
func ConsumeTrue(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const literal = "true"
	if len(b) >= len(literal) && string(b[:len(literal)]) == literal {
		return len(literal)
	}
	return 0
}

// ConsumeLiteral consumes the next JSON literal per RFC 7159, section 3.
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
//
// On a mismatch it returns the offset of the first byte that differs from lit
// together with an invalid-character error. If b is a proper prefix of lit,
// it returns len(b) and io.ErrUnexpectedEOF. Otherwise it returns len(lit).
func ConsumeLiteral(b []byte, lit string) (n int, err error) {
	for i := 0; i < len(b) && i < len(lit); i++ {
		if b[i] != lit[i] {
			return i, NewInvalidCharacterError(b[i:], "within literal "+lit+" (expecting "+strconv.QuoteRune(rune(lit[i]))+")")
		}
	}
	// Every available byte matched; what remains is whether there were enough.
	if len(b) < len(lit) {
		return len(b), io.ErrUnexpectedEOF
	}
	return len(lit), nil
}

// ConsumeSimpleString consumes the next JSON string per RFC 7159, section 7
// but is limited to the grammar for an ASCII string without escape sequences.
// It returns 0 if it is invalid or more complicated than a simple string,
// in which case ConsumeString should be called.
//
// It rejects '<', '>', and '&' for compatibility reasons since these were
// always escaped in the v1 implementation. Thus, if this function reports
// non-zero then we know that the string would be encoded the same way
// under both v1 or v2 escape semantics.
func ConsumeSimpleString(b []byte) (n int) {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if len(b) > 0 && b[0] == '"' {
		n++
		// Advance over ASCII bytes that escapeASCII does not mark.
		for len(b) > n && b[n] < utf8.RuneSelf && escapeASCII[b[n]] == 0 {
			n++
		}
		// Only a closing quote at this position makes the string "simple".
		if uint(len(b)) > uint(n) && b[n] == '"' {
			n++
			return n
		}
	}
	return 0
}

// ConsumeString consumes the next JSON string per RFC 7159, section 7.
// If validateUTF8 is false, then this allows the presence of invalid UTF-8
// characters within the string itself.
// It reports the number of bytes consumed and whether an error was encountered.
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
//
// Properties noticed while scanning (non-verbatim, non-canonical) are
// recorded into flags.
func ConsumeString(flags *ValueFlags, b []byte, validateUTF8 bool) (n int, err error) {
	return ConsumeStringResumable(flags, b, 0, validateUTF8)
}

// ConsumeStringResumable is identical to ConsumeString but supports resuming
// from a previous call that returned io.ErrUnexpectedEOF.
//
// A resumeOffset greater than zero means the leading quote was already
// consumed and scanning continues from that offset in b.
func ConsumeStringResumable(flags *ValueFlags, b []byte, resumeOffset int, validateUTF8 bool) (n int, err error) {
	// Consume the leading double quote.
	switch {
	case resumeOffset > 0:
		n = resumeOffset // already handled the leading quote
	case uint(len(b)) == 0:
		return n, io.ErrUnexpectedEOF
	case b[0] == '"':
		n++
	default:
		return n, NewInvalidCharacterError(b[n:], `at start of string (expecting '"')`)
	}

	// Consume every character in the string.
	for uint(len(b)) > uint(n) {
		// Optimize for long sequences of unescaped characters.
		noEscape := func(c byte) bool {
			return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
		}
		for uint(len(b)) > uint(n) && noEscape(b[n]) {
			n++
		}
		if uint(len(b)) <= uint(n) {
			return n, io.ErrUnexpectedEOF
		}

		// Check for terminating double quote.
		if b[n] == '"' {
			n++
			return n, nil
		}

		switch r, rn := utf8.DecodeRune(b[n:]); {
		// Handle UTF-8 encoded byte sequence.
		// Due to specialized handling of ASCII above, we know that
		// all normal sequences at this point must be 2 bytes or larger.
		case rn > 1:
			n += rn
		// Handle escape sequence.
		case r == '\\':
			flags.Join(stringNonVerbatim)
			// A truncated escape is reported at the backslash so that a
			// resumed call re-reads the whole sequence.
			resumeOffset = n
			if uint(len(b)) < uint(n+2) {
				return resumeOffset, io.ErrUnexpectedEOF
			}
			switch r := b[n+1]; r {
			case '/':
				// Forward slash is the only character with 3 representations.
				// Per RFC 8785, section 3.2.2.2., this must not be escaped.
				flags.Join(stringNonCanonical)
				n += 2
			case '"', '\\', 'b', 'f', 'n', 'r', 't':
				n += 2
			case 'u':
				if uint(len(b)) < uint(n+6) {
					if hasEscapedUTF16Prefix(b[n:], false) {
						return resumeOffset, io.ErrUnexpectedEOF
					}
					flags.Join(stringNonCanonical)
					return n, NewInvalidEscapeSequenceError(b[n:])
				}
				v1, ok := parseHexUint16(b[n+2 : n+6])
				if !ok {
					flags.Join(stringNonCanonical)
					return n, NewInvalidEscapeSequenceError(b[n : n+6])
				}
				// Only certain control characters can use the \uFFFF notation
				// for canonical formatting (per RFC 8785, section 3.2.2.2.).
				switch v1 {
				// \uFFFF notation not permitted for these characters.
				case '\b', '\f', '\n', '\r', '\t':
					flags.Join(stringNonCanonical)
				default:
					// \uFFFF notation only permitted for control characters.
					if v1 >= ' ' {
						flags.Join(stringNonCanonical)
					} else {
						// \uFFFF notation must be lower case.
						for _, c := range b[n+2 : n+6] {
							if 'A' <= c && c <= 'F' {
								flags.Join(stringNonCanonical)
							}
						}
					}
				}
				n += 6

				// A surrogate half must be followed by a second \uXXXX escape
				// that completes the pair; this is only enforced when
				// validating UTF-8.
				r := rune(v1)
				if validateUTF8 && utf16.IsSurrogate(r) {
					if uint(len(b)) < uint(n+6) {
						if hasEscapedUTF16Prefix(b[n:], true) {
							return resumeOffset, io.ErrUnexpectedEOF
						}
						flags.Join(stringNonCanonical)
						return n - 6, NewInvalidEscapeSequenceError(b[n-6:])
					} else if v2, ok := parseHexUint16(b[n+2 : n+6]); b[n] != '\\' || b[n+1] != 'u' || !ok {
						flags.Join(stringNonCanonical)
						return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
					} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
						flags.Join(stringNonCanonical)
						return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
					} else {
						n += 6
					}
				}
			default:
				flags.Join(stringNonCanonical)
				return n, NewInvalidEscapeSequenceError(b[n : n+2])
			}
		// Handle invalid UTF-8.
		case r == utf8.RuneError:
			// An incomplete multi-byte sequence at the end of b is treated as
			// truncation rather than as invalid input.
			if !utf8.FullRune(b[n:]) {
				return n, io.ErrUnexpectedEOF
			}
			flags.Join(stringNonVerbatim | stringNonCanonical)
			if validateUTF8 {
				return n, ErrInvalidUTF8
			}
			n++
		// Handle invalid control characters.
		case r < ' ':
			flags.Join(stringNonVerbatim | stringNonCanonical)
			return n, NewInvalidCharacterError(b[n:], "within string (expecting non-control character)")
		default:
			panic("BUG: unhandled character " + QuoteRune(b[n:]))
		}
	}
	return n, io.ErrUnexpectedEOF
}

// AppendUnquote appends the unescaped form of a JSON string in src to dst.
// Any invalid UTF-8 within the string will be replaced with utf8.RuneError,
// but the error will be specified as having encountered such an error.
// The input must be an entire JSON string with no surrounding whitespace.
func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) (v []byte, err error) {
	dst = slices.Grow(dst, len(src))

	// Consume the leading double quote.
	// i marks the start of the pending run of src not yet copied to dst;
	// n is the current scan position.
	var i, n int
	switch {
	case uint(len(src)) == 0:
		return dst, io.ErrUnexpectedEOF
	case src[0] == '"':
		i, n = 1, 1
	default:
		return dst, NewInvalidCharacterError(src, `at start of string (expecting '"')`)
	}

	// Consume every character in the string.
	for uint(len(src)) > uint(n) {
		// Optimize for long sequences of unescaped characters.
		noEscape := func(c byte) bool {
			return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
		}
		for uint(len(src)) > uint(n) && noEscape(src[n]) {
			n++
		}
		if uint(len(src)) <= uint(n) {
			dst = append(dst, src[i:n]...)
			return dst, io.ErrUnexpectedEOF
		}

		// Check for terminating double quote.
		if src[n] == '"' {
			dst = append(dst, src[i:n]...)
			n++
			// Anything after the closing quote is an error, but the unquoted
			// string is still returned.
			if n < len(src) {
				err = NewInvalidCharacterError(src[n:], "after string value")
			}
			return dst, err
		}

		switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); {
		// Handle UTF-8 encoded byte sequence.
		// Due to specialized handling of ASCII above, we know that
		// all normal sequences at this point must be 2 bytes or larger.
		case rn > 1:
			n += rn
		// Handle escape sequence.
		case r == '\\':
			dst = append(dst, src[i:n]...)

			// Handle escape sequence.
			if uint(len(src)) < uint(n+2) {
				return dst, io.ErrUnexpectedEOF
			}
			switch r := src[n+1]; r {
			case '"', '\\', '/':
				dst = append(dst, r)
				n += 2
			case 'b':
				dst = append(dst, '\b')
				n += 2
			case 'f':
				dst = append(dst, '\f')
				n += 2
			case 'n':
				dst = append(dst, '\n')
				n += 2
			case 'r':
				dst = append(dst, '\r')
				n += 2
			case 't':
				dst = append(dst, '\t')
				n += 2
			case 'u':
				if uint(len(src)) < uint(n+6) {
					if hasEscapedUTF16Prefix(src[n:], false) {
						return dst, io.ErrUnexpectedEOF
					}
					return dst, NewInvalidEscapeSequenceError(src[n:])
				}
				v1, ok := parseHexUint16(src[n+2 : n+6])
				if !ok {
					return dst, NewInvalidEscapeSequenceError(src[n : n+6])
				}
				n += 6

				// Check whether this is a surrogate half.
				r := rune(v1)
				if utf16.IsSurrogate(r) {
					r = utf8.RuneError // assume failure unless the following succeeds
					if uint(len(src)) < uint(n+6) {
						if hasEscapedUTF16Prefix(src[n:], true) {
							return utf8.AppendRune(dst, r), io.ErrUnexpectedEOF
						}
						err = NewInvalidEscapeSequenceError(src[n-6:])
					} else if v2, ok := parseHexUint16(src[n+2 : n+6]); src[n] != '\\' || src[n+1] != 'u' || !ok {
						err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
					} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
						err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
					} else {
						n += 6
					}
				}

				dst = utf8.AppendRune(dst, r)
			default:
				return dst, NewInvalidEscapeSequenceError(src[n : n+2])
			}
			// The escape has been emitted; the next pending run starts here.
			i = n
		// Handle invalid UTF-8.
		case r == utf8.RuneError:
			dst = append(dst, src[i:n]...)
			if !utf8.FullRuneInString(string(truncateMaxUTF8(src[n:]))) {
				return dst, io.ErrUnexpectedEOF
			}
			// NOTE: An unescaped string may be longer than the escaped string
			// because invalid UTF-8 bytes are being replaced.
			dst = append(dst, "\uFFFD"...)
			n += rn
			i = n
			// Remember the error but keep going so the whole string is unquoted.
			err = ErrInvalidUTF8
		// Handle invalid control characters.
		case r < ' ':
			dst = append(dst, src[i:n]...)
			return dst, NewInvalidCharacterError(src[n:], "within string (expecting non-control character)")
		default:
			panic("BUG: unhandled character " + QuoteRune(src[n:]))
		}
	}
	dst = append(dst, src[i:n]...)
	return dst, io.ErrUnexpectedEOF
}

// hasEscapedUTF16Prefix reports whether b is possibly
// the truncated prefix of a \uFFFF escape sequence.
//
// When lowerSurrogateHalf is true, the first two hex digits are additionally
// required to be 'd' followed by one of 'c' through 'f' (in either case),
// i.e. a prefix of an escape within ['\uDC00':'\uDFFF'].
func hasEscapedUTF16Prefix[Bytes ~[]byte | ~string](b Bytes, lowerSurrogateHalf bool) bool {
	for i := range len(b) {
		switch c := b[i]; {
		case i == 0 && c != '\\':
			return false
		case i == 1 && c != 'u':
			return false
		case i == 2 && lowerSurrogateHalf && c != 'd' && c != 'D':
			return false // not within ['\uDC00':'\uDFFF']
		case i == 3 && lowerSurrogateHalf && !('c' <= c && c <= 'f') && !('C' <= c && c <= 'F'):
			return false // not within ['\uDC00':'\uDFFF']
		case i >= 2 && i < 6 && !('0' <= c && c <= '9') && !('a' <= c && c <= 'f') && !('A' <= c && c <= 'F'):
			return false
		}
	}
	return true
}

// UnquoteMayCopy returns the unescaped form of b.
// If there are no escaped characters, the output is simply a subslice of
// the input with the surrounding quotes removed.
// Otherwise, a new buffer is allocated for the output.
// It assumes the input is valid.
func UnquoteMayCopy(b []byte, isVerbatim bool) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if isVerbatim {
		return b[len(`"`) : len(b)-len(`"`)]
	}
	// The error is dropped because the input is assumed to be valid.
	b, _ = AppendUnquote(nil, b)
	return b
}

// ConsumeSimpleNumber consumes the next JSON number per RFC 7159, section 6
// but is limited to the grammar for a non-negative integer
// (no sign, fraction, or exponent).
// It returns 0 if it is invalid or more complicated than a simple integer,
// in which case ConsumeNumber should be called.
func ConsumeSimpleNumber(b []byte) (n int) {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if len(b) > 0 {
		if b[0] == '0' {
			n++
		} else if '1' <= b[0] && b[0] <= '9' {
			n++
			for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
				n++
			}
		} else {
			return 0
		}
		// A following '.', 'e', or 'E' means this is not a simple integer.
		if uint(len(b)) <= uint(n) || (b[n] != '.' && b[n] != 'e' && b[n] != 'E') {
			return n
		}
	}
	return 0
}

// ConsumeNumberState identifies the position within the JSON number grammar
// at which ConsumeNumberResumable stopped, so that a later call can resume.
type ConsumeNumberState uint

// The states alternate "before" and "within" so that incrementing a
// "withinX" state yields the "beforeY" state that follows it.
const (
	consumeNumberInit ConsumeNumberState = iota
	beforeIntegerDigits
	withinIntegerDigits
	beforeFractionalDigits
	withinFractionalDigits
	beforeExponentDigits
	withinExponentDigits
)

// ConsumeNumber consumes the next JSON number per RFC 7159, section 6.
// It reports the number of bytes consumed and whether an error was encountered.
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
//
// Note that JSON numbers are not self-terminating.
// If the entire input is consumed, then the caller needs to consider whether
// there may be subsequent unread data that may still be part of this number.
func ConsumeNumber(b []byte) (n int, err error) {
	n, _, err = ConsumeNumberResumable(b, 0, consumeNumberInit)
	return n, err
}

// ConsumeNumberResumable is identical to ConsumeNumber but supports resuming
// from a previous call that returned io.ErrUnexpectedEOF.
//
// The resumeOffset and state arguments are the offset and state returned by
// that previous call. The returned state describes where scanning stopped.
func ConsumeNumberResumable(b []byte, resumeOffset int, state ConsumeNumberState) (n int, _ ConsumeNumberState, err error) {
	// Jump to the right state when resuming from a partial consumption.
	n = resumeOffset
	if state > consumeNumberInit {
		switch state {
		case withinIntegerDigits, withinFractionalDigits, withinExponentDigits:
			// Consume leading digits.
			for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
				n++
			}
			if uint(len(b)) <= uint(n) {
				return n, state, nil // still within the same state
			}
			state++ // switches "withinX" to "beforeY" where Y is the state after X
		}
		switch state {
		case beforeIntegerDigits:
			goto beforeInteger
		case beforeFractionalDigits:
			goto beforeFractional
		case beforeExponentDigits:
			goto beforeExponent
		default:
			return n, state, nil
		}
	}

	// Consume required integer component (with optional minus sign).
beforeInteger:
	resumeOffset = n
	if uint(len(b)) > 0 && b[0] == '-' {
		n++
	}
	switch {
	case uint(len(b)) <= uint(n):
		return resumeOffset, beforeIntegerDigits, io.ErrUnexpectedEOF
	case b[n] == '0':
		// A leading zero cannot be followed by further integer digits.
		n++
		state = beforeFractionalDigits
	case '1' <= b[n] && b[n] <= '9':
		n++
		for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
			n++
		}
		state = withinIntegerDigits
	default:
		return n, state, NewInvalidCharacterError(b[n:], "within number (expecting digit)")
	}

	// Consume optional fractional component.
beforeFractional:
	if uint(len(b)) > uint(n) && b[n] == '.' {
		// On truncation, report the offset of the '.' so a resumed call
		// re-reads it.
		resumeOffset = n
		n++
		switch {
		case uint(len(b)) <= uint(n):
			return resumeOffset, beforeFractionalDigits, io.ErrUnexpectedEOF
		case '0' <= b[n] && b[n] <= '9':
			n++
		default:
			return n, state, NewInvalidCharacterError(b[n:], "within number (expecting digit)")
		}
		for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
			n++
		}
		state = withinFractionalDigits
	}

	// Consume optional exponent component.
beforeExponent:
	if uint(len(b)) > uint(n) && (b[n] == 'e' || b[n] == 'E') {
		// On truncation, report the offset of the 'e'/'E' so a resumed call
		// re-reads it along with any sign.
		resumeOffset = n
		n++
		if uint(len(b)) > uint(n) && (b[n] == '-' || b[n] == '+') {
			n++
		}
		switch {
		case uint(len(b)) <= uint(n):
			return resumeOffset, beforeExponentDigits, io.ErrUnexpectedEOF
		case '0' <= b[n] && b[n] <= '9':
			n++
		default:
			return n, state, NewInvalidCharacterError(b[n:], "within number (expecting digit)")
		}
		for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
			n++
		}
		state = withinExponentDigits
	}

	return n, state, nil
}

// parseHexUint16 is similar to strconv.ParseUint,
// but operates directly on []byte and is optimized for base-16.
// See https://go.dev/issue/42429.
//
// The input must be exactly four hexadecimal digits (either case);
// otherwise it returns (0, false).
func parseHexUint16[Bytes ~[]byte | ~string](b Bytes) (v uint16, ok bool) {
	if len(b) != 4 {
		return 0, false
	}
	for i := range 4 {
		c := b[i]
		switch {
		case '0' <= c && c <= '9':
			c = c - '0'
		case 'a' <= c && c <= 'f':
			c = 10 + c - 'a'
		case 'A' <= c && c <= 'F':
			c = 10 + c - 'A'
		default:
			return 0, false
		}
		v = v*16 + uint16(c)
	}
	return v, true
}

// ParseUint parses b as a decimal unsigned integer according to
// a strict subset of the JSON number grammar, returning the value if valid.
// It returns (0, false) if there is a syntax error and
// returns (math.MaxUint64, false) if there is an overflow.
func ParseUint(b []byte) (v uint64, ok bool) {
	const unsafeWidth = 20 // len(fmt.Sprint(uint64(math.MaxUint64)))
	var n int
	// Accumulate digits without checking for overflow; wrap-around is
	// detected afterwards from the digit count and the wrapped value.
	for ; len(b) > n && ('0' <= b[n] && b[n] <= '9'); n++ {
		v = 10*v + uint64(b[n]-'0')
	}
	switch {
	// Empty input, trailing non-digits, or a leading zero on a longer number.
	case n == 0 || len(b) != n || (b[0] == '0' && string(b) != "0"):
		return 0, false
	// With 20 digits, only values that start with '1' and did not wrap
	// below 1e19 fit in a uint64; anything longer always overflows.
	case n >= unsafeWidth && (b[0] != '1' || v < 1e19 || n > unsafeWidth):
		return math.MaxUint64, false
	}
	return v, true
}

// ParseFloat parses a floating point number according to the Go float grammar.
// Note that the JSON number grammar is a strict subset.
//
// If the number overflows the finite representation of a float,
// then we return MaxFloat since any finite value will always be infinitely
// more accurate at representing another finite value than an infinite value.
func ParseFloat(b []byte, bits int) (v float64, ok bool) {
	// Fast path for exact integer numbers which fit in the
	// 24-bit or 53-bit significand of a float32 or float64.
	var negLen int // either 0 or 1
	if len(b) > 0 && b[0] == '-' {
		negLen = 1
	}
	u, ok := ParseUint(b[negLen:])
	if ok && ((bits == 32 && u <= 1<<24) || (bits == 64 && u <= 1<<53)) {
		// Copysign carries the sign even when u is zero, so "-0" yields -0.
		return math.Copysign(float64(u), float64(-1*negLen)), true
	}

	// Note that the []byte->string conversion unfortunately allocates.
	// See https://go.dev/issue/42429 for more information.
	fv, err := strconv.ParseFloat(string(b), bits)
	if math.IsInf(fv, 0) {
		// Clamp an infinite result to the largest finite value of that sign.
		switch {
		case bits == 32 && math.IsInf(fv, +1):
			fv = +math.MaxFloat32
		case bits == 64 && math.IsInf(fv, +1):
			fv = +math.MaxFloat64
		case bits == 32 && math.IsInf(fv, -1):
			fv = -math.MaxFloat32
		case bits == 64 && math.IsInf(fv, -1):
			fv = -math.MaxFloat64
		}
	}
	return fv, err == nil
}

// escapeASCII reports whether the ASCII character needs to be escaped.
// It conservatively assumes EscapeForHTML.
// The table is indexed by byte value; each row covers 16 consecutive values.
var escapeASCII = [...]uint8{
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // escape control characters
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // escape control characters
	0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // escape '"' and '&'
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, // escape '<' and '>'
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // escape '\\'
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}

// NeedEscape reports whether src needs escaping of any characters.
// It conservatively assumes EscapeForHTML and EscapeForJS.
// It reports true for inputs with invalid UTF-8.
func NeedEscape[Bytes ~[]byte | ~string](src Bytes) bool {
	var i int
	for uint(len(src)) > uint(i) {
		if c := src[i]; c < utf8.RuneSelf {
			if escapeASCII[c] > 0 {
				return true
			}
			i++
		} else {
			r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[i:])))
			if r == utf8.RuneError || r == '\u2028' || r == '\u2029' {
				return true
			}
			i += rn
		}
	}
	return false
}

// AppendQuote appends src to dst as a JSON string per RFC 7159, section 7.
//
// It takes in flags and respects the following:
//   - EscapeForHTML escapes '<', '>', and '&'.
//   - EscapeForJS escapes '\u2028' and '\u2029'.
//   - AllowInvalidUTF8 avoids reporting an error for invalid UTF-8.
+// +// Regardless of whether AllowInvalidUTF8 is specified, +// invalid bytes are replaced with the Unicode replacement character ('\ufffd'). +// If no escape flags are set, then the shortest representable form is used, +// which is also the canonical form for strings (RFC 8785, section 3.2.2.2). +func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, flags *jsonflags.Flags) ([]byte, error) { + var i, n int + var hasInvalidUTF8 bool + dst = slices.Grow(dst, len(`"`)+len(src)+len(`"`)) + dst = append(dst, '"') + for uint(len(src)) > uint(n) { + // Handle single-byte ASCII. + if c := src[n]; c < utf8.RuneSelf { + n++ + if escapeASCII[c] > 0 { + if (c == '<' || c == '>' || c == '&') && !flags.Get(jsonflags.EscapeForHTML) { + continue + } + dst = append(dst, src[i:n-1]...) + dst = appendEscapedASCII(dst, c) + i = n + } + continue + } + + // Handle multi-byte Unicode. + switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); { + case r == utf8.RuneError && rn == 1: + hasInvalidUTF8 = true + dst = append(dst, src[i:n]...) + dst = append(dst, "\ufffd"...) + n += rn + i = n + case (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS): + dst = append(dst, src[i:n]...) + dst = appendEscapedUnicode(dst, r) + n += rn + i = n + default: + n += rn + } + } + dst = append(dst, src[i:n]...) + dst = append(dst, '"') + if hasInvalidUTF8 && !flags.Get(jsonflags.AllowInvalidUTF8) { + return dst, ErrInvalidUTF8 + } + return dst, nil +} + +func appendEscapedASCII(dst []byte, c byte) []byte { + switch c { + case '"', '\\': + dst = append(dst, '\\', c) + case '\b': + dst = append(dst, "\\b"...) + case '\f': + dst = append(dst, "\\f"...) + case '\n': + dst = append(dst, "\\n"...) + case '\r': + dst = append(dst, "\\r"...) + case '\t': + dst = append(dst, "\\t"...) 
// appendEscapedUTF16 appends the six-byte JSON escape `\uXXXX` for the
// UTF-16 code unit x to dst, using lowercase hexadecimal digits.
func appendEscapedUTF16(dst []byte, x uint16) []byte {
	const digits = "0123456789abcdef"
	dst = append(dst, `\u`...)
	// Emit the four nibbles from most to least significant.
	for shift := 12; shift >= 0; shift -= 4 {
		dst = append(dst, digits[(x>>shift)&0xf])
	}
	return dst
}
// AppendFloat appends src to dst formatted as a JSON number
// (RFC 7159, section 6), following the ES6 number-to-string conversion.
// See https://go.dev/issue/14135.
//
// For 64-bit values the output matches ECMA-262, 6th edition,
// section 7.1.12.1 and RFC 8785, section 3.2.2.3, with one exception:
// negative zero is written as -0 rather than 0.
//
// When bits is 32, src is first rounded to float32 and a 32-bit analogue of
// the same algorithm is applied (ECMA-262 itself defines no 32-bit variant).
func AppendFloat(dst []byte, src float64, bits int) []byte {
	if bits == 32 {
		src = float64(float32(src))
	}

	// Non-zero magnitudes outside [1e-6, 1e21) use exponent notation;
	// the thresholds are evaluated at the requested precision.
	format := byte('f')
	if mag := math.Abs(src); mag != 0 {
		switch bits {
		case 64:
			if mag < 1e-6 || mag >= 1e21 {
				format = 'e'
			}
		case 32:
			if m := float32(mag); m < 1e-6 || m >= 1e21 {
				format = 'e'
			}
		}
	}

	dst = strconv.AppendFloat(dst, src, format, -1, bits)
	if format != 'e' {
		return dst
	}

	// strconv pads exponents to two digits; rewrite e-09 as e-9.
	if n := len(dst); n >= 4 && dst[n-4] == 'e' && dst[n-3] == '-' && dst[n-2] == '0' {
		dst[n-2] = dst[n-1]
		dst = dst[:n-1]
	}
	return dst
}
// TrimSuffixWhitespace returns b with any trailing JSON whitespace
// (space, tab, carriage return, or newline) removed.
// The result shares its backing array with b.
func TrimSuffixWhitespace(b []byte) []byte {
	// NOTE: The logic is deliberately minimal so that the function stays inlinable.
	end := len(b)
	for ; end > 0; end-- {
		if c := b[end-1]; c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			break
		}
	}
	return b[:end]
}
+func QuoteRune[Bytes ~[]byte | ~string](b Bytes) string { + r, n := utf8.DecodeRuneInString(string(truncateMaxUTF8(b))) + if r == utf8.RuneError && n == 1 { + return `'\x` + strconv.FormatUint(uint64(b[0]), 16) + `'` + } + return strconv.QuoteRune(r) +} + +// CompareUTF16 lexicographically compares x to y according +// to the UTF-16 codepoints of the UTF-8 encoded input strings. +// This implements the ordering specified in RFC 8785, section 3.2.3. +func CompareUTF16[Bytes ~[]byte | ~string](x, y Bytes) int { + // NOTE: This is an optimized, mostly allocation-free implementation + // of CompareUTF16Simple in wire_test.go. FuzzCompareUTF16 verifies that the + // two implementations agree on the result of comparing any two strings. + isUTF16Self := func(r rune) bool { + return ('\u0000' <= r && r <= '\uD7FF') || ('\uE000' <= r && r <= '\uFFFF') + } + + var invalidUTF8 bool + x0, y0 := x, y + for { + if len(x) == 0 || len(y) == 0 { + if len(x) == len(y) && invalidUTF8 { + return strings.Compare(string(x0), string(y0)) + } + return cmp.Compare(len(x), len(y)) + } + + // ASCII fast-path. + if x[0] < utf8.RuneSelf || y[0] < utf8.RuneSelf { + if x[0] != y[0] { + return cmp.Compare(x[0], y[0]) + } + x, y = x[1:], y[1:] + continue + } + + // Decode next pair of runes as UTF-8. + rx, nx := utf8.DecodeRuneInString(string(truncateMaxUTF8(x))) + ry, ny := utf8.DecodeRuneInString(string(truncateMaxUTF8(y))) + + selfx := isUTF16Self(rx) + selfy := isUTF16Self(ry) + switch { + // The x rune is a single UTF-16 codepoint, while + // the y rune is a surrogate pair of UTF-16 codepoints. + case selfx && !selfy: + ry, _ = utf16.EncodeRune(ry) + // The y rune is a single UTF-16 codepoint, while + // the x rune is a surrogate pair of UTF-16 codepoints. 
// truncateMaxUTF8 returns at most the first utf8.UTFMax bytes of b,
// which is always enough to hold the first rune of b.
//
// The utf8 package currently has no generic variants, which is awkward for
// generic functions that accept either []byte or string. As a workaround,
// callers always use the string-based utf8 functions and pass the input
// through this helper first so that the result is unchanged:
//
//	utf8.DecodeRuneInString(string(truncateMaxUTF8(b)))
//
// Because the truncated value is guaranteed to be short, the conversion
// from []byte to string can be stack allocated.
func truncateMaxUTF8[Bytes ~[]byte | ~string](b Bytes) Bytes {
	// TODO(https://go.dev/issue/56948): Remove this function and
	// instead directly call generic utf8 functions wherever used.
	return b[:min(len(b), utf8.UTFMax)]
}
+var ( + NewError = errors.New + ErrInvalidUTF8 = errors.New("invalid UTF-8 within string") +) + +func NewInvalidCharacterError[Bytes ~[]byte | ~string](prefix Bytes, where string) error { + what := QuoteRune(prefix) + return NewError("invalid character " + what + " " + where) +} + +func NewInvalidEscapeSequenceError[Bytes ~[]byte | ~string](what Bytes) error { + label := "escape sequence" + if len(what) > 6 { + label = "surrogate pair" + } + needEscape := strings.IndexFunc(string(what), func(r rune) bool { + return r == '`' || r == utf8.RuneError || unicode.IsSpace(r) || !unicode.IsPrint(r) + }) >= 0 + if needEscape { + return NewError("invalid " + label + " " + strconv.Quote(string(what)) + " within string") + } else { + return NewError("invalid " + label + " `" + string(what) + "` within string") + } +} diff --git a/internal/json/jsontext/bundle.go b/internal/json/jsontext/bundle.go new file mode 100644 index 0000000000..54b4fbbb03 --- /dev/null +++ b/internal/json/jsontext/bundle.go @@ -0,0 +1,4099 @@ +// Code generated by golang.org/x/tools/cmd/bundle. DO NOT EDIT. +// $ bundle -prefix -import=github.com/go-json-experiment/json/internal/jsonflags=github.com/quay/clair/v4/internal/json/internal/jsonflags -import=github.com/go-json-experiment/json/internal/jsonopts=github.com/quay/clair/v4/internal/json/internal/jsonopts -import=github.com/go-json-experiment/json/internal/jsonwire=github.com/quay/clair/v4/internal/json/internal/jsonwire -import=github.com/go-json-experiment/json/internal=github.com/quay/clair/v4/internal/json/internal -import=github.com/go-json-experiment/json/jsontext=github.com/quay/clair/v4/internal/json/jsontext -import=github.com/go-json-experiment/json=github.com/quay/clair/v4/internal/json -dst github.com/quay/clair/v4/internal/json/jsontext github.com/go-json-experiment/json/jsontext + +// Package jsontext implements syntactic processing of JSON +// as specified in RFC 4627, RFC 7159, RFC 7493, RFC 8259, and RFC 8785. 
+// JSON is a simple data interchange format that can represent +// primitive data types such as booleans, strings, and numbers, +// in addition to structured data types such as objects and arrays. +// +// The [Encoder] and [Decoder] types are used to encode or decode +// a stream of JSON tokens or values. +// +// # Tokens and Values +// +// A JSON token refers to the basic structural elements of JSON: +// +// - a JSON literal (i.e., null, true, or false) +// - a JSON string (e.g., "hello, world!") +// - a JSON number (e.g., 123.456) +// - a start or end delimiter for a JSON object (i.e., '{' or '}') +// - a start or end delimiter for a JSON array (i.e., '[' or ']') +// +// A JSON token is represented by the [Token] type in Go. Technically, +// there are two additional structural characters (i.e., ':' and ','), +// but there is no [Token] representation for them since their presence +// can be inferred by the structure of the JSON grammar itself. +// For example, there must always be an implicit colon between +// the name and value of a JSON object member. +// +// A JSON value refers to a complete unit of JSON data: +// +// - a JSON literal, string, or number +// - a JSON object (e.g., `{"name":"value"}`) +// - a JSON array (e.g., `[1,2,3]`) +// +// A JSON value is represented by the [Value] type in Go and is a []byte +// containing the raw textual representation of the value. There is some overlap +// between tokens and values as both contain literals, strings, and numbers. +// However, only a value can represent the entirety of a JSON object or array. +// +// The [Encoder] and [Decoder] types contain methods to read or write the next +// [Token] or [Value] in a sequence. They maintain a state machine to validate +// whether the sequence of JSON tokens and/or values produces valid JSON. +// [Options] may be passed to the [NewEncoder] or [NewDecoder] constructors +// to configure the syntactic behavior of encoding and decoding.
+// +// # Terminology +// +// The terms "encode" and "decode" are used for syntactic functionality +// that is concerned with processing JSON based on its grammar, and +// the terms "marshal" and "unmarshal" are used for semantic functionality +// that determines the meaning of JSON values as Go values and vice-versa. +// This package (i.e., [jsontext]) deals with JSON at a syntactic layer, +// while [encoding/json/v2] deals with JSON at a semantic layer. +// The goal is to provide a clear distinction between functionality that +// is purely concerned with encoding versus that of marshaling. +// For example, one can directly encode a stream of JSON tokens without +// needing to marshal a concrete Go value representing them. +// Similarly, one can decode a stream of JSON tokens without +// needing to unmarshal them into a concrete Go value. +// +// This package uses JSON terminology when discussing JSON, which may differ +// from related concepts in Go or elsewhere in computing literature. +// +// - a JSON "object" refers to an unordered collection of name/value members. +// - a JSON "array" refers to an ordered sequence of elements. +// - a JSON "value" refers to either a literal (i.e., null, false, or true), +// string, number, object, or array. +// +// See RFC 8259 for more information. +// +// # Specifications +// +// Relevant specifications include RFC 4627, RFC 7159, RFC 7493, RFC 8259, +// and RFC 8785. Each RFC is generally a stricter subset of another RFC. +// In increasing order of strictness: +// +// - RFC 4627 and RFC 7159 do not require (but recommend) the use of UTF-8 +// and also do not require (but recommend) that object names be unique. +// - RFC 8259 requires the use of UTF-8, +// but does not require (but recommends) that object names be unique. +// - RFC 7493 requires the use of UTF-8 +// and also requires that object names be unique. +// - RFC 8785 defines a canonical representation. 
It requires the use of UTF-8 +// and also requires that object names be unique and in a specific ordering. +// It specifies exactly how strings and numbers must be formatted. +// +// The primary difference between RFC 4627 and RFC 7159 is that the former +// restricted top-level values to only JSON objects and arrays, while +// RFC 7159 and subsequent RFCs permit top-level values to additionally be +// JSON nulls, booleans, strings, or numbers. +// +// By default, this package operates on RFC 7493, but can be configured +// to operate according to the other RFC specifications. +// RFC 7493 is a stricter subset of RFC 8259 and fully compliant with it. +// In particular, it makes specific choices about behavior that RFC 8259 +// leaves as undefined in order to ensure greater interoperability. +// + +package jsontext + +import ( + "bytes" + "errors" + "io" + "math" + "math/bits" + "slices" + "strconv" + "strings" + "sync" + + "github.com/quay/clair/v4/internal/json/internal" + "github.com/quay/clair/v4/internal/json/internal/jsonflags" + "github.com/quay/clair/v4/internal/json/internal/jsonopts" + "github.com/quay/clair/v4/internal/json/internal/jsonwire" +) + +// NOTE: The logic for decoding is complicated by the fact that reading from +// an io.Reader into a temporary buffer means that the buffer may contain a +// truncated portion of some valid input, requiring the need to fetch more data. +// +// This file is structured in the following way: +// +// - consumeXXX functions parse an exact JSON token from a []byte. +// If the buffer appears truncated, then it returns io.ErrUnexpectedEOF. +// The consumeSimpleXXX functions are so named because they only handle +// a subset of the grammar for the JSON token being parsed. +// They do not handle the full grammar to keep these functions inlinable. +// +// - Decoder.consumeXXX methods parse the next JSON token from Decoder.buf, +// automatically fetching more input if necessary. 
These methods take +// a position relative to the start of Decoder.buf as an argument and +// return the end of the consumed JSON token as a position, +// also relative to the start of Decoder.buf. +// +// - In the event of I/O errors or state machine violations, +// the implementation avoids mutating the state of Decoder +// (aside from the book-keeping needed to implement Decoder.fetch). +// For this reason, only Decoder.ReadToken and Decoder.ReadValue are +// responsible for updating Decoder.prevStart and Decoder.prevEnd. +// +// - For performance, much of the implementation uses the pattern of calling +// the inlinable consumeXXX functions first, and if more work is necessary, +// then it calls the slower Decoder.consumeXXX methods. +// TODO: Revisit this pattern if the Go compiler provides finer control +// over exactly which calls are inlined or not. + +// Decoder is a streaming decoder for raw JSON tokens and values. +// It is used to read a stream of top-level JSON values, +// each separated by optional whitespace characters. +// +// [Decoder.ReadToken] and [Decoder.ReadValue] calls may be interleaved. +// For example, the following JSON value: +// +// {"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}} +// +// can be parsed with the following calls (ignoring errors for brevity): +// +// d.ReadToken() // { +// d.ReadToken() // "name" +// d.ReadToken() // "value" +// d.ReadValue() // "array" +// d.ReadToken() // [ +// d.ReadToken() // null +// d.ReadToken() // false +// d.ReadValue() // true +// d.ReadToken() // 3.14159 +// d.ReadToken() // ] +// d.ReadValue() // "object" +// d.ReadValue() // {"k":"v"} +// d.ReadToken() // } +// +// The above is one of many possible sequences of calls and +// may not represent the most sensible method to call for any given token/value. +// For example, it is probably more common to call [Decoder.ReadToken] to obtain a +// string token for object names.
// decodeBuffer is a buffer split into 4 segments:
//
//   - buf[0:prevEnd]         // already read portion of the buffer
//   - buf[prevStart:prevEnd] // previously read value
//   - buf[prevEnd:len(buf)]  // unread portion of the buffer
//   - buf[len(buf):cap(buf)] // unused portion of the buffer
//
// Invariants:
//
//	0 ≤ prevStart ≤ prevEnd ≤ len(buf) ≤ cap(buf)
type decodeBuffer struct {
	// peekPos and peekErr cache the outcome of the most recent PeekKind call.
	// A zero peekPos means that there is no cached result.
	peekPos int   // non-zero if valid offset into buf for start of next token
	peekErr error // implies peekPos is -1

	buf       []byte // may alias rd if it is a bytes.Buffer
	prevStart int    // start of the previously read token or value within buf
	prevEnd   int    // end of the previously read token or value within buf

	// baseOffset is added to prevStart and prevEnd to obtain
	// the absolute offset relative to the start of io.Reader stream.
	baseOffset int64

	// rd is the source of additional input.
	// It may be nil, in which case no further data can be fetched and
	// buf is treated as caller-provided memory that must not be mutated.
	rd io.Reader
}
// fetch reads at least 1 byte from the underlying io.Reader.
// It returns io.ErrUnexpectedEOF if zero bytes were read and io.EOF was seen.
//
// Before reading, the buffer is compacted so that it starts at prevStart;
// baseOffset, prevStart, and prevEnd are adjusted accordingly, which means
// that positions into d.buf obtained before the call are no longer valid.
// Any read error other than io.EOF is wrapped in an *ioError.
func (d *decoderState) fetch() error {
	// Without a reader there is nowhere to obtain more input from.
	if d.rd == nil {
		return io.ErrUnexpectedEOF
	}

	// Inform objectNameStack that we are about to fetch new buffer content.
	d.Names.copyQuotedBuffer(d.buf)

	// Specialize bytes.Buffer for better performance.
	if bb, ok := d.rd.(*bytes.Buffer); ok {
		switch {
		case bb.Len() == 0:
			return io.ErrUnexpectedEOF
		case len(d.buf) == 0:
			d.buf = bb.Next(bb.Len()) // "read" all data in the buffer
			return nil
		default:
			// This only occurs if a partially filled bytes.Buffer was provided
			// and more data is written to it while Decoder is reading from it.
			// This practice will lead to data corruption since future writes
			// may overwrite the contents of the current buffer.
			//
			// The user is trying to use a bytes.Buffer as a pipe,
			// but a bytes.Buffer is a poor implementation of a pipe;
			// the purpose-built io.Pipe should be used instead.
			return &ioError{action: "read", err: errBufferWriteAfterNext}
		}
	}

	// Allocate initial buffer if empty.
	if cap(d.buf) == 0 {
		d.buf = make([]byte, 0, 64)
	}

	// Check whether to grow the buffer.
	const maxBufferSize = 4 << 10
	const growthSizeFactor = 2 // higher value is faster
	const growthRateFactor = 2 // higher value is slower
	// By default, grow if below the maximum buffer size.
	grow := cap(d.buf) <= maxBufferSize/growthSizeFactor
	// Growing can be expensive, so only grow
	// if a sufficient number of bytes have been processed.
	grow = grow && int64(cap(d.buf)) < d.previousOffsetEnd()/growthRateFactor
	// If prevStart==0, then fetch was called in order to fetch more data
	// to finish consuming a large JSON value contiguously.
	// Grow if the buffer is at least 75% full
	// (i.e., less than 25% of the capacity remains available).
	// Note that this may cause the input buffer to exceed maxBufferSize.
	grow = grow || (d.prevStart == 0 && len(d.buf) >= 3*cap(d.buf)/4)

	if grow {
		// Allocate a new buffer and copy the contents of the old buffer over.
		// TODO: Provide a hard limit on the maximum internal buffer size?
		buf := make([]byte, 0, cap(d.buf)*growthSizeFactor)
		d.buf = append(buf, d.buf[d.prevStart:]...)
	} else {
		// Move unread portion of the data to the front.
		n := copy(d.buf[:cap(d.buf)], d.buf[d.prevStart:])
		d.buf = d.buf[:n]
	}
	// The buffer now begins at what used to be prevStart,
	// so shift the bookkeeping to keep absolute offsets unchanged.
	d.baseOffset += int64(d.prevStart)
	d.prevEnd -= d.prevStart
	d.prevStart = 0

	// Read more data into the internal buffer.
	for {
		n, err := d.rd.Read(d.buf[len(d.buf):cap(d.buf)])
		switch {
		case n > 0:
			d.buf = d.buf[:len(d.buf)+n]
			return nil // ignore errors if any bytes are read
		case err == io.EOF:
			return io.ErrUnexpectedEOF
		case err != nil:
			return &ioError{action: "read", err: err}
		default:
			continue // Read returned (0, nil)
		}
	}
}
+func (d *decodeBuffer) invalidatePreviousRead() { + // Avoid mutating the buffer if d.rd is nil which implies that d.buf + // is provided by the user code and may not expect mutations. + isBytesBuffer := func(r io.Reader) bool { + _, ok := r.(*bytes.Buffer) + return ok + } + if d.rd != nil && !isBytesBuffer(d.rd) && d.prevStart < d.prevEnd && uint(d.prevStart) < uint(len(d.buf)) { + d.buf[d.prevStart] = invalidateBufferByte + d.prevStart = d.prevEnd + } +} + +// needMore reports whether there are no more unread bytes. +func (d *decodeBuffer) needMore(pos int) bool { + // NOTE: The arguments and logic are kept simple to keep this inlinable. + return pos == len(d.buf) +} + +// injectSyntacticErrorWithPosition wraps a SyntacticError with the position, +// otherwise it returns the error as is. +// It takes a position relative to the start of the start of d.buf. +func (d *decodeBuffer) injectSyntacticErrorWithPosition(err error, pos int) error { + if serr, ok := err.(*SyntacticError); ok { + return serr.withOffset(d.baseOffset + int64(pos)) + } + return err +} + +func (d *decodeBuffer) previousOffsetStart() int64 { return d.baseOffset + int64(d.prevStart) } + +func (d *decodeBuffer) previousOffsetEnd() int64 { return d.baseOffset + int64(d.prevEnd) } + +func (d *decodeBuffer) PreviousBuffer() []byte { return d.buf[d.prevStart:d.prevEnd] } + +func (d *decodeBuffer) unreadBuffer() []byte { return d.buf[d.prevEnd:len(d.buf)] } + +// PeekKind retrieves the next token kind, but does not advance the read offset. +// It returns 0 if there are no more tokens. +func (d *Decoder) PeekKind() Kind { + return d.s.PeekKind() +} + +func (d *decoderState) PeekKind() Kind { + // Check whether we have a cached peek result. + if d.peekPos > 0 { + return Kind(d.buf[d.peekPos]).normalize() + } + + var err error + d.invalidatePreviousRead() + pos := d.prevEnd + + // Consume leading whitespace. 
+ pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + if d.needMore(pos) { + if pos, err = d.consumeWhitespace(pos); err != nil { + if err == io.ErrUnexpectedEOF && d.Tokens.Depth() == 1 { + err = io.EOF // EOF possibly if no Tokens present after top-level value + } + d.peekPos, d.peekErr = -1, err + return invalidKind + } + } + + // Consume colon or comma. + var delim byte + if c := d.buf[pos]; c == ':' || c == ',' { + delim = c + pos += 1 + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + if d.needMore(pos) { + if pos, err = d.consumeWhitespace(pos); err != nil { + d.peekPos, d.peekErr = -1, d.checkDelimBeforeIOError(delim, err) + return invalidKind + } + } + } + next := Kind(d.buf[pos]).normalize() + if d.Tokens.needDelim(next) != delim { + d.peekPos, d.peekErr = -1, d.checkDelim(delim, next) + return invalidKind + } + + // This may set peekPos to zero, which is indistinguishable from + // the uninitialized state. While a small hit to performance, it is correct + // since ReadValue and ReadToken will disregard the cached result and + // recompute the next kind. + d.peekPos, d.peekErr = pos, nil + return next +} + +// checkDelimBeforeIOError checks whether the delim is even valid +// before returning an IO error, which occurs after the delim. +func (d *decoderState) checkDelimBeforeIOError(delim byte, err error) error { + // Since an IO error occurred, we do not know what the next kind is. + // However, knowing the next kind is necessary to validate + // whether the current delim is at least potentially valid. + // Since a JSON string is always valid as the next token, + // conservatively assume that is the next kind for validation. + const next = Kind('"') + if d.Tokens.needDelim(next) != delim { + err = d.checkDelim(delim, next) + } + return err +} + +// checkDelim checks whether delim is valid for the given next kind. 
+func (d *decoderState) checkDelim(delim byte, next Kind) error { + pos := d.prevEnd // restore position to right after leading whitespace + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + err := d.Tokens.checkDelim(delim, next) + return d.injectSyntacticErrorWithPosition(err, pos) +} + +// SkipValue is semantically equivalent to calling [Decoder.ReadValue] and discarding +// the result except that memory is not wasted trying to hold the entire result. +func (d *Decoder) SkipValue() error { + return d.s.SkipValue() +} + +func (d *decoderState) SkipValue() error { + switch d.PeekKind() { + case '{', '[': + // For JSON objects and arrays, keep skipping all tokens + // until the depth matches the starting depth. + depth := d.Tokens.Depth() + for { + if _, err := d.ReadToken(); err != nil { + return err + } + if depth >= d.Tokens.Depth() { + return nil + } + } + default: + // Trying to skip a value when the next token is a '}' or ']' + // will result in an error being returned here. + var flags jsonwire.ValueFlags + if _, err := d.ReadValue(&flags); err != nil { + return err + } + return nil + } +} + +// ReadToken reads the next [Token], advancing the read offset. +// The returned token is only valid until the next Peek, Read, or Skip call. +// It returns [io.EOF] if there are no more tokens. +func (d *Decoder) ReadToken() (Token, error) { + return d.s.ReadToken() +} + +func (d *decoderState) ReadToken() (Token, error) { + // Determine the next kind. + var err error + var next Kind + pos := d.peekPos + if pos != 0 { + // Use cached peek result. + if d.peekErr != nil { + err := d.peekErr + d.peekPos, d.peekErr = 0, nil // possibly a transient I/O error + return Token{}, err + } + next = Kind(d.buf[pos]).normalize() + d.peekPos = 0 // reset cache + } else { + d.invalidatePreviousRead() + pos = d.prevEnd + + // Consume leading whitespace. 
+ pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + if d.needMore(pos) { + if pos, err = d.consumeWhitespace(pos); err != nil { + if err == io.ErrUnexpectedEOF && d.Tokens.Depth() == 1 { + err = io.EOF // EOF possibly if no Tokens present after top-level value + } + return Token{}, err + } + } + + // Consume colon or comma. + var delim byte + if c := d.buf[pos]; c == ':' || c == ',' { + delim = c + pos += 1 + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + if d.needMore(pos) { + if pos, err = d.consumeWhitespace(pos); err != nil { + return Token{}, d.checkDelimBeforeIOError(delim, err) + } + } + } + next = Kind(d.buf[pos]).normalize() + if d.Tokens.needDelim(next) != delim { + return Token{}, d.checkDelim(delim, next) + } + } + + // Handle the next token. + var n int + switch next { + case 'n': + if jsonwire.ConsumeNull(d.buf[pos:]) == 0 { + pos, err = d.consumeLiteral(pos, "null") + if err != nil { + return Token{}, d.injectSyntacticErrorWithPosition(err, pos) + } + } else { + pos += len("null") + } + if err = d.Tokens.appendLiteral(); err != nil { + return Token{}, d.injectSyntacticErrorWithPosition(err, pos-len("null")) // report position at start of literal + } + d.prevStart, d.prevEnd = pos, pos + return Null, nil + + case 'f': + if jsonwire.ConsumeFalse(d.buf[pos:]) == 0 { + pos, err = d.consumeLiteral(pos, "false") + if err != nil { + return Token{}, d.injectSyntacticErrorWithPosition(err, pos) + } + } else { + pos += len("false") + } + if err = d.Tokens.appendLiteral(); err != nil { + return Token{}, d.injectSyntacticErrorWithPosition(err, pos-len("false")) // report position at start of literal + } + d.prevStart, d.prevEnd = pos, pos + return False, nil + + case 't': + if jsonwire.ConsumeTrue(d.buf[pos:]) == 0 { + pos, err = d.consumeLiteral(pos, "true") + if err != nil { + return Token{}, d.injectSyntacticErrorWithPosition(err, pos) + } + } else { + pos += len("true") + } + if err = d.Tokens.appendLiteral(); err != nil { + return Token{}, 
d.injectSyntacticErrorWithPosition(err, pos-len("true")) // report position at start of literal + } + d.prevStart, d.prevEnd = pos, pos + return True, nil + + case '"': + var flags jsonwire.ValueFlags // TODO: Preserve this in Token? + if n = jsonwire.ConsumeSimpleString(d.buf[pos:]); n == 0 { + oldAbsPos := d.baseOffset + int64(pos) + pos, err = d.consumeString(&flags, pos) + newAbsPos := d.baseOffset + int64(pos) + n = int(newAbsPos - oldAbsPos) + if err != nil { + return Token{}, d.injectSyntacticErrorWithPosition(err, pos) + } + } else { + pos += n + } + if !d.Flags.Get(jsonflags.AllowDuplicateNames) && d.Tokens.Last.NeedObjectName() { + if !d.Tokens.Last.isValidNamespace() { + return Token{}, errInvalidNamespace + } + if d.Tokens.Last.isActiveNamespace() && !d.Namespaces.Last().insertQuoted(d.buf[pos-n:pos], flags.IsVerbatim()) { + err = newDuplicateNameError(d.buf[pos-n : pos]) + return Token{}, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of string + } + d.Names.ReplaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds + } + if err = d.Tokens.appendString(); err != nil { + return Token{}, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of string + } + d.prevStart, d.prevEnd = pos-n, pos + return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil + + case '0': + // NOTE: Since JSON numbers are not self-terminating, + // we need to make sure that the next byte is not part of a number. 
+ if n = jsonwire.ConsumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) { + oldAbsPos := d.baseOffset + int64(pos) + pos, err = d.consumeNumber(pos) + newAbsPos := d.baseOffset + int64(pos) + n = int(newAbsPos - oldAbsPos) + if err != nil { + return Token{}, d.injectSyntacticErrorWithPosition(err, pos) + } + } else { + pos += n + } + if err = d.Tokens.appendNumber(); err != nil { + return Token{}, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of number + } + d.prevStart, d.prevEnd = pos-n, pos + return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil + + case '{': + if err = d.Tokens.pushObject(); err != nil { + return Token{}, d.injectSyntacticErrorWithPosition(err, pos) + } + if !d.Flags.Get(jsonflags.AllowDuplicateNames) { + d.Names.push() + d.Namespaces.push() + } + pos += 1 + d.prevStart, d.prevEnd = pos, pos + return ObjectStart, nil + + case '}': + if err = d.Tokens.popObject(); err != nil { + return Token{}, d.injectSyntacticErrorWithPosition(err, pos) + } + if !d.Flags.Get(jsonflags.AllowDuplicateNames) { + d.Names.pop() + d.Namespaces.pop() + } + pos += 1 + d.prevStart, d.prevEnd = pos, pos + return ObjectEnd, nil + + case '[': + if err = d.Tokens.pushArray(); err != nil { + return Token{}, d.injectSyntacticErrorWithPosition(err, pos) + } + pos += 1 + d.prevStart, d.prevEnd = pos, pos + return ArrayStart, nil + + case ']': + if err = d.Tokens.popArray(); err != nil { + return Token{}, d.injectSyntacticErrorWithPosition(err, pos) + } + pos += 1 + d.prevStart, d.prevEnd = pos, pos + return ArrayEnd, nil + + default: + err = newInvalidCharacterError(d.buf[pos:], "at start of token") + return Token{}, d.injectSyntacticErrorWithPosition(err, pos) + } +} + +// ReadValue returns the next raw JSON value, advancing the read offset. 
+// The value is stripped of any leading or trailing whitespace and +// contains the exact bytes of the input, which may contain invalid UTF-8 +// if [AllowInvalidUTF8] is specified. +// +// The returned value is only valid until the next Peek, Read, or Skip call and +// may not be mutated while the Decoder remains in use. +// If the decoder is currently at the end token for an object or array, +// then it reports a [SyntacticError] and the internal state remains unchanged. +// It returns [io.EOF] if there are no more values. +func (d *Decoder) ReadValue() (Value, error) { + var flags jsonwire.ValueFlags + return d.s.ReadValue(&flags) +} + +func (d *decoderState) ReadValue(flags *jsonwire.ValueFlags) (Value, error) { + // Determine the next kind. + var err error + var next Kind + pos := d.peekPos + if pos != 0 { + // Use cached peek result. + if d.peekErr != nil { + err := d.peekErr + d.peekPos, d.peekErr = 0, nil // possibly a transient I/O error + return nil, err + } + next = Kind(d.buf[pos]).normalize() + d.peekPos = 0 // reset cache + } else { + d.invalidatePreviousRead() + pos = d.prevEnd + + // Consume leading whitespace. + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + if d.needMore(pos) { + if pos, err = d.consumeWhitespace(pos); err != nil { + if err == io.ErrUnexpectedEOF && d.Tokens.Depth() == 1 { + err = io.EOF // EOF possibly if no Tokens present after top-level value + } + return nil, err + } + } + + // Consume colon or comma. + var delim byte + if c := d.buf[pos]; c == ':' || c == ',' { + delim = c + pos += 1 + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + if d.needMore(pos) { + if pos, err = d.consumeWhitespace(pos); err != nil { + return nil, d.checkDelimBeforeIOError(delim, err) + } + } + } + next = Kind(d.buf[pos]).normalize() + if d.Tokens.needDelim(next) != delim { + return nil, d.checkDelim(delim, next) + } + } + + // Handle the next value. 
+ oldAbsPos := d.baseOffset + int64(pos) + pos, err = d.consumeValue(flags, pos, d.Tokens.Depth()) + newAbsPos := d.baseOffset + int64(pos) + n := int(newAbsPos - oldAbsPos) + if err != nil { + return nil, d.injectSyntacticErrorWithPosition(err, pos) + } + switch next { + case 'n', 't', 'f': + err = d.Tokens.appendLiteral() + case '"': + if !d.Flags.Get(jsonflags.AllowDuplicateNames) && d.Tokens.Last.NeedObjectName() { + if !d.Tokens.Last.isValidNamespace() { + err = errInvalidNamespace + break + } + if d.Tokens.Last.isActiveNamespace() && !d.Namespaces.Last().insertQuoted(d.buf[pos-n:pos], flags.IsVerbatim()) { + err = newDuplicateNameError(d.buf[pos-n : pos]) + break + } + d.Names.ReplaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds + } + err = d.Tokens.appendString() + case '0': + err = d.Tokens.appendNumber() + case '{': + if err = d.Tokens.pushObject(); err != nil { + break + } + if err = d.Tokens.popObject(); err != nil { + panic("BUG: popObject should never fail immediately after pushObject: " + err.Error()) + } + case '[': + if err = d.Tokens.pushArray(); err != nil { + break + } + if err = d.Tokens.popArray(); err != nil { + panic("BUG: popArray should never fail immediately after pushArray: " + err.Error()) + } + } + if err != nil { + return nil, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of value + } + d.prevEnd = pos + d.prevStart = pos - n + return d.buf[pos-n : pos : pos], nil +} + +// CheckEOF verifies that the input has no more data. +func (d *decoderState) CheckEOF() error { + switch pos, err := d.consumeWhitespace(d.prevEnd); err { + case nil: + err := newInvalidCharacterError(d.buf[pos:], "after top-level value") + return d.injectSyntacticErrorWithPosition(err, pos) + case io.ErrUnexpectedEOF: + return nil + default: + return err + } +} + +// consumeWhitespace consumes all whitespace starting at d.buf[pos:]. +// It returns the new position in d.buf immediately after the last whitespace. 
+// If it returns nil, there is guaranteed to at least be one unread byte. +// +// The following pattern is common in this implementation: +// +// pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) +// if d.needMore(pos) { +// if pos, err = d.consumeWhitespace(pos); err != nil { +// return ... +// } +// } +// +// It is difficult to simplify this without sacrificing performance since +// consumeWhitespace must be inlined. The body of the if statement is +// executed only in rare situations where we need to fetch more data. +// Since fetching may return an error, we also need to check the error. +func (d *decoderState) consumeWhitespace(pos int) (newPos int, err error) { + for { + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + if d.needMore(pos) { + absPos := d.baseOffset + int64(pos) + err = d.fetch() // will mutate d.buf and invalidate pos + pos = int(absPos - d.baseOffset) + if err != nil { + return pos, err + } + continue + } + return pos, nil + } +} + +// consumeValue consumes a single JSON value starting at d.buf[pos:]. +// It returns the new position in d.buf immediately after the value. +func (d *decoderState) consumeValue(flags *jsonwire.ValueFlags, pos, depth int) (newPos int, err error) { + for { + var n int + var err error + switch next := Kind(d.buf[pos]).normalize(); next { + case 'n': + if n = jsonwire.ConsumeNull(d.buf[pos:]); n == 0 { + n, err = jsonwire.ConsumeLiteral(d.buf[pos:], "null") + } + case 'f': + if n = jsonwire.ConsumeFalse(d.buf[pos:]); n == 0 { + n, err = jsonwire.ConsumeLiteral(d.buf[pos:], "false") + } + case 't': + if n = jsonwire.ConsumeTrue(d.buf[pos:]); n == 0 { + n, err = jsonwire.ConsumeLiteral(d.buf[pos:], "true") + } + case '"': + if n = jsonwire.ConsumeSimpleString(d.buf[pos:]); n == 0 { + return d.consumeString(flags, pos) + } + case '0': + // NOTE: Since JSON numbers are not self-terminating, + // we need to make sure that the next byte is not part of a number. 
+ if n = jsonwire.ConsumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) { + return d.consumeNumber(pos) + } + case '{': + return d.consumeObject(flags, pos, depth) + case '[': + return d.consumeArray(flags, pos, depth) + default: + return pos, newInvalidCharacterError(d.buf[pos:], "at start of value") + } + if err == io.ErrUnexpectedEOF { + absPos := d.baseOffset + int64(pos) + err = d.fetch() // will mutate d.buf and invalidate pos + pos = int(absPos - d.baseOffset) + if err != nil { + return pos, err + } + continue + } + return pos + n, err + } +} + +// consumeLiteral consumes a single JSON literal starting at d.buf[pos:]. +// It returns the new position in d.buf immediately after the literal. +func (d *decoderState) consumeLiteral(pos int, lit string) (newPos int, err error) { + for { + n, err := jsonwire.ConsumeLiteral(d.buf[pos:], lit) + if err == io.ErrUnexpectedEOF { + absPos := d.baseOffset + int64(pos) + err = d.fetch() // will mutate d.buf and invalidate pos + pos = int(absPos - d.baseOffset) + if err != nil { + return pos, err + } + continue + } + return pos + n, err + } +} + +// consumeString consumes a single JSON string starting at d.buf[pos:]. +// It returns the new position in d.buf immediately after the string. +func (d *decoderState) consumeString(flags *jsonwire.ValueFlags, pos int) (newPos int, err error) { + var n int + for { + n, err = jsonwire.ConsumeStringResumable(flags, d.buf[pos:], n, !d.Flags.Get(jsonflags.AllowInvalidUTF8)) + if err == io.ErrUnexpectedEOF { + absPos := d.baseOffset + int64(pos) + err = d.fetch() // will mutate d.buf and invalidate pos + pos = int(absPos - d.baseOffset) + if err != nil { + return pos, err + } + continue + } + return pos + n, err + } +} + +// consumeNumber consumes a single JSON number starting at d.buf[pos:]. +// It returns the new position in d.buf immediately after the number. 
+func (d *decoderState) consumeNumber(pos int) (newPos int, err error) { + var n int + var state jsonwire.ConsumeNumberState + for { + n, state, err = jsonwire.ConsumeNumberResumable(d.buf[pos:], n, state) + // NOTE: Since JSON numbers are not self-terminating, + // we need to make sure that the next byte is not part of a number. + if err == io.ErrUnexpectedEOF || d.needMore(pos+n) { + mayTerminate := err == nil + absPos := d.baseOffset + int64(pos) + err = d.fetch() // will mutate d.buf and invalidate pos + pos = int(absPos - d.baseOffset) + if err != nil { + if mayTerminate && err == io.ErrUnexpectedEOF { + return pos + n, nil + } + return pos, err + } + continue + } + return pos + n, err + } +} + +// consumeObject consumes a single JSON object starting at d.buf[pos:]. +// It returns the new position in d.buf immediately after the object. +func (d *decoderState) consumeObject(flags *jsonwire.ValueFlags, pos, depth int) (newPos int, err error) { + var n int + var names *objectNamespace + if !d.Flags.Get(jsonflags.AllowDuplicateNames) { + d.Namespaces.push() + defer d.Namespaces.pop() + names = d.Namespaces.Last() + } + + // Handle before start. + if uint(pos) >= uint(len(d.buf)) || d.buf[pos] != '{' { + panic("BUG: consumeObject must be called with a buffer that starts with '{'") + } else if depth == maxNestingDepth+1 { + return pos, errMaxDepth + } + pos++ + + // Handle after start. + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + if d.needMore(pos) { + if pos, err = d.consumeWhitespace(pos); err != nil { + return pos, err + } + } + if d.buf[pos] == '}' { + pos++ + return pos, nil + } + + depth++ + for { + // Handle before name. 
+ pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + if d.needMore(pos) { + if pos, err = d.consumeWhitespace(pos); err != nil { + return pos, err + } + } + var flags2 jsonwire.ValueFlags + if n = jsonwire.ConsumeSimpleString(d.buf[pos:]); n == 0 { + oldAbsPos := d.baseOffset + int64(pos) + pos, err = d.consumeString(&flags2, pos) + newAbsPos := d.baseOffset + int64(pos) + n = int(newAbsPos - oldAbsPos) + flags.Join(flags2) + if err != nil { + return pos, err + } + } else { + pos += n + } + if !d.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(d.buf[pos-n:pos], flags2.IsVerbatim()) { + return pos - n, newDuplicateNameError(d.buf[pos-n : pos]) + } + + // Handle after name. + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + if d.needMore(pos) { + if pos, err = d.consumeWhitespace(pos); err != nil { + return pos, err + } + } + if d.buf[pos] != ':' { + return pos, newInvalidCharacterError(d.buf[pos:], "after object name (expecting ':')") + } + pos++ + + // Handle before value. + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + if d.needMore(pos) { + if pos, err = d.consumeWhitespace(pos); err != nil { + return pos, err + } + } + pos, err = d.consumeValue(flags, pos, depth) + if err != nil { + return pos, err + } + + // Handle after value. + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + if d.needMore(pos) { + if pos, err = d.consumeWhitespace(pos); err != nil { + return pos, err + } + } + switch d.buf[pos] { + case ',': + pos++ + continue + case '}': + pos++ + return pos, nil + default: + return pos, newInvalidCharacterError(d.buf[pos:], "after object value (expecting ',' or '}')") + } + } +} + +// consumeArray consumes a single JSON array starting at d.buf[pos:]. +// It returns the new position in d.buf immediately after the array. +func (d *decoderState) consumeArray(flags *jsonwire.ValueFlags, pos, depth int) (newPos int, err error) { + // Handle before start. 
+ if uint(pos) >= uint(len(d.buf)) || d.buf[pos] != '[' { + panic("BUG: consumeArray must be called with a buffer that starts with '['") + } else if depth == maxNestingDepth+1 { + return pos, errMaxDepth + } + pos++ + + // Handle after start. + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + if d.needMore(pos) { + if pos, err = d.consumeWhitespace(pos); err != nil { + return pos, err + } + } + if d.buf[pos] == ']' { + pos++ + return pos, nil + } + + depth++ + for { + // Handle before value. + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + if d.needMore(pos) { + if pos, err = d.consumeWhitespace(pos); err != nil { + return pos, err + } + } + pos, err = d.consumeValue(flags, pos, depth) + if err != nil { + return pos, err + } + + // Handle after value. + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + if d.needMore(pos) { + if pos, err = d.consumeWhitespace(pos); err != nil { + return pos, err + } + } + switch d.buf[pos] { + case ',': + pos++ + continue + case ']': + pos++ + return pos, nil + default: + return pos, newInvalidCharacterError(d.buf[pos:], "after array value (expecting ',' or ']')") + } + } +} + +// InputOffset returns the current input byte offset. It gives the location +// of the next byte immediately after the most recently returned token or value. +// The number of bytes actually read from the underlying [io.Reader] may be more +// than this offset due to internal buffering effects. +func (d *Decoder) InputOffset() int64 { + return d.s.previousOffsetEnd() +} + +// UnreadBuffer returns the data remaining in the unread buffer, +// which may contain zero or more bytes. +// The returned buffer must not be mutated while Decoder continues to be used. +// The buffer contents are valid until the next Peek, Read, or Skip call. +func (d *Decoder) UnreadBuffer() []byte { + return d.s.unreadBuffer() +} + +// StackDepth returns the depth of the state machine for read JSON data. +// Each level on the stack represents a nested JSON object or array. 
+// It is incremented whenever an [ObjectStart] or [ArrayStart] token is encountered +// and decremented whenever an [ObjectEnd] or [ArrayEnd] token is encountered. +// The depth is zero-indexed, where zero represents the top-level JSON value. +func (d *Decoder) StackDepth() int { + // NOTE: Keep in sync with Encoder.StackDepth. + return d.s.Tokens.Depth() - 1 +} + +// StackIndex returns information about the specified stack level. +// It must be a number between 0 and [Decoder.StackDepth], inclusive. +// For each level, it reports the kind: +// +// - 0 for a level of zero, +// - '{' for a level representing a JSON object, and +// - '[' for a level representing a JSON array. +// +// It also reports the length of that JSON object or array. +// Each name and value in a JSON object is counted separately, +// so the effective number of members would be half the length. +// A complete JSON object must have an even length. +func (d *Decoder) StackIndex(i int) (Kind, int64) { + // NOTE: Keep in sync with Encoder.StackIndex. + switch s := d.s.Tokens.index(i); { + case i > 0 && s.isObject(): + return '{', s.Length() + case i > 0 && s.isArray(): + return '[', s.Length() + default: + return 0, s.Length() + } +} + +// StackPointer returns a JSON Pointer (RFC 6901) to the most recently read value. +// Object names are only present if [AllowDuplicateNames] is false, otherwise +// object members are represented using their index within the object. +func (d *Decoder) StackPointer() Pointer { + d.s.Names.copyQuotedBuffer(d.s.buf) + return Pointer(d.s.appendStackPointer(nil)) +} + +// requireKeyedLiterals can be embedded in a struct to require keyed literals. +type requireKeyedLiterals struct{} + +// nonComparable can be embedded in a struct to prevent comparability. +type nonComparable [0]func() + +// Encoder is a streaming encoder from raw JSON tokens and values. +// It is used to write a stream of top-level JSON values, +// each terminated with a newline character. 
+// +// [Encoder.WriteToken] and [Encoder.WriteValue] calls may be interleaved. +// For example, the following JSON value: +// +// {"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}} +// +// can be composed with the following calls (ignoring errors for brevity): +// +// e.WriteToken(ObjectStart) // { +// e.WriteToken(String("name")) // "name" +// e.WriteToken(String("value")) // "value" +// e.WriteValue(Value(`"array"`)) // "array" +// e.WriteToken(ArrayStart) // [ +// e.WriteToken(Null) // null +// e.WriteToken(False) // false +// e.WriteValue(Value("true")) // true +// e.WriteToken(Float(3.14159)) // 3.14159 +// e.WriteToken(ArrayEnd) // ] +// e.WriteValue(Value(`"object"`)) // "object" +// e.WriteValue(Value(`{"k":"v"}`)) // {"k":"v"} +// e.WriteToken(ObjectEnd) // } +// +// The above is one of many possible sequence of calls and +// may not represent the most sensible method to call for any given token/value. +// For example, it is probably more common to call [Encoder.WriteToken] with a string +// for object names. +type Encoder struct { + s encoderState +} + +// encoderState is the low-level state of Encoder. +// It has exported fields and method for use by the "json" package. +type encoderState struct { + state + encodeBuffer + jsonopts.Struct + + SeenPointers map[any]struct{} // only used when marshaling; identical to json.seenPointers +} + +// encodeBuffer is a buffer split into 2 segments: +// +// - buf[0:len(buf)] // written (but unflushed) portion of the buffer +// - buf[len(buf):cap(buf)] // unused portion of the buffer +type encodeBuffer struct { + Buf []byte // may alias wr if it is a bytes.Buffer + + // baseOffset is added to len(buf) to obtain the absolute offset + // relative to the start of io.Writer stream. + baseOffset int64 + + wr io.Writer + + // maxValue is the approximate maximum Value size passed to WriteValue. + maxValue int + // unusedCache is the buffer returned by the UnusedBuffer method. 
+ unusedCache []byte + // bufStats is statistics about buffer utilization. + // It is only used with pooled encoders in pools.go. + bufStats bufferStatistics +} + +// NewEncoder constructs a new streaming encoder writing to w +// configured with the provided options. +// It flushes the internal buffer when the buffer is sufficiently full or +// when a top-level value has been written. +// +// If w is a [bytes.Buffer], then the encoder appends directly into the buffer +// without copying the contents from an intermediate buffer. +func NewEncoder(w io.Writer, opts ...Options) *Encoder { + e := new(Encoder) + e.Reset(w, opts...) + return e +} + +// Reset resets an encoder such that it is writing afresh to w and +// configured with the provided options. Reset must not be called on +// a Encoder passed to the [encoding/json/v2.MarshalerV2.MarshalJSONV2] method +// or the [encoding/json/v2.MarshalFuncV2] function. +func (e *Encoder) Reset(w io.Writer, opts ...Options) { + switch { + case e == nil: + panic("jsontext: invalid nil Encoder") + case w == nil: + panic("jsontext: invalid nil io.Writer") + case e.s.Flags.Get(jsonflags.WithinArshalCall): + panic("jsontext: cannot reset Encoder passed to json.MarshalerV2") + } + e.s.reset(nil, w, opts...) +} + +func (e *encoderState) reset(b []byte, w io.Writer, opts ...Options) { + e.state.reset() + e.encodeBuffer = encodeBuffer{Buf: b, wr: w, bufStats: e.bufStats} + if bb, ok := w.(*bytes.Buffer); ok && bb != nil { + e.Buf = bb.Bytes()[bb.Len():] // alias the unused buffer of bb + } + e.Struct = jsonopts.Struct{} + e.Struct.Join(opts...) + if e.Flags.Get(jsonflags.Multiline) { + if !e.Flags.Has(jsonflags.SpaceAfterColon) { + e.Flags.Set(jsonflags.SpaceAfterColon | 1) + } + if !e.Flags.Has(jsonflags.SpaceAfterComma) { + e.Flags.Set(jsonflags.SpaceAfterComma | 0) + } + if !e.Flags.Has(jsonflags.Indent) { + e.Flags.Set(jsonflags.Indent | 1) + e.Indent = "\t" + } + } +} + +// NeedFlush determines whether to flush at this point. 
+func (e *encoderState) NeedFlush() bool { + // NOTE: This function is carefully written to be inlinable. + + // Avoid flushing if e.wr is nil since there is no underlying writer. + // Flush if less than 25% of the capacity remains. + // Flushing at some constant fraction ensures that the buffer stops growing + // so long as the largest Token or Value fits within that unused capacity. + return e.wr != nil && (e.Tokens.Depth() == 1 || len(e.Buf) > 3*cap(e.Buf)/4) +} + +// Flush flushes the buffer to the underlying io.Writer. +// It may append a trailing newline after the top-level value. +func (e *encoderState) Flush() error { + if e.wr == nil || e.avoidFlush() { + return nil + } + + // In streaming mode, always emit a newline after the top-level value. + if e.Tokens.Depth() == 1 && !e.Flags.Get(jsonflags.OmitTopLevelNewline) { + e.Buf = append(e.Buf, '\n') + } + + // Inform objectNameStack that we are about to flush the buffer content. + e.Names.copyQuotedBuffer(e.Buf) + + // Specialize bytes.Buffer for better performance. + if bb, ok := e.wr.(*bytes.Buffer); ok { + // If e.buf already aliases the internal buffer of bb, + // then the Write call simply increments the internal offset, + // otherwise Write operates as expected. + // See https://go.dev/issue/42986. + n, _ := bb.Write(e.Buf) // never fails unless bb is nil + e.baseOffset += int64(n) + + // If the internal buffer of bytes.Buffer is too small, + // append operations elsewhere in the Encoder may grow the buffer. + // This would be semantically correct, but hurts performance. + // As such, ensure 25% of the current length is always available + // to reduce the probability that other appends must allocate. + if avail := bb.Available(); avail < bb.Len()/4 { + bb.Grow(avail + 1) + } + + e.Buf = bb.AvailableBuffer() + return nil + } + + // Flush the internal buffer to the underlying io.Writer. 
+ n, err := e.wr.Write(e.Buf) + e.baseOffset += int64(n) + if err != nil { + // In the event of an error, preserve the unflushed portion. + // Thus, write errors aren't fatal so long as the io.Writer + // maintains consistent state after errors. + if n > 0 { + e.Buf = e.Buf[:copy(e.Buf, e.Buf[n:])] + } + return &ioError{action: "write", err: err} + } + e.Buf = e.Buf[:0] + + // Check whether to grow the buffer. + // Note that cap(e.buf) may already exceed maxBufferSize since + // an append elsewhere already grew it to store a large token. + const maxBufferSize = 4 << 10 + const growthSizeFactor = 2 // higher value is faster + const growthRateFactor = 2 // higher value is slower + // By default, grow if below the maximum buffer size. + grow := cap(e.Buf) <= maxBufferSize/growthSizeFactor + // Growing can be expensive, so only grow + // if a sufficient number of bytes have been processed. + grow = grow && int64(cap(e.Buf)) < e.previousOffsetEnd()/growthRateFactor + if grow { + e.Buf = make([]byte, 0, cap(e.Buf)*growthSizeFactor) + } + + return nil +} + +// injectSyntacticErrorWithPosition wraps a SyntacticError with the position, +// otherwise it returns the error as is. +// It takes a position relative to the start of the start of e.buf. +func (e *encodeBuffer) injectSyntacticErrorWithPosition(err error, pos int) error { + if serr, ok := err.(*SyntacticError); ok { + return serr.withOffset(e.baseOffset + int64(pos)) + } + return err +} + +func (e *encodeBuffer) previousOffsetEnd() int64 { return e.baseOffset + int64(len(e.Buf)) } + +func (e *encodeBuffer) unflushedBuffer() []byte { return e.Buf } + +// avoidFlush indicates whether to avoid flushing to ensure there is always +// enough in the buffer to unwrite the last object member if it were empty. +func (e *encoderState) avoidFlush() bool { + switch { + case e.Tokens.Last.Length() == 0: + // Never flush after ObjectStart or ArrayStart since we don't know yet + // if the object or array will end up being empty. 
+ return true + case e.Tokens.Last.needObjectValue(): + // Never flush before the object value since we don't know yet + // if the object value will end up being empty. + return true + case e.Tokens.Last.NeedObjectName() && len(e.Buf) >= 2: + // Never flush after the object value if it does turn out to be empty. + switch string(e.Buf[len(e.Buf)-2:]) { + case `ll`, `""`, `{}`, `[]`: // last two bytes of every empty value + return true + } + } + return false +} + +// UnwriteEmptyObjectMember unwrites the last object member if it is empty +// and reports whether it performed an unwrite operation. +func (e *encoderState) UnwriteEmptyObjectMember(prevName *string) bool { + if last := e.Tokens.Last; !last.isObject() || !last.NeedObjectName() || last.Length() == 0 { + panic("BUG: must be called on an object after writing a value") + } + + // The flushing logic is modified to never flush a trailing empty value. + // The encoder never writes trailing whitespace eagerly. + b := e.unflushedBuffer() + + // Detect whether the last value was empty. + var n int + if len(b) >= 3 { + switch string(b[len(b)-2:]) { + case "ll": // last two bytes of `null` + n = len(`null`) + case `""`: + // It is possible for a non-empty string to have `""` as a suffix + // if the second to the last quote was escaped. + if b[len(b)-3] == '\\' { + return false // e.g., `"\""` is not empty + } + n = len(`""`) + case `{}`: + n = len(`{}`) + case `[]`: + n = len(`[]`) + } + } + if n == 0 { + return false + } + + // Unwrite the value, whitespace, colon, name, whitespace, and comma. + b = b[:len(b)-n] + b = jsonwire.TrimSuffixWhitespace(b) + b = jsonwire.TrimSuffixByte(b, ':') + b = jsonwire.TrimSuffixString(b) + b = jsonwire.TrimSuffixWhitespace(b) + b = jsonwire.TrimSuffixByte(b, ',') + e.Buf = b // store back truncated unflushed buffer + + // Undo state changes. 
+ e.Tokens.Last.decrement() // for object member value + e.Tokens.Last.decrement() // for object member name + if !e.Flags.Get(jsonflags.AllowDuplicateNames) { + if e.Tokens.Last.isActiveNamespace() { + e.Namespaces.Last().removeLast() + } + e.Names.clearLast() + if prevName != nil { + e.Names.copyQuotedBuffer(e.Buf) // required by objectNameStack.replaceLastUnquotedName + e.Names.replaceLastUnquotedName(*prevName) + } + } + return true +} + +// UnwriteOnlyObjectMemberName unwrites the only object member name +// and returns the unquoted name. +func (e *encoderState) UnwriteOnlyObjectMemberName() string { + if last := e.Tokens.Last; !last.isObject() || last.Length() != 1 { + panic("BUG: must be called on an object after writing first name") + } + + // Unwrite the name and whitespace. + b := jsonwire.TrimSuffixString(e.Buf) + isVerbatim := bytes.IndexByte(e.Buf[len(b):], '\\') < 0 + name := string(jsonwire.UnquoteMayCopy(e.Buf[len(b):], isVerbatim)) + e.Buf = jsonwire.TrimSuffixWhitespace(b) + + // Undo state changes. + e.Tokens.Last.decrement() + if !e.Flags.Get(jsonflags.AllowDuplicateNames) { + if e.Tokens.Last.isActiveNamespace() { + e.Namespaces.Last().removeLast() + } + e.Names.clearLast() + } + return name +} + +// WriteToken writes the next token and advances the internal write offset. +// +// The provided token kind must be consistent with the JSON grammar. +// For example, it is an error to provide a number when the encoder +// is expecting an object name (which is always a string), or +// to provide an end object delimiter when the encoder is finishing an array. +// If the provided token is invalid, then it reports a [SyntacticError] and +// the internal state remains unchanged. The offset reported +// in [SyntacticError] will be relative to the [Encoder.OutputOffset]. 
func (e *Encoder) WriteToken(t Token) error {
	return e.s.WriteToken(t)
}

// WriteToken appends the token t to the encoder buffer.
//
// All appends go to a local copy of e.Buf, which is stored back into e only
// after the token state machine has accepted the token; on error e.Buf is
// left as it was. Any error is passed through
// injectSyntacticErrorWithPosition together with the buffer offset at which
// the token would have started.
func (e *encoderState) WriteToken(t Token) error {
	k := t.Kind()
	b := e.Buf // use local variable to avoid mutating e in case of error

	// Append any delimiters or optional whitespace.
	b = e.Tokens.MayAppendDelim(b, k)
	if e.Flags.Get(jsonflags.AnyWhitespace) {
		b = e.appendWhitespace(b, k)
	}
	pos := len(b) // offset before the token

	// Append the token to the output and to the state machine.
	// A "break" inside a case leaves the switch with err set, which is then
	// handled by the common error path below.
	var err error
	switch k {
	case 'n':
		b = append(b, "null"...)
		err = e.Tokens.appendLiteral()
	case 'f':
		b = append(b, "false"...)
		err = e.Tokens.appendLiteral()
	case 't':
		b = append(b, "true"...)
		err = e.Tokens.appendLiteral()
	case '"':
		if b, err = t.appendString(b, &e.Flags); err != nil {
			break
		}
		// When duplicate names are rejected and this string is an object name,
		// record it in the current namespace before advancing the state machine.
		if !e.Flags.Get(jsonflags.AllowDuplicateNames) && e.Tokens.Last.NeedObjectName() {
			if !e.Tokens.Last.isValidNamespace() {
				err = errInvalidNamespace
				break
			}
			// b[pos:] is the quoted name that was just appended.
			if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) {
				err = newDuplicateNameError(b[pos:])
				break
			}
			e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
		}
		err = e.Tokens.appendString()
	case '0':
		if b, err = t.appendNumber(b, e.Flags.Get(jsonflags.CanonicalizeNumbers)); err != nil {
			break
		}
		err = e.Tokens.appendNumber()
	case '{':
		b = append(b, '{')
		if err = e.Tokens.pushObject(); err != nil {
			break
		}
		// Name tracking is only maintained when duplicate names are rejected.
		if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
			e.Names.push()
			e.Namespaces.push()
		}
	case '}':
		b = append(b, '}')
		if err = e.Tokens.popObject(); err != nil {
			break
		}
		if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
			e.Names.pop()
			e.Namespaces.pop()
		}
	case '[':
		b = append(b, '[')
		err = e.Tokens.pushArray()
	case ']':
		b = append(b, ']')
		err = e.Tokens.popArray()
	default:
		err = &SyntacticError{str: "invalid json.Token"}
	}
	if err != nil {
		return e.injectSyntacticErrorWithPosition(err, pos)
	}

	// Finish off the buffer and store it back into e.
	e.Buf = b
	if e.NeedFlush() {
		return e.Flush()
	}
	return nil
}

// AppendRaw appends either a raw string (without double quotes) or number.
// Specify safeASCII if the string output is guaranteed to be ASCII
// without any characters (including '<', '>', and '&') that need escaping,
// otherwise this will validate whether the string needs escaping.
// The appended bytes for a JSON number must be valid.
//
// This is a specialized implementation of Encoder.WriteValue
// that allows appending directly into the buffer.
// It is only called from marshal logic in the "json" package.
//
// The kind k must be '"' or '0'; any other kind panics.
// An error returned by appendFn is returned as is, without being passed
// through injectSyntacticErrorWithPosition.
func (e *encoderState) AppendRaw(k Kind, safeASCII bool, appendFn func([]byte) ([]byte, error)) error {
	b := e.Buf // use local variable to avoid mutating e in case of error

	// Append any delimiters or optional whitespace.
	b = e.Tokens.MayAppendDelim(b, k)
	if e.Flags.Get(jsonflags.AnyWhitespace) {
		b = e.appendWhitespace(b, k)
	}
	pos := len(b) // offset before the token

	var err error
	switch k {
	case '"':
		// Append directly into the encoder buffer by assuming that
		// most of the time none of the characters need escaping.
		b = append(b, '"')
		if b, err = appendFn(b); err != nil {
			return err
		}
		b = append(b, '"')

		// Check whether we need to escape the string and if necessary
		// copy it to a scratch buffer and then escape it back.
		// The slice expression below selects the bytes between the two quotes.
		isVerbatim := safeASCII || !jsonwire.NeedEscape(b[pos+len(`"`):len(b)-len(`"`)])
		if !isVerbatim {
			var err error
			b2 := append(e.unusedCache, b[pos+len(`"`):len(b)-len(`"`)]...)
			b, err = jsonwire.AppendQuote(b[:pos], string(b2), &e.Flags)
			e.unusedCache = b2[:0] // keep the (possibly grown) scratch buffer for reuse
			if err != nil {
				return e.injectSyntacticErrorWithPosition(err, pos)
			}
		}

		// Update the state machine.
		if !e.Flags.Get(jsonflags.AllowDuplicateNames) && e.Tokens.Last.NeedObjectName() {
			if !e.Tokens.Last.isValidNamespace() {
				// NOTE(review): unlike WriteToken, this error is returned without
				// going through injectSyntacticErrorWithPosition — confirm intended.
				return errInvalidNamespace
			}
			if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], isVerbatim) {
				err := newDuplicateNameError(b[pos:])
				return e.injectSyntacticErrorWithPosition(err, pos)
			}
			e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
		}
		if err := e.Tokens.appendString(); err != nil {
			return e.injectSyntacticErrorWithPosition(err, pos)
		}
	case '0':
		if b, err = appendFn(b); err != nil {
			return err
		}
		if err := e.Tokens.appendNumber(); err != nil {
			return e.injectSyntacticErrorWithPosition(err, pos)
		}
	default:
		panic("BUG: invalid kind")
	}

	// Finish off the buffer and store it back into e.
	e.Buf = b
	if e.NeedFlush() {
		return e.Flush()
	}
	return nil
}

// WriteValue writes the next raw value and advances the internal write offset.
// The Encoder does not simply copy the provided value verbatim, but
// parses it to ensure that it is syntactically valid and reformats it
// according to how the Encoder is configured to format whitespace and strings.
// If [AllowInvalidUTF8] is specified, then any invalid UTF-8 is mangled
// as the Unicode replacement character, U+FFFD.
//
// The provided value kind must be consistent with the JSON grammar
// (see examples on [Encoder.WriteToken]). If the provided value is invalid,
// then it reports a [SyntacticError] and the internal state remains unchanged.
// The offset reported in [SyntacticError] will be relative to the
// [Encoder.OutputOffset] plus the offset into v of any encountered syntax error.
func (e *Encoder) WriteValue(v Value) error {
	return e.s.WriteValue(v)
}

// WriteValue reformats the raw value v into a local copy of the buffer,
// advances the token state machine by the kind of v, and only then stores
// the buffer back into e. Errors are passed through
// injectSyntacticErrorWithPosition with an offset into the output buffer.
func (e *encoderState) WriteValue(v Value) error {
	e.maxValue |= len(v) // bitwise OR is a fast approximation of max

	k := v.Kind()
	b := e.Buf // use local variable to avoid mutating e in case of error

	// Append any delimiters or optional whitespace.
	b = e.Tokens.MayAppendDelim(b, k)
	if e.Flags.Get(jsonflags.AnyWhitespace) {
		b = e.appendWhitespace(b, k)
	}
	pos := len(b) // offset before the value

	// Append the value to the output.
	// n counts the bytes of v consumed so far; m is the number of bytes
	// consumed by the value itself.
	var n int
	n += jsonwire.ConsumeWhitespace(v[n:])
	b, m, err := e.reformatValue(b, v[n:], e.Tokens.Depth())
	if err != nil {
		return e.injectSyntacticErrorWithPosition(err, pos+n+m)
	}
	n += m
	n += jsonwire.ConsumeWhitespace(v[n:])
	if len(v) > n {
		// Anything other than whitespace after the value is an error.
		err = newInvalidCharacterError(v[n:], "after top-level value")
		return e.injectSyntacticErrorWithPosition(err, pos+n)
	}

	// Append the kind to the state machine.
	switch k {
	case 'n', 'f', 't':
		err = e.Tokens.appendLiteral()
	case '"':
		if !e.Flags.Get(jsonflags.AllowDuplicateNames) && e.Tokens.Last.NeedObjectName() {
			if !e.Tokens.Last.isValidNamespace() {
				err = errInvalidNamespace
				break
			}
			if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) {
				err = newDuplicateNameError(b[pos:])
				break
			}
			e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
		}
		err = e.Tokens.appendString()
	case '0':
		err = e.Tokens.appendNumber()
	case '{':
		// The whole object was written at once, so push and immediately pop.
		if err = e.Tokens.pushObject(); err != nil {
			break
		}
		if err = e.Tokens.popObject(); err != nil {
			panic("BUG: popObject should never fail immediately after pushObject: " + err.Error())
		}
	case '[':
		// The whole array was written at once, so push and immediately pop.
		if err = e.Tokens.pushArray(); err != nil {
			break
		}
		if err = e.Tokens.popArray(); err != nil {
			panic("BUG: popArray should never fail immediately after pushArray: " + err.Error())
		}
	}
	if err != nil {
		return e.injectSyntacticErrorWithPosition(err, pos)
	}

	// Finish off the buffer and store it back into e.
	e.Buf = b
	if e.NeedFlush() {
		return e.Flush()
	}
	return nil
}

// appendWhitespace appends whitespace that immediately precedes the next token.
// After a colon, only the optional space is appended; otherwise an optional
// space after a comma and, in multiline mode, a newline with indentation.
func (e *encoderState) appendWhitespace(b []byte, next Kind) []byte {
	if delim := e.Tokens.needDelim(next); delim == ':' {
		if e.Flags.Get(jsonflags.SpaceAfterColon) {
			b = append(b, ' ')
		}
	} else {
		if delim == ',' && e.Flags.Get(jsonflags.SpaceAfterComma) {
			b = append(b, ' ')
		}
		if e.Flags.Get(jsonflags.Multiline) {
			b = e.AppendIndent(b, e.Tokens.NeedIndent(next))
		}
	}
	return b
}

// AppendIndent appends the appropriate number of indentation characters
// for the current nested level, n.
// For n == 0 nothing is appended. Otherwise it appends a newline,
// the indent prefix, and n-1 copies of the indent.
func (e *encoderState) AppendIndent(b []byte, n int) []byte {
	if n == 0 {
		return b
	}
	b = append(b, '\n')
	b = append(b, e.IndentPrefix...)
	for ; n > 1; n-- {
		b = append(b, e.Indent...)
	}
	return b
}

// reformatValue parses a JSON value from the start of src and
// appends it to the end of dst, reformatting whitespace and strings as needed.
// It returns the extended dst buffer and the number of consumed input bytes.
// An empty src is reported as io.ErrUnexpectedEOF.
func (e *encoderState) reformatValue(dst []byte, src Value, depth int) ([]byte, int, error) {
	// TODO: Should this update ValueFlags as input?
	if len(src) == 0 {
		return dst, 0, io.ErrUnexpectedEOF
	}
	switch k := Kind(src[0]).normalize(); k {
	case 'n':
		// The fast consume returned 0; re-run ConsumeLiteral to obtain the
		// consumed length and error to report.
		if jsonwire.ConsumeNull(src) == 0 {
			n, err := jsonwire.ConsumeLiteral(src, "null")
			return dst, n, err
		}
		return append(dst, "null"...), len("null"), nil
	case 'f':
		if jsonwire.ConsumeFalse(src) == 0 {
			n, err := jsonwire.ConsumeLiteral(src, "false")
			return dst, n, err
		}
		return append(dst, "false"...), len("false"), nil
	case 't':
		if jsonwire.ConsumeTrue(src) == 0 {
			n, err := jsonwire.ConsumeLiteral(src, "true")
			return dst, n, err
		}
		return append(dst, "true"...), len("true"), nil
	case '"':
		if n := jsonwire.ConsumeSimpleString(src); n > 0 {
			dst = append(dst, src[:n]...) // copy simple strings verbatim
			return dst, n, nil
		}
		return jsonwire.ReformatString(dst, src, &e.Flags)
	case '0':
		if n := jsonwire.ConsumeSimpleNumber(src); n > 0 && !e.Flags.Get(jsonflags.CanonicalizeNumbers) {
			dst = append(dst, src[:n]...) // copy simple numbers verbatim
			return dst, n, nil
		}
		return jsonwire.ReformatNumber(dst, src, e.Flags.Get(jsonflags.CanonicalizeNumbers))
	case '{':
		return e.reformatObject(dst, src, depth)
	case '[':
		return e.reformatArray(dst, src, depth)
	default:
		return dst, 0, newInvalidCharacterError(src, "at start of value")
	}
}

// reformatObject parses a JSON object from the start of src and
// appends it to the end of dst, reformatting whitespace and strings as needed.
// It returns the extended dst buffer and the number of consumed input bytes.
// It panics if src does not start with '{' and reports errMaxDepth when
// depth equals maxNestingDepth+1.
func (e *encoderState) reformatObject(dst []byte, src Value, depth int) ([]byte, int, error) {
	// Append object start.
	if len(src) == 0 || src[0] != '{' {
		panic("BUG: reformatObject must be called with a buffer that starts with '{'")
	} else if depth == maxNestingDepth+1 {
		return dst, 0, errMaxDepth
	}
	dst = append(dst, '{')
	n := len("{")

	// Append (possible) object end.
	n += jsonwire.ConsumeWhitespace(src[n:])
	if uint(len(src)) <= uint(n) {
		return dst, n, io.ErrUnexpectedEOF
	}
	if src[n] == '}' {
		dst = append(dst, '}')
		n += len("}")
		return dst, n, nil
	}

	var err error
	var names *objectNamespace
	if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
		// Track the names of this object in a namespace that is popped
		// again when this function returns.
		e.Namespaces.push()
		defer e.Namespaces.pop()
		names = e.Namespaces.Last()
	}
	depth++
	for {
		// Append optional newline and indentation.
		if e.Flags.Get(jsonflags.Multiline) {
			dst = e.AppendIndent(dst, depth)
		}

		// Append object name.
		n += jsonwire.ConsumeWhitespace(src[n:])
		if uint(len(src)) <= uint(n) {
			return dst, n, io.ErrUnexpectedEOF
		}
		m := jsonwire.ConsumeSimpleString(src[n:])
		if m > 0 {
			dst = append(dst, src[n:n+m]...)
		} else {
			dst, m, err = jsonwire.ReformatString(dst, src[n:], &e.Flags)
			if err != nil {
				return dst, n + m, err
			}
		}
		// TODO: Specify whether the name is verbatim or not.
		if !e.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(src[n:n+m], false) {
			return dst, n, newDuplicateNameError(src[n : n+m])
		}
		n += m

		// Append colon.
		n += jsonwire.ConsumeWhitespace(src[n:])
		if uint(len(src)) <= uint(n) {
			return dst, n, io.ErrUnexpectedEOF
		}
		if src[n] != ':' {
			return dst, n, newInvalidCharacterError(src[n:], "after object name (expecting ':')")
		}
		dst = append(dst, ':')
		n += len(":")
		if e.Flags.Get(jsonflags.SpaceAfterColon) {
			dst = append(dst, ' ')
		}

		// Append object value.
		n += jsonwire.ConsumeWhitespace(src[n:])
		if uint(len(src)) <= uint(n) {
			return dst, n, io.ErrUnexpectedEOF
		}
		dst, m, err = e.reformatValue(dst, src[n:], depth)
		if err != nil {
			return dst, n + m, err
		}
		n += m

		// Append comma or object end.
		n += jsonwire.ConsumeWhitespace(src[n:])
		if uint(len(src)) <= uint(n) {
			return dst, n, io.ErrUnexpectedEOF
		}
		switch src[n] {
		case ',':
			dst = append(dst, ',')
			if e.Flags.Get(jsonflags.SpaceAfterComma) {
				dst = append(dst, ' ')
			}
			n += len(",")
			continue
		case '}':
			if e.Flags.Get(jsonflags.Multiline) {
				dst = e.AppendIndent(dst, depth-1)
			}
			dst = append(dst, '}')
			n += len("}")
			return dst, n, nil
		default:
			return dst, n, newInvalidCharacterError(src[n:], "after object value (expecting ',' or '}')")
		}
	}
}

// reformatArray parses a JSON array from the start of src and
// appends it to the end of dst, reformatting whitespace and strings as needed.
// It returns the extended dst buffer and the number of consumed input bytes.
// It panics if src does not start with '[' and reports errMaxDepth when
// depth equals maxNestingDepth+1.
func (e *encoderState) reformatArray(dst []byte, src Value, depth int) ([]byte, int, error) {
	// Append array start.
	if len(src) == 0 || src[0] != '[' {
		panic("BUG: reformatArray must be called with a buffer that starts with '['")
	} else if depth == maxNestingDepth+1 {
		return dst, 0, errMaxDepth
	}
	dst = append(dst, '[')
	n := len("[")

	// Append (possible) array end.
	n += jsonwire.ConsumeWhitespace(src[n:])
	if uint(len(src)) <= uint(n) {
		return dst, n, io.ErrUnexpectedEOF
	}
	if src[n] == ']' {
		dst = append(dst, ']')
		n += len("]")
		return dst, n, nil
	}

	var err error
	depth++
	for {
		// Append optional newline and indentation.
		if e.Flags.Get(jsonflags.Multiline) {
			dst = e.AppendIndent(dst, depth)
		}

		// Append array value.
		n += jsonwire.ConsumeWhitespace(src[n:])
		if uint(len(src)) <= uint(n) {
			return dst, n, io.ErrUnexpectedEOF
		}
		var m int
		dst, m, err = e.reformatValue(dst, src[n:], depth)
		if err != nil {
			return dst, n + m, err
		}
		n += m

		// Append comma or array end.
		n += jsonwire.ConsumeWhitespace(src[n:])
		if uint(len(src)) <= uint(n) {
			return dst, n, io.ErrUnexpectedEOF
		}
		switch src[n] {
		case ',':
			dst = append(dst, ',')
			if e.Flags.Get(jsonflags.SpaceAfterComma) {
				dst = append(dst, ' ')
			}
			n += len(",")
			continue
		case ']':
			if e.Flags.Get(jsonflags.Multiline) {
				dst = e.AppendIndent(dst, depth-1)
			}
			dst = append(dst, ']')
			n += len("]")
			return dst, n, nil
		default:
			return dst, n, newInvalidCharacterError(src[n:], "after array value (expecting ',' or ']')")
		}
	}
}

// OutputOffset returns the current output byte offset. It gives the location
// of the next byte immediately after the most recently written token or value.
// The number of bytes actually written to the underlying [io.Writer] may be less
// than this offset due to internal buffering effects.
func (e *Encoder) OutputOffset() int64 {
	return e.s.previousOffsetEnd()
}

// UnusedBuffer returns a zero-length buffer with a possible non-zero capacity.
// This buffer is intended to be used to populate a [Value]
// being passed to an immediately succeeding [Encoder.WriteValue] call.
//
// Example usage:
//
//	b := e.UnusedBuffer()
//	b = append(b, '"')
//	b = appendString(b, v) // append the string formatting of v
//	b = append(b, '"')
//	... := e.WriteValue(b)
//
// It is the user's responsibility to ensure that the value is valid JSON.
func (e *Encoder) UnusedBuffer() []byte {
	// NOTE: We don't return e.buf[len(e.buf):cap(e.buf)] since WriteValue would
	// need to take special care to avoid mangling the data while reformatting.
	// WriteValue can't easily identify whether the input Value aliases e.buf
	// without using unsafe.Pointer. Thus, we just return a different buffer.
	// Should this ever alias e.buf, we need to consider how it operates with
	// the specialized performance optimization for bytes.Buffer.
	// Logical OR with 63 ensures 64 as the minimum capacity.
	n := 1 << bits.Len(uint(e.s.maxValue|63)) // fast approximation for max length
	if cap(e.s.unusedCache) < n {
		e.s.unusedCache = make([]byte, 0, n)
	}
	return e.s.unusedCache
}

// StackDepth returns the depth of the state machine for written JSON data.
// Each level on the stack represents a nested JSON object or array.
// It is incremented whenever an [ObjectStart] or [ArrayStart] token is encountered
// and decremented whenever an [ObjectEnd] or [ArrayEnd] token is encountered.
// The depth is zero-indexed, where zero represents the top-level JSON value.
func (e *Encoder) StackDepth() int {
	// NOTE: Keep in sync with Decoder.StackDepth.
	return e.s.Tokens.Depth() - 1
}

// StackIndex returns information about the specified stack level.
// It must be a number between 0 and [Encoder.StackDepth], inclusive.
// For each level, it reports the kind:
//
//   - 0 for a level of zero,
//   - '{' for a level representing a JSON object, and
//   - '[' for a level representing a JSON array.
//
// It also reports the length of that JSON object or array.
// Each name and value in a JSON object is counted separately,
// so the effective number of members would be half the length.
// A complete JSON object must have an even length.
func (e *Encoder) StackIndex(i int) (Kind, int64) {
	// NOTE: Keep in sync with Decoder.StackIndex.
	switch s := e.s.Tokens.index(i); {
	case i > 0 && s.isObject():
		return '{', s.Length()
	case i > 0 && s.isArray():
		return '[', s.Length()
	default:
		return 0, s.Length()
	}
}

// StackPointer returns a JSON Pointer (RFC 6901) to the most recently written value.
// Object names are only present if [AllowDuplicateNames] is false, otherwise
// object members are represented using their index within the object.
+func (e *Encoder) StackPointer() Pointer { + e.s.Names.copyQuotedBuffer(e.s.Buf) + return Pointer(e.s.appendStackPointer(nil)) +} + +const errorPrefix = "jsontext: " + +type ioError struct { + action string // either "read" or "write" + err error +} + +func (e *ioError) Error() string { + return errorPrefix + e.action + " error: " + e.err.Error() +} + +func (e *ioError) Unwrap() error { + return e.err +} + +// SyntacticError is a description of a syntactic error that occurred when +// encoding or decoding JSON according to the grammar. +// +// The contents of this error as produced by this package may change over time. +type SyntacticError struct { + requireKeyedLiterals + nonComparable + + // ByteOffset indicates that an error occurred after this byte offset. + ByteOffset int64 + str string +} + +func (e *SyntacticError) Error() string { + return errorPrefix + e.str +} + +func (e *SyntacticError) withOffset(pos int64) error { + return &SyntacticError{ByteOffset: pos, str: e.str} +} + +func newDuplicateNameError[Bytes ~[]byte | ~string](quoted Bytes) *SyntacticError { + return &SyntacticError{str: "duplicate name " + string(quoted) + " in object"} +} + +func newInvalidCharacterError[Bytes ~[]byte | ~string](prefix Bytes, where string) *SyntacticError { + what := jsonwire.QuoteRune(prefix) + return &SyntacticError{str: "invalid character " + what + " " + where} +} + +// TODO: Error types between "json", "jsontext", and "jsonwire" is a mess. +// Clean this up. +func init() { + // Inject behavior in "jsonwire" so that it can produce SyntacticError types. + jsonwire.NewError = func(s string) error { return &SyntacticError{str: s} } + jsonwire.ErrInvalidUTF8 = &SyntacticError{str: jsonwire.ErrInvalidUTF8.Error()} +} + +// Internal is for internal use only. +// This is exempt from the Go compatibility agreement. +var Internal exporter + +type exporter struct{} + +// Export exposes internal functionality from "jsontext" to "json". 
+// This cannot be dynamically called by other packages since +// they cannot obtain a reference to the internal.AllowInternalUse value. +func (exporter) Export(p *internal.NotForPublicUse) export { + if p != &internal.AllowInternalUse { + panic("unauthorized call to Export") + } + return export{} +} + +// The export type exposes functionality to packages with visibility to +// the internal.AllowInternalUse variable. The "json" package uses this +// to modify low-level state in the Encoder and Decoder types. +// It mutates the state directly instead of calling ReadToken or WriteToken +// since this is more performant. The public APIs need to track state to ensure +// that users are constructing a valid JSON value, but the "json" implementation +// guarantees that it emits valid JSON by the structure of the code itself. +type export struct{} + +// Encoder returns a pointer to the underlying encoderState. +func (export) Encoder(e *Encoder) *encoderState { return &e.s } + +// Decoder returns a pointer to the underlying decoderState. +func (export) Decoder(d *Decoder) *decoderState { return &d.s } + +func (export) GetBufferedEncoder(o ...Options) *Encoder { + return getBufferedEncoder(o...) +} + +func (export) PutBufferedEncoder(e *Encoder) { + putBufferedEncoder(e) +} + +func (export) GetStreamingEncoder(w io.Writer, o ...Options) *Encoder { + return getStreamingEncoder(w, o...) +} + +func (export) PutStreamingEncoder(e *Encoder) { + putStreamingEncoder(e) +} + +func (export) GetBufferedDecoder(b []byte, o ...Options) *Decoder { + return getBufferedDecoder(b, o...) +} + +func (export) PutBufferedDecoder(d *Decoder) { + putBufferedDecoder(d) +} + +func (export) GetStreamingDecoder(r io.Reader, o ...Options) *Decoder { + return getStreamingDecoder(r, o...) 
+} + +func (export) PutStreamingDecoder(d *Decoder) { + putStreamingDecoder(d) +} + +func (export) NewDuplicateNameError(quoted []byte, pos int64) error { + return newDuplicateNameError(quoted).withOffset(pos) +} + +func (export) NewInvalidCharacterError(prefix, where string, pos int64) error { + return newInvalidCharacterError(prefix, where).withOffset(pos) +} + +func (export) NewMissingNameError(pos int64) error { + return errMissingName.withOffset(pos) +} + +func (export) NewInvalidUTF8Error(pos int64) error { + return errInvalidUTF8.withOffset(pos) +} + +// Options configures [NewEncoder], [Encoder.Reset], [NewDecoder], +// and [Decoder.Reset] with specific features. +// Each function takes in a variadic list of options, where properties +// set in latter options override the value of previously set properties. +// +// The Options type is identical to [encoding/json.Options] and +// [encoding/json/v2.Options]. Options from the other packages may +// be passed to functionality in this package, but are ignored. +// Options from this package may be used with the other packages. +type Options = jsonopts.Options + +// AllowDuplicateNames specifies that JSON objects may contain +// duplicate member names. Disabling the duplicate name check may provide +// performance benefits, but breaks compliance with RFC 7493, section 2.3. +// The input or output will still be compliant with RFC 8259, +// which leaves the handling of duplicate names as unspecified behavior. +// +// This affects either encoding or decoding. +func AllowDuplicateNames(v bool) Options { + if v { + return jsonflags.AllowDuplicateNames | 1 + } else { + return jsonflags.AllowDuplicateNames | 0 + } +} + +// AllowInvalidUTF8 specifies that JSON strings may contain invalid UTF-8, +// which will be mangled as the Unicode replacement character, U+FFFD. +// This causes the encoder or decoder to break compliance with +// RFC 7493, section 2.1, and RFC 8259, section 8.1. 
+// +// This affects either encoding or decoding. +func AllowInvalidUTF8(v bool) Options { + if v { + return jsonflags.AllowInvalidUTF8 | 1 + } else { + return jsonflags.AllowInvalidUTF8 | 0 + } +} + +// EscapeForHTML specifies that '<', '>', and '&' characters within JSON strings +// should be escaped as a hexadecimal Unicode codepoint (e.g., \u003c) so that +// the output is safe to embed within HTML. +// +// This only affects encoding and is ignored when decoding. +func EscapeForHTML(v bool) Options { + if v { + return jsonflags.EscapeForHTML | 1 + } else { + return jsonflags.EscapeForHTML | 0 + } +} + +// EscapeForJS specifies that U+2028 and U+2029 characters within JSON strings +// should be escaped as a hexadecimal Unicode codepoint (e.g., \u2028) so that +// the output is valid to embed within JavaScript. See RFC 8259, section 12. +// +// This only affects encoding and is ignored when decoding. +func EscapeForJS(v bool) Options { + if v { + return jsonflags.EscapeForJS | 1 + } else { + return jsonflags.EscapeForJS | 0 + } +} + +// SpaceAfterColon specifies that the JSON output should emit a space character +// after each colon separator following a JSON object name. +// If false, then no space character appears after the colon separator. +// +// This only affects encoding and is ignored when decoding. +func SpaceAfterColon(v bool) Options { + if v { + return jsonflags.SpaceAfterColon | 1 + } else { + return jsonflags.SpaceAfterColon | 0 + } +} + +// SpaceAfterComma specifies that the JSON output should emit a space character +// after each comma separator following a JSON object value or array element. +// If false, then no space character appears after the comma separator. +// +// This only affects encoding and is ignored when decoding. 
+func SpaceAfterComma(v bool) Options { + if v { + return jsonflags.SpaceAfterComma | 1 + } else { + return jsonflags.SpaceAfterComma | 0 + } +} + +// Multiline specifies that the JSON output should expand to multiple lines, +// where every JSON object member or JSON array element appears on +// a new, indented line according to the nesting depth. +// +// If [SpaceAfterColon] is not specified, then the default is true. +// If [SpaceAfterComma] is not specified, then the default is false. +// If [WithIndent] is not specified, then the default is "\t". +// +// If set to false, then the output is a single-line, +// where the only whitespace emitted is determined by the current +// values of [SpaceAfterColon] and [SpaceAfterComma]. +// +// This only affects encoding and is ignored when decoding. +func Multiline(v bool) Options { + if v { + return jsonflags.Multiline | 1 + } else { + return jsonflags.Multiline | 0 + } +} + +// WithIndent specifies that the encoder should emit multiline output +// where each element in a JSON object or array begins on a new, indented line +// beginning with the indent prefix (see [WithIndentPrefix]) +// followed by one or more copies of indent according to the nesting depth. +// The indent must only be composed of space or tab characters. +// +// If the intent to emit indented output without a preference for +// the particular indent string, then use [Multiline] instead. +// +// This only affects encoding and is ignored when decoding. +// Use of this option implies [Multiline] being set to true. +func WithIndent(indent string) Options { + // Fast-path: Return a constant for common indents, which avoids allocating. + // These are derived from analyzing the Go module proxy on 2023-07-01. 
+ switch indent { + case "\t": + return jsonopts.Indent("\t") // ~14k usages + case " ": + return jsonopts.Indent(" ") // ~18k usages + case " ": + return jsonopts.Indent(" ") // ~1.7k usages + case " ": + return jsonopts.Indent(" ") // ~52k usages + case " ": + return jsonopts.Indent(" ") // ~12k usages + case "": + return jsonopts.Indent("") // ~1.5k usages + } + + // Otherwise, allocate for this unique value. + if s := strings.Trim(indent, " \t"); len(s) > 0 { + panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent") + } + return jsonopts.Indent(indent) +} + +// WithIndentPrefix specifies that the encoder should emit multiline output +// where each element in a JSON object or array begins on a new, indented line +// beginning with the indent prefix followed by one or more copies of indent +// (see [WithIndent]) according to the nesting depth. +// The prefix must only be composed of space or tab characters. +// +// This only affects encoding and is ignored when decoding. +// Use of this option implies [Multiline] being set to true. +func WithIndentPrefix(prefix string) Options { + if s := strings.Trim(prefix, " \t"); len(s) > 0 { + panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent prefix") + } + return jsonopts.IndentPrefix(prefix) +} + +/* +// TODO(https://go.dev/issue/56733): Implement WithByteLimit and WithDepthLimit. + +// WithByteLimit sets a limit on the number of bytes of input or output bytes +// that may be consumed or produced for each top-level JSON value. +// If a [Decoder] or [Encoder] method call would need to consume/produce +// more than a total of n bytes to make progress on the top-level JSON value, +// then the call will report an error. +// Whitespace before and within the top-level value are counted against the limit. +// Whitespace after a top-level value are counted against the limit +// for the next top-level value. +// +// A non-positive limit is equivalent to no limit at all. 
+// If unspecified, the default limit is no limit at all. +func WithByteLimit(n int64) Options { + return jsonopts.ByteLimit(max(n, 0)) +} + +// WithDepthLimit sets a limit on the maximum depth of JSON nesting +// that may be consumed or produced for each top-level JSON value. +// If a [Decoder] or [Encoder] method call would need to consume or produce +// a depth greater than n to make progress on the top-level JSON value, +// then the call will report an error. +// +// A non-positive limit is equivalent to no limit at all. +// If unspecified, the default limit is 10000. +func WithDepthLimit(n int) Options { + return jsonopts.DepthLimit(max(n, 0)) +} +*/ + +// TODO(https://go.dev/issue/47657): Use sync.PoolOf. + +var ( + // This owns the internal buffer since there is no io.Writer to output to. + // Since the buffer can get arbitrarily large in normal usage, + // there is statistical tracking logic to determine whether to recycle + // the internal buffer or not based on a history of utilization. + bufferedEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }} + + // This owns the internal buffer, but it is only used to temporarily store + // buffered JSON before flushing it to the underlying io.Writer. + // In a sufficiently efficient streaming mode, we do not expect the buffer + // to grow arbitrarily large. Thus, we avoid recycling large buffers. + streamingEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }} + + // This does not own the internal buffer since + // it is taken directly from the provided bytes.Buffer. + bytesBufferEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }} +) + +// bufferStatistics is statistics to track buffer utilization. +// It is used to determine whether to recycle a buffer or not +// to avoid https://go.dev/issue/23199. 
+type bufferStatistics struct { + strikes int // number of times the buffer was under-utilized + prevLen int // length of previous buffer +} + +func getBufferedEncoder(opts ...Options) *Encoder { + e := bufferedEncoderPool.Get().(*Encoder) + if e.s.Buf == nil { + // Round up to nearest 2ⁿ to make best use of malloc size classes. + // See runtime/sizeclasses.go on Go1.15. + // Logical OR with 63 to ensure 64 as the minimum buffer size. + n := 1 << bits.Len(uint(e.s.bufStats.prevLen|63)) + e.s.Buf = make([]byte, 0, n) + } + e.s.reset(e.s.Buf[:0], nil, opts...) + return e +} + +func putBufferedEncoder(e *Encoder) { + // Recycle large buffers only if sufficiently utilized. + // If a buffer is under-utilized enough times sequentially, + // then it is discarded, ensuring that a single large buffer + // won't be kept alive by a continuous stream of small usages. + // + // The worst case utilization is computed as: + // MIN_UTILIZATION_THRESHOLD / (1 + MAX_NUM_STRIKES) + // + // For the constants chosen below, this is (25%)/(1+4) ⇒ 5%. + // This may seem low, but it ensures a lower bound on + // the absolute worst-case utilization. Without this check, + // this would be theoretically 0%, which is infinitely worse. + // + // See https://go.dev/issue/27735. + switch { + case cap(e.s.Buf) <= 4<<10: // always recycle buffers smaller than 4KiB + e.s.bufStats.strikes = 0 + case cap(e.s.Buf)/4 <= len(e.s.Buf): // at least 25% utilization + e.s.bufStats.strikes = 0 + case e.s.bufStats.strikes < 4: // at most 4 strikes + e.s.bufStats.strikes++ + default: // discard the buffer; too large and too often under-utilized + e.s.bufStats.strikes = 0 + e.s.bufStats.prevLen = len(e.s.Buf) // heuristic for size to allocate next time + e.s.Buf = nil + } + bufferedEncoderPool.Put(e) +} + +func getStreamingEncoder(w io.Writer, opts ...Options) *Encoder { + if _, ok := w.(*bytes.Buffer); ok { + e := bytesBufferEncoderPool.Get().(*Encoder) + e.s.reset(nil, w, opts...) 
// buffer taken from bytes.Buffer + return e + } else { + e := streamingEncoderPool.Get().(*Encoder) + e.s.reset(e.s.Buf[:0], w, opts...) // preserve existing buffer + return e + } +} + +func putStreamingEncoder(e *Encoder) { + if _, ok := e.s.wr.(*bytes.Buffer); ok { + bytesBufferEncoderPool.Put(e) + } else { + if cap(e.s.Buf) > 64<<10 { + e.s.Buf = nil // avoid pinning arbitrarily large amounts of memory + } + streamingEncoderPool.Put(e) + } +} + +var ( + // This does not own the internal buffer since it is externally provided. + bufferedDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }} + + // This owns the internal buffer, but it is only used to temporarily store + // buffered JSON fetched from the underlying io.Reader. + // In a sufficiently efficient streaming mode, we do not expect the buffer + // to grow arbitrarily large. Thus, we avoid recycling large buffers. + streamingDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }} + + // This does not own the internal buffer since + // it is taken directly from the provided bytes.Buffer. + bytesBufferDecoderPool = bufferedDecoderPool +) + +func getBufferedDecoder(b []byte, opts ...Options) *Decoder { + d := bufferedDecoderPool.Get().(*Decoder) + d.s.reset(b, nil, opts...) + return d +} + +func putBufferedDecoder(d *Decoder) { + bufferedDecoderPool.Put(d) +} + +func getStreamingDecoder(r io.Reader, opts ...Options) *Decoder { + if _, ok := r.(*bytes.Buffer); ok { + d := bytesBufferDecoderPool.Get().(*Decoder) + d.s.reset(nil, r, opts...) // buffer taken from bytes.Buffer + return d + } else { + d := streamingDecoderPool.Get().(*Decoder) + d.s.reset(d.s.buf[:0], r, opts...) 
// preserve existing buffer + return d + } +} + +func putStreamingDecoder(d *Decoder) { + if _, ok := d.s.rd.(*bytes.Buffer); ok { + bytesBufferDecoderPool.Put(d) + } else { + if cap(d.s.buf) > 64<<10 { + d.s.buf = nil // avoid pinning arbitrarily large amounts of memory + } + streamingDecoderPool.Put(d) + } +} + +var errInvalidUTF8 = &SyntacticError{str: "invalid UTF-8 within string"} + +// AppendQuote appends a double-quoted JSON string literal representing src +// to dst and returns the extended buffer. +// It uses the minimal string representation per RFC 8785, section 3.2.2.2. +// Invalid UTF-8 bytes are replaced with the Unicode replacement character +// and an error is returned at the end indicating the presence of invalid UTF-8. +func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) { + return jsonwire.AppendQuote(dst, src, &jsonflags.Flags{}) +} + +// AppendUnquote appends the decoded interpretation of src as a +// double-quoted JSON string literal to dst and returns the extended buffer. +// The input src must be a JSON string without any surrounding whitespace. +// Invalid UTF-8 bytes are replaced with the Unicode replacement character +// and an error is returned at the end indicating the presence of invalid UTF-8. +// Any trailing bytes after the JSON string literal results in an error. 
+func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) { + return jsonwire.AppendUnquote(dst, src) +} + +var ( + errMissingName = &SyntacticError{str: "missing string for object name"} + errMissingColon = &SyntacticError{str: "missing character ':' after object name"} + errMissingValue = &SyntacticError{str: "missing value after object name"} + errMissingComma = &SyntacticError{str: "missing character ',' after object or array value"} + errMismatchDelim = &SyntacticError{str: "mismatching structural token for object or array"} + errMaxDepth = &SyntacticError{str: "exceeded max depth"} + + errInvalidNamespace = &SyntacticError{str: "object namespace is in an invalid state"} +) + +// Per RFC 8259, section 9, implementations may enforce a maximum depth. +// Such a limit is necessary to prevent stack overflows. +const maxNestingDepth = 10000 + +type state struct { + // Tokens validates whether the next token kind is valid. + Tokens stateMachine + + // Names is a stack of object names. + // Not used if AllowDuplicateNames is true. + Names objectNameStack + + // Namespaces is a stack of object namespaces. + // For performance reasons, Encoder or Decoder may not update this + // if Marshal or Unmarshal is able to track names in a more efficient way. + // See makeMapArshaler and makeStructArshaler. + // Not used if AllowDuplicateNames is true. + Namespaces objectNamespaceStack +} + +func (s *state) reset() { + s.Tokens.reset() + s.Names.reset() + s.Namespaces.reset() +} + +// Pointer is a JSON Pointer (RFC 6901) that references a particular JSON value +// relative to the root of the top-level JSON value. +type Pointer string + +// nextToken returns the next token in the pointer, reducing the length of p. 
+func (p *Pointer) nextToken() (token string) { + *p = Pointer(strings.TrimPrefix(string(*p), "/")) + i := min(uint(strings.IndexByte(string(*p), '/')), uint(len(*p))) + token = string(*p)[:i] + *p = (*p)[i:] + if strings.Contains(token, "~") { + // Per RFC 6901, section 3, unescape '~' and '/' characters. + token = strings.ReplaceAll(token, "~1", "/") + token = strings.ReplaceAll(token, "~0", "~") + } + return token +} + +// appendStackPointer appends a JSON Pointer (RFC 6901) to the current value. +// The returned pointer is only accurate if s.names is populated, +// otherwise it uses the numeric index as the object member name. +// +// Invariant: Must call s.names.copyQuotedBuffer beforehand. +func (s state) appendStackPointer(b []byte) []byte { + var objectDepth int + for i := 1; i < s.Tokens.Depth(); i++ { + e := s.Tokens.index(i) + if e.Length() == 0 { + break // empty object or array + } + b = append(b, '/') + switch { + case e.isObject(): + if objectDepth < s.Names.length() { + for _, c := range s.Names.getUnquoted(objectDepth) { + // Per RFC 6901, section 3, escape '~' and '/' characters. + switch c { + case '~': + b = append(b, "~0"...) + case '/': + b = append(b, "~1"...) + default: + b = append(b, c) + } + } + } else { + // Since the names stack is unpopulated, the name is unknown. + // As a best-effort replacement, use the numeric member index. + // While inaccurate, it produces a syntactically valid pointer. + b = strconv.AppendUint(b, uint64((e.Length()-1)/2), 10) + } + objectDepth++ + case e.isArray(): + b = strconv.AppendUint(b, uint64(e.Length()-1), 10) + } + } + return b +} + +// stateMachine is a push-down automaton that validates whether +// a sequence of tokens is valid or not according to the JSON grammar. +// It is useful for both encoding and decoding. +// +// It is a stack where each entry represents a nested JSON object or array. 
+// The stack has a minimum depth of 1 where the first level is a +// virtual JSON array to handle a stream of top-level JSON values. +// The top-level virtual JSON array is special in that it doesn't require commas +// between each JSON value. +// +// For performance, most methods are carefully written to be inlinable. +// The zero value is a valid state machine ready for use. +type stateMachine struct { + Stack []stateEntry + Last stateEntry +} + +// reset resets the state machine. +// The machine always starts with a minimum depth of 1. +func (m *stateMachine) reset() { + m.Stack = m.Stack[:0] + if cap(m.Stack) > 1<<10 { + m.Stack = nil + } + m.Last = stateTypeArray +} + +// Depth is the current nested depth of JSON objects and arrays. +// It is one-indexed (i.e., top-level values have a depth of 1). +func (m stateMachine) Depth() int { + return len(m.Stack) + 1 +} + +// index returns a reference to the ith entry. +// It is only valid until the next push method call. +func (m *stateMachine) index(i int) *stateEntry { + if i == len(m.Stack) { + return &m.Last + } + return &m.Stack[i] +} + +// DepthLength reports the current nested depth and +// the length of the last JSON object or array. +func (m stateMachine) DepthLength() (int, int64) { + return m.Depth(), m.Last.Length() +} + +// appendLiteral appends a JSON literal as the next token in the sequence. +// If an error is returned, the state is not mutated. +func (m *stateMachine) appendLiteral() error { + switch { + case m.Last.NeedObjectName(): + return errMissingName + case !m.Last.isValidNamespace(): + return errInvalidNamespace + default: + m.Last.Increment() + return nil + } +} + +// appendString appends a JSON string as the next token in the sequence. +// If an error is returned, the state is not mutated. 
+func (m *stateMachine) appendString() error { + switch { + case !m.Last.isValidNamespace(): + return errInvalidNamespace + default: + m.Last.Increment() + return nil + } +} + +// appendNumber appends a JSON number as the next token in the sequence. +// If an error is returned, the state is not mutated. +func (m *stateMachine) appendNumber() error { + return m.appendLiteral() +} + +// pushObject appends a JSON start object token as next in the sequence. +// If an error is returned, the state is not mutated. +func (m *stateMachine) pushObject() error { + switch { + case m.Last.NeedObjectName(): + return errMissingName + case !m.Last.isValidNamespace(): + return errInvalidNamespace + case len(m.Stack) == maxNestingDepth: + return errMaxDepth + default: + m.Last.Increment() + m.Stack = append(m.Stack, m.Last) + m.Last = stateTypeObject + return nil + } +} + +// popObject appends a JSON end object token as next in the sequence. +// If an error is returned, the state is not mutated. +func (m *stateMachine) popObject() error { + switch { + case !m.Last.isObject(): + return errMismatchDelim + case m.Last.needObjectValue(): + return errMissingValue + case !m.Last.isValidNamespace(): + return errInvalidNamespace + default: + m.Last = m.Stack[len(m.Stack)-1] + m.Stack = m.Stack[:len(m.Stack)-1] + return nil + } +} + +// pushArray appends a JSON start array token as next in the sequence. +// If an error is returned, the state is not mutated. +func (m *stateMachine) pushArray() error { + switch { + case m.Last.NeedObjectName(): + return errMissingName + case !m.Last.isValidNamespace(): + return errInvalidNamespace + case len(m.Stack) == maxNestingDepth: + return errMaxDepth + default: + m.Last.Increment() + m.Stack = append(m.Stack, m.Last) + m.Last = stateTypeArray + return nil + } +} + +// popArray appends a JSON end array token as next in the sequence. +// If an error is returned, the state is not mutated. 
+func (m *stateMachine) popArray() error { + switch { + case !m.Last.isArray() || len(m.Stack) == 0: // forbid popping top-level virtual JSON array + return errMismatchDelim + case !m.Last.isValidNamespace(): + return errInvalidNamespace + default: + m.Last = m.Stack[len(m.Stack)-1] + m.Stack = m.Stack[:len(m.Stack)-1] + return nil + } +} + +// NeedIndent reports whether indent whitespace should be injected. +// A zero value means that no whitespace should be injected. +// A positive value means '\n', indentPrefix, and (n-1) copies of indentBody +// should be appended to the output immediately before the next token. +func (m stateMachine) NeedIndent(next Kind) (n int) { + willEnd := next == '}' || next == ']' + switch { + case m.Depth() == 1: + return 0 // top-level values are never indented + case m.Last.Length() == 0 && willEnd: + return 0 // an empty object or array is never indented + case m.Last.Length() == 0 || m.Last.needImplicitComma(next): + return m.Depth() + case willEnd: + return m.Depth() - 1 + default: + return 0 + } +} + +// MayAppendDelim appends a colon or comma that may precede the next token. +func (m stateMachine) MayAppendDelim(b []byte, next Kind) []byte { + switch { + case m.Last.needImplicitColon(): + return append(b, ':') + case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values + return append(b, ',') + default: + return b + } +} + +// needDelim reports whether a colon or comma token should be implicitly emitted +// before the next token of the specified kind. +// A zero value means no delimiter should be emitted. 
+func (m stateMachine) needDelim(next Kind) (delim byte) { + switch { + case m.Last.needImplicitColon(): + return ':' + case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values + return ',' + default: + return 0 + } +} + +// checkDelim reports whether the specified delimiter should be there given +// the kind of the next token that appears immediately afterwards. +func (m stateMachine) checkDelim(delim byte, next Kind) error { + switch m.needDelim(next) { + case delim: + return nil + case ':': + return errMissingColon + case ',': + return errMissingComma + default: + return newInvalidCharacterError([]byte{delim}, "before next token") + } +} + +// InvalidateDisabledNamespaces marks all disabled namespaces as invalid. +// +// For efficiency, Marshal and Unmarshal may disable namespaces since there are +// more efficient ways to track duplicate names. However, if an error occurs, +// the namespaces in Encoder or Decoder will be left in an inconsistent state. +// Mark the namespaces as invalid so that future method calls on +// Encoder or Decoder will return an error. +func (m *stateMachine) InvalidateDisabledNamespaces() { + for i := range m.Depth() { + e := m.index(i) + if !e.isActiveNamespace() { + e.invalidateNamespace() + } + } +} + +// stateEntry encodes several artifacts within a single unsigned integer: +// - whether this represents a JSON object or array, +// - whether this object should check for duplicate names, and +// - how many elements are in this JSON object or array. +type stateEntry uint64 + +const ( + // The type mask (1 bit) records whether this is a JSON object or array. + stateTypeMask stateEntry = 0x8000_0000_0000_0000 + stateTypeObject stateEntry = 0x8000_0000_0000_0000 + stateTypeArray stateEntry = 0x0000_0000_0000_0000 + + // The name check mask (2 bit) records whether to update + // the namespaces for the current JSON object and + // whether the namespace is valid. 
+ stateNamespaceMask stateEntry = 0x6000_0000_0000_0000 + stateDisableNamespace stateEntry = 0x4000_0000_0000_0000 + stateInvalidNamespace stateEntry = 0x2000_0000_0000_0000 + + // The count mask (61 bits) records the number of elements. + stateCountMask stateEntry = 0x1fff_ffff_ffff_ffff + stateCountLSBMask stateEntry = 0x0000_0000_0000_0001 + stateCountOdd stateEntry = 0x0000_0000_0000_0001 + stateCountEven stateEntry = 0x0000_0000_0000_0000 +) + +// Length reports the number of elements in the JSON object or array. +// Each name and value in an object entry is treated as a separate element. +func (e stateEntry) Length() int64 { + return int64(e & stateCountMask) +} + +// isObject reports whether this is a JSON object. +func (e stateEntry) isObject() bool { + return e&stateTypeMask == stateTypeObject +} + +// isArray reports whether this is a JSON array. +func (e stateEntry) isArray() bool { + return e&stateTypeMask == stateTypeArray +} + +// NeedObjectName reports whether the next token must be a JSON string, +// which is necessary for JSON object names. +func (e stateEntry) NeedObjectName() bool { + return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountEven +} + +// needImplicitColon reports whether an colon should occur next, +// which always occurs after JSON object names. +func (e stateEntry) needImplicitColon() bool { + return e.needObjectValue() +} + +// needObjectValue reports whether the next token must be a JSON value, +// which is necessary after every JSON object name. +func (e stateEntry) needObjectValue() bool { + return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountOdd +} + +// needImplicitComma reports whether an comma should occur next, +// which always occurs after a value in a JSON object or array +// before the next value (or name). 
+func (e stateEntry) needImplicitComma(next Kind) bool { + return !e.needObjectValue() && e.Length() > 0 && next != '}' && next != ']' +} + +// Increment increments the number of elements for the current object or array. +// This assumes that overflow won't practically be an issue since +// 1< 0. +func (e *stateEntry) decrement() { + (*e)-- +} + +// DisableNamespace disables the JSON object namespace such that the +// Encoder or Decoder no longer updates the namespace. +func (e *stateEntry) DisableNamespace() { + *e |= stateDisableNamespace +} + +// isActiveNamespace reports whether the JSON object namespace is actively +// being updated and used for duplicate name checks. +func (e stateEntry) isActiveNamespace() bool { + return e&(stateDisableNamespace) == 0 +} + +// invalidateNamespace marks the JSON object namespace as being invalid. +func (e *stateEntry) invalidateNamespace() { + *e |= stateInvalidNamespace +} + +// isValidNamespace reports whether the JSON object namespace is valid. +func (e stateEntry) isValidNamespace() bool { + return e&(stateInvalidNamespace) == 0 +} + +// objectNameStack is a stack of names when descending into a JSON object. +// In contrast to objectNamespaceStack, this only has to remember a single name +// per JSON object. +// +// This data structure may contain offsets to encodeBuffer or decodeBuffer. +// It violates clean abstraction of layers, but is significantly more efficient. +// This ensures that popping and pushing in the common case is a trivial +// push/pop of an offset integer. +// +// The zero value is an empty names stack ready for use. +type objectNameStack struct { + // offsets is a stack of offsets for each name. + // A non-negative offset is the ending offset into the local names buffer. + // A negative offset is the bit-wise inverse of a starting offset into + // a remote buffer (e.g., encodeBuffer or decodeBuffer). + // A math.MinInt offset at the end implies that the last object is empty. 
+ // Invariant: Positive offsets always occur before negative offsets. + offsets []int + // unquotedNames is a back-to-back concatenation of names. + unquotedNames []byte +} + +func (ns *objectNameStack) reset() { + ns.offsets = ns.offsets[:0] + ns.unquotedNames = ns.unquotedNames[:0] + if cap(ns.offsets) > 1<<6 { + ns.offsets = nil // avoid pinning arbitrarily large amounts of memory + } + if cap(ns.unquotedNames) > 1<<10 { + ns.unquotedNames = nil // avoid pinning arbitrarily large amounts of memory + } +} + +func (ns *objectNameStack) length() int { + return len(ns.offsets) +} + +// getUnquoted retrieves the ith unquoted name in the stack. +// It returns an empty string if the last object is empty. +// +// Invariant: Must call copyQuotedBuffer beforehand. +func (ns *objectNameStack) getUnquoted(i int) []byte { + ns.ensureCopiedBuffer() + if i == 0 { + return ns.unquotedNames[:ns.offsets[0]] + } else { + return ns.unquotedNames[ns.offsets[i-1]:ns.offsets[i-0]] + } +} + +// invalidOffset indicates that the last JSON object currently has no name. +const invalidOffset = math.MinInt + +// push descends into a nested JSON object. +func (ns *objectNameStack) push() { + ns.offsets = append(ns.offsets, invalidOffset) +} + +// ReplaceLastQuotedOffset replaces the last name with the starting offset +// to the quoted name in some remote buffer. All offsets provided must be +// relative to the same buffer until copyQuotedBuffer is called. +func (ns *objectNameStack) ReplaceLastQuotedOffset(i int) { + // Use bit-wise inversion instead of naive multiplication by -1 to avoid + // ambiguity regarding zero (which is a valid offset into the names field). + // Bit-wise inversion is mathematically equivalent to -i-1, + // such that 0 becomes -1, 1 becomes -2, and so forth. + // This ensures that remote offsets are always negative. + ns.offsets[len(ns.offsets)-1] = ^i +} + +// replaceLastUnquotedName replaces the last name with the provided name. 
+// +// Invariant: Must call copyQuotedBuffer beforehand. +func (ns *objectNameStack) replaceLastUnquotedName(s string) { + ns.ensureCopiedBuffer() + var startOffset int + if len(ns.offsets) > 1 { + startOffset = ns.offsets[len(ns.offsets)-2] + } + ns.unquotedNames = append(ns.unquotedNames[:startOffset], s...) + ns.offsets[len(ns.offsets)-1] = len(ns.unquotedNames) +} + +// clearLast removes any name in the last JSON object. +// It is semantically equivalent to ns.push followed by ns.pop. +func (ns *objectNameStack) clearLast() { + ns.offsets[len(ns.offsets)-1] = invalidOffset +} + +// pop ascends out of a nested JSON object. +func (ns *objectNameStack) pop() { + ns.offsets = ns.offsets[:len(ns.offsets)-1] +} + +// copyQuotedBuffer copies names from the remote buffer into the local names +// buffer so that there are no more offset references into the remote buffer. +// This allows the remote buffer to change contents without affecting +// the names that this data structure is trying to remember. +func (ns *objectNameStack) copyQuotedBuffer(b []byte) { + // Find the first negative offset. + var i int + for i = len(ns.offsets) - 1; i >= 0 && ns.offsets[i] < 0; i-- { + continue + } + + // Copy each name from the remote buffer into the local buffer. + for i = i + 1; i < len(ns.offsets); i++ { + if i == len(ns.offsets)-1 && ns.offsets[i] == invalidOffset { + if i == 0 { + ns.offsets[i] = 0 + } else { + ns.offsets[i] = ns.offsets[i-1] + } + break // last JSON object had a push without any names + } + + // As a form of Hyrum proofing, we write an invalid character into the + // buffer to make misuse of Decoder.ReadToken more obvious. + // We need to undo that mutation here. + quotedName := b[^ns.offsets[i]:] + if quotedName[0] == invalidateBufferByte { + quotedName[0] = '"' + } + + // Append the unquoted name to the local buffer. 
+ var startOffset int + if i > 0 { + startOffset = ns.offsets[i-1] + } + if n := jsonwire.ConsumeSimpleString(quotedName); n > 0 { + ns.unquotedNames = append(ns.unquotedNames[:startOffset], quotedName[len(`"`):n-len(`"`)]...) + } else { + ns.unquotedNames, _ = jsonwire.AppendUnquote(ns.unquotedNames[:startOffset], quotedName) + } + ns.offsets[i] = len(ns.unquotedNames) + } +} + +func (ns *objectNameStack) ensureCopiedBuffer() { + if len(ns.offsets) > 0 && ns.offsets[len(ns.offsets)-1] < 0 { + panic("BUG: copyQuotedBuffer not called beforehand") + } +} + +// objectNamespaceStack is a stack of object namespaces. +// This data structure assists in detecting duplicate names. +type objectNamespaceStack []objectNamespace + +// reset resets the object namespace stack. +func (nss *objectNamespaceStack) reset() { + if cap(*nss) > 1<<10 { + *nss = nil + } + *nss = (*nss)[:0] +} + +// push starts a new namespace for a nested JSON object. +func (nss *objectNamespaceStack) push() { + if cap(*nss) > len(*nss) { + *nss = (*nss)[:len(*nss)+1] + nss.Last().reset() + } else { + *nss = append(*nss, objectNamespace{}) + } +} + +// Last returns a pointer to the last JSON object namespace. +func (nss objectNamespaceStack) Last() *objectNamespace { + return &nss[len(nss)-1] +} + +// pop terminates the namespace for a nested JSON object. +func (nss *objectNamespaceStack) pop() { + *nss = (*nss)[:len(*nss)-1] +} + +// objectNamespace is the namespace for a JSON object. +// In contrast to objectNameStack, this needs to remember a all names +// per JSON object. +// +// The zero value is an empty namespace ready for use. +type objectNamespace struct { + // It relies on a linear search over all the names before switching + // to use a Go map for direct lookup. + + // endOffsets is a list of offsets to the end of each name in buffers. + // The length of offsets is the number of names in the namespace. 
+ endOffsets []uint + // allUnquotedNames is a back-to-back concatenation of every name in the namespace. + allUnquotedNames []byte + // mapNames is a Go map containing every name in the namespace. + // Only valid if non-nil. + mapNames map[string]struct{} +} + +// reset resets the namespace to be empty. +func (ns *objectNamespace) reset() { + ns.endOffsets = ns.endOffsets[:0] + ns.allUnquotedNames = ns.allUnquotedNames[:0] + ns.mapNames = nil + if cap(ns.endOffsets) > 1<<6 { + ns.endOffsets = nil // avoid pinning arbitrarily large amounts of memory + } + if cap(ns.allUnquotedNames) > 1<<10 { + ns.allUnquotedNames = nil // avoid pinning arbitrarily large amounts of memory + } +} + +// length reports the number of names in the namespace. +func (ns *objectNamespace) length() int { + return len(ns.endOffsets) +} + +// getUnquoted retrieves the ith unquoted name in the namespace. +func (ns *objectNamespace) getUnquoted(i int) []byte { + if i == 0 { + return ns.allUnquotedNames[:ns.endOffsets[0]] + } else { + return ns.allUnquotedNames[ns.endOffsets[i-1]:ns.endOffsets[i-0]] + } +} + +// lastUnquoted retrieves the last name in the namespace. +func (ns *objectNamespace) lastUnquoted() []byte { + return ns.getUnquoted(ns.length() - 1) +} + +// insertQuoted inserts a name and reports whether it was inserted, +// which only occurs if name is not already in the namespace. +// The provided name must be a valid JSON string. +func (ns *objectNamespace) insertQuoted(name []byte, isVerbatim bool) bool { + if isVerbatim { + name = name[len(`"`) : len(name)-len(`"`)] + } + return ns.insert(name, !isVerbatim) +} + +func (ns *objectNamespace) InsertUnquoted(name []byte) bool { + return ns.insert(name, false) +} + +func (ns *objectNamespace) insert(name []byte, quoted bool) bool { + var allNames []byte + if quoted { + allNames, _ = jsonwire.AppendUnquote(ns.allUnquotedNames, name) + } else { + allNames = append(ns.allUnquotedNames, name...) 
+ } + name = allNames[len(ns.allUnquotedNames):] + + // Switch to a map if the buffer is too large for linear search. + // This does not add the current name to the map. + if ns.mapNames == nil && (ns.length() > 64 || len(ns.allUnquotedNames) > 1024) { + ns.mapNames = make(map[string]struct{}) + var startOffset uint + for _, endOffset := range ns.endOffsets { + name := ns.allUnquotedNames[startOffset:endOffset] + ns.mapNames[string(name)] = struct{}{} // allocates a new string + startOffset = endOffset + } + } + + if ns.mapNames == nil { + // Perform linear search over the buffer to find matching names. + // It provides O(n) lookup, but does not require any allocations. + var startOffset uint + for _, endOffset := range ns.endOffsets { + if string(ns.allUnquotedNames[startOffset:endOffset]) == string(name) { + return false + } + startOffset = endOffset + } + } else { + // Use the map if it is populated. + // It provides O(1) lookup, but requires a string allocation per name. + if _, ok := ns.mapNames[string(name)]; ok { + return false + } + ns.mapNames[string(name)] = struct{}{} // allocates a new string + } + + ns.allUnquotedNames = allNames + ns.endOffsets = append(ns.endOffsets, uint(len(ns.allUnquotedNames))) + return true +} + +// removeLast removes the last name in the namespace. +func (ns *objectNamespace) removeLast() { + if ns.mapNames != nil { + delete(ns.mapNames, string(ns.lastUnquoted())) + } + if ns.length()-1 == 0 { + ns.endOffsets = ns.endOffsets[:0] + ns.allUnquotedNames = ns.allUnquotedNames[:0] + } else { + ns.endOffsets = ns.endOffsets[:ns.length()-1] + ns.allUnquotedNames = ns.allUnquotedNames[:ns.endOffsets[ns.length()-1]] + } +} + +// NOTE: Token is analogous to v1 json.Token. 
+ +const ( + maxInt64 = math.MaxInt64 + minInt64 = math.MinInt64 + maxUint64 = math.MaxUint64 + minUint64 = 0 // for consistency and readability purposes + + invalidTokenPanic = "invalid json.Token; it has been voided by a subsequent json.Decoder call" +) + +// Token represents a lexical JSON token, which may be one of the following: +// - a JSON literal (i.e., null, true, or false) +// - a JSON string (e.g., "hello, world!") +// - a JSON number (e.g., 123.456) +// - a start or end delimiter for a JSON object (i.e., { or } ) +// - a start or end delimiter for a JSON array (i.e., [ or ] ) +// +// A Token cannot represent entire array or object values, while a [Value] can. +// There is no Token to represent commas and colons since +// these structural tokens can be inferred from the surrounding context. +type Token struct { + nonComparable + + // Tokens can exist in either a "raw" or an "exact" form. + // Tokens produced by the Decoder are in the "raw" form. + // Tokens returned by constructors are usually in the "exact" form. + // The Encoder accepts Tokens in either the "raw" or "exact" form. 
+ // + // The following chart shows the possible values for each Token type: + // ╔═════════════════╦════════════╤════════════╤════════════╗ + // ║ Token type ║ raw field │ str field │ num field ║ + // ╠═════════════════╬════════════╪════════════╪════════════╣ + // ║ null (raw) ║ "null" │ "" │ 0 ║ + // ║ false (raw) ║ "false" │ "" │ 0 ║ + // ║ true (raw) ║ "true" │ "" │ 0 ║ + // ║ string (raw) ║ non-empty │ "" │ offset ║ + // ║ string (string) ║ nil │ non-empty │ 0 ║ + // ║ number (raw) ║ non-empty │ "" │ offset ║ + // ║ number (float) ║ nil │ "f" │ non-zero ║ + // ║ number (int64) ║ nil │ "i" │ non-zero ║ + // ║ number (uint64) ║ nil │ "u" │ non-zero ║ + // ║ object (delim) ║ "{" or "}" │ "" │ 0 ║ + // ║ array (delim) ║ "[" or "]" │ "" │ 0 ║ + // ╚═════════════════╩════════════╧════════════╧════════════╝ + // + // Notes: + // - For tokens stored in "raw" form, the num field contains the + // absolute offset determined by raw.previousOffsetStart(). + // The buffer itself is stored in raw.previousBuffer(). + // - JSON literals and structural characters are always in the "raw" form. + // - JSON strings and numbers can be in either "raw" or "exact" forms. + // - The exact zero value of JSON strings and numbers in the "exact" forms + // have ambiguous representation. Thus, they are always represented + // in the "raw" form. + + // raw contains a reference to the raw decode buffer. + // If non-nil, then its value takes precedence over str and num. + // It is only valid if num == raw.previousOffsetStart(). + raw *decodeBuffer + + // str is the unescaped JSON string if num is zero. + // Otherwise, it is "f", "i", or "u" if num should be interpreted + // as a float64, int64, or uint64, respectively. + str string + + // num is a float64, int64, or uint64 stored as a uint64 value. + // It is non-zero for any JSON number in the "exact" form. + num uint64 +} + +// TODO: Does representing 1-byte delimiters as *decodeBuffer cause performance issues? 
+ +var ( + Null Token = rawToken("null") + False Token = rawToken("false") + True Token = rawToken("true") + + ObjectStart Token = rawToken("{") + ObjectEnd Token = rawToken("}") + ArrayStart Token = rawToken("[") + ArrayEnd Token = rawToken("]") + + zeroString Token = rawToken(`""`) + zeroNumber Token = rawToken(`0`) + + nanString Token = String("NaN") + pinfString Token = String("Infinity") + ninfString Token = String("-Infinity") +) + +func rawToken(s string) Token { + return Token{raw: &decodeBuffer{buf: []byte(s), prevStart: 0, prevEnd: len(s)}} +} + +// Bool constructs a Token representing a JSON boolean. +func Bool(b bool) Token { + if b { + return True + } + return False +} + +// String constructs a Token representing a JSON string. +// The provided string should contain valid UTF-8, otherwise invalid characters +// may be mangled as the Unicode replacement character. +func String(s string) Token { + if len(s) == 0 { + return zeroString + } + return Token{str: s} +} + +// Float constructs a Token representing a JSON number. +// The values NaN, +Inf, and -Inf will be represented +// as a JSON string with the values "NaN", "Infinity", and "-Infinity". +func Float(n float64) Token { + switch { + case math.Float64bits(n) == 0: + return zeroNumber + case math.IsNaN(n): + return nanString + case math.IsInf(n, +1): + return pinfString + case math.IsInf(n, -1): + return ninfString + } + return Token{str: "f", num: math.Float64bits(n)} +} + +// Int constructs a Token representing a JSON number from an int64. +func Int(n int64) Token { + if n == 0 { + return zeroNumber + } + return Token{str: "i", num: uint64(n)} +} + +// Uint constructs a Token representing a JSON number from a uint64. +func Uint(n uint64) Token { + if n == 0 { + return zeroNumber + } + return Token{str: "u", num: uint64(n)} +} + +// Clone makes a copy of the Token such that its value remains valid +// even after a subsequent [Decoder.Read] call. 
+func (t Token) Clone() Token { + // TODO: Allow caller to avoid any allocations? + if raw := t.raw; raw != nil { + // Avoid copying globals. + if t.raw.prevStart == 0 { + switch t.raw { + case Null.raw: + return Null + case False.raw: + return False + case True.raw: + return True + case ObjectStart.raw: + return ObjectStart + case ObjectEnd.raw: + return ObjectEnd + case ArrayStart.raw: + return ArrayStart + case ArrayEnd.raw: + return ArrayEnd + } + } + + if uint64(raw.previousOffsetStart()) != t.num { + panic(invalidTokenPanic) + } + buf := bytes.Clone(raw.PreviousBuffer()) + return Token{raw: &decodeBuffer{buf: buf, prevStart: 0, prevEnd: len(buf)}} + } + return t +} + +// Bool returns the value for a JSON boolean. +// It panics if the token kind is not a JSON boolean. +func (t Token) Bool() bool { + switch t.raw { + case True.raw: + return true + case False.raw: + return false + default: + panic("invalid JSON token kind: " + t.Kind().String()) + } +} + +// appendString appends a JSON string to dst and returns it. +// It panics if t is not a JSON string. +func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) { + if raw := t.raw; raw != nil { + // Handle raw string value. + buf := raw.PreviousBuffer() + if Kind(buf[0]) == '"' { + if jsonwire.ConsumeSimpleString(buf) == len(buf) { + return append(dst, buf...), nil + } + dst, _, err := jsonwire.ReformatString(dst, buf, flags) + return dst, err + } + } else if len(t.str) != 0 && t.num == 0 { + // Handle exact string value. + return jsonwire.AppendQuote(dst, t.str, flags) + } + + panic("invalid JSON token kind: " + t.Kind().String()) +} + +// String returns the unescaped string value for a JSON string. +// For other JSON kinds, this returns the raw JSON representation. +func (t Token) String() string { + // This is inlinable to take advantage of "function outlining". + // This avoids an allocation for the string(b) conversion + // if the caller does not use the string in an escaping manner. 
+ // See https://blog.filippo.io/efficient-go-apis-with-the-inliner/ + s, b := t.string() + if len(b) > 0 { + return string(b) + } + return s +} + +func (t Token) string() (string, []byte) { + if raw := t.raw; raw != nil { + if uint64(raw.previousOffsetStart()) != t.num { + panic(invalidTokenPanic) + } + buf := raw.PreviousBuffer() + if buf[0] == '"' { + // TODO: Preserve ValueFlags in Token? + isVerbatim := jsonwire.ConsumeSimpleString(buf) == len(buf) + return "", jsonwire.UnquoteMayCopy(buf, isVerbatim) + } + // Handle tokens that are not JSON strings for fmt.Stringer. + return "", buf + } + if len(t.str) != 0 && t.num == 0 { + return t.str, nil + } + // Handle tokens that are not JSON strings for fmt.Stringer. + if t.num > 0 { + switch t.str[0] { + case 'f': + return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.num), 64)), nil + case 'i': + return strconv.FormatInt(int64(t.num), 10), nil + case 'u': + return strconv.FormatUint(uint64(t.num), 10), nil + } + } + return "", nil +} + +// appendNumber appends a JSON number to dst and returns it. +// It panics if t is not a JSON number. +func (t Token) appendNumber(dst []byte, canonicalize bool) ([]byte, error) { + if raw := t.raw; raw != nil { + // Handle raw number value. + buf := raw.PreviousBuffer() + if Kind(buf[0]).normalize() == '0' { + if !canonicalize { + return append(dst, buf...), nil + } + dst, _, err := jsonwire.ReformatNumber(dst, buf, canonicalize) + return dst, err + } + } else if t.num != 0 { + // Handle exact number value. + switch t.str[0] { + case 'f': + return jsonwire.AppendFloat(dst, math.Float64frombits(t.num), 64), nil + case 'i': + return strconv.AppendInt(dst, int64(t.num), 10), nil + case 'u': + return strconv.AppendUint(dst, uint64(t.num), 10), nil + } + } + + panic("invalid JSON token kind: " + t.Kind().String()) +} + +// Float returns the floating-point value for a JSON number. 
+// It returns a NaN, +Inf, or -Inf value for any JSON string +// with the values "NaN", "Infinity", or "-Infinity". +// It panics for all other cases. +func (t Token) Float() float64 { + if raw := t.raw; raw != nil { + // Handle raw number value. + if uint64(raw.previousOffsetStart()) != t.num { + panic(invalidTokenPanic) + } + buf := raw.PreviousBuffer() + if Kind(buf[0]).normalize() == '0' { + fv, _ := jsonwire.ParseFloat(buf, 64) + return fv + } + } else if t.num != 0 { + // Handle exact number value. + switch t.str[0] { + case 'f': + return math.Float64frombits(t.num) + case 'i': + return float64(int64(t.num)) + case 'u': + return float64(uint64(t.num)) + } + } + + // Handle string values with "NaN", "Infinity", or "-Infinity". + if t.Kind() == '"' { + switch t.String() { + case "NaN": + return math.NaN() + case "Infinity": + return math.Inf(+1) + case "-Infinity": + return math.Inf(-1) + } + } + + panic("invalid JSON token kind: " + t.Kind().String()) +} + +// Int returns the signed integer value for a JSON number. +// The fractional component of any number is ignored (truncation toward zero). +// Any number beyond the representation of an int64 will be saturated +// to the closest representable value. +// It panics if the token kind is not a JSON number. +func (t Token) Int() int64 { + if raw := t.raw; raw != nil { + // Handle raw integer value. + if uint64(raw.previousOffsetStart()) != t.num { + panic(invalidTokenPanic) + } + neg := false + buf := raw.PreviousBuffer() + if len(buf) > 0 && buf[0] == '-' { + neg, buf = true, buf[1:] + } + if numAbs, ok := jsonwire.ParseUint(buf); ok { + if neg { + if numAbs > -minInt64 { + return minInt64 + } + return -1 * int64(numAbs) + } else { + if numAbs > +maxInt64 { + return maxInt64 + } + return +1 * int64(numAbs) + } + } + } else if t.num != 0 { + // Handle exact integer value. 
+ switch t.str[0] { + case 'i': + return int64(t.num) + case 'u': + if t.num > maxInt64 { + return maxInt64 + } + return int64(t.num) + } + } + + // Handle JSON number that is a floating-point value. + if t.Kind() == '0' { + switch fv := t.Float(); { + case fv >= maxInt64: + return maxInt64 + case fv <= minInt64: + return minInt64 + default: + return int64(fv) // truncation toward zero + } + } + + panic("invalid JSON token kind: " + t.Kind().String()) +} + +// Uint returns the unsigned integer value for a JSON number. +// The fractional component of any number is ignored (truncation toward zero). +// Any number beyond the representation of an uint64 will be saturated +// to the closest representable value. +// It panics if the token kind is not a JSON number. +func (t Token) Uint() uint64 { + // NOTE: This accessor returns 0 for any negative JSON number, + // which might be surprising, but is at least consistent with the behavior + // of saturating out-of-bounds numbers to the closest representable number. + + if raw := t.raw; raw != nil { + // Handle raw integer value. + if uint64(raw.previousOffsetStart()) != t.num { + panic(invalidTokenPanic) + } + neg := false + buf := raw.PreviousBuffer() + if len(buf) > 0 && buf[0] == '-' { + neg, buf = true, buf[1:] + } + if num, ok := jsonwire.ParseUint(buf); ok { + if neg { + return minUint64 + } + return num + } + } else if t.num != 0 { + // Handle exact integer value. + switch t.str[0] { + case 'u': + return t.num + case 'i': + if int64(t.num) < minUint64 { + return minUint64 + } + return uint64(int64(t.num)) + } + } + + // Handle JSON number that is a floating-point value. + if t.Kind() == '0' { + switch fv := t.Float(); { + case fv >= maxUint64: + return maxUint64 + case fv <= minUint64: + return minUint64 + default: + return uint64(fv) // truncation toward zero + } + } + + panic("invalid JSON token kind: " + t.Kind().String()) +} + +// Kind returns the token kind. 
+func (t Token) Kind() Kind { + switch { + case t.raw != nil: + raw := t.raw + if uint64(raw.previousOffsetStart()) != t.num { + panic(invalidTokenPanic) + } + return Kind(t.raw.buf[raw.prevStart]).normalize() + case t.num != 0: + return '0' + case len(t.str) != 0: + return '"' + default: + return invalidKind + } +} + +// Kind represents each possible JSON token kind with a single byte, +// which is conveniently the first byte of that kind's grammar +// with the restriction that numbers always be represented with '0': +// +// - 'n': null +// - 'f': false +// - 't': true +// - '"': string +// - '0': number +// - '{': object start +// - '}': object end +// - '[': array start +// - ']': array end +// +// An invalid kind is usually represented using 0, +// but may be non-zero due to invalid JSON data. +type Kind byte + +const invalidKind Kind = 0 + +// String prints the kind in a humanly readable fashion. +func (k Kind) String() string { + switch k { + case 'n': + return "null" + case 'f': + return "false" + case 't': + return "true" + case '"': + return "string" + case '0': + return "number" + case '{': + return "{" + case '}': + return "}" + case '[': + return "[" + case ']': + return "]" + default: + return "" + } +} + +// normalize coalesces all possible starting characters of a number as just '0'. +func (k Kind) normalize() Kind { + if k == '-' || ('0' <= k && k <= '9') { + return '0' + } + return k +} + +// NOTE: Value is analogous to v1 json.RawMessage. + +// Value represents a single raw JSON value, which may be one of the following: +// - a JSON literal (i.e., null, true, or false) +// - a JSON string (e.g., "hello, world!") +// - a JSON number (e.g., 123.456) +// - an entire JSON object (e.g., {"fizz":"buzz"} ) +// - an entire JSON array (e.g., [1,2,3] ) +// +// Value can represent entire array or object values, while [Token] cannot. +// Value may contain leading and/or trailing whitespace. +type Value []byte + +// Clone returns a copy of v. 
+func (v Value) Clone() Value { + return bytes.Clone(v) +} + +// String returns the string formatting of v. +func (v Value) String() string { + if v == nil { + return "null" + } + return string(v) +} + +// IsValid reports whether the raw JSON value is syntactically valid +// according to RFC 7493. +// +// It verifies whether the input is properly encoded as UTF-8, +// that escape sequences within strings decode to valid Unicode codepoints, and +// that all names in each object are unique. +// It does not verify whether numbers are representable within the limits +// of any common numeric type (e.g., float64, int64, or uint64). +func (v Value) IsValid() bool { + d := getBufferedDecoder(v) + defer putBufferedDecoder(d) + _, errVal := d.ReadValue() + _, errEOF := d.ReadToken() + return errVal == nil && errEOF == io.EOF +} + +// Compact removes all whitespace from the raw JSON value. +// +// It does not reformat JSON strings to use any other representation. +// It is guaranteed to succeed if the input is valid. +// If the value is already compacted, then the buffer is not mutated. +func (v *Value) Compact() error { + return v.reformat(false, false, "", "") +} + +// Indent reformats the whitespace in the raw JSON value so that each element +// in a JSON object or array begins on a new, indented line beginning with +// prefix followed by one or more copies of indent according to the nesting. +// The value does not begin with the prefix nor any indention, +// to make it easier to embed inside other formatted JSON data. +// +// It does not reformat JSON strings to use any other representation. +// It is guaranteed to succeed if the input is valid. +// If the value is already indented properly, then the buffer is not mutated. +// +// The prefix and indent strings must be composed of only spaces and/or tabs. 
+func (v *Value) Indent(prefix, indent string) error { + return v.reformat(false, true, prefix, indent) +} + +// Canonicalize canonicalizes the raw JSON value according to the +// JSON Canonicalization Scheme (JCS) as defined by RFC 8785 +// where it produces a stable representation of a JSON value. +// +// The output stability is dependent on the stability of the application data +// (see RFC 8785, Appendix E). It cannot produce stable output from +// fundamentally unstable input. For example, if the JSON value +// contains ephemeral data (e.g., a frequently changing timestamp), +// then the value is still unstable regardless of whether this is called. +// +// Note that JCS treats all JSON numbers as IEEE 754 double precision numbers. +// Any numbers with precision beyond what is representable by that form +// will lose their precision when canonicalized. For example, integer values +// beyond ±2⁵³ will lose their precision. It is recommended that +// int64 and uint64 data types be represented as a JSON string. +// +// It is guaranteed to succeed if the input is valid. +// If the value is already canonicalized, then the buffer is not mutated. +func (v *Value) Canonicalize() error { + return v.reformat(true, false, "", "") +} + +// TODO: Instead of implementing the v1 Marshaler/Unmarshaler, +// consider implementing the v2 versions instead. + +// MarshalJSON returns v as the JSON encoding of v. +// It returns the stored value as the raw JSON output without any validation. +// If v is nil, then this returns a JSON null. +func (v Value) MarshalJSON() ([]byte, error) { + // NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON. + if v == nil { + return []byte("null"), nil + } + return v, nil +} + +// UnmarshalJSON sets v as the JSON encoding of b. +// It stores a copy of the provided raw JSON input without any validation. +func (v *Value) UnmarshalJSON(b []byte) error { + // NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON. 
+ if v == nil { + return errors.New("json.Value: UnmarshalJSON on nil pointer") + } + *v = append((*v)[:0], b...) + return nil +} + +// Kind returns the starting token kind. +// For a valid value, this will never include '}' or ']'. +func (v Value) Kind() Kind { + if v := v[jsonwire.ConsumeWhitespace(v):]; len(v) > 0 { + return Kind(v[0]).normalize() + } + return invalidKind +} + +func (v *Value) reformat(canonical, multiline bool, prefix, indent string) error { + // Write the entire value to reformat all tokens and whitespace. + e := getBufferedEncoder() + defer putBufferedEncoder(e) + eo := &e.s.Struct + if canonical { + eo.Flags.Set(jsonflags.AllowInvalidUTF8 | 0) // per RFC 8785, section 3.2.4 + eo.Flags.Set(jsonflags.AllowDuplicateNames | 0) // per RFC 8785, section 3.1 + eo.Flags.Set(jsonflags.CanonicalizeNumbers | 1) // per RFC 8785, section 3.2.2.3 + eo.Flags.Set(jsonflags.PreserveRawStrings | 0) // per RFC 8785, section 3.2.2.2 + eo.Flags.Set(jsonflags.EscapeForHTML | 0) // per RFC 8785, section 3.2.2.2 + eo.Flags.Set(jsonflags.EscapeForJS | 0) // per RFC 8785, section 3.2.2.2 + eo.Flags.Set(jsonflags.Multiline | 0) // per RFC 8785, section 3.2.1 + } else { + if s := strings.TrimLeft(prefix, " \t"); len(s) > 0 { + panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent prefix") + } + if s := strings.TrimLeft(indent, " \t"); len(s) > 0 { + panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent") + } + eo.Flags.Set(jsonflags.AllowInvalidUTF8 | 1) + eo.Flags.Set(jsonflags.AllowDuplicateNames | 1) + eo.Flags.Set(jsonflags.PreserveRawStrings | 1) + if multiline { + eo.Flags.Set(jsonflags.Multiline | 1) + eo.Flags.Set(jsonflags.SpaceAfterColon | 1) + eo.Flags.Set(jsonflags.Indent | 1) + eo.Flags.Set(jsonflags.IndentPrefix | 1) + eo.IndentPrefix = prefix + eo.Indent = indent + } else { + eo.Flags.Set(jsonflags.Multiline | 0) + } + } + eo.Flags.Set(jsonflags.OmitTopLevelNewline | 1) + if err := e.s.WriteValue(*v); err != nil { + 
return err + } + + // For canonical output, we may need to reorder object members. + if canonical { + // Obtain a buffered encoder just to use its internal buffer as + // a scratch buffer in reorderObjects for reordering object members. + e2 := getBufferedEncoder() + defer putBufferedEncoder(e2) + + // Disable redundant checks performed earlier during encoding. + d := getBufferedDecoder(e.s.Buf) + defer putBufferedDecoder(d) + d.s.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1) + reorderObjects(d, &e2.s.Buf) // per RFC 8785, section 3.2.3 + } + + // Store the result back into the value if different. + if !bytes.Equal(*v, e.s.Buf) { + *v = append((*v)[:0], e.s.Buf...) + } + return nil +} + +type memberName struct { + // name is the unescaped name. + name []byte + // before and after are byte offsets into Decoder.buf that represents + // the entire name/value pair. It may contain leading commas. + before, after int64 +} + +var memberNamePool = sync.Pool{New: func() any { return new([]memberName) }} + +func getMemberNames() *[]memberName { + ns := memberNamePool.Get().(*[]memberName) + *ns = (*ns)[:0] + return ns +} + +func putMemberNames(ns *[]memberName) { + if cap(*ns) < 1<<10 { + clear(*ns) // avoid pinning name + memberNamePool.Put(ns) + } +} + +// reorderObjects recursively reorders all object members in place +// according to the ordering specified in RFC 8785, section 3.2.3. +// +// Pre-conditions: +// - The value is valid (i.e., no decoder errors should ever occur). +// - The value is compact (i.e., no whitespace is present). +// - Initial call is provided a Decoder reading from the start of v. +// +// Post-conditions: +// - Exactly one JSON value is read from the Decoder. +// - All fully-parsed JSON objects are reordered by directly moving +// the members in the value buffer. +// +// The runtime is approximately O(n·log(n)) + O(m·log(m)), +// where n is len(v) and m is the total number of object members. 
func reorderObjects(d *Decoder, scratch *[]byte) {
	// Decoder errors are deliberately discarded throughout this function;
	// the documented pre-condition is that the value is already valid.
	switch tok, _ := d.ReadToken(); tok.Kind() {
	case '{':
		// Iterate and collect the name and offsets for every object member.
		members := getMemberNames()
		defer putMemberNames(members)
		var prevName []byte
		isSorted := true

		beforeBody := d.InputOffset() // offset after '{'
		for d.PeekKind() != '}' {
			beforeName := d.InputOffset()
			var flags jsonwire.ValueFlags
			name, _ := d.s.ReadValue(&flags)
			name = jsonwire.UnquoteMayCopy(name, flags.IsVerbatim())
			// Consume the member's value, reordering any objects nested in it.
			reorderObjects(d, scratch)
			afterValue := d.InputOffset()

			// Track whether the names seen so far are strictly increasing,
			// so that the sort and copy below can be skipped entirely.
			if isSorted && len(*members) > 0 {
				isSorted = jsonwire.CompareUTF16(prevName, []byte(name)) < 0
			}
			*members = append(*members, memberName{name, beforeName, afterValue})
			prevName = name
		}
		afterBody := d.InputOffset() // offset before '}'
		d.ReadToken()                // consume the closing '}'

		// Sort the members; return early if it's already sorted.
		if isSorted {
			return
		}
		slices.SortFunc(*members, func(x, y memberName) int {
			return jsonwire.CompareUTF16(x.name, y.name)
		})

		// Append the reordered members to a new buffer,
		// then copy the reordered members back over the original members.
		// Avoid swapping in place since each member may be a different size
		// where moving a member over a smaller member may corrupt the data
		// for subsequent members before they have been moved.
		//
		// The following invariant must hold:
		//	sum([m.after-m.before for m in members]) == afterBody-beforeBody
		sorted := (*scratch)[:0]
		for i, member := range *members {
			// member is a copy of the slice element, so adjusting before
			// here does not modify the entry stored in *members.
			if d.s.buf[member.before] == ',' {
				member.before++ // trim leading comma
			}
			sorted = append(sorted, d.s.buf[member.before:member.after]...)
			if i < len(*members)-1 {
				sorted = append(sorted, ',') // append trailing comma
			}
		}
		// The rewritten body must occupy exactly the bytes of the original
		// body, otherwise the copy below would not line up with '}'.
		if int(afterBody-beforeBody) != len(sorted) {
			panic("BUG: length invariant violated")
		}
		copy(d.s.buf[beforeBody:afterBody], sorted)

		// Update scratch buffer to the largest amount ever used.
		if len(sorted) > len(*scratch) {
			*scratch = sorted
		}
	case '[':
		// Arrays keep their element order; only recurse into each element.
		for d.PeekKind() != ']' {
			reorderObjects(d, scratch)
		}
		d.ReadToken() // consume the closing ']'
	}
}