Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UTF8: Content negotiation (encoding side) #2

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
bd0376d
UTF-8 support in validation, and some parsers and formatters (#537)
ywwg Jan 23, 2024
18b3319
UTF-8 support in metric and label names
ywwg Aug 30, 2023
3c00867
Try the library-wide setting
ywwg Dec 4, 2023
5977639
Further cleanup
ywwg Dec 4, 2023
920df27
Use go quoting -- did we promise we'd do this in the design?
ywwg Dec 4, 2023
772b97e
even yet still more lint
ywwg Dec 4, 2023
22d6f4f
Use expfmt.writeEscapedString instead of strconv.Quote
ywwg Dec 14, 2023
f5b977d
fix error type check
ywwg Dec 14, 2023
61a48a3
start on content negotiation
ywwg Dec 15, 2023
ab55242
progress on content negotiation, we are mixed up in a bunch of places
ywwg Dec 18, 2023
69b8725
remove escaping from MetricFamilyToOpenMetrics and MetricFamilyToText
ywwg Dec 18, 2023
e4a4454
Add testing for metric family escaping
ywwg Dec 19, 2023
db86aac
another case and fix a thing
ywwg Dec 19, 2023
a3ec40e
add a proto format test
ywwg Dec 19, 2023
359a3e6
cleanup
ywwg Dec 19, 2023
fde90cb
unescaping! tests!
ywwg Dec 19, 2023
bc2df78
I think this is all wrong but we are going to save it for now
ywwg Dec 20, 2023
d76d636
revert the unescaping stuff
ywwg Dec 21, 2023
b1c6b6f
notes
ywwg Dec 21, 2023
b049ab5
cleanup for review
ywwg Dec 28, 2023
13e99f9
consistent name
ywwg Dec 28, 2023
bbeefd7
don't need
ywwg Dec 28, 2023
3321000
change default escaping to values
ywwg Dec 28, 2023
072fbe3
some debugging and fix a test
ywwg Jan 9, 2024
8da0659
get rid of escaping mech via headers, it doesn't make sense
ywwg Jan 9, 2024
b85b2ea
escaping scheme is a default, not passed in via header
ywwg Jan 9, 2024
9d244f1
rename
ywwg Jan 10, 2024
2b7ac01
UTF-8 support in metric and label names
ywwg Aug 30, 2023
7212730
cleanup
ywwg Jan 11, 2024
dfcd4af
lint and imports
ywwg Jan 18, 2024
f4d7f87
merge cleanups
ywwg Jan 23, 2024
File filter

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
2 changes: 1 addition & 1 deletion config/http_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
"sync"
"time"

"github.com/mwitkow/go-conntrack"
conntrack "github.com/mwitkow/go-conntrack"
"golang.org/x/net/http/httpproxy"
"golang.org/x/net/http2"
"golang.org/x/oauth2"
Expand Down
13 changes: 9 additions & 4 deletions expfmt/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,15 @@ func ResponseFormat(h http.Header) Format {
return FmtProtoDelim

case textType:
if v, ok := params["version"]; ok && v != TextVersion {
if v, ok := params["version"]; ok {
if v == TextVersion_0_0_4 {
return FmtText_0_0_4
} else if v == TextVersion_1_0_0 {
return FmtText_1_0_0
}
return FmtUnknown
}
return FmtText
return FmtText_0_0_4
}

return FmtUnknown
Expand All @@ -73,8 +78,8 @@ func ResponseFormat(h http.Header) Format {
// NewDecoder returns a new decoder based on the given input format.
// If the input format does not imply otherwise, a text format decoder is returned.
func NewDecoder(r io.Reader, format Format) Decoder {
switch format {
case FmtProtoDelim:
switch format.FormatType() {
case TypeProtoDelim:
return &protoDecoder{r: r}
}
return &textDecoder{r: r}
Expand Down
62 changes: 56 additions & 6 deletions expfmt/decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"bufio"
"errors"
"io"
"math"
"net/http"
"reflect"
"sort"
Expand Down Expand Up @@ -104,9 +105,10 @@ func TestProtoDecoder(t *testing.T) {
testTime := model.Now()

scenarios := []struct {
in string
expected model.Vector
fail bool
in string
expected model.Vector
legacyNameFail bool
fail bool
}{
{
in: "",
Expand Down Expand Up @@ -332,6 +334,30 @@ func TestProtoDecoder(t *testing.T) {
},
},
},
{
in: "\xa8\x01\n\ngauge.name\x12\x11gauge\ndoc\nstr\"ing\x18\x01\"T\n\x1b\n\x06name.1\x12\x11val with\nnew line\n*\n\x06name*2\x12 val with \\backslash and \"quotes\"\x12\t\t\x00\x00\x00\x00\x00\x00\xf0\x7f\"/\n\x10\n\x06name.1\x12\x06Björn\n\x10\n\x06name*2\x12\x06佖佥\x12\t\t\xd1\xcfD\xb9\xd0\x05\xc2H",
legacyNameFail: true,
expected: model.Vector{
&model.Sample{
Metric: model.Metric{
model.MetricNameLabel: "gauge.name",
"name.1": "val with\nnew line",
"name*2": "val with \\backslash and \"quotes\"",
},
Value: model.SampleValue(math.Inf(+1)),
Timestamp: testTime,
},
&model.Sample{
Metric: model.Metric{
model.MetricNameLabel: "gauge.name",
"name.1": "Björn",
"name*2": "佖佥",
},
Value: 3.14e42,
Timestamp: testTime,
},
},
},
}

for i, scenario := range scenarios {
Expand All @@ -344,11 +370,31 @@ func TestProtoDecoder(t *testing.T) {

var all model.Vector
for {
model.NameValidationScheme = model.LegacyValidation
var smpls model.Vector
err := dec.Decode(&smpls)
if err != nil && errors.Is(err, io.EOF) {
break
}
if scenario.legacyNameFail {
if err == nil {
t.Fatal("Expected error when decoding without UTF-8 support enabled but got none")
}
model.NameValidationScheme = model.UTF8Validation
dec = &SampleDecoder{
Dec: &protoDecoder{r: strings.NewReader(scenario.in)},
Opts: &DecodeOptions{
Timestamp: testTime,
},
}
err = dec.Decode(&smpls)
if errors.Is(err, io.EOF) {
break
}
if err != nil {
t.Fatalf("Unexpected error when decoding with UTF-8 support: %v", err)
}
}
if scenario.fail {
if err == nil {
t.Fatal("Expected error but got none")
Expand Down Expand Up @@ -385,13 +431,17 @@ func testDiscriminatorHTTPHeader(t testing.TB) {
input: map[string]string{"Content-Type": `application/vnd.google.protobuf; proto="io.prometheus.client.MetricFamily"; encoding="illegal"`},
output: FmtUnknown,
},
{
input: map[string]string{"Content-Type": `text/plain; version=1.0.0`},
output: FmtText_1_0_0,
},
{
input: map[string]string{"Content-Type": `text/plain; version=0.0.4`},
output: FmtText,
output: FmtText_0_0_4,
},
{
input: map[string]string{"Content-Type": `text/plain`},
output: FmtText,
output: FmtText_0_0_4,
},
{
input: map[string]string{"Content-Type": `text/plain; version=0.0.3`},
Expand Down Expand Up @@ -501,7 +551,7 @@ func TestTextDecoderWithBufioReader(t *testing.T) {

var decoded bool
r := bufio.NewReader(strings.NewReader(example))
dec := NewDecoder(r, FmtText)
dec := NewDecoder(r, FmtText_0_0_4)
for {
var mf dto.MetricFamily
if err := dec.Decode(&mf); err != nil {
Expand Down
87 changes: 57 additions & 30 deletions expfmt/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import (
"google.golang.org/protobuf/encoding/protodelim"
"google.golang.org/protobuf/encoding/prototext"

"github.com/prometheus/common/model"

"github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg"

dto "github.com/prometheus/client_model/go"
Expand Down Expand Up @@ -64,20 +66,31 @@ func Negotiate(h http.Header) Format {
for _, ac := range goautoneg.ParseAccept(h.Get(hdrAccept)) {
ver := ac.Params["version"]
if ac.Type+"/"+ac.SubType == ProtoType && ac.Params["proto"] == ProtoProtocol {
utf8Suffix := Format("")
if ac.Params["validchars"] == UTF8Valid {
utf8Suffix = FmtUTF8Param
}

switch ac.Params["encoding"] {
case "delimited":
return FmtProtoDelim
return FmtProtoDelim + utf8Suffix
case "text":
return FmtProtoText
return FmtProtoText + utf8Suffix
case "compact-text":
return FmtProtoCompact
return FmtProtoCompact + utf8Suffix
}
}
if ac.Type == "text" && ac.SubType == "plain" && (ver == TextVersion || ver == "") {
return FmtText
if ac.Type == "text" && ac.SubType == "plain" && (ver == TextVersion_0_0_4 || ver == TextVersion_1_0_0 || ver == "") {
if ver == TextVersion_1_0_0 {
if ac.Params["validchars"] == UTF8Valid {
return FmtText_1_0_0 + FmtUTF8Param
}
return FmtText_1_0_0
}
return FmtText_0_0_4
}
}
return FmtText
return FmtText_0_0_4
}

// NegotiateIncludingOpenMetrics works like Negotiate but includes
Expand All @@ -88,26 +101,44 @@ func NegotiateIncludingOpenMetrics(h http.Header) Format {
for _, ac := range goautoneg.ParseAccept(h.Get(hdrAccept)) {
ver := ac.Params["version"]
if ac.Type+"/"+ac.SubType == ProtoType && ac.Params["proto"] == ProtoProtocol {
utf8Suffix := Format("")
if ac.Params["validchars"] == UTF8Valid {
utf8Suffix = FmtUTF8Param
}

switch ac.Params["encoding"] {
case "delimited":
return FmtProtoDelim
return FmtProtoDelim + utf8Suffix
case "text":
return FmtProtoText
return FmtProtoText + utf8Suffix
case "compact-text":
return FmtProtoCompact
return FmtProtoCompact + utf8Suffix
}
}
if ac.Type == "text" && ac.SubType == "plain" && (ver == TextVersion_1_0_0 || ver == "") {
if ac.Params["validchars"] == UTF8Valid {
return FmtText_1_0_0 + FmtUTF8Param
}
return FmtText_0_0_4
}
if ac.Type == "text" && ac.SubType == "plain" && (ver == TextVersion || ver == "") {
return FmtText
if ac.Type == "text" && ac.SubType == "plain" && (ver == TextVersion_0_0_4 || ver == "") {
return FmtText_0_0_4
}
if ac.Type+"/"+ac.SubType == OpenMetricsType && (ver == OpenMetricsVersion_0_0_1 || ver == OpenMetricsVersion_1_0_0 || ver == "") {
if ver == OpenMetricsVersion_1_0_0 {
if ac.Type+"/"+ac.SubType == OpenMetricsType && (ver == OpenMetricsVersion_0_0_1 || ver == OpenMetricsVersion_1_0_0 || ver == OpenMetricsVersion_2_0_0 || ver == "") {
switch ver {
case OpenMetricsVersion_2_0_0:
if ac.Params["validchars"] == UTF8Valid {
return FmtOpenMetrics_2_0_0 + FmtUTF8Param
}
return FmtOpenMetrics_2_0_0
case OpenMetricsVersion_1_0_0:
return FmtOpenMetrics_1_0_0
default:
return FmtOpenMetrics_0_0_1
}
return FmtOpenMetrics_0_0_1
}
}
return FmtText
return FmtText_0_0_4
}

// NewEncoder returns a new encoder based on content type negotiation. All
Expand All @@ -116,44 +147,40 @@ func NegotiateIncludingOpenMetrics(h http.Header) Format {
// for FmtOpenMetrics, but a future (breaking) release will add the Close method
// to the Encoder interface directly. The current version of the Encoder
// interface is kept for backwards compatibility.
// In cases where the Format does not allow for UTF8 names, the global
// NameEscapingScheme will be applied.
func NewEncoder(w io.Writer, format Format) Encoder {
escapingScheme := format.ToEscapingScheme()

switch format {
case FmtProtoDelim:
return encoderCloser{
encode: func(v *dto.MetricFamily) error {
_, err := protodelim.MarshalTo(w, v)
return err
},
close: func() error { return nil },
}
case FmtProtoCompact:
return encoderCloser{
encode: func(v *dto.MetricFamily) error {
_, err := fmt.Fprintln(w, v.String())
_, err := fmt.Fprintln(w, model.EscapeMetricFamily(v, escapingScheme).String())
return err
},
close: func() error { return nil },
}
case FmtProtoText:
case TypeProtoText:
return encoderCloser{
encode: func(v *dto.MetricFamily) error {
_, err := fmt.Fprintln(w, prototext.Format(v))
_, err := fmt.Fprintln(w, prototext.Format(model.EscapeMetricFamily(v, escapingScheme)))
return err
},
close: func() error { return nil },
}
case FmtText:
case TypeTextPlain:
return encoderCloser{
encode: func(v *dto.MetricFamily) error {
_, err := MetricFamilyToText(w, v)
_, err := MetricFamilyToText(w, model.EscapeMetricFamily(v, escapingScheme))
return err
},
close: func() error { return nil },
}
case FmtOpenMetrics_0_0_1, FmtOpenMetrics_1_0_0:
case TypeOpenMetrics:
return encoderCloser{
encode: func(v *dto.MetricFamily) error {
_, err := MetricFamilyToOpenMetrics(w, v)
_, err := MetricFamilyToOpenMetrics(w, model.EscapeMetricFamily(v, escapingScheme))
return err
},
close: func() error {
Expand Down
Loading