diff --git a/experimental/transaction.go b/experimental/transaction.go new file mode 100644 index 000000000..ffe72d827 --- /dev/null +++ b/experimental/transaction.go @@ -0,0 +1,36 @@ +// Copyright 2024 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +package experimental + +import ( + "github.com/corazawaf/coraza/v3/types" +) + +type Transaction interface { + types.Transaction + + // UseRequestBody directly sets the provided byte slice as the request body buffer. + // This is meant to be used when the entire request body is available, as it avoids + // the need for an extra copy into the request body buffer. Because of this, this method + // is expected to be called just once, further calls to UseRequestBody have to be avoided. + // If the body size exceeds the limit and the action is to reject, an interruption will be returned. + // + // Note: The new internal buffer takes ownership of the provided data, the caller should NOT use b slice + // after this call. + // + // It returns the relevant interruption, the final internal body buffer length and any error that occurs. + UseRequestBody(b []byte) (*types.Interruption, int, error) + + // UseResponseBody directly sets the provided byte slice as the response body buffer. + // This is meant to be used when the entire response body is available, as it avoids + // the need for an extra copy into the response body buffer. Because of this, this method is expected to + // be called just once, further calls to UseResponseBody have to be avoided. + // If the body size exceeds the limit and the action is to reject, an interruption will be returned. + // + // Note: The new internal buffer takes ownership of the provided data, the caller should NOT use b slice + // after this call. + // + // It returns the relevant interruption, the final internal body buffer length and any error that occurs. + UseResponseBody(b []byte) (*types.Interruption, int, error) +} diff --git a/internal/corazawaf/body_buffer.go b/internal/corazawaf/body_buffer.go index ac9010e54..4b8cffe16 100644 --- a/internal/corazawaf/body_buffer.go +++ b/internal/corazawaf/body_buffer.go @@ -94,6 +94,24 @@ func (br *BodyBuffer) Write(data []byte) (n int, err error) { return br.buffer.Write(data) } +// SetBuffer sets the buffer to the provided slice of bytes. +func (br *BodyBuffer) SetBuffer(data []byte) error { + if len(data) == 0 { + return errors.New("provided data is empty") + } + + // Check if the provided data exceeds the memory limit + if int64(len(data)) > br.options.MemoryLimit { + return errors.New("memoryLimit reached while writing") + } + + // Set the buffer to the provided slice + br.buffer = bytes.NewBuffer(data) + br.length = int64(len(data)) + + return nil +} + type bodyBufferReader struct { pos int br *BodyBuffer diff --git a/internal/corazawaf/transaction.go b/internal/corazawaf/transaction.go index 2fe3585c8..3f1883f17 100644 --- a/internal/corazawaf/transaction.go +++ b/internal/corazawaf/transaction.go @@ -968,6 +968,60 @@ func (tx *Transaction) ReadRequestBodyFrom(r io.Reader) (*types.Interruption, in return tx.interruption, int(w), err } +// UseRequestBody directly sets the provided byte slice as the request body buffer. +// This is meant to be used when the entire request body is available, as it avoids +// the need for an extra copy into the request body buffer. Because of this, this method +// is expected to be called just once, further calls to UseRequestBody have to be avoided. +// If the body size exceeds the limit and the action is to reject, an interruption will be returned. +// The caller should not use b slice after this call. +// +// It returns the relevant interruption, the final internal body buffer length and any error that occurs. +func (tx *Transaction) UseRequestBody(b []byte) (*types.Interruption, int, error) { + if tx.RuleEngine == types.RuleEngineOff { + return nil, 0, nil + } + + if !tx.RequestBodyAccess { + return nil, 0, nil + } + + if tx.lastPhase >= types.PhaseRequestBody { + return nil, 0, fmt.Errorf("request body buffer set more than once, which has been already been processed") + } + + bodySize := int64(len(b)) + var runProcessRequestBody bool + + if bodySize > tx.RequestBodyLimit { + tx.variables.inboundDataError.Set("1") + if tx.WAF.RequestBodyLimitAction == types.BodyLimitActionReject { + // We interrupt this transaction in case RequestBodyLimitAction is Reject + return setAndReturnBodyLimitInterruption(tx) + } + + if tx.WAF.RequestBodyLimitAction == types.BodyLimitActionProcessPartial { + // Truncate the body slice to the configured limit + b = b[:tx.RequestBodyLimit] + bodySize = tx.RequestBodyLimit + runProcessRequestBody = true + } + } + + // Point the internal buffer to the provided slice + err := tx.requestBodyBuffer.SetBuffer(b) + if err != nil { + return nil, 0, err + } + + err = nil + if runProcessRequestBody { + tx.debugLogger.Warn().Msg("Processing request body whose size reached the configured limit (Action ProcessPartial)") + _, err = tx.ProcessRequestBody() + } + + return tx.interruption, int(bodySize), err +} + // ProcessRequestBody Performs the analysis of the request body (if any) // // It is recommended to call this method even if it is not expected to have a body. @@ -1218,6 +1272,59 @@ func (tx *Transaction) ReadResponseBodyFrom(r io.Reader) (*types.Interruption, i return tx.interruption, int(w), err } +// UseResponseBody directly sets the provided byte slice as the response body buffer. +// This is meant to be used when the entire response body is available, as it avoids +// the need for an extra copy into the response body buffer. Because of this, this method is expected to +// be called just once, further calls to UseResponseBody have to be avoided. +// If the body size exceeds the limit and the action is to reject, an interruption will be returned. +// The caller should not use b slice after this call. +// +// It returns the relevant interruption, the final internal body buffer length and any error that occurs. +func (tx *Transaction) UseResponseBody(b []byte) (*types.Interruption, int, error) { + if tx.RuleEngine == types.RuleEngineOff { + return nil, 0, nil + } + + if !tx.ResponseBodyAccess { + return nil, 0, nil + } + + if tx.lastPhase >= types.PhaseResponseBody { + return nil, 0, fmt.Errorf("response body buffer set more than once, which has been already been processed") + } + + var ( + bodySize = int64(len(b)) + runProcessResponseBody = false + ) + if bodySize >= tx.ResponseBodyLimit { + tx.variables.outboundDataError.Set("1") + if tx.WAF.ResponseBodyLimitAction == types.BodyLimitActionReject { + // We interrupt this transaction in case ResponseBodyLimitAction is Reject + return setAndReturnBodyLimitInterruption(tx) + } + + if tx.WAF.ResponseBodyLimitAction == types.BodyLimitActionProcessPartial { + // Truncate the body slice to the configured limit + b = b[:tx.ResponseBodyLimit] + bodySize = tx.ResponseBodyLimit + runProcessResponseBody = true + } + } + // Point the internal buffer to the provided slice + err := tx.responseBodyBuffer.SetBuffer(b) + if err != nil { + return nil, 0, err + } + + err = nil + if runProcessResponseBody { + tx.debugLogger.Debug().Msg("Processing response body whose size reached the configured limit (Action ProcessPartial)") + _, err = tx.ProcessResponseBody() + } + return tx.interruption, int(bodySize), err +} + // ProcessResponseBody Perform the analysis of the the response body (if any) // // It is recommended to call this method even if it is not expected to have a body. diff --git a/internal/corazawaf/transaction_test.go b/internal/corazawaf/transaction_test.go index 07881c092..2d8aeb074 100644 --- a/internal/corazawaf/transaction_test.go +++ b/internal/corazawaf/transaction_test.go @@ -12,6 +12,7 @@ import ( "strconv" "strings" "testing" + "unsafe" "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/debuglog" @@ -115,37 +116,6 @@ func TestTxMultipart(t *testing.T) { } } -func TestTxResponse(t *testing.T) { - /* - tx := NewWAF().NewTransaction() - ht := []string{ - "HTTP/1.1 200 OK", - "Content-Type: text/html", - "Last-Modified: Mon, 14 Sep 2020 21:10:42 GMT", - "Accept-Ranges: bytes", - "ETag: \"0b5f480db8ad61:0\"", - "Vary: Accept-Encoding", - "Server: Microsoft-IIS/8.5", - "Content-Security-Policy: default-src: https:; frame-ancestors 'self' X-Frame-Options: SAMEORIGIN", - "Strict-Transport-Security: max-age=31536000; includeSubDomains; preload", - "Date: Wed, 16 Sep 2020 14:14:09 GMT", - "Connection: close", - "Content-Length: 10", - "", - "testcontent", - } - data := strings.Join(ht, "\r\n") - tx.ParseResponseString(nil, data) - - exp := map[string]string{ - "%{response_headers.content-length}": "10", - "%{response_headers.server}": "Microsoft-IIS/8.5", - } - - validateMacroExpansion(exp, tx, t) - */ -} - var requestBodyWriters = map[string]func(tx *Transaction, body string) (*types.Interruption, int, error){ "WriteRequestBody": func(tx *Transaction, body string) (*types.Interruption, int, error) { return tx.WriteRequestBody([]byte(body)) @@ -158,6 +128,9 @@ var requestBodyWriters = map[string]func(tx *Transaction, body string) (*types.I strings.NewReader(body), }) }, + "UseRequestBody": func(tx *Transaction, body string) (*types.Interruption, int, error) { + return tx.UseRequestBody([]byte(body)) + }, } func TestWriteRequestBody(t *testing.T) { @@ -212,10 +185,14 @@ func TestWriteRequestBody(t *testing.T) { for _, testCase := range testCases { t.Run(testCase.name, func(t *testing.T) { - for name, writeRequestBody := range requestBodyWriters { - t.Run(name, func(t *testing.T) { + for writerName, writeRequestBody := range requestBodyWriters { + t.Run(writerName, func(t *testing.T) { for name, chunks := range bodyChunks { t.Run(name, func(t *testing.T) { + if name != "BodyInOneShot" && writerName == "UseRequestBody" { + // UseRequestBody is intended to be used only when the whole body is available + return + } waf := NewWAF() waf.RuleEngine = types.RuleEngineOn waf.RequestBodyAccess = true @@ -299,6 +276,11 @@ func TestWriteRequestBodyOnLimitReached(t *testing.T) { t.Run(tName, func(t *testing.T) { for wName, writer := range requestBodyWriters { + if wName == "UseRequestBody" { + // Skip UseRequestBody test case. It is intended to be used only when the whole body is available + // at once. This test gradually populates the body up to its limit. + continue + } t.Run(wName, func(t *testing.T) { tx := waf.NewTransaction() _, err := tx.requestBodyBuffer.Write([]byte("ab")) @@ -313,11 +295,11 @@ func TestWriteRequestBodyOnLimitReached(t *testing.T) { } if it != tCase.preexistingInterruption { - t.Fatalf("unexpected interruption") + t.Fatalf("unexpected interruption: %v", it) } if n != 0 { - t.Fatalf("unexpected number of bytes written") + t.Fatalf("unexpected number of bytes written. Expected 0, got %d", n) } if err := tx.Close(); err != nil { @@ -378,6 +360,66 @@ func TestWriteRequestBodyIsNopWhenBodyIsNotAccesible(t *testing.T) { } } +// UseRequestBody has not to be called more then once, the first call +// might already trigger the inspection and so a new buffer set might +// not be inspected anymore. If that happens, an error has to be returned. +func TestUseRequestBodyMultipleCalls(t *testing.T) { + tx := makeTransaction(t) + tx.lastPhase = 1 + tx.RequestBodyAccess = true + tx.RequestBodyLimit = 10 + tx.WAF.RequestBodyLimitAction = types.BodyLimitActionProcessPartial + body := bytes.Repeat([]byte("a"), 11) + it, n, err := tx.UseRequestBody(body) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + if it != nil { + t.Fatalf("unexpected interruption: %v", it) + } + if n != int(tx.RequestBodyLimit) { + t.Fatalf("unexpected number of bytes written. Expected %d, got %d", tx.RequestBodyLimit, n) + } + // UseRequestBody should not generate a copy of the data, + // body and tx.Buffer are expected to point to the same data + bodyPtr := unsafe.SliceData(body) + txBufferPtr := &tx.requestBodyBuffer.buffer.Bytes()[0] + if bodyPtr != txBufferPtr { + t.Fatalf("body and tx.Buffer are not pointing to the same data") + } + // Second call to UseRequestBody with phase 2 processed should trigger an error + _, _, err = tx.UseRequestBody(body) + if err == nil { + t.Fatalf("expected error calling UseRequestBody twice with phase 2 processed in between") + } +} + +func BenchmarkUseRequestBody(b *testing.B) { + body := bytes.Repeat([]byte("A"), 10*1024*1024) + + b.Run("WriteRequestBody", func(b *testing.B) { + for i := 0; i < b.N; i++ { + tx := makeTransaction(b) + tx.lastPhase = 1 + tx.RequestBodyAccess = true + tx.RequestBodyLimit = 11 * 1024 * 1024 + _, _, _ = tx.WriteRequestBody(body) + } + b.ReportAllocs() + }) + + b.Run("UseRequestBody", func(b *testing.B) { + for i := 0; i < b.N; i++ { + tx := makeTransaction(b) + tx.lastPhase = 1 + tx.RequestBodyAccess = true + tx.RequestBodyLimit = 11 * 1024 * 1024 + _, _, _ = tx.UseRequestBody(body) + } + b.ReportAllocs() + }) +} + func TestResponseHeader(t *testing.T) { tx := makeTransaction(t) tx.AddResponseHeader("content-type", "test") @@ -480,6 +522,9 @@ var responseBodyWriters = map[string]func(tx *Transaction, body string) (*types. strings.NewReader(body), }) }, + "UseResponseBody": func(tx *Transaction, body string) (*types.Interruption, int, error) { + return tx.UseResponseBody([]byte(body)) + }, } func TestWriteResponseBody(t *testing.T) { @@ -531,10 +576,14 @@ func TestWriteResponseBody(t *testing.T) { for _, testCase := range testCases { t.Run(testCase.name, func(t *testing.T) { - for name, writeResponseBody := range responseBodyWriters { - t.Run(name, func(t *testing.T) { + for writerName, writeResponseBody := range responseBodyWriters { + t.Run(writerName, func(t *testing.T) { for name, chunks := range bodyChunks { t.Run(name, func(t *testing.T) { + if name != "BodyInOneShot" && writerName == "UseResponseBody" { + // UseResponseBody is intended to be used only when the whole body is available + return + } waf := NewWAF() waf.RuleEngine = types.RuleEngineOn waf.ResponseBodyMimeTypes = []string{"text/plain"} @@ -622,6 +671,11 @@ func TestWriteResponseBodyOnLimitReached(t *testing.T) { t.Run(tName, func(t *testing.T) { for wName, writer := range responseBodyWriters { + if wName == "UseResponseBody" { + // Skip UseResponseBody test case. It is intended to be used only when the whole body is available + // at once. This test gradually populates the body up to its limit. + continue + } t.Run(wName, func(t *testing.T) { tx := waf.NewTransaction() _, err := tx.responseBodyBuffer.Write([]byte("ab")) @@ -701,6 +755,66 @@ func TestWriteResponseBodyIsNopWhenBodyIsNotAccesible(t *testing.T) { } } +// UseResponseBody has not to be called more then once, the first call +// might already trigger the inspection and so a new buffer set might +// not be inspected anymore. If that happens, an error has to be returned. +func TestUseResponseBodyMultipleCalls(t *testing.T) { + tx := makeTransaction(t) + tx.lastPhase = 3 + tx.ResponseBodyAccess = true + tx.ResponseBodyLimit = 10 + tx.WAF.ResponseBodyLimitAction = types.BodyLimitActionProcessPartial + body := bytes.Repeat([]byte("a"), 11) + it, n, err := tx.UseResponseBody(body) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + if it != nil { + t.Fatalf("unexpected interruption: %v", it) + } + if n != int(tx.ResponseBodyLimit) { + t.Fatalf("unexpected number of bytes written. Expected %d, got %d", tx.ResponseBodyLimit, n) + } + // UseResponseBody should not generate a copy of the data, + // body and tx.Buffer are expected to point to the same data + bodyPtr := unsafe.SliceData(body) + txBufferPtr := &tx.responseBodyBuffer.buffer.Bytes()[0] + if bodyPtr != txBufferPtr { + t.Fatalf("body and tx.Buffer are not pointing to the same data") + } + // Second call to UseResponseBody with phase 4 processed should trigger an error + _, _, err = tx.UseResponseBody(body) + if err == nil { + t.Fatalf("expected error calling UseRequestBody twice with phase 2 processed in between") + } +} + +func BenchmarkUseResponseBody(b *testing.B) { + body := bytes.Repeat([]byte("A"), 10*1024*1024) + + b.Run("WriteResponseBody", func(b *testing.B) { + for i := 0; i < b.N; i++ { + tx := makeTransaction(b) + tx.lastPhase = 3 + tx.ResponseBodyAccess = true + tx.ResponseBodyLimit = 11 * 1024 * 1024 + _, _, _ = tx.WriteRequestBody(body) + } + b.ReportAllocs() + }) + + b.Run("UseResponseBody", func(b *testing.B) { + for i := 0; i < b.N; i++ { + tx := makeTransaction(b) + tx.lastPhase = 3 + tx.ResponseBodyAccess = true + tx.ResponseBodyLimit = 11 * 1024 * 1024 + _, _, _ = tx.UseResponseBody(body) + } + b.ReportAllocs() + }) +} + func TestAuditLogFields(t *testing.T) { tx := makeTransaction(t) tx.AuditLogParts = types.AuditLogParts("ABCDEFGHIJK") diff --git a/types/transaction.go b/types/transaction.go index b16448a20..c9a2cffb9 100644 --- a/types/transaction.go +++ b/types/transaction.go @@ -104,6 +104,9 @@ type Transaction interface { // It returns the corresponding interruption, the number of bytes written an error if any. ReadRequestBodyFrom(io.Reader) (*Interruption, int, error) + // Corrently exposed only as experimental + // UseRequestBody(b []byte) (*Interruption, int, error) + // AddResponseHeader Adds a response header variable // // With this method it is possible to feed Coraza with a response header. @@ -147,6 +150,9 @@ type Transaction interface { // It returns the corresponding interruption, the number of bytes written an error if any. ReadResponseBodyFrom(io.Reader) (*Interruption, int, error) + // Corrently exposed only as experimental + // UseResponseBody(b []byte) (*Interruption, int, error) + // ProcessLogging Logging all information relative to this transaction. // At this point there is not need to hold the connection, the response can be // delivered prior to the execution of this method.