diff --git a/buildtools/bdwgc/Dockerfile b/buildtools/bdwgc/Dockerfile new file mode 100644 index 0000000..44dd934 --- /dev/null +++ b/buildtools/bdwgc/Dockerfile @@ -0,0 +1,22 @@ +# Copyright The OWASP Coraza contributors +# SPDX-License-Identifier: Apache-2.0 + +FROM ghcr.io/corazawaf/coraza-proxy-wasm/buildtools-wasi-sdk:main + +RUN apt-get install -y autogen autoconf automake libtool patch + +RUN mkdir -p /bdwgc && curl -L https://github.com/ivmai/bdwgc/archive/refs/tags/v8.2.2.tar.gz | tar -xz --strip-components 1 -C /bdwgc +WORKDIR /bdwgc +ADD bdwgc.patch bdwgc.patch +RUN patch -p1 < bdwgc.patch +RUN ./autogen.sh + +# -D_WASI_EMULATED_MMAN allows wasi-libc's mmap header files to be included. We do not actually use +# the library itself, instead reimplementing in mmap.go +ENV CFLAGS ${CFLAGS} -D_WASI_EMULATED_MMAN + +# host is required by configure but not used so set it arbitrarily +RUN ./configure --disable-threads --enable-mmap --disable-shared --disable-gcj-support --disable-java-finalization --disable-atomic-uncollectible --host=i686-pc-linux-gnu +RUN make + +CMD ["cp", "/bdwgc/.libs/libgc.a", "/out/"] \ No newline at end of file diff --git a/buildtools/bdwgc/bdwgc.patch b/buildtools/bdwgc/bdwgc.patch new file mode 100644 index 0000000..ea44e9c --- /dev/null +++ b/buildtools/bdwgc/bdwgc.patch @@ -0,0 +1,109 @@ +diff --git a/alloc.c b/alloc.c +index a53e12c0..6e7be764 100644 +--- a/alloc.c ++++ b/alloc.c +@@ -19,7 +19,7 @@ + #include "private/gc_priv.h" + + #include +-#if !defined(MACOS) && !defined(MSWINCE) ++#if !defined(MACOS) && !defined(MSWINCE) && !defined(WASI) + # include + # if !defined(GC_NO_TYPES) && !defined(SN_TARGET_PSP2) \ + && !defined(__CC_ARM) +diff --git a/include/private/gc_priv.h b/include/private/gc_priv.h +index 2ffd4dd0..28ed5100 100644 +--- a/include/private/gc_priv.h ++++ b/include/private/gc_priv.h +@@ -838,7 +838,9 @@ EXTERN_C_END + # endif + #endif /* DARWIN */ + +-#include ++#if !defined(GC_NO_SIGSETJMP) ++# include ++#endif + 
+ #if __STDC_VERSION__ >= 201112L + # include /* for static_assert */ +diff --git a/include/private/gcconfig.h b/include/private/gcconfig.h +index 2de01afe..cf25e78c 100644 +--- a/include/private/gcconfig.h ++++ b/include/private/gcconfig.h +@@ -703,6 +703,11 @@ EXTERN_C_BEGIN + # define I386 + # define mach_type_known + # endif ++# if defined(__wasi__) ++# define WASI ++# define I386 ++# define mach_type_known ++#endif + + /* Feel free to add more clauses here */ + +@@ -1425,6 +1430,21 @@ EXTERN_C_BEGIN + # define USE_MMAP_ANON /* avoid /dev/zero, not supported */ + # define STACK_GROWS_DOWN + # endif ++# ifdef WASI ++ extern unsigned char __global_base; ++ extern unsigned char __heap_base; ++# define OS_TYPE "WASI" ++# define DATASTART ((ptr_t)&__global_base) ++# define DATAEND ((ptr_t)&__heap_base) ++# define STACKBOTTOM ((ptr_t)&__global_base) ++# define GETPAGESIZE() 65536 ++# define USE_MMAP_ANON /* avoid /dev/zero, not supported */ ++# define GC_NO_SIGSETJMP 1 ++# define NO_CLOCK 1 ++# ifndef HBLKSIZE ++# define HBLKSIZE 4096 ++# endif ++# endif + # if defined(__QNX__) + # define OS_TYPE "QNX" + # define SA_RESTART 0 +diff --git a/misc.c b/misc.c +index 64eeb7e4..273a1fa7 100644 +--- a/misc.c ++++ b/misc.c +@@ -20,7 +20,7 @@ + #include + #include + +-#ifndef MSWINCE ++#if !defined(MSWINCE) && !defined(WASI) + # include + #endif + +diff --git a/os_dep.c b/os_dep.c +index e116ad02..4bd9031c 100644 +--- a/os_dep.c ++++ b/os_dep.c +@@ -27,14 +27,14 @@ + #endif + + #include +-#if defined(MSWINCE) || defined(SN_TARGET_PS3) ++#if defined(MSWINCE) || defined(SN_TARGET_PS3) || defined(WASI) + # define SIGSEGV 0 /* value is irrelevant */ + #else + # include + #endif + + #if defined(UNIX_LIKE) || defined(CYGWIN32) || defined(NACL) \ +- || defined(SYMBIAN) ++ || defined(SYMBIAN) || defined(WASI) + # include + #endif + +@@ -2668,7 +2668,7 @@ static void block_unmap_inner(ptr_t start_addr, size_t len) + if (madvise(start_addr, len, MADV_DONTNEED) == -1) + 
ABORT_ON_REMAP_FAIL("unmap: madvise", start_addr, len); + # endif +-# elif defined(EMSCRIPTEN) ++# elif defined(EMSCRIPTEN) || defined(WASI) + /* Nothing to do, mmap(PROT_NONE) is not supported and */ + /* mprotect() is just a no-op. */ + # else diff --git a/buildtools/tinygo/wasi-libc.Dockerfile b/buildtools/tinygo/wasi-libc.Dockerfile index 51445cb..06897f6 100644 --- a/buildtools/tinygo/wasi-libc.Dockerfile +++ b/buildtools/tinygo/wasi-libc.Dockerfile @@ -5,6 +5,7 @@ FROM ghcr.io/corazawaf/coraza-proxy-wasm/buildtools-wasi-sdk:main RUN apt-get install -y git +# https://github.com/tinygo-org/tinygo/pull/3280 RUN git clone https://github.com/anuraaga/tinygo --branch wasm-stacks-nogc WORKDIR /tinygo RUN git fetch origin wasm-stacks-nogc && git reset --hard e2da8f6f0f5cf5a75a384bbddae80c1b8a84eca7 diff --git a/init_tinygo.go b/init_tinygo.go index aacf1f8..a8a1981 100644 --- a/init_tinygo.go +++ b/init_tinygo.go @@ -7,5 +7,5 @@ package main import _ "github.com/corazawaf/coraza-proxy-wasm/internal/agc" -// #cgo LDFLAGS: lib/libinjection.a lib/libre2.a lib/libcre2.a lib/libc++.a lib/libc++abi.a lib/libclang_rt.builtins-wasm32.a lib/libaho_corasick.a lib/libmimalloc.a +// #cgo LDFLAGS: lib/libinjection.a lib/libre2.a lib/libcre2.a lib/libc++.a lib/libc++abi.a lib/libclang_rt.builtins-wasm32.a lib/libaho_corasick.a lib/libmimalloc.a lib/libgc.a import "C" diff --git a/internal/agc/doc.go b/internal/agc/doc.go index 7c3b575..fa5dcda 100644 --- a/internal/agc/doc.go +++ b/internal/agc/doc.go @@ -1,15 +1,12 @@ // Copyright The OWASP Coraza contributors // SPDX-License-Identifier: Apache-2.0 -// Package agc is a custom gargabe gollector for TinyGo. The main difference is instead of taking -// ownership of the entire process heap, it uses malloc to allocate blocks for the GC to then -// assign to allocated objects. +// Package agc is a custom garbage collector for TinyGo. It delegates to bdwgc for actual +// allocation and collection. 
// // Unfortunately, we must rely on a package init() method for initializing the heap because we // cannot override TinyGo's initHeap function that normally does it. This means initialization // order matters, this package should be the first package to be initialized - any packages // initialized before cannot allocate memory. For that reason, we have named this agc instead // of gc. -// -// Currently, only one block can be allocated meaning this has a fixed-size heap. package agc diff --git a/internal/agc/gc_conservative.go b/internal/agc/gc_conservative.go index 80fa675..85f6e62 100644 --- a/internal/agc/gc_conservative.go +++ b/internal/agc/gc_conservative.go @@ -40,161 +40,42 @@ import ( "unsafe" ) -const gcDebug = false +/* +void onCollectionEvent(); +*/ +import "C" -// disable assertions for the garbage collector -const gcAsserts = false +//export GC_malloc +func GC_malloc(size uintptr) unsafe.Pointer -// disable assertions for the scheduler -const schedulerAsserts = false +//export GC_add_roots +func GC_add_roots(from uintptr, to uintptr) -// Some globals + constants for the entire GC. 
+//export GC_clear_roots +func GC_clear_roots() -const ( - heapSize = 128 * 1024 * 1024 - wordsPerBlock = 4 // number of pointers in an allocated block - bytesPerBlock = wordsPerBlock * unsafe.Sizeof(heapStart) - stateBits = 2 // how many bits a block state takes (see blockState type) - blocksPerStateByte = 8 / stateBits - markStackSize = 4 * unsafe.Sizeof((*int)(nil)) // number of to-be-marked blocks to queue before forcing a rescan -) +//export GC_gcollect +func GC_gcollect() -var ( - heapStart uintptr // start of the heap - heapEnd uintptr // end of the heap (exclusive) - - metadataStart unsafe.Pointer // pointer to the start of the heap metadata - nextAlloc gcBlock // the next block that should be tried by the allocator - endBlock gcBlock // the block just past the end of the available space - gcTotalAlloc uint64 // total number of bytes allocated - gcMallocs uint64 // total number of allocations - gcFrees uint64 // total number of objects freed -) +//export GC_get_all_interior_pointers +func GC_get_all_interior_pointers() int32 -// zeroSizedAlloc is just a sentinel that gets returned when allocating 0 bytes. -var zeroSizedAlloc uint8 - -// Provide some abstraction over heap blocks. - -// blockState stores the four states in which a block can be. It is two bits in -// size. -type blockState uint8 +//export GC_set_on_collection_event +func GC_set_on_collection_event(f unsafe.Pointer) const ( - blockStateFree blockState = 0 // 00 - blockStateHead blockState = 1 // 01 - blockStateTail blockState = 2 // 10 - blockStateMark blockState = 3 // 11 - blockStateMask blockState = 3 // 11 + gcEventStart = 0 + gcEventEnd = 5 ) -// String returns a human-readable version of the block state, for debugging. 
-func (s blockState) String() string { - switch s { - case blockStateFree: - return "free" - case blockStateHead: - return "head" - case blockStateTail: - return "tail" - case blockStateMark: - return "mark" - default: - // must never happen - return "!err" - } -} - -// The block number in the pool. -type gcBlock uintptr - -// blockFromAddr returns a block given an address somewhere in the heap (which -// might not be heap-aligned). -func blockFromAddr(addr uintptr) gcBlock { - if gcAsserts && (addr < heapStart || addr >= uintptr(metadataStart)) { - panic("gc: trying to get block from invalid address") - } - return gcBlock((addr - heapStart) / bytesPerBlock) -} - -// Return a pointer to the start of the allocated object. -func (b gcBlock) pointer() unsafe.Pointer { - return unsafe.Pointer(b.address()) -} - -// Return the address of the start of the allocated object. -func (b gcBlock) address() uintptr { - addr := heapStart + uintptr(b)*bytesPerBlock - if gcAsserts && addr > uintptr(metadataStart) { - panic("gc: block pointing inside metadata") - } - return addr -} - -// findHead returns the head (first block) of an object, assuming the block -// points to an allocated object. It returns the same block if this block -// already points to the head. -func (b gcBlock) findHead() gcBlock { - for b.state() == blockStateTail { - b-- - } - if gcAsserts { - if b.state() != blockStateHead && b.state() != blockStateMark { - panic("gc: found tail without head") - } - } - return b -} - -// findNext returns the first block just past the end of the tail. This may or -// may not be the head of an object. -func (b gcBlock) findNext() gcBlock { - if b.state() == blockStateHead || b.state() == blockStateMark { - b++ - } - for b.address() < uintptr(metadataStart) && b.state() == blockStateTail { - b++ - } - return b -} - -// State returns the current block state. 
-func (b gcBlock) state() blockState { - stateBytePtr := (*uint8)(unsafe.Pointer(uintptr(metadataStart) + uintptr(b/blocksPerStateByte))) - return blockState(*stateBytePtr>>((b%blocksPerStateByte)*stateBits)) & blockStateMask -} - -// setState sets the current block to the given state, which must contain more -// bits than the current state. Allowed transitions: from free to any state and -// from head to mark. -func (b gcBlock) setState(newState blockState) { - stateBytePtr := (*uint8)(unsafe.Pointer(uintptr(metadataStart) + uintptr(b/blocksPerStateByte))) - *stateBytePtr |= uint8(newState << ((b % blocksPerStateByte) * stateBits)) - if gcAsserts && b.state() != newState { - panic("gc: setState() was not successful") - } -} - -// markFree sets the block state to free, no matter what state it was in before. -func (b gcBlock) markFree() { - stateBytePtr := (*uint8)(unsafe.Pointer(uintptr(metadataStart) + uintptr(b/blocksPerStateByte))) - *stateBytePtr &^= uint8(blockStateMask << ((b % blocksPerStateByte) * stateBits)) - if gcAsserts && b.state() != blockStateFree { - panic("gc: markFree() was not successful") - } -} - -// unmark changes the state of the block from mark to head. It must be marked -// before calling this function. 
-func (b gcBlock) unmark() { - if gcAsserts && b.state() != blockStateMark { - panic("gc: unmark() on a block that is not marked") - } - clearMask := blockStateMask ^ blockStateHead // the bits to clear from the state - stateBytePtr := (*uint8)(unsafe.Pointer(uintptr(metadataStart) + uintptr(b/blocksPerStateByte))) - *stateBytePtr &^= uint8(clearMask << ((b % blocksPerStateByte) * stateBits)) - if gcAsserts && b.state() != blockStateHead { - panic("gc: unmark() was not successful") +//export onCollectionEvent +func onCollectionEvent(eventType uint32) { + switch eventType { + case gcEventStart: + GC_add_roots(globalsStart, globalsEnd) + addStackRoots() + case gcEventEnd: + GC_clear_roots() } } @@ -202,46 +83,8 @@ func (b gcBlock) unmark() { // No memory may be allocated before this is called. That means the runtime and // any packages the runtime depends upon may not allocate memory during package // initialization. -// -//go:linkname initHeap runtime.initHeap func init() { - heapStart = uintptr(libc_malloc(heapSize)) - heapEnd = heapStart + heapSize - calculateHeapAddresses() - - // Set all block states to 'free'. - metadataSize := heapEnd - uintptr(metadataStart) - memzero(unsafe.Pointer(metadataStart), metadataSize) -} - -// calculateHeapAddresses initializes variables such as metadataStart and -// numBlock based on heapStart and heapEnd. -// -// This function can be called again when the heap size increases. The caller is -// responsible for copying the metadata to the new location. -func calculateHeapAddresses() { - totalSize := heapEnd - heapStart - - // Allocate some memory to keep 2 bits of information about every block. - metadataSize := (totalSize + blocksPerStateByte*bytesPerBlock) / (1 + blocksPerStateByte*bytesPerBlock) - metadataStart = unsafe.Pointer(heapEnd - metadataSize) - - // Use the rest of the available memory as heap. 
- numBlocks := (uintptr(metadataStart) - heapStart) / bytesPerBlock - endBlock = gcBlock(numBlocks) - if gcDebug { - println("heapStart: ", heapStart) - println("heapEnd: ", heapEnd) - println("total size: ", totalSize) - println("metadata size: ", metadataSize) - println("metadataStart: ", metadataStart) - println("# of blocks: ", numBlocks) - println("# of block states:", metadataSize*blocksPerStateByte) - } - if gcAsserts && metadataSize*blocksPerStateByte < numBlocks { - // sanity check - panic("gc: metadata array is too small") - } + GC_set_on_collection_event(C.onCollectionEvent) } // alloc tries to find some free space on the heap, possibly doing a garbage @@ -249,326 +92,14 @@ func calculateHeapAddresses() { // //go:linkname alloc runtime.alloc func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { - if size == 0 { - return unsafe.Pointer(&zeroSizedAlloc) - } - - gcTotalAlloc += uint64(size) - gcMallocs++ - - neededBlocks := (size + (bytesPerBlock - 1)) / bytesPerBlock - - // Continue looping until a run of free blocks has been found that fits the - // requested size. - index := nextAlloc - numFreeBlocks := uintptr(0) - heapScanCount := uint8(0) - for { - if index == nextAlloc { - if heapScanCount == 0 { - heapScanCount = 1 - } else if heapScanCount == 1 { - // The entire heap has been searched for free memory, but none - // could be found. Run a garbage collection cycle to reclaim - // free memory and try again. - heapScanCount = 2 - freeBytes := runGC() - heapSize := uintptr(metadataStart) - heapStart - if freeBytes < heapSize/3 { - // Ensure there is at least 33% headroom. - // This percentage was arbitrarily chosen, and may need to - // be tuned in the future. - growHeap() - } - } else { - // Even after garbage collection, no free memory could be found. - // Try to increase heap size. - if growHeap() { - // Success, the heap was increased in size. Try again with a - // larger heap. - } else { - // Unfortunately the heap could not be increased. 
This - // happens on baremetal systems for example (where all - // available RAM has already been dedicated to the heap). - panic("out of memory") - } - } - } - - // Wrap around the end of the heap. - if index == endBlock { - index = 0 - // Reset numFreeBlocks as allocations cannot wrap. - numFreeBlocks = 0 - // In rare cases, the initial heap might be so small that there are - // no blocks at all. In this case, it's better to jump back to the - // start of the loop and try again, until the GC realizes there is - // no memory and grows the heap. - // This can sometimes happen on WebAssembly, where the initial heap - // is created by whatever is left on the last memory page. - continue - } - - // Is the block we're looking at free? - if index.state() != blockStateFree { - // This block is in use. Try again from this point. - numFreeBlocks = 0 - index++ - continue - } - numFreeBlocks++ - index++ - - // Are we finished? - if numFreeBlocks == neededBlocks { - // Found a big enough range of free blocks! - nextAlloc = index - thisAlloc := index - gcBlock(neededBlocks) - if gcDebug { - println("found memory:", thisAlloc.pointer(), int(size)) - } - - // Set the following blocks as being allocated. - thisAlloc.setState(blockStateHead) - for i := thisAlloc + 1; i != nextAlloc; i++ { - i.setState(blockStateTail) - } - - // Return a pointer to this allocation. - pointer := thisAlloc.pointer() - memzero(pointer, size) - return pointer - } - } + buf := GC_malloc(size) + memzero(buf, size) + return buf } // GC performs a garbage collection cycle. func GC() { - runGC() -} - -// runGC performs a garbage colleciton cycle. It is the internal implementation -// of the runtime.GC() function. The difference is that it returns the number of -// free bytes in the heap after the GC is finished. -func runGC() (freeBytes uintptr) { - if gcDebug { - println("running collection cycle...") - } - - // Mark phase: mark all reachable objects, recursively. 
- markStack() - markGlobals() - finishMark() - - // Sweep phase: free all non-marked objects and unmark marked objects for - // the next collection cycle. - freeBytes = sweep() - - // Show how much has been sweeped, for debugging. - if gcDebug { - dumpHeap() - } - - return -} - -// markRoots reads all pointers from start to end (exclusive) and if they look -// like a heap pointer and are unmarked, marks them and scans that object as -// well (recursively). The start and end parameters must be valid pointers and -// must be aligned. -func markRoots(start, end uintptr) { - if gcDebug { - println("mark from", start, "to", end, int(end-start)) - } - if gcAsserts { - if start >= end { - panic("gc: unexpected range to mark") - } - if start%unsafe.Alignof(start) != 0 { - panic("gc: unaligned start pointer") - } - if end%unsafe.Alignof(end) != 0 { - panic("gc: unaligned end pointer") - } - } - - // Reduce the end bound to avoid reading too far on platforms where pointer alignment is smaller than pointer size. - // If the size of the range is 0, then end will be slightly below start after this. - end -= unsafe.Sizeof(end) - unsafe.Alignof(end) - - for addr := start; addr < end; addr += unsafe.Alignof(addr) { - root := *(*uintptr)(unsafe.Pointer(addr)) - markRoot(addr, root) - } -} - -// stackOverflow is a flag which is set when the GC scans too deep while marking. -// After it is set, all marked allocations must be re-scanned. -var stackOverflow bool - -// startMark starts the marking process on a root and all of its children. -func startMark(root gcBlock) { - var stack [markStackSize]gcBlock - stack[0] = root - root.setState(blockStateMark) - stackLen := 1 - for stackLen > 0 { - // Pop a block off of the stack. - stackLen-- - block := stack[stackLen] - if gcDebug { - println("stack popped, remaining stack:", stackLen) - } - - // Scan all pointers inside the block. 
- start, end := block.address(), block.findNext().address() - for addr := start; addr != end; addr += unsafe.Alignof(addr) { - // Load the word. - word := *(*uintptr)(unsafe.Pointer(addr)) - - if !looksLikePointer(word) { - // Not a heap pointer. - continue - } - - // Find the corresponding memory block. - referencedBlock := blockFromAddr(word) - - if referencedBlock.state() == blockStateFree { - // The to-be-marked object doesn't actually exist. - // This is probably a false positive. - if gcDebug { - println("found reference to free memory:", word, "at:", addr) - } - continue - } - - // Move to the block's head. - referencedBlock = referencedBlock.findHead() - - if referencedBlock.state() == blockStateMark { - // The block has already been marked by something else. - continue - } - - // Mark block. - if gcDebug { - println("marking block:", referencedBlock) - } - referencedBlock.setState(blockStateMark) - - if stackLen == len(stack) { - // The stack is full. - // It is necessary to rescan all marked blocks once we are done. - stackOverflow = true - if gcDebug { - println("gc stack overflowed") - } - continue - } - - // Push the pointer onto the stack to be scanned later. - stack[stackLen] = referencedBlock - stackLen++ - } - } -} - -// finishMark finishes the marking process by processing all stack overflows. -func finishMark() { - for stackOverflow { - // Re-mark all blocks. - stackOverflow = false - for block := gcBlock(0); block < endBlock; block++ { - if block.state() != blockStateMark { - // Block is not marked, so we do not need to rescan it. - continue - } - - // Re-mark the block. - startMark(block) - } - } -} - -// mark a GC root at the address addr. -func markRoot(addr, root uintptr) { - if looksLikePointer(root) { - block := blockFromAddr(root) - if block.state() == blockStateFree { - // The to-be-marked object doesn't actually exist. - // This could either be a dangling pointer (oops!) but most likely - // just a false positive. 
- return - } - head := block.findHead() - if head.state() != blockStateMark { - if gcDebug { - println("found unmarked pointer", root, "at address", addr) - } - startMark(head) - } - } -} - -// Sweep goes through all memory and frees unmarked memory. -// It returns how many bytes are free in the heap after the sweep. -func sweep() (freeBytes uintptr) { - freeCurrentObject := false - for block := gcBlock(0); block < endBlock; block++ { - switch block.state() { - case blockStateHead: - // Unmarked head. Free it, including all tail blocks following it. - block.markFree() - freeCurrentObject = true - gcFrees++ - freeBytes += bytesPerBlock - case blockStateTail: - if freeCurrentObject { - // This is a tail object following an unmarked head. - // Free it now. - block.markFree() - freeBytes += bytesPerBlock - } - case blockStateMark: - // This is a marked object. The next tail blocks must not be freed, - // but the mark bit must be removed so the next GC cycle will - // collect this object if it is unreferenced then. - block.unmark() - freeCurrentObject = false - case blockStateFree: - freeBytes += bytesPerBlock - } - } - return -} - -// looksLikePointer returns whether this could be a pointer. Currently, it -// simply returns whether it lies anywhere in the heap. Go allows interior -// pointers so we can't check alignment or anything like that. -func looksLikePointer(ptr uintptr) bool { - return ptr >= heapStart && ptr < uintptr(metadataStart) -} - -// dumpHeap can be used for debugging purposes. It dumps the state of each heap -// block to standard output. 
-func dumpHeap() { - println("heap:") - for block := gcBlock(0); block < endBlock; block++ { - switch block.state() { - case blockStateHead: - print("*") - case blockStateTail: - print("-") - case blockStateMark: - print("#") - default: // free - print("ยท") - } - if block%64 == 63 || block+1 == endBlock { - println() - } - } + GC_gcollect() } func KeepAlive(x interface{}) { @@ -578,13 +109,3 @@ func KeepAlive(x interface{}) { func SetFinalizer(obj interface{}, finalizer interface{}) { // Unimplemented. } - -//export malloc -func libc_malloc(size uintptr) unsafe.Pointer - -//export free -func libc_free(ptr unsafe.Pointer) - -func growHeap() bool { - return false -} diff --git a/internal/agc/gc_globals.go b/internal/agc/gc_globals.go index 6ab1d52..074ce23 100644 --- a/internal/agc/gc_globals.go +++ b/internal/agc/gc_globals.go @@ -20,14 +20,3 @@ var ( globalsStart = uintptr(unsafe.Pointer(&globalsStartSymbol)) globalsEnd = uintptr(unsafe.Pointer(&heapStartSymbol)) ) - -// This file implements markGlobals for all the files that don't have a more -// specific implementation. - -// markGlobals marks all globals, which are reachable by definition. -// -// This implementation marks all globals conservatively and assumes it can use -// linker-defined symbols for the start and end of the .data section. -func markGlobals() { - markRoots(globalsStart, globalsEnd) -} diff --git a/internal/agc/gc_stack_portable.go b/internal/agc/gc_stack_portable.go index 56a22ce..d131ce2 100644 --- a/internal/agc/gc_stack_portable.go +++ b/internal/agc/gc_stack_portable.go @@ -3,16 +3,13 @@ //go:build tinygo -// Copied from https://github.com/tinygo-org/tinygo/blob/3dbc4d52105f4209ece1332f0272f293745ac0bf/src/runtime/gc_stack_portable.go -// with go:linkname used to override functions in the runtime package. 
- package agc import ( "unsafe" ) -//go:linkname stackChainStart runtime.stackChainStart +//go:extern runtime.stackChainStart var stackChainStart *stackChainObject type stackChainObject struct { @@ -20,35 +17,10 @@ type stackChainObject struct { numSlots uintptr } -// markStack marks all root pointers found on the stack. -// -// This implementation is conservative and relies on the compiler inserting code -// to manually push/pop stack objects that are stored in a linked list starting -// with stackChainStart. Manually keeping track of stack values is _much_ more -// expensive than letting the compiler do it and it inhibits a few important -// optimizations, but it has the big advantage of being portable to basically -// any ISA, including WebAssembly. -func markStack() { - stackObject := stackChainStart - for stackObject != nil { +func addStackRoots() { + for stackObject := stackChainStart; stackObject != nil; stackObject = stackObject.parent { start := uintptr(unsafe.Pointer(stackObject)) + unsafe.Sizeof(uintptr(0))*2 end := start + stackObject.numSlots*unsafe.Alignof(uintptr(0)) - markRoots(start, end) - stackObject = stackObject.parent + GC_add_roots(start, end) } } - -// trackPointer is a stub function call inserted by the compiler during IR -// construction. Calls to it are later replaced with regular stack bookkeeping -// code. -// -//go:linkname trackPointer runtime.trackPointer -func trackPointer(ptr unsafe.Pointer) - -// swapStackChain swaps the stack chain. -// This is called from internal/task when switching goroutines. 
-//
-//go:linkname swapStackChain runtime.swapStackChain
-func swapStackChain(dst **stackChainObject) {
-	*dst, stackChainStart = stackChainStart, *dst
-}
diff --git a/internal/agc/mem.go b/internal/agc/mem.go
index 597d587..3c8a2a9 100644
--- a/internal/agc/mem.go
+++ b/internal/agc/mem.go
@@ -9,6 +9,3 @@ import "unsafe"
 
 //go:linkname memzero runtime.memzero
 func memzero(ptr unsafe.Pointer, size uintptr)
-
-//go:linkname memcpy runtime.memcpy
-func memcpy(dst, src unsafe.Pointer, size uintptr)
diff --git a/internal/agc/mmap.go b/internal/agc/mmap.go
new file mode 100644
index 0000000..fda3e07
--- /dev/null
+++ b/internal/agc/mmap.go
@@ -0,0 +1,66 @@
+// Copyright The OWASP Coraza contributors
+// SPDX-License-Identifier: Apache-2.0
+
+//go:build tinygo
+
+package agc
+
+import "unsafe"
+
+// Simple implementation of mmap delegating to malloc. wasi-libc defines a similar emulation library, but while
+// we wouldn't exercise the code path, its use of pread is incompatible with Envoy.
+// https://github.com/WebAssembly/wasi-libc/blob/5d8a1409aa85acf8dbb197e13d33489ad1eac656/libc-bottom-half/mman/mman.c
+
+/*
+int errno;
+*/
+import "C"
+
+// Must match bdwgc value of HBLKSIZE
+const hBlkSize = 4096
+
+// enomem is ENOMEM as defined by wasi-libc (__WASI_ERRNO_NOMEM); WASI error numbers differ from
+// the Linux ones, so the value must come from the wasi-libc headers.
+const enomem = 48
+
+// mi_zalloc_aligned is resolved at link time; judging by its name it returns zeroed memory aligned to
+// alignment (presumably mimalloc from lib/libmimalloc.a - TODO confirm). It returns nil on failure.
+//
+//export mi_zalloc_aligned
+func mi_zalloc_aligned(size uintptr, alignment uintptr) unsafe.Pointer
+
+// mi_free is the matching release function for pointers that mmap obtained from mi_zalloc_aligned.
+//
+//export mi_free
+func mi_free(ptr unsafe.Pointer)
+
+// mmap is exported under the C name mmap so the linked bdwgc can obtain heap memory. Only length is
+// honored: the address hint, protection, flags, fd and offset arguments are ignored and every request
+// becomes an hBlkSize-aligned allocation. On failure errno is set to ENOMEM and the all-ones pointer
+// (the value C callers compare against MAP_FAILED) is returned.
+//
+//export mmap
+func mmap(_ unsafe.Pointer, length uintptr, _ int32, _ int32, _ int32, _ uint64) unsafe.Pointer {
+	buf := mi_zalloc_aligned(length, hBlkSize)
+	if buf == nil {
+		C.errno = enomem
+		return unsafe.Add(unsafe.Pointer(uintptr(0)), -1)
+	}
+	return buf
+}
+
+// munmap frees the allocation starting at addr and always reports success. The length is ignored,
+// so addr must be a pointer previously returned by mmap; partial unmapping is not supported.
+//
+//export munmap
+func munmap(addr unsafe.Pointer, _ uintptr) int32 {
+	mi_free(addr)
+	return 0
+}
+
+// mprotect is a no-op that always reports success; protection changes are not emulated.
+//
+//export mprotect
+func mprotect(addr unsafe.Pointer, length uintptr, prot int32) int32 {
+	return 0
+}
diff --git a/lib/libaho_corasick.a b/lib/libaho_corasick.a
index 04c6a69..4c38a73 100644
Binary files a/lib/libaho_corasick.a and
b/lib/libaho_corasick.a differ diff --git a/lib/libgc.a b/lib/libgc.a new file mode 100644 index 0000000..63e434e Binary files /dev/null and b/lib/libgc.a differ diff --git a/magefile.go b/magefile.go index 86c2f8c..81cb244 100644 --- a/magefile.go +++ b/magefile.go @@ -206,7 +206,7 @@ tinygo build -gc=none -opt=2 -o %s -scheduler=none -target=wasi %s`, filepath.Jo // UpdateLibs updates the C++ filter dependencies. func UpdateLibs() error { - libs := []string{"aho-corasick", "libinjection", "mimalloc", "re2"} + libs := []string{"aho-corasick", "bdwgc", "libinjection", "mimalloc", "re2"} for _, lib := range libs { if err := sh.RunV("docker", "build", "-t", "ghcr.io/corazawaf/coraza-proxy-wasm/buildtools-"+lib, filepath.Join("buildtools", lib)); err != nil { return err