From b94c60f905979845d539ad7b47b9f49a64737803 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 13 Feb 2025 23:15:16 +0000 Subject: [PATCH] Adding sticky support and setting it as the default GC --- Make.inc | 5 +- Makefile | 6 +- deps/checksums/mmtk_julia | 3 - deps/mmtk_julia.mk | 12 ++-- doc/make.jl | 1 + doc/src/devdocs/gc-mmtk.md | 43 ++++++++++++ src/Makefile | 4 +- src/gc-interface.h | 19 ++++- src/gc-mmtk.c | 29 ++++++-- src/gc-stock.c | 5 ++ src/gc-wb-mmtk.h | 86 +++++++++++++++++++++++ src/gc-wb-stock.h | 104 ++++++++++++++++++++++++++++ src/genericmemory.c | 43 +----------- src/julia.h | 49 ++++--------- src/llvm-gc-interface-passes.h | 25 +++++++ src/llvm-late-gc-lowering-mmtk.cpp | 75 ++++++++++++++++++++ src/llvm-late-gc-lowering-stock.cpp | 44 ++++++++++++ src/llvm-late-gc-lowering.cpp | 69 ------------------ src/staticdata.c | 6 +- 19 files changed, 461 insertions(+), 167 deletions(-) create mode 100644 doc/src/devdocs/gc-mmtk.md create mode 100644 src/gc-wb-mmtk.h create mode 100644 src/gc-wb-stock.h diff --git a/Make.inc b/Make.inc index 16e238c6f0683..2c351a1b16a2a 100644 --- a/Make.inc +++ b/Make.inc @@ -81,7 +81,7 @@ WITH_GC_VERIFY := 0 WITH_GC_DEBUG_ENV := 0 # Use stock if MMTK_PLAN hasn't been defined -MMTK_PLAN ?= None +MMTK_PLAN ?= StickyImmix # Enable DTrace support WITH_DTRACE := 0 @@ -844,6 +844,9 @@ MMTK_BUILD ?= release ifeq (${MMTK_PLAN},Immix) JCXXFLAGS += -DMMTK_PLAN_IMMIX JCFLAGS += -DMMTK_PLAN_IMMIX +else ifeq (${MMTK_PLAN},StickyImmix) +JCXXFLAGS += -DMMTK_PLAN_STICKYIMMIX +JCFLAGS += -DMMTK_PLAN_STICKYIMMIX else $(error "Unsupported MMTk plan: $(MMTK_PLAN)") endif diff --git a/Makefile b/Makefile index 0f1e8c45edf40..dfb7cc3e4a623 100644 --- a/Makefile +++ b/Makefile @@ -291,10 +291,10 @@ else ifeq (${MMTK_PLAN},StickyImmix) LIB_PATH_PLAN = sticky endif -ifeq ($(MMTK_MOVING), 0) -LIB_PATH_MOVING := non_moving -else +ifeq ($(MMTK_MOVING), 1) LIB_PATH_MOVING := moving +else +LIB_PATH_MOVING := non_moving endif 
JL_PRIVATE_LIBS-0 += $(LIB_PATH_PLAN)/$(LIB_PATH_MOVING)/$(MMTK_BUILD)/libmmtk_julia diff --git a/deps/checksums/mmtk_julia b/deps/checksums/mmtk_julia index 4ccc7b407cb60..9fb27848704d5 100644 --- a/deps/checksums/mmtk_julia +++ b/deps/checksums/mmtk_julia @@ -1,6 +1,3 @@ -mmtk_julia-b69acf5af7a7dd97c1cc6fd99f7c2d51b477f214.tar.gz/md5/1911cf084d26c48e2ed58af3d268b4b6 -mmtk_julia-b69acf5af7a7dd97c1cc6fd99f7c2d51b477f214.tar.gz/sha512/75beab54398989c46b62e714b242cf6705d88d220f40c21e494e0f29161437f5fbe9ba05b543d2353a1ad76f4239ac4025b476be0be864649f310f14935289fe -mmtk_julia-f07d66aafc86af84ea988b35335acc9bbc770fa1.tar.gz/md5/38afb5db6d8c55413a4ec96aefa2ebb4 mmtk_julia-f07d66aafc86af84ea988b35335acc9bbc770fa1.tar.gz/sha512/78525582a46a6baf8d33df7b622e55cf244439afcd7192ba55489c1bc18393d1237d2903d517c610484bf9e2a7338ad31435a9cbf70889d6bcf87c40cec829e5 mmtk_julia.v0.30.3+1.x86_64-linux-gnu.tar.gz/md5/631b204574da7062802dac501a4b711f mmtk_julia.v0.30.3+1.x86_64-linux-gnu.tar.gz/sha512/daaed59d08fc49621479ed638dea0aac0cba123986e486571447e8e21e9a098776ce2e87fbd92ddea276782fc44621f23d40fa213296b28e1d4480553c7de4f7 diff --git a/deps/mmtk_julia.mk b/deps/mmtk_julia.mk index 1dc59749a00b5..bfa667ca9491c 100644 --- a/deps/mmtk_julia.mk +++ b/deps/mmtk_julia.mk @@ -1,9 +1,9 @@ ## MMTK ## # Both MMTK_MOVING and MMTK_PLAN should be specified in the Make.user file. -# At this point, since we only support non-moving this is always set to 0 -# FIXME: change it to `?:` when introducing moving plans -MMTK_MOVING := 0 +# FIXME: By default we do a non-moving build. We should change the default to 1 +# once we support moving plans. 
+MMTK_MOVING ?= 0 MMTK_VARS := MMTK_PLAN=$(MMTK_PLAN) MMTK_MOVING=$(MMTK_MOVING) ifneq ($(USE_BINARYBUILDER_MMTK_JULIA),1) @@ -85,10 +85,10 @@ else ifeq (${MMTK_PLAN},StickyImmix) LIB_PATH_PLAN = sticky endif -ifeq ($(MMTK_MOVING), 0) -LIB_PATH_MOVING := non_moving -else +ifeq ($(MMTK_MOVING), 1) LIB_PATH_MOVING := moving +else +LIB_PATH_MOVING := non_moving endif version-check-mmtk_julia: $(BUILDROOT)/usr/lib/libmmtk_julia.so diff --git a/doc/make.jl b/doc/make.jl index 43d51e9936b58..068531be24a1d 100644 --- a/doc/make.jl +++ b/doc/make.jl @@ -229,6 +229,7 @@ DevDocs = [ "devdocs/aot.md", "devdocs/gc-sa.md", "devdocs/gc.md", + "devdocs/gc-mmtk.md", "devdocs/jit.md", "devdocs/builtins.md", "devdocs/precompile_hang.md", diff --git a/doc/src/devdocs/gc-mmtk.md b/doc/src/devdocs/gc-mmtk.md new file mode 100644 index 0000000000000..323e7901b6d2f --- /dev/null +++ b/doc/src/devdocs/gc-mmtk.md @@ -0,0 +1,43 @@ +# Julia + MMTk + +There has been quite a lot of effort to refactor the GC code inside Julia to support external GCs. The first step to enable using different GC algorithms for Julia was the design and implementation of a [GC interface](https://docs.google.com/document/d/1v0jtSrIpdEDNOxj5S9g1jPqSpuAkNWhr_T8ToFC9RLI/edit?usp=sharing). To drive that interface, we added support for building Julia with [MMTk](https://www.mmtk.io). MMTk is a memory management toolkit providing language implementers with a framework to implement flexible and performant GCs. The flexibility comes from the fact that it is possible to switch implementations fairly easily. MMTk supports state-of-the-art high-performance implementations that are continuously added and maintained in the core part of the framework. MMTk is under active development and has been used by other programming languages such as [Java](https://github.com/mmtk/mmtk-openjdk) and [Ruby](https://github.com/ruby/mmtk). 
To support a language, it is necessary to implement an *MMTk binding*, which contains the code that connects the language to [mmtk-core](https://github.com/mmtk/mmtk-core). The mmtk-julia binding can be found in [this repository](https://github.com/mmtk/mmtk-julia). + +> [!NOTE] +> Using a different GC requires building Julia from source. It is not possible to switch implementations at runtime. To see what version of the GC is currently being used, run `versioninfo()` from the Julia REPL and it should show the version under `GC: ...`. + +## Building Julia with MMTk + +There are three different ways of building Julia with MMTk: building from source using a fixed release of the binding, checking out a custom version in the mmtk-julia [repository](https://github.com/mmtk/mmtk-julia), or using a precompiled binary from Julia's BinaryBuilder. The easiest way is to use the BinaryBuilder binary. Simply set the variable `MMTK_PLAN` to one of the supported plans below and build Julia as usual. + +There are different configurations supported by the following variables, which can be set in a `Make.user` file or as environment variables. + +| Variable | | | +|---------------|--------------|---------------| +| `MMTK_PLAN` | Immix | StickyImmix | +| `MMTK_MOVING` | 0 | 1 | +| `MMTK_BUILD` | release | debug | + +If only `MMTK_PLAN` is set, then the default is to do a non-moving, release build. + +> [!IMPORTANT] +> While the binding supports building all versions above, we have only integrated the non-moving Immix and StickyImmix plans into Julia. Support for the moving versions should be added in the near future. + +### Building mmtk-julia from source + +It is also possible to build the binding from source. To do so, set the variable `USE_BINARYBUILDER_MMTK_JULIA=0` and the latest release version of the binding will be downloaded and built as part of building Julia. Note that this requires an installation of the Rust toolchain.
+ +It is also possible to build a custom version of the binding by checking it out from the [git repository](https://github.com/mmtk/mmtk-julia) and setting a variable named `MMTK_JULIA_DIR` as the path that contains the binding. + +For more information on building Julia with MMTk, please refer to the [README](https://github.com/mmtk/mmtk-julia/blob/master/README.md) file in the binding repo. + +### I've got a build error when building Julia with MMTk, what should I do? + +If you try to build Julia with MMTk and get an error, it is likely due to a change to Julia that has not yet been propagated to the binding or to the code in Julia that is specific to MMTk. Some changes include: + +(1) **Changing the memory layout of objects in Julia**. The binding relies on automatically generated Rust FFI bindings from Julia code. These files are generated using a crate named [`rust-bindgen`](https://github.com/rust-lang/rust-bindgen). To regenerate those files, check out the latest version of the `mmtk-julia` binding, set the variable `JULIA_PATH` to the path of the Julia version you are trying to build, and run `make regen-bindgen-ffi` from the directory containing the binding. This should delete the current version of the FFI bindings and generate a new version based on the Julia code from `JULIA_PATH`. + +(2) **Changing the root objects passed to the GC**. Julia passes a set of objects to the GC as roots in the function [gc_mark_roots](https://github.com/JuliaLang/julia/blob/fbe865657942da7d73cc02f76064f9ba9cdef56c/src/gc-stock.c#L2846). At the moment, this set needs to be consistent between both the Stock GC and MMTk (in the function [`jl_gc_scan_vm_specific_roots`](https://github.com/JuliaLang/julia/blob/fbe865657942da7d73cc02f76064f9ba9cdef56c/src/gc-mmtk.c#L496)). + +(3) **Changing how objects are scanned**.
MMTk uses the same strategy to find references in Julia objects as the stock GC (see [gc_mark_outrefs](https://github.com/JuliaLang/julia/blob/fbe865657942da7d73cc02f76064f9ba9cdef56c/src/gc-stock.c#L2227C19-L2227C34)). Changing the logic from this function should be reflected in the Rust code in the binding that [scan Julia objects](https://github.com/mmtk/mmtk-julia/blob/c9e046baf3a0d52fe75d6c8b28f6afd69b045d95/mmtk/src/julia_scanning.rs#L68). + +If your case is not included in one of the alternatives above, please create an issue in the Julia repository tagging it with the `GC: MMTK` label. diff --git a/src/Makefile b/src/Makefile index 130c35960f2d0..3b2c72a9ec1cb 100644 --- a/src/Makefile +++ b/src/Makefile @@ -123,9 +123,9 @@ UV_HEADERS += uv/*.h endif PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls-common.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) ifneq (${MMTK_PLAN},None) - PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-mmtk.h) + PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-mmtk.h gc-wb-mmtk.h) else - PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-stock.h) + PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-stock.h gc-wb-stock.h) endif ifeq ($(OS),WINNT) PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h) diff --git a/src/gc-interface.h b/src/gc-interface.h index 618077b127803..c6055fb3e894e 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -8,6 +8,7 @@ #define JL_GC_INTERFACE_H #include "dtypes.h" +#include "julia_atomics.h" #ifdef __cplusplus extern "C" { @@ -17,6 +18,7 @@ struct _jl_tls_states_t; struct _jl_value_t; struct _jl_weakref_t; struct _jl_datatype_t; +struct _jl_genericmemory_t; // ========================================================================= // // GC Metrics @@ -214,6 +216,10 @@ struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty, unsigned align) JL_NOTSAF 
// The GC may use that information to, for instance, determine that such objects should // be treated as marked and belonged to the old generation in nursery collections. void jl_gc_notify_image_load(const char* img_data, size_t len); +// This function notifies the GC about memory addresses that are set when allocating the boot image. +// The GC may use that information to, for instance, determine that all objects in that chunk of memory should +// be treated as marked and belonged to the old generation in nursery collections. +void jl_gc_notify_image_alloc(const char* img_data, size_t len); // ========================================================================= // // Runtime Write-Barriers @@ -252,7 +258,18 @@ STATIC_INLINE void jl_gc_wb_knownold(const void *parent, const void *ptr) JL_NOT // per field of the object being copied, but may be special-cased for performance reasons. STATIC_INLINE void jl_gc_multi_wb(const void *parent, const struct _jl_value_t *ptr) JL_NOTSAFEPOINT; - +// Write-barrier function that must be used after copying fields of elements of genericmemory objects +// into another. It should be semantically equivalent to triggering multiple write barriers – one +// per field of the object being copied, but may be special-cased for performance reasons. +STATIC_INLINE void jl_gc_wb_genericmemory_copy_ptr(const struct _jl_value_t *owner, struct _jl_genericmemory_t *src, char* src_p, + size_t n, struct _jl_datatype_t *dt) JL_NOTSAFEPOINT; +// Similar to jl_gc_wb_genericmemory_copy but must be used when copying *boxed* elements of a genericmemory +// object. Note that this barrier also performs the copying unlike jl_gc_wb_genericmemory_copy_ptr. +// The parameters src_p, dest_p and n will be modified and will contain information about +// the *uncopied* data after performing this barrier, and will be copied using memmove_refs. 
+STATIC_INLINE void jl_gc_wb_genericmemory_copy_boxed(const struct _jl_value_t *owner, _Atomic(void*) * dest_p, + struct _jl_genericmemory_t *src, _Atomic(void*) * src_p, + size_t* n) JL_NOTSAFEPOINT; #ifdef __cplusplus } #endif diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c index a6650dd7cb68c..5f8524b3a58b9 100644 --- a/src/gc-mmtk.c +++ b/src/gc-mmtk.c @@ -1,5 +1,6 @@ #include "gc-common.h" #include "gc-tls-mmtk.h" +#include "gc-wb-mmtk.h" #include "mmtkMutator.h" #include "threading.h" @@ -861,10 +862,23 @@ STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (uintptr_t)allocator->limit, size, align, offset, 1); } +inline void mmtk_set_side_metadata(const void* side_metadata_base, void* obj) { + intptr_t addr = (intptr_t) obj; + uint8_t* meta_addr = (uint8_t*) side_metadata_base + (addr >> 6); + intptr_t shift = (addr >> 3) & 0b111; + while(1) { + uint8_t old_val = *meta_addr; + uint8_t new_val = old_val | (1 << shift); + if (jl_atomic_cmpswap((_Atomic(uint8_t)*)meta_addr, &old_val, new_val)) { + break; + } + } +} + STATIC_INLINE void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { - // FIXME: Similarly, for now, we do nothing - // but when supporting moving, this is where we set the valid object (VO) bit - // and log (old gen) bit + if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { + mmtk_set_side_metadata(MMTK_SIDE_LOG_BIT_BASE_ADDRESS, obj); + } } JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, size_t align, void *ty) @@ -1081,6 +1095,11 @@ void jl_gc_notify_image_load(const char* img_data, size_t len) mmtk_set_vm_space((void*)img_data, len); } +void jl_gc_notify_image_alloc(const char* img_data, size_t len) +{ + mmtk_immortal_region_post_alloc((void*)img_data, len); +} + // ========================================================================= // // Code specific to stock that is not supported by 
MMTk // ========================================================================= // @@ -1128,7 +1147,9 @@ _Atomic(int) gc_stack_free_idx = 0; JL_DLLEXPORT void jl_gc_queue_root(const struct _jl_value_t *ptr) JL_NOTSAFEPOINT { - mmtk_unreachable(); + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, ptr, (const void*) 0); } JL_DLLEXPORT void jl_gc_queue_multiroot(const struct _jl_value_t *root, const void *stored, diff --git a/src/gc-stock.c b/src/gc-stock.c index 3d3bc9f485e51..e357dadc53d7c 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -4071,6 +4071,11 @@ void jl_gc_notify_image_load(const char* img_data, size_t len) // Do nothing } +void jl_gc_notify_image_alloc(const char* img_data, size_t len) +{ + // Do nothing +} + JL_DLLEXPORT const char* jl_gc_active_impl(void) { return "Built with stock GC"; } diff --git a/src/gc-wb-mmtk.h b/src/gc-wb-mmtk.h new file mode 100644 index 0000000000000..5652b7a88dd7e --- /dev/null +++ b/src/gc-wb-mmtk.h @@ -0,0 +1,86 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +/* + write barriers which should be inlined by the compiler +*/ + +#ifndef JL_GC_WB_H +#define JL_GC_WB_H + +#ifdef __cplusplus +extern "C" { +#endif + +extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr); +extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr); +extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS; + +#define MMTK_OBJECT_BARRIER (1) +// Stickyimmix needs write barrier. Immix does not need write barrier. 
+#ifdef MMTK_PLAN_IMMIX +#define MMTK_NEEDS_WRITE_BARRIER (0) +#endif +#ifdef MMTK_PLAN_STICKYIMMIX +#define MMTK_NEEDS_WRITE_BARRIER (1) +#endif + +// GC write barriers + +// Directly call into MMTk for write barrier (debugging only) +STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + mmtk_object_reference_write_post(&ptls->gc_tls.mmtk_mutator, parent, ptr); +} + +// Inlined fastpath +STATIC_INLINE void mmtk_gc_wb_fast(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { + intptr_t addr = (intptr_t) (void*) parent; + uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6); + intptr_t shift = (addr >> 3) & 0b111; + uint8_t byte_val = *meta_addr; + if (((byte_val >> shift) & 1) == 1) { + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, parent, ptr); + } + } +} + +STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + mmtk_gc_wb_fast(parent, ptr); +} + +STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t* +{ + mmtk_gc_wb_fast(ptr, (void*)0); +} + +STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT +{ + mmtk_gc_wb_fast(parent, (void*)0); +} + +STATIC_INLINE void jl_gc_wb_genericmemory_copy_boxed(const jl_value_t *dest_owner, _Atomic(void*) * dest_p, + jl_genericmemory_t *src, _Atomic(void*) * src_p, + size_t* n) JL_NOTSAFEPOINT +{ + mmtk_gc_wb_fast(dest_owner, (void*)0); +} + +STATIC_INLINE void jl_gc_wb_genericmemory_copy_ptr(const jl_value_t *owner, jl_genericmemory_t *src, char* src_p, + size_t n, jl_datatype_t *dt) JL_NOTSAFEPOINT +{ + mmtk_gc_wb_fast(owner, (void*)0); +} + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gc-wb-stock.h b/src/gc-wb-stock.h new file mode 100644 index 
0000000000000..9da402c1c6237 --- /dev/null +++ b/src/gc-wb-stock.h @@ -0,0 +1,104 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +/* + write barriers which should be inlined by the compiler +*/ + +#ifndef JL_GC_WB_H +#define JL_GC_WB_H + +#ifdef __cplusplus +extern "C" { +#endif + +// GC write barriers + +STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + // parent and ptr isa jl_value_t* + if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 /* GC_OLD_MARKED */ && // parent is old and not in remset + (jl_astaggedvalue(ptr)->bits.gc & 1 /* GC_MARKED */) == 0)) // ptr is young + jl_gc_queue_root((jl_value_t*)parent); +} + +STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t* +{ + // if ptr is old + if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3 /* GC_OLD_MARKED */)) { + jl_gc_queue_root((jl_value_t*)ptr); + } +} + +STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT +{ + // 3 == GC_OLD_MARKED + // ptr is an immutable object + if (__likely(jl_astaggedvalue(parent)->bits.gc != 3)) + return; // parent is young or in remset + if (__likely(jl_astaggedvalue(ptr)->bits.gc == 3)) + return; // ptr is old and not in remset (thus it does not point to young) + jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr); + const jl_datatype_layout_t *ly = dt->layout; + if (ly->npointers) + jl_gc_queue_multiroot((jl_value_t*)parent, ptr, dt); +} + +STATIC_INLINE void jl_gc_wb_genericmemory_copy_boxed(const jl_value_t *dest_owner, _Atomic(void*) * dest_p, + jl_genericmemory_t *src, _Atomic(void*) * src_p, + size_t* n) JL_NOTSAFEPOINT +{ + if (__unlikely(jl_astaggedvalue(dest_owner)->bits.gc == 3 /* GC_OLD_MARKED */ )) { + jl_value_t *src_owner = jl_genericmemory_owner(src); + size_t done = 0; + if (jl_astaggedvalue(src_owner)->bits.gc != 3 /* GC_OLD_MARKED */) { + if (dest_p < src_p || dest_p > src_p + (*n)) { + for (; done < (*n); 
done++) { // copy forwards + void *val = jl_atomic_load_relaxed(src_p + done); + jl_atomic_store_release(dest_p + done, val); + // `val` is young or old-unmarked + if (val && !(jl_astaggedvalue(val)->bits.gc & 1 /* GC_MARKED */)) { + jl_gc_queue_root(dest_owner); + break; + } + } + for (; done < (*n); done++) // dest_p/src_p are by-value copies, so the caller cannot resume from an advanced position: finish the forward copy here (the owner is already queued, no further checks needed) + jl_atomic_store_release(dest_p + done, jl_atomic_load_relaxed(src_p + done)); + } + else { + for (; done < (*n); done++) { // copy backwards + void *val = jl_atomic_load_relaxed(src_p + (*n) - done - 1); + jl_atomic_store_release(dest_p + (*n) - done - 1, val); + // `val` is young or old-unmarked + if (val && !(jl_astaggedvalue(val)->bits.gc & 1 /* GC_MARKED */)) { + jl_gc_queue_root(dest_owner); + break; + } + } + } + (*n) -= done; + } + } +} + +STATIC_INLINE void jl_gc_wb_genericmemory_copy_ptr(const jl_value_t *owner, jl_genericmemory_t *src, char* src_p, + size_t n, jl_datatype_t *dt) JL_NOTSAFEPOINT +{ + if (__unlikely(jl_astaggedvalue(owner)->bits.gc == 3 /* GC_OLD_MARKED */)) { + jl_value_t *src_owner = jl_genericmemory_owner(src); + size_t elsz = dt->layout->size; + if (jl_astaggedvalue(src_owner)->bits.gc != 3 /* GC_OLD_MARKED */) { + dt = (jl_datatype_t*)jl_tparam1(dt); + for (size_t done = 0; done < n; done++) { // copy forwards + char* s = (char*)src_p+done*elsz; + if (*((jl_value_t**)s+dt->layout->first_ptr) != NULL) + jl_gc_queue_multiroot(owner, s, dt); + } + } + } +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/genericmemory.c b/src/genericmemory.c index e435ec3b63c9f..b455a2fb36274 100644 --- a/src/genericmemory.c +++ b/src/genericmemory.c @@ -235,36 +235,7 @@ JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destda _Atomic(void*) * dest_p = (_Atomic(void*)*)destdata; _Atomic(void*) * src_p = (_Atomic(void*)*)srcdata; jl_value_t *owner = jl_genericmemory_owner(dest); - if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) { - jl_value_t *src_owner = jl_genericmemory_owner(src); - ssize_t done = 0; - if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) { - if
(dest_p < src_p || dest_p > src_p + n) { - for (; done < n; done++) { // copy forwards - void *val = jl_atomic_load_relaxed(src_p + done); - jl_atomic_store_release(dest_p + done, val); - // `val` is young or old-unmarked - if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) { - jl_gc_queue_root(owner); - break; - } - } - src_p += done; - dest_p += done; - } else { - for (; done < n; done++) { // copy backwards - void *val = jl_atomic_load_relaxed(src_p + n - done - 1); - jl_atomic_store_release(dest_p + n - done - 1, val); - // `val` is young or old-unmarked - if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) { - jl_gc_queue_root(owner); - break; - } - } - } - n -= done; - } - } + jl_gc_wb_genericmemory_copy_boxed(owner, dest_p, src, src_p, &n); return memmove_refs(dest_p, src_p, n); } size_t elsz = layout->size; @@ -280,17 +251,7 @@ JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destda if (layout->first_ptr != -1) { memmove_refs((_Atomic(void*)*)destdata, (_Atomic(void*)*)srcdata, n * elsz / sizeof(void*)); jl_value_t *owner = jl_genericmemory_owner(dest); - if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) { - jl_value_t *src_owner = jl_genericmemory_owner(src); - if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) { - dt = (jl_datatype_t*)jl_tparam1(dt); - for (size_t done = 0; done < n; done++) { // copy forwards - char* s = (char*)src_p+done*elsz; - if (*((jl_value_t**)s+layout->first_ptr) != NULL) - jl_gc_queue_multiroot(owner, s, dt); - } - } - } + jl_gc_wb_genericmemory_copy_ptr(owner, src, src_p, n, dt); } else { memmove(destdata, srcdata, n * elsz); diff --git a/src/julia.h b/src/julia.h index bf049c909d833..304cc340f710c 100644 --- a/src/julia.h +++ b/src/julia.h @@ -66,6 +66,7 @@ typedef struct _jl_taggedvalue_t jl_taggedvalue_t; typedef struct _jl_tls_states_t *jl_ptls_t; +typedef struct _jl_genericmemory_t jl_genericmemory_t; #ifdef JL_LIBRARY_EXPORTS #include "uv.h" @@ -162,7 +163,7 @@ 
typedef struct { // jl_value_t *data[]; } jl_svec_t; -JL_EXTENSION typedef struct { +JL_EXTENSION typedef struct _jl_genericmemory_t { JL_DATA_TYPE size_t length; void *ptr; @@ -1176,38 +1177,6 @@ JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz); // thread-local allocator of the current thread. JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value); -// GC write barriers - -STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT -{ - // parent and ptr isa jl_value_t* - if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 /* GC_OLD_MARKED */ && // parent is old and not in remset - (jl_astaggedvalue(ptr)->bits.gc & 1 /* GC_MARKED */) == 0)) // ptr is young - jl_gc_queue_root((jl_value_t*)parent); -} - -STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t* -{ - // if ptr is old - if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3 /* GC_OLD_MARKED */)) { - jl_gc_queue_root((jl_value_t*)ptr); - } -} - -STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT -{ - // 3 == GC_OLD_MARKED - // ptr is an immutable object - if (__likely(jl_astaggedvalue(parent)->bits.gc != 3)) - return; // parent is young or in remset - if (__likely(jl_astaggedvalue(ptr)->bits.gc == 3)) - return; // ptr is old and not in remset (thus it does not point to young) - jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr); - const jl_datatype_layout_t *ly = dt->layout; - if (ly->npointers) - jl_gc_queue_multiroot((jl_value_t*)parent, ptr, dt); -} - JL_DLLEXPORT void jl_gc_safepoint(void); JL_DLLEXPORT int jl_safepoint_suspend_thread(int tid, int waitstate); JL_DLLEXPORT void jl_safepoint_suspend_all_threads(struct _jl_task_t *ct); @@ -1270,6 +1239,18 @@ STATIC_INLINE jl_value_t *jl_svecset( #define jl_array_maxsize(a) (((jl_array_t*)(a))->ref.mem->length) #define jl_array_len(a) (jl_array_ndims(a) == 1 ? 
jl_array_nrows(a) : jl_array_maxsize(a)) +JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT; +#define jl_current_task (container_of(jl_get_pgcstack(), jl_task_t, gcstack)) + +STATIC_INLINE jl_value_t *jl_genericmemory_owner(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; + +// write barriers +#ifndef MMTK_GC +#include "gc-wb-stock.h" +#else +#include "gc-wb-mmtk.h" +#endif + /* how - allocation style 0 = data is inlined @@ -2338,8 +2319,6 @@ JL_DLLEXPORT void JL_NORETURN jl_throw(jl_value_t *e JL_MAYBE_UNROOTED); JL_DLLEXPORT void JL_NORETURN jl_rethrow(void); JL_DLLEXPORT void JL_NORETURN jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED); JL_DLLEXPORT void JL_NORETURN jl_no_exc_handler(jl_value_t *e, jl_task_t *ct); -JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT; -#define jl_current_task (container_of(jl_get_pgcstack(), jl_task_t, gcstack)) extern JL_DLLIMPORT int jl_task_gcstack_offset; extern JL_DLLIMPORT int jl_task_ptls_offset; diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h index 7b2a4bb033203..367af0d012819 100644 --- a/src/llvm-gc-interface-passes.h +++ b/src/llvm-gc-interface-passes.h @@ -413,4 +413,29 @@ struct FinalLowerGC: private JuliaPassContext { void lowerSafepoint(CallInst *target, Function &F); }; +// Enable this optimization only on LLVM 4.0+ since this cause LLVM to optimize +// constant store loop to produce a `memset_pattern16` with a global variable +// that's initialized by `addrspacecast`. Such a global variable is not supported by the backend. +// This is not a problem on 4.0+ since that transformation (in loop-idiom) is disabled +// for NI pointers. 
+static SmallVector *FindRefinements(Value *V, State *S) +{ + if (!S) + return nullptr; + auto it = S->AllPtrNumbering.find(V); + if (it == S->AllPtrNumbering.end()) + return nullptr; + auto rit = S->Refinements.find(it->second); + return rit != S->Refinements.end() && !rit->second.empty() ? &rit->second : nullptr; +} + +inline bool IsPermRooted(Value *V, State *S) +{ + if (isa(V)) + return true; + if (auto *RefinePtr = FindRefinements(V, S)) + return RefinePtr->size() == 1 && (*RefinePtr)[0] == -2; + return false; +} + #endif // LLVM_GC_PASSES_H diff --git a/src/llvm-late-gc-lowering-mmtk.cpp b/src/llvm-late-gc-lowering-mmtk.cpp index 5539c8dbcf153..3effd8c92368f 100644 --- a/src/llvm-late-gc-lowering-mmtk.cpp +++ b/src/llvm-late-gc-lowering-mmtk.cpp @@ -94,3 +94,78 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) } return target; } + +void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector &WriteBarriers, bool *CFGModified) { + auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext()); + for (auto CI : WriteBarriers) { + auto parent = CI->getArgOperand(0); + if (std::all_of(CI->op_begin() + 1, CI->op_end(), + [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) { + CI->eraseFromParent(); + continue; + } + if (CFGModified) { + *CFGModified = true; + } + + IRBuilder<> builder(CI); + builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + // FIXME: Currently we call write barrier with the src object (parent). + // This works fine for object barrier for generational plans (such as stickyimmix), which does not use the target object at all. + // But for other MMTk plans, we need to be careful. 
+ const bool INLINE_WRITE_BARRIER = true; + if (CI->getCalledOperand() == write_barrier_func) { + if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { + if (INLINE_WRITE_BARRIER) { + auto i8_ty = Type::getInt8Ty(F.getContext()); + auto intptr_ty = T_size; + + // intptr_t addr = (intptr_t) (void*) src; + // uint8_t* meta_addr = (uint8_t*) (SIDE_METADATA_BASE_ADDRESS + (addr >> 6)); + intptr_t metadata_base_address = reinterpret_cast(MMTK_SIDE_LOG_BIT_BASE_ADDRESS); + auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address); + auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0)); + + auto parent_val = builder.CreatePtrToInt(parent, intptr_ty); + auto shr = builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 6)); + auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr); + + // intptr_t shift = (addr >> 3) & 0b111; + auto shift = builder.CreateAnd(builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7)); + auto shift_i8 = builder.CreateTruncOrBitCast(shift, i8_ty); + + // uint8_t byte_val = *meta_addr; + auto load_i8 = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align()); + + // if (((byte_val >> shift) & 1) == 1) { + auto shifted_load_i8 = builder.CreateLShr(load_i8, shift_i8); + auto masked = builder.CreateAnd(shifted_load_i8, ConstantInt::get(i8_ty, 1)); + auto is_unlogged = builder.CreateICmpEQ(masked, ConstantInt::get(i8_ty, 1)); + + // object_reference_write_slow_call((void*) src, (void*) slot, (void*) target); + MDBuilder MDB(F.getContext()); + SmallVector Weights{1, 9}; + if (S) { + if (!S->DT) { + S->DT = &GetDT(); + } + DomTreeUpdater dtu = DomTreeUpdater(S->DT, llvm::DomTreeUpdater::UpdateStrategy::Lazy); + auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights), &dtu); + builder.SetInsertPoint(mayTriggerSlowpath); + } else { + auto mayTriggerSlowpath = 
SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights)); + builder.SetInsertPoint(mayTriggerSlowpath); + } + builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), { parent }); + } else { + Function *wb_func = getOrDeclare(jl_intrinsics::queueGCRoot); + builder.CreateCall(wb_func, { parent }); + } + } + } else { + assert(false); + } + CI->eraseFromParent(); + } +} diff --git a/src/llvm-late-gc-lowering-stock.cpp b/src/llvm-late-gc-lowering-stock.cpp index 2a11487773396..d1894877cfe7c 100644 --- a/src/llvm-late-gc-lowering-stock.cpp +++ b/src/llvm-late-gc-lowering-stock.cpp @@ -7,3 +7,47 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) // Do nothing for the stock GC return target; } + +void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector &WriteBarriers, bool *CFGModified) { + auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext()); + for (auto CI : WriteBarriers) { + auto parent = CI->getArgOperand(0); + if (std::all_of(CI->op_begin() + 1, CI->op_end(), + [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) { + CI->eraseFromParent(); + continue; + } + if (CFGModified) { + *CFGModified = true; + } + + IRBuilder<> builder(CI); + builder.SetCurrentDebugLocation(CI->getDebugLoc()); + auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED, "parent_bits"); + auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED), "parent_old_marked"); + auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false); + builder.SetInsertPoint(mayTrigTerm); + mayTrigTerm->getParent()->setName("may_trigger_wb"); + Value *anyChldNotMarked = NULL; + for (unsigned i = 1; i < CI->arg_size(); i++) { + Value *child = CI->getArgOperand(i); + Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED, "child_bit"); + Value *chldNotMarked = 
builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0), "child_not_marked"); + anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked; + } + assert(anyChldNotMarked); // handled by all_of test above + MDBuilder MDB(parent->getContext()); + SmallVector Weights{1, 9}; + auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false, + MDB.createBranchWeights(Weights)); + trigTerm->getParent()->setName("trigger_wb"); + builder.SetInsertPoint(trigTerm); + if (CI->getCalledOperand() == write_barrier_func) { + builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent); + } + else { + assert(false); + } + CI->eraseFromParent(); + } +} diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 7d6fba65a79e7..96a8281ae6bde 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1868,31 +1868,6 @@ Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value * return load; } -// Enable this optimization only on LLVM 4.0+ since this cause LLVM to optimize -// constant store loop to produce a `memset_pattern16` with a global variable -// that's initialized by `addrspacecast`. Such a global variable is not supported by the backend. -// This is not a problem on 4.0+ since that transformation (in loop-idiom) is disabled -// for NI pointers. -static SmallVector *FindRefinements(Value *V, State *S) -{ - if (!S) - return nullptr; - auto it = S->AllPtrNumbering.find(V); - if (it == S->AllPtrNumbering.end()) - return nullptr; - auto rit = S->Refinements.find(it->second); - return rit != S->Refinements.end() && !rit->second.empty() ? 
&rit->second : nullptr; -} - -static bool IsPermRooted(Value *V, State *S) -{ - if (isa(V)) - return true; - if (auto *RefinePtr = FindRefinements(V, S)) - return RefinePtr->size() == 1 && (*RefinePtr)[0] == -2; - return false; -} - static inline void UpdatePtrNumbering(Value *From, Value *To, State *S) { if (!S) @@ -1911,50 +1886,6 @@ MDNode *createMutableTBAAAccessTag(MDNode *Tag) { return MDBuilder(Tag->getContext()).createMutableTBAAAccessTag(Tag); } -void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector &WriteBarriers, bool *CFGModified) { - auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext()); - for (auto CI : WriteBarriers) { - auto parent = CI->getArgOperand(0); - if (std::all_of(CI->op_begin() + 1, CI->op_end(), - [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) { - CI->eraseFromParent(); - continue; - } - if (CFGModified) { - *CFGModified = true; - } - - IRBuilder<> builder(CI); - builder.SetCurrentDebugLocation(CI->getDebugLoc()); - auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED, "parent_bits"); - auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED), "parent_old_marked"); - auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false); - builder.SetInsertPoint(mayTrigTerm); - mayTrigTerm->getParent()->setName("may_trigger_wb"); - Value *anyChldNotMarked = NULL; - for (unsigned i = 1; i < CI->arg_size(); i++) { - Value *child = CI->getArgOperand(i); - Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED, "child_bit"); - Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0), "child_not_marked"); - anyChldNotMarked = anyChldNotMarked ? 
builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked; - } - assert(anyChldNotMarked); // handled by all_of test above - MDBuilder MDB(parent->getContext()); - SmallVector Weights{1, 9}; - auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false, - MDB.createBranchWeights(Weights)); - trigTerm->getParent()->setName("trigger_wb"); - builder.SetInsertPoint(trigTerm); - if (CI->getCalledOperand() == write_barrier_func) { - builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent); - } - else { - assert(false); - } - CI->eraseFromParent(); - } -} - bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { auto T_int32 = Type::getInt32Ty(F.getContext()); auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext()); diff --git a/src/staticdata.c b/src/staticdata.c index c29448c491eb7..2b8d29876c418 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -4166,9 +4166,10 @@ static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, i char *sysimg; int success = !needs_permalloc; ios_seek(f, datastartpos); - if (needs_permalloc) + if (needs_permalloc) { sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); - else + jl_gc_notify_image_alloc(sysimg, len); + } else sysimg = &f->buf[f->bpos]; if (needs_permalloc) success = ios_readall(f, sysimg, len) == len; @@ -4291,6 +4292,7 @@ JL_DLLEXPORT void jl_restore_system_image(const char *fname) ios_seek_end(&f); size_t len = ios_pos(&f); char *sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); + jl_gc_notify_image_alloc(sysimg, len); ios_seek(&f, 0); if (ios_readall(&f, sysimg, len) != len) jl_errorf("Error reading system image file.");