From be1f1d9c55b33622d3c48cd85bd1917db258073a Mon Sep 17 00:00:00 2001 From: Mads Marquart Date: Fri, 16 Jun 2023 17:17:31 +0300 Subject: [PATCH 1/3] Add partial support for optimized objc_retain/objc_release on Aarch64 --- crates/objc2/Cargo.toml | 12 ++ crates/objc2/src/rc/allocated.rs | 6 +- crates/objc2/src/rc/id.rs | 5 +- crates/objc2/src/runtime/mod.rs | 2 + .../objc2/src/runtime/retain_release_fast.rs | 104 ++++++++++++++++++ 5 files changed, 124 insertions(+), 5 deletions(-) create mode 100644 crates/objc2/src/runtime/retain_release_fast.rs diff --git a/crates/objc2/Cargo.toml b/crates/objc2/Cargo.toml index 860a3ebd6..966adac63 100644 --- a/crates/objc2/Cargo.toml +++ b/crates/objc2/Cargo.toml @@ -65,6 +65,18 @@ unstable-c-unwind = [] # For better documentation on docs.rs unstable-docsrs = [] +# Enable new features available on ARM64 on: +# - macOS 13.0 +# - iOS 16.0 +# - tvOS 16.0 +# - watchOS 9.0 +# +# See https://developer.apple.com/videos/play/wwdc2022/110363/ for an overview +# of the features. +# +# Currently untested. +unstable-apple-new = ["apple"] + # Runtime selection. See `objc-sys` for details. apple = ["objc-sys/apple"] gnustep-1-7 = ["objc-sys/gnustep-1-7"] diff --git a/crates/objc2/src/rc/allocated.rs b/crates/objc2/src/rc/allocated.rs index 8ef5e0229..a992ee16a 100644 --- a/crates/objc2/src/rc/allocated.rs +++ b/crates/objc2/src/rc/allocated.rs @@ -3,7 +3,7 @@ use core::marker::PhantomData; use core::mem::{self, ManuallyDrop}; use core::ptr::NonNull; -use crate::ffi; +use crate::runtime::objc_release_fast; use crate::Message; /// A marker type that can be used to indicate that the object has been @@ -77,8 +77,8 @@ impl Drop for Allocated { // destructors are written to take into account that the object may // not have been initialized. // - // Rest is same as `Id`. - unsafe { ffi::objc_release(self.ptr.as_ptr().cast()) }; + // Rest is same as `Id`'s `Drop`. 
+ unsafe { objc_release_fast(self.ptr.as_ptr().cast()) }; } } diff --git a/crates/objc2/src/rc/id.rs b/crates/objc2/src/rc/id.rs index d4c533554..976ea8e38 100644 --- a/crates/objc2/src/rc/id.rs +++ b/crates/objc2/src/rc/id.rs @@ -7,6 +7,7 @@ use core::ptr::{self, NonNull}; use super::AutoreleasePool; use crate::mutability::{IsIdCloneable, IsMutable}; +use crate::runtime::{objc_release_fast, objc_retain_fast}; use crate::{ffi, ClassType, Message}; /// A reference counted pointer type for Objective-C objects. @@ -321,7 +322,7 @@ impl Id { #[inline] pub unsafe fn retain(ptr: *mut T) -> Option> { // SAFETY: The caller upholds that the pointer is valid - let res: *mut T = unsafe { ffi::objc_retain(ptr.cast()) }.cast(); + let res: *mut T = unsafe { objc_retain_fast(ptr.cast()) }.cast(); debug_assert_eq!(res, ptr, "objc_retain did not return the same pointer"); // SAFETY: We just retained the object, so it has +1 retain count unsafe { Self::new(res) } @@ -650,7 +651,7 @@ impl Drop for Id { // SAFETY: The `ptr` is guaranteed to be valid and have at least one // retain count. 
- unsafe { ffi::objc_release(self.ptr.as_ptr().cast()) }; + unsafe { objc_release_fast(self.ptr.as_ptr().cast()) }; } } diff --git a/crates/objc2/src/runtime/mod.rs b/crates/objc2/src/runtime/mod.rs index 5dfcfcf18..a19be1891 100644 --- a/crates/objc2/src/runtime/mod.rs +++ b/crates/objc2/src/runtime/mod.rs @@ -38,8 +38,10 @@ mod nsobject; mod nsproxy; mod nszone; mod protocol_object; +mod retain_release_fast; pub(crate) use self::method_encoding_iter::{EncodingParseError, MethodEncodingIter}; +pub(crate) use self::retain_release_fast::{objc_release_fast, objc_retain_fast}; use crate::encode::__unstable::{EncodeArguments, EncodeConvertReturn, EncodeReturn}; use crate::encode::{Encode, Encoding, OptionEncode, RefEncode}; use crate::verify::{verify_method_signature, Inner}; diff --git a/crates/objc2/src/runtime/retain_release_fast.rs b/crates/objc2/src/runtime/retain_release_fast.rs new file mode 100644 index 000000000..68f23abe0 --- /dev/null +++ b/crates/objc2/src/runtime/retain_release_fast.rs @@ -0,0 +1,104 @@ +//! Optimized versions of `objc_retain` and `objc_release`. +//! +//! On macOS 13.0 / iOS 16.0 / tvOS 16.0 / watchOS 9.0, on ARM64, optimized +//! versions of these two functions that use a different calling convention +//! than the usual C calling convention, are available. +//! +//! Specifically, the expected input register is changed. The output register +//! is unchanged. +//! +//! As an example, if the object is stored in the `x19` register and we need +//! to release it, we usually end up emitting an extra `mov` to get the object +//! into the `x0` register first, as expected by the C calling convention: +//! +//! ```asm +//! mov x0, x19 +//! bl _objc_release +//! ``` +//! +//! With this optimization though, since the expected register is encoded in +//! the name of the function instead, we can avoid the move altogether. +//! +//! ```asm +//! bl _objc_release_x19 +//! ``` +//! +//! +//! +//! Safety of our two uses of the `asm!` macro: +//! +//! 1. 
We use the register class `reg`, with the modifier `x`, which on +//! Aarch64 is defined as `x[0-30]`, see [this][asm-reg-cls]. +//! +//! The functions are only available in the variants `x[0-15]` and +//! `x[19-28]` though, see [this][objc4-source], so if the register +//! allocator ends up using `x16`, `x17`, `x18`, `x29` or `x30`, we will +//! emit a call to e.g. `objc_retain_x29`, which will fail at link time. +//! +//! Before this option can be stable, we need a way to prevent that! +//! +//! 2. We use the `clobber_abi("C")` since we're effectively calling a C +//! C function. +//! +//! [asm-reg-cls]: https://doc.rust-lang.org/nightly/reference/inline-assembly.html#register-operands +//! [objc4-source]: https://github.com/apple-oss-distributions/objc4/blob/objc4-866.9/runtime/objc-abi.h#L442-L498 +use crate::ffi; + +/// A potentially faster version of `ffi::objc_retain`. +/// +/// +/// # Safety +/// +/// Same as `ffi::objc_retain`. +#[inline] +pub(crate) unsafe fn objc_retain_fast(obj: *mut ffi::objc_object) -> *mut ffi::objc_object { + #[cfg(all(feature = "unstable-apple-new", target_arch = "aarch64"))] + // SAFETY: See the file header. + // + // As per the ARM64 calling convention, the return value is put in `x0`. + // + // That the function itself is safe to call is upheld by the caller. + unsafe { + let result; + core::arch::asm!( + "bl _objc_retain_{obj:x}", + obj = in(reg) obj, + lateout("x0") result, + clobber_abi("C"), + ); + result + } + + #[cfg(not(all(feature = "unstable-apple-new", target_arch = "aarch64")))] + // SAFETY: Upheld by caller. + unsafe { + ffi::objc_retain(obj) + } +} + +/// A potentially faster version of `ffi::objc_release`. +/// +/// +/// # Safety +/// +/// Same as `ffi::objc_release`. +#[inline] +pub(crate) unsafe fn objc_release_fast(obj: *mut ffi::objc_object) { + #[cfg(all(feature = "unstable-apple-new", target_arch = "aarch64"))] + // SAFETY: See the file header. 
+ // + // That the function itself is safe to call is upheld by the caller. + unsafe { + core::arch::asm!( + "bl _objc_release_{obj:x}", + obj = in(reg) obj, + clobber_abi("C"), + ) + } + + #[cfg(not(all(feature = "unstable-apple-new", target_arch = "aarch64")))] + // SAFETY: Upheld by caller. + unsafe { + ffi::objc_release(obj) + } +} From cb69e2ae5e917f1e4c6de5eec1f0c3fae524db04 Mon Sep 17 00:00:00 2001 From: Mads Marquart Date: Fri, 16 Jun 2023 17:02:33 +0300 Subject: [PATCH 2/3] Optimize objc_retainAutoreleasedReturnValue on Aarch64 --- crates/objc2/src/rc/id.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/objc2/src/rc/id.rs b/crates/objc2/src/rc/id.rs index 976ea8e38..9c98db030 100644 --- a/crates/objc2/src/rc/id.rs +++ b/crates/objc2/src/rc/id.rs @@ -405,8 +405,14 @@ impl Id { }; // Supported since macOS 10.10. - #[cfg(target_arch = "aarch64")] + // + // On macOS 13.0 / iOS 16.0 / tvOS 16.0 / watchOS 9.0, the runtime + // instead checks the return pointer address, so we no longer need + // to emit these extra instructions, see this video from WWDC22: + // https://developer.apple.com/videos/play/wwdc2022/110363/ + #[cfg(all(target_arch = "aarch64", not(feature = "unstable-apple-new")))] unsafe { + // Same as `mov x29, x29` core::arch::asm!("mov fp, fp", options(nomem, preserves_flags, nostack)) }; From 194727809eb2adf1cb661ed6dccdbff7bf3cf3fd Mon Sep 17 00:00:00 2001 From: Mads Marquart Date: Fri, 28 Jul 2023 23:35:37 +0200 Subject: [PATCH 3/3] More precisely note status of `unstable-apple-new` --- crates/objc2/Cargo.toml | 6 ++++-- crates/objc2/src/runtime/retain_release_fast.rs | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/crates/objc2/Cargo.toml b/crates/objc2/Cargo.toml index 966adac63..c8e9b3c05 100644 --- a/crates/objc2/Cargo.toml +++ b/crates/objc2/Cargo.toml @@ -65,7 +65,7 @@ unstable-c-unwind = [] # For better documentation on docs.rs unstable-docsrs = [] -# Enable new features available on 
ARM64 on: +# Enable some new features available on ARM64 on: # - macOS 13.0 # - iOS 16.0 # - tvOS 16.0 @@ -74,7 +74,9 @@ unstable-docsrs = [] # See https://developer.apple.com/videos/play/wwdc2022/110363/ for an overview # of the features. # -# Currently untested. +# Currently untested, might be unsound or lead to confusing compiler errors. +# +# Additionally, the message sending improvements are not yet implemented. unstable-apple-new = ["apple"] # Runtime selection. See `objc-sys` for details. diff --git a/crates/objc2/src/runtime/retain_release_fast.rs b/crates/objc2/src/runtime/retain_release_fast.rs index 68f23abe0..662db01ca 100644 --- a/crates/objc2/src/runtime/retain_release_fast.rs +++ b/crates/objc2/src/runtime/retain_release_fast.rs @@ -35,7 +35,7 @@ //! allocator ends up using `x16`, `x17`, `x18`, `x29` or `x30`, we will //! emit a call to e.g. `objc_retain_x29`, which will fail at link time. //! -//! Before this option can be stable, we need a way to prevent that! +//! TODO: Before this option can be stable, we need a way to prevent that! //! //! 2. We use the `clobber_abi("C")` since we're effectively calling a C //! C function.