Skip to content

Commit

Permalink
Added AggressiveOptimization to methods involved in measuring allocations.
Browse files Browse the repository at this point in the history

Warm up allocation measurement before taking actual measurement.
Isolated allocation measurement.
Changed some `RuntimeInformation` properties to static readonly fields.
Removed enable monitoring in Engine (GcStats handles it).
Removed `GC.Collect()` from allocation measurement.
Sleep thread to account for tiered jit in Core runtimes 3.0 to 6.0.
Updated MemoryDiagnoserTests.
Block finalizer thread during memory tests.
Disabled EventSource for integration tests.
  • Loading branch information
timcassell committed Sep 22, 2024
1 parent adf8e6d commit 1797d9c
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 97 deletions.
110 changes: 89 additions & 21 deletions src/BenchmarkDotNet/Engines/Engine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.Globalization;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Threading;
using BenchmarkDotNet.Characteristics;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Portability;
Expand Down Expand Up @@ -214,31 +215,56 @@ private ClockSpan Measure(Action<long> action, long invokeCount)

private (GcStats, ThreadingStats, double) GetExtraStats(IterationData data)
{
// we enable monitoring after main target run, for this single iteration which is executed at the end
// so even if we enable AppDomain monitoring in separate process
// it does not matter, because we have already obtained the results!
EnableMonitoring();
// Warm up the measurement functions before starting the actual measurement.
DeadCodeEliminationHelper.KeepAliveWithoutBoxing(GcStats.ReadInitial());
DeadCodeEliminationHelper.KeepAliveWithoutBoxing(GcStats.ReadFinal());

IterationSetupAction(); // we run iteration setup first, so even if it allocates, it is not included in the results

var initialThreadingStats = ThreadingStats.ReadInitial(); // this method might allocate
var exceptionsStats = new ExceptionsStats(); // allocates
exceptionsStats.StartListening(); // this method might allocate
var initialGcStats = GcStats.ReadInitial();

WorkloadAction(data.InvokeCount / data.UnrollFactor);
#if !NET7_0_OR_GREATER
if (RuntimeInformation.IsNetCore && Environment.Version.Major is >= 3 and <= 6 && RuntimeInformation.IsTieredJitEnabled)
{
// #1542
// We put the current thread to sleep so tiered jit can kick in, compile its stuff,
// and NOT allocate anything on the background thread when we are measuring allocations.
// This is only an issue on netcoreapp3.0 to net6.0. Tiered jit allocations were "fixed" in net7.0
// (maybe not completely eliminated forever, but at least reduced to a point where measurements are much more stable),
// and netcoreapp2.X uses only GetAllocatedBytesForCurrentThread which doesn't capture the tiered jit allocations.
Thread.Sleep(TimeSpan.FromMilliseconds(500));
}
#endif

exceptionsStats.Stop();
var finalGcStats = GcStats.ReadFinal();
// GC collect before measuring allocations.
ForceGcCollect();
GcStats gcStats;
using (FinalizerBlocker.MaybeStart())
{
gcStats = MeasureWithGc(data.InvokeCount / data.UnrollFactor);
}

exceptionsStats.Stop(); // this method might (de)allocate
var finalThreadingStats = ThreadingStats.ReadFinal();

IterationCleanupAction(); // we run iteration cleanup after collecting GC stats

var totalOperationsCount = data.InvokeCount * OperationsPerInvoke;
GcStats gcStats = (finalGcStats - initialGcStats).WithTotalOperations(totalOperationsCount);
ThreadingStats threadingStats = (finalThreadingStats - initialThreadingStats).WithTotalOperations(data.InvokeCount * OperationsPerInvoke);
return (gcStats.WithTotalOperations(totalOperationsCount),
(finalThreadingStats - initialThreadingStats).WithTotalOperations(totalOperationsCount),
exceptionsStats.ExceptionsCount / (double)totalOperationsCount);
}

return (gcStats, threadingStats, exceptionsStats.ExceptionsCount / (double)totalOperationsCount);
// Isolate the allocation measurement and skip tier0 jit to make sure we don't get any unexpected allocations.
// Runs the workload exactly once between a GcStats.ReadInitial() / GcStats.ReadFinal() pair and returns the difference.
// The returned delta is not yet scaled by the operation count; the caller is expected to apply WithTotalOperations.
// invokeCount is forwarded to WorkloadAction unchanged.
[MethodImpl(MethodImplOptions.NoInlining | CodeGenHelper.AggressiveOptimizationOption)]
private GcStats MeasureWithGc(long invokeCount)
{
// Keep the measured region as small as possible: nothing but the workload runs between the two reads.
var initialGcStats = GcStats.ReadInitial();
WorkloadAction(invokeCount);
var finalGcStats = GcStats.ReadFinal();
return finalGcStats - initialGcStats;
}

private void RandomizeManagedHeapMemory()
Expand Down Expand Up @@ -267,7 +293,7 @@ private void GcCollect()
ForceGcCollect();
}

private static void ForceGcCollect()
internal static void ForceGcCollect()
{
GC.Collect();
GC.WaitForPendingFinalizers();
Expand All @@ -278,15 +304,6 @@ private static void ForceGcCollect()

public void WriteLine() => Host.WriteLine();

private static void EnableMonitoring()
{
if (RuntimeInformation.IsOldMono) // Monitoring is not available in Mono, see http://stackoverflow.com/questions/40234948/how-to-get-the-number-of-allocated-bytes-in-mono
return;

if (RuntimeInformation.IsFullFramework)
AppDomain.MonitoringIsEnabled = true;
}

[UsedImplicitly]
public static class Signals
{
Expand All @@ -309,5 +326,56 @@ private static readonly Dictionary<string, HostSignal> MessagesToSignals
public static bool TryGetSignal(string message, out HostSignal signal)
=> MessagesToSignals.TryGetValue(message, out signal);
}

// Very long key and value so this shouldn't be used outside of unit tests.
internal const string UnitTestBlockFinalizerEnvKey = "BENCHMARKDOTNET_UNITTEST_BLOCK_FINALIZER_FOR_MEMORYDIAGNOSER";
internal const string UnitTestBlockFinalizerEnvValue = UnitTestBlockFinalizerEnvKey + "_ACTIVE";

// To prevent finalizers interfering with allocation measurements for unit tests,
// we block the finalizer thread until we've completed the measurement.
// https://github.com/dotnet/runtime/issues/101536#issuecomment-2077647417
//
// Usage: wrap the measured region in `using (FinalizerBlocker.MaybeStart()) { ... }`.
// MaybeStart only does anything when the unit-test environment variable is set; otherwise it returns
// the default struct, whose Dispose is a no-op because hangEvent is null.
private readonly struct FinalizerBlocker : IDisposable
{
// Event the hanging finalizer waits on; null for the default (inactive) instance.
private readonly ManualResetEventSlim hangEvent;

private FinalizerBlocker(ManualResetEventSlim hangEvent) => this.hangEvent = hangEvent;

// Helper object whose only purpose is to have a finalizer that blocks until it is released.
private sealed class Impl
{
private readonly ManualResetEventSlim hangEvent = new (false);
private readonly ManualResetEventSlim enteredFinalizerEvent = new (false);

// Signals that the finalizer has started, then blocks the thread running it until hangEvent is set.
~Impl()
{
enteredFinalizerEvent.Set();
hangEvent.Wait();
}

// Creates an Impl and hands back only its two events, never the instance itself.
// NOTE(review): NoInlining presumably ensures no reference to the Impl instance lingers in the caller's frame,
// so the instance is eligible for collection (and finalization) as soon as this method returns.
[MethodImpl(MethodImplOptions.NoInlining)]
internal static (ManualResetEventSlim hangEvent, ManualResetEventSlim enteredFinalizerEvent) CreateWeakly()
{
var impl = new Impl();
return (impl.hangEvent, impl.enteredFinalizerEvent);
}
}

// Returns an active blocker when the unit-test environment variable has the expected value,
// otherwise the default (inactive) struct. When active, this does not return until Impl's finalizer has been entered.
internal static FinalizerBlocker MaybeStart()
{
if (Environment.GetEnvironmentVariable(UnitTestBlockFinalizerEnvKey) != UnitTestBlockFinalizerEnvValue)
{
return default;
}
var (hangEvent, enteredFinalizerEvent) = Impl.CreateWeakly();
// Keep forcing collections until the Impl finalizer reports that it is running.
do
{
GC.Collect();
// Do NOT call GC.WaitForPendingFinalizers.
// (The Impl finalizer deliberately hangs until Dispose is called, so waiting for it here would presumably never return.)
}
while (!enteredFinalizerEvent.IsSet);
return new FinalizerBlocker(hangEvent);
}

// Releases the hanging finalizer; does nothing for the default instance.
public void Dispose() => hangEvent?.Set();
}
}
}
22 changes: 10 additions & 12 deletions src/BenchmarkDotNet/Engines/GcStats.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Reflection;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Columns;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Portability;
Expand Down Expand Up @@ -106,9 +107,10 @@ public int GetCollectionsCount(int generation)
return AllocatedBytes <= AllocationQuantum ? 0L : AllocatedBytes;
}

// Skip tier0 jit to make sure we don't get any unexpected allocations in this method.
[MethodImpl(CodeGenHelper.AggressiveOptimizationOption)]
public static GcStats ReadInitial()
{
// this will force GC.Collect, so we want to do this before collecting collections counts
long? allocatedBytes = GetAllocatedBytes();

return new GcStats(
Expand All @@ -119,15 +121,14 @@ public static GcStats ReadInitial()
0);
}

// Skip tier0 jit to make sure we don't get any unexpected allocations in this method.
[MethodImpl(CodeGenHelper.AggressiveOptimizationOption)]
public static GcStats ReadFinal()
{
return new GcStats(
GC.CollectionCount(0),
GC.CollectionCount(1),
GC.CollectionCount(2),

// this will force GC.Collect, so we want to do this after collecting collections counts
// to exclude this single full forced collection from results
GetAllocatedBytes(),
0);
}
Expand All @@ -136,17 +137,16 @@ public static GcStats ReadFinal()
public static GcStats FromForced(int forcedFullGarbageCollections)
=> new GcStats(forcedFullGarbageCollections, forcedFullGarbageCollections, forcedFullGarbageCollections, 0, 0);

// Skip tier0 jit to make sure we don't get any unexpected allocations in this method.
[MethodImpl(CodeGenHelper.AggressiveOptimizationOption)]
private static long? GetAllocatedBytes()
{
// we have no tests for WASM and don't want to risk introducing a new bug (https://github.com/dotnet/BenchmarkDotNet/issues/2226)
if (RuntimeInformation.IsWasm)
return null;

// "This instance Int64 property returns the number of bytes that have been allocated by a specific
// AppDomain. The number is accurate as of the last garbage collection." - CLR via C#
// so we enforce GC.Collect here just to make sure we get accurate results
GC.Collect();

// Do NOT call GC.Collect() here, as it causes finalizers to run and possibly allocate. https://github.com/dotnet/runtime/issues/101536#issuecomment-2077533242
// Instead, we call it before we start the measurement in the Engine.
#if NET6_0_OR_GREATER
return GC.GetTotalAllocatedBytes(precise: true);
#else
Expand Down Expand Up @@ -218,9 +218,7 @@ private static long CalculateAllocationQuantumSize()
break;
}

GC.Collect();
GC.WaitForPendingFinalizers();
GC.Collect();
Engine.ForceGcCollect();

result = GC.GetTotalMemory(false);
var tmp = new object();
Expand Down
57 changes: 34 additions & 23 deletions src/BenchmarkDotNet/Portability/RuntimeInformation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,47 +29,47 @@ internal static class RuntimeInformation
internal const string ReleaseConfigurationName = "RELEASE";
internal const string Unknown = "?";

// Many of these checks allocate and/or are expensive to compute. We store the results in static readonly fields to keep Engine non-allocating.
// Static readonly fields are used instead of properties to avoid an extra getter method call that might not be tier1 jitted.
// This class is internal, so we don't need to expose these as properties.

/// <summary>
/// returns true for both the old (implementation of .NET Framework) and new Mono (.NET 6+ flavour)
/// </summary>
public static bool IsMono { get; } =
Type.GetType("Mono.RuntimeStructs") != null; // it allocates a lot of memory, we need to check it once in order to keep Engine non-allocating!
public static readonly bool IsMono = Type.GetType("Mono.RuntimeStructs") != null;

public static bool IsOldMono { get; } = Type.GetType("Mono.Runtime") != null;
public static readonly bool IsOldMono = Type.GetType("Mono.Runtime") != null;

public static bool IsNewMono { get; } = IsMono && !IsOldMono;
public static readonly bool IsNewMono = IsMono && !IsOldMono;

public static bool IsFullFramework =>
public static readonly bool IsFullFramework =
#if NET6_0_OR_GREATER
// This could be const, but we want to avoid unreachable code warnings.
false;
#else
FrameworkDescription.StartsWith(".NET Framework", StringComparison.OrdinalIgnoreCase);
#endif

[PublicAPI]
public static bool IsNetNative => FrameworkDescription.StartsWith(".NET Native", StringComparison.OrdinalIgnoreCase);
public static readonly bool IsNetNative = FrameworkDescription.StartsWith(".NET Native", StringComparison.OrdinalIgnoreCase);

public static bool IsNetCore
=> ((Environment.Version.Major >= 5) || FrameworkDescription.StartsWith(".NET Core", StringComparison.OrdinalIgnoreCase))
&& !string.IsNullOrEmpty(typeof(object).Assembly.Location);
public static readonly bool IsNetCore =
((Environment.Version.Major >= 5) || FrameworkDescription.StartsWith(".NET Core", StringComparison.OrdinalIgnoreCase))
&& !string.IsNullOrEmpty(typeof(object).Assembly.Location);

public static bool IsNativeAOT
=> Environment.Version.Major >= 5
&& string.IsNullOrEmpty(typeof(object).Assembly.Location) // it's merged to a single .exe and .Location returns null
&& !IsWasm; // Wasm also returns "" for assembly locations
// True when running as a Native AOT application (.NET 5+ with an empty core assembly location, excluding Wasm).
// The browser check is evaluated inline instead of reading the IsWasm field: static field initializers execute in
// textual order and IsWasm is declared after this field, so reading it here would always observe its default
// value (false) and report Wasm as Native AOT.
public static readonly bool IsNativeAOT =
    Environment.Version.Major >= 5
    && string.IsNullOrEmpty(typeof(object).Assembly.Location) // it's merged to a single .exe and .Location returns null
    && !IsOSPlatform(OSPlatform.Create("BROWSER")); // Wasm also returns "" for assembly locations

#if NET6_0_OR_GREATER
[System.Runtime.Versioning.SupportedOSPlatformGuard("browser")]
#endif
public static bool IsWasm =>
#if NET6_0_OR_GREATER
OperatingSystem.IsBrowser();
public static readonly bool IsWasm = OperatingSystem.IsBrowser();
#else
IsOSPlatform(OSPlatform.Create("BROWSER"));
public static readonly bool IsWasm = IsOSPlatform(OSPlatform.Create("BROWSER"));
#endif

#if NETSTANDARD2_0
public static bool IsAot { get; } = IsAotMethod(); // This allocates, so we only want to call it once statically.
public static readonly bool IsAot = IsAotMethod();

private static bool IsAotMethod()
{
Expand All @@ -87,11 +87,22 @@ private static bool IsAotMethod()
return false;
}
#else
public static bool IsAot => !System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeCompiled;
public static readonly bool IsAot = !System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeCompiled;
#endif

public static bool IsRunningInContainer => string.Equals(Environment.GetEnvironmentVariable("DOTNET_RUNNING_IN_CONTAINER"), "true");

// Whether tiered JIT compilation is active for the current process. Always false outside of .NET Core.
// Computed once and cached in a static readonly field, like the other checks in this class.
public static readonly bool IsTieredJitEnabled = IsNetCore && DetectTieredJit();

// Inspects the tiered compilation settings: the COMPlus_/DOTNET_ environment variables and the
// "System.Runtime.TieredCompilation" AppContext switch.
private static bool DetectTieredJit()
{
    var complusSetting = Environment.GetEnvironmentVariable("COMPlus_TieredCompilation");
    var dotnetSetting = Environment.GetEnvironmentVariable("DOTNET_TieredCompilation");
    bool switchIsDefined = AppContext.TryGetSwitch("System.Runtime.TieredCompilation", out bool switchValue);

    if (Environment.Version.Major < 3)
    {
        // Disabled by default in netcoreapp2.X, so it is on only if something explicitly enables it.
        return complusSetting == "1"
            || dotnetSetting == "1"
            || (switchIsDefined && switchValue);
    }

    // Enabled by default in netcoreapp3.0+, so it is on unless something explicitly disables it.
    return complusSetting != "0"
        && dotnetSetting != "0"
        && (!switchIsDefined || switchValue);
}

public static readonly bool IsRunningInContainer = string.Equals(Environment.GetEnvironmentVariable("DOTNET_RUNNING_IN_CONTAINER"), "true");

internal static string GetArchitecture() => GetCurrentPlatform().ToString();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
<Content Include="xunit.runner.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</Content>
<!-- Disable EventSource to stabilize MemoryDiagnoserTests. https://github.com/dotnet/BenchmarkDotNet/pull/2562#issuecomment-2081317379 -->
<RuntimeHostConfigurationOption Include="System.Diagnostics.Tracing.EventSource.IsSupported" Value="false" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\BenchmarkDotNet.IntegrationTests.ConfigPerAssembly\BenchmarkDotNet.IntegrationTests.ConfigPerAssembly.csproj" />
Expand Down
Loading

0 comments on commit 1797d9c

Please sign in to comment.