Skip to content

Optimizations for a specific function is causing runtime misbehavior #122254

@vinnyrom

Description

@vinnyrom

Description

Optimizations for int DecodeFromString<Dialect>(Span<byte>, ReadOnlySpan<char>) is causing test failures.

Reproduction Steps

Add a C# Class Library project with the following:

using System.Runtime.CompilerServices;

namespace Repro.Library;

public static class AdvancedBase85
{
  public interface IDialect
  {
    static abstract char ZeroSentinel { get; }
    static abstract char FillSentinel { get; }
    static abstract char[] CharacterMap { get; }
    static abstract byte[] ByteCodes { get; }
  }

  public sealed class SafeForXmlDialect : IDialect
  {
    private SafeForXmlDialect() { }

    static char IDialect.ZeroSentinel => '!';
    static char IDialect.FillSentinel => '~';
    static char[] IDialect.CharacterMap => "#$%()*+,-.0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}".ToCharArray();
    static byte[] IDialect.ByteCodes =>
    [
      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    //        !     "     #     $     %     &     '     (     )     *     +     ,     -     .     /
      0xFF, 0xFF, 0xFF, 0x00, 0x01, 0x02, 0xFF, 0xFF, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF,
    //  0     1     2     3     4     5     6     7     8     9     :     ;     <     =     >     ?
      0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0xFF, 0x16, 0xFF, 0x17,
    //  @     A     B     C     D     E     F     G     H     I     J     K     L     M     N     O
      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    //  P     Q     R     S     T     U     V     W     X     Y     Z     [     \     ]     ^     _
      0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0xFF, 0x34, 0x35, 0x36,
    //  `     a     b     c     d     e     f     g     h     i     j     k     l     m     n     o
      0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46,
    //  p     q     r     s     t     u     v     w     x     y     z     {     |     }     ~
      0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0xFF, 0xFF,
    ];
  }

  public sealed class SafeForMailDialect : IDialect
  {
    private SafeForMailDialect() { }

    static char IDialect.ZeroSentinel => '!';
    static char IDialect.FillSentinel => '%';
    static char[] IDialect.CharacterMap => "#$&'()*+,./0123456789;<=>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_abcdefghijklmnopqrstuvwxyz{|}~".ToCharArray();
    static byte[] IDialect.ByteCodes =>
    [
      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    //        !     "     #     $     %     &     '     (     )     *     +     ,     -     .     /
      0xFF, 0xFF, 0xFF, 0x00, 0x01, 0xFF, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xFF, 0x09, 0x0A,
    //  0     1     2     3     4     5     6     7     8     9     :     ;     <     =     >     ?
      0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0xFF, 0x15, 0x16, 0x17, 0x18, 0xFF,
    //  @     A     B     C     D     E     F     G     H     I     J     K     L     M     N     O
      0xFF, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    // P     Q     R     S     T     U     V     W     X     Y     Z     [     \     ]     ^     _
      0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0xFF, 0x34, 0x35, 0x36,
    //  `     a     b     c     d     e     f     g     h     i     j     k     l     m     n     o
      0xFF, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45,
    //  p     q     r     s     t     u     v     w     x     y     z     {     |     }     ~
      0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0xFF,
    ];
  }

  public static MemoryStream? DecodeFromString(string? text) => DecodeFromString<SafeForXmlDialect>(text);
  public static MemoryStream? DecodeFromString<Dialect>(string? text)
    where Dialect : IDialect
  {
    if (text is null)
    {
      return null;
    }

    var textLength = text.Length;
    if (textLength == 0)
    {
      return new MemoryStream(0);
    }

    var guard = Guid.NewGuid().ToByteArray();
    var guardLength = guard.Length;
    var guardOffset = (textLength << 2) + 4;
    var data = new byte[guardOffset + guardLength];

    var guardSpan = new Span<byte>(data, guardOffset, guardLength);
    guard.CopyTo(guardSpan);

    var dataLength = DecodeFromString<Dialect>(data, text);
    if (dataLength > guardOffset)
    {
      throw new InvalidOperationException("decoder reported length overflowed end of buffer");
    }

    if (!guardSpan.SequenceEqual(guard))
    {
      throw new InvalidOperationException("end of buffer guard was overwritten");
    }

    return new MemoryStream(data, 0, dataLength, false, true);
  }

  // The issue does not reproduce when optimizations are disabled:
  // [MethodImpl(MethodImplOptions.NoOptimization)]
  private unsafe static int DecodeFromString<Dialect>(Span<byte> data, ReadOnlySpan<char> text)
    where Dialect : IDialect
  {
    fixed (byte* byteCodes = Dialect.ByteCodes)
    {
      var zeroSentinel = Dialect.ZeroSentinel;
      var fillSentinel = Dialect.FillSentinel;

      fixed (byte* pData = data)
      {
        var pCurrentValue = (uint*)pData;

        fixed (char* pText = text)
        {
          int block = 0;
          uint value = 0;

          for (char* pCurrentChar = pText, pEndChar = pText + text.Length; pCurrentChar < pEndChar; ++pCurrentChar)
          {
            var currentChar = *pCurrentChar;
            if (currentChar == zeroSentinel)
            {
              if (block > 3)
              {
                throw new InvalidOperationException("zero sentinel used in invalid block level; only supported in blocks 0-3");
              }

              if (block != 0)
              {
                *pCurrentValue++ = value;
                block = 0;
              }
              else
              {
                *pCurrentValue++ = 0;
              }
            }
            else if (currentChar == fillSentinel)
            {
              if (block != 0)
              {
                throw new InvalidOperationException("fill sentinel used in invalid block level; only support at block zero");
              }
              *pCurrentValue++ = uint.MaxValue;
            }
            else if (currentChar > 0x20 && currentChar < 0x7F)
            {
              var byteCode = byteCodes[currentChar];
              if (byteCode >= 0x55)
              {
                throw new InvalidOperationException("encoundered invalid decoded byte value");
              }

              switch (block)
              {
                case 0:
                {
                  value = byteCode;
                  block = 1;
                  break;
                }
                case 1:
                {
                  value += byteCode * 85u;
                  block = 2;
                  break;
                }
                case 2:
                {
                  value += byteCode * 7225u;
                  block = 3;
                  break;
                }
                case 3:
                {
                  value += byteCode * 614125u;
                  block = 4;
                  break;
                }
                case 4:
                {
                  value += byteCode * 52200625u;
                  *pCurrentValue++ = value;
                  block = 0;
                  break;
                }
                default:
                {
                  throw new InvalidOperationException("encountered invalid block level; block level must be 0-4");
                }
              }
            }
            else
            {
              if (currentChar != ' ' && currentChar != '\r' && currentChar != '\n' && currentChar != '\t')
              {
                throw new InvalidOperationException("encountered illegal character");
              }
            }
          }

          var bytesWritten = (int)((byte*)pCurrentValue - pData);
          switch (block)
          {
            case 2:
              if (value > 0xFF)
              {
                throw new InvalidOperationException("invalid 2-block leftover value encountered");
              }
              *(byte*)pCurrentValue = (byte)value;
              ++bytesWritten;
              break;
            case 3:
              if (value > 0xFFFF)
              {
                throw new InvalidOperationException("invalid 3-block leftover value encountered");
              }
              *(short*)pCurrentValue = (short)value;
              bytesWritten += 2;
              break;
            case 4:
              if (value > 0xFFFFFF)
              {
                throw new InvalidOperationException("invalid 4-block leftover value encountered");
              }
              *(byte*)pCurrentValue = (byte)value;
              *((byte*)pCurrentValue + 1) = (byte)(value >> 8);
              *((byte*)pCurrentValue + 2) = (byte)(value >> 16);
              bytesWritten += 3;
              break;
            default:
              if (block != 0)
              {
                throw new InvalidOperationException("invalid leftover block-level; leftovers can only be 2-4");
              }
              break;
          }

          return bytesWritten;
        }
      }
    }
  }

  public static string? EncodeToString(byte[] data) => EncodeToString<SafeForXmlDialect>(data);
  public static string? EncodeToString<Dialect>(byte[] data)
    where Dialect : IDialect
  {
    if (data is null)
    {
      return null;
    }

    int dataLength = data.Length;
    if (dataLength == 0)
    {
      return string.Empty;
    }

    var guard = Guid.NewGuid().ToByteArray().Select(b => (char)b).ToArray();
    var guardLength = guard.Length;
    var guardOffset = ((dataLength * 5) >> 2) + 4;
    var text = new char[guardOffset + guardLength];

    var guardSpan = new Span<char>(text, guardOffset, guardLength);
    guard.CopyTo(guardSpan);

    var textLength = EncodeToString<Dialect>(text, data);
    if (textLength > guardOffset)
    {
      throw new InvalidOperationException("encoder reported length overflowed end of buffer");
    }

    if (!guardSpan.SequenceEqual(guard))
    {
      throw new InvalidOperationException("end of buffer guard was overwritten");
    }

    return new string(text, 0, textLength);
  }

  private unsafe static int EncodeToString<Dialect>(Span<char> text, ReadOnlySpan<byte> data)
    where Dialect : IDialect
  {
    fixed (char* characterMap = Dialect.CharacterMap)
    {
      var zeroSentinel = Dialect.ZeroSentinel;
      var fillSentinel = Dialect.FillSentinel;

      fixed (char* pText = text)
      {
        var pCurrentChar = pText;

        fixed (byte* pData = data)
        {
          uint value = 0;
          uint temp = 0;

          var pCurrentValue = (uint*)pData;
          for (var pEndValue = pCurrentValue + (data.Length >> 2); pCurrentValue < pEndValue; ++pCurrentValue)
          {
            value = *pCurrentValue;

            if (value == 0)
            {
              *pCurrentChar++ = zeroSentinel;
              continue;
            }

            if (value == 0xFFFFFFFFUL)
            {
              *pCurrentChar++ = fillSentinel;
              continue;
            }

            temp = value / 85;
            pCurrentChar[0] = characterMap[value - temp * 85];
            if (temp == 0)
            {
              pCurrentChar[1] = zeroSentinel;
              pCurrentChar += 2;
              continue;
            }

            value = temp / 85;
            pCurrentChar[1] = characterMap[temp - value * 85];
            if (value == 0)
            {
              pCurrentChar[2] = zeroSentinel;
              pCurrentChar += 3;
              continue;
            }

            temp = value / 85;
            pCurrentChar[2] = characterMap[value - temp * 85];
            if (temp == 0)
            {
              pCurrentChar[3] = zeroSentinel;
              pCurrentChar += 4;
              continue;
            }

            value = temp / 85;
            pCurrentChar[3] = characterMap[temp - value * 85];
            temp = value / 85;
            pCurrentChar[4] = characterMap[value - temp * 85];
            pCurrentChar += 5;
          }

          pCurrentChar = EncodeLeftover(characterMap, pCurrentChar, (byte*)pCurrentValue, leftoverCount: data.Length & 3);

          return (int)(pCurrentChar - pText);
        }
      }
    }
  }

  private unsafe static char* EncodeLeftover(char* characterMap, char* pCurrentChar, byte* pLeftoverData, int leftoverCount)
  {
    uint value = 0;
    uint temp = 0;

    switch (leftoverCount)
    {
      case 3:
      {
        value = pLeftoverData[2];
        value <<= 8;
        value |= pLeftoverData[1];
        value <<= 8;
        value |= pLeftoverData[0];

        temp = value / 85;
        pCurrentChar[0] = characterMap[value - temp * 85];
        value = temp / 85;
        pCurrentChar[1] = characterMap[temp - value * 85];
        temp = value / 85;
        pCurrentChar[2] = characterMap[value - temp * 85];
        value = temp / 85;
        pCurrentChar[3] = characterMap[temp - value * 85];
        pCurrentChar += 4;
        break;
      }
      case 2:
      {
        value = pLeftoverData[1];
        value <<= 8;
        value |= pLeftoverData[0];

        temp = value / 85;
        pCurrentChar[0] = characterMap[value - temp * 85];
        value = temp / 85;
        pCurrentChar[1] = characterMap[temp - value * 85];
        temp = value / 85;
        pCurrentChar[2] = characterMap[value - temp * 85];
        value = temp / 85;
        pCurrentChar += 3;
        break;
      }
      case 1:
      {
        value = pLeftoverData[0];

        temp = value / 85;
        pCurrentChar[0] = characterMap[value - temp * 85];
        value = temp / 85;
        pCurrentChar[1] = characterMap[temp - value * 85];
        pCurrentChar += 2;
        break;
      }
      case 0:
      {
        break;
      }
      default:
      {
        throw new InvalidOperationException("invalid leftover bytes; only 1-3 are valid");
      }
    }

    return pCurrentChar;
  }
}

Add a C# NUnit Test Project with the following code and add a reference to the library project:

using System.ComponentModel;

namespace Repro.Library.Tests;

[TestFixture]
[EditorBrowsable(EditorBrowsableState.Never)]
internal sealed class AdvancedBase85GeneralTests
{
  private static void TestFixedSizeAndValues(int size, byte value)
  {
    Console.WriteLine("  Testing size {0} with value 0x{1:X2}", size, value);

    var originalBytes = new byte[size];
    originalBytes.AsSpan().Fill(value);

    var encodedBytes = AdvancedBase85.EncodeToString(originalBytes);
    var decodedBytes = AdvancedBase85.DecodeFromString(encodedBytes)!.ToArray();
    CompareBytes(originalBytes, decodedBytes);
  }

  private static void CompareBytes(byte[] bytes, byte[] newBytes)
  {
    if (!bytes.SequenceEqual(newBytes))
    {
      Assert.Fail("byte arrays differ");
    }
  }

  [Test]
  public static void SantityChecks()
  {
    const int nMaxBytes = 4903;

    for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
    {
      TestFixedSizeAndValues(nBytes, 0x00);
    }

    for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
    {
      TestFixedSizeAndValues(nBytes, 0x01);
    }

    for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
    {
      TestFixedSizeAndValues(nBytes, 0x7F);
    }

    for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
    {
      TestFixedSizeAndValues(nBytes, 0x80);
    }

    for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
    {
      TestFixedSizeAndValues(nBytes, 0xFE);
    }

    for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
    {
      TestFixedSizeAndValues(nBytes, 0xFF);
    }
  }

  [Test]
  public static void RandomChecks()
  {
    var runCount = 4096;
    Console.WriteLine("Running {0} random checks", runCount);

    var random = new Random(12345);
    for (int i = 0; i < 4096; i++)
    {
      var count = random.Next(1, 65536);
      var originalBytes = new byte[count];
      random.NextBytes(originalBytes);

      var encodedBytes = AdvancedBase85.EncodeToString(originalBytes);
      var decodedBytes = AdvancedBase85.DecodeFromString(encodedBytes)!.ToArray();
      CompareBytes(originalBytes, decodedBytes);

      if ((i + 1) % 256 == 0)
      {
        Console.WriteLine($"  {i + 1} checks completed");
      }
    }
  }

  [Test]
  public static void SparseChecks()
  {
    Console.WriteLine("Running sparse checks...");

    var random = new Random(12345);
    for (var offset = -4; offset <= 4; offset++)
    {
      var length = offset + 65536;
      var swapCount = int.Max(length >> 11, 8);

      Console.WriteLine("  Array length of {0} from offset {1}", length, offset);

      var originalBytes = new byte[length];
      for (var mode = -1; mode <= 1; mode++)
      {
        Console.WriteLine($"    Spare mode is {(mode == 0 ? "Mixed" : mode < 0 ? "Zeros" : "Ones")}");
        random.NextBytes(originalBytes);

        if (mode != 0)
        {
          for (var index = 0; index < length; index++)
          {
            byte value = originalBytes[index];
            if (value == 0 || value == byte.MaxValue)
            {
              originalBytes[index] = (byte)random.Next(1, 255);
            }
          }
        }

        var firstLength = 0;
        var lastLength = int.MaxValue;

        var loopCount = 0;
        var runCount = 0;
        for (; ; )
        {
          var text = AdvancedBase85.EncodeToString(originalBytes)!;
          if (mode != 0)
          {
            var newLength = text.Length;
            if (firstLength == 0)
            {
              firstLength = newLength;
            }
            else if (newLength > lastLength)
            {
              Assert.Fail("unexpected encoding growth");
            }
            lastLength = newLength;
          }

          var decodedBytes = AdvancedBase85.DecodeFromString(text)!.ToArray();
          CompareBytes(originalBytes, decodedBytes);

          loopCount++;
          if (loopCount % 512 == 0)
          {
            Console.WriteLine($"      {loopCount} checks completed");
          }

          if (runCount >= length)
          {
            break;
          }

          var swap = 0;
          while (swap < swapCount)
          {
            var index = random.Next(0, length);

            if (mode == 0)
            {
              originalBytes[index] = (byte)(random.NextDouble() < 0.5 ? 0x00 : 0xFF);
            }
            else if (mode < 0)
            {
              originalBytes[index] = 0x00;
            }
            else
            {
              originalBytes[index] = 0xFF;
            }

            swap++;
            runCount++;
          }
        }

        if (mode != 0)
        {
          Assert.That(lastLength, Is.LessThan(firstLength));
          Console.WriteLine($"      Encoding length reduced from {firstLength} to {lastLength}");
        }
      }
    }
  }
}

Expected behavior

All tests to pass with the Release configuration.

Actual behavior

Under the Release configuration, the tests non-deterministically fail.

Regression?

Worked in 8.0.

Known Workarounds

The issue does not reproduce if [MethodImpl(MethodImplOptions.NoOptimization)] is added to int DecodeFromString<Dialect>(Span<byte>, ReadOnlySpan<char>).

Configuration

Processor 12th Gen Intel(R) Core(TM) i7-12800H 2.40 GHz
Installed RAM 64.0 GB (63.7 GB usable)
System type 64-bit operating system, x64-based processor
Edition Windows 11 Enterprise
Version 23H2
OS build 22631.4460

Other information

No response

Metadata

Metadata

Assignees

Labels

area-CodeGen-coreclrCLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions