apache · paulirwin · Aug 27, 2025 · Aug 27, 2025 · NightOwl888 · Sep 9, 2025
diff --git a/src/Lucene.Net.Tests/Util/TestUnicodeUtil.cs b/src/Lucene.Net.Tests/Util/TestUnicodeUtil.cs
@@ -3,6 +3,7 @@
 using Lucene.Net.Attributes;
 using NUnit.Framework;
 using System;
+using System.Text;
 using Assert = Lucene.Net.TestFramework.Assert;
 
 namespace Lucene.Net.Util
@@ -339,7 +340,7 @@ public void TestUTF8toUTF16Exception(byte[] invalidUtf8, bool shouldThrow)
 
             if (shouldThrow)
             {
-                Assert.Throws<FormatException>(() => UnicodeUtil.UTF8toUTF16(invalidUtf8, scratch));
+                Assert.Throws<DecoderFallbackException>(() => UnicodeUtil.UTF8toUTF16(invalidUtf8, scratch));
             }
             else
             {

diff --git a/src/Lucene.Net/Util/BytesRef.cs b/src/Lucene.Net/Util/BytesRef.cs
@@ -44,7 +44,7 @@ namespace Lucene.Net.Util
     [Serializable]
 #endif
     // LUCENENET specific: Not implementing ICloneable per Microsoft's recommendation
-    [DebuggerDisplay("{ToString()} {Utf8ToString()}")]
+    [DebuggerDisplay("{ToString()} {Utf8ToStringWithFallback()}")]
 if (bytesRef.TryUtf8ToString(out var utf8String)) 
 { 
     return utf8String; 
 } 
 else 
 { 
     return bytesRef.ToString(); 
 } 
 if (bytesRef.TryUtf8ToString(out var utf8String)) 
 { 
     return utf8String; 
 } 
 else 
 { 
     return bytesRef.ToString(); 
 } 
     public sealed class BytesRef : IComparable<BytesRef>, IComparable, IEquatable<BytesRef> // LUCENENET specific - implemented IComparable for FieldComparator, IEquatable<BytesRef>
     {
         /// <summary>

diff --git a/src/Lucene.Net/Util/UnicodeUtil.cs b/src/Lucene.Net/Util/UnicodeUtil.cs
@@ -886,7 +886,7 @@ public static string ToHexString(string s)
         /// it doesn't provide enough space to hold the worst case of each byte becoming a UTF-16 codepoint.
         /// <para/>
         /// NOTE: Full characters are read, even if this reads past the length passed (and
-        /// can result in an <see cref="FormatException"/> if invalid UTF-8 is passed).
+        /// can result in a <see cref="DecoderFallbackException"/> if invalid UTF-8 is passed).
         /// Explicit checks for valid UTF-8 are not performed.
         /// </summary>
         /// <seealso cref="UTF8toUTF16(ReadOnlySpan{byte}, CharsRef)"/>
@@ -901,7 +901,7 @@ public static void UTF8toUTF16(byte[] utf8, int offset, int length, CharsRef cha
         /// it doesn't provide enough space to hold the worst case of each byte becoming a UTF-16 codepoint.
         /// <para/>
         /// NOTE: Full characters are read, even if this reads past the length passed (and
-        /// can result in an <see cref="FormatException"/> if invalid UTF-8 is passed).
+        /// can result in a <see cref="DecoderFallbackException"/> if invalid UTF-8 is passed).
         /// Explicit checks for valid UTF-8 are not performed.
         /// </summary>
         /// <remarks>
@@ -926,15 +926,15 @@ public static void UTF8toUTF16(ReadOnlySpan<byte> utf8, CharsRef chars)
                 {
                     if (utf8.Length <= i)
                     {
-                        throw new FormatException($"Invalid UTF-8 starting at [{b:x2}] at offset {i - 1}");
+                        throw new DecoderFallbackException($"Invalid UTF-8 starting at [{b:x2}] at offset {i - 1}");
                     }
                     @out[out_offset++] = (char)(((b & 0x1f) << 6) + (utf8[i++] & 0x3f));
                 }
                 else if (b < 0xf0)
                 {
                     if (utf8.Length <= i + 1)
                     {
-                        throw new FormatException($"Invalid UTF-8 starting at [{b:x2}] at offset {i - 1}");
+                        throw new DecoderFallbackException($"Invalid UTF-8 starting at [{b:x2}] at offset {i - 1}");
                     }
                     @out[out_offset++] = (char)(((b & 0xf) << 12) + ((utf8[i] & 0x3f) << 6) + (utf8[i + 1] & 0x3f));
                     i += 2;
@@ -943,7 +943,7 @@ public static void UTF8toUTF16(ReadOnlySpan<byte> utf8, CharsRef chars)
                 {
                     if (utf8.Length <= i + 2)
                     {
-                        throw new FormatException($"Invalid UTF-8 starting at [{b:x2}] at offset {i - 1}");
+                        throw new DecoderFallbackException($"Invalid UTF-8 starting at [{b:x2}] at offset {i - 1}");
                     }
                     if (Debugging.AssertsEnabled) Debugging.Assert(b < 0xf8, "b = 0x{0:x}", b);
                     int ch = ((b & 0x7) << 18) + ((utf8[i] & 0x3f) << 12) + ((utf8[i + 1] & 0x3f) << 6) + (utf8[i + 2] & 0x3f);