Skip to content

Commit 0404251

Browse files
authored
feat: Adds Latin1 text support (#2317)
## Summary

- Adds proper Latin1 encoding support, replacing CP1252 usage throughout the codebase
- Adds specialized, optimized string decoding methods with safe string filtering for each encoding type
- Filters invalid Unicode characters (C0/C1 control codes, non-characters) by removal rather than replacement since the UO client renders nothing for these characters
- Fixes UTF-16 null terminator position handling to correctly advance by 2 bytes

## Changes

**TextEncoding.cs**

- Added SearchValues-based invalid byte/char detection for efficient filtering
- Added encoding-specific GetString methods: GetStringAscii, GetStringLatin1, GetStringUtf8, GetStringBigUni, GetStringLittleUni
- Each method supports a safeString parameter for filtering invalid characters
- Little-endian UTF-16 uses direct memory cast for zero-copy decoding on LE systems
- Invalid characters are removed (not replaced with U+FFFD) since the client renders nothing for them

**SpanReader.cs**

- Added ReadLatin1() and ReadLatin1Safe() methods
- Rewrote encoding-specific read methods to use optimized TextEncoding.GetString* methods
- Fixed UTF-16 null terminator handling: position now correctly advances by byteLength (2) instead of 1

**SpanWriter.cs**

- Added WriteLatin1 and WriteLatin1Null methods

## Packet Updates

- Updated all packet code to use Latin1 encoding instead of CP1252
- Affected: account packets, equipment packets, menu packets, message packets, mobile packets, player packets, secure trade packets, vendor packets, gump packets, book packets, mahjong packets

## Filtering Behavior

Invalid characters filtered in safe mode:

```
┌───────────────┬────────────────────────┐
│ Range         │ Description            │
├───────────────┼────────────────────────┤
│ 0x00-0x1F     │ C0 control codes       │
├───────────────┼────────────────────────┤
│ 0x7F          │ DEL                    │
├───────────────┼────────────────────────┤
│ 0x80-0x9F     │ C1 control codes       │
├───────────────┼────────────────────────┤
│ 0xFFFE-0xFFFF │ Unicode non-characters │
└───────────────┴────────────────────────┘
```

Note: Surrogate pairs (0xD800-0xDFFF) are not filtered because proper validation requires context checking for paired vs unpaired surrogates. The UO client renders nothing for these anyway.

## Test Plan

- All 631 Server.Tests pass
- Verified client rendering behavior using TestUnicodeGump command (pages 1-5)
- Confirmed U+FFFD, unpaired surrogates, and non-characters all render as blank in client
- Verified Latin1 characters (0xA0-0xFF) display correctly
- Verified C1 control codes (0x80-0x9F) are filtered and don't display
1 parent 1e4c32c commit 0404251

27 files changed

+727
-88
lines changed

Projects/Server.Tests/Tests/Buffers/SpanReaderTests.cs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,8 @@ public void TestReadLittleUniStringWithNull()
291291
var result = reader.ReadLittleUni();
292292

293293
Assert.Equal("Hi", result);
294-
Assert.Equal(5, reader.Position);
294+
// Position 6: 4 bytes for "Hi" + 2 bytes for UTF-16 null terminator
295+
Assert.Equal(6, reader.Position);
295296
}
296297

297298
[Fact]
@@ -309,12 +310,14 @@ public void TestReadLittleUniStringFixedLength()
309310
[Fact]
310311
public void TestReadLittleUniSafe()
311312
{
312-
ReadOnlySpan<byte> buffer = [(byte)'H', 0, 0xFF, 0xD8, (byte)'i', 0];
313+
// Test with C1 control code (0x85 = NEL) which should be filtered
314+
ReadOnlySpan<byte> buffer = [(byte)'H', 0, 0x85, 0x00, (byte)'i', 0];
313315
var reader = new SpanReader(buffer);
314316

315317
var result = reader.ReadLittleUniSafe();
316318

317-
Assert.Equal("H\uFFFDi", result);
319+
// C1 control (0x0085) removed - client renders nothing for invalid chars
320+
Assert.Equal("Hi", result);
318321
Assert.Equal(6, reader.Position);
319322
}
320323

@@ -339,7 +342,8 @@ public void TestReadBigUniStringWithNull()
339342
var result = reader.ReadBigUni();
340343

341344
Assert.Equal("Hi", result);
342-
Assert.Equal(5, reader.Position);
345+
// Position 6: 4 bytes for "Hi" + 2 bytes for UTF-16 null terminator
346+
Assert.Equal(6, reader.Position);
343347
}
344348

345349
[Fact]
@@ -357,12 +361,14 @@ public void TestReadBigUniStringFixedLength()
357361
[Fact]
358362
public void TestReadBigUniSafe()
359363
{
360-
ReadOnlySpan<byte> buffer = [0, (byte)'H', 0xD8, 0xFF, 0, (byte)'i'];
364+
// Test with C1 control code (0x0085 = NEL) which should be filtered
365+
ReadOnlySpan<byte> buffer = [0, (byte)'H', 0x00, 0x85, 0, (byte)'i'];
361366
var reader = new SpanReader(buffer);
362367

363368
var result = reader.ReadBigUniSafe();
364369

365-
Assert.Equal("H\uFFFDi", result);
370+
// C1 control (0x0085) removed - client renders nothing for invalid chars
371+
Assert.Equal("Hi", result);
366372
Assert.Equal(6, reader.Position);
367373
}
368374

Projects/Server/Buffers/SpanReader.cs

Lines changed: 208 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*************************************************************************
22
* ModernUO *
3-
* Copyright 2019-2025 - ModernUO Development Team *
3+
* Copyright 2019-2026 - ModernUO Development Team *
44
* Email: [email protected] *
55
* File: SpanReader.cs *
66
* *
@@ -207,49 +207,241 @@ public string ReadString(Encoding encoding, bool safeString = false, int fixedLe
207207
}
208208

209209
[MethodImpl(MethodImplOptions.AggressiveInlining)]
210-
public string ReadLittleUniSafe(int fixedLength) => ReadString(TextEncoding.UnicodeLE, true, fixedLength);
210+
public string ReadLittleUniSafe(int fixedLength) => ReadStringLittleUni(true, fixedLength);
211211

212212
[MethodImpl(MethodImplOptions.AggressiveInlining)]
213-
public string ReadLittleUniSafe() => ReadString(TextEncoding.UnicodeLE, true);
213+
public string ReadLittleUniSafe() => ReadStringLittleUni(true);
214214

215215
[MethodImpl(MethodImplOptions.AggressiveInlining)]
216-
public string ReadLittleUni(int fixedLength) => ReadString(TextEncoding.UnicodeLE, false, fixedLength);
216+
public string ReadLittleUni(int fixedLength) => ReadStringLittleUni(false, fixedLength);
217217

218218
[MethodImpl(MethodImplOptions.AggressiveInlining)]
219-
public string ReadLittleUni() => ReadString(TextEncoding.UnicodeLE);
219+
public string ReadLittleUni() => ReadStringLittleUni(false);
220220

221221
[MethodImpl(MethodImplOptions.AggressiveInlining)]
222-
public string ReadBigUniSafe(int fixedLength) => ReadString(TextEncoding.Unicode, true, fixedLength);
222+
public string ReadBigUniSafe(int fixedLength) => ReadStringBigUni(true, fixedLength);
223223

224224
[MethodImpl(MethodImplOptions.AggressiveInlining)]
225-
public string ReadBigUniSafe() => ReadString(TextEncoding.Unicode, true);
225+
public string ReadBigUniSafe() => ReadStringBigUni(true);
226226

227227
[MethodImpl(MethodImplOptions.AggressiveInlining)]
228-
public string ReadBigUni(int fixedLength) => ReadString(TextEncoding.Unicode, false, fixedLength);
228+
public string ReadBigUni(int fixedLength) => ReadStringBigUni(false, fixedLength);
229229

230230
[MethodImpl(MethodImplOptions.AggressiveInlining)]
231-
public string ReadBigUni() => ReadString(TextEncoding.Unicode);
231+
public string ReadBigUni() => ReadStringBigUni(false);
232232

233233
[MethodImpl(MethodImplOptions.AggressiveInlining)]
234-
public string ReadUTF8Safe(int fixedLength) => ReadString(TextEncoding.UTF8, true, fixedLength);
234+
public string ReadUTF8Safe(int fixedLength) => ReadStringUtf8(true, fixedLength);
235235

236236
[MethodImpl(MethodImplOptions.AggressiveInlining)]
237-
public string ReadUTF8Safe() => ReadString(TextEncoding.UTF8, true);
237+
public string ReadUTF8Safe() => ReadStringUtf8(true);
238238

239239
[MethodImpl(MethodImplOptions.AggressiveInlining)]
240-
public string ReadUTF8() => ReadString(TextEncoding.UTF8);
240+
public string ReadUTF8(int fixedLength) => ReadStringUtf8(false, fixedLength);
241241

242242
[MethodImpl(MethodImplOptions.AggressiveInlining)]
243-
public string ReadAsciiSafe(int fixedLength) => ReadString(Encoding.ASCII, true, fixedLength);
243+
public string ReadUTF8() => ReadStringUtf8(false);
244244

245245
[MethodImpl(MethodImplOptions.AggressiveInlining)]
246-
public string ReadAsciiSafe() => ReadString(Encoding.ASCII, true);
246+
public string ReadAsciiSafe(int fixedLength) => ReadStringAscii(true, fixedLength);
247247

248248
[MethodImpl(MethodImplOptions.AggressiveInlining)]
249-
public string ReadAscii(int fixedLength) => ReadString(Encoding.ASCII, false, fixedLength);
249+
public string ReadAsciiSafe() => ReadStringAscii(true);
250250

251251
[MethodImpl(MethodImplOptions.AggressiveInlining)]
252-
public string ReadAscii() => ReadString(Encoding.ASCII);
252+
public string ReadAscii(int fixedLength) => ReadStringAscii(false, fixedLength);
253+
254+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
255+
public string ReadAscii() => ReadStringAscii(false);
256+
257+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
258+
public string ReadLatin1Safe(int fixedLength) => ReadStringLatin1(true, fixedLength);
259+
260+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
261+
public string ReadLatin1Safe() => ReadStringLatin1(true);
262+
263+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
264+
public string ReadLatin1(int fixedLength) => ReadStringLatin1(false, fixedLength);
265+
266+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
267+
public string ReadLatin1() => ReadStringLatin1(false);
268+
269+
/// <summary>
270+
/// Reads a big-endian UTF-16 string, filtering control codes and non-characters in safe mode.
271+
/// </summary>
272+
private string ReadStringBigUni(bool safeString, int fixedLength = -1)
273+
{
274+
if (fixedLength == 0)
275+
{
276+
return "";
277+
}
278+
279+
const int byteLength = 2;
280+
var isFixedLength = fixedLength > -1;
281+
282+
var remaining = Remaining;
283+
int size;
284+
if (isFixedLength)
285+
{
286+
size = fixedLength * byteLength;
287+
if (size > remaining)
288+
{
289+
throw new EndOfStreamException("Cannot read past the end of the buffer.");
290+
}
291+
}
292+
else
293+
{
294+
// Ensure even number of bytes
295+
size = remaining - (remaining & 1);
296+
}
297+
298+
var span = _buffer.Slice(Position, size);
299+
var index = span.IndexOfTerminator(byteLength);
300+
301+
if (index > -1)
302+
{
303+
span = _buffer.Slice(Position, index);
304+
}
305+
306+
Position += isFixedLength || index < 0 ? size : index + byteLength;
307+
308+
return TextEncoding.GetStringBigUni(span, safeString);
309+
}
310+
311+
/// <summary>
312+
/// Reads a little-endian UTF-16 string, filtering control codes and non-characters in safe mode.
313+
/// </summary>
314+
private string ReadStringLittleUni(bool safeString, int fixedLength = -1)
315+
{
316+
if (fixedLength == 0)
317+
{
318+
return "";
319+
}
320+
321+
const int byteLength = 2;
322+
var isFixedLength = fixedLength > -1;
323+
324+
var remaining = Remaining;
325+
int size;
326+
if (isFixedLength)
327+
{
328+
size = fixedLength * byteLength;
329+
if (size > remaining)
330+
{
331+
throw new EndOfStreamException("Cannot read past the end of the buffer.");
332+
}
333+
}
334+
else
335+
{
336+
// Ensure even number of bytes
337+
size = remaining - (remaining & 1);
338+
}
339+
340+
var span = _buffer.Slice(Position, size);
341+
var index = span.IndexOfTerminator(byteLength);
342+
343+
if (index > -1)
344+
{
345+
span = _buffer.Slice(Position, index);
346+
}
347+
348+
Position += isFixedLength || index < 0 ? size : index + byteLength;
349+
350+
return TextEncoding.GetStringLittleUni(span, safeString);
351+
}
352+
353+
/// <summary>
354+
/// Reads an ASCII string, filtering C0 control codes and DEL in safe mode.
355+
/// </summary>
356+
private string ReadStringAscii(bool safeString, int fixedLength = -1)
357+
{
358+
if (fixedLength == 0)
359+
{
360+
return "";
361+
}
362+
363+
var isFixedLength = fixedLength > -1;
364+
365+
int size = isFixedLength ? fixedLength : Remaining;
366+
if (size > Remaining)
367+
{
368+
throw new EndOfStreamException("Cannot read past the end of the buffer.");
369+
}
370+
371+
var span = _buffer.Slice(Position, size);
372+
var index = span.IndexOf((byte)0);
373+
374+
if (index > -1)
375+
{
376+
span = _buffer.Slice(Position, index);
377+
}
378+
379+
Position += isFixedLength || index < 0 ? size : index + 1;
380+
381+
return TextEncoding.GetStringAscii(span, safeString);
382+
}
383+
384+
/// <summary>
385+
/// Reads a UTF-8 string, filtering control codes and non-characters in safe mode.
386+
/// </summary>
387+
private string ReadStringUtf8(bool safeString, int fixedLength = -1)
388+
{
389+
if (fixedLength == 0)
390+
{
391+
return "";
392+
}
393+
394+
var isFixedLength = fixedLength > -1;
395+
396+
int size = isFixedLength ? fixedLength : Remaining;
397+
if (size > Remaining)
398+
{
399+
throw new EndOfStreamException("Cannot read past the end of the buffer.");
400+
}
401+
402+
var span = _buffer.Slice(Position, size);
403+
var index = span.IndexOf((byte)0);
404+
405+
if (index > -1)
406+
{
407+
span = _buffer.Slice(Position, index);
408+
}
409+
410+
Position += isFixedLength || index < 0 ? size : index + 1;
411+
412+
return TextEncoding.GetStringUtf8(span, safeString);
413+
}
414+
415+
/// <summary>
416+
/// Reads a Latin1 string, filtering C0/C1 control codes in safe mode.
417+
/// </summary>
418+
private string ReadStringLatin1(bool safeString, int fixedLength = -1)
419+
{
420+
if (fixedLength == 0)
421+
{
422+
return "";
423+
}
424+
425+
var isFixedLength = fixedLength > -1;
426+
427+
int size = isFixedLength ? fixedLength : Remaining;
428+
if (size > Remaining)
429+
{
430+
throw new EndOfStreamException("Cannot read past the end of the buffer.");
431+
}
432+
433+
var span = _buffer.Slice(Position, size);
434+
var index = span.IndexOf((byte)0);
435+
436+
if (index > -1)
437+
{
438+
span = _buffer.Slice(Position, index);
439+
}
440+
441+
Position += isFixedLength || index < 0 ? size : index + 1;
442+
443+
return TextEncoding.GetStringLatin1(span, safeString);
444+
}
253445

254446
[MethodImpl(MethodImplOptions.AggressiveInlining)]
255447
public int Seek(int offset, SeekOrigin origin)

Projects/Server/Buffers/SpanWriter.cs

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*************************************************************************
22
* ModernUO *
3-
* Copyright 2019-2025 - ModernUO Development Team *
3+
* Copyright 2019-2026 - ModernUO Development Team *
44
* Email: [email protected] *
55
* File: SpanWriter.cs *
66
* *
@@ -273,8 +273,7 @@ public void Write(ReadOnlySpan<byte> buffer)
273273
[MethodImpl(MethodImplOptions.AggressiveInlining)]
274274
public void WriteAscii(char chr) => Write((byte)chr);
275275

276-
public void WriteAscii(
277-
ref RawInterpolatedStringHandler handler)
276+
public void WriteAscii(ref RawInterpolatedStringHandler handler)
278277
{
279278
Write(handler.Text, Encoding.ASCII);
280279
handler.Clear();
@@ -289,9 +288,22 @@ public void WriteAscii(
289288
handler.Clear();
290289
}
291290

292-
public void Write(
293-
Encoding encoding,
291+
public void WriteLatin1(ref RawInterpolatedStringHandler handler)
292+
{
293+
Write(handler.Text, Encoding.Latin1);
294+
handler.Clear();
295+
}
296+
297+
public void WriteLatin1(
298+
IFormatProvider? formatProvider,
299+
[InterpolatedStringHandlerArgument("formatProvider")]
294300
ref RawInterpolatedStringHandler handler)
301+
{
302+
Write(handler.Text, Encoding.Latin1);
303+
handler.Clear();
304+
}
305+
306+
public void Write(Encoding encoding, ref RawInterpolatedStringHandler handler)
295307
{
296308
Write(handler.Text, encoding);
297309
handler.Clear();
@@ -388,6 +400,19 @@ public void WriteAsciiNull(string value)
388400
[MethodImpl(MethodImplOptions.AggressiveInlining)]
389401
public void WriteAscii(string value, int fixedLength) => Write(value, Encoding.ASCII, fixedLength);
390402

403+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
404+
public void WriteLatin1(string value) => Write(value, Encoding.Latin1);
405+
406+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
407+
public void WriteLatin1(string value, int fixedLength) => Write(value, Encoding.Latin1, fixedLength);
408+
409+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
410+
public void WriteLatin1Null(string value)
411+
{
412+
Write(value, Encoding.Latin1);
413+
Write((byte)0); // '\0'
414+
}
415+
391416
[MethodImpl(MethodImplOptions.AggressiveInlining)]
392417
public void Clear(int count)
393418
{

0 commit comments

Comments
 (0)