Skip to content

Commit 7e39669

Browse files
authored
Merge pull request #6157 from cj1128/fix-string16
Fix string16 bugs in Mac and Linux
2 parents d108b50 + 55f3a45 commit 7e39669

File tree

4 files changed

+119
-13
lines changed

4 files changed

+119
-13
lines changed

src/string.cpp

Lines changed: 94 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -635,26 +635,107 @@ gb_internal String normalize_path(gbAllocator a, String const &path, String cons
635635
#elif defined(GB_SYSTEM_UNIX) || defined(GB_SYSTEM_OSX)
636636
#include <wchar.h>
637637

638-
gb_internal int convert_multibyte_to_widechar(char const *multibyte_input, usize input_length, wchar_t *output, usize output_size) {
639-
String string = copy_string(heap_allocator(), make_string(cast(u8 const*)multibyte_input, input_length)); /* Guarantee NULL terminator */
640-
u8* input = string.text;
638+
// Splits the rune `r` into a UTF-16 surrogate pair.
//   *r1 receives the high (lead) surrogate: 0xd800 + the upper 10 bits of (r - 0x10000)
//   *r2 receives the low (trail) surrogate: 0xdc00 + the lower 10 bits of (r - 0x10000)
// No range validation is performed here; the visible caller only invokes this
// for runes that are >= 0x10000.
gb_internal void utf16_encode_surrogate_pair(Rune r, u16 *r1, u16 *r2) {
	Rune const offset    = r - 0x10000;
	Rune const high_bits = (offset >> 10) & 0x3ff;
	Rune const low_bits  = offset & 0x3ff;

	*r1 = cast(u16)(0xd800 + high_bits);
	*r2 = cast(u16)(0xdc00 + low_bits);
}
646+
647+
// Decodes a single code point from the front of the UTF-16 sequence `s`,
// which holds `n` code units. The decoded value is written to `*r` and the
// number of code units consumed is returned:
//   0, *r = GB_RUNE_INVALID   when n < 1
//   1, *r = the unit itself   when the unit is not a surrogate
//   1, *r = GB_RUNE_INVALID   for an unpaired low surrogate, or a high surrogate
//                             that is truncated or not followed by a low surrogate
//   2, *r = combined rune     for a well-formed surrogate pair
// Note that a failure and a non-surrogate unit both consume one unit, so the
// caller can only tell them apart by inspecting `*r` (and, if needed, s[0]).
gb_internal isize utf16_decode(u16 const *s, isize n, Rune *r) {
	static Rune const _surr1     = 0xd800;  // first high (lead) surrogate
	static Rune const _surr2     = 0xdc00;  // first low (trail) surrogate
	static Rune const _surr3     = 0xe000;  // one past the last low surrogate
	static Rune const _surr_self = 0x10000; // first code point that needs a pair

	if (n < 1) {
		*r = GB_RUNE_INVALID;
		return 0;
	}

	Rune c = cast(Rune)s[0];
	if (c < _surr1 || c >= _surr3) {
		// Not a surrogate: the code unit is the code point.
		*r = c;
		return 1;
	}
	if (c >= _surr2) {
		// A low surrogate may not start a sequence.
		*r = GB_RUNE_INVALID;
		return 1;
	}
	if (n < 2) {
		// High surrogate at the very end of the input.
		*r = GB_RUNE_INVALID;
		return 1;
	}

	Rune c2 = cast(Rune)s[1];
	if (c2 < _surr2 || c2 >= _surr3) {
		// High surrogate not followed by a low surrogate.
		*r = GB_RUNE_INVALID;
		return 1;
	}

	*r = (((c - _surr1) << 10) | (c2 - _surr2)) + _surr_self;
	return 2;
}
641677

642-
mbstate_t ps = { 0 };
643-
size_t result = mbsrtowcs(output, cast(const char**)&input, output_size, &ps);
678+
// Converts `input_length` bytes of UTF-8 text at `multibyte_input` into UTF-16
// code units.
//
// `output` is written through a `u16 *`, i.e. as 16-bit code units regardless
// of sizeof(wchar_t); `output_size` is measured in those 16-bit units.
// When `output` is NULL nothing is stored and no size check is made, so the
// call only computes the number of code units the conversion would produce.
//
// Returns the number of UTF-16 code units produced, or -1 if the input is not
// valid UTF-8 or the result does not fit in `output_size` units.
gb_internal int convert_multibyte_to_widechar(char const *multibyte_input, usize input_length, wchar_t *output, usize output_size) {
	u16 *out = cast(u16 *)output;
	String s = make_string(cast(u8 const *)multibyte_input, input_length);
	isize i = 0;
	isize output_len = 0;
	while (i < s.len) {
		Rune r = 0;
		isize width = utf8_decode(s.text + i, s.len - i, &r);
		// A well-formed U+FFFD in the input occupies three bytes and is legal
		// text, so only treat GB_RUNE_INVALID as an error when it comes from a
		// failed decode.
		// NOTE(review): assumes utf8_decode reports malformed input with a
		// width of at most 1 - confirm against its definition.
		if (r == GB_RUNE_INVALID && width <= 1) {
			return -1;
		}
		i += width;

		// Encode the rune as one code unit (BMP) or a surrogate pair.
		u16 units[2] = {0, 0};
		isize unit_count = 0;
		if (r < 0x10000) {
			units[0] = cast(u16)r;
			unit_count = 1;
		} else {
			utf16_encode_surrogate_pair(r, &units[0], &units[1]);
			unit_count = 2;
		}

		if (out) {
			if (cast(usize)output_len + cast(usize)unit_count > output_size) {
				return -1;
			}
			for (isize k = 0; k < unit_count; k++) {
				out[output_len + k] = units[k];
			}
		}
		output_len += unit_count;
	}

	return cast(int)output_len;
}
648714

649715
// Converts `input_length` UTF-16 code units at `widechar_input` into UTF-8.
//
// `widechar_input` is read through a `u16 const *`, i.e. as 16-bit code units
// regardless of sizeof(wchar_t). `output_size` is measured in bytes.
// When `output` is NULL nothing is stored and no size check is made, so the
// call only computes the number of bytes the conversion would produce.
//
// Returns the number of UTF-8 bytes produced, or -1 if the input contains an
// unpaired surrogate or the result does not fit in `output_size` bytes.
gb_internal int convert_widechar_to_multibyte(wchar_t const *widechar_input, usize input_length, char* output, usize output_size) {
	u16 const *in = cast(u16 const *)widechar_input;
	// Work in signed lengths throughout so the loop condition and the
	// remaining-length argument do not mix isize with usize.
	isize const input_len = cast(isize)input_length;
	isize i = 0;
	isize output_len = 0;
	while (i < input_len) {
		Rune r = GB_RUNE_INVALID;
		isize width = utf16_decode(in + i, input_len - i, &r);
		// utf16_decode returns a non-surrogate unit unchanged, so a code unit
		// that itself equals GB_RUNE_INVALID is valid input, not a failure.
		if (r == GB_RUNE_INVALID && cast(Rune)in[i] != GB_RUNE_INVALID) {
			return -1;
		}
		i += width;

		u8 buf[4];
		isize char_len = gb_utf8_encode_rune(buf, r);

		if (output) {
			if (cast(usize)output_len + cast(usize)char_len > output_size) {
				return -1;
			}
			gb_memmove(output + output_len, buf, char_len);
		}
		output_len += char_len;
	}
	return cast(int)output_len;
}
659740
#else
660741
#error Implement system

tests/issues/run.bat

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ set COMMON=-define:ODIN_TEST_FANCY=false -file -vet -strict-style -ignore-unused
2727
..\..\..\odin build ..\test_issue_5265.odin %COMMON% || exit /b
2828
..\..\..\odin test ..\test_issue_5699.odin %COMMON% || exit /b
2929
..\..\..\odin test ..\test_issue_6068.odin %COMMON% || exit /b
30+
..\..\..\odin test ..\test_issue_6101.odin %COMMON% || exit /b
3031

3132
@echo off
3233

tests/issues/run.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ $ODIN build ../test_issue_5097-2.odin $COMMON
3434
$ODIN build ../test_issue_5265.odin $COMMON
3535
$ODIN test ../test_issue_5699.odin $COMMON
3636
$ODIN test ../test_issue_6068.odin $COMMON
37+
$ODIN test ../test_issue_6101.odin $COMMON
3738

3839
set +x
3940

tests/issues/test_issue_6101.odin

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// Tests issue #6101 https://github.com/odin-lang/Odin/issues/6101
2+
package test_issues
3+
4+
import "core:testing"
5+
6+
// A code point inside the Basic Multilingual Plane must convert to exactly one
// UTF-16 code unit holding the code point's own value.
@(test)
test_issue_6101_bmp :: proc(t: ^testing.T) {
	s := string16("\u732b")
	if !testing.expect_value(t, len(s), 1) {
		// The length is already wrong; do not index into the data below.
		return
	}

	u := transmute([]u16)s
	testing.expect_value(t, u[0], 0x732b)
}
14+
15+
// A code point outside the Basic Multilingual Plane must convert to a UTF-16
// surrogate pair: U+1F63A encodes as 0xD83D 0xDE3A.
@(test)
test_issue_6101_non_bmp :: proc(t: ^testing.T) {
	s := string16("\U0001F63A")
	if !testing.expect_value(t, len(s), 2) {
		// With fewer than two units the u[1] access below would be out of
		// bounds, so stop once the length expectation has failed.
		return
	}

	u := transmute([]u16)s
	testing.expect_value(t, u[0], 0xD83D)
	testing.expect_value(t, u[1], 0xDE3A)
}

0 commit comments

Comments
 (0)