Skip to content

Commit 5eccda3

Browse files
committed
Fix one-byte string Latin-1 to UTF-8 conversion (rubyjs#338)
Ruby is right to insist that an ASCII-8BIT string containing a byte with its high bit set cannot be converted to UTF-8. Interpret such strings as ISO-8859-1 (a.k.a. Latin-1) first, and only then convert them to UTF-8.
1 parent 4bae43d commit 5eccda3

File tree

2 files changed: 14 additions (+14) and 2 deletions (-2).

ext/mini_racer_extension/mini_racer_extension.c

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -339,13 +339,24 @@ static VALUE str_encode_bang(VALUE v)
339339

340340
static void des_string8(void *arg, const uint8_t *s, size_t n)
341341
{
342+
rb_encoding *e;
342343
DesCtx *c;
343344
VALUE v;
344345

345346
c = arg;
346-
v = rb_enc_str_new((char *)s, n, rb_ascii8bit_encoding());
347-
if (c->transcode_latin1)
347+
if (*c->err)
348+
return;
349+
if (c->transcode_latin1) {
350+
e = rb_enc_find("ISO-8859-1"); // TODO cache?
351+
if (!e) {
352+
snprintf(c->err, sizeof(c->err), "no ISO-8859-1 encoding");
353+
return;
354+
}
355+
v = rb_enc_str_new((char *)s, n, e);
348356
v = str_encode_bang(v); // cannot fail
357+
} else {
358+
v = rb_enc_str_new((char *)s, n, rb_ascii8bit_encoding());
359+
}
349360
put(c, v);
350361
}
351362

test/mini_racer_test.rb

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1115,6 +1115,7 @@ def test_function_property
11151115

11161116
def test_string_encoding
11171117
context = MiniRacer::Context.new
1118+
assert_equal "ä", context.eval("'ä'")
11181119
assert_equal "ok", context.eval("'ok'".encode("ISO-8859-1"))
11191120
assert_equal "ok", context.eval("'ok'".encode("ISO8859-1"))
11201121
assert_equal "ok", context.eval("'ok'".encode("UTF-16LE"))

0 commit comments

Comments
 (0)