diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index 41a4d2fac..9ced26ff1 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -226,15 +226,12 @@ jobs: elixir_version: "1.14" rebar3_version: "3.23.0" -# TODO: enable master again -# master will not work until we don't adapt to atom table changes -# # master/main version of OTP/Elixir -# - os: "ubuntu-24.04" -# cc: "cc" -# cxx: "c++" -# otp: "master" -# elixir_version: "main" -# rebar3_version: "3.24.0" + - os: "ubuntu-24.04" + cc: "cc" + cxx: "c++" + otp: "master" + elixir_version: "main" + rebar3_version: "3.24.0" # Additional default compiler builds - os: "ubuntu-20.04" diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c0a46069..f85d59f12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `supervisor:terminate_child/2`, `supervisor:restart_child/2` and `supervisor:delete_child/2` - Added `esp:partition_read/3`, and documentation for `esp:partition_erase_range/2/3` and `esp:partition_write/3` - Added support for list insertion in 'ets:insert/2'. +- Added support for OTP-28 ### Fixed - ESP32: improved sntp sync speed from a cold boot. diff --git a/doc/release-notes.md.in b/doc/release-notes.md.in index 1de96f072..6b9c71a4d 100644 --- a/doc/release-notes.md.in +++ b/doc/release-notes.md.in @@ -33,6 +33,7 @@ AtomVM will run BEAM files that have been compiled using the following Erlang an | ✅ OTP 24 | ✅ 1.14 | | ✅ OTP 25 | ✅ 1.14 | | ✅ OTP 26 | ✅ 1.15 | +| ✅ OTP 28 | ✅ 1.17 | ```{note} Versions of Elixir that are compatible with a particular OTP version may work. This table reflects the versions that are tested. 
diff --git a/src/libAtomVM/atom_table.c b/src/libAtomVM/atom_table.c index ed281dcb3..af474aa3b 100644 --- a/src/libAtomVM/atom_table.c +++ b/src/libAtomVM/atom_table.c @@ -445,9 +445,29 @@ long atom_table_ensure_atom(struct AtomTable *table, AtomString string, enum Ato return new_index; } -int atom_table_ensure_atoms( - struct AtomTable *table, const void *atoms, int count, int *translate_table) +static inline int read_encoded_len(const uint8_t **len_bytes) { + uint8_t byte0 = (*len_bytes)[0]; + + if ((byte0 & 0x8) == 0) { + (*len_bytes)++; + return byte0 >> 4; + + } else if ((byte0 & 0x10) == 0) { + uint8_t byte1 = (*len_bytes)[1]; + (*len_bytes) += 2; + return ((byte0 >> 5) << 8) | byte1; + + } else { + return -1; + } +} + +int atom_table_ensure_atoms(struct AtomTable *table, const void *atoms, int count, + int *translate_table, enum EnsureAtomsOpt opt) +{ + bool is_long_format = (opt & EnsureLongEncoding) != 0; + SMP_WRLOCK(table); int new_atoms_count = 0; @@ -455,16 +475,41 @@ int atom_table_ensure_atoms( const uint8_t *current_atom = atoms; for (int i = 0; i < count; i++) { - struct HNode *node = get_node(table, current_atom); + struct HNode *node; + if (is_long_format) { + int atom_len = read_encoded_len(&current_atom); /* on success current_atom now points past the length bytes */ + if (UNLIKELY(atom_len < 0)) { + fprintf(stderr, "Found invalid atom len.\n"); + SMP_UNLOCK(table); + return ATOM_TABLE_INVALID_LEN; + } else if (UNLIKELY(atom_len > 255)) { + fprintf(stderr, + "Unsupported atom length %i bytes.\n" + "Unlike OTP >= 28, AtomVM supports a maximum of 255 bytes " + "regardless of the number of codepoints.\n" + "If you are seeing this error please open an issue on GitHub:\n" + "https://github.com/atomvm/AtomVM/issues\n", + atom_len); + SMP_UNLOCK(table); + return ATOM_TABLE_INVALID_LEN; + } + char tmp_old_fmt[256]; /* 1 length byte + at most 255 atom bytes (checked above) */ + tmp_old_fmt[0] = atom_len; + memcpy(tmp_old_fmt + 1, current_atom, atom_len); + node = get_node(table, tmp_old_fmt); + current_atom += atom_len; + } else { + node = get_node(table, current_atom); + uint8_t 
atom_len = current_atom[0]; + current_atom += 1 + atom_len; + } + if (node) { translate_table[i] = node->index; } else { new_atoms_count++; translate_table[i] = ATOM_TABLE_NOT_FOUND; } - - uint8_t atom_len = current_atom[0]; - current_atom += 1 + atom_len; } maybe_rehash(table, new_atoms_count); @@ -473,6 +518,19 @@ int atom_table_ensure_atoms( int remaining_atoms = new_atoms_count; struct HNodeGroup *node_group = table->last_node_group; for (int i = 0; i < count; i++) { + + const uint8_t *to_be_copied = NULL; + const uint8_t *next_atom = current_atom; + uint8_t atom_len; + if (is_long_format) { + atom_len = read_encoded_len(&next_atom); + to_be_copied = next_atom; + next_atom += atom_len; + } else { + atom_len = current_atom[0]; + next_atom += 1 + atom_len; + } + if (translate_table[i] == ATOM_TABLE_NOT_FOUND) { if (!table->last_node_group_avail) { node_group = new_node_group(table, remaining_atoms); @@ -482,7 +540,19 @@ int atom_table_ensure_atoms( } } - unsigned long hash = sdbm_hash(current_atom, atom_string_len(current_atom)); + if (is_long_format) { + uint8_t *atom_copy = malloc(atom_len + 1); + if (IS_NULL_PTR(atom_copy)) { + // we are not going to remove atoms that have already been added up to this one + SMP_UNLOCK(table); + return ATOM_TABLE_ALLOC_FAIL; + } + atom_copy[0] = atom_len; + memcpy(atom_copy + 1, to_be_copied, atom_len); + current_atom = atom_copy; + } + + unsigned long hash = sdbm_hash(current_atom, atom_len); unsigned long bucket_index = hash % table->capacity; translate_table[i] = insert_node(table, node_group, bucket_index, current_atom); @@ -491,8 +561,7 @@ int atom_table_ensure_atoms( break; } } - uint8_t atom_len = current_atom[0]; - current_atom += 1 + atom_len; + current_atom = next_atom; } SMP_UNLOCK(table); diff --git a/src/libAtomVM/atom_table.h b/src/libAtomVM/atom_table.h index c06a3b535..e78423cac 100644 --- a/src/libAtomVM/atom_table.h +++ b/src/libAtomVM/atom_table.h @@ -31,9 +31,16 @@ extern "C" { #define ATOM_TABLE_NOT_FOUND 
-1 #define ATOM_TABLE_ALLOC_FAIL -2 +#define ATOM_TABLE_INVALID_LEN -3 struct AtomTable; +enum EnsureAtomsOpt +{ + EnsureAtomsNoOpts = 0, + EnsureLongEncoding = 1 +}; + enum AtomTableCopyOpt { AtomTableNoOpts = 0, @@ -56,8 +63,8 @@ AtomString atom_table_get_atom_string(struct AtomTable *table, long index); long atom_table_get_index(struct AtomTable *table, AtomString string); -int atom_table_ensure_atoms( - struct AtomTable *table, const void *atoms, int count, int *translate_table); +int atom_table_ensure_atoms(struct AtomTable *table, const void *atoms, int count, + int *translate_table, enum EnsureAtomsOpt opts); int atom_table_cmp_using_atom_index( struct AtomTable *table, int t_atom_index, int other_atom_index); diff --git a/src/libAtomVM/bif.c b/src/libAtomVM/bif.c index 5888d645f..ba56c7a79 100644 --- a/src/libAtomVM/bif.c +++ b/src/libAtomVM/bif.c @@ -24,11 +24,14 @@ #include #include "atom.h" +#include "bitstring.h" #include "defaultatoms.h" #include "dictionary.h" +#include "interop.h" #include "overflow_helpers.h" #include "term.h" #include "trace.h" +#include "unicode.h" #include "utils.h" //Ignore warning caused by gperf generated code @@ -1512,3 +1515,189 @@ term bif_erlang_size_1(Context *ctx, uint32_t fail_label, int live, term arg1) RAISE_ERROR_BIF(fail_label, BADARG_ATOM); } + +static term list_to_atom(Context *ctx, term a_list, bool create_new, term *error_reason); + +term bif_erlang_list_to_atom_1(Context *ctx, uint32_t fail_label, int live, term arg1) +{ + UNUSED(live); + + term error_reason; + term result = list_to_atom(ctx, arg1, true, &error_reason); + if (UNLIKELY(term_is_invalid_term(result))) { + RAISE_ERROR_BIF(fail_label, error_reason); + } + return result; +} + +term bif_erlang_list_to_existing_atom_1(Context *ctx, uint32_t fail_label, int live, term arg1) +{ + UNUSED(live); + + term error_reason; + term result = list_to_atom(ctx, arg1, false, &error_reason); + if (UNLIKELY(term_is_invalid_term(result))) { + RAISE_ERROR_BIF(fail_label, 
error_reason); + } + return result; +} + +static term list_to_atom(Context *ctx, term a_list, bool create_new, term *error_reason) +{ + if (UNLIKELY(!term_is_list(a_list))) { + *error_reason = BADARG_ATOM; + return term_invalid_term(); + } + + int ok; + char *atom_string = interop_list_to_utf8_string(a_list, &ok); + if (UNLIKELY(!ok)) { + *error_reason = OUT_OF_MEMORY_ATOM; + return term_invalid_term(); + } + int atom_string_len = strlen(atom_string); + if (UNLIKELY(atom_string_len > 255)) { + free(atom_string); + *error_reason = SYSTEM_LIMIT_ATOM; + return term_invalid_term(); + } + + AtomString atom = malloc(atom_string_len + 1); + if (IS_NULL_PTR(atom)) { + free(atom_string); + *error_reason = OUT_OF_MEMORY_ATOM; + return term_invalid_term(); + } + ((uint8_t *) atom)[0] = atom_string_len; + memcpy(((char *) atom) + 1, atom_string, atom_string_len); + free(atom_string); + + enum AtomTableCopyOpt atom_opts = AtomTableCopyAtom; + if (!create_new) { + atom_opts |= AtomTableAlreadyExisting; + } + long global_atom_index = atom_table_ensure_atom(ctx->global->atom_table, atom, atom_opts); + free((void *) atom); + if (UNLIKELY(global_atom_index == ATOM_TABLE_NOT_FOUND)) { + *error_reason = BADARG_ATOM; + return term_invalid_term(); + } else if (UNLIKELY(global_atom_index == ATOM_TABLE_ALLOC_FAIL)) { + *error_reason = OUT_OF_MEMORY_ATOM; + return term_invalid_term(); + } + return term_from_atom_index(global_atom_index); +} + +term bif_erlang_binary_to_atom_2(Context *ctx, uint32_t fail_label, int live, term arg1, term arg2) +{ + UNUSED(live); + + term error_reason; + term result = binary_to_atom(ctx, arg1, arg2, true, &error_reason); + if (UNLIKELY(term_is_invalid_term(result))) { + RAISE_ERROR_BIF(fail_label, error_reason); + } + return result; +} + +term bif_erlang_binary_to_existing_atom_2(Context *ctx, uint32_t fail_label, int live, term arg1, term arg2) +{ + UNUSED(live); + + term error_reason; + term result = binary_to_atom(ctx, arg1, arg2, false, &error_reason); + 
if (UNLIKELY(term_is_invalid_term(result))) { + RAISE_ERROR_BIF(fail_label, error_reason); + } + return result; +} + +// Converts a binary to an atom; when create_new is false the AtomTableAlreadyExisting option is passed to the atom table. +// encoding must be LATIN1_ATOM, UNICODE_ATOM or UTF8_ATOM; non-ASCII latin1 input is re-encoded as UTF-8. +// On failure returns term_invalid_term() and stores BADARG_ATOM, SYSTEM_LIMIT_ATOM +// or OUT_OF_MEMORY_ATOM in *error_reason. +term binary_to_atom(Context *ctx, term a_binary, term encoding, bool create_new, term *error_reason) +{ + if (UNLIKELY(!term_is_binary(a_binary))) { + *error_reason = BADARG_ATOM; + return term_invalid_term(); + } + + const char *atom_string = term_binary_data(a_binary); + size_t atom_string_len = term_binary_size(a_binary); + if (UNLIKELY(atom_string_len > 255)) { + *error_reason = SYSTEM_LIMIT_ATOM; + return term_invalid_term(); + } + + bool encode_latin1_to_utf8 = false; + if (UNLIKELY((encoding == LATIN1_ATOM) + && !unicode_buf_is_ascii((const uint8_t *) atom_string, atom_string_len))) { + encode_latin1_to_utf8 = true; + } else if (UNLIKELY((encoding != LATIN1_ATOM) && (encoding != UNICODE_ATOM) + && (encoding != UTF8_ATOM))) { + *error_reason = BADARG_ATOM; + return term_invalid_term(); + } + + AtomString atom; + if (LIKELY(!encode_latin1_to_utf8)) { + // every codepoint must decode; the decoder is given only the bytes left after offset i + size_t i = 0; + while (i < atom_string_len) { + uint32_t codepoint; + size_t codepoint_size; + if (UNLIKELY(bitstring_utf8_decode( + (uint8_t *) atom_string + i, atom_string_len - i, &codepoint, &codepoint_size) + != UnicodeTransformDecodeSuccess)) { + *error_reason = BADARG_ATOM; + return term_invalid_term(); + } + i += codepoint_size; + } + + atom = malloc(atom_string_len + 1); + if (IS_NULL_PTR(atom)) { + *error_reason = OUT_OF_MEMORY_ATOM; + return term_invalid_term(); + } + ((uint8_t *) atom)[0] = atom_string_len; + memcpy(((char *) atom) + 1, atom_string, atom_string_len); + } else { + // * 2 is the worst case size + size_t buf_len = atom_string_len * 2; + atom = malloc(buf_len + 1); + if (IS_NULL_PTR(atom)) { + *error_reason = OUT_OF_MEMORY_ATOM; + return term_invalid_term(); + } + uint8_t *atom_data = ((uint8_t *) atom) + 1; + size_t out_pos = 0; + for (size_t i = 0; i < atom_string_len; i++) { + size_t out_size; + bitstring_utf8_encode(((uint8_t) atom_string[i]), &atom_data[out_pos], &out_size); + out_pos += out_size; + } + if (out_pos > 255) { + free((void *) atom); + *error_reason = SYSTEM_LIMIT_ATOM; + return term_invalid_term(); + } + ((uint8_t *) atom)[0] = out_pos; + } + + enum 
AtomTableCopyOpt atom_opts = AtomTableCopyAtom; + if (!create_new) { + atom_opts |= AtomTableAlreadyExisting; + } + long global_atom_index = atom_table_ensure_atom(ctx->global->atom_table, atom, atom_opts); + free((void *) atom); + if (UNLIKELY(global_atom_index == ATOM_TABLE_NOT_FOUND)) { + *error_reason = BADARG_ATOM; + return term_invalid_term(); + } else if (UNLIKELY(global_atom_index == ATOM_TABLE_ALLOC_FAIL)) { + *error_reason = OUT_OF_MEMORY_ATOM; + return term_invalid_term(); + } + return term_from_atom_index(global_atom_index); +} diff --git a/src/libAtomVM/bif.h b/src/libAtomVM/bif.h index 5d257588f..d7d20468c 100644 --- a/src/libAtomVM/bif.h +++ b/src/libAtomVM/bif.h @@ -112,6 +112,14 @@ term bif_erlang_max_2(Context *ctx, uint32_t fail_label, term arg1, term arg2); term bif_erlang_size_1(Context *ctx, uint32_t fail_label, int live, term arg1); +term bif_erlang_list_to_atom_1(Context *ctx, uint32_t fail_label, int live, term arg1); +term bif_erlang_list_to_existing_atom_1(Context *ctx, uint32_t fail_label, int live, term arg1); +term bif_erlang_binary_to_atom_2(Context *ctx, uint32_t fail_label, int live, term arg1, term arg2); +term bif_erlang_binary_to_existing_atom_2(Context *ctx, uint32_t fail_label, int live, term arg1, term arg2); + +// helpers: +term binary_to_atom(Context *ctx, term a_binary, term encoding, bool create_new, term *error_reason); + #ifdef __cplusplus } #endif diff --git a/src/libAtomVM/bifs.gperf b/src/libAtomVM/bifs.gperf index d441da07a..71b252ff8 100644 --- a/src/libAtomVM/bifs.gperf +++ b/src/libAtomVM/bifs.gperf @@ -92,3 +92,7 @@ erlang:map_get/2, {.bif.base.type = BIFFunctionType, .bif.bif2_ptr = bif_erlang_ erlang:min/2, {.bif.base.type = BIFFunctionType, .bif.bif2_ptr = bif_erlang_min_2} erlang:max/2, {.bif.base.type = BIFFunctionType, .bif.bif2_ptr = bif_erlang_max_2} erlang:size/1, {.gcbif.base.type = GCBIFFunctionType, .gcbif.gcbif1_ptr = bif_erlang_size_1} +erlang:list_to_atom/1, {.gcbif.base.type = GCBIFFunctionType, 
.gcbif.gcbif1_ptr = bif_erlang_list_to_atom_1} +erlang:list_to_existing_atom/1, {.gcbif.base.type = GCBIFFunctionType, .gcbif.gcbif1_ptr = bif_erlang_list_to_existing_atom_1} +erlang:binary_to_atom/2, {.gcbif.base.type = GCBIFFunctionType, .gcbif.gcbif2_ptr = bif_erlang_binary_to_atom_2} +erlang:binary_to_existing_atom/2, {.gcbif.base.type = GCBIFFunctionType, .gcbif.gcbif2_ptr = bif_erlang_binary_to_existing_atom_2} diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c index f52f95cb9..70e9d05f3 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -56,6 +56,7 @@ return; \ } +static bool module_are_literals_compressed(const uint8_t *litT); #ifdef WITH_ZLIB static void *module_uncompress_literals(const uint8_t *litT, int size); #endif @@ -72,6 +73,13 @@ static void parse_line_table(uint16_t **line_refs, struct ModuleFilename **filen static enum ModuleLoadResult module_populate_atoms_table(Module *this_module, uint8_t *table_data, GlobalContext *glb) { int atoms_count = READ_32_ALIGNED(table_data + 8); + + enum EnsureAtomsOpt ensure_opts = EnsureAtomsNoOpts; + if (atoms_count < 0) { + ensure_opts = EnsureLongEncoding; + atoms_count = -atoms_count; + } + const char *current_atom = (const char *) table_data + 12; this_module->local_atoms_to_global_table = calloc(atoms_count + 1, sizeof(int)); @@ -81,10 +89,12 @@ static enum ModuleLoadResult module_populate_atoms_table(Module *this_module, ui } long ensure_result = atom_table_ensure_atoms( - glb->atom_table, current_atom, atoms_count, this_module->local_atoms_to_global_table + 1); - if (ensure_result == ATOM_TABLE_ALLOC_FAIL) { + glb->atom_table, current_atom, atoms_count, this_module->local_atoms_to_global_table + 1, ensure_opts); + if (UNLIKELY(ensure_result == ATOM_TABLE_ALLOC_FAIL)) { fprintf(stderr, "Cannot allocate memory while loading module (line: %i).\n", __LINE__); return MODULE_ERROR_FAILED_ALLOCATION; + } else if (UNLIKELY(ensure_result == ATOM_TABLE_INVALID_LEN)) { + return 
MODULE_ERROR_INVALID; } return MODULE_LOAD_OK; @@ -285,20 +295,26 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary list_init(&mod->line_ref_offsets); if (offsets[LITT]) { - #ifdef WITH_ZLIB - mod->literals_data = module_uncompress_literals(beam_file + offsets[LITT], sizes[LITT]); - if (IS_NULL_PTR(mod->literals_data)) { + if (!module_are_literals_compressed(beam_file + offsets[LITT])) { + mod->literals_data = beam_file + offsets[LITT] + LITT_HEADER_SIZE; + mod->free_literals_data = 0; + + } else { + #ifdef WITH_ZLIB + mod->literals_data = module_uncompress_literals(beam_file + offsets[LITT], sizes[LITT]); + if (IS_NULL_PTR(mod->literals_data)) { + module_destroy(mod); + return NULL; + } + mod->free_literals_data = 1; + #else + fprintf(stderr, "Error: zlib required to uncompress literals.\n"); module_destroy(mod); return NULL; - } - #else - fprintf(stderr, "Error: zlib required to uncompress literals.\n"); - module_destroy(mod); - return NULL; - #endif + #endif + } mod->literals_table = module_build_literals_table(mod->literals_data); - mod->free_literals_data = 1; } else if (offsets[LITU]) { mod->literals_data = beam_file + offsets[LITU] + IFF_SECTION_HEADER_SIZE; @@ -331,6 +347,12 @@ COLD_FUNC void module_destroy(Module *module) free(module); } +static bool module_are_literals_compressed(const uint8_t *litT) +{ + uint32_t required_buf_size = READ_32_ALIGNED(litT + LITT_UNCOMPRESSED_SIZE_OFFSET); + return (required_buf_size != 0); +} + #ifdef WITH_ZLIB static void *module_uncompress_literals(const uint8_t *litT, int size) { diff --git a/src/libAtomVM/module.h b/src/libAtomVM/module.h index e41f8bbc1..0b1e48ff4 100644 --- a/src/libAtomVM/module.h +++ b/src/libAtomVM/module.h @@ -144,7 +144,8 @@ typedef struct Module Module; enum ModuleLoadResult { MODULE_LOAD_OK = 0, - MODULE_ERROR_FAILED_ALLOCATION = 1 + MODULE_ERROR_FAILED_ALLOCATION = 1, + MODULE_ERROR_INVALID = 2 }; #ifdef ENABLE_ADVANCED_TRACE diff --git a/src/libAtomVM/nifs.c 
b/src/libAtomVM/nifs.c index ac702cbed..807d9c64c 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -86,9 +86,6 @@ static NativeHandlerResult process_console_mailbox(Context *ctx); static term make_list_from_utf8_buf(const uint8_t *buf, size_t buf_len, Context *ctx); static term make_list_from_ascii_buf(const uint8_t *buf, size_t len, Context *ctx); -static term binary_to_atom(Context *ctx, int argc, term argv[], int create_new); -static term list_to_atom(Context *ctx, int argc, term argv[], int create_new); - static term nif_binary_at_2(Context *ctx, int argc, term argv[]); static term nif_binary_copy(Context *ctx, int argc, term argv[]); static term nif_binary_first_1(Context *ctx, int argc, term argv[]); @@ -99,11 +96,11 @@ static term nif_calendar_system_time_to_universal_time_2(Context *ctx, int argc, static term nif_erlang_delete_element_2(Context *ctx, int argc, term argv[]); static term nif_erlang_atom_to_binary(Context *ctx, int argc, term argv[]); static term nif_erlang_atom_to_list_1(Context *ctx, int argc, term argv[]); -static term nif_erlang_binary_to_atom_2(Context *ctx, int argc, term argv[]); +static term nif_erlang_binary_to_atom_1(Context *ctx, int argc, term argv[]); static term nif_erlang_binary_to_float_1(Context *ctx, int argc, term argv[]); static term nif_erlang_binary_to_integer(Context *ctx, int argc, term argv[]); static term nif_erlang_binary_to_list_1(Context *ctx, int argc, term argv[]); -static term nif_erlang_binary_to_existing_atom_2(Context *ctx, int argc, term argv[]); +static term nif_erlang_binary_to_existing_atom_1(Context *ctx, int argc, term argv[]); static term nif_erlang_concat_2(Context *ctx, int argc, term argv[]); static term nif_erlang_display_1(Context *ctx, int argc, term argv[]); static term nif_erlang_erase_1(Context *ctx, int argc, term argv[]); @@ -122,8 +119,6 @@ static term nif_erlang_float_to_list(Context *ctx, int argc, term argv[]); static term nif_erlang_list_to_binary_1(Context *ctx, int 
argc, term argv[]); static term nif_erlang_list_to_integer(Context *ctx, int argc, term argv[]); static term nif_erlang_list_to_float_1(Context *ctx, int argc, term argv[]); -static term nif_erlang_list_to_atom_1(Context *ctx, int argc, term argv[]); -static term nif_erlang_list_to_existing_atom_1(Context *ctx, int argc, term argv[]); static term nif_erlang_monotonic_time_1(Context *ctx, int argc, term argv[]); static term nif_erlang_iolist_size_1(Context *ctx, int argc, term argv[]); static term nif_erlang_iolist_to_binary_1(Context *ctx, int argc, term argv[]); @@ -271,10 +266,10 @@ static const struct Nif atom_to_list_nif = .nif_ptr = nif_erlang_atom_to_list_1 }; -static const struct Nif binary_to_atom_nif = +static const struct Nif binary_to_atom_1_nif = { .base.type = NIFFunctionType, - .nif_ptr = nif_erlang_binary_to_atom_2 + .nif_ptr = nif_erlang_binary_to_atom_1 }; static const struct Nif binary_to_float_nif = @@ -295,10 +290,10 @@ static const struct Nif binary_to_list_nif = .nif_ptr = nif_erlang_binary_to_list_1 }; -static const struct Nif binary_to_existing_atom_nif = +static const struct Nif binary_to_existing_atom_1_nif = { .base.type = NIFFunctionType, - .nif_ptr = nif_erlang_binary_to_existing_atom_2 + .nif_ptr = nif_erlang_binary_to_existing_atom_1 }; static const struct Nif delete_element_nif = @@ -373,18 +368,6 @@ static const struct Nif is_process_alive_nif = .nif_ptr = nif_erlang_is_process_alive_1 }; -static const struct Nif list_to_atom_nif = -{ - .base.type = NIFFunctionType, - .nif_ptr = nif_erlang_list_to_atom_1 -}; - -static const struct Nif list_to_existing_atom_nif = -{ - .base.type = NIFFunctionType, - .nif_ptr = nif_erlang_list_to_existing_atom_1 -}; - static const struct Nif list_to_binary_nif = { .base.type = NIFFunctionType, @@ -1876,9 +1859,16 @@ static term nif_erlang_list_to_tuple_1(Context *ctx, int argc, term argv[]) return tuple; } -static term nif_erlang_binary_to_atom_2(Context *ctx, int argc, term argv[]) +static term 
nif_erlang_binary_to_atom_1(Context *ctx, int argc, term argv[]) { - return binary_to_atom(ctx, argc, argv, 1); + UNUSED(argc); + + term error_reason; + term result = binary_to_atom(ctx, argv[0], UTF8_ATOM, true, &error_reason); + if (UNLIKELY(term_is_invalid_term(result))) { + RAISE_ERROR(error_reason); + } + return result; } static term nif_erlang_binary_to_integer(Context *ctx, int argc, term argv[]) @@ -2035,131 +2025,16 @@ static term nif_erlang_binary_to_list_1(Context *ctx, int argc, term argv[]) return prev; } -static term nif_erlang_binary_to_existing_atom_2(Context *ctx, int argc, term argv[]) -{ - return binary_to_atom(ctx, argc, argv, 0); -} - -static term binary_to_atom(Context *ctx, int argc, term argv[], int create_new) -{ - term a_binary = argv[0]; - VALIDATE_VALUE(a_binary, term_is_binary); - - term encoding = (argc == 2) ? argv[1] : UTF8_ATOM; - - const char *atom_string = term_binary_data(a_binary); - size_t atom_string_len = term_binary_size(a_binary); - if (UNLIKELY(atom_string_len > 255)) { - RAISE_ERROR(SYSTEM_LIMIT_ATOM); - } - - bool encode_latin1_to_utf8 = false; - if (UNLIKELY((encoding == LATIN1_ATOM) - && !unicode_buf_is_ascii((const uint8_t *) atom_string, atom_string_len))) { - encode_latin1_to_utf8 = true; - } else if (UNLIKELY((encoding != LATIN1_ATOM) && (encoding != UNICODE_ATOM) - && (encoding != UTF8_ATOM))) { - RAISE_ERROR(BADARG_ATOM); - } - - AtomString atom; - if (LIKELY(!encode_latin1_to_utf8)) { - size_t i = 0; - while (i < atom_string_len) { - uint32_t codepoint; - size_t codepoint_size; - if (UNLIKELY(bitstring_utf8_decode( - (uint8_t *) atom_string + i, atom_string_len, &codepoint, &codepoint_size)) - != UnicodeTransformDecodeSuccess) { - RAISE_ERROR(BADARG_ATOM); - } - i += codepoint_size; - } - - atom = malloc(atom_string_len + 1); - ((uint8_t *) atom)[0] = atom_string_len; - memcpy(((char *) atom) + 1, atom_string, atom_string_len); - } else { - // * 2 is the worst case size - size_t buf_len = atom_string_len * 2; - 
atom = malloc(buf_len + 1); - uint8_t *atom_data = ((uint8_t *) atom) + 1; - size_t out_pos = 0; - for (size_t i = 0; i < atom_string_len; i++) { - size_t out_size; - bitstring_utf8_encode(((uint8_t) atom_string[i]), &atom_data[out_pos], &out_size); - out_pos += out_size; - } - if (out_pos > 255) { - free((void *) atom); - RAISE_ERROR(SYSTEM_LIMIT_ATOM); - } - ((uint8_t *) atom)[0] = out_pos; - } - - enum AtomTableCopyOpt atom_opts = AtomTableCopyAtom; - if (!create_new) { - atom_opts |= AtomTableAlreadyExisting; - } - long global_atom_index = atom_table_ensure_atom(ctx->global->atom_table, atom, atom_opts); - free((void *) atom); - if (UNLIKELY(global_atom_index == ATOM_TABLE_NOT_FOUND)) { - RAISE_ERROR(BADARG_ATOM); - } else if (UNLIKELY(global_atom_index == ATOM_TABLE_ALLOC_FAIL)) { - RAISE_ERROR(OUT_OF_MEMORY_ATOM); - } - return term_from_atom_index(global_atom_index); -} - -term nif_erlang_list_to_atom_1(Context *ctx, int argc, term argv[]) -{ - return list_to_atom(ctx, argc, argv, 1); -} - -term nif_erlang_list_to_existing_atom_1(Context *ctx, int argc, term argv[]) -{ - return list_to_atom(ctx, argc, argv, 0); -} - -term list_to_atom(Context *ctx, int argc, term argv[], int create_new) +static term nif_erlang_binary_to_existing_atom_1(Context *ctx, int argc, term argv[]) { UNUSED(argc); - term a_list = argv[0]; - VALIDATE_VALUE(a_list, term_is_list); - - int ok; - char *atom_string = interop_list_to_utf8_string(a_list, &ok); - if (UNLIKELY(!ok)) { - RAISE_ERROR(OUT_OF_MEMORY_ATOM); - } - int atom_string_len = strlen(atom_string); - if (UNLIKELY(atom_string_len > 255)) { - free(atom_string); - RAISE_ERROR(SYSTEM_LIMIT_ATOM); + term error_reason; + term result = binary_to_atom(ctx, argv[0], UTF8_ATOM, false, &error_reason); + if (UNLIKELY(term_is_invalid_term(result))) { + RAISE_ERROR(error_reason); } - - AtomString atom = malloc(atom_string_len + 1); - if (IS_NULL_PTR(atom)) { - free(atom_string); - RAISE_ERROR(OUT_OF_MEMORY_ATOM); - } - ((uint8_t *) atom)[0] 
= atom_string_len; - memcpy(((char *) atom) + 1, atom_string, atom_string_len); - free(atom_string); - - enum AtomTableCopyOpt atom_opts = AtomTableCopyAtom; - if (!create_new) { - atom_opts |= AtomTableAlreadyExisting; - } - long global_atom_index = atom_table_ensure_atom(ctx->global->atom_table, atom, atom_opts); - free((void *) atom); - if (UNLIKELY(global_atom_index == ATOM_TABLE_NOT_FOUND)) { - RAISE_ERROR(BADARG_ATOM); - } else if (UNLIKELY(global_atom_index == ATOM_TABLE_ALLOC_FAIL)) { - RAISE_ERROR(OUT_OF_MEMORY_ATOM); - } - return term_from_atom_index(global_atom_index); + return result; } static term nif_erlang_atom_to_binary(Context *ctx, int argc, term argv[]) diff --git a/src/libAtomVM/nifs.gperf b/src/libAtomVM/nifs.gperf index 0cc99e02c..a2f337769 100644 --- a/src/libAtomVM/nifs.gperf +++ b/src/libAtomVM/nifs.gperf @@ -43,14 +43,12 @@ calendar:system_time_to_universal_time/2, &system_time_to_universal_time_nif erlang:atom_to_binary/1, &atom_to_binary_nif erlang:atom_to_binary/2, &atom_to_binary_nif erlang:atom_to_list/1, &atom_to_list_nif -erlang:binary_to_atom/1, &binary_to_atom_nif -erlang:binary_to_atom/2, &binary_to_atom_nif +erlang:binary_to_atom/1, &binary_to_atom_1_nif erlang:binary_to_float/1, &binary_to_float_nif erlang:binary_to_integer/1, &binary_to_integer_nif erlang:binary_to_integer/2, &binary_to_integer_nif erlang:binary_to_list/1, &binary_to_list_nif -erlang:binary_to_existing_atom/1, &binary_to_existing_atom_nif -erlang:binary_to_existing_atom/2, &binary_to_existing_atom_nif +erlang:binary_to_existing_atom/1, &binary_to_existing_atom_1_nif erlang:delete_element/2, &delete_element_nif erlang:erase/1, &erase_nif erlang:error/1, &error_nif @@ -65,8 +63,6 @@ erlang:float_to_list/1, &float_to_list_nif erlang:float_to_list/2, &float_to_list_nif erlang:fun_info/2, &fun_info_nif erlang:insert_element/3, &insert_element_nif -erlang:list_to_atom/1, &list_to_atom_nif -erlang:list_to_existing_atom/1, &list_to_existing_atom_nif 
erlang:integer_to_binary/1, &integer_to_binary_nif erlang:integer_to_binary/2, &integer_to_binary_nif erlang:integer_to_list/1, &integer_to_list_nif diff --git a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h index 226a0aee0..a6b9e2777 100644 --- a/src/libAtomVM/opcodesswitch.h +++ b/src/libAtomVM/opcodesswitch.h @@ -2010,6 +2010,31 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) break; } + case GCBIFFunctionType: { + // Support compilers < OTP28 that generate CALL_EXT + // for binary_to_(existing_)atom/1,2 and list_to_(existing_)atom/1 + // functions. + // Regular CALL_EXTs to those functions are generated as well + // even on OTP28, so it is required to allow calling them using + // CALL_EXT even on OTP28: BIFs are used for try ... catch. + const struct GCBif *gcbif = EXPORTED_FUNCTION_TO_GCBIF(func); + term return_value; + switch (arity) { + case 1: + return_value = gcbif->gcbif1_ptr(ctx, 0, 0, x_regs[0]); + break; + case 2: + return_value = gcbif->gcbif2_ptr(ctx, 0, 0, x_regs[0], x_regs[1]); + break; + default: + fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", arity); + AVM_ABORT(); + } + PROCESS_MAYBE_TRAP_RETURN_VALUE_RESTORE_PC(return_value, orig_pc); + x_regs[0] = return_value; + + break; + } default: { fprintf(stderr, "Invalid function type %i at index: %" PRIu32 "\n", func->type, index); AVM_ABORT(); @@ -2116,6 +2141,36 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) break; } + case GCBIFFunctionType: { + // Support compilers < OTP28 that generate CALL_EXT_LAST + // for binary_to_(existing_)atom/1,2 and list_to_(existing_)atom/1 + // functions. + // Regular CALL_EXT_LASTs to those functions are generated as well + // even on OTP28, so it is required to allow calling them using + // CALL_EXT_LAST even on OTP28: BIFs are used for try ... catch. 
+ ctx->cp = ctx->e[n_words]; + ctx->e += (n_words + 1); + + const struct GCBif *gcbif = EXPORTED_FUNCTION_TO_GCBIF(func); + term return_value; + switch (arity) { + case 1: + return_value = gcbif->gcbif1_ptr(ctx, 0, 0, x_regs[0]); + break; + case 2: + return_value = gcbif->gcbif2_ptr(ctx, 0, 0, x_regs[0], x_regs[1]); + break; + default: + fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", arity); + AVM_ABORT(); + } + PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value); + x_regs[0] = return_value; + + DO_RETURN(); + + break; + } default: { fprintf(stderr, "Invalid function type %i at index: %" PRIu32 "\n", func->type, index); AVM_ABORT(); @@ -3571,6 +3626,33 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) break; } + case GCBIFFunctionType: { + // Support compilers < OTP28 that generate CALL_EXT_ONLY + // for binary_to_(existing_)atom/1,2 and list_to_(existing_)atom/1 + // functions. + // Regular CALL_EXT_ONLYs to those functions are generated as well + // even on OTP28, so it is required to allow calling them using + // CALL_EXT_ONLY even on OTP28: BIFs are used for try ... catch. 
+ const struct GCBif *gcbif = EXPORTED_FUNCTION_TO_GCBIF(func); + term return_value; + switch (arity) { + case 1: + return_value = gcbif->gcbif1_ptr(ctx, 0, 0, x_regs[0]); + break; + case 2: + return_value = gcbif->gcbif2_ptr(ctx, 0, 0, x_regs[0], x_regs[1]); + break; + default: + fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", arity); + AVM_ABORT(); + } + PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value); + x_regs[0] = return_value; + + DO_RETURN(); + + break; + } default: { AVM_ABORT(); } diff --git a/tools/packbeam/packbeam.c b/tools/packbeam/packbeam.c index 3c8cf7f1e..f3451b9db 100644 --- a/tools/packbeam/packbeam.c +++ b/tools/packbeam/packbeam.c @@ -47,6 +47,7 @@ typedef struct FileData { } FileData; static void pad_and_align(FILE *f); +bool are_literals_compressed(const uint8_t *litT); static void *uncompress_literals(const uint8_t *litT, int size, size_t *uncompressedSize); static void add_module_header(FILE *f, const char *module_name, uint32_t flags); static void pack_beam_file(FILE *pack, const uint8_t *data, size_t size, const char *filename, int is_entrypoint, bool include_lines); @@ -338,15 +339,21 @@ static void pack_beam_file(FILE *pack, const uint8_t *data, size_t size, const c assert_fwrite(data + offsets[LINT], sizes[LINT] + IFF_SECTION_HEADER_SIZE, pack); pad_and_align(pack); } - if (offsets[LITT]) { - size_t u_size; - void *deflated = uncompress_literals(data + offsets[LITT], sizes[LITT], &u_size); - assert_fwrite("LitU", 4, pack); - uint32_t size_field = ENDIAN_SWAP_32(u_size); - assert_fwrite(&size_field, sizeof(size_field), pack); - assert_fwrite(deflated, u_size, pack); - free(deflated); + const uint8_t *litt = data + offsets[LITT]; + size_t litt_size = sizes[LITT]; + if (are_literals_compressed(litt)) { + size_t u_size; + void *deflated = uncompress_literals(data + offsets[LITT], litt_size, &u_size); + assert_fwrite("LitU", 4, pack); + uint32_t size_field = ENDIAN_SWAP_32(u_size); + assert_fwrite(&size_field, sizeof(size_field), 
pack); + assert_fwrite(deflated, u_size, pack); + free(deflated); + } else { + assert_fwrite(data + offsets[LITT], sizes[LITT] + IFF_SECTION_HEADER_SIZE, pack); + pad_and_align(pack); + } } pad_and_align(pack); @@ -419,6 +426,12 @@ static int do_list(int argc, char **argv) return ret; } +bool are_literals_compressed(const uint8_t *litT) +{ + unsigned int required_buf_size = READ_32_ALIGNED(litT + LITT_UNCOMPRESSED_SIZE_OFFSET); + return (required_buf_size != 0); +} + static void *uncompress_literals(const uint8_t *litT, int size, size_t *uncompressedSize) { unsigned int required_buf_size = READ_32_ALIGNED(litT + LITT_UNCOMPRESSED_SIZE_OFFSET);