Skip to content

Commit

Permalink
Merge pull request #1490 from bettio/otp28-support
Browse files Browse the repository at this point in the history
Add support for OTP-28 (master branch)

Re-enable tests against OTP from the master branch.

In order to support what will be released as OTP-28 some changes have been required:
- New encoding for atoms
- Uncompressed literals
- to_atom NIFs (`binary_to_atom`, etc...) are now BIFs when used in guards
(hence support for GCBIFs in CALL_EXT related opcodes has been introduced for
OTP < 28 support and when this optimization is not applied)

These changes are made under both the "Apache 2.0" and the "GNU Lesser General
Public License 2.1 or later" license terms (dual license).

SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
  • Loading branch information
bettio committed Feb 14, 2025
2 parents 652d60e + c079677 commit 2c82f47
Show file tree
Hide file tree
Showing 14 changed files with 445 additions and 193 deletions.
15 changes: 6 additions & 9 deletions .github/workflows/build-and-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -226,15 +226,12 @@ jobs:
elixir_version: "1.14"
rebar3_version: "3.23.0"

# TODO: enable master again
# master will not work until we don't adapt to atom table changes
# # master/main version of OTP/Elixir
# - os: "ubuntu-24.04"
# cc: "cc"
# cxx: "c++"
# otp: "master"
# elixir_version: "main"
# rebar3_version: "3.24.0"
- os: "ubuntu-24.04"
cc: "cc"
cxx: "c++"
otp: "master"
elixir_version: "main"
rebar3_version: "3.24.0"

# Additional default compiler builds
- os: "ubuntu-20.04"
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added `supervisor:terminate_child/2`, `supervisor:restart_child/2` and `supervisor:delete_child/2`
- Added `esp:partition_read/3`, and documentation for `esp:partition_erase_range/2/3` and `esp:partition_write/3`
- Added support for list insertion in 'ets:insert/2'.
- Added support for OTP-28

### Fixed
- ESP32: improved sntp sync speed from a cold boot.
Expand Down
1 change: 1 addition & 0 deletions doc/release-notes.md.in
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ AtomVM will run BEAM files that have been compiled using the following Erlang an
| ✅ OTP 24 | ✅ 1.14 |
| ✅ OTP 25 | ✅ 1.14 |
| ✅ OTP 26 | ✅ 1.15 |
| ✅ OTP 28 | ✅ 1.17 |

```{note}
Versions of Elixir that are compatible with a particular OTP version may work. This table reflects the versions that are tested.
Expand Down
87 changes: 78 additions & 9 deletions src/libAtomVM/atom_table.c
Original file line number Diff line number Diff line change
Expand Up @@ -445,26 +445,71 @@ long atom_table_ensure_atom(struct AtomTable *table, AtomString string, enum Ato
return new_index;
}

int atom_table_ensure_atoms(
struct AtomTable *table, const void *atoms, int count, int *translate_table)
/**
 * @brief Decode a variable-width encoded length and advance the read cursor.
 *
 * Two layouts are recognised, selected by bits 3 and 4 of the first byte:
 *  - bit 3 clear: one-byte form, the value is the upper 4 bits of that byte;
 *  - bit 3 set, bit 4 clear: two-byte form, the upper 3 bits of the first
 *    byte are the high part and the second byte is the low 8 bits.
 *
 * @param len_bytes in/out pointer to the first byte of the encoded length;
 *        on success it is moved past the bytes that were consumed, on failure
 *        it is left untouched.
 * @return the decoded length, or -1 when both bit 3 and bit 4 are set (a
 *         layout this function does not decode).
 */
static inline int read_encoded_len(const uint8_t **len_bytes)
{
    const uint8_t *cursor = *len_bytes;
    const unsigned int first = cursor[0];

    // One-byte form: value lives in the high nibble.
    if (!(first & 0x8)) {
        *len_bytes = cursor + 1;
        return (int) (first >> 4);
    }

    // Any wider form is rejected without consuming input.
    if (first & 0x10) {
        return -1;
    }

    // Two-byte form: 3 high bits from the first byte, 8 low bits from the second.
    const unsigned int second = cursor[1];
    *len_bytes = cursor + 2;
    return (int) (((first >> 5) << 8) | second);
}

int atom_table_ensure_atoms(struct AtomTable *table, const void *atoms, int count,
int *translate_table, enum EnsureAtomsOpt opt)
{
bool is_long_format = (opt & EnsureLongEncoding) != 0;

SMP_WRLOCK(table);

int new_atoms_count = 0;

const uint8_t *current_atom = atoms;

for (int i = 0; i < count; i++) {
struct HNode *node = get_node(table, current_atom);
struct HNode *node;
if (is_long_format) {
int atom_len = read_encoded_len(&current_atom);
if (UNLIKELY(atom_len < 0)) {
fprintf(stderr, "Found invalid atom len.");
SMP_UNLOCK(table);
return ATOM_TABLE_INVALID_LEN;
} else if (UNLIKELY(atom_len > 255)) {
fprintf(stderr,
"Unsupported atom length %i bytes.\n"
"Unlike OTP >= 28, AtomVM supports a maximum of 255 bytes"
"regardeless the number of codepoints.\n"
"If you are seeing this error please open an issue on GitHub:\n"
"https://github.com/atomvm/AtomVM/issues\n",
atom_len);
SMP_UNLOCK(table);
return ATOM_TABLE_INVALID_LEN;
}
char tmp_old_fmt[256];
tmp_old_fmt[0] = atom_len;
memcpy(tmp_old_fmt + 1, current_atom, atom_len);
node = get_node(table, tmp_old_fmt);
current_atom += atom_len;
} else {
node = get_node(table, current_atom);
uint8_t atom_len = current_atom[0];
current_atom += 1 + atom_len;
}

if (node) {
translate_table[i] = node->index;
} else {
new_atoms_count++;
translate_table[i] = ATOM_TABLE_NOT_FOUND;
}

uint8_t atom_len = current_atom[0];
current_atom += 1 + atom_len;
}

maybe_rehash(table, new_atoms_count);
Expand All @@ -473,6 +518,19 @@ int atom_table_ensure_atoms(
int remaining_atoms = new_atoms_count;
struct HNodeGroup *node_group = table->last_node_group;
for (int i = 0; i < count; i++) {

const uint8_t *to_be_copied = NULL;
const uint8_t *next_atom = current_atom;
uint8_t atom_len;
if (is_long_format) {
atom_len = read_encoded_len(&next_atom);
to_be_copied = next_atom;
next_atom += atom_len;
} else {
atom_len = current_atom[0];
next_atom += 1 + atom_len;
}

if (translate_table[i] == ATOM_TABLE_NOT_FOUND) {
if (!table->last_node_group_avail) {
node_group = new_node_group(table, remaining_atoms);
Expand All @@ -482,7 +540,19 @@ int atom_table_ensure_atoms(
}
}

unsigned long hash = sdbm_hash(current_atom, atom_string_len(current_atom));
if (is_long_format) {
uint8_t *atom_copy = malloc(atom_len + 1);
if (IS_NULL_PTR(atom_copy)) {
// we are not going to remove atoms that have already been added up to this one
SMP_UNLOCK(table);
return ATOM_TABLE_ALLOC_FAIL;
}
atom_copy[0] = atom_len;
memcpy(atom_copy + 1, to_be_copied, atom_len);
current_atom = atom_copy;
}

unsigned long hash = sdbm_hash(current_atom, atom_len);
unsigned long bucket_index = hash % table->capacity;

translate_table[i] = insert_node(table, node_group, bucket_index, current_atom);
Expand All @@ -491,8 +561,7 @@ int atom_table_ensure_atoms(
break;
}
}
uint8_t atom_len = current_atom[0];
current_atom += 1 + atom_len;
current_atom = next_atom;
}

SMP_UNLOCK(table);
Expand Down
11 changes: 9 additions & 2 deletions src/libAtomVM/atom_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,16 @@ extern "C" {

// Sentinel index meaning that the atom is not present in the atom table.
#define ATOM_TABLE_NOT_FOUND -1
// Error code returned when memory for an atom could not be allocated.
#define ATOM_TABLE_ALLOC_FAIL -2
// Error code returned when an encoded atom length is malformed or larger than the supported maximum.
#define ATOM_TABLE_INVALID_LEN -3

struct AtomTable;

/**
 * @brief Options for atom_table_ensure_atoms(), describing how the length of
 * each atom in the input buffer is encoded.
 */
enum EnsureAtomsOpt
{
    /** No option set: each atom is prefixed by a single length byte. */
    EnsureAtomsNoOpts = 0,
    /** Each atom is prefixed by a variable-width (one or two bytes) encoded length. */
    EnsureLongEncoding = (1 << 0)
};

enum AtomTableCopyOpt
{
AtomTableNoOpts = 0,
Expand All @@ -56,8 +63,8 @@ AtomString atom_table_get_atom_string(struct AtomTable *table, long index);

long atom_table_get_index(struct AtomTable *table, AtomString string);

int atom_table_ensure_atoms(
struct AtomTable *table, const void *atoms, int count, int *translate_table);
int atom_table_ensure_atoms(struct AtomTable *table, const void *atoms, int count,
int *translate_table, enum EnsureAtomsOpt opts);

int atom_table_cmp_using_atom_index(
struct AtomTable *table, int t_atom_index, int other_atom_index);
Expand Down
176 changes: 176 additions & 0 deletions src/libAtomVM/bif.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@
#include <math.h>

#include "atom.h"
#include "bitstring.h"
#include "defaultatoms.h"
#include "dictionary.h"
#include "interop.h"
#include "overflow_helpers.h"
#include "term.h"
#include "trace.h"
#include "unicode.h"
#include "utils.h"

//Ignore warning caused by gperf generated code
Expand Down Expand Up @@ -1512,3 +1515,176 @@ term bif_erlang_size_1(Context *ctx, uint32_t fail_label, int live, term arg1)

RAISE_ERROR_BIF(fail_label, BADARG_ATOM);
}

// Forward declaration: shared helper used by the list_to_atom / list_to_existing_atom BIFs,
// defined after them in this file.
static term list_to_atom(Context *ctx, term a_list, bool create_new, term *error_reason);

/**
 * @brief BIF entry point for list_to_atom/1.
 *
 * Converts arg1 to an atom through list_to_atom() with atom creation enabled.
 *
 * @param ctx the calling context
 * @param fail_label label forwarded to RAISE_ERROR_BIF on failure
 * @param live unused
 * @param arg1 the list to convert
 * @return the resulting atom term; on failure the error reported by
 *         list_to_atom() is raised through RAISE_ERROR_BIF.
 */
term bif_erlang_list_to_atom_1(Context *ctx, uint32_t fail_label, int live, term arg1)
{
    UNUSED(live);

    term reason;
    const term atom = list_to_atom(ctx, arg1, true, &reason);
    const bool failed = term_is_invalid_term(atom);
    if (UNLIKELY(failed)) {
        RAISE_ERROR_BIF(fail_label, reason);
    }

    return atom;
}

/**
 * @brief BIF entry point for list_to_existing_atom/1.
 *
 * Converts arg1 to an atom through list_to_atom() with atom creation
 * disabled, so only atoms already known to the atom table are returned.
 *
 * @param ctx the calling context
 * @param fail_label label forwarded to RAISE_ERROR_BIF on failure
 * @param live unused
 * @param arg1 the list to convert
 * @return the resulting atom term; on failure the error reported by
 *         list_to_atom() is raised through RAISE_ERROR_BIF.
 */
term bif_erlang_list_to_existing_atom_1(Context *ctx, uint32_t fail_label, int live, term arg1)
{
    UNUSED(live);

    term reason;
    const term atom = list_to_atom(ctx, arg1, false, &reason);
    const bool failed = term_is_invalid_term(atom);
    if (UNLIKELY(failed)) {
        RAISE_ERROR_BIF(fail_label, reason);
    }

    return atom;
}

/**
 * @brief Convert a list term into an atom term.
 *
 * The list is rendered to a UTF-8 C string, repacked as a length-prefixed
 * atom string (one length byte followed by the bytes) and looked up in, or
 * added to, the global atom table.
 *
 * @param ctx the calling context (its global atom table is used)
 * @param a_list the list to convert
 * @param create_new when false, AtomTableAlreadyExisting is passed to the
 *        atom table so that no new atom is requested
 * @param error_reason out: set to the error atom when the invalid term is
 *        returned (BADARG_ATOM, OUT_OF_MEMORY_ATOM or SYSTEM_LIMIT_ATOM);
 *        not written on success
 * @return the atom term, or the invalid term on failure
 */
static term list_to_atom(Context *ctx, term a_list, bool create_new, term *error_reason)
{
    if (UNLIKELY(!term_is_list(a_list))) {
        *error_reason = BADARG_ATOM;
        return term_invalid_term();
    }

    int ok;
    char *utf8 = interop_list_to_utf8_string(a_list, &ok);
    if (UNLIKELY(!ok)) {
        *error_reason = OUT_OF_MEMORY_ATOM;
        return term_invalid_term();
    }

    // The length must fit in the single length byte of an atom string.
    int utf8_len = strlen(utf8);
    if (UNLIKELY(utf8_len > 255)) {
        free(utf8);
        *error_reason = SYSTEM_LIMIT_ATOM;
        return term_invalid_term();
    }

    void *atom_buf = malloc(utf8_len + 1);
    if (IS_NULL_PTR(atom_buf)) {
        free(utf8);
        *error_reason = OUT_OF_MEMORY_ATOM;
        return term_invalid_term();
    }
    uint8_t *atom_bytes = atom_buf;
    atom_bytes[0] = utf8_len;
    memcpy(atom_bytes + 1, utf8, utf8_len);
    free(utf8);

    enum AtomTableCopyOpt atom_opts = AtomTableCopyAtom;
    if (!create_new) {
        atom_opts |= AtomTableAlreadyExisting;
    }

    // AtomTableCopyAtom is always set, so the temporary buffer is released right after the call.
    long global_atom_index = atom_table_ensure_atom(ctx->global->atom_table, atom_buf, atom_opts);
    free(atom_buf);

    switch (global_atom_index) {
        case ATOM_TABLE_NOT_FOUND:
            *error_reason = BADARG_ATOM;
            return term_invalid_term();

        case ATOM_TABLE_ALLOC_FAIL:
            *error_reason = OUT_OF_MEMORY_ATOM;
            return term_invalid_term();

        default:
            return term_from_atom_index(global_atom_index);
    }
}

/**
 * @brief BIF entry point for binary_to_atom/2.
 *
 * Converts arg1 to an atom through binary_to_atom() with atom creation enabled.
 *
 * @param ctx the calling context
 * @param fail_label label forwarded to RAISE_ERROR_BIF on failure
 * @param live unused
 * @param arg1 the binary to convert
 * @param arg2 the encoding atom
 * @return the resulting atom term; on failure the error reported by
 *         binary_to_atom() is raised through RAISE_ERROR_BIF.
 */
term bif_erlang_binary_to_atom_2(Context *ctx, uint32_t fail_label, int live, term arg1, term arg2)
{
    UNUSED(live);

    term reason;
    const term atom = binary_to_atom(ctx, arg1, arg2, true, &reason);
    const bool failed = term_is_invalid_term(atom);
    if (UNLIKELY(failed)) {
        RAISE_ERROR_BIF(fail_label, reason);
    }

    return atom;
}

/**
 * @brief BIF entry point for binary_to_existing_atom/2.
 *
 * Converts arg1 to an atom through binary_to_atom() with atom creation
 * disabled, so only atoms already known to the atom table are returned.
 *
 * @param ctx the calling context
 * @param fail_label label forwarded to RAISE_ERROR_BIF on failure
 * @param live unused
 * @param arg1 the binary to convert
 * @param arg2 the encoding atom
 * @return the resulting atom term; on failure the error reported by
 *         binary_to_atom() is raised through RAISE_ERROR_BIF.
 */
term bif_erlang_binary_to_existing_atom_2(Context *ctx, uint32_t fail_label, int live, term arg1, term arg2)
{
    UNUSED(live);

    term reason;
    const term atom = binary_to_atom(ctx, arg1, arg2, false, &reason);
    const bool failed = term_is_invalid_term(atom);
    if (UNLIKELY(failed)) {
        RAISE_ERROR_BIF(fail_label, reason);
    }

    return atom;
}

/**
 * @brief Convert a binary term into an atom term.
 *
 * For the unicode/utf8 encodings (and for latin1 input that is pure ASCII)
 * the bytes are validated as UTF-8 and used as they are. For latin1 input
 * containing non-ASCII bytes, every byte is re-encoded as a UTF-8 codepoint.
 * The result is packed as a length-prefixed atom string (one length byte
 * followed by the bytes) and looked up in, or added to, the global atom table.
 *
 * @param ctx the calling context (its global atom table is used)
 * @param a_binary the binary to convert
 * @param encoding one of LATIN1_ATOM, UNICODE_ATOM or UTF8_ATOM
 * @param create_new when false, AtomTableAlreadyExisting is passed to the
 *        atom table so that no new atom is requested
 * @param error_reason out: set to the error atom when the invalid term is
 *        returned; not written on success. Possible values:
 *        - BADARG_ATOM: not a binary, unknown encoding, invalid UTF-8, or the
 *          atom table reported ATOM_TABLE_NOT_FOUND;
 *        - SYSTEM_LIMIT_ATOM: the input or the re-encoded atom exceeds 255 bytes;
 *        - OUT_OF_MEMORY_ATOM: an allocation failed.
 * @return the atom term, or the invalid term on failure
 */
term binary_to_atom(Context *ctx, term a_binary, term encoding, bool create_new, term *error_reason)
{
    if (UNLIKELY(!term_is_binary(a_binary))) {
        *error_reason = BADARG_ATOM;
        return term_invalid_term();
    }

    const char *atom_string = term_binary_data(a_binary);
    size_t atom_string_len = term_binary_size(a_binary);
    if (UNLIKELY(atom_string_len > 255)) {
        *error_reason = SYSTEM_LIMIT_ATOM;
        return term_invalid_term();
    }

    bool encode_latin1_to_utf8 = false;
    if (UNLIKELY((encoding == LATIN1_ATOM)
            && !unicode_buf_is_ascii((const uint8_t *) atom_string, atom_string_len))) {
        encode_latin1_to_utf8 = true;
    } else if (UNLIKELY((encoding != LATIN1_ATOM) && (encoding != UNICODE_ATOM)
                   && (encoding != UTF8_ATOM))) {
        *error_reason = BADARG_ATOM;
        return term_invalid_term();
    }

    AtomString atom;
    if (LIKELY(!encode_latin1_to_utf8)) {
        // Validate that the whole binary is well-formed UTF-8.
        size_t i = 0;
        while (i < atom_string_len) {
            uint32_t codepoint;
            size_t codepoint_size;
            // Only the bytes from offset i onwards are available to the decoder:
            // passing the full length would let a truncated trailing sequence
            // be read past the end of the binary.
            if (UNLIKELY(bitstring_utf8_decode((uint8_t *) atom_string + i, atom_string_len - i,
                             &codepoint, &codepoint_size)
                    != UnicodeTransformDecodeSuccess)) {
                *error_reason = BADARG_ATOM;
                return term_invalid_term();
            }
            i += codepoint_size;
        }

        atom = malloc(atom_string_len + 1);
        if (UNLIKELY(atom == NULL)) {
            *error_reason = OUT_OF_MEMORY_ATOM;
            return term_invalid_term();
        }
        ((uint8_t *) atom)[0] = atom_string_len;
        memcpy(((char *) atom) + 1, atom_string, atom_string_len);
    } else {
        // * 2 is the worst case size
        size_t buf_len = atom_string_len * 2;
        atom = malloc(buf_len + 1);
        if (UNLIKELY(atom == NULL)) {
            *error_reason = OUT_OF_MEMORY_ATOM;
            return term_invalid_term();
        }
        uint8_t *atom_data = ((uint8_t *) atom) + 1;
        size_t out_pos = 0;
        for (size_t i = 0; i < atom_string_len; i++) {
            size_t out_size;
            bitstring_utf8_encode(((uint8_t) atom_string[i]), &atom_data[out_pos], &out_size);
            out_pos += out_size;
        }
        // The re-encoded length must still fit in the single length byte.
        if (out_pos > 255) {
            free((void *) atom);
            *error_reason = SYSTEM_LIMIT_ATOM;
            return term_invalid_term();
        }
        ((uint8_t *) atom)[0] = out_pos;
    }

    enum AtomTableCopyOpt atom_opts = AtomTableCopyAtom;
    if (!create_new) {
        atom_opts |= AtomTableAlreadyExisting;
    }
    // AtomTableCopyAtom is always set, so the temporary buffer is released right after the call.
    long global_atom_index = atom_table_ensure_atom(ctx->global->atom_table, atom, atom_opts);
    free((void *) atom);
    if (UNLIKELY(global_atom_index == ATOM_TABLE_NOT_FOUND)) {
        *error_reason = BADARG_ATOM;
        return term_invalid_term();
    } else if (UNLIKELY(global_atom_index == ATOM_TABLE_ALLOC_FAIL)) {
        *error_reason = OUT_OF_MEMORY_ATOM;
        return term_invalid_term();
    }
    return term_from_atom_index(global_atom_index);
}
Loading

0 comments on commit 2c82f47

Please sign in to comment.