Skip to content

Commit

Permalink
Reverted support for term_to_binary/2
Browse files Browse the repository at this point in the history
Signed-off-by: Fred Dushin <fred@dushin.net>
  • Loading branch information
fadushin committed Feb 26, 2024
1 parent e9fdddf commit 6b001a2
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 163 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Support for utf8 encoding to `*_to_atom` and `atom_to_*` functions
- `binary_to_atom/1` and `atom_to_binary/1` that default to utf8 (they were introduced with OTP23)
- Added Pico cmake option `AVM_WAIT_BOOTSEL_ON_EXIT` (default `ON`) to allow tools to use automated `BOOTSEL` mode after main application exits
- Use UTF-8 encoding for atoms when using `erlang:term_to_binary/1`, in conformance with OTP-26

### Fixed

Expand Down Expand Up @@ -46,7 +47,6 @@ used)
- New atom table, which uses less memory, has improved performances and better code.
- SPI: when gpio number is not provided for `miso` or `mosi` default to disabled
- Change port call tuple format to the same format as gen_server, so casts can be supported too
- Use UTF-8 encoding for atoms when using `erlang:term_to_binary/1,2`, in conformance with OTP-26

### Fixed

Expand Down
12 changes: 4 additions & 8 deletions doc/src/programmers-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -589,9 +589,9 @@ See the `word_size` key in the [System APIs](#system-apis) section for informati

### External Term Format

The `erlang:term_to_binary/1` and `erlang:binary_to_term/2` functions can be used to serialize arbitrary term data into and out of binary data. These operations can be useful for applications that wish to share term data over some network protocol, such as HTTP or MQTT, or wish to store serialized term data in some permanent storage (e.g., non-volatile storage on ESP32 devices).
The `erlang:term_to_binary/1` and `erlang:binary_to_term/1,2` functions can be used to serialize arbitrary term data into and out of binary data. These operations can be useful for applications that wish to share term data over some network protocol, such as HTTP or MQTT, or wish to store serialized term data in some permanent storage (e.g., non-volatile storage on ESP32 devices).

For example, to convert a term to a binary, use `erlang:term_to_binary/1,2`, e.g.,
For example, to convert a term to a binary, use `erlang:term_to_binary/1`, e.g.,

%% erlang
Term = ...
Expand All @@ -603,11 +603,7 @@ And to convert the binary back to a term, use `erlang:binary_to_term/1,2`, e.g.,
Binary = ...
{Term, _Used} = erlang:binary_to_term(Binary, [used]),

By default, AtomVM will encode all atoms using UTF-8 encoding. This encoding is the default encoding for OTP-26 and later releases. If you would like to use the legacy Latin1 encoding for atoms that do not contain UTF-8 extended characters, provide the `{minor_version, 1}` to the `erlang:term_to_binary/2` function. For example:

%% erlang
Term = ...
Binary = erlang:term_to_binary(Term, [{minor_version, 1}]),
By default, AtomVM will encode all atoms using UTF-8 encoding. This encoding is the default encoding for OTP-26 and later releases.

For more information about Erlang external term format, consult the [Erlang Documentation](https://www.erlang.org/doc/apps/erts/erl_ext_dist.html)

Expand Down Expand Up @@ -1717,7 +1713,7 @@ The station mode configuration supports the following options:
| `dhcp_hostname` | `string() \| binary()` | no | `atomvm-<MAC>` where `<MAC>` is the factory-assigned MAC-address of the device | DHCP hostname for the connecting device |
```{important}
The WiFi network to which you are connecting must support DHCP and IPv4.
The WiFi network to which you are connecting must support DHCP and IPv4.
IPv6 addressing is not yet supported on AtomVM.
```
Expand Down
20 changes: 1 addition & 19 deletions libs/estdlib/src/erlang.erl
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@
garbage_collect/1,
binary_to_term/1,
term_to_binary/1,
term_to_binary/2,
timestamp/0,
universaltime/0,
localtime/0
Expand Down Expand Up @@ -1150,7 +1149,7 @@ binary_to_term(_Binary) ->
erlang:nif_error(undefined).

%%-----------------------------------------------------------------------------
%% @returns the binary encoding of a term
%% @returns the binary encoding of the passed term.
%% @param Term term to encode
%% @doc Encode a term to a binary that can later be decoded with `binary_to_term/1'.
%% This function should be mostly compatible with its Erlang/OTP counterpart.
Expand All @@ -1162,23 +1161,6 @@ binary_to_term(_Binary) ->
term_to_binary(_Term) ->
erlang:nif_error(undefined).

%%-----------------------------------------------------------------------------
%% @returns the binary encoding of a term
%% @param Term term to encode
%% @param Options encoding options. Currently, the only supported encoding
%% options are `{minor_version, 1}', which will encode atoms using
%% latin1 encoding, if the atom does not contain any extended UTF-8
%% characters.
%% @doc Encode a term to a binary that can later be decoded with `binary_to_term/1'.
%% This function should be mostly compatible with its Erlang/OTP counterpart.
%% Unlike modern Erlang/OTP, resources are currently serialized as empty
%% binaries.
%% @end
%%-----------------------------------------------------------------------------
-spec term_to_binary(Term :: any(), Options :: [{minor_version, 1}]) -> binary().
term_to_binary(_Term, _Options) ->
erlang:nif_error(undefined).

%%-----------------------------------------------------------------------------
%% @returns A tuple representing the current timestamp.
%% @see monotonic_time/1
Expand Down
99 changes: 22 additions & 77 deletions src/libAtomVM/externalterm.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

#include "externalterm.h"

#include "bitstring.h"
#include "context.h"
#include "list.h"

Expand Down Expand Up @@ -72,9 +71,9 @@

static term parse_external_terms(const uint8_t *external_term_buf, size_t *eterm_size, bool copy, Heap *heap, GlobalContext *glb);
static int calculate_heap_usage(const uint8_t *external_term_buf, size_t remaining, size_t *eterm_size, bool copy);
static size_t compute_external_size(term t, ExternalTermOpts opts, GlobalContext *glb);
static int externalterm_from_term(uint8_t **buf, size_t *len, term t, ExternalTermOpts opts, GlobalContext *glb);
static int serialize_term(uint8_t *buf, term t, ExternalTermOpts opts, GlobalContext *glb);
static size_t compute_external_size(term t, GlobalContext *glb);
static int externalterm_from_term(uint8_t **buf, size_t *len, term t, GlobalContext *glb);
static int serialize_term(uint8_t *buf, term t, GlobalContext *glb);

/**
* @brief
Expand Down Expand Up @@ -163,27 +162,27 @@ enum ExternalTermResult externalterm_from_binary(Context *ctx, term *dst, term b
}
}

static int externalterm_from_term(uint8_t **buf, size_t *len, term t, ExternalTermOpts opts, GlobalContext *glb)
static int externalterm_from_term(uint8_t **buf, size_t *len, term t, GlobalContext *glb)
{
*len = compute_external_size(t, opts, glb) + 1;
*len = compute_external_size(t, glb) + 1;
*buf = malloc(*len);
if (IS_NULL_PTR(*buf)) {
fprintf(stderr, "Unable to allocate %zu bytes for externalized term.\n", *len);
AVM_ABORT();
}
size_t k = serialize_term(*buf + 1, t, opts, glb);
size_t k = serialize_term(*buf + 1, t, glb);
*buf[0] = EXTERNAL_TERM_TAG;
return k + 1;
}

term externalterm_to_binary(Context *ctx, term t, ExternalTermOpts opts)
term externalterm_to_binary(Context *ctx, term t)
{
//
// convert
//
uint8_t *buf;
size_t len;
externalterm_from_term(&buf, &len, t, opts, ctx->global);
externalterm_from_term(&buf, &len, t, ctx->global);
//
// Ensure enough free space in heap for binary
//
Expand All @@ -200,9 +199,9 @@ term externalterm_to_binary(Context *ctx, term t, ExternalTermOpts opts)
return binary;
}

static size_t compute_external_size(term t, ExternalTermOpts opts, GlobalContext *glb)
static size_t compute_external_size(term t, GlobalContext *glb)
{
return serialize_term(NULL, t, opts, glb);
return serialize_term(NULL, t, glb);
}

static uint8_t get_num_bytes(avm_uint64_t val)
Expand All @@ -226,47 +225,7 @@ static void write_bytes(uint8_t *buf, avm_uint64_t val)
}
}

static bool has_extended_utf8_encoding(const uint8_t *atom_data, size_t atom_len)
{
for (size_t i = 0; i < atom_len; ) {
size_t out_len = 0;
uint32_t c;
enum UnicodeTransformDecodeResult res = bitstring_utf8_decode(
atom_data + i,
atom_len - i,
&c,
&out_len
);
if (res == UnicodeTransformDecodeSuccess && out_len != 1) {
return true;
} else {
++i;
}
}
return false;
}

static inline void encode_atom_latin1(uint8_t *buf, atom_ref_t atom_ref, size_t atom_len, int *offset, GlobalContext *glb)
{
*offset = 3;
if (!IS_NULL_PTR(buf)) {
buf[0] = ATOM_EXT;
WRITE_16_UNALIGNED(buf + 1, atom_len);
atom_table_write_bytes(glb->atom_table, atom_ref, atom_len, buf + 3);
}
}

static inline void encode_atom_utf8(uint8_t *buf, atom_ref_t atom_ref, size_t atom_len, int *offset, GlobalContext *glb)
{
*offset = 2;
if (!IS_NULL_PTR(buf)) {
buf[0] = SMALL_ATOM_UTF8_EXT;
buf[1] = atom_len;
atom_table_write_bytes(glb->atom_table, atom_ref, atom_len, buf + 2);
}
}

static int serialize_term(uint8_t *buf, term t, ExternalTermOpts opts, GlobalContext *glb)
static int serialize_term(uint8_t *buf, term t, GlobalContext *glb)
{
if (term_is_uint8(t)) {
if (!IS_NULL_PTR(buf)) {
Expand Down Expand Up @@ -313,26 +272,12 @@ static int serialize_term(uint8_t *buf, term t, ExternalTermOpts opts, GlobalCon
int atom_index = term_to_atom_index(t);
size_t atom_len;
atom_ref_t atom_ref = atom_table_get_atom_ptr_and_len(glb->atom_table, atom_index, &atom_len);

uint8_t *atom_data = malloc(atom_len);
if (IS_NULL_PTR(atom_data)) {
// Not much else we can do here...
AVM_ABORT();
}
atom_table_write_bytes(glb->atom_table, atom_ref, atom_len, atom_data);

int offset = 0;
if (opts & ExternalTermAllowLatin1Encoding) {
if (has_extended_utf8_encoding(atom_data, atom_len)) {
encode_atom_utf8(buf, atom_ref, atom_len, &offset, glb);
} else {
encode_atom_latin1(buf, atom_ref, atom_len, &offset, glb);
}
} else {
encode_atom_utf8(buf, atom_ref, atom_len, &offset, glb);
if (!IS_NULL_PTR(buf)) {
buf[0] = SMALL_ATOM_UTF8_EXT;
buf[1] = atom_len;
atom_table_write_bytes(glb->atom_table, atom_ref, atom_len, buf + 2);
}
free(atom_data);
return offset + atom_len;
return 2 + atom_len;

} else if (term_is_tuple(t)) {
size_t arity = term_get_tuple_arity(t);
Expand All @@ -347,7 +292,7 @@ static int serialize_term(uint8_t *buf, term t, ExternalTermOpts opts, GlobalCon
size_t k = 2;
for (size_t i = 0; i < arity; ++i) {
term e = term_get_tuple_element(t, i);
k += serialize_term(IS_NULL_PTR(buf) ? NULL : buf + k, e, opts, glb);
k += serialize_term(IS_NULL_PTR(buf) ? NULL : buf + k, e, glb);
}
return k;

Expand Down Expand Up @@ -387,11 +332,11 @@ static int serialize_term(uint8_t *buf, term t, ExternalTermOpts opts, GlobalCon
term i = t;
while (term_is_nonempty_list(i)) {
term e = term_get_list_head(i);
k += serialize_term(IS_NULL_PTR(buf) ? NULL : buf + k, e, opts, glb);
k += serialize_term(IS_NULL_PTR(buf) ? NULL : buf + k, e, glb);
i = term_get_list_tail(i);
++len;
}
k += serialize_term(IS_NULL_PTR(buf) ? NULL : buf + k, i, opts, glb);
k += serialize_term(IS_NULL_PTR(buf) ? NULL : buf + k, i, glb);
if (!IS_NULL_PTR(buf)) {
WRITE_32_UNALIGNED(buf + 1, len);
}
Expand All @@ -418,9 +363,9 @@ static int serialize_term(uint8_t *buf, term t, ExternalTermOpts opts, GlobalCon
size_t k = 5;
for (size_t i = 0; i < size; ++i) {
term key = term_get_map_key(t, i);
k += serialize_term(IS_NULL_PTR(buf) ? NULL : buf + k, key, opts, glb);
k += serialize_term(IS_NULL_PTR(buf) ? NULL : buf + k, key, glb);
term value = term_get_map_value(t, i);
k += serialize_term(IS_NULL_PTR(buf) ? NULL : buf + k, value, opts, glb);
k += serialize_term(IS_NULL_PTR(buf) ? NULL : buf + k, value, glb);
}
return k;
} else if (term_is_function(t)) {
Expand All @@ -431,7 +376,7 @@ static int serialize_term(uint8_t *buf, term t, ExternalTermOpts opts, GlobalCon
const term *boxed_value = term_to_const_term_ptr(t);
for (size_t i = 1; i <= 3; ++i) {
term mfa = boxed_value[i];
k += serialize_term(IS_NULL_PTR(buf) ? NULL : buf + k, mfa, opts, glb);
k += serialize_term(IS_NULL_PTR(buf) ? NULL : buf + k, mfa, glb);
}
return k;
} else {
Expand Down
9 changes: 2 additions & 7 deletions src/libAtomVM/externalterm.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ enum ExternalTermResult
typedef enum
{
ExternalTermNoOpts = 0,
ExternalTermToHeapFragment = 1,
ExternalTermAllowLatin1Encoding = 2
ExternalTermToHeapFragment = 1
} ExternalTermOpts;

/**
Expand Down Expand Up @@ -90,14 +89,10 @@ enum ExternalTermResult externalterm_from_binary(Context *ctx, term *dst, term b
* WARNING: This function may call the GC, which may render the input binary invalid.
* @param ctx the context that owns the memory that will be allocated.
* @param t the term to return as binary.
* @param opts encoding options. If the ExternalTermAllowLatin1Encoding bit is
* set in opts, then atoms that do not contain extended UTF-8 character will be
* encoded using latin1 (ATOM_EXT) encoding; otherwise, atoms are encoded in UTF-8
* (SMALL_ATOM_UTF8_EXT or ATOM_UTF8_EXT) encoding.
* @returns a binary containing the serialized (external term format) encoding
* of the input term, or an invalid term, if serialization fails.
*/
term externalterm_to_binary(Context *ctx, term t, ExternalTermOpts opts);
term externalterm_to_binary(Context *ctx, term t);

#ifdef __cplusplus
}
Expand Down
19 changes: 3 additions & 16 deletions src/libAtomVM/nifs.c
Original file line number Diff line number Diff line change
Expand Up @@ -2914,24 +2914,11 @@ static term nif_erlang_binary_to_term(Context *ctx, int argc, term argv[])

static term nif_erlang_term_to_binary(Context *ctx, int argc, term argv[])
{
ExternalTermOpts opts = ExternalTermNoOpts;
if (argc == 2) {
term options = argv[1];
VALIDATE_VALUE(options, term_is_list);

term minor_version = interop_kv_get_value(options, ATOM_STR("\xD", "minor_version"), ctx->global);
if (!term_is_invalid_term(minor_version)) {
VALIDATE_VALUE(minor_version, term_is_integer);
if (term_to_int(minor_version) != 1) {
RAISE_ERROR(BADARG_ATOM);
} else {
opts |= ExternalTermAllowLatin1Encoding;
}
}
if (argc != 1) {
RAISE_ERROR(BADARG_ATOM);
}

term t = argv[0];
term ret = externalterm_to_binary(ctx, t, opts);
term ret = externalterm_to_binary(ctx, t);
if (term_is_invalid_term(ret)) {
RAISE_ERROR(BADARG_ATOM);
}
Expand Down
1 change: 0 additions & 1 deletion src/libAtomVM/nifs.gperf
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ erlang:put/2, &put_nif
erlang:binary_to_term/1, &binary_to_term_nif
erlang:binary_to_term/2, &binary_to_term_nif
erlang:term_to_binary/1, &term_to_binary_nif
erlang:term_to_binary/2, &term_to_binary_nif
erlang:throw/1, &throw_nif
erlang:raise/3, &raise_nif
erlang:unlink/1, &unlink_nif
Expand Down
Loading

0 comments on commit 6b001a2

Please sign in to comment.