From fbaaa5f5aa2241ea71e4b06e1f394050c934d9be Mon Sep 17 00:00:00 2001 From: Pablo Lopez Date: Thu, 21 Jul 2022 13:44:42 +0200 Subject: [PATCH 1/7] [RTI-12596] Added new nif to compress directly to a file --- c_src/zstd_nif.c | 82 +++++++++++++++++++++++++++++++++++++++++++-- src/zstd.erl | 12 +++++++ test/zstd_tests.erl | 14 ++++++++ 3 files changed, 106 insertions(+), 2 deletions(-) diff --git a/c_src/zstd_nif.c b/c_src/zstd_nif.c index 0264929..2f1f33b 100644 --- a/c_src/zstd_nif.c +++ b/c_src/zstd_nif.c @@ -1,10 +1,12 @@ #include "erl_nif.h" - #include +#include +#include #include ErlNifTSDKey zstdDecompressContextKey; ErlNifTSDKey zstdCompressContextKey; +ErlNifTSDKey zstdCompressToFileContextKey; static ERL_NIF_TERM zstd_nif_compress(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ErlNifBinary bin, ret_bin; @@ -62,15 +64,91 @@ static ERL_NIF_TERM zstd_nif_decompress(ErlNifEnv* env, int argc, const ERL_NIF_ return out; } +/*! fopen_orDie() : + * Open a file using given file path and open option. + * + * @return If successful this function will return a FILE pointer to an + * opened file otherwise it sends an error to stderr and exits. + */ +static FILE* fopen_orDie(const char *filename, const char *instruction) +{ + FILE* const inFile = fopen(filename, instruction); + if (inFile) return inFile; + /* error */ + perror(filename); + exit(2); +} + +/*! saveFile_orDie() : + * + * Save buffSize bytes to a given file path, obtaining them from a location pointed + * to by buff. + * + * Note: This function will send an error to stderr and exit if it + * cannot write to a given file. + */ +static void saveFile_orDie(const char* fileName, const void* buff, size_t buffSize) +{ + FILE* const oFile = fopen_orDie(fileName, "a"); + size_t const wSize = fwrite(buff, 1, buffSize, oFile); + if (wSize != (size_t)buffSize) { + fprintf(stderr, "fwrite: %s : %s \n", fileName, strerror(errno)); + exit(5); + } + if (fclose(oFile)) { + perror(fileName); + exit(3); + } +} + +static ERL_NIF_TERM zstd_nif_compress_to_file(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + ErlNifBinary bin, ret_bin, arg_path; + size_t buff_size, compressed_size; + unsigned int compression_level; + char* path; + + ZSTD_CCtx* ctx = (ZSTD_CCtx*)enif_tsd_get(zstdCompressToFileContextKey); + if (!ctx) { + ctx = ZSTD_createCCtx(); + enif_tsd_set(zstdCompressToFileContextKey, ctx); + } + + if(!enif_inspect_binary(env, argv[0], &bin) + || !enif_inspect_binary(env, argv[1], &arg_path) + || !enif_get_uint(env, argv[2], &compression_level) + || compression_level > ZSTD_maxCLevel()) + return enif_make_badarg(env); + + buff_size = ZSTD_compressBound(bin.size); + + if(!enif_alloc_binary(buff_size, &ret_bin)) + return enif_make_atom(env, "error"); + + compressed_size = ZSTD_compressCCtx(ctx, ret_bin.data, buff_size, bin.data, bin.size, compression_level); + if(ZSTD_isError(compressed_size)) + return enif_make_atom(env, "error"); + + if(!enif_realloc_binary(&ret_bin, compressed_size)) + return enif_make_atom(env, "error"); + + path = (char *)arg_path.data; + + saveFile_orDie(path, ret_bin.data, compressed_size); + + return enif_make_atom(env, "ok"); +} + static ErlNifFunc nif_funcs[] = { {"compress", 2, zstd_nif_compress}, - {"decompress", 1, zstd_nif_decompress} + {"decompress", 1, zstd_nif_decompress}, + {"compress_to_file", 3, zstd_nif_compress_to_file}, }; static int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) { enif_tsd_key_create("zstd_decompress_context_key", &zstdDecompressContextKey); enif_tsd_key_create("zstd_compress_context_key", &zstdCompressContextKey); + enif_tsd_key_create("zstd_compress_to_file_context_key", &zstdCompressToFileContextKey); return 0; } diff --git a/src/zstd.erl b/src/zstd.erl index 5da34b4..04eb806 100644 --- a/src/zstd.erl +++ b/src/zstd.erl @@ -2,6 +2,7 @@ -export([compress/1, compress/2]). -export([decompress/1]). +-export([compress_to_file/2, compress_to_file/3]). -on_load init/0. @@ -21,6 +22,17 @@ compress(_, _) -> decompress(_) -> erlang:nif_error(?LINE). +-spec compress_to_file(Uncompressed :: binary(), Path :: binary()) -> ok | error. +compress_to_file(Binary, Path) -> + compress_to_file(Binary, Path, 1). + +-spec compress_to_file(Uncompressed :: binary(), + Path :: binary(), + CompressionLevel :: 0..22) -> + ok | error. +compress_to_file(_, _, _) -> + erlang:nif_error(?LINE). + init() -> SoName = case code:priv_dir(?APPNAME) of diff --git a/test/zstd_tests.erl b/test/zstd_tests.erl index 4927a04..6ea9e0e 100644 --- a/test/zstd_tests.erl +++ b/test/zstd_tests.erl @@ -7,3 +7,17 @@ zstd_test() -> ?assertEqual(Data, zstd:decompress( zstd:compress(Data))). + +compress_to_file_test() -> + Path = "/tmp/zstd_test.zst", + Data = <<"Hello, World!">>, + case filelib:is_regular(Path) of + true -> + file:delete(Path); + _ -> + ok + end, + ?assertEqual(ok, zstd:compress_to_file(Data, erlang:list_to_binary(Path))), + {ok, ToDecompress} = file:read_file(Path), + Decompressed = zstd:decompress(ToDecompress), + ?assertEqual(Decompressed, Data). From a043d661d56d7e1600d8c4f2b01410f162b35fc3 Mon Sep 17 00:00:00 2001 From: Pablo Lopez Date: Mon, 1 Aug 2022 12:43:26 +0200 Subject: [PATCH 2/7] [RTI-12596] Using dirty schedulers when needed --- c_src/zstd_nif.c | 16 +++++++++++++++- test/zstd_tests.erl | 12 +++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/c_src/zstd_nif.c b/c_src/zstd_nif.c index 2f1f33b..3c95f9d 100644 --- a/c_src/zstd_nif.c +++ b/c_src/zstd_nif.c @@ -4,10 +4,15 @@ #include #include +#define MAX_BYTES_TO_NIF 20000 + ErlNifTSDKey zstdDecompressContextKey; ErlNifTSDKey zstdCompressContextKey; ErlNifTSDKey zstdCompressToFileContextKey; +static ERL_NIF_TERM do_compress_to_file(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); + + static ERL_NIF_TERM zstd_nif_compress(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ErlNifBinary bin, ret_bin; size_t buff_size, compressed_size; @@ -100,8 +105,17 @@ static void saveFile_orDie(const char* fileName, const void* buff, size_t buffSi exit(3); } } - static ERL_NIF_TERM zstd_nif_compress_to_file(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + ErlNifBinary bin; + if(!enif_inspect_binary(env, argv[0], &bin)) return enif_make_badarg(env); + if (bin.size > MAX_BYTES_TO_NIF) { + return enif_schedule_nif(env, "do_compress_to_file", ERL_NIF_DIRTY_JOB_CPU_BOUND, do_compress_to_file, argc, argv); + } + + return do_compress_to_file(env, argc, argv); +} + +static ERL_NIF_TERM do_compress_to_file(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ErlNifBinary bin, ret_bin, arg_path; size_t buff_size, compressed_size; unsigned int compression_level; diff --git a/test/zstd_tests.erl b/test/zstd_tests.erl index 6ea9e0e..098a020 100644 --- a/test/zstd_tests.erl +++ b/test/zstd_tests.erl @@ -10,7 +10,17 @@ zstd_test() -> compress_to_file_test() -> Path = "/tmp/zstd_test.zst", - Data = <<"Hello, World!">>, + Data = <<"Hello there!">>, + compress_to_file_and_check(Path, Data). + +compress_to_file_using_dirty_scheduler_test() -> + Path = "/tmp/zstd_dirty_scheduler_test.zst", + Data = + base64:encode( + crypto:strong_rand_bytes(64000)), + compress_to_file_and_check(Path, Data). + +compress_to_file_and_check(Path, Data) -> case filelib:is_regular(Path) of true -> file:delete(Path); From 72dce976ca38ec752a84affac7561f5a3e65ab70 Mon Sep 17 00:00:00 2001 From: Pablo Lopez Date: Tue, 2 Aug 2022 10:52:17 +0200 Subject: [PATCH 3/7] [RTI-12596] Ensure data is written in the file after each write --- c_src/zstd_nif.c | 4 +--- test/zstd_tests.erl | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/c_src/zstd_nif.c b/c_src/zstd_nif.c index 3c95f9d..b450504 100644 --- a/c_src/zstd_nif.c +++ b/c_src/zstd_nif.c @@ -80,7 +80,6 @@ static FILE* fopen_orDie(const char *filename, const char *instruction) FILE* const inFile = fopen(filename, instruction); if (inFile) return inFile; /* error */ - perror(filename); exit(2); } @@ -97,13 +96,12 @@ static void saveFile_orDie(const char* fileName, const void* buff, size_t buffSi FILE* const oFile = fopen_orDie(fileName, "a"); size_t const wSize = fwrite(buff, 1, buffSize, oFile); if (wSize != (size_t)buffSize) { - fprintf(stderr, "fwrite: %s : %s \n", fileName, strerror(errno)); exit(5); } if (fclose(oFile)) { - perror(fileName); exit(3); } + fdatasync(); } static ERL_NIF_TERM zstd_nif_compress_to_file(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ErlNifBinary bin; diff --git a/test/zstd_tests.erl b/test/zstd_tests.erl index 098a020..6421c24 100644 --- a/test/zstd_tests.erl +++ b/test/zstd_tests.erl @@ -3,7 +3,7 @@ -include_lib("eunit/include/eunit.hrl"). zstd_test() -> - Data = <<"Hello, World!">>, + Data = <<"Hello, World!\n">>, ?assertEqual(Data, zstd:decompress( zstd:compress(Data))). From fd01e9736d0151b346bd5db03aea47ee56b75917 Mon Sep 17 00:00:00 2001 From: Pablo Lopez Date: Wed, 3 Aug 2022 10:15:24 +0200 Subject: [PATCH 4/7] [RTI-12596] WIP: remove call to fdatasync as it is not C99 complaint --- c_src/zstd_nif.c | 1 - 1 file changed, 1 deletion(-) diff --git a/c_src/zstd_nif.c b/c_src/zstd_nif.c index b450504..f96b717 100644 --- a/c_src/zstd_nif.c +++ b/c_src/zstd_nif.c @@ -101,7 +101,6 @@ static void saveFile_orDie(const char* fileName, const void* buff, size_t buffSi if (fclose(oFile)) { exit(3); } - fdatasync(); } static ERL_NIF_TERM zstd_nif_compress_to_file(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ErlNifBinary bin; From 82a4bfc24ebf447543909140bf5974841ed9f9b5 Mon Sep 17 00:00:00 2001 From: Pablo Lopez Date: Thu, 4 Aug 2022 13:41:22 +0200 Subject: [PATCH 5/7] [RTI-12596] WIP: fixed bug with the path format' --- c_src/zstd_nif.c | 12 ++++++------ src/zstd.erl | 4 ++-- test/zstd_tests.erl | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/c_src/zstd_nif.c b/c_src/zstd_nif.c index f96b717..b0fc1d1 100644 --- a/c_src/zstd_nif.c +++ b/c_src/zstd_nif.c @@ -113,10 +113,9 @@ static ERL_NIF_TERM zstd_nif_compress_to_file(ErlNifEnv* env, int argc, const ER } static ERL_NIF_TERM do_compress_to_file(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { - ErlNifBinary bin, ret_bin, arg_path; + ErlNifBinary bin, ret_bin; size_t buff_size, compressed_size; - unsigned int compression_level; - char* path; + unsigned int compression_level, path_len; ZSTD_CCtx* ctx = (ZSTD_CCtx*)enif_tsd_get(zstdCompressToFileContextKey); if (!ctx) { @@ -124,8 +123,11 @@ static ERL_NIF_TERM do_compress_to_file(ErlNifEnv* env, int argc, const ERL_NIF_ enif_tsd_set(zstdCompressToFileContextKey, ctx); } + enif_get_list_length(env, argv[1], &path_len); + char path[path_len + 1]; + if(!enif_inspect_binary(env, argv[0], &bin) - || !enif_inspect_binary(env, argv[1], &arg_path) + || !enif_get_string(env, argv[1], path, (path_len + 1), ERL_NIF_LATIN1) || !enif_get_uint(env, argv[2], &compression_level) || compression_level > ZSTD_maxCLevel()) return enif_make_badarg(env); @@ -142,8 +144,6 @@ static ERL_NIF_TERM do_compress_to_file(ErlNifEnv* env, int argc, const ERL_NIF_ if(!enif_realloc_binary(&ret_bin, compressed_size)) return enif_make_atom(env, "error"); - path = (char *)arg_path.data; - saveFile_orDie(path, ret_bin.data, compressed_size); return enif_make_atom(env, "ok"); diff --git a/src/zstd.erl b/src/zstd.erl index 04eb806..1fa1d03 100644 --- a/src/zstd.erl +++ b/src/zstd.erl @@ -22,12 +22,12 @@ compress(_, _) -> decompress(_) -> erlang:nif_error(?LINE). --spec compress_to_file(Uncompressed :: binary(), Path :: binary()) -> ok | error. +-spec compress_to_file(Uncompressed :: binary(), Path :: string()) -> ok | error. compress_to_file(Binary, Path) -> compress_to_file(Binary, Path, 1). -spec compress_to_file(Uncompressed :: binary(), - Path :: binary(), + Path :: string(), CompressionLevel :: 0..22) -> ok | error. compress_to_file(_, _, _) -> diff --git a/test/zstd_tests.erl b/test/zstd_tests.erl index 6421c24..065d268 100644 --- a/test/zstd_tests.erl +++ b/test/zstd_tests.erl @@ -27,7 +27,7 @@ compress_to_file_and_check(Path, Data) -> _ -> ok end, - ?assertEqual(ok, zstd:compress_to_file(Data, erlang:list_to_binary(Path))), + ?assertEqual(ok, zstd:compress_to_file(Data, Path)), {ok, ToDecompress} = file:read_file(Path), Decompressed = zstd:decompress(ToDecompress), ?assertEqual(Decompressed, Data). From 920ee58cba19bdcff5d3f2d00a3f25af61f8b0a4 Mon Sep 17 00:00:00 2001 From: Pablo Lopez Date: Thu, 4 Aug 2022 17:16:46 +0200 Subject: [PATCH 6/7] [RTI-12596] WIP: fixed a bug in zstd' --- c_src/zstd_nif.c | 48 ++++++++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/c_src/zstd_nif.c b/c_src/zstd_nif.c index b0fc1d1..6bb7d0f 100644 --- a/c_src/zstd_nif.c +++ b/c_src/zstd_nif.c @@ -69,38 +69,22 @@ static ERL_NIF_TERM zstd_nif_decompress(ErlNifEnv* env, int argc, const ERL_NIF_ return out; } -/*! fopen_orDie() : - * Open a file using given file path and open option. - * - * @return If successful this function will return a FILE pointer to an - * opened file otherwise it sends an error to stderr and exits. - */ -static FILE* fopen_orDie(const char *filename, const char *instruction) -{ - FILE* const inFile = fopen(filename, instruction); - if (inFile) return inFile; - /* error */ - exit(2); -} -/*! saveFile_orDie() : - * - * Save buffSize bytes to a given file path, obtaining them from a location pointed - * to by buff. - * - * Note: This function will send an error to stderr and exit if it - * cannot write to a given file. - */ -static void saveFile_orDie(const char* fileName, const void* buff, size_t buffSize) + +static int save_file(const char* fileName, const void* buff, size_t buffSize) { - FILE* const oFile = fopen_orDie(fileName, "a"); + FILE* const oFile = fopen(fileName, "a"); + if (!oFile) { + return 0; + } size_t const wSize = fwrite(buff, 1, buffSize, oFile); if (wSize != (size_t)buffSize) { - exit(5); + return 0; } if (fclose(oFile)) { - exit(3); + return 0; } + return 1; } static ERL_NIF_TERM zstd_nif_compress_to_file(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ErlNifBinary bin; @@ -138,14 +122,22 @@ static ERL_NIF_TERM do_compress_to_file(ErlNifEnv* env, int argc, const ERL_NIF_ return enif_make_atom(env, "error"); compressed_size = ZSTD_compressCCtx(ctx, ret_bin.data, buff_size, bin.data, bin.size, compression_level); - if(ZSTD_isError(compressed_size)) + if(ZSTD_isError(compressed_size)) { + enif_release_binary(&ret_bin); return enif_make_atom(env, "error"); + } - if(!enif_realloc_binary(&ret_bin, compressed_size)) + if(!enif_realloc_binary(&ret_bin, compressed_size)) { + enif_release_binary(&ret_bin); return enif_make_atom(env, "error"); + } - saveFile_orDie(path, ret_bin.data, compressed_size); + if (!save_file(path, ret_bin.data, compressed_size)) { + enif_release_binary(&ret_bin); + return enif_make_atom(env, "error"); + } + enif_release_binary(&ret_bin); return enif_make_atom(env, "ok"); } From 969273611640e0c0b20103413e860fb6afdd93ae Mon Sep 17 00:00:00 2001 From: Pablo Lopez Date: Fri, 5 Aug 2022 10:21:49 +0200 Subject: [PATCH 7/7] [RTI-12596] WIP: Expect iolist instead of binary --- c_src/zstd_nif.c | 4 ++-- src/zstd.erl | 8 ++++---- test/zstd_tests.erl | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/c_src/zstd_nif.c b/c_src/zstd_nif.c index 6bb7d0f..8955581 100644 --- a/c_src/zstd_nif.c +++ b/c_src/zstd_nif.c @@ -88,7 +88,7 @@ static int save_file(const char* fileName, const void* buff, size_t buffSize) } static ERL_NIF_TERM zstd_nif_compress_to_file(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ErlNifBinary bin; - if(!enif_inspect_binary(env, argv[0], &bin)) return enif_make_badarg(env); + if(!enif_inspect_iolist_as_binary(env, argv[0], &bin)) return enif_make_badarg(env); if (bin.size > MAX_BYTES_TO_NIF) { return enif_schedule_nif(env, "do_compress_to_file", ERL_NIF_DIRTY_JOB_CPU_BOUND, do_compress_to_file, argc, argv); } @@ -110,7 +110,7 @@ static ERL_NIF_TERM do_compress_to_file(ErlNifEnv* env, int argc, const ERL_NIF_ enif_get_list_length(env, argv[1], &path_len); char path[path_len + 1]; - if(!enif_inspect_binary(env, argv[0], &bin) + if(!enif_inspect_iolist_as_binary(env, argv[0], &bin) || !enif_get_string(env, argv[1], path, (path_len + 1), ERL_NIF_LATIN1) || !enif_get_uint(env, argv[2], &compression_level) || compression_level > ZSTD_maxCLevel()) diff --git a/src/zstd.erl b/src/zstd.erl index 1fa1d03..efbdd80 100644 --- a/src/zstd.erl +++ b/src/zstd.erl @@ -22,11 +22,11 @@ compress(_, _) -> decompress(_) -> erlang:nif_error(?LINE). --spec compress_to_file(Uncompressed :: binary(), Path :: string()) -> ok | error. -compress_to_file(Binary, Path) -> - compress_to_file(Binary, Path, 1). +-spec compress_to_file(Uncompressed :: iolist(), Path :: string()) -> ok | error. +compress_to_file(IoList, Path) -> + compress_to_file(IoList, Path, 1). --spec compress_to_file(Uncompressed :: binary(), +-spec compress_to_file(Uncompressed :: iolist(), Path :: string(), CompressionLevel :: 0..22) -> ok | error. diff --git a/test/zstd_tests.erl b/test/zstd_tests.erl index 065d268..1a69011 100644 --- a/test/zstd_tests.erl +++ b/test/zstd_tests.erl @@ -10,14 +10,14 @@ zstd_test() -> compress_to_file_test() -> Path = "/tmp/zstd_test.zst", - Data = <<"Hello there!">>, + Data = [<<"Hello">>, <<" there!">>], compress_to_file_and_check(Path, Data). compress_to_file_using_dirty_scheduler_test() -> Path = "/tmp/zstd_dirty_scheduler_test.zst", Data = - base64:encode( - crypto:strong_rand_bytes(64000)), + [base64:encode( + crypto:strong_rand_bytes(64000))], compress_to_file_and_check(Path, Data). compress_to_file_and_check(Path, Data) -> @@ -30,4 +30,4 @@ compress_to_file_and_check(Path, Data) -> ?assertEqual(ok, zstd:compress_to_file(Data, Path)), {ok, ToDecompress} = file:read_file(Path), Decompressed = zstd:decompress(ToDecompress), - ?assertEqual(Decompressed, Data). + ?assertEqual(Decompressed, erlang:iolist_to_binary(Data)).