From aca4da56d1a895b7e5c00ee22e054b47db929279 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B5ivo=20Leedj=C3=A4rv?= <69477666+tleedjarv@users.noreply.github.com> Date: Wed, 18 Aug 2021 20:51:34 +0200 Subject: [PATCH] Use accelerated file copy functions, if supported This patch makes use of various platform- and filesystem-specific syscalls to accelerate local file copies. The following syscalls are tried: * clonefile(2) on macOS (version >= 10.12) * ioctl FICLONE on Linux (kernel >= 4.5) (also BTRFS_IOC_CLONE since Linux 2.6.29(?) should work) * copy_file_range(2) on Linux (kernel >= 4.5) * sendfile(2) on Linux (kernel >= 2.2) and sendfile(3ext) on Solaris Fallback to read-write loop is used if none of the above are available or supported by the filesystem. --- src/Makefile.OCaml | 9 ++- src/copy.ml | 60 +++++++++++++- src/copy_stubs.c | 197 +++++++++++++++++++++++++++++++++++++++++++++ src/dune | 2 +- 4 files changed, 264 insertions(+), 4 deletions(-) create mode 100644 src/copy_stubs.c diff --git a/src/Makefile.OCaml b/src/Makefile.OCaml index 6cfb06908..35a3ae889 100644 --- a/src/Makefile.OCaml +++ b/src/Makefile.OCaml @@ -102,6 +102,8 @@ buildexecutable:: # Generate backtrace information for exceptions CAMLFLAGS+=-g +CCFLAGS+=-ccopt -D_FILE_OFFSET_BITS=64 + INCLFLAGS=-I lwt -I ubase -I system DEP_INCLFLAGS=-I lwt -I ubase -I system CAMLFLAGS+=$(INCLFLAGS) @@ -145,6 +147,9 @@ else CLIBS+=-cclib -lutil endif endif + ifeq ($(OSARCH),solaris) + CLIBS+=-cclib -lsendfile + endif buildexecutable:: @echo Building for Unix endif @@ -226,7 +231,7 @@ OCAMLOBJS+=main.cmo # File extensions will be substituted for the native code version OCAMLLIBS+=unix.cma str.cma bigarray.cma -COBJS+=osxsupport$(OBJ_EXT) pty$(OBJ_EXT) bytearray_stubs$(OBJ_EXT) hash_compat$(OBJ_EXT) +COBJS+=osxsupport$(OBJ_EXT) pty$(OBJ_EXT) bytearray_stubs$(OBJ_EXT) hash_compat$(OBJ_EXT) copy_stubs$(OBJ_EXT) ######################################################################## ### User Interface setup @@ -432,7 +437,7 @@ fswatch.cmi : ubase/prefs.cmi %.o %.obj: %.c @echo "$(CAMLC): $< ---> $@" - $(CAMLC) $(CAMLFLAGS) -ccopt $(OUTPUT_SEL)$(CWD)/$@ -c $(CWD)/$< + $(CAMLC) $(CAMLFLAGS) $(CCFLAGS) -ccopt $(OUTPUT_SEL)$(CWD)/$@ -c $(CWD)/$< $(NAME)$(EXEC_EXT): $(CAMLOBJS) $(COBJS) @echo Linking $@ diff --git a/src/copy.ml b/src/copy.ml index 60a051155..068211bac 100644 --- a/src/copy.ml +++ b/src/copy.ml @@ -280,15 +280,73 @@ let setFileinfo fspathTo pathTo realPathTo update desc = (****) +(* [unsn_clone_path] does not raise exceptions. *) +external clone_path : string -> string -> bool = "unsn_clone_path" +(* [unsn_clone_file] does not raise exceptions. *) +external clone_file : Unix.file_descr -> Unix.file_descr -> bool = "unsn_clone_file" +external copy_file : Unix.file_descr -> Unix.file_descr -> int64 + -> int -> int = "unsn_copy_file" + +let copyByPath fspathFrom pathFrom fspathTo pathTo = + clone_path + (Fspath.toString (Fspath.concat fspathFrom pathFrom)) + (Fspath.toString (Fspath.concat fspathTo pathTo)) + +let copy_size l = + let def = 10_485_760L in (* 10 MiB, to get periodic progress feedback *) + Int64.to_int @@ + if Int64.compare l def > 0 then def else l + +let rec copyFileAux src dst offs len notify = + let open Uutil in + if len > Filesize.zero then + let n = copy_file src dst (Filesize.toInt64 offs) + (copy_size (Filesize.toInt64 len)) in + let () = notify n in + if n > 0 then + let n' = Filesize.ofInt n in + copyFileAux src dst (Filesize.add offs n') (Filesize.sub len n') notify + +let copyFileRange src dst offs len fallback notify = + try + copyFileAux src dst offs len notify + with + | Unix.Unix_error ((EINVAL | ENOSYS | EBADF | EXDEV + | ESPIPE | ENOTSOCK | EOPNOTSUPP), _, _) + | Unix.Unix_error (EUNKNOWNERR -1, _, _) -> + (* These errors are not expected in the middle of a copy; these + indicate that [copy_file] is not supported at all (by the OS or + by the filesystem, or for these specific files) and nothing + has been copied so far, which makes fallback straight-forward. + Fallback to read-write loop expects that seek positions in + input and output fds have not changed. *) + fallback notify + +let copyFile inCh outCh kind len fallback notify = + let src = Unix.descr_of_in_channel inCh + and dst = Unix.descr_of_out_channel outCh in + if kind = `DATA && clone_file src dst then + notify (Uutil.Filesize.toInt len) + else + match kind with + | `DATA -> copyFileRange src dst Uutil.Filesize.zero len fallback notify + | `DATA_APPEND offs -> copyFileRange src dst offs len fallback notify + | `RESS -> fallback notify + let copyContents fspathFrom pathFrom fspathTo pathTo fileKind fileLength ido = let use_id f = match ido with Some id -> f id | None -> () in + if fileKind = `DATA && copyByPath fspathFrom pathFrom fspathTo pathTo then + use_id (fun id -> Uutil.showProgress id fileLength "l") + else + (* Open fds only if copying by path did not work *) let inFd = openFileIn fspathFrom pathFrom fileKind in protect (fun () -> let outFd = openFileOut fspathTo pathTo fileKind fileLength in protect (fun () -> - Uutil.readWriteBounded inFd outFd fileLength + let fallback = Uutil.readWriteBounded inFd outFd fileLength in + copyFile inFd outFd fileKind fileLength fallback (fun l -> use_id (fun id -> (* (Util.msg "Copied file %s (%d bytes)\n" (Path.toString pathFrom) l); *) diff --git a/src/copy_stubs.c b/src/copy_stubs.c new file mode 100644 index 000000000..4842408e5 --- /dev/null +++ b/src/copy_stubs.c @@ -0,0 +1,197 @@ +/* Unison file synchronizer: src/copy_stubs.c */ +/* Copyright 2021, Tõivo Leedjärv + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include + +#include + + +/* ----------------------------------------------- */ +/* Clone a file given source and destination paths */ +/* It must fully complete or fully fail. */ + +#if defined(__APPLE__) + + +#include + +#if defined(MAC_OS_X_VERSION_10_12) +#include +#include +#include + +CAMLprim value unsn_clone_path(value src, value dst) +{ + CAMLparam2(src, dst); + char *srcn, *dstn; + int status; + + srcn = strdup(String_val(src)); + dstn = strdup(String_val(dst)); + caml_release_runtime_system(); + status = clonefile(srcn, dstn, CLONE_NOFOLLOW | CLONE_NOOWNERCOPY); + free(srcn); + free(dstn); + caml_acquire_runtime_system(); + + /* Don't raise an exception, just return false in case of errors */ + CAMLreturn(Val_bool(status == 0)); +} +#else /* MAC_OS_X_VERSION_10_12 */ +CAMLprim value unsn_clone_path(value src, value dst) +{ + CAMLparam2(src, dst); + CAMLreturn(Val_false); +} +#endif /* MAC_OS_X_VERSION_10_12 */ + + +#else /* defined(__APPLE__) */ + + +CAMLprim value unsn_clone_path(value src, value dst) +{ + CAMLparam2(src, dst); + CAMLreturn(Val_false); +} + + +#endif /* defined(__APPLE__) */ + + +/* ----------------------------------------------- */ +/* Clone a file given input and output fd */ +/* It must fully complete or fully fail. */ + +#if defined(__linux__) || defined(__linux) + + +#include + +#if !defined(FICLONE) && defined(_IOW) +#define FICLONE _IOW(0x94, 9, int) +#endif + +CAMLprim value unsn_clone_file(value in_fd, value out_fd) +{ + CAMLparam2(in_fd, out_fd); + +#ifdef FICLONE + caml_release_runtime_system(); + int status = ioctl(Int_val(out_fd), FICLONE, Int_val(in_fd)); + caml_acquire_runtime_system(); + + /* Don't raise an exception, just return false in case of errors */ + CAMLreturn(Val_bool(status == 0)); +#else /* defined(FICLONE) */ + CAMLreturn(Val_false); +#endif +} + + +#else /* defined(__linux__) */ + + +CAMLprim value unsn_clone_file(value in_fd, value out_fd) +{ + CAMLparam2(in_fd, out_fd); + CAMLreturn(Val_false); +} + + +#endif /* defined(__linux__) */ + + +/* --------------------------------------------------------- */ +/* Copy, or possibly clone, a file given input and output fd */ +/* If operation is not supported by the OS or the filesystem + then file offsets must not have been changed at failure. */ + +#if defined(__linux__) || defined(__linux) + + +#include +#include +#include + +CAMLprim value unsn_copy_file(value in_fd, value out_fd, value offs, value len) +{ + CAMLparam4(in_fd, out_fd, offs, len); + off_t off_i = Int64_val(offs); + ssize_t ret; + + caml_release_runtime_system(); +#ifdef __NR_copy_file_range + /* First, try copy_file_range() */ + /* Using off_i and off_o prevents changing in_fd and out_fd file offsets */ + off_t off_o = Int64_val(offs); + ret = syscall(__NR_copy_file_range, Int_val(in_fd), &off_i, Int_val(out_fd), &off_o, Long_val(len), 0); + if (ret == -1 && (errno == ENOSYS || errno == EBADF || errno == EXDEV)) +#endif /* defined(__NR_copy_file_range) */ + { + /* Second, try sendfile(); this one changes out_fd file offset */ + ret = sendfile(Int_val(out_fd), Int_val(in_fd), &off_i, Long_val(len)); + } + caml_acquire_runtime_system(); + if (ret == -1) uerror("copy_file", Nothing); + + CAMLreturn(Val_long(ret)); +} + + +#elif defined(__sun) || defined(sun) + + +#include + +CAMLprim value unsn_copy_file(value in_fd, value out_fd, value offs, value len) +{ + CAMLparam4(in_fd, out_fd, offs, len); + off_t off = Int64_val(offs); + ssize_t ret; + + caml_release_runtime_system(); + /* This one changes out_fd file offset */ + ret = sendfile(Int_val(out_fd), Int_val(in_fd), &off, Long_val(len)); + caml_acquire_runtime_system(); + if (ret == -1) { + if (off > Int64_val(offs)) { + ret = off - Int64_val(offs); + } else { + uerror("copy_file", Nothing); + } + } + + CAMLreturn(Val_long(ret)); +} + + +#else /* defined(__linux__) || defined(__sun) */ + + +CAMLprim value unsn_copy_file(value in_fd, value out_fd, value offs, value len) +{ + CAMLparam4(in_fd, out_fd, offs, len); + unix_error(ENOSYS, "copy_file", Nothing); + CAMLreturn(Val_long(0)); +} + + +#endif /* defined(__linux__) || defined (__sun) */ diff --git a/src/dune b/src/dune index ca7545a93..74044c75d 100644 --- a/src/dune +++ b/src/dune @@ -7,7 +7,7 @@ -w -3-6-9-10-26-27-32-34-35-38-39-50-52 -warn-error -3-6-9-10-26-27-32-34-35-39-50-52 -no-strict-sequence) - (c_names bytearray_stubs osxsupport pty hash_compat) + (c_names bytearray_stubs osxsupport pty hash_compat copy_stubs) (c_library_flags -lutil) (libraries str unix lwt_lib bigarray))