From 173a7ffb50868c4e7dbeee32ed9b8f8c0df8125a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B5ivo=20Leedj=C3=A4rv?= <69477666+tleedjarv@users.noreply.github.com> Date: Wed, 18 Aug 2021 20:51:34 +0200 Subject: [PATCH] Use accelerated file copy functions, if supported This patch makes use of various platform- and filesystem-specific syscalls to accelerate local file copies. The following syscalls are tried: * clonefile(2) on macOS (version >= 10.12) * ioctl FICLONE on Linux (kernel >= 4.5) (also BTRFS_IOC_CLONE since Linux 2.6.29(?) should work) * copy_file_range(2) on Linux (kernel >= 4.5) * sendfile(2) on Linux (kernel >= 2.2) and sendfile(3ext) on Solaris Fallback to read-write loop is used if none of the above are available or supported by the filesystem. --- src/Makefile.OCaml | 5 +- src/copy.ml | 67 +++++++++++++- src/copy_stubs.c | 218 +++++++++++++++++++++++++++++++++++++++++++++ src/dune | 2 +- 4 files changed, 287 insertions(+), 5 deletions(-) create mode 100644 src/copy_stubs.c diff --git a/src/Makefile.OCaml b/src/Makefile.OCaml index cea70e95e..dede66081 100644 --- a/src/Makefile.OCaml +++ b/src/Makefile.OCaml @@ -129,6 +129,8 @@ ifneq ($(strip $(LDLIBS)),) CLIBS+=-cclib '$(LDLIBS)' endif +CAMLCFLAGS+=-ccopt -D_FILE_OFFSET_BITS=64 + INCLFLAGS=-I lwt -I ubase -I system DEP_INCLFLAGS=-I lwt -I ubase -I system CAMLFLAGS+=$(INCLFLAGS) @@ -187,6 +189,7 @@ else ifeq ($(OSARCH),solaris) # ACL functions CLIBS+=-cclib -lsec + CLIBS+=-cclib -lsendfile endif buildexecutable:: @echo Building for Unix @@ -274,7 +277,7 @@ OCAMLOBJS+=main.cmo OCAMLLIBS+=unix.cma str.cma INCLFLAGS+=-I +unix -I +str -COBJS+=osxsupport$(OBJ_EXT) pty$(OBJ_EXT) bytearray_stubs$(OBJ_EXT) hash_compat$(OBJ_EXT) props_xattr$(OBJ_EXT) props_acl$(OBJ_EXT) +COBJS+=osxsupport$(OBJ_EXT) pty$(OBJ_EXT) bytearray_stubs$(OBJ_EXT) hash_compat$(OBJ_EXT) props_xattr$(OBJ_EXT) props_acl$(OBJ_EXT) copy_stubs$(OBJ_EXT) ######################################################################## ### User Interface 
setup diff --git a/src/copy.ml b/src/copy.ml index fa704f35c..6ff9918b3 100644 --- a/src/copy.ml +++ b/src/copy.ml @@ -415,6 +415,60 @@ let readPropsExtDataG root path desc = (****) +(* [unsn_clone_path] does not raise exceptions; it returns [false] on failure or when cloning by path is not supported. *) +external clone_path : string -> string -> bool = "unsn_clone_path" +(* [unsn_clone_file] does not raise exceptions; it returns [false] on failure or when cloning by fd is not supported. *) +external clone_file : Unix.file_descr -> Unix.file_descr -> bool = "unsn_clone_file" +external copy_file : Unix.file_descr -> Unix.file_descr -> int64 + -> int -> int = "unsn_copy_file" (* [copy_file src dst offs len] returns the number of bytes copied and raises [Unix.Unix_error] on failure *) + +let copy_size l = (* Clamp [l] to the per-call chunk size below and convert it to [int] *) + let def = 10_485_760L in (* 10 MiB, to get periodic progress feedback *) + Int64.to_int @@ + if Int64.compare l def > 0 then def else l + +let rec copyFileAux src dst offs len notify = (* Copy up to [len] bytes from source offset [offs] in chunks of at most [copy_size]; stops when [len] is exhausted or [copy_file] returns 0 *) + let open Uutil in + if len > Filesize.zero then begin + let n = copy_file src dst (Filesize.toInt64 offs) + (copy_size (Filesize.toInt64 len)) in + let n' = Filesize.ofInt n in + let () = notify n' in (* progress is reported after every chunk, including a final zero-byte one *) + if n > 0 then + copyFileAux src dst (Filesize.add offs n') (Filesize.sub len n') notify + end + +let copyFileRange src dst offs len fallback notify = (* Run [copyFileAux]; if it raises one of the not-supported errors matched below, call [fallback ()] instead. Other exceptions propagate. *) + try + copyFileAux src dst offs len notify + with + | Unix.Unix_error ((EINVAL | ENOSYS | EBADF | EXDEV + | ESPIPE | ENOTSOCK | EOPNOTSUPP), _, _) + | Unix.Unix_error (EUNKNOWNERR -1, _, _) -> + (* These errors are not expected in the middle of a copy; these + indicate that [copy_file] is not supported at all (by the OS or + by the filesystem, or for these specific files) and nothing + has been copied so far, which makes fallback straight-forward. + Fallback to read-write loop expects that seek positions in + input and output fds have not changed. 
*) + fallback () + +let copyFile inCh outCh kind len fallback notify = (* Copy via the fds underlying [inCh] and [outCh]: for `DATA a whole-file clone is tried first, then [copyFileRange]; `RESS always uses [fallback] *) + let src = Unix.descr_of_in_channel inCh + and dst = Unix.descr_of_out_channel outCh in + if kind = `DATA && clone_file src dst then + notify len (* clone succeeded: report the whole length in one step *) + else + match kind with + | `DATA -> copyFileRange src dst Uutil.Filesize.zero len fallback notify + | `DATA_APPEND offs -> copyFileRange src dst offs len fallback notify (* start at source offset [offs] *) + | `RESS -> fallback () + +let copyByPath fspathFrom pathFrom fspathTo pathTo = (* Try to clone the file by path; returns the boolean result of [clone_path] on the two concatenated paths *) + clone_path + (Fspath.toString (Fspath.concat fspathFrom pathFrom)) + (Fspath.toString (Fspath.concat fspathTo pathTo)) + (* The fds opened in this function normally shouldn't be tracked for extra cleanup at connection close because this is sequential non-Lwt code. Yet, there is a risk that code called by [Uutil.showProgress] may include Lwt @@ -423,18 +477,25 @@ let readPropsExtDataG root path desc = [closeFile*] functions). *) let copyContents fspathFrom pathFrom fspathTo pathTo fileKind fileLength ido = let use_id f = match ido with Some id -> f id | None -> () in + if fileKind = `DATA && copyByPath fspathFrom pathFrom fspathTo pathTo then + use_id (fun id -> Uutil.showProgress id fileLength "l") + else + (* Open fds only if copying by path did not work *) let inFd = openFileIn fspathFrom pathFrom fileKind in protect (fun () -> let outFd = openFileOut fspathTo pathTo fileKind fileLength in protect (fun () -> - Uutil.readWriteBounded inFd outFd fileLength - (fun l -> + let showProgress l = use_id (fun id -> (* (Util.msg "Copied file %s (%d bytes)\n" (Path.toString pathFrom) l); *) if fileKind <> `RESS then Abort.checkAll (); - Uutil.showProgress id (Uutil.Filesize.ofInt l) "l")); + Uutil.showProgress id l "l") + in + let fallback () = Uutil.readWriteBounded inFd outFd fileLength + (fun l -> showProgress (Uutil.Filesize.ofInt l)) in + copyFile inFd outFd fileKind fileLength fallback showProgress; closeFileIn inFd; closeFileOut outFd; (* ignore (Sys.command ("ls -l " ^ (Fspath.toString (Fspath.concat 
fspathTo pathTo)))) *) diff --git a/src/copy_stubs.c b/src/copy_stubs.c new file mode 100644 index 000000000..6fca750a7 --- /dev/null +++ b/src/copy_stubs.c @@ -0,0 +1,218 @@ +/* Unison file synchronizer: src/copy_stubs.c */ +/* Copyright 2021-2023, Tõivo Leedjärv + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. +*/ + +#include <caml/memory.h> /* CAMLparam*, CAMLreturn */ +#include <caml/threads.h> /* caml_release_runtime_system, caml_acquire_runtime_system */ +#include <caml/unixsupport.h> /* uerror / unix_error, Nothing */ +#include <caml/version.h> /* OCAML_VERSION_MAJOR */ + +#if OCAML_VERSION_MAJOR < 5 /* map the caml_-prefixed names used below onto the unprefixed ones */ +#define caml_unix_error unix_error +#define caml_uerror uerror +#endif + + +#include <errno.h> /* errno, ENOSYS, EBADF, EXDEV */ + + +/* ----------------------------------------------- */ +/* Clone a file given source and destination paths */ +/* It must fully complete or fully fail. + + The function must not raise any exceptions. + + Return true for success and false for failure + or if the operation is not supported. 
*/ + +#if defined(__APPLE__) + + +#include <AvailabilityMacros.h> /* MAC_OS_X_VERSION_10_12 */ + +#if defined(MAC_OS_X_VERSION_10_12) +#include <stdlib.h> /* free */ +#include <string.h> /* strdup */ +#include <sys/clonefile.h> /* clonefile, CLONE_* flags */ + +CAMLprim value unsn_clone_path(value src, value dst) +{ + CAMLparam2(src, dst); + char *srcn, *dstn; + int status; + + srcn = strdup(String_val(src)); /* private C copies: the OCaml strings must not be read once the runtime is released */ + dstn = strdup(String_val(dst)); + caml_release_runtime_system(); /* let other OCaml threads run during the clone */ + status = (srcn != NULL && dstn != NULL) ? clonefile(srcn, dstn, CLONE_NOFOLLOW | CLONE_NOOWNERCOPY) : -1; /* a failed strdup is reported as an ordinary failure */ + free(srcn); + free(dstn); + caml_acquire_runtime_system(); + + /* Don't raise an exception, just return false in case of errors */ + CAMLreturn(Val_bool(status == 0)); +} +#else /* MAC_OS_X_VERSION_10_12 */ +CAMLprim value unsn_clone_path(value src, value dst) +{ + CAMLparam2(src, dst); + CAMLreturn(Val_false); /* built without the 10.12 macro: cloning by path is reported as unsupported */ +} +#endif /* MAC_OS_X_VERSION_10_12 */ + + +#else /* defined(__APPLE__) */ + + +CAMLprim value unsn_clone_path(value src, value dst) +{ + CAMLparam2(src, dst); + CAMLreturn(Val_false); /* not macOS: cloning by path is reported as unsupported */ +} + + +#endif /* defined(__APPLE__) */ + + +/* ----------------------------------------------- */ +/* Clone a file given input and output fd */ +/* It must fully complete or fully fail. + + The function must not raise any exceptions. + + Return true for success and false for failure + or if the operation is not supported. 
*/ + +#if defined(__linux__) || defined(__linux) + + +#include + +#if !defined(FICLONE) && defined(_IOW) +#define FICLONE _IOW(0x94, 9, int) +#endif + +CAMLprim value unsn_clone_file(value in_fd, value out_fd) +{ + CAMLparam2(in_fd, out_fd); + +#ifdef FICLONE + caml_release_runtime_system(); + int status = ioctl(Int_val(out_fd), FICLONE, Int_val(in_fd)); + caml_acquire_runtime_system(); + + /* Don't raise an exception, just return false in case of errors */ + CAMLreturn(Val_bool(status == 0)); +#else /* defined(FICLONE) */ + CAMLreturn(Val_false); +#endif +} + + +#else /* defined(__linux__) */ + + +CAMLprim value unsn_clone_file(value in_fd, value out_fd) +{ + CAMLparam2(in_fd, out_fd); + CAMLreturn(Val_false); +} + + +#endif /* defined(__linux__) */ + + +/* --------------------------------------------------------- */ +/* Copy, or possibly clone, a file given input and output fd */ +/* If operation is not supported by the OS or the filesystem + then file offsets must not have been changed at failure. + Output file offset must be changed on success. + + The function must return the number of bytes copied. + + On any error, raise a Unix exception based on errno. + Raise ENOSYS if the operation is not supported. 
*/ + +#if defined(__linux__) || defined(__linux) + + +#include +#include +#include + +CAMLprim value unsn_copy_file(value in_fd, value out_fd, value offs, value len) +{ + CAMLparam4(in_fd, out_fd, offs, len); + off_t off_i = Int64_val(offs); + ssize_t ret; + + caml_release_runtime_system(); +#ifdef __NR_copy_file_range + /* First, try copy_file_range() */ + /* Using off_i prevents changing in_fd file offset */ + ret = syscall(__NR_copy_file_range, Int_val(in_fd), &off_i, Int_val(out_fd), NULL, Long_val(len), 0); + if (ret == -1 && (errno == ENOSYS || errno == EBADF || errno == EXDEV)) +#endif /* defined(__NR_copy_file_range) */ + { + /* Second, try sendfile(); this one changes out_fd file offset */ + ret = sendfile(Int_val(out_fd), Int_val(in_fd), &off_i, Long_val(len)); + } + caml_acquire_runtime_system(); + if (ret == -1) caml_uerror("copy_file", Nothing); + + CAMLreturn(Val_long(ret)); +} + + +#elif defined(__sun) || defined(sun) + + +#include + +CAMLprim value unsn_copy_file(value in_fd, value out_fd, value offs, value len) +{ + CAMLparam4(in_fd, out_fd, offs, len); + off_t off = orig_off = Int64_val(offs); + ssize_t ret; + + caml_release_runtime_system(); + /* This one changes out_fd file offset */ + ret = sendfile(Int_val(out_fd), Int_val(in_fd), &off, Long_val(len)); + caml_acquire_runtime_system(); + if (ret == -1) { + if (off > off_orig) { + ret = off - off_orig; + } else { + caml_uerror("copy_file", Nothing); + } + } + + CAMLreturn(Val_long(ret)); +} + + +#else /* defined(__linux__) || defined(__sun) */ + + +CAMLprim value unsn_copy_file(value in_fd, value out_fd, value offs, value len) +{ + CAMLparam4(in_fd, out_fd, offs, len); + caml_unix_error(ENOSYS, "copy_file", Nothing); + CAMLreturn(Val_long(0)); +} + + +#endif /* defined(__linux__) || defined (__sun) */ diff --git a/src/dune b/src/dune index 1968ba2d4..edac6f023 100644 --- a/src/dune +++ b/src/dune @@ -9,7 +9,7 @@ -no-strict-sequence) (foreign_stubs (language c) - (names bytearray_stubs osxsupport 
pty hash_compat props_xattr props_acl)) + (names bytearray_stubs osxsupport pty hash_compat props_xattr props_acl copy_stubs)) (c_library_flags -lutil) (libraries str unix lwt_lib bigarray))