From 173a7ffb50868c4e7dbeee32ed9b8f8c0df8125a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?T=C3=B5ivo=20Leedj=C3=A4rv?=
<69477666+tleedjarv@users.noreply.github.com>
Date: Wed, 18 Aug 2021 20:51:34 +0200
Subject: [PATCH] Use accelerated file copy functions, if supported
This patch makes use of various platform- and filesystem-specific
syscalls to accelerate local file copies.
The following syscalls are tried:
* clonefile(2) on macOS (version >= 10.12)
* ioctl FICLONE on Linux (kernel >= 4.5)
(also BTRFS_IOC_CLONE since Linux 2.6.29(?) should work)
* copy_file_range(2) on Linux (kernel >= 4.5)
* sendfile(2) on Linux (kernel >= 2.2) and sendfile(3ext) on Solaris
Fallback to read-write loop is used if none of the above are available
or supported by the filesystem.
---
src/Makefile.OCaml | 5 +-
src/copy.ml | 67 +++++++++++++-
src/copy_stubs.c | 218 +++++++++++++++++++++++++++++++++++++++++++++
src/dune | 2 +-
4 files changed, 287 insertions(+), 5 deletions(-)
create mode 100644 src/copy_stubs.c
diff --git a/src/Makefile.OCaml b/src/Makefile.OCaml
index cea70e95e..dede66081 100644
--- a/src/Makefile.OCaml
+++ b/src/Makefile.OCaml
@@ -129,6 +129,8 @@ ifneq ($(strip $(LDLIBS)),)
CLIBS+=-cclib '$(LDLIBS)'
endif
+CAMLCFLAGS+=-ccopt -D_FILE_OFFSET_BITS=64
+
INCLFLAGS=-I lwt -I ubase -I system
DEP_INCLFLAGS=-I lwt -I ubase -I system
CAMLFLAGS+=$(INCLFLAGS)
@@ -187,6 +189,7 @@ else
ifeq ($(OSARCH),solaris)
# ACL functions
CLIBS+=-cclib -lsec
+ CLIBS+=-cclib -lsendfile
endif
buildexecutable::
@echo Building for Unix
@@ -274,7 +277,7 @@ OCAMLOBJS+=main.cmo
OCAMLLIBS+=unix.cma str.cma
INCLFLAGS+=-I +unix -I +str
-COBJS+=osxsupport$(OBJ_EXT) pty$(OBJ_EXT) bytearray_stubs$(OBJ_EXT) hash_compat$(OBJ_EXT) props_xattr$(OBJ_EXT) props_acl$(OBJ_EXT)
+COBJS+=osxsupport$(OBJ_EXT) pty$(OBJ_EXT) bytearray_stubs$(OBJ_EXT) hash_compat$(OBJ_EXT) props_xattr$(OBJ_EXT) props_acl$(OBJ_EXT) copy_stubs$(OBJ_EXT)
########################################################################
### User Interface setup
diff --git a/src/copy.ml b/src/copy.ml
index fa704f35c..6ff9918b3 100644
--- a/src/copy.ml
+++ b/src/copy.ml
@@ -415,6 +415,60 @@ let readPropsExtDataG root path desc =
(****)
+(* [clone_path src dst] asks the C stub to clone the file at path [src] to
+   path [dst]. The result is [true] on success and [false] on failure or
+   when the operation is not supported.
+   [unsn_clone_path] does not raise exceptions. *)
+external clone_path : string -> string -> bool = "unsn_clone_path"
+(* [clone_file src dst] is the fd-based counterpart of [clone_path], with the
+   same [true]/[false] result convention.
+   [unsn_clone_file] does not raise exceptions. *)
+external clone_file : Unix.file_descr -> Unix.file_descr -> bool = "unsn_clone_file"
+(* [copy_file src dst offs len] copies up to [len] bytes from [src], starting
+   at input offset [offs], to [dst] and returns the number of bytes copied.
+   Unlike the two functions above, the stub raises a Unix error on failure
+   (ENOSYS where no accelerated copy is implemented). *)
+external copy_file : Unix.file_descr -> Unix.file_descr -> int64
+ -> int -> int = "unsn_copy_file"
+
+(* Number of bytes to request from a single [copy_file] call: [l] itself,
+   capped at 10 MiB so that progress feedback keeps arriving periodically. *)
+let copy_size l =
+  let chunk_limit = 10_485_760L in
+  let capped = if Int64.compare l chunk_limit <= 0 then l else chunk_limit in
+  Int64.to_int capped
+
+(* [copyFileAux src dst offs len notify] copies [len] bytes from [src],
+   starting at offset [offs], to [dst] in chunks of at most [copy_size]
+   bytes. [notify] is called with the size of every chunk, including a final
+   zero-sized one. The loop stops when [len] is exhausted or when
+   [copy_file] reports that zero bytes were copied. Exceptions raised by
+   [copy_file] are not caught here. *)
+let rec copyFileAux src dst offs len notify =
+  let module F = Uutil.Filesize in
+  if len > F.zero then begin
+    let chunk = copy_size (F.toInt64 len) in
+    let copied = copy_file src dst (F.toInt64 offs) chunk in
+    let copied' = F.ofInt copied in
+    let () = notify copied' in
+    if copied > 0 then
+      copyFileAux src dst (F.add offs copied') (F.sub len copied') notify
+  end
+
+(* [copyFileRange src dst offs len fallback notify] copies [len] bytes from
+   [src], starting at offset [offs], to [dst] with [copyFileAux], calling
+   [notify] after every chunk.
+
+   If [copy_file] reports that it is not supported at all (by the OS, by the
+   filesystem, or for these specific files) before any data has been copied,
+   [fallback ()] is run instead. The fallback read-write loop expects that
+   the seek positions of the input and output fds have not changed, so once
+   at least one chunk has been copied the error is propagated to the caller
+   rather than restarting the copy on top of a partially written [dst]. *)
+let copyFileRange src dst offs len fallback notify =
+  (* Set as soon as a chunk with a non-zero size has been reported. *)
+  let copiedAny = ref false in
+  let trackingNotify n =
+    if n <> Uutil.Filesize.zero then copiedAny := true;
+    notify n
+  in
+  try
+    copyFileAux src dst offs len trackingNotify
+  with
+  | Unix.Unix_error ((EINVAL | ENOSYS | EBADF | EXDEV
+                      | ESPIPE | ENOTSOCK | EOPNOTSUPP), _, _)
+  | Unix.Unix_error (EUNKNOWNERR -1, _, _) when not !copiedAny ->
+      (* These errors are not expected in the middle of a copy; raised
+         before the first chunk they indicate that [copy_file] is not
+         supported, which makes fallback straight-forward. *)
+      fallback ()
+
+(* [copyFile inCh outCh kind len fallback notify] copies [len] bytes between
+   the fds underlying the two channels, choosing the method by [kind]:
+   - [`DATA]: try [clone_file] first (reporting the whole [len] at once on
+     success), otherwise [copyFileRange] from offset zero;
+   - [`DATA_APPEND offs]: [copyFileRange] starting at [offs];
+   - [`RESS]: always [fallback ()]. *)
+let copyFile inCh outCh kind len fallback notify =
+  let src = Unix.descr_of_in_channel inCh in
+  let dst = Unix.descr_of_out_channel outCh in
+  match kind with
+  | `RESS -> fallback ()
+  | `DATA_APPEND offs -> copyFileRange src dst offs len fallback notify
+  | `DATA ->
+      if clone_file src dst then notify len
+      else copyFileRange src dst Uutil.Filesize.zero len fallback notify
+
+(* [copyByPath fspathFrom pathFrom fspathTo pathTo] tries to clone the source
+   file to the destination purely by path name, via [clone_path]. The result
+   is [clone_path]'s boolean: [true] only if the clone succeeded. *)
+let copyByPath fspathFrom pathFrom fspathTo pathTo =
+  let fullName fspath path = Fspath.toString (Fspath.concat fspath path) in
+  clone_path (fullName fspathFrom pathFrom) (fullName fspathTo pathTo)
+
(* The fds opened in this function normally shouldn't be tracked for extra
cleanup at connection close because this is sequential non-Lwt code. Yet,
there is a risk that code called by [Uutil.showProgress] may include Lwt
@@ -423,18 +477,25 @@ let readPropsExtDataG root path desc =
[closeFile*] functions). *)
let copyContents fspathFrom pathFrom fspathTo pathTo fileKind fileLength ido =
let use_id f = match ido with Some id -> f id | None -> () in
+ if fileKind = `DATA && copyByPath fspathFrom pathFrom fspathTo pathTo then
+ use_id (fun id -> Uutil.showProgress id fileLength "l")
+ else
+ (* Open fds only if copying by path did not work *)
let inFd = openFileIn fspathFrom pathFrom fileKind in
protect
(fun () ->
let outFd = openFileOut fspathTo pathTo fileKind fileLength in
protect
(fun () ->
- Uutil.readWriteBounded inFd outFd fileLength
- (fun l ->
+ let showProgress l =
use_id (fun id ->
(* (Util.msg "Copied file %s (%d bytes)\n" (Path.toString pathFrom) l); *)
if fileKind <> `RESS then Abort.checkAll ();
- Uutil.showProgress id (Uutil.Filesize.ofInt l) "l"));
+ Uutil.showProgress id l "l")
+ in
+ let fallback () = Uutil.readWriteBounded inFd outFd fileLength
+ (fun l -> showProgress (Uutil.Filesize.ofInt l)) in
+ copyFile inFd outFd fileKind fileLength fallback showProgress;
closeFileIn inFd;
closeFileOut outFd;
(* ignore (Sys.command ("ls -l " ^ (Fspath.toString (Fspath.concat fspathTo pathTo)))) *)
diff --git a/src/copy_stubs.c b/src/copy_stubs.c
new file mode 100644
index 000000000..6fca750a7
--- /dev/null
+++ b/src/copy_stubs.c
@@ -0,0 +1,218 @@
+/* Unison file synchronizer: src/copy_stubs.c */
+/* Copyright 2021-2023, Tõivo Leedjärv
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#include
+#include
+#include
+#include
+
+#if OCAML_VERSION_MAJOR < 5
+#define caml_unix_error unix_error
+#define caml_uerror uerror
+#endif
+
+
+#include
+
+
+/* ----------------------------------------------- */
+/* Clone a file given source and destination paths */
+/* It must fully complete or fully fail.
+
+ The function must not raise any exceptions.
+
+ Return true for success and false for failure
+ or if the operation is not supported. */
+
+#if defined(__APPLE__)
+
+
+#include
+
+#if defined(MAC_OS_X_VERSION_10_12)
+#include
+#include
+#include
+
+/* macOS implementation (SDK defines MAC_OS_X_VERSION_10_12): clone the file
+   named by the OCaml string src to the path named by dst with clonefile().
+   Returns OCaml true if clonefile() returned 0 and false otherwise. */
+CAMLprim value unsn_clone_path(value src, value dst)
+{
+ CAMLparam2(src, dst);
+ char *srcn, *dstn;
+ int status;
+
+ /* Take private C copies of both paths before the runtime is released, so
+    that clonefile() does not read the OCaml strings.
+    NOTE(review): the strdup() results are not checked for NULL; confirm
+    that clonefile() simply fails when handed a NULL path. */
+ srcn = strdup(String_val(src));
+ dstn = strdup(String_val(dst));
+ caml_release_runtime_system();
+ status = clonefile(srcn, dstn, CLONE_NOFOLLOW | CLONE_NOOWNERCOPY);
+ free(srcn);
+ free(dstn);
+ caml_acquire_runtime_system();
+
+ /* Don't raise an exception, just return false in case of errors */
+ CAMLreturn(Val_bool(status == 0));
+}
+#else /* MAC_OS_X_VERSION_10_12 */
+/* Apple build without MAC_OS_X_VERSION_10_12 defined: cloning by path is
+   reported as unsupported by always returning false. */
+CAMLprim value unsn_clone_path(value src, value dst)
+{
+ CAMLparam2(src, dst);
+ CAMLreturn(Val_false);
+}
+#endif /* MAC_OS_X_VERSION_10_12 */
+
+
+#else /* defined(__APPLE__) */
+
+
+/* Non-Apple platforms: cloning by path is reported as unsupported by always
+   returning false. */
+CAMLprim value unsn_clone_path(value src, value dst)
+{
+ CAMLparam2(src, dst);
+ CAMLreturn(Val_false);
+}
+
+
+#endif /* defined(__APPLE__) */
+
+
+/* ----------------------------------------------- */
+/* Clone a file given input and output fd */
+/* It must fully complete or fully fail.
+
+ The function must not raise any exceptions.
+
+ Return true for success and false for failure
+ or if the operation is not supported. */
+
+#if defined(__linux__) || defined(__linux)
+
+
+#include
+
+#if !defined(FICLONE) && defined(_IOW)
+#define FICLONE _IOW(0x94, 9, int)
+#endif
+
+/* Linux implementation: clone the whole contents of in_fd into out_fd with
+   the FICLONE ioctl, when that request code is available at build time.
+   Returns OCaml true if the ioctl returned 0, false otherwise (including
+   when FICLONE is not defined). Never raises. */
+CAMLprim value unsn_clone_file(value in_fd, value out_fd)
+{
+  CAMLparam2(in_fd, out_fd);
+
+#ifdef FICLONE
+  /* Unbox the descriptors while the runtime lock is still held; OCaml
+     values must not be accessed once the runtime has been released. */
+  int src_fd = Int_val(in_fd);
+  int dst_fd = Int_val(out_fd);
+  int status;
+
+  caml_release_runtime_system();
+  status = ioctl(dst_fd, FICLONE, src_fd);
+  caml_acquire_runtime_system();
+
+  /* Don't raise an exception, just return false in case of errors */
+  CAMLreturn(Val_bool(status == 0));
+#else /* defined(FICLONE) */
+  CAMLreturn(Val_false);
+#endif
+}
+
+
+#else /* defined(__linux__) */
+
+
+/* Non-Linux platforms: cloning by fd is reported as unsupported by always
+   returning false. */
+CAMLprim value unsn_clone_file(value in_fd, value out_fd)
+{
+ CAMLparam2(in_fd, out_fd);
+ CAMLreturn(Val_false);
+}
+
+
+#endif /* defined(__linux__) */
+
+
+/* --------------------------------------------------------- */
+/* Copy, or possibly clone, a file given input and output fd */
+/* If operation is not supported by the OS or the filesystem
+ then file offsets must not have been changed at failure.
+ Output file offset must be changed on success.
+
+ The function must return the number of bytes copied.
+
+ On any error, raise a Unix exception based on errno.
+ Raise ENOSYS if the operation is not supported. */
+
+#if defined(__linux__) || defined(__linux)
+
+
+#include
+#include
+#include
+
+/* Linux implementation: copy up to len bytes from in_fd, starting at input
+   offset offs, to out_fd at its current file offset.
+   Returns the number of bytes copied as an OCaml int.
+   Raises a Unix error built from errno if the copy fails. */
+CAMLprim value unsn_copy_file(value in_fd, value out_fd, value offs, value len)
+{
+  CAMLparam4(in_fd, out_fd, offs, len);
+  /* Unbox every argument while the runtime lock is still held; OCaml
+     values must not be accessed once the runtime has been released. */
+  int src_fd = Int_val(in_fd);
+  int dst_fd = Int_val(out_fd);
+  long count = Long_val(len);
+  off_t off_i = Int64_val(offs);
+  ssize_t ret;
+
+  caml_release_runtime_system();
+#ifdef __NR_copy_file_range
+  /* First, try copy_file_range() */
+  /* Using off_i prevents changing in_fd file offset */
+  ret = syscall(__NR_copy_file_range, src_fd, &off_i, dst_fd, NULL, count, 0);
+  if (ret == -1 && (errno == ENOSYS || errno == EBADF || errno == EXDEV))
+#endif /* defined(__NR_copy_file_range) */
+  {
+    /* Second, try sendfile(); this one changes out_fd file offset */
+    ret = sendfile(dst_fd, src_fd, &off_i, count);
+  }
+  caml_acquire_runtime_system();
+  if (ret == -1) caml_uerror("copy_file", Nothing);
+
+  CAMLreturn(Val_long(ret));
+}
+
+
+#elif defined(__sun) || defined(sun)
+
+
+#include
+
+/* Solaris implementation: copy up to len bytes from in_fd, starting at input
+   offset offs, to out_fd with sendfile().
+   Returns the number of bytes copied as an OCaml int. If sendfile() fails
+   after having advanced the offset, the partial byte count is returned
+   instead of an error; otherwise a Unix error built from errno is raised. */
+CAMLprim value unsn_copy_file(value in_fd, value out_fd, value offs, value len)
+{
+  CAMLparam4(in_fd, out_fd, offs, len);
+  /* Unbox every argument while the runtime lock is still held; OCaml
+     values must not be accessed once the runtime has been released. */
+  int src_fd = Int_val(in_fd);
+  int dst_fd = Int_val(out_fd);
+  long count = Long_val(len);
+  /* off_orig remembers the starting offset so that progress made by a
+     failed sendfile() call can be measured afterwards. */
+  off_t off_orig = Int64_val(offs);
+  off_t off = off_orig;
+  ssize_t ret;
+
+  caml_release_runtime_system();
+  /* This one changes out_fd file offset */
+  ret = sendfile(dst_fd, src_fd, &off, count);
+  caml_acquire_runtime_system();
+  if (ret == -1) {
+    if (off > off_orig) {
+      /* Some data was transferred before the error: report it as a short
+         copy and let the caller's next call surface the error. */
+      ret = off - off_orig;
+    } else {
+      caml_uerror("copy_file", Nothing);
+    }
+  }
+
+  CAMLreturn(Val_long(ret));
+}
+
+
+#else /* defined(__linux__) || defined(__sun) */
+
+
+/* Platforms other than Linux and Solaris: no accelerated copy is
+   implemented, so every call raises a Unix error with code ENOSYS. */
+CAMLprim value unsn_copy_file(value in_fd, value out_fd, value offs, value len)
+{
+ CAMLparam4(in_fd, out_fd, offs, len);
+ caml_unix_error(ENOSYS, "copy_file", Nothing);
+ /* Presumably unreachable because caml_unix_error raises; kept so that the
+    function has a return on every path. */
+ CAMLreturn(Val_long(0));
+}
+
+
+#endif /* defined(__linux__) || defined (__sun) */
diff --git a/src/dune b/src/dune
index 1968ba2d4..edac6f023 100644
--- a/src/dune
+++ b/src/dune
@@ -9,7 +9,7 @@
-no-strict-sequence)
(foreign_stubs
(language c)
- (names bytearray_stubs osxsupport pty hash_compat props_xattr props_acl))
+ (names bytearray_stubs osxsupport pty hash_compat props_xattr props_acl copy_stubs))
(c_library_flags -lutil)
(libraries str unix lwt_lib bigarray))