Skip to content

Commit

Permalink
Use accelerated file copy functions, if supported
Browse files Browse the repository at this point in the history
This patch makes use of various platform- and filesystem-specific
syscalls to accelerate local file copies.

The following syscalls are tried:
 * clonefile(2) on macOS (version >= 10.12)
 * ioctl FICLONE on Linux (kernel >= 4.5)
   (also BTRFS_IOC_CLONE since Linux 2.6.29(?) should work)
 * copy_file_range(2) on Linux (kernel >= 4.5)
 * sendfile(2) on Linux (kernel >= 2.2) and sendfile(3ext) on Solaris

A fallback to a read-write loop is used if none of the above is available
or supported by the filesystem.
  • Loading branch information
tleedjarv committed Mar 23, 2023
1 parent 43b9875 commit 173a7ff
Show file tree
Hide file tree
Showing 4 changed files with 287 additions and 5 deletions.
5 changes: 4 additions & 1 deletion src/Makefile.OCaml
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ ifneq ($(strip $(LDLIBS)),)
CLIBS+=-cclib '$(LDLIBS)'
endif

CAMLCFLAGS+=-ccopt -D_FILE_OFFSET_BITS=64

INCLFLAGS=-I lwt -I ubase -I system
DEP_INCLFLAGS=-I lwt -I ubase -I system
CAMLFLAGS+=$(INCLFLAGS)
Expand Down Expand Up @@ -187,6 +189,7 @@ else
ifeq ($(OSARCH),solaris)
# ACL functions
CLIBS+=-cclib -lsec
CLIBS+=-cclib -lsendfile
endif
buildexecutable::
@echo Building for Unix
Expand Down Expand Up @@ -274,7 +277,7 @@ OCAMLOBJS+=main.cmo
OCAMLLIBS+=unix.cma str.cma
INCLFLAGS+=-I +unix -I +str

COBJS+=osxsupport$(OBJ_EXT) pty$(OBJ_EXT) bytearray_stubs$(OBJ_EXT) hash_compat$(OBJ_EXT) props_xattr$(OBJ_EXT) props_acl$(OBJ_EXT)
COBJS+=osxsupport$(OBJ_EXT) pty$(OBJ_EXT) bytearray_stubs$(OBJ_EXT) hash_compat$(OBJ_EXT) props_xattr$(OBJ_EXT) props_acl$(OBJ_EXT) copy_stubs$(OBJ_EXT)

########################################################################
### User Interface setup
Expand Down
67 changes: 64 additions & 3 deletions src/copy.ml
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,60 @@ let readPropsExtDataG root path desc =

(****)

(* Clone a file given source and destination paths; returns [true] on
   success and [false] on failure or when cloning is not supported.
   [unsn_clone_path] does not raise exceptions. *)
external clone_path : string -> string -> bool = "unsn_clone_path"
(* Clone a file given input and output file descriptors; returns [true] on
   success and [false] on failure or when cloning is not supported.
   [unsn_clone_file] does not raise exceptions. *)
external clone_file : Unix.file_descr -> Unix.file_descr -> bool = "unsn_clone_file"
(* [copy_file src dst offs len] asks the OS to copy up to [len] bytes from
   [src], starting at input offset [offs], to [dst]. Returns the number of
   bytes actually copied. Unlike the two primitives above, this one DOES
   raise [Unix.Unix_error] on failure (with [ENOSYS] when the operation is
   not supported on this platform). *)
external copy_file : Unix.file_descr -> Unix.file_descr -> int64
-> int -> int = "unsn_copy_file"

(* [copy_size l] is the number of bytes to request in a single [copy_file]
   call: [l] itself, capped at 10 MiB so that long copies are split into
   chunks and progress can be reported periodically. *)
let copy_size l =
  let chunk_limit = 10_485_760L in (* 10 MiB *)
  let bounded =
    if Int64.compare l chunk_limit <= 0 then l else chunk_limit in
  Int64.to_int bounded

(* [copyFileAux src dst offs len notify] copies [len] bytes from [src]
   (starting at input offset [offs]) to [dst] by calling [copy_file]
   repeatedly, one chunk of at most [copy_size] bytes at a time.
   [notify] is called after every chunk with the number of bytes that chunk
   copied (including a final call with zero if [copy_file] returns 0, in
   which case the loop stops early). Exceptions raised by [copy_file] are
   propagated to the caller. *)
let rec copyFileAux src dst offs len notify =
  let open Uutil in
  if len > Filesize.zero then begin
    let chunk = copy_size (Filesize.toInt64 len) in
    let copied = copy_file src dst (Filesize.toInt64 offs) chunk in
    let step = Filesize.ofInt copied in
    let () = notify step in
    if copied > 0 then begin
      (* Advance the input offset and shrink the remaining length *)
      let offs' = Filesize.add offs step
      and len' = Filesize.sub len step in
      copyFileAux src dst offs' len' notify
    end
  end

(* [copyFileRange src dst offs len fallback notify] tries the accelerated
   chunked copy ([copyFileAux]) and calls [fallback ()] when the OS reports
   one of the "not supported" style errors listed below. Any other
   exception is propagated unchanged. *)
let copyFileRange src dst offs len fallback notify =
  match copyFileAux src dst offs len notify with
  | () -> ()
  | exception Unix.Unix_error ((EINVAL | ENOSYS | EBADF | EXDEV
                               | ESPIPE | ENOTSOCK | EOPNOTSUPP
                               | EUNKNOWNERR (-1)), _, _) ->
      (* None of these errors is expected in the middle of a copy; they
         mean that [copy_file] is not supported at all (by the OS, by the
         filesystem, or for these specific files), so nothing has been
         copied yet and falling back is straightforward.
         The read-write fallback relies on the seek positions of the
         input and output fds being unchanged. *)
      fallback ()

(* [copyFile inCh outCh kind len fallback notify] copies [len] bytes between
   the file descriptors underlying the two channels, picking a strategy by
   [kind]:
   - [`DATA]: first try to clone the whole file in one step (reporting all
     of [len] to [notify] on success); otherwise do an accelerated range
     copy from offset zero;
   - [`DATA_APPEND offs]: accelerated range copy starting at [offs];
   - [`RESS]: always use [fallback].
   The range copy itself may end up calling [fallback]. *)
let copyFile inCh outCh kind len fallback notify =
  let src = Unix.descr_of_in_channel inCh in
  let dst = Unix.descr_of_out_channel outCh in
  let rangeCopyFrom offs = copyFileRange src dst offs len fallback notify in
  match kind with
  | `DATA ->
      if clone_file src dst then notify len
      else rangeCopyFrom Uutil.Filesize.zero
  | `DATA_APPEND offs -> rangeCopyFrom offs
  | `RESS -> fallback ()

(* [copyByPath fspathFrom pathFrom fspathTo pathTo] attempts to clone the
   source file to the destination purely by path name, via [clone_path].
   Returns the boolean result of [clone_path]. *)
let copyByPath fspathFrom pathFrom fspathTo pathTo =
  let srcName = Fspath.toString (Fspath.concat fspathFrom pathFrom) in
  let dstName = Fspath.toString (Fspath.concat fspathTo pathTo) in
  clone_path srcName dstName

(* The fds opened in this function normally shouldn't be tracked for extra
cleanup at connection close because this is sequential non-Lwt code. Yet,
there is a risk that code called by [Uutil.showProgress] may include Lwt
Expand All @@ -423,18 +477,25 @@ let readPropsExtDataG root path desc =
[closeFile*] functions). *)
let copyContents fspathFrom pathFrom fspathTo pathTo fileKind fileLength ido =
let use_id f = match ido with Some id -> f id | None -> () in
if fileKind = `DATA && copyByPath fspathFrom pathFrom fspathTo pathTo then
use_id (fun id -> Uutil.showProgress id fileLength "l")
else
(* Open fds only if copying by path did not work *)
let inFd = openFileIn fspathFrom pathFrom fileKind in
protect
(fun () ->
let outFd = openFileOut fspathTo pathTo fileKind fileLength in
protect
(fun () ->
Uutil.readWriteBounded inFd outFd fileLength
(fun l ->
let showProgress l =
use_id (fun id ->
(* (Util.msg "Copied file %s (%d bytes)\n" (Path.toString pathFrom) l); *)
if fileKind <> `RESS then Abort.checkAll ();
Uutil.showProgress id (Uutil.Filesize.ofInt l) "l"));
Uutil.showProgress id l "l")
in
let fallback () = Uutil.readWriteBounded inFd outFd fileLength
(fun l -> showProgress (Uutil.Filesize.ofInt l)) in
copyFile inFd outFd fileKind fileLength fallback showProgress;
closeFileIn inFd;
closeFileOut outFd;
(* ignore (Sys.command ("ls -l " ^ (Fspath.toString (Fspath.concat fspathTo pathTo)))) *)
Expand Down
218 changes: 218 additions & 0 deletions src/copy_stubs.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
/* Unison file synchronizer: src/copy_stubs.c */
/* Copyright 2021-2023, Tõivo Leedjärv
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include <caml/memory.h>
#include <caml/threads.h>
#include <caml/unixsupport.h>
#include <caml/version.h>

#if OCAML_VERSION_MAJOR < 5
#define caml_unix_error unix_error
#define caml_uerror uerror
#endif


#include <errno.h>


/* ----------------------------------------------- */
/* Clone a file given source and destination paths */
/* It must fully complete or fully fail.
The function must not raise any exceptions.
Return true for success and false for failure
or if the operation is not supported. */

#if defined(__APPLE__)


#include <AvailabilityMacros.h>

#if defined(MAC_OS_X_VERSION_10_12)
#include <string.h>
#include <sys/attr.h>
#include <sys/clonefile.h>

/* Clone the file at path [src] to path [dst] using clonefile(2).
   Returns OCaml true on success, false on any failure (including failure
   to allocate the temporary path copies). Never raises an exception. */
CAMLprim value unsn_clone_path(value src, value dst)
{
  CAMLparam2(src, dst);
  char *srcn, *dstn;
  int status = -1;

  /* Take private C copies of the paths while the runtime is still held;
     the OCaml strings must not be accessed once the runtime is released. */
  srcn = strdup(String_val(src));
  dstn = strdup(String_val(dst));

  /* Only attempt the clone if both copies were allocated; otherwise fall
     through with status == -1 instead of handing NULL to the syscall. */
  if (srcn != NULL && dstn != NULL) {
    caml_release_runtime_system();
    status = clonefile(srcn, dstn, CLONE_NOFOLLOW | CLONE_NOOWNERCOPY);
    caml_acquire_runtime_system();
  }

  /* free(NULL) is a no-op, so this is safe on the allocation-failure path */
  free(srcn);
  free(dstn);

  /* Don't raise an exception, just return false in case of errors */
  CAMLreturn(Val_bool(status == 0));
}
#else /* MAC_OS_X_VERSION_10_12 */
/* Stub for macOS SDKs without clonefile(2): cloning by path is never
   available, so always report failure. Never raises an exception. */
CAMLprim value unsn_clone_path(value src, value dst)
{
  CAMLparam2(src, dst);
  value unsupported = Val_false;

  CAMLreturn(unsupported);
}
#endif /* MAC_OS_X_VERSION_10_12 */


#else /* defined(__APPLE__) */


/* Stub for non-Apple platforms: cloning by path is not implemented here,
   so always report failure. Never raises an exception. */
CAMLprim value unsn_clone_path(value src, value dst)
{
  CAMLparam2(src, dst);
  value unsupported = Val_false;

  CAMLreturn(unsupported);
}


#endif /* defined(__APPLE__) */


/* ----------------------------------------------- */
/* Clone a file given input and output fd */
/* It must fully complete or fully fail.
The function must not raise any exceptions.
Return true for success and false for failure
or if the operation is not supported. */

#if defined(__linux__) || defined(__linux)


#include <sys/ioctl.h>

#if !defined(FICLONE) && defined(_IOW)
#define FICLONE _IOW(0x94, 9, int)
#endif

/* Clone the contents of [in_fd] into [out_fd] with the FICLONE ioctl.
   Returns OCaml true if the ioctl succeeded, false if it failed or if
   FICLONE is not available at compile time. Never raises an exception. */
CAMLprim value unsn_clone_file(value in_fd, value out_fd)
{
  CAMLparam2(in_fd, out_fd);
  int cloned = 0;

#ifdef FICLONE
  int src = Int_val(in_fd);
  int dst = Int_val(out_fd);

  /* The ioctl may block, so run it without holding the OCaml runtime */
  caml_release_runtime_system();
  cloned = (ioctl(dst, FICLONE, src) == 0);
  caml_acquire_runtime_system();
#endif /* defined(FICLONE) */

  /* Errors are reported as a plain false result, not as an exception */
  CAMLreturn(Val_bool(cloned));
}


#else /* defined(__linux__) */


/* Stub for non-Linux platforms: cloning by file descriptor is not
   implemented here, so always report failure. Never raises an exception. */
CAMLprim value unsn_clone_file(value in_fd, value out_fd)
{
  CAMLparam2(in_fd, out_fd);
  value unsupported = Val_false;

  CAMLreturn(unsupported);
}


#endif /* defined(__linux__) */


/* --------------------------------------------------------- */
/* Copy, or possibly clone, a file given input and output fd */
/* If operation is not supported by the OS or the filesystem
then file offsets must not have been changed at failure.
Output file offset must be changed on success.
The function must return the number of bytes copied.
On any error, raise a Unix exception based on errno.
Raise ENOSYS if the operation is not supported. */

#if defined(__linux__) || defined(__linux)


#include <unistd.h>
#include <sys/syscall.h>
#include <sys/sendfile.h>

/* Linux implementation: copy up to [len] bytes from [in_fd], starting at
   input offset [offs], to [out_fd] at its current file offset.
   copy_file_range() is tried first (when the syscall number is known at
   compile time); if it fails with ENOSYS, EBADF or EXDEV, sendfile() is
   tried instead. Returns the number of bytes copied; raises a Unix
   exception built from errno if the last attempted call failed. */
CAMLprim value unsn_copy_file(value in_fd, value out_fd, value offs, value len)
{
  CAMLparam4(in_fd, out_fd, offs, len);
  off_t src_pos = Int64_val(offs);
  ssize_t copied;
  /* Without copy_file_range() support, go straight to sendfile() */
  int try_sendfile = 1;

  caml_release_runtime_system();
#ifdef __NR_copy_file_range
  /* Passing an explicit input offset leaves in_fd's own file offset
     untouched; the NULL output offset means out_fd's offset is used. */
  copied = syscall(__NR_copy_file_range, Int_val(in_fd), &src_pos, Int_val(out_fd), NULL, Long_val(len), 0);
  try_sendfile = (copied == -1
                  && (errno == ENOSYS || errno == EBADF || errno == EXDEV));
#endif /* defined(__NR_copy_file_range) */
  if (try_sendfile) {
    /* sendfile() likewise advances only out_fd's file offset */
    copied = sendfile(Int_val(out_fd), Int_val(in_fd), &src_pos, Long_val(len));
  }
  caml_acquire_runtime_system();

  if (copied == -1) caml_uerror("copy_file", Nothing);

  CAMLreturn(Val_long(copied));
}


#elif defined(__sun) || defined(sun)


#include <sys/sendfile.h>

/* Solaris implementation: copy up to [len] bytes from [in_fd], starting at
   input offset [offs], to [out_fd] using sendfile().
   Returns the number of bytes copied. If sendfile() reports an error but
   the offset it was given has advanced, the advance is returned as a
   partial copy instead of raising; otherwise a Unix exception built from
   errno is raised. */
CAMLprim value unsn_copy_file(value in_fd, value out_fd, value offs, value len)
{
  CAMLparam4(in_fd, out_fd, offs, len);
  /* Keep the starting offset so progress can be measured after an error.
     (The original declaration referred to an undeclared identifier and
     used two different spellings, which did not compile.) */
  off_t off_orig = Int64_val(offs);
  off_t off = off_orig;
  ssize_t ret;

  caml_release_runtime_system();
  /* This one changes out_fd file offset */
  ret = sendfile(Int_val(out_fd), Int_val(in_fd), &off, Long_val(len));
  caml_acquire_runtime_system();
  if (ret == -1) {
    if (off > off_orig) {
      /* Some data was transferred before the error: report it as a
         short copy so the caller can continue from the new offset. */
      ret = off - off_orig;
    } else {
      caml_uerror("copy_file", Nothing);
    }
  }

  CAMLreturn(Val_long(ret));
}


#else /* defined(__linux__) || defined(__sun) */


/* Stub for platforms with no accelerated copy: always raises a Unix
   exception with ENOSYS so the caller can detect that the operation is
   unsupported. */
CAMLprim value unsn_copy_file(value in_fd, value out_fd, value offs, value len)
{
  CAMLparam4(in_fd, out_fd, offs, len);
  const int unsupported = ENOSYS;

  caml_unix_error(unsupported, "copy_file", Nothing);

  /* Not reached in practice; keeps the CAMLparam/CAMLreturn pairing */
  CAMLreturn(Val_long(0));
}


#endif /* defined(__linux__) || defined (__sun) */
2 changes: 1 addition & 1 deletion src/dune
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
-no-strict-sequence)
(foreign_stubs
(language c)
(names bytearray_stubs osxsupport pty hash_compat props_xattr props_acl))
(names bytearray_stubs osxsupport pty hash_compat props_xattr props_acl copy_stubs))
(c_library_flags -lutil)
(libraries str unix lwt_lib bigarray))

Expand Down

0 comments on commit 173a7ff

Please sign in to comment.