@@ -59,7 +59,7 @@ use crate::os::unix::io::{AsRawFd, FromRawFd, RawFd};
59
59
use crate :: os:: unix:: net:: UnixStream ;
60
60
use crate :: process:: { ChildStderr , ChildStdin , ChildStdout } ;
61
61
use crate :: ptr;
62
- use crate :: sync:: atomic:: { AtomicBool , Ordering } ;
62
+ use crate :: sync:: atomic:: { AtomicBool , AtomicU8 , Ordering } ;
63
63
use crate :: sys:: cvt;
64
64
65
65
#[ cfg( test) ]
@@ -491,18 +491,29 @@ impl CopyResult {
491
491
}
492
492
}
493
493
494
- /// linux-specific implementation that will attempt to use copy_file_range for copy offloading
495
- /// as the name says, it only works on regular files
494
+ /// Invalid file descriptor.
495
+ ///
496
+ /// Valid file descriptors are guaranteed to be non-negative numbers (see `open()` manpage)
497
+ /// while negative values are used to indicate errors.
498
+ /// Thus -1 will never overlap with a valid open file.
499
+ const INVALID_FD : RawFd = -1 ;
500
+
501
+ /// Linux-specific implementation that will attempt to use copy_file_range for copy offloading.
502
+ /// As the name says, it only works on regular files.
496
503
///
497
504
/// Callers must handle fallback to a generic copy loop.
498
505
/// `Fallback` may indicate non-zero number of bytes already written
499
506
/// if one of the files' cursor +`max_len` would exceed u64::MAX (`EOVERFLOW`).
500
507
pub ( super ) fn copy_regular_files ( reader : RawFd , writer : RawFd , max_len : u64 ) -> CopyResult {
501
508
use crate :: cmp;
502
509
510
+ const NOT_PROBED : u8 = 0 ;
511
+ const UNAVAILABLE : u8 = 1 ;
512
+ const AVAILABLE : u8 = 2 ;
513
+
503
514
// Kernels prior to 4.5 don't have copy_file_range
504
515
// We store the availability in a global to avoid unnecessary syscalls
505
- static HAS_COPY_FILE_RANGE : AtomicBool = AtomicBool :: new ( true ) ;
516
+ static HAS_COPY_FILE_RANGE : AtomicU8 = AtomicU8 :: new ( NOT_PROBED ) ;
506
517
507
518
syscall ! {
508
519
fn copy_file_range(
@@ -515,39 +526,39 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) ->
515
526
) -> libc:: ssize_t
516
527
}
517
528
518
- let has_copy_file_range = HAS_COPY_FILE_RANGE . load ( Ordering :: Relaxed ) ;
519
- let mut written = 0u64 ;
520
- while written < max_len {
521
- let copy_result = if has_copy_file_range {
522
- let bytes_to_copy = cmp:: min ( max_len - written, usize:: MAX as u64 ) ;
523
- // cap to 1GB chunks in case u64::MAX is passed as max_len and the file has a non-zero seek position
524
- // this allows us to copy large chunks without hitting EOVERFLOW,
525
- // unless someone sets a file offset close to u64::MAX - 1GB, in which case a fallback would be required
526
- let bytes_to_copy = cmp:: min ( bytes_to_copy as usize , 0x4000_0000usize ) ;
527
- let copy_result = unsafe {
528
- // We actually don't have to adjust the offsets,
529
- // because copy_file_range adjusts the file offset automatically
530
- cvt ( copy_file_range (
531
- reader,
532
- ptr:: null_mut ( ) ,
533
- writer,
534
- ptr:: null_mut ( ) ,
535
- bytes_to_copy,
536
- 0 ,
537
- ) )
529
+ match HAS_COPY_FILE_RANGE . load ( Ordering :: Relaxed ) {
530
+ NOT_PROBED => {
531
+ // EPERM can indicate seccomp filters or an immutable file.
532
+ // To distinguish these cases we probe with invalid file descriptors which should result in EBADF if the syscall is supported
533
+ // and some other error (ENOSYS or EPERM) if it's not available
534
+ let result = unsafe {
535
+ cvt ( copy_file_range ( INVALID_FD , ptr:: null_mut ( ) , INVALID_FD , ptr:: null_mut ( ) , 1 , 0 ) )
538
536
} ;
539
- if let Err ( ref copy_err) = copy_result {
540
- match copy_err. raw_os_error ( ) {
541
- Some ( libc:: ENOSYS | libc:: EPERM | libc:: EOPNOTSUPP ) => {
542
- HAS_COPY_FILE_RANGE . store ( false , Ordering :: Relaxed ) ;
543
- }
544
- _ => { }
545
- }
537
+
538
+ if matches ! ( result. map_err( |e| e. raw_os_error( ) ) , Err ( Some ( libc:: EBADF ) ) ) {
539
+ HAS_COPY_FILE_RANGE . store ( AVAILABLE , Ordering :: Relaxed ) ;
540
+ } else {
541
+ HAS_COPY_FILE_RANGE . store ( UNAVAILABLE , Ordering :: Relaxed ) ;
542
+ return CopyResult :: Fallback ( 0 ) ;
546
543
}
547
- copy_result
548
- } else {
549
- Err ( Error :: from_raw_os_error ( libc:: ENOSYS ) )
544
+ }
545
+ UNAVAILABLE => return CopyResult :: Fallback ( 0 ) ,
546
+ _ => { }
547
+ } ;
548
+
549
+ let mut written = 0u64 ;
550
+ while written < max_len {
551
+ let bytes_to_copy = cmp:: min ( max_len - written, usize:: MAX as u64 ) ;
552
+ // cap to 1GB chunks in case u64::MAX is passed as max_len and the file has a non-zero seek position
553
+ // this allows us to copy large chunks without hitting EOVERFLOW,
554
+ // unless someone sets a file offset close to u64::MAX - 1GB, in which case a fallback would be required
555
+ let bytes_to_copy = cmp:: min ( bytes_to_copy as usize , 0x4000_0000usize ) ;
556
+ let copy_result = unsafe {
557
+ // We actually don't have to adjust the offsets,
558
+ // because copy_file_range adjusts the file offset automatically
559
+ cvt ( copy_file_range ( reader, ptr:: null_mut ( ) , writer, ptr:: null_mut ( ) , bytes_to_copy, 0 ) )
550
560
} ;
561
+
551
562
match copy_result {
552
563
Ok ( 0 ) if written == 0 => {
553
564
// fallback to work around several kernel bugs where copy_file_range will fail to
@@ -567,11 +578,14 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) ->
567
578
libc:: ENOSYS | libc:: EXDEV | libc:: EINVAL | libc:: EPERM | libc:: EOPNOTSUPP ,
568
579
) => {
569
580
// Try fallback io::copy if either:
570
- // - Kernel version is < 4.5 (ENOSYS)
581
+ // - Kernel version is < 4.5 (ENOSYS¹ )
571
582
// - Files are mounted on different fs (EXDEV)
572
583
// - copy_file_range is broken in various ways on RHEL/CentOS 7 (EOPNOTSUPP)
573
- // - copy_file_range is disallowed, for example by seccomp (EPERM)
584
+ // - the file is immutable or the copy_file_range syscall is blocked by seccomp¹ (EPERM)
574
585
// - copy_file_range cannot be used with pipes or device nodes (EINVAL)
586
+ //
587
+ // ¹ these cases should be detected by the initial probe but we handle them here
588
+ // anyway in case syscall interception changes during runtime
575
589
assert_eq ! ( written, 0 ) ;
576
590
CopyResult :: Fallback ( 0 )
577
591
}
0 commit comments