diff --git a/src/helpers.rs b/src/helpers.rs
index ea11093a17..dbfb46b4de 100644
--- a/src/helpers.rs
+++ b/src/helpers.rs
@@ -720,6 +720,55 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         interp_ok(value_place)
     }
 
+    /// Dereferences `op` as a pointer to a `base_layout` value and returns the place located
+    /// `offset_bytes` past it, typed as `value_layout`. The pointee is treated as the first
+    /// element of an array of `count` such values, as needed for vectored I/O like readv().
+    ///
+    /// Before dereferencing, the offset is validated against the array extent
+    /// (`base_layout.size * count`):
+    /// - computing the array size must not overflow,
+    /// - `offset_bytes` must lie inside the array, and
+    /// - a whole `base_layout`-sized element starting at `offset_bytes` must fit in the array.
+    ///
+    /// Any violation is reported as undefined behavior in the interpreted program.
+    fn deref_pointer_and_offset_vectored(
+        &self,
+        op: &impl Projectable<'tcx, Provenance>,
+        offset_bytes: u64,
+        base_layout: TyAndLayout<'tcx>,
+        count: usize,
+        value_layout: TyAndLayout<'tcx>,
+    ) -> InterpResult<'tcx, MPlaceTy<'tcx>> {
+        // 1. Validate the iovec array bounds.
+        let array_size = base_layout
+            .size
+            .bytes()
+            .checked_mul(count as u64)
+            .ok_or_else(|| err_ub_format!("iovec array size overflow"))?;
+
+        // 2. Check if our offset is within the array.
+        if offset_bytes >= array_size {
+            throw_ub_format!(
+                "iovec array access out of bounds: offset {} in array of size {}",
+                offset_bytes,
+                array_size
+            );
+        }
+
+        // 3. Ensure the iovec structure we're accessing is fully contained.
+        if offset_bytes.checked_add(base_layout.size.bytes()).is_none_or(|end| end > array_size) {
+            throw_ub_format!("iovec structure would extend past array bounds");
+        }
+
+        // 4. Proceed with the dereferencing.
+        let this = self.eval_context_ref();
+        let op_place = this.deref_pointer_as(op, base_layout)?;
+        let offset = Size::from_bytes(offset_bytes);
+
+        let value_place = op_place.offset(offset, value_layout, this)?;
+        interp_ok(value_place)
+    }
+
     fn deref_pointer_and_read(
         &self,
         op: &impl Projectable<'tcx, Provenance>,
diff --git a/src/shims/unix/fd.rs b/src/shims/unix/fd.rs
index e5dead1a26..3725b73f05 100644
--- a/src/shims/unix/fd.rs
+++ b/src/shims/unix/fd.rs
@@ -257,6 +257,150 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         interp_ok(())
     }
 
+    /// Emulates `readv`: reads from `fd_num` into the buffers described by the `iovcnt` `iovec`
+    /// entries starting at `iov_ptr`, filling them in order. When `offset` is `Some`, the reads
+    /// are positioned (`pread`-style) starting at that offset instead of the file position.
+    ///
+    /// Outcomes, as written to `dest` or reported through `set_last_error_and_return`:
+    /// - `0` when `iovcnt` is zero,
+    /// - `EBADF` when `fd_num` is not a known file descriptor,
+    /// - `EINVAL` when `iovcnt` is negative or the positioned-read offset is negative,
+    /// - `-1` when an individual read reports a negative result,
+    /// - otherwise the total number of bytes read; reading stops after the first buffer that
+    ///   was only partially filled.
+    fn readv(
+        &mut self,
+        fd_num: i32,
+        iov_ptr: &OpTy<'tcx>,
+        iovcnt: i32,
+        offset: Option<i128>,
+        dest: &MPlaceTy<'tcx>,
+    ) -> InterpResult<'tcx> {
+        let this = self.eval_context_mut();
+
+        // Early returns for empty or invalid cases
+        if iovcnt == 0 {
+            trace!("readv: iovcnt is 0, returning 0 bytes read.");
+            return this.write_scalar(Scalar::from_i32(0), dest);
+        }
+
+        let Some(fd) = this.machine.fds.get(fd_num) else {
+            trace!("readv: FD not found");
+            return this.set_last_error_and_return(LibcError("EBADF"), dest);
+        };
+        trace!("readv: FD mapped to {fd:?}");
+
+        // `iovcnt` comes straight from the interpreted program, so a negative value is a caller
+        // error to report as `EINVAL`, not a reason to panic the interpreter.
+        let Ok(iovcnt) = usize::try_from(iovcnt) else {
+            trace!("readv: negative iovcnt");
+            return this.set_last_error_and_return(LibcError("EINVAL"), dest);
+        };
+
+        // Get iovec layout information
+        let iovec_layout = this.libc_ty_layout("iovec");
+
+        // Scratch place that receives the result of each individual read. It is i128 so that
+        // both a byte count and the -1 error marker are representable.
+        // NOTE(review): this allocation is never deallocated, so every `readv` call leaves one
+        // machine allocation behind -- consider freeing it on all exit paths.
+        let read_dest = this.allocate(this.machine.layouts.i128, MiriMemoryKind::Machine.into())?;
+
+        // Use usize to match ssize_t semantics while staying platform-independent
+        let mut total_bytes_read: usize = 0;
+
+        let mut current_offset = offset;
+
+        // Process each iovec structure
+        for i in 0..iovcnt {
+            // Access the current iovec structure
+            let offset_bytes = iovec_layout
+                .size
+                .bytes()
+                .checked_mul(i as u64)
+                .expect("iovec array index calculation overflow");
+
+            let current_iov = this.deref_pointer_and_offset_vectored(
+                iov_ptr,
+                offset_bytes,
+                iovec_layout,
+                iovcnt,
+                iovec_layout,
+            )?;
+
+            // Extract buffer information
+            let iov_base = this.project_field_named(&current_iov, "iov_base")?;
+            let iov_base_ptr = this.read_pointer(&iov_base)?;
+
+            let iov_len = this.project_field_named(&current_iov, "iov_len")?;
+            let iov_len = usize::try_from(this.read_target_usize(&iov_len)?)
+                .expect("iovec length exceeds platform size");
+
+            if iov_len == 0 {
+                continue;
+            }
+
+            // Validate buffer access
+            let buffer_size = Size::from_bytes(iov_len);
+            this.check_ptr_access(iov_base_ptr, buffer_size, CheckInAllocMsg::MemoryAccessTest)?;
+
+            // Perform the read operation
+            let read_result = if let Some(off) = current_offset {
+                // Handle pread case
+                let Ok(off) = u64::try_from(off) else {
+                    return this.set_last_error_and_return(LibcError("EINVAL"), dest);
+                };
+
+                fd.as_unix().pread(
+                    this.machine.communicate(),
+                    off,
+                    iov_base_ptr,
+                    iov_len,
+                    &read_dest,
+                    this,
+                )?;
+                this.read_scalar(&read_dest)?.to_i128()?
+            } else {
+                // Handle regular read case
+                fd.read(&fd, this.machine.communicate(), iov_base_ptr, iov_len, &read_dest, this)?;
+                this.read_scalar(&read_dest)?.to_i128()?
+            };
+
+            // Handle read result
+            if read_result < 0 {
+                this.write_int(-1, dest)?;
+                return interp_ok(());
+            }
+
+            // Update offset for next read if preadv
+            if let Some(off) = current_offset.as_mut() {
+                // Safe addition with overflow check for offset
+                *off = off.checked_add(read_result).expect("file offset calculation overflow");
+            }
+
+            let read_result = usize::try_from(read_result).unwrap();
+
+            // Safe addition with overflow check
+            total_bytes_read = total_bytes_read
+                .checked_add(read_result)
+                .expect("total bytes read calculation overflow");
+
+            // Break if we hit EOF (partial read)
+            // Convert read_result to unsigned safely for comparison
+            if read_result < iov_len {
+                break;
+            }
+        }
+
+        trace!("readv: Total bytes read: {}", total_bytes_read);
+        this.write_int(
+            u64::try_from(total_bytes_read).expect("total bytes read exceeds u64 capacity"),
+            dest,
+        )?;
+
+        interp_ok(())
+    }
+
     fn write(
         &mut self,
         fd_num: i32,
diff --git a/src/shims/unix/foreign_items.rs b/src/shims/unix/foreign_items.rs
index 88ec32808b..5864fbb841 100644
--- a/src/shims/unix/foreign_items.rs
+++ b/src/shims/unix/foreign_items.rs
@@ -157,6 +157,12 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 let count = this.read_target_usize(count)?;
                 this.read(fd, buf, count, None, dest)?;
             }
+            "readv" => {
+                let [fd, iov, iovcnt] = this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
+                let fd = this.read_scalar(fd)?.to_i32()?;
+                let iovcnt = this.read_scalar(iovcnt)?.to_i32()?;
+                this.readv(fd, iov, iovcnt, None, dest)?;
+            }
             "write" => {
                 let [fd, buf, n] = this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
                 let fd = this.read_scalar(fd)?.to_i32()?;
diff --git a/tests/pass-dep/libc/libc-fs.rs b/tests/pass-dep/libc/libc-fs.rs
index f85abe2cc4..31b40ca663 100644
--- a/tests/pass-dep/libc/libc-fs.rs
+++ b/tests/pass-dep/libc/libc-fs.rs
@@ -38,6 +38,10 @@ fn main() {
     test_isatty();
     test_read_and_uninit();
     test_nofollow_not_symlink();
+    test_readv_basic();
+    test_readv_large_buffers();
+    test_readv_partial_and_eof();
+    test_readv_error_conditions();
 }
 
 fn test_file_open_unix_allow_two_args() {
@@ -431,3 +435,416 @@ fn test_nofollow_not_symlink() {
     let ret = unsafe { libc::open(cpath.as_ptr(), libc::O_NOFOLLOW | libc::O_CLOEXEC) };
     assert!(ret >= 0);
 }
+
+/// Tests basic functionality of the readv() system call by reading a small file
+/// with multiple buffers.
+///
+/// Verifies that:
+/// - File contents are read correctly into provided buffers
+/// - The total number of bytes read matches file size
+/// - Buffer boundaries are respected
+/// - Return values match expected behavior
+fn test_readv_basic() {
+    let bytes = b"abcdefgh";
+    let path = utils::prepare_with_content("miri_test_libc_readv.txt", bytes);
+
+    // Convert path to a null-terminated CString.
+    let name = CString::new(path.into_os_string().into_string().unwrap()).unwrap();
+    let name_ptr = name.as_ptr();
+
+    let mut first_buf = [0u8; 4];
+    let mut second_buf = [0u8; 8];
+
+    unsafe {
+        // Define iovec structures.
+        let iov: [libc::iovec; 2] = [
+            libc::iovec {
+                iov_len: first_buf.len(),
+                iov_base: first_buf.as_mut_ptr() as *mut libc::c_void,
+            },
+            libc::iovec {
+                iov_len: second_buf.len(),
+                iov_base: second_buf.as_mut_ptr() as *mut libc::c_void,
+            },
+        ];
+
+        // Open file.
+        let fd = libc::open(name_ptr, libc::O_RDONLY);
+        assert!(fd >= 0, "Failed to open file: {}", Error::last_os_error());
+
+        // Call readv with proper type conversions.
+        let iovcnt = libc::c_int::try_from(iov.len()).expect("iovec count too large for platform");
+
+        // Call readv and fail loudly: a silent early return would hide a broken shim.
+        let res = libc::readv(fd, iov.as_ptr() as *const libc::iovec, iovcnt);
+        assert!(res >= 0, "Failed to readv: {}", Error::last_os_error());
+        assert_eq!(res, 8, "readv returned an unexpected byte count");
+
+        // Close the file descriptor.
+        libc::close(fd);
+    }
+
+    // Validate buffers: the second one is only half filled and must keep its zero tail.
+    assert_eq!(first_buf, *b"abcd", "First buffer mismatch");
+    assert_eq!(second_buf, *b"efgh\0\0\0\0", "Second buffer mismatch");
+}
+
+/// Tests readv() system call with large buffer sizes and pattern verification.
+/// Uses multiple buffers (16KB, 16KB, 32KB) to read a 64KB file containing
+/// a repeating 'ABCD' pattern with markers at buffer boundaries.
+///
+/// Verifies that:
+/// - Large file contents are read correctly
+/// - Markers at buffer boundaries are preserved
+/// - Pattern integrity is maintained between markers
+/// - Memory safety with large allocations
+/// - Buffer boundary handling for larger sizes
+fn test_readv_large_buffers() {
+    const BUFFER_SIZE_1: usize = 16384; // 16KB
+    const BUFFER_SIZE_2: usize = 16384; // 16KB
+    const BUFFER_SIZE_3: usize = 32768; // 32KB
+
+    // Define our buffer sizes
+    let buffer_sizes = &[
+        BUFFER_SIZE_1, // 16KB
+        BUFFER_SIZE_2, // 16KB
+        BUFFER_SIZE_3, // 32KB
+    ];
+
+    // Create large test file with patterns and markers.
+    // Generate pattern with awareness of buffer boundaries.
+    let large_content = utils::generate_test_pattern(buffer_sizes);
+
+    let path = utils::prepare_with_content("large_readv_test.txt", &large_content);
+
+    // Create buffers based on our defined sizes.
+    let mut buffers: Vec<Vec<u8>> = buffer_sizes.iter().map(|&size| vec![0u8; size]).collect();
+
+    // Convert path to CString for libc interface.
+    let path_cstr = CString::new(path.into_os_string().into_string().unwrap()).unwrap();
+
+    let bytes_read: usize = unsafe {
+        let fd = libc::open(path_cstr.as_ptr(), libc::O_RDONLY);
+        assert!(fd > 0, "Failed to open test file");
+
+        // Create iovec array using our buffers.
+        let iov = buffers
+            .iter_mut()
+            .map(|buf| {
+                libc::iovec { iov_base: buf.as_mut_ptr() as *mut libc::c_void, iov_len: buf.len() }
+            })
+            .collect::<Vec<_>>();
+
+        // Perform readv operation.
+        let read_result = libc::readv(fd, iov.as_ptr(), iov.len() as i32);
+
+        libc::close(fd);
+        read_result.try_into().unwrap()
+    };
+
+    // Verify total bytes read.
+    let expected_total: usize = buffer_sizes.iter().sum();
+    assert_eq!(
+        bytes_read, expected_total,
+        "Unexpected bytes read. Expected {}, got {}",
+        expected_total, bytes_read
+    );
+
+    // Verify markers in each buffer with correct positioning.
+    let mut current_pos = 0;
+    for (i, buf) in buffers.iter().enumerate() {
+        let marker = format!("##MARKER{}##", i + 1);
+        let marker_len = marker.len();
+
+        // Calculate correct position for this buffer
+        let buffer_size = buf.len();
+        let marker_pos = buffer_size - marker_len;
+
+        // Read the exact number of bytes needed for the marker.
+        let content = std::str::from_utf8(&buf[marker_pos..marker_pos + marker_len])
+            .unwrap_or("Invalid UTF-8");
+
+        assert_eq!(
+            content,
+            marker,
+            "Marker {} mismatch at position {}. Expected '{}', found '{}'",
+            i + 1,
+            current_pos + marker_pos,
+            marker,
+            content
+        );
+
+        // Update position for next buffer
+        current_pos += buffer_size;
+    }
+
+    // Helper function to verify the repeating ABCD pattern.
+    let verify_pattern = |buf: &[u8], start: usize, end: usize, buffer_num: usize| {
+        // Safety check for range validity
+        if start >= end || end > buf.len() {
+            println!(
+                "Invalid range for buffer {}: start={}, end={}, len={}",
+                buffer_num,
+                start,
+                end,
+                buf.len()
+            );
+            return false;
+        }
+
+        let chunk = &buf[start..end];
+
+        // Calculate the pattern offset for alignment.
+        let pattern_offset = start % 4;
+        let expected_pattern = [b'A', b'B', b'C', b'D'];
+
+        // Verify each byte against the expected pattern at the correct offset.
+        chunk.iter().enumerate().all(|(i, &byte)| {
+            let expected = expected_pattern[(i + pattern_offset) % 4];
+            if byte != expected {
+                println!(
+                    "Mismatch at position {}: expected {}, found {}",
+                    start + i,
+                    expected as char,
+                    byte as char
+                );
+                false
+            } else {
+                true
+            }
+        })
+    };
+
+    // Adjust verification ranges and pattern alignment.
+    for (i, buf) in buffers.iter().enumerate() {
+        let buffer_num = i + 1;
+        let buffer_size = buf.len();
+        let marker_len = 11;
+
+        // Calculate correct start position based on marker alignment.
+        let start = if buffer_num == 1 { 0 } else { marker_len };
+        let end = buffer_size - marker_len;
+
+        assert!(
+            verify_pattern(buf, start, end, buffer_num),
+            "Pattern corruption detected in buffer {}. Expected aligned 'ABCD' pattern \
+             in range {}..{}",
+            buffer_num,
+            start,
+            end
+        );
+    }
+}
+
+/// Tests readv() system call behavior with EOF conditions and partial reads.
+/// Uses a test file smaller than total buffer size to verify correct handling
+/// of file boundaries and partial data transfers.
+///
+/// Verifies that:
+/// - Partial reads near EOF work correctly
+/// - Reading exactly at EOF returns 0
+/// - Buffer contents match expected data
+/// - Total bytes read matches available data
+/// - Remaining buffer space is unmodified
+fn test_readv_partial_and_eof() {
+    // Let's create a file smaller than our total buffer sizes.
+    // We'll use a structured pattern to make validation easier.
+    let test_data = b"HEADER_DATA_SECTION_ONE_DATA_SECTION_TWO_END"; // 44 bytes
+    let path = utils::prepare_with_content("partial_read_test.txt", test_data);
+
+    // Test Case 1: Normal buffers larger than file size.
+    {
+        let mut first_buf = vec![0u8; 20]; // Should be filled completely
+        let mut second_buf = vec![0u8; 20]; // Should be filled completely
+        let mut third_buf = vec![0u8; 20]; // Should be partially filled
+
+        let path_cstr = CString::new(path.to_str().unwrap()).unwrap();
+
+        let bytes_read: usize = unsafe {
+            let fd = libc::open(path_cstr.as_ptr(), libc::O_RDONLY);
+            assert!(fd > 0, "Failed to open test file");
+
+            let iov = [
+                libc::iovec {
+                    iov_base: first_buf.as_mut_ptr() as *mut libc::c_void,
+                    iov_len: first_buf.len(),
+                },
+                libc::iovec {
+                    iov_base: second_buf.as_mut_ptr() as *mut libc::c_void,
+                    iov_len: second_buf.len(),
+                },
+                libc::iovec {
+                    iov_base: third_buf.as_mut_ptr() as *mut libc::c_void,
+                    iov_len: third_buf.len(),
+                },
+            ];
+
+            let result = libc::readv(fd, iov.as_ptr(), iov.len() as i32);
+            libc::close(fd);
+            result.try_into().unwrap()
+        };
+
+        // Verify total bytes read matches file size.
+        assert_eq!(
+            bytes_read,
+            test_data.len(),
+            "Expected {} bytes read, got {}",
+            test_data.len(),
+            bytes_read
+        );
+
+        // Verify buffer contents
+        assert_eq!(&first_buf[..20], &test_data[..20], "First buffer content mismatch");
+        assert_eq!(&second_buf[..20], &test_data[20..40], "Second buffer content mismatch");
+        assert_eq!(&third_buf[..4], &test_data[40..], "Third buffer partial content mismatch");
+        assert!(third_buf[4..].iter().all(|&b| b == 0), "Third buffer tail was modified");
+    }
+
+    // Test Case 2: Reading from an offset near EOF.
+    {
+        let mut first_buf = vec![0u8; 10];
+        let mut second_buf = vec![0u8; 10];
+
+        let path_cstr = CString::new(path.to_str().unwrap()).unwrap();
+
+        let bytes_read: usize = unsafe {
+            let fd = libc::open(path_cstr.as_ptr(), libc::O_RDONLY);
+            assert!(fd > 0, "Failed to open test file");
+
+            // Seek to near end of file
+            let offset = test_data.len() - 15;
+            assert_eq!(libc::lseek(fd, offset as i64, libc::SEEK_SET), offset as i64);
+
+            let iov = [
+                libc::iovec {
+                    iov_base: first_buf.as_mut_ptr() as *mut libc::c_void,
+                    iov_len: first_buf.len(),
+                },
+                libc::iovec {
+                    iov_base: second_buf.as_mut_ptr() as *mut libc::c_void,
+                    iov_len: second_buf.len(),
+                },
+            ];
+
+            let result = libc::readv(fd, iov.as_ptr(), iov.len() as i32);
+            libc::close(fd);
+            result.try_into().unwrap()
+        };
+
+        // Should read remaining 15 bytes
+        assert_eq!(bytes_read, 15, "Expected 15 bytes read from offset, got {}", bytes_read);
+    }
+
+    // Test Case 3: Reading at EOF.
+    {
+        let mut buf = vec![0u8; 10];
+
+        let path_cstr = CString::new(path.to_str().unwrap()).unwrap();
+
+        let bytes_read: usize = unsafe {
+            let fd = libc::open(path_cstr.as_ptr(), libc::O_RDONLY);
+            assert!(fd > 0, "Failed to open test file");
+
+            // Seek to EOF
+            assert_eq!(
+                libc::lseek(fd, test_data.len() as i64, libc::SEEK_SET),
+                test_data.len() as i64
+            );
+
+            let iov = [libc::iovec {
+                iov_base: buf.as_mut_ptr() as *mut libc::c_void,
+                iov_len: buf.len(),
+            }];
+
+            let result = libc::readv(fd, iov.as_ptr(), iov.len() as i32);
+            libc::close(fd);
+            result.try_into().unwrap()
+        };
+
+        // Should read 0 bytes at EOF
+        assert_eq!(bytes_read, 0, "Expected 0 bytes read at EOF, got {}", bytes_read);
+    }
+
+    // Test Case 4: Small buffers with exact boundaries.
+    {
+        let mut first_buf = vec![0u8; 7]; // "HEADER_"
+        let mut second_buf = vec![0u8; 5]; // "DATA_"
+        let mut third_buf = vec![0u8; 7]; // "SECTION"
+
+        let path_cstr = CString::new(path.to_str().unwrap()).unwrap();
+
+        let bytes_read: usize = unsafe {
+            let fd = libc::open(path_cstr.as_ptr(), libc::O_RDONLY);
+            assert!(fd > 0, "Failed to open test file");
+
+            let iov = [
+                libc::iovec {
+                    iov_base: first_buf.as_mut_ptr() as *mut libc::c_void,
+                    iov_len: first_buf.len(),
+                },
+                libc::iovec {
+                    iov_base: second_buf.as_mut_ptr() as *mut libc::c_void,
+                    iov_len: second_buf.len(),
+                },
+                libc::iovec {
+                    iov_base: third_buf.as_mut_ptr() as *mut libc::c_void,
+                    iov_len: third_buf.len(),
+                },
+            ];
+
+            let result = libc::readv(fd, iov.as_ptr(), iov.len() as i32);
+            libc::close(fd);
+            result.try_into().unwrap()
+        };
+
+        // Verify exact buffer fills.
+        assert_eq!(
+            bytes_read, 19,
+            "Expected 19 bytes read for exact boundaries, got {}",
+            bytes_read
+        );
+        assert_eq!(&first_buf, b"HEADER_", "First buffer exact content mismatch");
+        assert_eq!(&second_buf, b"DATA_", "Second buffer exact content mismatch");
+        assert_eq!(&third_buf[..7], b"SECTION", "Third buffer exact content mismatch");
+    }
+}
+
+/// Tests error handling conditions of the readv() system call.
+/// Verifies that the implementation reports invalid file descriptors with `EBADF`.
+///
+/// Test coverage includes:
+/// - Invalid file descriptor scenarios
+fn test_readv_error_conditions() {
+    // Test Case 1: Invalid File Descriptor Scenarios.
+    {
+        let mut buffer = vec![0u8; 10];
+
+        // Create a single valid iovec structure for testing.
+        let iov = [libc::iovec {
+            iov_base: buffer.as_mut_ptr() as *mut libc::c_void,
+            iov_len: buffer.len(),
+        }];
+
+        unsafe {
+            // errno is read through `Error::last_os_error()` rather than `__errno_location`,
+            // because that libc symbol is not available on every target this test runs on.
+            // Test with negative file descriptor.
+            let result = libc::readv(-1, iov.as_ptr(), 1);
+            assert_eq!(result, -1, "Expected error for negative file descriptor");
+            assert_eq!(
+                Error::last_os_error().raw_os_error(),
+                Some(libc::EBADF),
+                "Expected EBADF for negative file descriptor"
+            );
+
+            // Test with unopened but potentially valid fd number.
+            let result = libc::readv(999999, iov.as_ptr(), 1);
+            assert_eq!(result, -1, "Expected error for invalid file descriptor");
+            assert_eq!(
+                Error::last_os_error().raw_os_error(),
+                Some(libc::EBADF),
+                "Expected EBADF for invalid file descriptor"
+            );
+        }
+    }
+}
diff --git a/tests/utils/fs.rs b/tests/utils/fs.rs
index 7340908626..80627f431b 100644
--- a/tests/utils/fs.rs
+++ b/tests/utils/fs.rs
@@ -51,3 +51,49 @@ pub fn prepare_dir(dirname: &str) -> PathBuf {
     fs::remove_dir_all(&path).ok();
     path
 }
+
+/// Generates a test pattern with markers placed at buffer boundaries
+///
+/// Arguments:
+/// * `buffer_sizes` - An array of buffer sizes that will be used in the readv operation
+///
+/// Returns:
+/// * A vector containing the test pattern with markers placed at buffer boundaries
+///
+/// The function creates a pattern by:
+/// 1. Filling the content with repeating "ABCD" sequences
+/// 2. Overwriting the last bytes of each buffer-sized region with a `##MARKER<n>##` tag
+///
+/// Regions shorter than their marker are left as plain pattern bytes.
+pub fn generate_test_pattern(buffer_sizes: &[usize]) -> Vec<u8> {
+    // Calculate total size needed for all buffers.
+    let total_size: usize = buffer_sizes.iter().sum();
+
+    // Create our base content vector.
+    let mut content = Vec::with_capacity(total_size);
+
+    // Fill with repeating ABCD pattern.
+    let base_pattern = b"ABCD";
+    while content.len() < total_size {
+        content.extend_from_slice(base_pattern);
+    }
+    content.truncate(total_size);
+
+    // Walk the buffers, tracking where each one starts inside `content`.
+    let mut cumulative_position = 0;
+    for (i, &buffer_size) in buffer_sizes.iter().enumerate() {
+        let marker = format!("##MARKER{}##", i + 1).into_bytes();
+        let marker_len = marker.len();
+
+        // Position the marker so that it ends exactly at the current buffer's end. A buffer
+        // shorter than the marker cannot hold it (and would underflow the subtraction).
+        if let Some(marker_offset) = buffer_size.checked_sub(marker_len) {
+            let marker_position = cumulative_position + marker_offset;
+            content[marker_position..marker_position + marker_len].copy_from_slice(&marker);
+        }
+
+        cumulative_position += buffer_size;
+    }
+
+    content
+}