diff --git a/src/rust/iced-x86/Cargo.toml b/src/rust/iced-x86/Cargo.toml index b30ca6d27..9c1cd8989 100644 --- a/src/rust/iced-x86/Cargo.toml +++ b/src/rust/iced-x86/Cargo.toml @@ -12,7 +12,7 @@ keywords = ["disassembler", "assembler", "x86", "amd64", "x86_64"] categories = ["no-std", "development-tools::debugging", "encoding", "hardware-support", "wasm"] # Don't include the tests when uploading to crates.io exclude = ["/src/**/test/", "/src/**/tests/", "/src/**/test_utils/"] -rust-version = "1.63.0" +rust-version = "1.79.0" # These features are documented in README.md / lib.rs [features] diff --git a/src/rust/iced-x86/src/decoder.rs b/src/rust/iced-x86/src/decoder.rs index 6d9f2a107..1389b3caf 100644 --- a/src/rust/iced-x86/src/decoder.rs +++ b/src/rust/iced-x86/src/decoder.rs @@ -101,6 +101,7 @@ use crate::instruction_internal; use crate::tuple_type_tbl::get_disp8n; use crate::*; use core::iter::FusedIterator; +use core::marker::PhantomData; use core::{cmp, fmt, mem, ptr}; #[rustfmt::skip] @@ -504,19 +505,19 @@ where // Next bytes to read if there's enough bytes left to read. // This can be 1 byte past the last byte of `data`. - // Invariant: data.as_ptr() <= data_ptr <= max_data_ptr <= data.as_ptr() + data.len() == data_ptr_end + // Invariant: data as *const u8 <= data_ptr <= max_data_ptr <= data as *const u8 + data.len() == data_ptr_end // Invariant: {data_ptr,max_data_ptr,data_ptr_end}.add(max(MAX_READ_SIZE, MAX_INSTRUCTION_LENGTH)) doesn't overflow data_ptr: usize, - // This is `data.as_ptr() + data.len()` (1 byte past the last valid byte). + // This is `data as *const u8 + data.len()` (1 byte past the last valid byte). // This is guaranteed to be >= data_ptr (see the ctor), in other words, it can't overflow to 0 - // Invariant: data.as_ptr() <= data_ptr <= max_data_ptr <= data.as_ptr() + data.len() == data_ptr_end + // Invariant: data as *const u8 <= data_ptr <= max_data_ptr <= data as *const u8 + data.len() == data_ptr_end // Invariant: {data_ptr,max_data_ptr,data_ptr_end}.add(max(MAX_READ_SIZE, MAX_INSTRUCTION_LENGTH)) doesn't overflow data_ptr_end: usize, // Set to cmp::min(self.data_ptr + IcedConstants::MAX_INSTRUCTION_LENGTH, self.data_ptr_end) // and is guaranteed to not overflow // Initialized in decode() to at most 15 bytes after data_ptr so read_uXX() fails quickly after at most 15 read bytes // (1MB prefixes won't cause it to read 1MB prefixes, it will stop after at most 15). - // Invariant: data.as_ptr() <= data_ptr <= max_data_ptr <= data.as_ptr() + data.len() == data_ptr_end + // Invariant: data as *const u8 <= data_ptr <= max_data_ptr <= data as *const u8 + data.len() == data_ptr_end // Invariant: {data_ptr,max_data_ptr,data_ptr_end}.add(max(MAX_READ_SIZE, MAX_INSTRUCTION_LENGTH)) doesn't overflow max_data_ptr: usize, // Initialized to start of data (data_ptr) when decode() is called. Used to calculate current IP/offset (when decoding) if needed. @@ -578,10 +579,20 @@ where // Offset of displacement in the instruction. Only used by get_constant_offsets() to return the offset of the displ displ_index: u8, - // Input data provided by the user. When there's no more bytes left to read we'll return a NoMoreBytes error - data: &'a [u8], + // Input data provided by the user as a raw pointer to avoid potential UB from aliasing. + // When there's no more bytes left to read we'll return a NoMoreBytes error + data: *const [u8], + phantom: PhantomData<&'a [u8]> } +// Safety: decoder is safe to send between threads +unsafe impl Send for Decoder<'_> {} + +// Safety: data read by decoder is borrowed from an immutable u8 slice, unless +// using the unsafe `try_with_slice_ptr` constructor. In this case, the caller +// is responsible for making sure the slice pointer is never written to +unsafe impl Sync for Decoder<'_> {} + macro_rules! write_base_reg { ($instruction:ident, $expr:expr) => { debug_assert!($expr < IcedConstants::REGISTER_ENUM_COUNT as u32); @@ -866,10 +877,92 @@ impl<'a> Decoder<'a> { /// assert_eq!(instr.code(), Code::Add_rm32_r32); /// assert!(instr.has_lock_prefix()); /// ``` + #[inline] + pub fn try_with_ip(bitness: u32, data: &'a [u8], ip: u64, options: u32) -> Result, IcedError> { + unsafe { Decoder::try_with_slice_ptr(bitness, data, ip, options) } + } + + /// Creates a decoder from a raw slice pointer. + /// + /// # Errors + /// + /// Fails if `bitness` is not one of 16, 32, 64. + /// + /// # Arguments + /// + /// * `bitness`: 16, 32 or 64 + /// * `data`: Data to decode + /// * `ip`: `RIP` value + /// * `options`: Decoder options, `0` or eg. `DecoderOptions::NO_INVALID_CHECK | DecoderOptions::AMD` + /// + /// # Safety + /// For other safe methods to be safe, `data` must be a valid pointer to a slice of bytes with lifetime + /// at least that of the decoder which is not mutably aliased. + /// + /// It is not *immediately* UB for `data` to fail to uphold these invariants. In that case you must + /// ensure that whenever [`decode`] or [`decode_out`] is called, the subslice at the current decoder + /// position with length equal to the to-be-decoded instruction does uphold them. + /// + /// # Examples + /// + /// ``` + /// use iced_x86::*; + /// + /// // xchg ah,[rdx+rsi+16h] + /// // xacquire lock add dword ptr [rax],5Ah + /// // vmovdqu64 zmm18{k3}{z},zmm11 + /// let bytes = b"\x86\x64\x32\x16\xF0\xF2\x83\x00\x5A\x62\xC1\xFE\xCB\x6F\xD3" as *const [u8]; + /// let mut decoder = unsafe { + /// Decoder::try_with_slice_ptr(64, bytes, 0x1234_5678, DecoderOptions::NONE).unwrap() + /// }; + /// + /// let instr1 = decoder.decode(); + /// assert_eq!(instr1.code(), Code::Xchg_rm8_r8); + /// assert_eq!(instr1.mnemonic(), Mnemonic::Xchg); + /// assert_eq!(instr1.len(), 4); + /// + /// let instr2 = decoder.decode(); + /// assert_eq!(instr2.code(), Code::Add_rm32_imm8); + /// assert_eq!(instr2.mnemonic(), Mnemonic::Add); + /// assert_eq!(instr2.len(), 5); + /// + /// let instr3 = decoder.decode(); + /// assert_eq!(instr3.code(), Code::EVEX_Vmovdqu64_zmm_k1z_zmmm512); + /// assert_eq!(instr3.mnemonic(), Mnemonic::Vmovdqu64); + /// assert_eq!(instr3.len(), 6); + /// ``` + /// + /// It's sometimes useful to decode some invalid instructions, eg. `lock add esi,ecx`. + /// Pass in [`DecoderOptions::NO_INVALID_CHECK`] to the constructor and the decoder + /// will decode some invalid encodings. + /// + /// [`DecoderOptions::NO_INVALID_CHECK`]: struct.DecoderOptions.html#associatedconstant.NO_INVALID_CHECK + /// [`decode`]: #method.decode + /// [`decode_out`]: #method.decode_out + /// + /// ``` + /// use iced_x86::*; + /// + /// // lock add esi,ecx ; lock not allowed + /// let bytes = b"\xF0\x01\xCE" as *const [u8]; + /// let mut decoder = unsafe { + /// Decoder::try_with_slice_ptr(64, bytes, 0x1234_5678, DecoderOptions::NONE).unwrap() + /// }; + /// let instr = decoder.decode(); + /// assert_eq!(instr.code(), Code::INVALID); + /// + /// // We want to decode some instructions with invalid encodings + /// let mut decoder = unsafe { + /// Decoder::try_with_slice_ptr(64, bytes, 0x1234_5678, DecoderOptions::NO_INVALID_CHECK).unwrap() + /// }; + /// let instr = decoder.decode(); + /// assert_eq!(instr.code(), Code::Add_rm32_r32); + /// assert!(instr.has_lock_prefix()); + /// ``` #[allow(clippy::missing_inline_in_public_items)] #[allow(clippy::let_unit_value)] #[allow(trivial_casts)] - pub fn try_with_ip(bitness: u32, data: &'a [u8], ip: u64, options: u32) -> Result, IcedError> { + pub unsafe fn try_with_slice_ptr(bitness: u32, data: *const [u8], ip: u64, options: u32) -> Result, IcedError> { let is64b_mode; let default_code_size; let default_operand_size; @@ -903,13 +996,13 @@ impl<'a> Decoder<'a> { } _ => return Err(IcedError::new("Invalid bitness")), } - let data_ptr_end = data.as_ptr() as usize + data.len(); - if data_ptr_end < data.as_ptr() as usize || { + let data_ptr_end = data as *const u8 as usize + data.len(); + if data_ptr_end < data as *const u8 as usize || { // Verify that max_data_ptr can never overflow and that data_ptr.add(N) can't overflow. // Both of them can equal data_ptr_end (1 byte past the last valid byte). // When reading a u8/u16/u32..., we calculate data_ptr.add({1,2,4,...MAX_READ_SIZE}) so it must not overflow. // In decode(), we calculate data_ptr.add(MAX_INSTRUCTION_LENGTH) so it must not overflow. - data_ptr_end.wrapping_add(cmp::max(IcedConstants::MAX_INSTRUCTION_LENGTH, Decoder::MAX_READ_SIZE)) < data.as_ptr() as usize + data_ptr_end.wrapping_add(cmp::max(IcedConstants::MAX_INSTRUCTION_LENGTH, Decoder::MAX_READ_SIZE)) < data as *const u8 as usize } { return Err(IcedError::new("Invalid slice")); } @@ -998,10 +1091,10 @@ impl<'a> Decoder<'a> { Ok(Decoder { ip, - data_ptr: data.as_ptr() as usize, + data_ptr: data as *const u8 as usize, data_ptr_end, - max_data_ptr: data.as_ptr() as usize, - instr_start_data_ptr: data.as_ptr() as usize, + max_data_ptr: data as *const u8 as usize, + instr_start_data_ptr: data as *const u8 as usize, handlers_map0: get_handlers(&tables.handlers_map0), handlers_vex_map0, handlers_vex: [handlers_vex_0f, handlers_vex_0f38, handlers_vex_0f3a], @@ -1027,6 +1120,7 @@ impl<'a> Decoder<'a> { default_code_size, displ_index: 0, data, + phantom: PhantomData }) } @@ -1080,7 +1174,7 @@ impl<'a> Decoder<'a> { #[must_use] #[inline] pub fn position(&self) -> usize { - self.data_ptr - self.data.as_ptr() as usize + self.data_ptr - self.data as *const u8 as usize } /// Sets the current data position, which is the index into the data passed to the constructor. @@ -1131,7 +1225,7 @@ impl<'a> Decoder<'a> { } else { // - We verified the new offset above. // - Referencing 1 byte past the last valid byte is safe as long as we don't dereference it. - self.data_ptr = self.data.as_ptr() as usize + new_pos; + self.data_ptr = self.data as *const u8 as usize + new_pos; Ok(()) } }