From 4eefcf0a6f88f69e036988f86edc46e24796951d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aurimas=20Bla=C5=BEulionis?= <0x60@pm.me> Date: Fri, 5 Jan 2024 11:09:58 +0300 Subject: [PATCH] Support for lossy Macho parsing --- src/mach/mod.rs | 66 +++++++++++++++++++++++++++++++++------------ src/mach/segment.rs | 34 ++++++++++++++++++++--- 2 files changed, 80 insertions(+), 20 deletions(-) diff --git a/src/mach/mod.rs b/src/mach/mod.rs index 163600008..a9f46e47d 100644 --- a/src/mach/mod.rs +++ b/src/mach/mod.rs @@ -147,7 +147,12 @@ impl<'a> MachO<'a> { } } /// Parses the Mach-o binary from `bytes` at `offset` - pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result<MachO<'a>> { + pub fn parse(bytes: &'a [u8], offset: usize) -> error::Result<MachO<'a>> { + Self::parse_2(bytes, offset, false) + } + + /// Parses the Mach-o binary from `bytes` at `offset` in `lossy` mode + pub fn parse_2(bytes: &'a [u8], mut offset: usize, lossy: bool) -> error::Result<MachO<'a>> { let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?; let ctx = if let Some(ctx) = maybe_ctx { ctx @@ -183,27 +188,42 @@ impl<'a> MachO<'a> { let cmd = load_command::LoadCommand::parse(bytes, offset, ctx.le)?; debug!("{} - {:?}", i, cmd); match cmd.command { - load_command::CommandVariant::Segment32(command) => { - // FIXME: we may want to be less strict about failure here, and just return an empty segment to allow parsing to continue? - segments.push(segment::Segment::from_32(bytes, &command, cmd.offset, ctx)?) - } - load_command::CommandVariant::Segment64(command) => { - segments.push(segment::Segment::from_64(bytes, &command, cmd.offset, ctx)?)
- } + load_command::CommandVariant::Segment32(command) => segments.push( + segment::Segment::from_32_2(bytes, &command, cmd.offset, ctx, lossy)?, + ), + load_command::CommandVariant::Segment64(command) => segments.push( + segment::Segment::from_64_2(bytes, &command, cmd.offset, ctx, lossy)?, + ), load_command::CommandVariant::Symtab(command) => { - symbols = Some(symbols::Symbols::parse(bytes, &command, ctx)?); + match symbols::Symbols::parse(bytes, &command, ctx) { + Ok(s) => symbols = Some(s), + Err(e) if lossy => { + debug!("CommandVariant::Symtab failed: {e}"); + } + Err(e) => return Err(e), + } } load_command::CommandVariant::LoadDylib(command) | load_command::CommandVariant::LoadUpwardDylib(command) | load_command::CommandVariant::ReexportDylib(command) | load_command::CommandVariant::LoadWeakDylib(command) | load_command::CommandVariant::LazyLoadDylib(command) => { - let lib = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?; - libs.push(lib); + match bytes.pread::<&str>(cmd.offset + command.dylib.name as usize) { + Ok(lib) => libs.push(lib), + Err(e) if lossy => { + debug!("CommandVariant::Load/Reexport Dylib failed: {e}"); + } + Err(e) => return Err(e.into()), + } } load_command::CommandVariant::Rpath(command) => { - let rpath = bytes.pread::<&str>(cmd.offset + command.path as usize)?; - rpaths.push(rpath); + match bytes.pread::<&str>(cmd.offset + command.path as usize) { + Ok(rpath) => rpaths.push(rpath), + Err(e) if lossy => { + debug!("CommandVariant::Rpath failed: {e}"); + } + Err(e) => return Err(e.into()), + } } load_command::CommandVariant::DyldInfo(command) | load_command::CommandVariant::DyldInfoOnly(command) => { @@ -229,9 +249,16 @@ impl<'a> MachO<'a> { } } load_command::CommandVariant::IdDylib(command) => { - let id = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?; - libs[0] = id; - name = Some(id); + match bytes.pread::<&str>(cmd.offset + command.dylib.name as usize) { + Ok(id) => { + libs[0] = id; + name = 
Some(id); + } + Err(e) if lossy => { + debug!("CommandVariant::IdDylib failed: {e}"); + } + Err(e) => return Err(e.into()), + } } _ => (), } @@ -502,6 +529,11 @@ pub enum Mach<'a> { impl<'a> Mach<'a> { /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary pub fn parse(bytes: &'a [u8]) -> error::Result<Self> { + Self::parse_2(bytes, false) + } + + /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary + pub fn parse_2(bytes: &'a [u8], lossy: bool) -> error::Result<Self> { let size = bytes.len(); if size < 4 { let error = error::Error::Malformed("size is smaller than a magical number".into()); @@ -515,7 +547,7 @@ impl<'a> Mach<'a> { } // we might be a regular binary _ => { - let binary = MachO::parse(bytes, 0)?; + let binary = MachO::parse_2(bytes, 0, lossy)?; Ok(Mach::Binary(binary)) } } diff --git a/src/mach/segment.rs b/src/mach/segment.rs index 3c87bd62e..75a7ffbcf 100644 --- a/src/mach/segment.rs +++ b/src/mach/segment.rs @@ -466,6 +466,16 @@ impl<'a> Segment<'a> { segment: &SegmentCommand32, offset: usize, ctx: container::Ctx, + ) -> Result<Self, error::Error> { + Self::from_32_2(bytes, segment, offset, ctx, false) + } + + pub(crate) fn from_32_2( + bytes: &'a [u8], + segment: &SegmentCommand32, + offset: usize, + ctx: container::Ctx, + lossy: bool, ) -> Result<Self, error::Error> { Ok(Segment { cmd: segment.cmd, @@ -479,11 +489,15 @@ impl<'a> Segment<'a> { initprot: segment.initprot, nsects: segment.nsects, flags: segment.flags, - data: segment_data( + data: match segment_data( bytes, u64::from(segment.fileoff), u64::from(segment.filesize), - )?, + ) { + Ok(v) => v, + Err(e) if lossy => &[], + Err(e) => return Err(e), + }, offset, raw_data: bytes, ctx, @@ -495,6 +509,16 @@ impl<'a> Segment<'a> { segment: &SegmentCommand64, offset: usize, ctx: container::Ctx, + ) -> Result<Self, error::Error> { + Self::from_64_2(bytes, segment, offset, ctx, false) + } + + pub fn from_64_2( + bytes: &'a [u8], + segment: &SegmentCommand64, + offset: usize, + ctx: container::Ctx, + lossy: bool, ) -> Result<Self, error::Error>
{ Ok(Segment { cmd: segment.cmd, @@ -508,7 +532,11 @@ impl<'a> Segment<'a> { initprot: segment.initprot, nsects: segment.nsects, flags: segment.flags, - data: segment_data(bytes, segment.fileoff, segment.filesize)?, + data: match segment_data(bytes, segment.fileoff, segment.filesize) { + Ok(v) => v, + Err(e) if lossy => &[], + Err(e) => return Err(e), + }, offset, raw_data: bytes, ctx,