Skip to content

Commit

Permalink
Merge pull request #7 from ibotty/master
Browse files: browse the repository at this point in the history
wip: port to encoding_rs
  • Loading branch information
jothan authored Mar 21, 2024
2 parents ef3ae0b + 4cd5439 commit 1f652c4
Show file tree
Hide file tree
Showing 6 changed files with 24 additions and 40 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ fuzz = ["afl"]
crate-type = ["lib", "cdylib"]

[dependencies]
encoding = "0.2"
nom = "6.0"
base64 = "0.13"
idna = "0.2.0"
Expand All @@ -35,6 +34,8 @@ serde = { version = "1.0", features = ["derive"], optional=true }
memmap = { version = "0.7.0", optional=true }
pyo3 = { version = "0.13", features = ["extension-module"], optional=true }
afl = { version = "0.8", optional=true }
encoding_rs = "0.8.33"
charset = "0.1.3"

[[bin]]
name = "fuzz_mailbox"
Expand Down
8 changes: 3 additions & 5 deletions src/rfc2047.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@

use std::borrow::Cow;

use encoding::DecoderTrap;
use encoding::all::ASCII;
use encoding::label::encoding_from_whatwg_label;
use encoding_rs::{Encoding, UTF_8}; // TODO: was ASCII

use nom::branch::alt;
use nom::bytes::complete::{tag, take_while1};
Expand Down Expand Up @@ -56,13 +54,13 @@ fn _encoded_word(input: &[u8]) -> NomResult<(Cow<str>, Vec<u8>)> {
delimited(tag("?"), token, tag("?")),
terminated(encoded_text, tag("?=")))),
|(charset, _lang, encoding, text)| {
(ascii_to_string(charset), decode_text(encoding, text).unwrap_or_else(|| text.to_vec()))
(charset::decode_ascii(charset), decode_text(encoding, text).unwrap_or_else(|| text.to_vec()))
})(input)
}

fn decode_charset((charset, bytes): (Cow<str>, Vec<u8>)) -> String
{
encoding_from_whatwg_label(&charset).unwrap_or(ASCII).decode(&bytes, DecoderTrap::Replace).unwrap()
Encoding::for_label(charset.as_bytes()).unwrap_or(UTF_8).decode_without_bom_handling(&bytes).0.to_string()
}

/// Decode an encoded word.
Expand Down
24 changes: 12 additions & 12 deletions src/rfc2231.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ use std::fmt::{self, Display};
use std::str;
use std::collections::HashMap;

use encoding::label::encoding_from_whatwg_label;
use encoding::types::EncodingRef;
use encoding::DecoderTrap;
use encoding::all::ASCII;
use charset::decode_ascii;

use encoding_rs::Encoding;
use encoding_rs::UTF_8; // TODO: was ASCII

use nom::branch::alt;
use nom::bytes::complete::{tag, tag_no_case, take_while1, take_while_m_n};
Expand Down Expand Up @@ -160,13 +160,13 @@ enum Segment<'a> {
Decoded(Cow<'a, str>),
}

fn decode_segments(mut input: Vec<(u32, Segment)>, encoding: EncodingRef) -> String {
fn decode_segments(mut input: Vec<(u32, Segment)>, encoding: &'static Encoding) -> String {
input.sort_by(|a, b| a.0.cmp(&b.0));
let mut out = String::new();
let mut encoded = Vec::new();

let decode = |bytes: &mut Vec<_>, out: &mut String| {
out.push_str(&encoding.decode(&bytes, DecoderTrap::Replace).unwrap());
out.push_str(&encoding.decode_without_bom_handling(bytes).0);
bytes.clear();
};

Expand Down Expand Up @@ -197,10 +197,10 @@ fn decode_parameter_list(input: Vec<Parameter>) -> Vec<(String, String)> {
Value::Regular(v) => { simple.insert(name_norm, v.into()); },
Value::Extended(ExtendedValue::Initial{value, encoding: encoding_name, ..}) => {
let codec = match encoding_name {
Some(encoding_name) => encoding_from_whatwg_label(&ascii_to_string(encoding_name)).unwrap_or(ASCII),
None => ASCII,
Some(encoding_name) => Encoding::for_label(decode_ascii(encoding_name).as_bytes()).unwrap_or(UTF_8),
None => UTF_8,
};
simple_encoded.insert(name_norm, codec.decode(&value, DecoderTrap::Replace).unwrap());
simple_encoded.insert(name_norm, codec.decode_without_bom_handling(value.as_slice()).0.to_string()); // TODO: eliminate to_string
}
Value::Extended(ExtendedValue::Other(..)) => unreachable!(),
}
Expand All @@ -212,7 +212,7 @@ fn decode_parameter_list(input: Vec<Parameter>) -> Vec<(String, String)> {
Value::Regular(v) => ent.push((section, Segment::Decoded(v))),
Value::Extended(ExtendedValue::Initial{value, encoding: encoding_name, ..}) => {
if let Some(encoding_name) = encoding_name {
if let Some(codec) = encoding_from_whatwg_label(&ascii_to_string(encoding_name)) {
if let Some(codec) = Encoding::for_label(decode_ascii(encoding_name).as_bytes()) {
composite_encoding.insert(name_norm, codec);
}
}
Expand All @@ -226,7 +226,7 @@ fn decode_parameter_list(input: Vec<Parameter>) -> Vec<(String, String)> {

let mut composite_out = Vec::new();
for (name, segments) in composite {
let codec = composite_encoding.get(&name).cloned().unwrap_or(ASCII);
let codec = composite_encoding.get(&name).cloned().unwrap_or(UTF_8);
composite_out.push((name, decode_segments(segments, codec)));
}

Expand All @@ -243,7 +243,7 @@ fn decode_parameter_list(input: Vec<Parameter>) -> Vec<(String, String)> {
pub fn content_type(input: &[u8]) -> NomResult<(String, Vec<(String, String)>)> {
map(pair(delimited(ofws, _mime_type, ofws),
_parameter_list),
|(mt, p)| (ascii_to_string(mt).to_lowercase(), decode_parameter_list(p)))(input)
|(mt, p)| (decode_ascii(mt).to_lowercase(), decode_parameter_list(p)))(input)
}

fn _x_token(input: &[u8]) -> NomResult<&str> {
Expand Down
6 changes: 4 additions & 2 deletions src/rfc3461.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ use std::str;

use crate::util::*;

use charset::decode_ascii;

use nom::branch::alt;
use nom::bytes::complete::{take, tag, tag_no_case};
use nom::character::is_hex_digit;
Expand Down Expand Up @@ -52,7 +54,7 @@ fn _printable_xtext(input: &[u8]) -> NomResult<Vec<u8>> {
/// ```
pub fn orcpt_address(input: &[u8]) -> NomResult<(Cow<str>, Cow<str>)> {
map(separated_pair(atom::<crate::behaviour::Legacy>, tag(";"), _printable_xtext),
|(a, b)| (ascii_to_string(a), ascii_to_string(b)))(input)
|(a, b)| (decode_ascii(a), Cow::Owned(decode_ascii(&b).into_owned())))(input)
}

/// The DSN return type desired by the sender.
Expand Down Expand Up @@ -120,7 +122,7 @@ pub fn dsn_mail_params<'a>(input: &[Param<'a>]) -> Result<(DSNMailParams, Vec<Pa
return Err("ENVID over 100 bytes");
}
if let Ok((_, parsed)) = exact!(value, _printable_xtext) {
envid_val = Some(ascii_to_string(parsed).into());
envid_val = Some(decode_ascii(&parsed).into());
} else {
return Err("Invalid ENVID");
}
Expand Down
19 changes: 0 additions & 19 deletions src/util.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
use std::borrow::Cow;
use std::str;

use encoding::{Encoding, DecoderTrap};
use encoding::all::ASCII;

use nom::IResult;
use nom::bytes::complete::take;
use nom::combinator::{map, recognize, verify};
Expand All @@ -15,19 +9,6 @@ pub(crate) type NomError<'a> = ();
/// Shortcut type for taking in bytes and spitting out a success or NomError.
pub type NomResult<'a, O, E=NomError<'a>> = IResult<&'a [u8], O, E>;

pub fn ascii_to_string<'a, T: Into<Cow<'a, [u8]>>>(i: T) -> Cow<'a, str> {
let i = i.into();

if i.is_ascii() {
match i {
Cow::Borrowed(i) => Cow::Borrowed(str::from_utf8(i).unwrap()),
Cow::Owned(i) => Cow::Owned(String::from_utf8(i).unwrap()),
}
} else {
Cow::Owned(ASCII.decode(&i, DecoderTrap::Replace).unwrap())
}
}

macro_rules! nom_fromstr {
( $type:ty, $func:path ) => {
impl std::str::FromStr for $type {
Expand Down
4 changes: 3 additions & 1 deletion src/xforward.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
//!
//! [XFORWARD]: http://www.postfix.org/XFORWARD_README.html
use charset::decode_ascii;

use nom::branch::alt;
use nom::bytes::complete::{tag, tag_no_case};
use nom::combinator::{opt, map};
Expand Down Expand Up @@ -33,7 +35,7 @@ fn unavailable(input: &[u8]) -> NomResult<Option<String>> {
}

fn value(input: &[u8]) -> NomResult<Option<String>> {
alt((unavailable, map(xtext, |x| Some(ascii_to_string(x).into()))))(input)
alt((unavailable, map(xtext, |x| Some(decode_ascii(&x).into()))))(input)
}

fn param(input: &[u8]) -> NomResult<Param> {
Expand Down

0 comments on commit 1f652c4

Please sign in to comment.