Skip to content

Commit

Permalink
Unify diagnostic generation for utf8 failure
Browse files Browse the repository at this point in the history
  • Loading branch information
estebank committed Jan 18, 2025
1 parent 0b0d931 commit d565d2f
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 47 deletions.
22 changes: 2 additions & 20 deletions compiler/rustc_builtin_macros/src/source_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use rustc_expand::base::{
use rustc_expand::module::DirOwnership;
use rustc_lint_defs::BuiltinLintDiag;
use rustc_parse::parser::{ForceCollect, Parser};
use rustc_parse::{new_parser_from_file, unwrap_or_emit_fatal};
use rustc_parse::{new_parser_from_file, unwrap_or_emit_fatal, utf8_error};
use rustc_session::lint::builtin::INCOMPLETE_INCLUDE;
use rustc_span::source_map::SourceMap;
use rustc_span::{Pos, Span, Symbol};
Expand Down Expand Up @@ -211,25 +211,7 @@ pub(crate) fn expand_include_str(
}
Err(utf8err) => {
let mut err = cx.dcx().struct_span_err(sp, format!("`{path}` wasn't a utf-8 file"));
let path = PathBuf::from(path.as_str());
let start = utf8err.valid_up_to();
let note = format!("invalid utf-8 at byte `{start}`");
let msg = if let Some(len) = utf8err.error_len() {
format!("`{:?}` is not valid utf-8", &bytes[start..start + len])
} else {
note.clone()
};
let contents = String::from_utf8_lossy(&bytes[..]).to_string();
let source = cx.source_map().new_source_file(path.into(), contents);
let span = Span::with_root_ctxt(
source.normalized_byte_pos(start as u32),
source.normalized_byte_pos(start as u32),
);
if span.is_dummy() {
err.note(note);
} else {
err.span_note(span, msg);
}
utf8_error(cx.source_map(), path.as_str(), None, &mut err, utf8err, &bytes[..]);
DummyResult::any(sp, err.emit())
}
},
Expand Down
75 changes: 48 additions & 27 deletions compiler/rustc_parse/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,17 @@
#![warn(unreachable_pub)]
// tidy-alphabetical-end

use std::path::Path;
use std::path::{Path, PathBuf};
use std::str::Utf8Error;

use rustc_ast as ast;
use rustc_ast::tokenstream::TokenStream;
use rustc_ast::{AttrItem, Attribute, MetaItemInner, token};
use rustc_ast_pretty::pprust;
use rustc_data_structures::sync::Lrc;
use rustc_errors::{Diag, FatalError, PResult};
use rustc_errors::{Diag, EmissionGuarantee, FatalError, PResult};
use rustc_session::parse::ParseSess;
use rustc_span::source_map::SourceMap;
use rustc_span::{FileName, SourceFile, Span};
pub use unicode_normalization::UNICODE_VERSION as UNICODE_NORMALIZATION_VERSION;

Expand Down Expand Up @@ -74,36 +76,21 @@ pub fn new_parser_from_file<'a>(
path: &Path,
sp: Option<Span>,
) -> Result<Parser<'a>, Vec<Diag<'a>>> {
let source_file = psess.source_map().load_file(path).unwrap_or_else(|e| {
let sm = psess.source_map();
let source_file = sm.load_file(path).unwrap_or_else(|e| {
let msg = format!("couldn't read `{}`: {}", path.display(), e);
let mut err = psess.dcx().struct_fatal(msg);
if let Ok(contents) = std::fs::read(path)
&& let Err(utf8err) = String::from_utf8(contents)
&& let Err(utf8err) = String::from_utf8(contents.clone())
{
// The file exists, but it wasn't valid UTF-8.
let start = utf8err.utf8_error().valid_up_to();
let note = format!("invalid utf-8 at byte `{start}`");
let msg = if let Some(len) = utf8err.utf8_error().error_len() {
format!("`{:?}` is not valid utf-8", &utf8err.as_bytes()[start..start + len])
} else {
note.clone()
};
let contents = utf8err.into_utf8_lossy();
let source = psess.source_map().new_source_file(path.to_owned().into(), contents);
let span = Span::with_root_ctxt(
source.normalized_byte_pos(start as u32),
source.normalized_byte_pos(start as u32),
utf8_error(
sm,
&path.display().to_string(),
sp,
&mut err,
utf8err.utf8_error(),
&contents,
);
if span.is_dummy() {
err.note(note);
} else {
if sp.is_some() {
err.span_note(span, msg);
} else {
err.span(span);
err.span_label(span, msg);
}
}
}
if let Some(sp) = sp {
err.span(sp);
Expand All @@ -113,6 +100,40 @@ pub fn new_parser_from_file<'a>(
new_parser_from_source_file(psess, source_file)
}

pub fn utf8_error<E: EmissionGuarantee>(
sm: &SourceMap,
path: &str,
sp: Option<Span>,
err: &mut Diag<'_, E>,
utf8err: Utf8Error,
contents: &[u8],
) {
// The file exists, but it wasn't valid UTF-8.
let start = utf8err.valid_up_to();
let note = format!("invalid utf-8 at byte `{start}`");
let msg = if let Some(len) = utf8err.error_len() {
format!("`{:?}` is not valid utf-8", &contents[start..start + len])
} else {
note.clone()
};
let contents = String::from_utf8_lossy(contents).to_string();
let source = sm.new_source_file(PathBuf::from(path).into(), contents);
let span = Span::with_root_ctxt(
source.normalized_byte_pos(start as u32),
source.normalized_byte_pos(start as u32),
);
if span.is_dummy() {
err.note(note);
} else {
if sp.is_some() {
err.span_note(span, msg);
} else {
err.span(span);
err.span_label(span, msg);
}
}
}

/// Given a session and a `source_file`, return a parser. Returns any buffered errors from lexing
/// the initial token stream.
fn new_parser_from_source_file(
Expand Down

0 comments on commit d565d2f

Please sign in to comment.