From 3dcb46738356b97bd49dbef114c673f702558293 Mon Sep 17 00:00:00 2001
From: Wim Looman
Date: Fri, 16 Sep 2022 00:15:21 +0200
Subject: [PATCH] Replace client-side highlight.js by server-side syntect
highlighting
This is set up to only support the same languages we targeted before:
Rust, Markdown and TOML.
---
.gitmodules | 6 +
Cargo.lock | 72 +
Cargo.toml | 2 +
assets/syntaxes/Extras/TOML | 1 +
assets/syntaxes/Packages | 1 +
build.rs | 37 +
dockerfiles/Dockerfile | 1 +
src/web/crate_details.rs | 4 +-
src/web/markdown.rs | 106 ++
src/web/mod.rs | 20 +-
src/web/page/templates.rs | 31 +
src/web/source.rs | 59 +-
src/web/statics.rs | 2 +-
templates/core/about/builds.html | 24 +-
templates/core/about/metadata.html | 12 +-
templates/crate/details.html | 9 -
templates/crate/source.html | 9 +-
templates/macros.html | 49 -
templates/style/_syntax-themes.scss | 59 +
templates/style/_syntax.scss | 28 +
templates/style/style.scss | 2 +-
vendor/highlightjs/LICENSE | 29 -
vendor/highlightjs/highlight.min.js | 1296 ------------------
vendor/highlightjs/languages/ini.min.js | 16 -
vendor/highlightjs/languages/markdown.min.js | 29 -
vendor/highlightjs/languages/rust.min.js | 21 -
vendor/highlightjs/styles/dark.min.css | 1 -
vendor/highlightjs/styles/github.min.css | 1 -
28 files changed, 398 insertions(+), 1529 deletions(-)
create mode 100644 .gitmodules
create mode 160000 assets/syntaxes/Extras/TOML
create mode 160000 assets/syntaxes/Packages
create mode 100644 src/web/markdown.rs
create mode 100644 templates/style/_syntax-themes.scss
create mode 100644 templates/style/_syntax.scss
delete mode 100644 vendor/highlightjs/LICENSE
delete mode 100644 vendor/highlightjs/highlight.min.js
delete mode 100644 vendor/highlightjs/languages/ini.min.js
delete mode 100644 vendor/highlightjs/languages/markdown.min.js
delete mode 100644 vendor/highlightjs/languages/rust.min.js
delete mode 100644 vendor/highlightjs/styles/dark.min.css
delete mode 100644 vendor/highlightjs/styles/github.min.css
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..7c44f26c6
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,6 @@
+[submodule "assets/syntaxes/Packages"]
+ path = assets/syntaxes/Packages
+ url = https://github.com/sublimehq/Packages
+[submodule "assets/syntaxes/Extras/TOML"]
+ path = assets/syntaxes/Extras/TOML
+ url = https://github.com/jasonwilliams/sublime_toml_highlighting
diff --git a/Cargo.lock b/Cargo.lock
index 4e1dcb249..8a15abee3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -579,6 +579,30 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
+[[package]]
+name = "bincode"
+version = "1.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "bit-set"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
+dependencies = [
+ "bit-vec",
+]
+
+[[package]]
+name = "bit-vec"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
+
[[package]]
name = "bitflags"
version = "1.3.2"
@@ -1359,6 +1383,7 @@ dependencies = [
"string_cache",
"string_cache_codegen",
"strum",
+ "syntect",
"systemstat",
"tempfile",
"tera",
@@ -1485,6 +1510,16 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
+[[package]]
+name = "fancy-regex"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d6b8560a05112eb52f04b00e5d3790c0dd75d9d980eb8a122fb23b92a623ccf"
+dependencies = [
+ "bit-set",
+ "regex",
+]
+
[[package]]
name = "fastrand"
version = "1.8.0"
@@ -2797,6 +2832,12 @@ dependencies = [
"cc",
]
+[[package]]
+name = "linked-hash-map"
+version = "0.5.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
+
[[package]]
name = "linux-raw-sys"
version = "0.0.46"
@@ -4808,6 +4849,28 @@ dependencies = [
"unicode-xid",
]
+[[package]]
+name = "syntect"
+version = "5.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c6c454c27d9d7d9a84c7803aaa3c50cd088d2906fe3c6e42da3209aa623576a8"
+dependencies = [
+ "bincode",
+ "bitflags",
+ "fancy-regex",
+ "flate2",
+ "fnv",
+ "lazy_static",
+ "once_cell",
+ "regex-syntax",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "thiserror",
+ "walkdir",
+ "yaml-rust",
+]
+
[[package]]
name = "systemstat"
version = "0.2.2"
@@ -5842,6 +5905,15 @@ version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "114ba2b24d2167ef6d67d7d04c8cc86522b87f490025f39f0303b7db5bf5e3d8"
+[[package]]
+name = "yaml-rust"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
+dependencies = [
+ "linked-hash-map",
+]
+
[[package]]
name = "yansi"
version = "0.5.1"
diff --git a/Cargo.toml b/Cargo.toml
index f1e06cead..9be3cb53d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -46,6 +46,7 @@ backtrace = "0.3.61"
failure = "0.1.8"
thiserror = "1.0.26"
comrak = { version = "0.14.0", default-features = false }
+syntect = { version = "5.0.0", default-features = false, features = ["parsing", "html", "dump-load", "regex-fancy"] }
toml = "0.5"
schemamama = "0.3"
schemamama_postgres = "0.3"
@@ -129,6 +130,7 @@ walkdir = "2"
anyhow = { version = "1.0.42", features = ["backtrace"] }
grass = { version = "0.11.0", default-features = false }
once_cell = { version = "1.4.0", features = ["parking_lot"] }
+syntect = { version = "5.0.0", default-features = false, features = ["parsing", "dump-create", "yaml-load", "regex-fancy"] }
[[bench]]
name = "compression"
diff --git a/assets/syntaxes/Extras/TOML b/assets/syntaxes/Extras/TOML
new file mode 160000
index 000000000..ed3843890
--- /dev/null
+++ b/assets/syntaxes/Extras/TOML
@@ -0,0 +1 @@
+Subproject commit ed38438900d6b128353cd1aa1364e2e3b8ffb8a2
diff --git a/assets/syntaxes/Packages b/assets/syntaxes/Packages
new file mode 160000
index 000000000..7d9ed80ba
--- /dev/null
+++ b/assets/syntaxes/Packages
@@ -0,0 +1 @@
+Subproject commit 7d9ed80ba4935847c2290237c915e55b5371cfc0
diff --git a/build.rs b/build.rs
index 2fc2ffb77..5893d2bdd 100644
--- a/build.rs
+++ b/build.rs
@@ -36,6 +36,13 @@ mod tracked {
Ok(())
}
+ /// Passes every entry found by walking `path` to [`track`].
+ ///
+ /// # Errors
+ ///
+ /// Returns the first error raised while walking the tree or by [`track`].
+ pub(crate) fn track_recursive(path: impl AsRef<std::path::Path>) -> Result<()> {
+     for entry in walkdir::WalkDir::new(path) {
+         track(entry?.path())?;
+     }
+     Ok(())
+ }
+
pub(crate) fn read(path: impl AsRef) -> Result> {
let path = path.as_ref();
track(path)?;
@@ -72,6 +79,7 @@ fn main() -> Result<()> {
write_git_version(out_dir)?;
compile_sass(out_dir)?;
write_known_targets(out_dir)?;
+ compile_syntax(out_dir)?;
Ok(())
}
@@ -171,3 +179,32 @@ fn write_known_targets(out_dir: &Path) -> Result<()> {
Ok(())
}
+
+/// Builds the syntax set used for highlighting and dumps it into `out_dir`.
+///
+/// The set contains syntect's plain-text syntax plus whatever is loaded from the Rust,
+/// Markdown and TOML paths below. It is written uncompressed to `syntect.packdump`.
+///
+/// # Errors
+///
+/// Fails if a syntax path cannot be tracked or loaded, or if the dump cannot be written.
+fn compile_syntax(out_dir: &Path) -> Result<()> {
+    use syntect::{dumps::dump_to_uncompressed_file, parsing::SyntaxSetBuilder};
+
+    /// Hands `path` to `tracked::track_recursive`, then loads its syntaxes into `builder`.
+    fn tracked_add_from_folder(
+        builder: &mut SyntaxSetBuilder,
+        path: impl AsRef<Path>,
+    ) -> Result<()> {
+        // There's no easy way to know exactly which files matter, so just track everything in the
+        // folder
+        tracked::track_recursive(&path)?;
+        builder.add_from_folder(path, true)?;
+        Ok(())
+    }
+
+    let mut builder = SyntaxSetBuilder::new();
+    builder.add_plain_text_syntax();
+    tracked_add_from_folder(&mut builder, "assets/syntaxes/Packages/Rust/")?;
+    // Some of the extended syntaxes fail to compile, so only load the primary markdown syntax
+    tracked_add_from_folder(
+        &mut builder,
+        "assets/syntaxes/Packages/Markdown/Markdown.sublime-syntax",
+    )?;
+    tracked_add_from_folder(&mut builder, "assets/syntaxes/Extras/TOML/")?;
+
+    dump_to_uncompressed_file(&builder.build(), out_dir.join("syntect.packdump"))?;
+
+    Ok(())
+}
diff --git a/dockerfiles/Dockerfile b/dockerfiles/Dockerfile
index 0a6008a85..52a249bcc 100644
--- a/dockerfiles/Dockerfile
+++ b/dockerfiles/Dockerfile
@@ -50,6 +50,7 @@ COPY src src/
RUN find src -name "*.rs" -exec touch {} \;
COPY templates/style templates/style
COPY vendor vendor/
+COPY assets assets/
RUN cargo build --release
diff --git a/src/web/crate_details.rs b/src/web/crate_details.rs
index 17cf144bd..3610a47e1 100644
--- a/src/web/crate_details.rs
+++ b/src/web/crate_details.rs
@@ -1,4 +1,4 @@
-use super::{match_version, redirect_base, render_markdown, MatchSemver, MetaData};
+use super::{markdown, match_version, redirect_base, MatchSemver, MetaData};
use crate::utils::{get_correct_docsrs_style_file, report_error};
use crate::{
db::Pool,
@@ -69,7 +69,7 @@ where
{
markdown
.as_ref()
- .map(|markdown| render_markdown(markdown))
+ .map(|markdown| markdown::render(markdown))
.serialize(serializer)
}
diff --git a/src/web/markdown.rs b/src/web/markdown.rs
new file mode 100644
index 000000000..833c38878
--- /dev/null
+++ b/src/web/markdown.rs
@@ -0,0 +1,106 @@
+use crate::error::Result;
+use comrak::{
+ adapters::SyntaxHighlighterAdapter, ComrakExtensionOptions, ComrakOptions, ComrakPlugins,
+ ComrakRenderPlugins,
+};
+use once_cell::sync::Lazy;
+use std::collections::HashMap;
+use std::fmt::Write;
+
+/// Comrak syntax-highlighting plugin that delegates to this module's helpers.
+#[derive(Debug)]
+struct CodeAdapter;
+
+impl SyntaxHighlighterAdapter for CodeAdapter {
+    /// Returns the markup for the contents of a code block via [`highlight_code`].
+    fn highlight(&self, lang: Option<&str>, code: &str) -> String {
+        highlight_code(lang, code)
+    }
+
+    /// Renders the opening `<pre>` tag carrying `attributes`.
+    fn build_pre_tag(&self, attributes: &HashMap<String, String>) -> String {
+        build_opening_tag("pre", attributes)
+    }
+
+    /// Renders the opening `<code>` tag carrying `attributes`.
+    fn build_code_tag(&self, attributes: &HashMap<String, String>) -> String {
+        build_opening_tag("code", attributes)
+    }
+}
+
+/// Renders `<tag attr="value" ...>` for the given attributes.
+///
+/// Attribute values are HTML-escaped before being placed inside the double quotes, so a
+/// value containing `"`, `<`, `>`, `&` or `'` cannot terminate the attribute or the tag.
+/// Attributes are emitted in the map's iteration order.
+fn build_opening_tag(tag: &str, attributes: &HashMap<String, String>) -> String {
+    let mut tag_parts = format!("<{tag}");
+    for (attr, val) in attributes {
+        // `&` has to be replaced first, otherwise the entities added below get re-escaped
+        let escaped = val
+            .replace('&', "&amp;")
+            .replace('<', "&lt;")
+            .replace('>', "&gt;")
+            .replace('"', "&quot;")
+            .replace('\'', "&#x27;");
+        write!(tag_parts, " {attr}=\"{escaped}\"").expect("writing to a String cannot fail");
+    }
+    tag_parts.push('>');
+    tag_parts
+}
+
+/// Highlights `code` as class-annotated HTML, reporting failures to the caller.
+///
+/// The syntax is looked up by `lang` token first, then by the first line of `code`, and
+/// finally falls back to plain text. Generated class names carry the `syntax-` prefix.
+///
+/// # Errors
+///
+/// Returns an error if syntect fails while processing a line.
+///
+/// # Panics
+///
+/// Panics on first use if the syntax dump embedded at build time cannot be deserialized.
+pub fn try_highlight_code(lang: Option<&str>, code: &str) -> Result<String> {
+    use syntect::{
+        html::{ClassStyle, ClassedHTMLGenerator},
+        parsing::SyntaxSet,
+        util::LinesWithEndings,
+    };
+
+    // Syntax dump read from `OUT_DIR` at compile time
+    static SYNTAX_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/syntect.packdump"));
+    // Deserialized once, on first access
+    static SYNTAXES: Lazy<SyntaxSet> = Lazy::new(|| {
+        let syntaxes: SyntaxSet = syntect::dumps::from_uncompressed_data(SYNTAX_DATA)
+            .expect("the syntax dump embedded at build time should deserialize");
+        let names = syntaxes
+            .syntaxes()
+            .iter()
+            .map(|s| &s.name)
+            .collect::<Vec<_>>();
+        log::debug!("known syntaxes {names:?}");
+        syntaxes
+    });
+
+    let syntax = lang
+        .and_then(|lang| SYNTAXES.find_syntax_by_token(lang))
+        .or_else(|| SYNTAXES.find_syntax_by_first_line(code))
+        .unwrap_or_else(|| SYNTAXES.find_syntax_plain_text());
+
+    log::trace!("Using syntax {:?} for language {lang:?}", syntax.name);
+
+    let mut html_generator = ClassedHTMLGenerator::new_with_class_style(
+        syntax,
+        &SYNTAXES,
+        ClassStyle::SpacedPrefixed { prefix: "syntax-" },
+    );
+
+    for line in LinesWithEndings::from(code) {
+        html_generator.parse_html_for_line_which_includes_newline(line)?;
+    }
+
+    Ok(html_generator.finalize())
+}
+
+/// Highlights `code` as HTML, never failing.
+///
+/// On success this is the markup produced by [`try_highlight_code`]. If highlighting fails
+/// the error is logged and the source text is returned HTML-escaped instead, so the result
+/// is always safe to embed in a page as markup.
+pub fn highlight_code(lang: Option<&str>, code: &str) -> String {
+    match try_highlight_code(lang, code) {
+        Ok(highlighted) => highlighted,
+        Err(err) => {
+            log::error!("failed while highlighting code: {err:?}");
+            // The result is emitted as raw HTML, so the fallback must not pass source text
+            // through verbatim
+            let mut escaped = String::with_capacity(code.len());
+            for ch in code.chars() {
+                match ch {
+                    '&' => escaped.push_str("&amp;"),
+                    '<' => escaped.push_str("&lt;"),
+                    '>' => escaped.push_str("&gt;"),
+                    '"' => escaped.push_str("&quot;"),
+                    '\'' => escaped.push_str("&#x27;"),
+                    other => escaped.push(other),
+                }
+            }
+            escaped
+        }
+    }
+}
+
+/// Renders markdown `text` to HTML, highlighting fenced code blocks through [`CodeAdapter`].
+///
+/// The superscript, table, autolink, tasklist and strikethrough extensions are switched on;
+/// every other comrak option keeps its default.
+pub(crate) fn render(text: &str) -> String {
+    let extension = ComrakExtensionOptions {
+        superscript: true,
+        table: true,
+        autolink: true,
+        tasklist: true,
+        strikethrough: true,
+        ..ComrakExtensionOptions::default()
+    };
+    let options = ComrakOptions {
+        extension,
+        ..ComrakOptions::default()
+    };
+
+    let adapter = CodeAdapter;
+    let plugins = ComrakPlugins {
+        render: ComrakRenderPlugins {
+            codefence_syntax_highlighter: Some(&adapter),
+        },
+    };
+
+    comrak::markdown_to_html_with_plugins(text, &options, &plugins)
+}
diff --git a/src/web/mod.rs b/src/web/mod.rs
index cf0164a0a..da144b435 100644
--- a/src/web/mod.rs
+++ b/src/web/mod.rs
@@ -81,6 +81,7 @@ mod error;
mod extensions;
mod features;
mod file;
+mod markdown;
pub(crate) mod metrics;
mod releases;
mod routes;
@@ -414,25 +415,6 @@ fn match_version(
Err(Nope::VersionNotFound)
}
-/// Wrapper around the Markdown parser and renderer to render markdown
-fn render_markdown(text: &str) -> String {
- use comrak::{markdown_to_html, ComrakExtensionOptions, ComrakOptions};
-
- let options = ComrakOptions {
- extension: ComrakExtensionOptions {
- superscript: true,
- table: true,
- autolink: true,
- tasklist: true,
- strikethrough: true,
- ..ComrakExtensionOptions::default()
- },
- ..ComrakOptions::default()
- };
-
- markdown_to_html(text, &options)
-}
-
#[must_use = "`Server` blocks indefinitely when dropped"]
pub struct Server {
inner: Listening,
diff --git a/src/web/page/templates.rs b/src/web/page/templates.rs
index 99c944b76..e8d1352ee 100644
--- a/src/web/page/templates.rs
+++ b/src/web/page/templates.rs
@@ -100,6 +100,7 @@ pub(super) fn load_templates(conn: &mut Client) -> Result {
tera.register_filter("fas", IconType::Strong);
tera.register_filter("far", IconType::Regular);
tera.register_filter("fab", IconType::Brand);
+ tera.register_filter("highlight", Highlight);
Ok(tera)
}
@@ -303,6 +304,36 @@ impl tera::Filter for IconType {
}
}
+/// Tera filter that syntax-highlights a string and wraps it in `<pre><code>`.
+///
+/// Accepts an optional `lang` argument naming the syntax; a missing or null `lang` lets
+/// the highlighter pick one itself.
+struct Highlight;
+
+impl tera::Filter for Highlight {
+    /// Highlights `value`, which must be a string.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `value` is not a string, or if `lang` is present, non-null and not a string.
+    fn filter(&self, value: &Value, args: &HashMap<String, Value>) -> TeraResult<Value> {
+        let code = value.as_str().ok_or_else(|| {
+            let msg = format!("Filter `highlight` was called on an incorrect value: got `{value}` but expected a string");
+            tera::Error::msg(msg)
+        })?;
+        let lang = match args.get("lang") {
+            Some(lang) if !lang.is_null() => Some(lang.as_str().ok_or_else(|| {
+                let msg = format!("Filter `highlight` received an incorrect type for arg `lang`: got `{lang}` but expected a string");
+                tera::Error::msg(msg)
+            })?),
+            _ => None,
+        };
+        let highlighted = crate::web::markdown::highlight_code(lang, code);
+        Ok(format!("<pre><code>{highlighted}</code></pre>").into())
+    }
+
+    /// The output is already HTML, so Tera must not escape it again.
+    fn is_safe(&self) -> bool {
+        true
+    }
+}
+
#[cfg(test)]
mod tests {
use super::*;
diff --git a/src/web/source.rs b/src/web/source.rs
index 10cd9b931..13f50da47 100644
--- a/src/web/source.rs
+++ b/src/web/source.rs
@@ -24,6 +24,30 @@ struct File {
name: String,
/// The mime type of the file
mime: String,
+ /// The extension of the file, if it has one
+ extension: Option,
+}
+
+impl File {
+    /// Builds a `File` from a path and the mime type reported for it.
+    ///
+    /// A path containing `/` is reduced to its first component and gets the `dir` mime type
+    /// and no extension. Otherwise the whole path is the name, `mime` is kept, and the
+    /// extension is the text after the last `.`. A name with nothing before its last `.`
+    /// (such as `.gitignore`) has no extension, while `.rustfmt.toml` yields `toml`.
+    fn from_path_and_mime(path: &str, mime: &str) -> File {
+        let (name, mime, extension) = match path.split_once('/') {
+            Some((dir, _)) => (dir, "dir", None),
+            None => {
+                // A leading dot marks a hidden file rather than separating an extension
+                let extension = path
+                    .rsplit_once('.')
+                    .filter(|(stem, _)| !stem.is_empty())
+                    .map(|(_, ext)| ext);
+
+                (path, mime, extension)
+            }
+        };
+
+        Self {
+            name: name.to_owned(),
+            mime: mime.to_owned(),
+            extension: extension.map(str::to_owned),
+        }
+    }
 }
/// A list of source files
@@ -98,22 +122,8 @@ impl FileList {
}
// look only files for req_path
- if path.starts_with(req_path) {
- // remove req_path from path to reach files in this directory
- let path = path.replace(req_path, "");
- let path_splited: Vec<&str> = path.split('/').collect();
-
- // if path have '/' it is a directory
- let mime = if path_splited.len() > 1 {
- "dir".to_owned()
- } else {
- mime.to_owned()
- };
-
- let file = File {
- name: path_splited[0].to_owned(),
- mime,
- };
+ if let Some(path) = path.strip_prefix(req_path) {
+ let file = File::from_path_and_mime(path, mime);
// avoid adding duplicates, a directory may occur more than once
if !file_list.contains(&file) {
@@ -163,8 +173,8 @@ impl FileList {
struct SourcePage {
file_list: FileList,
show_parent_link: bool,
+ file: Option,
file_content: Option,
- is_rust_source: bool,
canonical_url: String,
}
@@ -271,20 +281,25 @@ pub fn source_browser_handler(req: &mut Request) -> IronResult {
None
};
- let (file_content, is_rust_source) = if let Some(blob) = blob {
+ let (file, file_content) = if let Some(blob) = blob {
// serve the file with DatabaseFileHandler if file isn't text and not empty
if !blob.mime.starts_with("text") && !blob.is_empty() {
return Ok(DbFile(blob).serve());
} else if blob.mime.starts_with("text") && !blob.is_empty() {
+ let path = blob
+ .path
+ .rsplit_once('/')
+ .map(|(_, path)| path)
+ .unwrap_or(&blob.path);
(
+ Some(File::from_path_and_mime(path, &blob.mime)),
String::from_utf8(blob.content).ok(),
- blob.path.ends_with(".rs"),
)
} else {
- (None, false)
+ (None, None)
}
} else {
- (None, false)
+ (None, None)
};
let file_list = FileList::from_path(
@@ -299,8 +314,8 @@ pub fn source_browser_handler(req: &mut Request) -> IronResult {
SourcePage {
file_list,
show_parent_link: !req_path.is_empty(),
+ file,
file_content,
- is_rust_source,
canonical_url,
}
.into_response(req)
diff --git a/src/web/statics.rs b/src/web/statics.rs
index 915d71fbc..9a5925f24 100644
--- a/src/web/statics.rs
+++ b/src/web/statics.rs
@@ -258,7 +258,7 @@ mod tests {
wrapper(|env| {
let web = env.frontend();
- let files = &[("highlightjs/styles/dark.min.css", "text/css")];
+ let files = &[("vendored.css", "text/css")];
for (file, mime) in files {
let url = format!("/-/static/{}", file);
diff --git a/templates/core/about/builds.html b/templates/core/about/builds.html
index 5e4f74c26..93a47f7f3 100644
--- a/templates/core/about/builds.html
+++ b/templates/core/about/builds.html
@@ -31,11 +31,11 @@ Detecting Docs.rs
To recognize Docs.rs from build.rs
files, you can test for the environment variable DOCS_RS
, e.g.:
- {% filter dedent(levels=3) -%}
-
if std::env::var("DOCS_RS").is_ok() {
+ {% filter highlight(lang="rust") %}{% filter dedent(levels=3) -%}
+ if std::env::var("DOCS_RS").is_ok() {
// ... your code here ...
- }
- {%- endfilter %}
+ }
+ {%- endfilter %}{% endfilter %}
This approach can be helpful if you need dependencies for building the library, but not for building the documentation.
@@ -46,10 +46,10 @@
You can configure how your crate is built by adding package metadata to your Cargo.toml
, e.g.:
- {% filter dedent -%}
-
[package.metadata.docs.rs]
- rustc-args = ["--cfg", "docsrs"]
- {%- endfilter %}
+ {% filter highlight(lang="toml") %}{% filter dedent -%}
+ [package.metadata.docs.rs]
+ rustc-args = ["--cfg", "docsrs"]
+ {%- endfilter %}{% endfilter %}
Here, the compiler arguments are set so that #[cfg(docsrs)]
(not to be confused with #[cfg(doc)]
) can be used for conditional compilation.
This approach is also useful for setting cargo features.
@@ -107,11 +107,3 @@