From a3c8ea3b4d3416a627588198a139c66657f2d5d3 Mon Sep 17 00:00:00 2001 From: Chris Wong Date: Thu, 22 Aug 2024 12:58:41 +1000 Subject: [PATCH] Switch to use Comrak for syntax highlighting (#438) --- docs/Cargo.lock | 27 ++++++++++++ docs/Cargo.toml | 2 +- docs/src/bin/build_page.rs | 54 ++--------------------- docs/src/highlight.rs | 44 +++++++++++++++++++ docs/src/lib.rs | 1 + docs/src/views.rs | 90 +++++++++++++++++++++----------------- 6 files changed, 127 insertions(+), 91 deletions(-) create mode 100644 docs/src/highlight.rs diff --git a/docs/Cargo.lock b/docs/Cargo.lock index ddff3f54..1f7fce2c 100644 --- a/docs/Cargo.lock +++ b/docs/Cargo.lock @@ -32,6 +32,21 @@ dependencies = [ "serde", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -82,6 +97,7 @@ dependencies = [ "once_cell", "regex", "slug", + "syntect", "typed-arena", "unicode_categories", ] @@ -198,6 +214,16 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "fancy-regex" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2" +dependencies = [ + "bit-set", + "regex", +] + [[package]] name = "flate2" version = "1.0.32" @@ -519,6 +545,7 @@ checksum = "874dcfa363995604333cf947ae9f751ca3af4522c60886774c4963943b4746b1" dependencies = [ "bincode", "bitflags", + "fancy-regex", "flate2", "fnv", "once_cell", diff --git a/docs/Cargo.toml 
b/docs/Cargo.toml index 4fb463df..b4ece88c 100644 --- a/docs/Cargo.toml +++ b/docs/Cargo.toml @@ -11,7 +11,7 @@ description = "Documentation for Maud." edition = "2021" [dependencies] -comrak = { version = "*", default-features = false } +comrak = { version = "*", default-features = false, features = ["syntect"] } maud = { path = "../maud" } serde_json = "*" syntect = "*" diff --git a/docs/src/bin/build_page.rs b/docs/src/bin/build_page.rs index b42b4963..4533a39b 100644 --- a/docs/src/bin/build_page.rs +++ b/docs/src/bin/build_page.rs @@ -1,5 +1,5 @@ use comrak::{ - nodes::{AstNode, NodeCodeBlock, NodeHeading, NodeHtmlBlock, NodeLink, NodeValue}, + nodes::{AstNode, NodeCodeBlock, NodeHeading, NodeLink, NodeValue}, Arena, }; use docs::{ @@ -14,11 +14,6 @@ use std::{ path::Path, str, }; -use syntect::{ - highlighting::{Color, ThemeSet}, - html::highlighted_html_for_string, - parsing::SyntaxSet, -}; fn main() -> Result<(), Box> { let args = env::args().collect::>(); @@ -55,7 +50,7 @@ fn build_page( .collect::>(); let page = Page::load(&arena, input_path)?; - postprocess(page.content)?; + postprocess(page.content); let markup = views::main(slug, page, &nav, version, hash); @@ -65,12 +60,10 @@ fn build_page( Ok(()) } -fn postprocess<'a>(content: &'a AstNode<'a>) -> Result<(), Box> { +fn postprocess<'a>(content: &'a AstNode<'a>) { lower_headings(content); rewrite_md_links(content); strip_hidden_code(content); - highlight_code(content)?; - Ok(()) } fn lower_headings<'a>(root: &'a AstNode<'a>) { @@ -98,8 +91,7 @@ fn strip_hidden_code<'a>(root: &'a AstNode<'a>) { for node in root.descendants() { let mut data = node.data.borrow_mut(); if let NodeValue::CodeBlock(NodeCodeBlock { info, literal, .. 
}) = &mut data.value { - let info = parse_code_block_info(info); - if !info.contains(&"rust") { + if info.split(',').map(str::trim).all(|lang| lang != "rust") { continue; } *literal = strip_hidden_code_inner(literal); @@ -117,41 +109,3 @@ fn strip_hidden_code_inner(literal: &str) -> String { .collect::>(); lines.join("\n") } - -fn highlight_code<'a>(root: &'a AstNode<'a>) -> Result<(), Box> { - let ss = SyntaxSet::load_defaults_newlines(); - let ts = ThemeSet::load_defaults(); - let mut theme = ts.themes["InspiredGitHub"].clone(); - theme.settings.background = Some(Color { - r: 0xff, - g: 0xee, - b: 0xff, - a: 0xff, - }); - for node in root.descendants() { - let mut data = node.data.borrow_mut(); - if let NodeValue::CodeBlock(NodeCodeBlock { info, literal, .. }) = &mut data.value { - let info = parse_code_block_info(info); - let syntax = info - .into_iter() - .filter_map(|token| ss.find_syntax_by_token(token)) - .next() - .unwrap_or_else(|| ss.find_syntax_plain_text()); - let mut literal = std::mem::take(literal); - if !literal.ends_with('\n') { - // Syntect expects a trailing newline - literal.push('\n'); - } - let html = highlighted_html_for_string(&literal, &ss, syntax, &theme)?; - data.value = NodeValue::HtmlBlock(NodeHtmlBlock { - literal: html, - ..Default::default() - }); - } - } - Ok(()) -} - -fn parse_code_block_info(info: &str) -> Vec<&str> { - info.split(',').map(str::trim).collect() -} diff --git a/docs/src/highlight.rs b/docs/src/highlight.rs new file mode 100644 index 00000000..b297fe4a --- /dev/null +++ b/docs/src/highlight.rs @@ -0,0 +1,44 @@ +use comrak::{ + plugins::syntect::{SyntectAdapter, SyntectAdapterBuilder}, + Plugins, +}; +use std::rc::Rc; +use syntect::highlighting::{Color, ThemeSet}; + +pub struct Highlighter { + adapter: Rc, +} + +impl Highlighter { + pub fn get() -> Self { + Self { + adapter: SYNTECT_ADAPTER.with(Rc::clone), + } + } + + pub fn as_plugins(&self) -> Plugins<'_> { + let mut plugins = Plugins::default(); + 
plugins.render.codefence_syntax_highlighter = Some(&*self.adapter); + plugins + } +} + +thread_local! { + static SYNTECT_ADAPTER: Rc = Rc::new({ + SyntectAdapterBuilder::new() + .theme_set({ + let mut ts = ThemeSet::load_defaults(); + let mut theme = ts.themes["InspiredGitHub"].clone(); + theme.settings.background = Some(Color { + r: 0xff, + g: 0xee, + b: 0xff, + a: 0xff, + }); + ts.themes.insert("InspiredGitHub2".to_string(), theme); + ts + }) + .theme("InspiredGitHub2") + .build() + }); +} diff --git a/docs/src/lib.rs b/docs/src/lib.rs index f2e72ddf..f3d61137 100644 --- a/docs/src/lib.rs +++ b/docs/src/lib.rs @@ -1,3 +1,4 @@ +pub mod highlight; pub mod page; pub mod string_writer; pub mod views; diff --git a/docs/src/views.rs b/docs/src/views.rs index af1035a0..7f21efd7 100644 --- a/docs/src/views.rs +++ b/docs/src/views.rs @@ -3,50 +3,11 @@ use maud::{html, Markup, PreEscaped, Render, DOCTYPE}; use std::str; use crate::{ + highlight::Highlighter, page::{default_comrak_options, Page}, string_writer::StringWriter, }; -struct Comrak<'a>(&'a AstNode<'a>); - -impl<'a> Render for Comrak<'a> { - fn render_to(&self, buffer: &mut String) { - comrak::format_html(self.0, &default_comrak_options(), &mut StringWriter(buffer)).unwrap(); - } -} - -/// Hack! The page title is wrapped in a `Paragraph` node, which introduces an -/// extra `

<p>` tag that we don't want most of the time. -struct ComrakRemovePTags<'a>(&'a AstNode<'a>); - -impl<'a> Render for ComrakRemovePTags<'a> { - fn render(&self) -> Markup { - let mut buffer = String::new(); - comrak::format_html( - self.0, - &default_comrak_options(), - &mut StringWriter(&mut buffer), - ) - .unwrap(); - assert!(buffer.starts_with("<p>") && buffer.ends_with("</p>\n")); - PreEscaped( - buffer - .trim_start_matches("<p>") - .trim_end_matches("</p>
\n") - .to_string(), - ) - } -} - -struct ComrakText<'a>(&'a AstNode<'a>); - -impl<'a> Render for ComrakText<'a> { - fn render_to(&self, buffer: &mut String) { - comrak::format_commonmark(self.0, &default_comrak_options(), &mut StringWriter(buffer)) - .unwrap(); - } -} - pub fn main<'a>( slug: &str, page: Page<'a>, @@ -124,3 +85,52 @@ pub fn main<'a>( } } } + +struct Comrak<'a>(&'a AstNode<'a>); + +impl<'a> Render for Comrak<'a> { + fn render_to(&self, buffer: &mut String) { + let highlighter = Highlighter::get(); + comrak::format_html_with_plugins( + self.0, + &default_comrak_options(), + &mut StringWriter(buffer), + &highlighter.as_plugins(), + ) + .unwrap(); + } +} + +/// Hack! The page title is wrapped in a `Paragraph` node, which introduces an +/// extra `

<p>` tag that we don't want most of the time. +struct ComrakRemovePTags<'a>(&'a AstNode<'a>); + +impl<'a> Render for ComrakRemovePTags<'a> { + fn render(&self) -> Markup { + let mut buffer = String::new(); + let highlighter = Highlighter::get(); + comrak::format_html_with_plugins( + self.0, + &default_comrak_options(), + &mut StringWriter(&mut buffer), + &highlighter.as_plugins(), + ) + .unwrap(); + assert!(buffer.starts_with("<p>") && buffer.ends_with("</p>\n")); + PreEscaped( + buffer + .trim_start_matches("<p>") + .trim_end_matches("</p>
\n") + .to_string(), + ) + } +} + +struct ComrakText<'a>(&'a AstNode<'a>); + +impl<'a> Render for ComrakText<'a> { + fn render_to(&self, buffer: &mut String) { + comrak::format_commonmark(self.0, &default_comrak_options(), &mut StringWriter(buffer)) + .unwrap(); + } +}