Skip to content

Commit

Permalink
Use syntect to properly process links
Browse files Browse the repository at this point in the history
  • Loading branch information
FliegendeWurst committed Jan 10, 2025
1 parent 02aa380 commit 79717c2
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 35 deletions.
17 changes: 16 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ rusqlite_migration = { version = "1", default-features = false }
rust-argon2 = "2.0.0"
sha2 = "0"
serde = { version = "1", features = ["derive"] }
syntect = { version = "5", default-features = false, features = ["html", "plist-load", "regex-fancy"] }
syntect = { version = "5", default-features = false, features = ["html", "plist-load", "regex-fancy", "yaml-load"] }
thiserror = "2"
time = { version = "0.3", features = ["macros", "serde"] }
tokio = { version = "1", features = ["full"] }
Expand All @@ -34,7 +34,6 @@ url = "2"
zstd = "0.13"
parking_lot = "0.12.1"
http = "1.1.0"
regex = { version = "1.11.1", default-features = false, features = ["std"] }

[dev-dependencies]
reqwest = { version = "0", default-features = false, features = ["cookies", "json"] }
Expand Down
33 changes: 33 additions & 0 deletions assets/LinkHighlight.sublime-syntax
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
%YAML 1.2
---
# http://www.sublimetext.com/docs/3/syntax.html
#
# Minimal syntax that only recognises links in otherwise plain text.
# Two forms are matched: explicit <scheme://...> autolinks and bare
# http/https/ftp/www. links. Link text is tagged with scopes containing
# "link" so it can be located in the rendered output.
name: LinkHighlight
file_extensions:
  - link_highlight
scope: text.highlighting
variables:
  # Named, decimal or hexadecimal character reference such as &amp; &#38; &#x26;
  # Without this definition the {{html_entity}} reference below has nothing to expand to.
  html_entity: '&([a-zA-Z0-9]+|#[0-9]+|#x[0-9a-fA-F]+);'
contexts:
  main:
    # Explicit autolink wrapped in angle brackets.
    - match: (<)((?:https?|ftp)://.*?)(>)
      scope: meta.link.inet.markdown
      captures:
        1: punctuation.definition.link.begin.markdown
        2: markup.underline.link.markdown
        3: punctuation.definition.link.end.markdown
    # Bare link: a scheme or "www." followed by a dotted domain name.
    - match: (((https|http|ftp)://)|www\.)[\w-]+(\.[\w-]+)+
      scope: markup.underline.link.markdown
      push: # After a valid domain, zero or more non-space non-< characters may follow
        - match: (?=[?!.,:*_~]*[\s<]) # Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will not be considered part of the autolink, though they may be included in the interior of the link
          pop: true
        - match: (?={{html_entity}}[?!.,:*_~]*[\s<]) # If an autolink ends in a semicolon (;), we check to see if it appears to resemble an entity reference; if the preceding text is & followed by one or more alphanumeric characters. If so, it is excluded from the autolink
          pop: true
        - match: \( # When an autolink ends in ), we scan the entire autolink for the total number of parentheses. If there is a greater number of closing parentheses than opening ones, we don’t consider the last character part of the autolink, in order to facilitate including an autolink inside a parenthesis
          push:
            - meta_scope: markup.underline.link.markdown
            - match: (?=[?!.,:*_~]*[\s<])
              pop: true
            - match: \)
              pop: true
        # An unbalanced closing parenthesis at the end is not part of the link.
        - match: (?=\)[?!.,:*_~]*[\s<])
          pop: true
        # Consume link characters; "&", "(" and ")" are taken one at a time so
        # the lookaheads above are re-tested at each of them.
        - match: '[^?!.,:*_~\s<&()]+|\S'
          scope: markup.underline.link.markdown
45 changes: 13 additions & 32 deletions src/highlight.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
use crate::db::read::Entry;
use crate::errors::Error;
use regex::{Captures, Regex};
use sha2::{Digest, Sha256};
use std::cmp::Ordering;
use std::collections::HashMap;
use std::io::Cursor;
use std::sync::LazyLock;
use syntect::highlighting::ThemeSet;
use syntect::html::{css_for_theme_with_class_style, line_tokens_to_classed_spans, ClassStyle};
use syntect::parsing::{ParseState, ScopeStack, SyntaxReference, SyntaxSet};
use syntect::parsing::{ParseState, ScopeStack, SyntaxDefinition, SyntaxReference, SyntaxSet};
use syntect::util::LinesWithEndings;

const HIGHLIGHT_LINE_LENGTH_CUTOFF: usize = 2048;
Expand All @@ -34,6 +32,11 @@ pub static DATA: LazyLock<Data> = LazyLock::new(|| {
let dark = Css::new("dark", &DARK_CSS);
let syntax_set: SyntaxSet =
syntect::dumps::from_binary(include_bytes!("../assets/newlines.packdump"));
let link_highlighting = SyntaxDefinition::load_from_str(
include_str!("../assets/LinkHighlight.sublime-syntax"), false, None).expect("loading link style");
let mut builder = syntax_set.into_builder();
builder.add(link_highlighting);
let syntax_set = builder.build();
let mut syntaxes = syntax_set.syntaxes().to_vec();
syntaxes.sort_by(|a, b| a.name.partial_cmp(&b.name).unwrap_or(Ordering::Less));

Expand All @@ -46,14 +49,6 @@ pub static DATA: LazyLock<Data> = LazyLock::new(|| {
}
});

static LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"https?://[^ "><'\t\n`]+"#).unwrap()
});

static LINK_REGEX_2: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"LINKWASTEBIN\d+END"#).unwrap()
});

/// Combines CSS content with a filename containing the hash of the content.
pub struct Css<'a> {
pub name: String,
Expand Down Expand Up @@ -82,12 +77,13 @@ impl<'a> Css<'a> {
}

fn highlight(source: &str, ext: &str) -> Result<String, Error> {
let syntax_ref = DATA
.syntax_set
.find_syntax_by_extension(ext)
let syntax_ref = (ext != "txt").then(||
DATA
.syntax_set
.find_syntax_by_extension(ext)).flatten()
.unwrap_or_else(|| {
DATA.syntax_set
.find_syntax_by_extension("txt")
.find_syntax_by_extension("link_highlight")
.expect("finding txt syntax")
});

Expand All @@ -96,18 +92,10 @@ fn highlight(source: &str, ext: &str) -> Result<String, Error> {
let mut scope_stack = ScopeStack::new();

for (mut line_number, line) in LinesWithEndings::from(source).enumerate() {
let mut links = HashMap::new();
let (mut formatted, delta) = if line.len() > HIGHLIGHT_LINE_LENGTH_CUTOFF {
// let mut links = HashMap::new();
let (formatted, delta) = if line.len() > HIGHLIGHT_LINE_LENGTH_CUTOFF {
(line.to_string(), 0)
} else {
// Add placeholder for link elements.
let line = LINK_REGEX.replace_all(&line, |x: &Captures| {
let num = links.len();
let placeholder = format!("LINKWASTEBIN{num}END");
links.insert(placeholder.clone(), x.get(0).unwrap().as_str().to_owned());
placeholder
}).to_string();

let parsed = parse_state.parse_line(&line, &DATA.syntax_set)?;
line_tokens_to_classed_spans(
&line,
Expand All @@ -128,13 +116,6 @@ fn highlight(source: &str, ext: &str) -> Result<String, Error> {
html.push_str(&"<span>".repeat(delta.abs().try_into()?));
}

// Process link element placeholders.
formatted = LINK_REGEX_2.replace_all(&formatted, |x: &Captures| {
let id = x.get(0).unwrap().as_str();
let link = links.get(id).map(|x| &**x).unwrap_or(id);
format!("<a href='{link}'>{link}</a>")
}).to_string();

// Strip stray newlines that cause vertically stretched lines.
for c in formatted.chars().filter(|c| *c != '\n') {
html.push(c);
Expand Down
1 change: 1 addition & 0 deletions templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
</header>
<main>
{% block content %}{% endblock %}
{% block content_post %}{% endblock %}
</main>
</div>
</body>
Expand Down
23 changes: 23 additions & 0 deletions templates/paste.html
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,26 @@
<li><a href="{{ base_path.join(id) }}?fmt=raw"><button>raw</button></a></li>
<li><a href="{{ base_path.join(id) }}?fmt=qr"><button>qr</button></a></li>
{% endblock %}

{% block content_post %}
<script>
// Turn each run of adjacent <span class="link"> elements into a single
// clickable <a> element whose text and target are the concatenated span text.
for (;;) {
  const first = document.querySelector("span.link");
  if (!first) {
    break;
  }
  // Read decoded text, not markup: innerHTML would yield "&amp;" for "&"
  // (and any nested tags), corrupting both the href and the visible text.
  let url = first.textContent;
  let next = first.nextSibling;
  while (next !== null && next.tagName === "SPAN" && next.classList.contains("link")) {
    const consumed = next;
    next = next.nextSibling;
    url += consumed.textContent;
    consumed.remove();
  }
  const anchor = document.createElement("a");
  // A scheme-less "www." link would otherwise be resolved relative to the
  // current page, so give it an explicit scheme.
  anchor.setAttribute("href", url.startsWith("www.") ? "http://" + url : url);
  anchor.textContent = url;
  // Swap the element directly instead of rewriting its serialized HTML with a regex.
  first.replaceWith(anchor);
}
</script>
{% endblock %}

0 comments on commit 79717c2

Please sign in to comment.