rustdoc/html/
escape.rs

//! HTML escaping.
//!
//! This module contains wrapper structs which can be used to HTML-escape a
//! string of text (for use in a format string).
5
6use std::fmt;
7
8use pulldown_cmark_escape::FmtWriter;
9use unicode_segmentation::UnicodeSegmentation;
10
11/// Wrapper struct which will emit the HTML-escaped version of the contained
12/// string when passed to a format string.
13pub(crate) struct Escape<'a>(pub &'a str);
14
15impl fmt::Display for Escape<'_> {
16    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
17        pulldown_cmark_escape::escape_html(FmtWriter(fmt), self.0)
18    }
19}
20
21/// Wrapper struct which will emit the HTML-escaped version of the contained
22/// string when passed to a format string.
23///
24/// This is only safe to use for text nodes. If you need your output to be
25/// safely contained in an attribute, use [`Escape`]. If you don't know the
26/// difference, use [`Escape`].
27pub(crate) struct EscapeBodyText<'a>(pub &'a str);
28
29impl fmt::Display for EscapeBodyText<'_> {
30    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
31        pulldown_cmark_escape::escape_html_body_text(FmtWriter(fmt), self.0)
32    }
33}
34
35/// Wrapper struct which will emit the HTML-escaped version of the contained
36/// string when passed to a format string. This function also word-breaks
37/// CamelCase and snake_case word names.
38///
39/// This is only safe to use for text nodes. If you need your output to be
40/// safely contained in an attribute, use [`Escape`]. If you don't know the
41/// difference, use [`Escape`].
42pub(crate) struct EscapeBodyTextWithWbr<'a>(pub &'a str);
43
44impl fmt::Display for EscapeBodyTextWithWbr<'_> {
45    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
46        let EscapeBodyTextWithWbr(text) = *self;
47        if text.len() < 8 {
48            return EscapeBodyText(text).fmt(fmt);
49        }
50        let mut last = 0;
51        let mut it = text.grapheme_indices(true).peekable();
52        let _ = it.next(); // don't insert wbr before first char
53        while let Some((i, s)) = it.next() {
54            let pk = it.peek();
55            if s.chars().all(|c| c.is_whitespace()) {
56                // don't need "First <wbr>Second"; the space is enough
57                EscapeBodyText(&text[last..i]).fmt(fmt)?;
58                last = i;
59                continue;
60            }
61            let is_uppercase = || s.chars().any(|c| c.is_uppercase());
62            let next_is_uppercase = || pk.is_none_or(|(_, t)| t.chars().any(|c| c.is_uppercase()));
63            let next_is_underscore = || pk.is_none_or(|(_, t)| t.contains('_'));
64            let next_is_colon = || pk.is_none_or(|(_, t)| t.contains(':'));
65            // Check for CamelCase.
66            //
67            // `i - last > 3` avoids turning FmRadio into Fm<wbr>Radio, which is technically
68            // correct, but needlessly bloated.
69            //
70            // is_uppercase && !next_is_uppercase checks for camelCase. HTTPSProxy,
71            // for example, should become HTTPS<wbr>Proxy.
72            //
73            // !next_is_underscore avoids turning TEST_RUN into TEST<wbr>_<wbr>RUN, which is also
74            // needlessly bloated.
75            if i - last > 3 && is_uppercase() && !next_is_uppercase() && !next_is_underscore() {
76                EscapeBodyText(&text[last..i]).fmt(fmt)?;
77                fmt.write_str("<wbr>")?;
78                last = i;
79            } else if (s.contains(':') && !next_is_colon())
80                || (s.contains('_') && !next_is_underscore())
81            {
82                EscapeBodyText(&text[last..i + 1]).fmt(fmt)?;
83                fmt.write_str("<wbr>")?;
84                last = i + 1;
85            }
86        }
87        if last < text.len() {
88            EscapeBodyText(&text[last..]).fmt(fmt)?;
89        }
90        Ok(())
91    }
92}
93
94#[cfg(test)]
95mod tests;