rustdoc/html/
escape.rs

1//! HTML escaping.
2//!
//! This module contains wrapper structs which can be used to HTML-escape a
//! string of text (for use in a format string).
5
6use std::fmt;
7
8use unicode_segmentation::UnicodeSegmentation;
9
/// Display adapter that writes the wrapped string with HTML-significant
/// characters replaced by entities.
///
/// Escapes `>`, `<`, `&`, `'` and `"`; every other character is written
/// through unchanged.
pub(crate) struct Escape<'a>(pub &'a str);

impl fmt::Display for Escape<'_> {
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Only a handful of characters need escaping; see
        // http://stackoverflow.com/questions/7381974
        let text = self.0;
        // Byte offset of the first byte that has not been written out yet.
        let mut flushed = 0;
        // Every escaped character is ASCII, and in UTF-8 an ASCII byte never occurs inside a
        // multi-byte sequence, so scanning bytes finds exactly the same positions as scanning
        // chars and `pos` / `pos + 1` are always char boundaries.
        for (pos, byte) in text.bytes().enumerate() {
            let entity = match byte {
                b'>' => "&gt;",
                b'<' => "&lt;",
                b'&' => "&amp;",
                b'\'' => "&#39;",
                b'"' => "&quot;",
                _ => continue,
            };
            // Emit the untouched run before the special character, then its entity.
            fmt.write_str(&text[flushed..pos])?;
            fmt.write_str(entity)?;
            flushed = pos + 1;
        }

        // Whatever follows the last escaped character (or the whole string if none matched).
        if flushed < text.len() {
            fmt.write_str(&text[flushed..])?;
        }
        Ok(())
    }
}
43
/// Display adapter that writes the wrapped string with `>`, `<` and `&`
/// replaced by HTML entities. Quote characters are left as they are.
///
/// This is only safe to use for text nodes. If you need your output to be
/// safely contained in an attribute, use [`Escape`]. If you don't know the
/// difference, use [`Escape`].
pub(crate) struct EscapeBodyText<'a>(pub &'a str);

impl fmt::Display for EscapeBodyText<'_> {
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Only a handful of characters need escaping; see
        // http://stackoverflow.com/questions/7381974
        let mut rest = self.0;
        while let Some(pos) = rest.find(['>', '<', '&']) {
            let (head, tail) = rest.split_at(pos);
            // `find` only stops on one of the three ASCII characters above, so the first
            // byte of `tail` is that character and anything else here must be `&`.
            let entity = match tail.as_bytes()[0] {
                b'>' => "&gt;",
                b'<' => "&lt;",
                _ => "&amp;",
            };
            fmt.write_str(head)?;
            fmt.write_str(entity)?;
            // The matched character is a single byte, so skipping one byte stays on a
            // char boundary.
            rest = &tail[1..];
        }

        // Text after the last escaped character (or everything, if nothing matched).
        if !rest.is_empty() {
            fmt.write_str(rest)?;
        }
        Ok(())
    }
}
79
80/// Wrapper struct which will emit the HTML-escaped version of the contained
81/// string when passed to a format string. This function also word-breaks
82/// CamelCase and snake_case word names.
83///
84/// This is only safe to use for text nodes. If you need your output to be
85/// safely contained in an attribute, use [`Escape`]. If you don't know the
86/// difference, use [`Escape`].
87pub(crate) struct EscapeBodyTextWithWbr<'a>(pub &'a str);
88
89impl fmt::Display for EscapeBodyTextWithWbr<'_> {
90    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
91        let EscapeBodyTextWithWbr(text) = *self;
92        if text.len() < 8 {
93            return EscapeBodyText(text).fmt(fmt);
94        }
95        let mut last = 0;
96        let mut it = text.grapheme_indices(true).peekable();
97        let _ = it.next(); // don't insert wbr before first char
98        while let Some((i, s)) = it.next() {
99            let pk = it.peek();
100            if s.chars().all(|c| c.is_whitespace()) {
101                // don't need "First <wbr>Second"; the space is enough
102                EscapeBodyText(&text[last..i]).fmt(fmt)?;
103                last = i;
104                continue;
105            }
106            let is_uppercase = || s.chars().any(|c| c.is_uppercase());
107            let next_is_uppercase = || pk.is_none_or(|(_, t)| t.chars().any(|c| c.is_uppercase()));
108            let next_is_underscore = || pk.is_none_or(|(_, t)| t.contains('_'));
109            let next_is_colon = || pk.is_none_or(|(_, t)| t.contains(':'));
110            // Check for CamelCase.
111            //
112            // `i - last > 3` avoids turning FmRadio into Fm<wbr>Radio, which is technically
113            // correct, but needlessly bloated.
114            //
115            // is_uppercase && !next_is_uppercase checks for camelCase. HTTPSProxy,
116            // for example, should become HTTPS<wbr>Proxy.
117            //
118            // !next_is_underscore avoids turning TEST_RUN into TEST<wbr>_<wbr>RUN, which is also
119            // needlessly bloated.
120            if i - last > 3 && is_uppercase() && !next_is_uppercase() && !next_is_underscore() {
121                EscapeBodyText(&text[last..i]).fmt(fmt)?;
122                fmt.write_str("<wbr>")?;
123                last = i;
124            } else if (s.contains(':') && !next_is_colon())
125                || (s.contains('_') && !next_is_underscore())
126            {
127                EscapeBodyText(&text[last..i + 1]).fmt(fmt)?;
128                fmt.write_str("<wbr>")?;
129                last = i + 1;
130            }
131        }
132        if last < text.len() {
133            EscapeBodyText(&text[last..]).fmt(fmt)?;
134        }
135        Ok(())
136    }
137}
138
139#[cfg(test)]
140mod tests;