rustdoc/html/escape.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
//! HTML escaping.
//!
//! This module contains one unit struct, which can be used to HTML-escape a
//! string of text (for use in a format string).
use std::fmt;
use unicode_segmentation::UnicodeSegmentation;
/// Wrapper struct which will emit the HTML-escaped version of the contained
/// string when passed to a format string.
pub(crate) struct Escape<'a>(pub &'a str);
impl<'a> fmt::Display for Escape<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
// Because the internet is always right, turns out there's not that many
// characters to escape: http://stackoverflow.com/questions/7381974
let Escape(s) = *self;
let pile_o_bits = s;
let mut last = 0;
for (i, ch) in s.char_indices() {
let s = match ch {
'>' => ">",
'<' => "<",
'&' => "&",
'\'' => "'",
'"' => """,
_ => continue,
};
fmt.write_str(&pile_o_bits[last..i])?;
fmt.write_str(s)?;
// NOTE: we only expect single byte characters here - which is fine as long as we
// only match single byte characters
last = i + 1;
}
if last < s.len() {
fmt.write_str(&pile_o_bits[last..])?;
}
Ok(())
}
}
/// Wrapper struct which will emit the HTML-escaped version of the contained
/// string when passed to a format string.
///
/// This is only safe to use for text nodes. If you need your output to be
/// safely contained in an attribute, use [`Escape`]. If you don't know the
/// difference, use [`Escape`].
pub(crate) struct EscapeBodyText<'a>(pub &'a str);
impl<'a> fmt::Display for EscapeBodyText<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
// Because the internet is always right, turns out there's not that many
// characters to escape: http://stackoverflow.com/questions/7381974
let EscapeBodyText(s) = *self;
let pile_o_bits = s;
let mut last = 0;
for (i, ch) in s.char_indices() {
let s = match ch {
'>' => ">",
'<' => "<",
'&' => "&",
_ => continue,
};
fmt.write_str(&pile_o_bits[last..i])?;
fmt.write_str(s)?;
// NOTE: we only expect single byte characters here - which is fine as long as we
// only match single byte characters
last = i + 1;
}
if last < s.len() {
fmt.write_str(&pile_o_bits[last..])?;
}
Ok(())
}
}
/// Wrapper struct which will emit the HTML-escaped version of the contained
/// string when passed to a format string. This function also word-breaks
/// CamelCase and snake_case word names.
///
/// This is only safe to use for text nodes. If you need your output to be
/// safely contained in an attribute, use [`Escape`]. If you don't know the
/// difference, use [`Escape`].
pub(crate) struct EscapeBodyTextWithWbr<'a>(pub &'a str);
impl<'a> fmt::Display for EscapeBodyTextWithWbr<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
let EscapeBodyTextWithWbr(text) = *self;
if text.len() < 8 {
return EscapeBodyText(text).fmt(fmt);
}
let mut last = 0;
let mut it = text.grapheme_indices(true).peekable();
let _ = it.next(); // don't insert wbr before first char
while let Some((i, s)) = it.next() {
let pk = it.peek();
if s.chars().all(|c| c.is_whitespace()) {
// don't need "First <wbr>Second"; the space is enough
EscapeBodyText(&text[last..i]).fmt(fmt)?;
last = i;
continue;
}
let is_uppercase = || s.chars().any(|c| c.is_uppercase());
let next_is_uppercase =
|| pk.map_or(true, |(_, t)| t.chars().any(|c| c.is_uppercase()));
let next_is_underscore = || pk.map_or(true, |(_, t)| t.contains('_'));
let next_is_colon = || pk.map_or(true, |(_, t)| t.contains(':'));
// Check for CamelCase.
//
// `i - last > 3` avoids turning FmRadio into Fm<wbr>Radio, which is technically
// correct, but needlessly bloated.
//
// is_uppercase && !next_is_uppercase checks for camelCase. HTTPSProxy,
// for example, should become HTTPS<wbr>Proxy.
//
// !next_is_underscore avoids turning TEST_RUN into TEST<wbr>_<wbr>RUN, which is also
// needlessly bloated.
if i - last > 3 && is_uppercase() && !next_is_uppercase() && !next_is_underscore() {
EscapeBodyText(&text[last..i]).fmt(fmt)?;
fmt.write_str("<wbr>")?;
last = i;
} else if (s.contains(':') && !next_is_colon())
|| (s.contains('_') && !next_is_underscore())
{
EscapeBodyText(&text[last..i + 1]).fmt(fmt)?;
fmt.write_str("<wbr>")?;
last = i + 1;
}
}
if last < text.len() {
EscapeBodyText(&text[last..]).fmt(fmt)?;
}
Ok(())
}
}
#[cfg(test)]
mod tests;