rustdoc/html/highlight.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057
//! Basic syntax highlighting functionality.
//!
//! This module uses the `rustc_lexer` crate to provide token-based highlighting for
//! the HTML documentation generated by rustdoc.
//!
//! Use `render_example_with_highlighting` to highlight a Rust code example.
use std::collections::VecDeque;
use std::fmt::{Display, Write};
use rustc_data_structures::fx::FxIndexMap;
use rustc_lexer::{Cursor, LiteralKind, TokenKind};
use rustc_span::edition::Edition;
use rustc_span::symbol::Symbol;
use rustc_span::{BytePos, DUMMY_SP, Span};
use super::format::{self, Buffer};
use crate::clean::PrimitiveType;
use crate::html::escape::EscapeBodyText;
use crate::html::render::{Context, LinkFromSrc};
/// This type is needed when we want to render links on items so that readers can jump to their
/// definition.
///
/// When it is provided to the highlighter, tokens whose `Class` carries a `Span` are looked up in
/// `context.shared.span_correspondence_map` to find the target of the link.
pub(crate) struct HrefContext<'a, 'tcx> {
/// Rendering context. It gives access to the `span_correspondence_map` and is handed to the
/// helpers that build the final URL.
pub(crate) context: &'a Context<'tcx>,
/// This span contains the current file we're going through. Token offsets are added to its
/// `lo()` to build the spans used for the link lookup.
pub(crate) file_span: Span,
/// This field is used to know "how far" from the top of the directory we are to link to either
/// documentation pages or other source pages.
pub(crate) root_path: &'a str,
/// This field is used to calculate precise local URLs.
pub(crate) current_href: String,
}
/// Decorations are represented as a map from CSS class to a vector of `(start, end)` ranges.
/// Each range will be wrapped in a span with that class.
///
/// The bounds are compared against the classifier's byte position in the highlighted source, so
/// they are byte offsets rather than character counts.
#[derive(Default)]
pub(crate) struct DecorationInfo(pub(crate) FxIndexMap<&'static str, Vec<(u32, u32)>>);
/// The kind of informational tooltip attached to a rendered code example.
///
/// Every variant except `None` adds a CSS class to the example wrapper as well as an `ⓘ` link whose
/// title describes the status of the example.
#[derive(Eq, PartialEq, Clone, Copy)]
pub(crate) enum Tooltip {
/// Rendered with the title "This example is not tested".
Ignore,
/// Rendered with the title "This example deliberately fails to compile".
CompileFail,
/// Rendered with the title "This example panics".
ShouldPanic,
/// Rendered with the title "This example runs with edition {edition}".
Edition(Edition),
/// No tooltip is rendered.
None,
}
/// Highlights `src` as an inline example, returning the HTML output.
pub(crate) fn render_example_with_highlighting(
src: &str,
out: &mut Buffer,
tooltip: Tooltip,
playground_button: Option<&str>,
extra_classes: &[String],
) {
write_header(out, "rust-example-rendered", None, tooltip, extra_classes);
write_code(out, src, None, None);
write_footer(out, playground_button);
}
/// Writes the opening HTML of a highlighted code block into `out`.
///
/// In order, this emits:
///
/// * the `example-wrap` `<div>`, with an extra CSS class derived from `tooltip`;
/// * the tooltip link, unless `tooltip` is `Tooltip::None`;
/// * `extra_content`, if any;
/// * `<pre class="rust ..."><code>`, where the classes are `rust`, then `class` (when it is not
///   empty), then every entry of `extra_classes`.
fn write_header(
    out: &mut Buffer,
    class: &str,
    extra_content: Option<Buffer>,
    tooltip: Tooltip,
    extra_classes: &[String],
) {
    // Only there to keep the edition message alive while it is borrowed as a `&str` below.
    let edition_title;
    let (wrapper_class, title): (&str, Option<&str>) = match tooltip {
        Tooltip::None => ("", None),
        Tooltip::Ignore => (" ignore", Some("This example is not tested")),
        Tooltip::CompileFail => {
            (" compile_fail", Some("This example deliberately fails to compile"))
        }
        Tooltip::ShouldPanic => (" should_panic", Some("This example panics")),
        Tooltip::Edition(edition) => {
            edition_title = format!("This example runs with edition {edition}");
            (" edition", Some(edition_title.as_str()))
        }
    };

    write!(out, "<div class=\"example-wrap{wrapper_class}\">");
    if let Some(title) = title {
        write!(out, "<a href=\"#\" class=\"tooltip\" title=\"{title}\">ⓘ</a>");
    }
    if let Some(extra) = extra_content {
        out.push_buffer(extra);
    }

    // Build the whole class list once instead of duplicating the `write!` per case.
    let mut pre_classes = String::from("rust");
    if !class.is_empty() {
        pre_classes.push(' ');
        pre_classes.push_str(class);
    }
    if !extra_classes.is_empty() {
        pre_classes.push(' ');
        pre_classes.push_str(&extra_classes.join(" "));
    }
    write!(out, "<pre class=\"{pre_classes}\">");
    write!(out, "<code>");
}
/// Tells whether content of class `class2` (whose text is `text`) can be grouped with content of
/// class `class1`. "Unclassified" means `None` below.
///
/// * Two unclassified contents can always be merged.
/// * Two classified contents can be merged if they have the same variant (see
///   `Class::is_equal_to`).
/// * When exactly one side is classified:
///   * `Class::Ident` is treated like unclassified content (it has no associated CSS class), so
///     the merge is allowed;
///   * a `Class::Macro` on the `class1` side never absorbs unclassified content;
///   * otherwise the merge is only allowed if `text` is made of whitespace characters only
///     (backline, whitespace, etc).
fn can_merge(class1: Option<Class>, class2: Option<Class>, text: &str) -> bool {
    let (classified, is_first) = match (class1, class2) {
        (None, None) => return true,
        (Some(lhs), Some(rhs)) => return lhs.is_equal_to(rhs),
        (Some(only), None) => (only, true),
        (None, Some(only)) => (only, false),
    };
    match classified {
        Class::Ident(_) => true,
        Class::Macro(_) if is_first => false,
        _ => text.trim().is_empty(),
    }
}
/// This type is used as a convenience to avoid passing all its fields as arguments to the various
/// functions (which became its methods).
///
/// It buffers the tokens emitted by the classifier so that consecutive tokens sharing a mergeable
/// `Class` end up in a single HTML element. Whatever is still pending is flushed when the handler
/// is dropped.
struct TokenHandler<'a, 'tcx, F: Write> {
/// Where the generated HTML is written.
out: &'a mut F,
/// It contains the closing tag and the associated `Class`.
closing_tags: Vec<(&'static str, Class)>,
/// This is used because we don't automatically generate the closing tag on `ExitSpan` in
/// case an `EnterSpan` event with the same class follows.
pending_exit_span: Option<Class>,
/// `current_class` and `pending_elems` are used to group HTML elements with same `class`
/// attributes to reduce the DOM size.
current_class: Option<Class>,
/// We need to keep the `Class` for each element because it could contain a `Span` which is
/// used to generate links.
pending_elems: Vec<(&'a str, Option<Class>)>,
/// When it is `Some`, links to item definitions are generated.
href_context: Option<HrefContext<'a, 'tcx>>,
}
impl<F: Write> TokenHandler<'_, '_, F> {
    /// Closes the innermost opened span: the pending elements are flushed first, then the closing
    /// tag is written and the pending exit is cleared.
    ///
    /// Panics if no span is currently opened.
    fn handle_exit_span(&mut self) {
        // The entry must stay in `closing_tags` while flushing because `write_pending_elems`
        // looks at the innermost opened span, hence `last()` now and `pop()` afterwards.
        let &(_, class) = self.closing_tags.last().expect("ExitSpan without EnterSpan");
        // We flush everything just in case...
        self.write_pending_elems(Some(class));
        let (closing_tag, _) = self.closing_tags.pop().expect("ExitSpan without EnterSpan");
        exit_span(self.out, closing_tag);
        self.pending_exit_span = None;
    }

    /// Writes all the pending elements, which share a same (or at least mergeable) `Class`.
    ///
    /// * If a span is currently opened (an `EnterSpan` event was encountered) and its class can be
    ///   merged with `current_class`, the elements are written without any wrapping tag since the
    ///   `ExitSpan` event will close the enclosing one.
    /// * Otherwise, if there is more than one pending element and `current_class` is an actual
    ///   class other than `PreludeTy`, one wrapping tag is opened here, the elements are written
    ///   inside of it and the tag is closed.
    /// * Otherwise the `string` function handles opening and closing a tag for each element.
    ///
    /// It returns `true` if `current_class` must be set to `None` afterwards, which is the case
    /// whenever something was pending.
    fn write_pending_elems(&mut self, current_class: Option<Class>) -> bool {
        if self.pending_elems.is_empty() {
            return false;
        }

        let merged_with_parent = self
            .closing_tags
            .last()
            .is_some_and(|(_, parent_class)| can_merge(current_class, Some(*parent_class), ""));

        let wrapper = match current_class {
            // `PreludeTy` can never include more than an ident so it should not generate a
            // wrapping `span`.
            Some(class)
                if !merged_with_parent
                    && self.pending_elems.len() > 1
                    && !matches!(class, Class::PreludeTy(_)) =>
            {
                Some(enter_span(self.out, class, &self.href_context))
            }
            _ => None,
        };

        // Each element only opens its own tag when neither the parent nor a wrapper provides one.
        let open_tag = !merged_with_parent && wrapper.is_none();
        for (text, class) in self.pending_elems.drain(..) {
            string(self.out, EscapeBodyText(text), class, &self.href_context, open_tag);
        }
        if let Some(closing_tag) = wrapper {
            exit_span(self.out, closing_tag);
        }
        true
    }
}
impl<F: Write> Drop for TokenHandler<'_, '_, F> {
    /// Flushes whatever is still pending so that no content goes missing when the handler is
    /// dropped.
    fn drop(&mut self) {
        match self.pending_exit_span {
            Some(_) => self.handle_exit_span(),
            None => {
                self.write_pending_elems(self.current_class);
            }
        }
    }
}
/// Convert the given `src` source code into HTML by adding classes for highlighting.
///
/// This code is used to render code blocks (in the documentation) as well as the source code pages.
///
/// Some explanations on the last arguments:
///
/// In case we are rendering a code block and not a source code file, `href_context` will be `None`.
/// To put it more simply: if `href_context` is `None`, the code won't try to generate links to an
/// item definition.
///
/// `decoration_info`, when provided, lists the ranges which get wrapped into an extra `<span>`
/// carrying the associated CSS class.
///
/// More explanations about spans and how we use them here are provided in the documentation of
/// `string_without_closing_tag`.
pub(super) fn write_code(
out: &mut impl Write,
src: &str,
href_context: Option<HrefContext<'_, '_>>,
decoration_info: Option<DecorationInfo>,
) {
// This replace allows to fix how the code source with DOS backline characters is displayed.
let src = src.replace("\r\n", "\n");
let mut token_handler = TokenHandler {
out,
closing_tags: Vec::new(),
pending_exit_span: None,
current_class: None,
pending_elems: Vec::new(),
href_context,
};
// Without an `href_context` there is no file span to offset from, so `DUMMY_SP` is used.
Classifier::new(
&src,
token_handler.href_context.as_ref().map(|c| c.file_span).unwrap_or(DUMMY_SP),
decoration_info,
)
.highlight(&mut |highlight| {
match highlight {
Highlight::Token { text, class } => {
// If we received a `ExitSpan` event and then have a non-compatible `Class`, we
// need to close the `<span>`.
let need_current_class_update = if let Some(pending) =
token_handler.pending_exit_span
&& !can_merge(Some(pending), class, text)
{
token_handler.handle_exit_span();
true
// If the two `Class` are different, time to flush the current content and start
// a new one.
} else if !can_merge(token_handler.current_class, class, text) {
token_handler.write_pending_elems(token_handler.current_class);
true
} else {
// The token is mergeable: only adopt its class if no class was being tracked yet.
token_handler.current_class.is_none()
};
if need_current_class_update {
// The span is replaced with a dummy one so that the class used for grouping never
// generates a link by itself.
token_handler.current_class = class.map(Class::dummy);
}
token_handler.pending_elems.push((text, class));
}
Highlight::EnterSpan { class } => {
let mut should_add = true;
if let Some(pending_exit_span) = token_handler.pending_exit_span {
if class.is_equal_to(pending_exit_span) {
// The span which was about to be closed is re-entered right away: keep it opened.
should_add = false;
} else {
token_handler.handle_exit_span();
}
} else {
// We flush everything just in case...
if token_handler.write_pending_elems(token_handler.current_class) {
token_handler.current_class = None;
}
}
if should_add {
let closing_tag =
enter_span(token_handler.out, class, &token_handler.href_context);
token_handler.closing_tags.push((closing_tag, class));
}
token_handler.current_class = None;
token_handler.pending_exit_span = None;
}
Highlight::ExitSpan => {
// The closing tag is not written right away: it is only recorded as pending so that a
// following `EnterSpan` with the same class can reuse the opened element.
token_handler.current_class = None;
token_handler.pending_exit_span = Some(
token_handler
.closing_tags
.last()
.as_ref()
.expect("ExitSpan without EnterSpan")
.1,
);
}
};
});
// `token_handler` is dropped here, which flushes the content that is still pending.
}
/// Writes the closing HTML of a highlighted code block into `out`, followed by a newline.
///
/// `playground_button`, when provided, is inserted as-is between the closing `</pre>` and the
/// closing `</div>`.
fn write_footer(out: &mut Buffer, playground_button: Option<&str>) {
    let button = playground_button.unwrap_or_default();
    writeln!(out, "</code></pre>{button}</div>");
}
/// How a span of text is classified. Mostly corresponds to token kinds.
///
/// Variants holding a `Span` keep the position of the token so that a link to the item definition
/// can be generated for it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Class {
/// Non-doc line and block comments.
Comment,
/// Doc comments (line or block).
DocComment,
/// Used for the span covering a whole attribute (`#[...]` and `#![...]`).
Attribute,
/// Reserved keywords, as well as `union` when it is used to declare a union type.
KeyWord,
/// Keywords that do pointer/reference stuff.
RefKeyWord,
/// `self` and `Self`.
Self_(Span),
/// Used for the span covering a macro name and its `!`.
Macro(Span),
/// `$` followed by an identifier, and that identifier.
MacroNonTerminal,
/// Text literals: strings, chars, bytes and their raw, byte and C variants.
String,
/// Integer and float literals.
Number,
/// `true` and `false`.
Bool,
/// `Ident` isn't rendered in the HTML but we still need it for the `Span` it contains.
Ident(Span),
/// Lifetimes, including raw ones and the ones with an unknown prefix.
Lifetime,
/// `Option` and `Result`.
PreludeTy(Span),
/// `Some`, `None`, `Ok` and `Err`.
PreludeVal,
/// The `?` token.
QuestionMark,
/// Custom decoration; the string is the CSS class to use.
Decoration(&'static str),
}
impl Class {
    /// Compares two classes without taking into account the `Span` of the `Self_`, `Macro` and
    /// `Ident` variants. Every other variant is compared with `==` (so including its content).
    ///
    /// It is used mostly to group multiple similar HTML elements into one `<span>` instead of
    /// multiple ones.
    fn is_equal_to(self, other: Self) -> bool {
        let same_span_insensitive_variant = matches!(
            (self, other),
            (Self::Self_(_), Self::Self_(_))
                | (Self::Macro(_), Self::Macro(_))
                | (Self::Ident(_), Self::Ident(_))
        );
        same_span_insensitive_variant || self == other
    }

    /// Replaces the `Span` of the `Self_`, `Macro` and `Ident` variants with `DUMMY_SP` to prevent
    /// creating links on "empty content" (because of the attributes merge). Every other variant
    /// is returned unchanged.
    fn dummy(self) -> Self {
        match self {
            Self::Self_(_) => Self::Self_(DUMMY_SP),
            Self::Macro(_) => Self::Macro(DUMMY_SP),
            Self::Ident(_) => Self::Ident(DUMMY_SP),
            unchanged @ (Self::Comment
            | Self::DocComment
            | Self::Attribute
            | Self::KeyWord
            | Self::RefKeyWord
            | Self::MacroNonTerminal
            | Self::String
            | Self::Number
            | Self::Bool
            | Self::Lifetime
            | Self::PreludeTy(_)
            | Self::PreludeVal
            | Self::QuestionMark
            | Self::Decoration(_)) => unchanged,
        }
    }

    /// Returns the CSS class expected by rustdoc for each `Class`. It is empty for `Ident`, which
    /// has no styling of its own.
    fn as_html(self) -> &'static str {
        match self {
            Self::Comment => "comment",
            Self::DocComment => "doccomment",
            Self::Attribute => "attr",
            Self::KeyWord => "kw",
            Self::RefKeyWord => "kw-2",
            Self::Self_(_) => "self",
            Self::Macro(_) => "macro",
            Self::MacroNonTerminal => "macro-nonterminal",
            Self::String => "string",
            Self::Number => "number",
            Self::Bool => "bool-val",
            Self::Ident(_) => "",
            Self::Lifetime => "lifetime",
            Self::PreludeTy(_) => "prelude-ty",
            Self::PreludeVal => "prelude-val",
            Self::QuestionMark => "question-mark",
            Self::Decoration(css_class) => css_class,
        }
    }

    /// Returns the `Span` stored in this class, if any. Only the classes which can be converted
    /// into a link to a definition hold one.
    fn get_span(self) -> Option<Span> {
        match self {
            Self::Self_(span) | Self::Macro(span) | Self::Ident(span) | Self::PreludeTy(span) => {
                Some(span)
            }
            Self::Comment
            | Self::DocComment
            | Self::Attribute
            | Self::KeyWord
            | Self::RefKeyWord
            | Self::MacroNonTerminal
            | Self::String
            | Self::Number
            | Self::Bool
            | Self::Lifetime
            | Self::PreludeVal
            | Self::QuestionMark
            | Self::Decoration(_) => None,
        }
    }
}
/// Events emitted by `Classifier::highlight` and consumed by the sink it is given.
#[derive(Debug)]
enum Highlight<'a> {
/// A piece of text with its classification (`None` when it isn't highlighted).
Token { text: &'a str, class: Option<Class> },
/// Start of a span of class `class` covering the events up to the matching `ExitSpan`.
EnterSpan { class: Class },
/// End of the innermost span started with `EnterSpan`.
ExitSpan,
}
/// Iterator over the tokens of a source string, pairing each token kind with its text.
struct TokenIter<'a> {
/// The part of the source which hasn't been returned yet.
src: &'a str,
/// Lexer cursor producing the tokens.
cursor: Cursor<'a>,
}
impl<'a> Iterator for TokenIter<'a> {
    type Item = (TokenKind, &'a str);

    /// Returns the next token kind with the text it covers, or `None` once the end of the source
    /// is reached.
    fn next(&mut self) -> Option<Self::Item> {
        let token = self.cursor.advance_token();
        match token.kind {
            TokenKind::Eof => None,
            kind => {
                // The lexer only reports lengths: slice the matching text off the front of what
                // remains.
                let (text, remaining) = self.src.split_at(token.len as usize);
                self.src = remaining;
                Some((kind, text))
            }
        }
    }
}
/// Classifies the identifier `text` as a keyword-like class.
///
/// It returns `None` if this is a non-keyword identifier. `self` and `Self` always yield `None`
/// as well, and so do `super` and `crate` when `allow_path_keywords` is `true`.
fn get_real_ident_class(text: &str, allow_path_keywords: bool) -> Option<Class> {
    let ignored = match text {
        "self" | "Self" => true,
        "super" | "crate" => allow_path_keywords,
        _ => false,
    };
    if ignored {
        return None;
    }
    match text {
        "ref" | "mut" => Some(Class::RefKeyWord),
        "false" | "true" => Some(Class::Bool),
        _ => Symbol::intern(text).is_reserved(|| Edition::Edition2021).then_some(Class::KeyWord),
    }
}
/// This iterator comes from the same idea as "Peekable" except that it allows to "peek" more than
/// just the next item by using `peek_next`. The `peek` method always returns the next item after
/// the current one whereas `peek_next` will return the next item after the last one peeked.
///
/// You can use both `peek` and `peek_next` at the same time without problem.
struct PeekIter<'a> {
/// Items which were already pulled out of `iter` by peeking but not yet returned by `next`.
stored: VecDeque<(TokenKind, &'a str)>,
/// This position is reinitialized when using `next`. It is used in `peek_next`.
peek_pos: usize,
/// The underlying token iterator.
iter: TokenIter<'a>,
}
impl<'a> PeekIter<'a> {
fn new(iter: TokenIter<'a>) -> Self {
Self { stored: VecDeque::new(), peek_pos: 0, iter }
}
/// Returns the next item after the current one. It doesn't interfere with `peek_next` output.
fn peek(&mut self) -> Option<&(TokenKind, &'a str)> {
if self.stored.is_empty()
&& let Some(next) = self.iter.next()
{
self.stored.push_back(next);
}
self.stored.front()
}
/// Returns the next item after the last one peeked. It doesn't interfere with `peek` output.
fn peek_next(&mut self) -> Option<&(TokenKind, &'a str)> {
self.peek_pos += 1;
if self.peek_pos - 1 < self.stored.len() {
self.stored.get(self.peek_pos - 1)
} else if let Some(next) = self.iter.next() {
self.stored.push_back(next);
self.stored.back()
} else {
None
}
}
}
impl<'a> Iterator for PeekIter<'a> {
    type Item = (TokenKind, &'a str);

    /// Returns the next item, taking it from the peeked ones first, and resets the position used
    /// by `peek_next`.
    fn next(&mut self) -> Option<Self::Item> {
        self.peek_pos = 0;
        self.stored.pop_front().or_else(|| self.iter.next())
    }
}
/// Custom spans inserted into the source. Eg --scrape-examples uses this to highlight function calls
struct Decorations {
/// Start position and CSS class of every decoration, sorted by position.
starts: Vec<(u32, &'static str)>,
/// End position of every decoration, sorted.
ends: Vec<u32>,
}
impl Decorations {
    /// Splits the `(start, end)` ranges of every CSS class of `info` into one sequence of
    /// `(start, class)` and one sequence of `end`, both sorted in document order.
    fn new(info: DecorationInfo) -> Self {
        let mut starts = Vec::new();
        let mut ends = Vec::new();
        for (kind, ranges) in info.0 {
            for (lo, hi) in ranges {
                starts.push((lo, kind));
                ends.push(hi);
            }
        }
        // The sort on `starts` is stable: decorations starting at the same position keep the
        // order they had in `info`.
        starts.sort_by_key(|&(lo, _)| lo);
        ends.sort();
        Decorations { starts, ends }
    }
}
/// Processes program tokens, classifying strings of text by highlighting
/// category (`Class`).
struct Classifier<'src> {
/// The tokens of `src`, with lookahead support.
tokens: PeekIter<'src>,
/// Set when entering an attribute (`#[` or `#![`) and reset on the next `]`.
in_attribute: bool,
/// Set on an identifier directly followed by `!` and reset when that `!` is handled.
in_macro: bool,
/// Set on a `$` directly followed by an identifier and reset once an identifier has been
/// classified as `MacroNonTerminal` because of it.
in_macro_nonterminal: bool,
/// Byte position, in `src`, of the next token to handle.
byte_pos: u32,
/// Span of the file being highlighted; token spans are computed relatively to its start.
file_span: Span,
/// The source code being highlighted.
src: &'src str,
/// Decorations which haven't been emitted yet, if any were requested.
decorations: Option<Decorations>,
}
impl<'src> Classifier<'src> {
/// Creates a classifier positioned at the start of `src`.
///
/// Takes as argument the source code to HTML-ify, the source code file span which will be used
/// later on by the `span_correspondence_map` and, optionally, the decorations to insert.
fn new(src: &str, file_span: Span, decoration_info: Option<DecorationInfo>) -> Classifier<'_> {
    Classifier {
        tokens: PeekIter::new(TokenIter { src, cursor: Cursor::new(src) }),
        in_attribute: false,
        in_macro: false,
        in_macro_nonterminal: false,
        byte_pos: 0,
        file_span,
        src,
        decorations: decoration_info.map(Decorations::new),
    }
}
/// Convenient wrapper to create the [`Span`] of `text`, given that it starts `lo` bytes after the
/// beginning of the file span.
fn new_span(&self, lo: u32, text: &str) -> Span {
    let file_start = self.file_span.lo();
    let span_start = file_start + BytePos(lo);
    let span_end = file_start + BytePos(lo + text.len() as u32);
    self.file_span.with_lo(span_start).with_hi(span_end)
}
/// Concatenate colons and idents as one when possible.
///
/// The consumed tokens are returned as `(kind, start, end)` entries, where `start..end` is a byte
/// range in `self.src`: an `Ident` entry for the path (including its inner `::`), optionally
/// followed by a `Colon` entry for the colons which could not be made part of the path. The
/// result is empty if nothing was consumed.
///
/// The tokens are pulled directly from `self.tokens`, so `self.byte_pos` is NOT updated here: the
/// caller has to advance it by the length of every returned entry.
fn get_full_ident_path(&mut self) -> Vec<(TokenKind, usize, usize)> {
let start = self.byte_pos as usize;
// End of the path recognized so far.
let mut pos = start;
let mut has_ident = false;
loop {
// Number of consecutive colons consumed in front of the next segment.
let mut nb = 0;
while let Some((TokenKind::Colon, _)) = self.tokens.peek() {
self.tokens.next();
nb += 1;
}
// Ident path can start with "::" but if we already have content in the ident path,
// the "::" is mandatory.
if has_ident && nb == 0 {
return vec![(TokenKind::Ident, start, pos)];
} else if nb != 0 && nb != 2 {
// Not a path separator: the colons are returned on their own.
if has_ident {
return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
} else {
return vec![(TokenKind::Colon, start, pos + nb)];
}
}
// Only identifiers for which `get_real_ident_class` returns `None` can be part of the path;
// any other token is mapped to `Some(_)` so that the pattern doesn't match.
if let Some((None, text)) = self.tokens.peek().map(|(token, text)| {
if *token == TokenKind::Ident {
let class = get_real_ident_class(text, true);
(class, text)
} else {
// Doesn't matter which Class we put in here...
(Some(Class::Comment), text)
}
}) {
// We only "add" the colon if there is an ident behind.
pos += text.len() + nb;
has_ident = true;
self.tokens.next();
} else if nb > 0 && has_ident {
return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
} else if nb > 0 {
return vec![(TokenKind::Colon, start, start + nb)];
} else if has_ident {
return vec![(TokenKind::Ident, start, pos)];
} else {
return Vec::new();
}
}
}
/// Wraps the tokens iteration to ensure that the `byte_pos` is always correct.
///
/// It returns the token's kind, the token as a string and the byte position it starts at in
/// the source string, or `None` when there is no token left.
fn next(&mut self) -> Option<(TokenKind, &'src str, u32)> {
    let (kind, text) = self.tokens.next()?;
    let token_start = self.byte_pos;
    self.byte_pos += text.len() as u32;
    Some((kind, text, token_start))
}
/// Exhausts the `Classifier` writing the output into `sink`.
///
/// The general structure for this method is to iterate over each token,
/// possibly giving it an HTML span with a class specifying what flavor of
/// token is used.
///
/// Before each step, an `EnterSpan` event is emitted for every decoration starting at or before
/// the current byte position, then an `ExitSpan` event for every decoration ending at or before
/// it.
fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'src>)) {
    loop {
        if let Some(decs) = self.decorations.as_mut() {
            let byte_pos = self.byte_pos;
            // `Decorations::new` sorts both sequences, so the entries which have been reached
            // form a prefix that a binary search finds without scanning everything that remains
            // on every iteration.
            let n_starts = decs.starts.partition_point(|(lo, _)| *lo <= byte_pos);
            for (_, kind) in decs.starts.drain(..n_starts) {
                sink(Highlight::EnterSpan { class: Class::Decoration(kind) });
            }
            let n_ends = decs.ends.partition_point(|hi| *hi <= byte_pos);
            for _ in decs.ends.drain(..n_ends) {
                sink(Highlight::ExitSpan);
            }
        }

        // Colons and identifiers are first grouped into paths so that a whole path is handled as
        // one item.
        if matches!(self.tokens.peek(), Some((TokenKind::Colon | TokenKind::Ident, _))) {
            let segments = self.get_full_ident_path();
            for &(kind, lo, hi) in &segments {
                let text = &self.src[lo..hi];
                self.advance(kind, text, sink, lo as u32);
                // `get_full_ident_path` consumed the tokens without moving `byte_pos`.
                self.byte_pos += text.len() as u32;
            }
            if !segments.is_empty() {
                continue;
            }
        }

        match self.next() {
            Some((kind, text, before)) => self.advance(kind, text, sink, before),
            None => break,
        }
    }
}
/// Single step of highlighting. This will classify `token`, but maybe also a couple of
/// following ones as well.
///
/// `before` is the position of the given token in the `source` string and is used as "lo" byte
/// in case we want to try to generate a link for this token using the
/// `span_correspondence_map`.
///
/// The resulting events are sent to `sink`. Panics if `token` is `TokenKind::Eof`.
fn advance(
&mut self,
token: TokenKind,
text: &'src str,
sink: &mut dyn FnMut(Highlight<'src>),
before: u32,
) {
// Kind of the token following the current one, if any.
let lookahead = self.peek();
let no_highlight = |sink: &mut dyn FnMut(_)| sink(Highlight::Token { text, class: None });
let class = match token {
TokenKind::Whitespace => return no_highlight(sink),
TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
if doc_style.is_some() {
Class::DocComment
} else {
Class::Comment
}
}
// Consider this as part of a macro invocation if there was a
// leading identifier.
TokenKind::Bang if self.in_macro => {
self.in_macro = false;
sink(Highlight::Token { text, class: None });
sink(Highlight::ExitSpan);
return;
}
// Assume that '&' or '*' is the reference or dereference operator
// or a reference or pointer type. Unless, of course, it looks like
// a logical and or a multiplication operator: `&&` or `* `.
TokenKind::Star => match self.tokens.peek() {
Some((TokenKind::Whitespace, _)) => return no_highlight(sink),
Some((TokenKind::Ident, "mut")) => {
self.next();
sink(Highlight::Token { text: "*mut", class: Some(Class::RefKeyWord) });
return;
}
Some((TokenKind::Ident, "const")) => {
self.next();
sink(Highlight::Token { text: "*const", class: Some(Class::RefKeyWord) });
return;
}
_ => Class::RefKeyWord,
},
TokenKind::And => match self.tokens.peek() {
Some((TokenKind::And, _)) => {
self.next();
sink(Highlight::Token { text: "&&", class: None });
return;
}
Some((TokenKind::Eq, _)) => {
self.next();
sink(Highlight::Token { text: "&=", class: None });
return;
}
Some((TokenKind::Whitespace, _)) => return no_highlight(sink),
Some((TokenKind::Ident, "mut")) => {
self.next();
sink(Highlight::Token { text: "&mut", class: Some(Class::RefKeyWord) });
return;
}
_ => Class::RefKeyWord,
},
// These can either be operators, or arrows.
TokenKind::Eq => match lookahead {
Some(TokenKind::Eq) => {
self.next();
sink(Highlight::Token { text: "==", class: None });
return;
}
Some(TokenKind::Gt) => {
self.next();
sink(Highlight::Token { text: "=>", class: None });
return;
}
_ => return no_highlight(sink),
},
TokenKind::Minus if lookahead == Some(TokenKind::Gt) => {
self.next();
sink(Highlight::Token { text: "->", class: None });
return;
}
// Other operators.
TokenKind::Minus
| TokenKind::Plus
| TokenKind::Or
| TokenKind::Slash
| TokenKind::Caret
| TokenKind::Percent
| TokenKind::Bang
| TokenKind::Lt
| TokenKind::Gt => return no_highlight(sink),
// Miscellaneous, no highlighting.
TokenKind::Dot
| TokenKind::Semi
| TokenKind::Comma
| TokenKind::OpenParen
| TokenKind::CloseParen
| TokenKind::OpenBrace
| TokenKind::CloseBrace
| TokenKind::OpenBracket
| TokenKind::At
| TokenKind::Tilde
| TokenKind::Colon
| TokenKind::Unknown => return no_highlight(sink),
TokenKind::Question => Class::QuestionMark,
// `$` directly followed by an identifier: both are highlighted as a macro non-terminal.
TokenKind::Dollar => match lookahead {
Some(TokenKind::Ident) => {
self.in_macro_nonterminal = true;
Class::MacroNonTerminal
}
_ => return no_highlight(sink),
},
// This might be the start of an attribute. We're going to want to
// continue highlighting it as an attribute until the ending ']' is
// seen, so skip out early. Down below we terminate the attribute
// span when we see the ']'.
TokenKind::Pound => {
match lookahead {
// Case 1: #![inner_attribute]
Some(TokenKind::Bang) => {
self.next();
if let Some(TokenKind::OpenBracket) = self.peek() {
self.in_attribute = true;
sink(Highlight::EnterSpan { class: Class::Attribute });
}
sink(Highlight::Token { text: "#", class: None });
sink(Highlight::Token { text: "!", class: None });
return;
}
// Case 2: #[outer_attribute]
Some(TokenKind::OpenBracket) => {
self.in_attribute = true;
sink(Highlight::EnterSpan { class: Class::Attribute });
}
_ => (),
}
return no_highlight(sink);
}
TokenKind::CloseBracket => {
if self.in_attribute {
self.in_attribute = false;
sink(Highlight::Token { text: "]", class: None });
sink(Highlight::ExitSpan);
return;
}
return no_highlight(sink);
}
TokenKind::Literal { kind, .. } => match kind {
// Text literals.
LiteralKind::Byte { .. }
| LiteralKind::Char { .. }
| LiteralKind::Str { .. }
| LiteralKind::ByteStr { .. }
| LiteralKind::RawStr { .. }
| LiteralKind::RawByteStr { .. }
| LiteralKind::CStr { .. }
| LiteralKind::RawCStr { .. } => Class::String,
// Number literals.
LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
},
TokenKind::GuardedStrPrefix => return no_highlight(sink),
// An identifier directly followed by `!` starts a macro span; the span is closed by the
// `TokenKind::Bang if self.in_macro` arm above.
TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => {
self.in_macro = true;
sink(Highlight::EnterSpan { class: Class::Macro(self.new_span(before, text)) });
sink(Highlight::Token { text, class: None });
return;
}
TokenKind::Ident => match get_real_ident_class(text, false) {
None => match text {
"Option" | "Result" => Class::PreludeTy(self.new_span(before, text)),
"Some" | "None" | "Ok" | "Err" => Class::PreludeVal,
// "union" is a weak keyword and is only considered as a keyword when declaring
// a union type.
"union" if self.check_if_is_union_keyword() => Class::KeyWord,
_ if self.in_macro_nonterminal => {
self.in_macro_nonterminal = false;
Class::MacroNonTerminal
}
"self" | "Self" => Class::Self_(self.new_span(before, text)),
_ => Class::Ident(self.new_span(before, text)),
},
Some(c) => c,
},
TokenKind::RawIdent | TokenKind::UnknownPrefix | TokenKind::InvalidIdent => {
Class::Ident(self.new_span(before, text))
}
TokenKind::Lifetime { .. }
| TokenKind::RawLifetime
| TokenKind::UnknownPrefixLifetime => Class::Lifetime,
TokenKind::Eof => panic!("Eof in advance"),
};
// Anything that didn't return above is the simple case where the class
// just spans a single token, so we can emit it as one classified token.
sink(Highlight::Token { text, class: Some(class) });
}
/// Returns the kind of the next token without consuming it, or `None` if there is no token
/// left.
fn peek(&mut self) -> Option<TokenKind> {
    let &(kind, _) = self.tokens.peek()?;
    Some(kind)
}
/// Returns `true` if the first upcoming non-whitespace token is an identifier, which is how
/// `union` is told to be declaring a union type.
///
/// The tokens are only peeked at (with `peek_next`), none is consumed.
fn check_if_is_union_keyword(&mut self) -> bool {
    loop {
        match self.tokens.peek_next() {
            Some((TokenKind::Whitespace, _)) => continue,
            Some((kind, _)) => return *kind == TokenKind::Ident,
            None => return false,
        }
    }
}
}
/// Called when we start processing a span of text that should be highlighted.
/// The `Class` argument specifies how it should be highlighted.
///
/// It writes the opening tag into `out` and returns the closing tag which has to be written
/// (see `exit_span`) once the content of the span has been emitted.
fn enter_span(
    out: &mut impl Write,
    klass: Class,
    href_context: &Option<HrefContext<'_, '_>>,
) -> &'static str {
    let closing_tag = string_without_closing_tag(out, "", Some(klass), href_context, true);
    closing_tag.expect(
        "internal error: enter_span was called with Some(klass) but did not return a \
         closing HTML tag",
    )
}
/// Called at the end of a span of highlighted text: writes `closing_tag` into `out`.
///
/// Panics if writing into `out` fails.
fn exit_span(out: &mut impl Write, closing_tag: &str) {
    Write::write_str(out, closing_tag).unwrap();
}
/// Called for a span of text. If the text should be highlighted differently
/// from the surrounding text, then the `Class` argument will be a value other
/// than `None`.
///
/// The following sequences of callbacks are equivalent:
/// ```plain
/// enter_span(Foo), string("text", None), exit_span()
/// string("text", Foo)
/// ```
///
/// The latter can be thought of as a shorthand for the former, which is more
/// flexible.
///
/// Note that if `context` is not `None` and that the given `klass` contains a `Span`, the function
/// will then try to find this `span` in the `span_correspondence_map`. If found, it'll then
/// generate a link for this element (which corresponds to where its definition is located).
fn string<T: Display>(
out: &mut impl Write,
text: T,
klass: Option<Class>,
href_context: &Option<HrefContext<'_, '_>>,
open_tag: bool,
) {
if let Some(closing_tag) = string_without_closing_tag(out, text, klass, href_context, open_tag)
{
out.write_str(closing_tag).unwrap();
}
}
/// This function writes `text` into `out` with some modifications depending on `klass`:
///
/// * If `klass` is `None`, `text` is written into `out` with no modification.
/// * If `klass` is `Some` but `klass.get_span()` is `None`, it writes the text wrapped in a
/// `<span>` with the provided `klass`.
/// * If `klass` is `Some` and has a [`rustc_span::Span`], it then tries to generate a link (`<a>`
/// element) by retrieving the link information from the `span_correspondence_map` that was filled
/// in `span_map.rs::collect_spans_and_sources`. If it cannot retrieve the information, then it's
/// the same as the second point (`klass` is `Some` but doesn't have a [`rustc_span::Span`]).
///
/// When `open_tag` is `false`, no `<span>` is opened and no `class` attribute is written (the
/// caller already opened an element with this class); a link can still be generated.
///
/// It returns the closing tag the caller has to write after the content, or `None` if no element
/// was opened. Note that the returned tag is an empty string for classes with no CSS class.
fn string_without_closing_tag<T: Display>(
out: &mut impl Write,
text: T,
klass: Option<Class>,
href_context: &Option<HrefContext<'_, '_>>,
open_tag: bool,
) -> Option<&'static str> {
let Some(klass) = klass else {
write!(out, "{text}").unwrap();
return None;
};
let Some(def_span) = klass.get_span() else {
if !open_tag {
write!(out, "{text}").unwrap();
return None;
}
write!(out, "<span class=\"{klass}\">{text}", klass = klass.as_html()).unwrap();
return Some("</span>");
};
let mut text_s = text.to_string();
// In a path, the `self`, `Self`, `crate` and `super` segments get their own highlighting.
if text_s.contains("::") {
text_s = text_s.split("::").intersperse("::").fold(String::new(), |mut path, t| {
match t {
"self" | "Self" => write!(
&mut path,
"<span class=\"{klass}\">{t}</span>",
klass = Class::Self_(DUMMY_SP).as_html(),
),
"crate" | "super" => {
write!(
&mut path,
"<span class=\"{klass}\">{t}</span>",
klass = Class::KeyWord.as_html(),
)
}
t => write!(&mut path, "{t}"),
}
.expect("Failed to build source HTML path");
path
});
}
if let Some(href_context) = href_context {
if let Some(href) =
href_context.context.shared.span_correspondence_map.get(&def_span).and_then(|href| {
let context = href_context.context;
// FIXME: later on, it'd be nice to provide two links (if possible) for all items:
// one to the documentation page and one to the source definition.
// FIXME: currently, external items only generate a link to their documentation,
// a link to their definition can be generated using this:
// https://github.com/rust-lang/rust/blob/60f1a2fc4b535ead9c85ce085fdce49b1b097531/src/librustdoc/html/render/context.rs#L315-L338
match href {
LinkFromSrc::Local(span) => {
context.href_from_span_relative(*span, &href_context.current_href)
}
LinkFromSrc::External(def_id) => {
format::href_with_root_path(*def_id, context, Some(href_context.root_path))
.ok()
.map(|(url, _, _)| url)
}
LinkFromSrc::Primitive(prim) => format::href_with_root_path(
PrimitiveType::primitive_locations(context.tcx())[prim],
context,
Some(href_context.root_path),
)
.ok()
.map(|(url, _, _)| url),
LinkFromSrc::Doc(def_id) => {
format::href_with_root_path(*def_id, context, Some(href_context.root_path))
.ok()
.map(|(doc_link, _, _)| doc_link)
}
}
})
{
if !open_tag {
// We're already inside an element which has the same klass, no need to give it
// again.
write!(out, "<a href=\"{href}\">{text_s}").unwrap();
} else {
let klass_s = klass.as_html();
if klass_s.is_empty() {
write!(out, "<a href=\"{href}\">{text_s}").unwrap();
} else {
write!(out, "<a class=\"{klass_s}\" href=\"{href}\">{text_s}").unwrap();
}
}
return Some("</a>");
}
}
// No link could be generated: fall back to a plain `<span>` (or to the bare text).
if !open_tag {
write!(out, "{}", text_s).unwrap();
return None;
}
let klass_s = klass.as_html();
if klass_s.is_empty() {
// No CSS class, so no element is opened: the "closing tag" is empty.
out.write_str(&text_s).unwrap();
Some("")
} else {
write!(out, "<span class=\"{klass_s}\">{text_s}").unwrap();
Some("</span>")
}
}
#[cfg(test)]
mod tests;