Skip to main content

cargo/diagnostics/rules/
text_direction_codepoint_in_comment.rs

1use std::path::Path;
2
3use cargo_util_schemas::manifest::TomlToolLints;
4use cargo_util_terminal::report::AnnotationKind;
5use cargo_util_terminal::report::Group;
6use cargo_util_terminal::report::Level;
7use cargo_util_terminal::report::Snippet;
8use toml_parser::Source;
9use toml_parser::Span;
10use toml_parser::decoder::Encoding;
11use toml_parser::parser::Event;
12use toml_parser::parser::EventKind;
13use toml_parser::parser::EventReceiver;
14use tracing::instrument;
15
16use super::CORRECTNESS;
17use crate::CargoResult;
18use crate::GlobalContext;
19use crate::core::MaybePackage;
20use crate::diagnostics::DiagnosticStats;
21use crate::diagnostics::Lint;
22use crate::diagnostics::LintLevel;
23use crate::diagnostics::ManifestFor;
24use crate::diagnostics::rel_cwd_manifest_path;
25
/// Metadata for the `text_direction_codepoint_in_comment` lint.
///
/// The lint is registered under the `CORRECTNESS` group, carries the shared
/// `CARGO_LINTS_MSRV`, and has no feature gate. The `docs` raw string below is the
/// user-facing description of the lint.
26pub static LINT: &Lint = &Lint {
27    name: "text_direction_codepoint_in_comment",
28    desc: "unicode codepoint changing visible direction of text present in comment",
29    primary_group: &CORRECTNESS,
30    msrv: Some(super::CARGO_LINTS_MSRV),
31    feature_gate: None,
32    docs: Some(
33        r#"
34### What it does
35Detects Unicode codepoints in manifest comments that change the visual representation of text on screen
36in a way that does not correspond to their on memory representation.
37
38### Why it is bad
39Unicode allows changing the visual flow of text on screen
40in order to support scripts that are written right-to-left,
41but a specially crafted comment can make code that will be compiled appear to be part of a comment,
42depending on the software used to read the code.
43To avoid potential problems or confusion,
44such as in CVE-2021-42574,
45by default we deny their use.
46"#,
47    ),
48};
49
50#[instrument(skip_all)]
51pub fn text_direction_codepoint_in_comment(
52    manifest: ManifestFor<'_>,
53    manifest_path: &Path,
54    cargo_lints: &TomlToolLints,
55    stats: &mut DiagnosticStats,
56    gctx: &GlobalContext,
57) -> CargoResult<()> {
58    let (lint_level, source) = manifest.lint_level(cargo_lints, LINT);
59    if lint_level == LintLevel::Allow {
60        return Ok(());
61    }
62
63    if matches!(
64        &manifest,
65        ManifestFor::Workspace {
66            maybe_pkg: MaybePackage::Package { .. },
67            ..
68        }
69    ) {
70        // For real manifests, lint as a package, rather than a workspace
71        return Ok(());
72    }
73
74    let Some(contents) = manifest.contents() else {
75        return Ok(());
76    };
77
78    let bidi_spans = contents
79        .char_indices()
80        .filter(|(_i, c)| {
81            UNICODE_BIDI_CODEPOINTS
82                .iter()
83                .any(|(bidi, _name)| c == bidi)
84        })
85        .map(|(i, c)| (i, i + c.len_utf8()))
86        .collect::<Vec<_>>();
87    if bidi_spans.is_empty() {
88        return Ok(());
89    }
90
91    let events = bidi_events(contents, &bidi_spans);
92    let manifest_path = rel_cwd_manifest_path(manifest_path, gctx);
93    let mut emitted_source = None;
94    for event in events {
95        let token_span = event.token.span();
96        let token_span = token_span.start()..token_span.end();
97        let mut snippet = Snippet::source(contents).path(&manifest_path).annotation(
98            AnnotationKind::Context
99                .span(token_span)
100                .label("this comment contains an invisible unicode text flow control codepoint"),
101        );
102        for bidi_span in event.bidi_spans {
103            let bidi_span = bidi_span.0..bidi_span.1;
104            let escaped = format!("{:?}", &contents[bidi_span.clone()]);
105            snippet = snippet.annotation(AnnotationKind::Primary.span(bidi_span).label(escaped));
106        }
107
108        let level = lint_level.to_diagnostic_level();
109        let mut primary = Group::with_title(level.primary_title(LINT.desc)).element(snippet);
110        if emitted_source.is_none() {
111            emitted_source = Some(LINT.emitted_source(lint_level, source));
112            primary = primary.element(Level::NOTE.message(emitted_source.as_ref().unwrap()));
113        }
114
115        let report = [primary];
116
117        stats.record_lint(lint_level);
118        gctx.shell().print_report(&report, lint_level.force())?;
119    }
120
121    Ok(())
122}
123
/// Codepoints treated as text-direction controls by this lint, each paired with a
/// descriptive name.
///
/// The detection code visible in this file compares only the `char` half of each pair;
/// the name is not read there.
124const UNICODE_BIDI_CODEPOINTS: &[(char, &str)] = &[
125    ('\u{202A}', "LEFT-TO-RIGHT EMBEDDING"),
126    ('\u{202B}', "RIGHT-TO-LEFT EMBEDDING"),
127    ('\u{202C}', "POP DIRECTIONAL FORMATTING"),
128    ('\u{202D}', "LEFT-TO-RIGHT OVERRIDE"),
129    ('\u{202E}', "RIGHT-TO-LEFT OVERRIDE"),
130    ('\u{2066}', "LEFT-TO-RIGHT ISOLATE"),
131    ('\u{2067}', "RIGHT-TO-LEFT ISOLATE"),
132    ('\u{2068}', "FIRST STRONG ISOLATE"),
133    ('\u{2069}', "POP DIRECTIONAL ISOLATE"),
134];
135
/// A parser event paired with the bidi codepoints located inside its span.
136struct BiDiEvent {
    // The event whose span contains the codepoints (built from comment events in this file).
137    token: Event,
    // `(start, end)` byte offsets into the manifest contents, one pair per codepoint.
138    bidi_spans: Vec<(usize, usize)>,
139}
140
141fn bidi_events(contents: &str, bidi_spans: &[(usize, usize)]) -> Vec<BiDiEvent> {
142    let mut bidi_spans = bidi_spans.iter();
143    let bidi_span = bidi_spans.next().copied();
144
145    let source = Source::new(contents);
146    let tokens = source.lex().into_vec();
147    let mut collector = BiDiCollector {
148        bidi_span,
149        bidi_spans,
150        events: Vec::new(),
151    };
152    let mut errors = ();
153    toml_parser::parser::parse_document(&tokens, &mut collector, &mut errors);
154
155    collector.events
156}
157
158struct BiDiCollector<'b> {
159    bidi_span: Option<(usize, usize)>,
160    bidi_spans: std::slice::Iter<'b, (usize, usize)>,
161    events: Vec<BiDiEvent>,
162}
163
164impl BiDiCollector<'_> {
165    fn process(&mut self, kind: EventKind, encoding: Option<Encoding>, span: Span) {
166        let mut event_bidi_spans = Vec::new();
167        while let Some(bidi_span) = self.bidi_span {
168            if bidi_span.0 < span.start() {
169                self.bidi_span = self.bidi_spans.next().copied();
170                continue;
171            } else if span.end() <= bidi_span.0 {
172                break;
173            }
174
175            event_bidi_spans.push(bidi_span);
176            self.bidi_span = self.bidi_spans.next().copied();
177        }
178
179        if !event_bidi_spans.is_empty() {
180            let token = Event::new_unchecked(kind, encoding, span);
181            self.events.push(BiDiEvent {
182                token,
183                bidi_spans: event_bidi_spans,
184            });
185        }
186    }
187}
188
189impl EventReceiver for BiDiCollector<'_> {
190    fn comment(&mut self, span: Span, _error: &mut dyn toml_parser::ErrorSink) {
191        self.process(EventKind::Comment, None, span)
192    }
193}