Skip to main content

cargo/diagnostics/rules/
text_direction_codepoint_in_literal.rs

1use std::path::Path;
2
3use cargo_util_schemas::manifest::TomlToolLints;
4use cargo_util_terminal::report::AnnotationKind;
5use cargo_util_terminal::report::Group;
6use cargo_util_terminal::report::Level;
7use cargo_util_terminal::report::Patch;
8use cargo_util_terminal::report::Snippet;
9use toml_parser::Source;
10use toml_parser::Span;
11use toml_parser::decoder::Encoding;
12use toml_parser::parser::Event;
13use toml_parser::parser::EventKind;
14use toml_parser::parser::EventReceiver;
15use tracing::instrument;
16
17use super::CORRECTNESS;
18use crate::CargoResult;
19use crate::GlobalContext;
20use crate::core::MaybePackage;
21use crate::diagnostics::DiagnosticStats;
22use crate::diagnostics::Lint;
23use crate::diagnostics::LintLevel;
24use crate::diagnostics::ManifestFor;
25use crate::diagnostics::rel_cwd_manifest_path;
26
27pub static LINT: &Lint = &Lint {
28    name: "text_direction_codepoint_in_literal",
29    desc: "unicode codepoint changing visible direction of text present in literal",
30    primary_group: &CORRECTNESS,
31    msrv: Some(super::CARGO_LINTS_MSRV),
32    feature_gate: None,
33    docs: Some(
34        r#"
35### What it does
36Detects Unicode codepoints in literals in manifests that change the visual representation of text on screen
37in a way that does not correspond to their on memory representation.
38
39### Why it is bad
40Unicode allows changing the visual flow of text on screen
41in order to support scripts that are written right-to-left,
42but a specially crafted literal can make code that will be compiled appear to be part of a literal,
43depending on the software used to read the code.
44To avoid potential problems or confusion,
45such as in CVE-2021-42574,
46by default we deny their use.
47"#,
48    ),
49};
50
51#[instrument(skip_all)]
52pub fn text_direction_codepoint_in_literal(
53    manifest: ManifestFor<'_>,
54    manifest_path: &Path,
55    cargo_lints: &TomlToolLints,
56    stats: &mut DiagnosticStats,
57    gctx: &GlobalContext,
58) -> CargoResult<()> {
59    let (lint_level, source) = manifest.lint_level(cargo_lints, LINT);
60    if lint_level == LintLevel::Allow {
61        return Ok(());
62    }
63
64    if matches!(
65        &manifest,
66        ManifestFor::Workspace {
67            maybe_pkg: MaybePackage::Package { .. },
68            ..
69        }
70    ) {
71        // For real manifests, lint as a package, rather than a workspace
72        return Ok(());
73    }
74
75    let Some(contents) = manifest.contents() else {
76        return Ok(());
77    };
78
79    let bidi_spans = contents
80        .char_indices()
81        .filter(|(_i, c)| {
82            UNICODE_BIDI_CODEPOINTS
83                .iter()
84                .any(|(bidi, _, _name)| c == bidi)
85        })
86        .map(|(i, c)| (i, i + c.len_utf8()))
87        .collect::<Vec<_>>();
88    if bidi_spans.is_empty() {
89        return Ok(());
90    }
91
92    let toml_source = Source::new(contents);
93    let events = bidi_events(&toml_source, &bidi_spans);
94    let manifest_path = rel_cwd_manifest_path(manifest_path, gctx);
95    let mut emitted_source = None;
96    for event in events {
97        let token_span = event.token.span();
98        let token_span = token_span.start()..token_span.end();
99        let mut snippet = Snippet::source(contents).path(&manifest_path).annotation(
100            AnnotationKind::Context
101                .span(token_span.clone())
102                .label("this literal contains an invisible unicode text flow control codepoint"),
103        );
104        for bidi_span in event.bidi_spans {
105            let bidi_span = bidi_span.0..bidi_span.1;
106            let escaped = format!("{:?}", &contents[bidi_span.clone()]);
107            snippet = snippet.annotation(AnnotationKind::Primary.span(bidi_span).label(escaped));
108        }
109        let mut help_snippet = Snippet::source(contents).path(&manifest_path);
110        if let Some(original_raw) = toml_source.get(&event.token) {
111            let mut decoded = String::new();
112            let replacement = match event.token.kind() {
113                toml_parser::parser::EventKind::SimpleKey => {
114                    use toml_writer::ToTomlKey as _;
115                    original_raw.decode_key(&mut decoded, &mut ());
116                    let builder = toml_writer::TomlKeyBuilder::new(&decoded);
117                    let replacement = builder.as_basic();
118                    Some(replacement.to_toml_key())
119                }
120                toml_parser::parser::EventKind::Scalar => {
121                    use toml_writer::ToTomlValue as _;
122                    let kind = original_raw.decode_scalar(&mut decoded, &mut ());
123                    if matches!(kind, toml_parser::decoder::ScalarKind::String) {
124                        let builder = toml_writer::TomlStringBuilder::new(&decoded);
125                        let replacement = match event.token.encoding() {
126                            Some(toml_parser::decoder::Encoding::BasicString)
127                            | Some(toml_parser::decoder::Encoding::LiteralString)
128                            | None => builder.as_basic(),
129                            Some(toml_parser::decoder::Encoding::MlBasicString)
130                            | Some(toml_parser::decoder::Encoding::MlLiteralString) => {
131                                builder.as_ml_basic()
132                            }
133                        };
134                        Some(replacement.to_toml_value())
135                    } else {
136                        None
137                    }
138                }
139                _ => None,
140            };
141            if let Some(mut replacement) = replacement {
142                for (bidi, escaped, _) in UNICODE_BIDI_CODEPOINTS {
143                    replacement = replacement.replace(*bidi, escaped);
144                }
145                help_snippet = help_snippet.patch(Patch::new(token_span.clone(), replacement));
146            }
147        }
148
149        let level = lint_level.to_diagnostic_level();
150        let mut primary = Group::with_title(level.primary_title(LINT.desc)).element(snippet);
151        if emitted_source.is_none() {
152            emitted_source = Some(LINT.emitted_source(lint_level, source));
153            primary = primary.element(Level::NOTE.message(emitted_source.as_ref().unwrap()));
154        }
155
156        let help = Group::with_title(Level::HELP.secondary_title("if you want to keep them but make them visible in your source code, you can escape them")).element(help_snippet);
157
158        let report = [primary, help];
159
160        stats.record_lint(lint_level);
161        gctx.shell().print_report(&report, lint_level.force())?;
162    }
163
164    Ok(())
165}
166
/// Unicode codepoints that change the visual direction of text.
///
/// Each entry is `(codepoint, escape, name)`:
/// - `codepoint` is the character searched for in the manifest text,
/// - `escape` is the text substituted for it in the suggested replacement,
/// - `name` is the codepoint's Unicode name (not read by the code in this file).
///
/// The escapes use TOML's `\uXXXX` form (exactly four hex digits, no braces)
/// because they are spliced into TOML basic strings and keys. Rust's
/// `\u{XXXX}` form is not a valid TOML escape and would make the suggested
/// manifest fail to parse.
const UNICODE_BIDI_CODEPOINTS: &[(char, &str, &str)] = &[
    ('\u{202A}', r"\u202A", "LEFT-TO-RIGHT EMBEDDING"),
    ('\u{202B}', r"\u202B", "RIGHT-TO-LEFT EMBEDDING"),
    ('\u{202C}', r"\u202C", "POP DIRECTIONAL FORMATTING"),
    ('\u{202D}', r"\u202D", "LEFT-TO-RIGHT OVERRIDE"),
    ('\u{202E}', r"\u202E", "RIGHT-TO-LEFT OVERRIDE"),
    ('\u{2066}', r"\u2066", "LEFT-TO-RIGHT ISOLATE"),
    ('\u{2067}', r"\u2067", "RIGHT-TO-LEFT ISOLATE"),
    ('\u{2068}', r"\u2068", "FIRST STRONG ISOLATE"),
    ('\u{2069}', r"\u2069", "POP DIRECTIONAL ISOLATE"),
];
178
179struct BiDiEvent {
180    token: Event,
181    bidi_spans: Vec<(usize, usize)>,
182}
183
184fn bidi_events(source: &Source<'_>, bidi_spans: &[(usize, usize)]) -> Vec<BiDiEvent> {
185    let mut bidi_spans = bidi_spans.iter();
186    let bidi_span = bidi_spans.next().copied();
187
188    let tokens = source.lex().into_vec();
189    let mut collector = BiDiCollector {
190        bidi_span,
191        bidi_spans,
192        events: Vec::new(),
193    };
194    let mut errors = ();
195    toml_parser::parser::parse_document(&tokens, &mut collector, &mut errors);
196
197    collector.events
198}
199
200struct BiDiCollector<'b> {
201    bidi_span: Option<(usize, usize)>,
202    bidi_spans: std::slice::Iter<'b, (usize, usize)>,
203    events: Vec<BiDiEvent>,
204}
205
206impl BiDiCollector<'_> {
207    fn process(&mut self, kind: EventKind, encoding: Option<Encoding>, span: Span) {
208        let mut event_bidi_spans = Vec::new();
209        while let Some(bidi_span) = self.bidi_span {
210            if bidi_span.0 < span.start() {
211                self.bidi_span = self.bidi_spans.next().copied();
212                continue;
213            } else if span.end() <= bidi_span.0 {
214                break;
215            }
216
217            event_bidi_spans.push(bidi_span);
218            self.bidi_span = self.bidi_spans.next().copied();
219        }
220
221        if !event_bidi_spans.is_empty() {
222            let token = Event::new_unchecked(kind, encoding, span);
223            self.events.push(BiDiEvent {
224                token,
225                bidi_spans: event_bidi_spans,
226            });
227        }
228    }
229}
230
231impl EventReceiver for BiDiCollector<'_> {
232    fn simple_key(
233        &mut self,
234        span: Span,
235        encoding: Option<Encoding>,
236        _error: &mut dyn toml_parser::ErrorSink,
237    ) {
238        self.process(EventKind::SimpleKey, encoding, span)
239    }
240    fn scalar(
241        &mut self,
242        span: Span,
243        encoding: Option<Encoding>,
244        _error: &mut dyn toml_parser::ErrorSink,
245    ) {
246        self.process(EventKind::Scalar, encoding, span)
247    }
248}