Skip to main content

cargo/diagnostics/rules/
text_direction_codepoint_in_literal.rs

1use std::path::Path;
2
3use cargo_util_terminal::report::AnnotationKind;
4use cargo_util_terminal::report::Group;
5use cargo_util_terminal::report::Level;
6use cargo_util_terminal::report::Patch;
7use cargo_util_terminal::report::Snippet;
8use toml_parser::Source;
9use toml_parser::Span;
10use toml_parser::decoder::Encoding;
11use toml_parser::parser::Event;
12use toml_parser::parser::EventKind;
13use toml_parser::parser::EventReceiver;
14use tracing::instrument;
15
16use super::CORRECTNESS;
17use crate::CargoResult;
18use crate::GlobalContext;
19use crate::core::MaybePackage;
20use crate::diagnostics::DiagnosticStats;
21use crate::diagnostics::Lint;
22use crate::diagnostics::LintLevelProduct;
23use crate::diagnostics::ManifestFor;
24use crate::diagnostics::rel_cwd_manifest_path;
25
/// Static definition of the `text_direction_codepoint_in_literal` lint.
///
/// The lint belongs to the `CORRECTNESS` group, has no feature gate, and
/// carries its long-form user documentation (Markdown) in `docs`.
/// `desc` doubles as the title of the diagnostic printed by `lint_manifest`.
26pub static LINT: &Lint = &Lint {
27    name: "text_direction_codepoint_in_literal",
28    desc: "unicode codepoint changing visible direction of text present in literal",
29    primary_group: &CORRECTNESS,
30    msrv: Some(super::CARGO_LINTS_MSRV),
31    feature_gate: None,
    // Long-form documentation; the text is runtime data and must stay verbatim.
32    docs: Some(
33        r#"
34### What it does
35Detects Unicode codepoints in literals in manifests that change the visual representation of text on screen
36in a way that does not correspond to their on memory representation.
37
38### Why it is bad
39Unicode allows changing the visual flow of text on screen
40in order to support scripts that are written right-to-left,
41but a specially crafted literal can make code that will be compiled appear to be part of a literal,
42depending on the software used to read the code.
43To avoid potential problems or confusion,
44such as in CVE-2021-42574,
45by default we deny their use.
46"#,
47    ),
48};
49
50#[instrument(skip_all)]
51pub(crate) fn lint_manifest(
52    manifest: ManifestFor<'_>,
53    manifest_path: &Path,
54    level: LintLevelProduct,
55    stats: &mut DiagnosticStats,
56    gctx: &GlobalContext,
57) -> CargoResult<()> {
58    let LintLevelProduct {
59        level: lint_level,
60        source,
61    } = level;
62
63    if matches!(
64        &manifest,
65        ManifestFor::Workspace {
66            maybe_pkg: MaybePackage::Package { .. },
67            ..
68        }
69    ) {
70        // For real manifests, lint as a package, rather than a workspace
71        return Ok(());
72    }
73
74    let Some(contents) = manifest.contents() else {
75        return Ok(());
76    };
77
78    let bidi_spans = contents
79        .char_indices()
80        .filter(|(_i, c)| {
81            UNICODE_BIDI_CODEPOINTS
82                .iter()
83                .any(|(bidi, _, _name)| c == bidi)
84        })
85        .map(|(i, c)| (i, i + c.len_utf8()))
86        .collect::<Vec<_>>();
87    if bidi_spans.is_empty() {
88        return Ok(());
89    }
90
91    let toml_source = Source::new(contents);
92    let events = bidi_events(&toml_source, &bidi_spans);
93    let manifest_path = rel_cwd_manifest_path(manifest_path, gctx);
94    let mut emitted_source = None;
95    for event in events {
96        let token_span = event.token.span();
97        let token_span = token_span.start()..token_span.end();
98        let mut snippet = Snippet::source(contents).path(&manifest_path).annotation(
99            AnnotationKind::Context
100                .span(token_span.clone())
101                .label("this literal contains an invisible unicode text flow control codepoint"),
102        );
103        for bidi_span in event.bidi_spans {
104            let bidi_span = bidi_span.0..bidi_span.1;
105            let escaped = format!("{:?}", &contents[bidi_span.clone()]);
106            snippet = snippet.annotation(AnnotationKind::Primary.span(bidi_span).label(escaped));
107        }
108        let mut help_snippet = Snippet::source(contents).path(&manifest_path);
109        if let Some(original_raw) = toml_source.get(&event.token) {
110            let mut decoded = String::new();
111            let replacement = match event.token.kind() {
112                toml_parser::parser::EventKind::SimpleKey => {
113                    use toml_writer::ToTomlKey as _;
114                    original_raw.decode_key(&mut decoded, &mut ());
115                    let builder = toml_writer::TomlKeyBuilder::new(&decoded);
116                    let replacement = builder.as_basic();
117                    Some(replacement.to_toml_key())
118                }
119                toml_parser::parser::EventKind::Scalar => {
120                    use toml_writer::ToTomlValue as _;
121                    let kind = original_raw.decode_scalar(&mut decoded, &mut ());
122                    if matches!(kind, toml_parser::decoder::ScalarKind::String) {
123                        let builder = toml_writer::TomlStringBuilder::new(&decoded);
124                        let replacement = match event.token.encoding() {
125                            Some(toml_parser::decoder::Encoding::BasicString)
126                            | Some(toml_parser::decoder::Encoding::LiteralString)
127                            | None => builder.as_basic(),
128                            Some(toml_parser::decoder::Encoding::MlBasicString)
129                            | Some(toml_parser::decoder::Encoding::MlLiteralString) => {
130                                builder.as_ml_basic()
131                            }
132                        };
133                        Some(replacement.to_toml_value())
134                    } else {
135                        None
136                    }
137                }
138                _ => None,
139            };
140            if let Some(mut replacement) = replacement {
141                for (bidi, escaped, _) in UNICODE_BIDI_CODEPOINTS {
142                    replacement = replacement.replace(*bidi, escaped);
143                }
144                help_snippet = help_snippet.patch(Patch::new(token_span.clone(), replacement));
145            }
146        }
147
148        let level = lint_level.to_diagnostic_level();
149        let mut primary = Group::with_title(level.primary_title(LINT.desc)).element(snippet);
150        if emitted_source.is_none() {
151            emitted_source = Some(LINT.emitted_source(lint_level, source));
152            primary = primary.element(Level::NOTE.message(emitted_source.as_ref().unwrap()));
153        }
154
155        let help = Group::with_title(Level::HELP.secondary_title("if you want to keep them but make them visible in your source code, you can escape them")).element(help_snippet);
156
157        let report = [primary, help];
158
159        stats.record_lint(lint_level);
160        gctx.shell().print_report(&report, lint_level.force())?;
161    }
162
163    Ok(())
164}
165
/// Unicode bidirectional (text-flow) control codepoints detected by this lint.
///
/// Each entry is `(codepoint, escape, name)`:
/// - `codepoint` is the character searched for in the manifest text.
/// - `escape` is the text substituted for the character in the suggested fix.
///   The fix is a TOML basic string or quoted key, so this must be a TOML
///   escape: `\u` followed by exactly four hex digits. Rust's braced
///   `\u{XXXX}` form is not valid TOML and would make the suggested manifest
///   fail to parse.
/// - `name` is the Unicode character name, kept for reference.
const UNICODE_BIDI_CODEPOINTS: &[(char, &str, &str)] = &[
    ('\u{202A}', r"\u202A", "LEFT-TO-RIGHT EMBEDDING"),
    ('\u{202B}', r"\u202B", "RIGHT-TO-LEFT EMBEDDING"),
    ('\u{202C}', r"\u202C", "POP DIRECTIONAL FORMATTING"),
    ('\u{202D}', r"\u202D", "LEFT-TO-RIGHT OVERRIDE"),
    ('\u{202E}', r"\u202E", "RIGHT-TO-LEFT OVERRIDE"),
    ('\u{2066}', r"\u2066", "LEFT-TO-RIGHT ISOLATE"),
    ('\u{2067}', r"\u2067", "RIGHT-TO-LEFT ISOLATE"),
    ('\u{2068}', r"\u2068", "FIRST STRONG ISOLATE"),
    ('\u{2069}', r"\u2069", "POP DIRECTIONAL ISOLATE"),
];
177
/// A TOML key or scalar token paired with the BiDi codepoints found inside it.
178struct BiDiEvent {
    /// The key/scalar event, rebuilt from the kind, encoding and span the parser reported.
179    token: Event,
    /// `(start, end)` byte ranges into the manifest text, one per BiDi codepoint inside `token`.
180    bidi_spans: Vec<(usize, usize)>,
181}
182
183fn bidi_events(source: &Source<'_>, bidi_spans: &[(usize, usize)]) -> Vec<BiDiEvent> {
184    let mut bidi_spans = bidi_spans.iter();
185    let bidi_span = bidi_spans.next().copied();
186
187    let tokens = source.lex().into_vec();
188    let mut collector = BiDiCollector {
189        bidi_span,
190        bidi_spans,
191        events: Vec::new(),
192    };
193    let mut errors = ();
194    toml_parser::parser::parse_document(&tokens, &mut collector, &mut errors);
195
196    collector.events
197}
198
199struct BiDiCollector<'b> {
200    bidi_span: Option<(usize, usize)>,
201    bidi_spans: std::slice::Iter<'b, (usize, usize)>,
202    events: Vec<BiDiEvent>,
203}
204
205impl BiDiCollector<'_> {
206    fn process(&mut self, kind: EventKind, encoding: Option<Encoding>, span: Span) {
207        let mut event_bidi_spans = Vec::new();
208        while let Some(bidi_span) = self.bidi_span {
209            if bidi_span.0 < span.start() {
210                self.bidi_span = self.bidi_spans.next().copied();
211                continue;
212            } else if span.end() <= bidi_span.0 {
213                break;
214            }
215
216            event_bidi_spans.push(bidi_span);
217            self.bidi_span = self.bidi_spans.next().copied();
218        }
219
220        if !event_bidi_spans.is_empty() {
221            let token = Event::new_unchecked(kind, encoding, span);
222            self.events.push(BiDiEvent {
223                token,
224                bidi_spans: event_bidi_spans,
225            });
226        }
227    }
228}
229
230impl EventReceiver for BiDiCollector<'_> {
231    fn simple_key(
232        &mut self,
233        span: Span,
234        encoding: Option<Encoding>,
235        _error: &mut dyn toml_parser::ErrorSink,
236    ) {
237        self.process(EventKind::SimpleKey, encoding, span)
238    }
239    fn scalar(
240        &mut self,
241        span: Span,
242        encoding: Option<Encoding>,
243        _error: &mut dyn toml_parser::ErrorSink,
244    ) {
245        self.process(EventKind::Scalar, encoding, span)
246    }
247}