cargo/diagnostics/rules/
text_direction_codepoint_in_comment.rs1use std::path::Path;
2
3use cargo_util_terminal::report::AnnotationKind;
4use cargo_util_terminal::report::Group;
5use cargo_util_terminal::report::Level;
6use cargo_util_terminal::report::Snippet;
7use toml_parser::Source;
8use toml_parser::Span;
9use toml_parser::decoder::Encoding;
10use toml_parser::parser::Event;
11use toml_parser::parser::EventKind;
12use toml_parser::parser::EventReceiver;
13use tracing::instrument;
14
15use super::CORRECTNESS;
16use crate::CargoResult;
17use crate::GlobalContext;
18use crate::core::MaybePackage;
19use crate::diagnostics::DiagnosticStats;
20use crate::diagnostics::Lint;
21use crate::diagnostics::LintLevelProduct;
22use crate::diagnostics::ManifestFor;
23use crate::diagnostics::rel_cwd_manifest_path;
24
25pub static LINT: &Lint = &Lint {
26 name: "text_direction_codepoint_in_comment",
27 desc: "unicode codepoint changing visible direction of text present in comment",
28 primary_group: &CORRECTNESS,
29 msrv: Some(super::CARGO_LINTS_MSRV),
30 feature_gate: None,
31 docs: Some(
32 r#"
33### What it does
34Detects Unicode codepoints in manifest comments that change the visual representation of text on screen
35in a way that does not correspond to their on memory representation.
36
37### Why it is bad
38Unicode allows changing the visual flow of text on screen
39in order to support scripts that are written right-to-left,
40but a specially crafted comment can make code that will be compiled appear to be part of a comment,
41depending on the software used to read the code.
42To avoid potential problems or confusion,
43such as in CVE-2021-42574,
44by default we deny their use.
45"#,
46 ),
47};
48
49#[instrument(skip_all)]
50pub(crate) fn lint_manifest(
51 manifest: ManifestFor<'_>,
52 manifest_path: &Path,
53 level: LintLevelProduct,
54 stats: &mut DiagnosticStats,
55 gctx: &GlobalContext,
56) -> CargoResult<()> {
57 let LintLevelProduct {
58 level: lint_level,
59 source,
60 } = level;
61
62 if matches!(
63 &manifest,
64 ManifestFor::Workspace {
65 maybe_pkg: MaybePackage::Package { .. },
66 ..
67 }
68 ) {
69 return Ok(());
71 }
72
73 let Some(contents) = manifest.contents() else {
74 return Ok(());
75 };
76
77 let bidi_spans = contents
78 .char_indices()
79 .filter(|(_i, c)| {
80 UNICODE_BIDI_CODEPOINTS
81 .iter()
82 .any(|(bidi, _name)| c == bidi)
83 })
84 .map(|(i, c)| (i, i + c.len_utf8()))
85 .collect::<Vec<_>>();
86 if bidi_spans.is_empty() {
87 return Ok(());
88 }
89
90 let events = bidi_events(contents, &bidi_spans);
91 let manifest_path = rel_cwd_manifest_path(manifest_path, gctx);
92 let mut emitted_source = None;
93 for event in events {
94 let token_span = event.token.span();
95 let token_span = token_span.start()..token_span.end();
96 let mut snippet = Snippet::source(contents).path(&manifest_path).annotation(
97 AnnotationKind::Context
98 .span(token_span)
99 .label("this comment contains an invisible unicode text flow control codepoint"),
100 );
101 for bidi_span in event.bidi_spans {
102 let bidi_span = bidi_span.0..bidi_span.1;
103 let escaped = format!("{:?}", &contents[bidi_span.clone()]);
104 snippet = snippet.annotation(AnnotationKind::Primary.span(bidi_span).label(escaped));
105 }
106
107 let level = lint_level.to_diagnostic_level();
108 let mut primary = Group::with_title(level.primary_title(LINT.desc)).element(snippet);
109 if emitted_source.is_none() {
110 emitted_source = Some(LINT.emitted_source(lint_level, source));
111 primary = primary.element(Level::NOTE.message(emitted_source.as_ref().unwrap()));
112 }
113
114 let report = [primary];
115
116 stats.record_lint(lint_level);
117 gctx.shell().print_report(&report, lint_level.force())?;
118 }
119
120 Ok(())
121}
122
/// Unicode bidirectional control codepoints the lint searches for, each
/// paired with its Unicode name.
const UNICODE_BIDI_CODEPOINTS: &[(char, &str)] = &[
    // Embeddings, overrides and their terminator.
    ('\u{202A}', "LEFT-TO-RIGHT EMBEDDING"),
    ('\u{202B}', "RIGHT-TO-LEFT EMBEDDING"),
    ('\u{202C}', "POP DIRECTIONAL FORMATTING"),
    ('\u{202D}', "LEFT-TO-RIGHT OVERRIDE"),
    ('\u{202E}', "RIGHT-TO-LEFT OVERRIDE"),
    // Isolates and their terminator.
    ('\u{2066}', "LEFT-TO-RIGHT ISOLATE"),
    ('\u{2067}', "RIGHT-TO-LEFT ISOLATE"),
    ('\u{2068}', "FIRST STRONG ISOLATE"),
    ('\u{2069}', "POP DIRECTIONAL ISOLATE"),
];
134
135struct BiDiEvent {
136 token: Event,
137 bidi_spans: Vec<(usize, usize)>,
138}
139
140fn bidi_events(contents: &str, bidi_spans: &[(usize, usize)]) -> Vec<BiDiEvent> {
141 let mut bidi_spans = bidi_spans.iter();
142 let bidi_span = bidi_spans.next().copied();
143
144 let source = Source::new(contents);
145 let tokens = source.lex().into_vec();
146 let mut collector = BiDiCollector {
147 bidi_span,
148 bidi_spans,
149 events: Vec::new(),
150 };
151 let mut errors = ();
152 toml_parser::parser::parse_document(&tokens, &mut collector, &mut errors);
153
154 collector.events
155}
156
157struct BiDiCollector<'b> {
158 bidi_span: Option<(usize, usize)>,
159 bidi_spans: std::slice::Iter<'b, (usize, usize)>,
160 events: Vec<BiDiEvent>,
161}
162
163impl BiDiCollector<'_> {
164 fn process(&mut self, kind: EventKind, encoding: Option<Encoding>, span: Span) {
165 let mut event_bidi_spans = Vec::new();
166 while let Some(bidi_span) = self.bidi_span {
167 if bidi_span.0 < span.start() {
168 self.bidi_span = self.bidi_spans.next().copied();
169 continue;
170 } else if span.end() <= bidi_span.0 {
171 break;
172 }
173
174 event_bidi_spans.push(bidi_span);
175 self.bidi_span = self.bidi_spans.next().copied();
176 }
177
178 if !event_bidi_spans.is_empty() {
179 let token = Event::new_unchecked(kind, encoding, span);
180 self.events.push(BiDiEvent {
181 token,
182 bidi_spans: event_bidi_spans,
183 });
184 }
185 }
186}
187
188impl EventReceiver for BiDiCollector<'_> {
189 fn comment(&mut self, span: Span, _error: &mut dyn toml_parser::ErrorSink) {
190 self.process(EventKind::Comment, None, span)
191 }
192}