1use std::path::Path;
2
3use cargo_util_schemas::manifest::TomlToolLints;
4use cargo_util_terminal::report::AnnotationKind;
5use cargo_util_terminal::report::Group;
6use cargo_util_terminal::report::Level;
7use cargo_util_terminal::report::Patch;
8use cargo_util_terminal::report::Snippet;
9use toml_parser::Source;
10use toml_parser::Span;
11use toml_parser::decoder::Encoding;
12use toml_parser::parser::Event;
13use toml_parser::parser::EventKind;
14use toml_parser::parser::EventReceiver;
15use tracing::instrument;
16
17use super::CORRECTNESS;
18use crate::CargoResult;
19use crate::GlobalContext;
20use crate::core::MaybePackage;
21use crate::diagnostics::DiagnosticStats;
22use crate::diagnostics::Lint;
23use crate::diagnostics::LintLevel;
24use crate::diagnostics::ManifestFor;
25use crate::diagnostics::rel_cwd_manifest_path;
26
/// Lint definition for `text_direction_codepoint_in_literal`.
///
/// The lint belongs to the `CORRECTNESS` group, is not feature-gated, and carries the
/// user-facing documentation embedded in `docs` below.
pub static LINT: &Lint = &Lint {
    // Name users reference when configuring the lint level.
    name: "text_direction_codepoint_in_literal",
    // One-line description; also used as the primary title of each emitted report.
    desc: "unicode codepoint changing visible direction of text present in literal",
    primary_group: &CORRECTNESS,
    msrv: Some(super::CARGO_LINTS_MSRV),
    feature_gate: None,
    // Markdown documentation for the lint.
    docs: Some(
        r#"
### What it does
Detects Unicode codepoints in literals in manifests that change the visual representation of text on screen
in a way that does not correspond to their on memory representation.

### Why it is bad
Unicode allows changing the visual flow of text on screen
in order to support scripts that are written right-to-left,
but a specially crafted literal can make code that will be compiled appear to be part of a literal,
depending on the software used to read the code.
To avoid potential problems or confusion,
such as in CVE-2021-42574,
by default we deny their use.
"#,
    ),
};
50
51#[instrument(skip_all)]
52pub fn text_direction_codepoint_in_literal(
53 manifest: ManifestFor<'_>,
54 manifest_path: &Path,
55 cargo_lints: &TomlToolLints,
56 stats: &mut DiagnosticStats,
57 gctx: &GlobalContext,
58) -> CargoResult<()> {
59 let (lint_level, source) = manifest.lint_level(cargo_lints, LINT);
60 if lint_level == LintLevel::Allow {
61 return Ok(());
62 }
63
64 if matches!(
65 &manifest,
66 ManifestFor::Workspace {
67 maybe_pkg: MaybePackage::Package { .. },
68 ..
69 }
70 ) {
71 return Ok(());
73 }
74
75 let Some(contents) = manifest.contents() else {
76 return Ok(());
77 };
78
79 let bidi_spans = contents
80 .char_indices()
81 .filter(|(_i, c)| {
82 UNICODE_BIDI_CODEPOINTS
83 .iter()
84 .any(|(bidi, _, _name)| c == bidi)
85 })
86 .map(|(i, c)| (i, i + c.len_utf8()))
87 .collect::<Vec<_>>();
88 if bidi_spans.is_empty() {
89 return Ok(());
90 }
91
92 let toml_source = Source::new(contents);
93 let events = bidi_events(&toml_source, &bidi_spans);
94 let manifest_path = rel_cwd_manifest_path(manifest_path, gctx);
95 let mut emitted_source = None;
96 for event in events {
97 let token_span = event.token.span();
98 let token_span = token_span.start()..token_span.end();
99 let mut snippet = Snippet::source(contents).path(&manifest_path).annotation(
100 AnnotationKind::Context
101 .span(token_span.clone())
102 .label("this literal contains an invisible unicode text flow control codepoint"),
103 );
104 for bidi_span in event.bidi_spans {
105 let bidi_span = bidi_span.0..bidi_span.1;
106 let escaped = format!("{:?}", &contents[bidi_span.clone()]);
107 snippet = snippet.annotation(AnnotationKind::Primary.span(bidi_span).label(escaped));
108 }
109 let mut help_snippet = Snippet::source(contents).path(&manifest_path);
110 if let Some(original_raw) = toml_source.get(&event.token) {
111 let mut decoded = String::new();
112 let replacement = match event.token.kind() {
113 toml_parser::parser::EventKind::SimpleKey => {
114 use toml_writer::ToTomlKey as _;
115 original_raw.decode_key(&mut decoded, &mut ());
116 let builder = toml_writer::TomlKeyBuilder::new(&decoded);
117 let replacement = builder.as_basic();
118 Some(replacement.to_toml_key())
119 }
120 toml_parser::parser::EventKind::Scalar => {
121 use toml_writer::ToTomlValue as _;
122 let kind = original_raw.decode_scalar(&mut decoded, &mut ());
123 if matches!(kind, toml_parser::decoder::ScalarKind::String) {
124 let builder = toml_writer::TomlStringBuilder::new(&decoded);
125 let replacement = match event.token.encoding() {
126 Some(toml_parser::decoder::Encoding::BasicString)
127 | Some(toml_parser::decoder::Encoding::LiteralString)
128 | None => builder.as_basic(),
129 Some(toml_parser::decoder::Encoding::MlBasicString)
130 | Some(toml_parser::decoder::Encoding::MlLiteralString) => {
131 builder.as_ml_basic()
132 }
133 };
134 Some(replacement.to_toml_value())
135 } else {
136 None
137 }
138 }
139 _ => None,
140 };
141 if let Some(mut replacement) = replacement {
142 for (bidi, escaped, _) in UNICODE_BIDI_CODEPOINTS {
143 replacement = replacement.replace(*bidi, escaped);
144 }
145 help_snippet = help_snippet.patch(Patch::new(token_span.clone(), replacement));
146 }
147 }
148
149 let level = lint_level.to_diagnostic_level();
150 let mut primary = Group::with_title(level.primary_title(LINT.desc)).element(snippet);
151 if emitted_source.is_none() {
152 emitted_source = Some(LINT.emitted_source(lint_level, source));
153 primary = primary.element(Level::NOTE.message(emitted_source.as_ref().unwrap()));
154 }
155
156 let help = Group::with_title(Level::HELP.secondary_title("if you want to keep them but make them visible in your source code, you can escape them")).element(help_snippet);
157
158 let report = [primary, help];
159
160 stats.record_lint(lint_level);
161 gctx.shell().print_report(&report, lint_level.force())?;
162 }
163
164 Ok(())
165}
166
/// Unicode bidirectional control codepoints detected by this lint.
///
/// Each entry is `(codepoint, escape, name)`:
/// - `codepoint`: the character searched for in the manifest text,
/// - `escape`: the text substituted for the character in the suggested replacement.
///   The replacement is a TOML basic string, so this must be a TOML escape sequence
///   (`\uXXXX`, exactly four hex digits); Rust's `\u{...}` form is not valid TOML and would
///   turn the suggestion into an unparsable manifest,
/// - `name`: the Unicode character name.
const UNICODE_BIDI_CODEPOINTS: &[(char, &str, &str)] = &[
    ('\u{202A}', r"\u202A", "LEFT-TO-RIGHT EMBEDDING"),
    ('\u{202B}', r"\u202B", "RIGHT-TO-LEFT EMBEDDING"),
    ('\u{202C}', r"\u202C", "POP DIRECTIONAL FORMATTING"),
    ('\u{202D}', r"\u202D", "LEFT-TO-RIGHT OVERRIDE"),
    ('\u{202E}', r"\u202E", "RIGHT-TO-LEFT OVERRIDE"),
    ('\u{2066}', r"\u2066", "LEFT-TO-RIGHT ISOLATE"),
    ('\u{2067}', r"\u2067", "RIGHT-TO-LEFT ISOLATE"),
    ('\u{2068}', r"\u2068", "FIRST STRONG ISOLATE"),
    ('\u{2069}', r"\u2069", "POP DIRECTIONAL ISOLATE"),
];
178
/// A TOML key or scalar token that contains at least one codepoint of interest.
struct BiDiEvent {
    /// The parser event (kind, encoding and span) for the containing token.
    token: Event,
    /// `(start, end)` ranges of the codepoints found inside the token's span.
    bidi_spans: Vec<(usize, usize)>,
}
183
184fn bidi_events(source: &Source<'_>, bidi_spans: &[(usize, usize)]) -> Vec<BiDiEvent> {
185 let mut bidi_spans = bidi_spans.iter();
186 let bidi_span = bidi_spans.next().copied();
187
188 let tokens = source.lex().into_vec();
189 let mut collector = BiDiCollector {
190 bidi_span,
191 bidi_spans,
192 events: Vec::new(),
193 };
194 let mut errors = ();
195 toml_parser::parser::parse_document(&tokens, &mut collector, &mut errors);
196
197 collector.events
198}
199
/// Parser event receiver that matches codepoint spans against key and scalar tokens.
struct BiDiCollector<'b> {
    /// The next span not yet matched or skipped; `None` once all spans are exhausted.
    bidi_span: Option<(usize, usize)>,
    /// The spans that come after `bidi_span`.
    bidi_spans: std::slice::Iter<'b, (usize, usize)>,
    /// Tokens found so far that contain at least one span.
    events: Vec<BiDiEvent>,
}
205
206impl BiDiCollector<'_> {
207 fn process(&mut self, kind: EventKind, encoding: Option<Encoding>, span: Span) {
208 let mut event_bidi_spans = Vec::new();
209 while let Some(bidi_span) = self.bidi_span {
210 if bidi_span.0 < span.start() {
211 self.bidi_span = self.bidi_spans.next().copied();
212 continue;
213 } else if span.end() <= bidi_span.0 {
214 break;
215 }
216
217 event_bidi_spans.push(bidi_span);
218 self.bidi_span = self.bidi_spans.next().copied();
219 }
220
221 if !event_bidi_spans.is_empty() {
222 let token = Event::new_unchecked(kind, encoding, span);
223 self.events.push(BiDiEvent {
224 token,
225 bidi_spans: event_bidi_spans,
226 });
227 }
228 }
229}
230
// Only key and scalar events are handled; errors passed by the parser are ignored.
impl EventReceiver for BiDiCollector<'_> {
    /// Checks a key token for pending codepoint spans.
    fn simple_key(
        &mut self,
        span: Span,
        encoding: Option<Encoding>,
        _error: &mut dyn toml_parser::ErrorSink,
    ) {
        self.process(EventKind::SimpleKey, encoding, span)
    }
    /// Checks a scalar (value) token for pending codepoint spans.
    fn scalar(
        &mut self,
        span: Span,
        encoding: Option<Encoding>,
        _error: &mut dyn toml_parser::ErrorSink,
    ) {
        self.process(EventKind::Scalar, encoding, span)
    }
}