1use std::path::Path;
2
3use cargo_util_schemas::manifest::TomlToolLints;
4use cargo_util_terminal::report::AnnotationKind;
5use cargo_util_terminal::report::Group;
6use cargo_util_terminal::report::Level;
7use cargo_util_terminal::report::Patch;
8use cargo_util_terminal::report::Snippet;
9use toml_parser::Source;
10use toml_parser::Span;
11use toml_parser::decoder::Encoding;
12use toml_parser::parser::Event;
13use toml_parser::parser::EventKind;
14use toml_parser::parser::EventReceiver;
15use tracing::instrument;
16
17use super::CORRECTNESS;
18use crate::CargoResult;
19use crate::GlobalContext;
20use crate::core::MaybePackage;
21use crate::diagnostics::Lint;
22use crate::diagnostics::LintLevel;
23use crate::diagnostics::ManifestFor;
24use crate::diagnostics::rel_cwd_manifest_path;
25
/// Lint definition for `text_direction_codepoint_in_literal`.
///
/// Belongs to the `CORRECTNESS` group and has no feature gate. The `docs`
/// field holds the user-facing markdown description of the lint.
pub static LINT: &Lint = &Lint {
    name: "text_direction_codepoint_in_literal",
    desc: "unicode codepoint changing visible direction of text present in literal",
    primary_group: &CORRECTNESS,
    msrv: Some(super::CARGO_LINTS_MSRV),
    feature_gate: None,
    docs: Some(
        r#"
### What it does
Detects Unicode codepoints in literals in manifests that change the visual representation of text on screen
in a way that does not correspond to their on memory representation.

### Why it is bad
Unicode allows changing the visual flow of text on screen
in order to support scripts that are written right-to-left,
but a specially crafted literal can make code that will be compiled appear to be part of a literal,
depending on the software used to read the code.
To avoid potential problems or confusion,
such as in CVE-2021-42574,
by default we deny their use.
"#,
    ),
};
49
50#[instrument(skip_all)]
51pub fn text_direction_codepoint_in_literal(
52 manifest: ManifestFor<'_>,
53 manifest_path: &Path,
54 cargo_lints: &TomlToolLints,
55 error_count: &mut usize,
56 gctx: &GlobalContext,
57) -> CargoResult<()> {
58 let (lint_level, source) = manifest.lint_level(cargo_lints, LINT);
59 if lint_level == LintLevel::Allow {
60 return Ok(());
61 }
62
63 if matches!(
64 &manifest,
65 ManifestFor::Workspace {
66 maybe_pkg: MaybePackage::Package { .. },
67 ..
68 }
69 ) {
70 return Ok(());
72 }
73
74 let Some(contents) = manifest.contents() else {
75 return Ok(());
76 };
77
78 let bidi_spans = contents
79 .char_indices()
80 .filter(|(_i, c)| {
81 UNICODE_BIDI_CODEPOINTS
82 .iter()
83 .any(|(bidi, _, _name)| c == bidi)
84 })
85 .map(|(i, c)| (i, i + c.len_utf8()))
86 .collect::<Vec<_>>();
87 if bidi_spans.is_empty() {
88 return Ok(());
89 }
90
91 let toml_source = Source::new(contents);
92 let events = bidi_events(&toml_source, &bidi_spans);
93 let manifest_path = rel_cwd_manifest_path(manifest_path, gctx);
94 let mut emitted_source = None;
95 for event in events {
96 if lint_level.is_error() {
97 *error_count += 1;
98 }
99
100 let token_span = event.token.span();
101 let token_span = token_span.start()..token_span.end();
102 let mut snippet = Snippet::source(contents).path(&manifest_path).annotation(
103 AnnotationKind::Context
104 .span(token_span.clone())
105 .label("this literal contains an invisible unicode text flow control codepoint"),
106 );
107 for bidi_span in event.bidi_spans {
108 let bidi_span = bidi_span.0..bidi_span.1;
109 let escaped = format!("{:?}", &contents[bidi_span.clone()]);
110 snippet = snippet.annotation(AnnotationKind::Primary.span(bidi_span).label(escaped));
111 }
112 let mut help_snippet = Snippet::source(contents).path(&manifest_path);
113 if let Some(original_raw) = toml_source.get(&event.token) {
114 let mut decoded = String::new();
115 let replacement = match event.token.kind() {
116 toml_parser::parser::EventKind::SimpleKey => {
117 use toml_writer::ToTomlKey as _;
118 original_raw.decode_key(&mut decoded, &mut ());
119 let builder = toml_writer::TomlKeyBuilder::new(&decoded);
120 let replacement = builder.as_basic();
121 Some(replacement.to_toml_key())
122 }
123 toml_parser::parser::EventKind::Scalar => {
124 use toml_writer::ToTomlValue as _;
125 let kind = original_raw.decode_scalar(&mut decoded, &mut ());
126 if matches!(kind, toml_parser::decoder::ScalarKind::String) {
127 let builder = toml_writer::TomlStringBuilder::new(&decoded);
128 let replacement = match event.token.encoding() {
129 Some(toml_parser::decoder::Encoding::BasicString)
130 | Some(toml_parser::decoder::Encoding::LiteralString)
131 | None => builder.as_basic(),
132 Some(toml_parser::decoder::Encoding::MlBasicString)
133 | Some(toml_parser::decoder::Encoding::MlLiteralString) => {
134 builder.as_ml_basic()
135 }
136 };
137 Some(replacement.to_toml_value())
138 } else {
139 None
140 }
141 }
142 _ => None,
143 };
144 if let Some(mut replacement) = replacement {
145 for (bidi, escaped, _) in UNICODE_BIDI_CODEPOINTS {
146 replacement = replacement.replace(*bidi, escaped);
147 }
148 help_snippet = help_snippet.patch(Patch::new(token_span.clone(), replacement));
149 }
150 }
151
152 let level = lint_level.to_diagnostic_level();
153 let mut primary = Group::with_title(level.primary_title(LINT.desc)).element(snippet);
154 if emitted_source.is_none() {
155 emitted_source = Some(LINT.emitted_source(lint_level, source));
156 primary = primary.element(Level::NOTE.message(emitted_source.as_ref().unwrap()));
157 }
158
159 let help = Group::with_title(Level::HELP.secondary_title("if you want to keep them but make them visible in your source code, you can escape them")).element(help_snippet);
160
161 let report = [primary, help];
162 gctx.shell().print_report(&report, lint_level.force())?;
163 }
164
165 Ok(())
166}
167
/// Unicode bidirectional control codepoints that this lint flags.
///
/// Each entry is `(codepoint, escape, name)`:
/// - `codepoint` is the character searched for in the manifest,
/// - `escape` is the text substituted for it inside a TOML basic string in the
///   suggested fix,
/// - `name` is the codepoint's Unicode name.
///
/// The escape uses TOML's `\uXXXX` form (exactly four hex digits), not Rust's
/// `\u{XXXX}`: TOML rejects the braced form, so a suggestion using it would
/// not parse. Every codepoint here is in the Basic Multilingual Plane, so the
/// four-digit form is sufficient.
const UNICODE_BIDI_CODEPOINTS: &[(char, &str, &str)] = &[
    ('\u{202A}', r"\u202A", "LEFT-TO-RIGHT EMBEDDING"),
    ('\u{202B}', r"\u202B", "RIGHT-TO-LEFT EMBEDDING"),
    ('\u{202C}', r"\u202C", "POP DIRECTIONAL FORMATTING"),
    ('\u{202D}', r"\u202D", "LEFT-TO-RIGHT OVERRIDE"),
    ('\u{202E}', r"\u202E", "RIGHT-TO-LEFT OVERRIDE"),
    ('\u{2066}', r"\u2066", "LEFT-TO-RIGHT ISOLATE"),
    ('\u{2067}', r"\u2067", "RIGHT-TO-LEFT ISOLATE"),
    ('\u{2068}', r"\u2068", "FIRST STRONG ISOLATE"),
    ('\u{2069}', r"\u2069", "POP DIRECTIONAL ISOLATE"),
];
179
/// A TOML key or scalar that contains at least one flagged codepoint.
struct BiDiEvent {
    /// Parser event (kind, encoding, span) rebuilt for the affected token.
    token: Event,
    /// Byte ranges `(start, end)` of each flagged codepoint attributed to the token.
    bidi_spans: Vec<(usize, usize)>,
}
184
185fn bidi_events(source: &Source<'_>, bidi_spans: &[(usize, usize)]) -> Vec<BiDiEvent> {
186 let mut bidi_spans = bidi_spans.iter();
187 let bidi_span = bidi_spans.next().copied();
188
189 let tokens = source.lex().into_vec();
190 let mut collector = BiDiCollector {
191 bidi_span,
192 bidi_spans,
193 events: Vec::new(),
194 };
195 let mut errors = ();
196 toml_parser::parser::parse_document(&tokens, &mut collector, &mut errors);
197
198 collector.events
199}
200
201struct BiDiCollector<'b> {
202 bidi_span: Option<(usize, usize)>,
203 bidi_spans: std::slice::Iter<'b, (usize, usize)>,
204 events: Vec<BiDiEvent>,
205}
206
207impl BiDiCollector<'_> {
208 fn process(&mut self, kind: EventKind, encoding: Option<Encoding>, span: Span) {
209 let mut event_bidi_spans = Vec::new();
210 while let Some(bidi_span) = self.bidi_span {
211 if bidi_span.0 < span.start() {
212 self.bidi_span = self.bidi_spans.next().copied();
213 continue;
214 } else if span.end() <= bidi_span.0 {
215 break;
216 }
217
218 event_bidi_spans.push(bidi_span);
219 self.bidi_span = self.bidi_spans.next().copied();
220 }
221
222 if !event_bidi_spans.is_empty() {
223 let token = Event::new_unchecked(kind, encoding, span);
224 self.events.push(BiDiEvent {
225 token,
226 bidi_spans: event_bidi_spans,
227 });
228 }
229 }
230}
231
232impl EventReceiver for BiDiCollector<'_> {
233 fn simple_key(
234 &mut self,
235 span: Span,
236 encoding: Option<Encoding>,
237 _error: &mut dyn toml_parser::ErrorSink,
238 ) {
239 self.process(EventKind::SimpleKey, encoding, span)
240 }
241 fn scalar(
242 &mut self,
243 span: Span,
244 encoding: Option<Encoding>,
245 _error: &mut dyn toml_parser::ErrorSink,
246 ) {
247 self.process(EventKind::Scalar, encoding, span)
248 }
249}