1use std::path::Path;
2
3use cargo_util_terminal::report::AnnotationKind;
4use cargo_util_terminal::report::Group;
5use cargo_util_terminal::report::Level;
6use cargo_util_terminal::report::Patch;
7use cargo_util_terminal::report::Snippet;
8use toml_parser::Source;
9use toml_parser::Span;
10use toml_parser::decoder::Encoding;
11use toml_parser::parser::Event;
12use toml_parser::parser::EventKind;
13use toml_parser::parser::EventReceiver;
14use tracing::instrument;
15
16use super::CORRECTNESS;
17use crate::CargoResult;
18use crate::GlobalContext;
19use crate::core::MaybePackage;
20use crate::diagnostics::DiagnosticStats;
21use crate::diagnostics::Lint;
22use crate::diagnostics::LintLevelProduct;
23use crate::diagnostics::ManifestFor;
24use crate::diagnostics::rel_cwd_manifest_path;
25
26pub static LINT: &Lint = &Lint {
27 name: "text_direction_codepoint_in_literal",
28 desc: "unicode codepoint changing visible direction of text present in literal",
29 primary_group: &CORRECTNESS,
30 msrv: Some(super::CARGO_LINTS_MSRV),
31 feature_gate: None,
32 docs: Some(
33 r#"
34### What it does
35Detects Unicode codepoints in literals in manifests that change the visual representation of text on screen
36in a way that does not correspond to their on memory representation.
37
38### Why it is bad
39Unicode allows changing the visual flow of text on screen
40in order to support scripts that are written right-to-left,
41but a specially crafted literal can make code that will be compiled appear to be part of a literal,
42depending on the software used to read the code.
43To avoid potential problems or confusion,
44such as in CVE-2021-42574,
45by default we deny their use.
46"#,
47 ),
48};
49
50#[instrument(skip_all)]
51pub(crate) fn lint_manifest(
52 manifest: ManifestFor<'_>,
53 manifest_path: &Path,
54 level: LintLevelProduct,
55 stats: &mut DiagnosticStats,
56 gctx: &GlobalContext,
57) -> CargoResult<()> {
58 let LintLevelProduct {
59 level: lint_level,
60 source,
61 } = level;
62
63 if matches!(
64 &manifest,
65 ManifestFor::Workspace {
66 maybe_pkg: MaybePackage::Package { .. },
67 ..
68 }
69 ) {
70 return Ok(());
72 }
73
74 let Some(contents) = manifest.contents() else {
75 return Ok(());
76 };
77
78 let bidi_spans = contents
79 .char_indices()
80 .filter(|(_i, c)| {
81 UNICODE_BIDI_CODEPOINTS
82 .iter()
83 .any(|(bidi, _, _name)| c == bidi)
84 })
85 .map(|(i, c)| (i, i + c.len_utf8()))
86 .collect::<Vec<_>>();
87 if bidi_spans.is_empty() {
88 return Ok(());
89 }
90
91 let toml_source = Source::new(contents);
92 let events = bidi_events(&toml_source, &bidi_spans);
93 let manifest_path = rel_cwd_manifest_path(manifest_path, gctx);
94 let mut emitted_source = None;
95 for event in events {
96 let token_span = event.token.span();
97 let token_span = token_span.start()..token_span.end();
98 let mut snippet = Snippet::source(contents).path(&manifest_path).annotation(
99 AnnotationKind::Context
100 .span(token_span.clone())
101 .label("this literal contains an invisible unicode text flow control codepoint"),
102 );
103 for bidi_span in event.bidi_spans {
104 let bidi_span = bidi_span.0..bidi_span.1;
105 let escaped = format!("{:?}", &contents[bidi_span.clone()]);
106 snippet = snippet.annotation(AnnotationKind::Primary.span(bidi_span).label(escaped));
107 }
108 let mut help_snippet = Snippet::source(contents).path(&manifest_path);
109 if let Some(original_raw) = toml_source.get(&event.token) {
110 let mut decoded = String::new();
111 let replacement = match event.token.kind() {
112 toml_parser::parser::EventKind::SimpleKey => {
113 use toml_writer::ToTomlKey as _;
114 original_raw.decode_key(&mut decoded, &mut ());
115 let builder = toml_writer::TomlKeyBuilder::new(&decoded);
116 let replacement = builder.as_basic();
117 Some(replacement.to_toml_key())
118 }
119 toml_parser::parser::EventKind::Scalar => {
120 use toml_writer::ToTomlValue as _;
121 let kind = original_raw.decode_scalar(&mut decoded, &mut ());
122 if matches!(kind, toml_parser::decoder::ScalarKind::String) {
123 let builder = toml_writer::TomlStringBuilder::new(&decoded);
124 let replacement = match event.token.encoding() {
125 Some(toml_parser::decoder::Encoding::BasicString)
126 | Some(toml_parser::decoder::Encoding::LiteralString)
127 | None => builder.as_basic(),
128 Some(toml_parser::decoder::Encoding::MlBasicString)
129 | Some(toml_parser::decoder::Encoding::MlLiteralString) => {
130 builder.as_ml_basic()
131 }
132 };
133 Some(replacement.to_toml_value())
134 } else {
135 None
136 }
137 }
138 _ => None,
139 };
140 if let Some(mut replacement) = replacement {
141 for (bidi, escaped, _) in UNICODE_BIDI_CODEPOINTS {
142 replacement = replacement.replace(*bidi, escaped);
143 }
144 help_snippet = help_snippet.patch(Patch::new(token_span.clone(), replacement));
145 }
146 }
147
148 let level = lint_level.to_diagnostic_level();
149 let mut primary = Group::with_title(level.primary_title(LINT.desc)).element(snippet);
150 if emitted_source.is_none() {
151 emitted_source = Some(LINT.emitted_source(lint_level, source));
152 primary = primary.element(Level::NOTE.message(emitted_source.as_ref().unwrap()));
153 }
154
155 let help = Group::with_title(Level::HELP.secondary_title("if you want to keep them but make them visible in your source code, you can escape them")).element(help_snippet);
156
157 let report = [primary, help];
158
159 stats.record_lint(lint_level);
160 gctx.shell().print_report(&report, lint_level.force())?;
161 }
162
163 Ok(())
164}
165
/// Unicode bidirectional text-flow control codepoints detected by this lint.
///
/// Each entry is `(codepoint, escape, name)`:
/// - `codepoint`: the raw character searched for in the manifest text.
/// - `escape`: the text substituted for the raw character in the suggested
///   replacement. The suggestion is rendered as a TOML basic string (or quoted
///   key), so this must be TOML's `\uXXXX` escape; Rust's `\u{XXXX}` form is not a
///   valid TOML escape sequence and would make the suggested manifest unparsable.
/// - `name`: the Unicode character name, kept for reference.
const UNICODE_BIDI_CODEPOINTS: &[(char, &str, &str)] = &[
    ('\u{202A}', r"\u202A", "LEFT-TO-RIGHT EMBEDDING"),
    ('\u{202B}', r"\u202B", "RIGHT-TO-LEFT EMBEDDING"),
    ('\u{202C}', r"\u202C", "POP DIRECTIONAL FORMATTING"),
    ('\u{202D}', r"\u202D", "LEFT-TO-RIGHT OVERRIDE"),
    ('\u{202E}', r"\u202E", "RIGHT-TO-LEFT OVERRIDE"),
    ('\u{2066}', r"\u2066", "LEFT-TO-RIGHT ISOLATE"),
    ('\u{2067}', r"\u2067", "RIGHT-TO-LEFT ISOLATE"),
    ('\u{2068}', r"\u2068", "FIRST STRONG ISOLATE"),
    ('\u{2069}', r"\u2069", "POP DIRECTIONAL ISOLATE"),
];
177
178struct BiDiEvent {
179 token: Event,
180 bidi_spans: Vec<(usize, usize)>,
181}
182
183fn bidi_events(source: &Source<'_>, bidi_spans: &[(usize, usize)]) -> Vec<BiDiEvent> {
184 let mut bidi_spans = bidi_spans.iter();
185 let bidi_span = bidi_spans.next().copied();
186
187 let tokens = source.lex().into_vec();
188 let mut collector = BiDiCollector {
189 bidi_span,
190 bidi_spans,
191 events: Vec::new(),
192 };
193 let mut errors = ();
194 toml_parser::parser::parse_document(&tokens, &mut collector, &mut errors);
195
196 collector.events
197}
198
199struct BiDiCollector<'b> {
200 bidi_span: Option<(usize, usize)>,
201 bidi_spans: std::slice::Iter<'b, (usize, usize)>,
202 events: Vec<BiDiEvent>,
203}
204
205impl BiDiCollector<'_> {
206 fn process(&mut self, kind: EventKind, encoding: Option<Encoding>, span: Span) {
207 let mut event_bidi_spans = Vec::new();
208 while let Some(bidi_span) = self.bidi_span {
209 if bidi_span.0 < span.start() {
210 self.bidi_span = self.bidi_spans.next().copied();
211 continue;
212 } else if span.end() <= bidi_span.0 {
213 break;
214 }
215
216 event_bidi_spans.push(bidi_span);
217 self.bidi_span = self.bidi_spans.next().copied();
218 }
219
220 if !event_bidi_spans.is_empty() {
221 let token = Event::new_unchecked(kind, encoding, span);
222 self.events.push(BiDiEvent {
223 token,
224 bidi_spans: event_bidi_spans,
225 });
226 }
227 }
228}
229
230impl EventReceiver for BiDiCollector<'_> {
231 fn simple_key(
232 &mut self,
233 span: Span,
234 encoding: Option<Encoding>,
235 _error: &mut dyn toml_parser::ErrorSink,
236 ) {
237 self.process(EventKind::SimpleKey, encoding, span)
238 }
239 fn scalar(
240 &mut self,
241 span: Span,
242 encoding: Option<Encoding>,
243 _error: &mut dyn toml_parser::ErrorSink,
244 ) {
245 self.process(EventKind::Scalar, encoding, span)
246 }
247}