1use std::path::Path;
2
3use cargo_util_terminal::report::AnnotationKind;
4use cargo_util_terminal::report::Group;
5use cargo_util_terminal::report::Level;
6use cargo_util_terminal::report::Patch;
7use cargo_util_terminal::report::Snippet;
8use toml_parser::Source;
9use toml_parser::Span;
10use toml_parser::decoder::Encoding;
11use toml_parser::parser::Event;
12use toml_parser::parser::EventKind;
13use toml_parser::parser::EventReceiver;
14use tracing::instrument;
15
16use super::CORRECTNESS;
17use crate::CargoResult;
18use crate::GlobalContext;
19use crate::core::MaybePackage;
20use crate::core::Workspace;
21use crate::diagnostics::Lint;
22use crate::diagnostics::LintLevelProduct;
23use crate::diagnostics::ManifestFor;
24use crate::diagnostics::ScopedDiagnosticStats;
25use crate::diagnostics::workspace_rel_path;
26
/// Lint definition for `text_direction_codepoint_in_literal`.
///
/// The lint belongs to the `CORRECTNESS` group and is not behind a feature
/// gate. `desc` is reused as the title of the diagnostic emitted by
/// `lint_manifest`, and `docs` holds the user-facing description of the lint.
pub static LINT: &Lint = &Lint {
    name: "text_direction_codepoint_in_literal",
    desc: "unicode codepoint changing visible direction of text present in literal",
    primary_group: &CORRECTNESS,
    msrv: Some(super::CARGO_LINTS_MSRV),
    feature_gate: None,
    docs: Some(
        r#"
### What it does
Detects Unicode codepoints in literals in manifests that change the visual representation of text on screen
in a way that does not correspond to their on memory representation.

### Why it is bad
Unicode allows changing the visual flow of text on screen
in order to support scripts that are written right-to-left,
but a specially crafted literal can make code that will be compiled appear to be part of a literal,
depending on the software used to read the code.
To avoid potential problems or confusion,
such as in CVE-2021-42574,
by default we deny their use.
"#,
    ),
};
50
51#[instrument(skip_all)]
52pub(crate) fn lint_manifest(
53 ws: &Workspace<'_>,
54 manifest: ManifestFor<'_>,
55 manifest_path: &Path,
56 level: LintLevelProduct,
57 pkg_stats: &mut ScopedDiagnosticStats<'_>,
58 gctx: &GlobalContext,
59) -> CargoResult<()> {
60 let LintLevelProduct {
61 level: lint_level,
62 source,
63 } = level;
64
65 if matches!(
66 &manifest,
67 ManifestFor::Workspace {
68 maybe_pkg: MaybePackage::Package { .. },
69 ..
70 }
71 ) {
72 return Ok(());
74 }
75
76 let Some(contents) = manifest.contents() else {
77 return Ok(());
78 };
79
80 let bidi_spans = contents
81 .char_indices()
82 .filter(|(_i, c)| {
83 UNICODE_BIDI_CODEPOINTS
84 .iter()
85 .any(|(bidi, _, _name)| c == bidi)
86 })
87 .map(|(i, c)| (i, i + c.len_utf8()))
88 .collect::<Vec<_>>();
89 if bidi_spans.is_empty() {
90 return Ok(());
91 }
92
93 let toml_source = Source::new(contents);
94 let events = bidi_events(&toml_source, &bidi_spans);
95 let manifest_path = workspace_rel_path(ws, manifest_path);
96 let mut emitted_source = None;
97 for event in events {
98 let token_span = event.token.span();
99 let token_span = token_span.start()..token_span.end();
100 let mut snippet = Snippet::source(contents).path(&manifest_path).annotation(
101 AnnotationKind::Context
102 .span(token_span.clone())
103 .label("this literal contains an invisible unicode text flow control codepoint"),
104 );
105 for bidi_span in event.bidi_spans {
106 let bidi_span = bidi_span.0..bidi_span.1;
107 let escaped = format!("{:?}", &contents[bidi_span.clone()]);
108 snippet = snippet.annotation(AnnotationKind::Primary.span(bidi_span).label(escaped));
109 }
110 let mut help_snippet = Snippet::source(contents).path(&manifest_path);
111 if let Some(original_raw) = toml_source.get(&event.token) {
112 let mut decoded = String::new();
113 let replacement = match event.token.kind() {
114 toml_parser::parser::EventKind::SimpleKey => {
115 use toml_writer::ToTomlKey as _;
116 original_raw.decode_key(&mut decoded, &mut ());
117 let builder = toml_writer::TomlKeyBuilder::new(&decoded);
118 let replacement = builder.as_basic();
119 Some(replacement.to_toml_key())
120 }
121 toml_parser::parser::EventKind::Scalar => {
122 use toml_writer::ToTomlValue as _;
123 let kind = original_raw.decode_scalar(&mut decoded, &mut ());
124 if matches!(kind, toml_parser::decoder::ScalarKind::String) {
125 let builder = toml_writer::TomlStringBuilder::new(&decoded);
126 let replacement = match event.token.encoding() {
127 Some(toml_parser::decoder::Encoding::BasicString)
128 | Some(toml_parser::decoder::Encoding::LiteralString)
129 | None => builder.as_basic(),
130 Some(toml_parser::decoder::Encoding::MlBasicString)
131 | Some(toml_parser::decoder::Encoding::MlLiteralString) => {
132 builder.as_ml_basic()
133 }
134 };
135 Some(replacement.to_toml_value())
136 } else {
137 None
138 }
139 }
140 _ => None,
141 };
142 if let Some(mut replacement) = replacement {
143 for (bidi, escaped, _) in UNICODE_BIDI_CODEPOINTS {
144 replacement = replacement.replace(*bidi, escaped);
145 }
146 help_snippet = help_snippet.patch(Patch::new(token_span.clone(), replacement));
147 }
148 }
149
150 let level = lint_level.to_diagnostic_level();
151 let mut primary = Group::with_title(level.primary_title(LINT.desc)).element(snippet);
152 if emitted_source.is_none() {
153 emitted_source = Some(LINT.emitted_source(lint_level, source));
154 primary = primary.element(Level::NOTE.message(emitted_source.as_ref().unwrap()));
155 }
156
157 let help = Group::with_title(Level::HELP.secondary_title("if you want to keep them but make them visible in your source code, you can escape them")).element(help_snippet);
158
159 let report = [primary, help];
160
161 pkg_stats.record_lint(lint_level);
162 gctx.shell().print_report(&report, lint_level.force())?;
163 }
164
165 Ok(())
166}
167
/// Unicode codepoints that alter the visual direction of text.
///
/// Each entry is `(codepoint, escape, name)`:
/// - `codepoint`: the character searched for in the manifest,
/// - `escape`: the text substituted for the character in the suggested
///   replacement literal,
/// - `name`: the Unicode name of the codepoint.
///
/// The escapes use TOML's `\uXXXX` form (exactly four hex digits). Rust's
/// `\u{XXXX}` form is not a valid escape inside a TOML basic string, so a
/// suggestion written with it would not parse.
const UNICODE_BIDI_CODEPOINTS: &[(char, &str, &str)] = &[
    ('\u{202A}', r"\u202A", "LEFT-TO-RIGHT EMBEDDING"),
    ('\u{202B}', r"\u202B", "RIGHT-TO-LEFT EMBEDDING"),
    ('\u{202C}', r"\u202C", "POP DIRECTIONAL FORMATTING"),
    ('\u{202D}', r"\u202D", "LEFT-TO-RIGHT OVERRIDE"),
    ('\u{202E}', r"\u202E", "RIGHT-TO-LEFT OVERRIDE"),
    ('\u{2066}', r"\u2066", "LEFT-TO-RIGHT ISOLATE"),
    ('\u{2067}', r"\u2067", "RIGHT-TO-LEFT ISOLATE"),
    ('\u{2068}', r"\u2068", "FIRST STRONG ISOLATE"),
    ('\u{2069}', r"\u2069", "POP DIRECTIONAL ISOLATE"),
];
179
/// A parsed TOML token (key or scalar) that contains at least one bidi codepoint.
struct BiDiEvent {
    /// The token, rebuilt from the kind, encoding and span reported by the parser.
    token: Event,
    /// `(start, end)` ranges of the bidi codepoints that start inside the token's span.
    bidi_spans: Vec<(usize, usize)>,
}
184
185fn bidi_events(source: &Source<'_>, bidi_spans: &[(usize, usize)]) -> Vec<BiDiEvent> {
186 let mut bidi_spans = bidi_spans.iter();
187 let bidi_span = bidi_spans.next().copied();
188
189 let tokens = source.lex().into_vec();
190 let mut collector = BiDiCollector {
191 bidi_span,
192 bidi_spans,
193 events: Vec::new(),
194 };
195 let mut errors = ();
196 toml_parser::parser::parse_document(&tokens, &mut collector, &mut errors);
197
198 collector.events
199}
200
/// Parser event receiver that matches bidi codepoint spans against key and
/// scalar tokens as they are reported.
struct BiDiCollector<'b> {
    /// The next bidi span not yet matched or skipped; `None` once all are consumed.
    bidi_span: Option<(usize, usize)>,
    /// The bidi spans that follow `bidi_span`.
    bidi_spans: std::slice::Iter<'b, (usize, usize)>,
    /// Tokens found so far that contain at least one bidi span.
    events: Vec<BiDiEvent>,
}
206
207impl BiDiCollector<'_> {
208 fn process(&mut self, kind: EventKind, encoding: Option<Encoding>, span: Span) {
209 let mut event_bidi_spans = Vec::new();
210 while let Some(bidi_span) = self.bidi_span {
211 if bidi_span.0 < span.start() {
212 self.bidi_span = self.bidi_spans.next().copied();
213 continue;
214 } else if span.end() <= bidi_span.0 {
215 break;
216 }
217
218 event_bidi_spans.push(bidi_span);
219 self.bidi_span = self.bidi_spans.next().copied();
220 }
221
222 if !event_bidi_spans.is_empty() {
223 let token = Event::new_unchecked(kind, encoding, span);
224 self.events.push(BiDiEvent {
225 token,
226 bidi_spans: event_bidi_spans,
227 });
228 }
229 }
230}
231
232impl EventReceiver for BiDiCollector<'_> {
233 fn simple_key(
234 &mut self,
235 span: Span,
236 encoding: Option<Encoding>,
237 _error: &mut dyn toml_parser::ErrorSink,
238 ) {
239 self.process(EventKind::SimpleKey, encoding, span)
240 }
241 fn scalar(
242 &mut self,
243 span: Span,
244 encoding: Option<Encoding>,
245 _error: &mut dyn toml_parser::ErrorSink,
246 ) {
247 self.process(EventKind::Scalar, encoding, span)
248 }
249}