1use rustc_ast as ast;
2use rustc_data_structures::fx::FxIndexMap;
3use rustc_data_structures::unord::UnordMap;
4use rustc_session::{declare_lint, declare_lint_pass};
5use rustc_span::Symbol;
6use unicode_security::general_security_profile::IdentifierType;
7
8use crate::lints::{
9 ConfusableIdentifierPair, IdentifierNonAsciiChar, IdentifierUncommonCodepoints,
10 MixedScriptConfusables,
11};
12use crate::{EarlyContext, EarlyLintPass, LintContext};
13
14#[doc = r" The `non_ascii_idents` lint detects non-ASCII identifiers."]
#[doc = r""]
#[doc = r" ### Example"]
#[doc = r""]
#[doc = r" ```rust,compile_fail"]
#[doc = r" # #![allow(unused)]"]
#[doc = r" #![deny(non_ascii_idents)]"]
#[doc = r" fn main() {"]
#[doc = r" let föö = 1;"]
#[doc = r" }"]
#[doc = r" ```"]
#[doc = r""]
#[doc = r" {{produces}}"]
#[doc = r""]
#[doc = r" ### Explanation"]
#[doc = r""]
#[doc =
r" This lint allows projects that wish to retain the limit of only using"]
#[doc =
r#" ASCII characters to switch this lint to "forbid" (for example to ease"#]
#[doc = r" collaboration or for security reasons)."]
#[doc = r" See [RFC 2457] for more details."]
#[doc = r""]
#[doc =
r" [RFC 2457]: https://github.com/rust-lang/rfcs/blob/master/text/2457-non-ascii-idents.md"]
pub static NON_ASCII_IDENTS: &::rustc_lint_defs::Lint =
&::rustc_lint_defs::Lint {
name: "NON_ASCII_IDENTS",
default_level: ::rustc_lint_defs::Allow,
desc: "detects non-ASCII identifiers",
is_externally_loaded: false,
crate_level_only: true,
..::rustc_lint_defs::Lint::default_fields_for_macro()
};declare_lint! {
15 pub NON_ASCII_IDENTS,
38 Allow,
39 "detects non-ASCII identifiers",
40 crate_level_only
41}
42
43#[doc =
r" The `uncommon_codepoints` lint detects uncommon Unicode codepoints in"]
#[doc = r" identifiers."]
#[doc = r""]
#[doc = r" ### Example"]
#[doc = r""]
#[doc = r" ```rust"]
#[doc = r" # #![allow(unused)]"]
#[doc = r" const µ: f64 = 0.000001;"]
#[doc = r" ```"]
#[doc = r""]
#[doc = r" {{produces}}"]
#[doc = r""]
#[doc = r" ### Explanation"]
#[doc = r""]
#[doc =
r" This lint warns about using characters which are not commonly used, and may"]
#[doc = r" cause visual confusion."]
#[doc = r""]
#[doc =
r" This lint is triggered by identifiers that contain a codepoint that is"]
#[doc =
r#" not part of the set of "Allowed" codepoints as described by [Unicode®"#]
#[doc =
r" Technical Standard #39 Unicode Security Mechanisms Section 3.1 General"]
#[doc = r" Security Profile for Identifiers][TR39Allowed]."]
#[doc = r""]
#[doc =
r" Note that the set of uncommon codepoints may change over time. Beware"]
#[doc =
r#" that if you "forbid" this lint that existing code may fail in the"#]
#[doc = r" future."]
#[doc = r""]
#[doc =
r" [TR39Allowed]: https://www.unicode.org/reports/tr39/#General_Security_Profile"]
pub static UNCOMMON_CODEPOINTS: &::rustc_lint_defs::Lint =
&::rustc_lint_defs::Lint {
name: "UNCOMMON_CODEPOINTS",
default_level: ::rustc_lint_defs::Warn,
desc: "detects uncommon Unicode codepoints in identifiers",
is_externally_loaded: false,
crate_level_only: true,
..::rustc_lint_defs::Lint::default_fields_for_macro()
};declare_lint! {
44 pub UNCOMMON_CODEPOINTS,
72 Warn,
73 "detects uncommon Unicode codepoints in identifiers",
74 crate_level_only
75}
76
77#[doc =
r" The `confusable_idents` lint detects visually confusable pairs between"]
#[doc = r" identifiers."]
#[doc = r""]
#[doc = r" ### Example"]
#[doc = r""]
#[doc = r" ```rust"]
#[doc = r" // Latin Capital Letter E With Caron"]
#[doc = r" pub const Ě: i32 = 1;"]
#[doc = r" // Latin Capital Letter E With Breve"]
#[doc = r" pub const Ĕ: i32 = 2;"]
#[doc = r" ```"]
#[doc = r""]
#[doc = r" {{produces}}"]
#[doc = r""]
#[doc = r" ### Explanation"]
#[doc = r""]
#[doc =
r" This lint warns when different identifiers may appear visually similar,"]
#[doc = r" which can cause confusion."]
#[doc = r""]
#[doc =
r" The confusable detection algorithm is based on [Unicode® Technical"]
#[doc = r" Standard #39 Unicode Security Mechanisms Section 4 Confusable"]
#[doc =
r" Detection][TR39Confusable]. For every distinct identifier X execute"]
#[doc =
r" the function `skeleton(X)`. If there exist two distinct identifiers X"]
#[doc =
r" and Y in the same crate where `skeleton(X) = skeleton(Y)` report it."]
#[doc =
r" The compiler uses the same mechanism to check if an identifier is too"]
#[doc = r" similar to a keyword."]
#[doc = r""]
#[doc = r" Note that the set of confusable characters may change over time."]
#[doc =
r#" Beware that if you "forbid" this lint that existing code may fail in"#]
#[doc = r" the future."]
#[doc = r""]
#[doc =
r" [TR39Confusable]: https://www.unicode.org/reports/tr39/#Confusable_Detection"]
pub static CONFUSABLE_IDENTS: &::rustc_lint_defs::Lint =
&::rustc_lint_defs::Lint {
name: "CONFUSABLE_IDENTS",
default_level: ::rustc_lint_defs::Warn,
desc: "detects visually confusable pairs between identifiers",
is_externally_loaded: false,
crate_level_only: true,
..::rustc_lint_defs::Lint::default_fields_for_macro()
};declare_lint! {
78 pub CONFUSABLE_IDENTS,
111 Warn,
112 "detects visually confusable pairs between identifiers",
113 crate_level_only
114}
115
116#[doc = r" The `mixed_script_confusables` lint detects visually confusable"]
#[doc = r" characters in identifiers between different [scripts]."]
#[doc = r""]
#[doc = r" [scripts]: https://en.wikipedia.org/wiki/Script_(Unicode)"]
#[doc = r""]
#[doc = r" ### Example"]
#[doc = r""]
#[doc = r" ```rust"]
#[doc =
r" // The Japanese katakana character エ can be confused with the Han character 工."]
#[doc = r#" const エ: &'static str = "アイウ";"#]
#[doc = r" ```"]
#[doc = r""]
#[doc = r" {{produces}}"]
#[doc = r""]
#[doc = r" ### Explanation"]
#[doc = r""]
#[doc =
r" This lint warns when characters between different scripts may appear"]
#[doc = r" visually similar, which can cause confusion."]
#[doc = r""]
#[doc =
r" If the crate contains other identifiers in the same script that have"]
#[doc =
r" non-confusable characters, then this lint will *not* be issued. For"]
#[doc = r" example, if the example given above has another identifier with"]
#[doc =
r" katakana characters (such as `let カタカナ = 123;`), then this indicates"]
#[doc =
r" that you are intentionally using katakana, and it will not warn about"]
#[doc = r" it."]
#[doc = r""]
#[doc = r" Note that the set of confusable characters may change over time."]
#[doc =
r#" Beware that if you "forbid" this lint that existing code may fail in"#]
#[doc = r" the future."]
pub static MIXED_SCRIPT_CONFUSABLES: &::rustc_lint_defs::Lint =
&::rustc_lint_defs::Lint {
name: "MIXED_SCRIPT_CONFUSABLES",
default_level: ::rustc_lint_defs::Warn,
desc: "detects Unicode scripts whose mixed script confusables codepoints are solely used",
is_externally_loaded: false,
crate_level_only: true,
..::rustc_lint_defs::Lint::default_fields_for_macro()
};declare_lint! {
117 pub MIXED_SCRIPT_CONFUSABLES,
147 Warn,
148 "detects Unicode scripts whose mixed script confusables codepoints are solely used",
149 crate_level_only
150}
151
152pub struct NonAsciiIdents;
#[automatically_derived]
impl ::core::marker::Copy for NonAsciiIdents { }
#[automatically_derived]
#[doc(hidden)]
unsafe impl ::core::clone::TrivialClone for NonAsciiIdents { }
#[automatically_derived]
impl ::core::clone::Clone for NonAsciiIdents {
#[inline]
fn clone(&self) -> NonAsciiIdents { *self }
}
impl ::rustc_lint_defs::LintPass for NonAsciiIdents {
fn name(&self) -> &'static str { "NonAsciiIdents" }
fn get_lints(&self) -> ::rustc_lint_defs::LintVec {
<[_]>::into_vec(::alloc::boxed::box_new([NON_ASCII_IDENTS,
UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS,
MIXED_SCRIPT_CONFUSABLES]))
}
}
impl NonAsciiIdents {
#[allow(unused)]
pub fn lint_vec() -> ::rustc_lint_defs::LintVec {
<[_]>::into_vec(::alloc::boxed::box_new([NON_ASCII_IDENTS,
UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS,
MIXED_SCRIPT_CONFUSABLES]))
}
}declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS, MIXED_SCRIPT_CONFUSABLES]);
153
154impl EarlyLintPass for NonAsciiIdents {
155 fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
156 use std::collections::BTreeMap;
157
158 use rustc_session::lint::Level;
159 use rustc_span::Span;
160 use unicode_security::GeneralSecurityProfile;
161
162 let check_non_ascii_idents = cx.builder.lint_level(NON_ASCII_IDENTS).level != Level::Allow;
163 let check_uncommon_codepoints =
164 cx.builder.lint_level(UNCOMMON_CODEPOINTS).level != Level::Allow;
165 let check_confusable_idents =
166 cx.builder.lint_level(CONFUSABLE_IDENTS).level != Level::Allow;
167 let check_mixed_script_confusables =
168 cx.builder.lint_level(MIXED_SCRIPT_CONFUSABLES).level != Level::Allow;
169
170 if !check_non_ascii_idents
171 && !check_uncommon_codepoints
172 && !check_confusable_idents
173 && !check_mixed_script_confusables
174 {
175 return;
176 }
177
178 let mut has_non_ascii_idents = false;
179 let symbols = cx.sess().psess.symbol_gallery.symbols.lock();
180
181 #[allow(rustc::potential_query_instability)]
185 let mut symbols: Vec<_> = symbols.iter().collect();
186 symbols.sort_by_key(|k| k.1);
187 for &(ref symbol, &sp) in symbols.iter() {
188 let symbol_str = symbol.as_str();
189 if symbol_str.is_ascii() {
190 continue;
191 }
192 has_non_ascii_idents = true;
193 cx.emit_span_lint(NON_ASCII_IDENTS, sp, IdentifierNonAsciiChar);
194 if check_uncommon_codepoints
195 && !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed)
196 {
197 let mut chars: Vec<_> = symbol_str
198 .chars()
199 .map(|c| (c, GeneralSecurityProfile::identifier_type(c)))
200 .collect();
201
202 for (id_ty, id_ty_descr) in [
203 (IdentifierType::Exclusion, "Exclusion"),
204 (IdentifierType::Technical, "Technical"),
205 (IdentifierType::Limited_Use, "Limited_Use"),
206 (IdentifierType::Not_NFKC, "Not_NFKC"),
207 ] {
208 let codepoints: Vec<_> =
209 chars.extract_if(.., |(_, ty)| *ty == Some(id_ty)).collect();
210 if codepoints.is_empty() {
211 continue;
212 }
213 cx.emit_span_lint(
214 UNCOMMON_CODEPOINTS,
215 sp,
216 IdentifierUncommonCodepoints {
217 codepoints_len: codepoints.len(),
218 codepoints: codepoints.into_iter().map(|(c, _)| c).collect(),
219 identifier_type: id_ty_descr,
220 },
221 );
222 }
223
224 let remaining = chars
225 .extract_if(.., |(c, _)| !GeneralSecurityProfile::identifier_allowed(*c))
226 .collect::<Vec<_>>();
227 if !remaining.is_empty() {
228 cx.emit_span_lint(
229 UNCOMMON_CODEPOINTS,
230 sp,
231 IdentifierUncommonCodepoints {
232 codepoints_len: remaining.len(),
233 codepoints: remaining.into_iter().map(|(c, _)| c).collect(),
234 identifier_type: "Restricted",
235 },
236 );
237 }
238 }
239 }
240
241 if has_non_ascii_idents && check_confusable_idents {
242 let mut skeleton_map: UnordMap<Symbol, (Symbol, Span, bool)> =
243 UnordMap::with_capacity(symbols.len());
244 let mut skeleton_buf = String::new();
245
246 for &(&symbol, &sp) in symbols.iter() {
247 use unicode_security::confusable_detection::skeleton;
248
249 let symbol_str = symbol.as_str();
250 let is_ascii = symbol_str.is_ascii();
251
252 skeleton_buf.clear();
254 skeleton_buf.extend(skeleton(symbol_str));
255 let skeleton_sym = if *symbol_str == *skeleton_buf {
256 symbol
257 } else {
258 Symbol::intern(&skeleton_buf)
259 };
260
261 skeleton_map
262 .entry(skeleton_sym)
263 .and_modify(|(existing_symbol, existing_span, existing_is_ascii)| {
264 if !*existing_is_ascii || !is_ascii {
265 cx.emit_span_lint(
266 CONFUSABLE_IDENTS,
267 sp,
268 ConfusableIdentifierPair {
269 existing_sym: *existing_symbol,
270 sym: symbol,
271 label: *existing_span,
272 main_label: sp,
273 },
274 );
275 }
276 if *existing_is_ascii && !is_ascii {
277 *existing_symbol = symbol;
278 *existing_span = sp;
279 *existing_is_ascii = is_ascii;
280 }
281 })
282 .or_insert((symbol, sp, is_ascii));
283 }
284 }
285
286 if has_non_ascii_idents && check_mixed_script_confusables {
287 use unicode_security::is_potential_mixed_script_confusable_char;
288 use unicode_security::mixed_script::AugmentedScriptSet;
289
290 #[derive(#[automatically_derived]
impl ::core::clone::Clone for ScriptSetUsage {
#[inline]
fn clone(&self) -> ScriptSetUsage {
match self {
ScriptSetUsage::Suspicious(__self_0, __self_1) =>
ScriptSetUsage::Suspicious(::core::clone::Clone::clone(__self_0),
::core::clone::Clone::clone(__self_1)),
ScriptSetUsage::Verified => ScriptSetUsage::Verified,
}
}
}Clone)]
291 enum ScriptSetUsage {
292 Suspicious(Vec<char>, Span),
293 Verified,
294 }
295
296 let mut script_states: FxIndexMap<AugmentedScriptSet, ScriptSetUsage> =
297 Default::default();
298 let latin_augmented_script_set = AugmentedScriptSet::for_char('A');
299 script_states.insert(latin_augmented_script_set, ScriptSetUsage::Verified);
300
301 let mut has_suspicious = false;
302 for &(ref symbol, &sp) in symbols.iter() {
303 let symbol_str = symbol.as_str();
304 for ch in symbol_str.chars() {
305 if ch.is_ascii() {
306 continue;
308 }
309 if !GeneralSecurityProfile::identifier_allowed(ch) {
310 continue;
312 }
313 let augmented_script_set = AugmentedScriptSet::for_char(ch);
314 script_states
315 .entry(augmented_script_set)
316 .and_modify(|existing_state| {
317 if let ScriptSetUsage::Suspicious(ch_list, _) = existing_state {
318 if is_potential_mixed_script_confusable_char(ch) {
319 ch_list.push(ch);
320 } else {
321 *existing_state = ScriptSetUsage::Verified;
322 }
323 }
324 })
325 .or_insert_with(|| {
326 if !is_potential_mixed_script_confusable_char(ch) {
327 ScriptSetUsage::Verified
328 } else {
329 has_suspicious = true;
330 ScriptSetUsage::Suspicious(<[_]>::into_vec(::alloc::boxed::box_new([ch]))vec![ch], sp)
331 }
332 });
333 }
334 }
335
336 if has_suspicious {
337 #[allow(rustc::potential_query_instability)]
339 let verified_augmented_script_sets = script_states
340 .iter()
341 .flat_map(|(k, v)| match v {
342 ScriptSetUsage::Verified => Some(*k),
343 _ => None,
344 })
345 .collect::<Vec<_>>();
346
347 let mut lint_reports: BTreeMap<(Span, Vec<char>), AugmentedScriptSet> =
349 BTreeMap::new();
350
351 #[allow(rustc::potential_query_instability)]
353 'outerloop: for (augment_script_set, usage) in script_states {
354 let ScriptSetUsage::Suspicious(mut ch_list, sp) = usage else { continue };
355
356 if augment_script_set.is_all() {
357 continue;
358 }
359
360 for existing in verified_augmented_script_sets.iter() {
361 if existing.is_all() {
362 continue;
363 }
364 let mut intersect = *existing;
365 intersect.intersect_with(augment_script_set);
366 if !intersect.is_empty() && !intersect.is_all() {
367 continue 'outerloop;
368 }
369 }
370
371 ch_list.sort_unstable();
373 ch_list.dedup();
374 lint_reports.insert((sp, ch_list), augment_script_set);
375 }
376
377 for ((sp, ch_list), script_set) in lint_reports {
378 let mut includes = String::new();
379 for (idx, ch) in ch_list.into_iter().enumerate() {
380 if idx != 0 {
381 includes += ", ";
382 }
383 let char_info = ::alloc::__export::must_use({
::alloc::fmt::format(format_args!("\'{0}\' (U+{1:04X})", ch,
ch as u32))
})format!("'{}' (U+{:04X})", ch, ch as u32);
384 includes += &char_info;
385 }
386 cx.emit_span_lint(
387 MIXED_SCRIPT_CONFUSABLES,
388 sp,
389 MixedScriptConfusables { set: script_set.to_string(), includes },
390 );
391 }
392 }
393 }
394 }
395}