rustc_lint/
invalid_from_utf8.rs

1use std::str::Utf8Error;
2
3use rustc_ast::LitKind;
4use rustc_hir::{Expr, ExprKind};
5use rustc_session::{declare_lint, declare_lint_pass};
6use rustc_span::source_map::Spanned;
7use rustc_span::sym;
8
9use crate::lints::InvalidFromUtf8Diag;
10use crate::{LateContext, LateLintPass, LintContext};
11
declare_lint! {
    /// The `invalid_from_utf8_unchecked` lint checks for calls to
    /// `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`
    /// (as well as the inherent `str::from_utf8_unchecked` and
    /// `str::from_utf8_unchecked_mut`) with a known invalid UTF-8 value.
    ///
    /// A value is "known" when the argument resolves to a byte string literal
    /// or to an array made up only of byte or integer literals.
    ///
    /// ### Example
    ///
    /// ```rust,compile_fail
    /// # #[allow(unused)]
    /// unsafe {
    ///     std::str::from_utf8_unchecked(b"Ru\x82st");
    /// }
    /// ```
    ///
    /// {{produces}}
    ///
    /// ### Explanation
    ///
    /// Creating such a `str` would result in undefined behavior as per documentation
    /// for `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`.
    pub INVALID_FROM_UTF8_UNCHECKED,
    Deny,
    "using a non UTF-8 literal in `std::str::from_utf8_unchecked`"
}
36
declare_lint! {
    /// The `invalid_from_utf8` lint checks for calls to
    /// `std::str::from_utf8` and `std::str::from_utf8_mut`
    /// (as well as the inherent `str::from_utf8` and `str::from_utf8_mut`)
    /// with a known invalid UTF-8 value.
    ///
    /// A value is "known" when the argument resolves to a byte string literal
    /// or to an array made up only of byte or integer literals.
    ///
    /// ### Example
    ///
    /// ```rust
    /// # #[allow(unused)]
    /// std::str::from_utf8(b"Ru\x82st");
    /// ```
    ///
    /// {{produces}}
    ///
    /// ### Explanation
    ///
    /// Trying to create such a `str` would always return an error as per documentation
    /// for `std::str::from_utf8` and `std::str::from_utf8_mut`.
    pub INVALID_FROM_UTF8,
    Warn,
    "using a non UTF-8 literal in `std::str::from_utf8`"
}
59
// Declares the `InvalidFromUtf8` lint pass and registers the two lints it can
// emit: the unchecked (`Deny`) and the checked (`Warn`) variant.
declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED, INVALID_FROM_UTF8]);
61
62impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
63    fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) {
64        if let ExprKind::Call(path, [arg]) = expr.kind
65            && let ExprKind::Path(ref qpath) = path.kind
66            && let Some(def_id) = cx.qpath_res(qpath, path.hir_id).opt_def_id()
67            && let Some(diag_item) = cx.tcx.get_diagnostic_name(def_id)
68            && [
69                sym::str_from_utf8,
70                sym::str_from_utf8_mut,
71                sym::str_from_utf8_unchecked,
72                sym::str_from_utf8_unchecked_mut,
73                sym::str_inherent_from_utf8,
74                sym::str_inherent_from_utf8_mut,
75                sym::str_inherent_from_utf8_unchecked,
76                sym::str_inherent_from_utf8_unchecked_mut,
77            ]
78            .contains(&diag_item)
79        {
80            let lint = |label, utf8_error: Utf8Error| {
81                let method = diag_item.as_str().strip_prefix("str_").unwrap();
82                let method = if let Some(method) = method.strip_prefix("inherent_") {
83                    format!("str::{method}")
84                } else {
85                    format!("std::str::{method}")
86                };
87                let valid_up_to = utf8_error.valid_up_to();
88                let is_unchecked_variant = diag_item.as_str().contains("unchecked");
89
90                cx.emit_span_lint(
91                    if is_unchecked_variant {
92                        INVALID_FROM_UTF8_UNCHECKED
93                    } else {
94                        INVALID_FROM_UTF8
95                    },
96                    expr.span,
97                    if is_unchecked_variant {
98                        InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label }
99                    } else {
100                        InvalidFromUtf8Diag::Checked { method, valid_up_to, label }
101                    },
102                )
103            };
104
105            let mut init = cx.expr_or_init_with_outside_body(arg);
106            while let ExprKind::AddrOf(.., inner) = init.kind {
107                init = cx.expr_or_init_with_outside_body(inner);
108            }
109            match init.kind {
110                ExprKind::Lit(Spanned { node: lit, .. }) => {
111                    if let LitKind::ByteStr(bytes, _) = &lit
112                        && let Err(utf8_error) = std::str::from_utf8(bytes)
113                    {
114                        lint(init.span, utf8_error);
115                    }
116                }
117                ExprKind::Array(args) => {
118                    let elements = args
119                        .iter()
120                        .map(|e| match &e.kind {
121                            ExprKind::Lit(Spanned { node: lit, .. }) => match lit {
122                                LitKind::Byte(b) => Some(*b),
123                                LitKind::Int(b, _) => Some(b.get() as u8),
124                                _ => None,
125                            },
126                            _ => None,
127                        })
128                        .collect::<Option<Vec<_>>>();
129
130                    if let Some(elements) = elements
131                        && let Err(utf8_error) = std::str::from_utf8(&elements)
132                    {
133                        lint(init.span, utf8_error);
134                    }
135                }
136                _ => {}
137            }
138        }
139    }
140}