Skip to main content

core/bstr/
mod.rs

1//! The `ByteStr` type and trait implementations.
2
3mod traits;
4
5#[unstable(feature = "bstr_internals", issue = "none")]
6pub use traits::{impl_partial_eq, impl_partial_eq_n, impl_partial_eq_ord};
7
8use crate::borrow::{Borrow, BorrowMut};
9use crate::fmt::{self, Alignment};
10use crate::ops::{Deref, DerefMut, DerefPure};
11
12/// A wrapper for `&[u8]` representing a human-readable string that's conventionally, but not
13/// always, UTF-8.
14///
15/// Unlike `&str`, this type permits non-UTF-8 contents, making it suitable for user input,
16/// non-native filenames (as `Path` only supports native filenames), and other applications that
17/// need to round-trip whatever data the user provides.
18///
19/// For an owned, growable byte string buffer, use
20/// [`ByteString`](../../std/bstr/struct.ByteString.html).
21///
22/// `ByteStr` implements `Deref` to `[u8]`, so all methods available on `[u8]` are available on
23/// `ByteStr`.
24///
25/// # Representation
26///
27/// A `&ByteStr` has the same representation as a `&str`. That is, a `&ByteStr` is a wide pointer
28/// which includes a pointer to some bytes and a length.
29///
30/// # Trait implementations
31///
32/// The `ByteStr` type has a number of trait implementations, and in particular, defines equality
33/// and comparisons between `&ByteStr`, `&str`, and `&[u8]`, for convenience.
34///
35/// The `Debug` implementation for `ByteStr` shows its bytes as a normal string, with invalid UTF-8
36/// presented as hex escape sequences.
37///
38/// The `Display` implementation behaves as if the `ByteStr` were first lossily converted to a
39/// `str`, with invalid UTF-8 presented as the Unicode replacement character (�).
40#[unstable(feature = "bstr", issue = "134915")]
41#[repr(transparent)]
42#[doc(alias = "BStr")]
43pub struct ByteStr(pub [u8]);
44
45impl ByteStr {
46    /// Creates a `ByteStr` slice from anything that can be converted to a byte slice.
47    ///
48    /// This is a zero-cost conversion.
49    ///
50    /// # Example
51    ///
52    /// You can create a `ByteStr` from a byte array, a byte slice or a string slice:
53    ///
54    /// ```
55    /// # #![feature(bstr)]
56    /// # use std::bstr::ByteStr;
57    /// let a = ByteStr::new(b"abc");
58    /// let b = ByteStr::new(&b"abc"[..]);
59    /// let c = ByteStr::new("abc");
60    ///
61    /// assert_eq!(a, b);
62    /// assert_eq!(a, c);
63    /// ```
64    #[inline]
65    #[unstable(feature = "bstr", issue = "134915")]
66    #[rustc_const_unstable(feature = "const_convert", issue = "143773")]
67    pub const fn new<B: ?Sized + [const] AsRef<[u8]>>(bytes: &B) -> &Self {
68        ByteStr::from_bytes(bytes.as_ref())
69    }
70
71    /// Returns the same string as `&ByteStr`.
72    ///
73    /// This method is redundant when used directly on `&ByteStr`, but
74    /// it helps dereferencing other "container" types,
75    /// for example `Box<ByteStr>` or `Arc<ByteStr>`.
76    #[inline]
77    // #[unstable(feature = "str_as_str", issue = "130366")]
78    #[unstable(feature = "bstr", issue = "134915")]
79    pub const fn as_byte_str(&self) -> &ByteStr {
80        self
81    }
82
83    /// Returns the same string as `&mut ByteStr`.
84    ///
85    /// This method is redundant when used directly on `&mut ByteStr`, but
86    /// it helps dereferencing other "container" types,
87    /// for example `Box<ByteStr>` or `MutexGuard<ByteStr>`.
88    #[inline]
89    // #[unstable(feature = "str_as_str", issue = "130366")]
90    #[unstable(feature = "bstr", issue = "134915")]
91    pub const fn as_mut_byte_str(&mut self) -> &mut ByteStr {
92        self
93    }
94
95    #[doc(hidden)]
96    #[unstable(feature = "bstr_internals", issue = "none")]
97    #[inline]
98    #[rustc_const_unstable(feature = "bstr_internals", issue = "none")]
99    pub const fn from_bytes(slice: &[u8]) -> &Self {
100        // SAFETY: `ByteStr` is a transparent wrapper around `[u8]`, so we can turn a reference to
101        // the wrapped type into a reference to the wrapper type.
102        unsafe { &*(slice as *const [u8] as *const Self) }
103    }
104
105    #[doc(hidden)]
106    #[unstable(feature = "bstr_internals", issue = "none")]
107    #[inline]
108    #[rustc_const_unstable(feature = "bstr_internals", issue = "none")]
109    pub const fn from_bytes_mut(slice: &mut [u8]) -> &mut Self {
110        // SAFETY: `ByteStr` is a transparent wrapper around `[u8]`, so we can turn a reference to
111        // the wrapped type into a reference to the wrapper type.
112        unsafe { &mut *(slice as *mut [u8] as *mut Self) }
113    }
114
115    #[doc(hidden)]
116    #[unstable(feature = "bstr_internals", issue = "none")]
117    #[inline]
118    #[rustc_const_unstable(feature = "bstr_internals", issue = "none")]
119    pub const fn as_bytes(&self) -> &[u8] {
120        &self.0
121    }
122
123    #[doc(hidden)]
124    #[unstable(feature = "bstr_internals", issue = "none")]
125    #[inline]
126    #[rustc_const_unstable(feature = "bstr_internals", issue = "none")]
127    pub const fn as_bytes_mut(&mut self) -> &mut [u8] {
128        &mut self.0
129    }
130}
131
132#[unstable(feature = "bstr", issue = "134915")]
133#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
134impl const Deref for ByteStr {
135    type Target = [u8];
136
137    #[inline]
138    fn deref(&self) -> &[u8] {
139        &self.0
140    }
141}
142
143#[unstable(feature = "bstr", issue = "134915")]
144#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
145impl const DerefMut for ByteStr {
146    #[inline]
147    fn deref_mut(&mut self) -> &mut [u8] {
148        &mut self.0
149    }
150}
151
// SAFETY: the `Deref` and `DerefMut` impls for `ByteStr` in this file do nothing but hand out
// a borrow of the wrapped `[u8]`; they run no other code and have no side effects.
// NOTE(review): that is presumably the well-behavedness `DerefPure` asserts -- confirm against
// the trait's documentation.
#[unstable(feature = "deref_pure_trait", issue = "87121")]
unsafe impl DerefPure for ByteStr {}
154
155#[unstable(feature = "bstr", issue = "134915")]
156impl fmt::Debug for ByteStr {
157    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
158        write!(f, "\"")?;
159        for chunk in self.utf8_chunks() {
160            for c in chunk.valid().chars() {
161                match c {
162                    '\0' => write!(f, "\\0")?,
163                    '\x01'..='\x7f' => write!(f, "{}", (c as u8).escape_ascii())?,
164                    _ => write!(f, "{}", c.escape_debug())?,
165                }
166            }
167            write!(f, "{}", chunk.invalid().escape_ascii())?;
168        }
169        write!(f, "\"")?;
170        Ok(())
171    }
172}
173
174#[unstable(feature = "bstr", issue = "134915")]
175impl fmt::Display for ByteStr {
176    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
177        fn emit(byte_str: &ByteStr, f: &mut fmt::Formatter<'_>) -> fmt::Result {
178            for chunk in byte_str.utf8_chunks() {
179                f.write_str(chunk.valid())?;
180                if !chunk.invalid().is_empty() {
181                    f.write_str("\u{FFFD}")?;
182                }
183            }
184
185            Ok(())
186        }
187
188        let requested_width = f.width().unwrap_or(0);
189        if requested_width == 0 && f.precision().is_none() {
190            // Avoid counting the characters if no truncation or padding was
191            // requested.
192            return emit(self, f);
193        }
194
195        let (truncated, actual_width) = match f.precision() {
196            // The entire string is truncated away. Weird, but ok.
197            Some(0) => (ByteStr::new(&[]), 0),
198            // Advance through string until we run out of space.
199            Some(precision) => {
200                let mut remaining_width = precision;
201                let mut chunks = self.utf8_chunks();
202                let mut current_width = 0;
203                let mut offset = 0;
204                loop {
205                    let Some(chunk) = chunks.next() else {
206                        // We reached the end of the string without running out
207                        // of space, so print the entire string.
208                        break (self, current_width);
209                    };
210
211                    let mut chars = chunk.valid().char_indices();
212                    let Err(remaining) = chars.advance_by(remaining_width) else {
213                        // We've counted off `precision` characters, so truncate
214                        // the string at the current offset.
215                        break (&self[..offset + chars.offset()], precision);
216                    };
217
218                    offset += chunk.valid().len();
219                    current_width += remaining_width - remaining.get();
220                    remaining_width = remaining.get();
221
222                    // `remaining_width` cannot be zero, there is still space
223                    // remaining. So next, count the � character emitted for
224                    // the invalid chunk (if it exists).
225                    if !chunk.invalid().is_empty() {
226                        offset += chunk.invalid().len();
227                        current_width += 1;
228                        remaining_width -= 1;
229
230                        if remaining_width == 0 {
231                            break (&self[..offset], precision);
232                        }
233                    }
234                }
235            }
236            // The string shouldn't be truncated at all, so just count the number
237            // of characters to calculate the padding.
238            None => {
239                let actual_width = self
240                    .utf8_chunks()
241                    .map(|chunk| {
242                        chunk.valid().chars().count()
243                            + if chunk.invalid().is_empty() { 0 } else { 1 }
244                    })
245                    .sum();
246                (self, actual_width)
247            }
248        };
249
250        // The width is originally stored as a 16-bit number, so this cannot fail.
251        let padding = u16::try_from(requested_width.saturating_sub(actual_width)).unwrap();
252
253        let post_padding = f.padding(padding, Alignment::Left)?;
254        emit(truncated, f)?;
255        post_padding.write(f)
256    }
257}
258
259#[unstable(feature = "bstr", issue = "134915")]
260#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
261impl const AsRef<[u8]> for ByteStr {
262    #[inline]
263    fn as_ref(&self) -> &[u8] {
264        &self.0
265    }
266}
267
268#[unstable(feature = "bstr", issue = "134915")]
269#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
270impl const AsRef<ByteStr> for ByteStr {
271    #[inline]
272    fn as_ref(&self) -> &ByteStr {
273        self
274    }
275}
276
277// `impl AsRef<ByteStr> for [u8]` omitted to avoid widespread inference failures
278
279#[unstable(feature = "bstr", issue = "134915")]
280#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
281impl const AsRef<ByteStr> for str {
282    #[inline]
283    fn as_ref(&self) -> &ByteStr {
284        ByteStr::new(self)
285    }
286}
287
288#[unstable(feature = "bstr", issue = "134915")]
289#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
290impl const AsMut<[u8]> for ByteStr {
291    #[inline]
292    fn as_mut(&mut self) -> &mut [u8] {
293        &mut self.0
294    }
295}
296
297// `impl AsMut<ByteStr> for [u8]` omitted to avoid widespread inference failures
298
299// `impl Borrow<ByteStr> for [u8]` omitted to avoid widespread inference failures
300
301// `impl Borrow<ByteStr> for str` omitted to avoid widespread inference failures
302
303#[unstable(feature = "bstr", issue = "134915")]
304#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
305impl const Borrow<[u8]> for ByteStr {
306    #[inline]
307    fn borrow(&self) -> &[u8] {
308        &self.0
309    }
310}
311
312// `impl BorrowMut<ByteStr> for [u8]` omitted to avoid widespread inference failures
313
314#[unstable(feature = "bstr", issue = "134915")]
315#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
316impl const BorrowMut<[u8]> for ByteStr {
317    #[inline]
318    fn borrow_mut(&mut self) -> &mut [u8] {
319        &mut self.0
320    }
321}
322
323#[unstable(feature = "bstr", issue = "134915")]
324impl<'a> Default for &'a ByteStr {
325    fn default() -> Self {
326        ByteStr::from_bytes(b"")
327    }
328}
329
330#[unstable(feature = "bstr", issue = "134915")]
331impl<'a> Default for &'a mut ByteStr {
332    fn default() -> Self {
333        ByteStr::from_bytes_mut(&mut [])
334    }
335}
336
337// Omitted due to inference failures
338//
339// #[unstable(feature = "bstr", issue = "134915")]
340// impl<'a, const N: usize> From<&'a [u8; N]> for &'a ByteStr {
341//     #[inline]
342//     fn from(s: &'a [u8; N]) -> Self {
343//         ByteStr::from_bytes(s)
344//     }
345// }
346//
347// #[unstable(feature = "bstr", issue = "134915")]
348// impl<'a> From<&'a [u8]> for &'a ByteStr {
349//     #[inline]
350//     fn from(s: &'a [u8]) -> Self {
351//         ByteStr::from_bytes(s)
352//     }
353// }
354
355// Omitted due to slice-from-array-issue-113238:
356//
357// #[unstable(feature = "bstr", issue = "134915")]
358// impl<'a> From<&'a ByteStr> for &'a [u8] {
359//     #[inline]
360//     fn from(s: &'a ByteStr) -> Self {
361//         &s.0
362//     }
363// }
364//
365// #[unstable(feature = "bstr", issue = "134915")]
366// impl<'a> From<&'a mut ByteStr> for &'a mut [u8] {
367//     #[inline]
368//     fn from(s: &'a mut ByteStr) -> Self {
369//         &mut s.0
370//     }
371// }
372
373// Omitted due to inference failures
374//
375// #[unstable(feature = "bstr", issue = "134915")]
376// impl<'a> From<&'a str> for &'a ByteStr {
377//     #[inline]
378//     fn from(s: &'a str) -> Self {
379//         ByteStr::from_bytes(s.as_bytes())
380//     }
381// }
382
383#[unstable(feature = "bstr", issue = "134915")]
384#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
385impl<'a> const TryFrom<&'a ByteStr> for &'a str {
386    type Error = crate::str::Utf8Error;
387
388    #[inline]
389    fn try_from(s: &'a ByteStr) -> Result<Self, Self::Error> {
390        crate::str::from_utf8(&s.0)
391    }
392}
393
394#[unstable(feature = "bstr", issue = "134915")]
395#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
396impl<'a> const TryFrom<&'a mut ByteStr> for &'a mut str {
397    type Error = crate::str::Utf8Error;
398
399    #[inline]
400    fn try_from(s: &'a mut ByteStr) -> Result<Self, Self::Error> {
401        crate::str::from_utf8_mut(&mut s.0)
402    }
403}