std/sys/os_str/
bytes.rs

1//! The underlying OsString/OsStr implementation on Unix and many other
2//! systems: just a `Vec<u8>`/`[u8]`.
3
4use core::clone::CloneToUninit;
5
6use crate::borrow::Cow;
7use crate::collections::TryReserveError;
8use crate::fmt::Write;
9use crate::rc::Rc;
10use crate::sync::Arc;
11use crate::sys_common::{AsInner, IntoInner};
12use crate::{fmt, mem, str};
13
14#[cfg(test)]
15mod tests;
16
/// Owned OS string storage for this platform family: a plain byte vector.
///
/// `#[repr(transparent)]` gives `Buf` the same layout as its single
/// `Vec<u8>` field. Hashing is derived, so it hashes that one field.
#[repr(transparent)]
#[derive(Hash)]
pub struct Buf {
    /// The encoded bytes of the string.
    pub inner: Vec<u8>,
}
22
/// Borrowed OS string storage for this platform family: a plain byte slice.
///
/// `#[repr(transparent)]` gives `Slice` the same layout as `[u8]`; the
/// reference and smart-pointer conversions in this file rely on that.
#[repr(transparent)]
pub struct Slice {
    /// The encoded bytes of the string.
    pub inner: [u8],
}
27
28impl fmt::Debug for Slice {
29    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
30        fmt::Debug::fmt(&self.inner.utf8_chunks().debug(), f)
31    }
32}
33
34impl fmt::Display for Slice {
35    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
36        // If we're the empty string then our iterator won't actually yield
37        // anything, so perform the formatting manually
38        if self.inner.is_empty() {
39            return "".fmt(f);
40        }
41
42        for chunk in self.inner.utf8_chunks() {
43            let valid = chunk.valid();
44            // If we successfully decoded the whole chunk as a valid string then
45            // we can return a direct formatting of the string which will also
46            // respect various formatting flags if possible.
47            if chunk.invalid().is_empty() {
48                return valid.fmt(f);
49            }
50
51            f.write_str(valid)?;
52            f.write_char(char::REPLACEMENT_CHARACTER)?;
53        }
54        Ok(())
55    }
56}
57
58impl fmt::Debug for Buf {
59    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
60        fmt::Debug::fmt(self.as_slice(), formatter)
61    }
62}
63
64impl fmt::Display for Buf {
65    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
66        fmt::Display::fmt(self.as_slice(), formatter)
67    }
68}
69
70impl Clone for Buf {
71    #[inline]
72    fn clone(&self) -> Self {
73        Buf { inner: self.inner.clone() }
74    }
75
76    #[inline]
77    fn clone_from(&mut self, source: &Self) {
78        self.inner.clone_from(&source.inner)
79    }
80}
81
82impl IntoInner<Vec<u8>> for Buf {
83    fn into_inner(self) -> Vec<u8> {
84        self.inner
85    }
86}
87
88impl AsInner<[u8]> for Buf {
89    #[inline]
90    fn as_inner(&self) -> &[u8] {
91        &self.inner
92    }
93}
94
95impl Buf {
96    #[inline]
97    pub fn into_encoded_bytes(self) -> Vec<u8> {
98        self.inner
99    }
100
101    #[inline]
102    pub unsafe fn from_encoded_bytes_unchecked(s: Vec<u8>) -> Self {
103        Self { inner: s }
104    }
105
106    pub fn from_string(s: String) -> Buf {
107        Buf { inner: s.into_bytes() }
108    }
109
110    #[inline]
111    pub fn with_capacity(capacity: usize) -> Buf {
112        Buf { inner: Vec::with_capacity(capacity) }
113    }
114
115    #[inline]
116    pub fn clear(&mut self) {
117        self.inner.clear()
118    }
119
120    #[inline]
121    pub fn capacity(&self) -> usize {
122        self.inner.capacity()
123    }
124
125    #[inline]
126    pub fn reserve(&mut self, additional: usize) {
127        self.inner.reserve(additional)
128    }
129
130    #[inline]
131    pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
132        self.inner.try_reserve(additional)
133    }
134
135    #[inline]
136    pub fn reserve_exact(&mut self, additional: usize) {
137        self.inner.reserve_exact(additional)
138    }
139
140    #[inline]
141    pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
142        self.inner.try_reserve_exact(additional)
143    }
144
145    #[inline]
146    pub fn shrink_to_fit(&mut self) {
147        self.inner.shrink_to_fit()
148    }
149
150    #[inline]
151    pub fn shrink_to(&mut self, min_capacity: usize) {
152        self.inner.shrink_to(min_capacity)
153    }
154
155    #[inline]
156    pub fn as_slice(&self) -> &Slice {
157        // SAFETY: Slice just wraps [u8],
158        // and &*self.inner is &[u8], therefore
159        // transmuting &[u8] to &Slice is safe.
160        unsafe { mem::transmute(&*self.inner) }
161    }
162
163    #[inline]
164    pub fn as_mut_slice(&mut self) -> &mut Slice {
165        // SAFETY: Slice just wraps [u8],
166        // and &mut *self.inner is &mut [u8], therefore
167        // transmuting &mut [u8] to &mut Slice is safe.
168        unsafe { mem::transmute(&mut *self.inner) }
169    }
170
171    pub fn into_string(self) -> Result<String, Buf> {
172        String::from_utf8(self.inner).map_err(|p| Buf { inner: p.into_bytes() })
173    }
174
175    pub fn push_slice(&mut self, s: &Slice) {
176        self.inner.extend_from_slice(&s.inner)
177    }
178
179    #[inline]
180    pub fn leak<'a>(self) -> &'a mut Slice {
181        unsafe { mem::transmute(self.inner.leak()) }
182    }
183
184    #[inline]
185    pub fn into_box(self) -> Box<Slice> {
186        unsafe { mem::transmute(self.inner.into_boxed_slice()) }
187    }
188
189    #[inline]
190    pub fn from_box(boxed: Box<Slice>) -> Buf {
191        let inner: Box<[u8]> = unsafe { mem::transmute(boxed) };
192        Buf { inner: inner.into_vec() }
193    }
194
195    #[inline]
196    pub fn into_arc(&self) -> Arc<Slice> {
197        self.as_slice().into_arc()
198    }
199
200    #[inline]
201    pub fn into_rc(&self) -> Rc<Slice> {
202        self.as_slice().into_rc()
203    }
204
205    /// Provides plumbing to core `Vec::truncate`.
206    /// More well behaving alternative to allowing outer types
207    /// full mutable access to the core `Vec`.
208    #[inline]
209    pub(crate) fn truncate(&mut self, len: usize) {
210        self.inner.truncate(len);
211    }
212
213    /// Provides plumbing to core `Vec::extend_from_slice`.
214    /// More well behaving alternative to allowing outer types
215    /// full mutable access to the core `Vec`.
216    #[inline]
217    pub(crate) fn extend_from_slice(&mut self, other: &[u8]) {
218        self.inner.extend_from_slice(other);
219    }
220}
221
222impl Slice {
223    #[inline]
224    pub fn as_encoded_bytes(&self) -> &[u8] {
225        &self.inner
226    }
227
228    #[inline]
229    pub unsafe fn from_encoded_bytes_unchecked(s: &[u8]) -> &Slice {
230        unsafe { mem::transmute(s) }
231    }
232
233    #[track_caller]
234    #[inline]
235    pub fn check_public_boundary(&self, index: usize) {
236        if index == 0 || index == self.inner.len() {
237            return;
238        }
239        if index < self.inner.len()
240            && (self.inner[index - 1].is_ascii() || self.inner[index].is_ascii())
241        {
242            return;
243        }
244
245        slow_path(&self.inner, index);
246
247        /// We're betting that typical splits will involve an ASCII character.
248        ///
249        /// Putting the expensive checks in a separate function generates notably
250        /// better assembly.
251        #[track_caller]
252        #[inline(never)]
253        fn slow_path(bytes: &[u8], index: usize) {
254            let (before, after) = bytes.split_at(index);
255
256            // UTF-8 takes at most 4 bytes per codepoint, so we don't
257            // need to check more than that.
258            let after = after.get(..4).unwrap_or(after);
259            match str::from_utf8(after) {
260                Ok(_) => return,
261                Err(err) if err.valid_up_to() != 0 => return,
262                Err(_) => (),
263            }
264
265            for len in 2..=4.min(index) {
266                let before = &before[index - len..];
267                if str::from_utf8(before).is_ok() {
268                    return;
269                }
270            }
271
272            panic!("byte index {index} is not an OsStr boundary");
273        }
274    }
275
276    #[inline]
277    pub fn from_str(s: &str) -> &Slice {
278        unsafe { Slice::from_encoded_bytes_unchecked(s.as_bytes()) }
279    }
280
281    pub fn to_str(&self) -> Result<&str, crate::str::Utf8Error> {
282        str::from_utf8(&self.inner)
283    }
284
285    pub fn to_string_lossy(&self) -> Cow<'_, str> {
286        String::from_utf8_lossy(&self.inner)
287    }
288
289    pub fn to_owned(&self) -> Buf {
290        Buf { inner: self.inner.to_vec() }
291    }
292
293    pub fn clone_into(&self, buf: &mut Buf) {
294        self.inner.clone_into(&mut buf.inner)
295    }
296
297    #[inline]
298    pub fn into_box(&self) -> Box<Slice> {
299        let boxed: Box<[u8]> = self.inner.into();
300        unsafe { mem::transmute(boxed) }
301    }
302
303    pub fn empty_box() -> Box<Slice> {
304        let boxed: Box<[u8]> = Default::default();
305        unsafe { mem::transmute(boxed) }
306    }
307
308    #[inline]
309    pub fn into_arc(&self) -> Arc<Slice> {
310        let arc: Arc<[u8]> = Arc::from(&self.inner);
311        unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Slice) }
312    }
313
314    #[inline]
315    pub fn into_rc(&self) -> Rc<Slice> {
316        let rc: Rc<[u8]> = Rc::from(&self.inner);
317        unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Slice) }
318    }
319
320    #[inline]
321    pub fn make_ascii_lowercase(&mut self) {
322        self.inner.make_ascii_lowercase()
323    }
324
325    #[inline]
326    pub fn make_ascii_uppercase(&mut self) {
327        self.inner.make_ascii_uppercase()
328    }
329
330    #[inline]
331    pub fn to_ascii_lowercase(&self) -> Buf {
332        Buf { inner: self.inner.to_ascii_lowercase() }
333    }
334
335    #[inline]
336    pub fn to_ascii_uppercase(&self) -> Buf {
337        Buf { inner: self.inner.to_ascii_uppercase() }
338    }
339
340    #[inline]
341    pub fn is_ascii(&self) -> bool {
342        self.inner.is_ascii()
343    }
344
345    #[inline]
346    pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
347        self.inner.eq_ignore_ascii_case(&other.inner)
348    }
349}
350
351#[unstable(feature = "clone_to_uninit", issue = "126799")]
352unsafe impl CloneToUninit for Slice {
353    #[inline]
354    #[cfg_attr(debug_assertions, track_caller)]
355    unsafe fn clone_to_uninit(&self, dst: *mut u8) {
356        // SAFETY: we're just a transparent wrapper around [u8]
357        unsafe { self.inner.clone_to_uninit(dst) }
358    }
359}