miri/shims/
os_str.rs

1use std::borrow::Cow;
2use std::ffi::{OsStr, OsString};
3#[cfg(unix)]
4use std::os::unix::ffi::{OsStrExt, OsStringExt};
5#[cfg(windows)]
6use std::os::windows::ffi::{OsStrExt, OsStringExt};
7use std::path::{Path, PathBuf};
8
9use rustc_middle::ty::Ty;
10use rustc_middle::ty::layout::LayoutOf;
11
12use crate::*;
13
/// Represent how path separator conversion should be done.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathConversion {
    /// Convert a path that follows the host's conventions into the target's conventions.
    HostToTarget,
    /// Convert a path that follows the target's conventions into the host's conventions.
    TargetToHost,
}
19
20#[cfg(unix)]
21pub fn bytes_to_os_str<'tcx>(bytes: &[u8]) -> InterpResult<'tcx, &OsStr> {
22    interp_ok(OsStr::from_bytes(bytes))
23}
/// Reinterpret a byte slice as an `OsStr`.
///
/// On non-Unix hosts the bytes must be valid UTF-8; anything else is reported as an
/// unsupported operation.
#[cfg(not(unix))]
pub fn bytes_to_os_str<'tcx>(bytes: &[u8]) -> InterpResult<'tcx, &OsStr> {
    // `from_encoded_bytes_unchecked` is not an option here: `bytes` is untrusted input.
    let decoded = std::str::from_utf8(bytes);
    let text =
        decoded.map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?;
    interp_ok(OsStr::new(text))
}
31
32impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
33pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
34    /// Helper function to read an OsString from a null-terminated sequence of bytes, which is what
35    /// the Unix APIs usually handle.
36    fn read_os_str_from_c_str<'a>(&'a self, ptr: Pointer) -> InterpResult<'tcx, &'a OsStr>
37    where
38        'tcx: 'a,
39    {
40        let this = self.eval_context_ref();
41        let bytes = this.read_c_str(ptr)?;
42        bytes_to_os_str(bytes)
43    }
44
45    /// Helper function to read an OsString from a 0x0000-terminated sequence of u16,
46    /// which is what the Windows APIs usually handle.
47    fn read_os_str_from_wide_str<'a>(&'a self, ptr: Pointer) -> InterpResult<'tcx, OsString>
48    where
49        'tcx: 'a,
50    {
51        #[cfg(windows)]
52        pub fn u16vec_to_osstring<'tcx>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
53            interp_ok(OsString::from_wide(&u16_vec[..]))
54        }
55        #[cfg(not(windows))]
56        pub fn u16vec_to_osstring<'tcx>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
57            let s = String::from_utf16(&u16_vec[..])
58                .map_err(|_| err_unsup_format!("{:?} is not a valid utf-16 string", u16_vec))?;
59            interp_ok(s.into())
60        }
61
62        let u16_vec = self.eval_context_ref().read_wide_str(ptr)?;
63        u16vec_to_osstring(u16_vec)
64    }
65
66    /// Helper function to write an OsStr as a null-terminated sequence of bytes, which is what the
67    /// Unix APIs usually handle. Returns `(success, full_len)`, where length includes the null
68    /// terminator. On failure, nothing is written.
69    fn write_os_str_to_c_str(
70        &mut self,
71        os_str: &OsStr,
72        ptr: Pointer,
73        size: u64,
74    ) -> InterpResult<'tcx, (bool, u64)> {
75        let bytes = os_str.as_encoded_bytes();
76        self.eval_context_mut().write_c_str(bytes, ptr, size)
77    }
78
79    /// Internal helper to share code between `write_os_str_to_wide_str` and
80    /// `write_os_str_to_wide_str_truncated`.
81    fn write_os_str_to_wide_str_helper(
82        &mut self,
83        os_str: &OsStr,
84        ptr: Pointer,
85        size: u64,
86        truncate: bool,
87    ) -> InterpResult<'tcx, (bool, u64)> {
88        #[cfg(windows)]
89        fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
90            interp_ok(os_str.encode_wide().collect())
91        }
92        #[cfg(not(windows))]
93        fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
94            // On non-Windows platforms the best we can do to transform Vec<u16> from/to OS strings is to do the
95            // intermediate transformation into strings. Which invalidates non-utf8 paths that are actually
96            // valid.
97            os_str
98                .to_str()
99                .map(|s| s.encode_utf16().collect())
100                .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str))
101                .into()
102        }
103
104        let u16_vec = os_str_to_u16vec(os_str)?;
105        let (written, size_needed) = self.eval_context_mut().write_wide_str(&u16_vec, ptr, size)?;
106        if truncate && !written && size > 0 {
107            // Write the truncated part that fits.
108            let truncated_data = &u16_vec[..size.saturating_sub(1).try_into().unwrap()];
109            let (written, written_len) =
110                self.eval_context_mut().write_wide_str(truncated_data, ptr, size)?;
111            assert!(written && written_len == size);
112        }
113        interp_ok((written, size_needed))
114    }
115
116    /// Helper function to write an OsStr as a 0x0000-terminated u16-sequence, which is what the
117    /// Windows APIs usually handle. Returns `(success, full_len)`, where length is measured
118    /// in units of `u16` and includes the null terminator. On failure, nothing is written.
119    fn write_os_str_to_wide_str(
120        &mut self,
121        os_str: &OsStr,
122        ptr: Pointer,
123        size: u64,
124    ) -> InterpResult<'tcx, (bool, u64)> {
125        self.write_os_str_to_wide_str_helper(os_str, ptr, size, /*truncate*/ false)
126    }
127
128    /// Like `write_os_str_to_wide_str`, but on failure as much as possible is written into
129    /// the buffer (always with a null terminator).
130    fn write_os_str_to_wide_str_truncated(
131        &mut self,
132        os_str: &OsStr,
133        ptr: Pointer,
134        size: u64,
135    ) -> InterpResult<'tcx, (bool, u64)> {
136        self.write_os_str_to_wide_str_helper(os_str, ptr, size, /*truncate*/ true)
137    }
138
139    /// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of bytes.
140    fn alloc_os_str_as_c_str(
141        &mut self,
142        os_str: &OsStr,
143        memkind: MemoryKind,
144    ) -> InterpResult<'tcx, Pointer> {
145        let size = u64::try_from(os_str.len()).unwrap().strict_add(1); // Make space for `0` terminator.
146        let this = self.eval_context_mut();
147
148        let arg_type = Ty::new_array(this.tcx.tcx, this.tcx.types.u8, size);
149        let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind)?;
150        let (written, _) = self.write_os_str_to_c_str(os_str, arg_place.ptr(), size).unwrap();
151        assert!(written);
152        interp_ok(arg_place.ptr())
153    }
154
155    /// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of `u16`.
156    fn alloc_os_str_as_wide_str(
157        &mut self,
158        os_str: &OsStr,
159        memkind: MemoryKind,
160    ) -> InterpResult<'tcx, Pointer> {
161        let size = u64::try_from(os_str.len()).unwrap().strict_add(1); // Make space for `0x0000` terminator.
162        let this = self.eval_context_mut();
163
164        let arg_type = Ty::new_array(this.tcx.tcx, this.tcx.types.u16, size);
165        let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind)?;
166        let (written, _) = self.write_os_str_to_wide_str(os_str, arg_place.ptr(), size).unwrap();
167        assert!(written);
168        interp_ok(arg_place.ptr())
169    }
170
171    /// Read a null-terminated sequence of bytes, and perform path separator conversion if needed.
172    fn read_path_from_c_str<'a>(&'a self, ptr: Pointer) -> InterpResult<'tcx, Cow<'a, Path>>
173    where
174        'tcx: 'a,
175    {
176        let this = self.eval_context_ref();
177        let os_str = this.read_os_str_from_c_str(ptr)?;
178
179        interp_ok(match this.convert_path(Cow::Borrowed(os_str), PathConversion::TargetToHost) {
180            Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
181            Cow::Owned(y) => Cow::Owned(PathBuf::from(y)),
182        })
183    }
184
185    /// Read a null-terminated sequence of `u16`s, and perform path separator conversion if needed.
186    fn read_path_from_wide_str(&self, ptr: Pointer) -> InterpResult<'tcx, PathBuf> {
187        let this = self.eval_context_ref();
188        let os_str = this.read_os_str_from_wide_str(ptr)?;
189
190        interp_ok(
191            this.convert_path(Cow::Owned(os_str), PathConversion::TargetToHost).into_owned().into(),
192        )
193    }
194
195    /// Write a Path to the machine memory (as a null-terminated sequence of bytes),
196    /// adjusting path separators if needed.
197    fn write_path_to_c_str(
198        &mut self,
199        path: &Path,
200        ptr: Pointer,
201        size: u64,
202    ) -> InterpResult<'tcx, (bool, u64)> {
203        let this = self.eval_context_mut();
204        let os_str =
205            this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
206        this.write_os_str_to_c_str(&os_str, ptr, size)
207    }
208
209    /// Write a Path to the machine memory (as a null-terminated sequence of `u16`s),
210    /// adjusting path separators if needed.
211    fn write_path_to_wide_str(
212        &mut self,
213        path: &Path,
214        ptr: Pointer,
215        size: u64,
216    ) -> InterpResult<'tcx, (bool, u64)> {
217        let this = self.eval_context_mut();
218        let os_str =
219            this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
220        this.write_os_str_to_wide_str(&os_str, ptr, size)
221    }
222
223    /// Write a Path to the machine memory (as a null-terminated sequence of `u16`s),
224    /// adjusting path separators if needed.
225    fn write_path_to_wide_str_truncated(
226        &mut self,
227        path: &Path,
228        ptr: Pointer,
229        size: u64,
230    ) -> InterpResult<'tcx, (bool, u64)> {
231        let this = self.eval_context_mut();
232        let os_str =
233            this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
234        this.write_os_str_to_wide_str_truncated(&os_str, ptr, size)
235    }
236
237    /// Allocate enough memory to store a Path as a null-terminated sequence of bytes,
238    /// adjusting path separators if needed.
239    fn alloc_path_as_c_str(
240        &mut self,
241        path: &Path,
242        memkind: MemoryKind,
243    ) -> InterpResult<'tcx, Pointer> {
244        let this = self.eval_context_mut();
245        let os_str =
246            this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
247        this.alloc_os_str_as_c_str(&os_str, memkind)
248    }
249
250    /// Allocate enough memory to store a Path as a null-terminated sequence of `u16`s,
251    /// adjusting path separators if needed.
252    fn alloc_path_as_wide_str(
253        &mut self,
254        path: &Path,
255        memkind: MemoryKind,
256    ) -> InterpResult<'tcx, Pointer> {
257        let this = self.eval_context_mut();
258        let os_str =
259            this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
260        this.alloc_os_str_as_wide_str(&os_str, memkind)
261    }
262
    /// Convert `os_str` between host and target path conventions, in the given `direction`.
    ///
    /// When no conversion is needed (Windows host with a Windows target, or Unix host with a
    /// non-Windows target) the input is returned unchanged. Otherwise the separators and the
    /// absolute-path prefix are rewritten by `windows_to_unix` / `unix_to_windows` below and an
    /// owned string is returned.
    fn convert_path<'a>(
        &self,
        os_str: Cow<'a, OsStr>,
        direction: PathConversion,
    ) -> Cow<'a, OsStr> {
        let this = self.eval_context_ref();
        let target_os = &this.tcx.sess.target.os;

        /// Adjust a Windows path to Unix conventions such that it un-does everything that
        /// `unix_to_windows` did, and such that if the Windows input path was absolute, then the
        /// Unix output path is absolute.
        fn windows_to_unix<T>(path: &mut Vec<T>)
        where
            T: From<u8> + Copy + Eq,
        {
            let sep = T::from(b'/');
            // Make sure all path separators are `/`.
            for c in path.iter_mut() {
                if *c == b'\\'.into() {
                    *c = sep;
                }
            }
            // If this starts with `//?/`, it was probably produced by `unix_to_windows` and we
            // remove the `//?` that got added to get the Unix path back out.
            if path.get(0..4) == Some(&[sep, sep, b'?'.into(), sep]) {
                // Remove first 3 characters. It still starts with `/` so it is absolute on Unix.
                path.splice(0..3, std::iter::empty());
            }
            // If it starts with a drive letter (`X:/`), convert it to an absolute Unix path.
            else if path.get(1..3) == Some(&[b':'.into(), sep]) {
                // We add a `/` at the beginning, to store the absolute Windows
                // path in something that looks like an absolute Unix path.
                path.insert(0, sep);
            }
        }

        /// Adjust a Unix path to Windows conventions such that it un-does everything that
        /// `windows_to_unix` did, and such that if the Unix input path was absolute, then the
        /// Windows output path is absolute.
        fn unix_to_windows<T>(path: &mut Vec<T>)
        where
            T: From<u8> + Copy + Eq,
        {
            let sep = T::from(b'\\');
            // Make sure all path separators are `\`.
            for c in path.iter_mut() {
                if *c == b'/'.into() {
                    *c = sep;
                }
            }
            // If the path is `\X:\`, the leading separator was probably added by `windows_to_unix`
            // and we should get rid of it again.
            if path.get(2..4) == Some(&[b':'.into(), sep]) && path[0] == sep {
                // The new path is still absolute on Windows.
                path.remove(0);
            }
            // If this starts with a `\` but not a `\\`, then this was absolute on Unix but is
            // relative on Windows (relative to "the root of the current directory", e.g. the
            // drive letter).
            else if path.first() == Some(&sep) && path.get(1) != Some(&sep) {
                // We add `\\?` so it starts with `\\?\` which is some magic path on Windows
                // that *is* considered absolute. This way we store the absolute Unix path
                // in something that looks like an absolute Windows path.
                path.splice(0..0, [sep, sep, b'?'.into()]);
            }
        }

        // Below we assume that everything non-Windows works like Unix, at least
        // when it comes to file system path conventions.
        #[cfg(windows)]
        return if target_os == "windows" {
            // Windows-on-Windows, all fine.
            os_str
        } else {
            // Unix target, Windows host.
            // The host string is processed as a sequence of `u16` units.
            let mut path: Vec<u16> = os_str.encode_wide().collect();
            match direction {
                PathConversion::HostToTarget => {
                    windows_to_unix(&mut path);
                }
                PathConversion::TargetToHost => {
                    unix_to_windows(&mut path);
                }
            }
            Cow::Owned(OsString::from_wide(&path))
        };
        #[cfg(unix)]
        return if target_os == "windows" {
            // Windows target, Unix host.
            // The host string is processed as a sequence of bytes.
            let mut path: Vec<u8> = os_str.into_owned().into_encoded_bytes();
            match direction {
                PathConversion::HostToTarget => {
                    unix_to_windows(&mut path);
                }
                PathConversion::TargetToHost => {
                    windows_to_unix(&mut path);
                }
            }
            Cow::Owned(OsString::from_vec(path))
        } else {
            // Unix-on-Unix, all is fine.
            os_str
        };
    }
367}