std/os/windows/
ffi.rs

1//! Windows-specific extensions to primitives in the [`std::ffi`] module.
2//!
3//! # Overview
4//!
5//! For historical reasons, the Windows API uses a form of potentially
6//! ill-formed UTF-16 encoding for strings. Specifically, the 16-bit
7//! code units in Windows strings may contain [isolated surrogate code
8//! points which are not paired together][ill-formed-utf-16]. The
9//! Unicode standard requires that surrogate code points (those in the
10//! range U+D800 to U+DFFF) always be *paired*, because in the UTF-16
11//! encoding a *surrogate code unit pair* is used to encode a single
12//! character. For compatibility with code that does not enforce
13//! these pairings, Windows does not enforce them, either.
14//!
15//! While it is not always possible to convert such a string losslessly into
16//! a valid UTF-16 string (or even UTF-8), it is often desirable to be
17//! able to round-trip such a string from and to Windows APIs
18//! losslessly. For example, some Rust code may be "bridging" some
19//! Windows APIs together, just passing `WCHAR` strings among those
20//! APIs without ever really looking into the strings.
21//!
22//! If Rust code *does* need to look into those strings, it can
23//! convert them to valid UTF-8, possibly lossily, by substituting
24//! invalid sequences with [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD], as is
25//! conventionally done in other Rust APIs that deal with string
26//! encodings.
27//!
28//! # `OsStringExt` and `OsStrExt`
29//!
30//! [`OsString`] is the Rust wrapper for owned strings in the
31//! preferred representation of the operating system. On Windows,
32//! this struct gets augmented with an implementation of the
33//! [`OsStringExt`] trait, which has an [`OsStringExt::from_wide`] method. This
34//! lets you create an [`OsString`] from a `&[u16]` slice; presumably
35//! you get such a slice out of a `WCHAR` Windows API.
36//!
37//! Similarly, [`OsStr`] is the Rust wrapper for borrowed strings in the
38//! preferred representation of the operating system. On Windows, the
39//! [`OsStrExt`] trait provides the [`OsStrExt::encode_wide`] method, which
40//! outputs an [`EncodeWide`] iterator. You can [`collect`] this
41//! iterator, for example, to obtain a `Vec<u16>`; you can later get a
42//! pointer to this vector's contents and feed it to Windows APIs.
43//!
44//! These traits, along with [`OsString`] and [`OsStr`], work in
45//! conjunction so that it is possible to **round-trip** strings from
46//! Windows and back, with no loss of data, even if the strings are
47//! ill-formed UTF-16.
48//!
49//! [ill-formed-utf-16]: https://simonsapin.github.io/wtf-8/#ill-formed-utf-16
50//! [`collect`]: crate::iter::Iterator::collect
51//! [U+FFFD]: crate::char::REPLACEMENT_CHARACTER
52//! [`std::ffi`]: crate::ffi
53
54#![stable(feature = "rust1", since = "1.0.0")]
55
56use alloc::wtf8::Wtf8Buf;
57
58use crate::ffi::{OsStr, OsString};
59use crate::fmt;
60use crate::iter::FusedIterator;
61use crate::sealed::Sealed;
62use crate::sys::os_str::Buf;
63use crate::sys_common::{AsInner, FromInner};
64
65/// Windows-specific extensions to [`OsString`].
66///
67/// This trait is sealed: it cannot be implemented outside the standard library.
68/// This is so that future additional methods are not breaking changes.
69#[stable(feature = "rust1", since = "1.0.0")]
70pub trait OsStringExt: Sealed {
71    /// Creates an `OsString` from a potentially ill-formed UTF-16 slice of
72    /// 16-bit code units.
73    ///
74    /// This is lossless: calling [`OsStrExt::encode_wide`] on the resulting string
75    /// will always return the original code units.
76    ///
77    /// # Examples
78    ///
79    /// ```
80    /// use std::ffi::OsString;
81    /// use std::os::windows::prelude::*;
82    ///
83    /// // UTF-16 encoding for "Unicode".
84    /// let source = [0x0055, 0x006E, 0x0069, 0x0063, 0x006F, 0x0064, 0x0065];
85    ///
86    /// let string = OsString::from_wide(&source[..]);
87    /// ```
88    #[stable(feature = "rust1", since = "1.0.0")]
89    fn from_wide(wide: &[u16]) -> Self;
90}
91
92#[stable(feature = "rust1", since = "1.0.0")]
93impl OsStringExt for OsString {
94    fn from_wide(wide: &[u16]) -> OsString {
95        FromInner::from_inner(Buf { inner: Wtf8Buf::from_wide(wide) })
96    }
97}
98
99/// Windows-specific extensions to [`OsStr`].
100///
101/// This trait is sealed: it cannot be implemented outside the standard library.
102/// This is so that future additional methods are not breaking changes.
103#[stable(feature = "rust1", since = "1.0.0")]
104pub trait OsStrExt: Sealed {
105    /// Re-encodes an `OsStr` as a wide character sequence, i.e., potentially
106    /// ill-formed UTF-16.
107    ///
108    /// This is lossless: calling [`OsStringExt::from_wide`] and then
109    /// `encode_wide` on the result will yield the original code units.
110    /// Note that the encoding does not add a final null terminator.
111    ///
112    /// # Examples
113    ///
114    /// ```
115    /// use std::ffi::OsString;
116    /// use std::os::windows::prelude::*;
117    ///
118    /// // UTF-16 encoding for "Unicode".
119    /// let source = [0x0055, 0x006E, 0x0069, 0x0063, 0x006F, 0x0064, 0x0065];
120    ///
121    /// let string = OsString::from_wide(&source[..]);
122    ///
123    /// let result: Vec<u16> = string.encode_wide().collect();
124    /// assert_eq!(&source[..], &result[..]);
125    /// ```
126    #[stable(feature = "rust1", since = "1.0.0")]
127    fn encode_wide(&self) -> EncodeWide<'_>;
128}
129
130#[stable(feature = "rust1", since = "1.0.0")]
131impl OsStrExt for OsStr {
132    #[inline]
133    fn encode_wide(&self) -> EncodeWide<'_> {
134        EncodeWide { inner: self.as_inner().inner.encode_wide() }
135    }
136}
137
138/// Iterator returned by [`OsStrExt::encode_wide`].
139#[stable(feature = "rust1", since = "1.0.0")]
140#[derive(Clone)]
141pub struct EncodeWide<'a> {
142    inner: alloc::wtf8::EncodeWide<'a>,
143}
144#[stable(feature = "encode_wide_debug", since = "CURRENT_RUSTC_VERSION")]
145impl fmt::Debug for EncodeWide<'_> {
146    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
147        fmt::Debug::fmt(&self.inner, f)
148    }
149}
150#[stable(feature = "rust1", since = "1.0.0")]
151impl Iterator for EncodeWide<'_> {
152    type Item = u16;
153
154    #[inline]
155    fn next(&mut self) -> Option<u16> {
156        self.inner.next()
157    }
158
159    #[inline]
160    fn size_hint(&self) -> (usize, Option<usize>) {
161        self.inner.size_hint()
162    }
163}
164#[stable(feature = "encode_wide_fused_iterator", since = "1.62.0")]
165impl FusedIterator for EncodeWide<'_> {}