core/char/convert.rs
1//! Character conversions.
2
3use crate::char::TryFromCharError;
4use crate::error::Error;
5use crate::fmt;
6use crate::mem::transmute;
7use crate::str::FromStr;
8use crate::ub_checks::assert_unsafe_precondition;
9
10/// Converts a `u32` to a `char`. See [`char::from_u32`].
11#[must_use]
12#[inline]
13pub(super) const fn from_u32(i: u32) -> Option<char> {
14 // FIXME(const-hack): once Result::ok is const fn, use it here
15 match char_try_from_u32(i) {
16 Ok(c) => Some(c),
17 Err(_) => None,
18 }
19}
20
/// Converts a `u32` to a `char`, ignoring validity. See [`char::from_u32_unchecked`].
///
/// # Safety
///
/// The caller must guarantee that `i` is a valid `char` value; the result is
/// produced by transmuting `i` without any further validation in this function.
#[inline]
#[must_use]
pub(super) const unsafe fn from_u32_unchecked(i: u32) -> char {
    // SAFETY: the caller must guarantee that `i` is a valid char value.
    unsafe {
        // The stated precondition is "`char_try_from_u32(i)` succeeds".
        // NOTE(review): when, and whether, this is evaluated is decided by
        // `assert_unsafe_precondition!`, which is defined outside this file.
        assert_unsafe_precondition!(
            check_language_ub,
            "invalid value for `char`",
            (i: u32 = i) => char_try_from_u32(i).is_ok()
        );
        // Reinterpret the `u32` as a `char`.
        transmute(i)
    }
}
35
36#[stable(feature = "char_convert", since = "1.13.0")]
37impl From<char> for u32 {
38 /// Converts a [`char`] into a [`u32`].
39 ///
40 /// # Examples
41 ///
42 /// ```
43 /// let c = 'c';
44 /// let u = u32::from(c);
45 /// assert!(4 == size_of_val(&u))
46 /// ```
47 #[inline]
48 fn from(c: char) -> Self {
49 c as u32
50 }
51}
52
53#[stable(feature = "more_char_conversions", since = "1.51.0")]
54impl From<char> for u64 {
55 /// Converts a [`char`] into a [`u64`].
56 ///
57 /// # Examples
58 ///
59 /// ```
60 /// let c = '👤';
61 /// let u = u64::from(c);
62 /// assert!(8 == size_of_val(&u))
63 /// ```
64 #[inline]
65 fn from(c: char) -> Self {
66 // The char is casted to the value of the code point, then zero-extended to 64 bit.
67 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
68 c as u64
69 }
70}
71
72#[stable(feature = "more_char_conversions", since = "1.51.0")]
73impl From<char> for u128 {
74 /// Converts a [`char`] into a [`u128`].
75 ///
76 /// # Examples
77 ///
78 /// ```
79 /// let c = '⚙';
80 /// let u = u128::from(c);
81 /// assert!(16 == size_of_val(&u))
82 /// ```
83 #[inline]
84 fn from(c: char) -> Self {
85 // The char is casted to the value of the code point, then zero-extended to 128 bit.
86 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
87 c as u128
88 }
89}
90
91/// Maps a `char` with code point in U+0000..=U+00FF to a byte in 0x00..=0xFF with same value,
92/// failing if the code point is greater than U+00FF.
93///
94/// See [`impl From<u8> for char`](char#impl-From<u8>-for-char) for details on the encoding.
95#[stable(feature = "u8_from_char", since = "1.59.0")]
96impl TryFrom<char> for u8 {
97 type Error = TryFromCharError;
98
99 /// Tries to convert a [`char`] into a [`u8`].
100 ///
101 /// # Examples
102 ///
103 /// ```
104 /// let a = 'ÿ'; // U+00FF
105 /// let b = 'Ā'; // U+0100
106 /// assert_eq!(u8::try_from(a), Ok(0xFF_u8));
107 /// assert!(u8::try_from(b).is_err());
108 /// ```
109 #[inline]
110 fn try_from(c: char) -> Result<u8, Self::Error> {
111 u8::try_from(u32::from(c)).map_err(|_| TryFromCharError(()))
112 }
113}
114
115/// Maps a `char` with code point in U+0000..=U+FFFF to a `u16` in 0x0000..=0xFFFF with same value,
116/// failing if the code point is greater than U+FFFF.
117///
118/// This corresponds to the UCS-2 encoding, as specified in ISO/IEC 10646:2003.
119#[stable(feature = "u16_from_char", since = "1.74.0")]
120impl TryFrom<char> for u16 {
121 type Error = TryFromCharError;
122
123 /// Tries to convert a [`char`] into a [`u16`].
124 ///
125 /// # Examples
126 ///
127 /// ```
128 /// let trans_rights = '⚧'; // U+26A7
129 /// let ninjas = '🥷'; // U+1F977
130 /// assert_eq!(u16::try_from(trans_rights), Ok(0x26A7_u16));
131 /// assert!(u16::try_from(ninjas).is_err());
132 /// ```
133 #[inline]
134 fn try_from(c: char) -> Result<u16, Self::Error> {
135 u16::try_from(u32::from(c)).map_err(|_| TryFromCharError(()))
136 }
137}
138
139/// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF.
140///
141/// Unicode is designed such that this effectively decodes bytes
142/// with the character encoding that IANA calls ISO-8859-1.
143/// This encoding is compatible with ASCII.
144///
145/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
146/// which leaves some "blanks", byte values that are not assigned to any character.
147/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
148///
149/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
150/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks
151/// to punctuation and various Latin characters.
152///
153/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
154/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
155/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
156/// C0 and C1 control codes.
157#[stable(feature = "char_convert", since = "1.13.0")]
158impl From<u8> for char {
159 /// Converts a [`u8`] into a [`char`].
160 ///
161 /// # Examples
162 ///
163 /// ```
164 /// let u = 32 as u8;
165 /// let c = char::from(u);
166 /// assert!(4 == size_of_val(&c))
167 /// ```
168 #[inline]
169 fn from(i: u8) -> Self {
170 i as char
171 }
172}
173
/// An error which can be returned when parsing a char.
///
/// This `struct` is created when using the [`char::from_str`] method.
#[stable(feature = "char_from_str", since = "1.20.0")]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseCharError {
    // Which failure occurred; private, so callers only observe it through the
    // trait impls on this type.
    kind: CharErrorKind,
}
182
// The reasons `<char as FromStr>::from_str` can fail.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CharErrorKind {
    // The input string yielded no characters at all.
    EmptyString,
    // The input string yielded a second character after the first.
    TooManyChars,
}
188
189#[stable(feature = "char_from_str", since = "1.20.0")]
190impl Error for ParseCharError {
191 #[allow(deprecated)]
192 fn description(&self) -> &str {
193 match self.kind {
194 CharErrorKind::EmptyString => "cannot parse char from empty string",
195 CharErrorKind::TooManyChars => "too many characters in string",
196 }
197 }
198}
199
200#[stable(feature = "char_from_str", since = "1.20.0")]
201impl fmt::Display for ParseCharError {
202 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
203 #[allow(deprecated)]
204 self.description().fmt(f)
205 }
206}
207
208#[stable(feature = "char_from_str", since = "1.20.0")]
209impl FromStr for char {
210 type Err = ParseCharError;
211
212 #[inline]
213 fn from_str(s: &str) -> Result<Self, Self::Err> {
214 let mut chars = s.chars();
215 match (chars.next(), chars.next()) {
216 (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }),
217 (Some(c), None) => Ok(c),
218 _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }),
219 }
220 }
221}
222
223#[inline]
224const fn char_try_from_u32(i: u32) -> Result<char, CharTryFromError> {
225 // This is an optimized version of the check
226 // (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF),
227 // which can also be written as
228 // i >= 0x110000 || (i >= 0xD800 && i < 0xE000).
229 //
230 // The XOR with 0xD800 permutes the ranges such that 0xD800..0xE000 is
231 // mapped to 0x0000..0x0800, while keeping all the high bits outside 0xFFFF the same.
232 // In particular, numbers >= 0x110000 stay in this range.
233 //
234 // Subtracting 0x800 causes 0x0000..0x0800 to wrap, meaning that a single
235 // unsigned comparison against 0x110000 - 0x800 will detect both the wrapped
236 // surrogate range as well as the numbers originally larger than 0x110000.
237 //
238 if (i ^ 0xD800).wrapping_sub(0x800) >= 0x110000 - 0x800 {
239 Err(CharTryFromError(()))
240 } else {
241 // SAFETY: checked that it's a legal unicode value
242 Ok(unsafe { transmute(i) })
243 }
244}
245
#[stable(feature = "try_from", since = "1.34.0")]
impl TryFrom<u32> for char {
    type Error = CharTryFromError;

    /// Tries to convert a [`u32`] into a [`char`].
    ///
    /// This forwards to `char_try_from_u32` and returns its result unchanged.
    #[inline]
    fn try_from(i: u32) -> Result<Self, Self::Error> {
        char_try_from_u32(i)
    }
}
255
/// The error type returned when a conversion from [`prim@u32`] to [`prim@char`] fails.
///
/// This `struct` is created by the [`char::try_from<u32>`](char#impl-TryFrom<u32>-for-char) method.
/// See its documentation for more.
// The single field is a private `()`, so the error carries no data.
#[stable(feature = "try_from", since = "1.34.0")]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct CharTryFromError(());
263
264#[stable(feature = "try_from", since = "1.34.0")]
265impl fmt::Display for CharTryFromError {
266 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
267 "converted integer out of range for `char`".fmt(f)
268 }
269}
270
271/// Converts a digit in the given radix to a `char`. See [`char::from_digit`].
272#[inline]
273#[must_use]
274pub(super) const fn from_digit(num: u32, radix: u32) -> Option<char> {
275 if radix > 36 {
276 panic!("from_digit: radix is too high (maximum 36)");
277 }
278 if num < radix {
279 let num = num as u8;
280 if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) }
281 } else {
282 None
283 }
284}