core/char/
mod.rs

1//! Utilities for the `char` primitive type.
2//!
3//! *[See also the `char` primitive type](primitive@char).*
4//!
5//! The `char` type represents a single character. More specifically, since
6//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
7//! scalar value]', which is similar to, but not the same as, a '[Unicode code
8//! point]'.
9//!
10//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
11//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
12//!
//! This module exists for technical reasons; the primary documentation for
//! `char` is directly on [the `char` primitive type][char] itself.
15//!
16//! This module is the home of the iterator implementations for the iterators
17//! implemented on `char`, as well as some useful constants and conversion
18//! functions that convert various types to `char`.
19
20#![allow(non_snake_case)]
21#![stable(feature = "rust1", since = "1.0.0")]
22
23mod convert;
24mod decode;
25mod methods;
26
27// stable re-exports
28#[rustfmt::skip]
29#[stable(feature = "try_from", since = "1.34.0")]
30pub use self::convert::CharTryFromError;
31#[stable(feature = "char_from_str", since = "1.20.0")]
32pub use self::convert::ParseCharError;
33#[stable(feature = "decode_utf16", since = "1.9.0")]
34pub use self::decode::{DecodeUtf16, DecodeUtf16Error};
35
36// perma-unstable re-exports
37#[rustfmt::skip]
38#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
39pub use self::methods::encode_utf16_raw; // perma-unstable
40#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
41pub use self::methods::encode_utf8_raw; // perma-unstable
42
43#[rustfmt::skip]
44use crate::ascii;
45pub(crate) use self::methods::EscapeDebugExtArgs;
46use crate::error::Error;
47use crate::escape;
48use crate::fmt::{self, Write};
49use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
50use crate::num::NonZero;
51
// UTF-8 encoding parameters: the marker bits carried by each kind of byte,
// and the code-point thresholds at which a sequence needs one more byte.

/// Marker bits of a continuation byte (`10xx_xxxx`).
const TAG_CONT: u8 = 0b1000_0000;
/// Marker bits of the leading byte of a two-byte sequence (`110x_xxxx`).
const TAG_TWO_B: u8 = 0b1100_0000;
/// Marker bits of the leading byte of a three-byte sequence (`1110_xxxx`).
const TAG_THREE_B: u8 = 0b1110_0000;
/// Marker bits of the leading byte of a four-byte sequence (`1111_0xxx`).
const TAG_FOUR_B: u8 = 0b1111_0000;
/// First code point that no longer fits in one byte (7 payload bits).
const MAX_ONE_B: u32 = 1 << 7;
/// First code point that no longer fits in two bytes (11 payload bits).
const MAX_TWO_B: u32 = 1 << 11;
/// First code point that no longer fits in three bytes (16 payload bits).
const MAX_THREE_B: u32 = 1 << 16;
60
61/*
62    Lu  Uppercase_Letter        an uppercase letter
63    Ll  Lowercase_Letter        a lowercase letter
64    Lt  Titlecase_Letter        a digraphic character, with first part uppercase
65    Lm  Modifier_Letter         a modifier letter
66    Lo  Other_Letter            other letters, including syllables and ideographs
67    Mn  Nonspacing_Mark         a nonspacing combining mark (zero advance width)
68    Mc  Spacing_Mark            a spacing combining mark (positive advance width)
69    Me  Enclosing_Mark          an enclosing combining mark
70    Nd  Decimal_Number          a decimal digit
71    Nl  Letter_Number           a letterlike numeric character
72    No  Other_Number            a numeric character of other type
73    Pc  Connector_Punctuation   a connecting punctuation mark, like a tie
74    Pd  Dash_Punctuation        a dash or hyphen punctuation mark
75    Ps  Open_Punctuation        an opening punctuation mark (of a pair)
76    Pe  Close_Punctuation       a closing punctuation mark (of a pair)
77    Pi  Initial_Punctuation     an initial quotation mark
78    Pf  Final_Punctuation       a final quotation mark
79    Po  Other_Punctuation       a punctuation mark of other type
80    Sm  Math_Symbol             a symbol of primarily mathematical use
81    Sc  Currency_Symbol         a currency sign
82    Sk  Modifier_Symbol         a non-letterlike modifier symbol
83    So  Other_Symbol            a symbol of other type
84    Zs  Space_Separator         a space character (of various non-zero widths)
85    Zl  Line_Separator          U+2028 LINE SEPARATOR only
86    Zp  Paragraph_Separator     U+2029 PARAGRAPH SEPARATOR only
87    Cc  Control                 a C0 or C1 control code
88    Cf  Format                  a format control character
89    Cs  Surrogate               a surrogate code point
90    Co  Private_Use             a private-use character
91    Cn  Unassigned              a reserved unassigned code point or a noncharacter
92*/
93
94/// The highest valid code point a `char` can have, `'\u{10FFFF}'`. Use [`char::MAX`] instead.
95#[stable(feature = "rust1", since = "1.0.0")]
96pub const MAX: char = char::MAX;
97
98/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
99/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
100#[stable(feature = "decode_utf16", since = "1.9.0")]
101pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;
102
103/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
104/// `char` and `str` methods are based on. Use [`char::UNICODE_VERSION`] instead.
105#[stable(feature = "unicode_version", since = "1.45.0")]
106pub const UNICODE_VERSION: (u8, u8, u8) = char::UNICODE_VERSION;
107
108/// Creates an iterator over the UTF-16 encoded code points in `iter`, returning
109/// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead.
110#[stable(feature = "decode_utf16", since = "1.9.0")]
111#[inline]
112pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
113    self::decode::decode_utf16(iter)
114}
115
116/// Converts a `u32` to a `char`. Use [`char::from_u32`] instead.
117#[stable(feature = "rust1", since = "1.0.0")]
118#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
119#[must_use]
120#[inline]
121pub const fn from_u32(i: u32) -> Option<char> {
122    self::convert::from_u32(i)
123}
124
125/// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`]
126/// instead.
127#[stable(feature = "char_from_unchecked", since = "1.5.0")]
128#[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")]
129#[must_use]
130#[inline]
131pub const unsafe fn from_u32_unchecked(i: u32) -> char {
132    // SAFETY: the safety contract must be upheld by the caller.
133    unsafe { self::convert::from_u32_unchecked(i) }
134}
135
136/// Converts a digit in the given radix to a `char`. Use [`char::from_digit`] instead.
137#[stable(feature = "rust1", since = "1.0.0")]
138#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
139#[must_use]
140#[inline]
141pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
142    self::convert::from_digit(num, radix)
143}
144
145/// Returns an iterator that yields the hexadecimal Unicode escape of a
146/// character, as `char`s.
147///
148/// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
149/// its documentation for more.
150///
151/// [`escape_unicode`]: char::escape_unicode
152#[derive(Clone, Debug)]
153#[stable(feature = "rust1", since = "1.0.0")]
154pub struct EscapeUnicode(escape::EscapeIterInner<10>);
155
156impl EscapeUnicode {
157    #[inline]
158    const fn new(c: char) -> Self {
159        Self(escape::EscapeIterInner::unicode(c))
160    }
161}
162
163#[stable(feature = "rust1", since = "1.0.0")]
164impl Iterator for EscapeUnicode {
165    type Item = char;
166
167    #[inline]
168    fn next(&mut self) -> Option<char> {
169        self.0.next().map(char::from)
170    }
171
172    #[inline]
173    fn size_hint(&self) -> (usize, Option<usize>) {
174        let n = self.0.len();
175        (n, Some(n))
176    }
177
178    #[inline]
179    fn count(self) -> usize {
180        self.0.len()
181    }
182
183    #[inline]
184    fn last(mut self) -> Option<char> {
185        self.0.next_back().map(char::from)
186    }
187
188    #[inline]
189    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
190        self.0.advance_by(n)
191    }
192}
193
194#[stable(feature = "exact_size_escape", since = "1.11.0")]
195impl ExactSizeIterator for EscapeUnicode {
196    #[inline]
197    fn len(&self) -> usize {
198        self.0.len()
199    }
200}
201
202#[stable(feature = "fused", since = "1.26.0")]
203impl FusedIterator for EscapeUnicode {}
204
205#[stable(feature = "char_struct_display", since = "1.16.0")]
206impl fmt::Display for EscapeUnicode {
207    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
208        f.write_str(self.0.as_str())
209    }
210}
211
212/// An iterator that yields the literal escape code of a `char`.
213///
214/// This `struct` is created by the [`escape_default`] method on [`char`]. See
215/// its documentation for more.
216///
217/// [`escape_default`]: char::escape_default
218#[derive(Clone, Debug)]
219#[stable(feature = "rust1", since = "1.0.0")]
220pub struct EscapeDefault(escape::EscapeIterInner<10>);
221
222impl EscapeDefault {
223    #[inline]
224    const fn printable(c: ascii::Char) -> Self {
225        Self(escape::EscapeIterInner::ascii(c.to_u8()))
226    }
227
228    #[inline]
229    const fn backslash(c: ascii::Char) -> Self {
230        Self(escape::EscapeIterInner::backslash(c))
231    }
232
233    #[inline]
234    const fn unicode(c: char) -> Self {
235        Self(escape::EscapeIterInner::unicode(c))
236    }
237}
238
239#[stable(feature = "rust1", since = "1.0.0")]
240impl Iterator for EscapeDefault {
241    type Item = char;
242
243    #[inline]
244    fn next(&mut self) -> Option<char> {
245        self.0.next().map(char::from)
246    }
247
248    #[inline]
249    fn size_hint(&self) -> (usize, Option<usize>) {
250        let n = self.0.len();
251        (n, Some(n))
252    }
253
254    #[inline]
255    fn count(self) -> usize {
256        self.0.len()
257    }
258
259    #[inline]
260    fn last(mut self) -> Option<char> {
261        self.0.next_back().map(char::from)
262    }
263
264    #[inline]
265    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
266        self.0.advance_by(n)
267    }
268}
269
270#[stable(feature = "exact_size_escape", since = "1.11.0")]
271impl ExactSizeIterator for EscapeDefault {
272    #[inline]
273    fn len(&self) -> usize {
274        self.0.len()
275    }
276}
277
278#[stable(feature = "fused", since = "1.26.0")]
279impl FusedIterator for EscapeDefault {}
280
281#[stable(feature = "char_struct_display", since = "1.16.0")]
282impl fmt::Display for EscapeDefault {
283    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
284        f.write_str(self.0.as_str())
285    }
286}
287
288/// An iterator that yields the literal escape code of a `char`.
289///
290/// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
291/// documentation for more.
292///
293/// [`escape_debug`]: char::escape_debug
294#[stable(feature = "char_escape_debug", since = "1.20.0")]
295#[derive(Clone, Debug)]
296pub struct EscapeDebug(EscapeDebugInner);
297
298#[derive(Clone, Debug)]
299// Note: It’s possible to manually encode the EscapeDebugInner inside of
300// EscapeIterInner (e.g. with alive=254..255 indicating that data[0..4] holds
301// a char) which would likely result in a more optimised code.  For now we use
302// the option easier to implement.
303enum EscapeDebugInner {
304    Bytes(escape::EscapeIterInner<10>),
305    Char(char),
306}
307
308impl EscapeDebug {
309    #[inline]
310    const fn printable(chr: char) -> Self {
311        Self(EscapeDebugInner::Char(chr))
312    }
313
314    #[inline]
315    const fn backslash(c: ascii::Char) -> Self {
316        Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::backslash(c)))
317    }
318
319    #[inline]
320    const fn unicode(c: char) -> Self {
321        Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::unicode(c)))
322    }
323
324    #[inline]
325    fn clear(&mut self) {
326        self.0 = EscapeDebugInner::Bytes(escape::EscapeIterInner::empty());
327    }
328}
329
330#[stable(feature = "char_escape_debug", since = "1.20.0")]
331impl Iterator for EscapeDebug {
332    type Item = char;
333
334    #[inline]
335    fn next(&mut self) -> Option<char> {
336        match self.0 {
337            EscapeDebugInner::Bytes(ref mut bytes) => bytes.next().map(char::from),
338            EscapeDebugInner::Char(chr) => {
339                self.clear();
340                Some(chr)
341            }
342        }
343    }
344
345    #[inline]
346    fn size_hint(&self) -> (usize, Option<usize>) {
347        let n = self.len();
348        (n, Some(n))
349    }
350
351    #[inline]
352    fn count(self) -> usize {
353        self.len()
354    }
355}
356
357#[stable(feature = "char_escape_debug", since = "1.20.0")]
358impl ExactSizeIterator for EscapeDebug {
359    fn len(&self) -> usize {
360        match &self.0 {
361            EscapeDebugInner::Bytes(bytes) => bytes.len(),
362            EscapeDebugInner::Char(_) => 1,
363        }
364    }
365}
366
367#[stable(feature = "fused", since = "1.26.0")]
368impl FusedIterator for EscapeDebug {}
369
370#[stable(feature = "char_escape_debug", since = "1.20.0")]
371impl fmt::Display for EscapeDebug {
372    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
373        match &self.0 {
374            EscapeDebugInner::Bytes(bytes) => f.write_str(bytes.as_str()),
375            EscapeDebugInner::Char(chr) => f.write_char(*chr),
376        }
377    }
378}
379
// Declares a public newtype around `CaseMappingIter` and implements the
// iterator traits and `Display` for it; every implementation forwards to the
// wrapped value. The attributes passed in front of the name (normally the doc
// comment) are attached to the generated struct.
macro_rules! casemappingiter_impls {
    ($(#[$doc:meta])* $Iter:ident) => {
        $(#[$doc])*
        #[stable(feature = "rust1", since = "1.0.0")]
        #[derive(Debug, Clone)]
        pub struct $Iter(CaseMappingIter);

        #[stable(feature = "rust1", since = "1.0.0")]
        impl Iterator for $Iter {
            type Item = char;

            fn next(&mut self) -> Option<char> {
                self.0.next()
            }

            fn size_hint(&self) -> (usize, Option<usize>) {
                self.0.size_hint()
            }

            fn fold<B, F>(self, init: B, f: F) -> B
            where
                F: FnMut(B, Self::Item) -> B,
            {
                self.0.fold(init, f)
            }

            fn count(self) -> usize {
                self.0.count()
            }

            fn last(self) -> Option<Self::Item> {
                self.0.last()
            }

            fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_by(n)
            }

            unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
                // SAFETY: the requirements are passed on to the caller unchanged.
                unsafe { self.0.__iterator_get_unchecked(idx) }
            }
        }

        #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
        impl DoubleEndedIterator for $Iter {
            fn next_back(&mut self) -> Option<char> {
                self.0.next_back()
            }

            fn rfold<B, F>(self, init: B, f: F) -> B
            where
                F: FnMut(B, Self::Item) -> B,
            {
                self.0.rfold(init, f)
            }

            fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_back_by(n)
            }
        }

        #[stable(feature = "fused", since = "1.26.0")]
        impl FusedIterator for $Iter {}

        #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
        impl ExactSizeIterator for $Iter {
            fn len(&self) -> usize {
                self.0.len()
            }

            fn is_empty(&self) -> bool {
                self.0.is_empty()
            }
        }

        // SAFETY: forwards to inner `array::IntoIter`
        #[unstable(feature = "trusted_len", issue = "37572")]
        unsafe impl TrustedLen for $Iter {}

        // SAFETY: forwards to inner `array::IntoIter`
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccessNoCoerce for $Iter {
            const MAY_HAVE_SIDE_EFFECT: bool = false;
        }

        // SAFETY: this iter has no subtypes/supertypes
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccess for $Iter {}

        #[stable(feature = "char_struct_display", since = "1.16.0")]
        impl fmt::Display for $Iter {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // Formatting is delegated to the wrapped `CaseMappingIter`.
                fmt::Display::fmt(&self.0, f)
            }
        }
    }
}
479
480casemappingiter_impls! {
    /// An iterator that yields the lowercase equivalent of a `char`.
482    ///
483    /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
484    /// its documentation for more.
485    ///
486    /// [`to_lowercase`]: char::to_lowercase
487    ToLowercase
488}
489
490casemappingiter_impls! {
    /// An iterator that yields the uppercase equivalent of a `char`.
492    ///
493    /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
494    /// its documentation for more.
495    ///
496    /// [`to_uppercase`]: char::to_uppercase
497    ToUppercase
498}
499
/// Shared implementation behind the case-mapping iterators: a by-value
/// iterator over an array of up to three `char`s.
#[derive(Debug, Clone)]
struct CaseMappingIter(core::array::IntoIter<char, 3>);

impl CaseMappingIter {
    /// Builds the iterator from a fixed array whose unused trailing slots are
    /// filled with `'\0'`; that padding is trimmed so it is never yielded.
    #[inline]
    fn new(chars: [char; 3]) -> CaseMappingIter {
        // Only slots 1 and 2 are ever treated as padding. Slot 0 is
        // deliberately not inspected, as '\0' lowercases to itself.
        let padding = match chars {
            [_, '\0', '\0'] => 2,
            [_, _, '\0'] => 1,
            _ => 0,
        };

        let mut iter = IntoIterator::into_iter(chars);
        for _ in 0..padding {
            iter.next_back();
        }
        CaseMappingIter(iter)
    }
}
519
520impl Iterator for CaseMappingIter {
521    type Item = char;
522
523    fn next(&mut self) -> Option<char> {
524        self.0.next()
525    }
526
527    fn size_hint(&self) -> (usize, Option<usize>) {
528        self.0.size_hint()
529    }
530
531    fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
532    where
533        Fold: FnMut(Acc, Self::Item) -> Acc,
534    {
535        self.0.fold(init, fold)
536    }
537
538    fn count(self) -> usize {
539        self.0.count()
540    }
541
542    fn last(self) -> Option<Self::Item> {
543        self.0.last()
544    }
545
546    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
547        self.0.advance_by(n)
548    }
549
550    unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
551        // SAFETY: just forwarding requirements to caller
552        unsafe { self.0.__iterator_get_unchecked(idx) }
553    }
554}
555
556impl DoubleEndedIterator for CaseMappingIter {
557    fn next_back(&mut self) -> Option<char> {
558        self.0.next_back()
559    }
560
561    fn rfold<Acc, Fold>(self, init: Acc, rfold: Fold) -> Acc
562    where
563        Fold: FnMut(Acc, Self::Item) -> Acc,
564    {
565        self.0.rfold(init, rfold)
566    }
567
568    fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
569        self.0.advance_back_by(n)
570    }
571}
572
573impl ExactSizeIterator for CaseMappingIter {
574    fn len(&self) -> usize {
575        self.0.len()
576    }
577
578    fn is_empty(&self) -> bool {
579        self.0.is_empty()
580    }
581}
582
583impl FusedIterator for CaseMappingIter {}
584
585// SAFETY: forwards to inner `array::IntoIter`
586unsafe impl TrustedLen for CaseMappingIter {}
587
588// SAFETY: forwards to inner `array::IntoIter`
589unsafe impl TrustedRandomAccessNoCoerce for CaseMappingIter {
590    const MAY_HAVE_SIDE_EFFECT: bool = false;
591}
592
593// SAFETY: `CaseMappingIter` has no subtypes/supertypes
594unsafe impl TrustedRandomAccess for CaseMappingIter {}
595
596impl fmt::Display for CaseMappingIter {
597    #[inline]
598    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
599        for c in self.0.clone() {
600            f.write_char(c)?;
601        }
602        Ok(())
603    }
604}
605
606/// The error type returned when a checked char conversion fails.
607#[stable(feature = "u8_from_char", since = "1.59.0")]
608#[derive(Debug, Copy, Clone, PartialEq, Eq)]
609pub struct TryFromCharError(pub(crate) ());
610
611#[stable(feature = "u8_from_char", since = "1.59.0")]
612impl fmt::Display for TryFromCharError {
613    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
614        "unicode code point out of range".fmt(fmt)
615    }
616}
617
618#[stable(feature = "u8_from_char", since = "1.59.0")]
619impl Error for TryFromCharError {}