Skip to main content

core/char/
mod.rs

1//! Utilities for the `char` primitive type.
2//!
3//! *[See also the `char` primitive type](primitive@char).*
4//!
5//! The `char` type represents a single character. More specifically, since
6//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
7//! scalar value]', which is similar to, but not the same as, a '[Unicode code
8//! point]'.
9//!
10//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
11//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
12//!
//! This module exists for technical reasons; the primary documentation for
//! `char` is directly on [the `char` primitive type][char] itself.
15//!
16//! This module is the home of the iterator implementations for the iterators
17//! implemented on `char`, as well as some useful constants and conversion
18//! functions that convert various types to `char`.
19
20#![allow(non_snake_case)]
21#![stable(feature = "rust1", since = "1.0.0")]
22
23mod convert;
24mod decode;
25mod methods;
26
27// stable re-exports
28#[rustfmt::skip]
29#[stable(feature = "try_from", since = "1.34.0")]
30pub use self::convert::CharTryFromError;
31#[stable(feature = "char_from_str", since = "1.20.0")]
32pub use self::convert::ParseCharError;
33#[stable(feature = "decode_utf16", since = "1.9.0")]
34pub use self::decode::{DecodeUtf16, DecodeUtf16Error};
35
36// perma-unstable re-exports
37#[rustfmt::skip]
38#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
39pub use self::methods::encode_utf16_raw; // perma-unstable
40#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
41pub use self::methods::{encode_utf8_raw, encode_utf8_raw_unchecked}; // perma-unstable
42
43#[rustfmt::skip]
44use crate::ascii;
45pub(crate) use self::methods::EscapeDebugExtArgs;
46use crate::error::Error;
47use crate::escape::{AlwaysEscaped, EscapeIterInner, MaybeEscaped};
48use crate::fmt::{self, Write};
49use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
50use crate::num::NonZero;
51
// Tag bytes and code point limits used when encoding characters as UTF-8.

/// Tag bits `10xx_xxxx`, as named for continuation bytes.
const TAG_CONT: u8 = 0x80;
/// Tag bits `110x_xxxx`, as named for two-byte sequences.
const TAG_TWO_B: u8 = 0xC0;
/// Tag bits `1110_xxxx`, as named for three-byte sequences.
const TAG_THREE_B: u8 = 0xE0;
/// Tag bits `1111_0xxx`, as named for four-byte sequences.
const TAG_FOUR_B: u8 = 0xF0;
/// `0x80`: the first code point that no longer fits in a one-byte sequence.
const MAX_ONE_B: u32 = 1 << 7;
/// `0x800`: the first code point that no longer fits in a two-byte sequence.
const MAX_TWO_B: u32 = 1 << 11;
/// `0x10000`: the first code point that no longer fits in a three-byte sequence.
const MAX_THREE_B: u32 = 1 << 16;
60
61/*
62    Lu  Uppercase_Letter        an uppercase letter
63    Ll  Lowercase_Letter        a lowercase letter
64    Lt  Titlecase_Letter        a digraphic character, with first part uppercase
65    Lm  Modifier_Letter         a modifier letter
66    Lo  Other_Letter            other letters, including syllables and ideographs
67    Mn  Nonspacing_Mark         a nonspacing combining mark (zero advance width)
68    Mc  Spacing_Mark            a spacing combining mark (positive advance width)
69    Me  Enclosing_Mark          an enclosing combining mark
70    Nd  Decimal_Number          a decimal digit
71    Nl  Letter_Number           a letterlike numeric character
72    No  Other_Number            a numeric character of other type
73    Pc  Connector_Punctuation   a connecting punctuation mark, like a tie
74    Pd  Dash_Punctuation        a dash or hyphen punctuation mark
75    Ps  Open_Punctuation        an opening punctuation mark (of a pair)
76    Pe  Close_Punctuation       a closing punctuation mark (of a pair)
77    Pi  Initial_Punctuation     an initial quotation mark
78    Pf  Final_Punctuation       a final quotation mark
79    Po  Other_Punctuation       a punctuation mark of other type
80    Sm  Math_Symbol             a symbol of primarily mathematical use
81    Sc  Currency_Symbol         a currency sign
82    Sk  Modifier_Symbol         a non-letterlike modifier symbol
83    So  Other_Symbol            a symbol of other type
84    Zs  Space_Separator         a space character (of various non-zero widths)
85    Zl  Line_Separator          U+2028 LINE SEPARATOR only
86    Zp  Paragraph_Separator     U+2029 PARAGRAPH SEPARATOR only
87    Cc  Control                 a C0 or C1 control code
88    Cf  Format                  a format control character
89    Cs  Surrogate               a surrogate code point
90    Co  Private_Use             a private-use character
91    Cn  Unassigned              a reserved unassigned code point or a noncharacter
92*/
93
94/// The highest valid code point a `char` can have, `'\u{10FFFF}'`. Use [`char::MAX`] instead.
95#[stable(feature = "rust1", since = "1.0.0")]
96#[deprecated(since = "TBD", note = "replaced by the `MAX` associated constant on `char`")]
97pub const MAX: char = char::MAX;
98
99/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
100/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
101#[stable(feature = "decode_utf16", since = "1.9.0")]
102#[deprecated(
103    since = "TBD",
104    note = "replaced by the `REPLACEMENT_CHARACTER` associated constant on `char`"
105)]
106pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;
107
108/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
109/// `char` and `str` methods are based on. Use [`char::UNICODE_VERSION`] instead.
110#[stable(feature = "unicode_version", since = "1.45.0")]
111#[deprecated(
112    since = "TBD",
113    note = "replaced by the `UNICODE_VERSION` associated constant on `char`"
114)]
115pub const UNICODE_VERSION: (u8, u8, u8) = char::UNICODE_VERSION;
116
117/// Creates an iterator over the UTF-16 encoded code points in `iter`, returning
118/// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead.
119#[stable(feature = "decode_utf16", since = "1.9.0")]
120#[deprecated(since = "TBD", note = "replaced by the `decode_utf16` method on `char`")]
121#[inline]
122pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
123    self::decode::decode_utf16(iter)
124}
125
126/// Converts a `u32` to a `char`. Use [`char::from_u32`] instead.
127#[stable(feature = "rust1", since = "1.0.0")]
128#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
129#[deprecated(since = "TBD", note = "replaced by the `from_u32` method on `char`")]
130#[must_use]
131#[inline]
132pub const fn from_u32(i: u32) -> Option<char> {
133    self::convert::from_u32(i)
134}
135
136/// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`]
137/// instead.
138#[stable(feature = "char_from_unchecked", since = "1.5.0")]
139#[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")]
140#[deprecated(since = "TBD", note = "replaced by the `from_u32_unchecked` method on `char`")]
141#[must_use]
142#[inline]
143pub const unsafe fn from_u32_unchecked(i: u32) -> char {
144    // SAFETY: the safety contract must be upheld by the caller.
145    unsafe { self::convert::from_u32_unchecked(i) }
146}
147
148/// Converts a digit in the given radix to a `char`. Use [`char::from_digit`] instead.
149#[stable(feature = "rust1", since = "1.0.0")]
150#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
151#[deprecated(since = "TBD", note = "replaced by the `from_digit` method on `char`")]
152#[must_use]
153#[inline]
154pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
155    self::convert::from_digit(num, radix)
156}
157
158/// Returns an iterator that yields the hexadecimal Unicode escape of a
159/// character, as `char`s.
160///
161/// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
162/// its documentation for more.
163///
164/// [`escape_unicode`]: char::escape_unicode
165#[derive(Clone, Debug)]
166#[stable(feature = "rust1", since = "1.0.0")]
167pub struct EscapeUnicode(EscapeIterInner<10, AlwaysEscaped>);
168
169impl EscapeUnicode {
170    #[inline]
171    const fn new(c: char) -> Self {
172        Self(EscapeIterInner::unicode(c))
173    }
174}
175
176#[stable(feature = "rust1", since = "1.0.0")]
177impl Iterator for EscapeUnicode {
178    type Item = char;
179
180    #[inline]
181    fn next(&mut self) -> Option<char> {
182        self.0.next().map(char::from)
183    }
184
185    #[inline]
186    fn size_hint(&self) -> (usize, Option<usize>) {
187        let n = self.0.len();
188        (n, Some(n))
189    }
190
191    #[inline]
192    fn count(self) -> usize {
193        self.0.len()
194    }
195
196    #[inline]
197    fn last(mut self) -> Option<char> {
198        self.0.next_back().map(char::from)
199    }
200
201    #[inline]
202    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
203        self.0.advance_by(n)
204    }
205}
206
207#[stable(feature = "exact_size_escape", since = "1.11.0")]
208impl ExactSizeIterator for EscapeUnicode {
209    #[inline]
210    fn len(&self) -> usize {
211        self.0.len()
212    }
213}
214
215#[stable(feature = "fused", since = "1.26.0")]
216impl FusedIterator for EscapeUnicode {}
217
218#[stable(feature = "char_struct_display", since = "1.16.0")]
219impl fmt::Display for EscapeUnicode {
220    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
221        fmt::Display::fmt(&self.0, f)
222    }
223}
224
225/// An iterator that yields the literal escape code of a `char`.
226///
227/// This `struct` is created by the [`escape_default`] method on [`char`]. See
228/// its documentation for more.
229///
230/// [`escape_default`]: char::escape_default
231#[derive(Clone, Debug)]
232#[stable(feature = "rust1", since = "1.0.0")]
233pub struct EscapeDefault(EscapeIterInner<10, AlwaysEscaped>);
234
235impl EscapeDefault {
236    #[inline]
237    const fn printable(c: ascii::Char) -> Self {
238        Self(EscapeIterInner::ascii(c.to_u8()))
239    }
240
241    #[inline]
242    const fn backslash(c: ascii::Char) -> Self {
243        Self(EscapeIterInner::backslash(c))
244    }
245
246    #[inline]
247    const fn unicode(c: char) -> Self {
248        Self(EscapeIterInner::unicode(c))
249    }
250}
251
252#[stable(feature = "rust1", since = "1.0.0")]
253impl Iterator for EscapeDefault {
254    type Item = char;
255
256    #[inline]
257    fn next(&mut self) -> Option<char> {
258        self.0.next().map(char::from)
259    }
260
261    #[inline]
262    fn size_hint(&self) -> (usize, Option<usize>) {
263        let n = self.0.len();
264        (n, Some(n))
265    }
266
267    #[inline]
268    fn count(self) -> usize {
269        self.0.len()
270    }
271
272    #[inline]
273    fn last(mut self) -> Option<char> {
274        self.0.next_back().map(char::from)
275    }
276
277    #[inline]
278    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
279        self.0.advance_by(n)
280    }
281}
282
283#[stable(feature = "exact_size_escape", since = "1.11.0")]
284impl ExactSizeIterator for EscapeDefault {
285    #[inline]
286    fn len(&self) -> usize {
287        self.0.len()
288    }
289}
290
291#[stable(feature = "fused", since = "1.26.0")]
292impl FusedIterator for EscapeDefault {}
293
294#[stable(feature = "char_struct_display", since = "1.16.0")]
295impl fmt::Display for EscapeDefault {
296    #[inline]
297    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
298        fmt::Display::fmt(&self.0, f)
299    }
300}
301
302/// An iterator that yields the literal escape code of a `char`.
303///
304/// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
305/// documentation for more.
306///
307/// [`escape_debug`]: char::escape_debug
308#[stable(feature = "char_escape_debug", since = "1.20.0")]
309#[derive(Clone, Debug)]
310pub struct EscapeDebug(EscapeIterInner<10, MaybeEscaped>);
311
312impl EscapeDebug {
313    #[inline]
314    const fn printable(chr: char) -> Self {
315        Self(EscapeIterInner::printable(chr))
316    }
317
318    #[inline]
319    const fn backslash(c: ascii::Char) -> Self {
320        Self(EscapeIterInner::backslash(c))
321    }
322
323    #[inline]
324    const fn unicode(c: char) -> Self {
325        Self(EscapeIterInner::unicode(c))
326    }
327}
328
329#[stable(feature = "char_escape_debug", since = "1.20.0")]
330impl Iterator for EscapeDebug {
331    type Item = char;
332
333    #[inline]
334    fn next(&mut self) -> Option<char> {
335        self.0.next()
336    }
337
338    #[inline]
339    fn size_hint(&self) -> (usize, Option<usize>) {
340        let n = self.len();
341        (n, Some(n))
342    }
343
344    #[inline]
345    fn count(self) -> usize {
346        self.len()
347    }
348}
349
350#[stable(feature = "char_escape_debug", since = "1.20.0")]
351impl ExactSizeIterator for EscapeDebug {
352    fn len(&self) -> usize {
353        self.0.len()
354    }
355}
356
357#[stable(feature = "fused", since = "1.26.0")]
358impl FusedIterator for EscapeDebug {}
359
360#[stable(feature = "char_escape_debug", since = "1.20.0")]
361impl fmt::Display for EscapeDebug {
362    #[inline]
363    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
364        fmt::Display::fmt(&self.0, f)
365    }
366}
367
// Generates a public newtype around `CaseMappingIter` together with all of its
// forwarding trait implementations.
//
// The five leading attributes are positional. In order they are attached to:
//   1. the struct and its `Iterator` impl,
//   2. the `DoubleEndedIterator` impl,
//   3. the `FusedIterator` impl,
//   4. the `ExactSizeIterator` impl,
//   5. the `fmt::Display` impl.
// Any attributes after those (such as doc comments) go on the struct itself,
// followed by the name of the type to generate.
macro_rules! casemappingiter_impls {
    (
        #[$struct_stab:meta]
        #[$double_ended_stab:meta]
        #[$fused_stab:meta]
        #[$exact_size_stab:meta]
        #[$display_stab:meta]
        $(#[$struct_attr:meta])*
        $iter_ty:ident
    ) => {
        $(#[$struct_attr])*
        #[$struct_stab]
        #[derive(Debug, Clone)]
        pub struct $iter_ty(CaseMappingIter);

        // Every method below simply forwards to the wrapped `CaseMappingIter`.
        #[$struct_stab]
        impl Iterator for $iter_ty {
            type Item = char;

            fn next(&mut self) -> Option<char> {
                let Self(inner) = self;
                inner.next()
            }

            fn size_hint(&self) -> (usize, Option<usize>) {
                let Self(inner) = self;
                inner.size_hint()
            }

            fn fold<B, F>(self, seed: B, step: F) -> B
            where
                F: FnMut(B, char) -> B,
            {
                let Self(inner) = self;
                inner.fold(seed, step)
            }

            fn count(self) -> usize {
                let Self(inner) = self;
                inner.count()
            }

            fn last(self) -> Option<char> {
                let Self(inner) = self;
                inner.last()
            }

            fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                let Self(inner) = self;
                inner.advance_by(n)
            }

            unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
                // SAFETY: the caller's obligations are passed on unchanged to
                // the wrapped iterator.
                unsafe { self.0.__iterator_get_unchecked(idx) }
            }
        }

        #[$double_ended_stab]
        impl DoubleEndedIterator for $iter_ty {
            fn next_back(&mut self) -> Option<char> {
                let Self(inner) = self;
                inner.next_back()
            }

            fn rfold<B, F>(self, seed: B, step: F) -> B
            where
                F: FnMut(B, char) -> B,
            {
                let Self(inner) = self;
                inner.rfold(seed, step)
            }

            fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                let Self(inner) = self;
                inner.advance_back_by(n)
            }
        }

        #[$fused_stab]
        impl FusedIterator for $iter_ty {}

        #[$exact_size_stab]
        impl ExactSizeIterator for $iter_ty {
            fn len(&self) -> usize {
                let Self(inner) = self;
                inner.len()
            }

            fn is_empty(&self) -> bool {
                let Self(inner) = self;
                inner.is_empty()
            }
        }

        // SAFETY: forwards to inner `array::IntoIter`
        #[unstable(feature = "trusted_len", issue = "37572")]
        unsafe impl TrustedLen for $iter_ty {}

        // SAFETY: forwards to inner `array::IntoIter`
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccessNoCoerce for $iter_ty {
            const MAY_HAVE_SIDE_EFFECT: bool = false;
        }

        // SAFETY: this iter has no subtypes/supertypes
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccess for $iter_ty {}

        #[$display_stab]
        impl fmt::Display for $iter_ty {
            #[inline]
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let Self(inner) = self;
                fmt::Display::fmt(inner, f)
            }
        }
    }
}
476
477casemappingiter_impls! {
478    #[stable(feature = "rust1", since = "1.0.0")]
479    #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
480    #[stable(feature = "fused", since = "1.26.0")]
481    #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
482    #[stable(feature = "char_struct_display", since = "1.16.0")]
483    /// Returns an iterator that yields the uppercase equivalent of a `char`.
484    ///
485    /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
486    /// its documentation for more.
487    ///
488    /// [`to_uppercase`]: char::to_uppercase
489    ToUppercase
490}
491
492casemappingiter_impls! {
493    #[unstable(feature = "titlecase", issue = "153892")]
494    #[unstable(feature = "titlecase", issue = "153892")]
495    #[unstable(feature = "titlecase", issue = "153892")]
496    #[unstable(feature = "titlecase", issue = "153892")]
497    #[unstable(feature = "titlecase", issue = "153892")]
498    /// Returns an iterator that yields the titlecase equivalent of a `char`.
499    ///
500    /// This `struct` is created by the [`to_titlecase`] method on [`char`]. See
501    /// its documentation for more.
502    ///
503    /// [`to_titlecase`]: char::to_titlecase
504    ToTitlecase
505}
506
507casemappingiter_impls! {
508    #[stable(feature = "rust1", since = "1.0.0")]
509    #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
510    #[stable(feature = "fused", since = "1.26.0")]
511    #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
512    #[stable(feature = "char_struct_display", since = "1.16.0")]
513    /// Returns an iterator that yields the lowercase equivalent of a `char`.
514    ///
515    /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
516    /// its documentation for more.
517    ///
518    /// [`to_lowercase`]: char::to_lowercase
519    ToLowercase
520}
521
/// Shared by-value iterator over up to three `char`s, wrapped by the public
/// case-mapping iterator types.
#[derive(Debug, Clone)]
struct CaseMappingIter(core::array::IntoIter<char, 3>);

impl CaseMappingIter {
    /// Builds the iterator from a fixed three-slot array, dropping trailing
    /// `'\0'` entries from the back.
    ///
    /// Slot 0 is deliberately never examined, as `'\0'` lowercases to itself
    /// and must therefore still be yielded.
    #[inline]
    fn new(chars: [char; 3]) -> CaseMappingIter {
        // Slot 1 only counts as trailing when slot 2 is `'\0'` as well.
        let trailing_nuls = match chars {
            [_, '\0', '\0'] => 2,
            [_, _, '\0'] => 1,
            _ => 0,
        };

        let mut iter = IntoIterator::into_iter(chars);
        for _ in 0..trailing_nuls {
            iter.next_back();
        }
        CaseMappingIter(iter)
    }
}
541
542impl Iterator for CaseMappingIter {
543    type Item = char;
544
545    fn next(&mut self) -> Option<char> {
546        self.0.next()
547    }
548
549    fn size_hint(&self) -> (usize, Option<usize>) {
550        self.0.size_hint()
551    }
552
553    fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
554    where
555        Fold: FnMut(Acc, Self::Item) -> Acc,
556    {
557        self.0.fold(init, fold)
558    }
559
560    fn count(self) -> usize {
561        self.0.count()
562    }
563
564    fn last(self) -> Option<Self::Item> {
565        self.0.last()
566    }
567
568    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
569        self.0.advance_by(n)
570    }
571
572    unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
573        // SAFETY: just forwarding requirements to caller
574        unsafe { self.0.__iterator_get_unchecked(idx) }
575    }
576}
577
578impl DoubleEndedIterator for CaseMappingIter {
579    fn next_back(&mut self) -> Option<char> {
580        self.0.next_back()
581    }
582
583    fn rfold<Acc, Fold>(self, init: Acc, rfold: Fold) -> Acc
584    where
585        Fold: FnMut(Acc, Self::Item) -> Acc,
586    {
587        self.0.rfold(init, rfold)
588    }
589
590    fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
591        self.0.advance_back_by(n)
592    }
593}
594
595impl ExactSizeIterator for CaseMappingIter {
596    fn len(&self) -> usize {
597        self.0.len()
598    }
599
600    fn is_empty(&self) -> bool {
601        self.0.is_empty()
602    }
603}
604
605impl FusedIterator for CaseMappingIter {}
606
607// SAFETY: forwards to inner `array::IntoIter`
608unsafe impl TrustedLen for CaseMappingIter {}
609
610// SAFETY: forwards to inner `array::IntoIter`
611unsafe impl TrustedRandomAccessNoCoerce for CaseMappingIter {
612    const MAY_HAVE_SIDE_EFFECT: bool = false;
613}
614
615// SAFETY: `CaseMappingIter` has no subtypes/supertypes
616unsafe impl TrustedRandomAccess for CaseMappingIter {}
617
618impl fmt::Display for CaseMappingIter {
619    #[inline]
620    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
621        for c in self.0.clone() {
622            f.write_char(c)?;
623        }
624        Ok(())
625    }
626}
627
628/// The error type returned when a checked char conversion fails.
629#[stable(feature = "u8_from_char", since = "1.59.0")]
630#[derive(Debug, Copy, Clone, PartialEq, Eq)]
631pub struct TryFromCharError(pub(crate) ());
632
633#[stable(feature = "u8_from_char", since = "1.59.0")]
634impl fmt::Display for TryFromCharError {
635    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
636        "unicode code point out of range".fmt(fmt)
637    }
638}
639
640#[stable(feature = "u8_from_char", since = "1.59.0")]
641impl Error for TryFromCharError {}
642
643/// The case of a cased character,
644/// as returned by [`char::case`].
645///
646/// Titlecase characters conceptually are composed of an uppercase portion
647/// followed by a lowercase portion.
648/// The variant discriminants represent this:
649/// the most significant bit represents whether the case
650/// conceptually starts as uppercase, while the least significant bit
651/// represents whether it conceptually ends as uppercase.
652#[unstable(feature = "titlecase", issue = "153892")]
653#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
654pub enum CharCase {
655    /// Lowercase. Corresponds to the `Lowercase` Unicode property.
656    Lower = 0b00,
657    /// Titlecase. Corresponds to the `Titlecase_Letter` Unicode general category.
658    Title = 0b10,
659    /// Uppercase. Corresponds to the `Uppercase` Unicode property.
660    Upper = 0b11,
661}