core/char/methods.rs
1//! impl char {}
2
3use super::*;
4use crate::panic::const_panic;
5use crate::slice;
6use crate::str::from_utf8_unchecked_mut;
7use crate::unicode::printable::is_printable;
8use crate::unicode::{self, conversions};
9
10impl char {
11 /// The lowest valid code point a `char` can have, `'\0'`.
12 ///
13 /// Unlike integer types, `char` actually has a gap in the middle,
14 /// meaning that the range of possible `char`s is smaller than you
15 /// might expect. Ranges of `char` will automatically hop this gap
16 /// for you:
17 ///
18 /// ```
19 /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
20 /// let size = (char::MIN..=char::MAX).count() as u32;
21 /// assert!(size < dist);
22 /// ```
23 ///
24 /// Despite this gap, the `MIN` and [`MAX`] values can be used as bounds for
25 /// all `char` values.
26 ///
27 /// [`MAX`]: char::MAX
28 ///
29 /// # Examples
30 ///
31 /// ```
32 /// # fn something_which_returns_char() -> char { 'a' }
33 /// let c: char = something_which_returns_char();
34 /// assert!(char::MIN <= c);
35 ///
36 /// let value_at_min = u32::from(char::MIN);
37 /// assert_eq!(char::from_u32(value_at_min), Some('\0'));
38 /// ```
39 #[stable(feature = "char_min", since = "1.83.0")]
40 pub const MIN: char = '\0';
41
42 /// The highest valid code point a `char` can have, `'\u{10FFFF}'`.
43 ///
44 /// Unlike integer types, `char` actually has a gap in the middle,
45 /// meaning that the range of possible `char`s is smaller than you
46 /// might expect. Ranges of `char` will automatically hop this gap
47 /// for you:
48 ///
49 /// ```
50 /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
51 /// let size = (char::MIN..=char::MAX).count() as u32;
52 /// assert!(size < dist);
53 /// ```
54 ///
55 /// Despite this gap, the [`MIN`] and `MAX` values can be used as bounds for
56 /// all `char` values.
57 ///
58 /// [`MIN`]: char::MIN
59 ///
60 /// # Examples
61 ///
62 /// ```
63 /// # fn something_which_returns_char() -> char { 'a' }
64 /// let c: char = something_which_returns_char();
65 /// assert!(c <= char::MAX);
66 ///
67 /// let value_at_max = u32::from(char::MAX);
68 /// assert_eq!(char::from_u32(value_at_max), Some('\u{10FFFF}'));
69 /// assert_eq!(char::from_u32(value_at_max + 1), None);
70 /// ```
71 #[stable(feature = "assoc_char_consts", since = "1.52.0")]
72 pub const MAX: char = '\u{10FFFF}';
73
74 /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
75 /// decoding error.
76 ///
77 /// It can occur, for example, when giving ill-formed UTF-8 bytes to
78 /// [`String::from_utf8_lossy`](../std/string/struct.String.html#method.from_utf8_lossy).
79 #[stable(feature = "assoc_char_consts", since = "1.52.0")]
80 pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
81
82 /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
83 /// `char` and `str` methods are based on.
84 ///
85 /// New versions of Unicode are released regularly and subsequently all methods
86 /// in the standard library depending on Unicode are updated. Therefore the
87 /// behavior of some `char` and `str` methods and the value of this constant
88 /// changes over time. This is *not* considered to be a breaking change.
89 ///
90 /// The version numbering scheme is explained in
91 /// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4).
92 #[stable(feature = "assoc_char_consts", since = "1.52.0")]
93 pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION;
94
95 /// Creates an iterator over the native endian UTF-16 encoded code points in `iter`,
96 /// returning unpaired surrogates as `Err`s.
97 ///
98 /// # Examples
99 ///
100 /// Basic usage:
101 ///
102 /// ```
103 /// // 𝄞mus<invalid>ic<invalid>
104 /// let v = [
105 /// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
106 /// ];
107 ///
108 /// assert_eq!(
109 /// char::decode_utf16(v)
110 /// .map(|r| r.map_err(|e| e.unpaired_surrogate()))
111 /// .collect::<Vec<_>>(),
112 /// vec![
113 /// Ok('𝄞'),
114 /// Ok('m'), Ok('u'), Ok('s'),
115 /// Err(0xDD1E),
116 /// Ok('i'), Ok('c'),
117 /// Err(0xD834)
118 /// ]
119 /// );
120 /// ```
121 ///
122 /// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
123 ///
124 /// ```
125 /// // 𝄞mus<invalid>ic<invalid>
126 /// let v = [
127 /// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
128 /// ];
129 ///
130 /// assert_eq!(
131 /// char::decode_utf16(v)
132 /// .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER))
133 /// .collect::<String>(),
134 /// "𝄞mus�ic�"
135 /// );
136 /// ```
137 #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
138 #[inline]
139 pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
140 super::decode::decode_utf16(iter)
141 }
142
143 /// Converts a `u32` to a `char`.
144 ///
145 /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
146 /// [`as`](../std/keyword.as.html):
147 ///
148 /// ```
149 /// let c = '💯';
150 /// let i = c as u32;
151 ///
152 /// assert_eq!(128175, i);
153 /// ```
154 ///
155 /// However, the reverse is not true: not all valid [`u32`]s are valid
156 /// `char`s. `from_u32()` will return `None` if the input is not a valid value
157 /// for a `char`.
158 ///
159 /// For an unsafe version of this function which ignores these checks, see
160 /// [`from_u32_unchecked`].
161 ///
162 /// [`from_u32_unchecked`]: #method.from_u32_unchecked
163 ///
164 /// # Examples
165 ///
166 /// Basic usage:
167 ///
168 /// ```
169 /// let c = char::from_u32(0x2764);
170 ///
171 /// assert_eq!(Some('❤'), c);
172 /// ```
173 ///
174 /// Returning `None` when the input is not a valid `char`:
175 ///
176 /// ```
177 /// let c = char::from_u32(0x110000);
178 ///
179 /// assert_eq!(None, c);
180 /// ```
181 #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
182 #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
183 #[must_use]
184 #[inline]
185 pub const fn from_u32(i: u32) -> Option<char> {
186 super::convert::from_u32(i)
187 }
188
189 /// Converts a `u32` to a `char`, ignoring validity.
190 ///
191 /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
192 /// `as`:
193 ///
194 /// ```
195 /// let c = '💯';
196 /// let i = c as u32;
197 ///
198 /// assert_eq!(128175, i);
199 /// ```
200 ///
201 /// However, the reverse is not true: not all valid [`u32`]s are valid
202 /// `char`s. `from_u32_unchecked()` will ignore this, and blindly cast to
203 /// `char`, possibly creating an invalid one.
204 ///
205 /// # Safety
206 ///
207 /// This function is unsafe, as it may construct invalid `char` values.
208 ///
209 /// For a safe version of this function, see the [`from_u32`] function.
210 ///
211 /// [`from_u32`]: #method.from_u32
212 ///
213 /// # Examples
214 ///
215 /// Basic usage:
216 ///
217 /// ```
218 /// let c = unsafe { char::from_u32_unchecked(0x2764) };
219 ///
220 /// assert_eq!('❤', c);
221 /// ```
222 #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
223 #[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")]
224 #[must_use]
225 #[inline]
226 pub const unsafe fn from_u32_unchecked(i: u32) -> char {
227 // SAFETY: the safety contract must be upheld by the caller.
228 unsafe { super::convert::from_u32_unchecked(i) }
229 }
230
231 /// Converts a digit in the given radix to a `char`.
232 ///
233 /// A 'radix' here is sometimes also called a 'base'. A radix of two
234 /// indicates a binary number, a radix of ten, decimal, and a radix of
235 /// sixteen, hexadecimal, to give some common values. Arbitrary
236 /// radices are supported.
237 ///
238 /// `from_digit()` will return `None` if the input is not a digit in
239 /// the given radix.
240 ///
241 /// # Panics
242 ///
243 /// Panics if given a radix larger than 36.
244 ///
245 /// # Examples
246 ///
247 /// Basic usage:
248 ///
249 /// ```
250 /// let c = char::from_digit(4, 10);
251 ///
252 /// assert_eq!(Some('4'), c);
253 ///
254 /// // Decimal 11 is a single digit in base 16
255 /// let c = char::from_digit(11, 16);
256 ///
257 /// assert_eq!(Some('b'), c);
258 /// ```
259 ///
260 /// Returning `None` when the input is not a digit:
261 ///
262 /// ```
263 /// let c = char::from_digit(20, 10);
264 ///
265 /// assert_eq!(None, c);
266 /// ```
267 ///
268 /// Passing a large radix, causing a panic:
269 ///
270 /// ```should_panic
271 /// // this panics
272 /// let _c = char::from_digit(1, 37);
273 /// ```
274 #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
275 #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
276 #[must_use]
277 #[inline]
278 pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
279 super::convert::from_digit(num, radix)
280 }
281
282 /// Checks if a `char` is a digit in the given radix.
283 ///
284 /// A 'radix' here is sometimes also called a 'base'. A radix of two
285 /// indicates a binary number, a radix of ten, decimal, and a radix of
286 /// sixteen, hexadecimal, to give some common values. Arbitrary
287 /// radices are supported.
288 ///
289 /// Compared to [`is_numeric()`], this function only recognizes the characters
290 /// `0-9`, `a-z` and `A-Z`.
291 ///
292 /// 'Digit' is defined to be only the following characters:
293 ///
294 /// * `0-9`
295 /// * `a-z`
296 /// * `A-Z`
297 ///
298 /// For a more comprehensive understanding of 'digit', see [`is_numeric()`].
299 ///
300 /// [`is_numeric()`]: #method.is_numeric
301 ///
302 /// # Panics
303 ///
304 /// Panics if given a radix smaller than 2 or larger than 36.
305 ///
306 /// # Examples
307 ///
308 /// Basic usage:
309 ///
310 /// ```
311 /// assert!('1'.is_digit(10));
312 /// assert!('f'.is_digit(16));
313 /// assert!(!'f'.is_digit(10));
314 /// ```
315 ///
316 /// Passing a large radix, causing a panic:
317 ///
318 /// ```should_panic
319 /// // this panics
320 /// '1'.is_digit(37);
321 /// ```
322 ///
323 /// Passing a small radix, causing a panic:
324 ///
325 /// ```should_panic
326 /// // this panics
327 /// '1'.is_digit(1);
328 /// ```
329 #[stable(feature = "rust1", since = "1.0.0")]
330 #[rustc_const_unstable(feature = "const_char_classify", issue = "132241")]
331 #[inline]
332 pub const fn is_digit(self, radix: u32) -> bool {
333 self.to_digit(radix).is_some()
334 }
335
336 /// Converts a `char` to a digit in the given radix.
337 ///
338 /// A 'radix' here is sometimes also called a 'base'. A radix of two
339 /// indicates a binary number, a radix of ten, decimal, and a radix of
340 /// sixteen, hexadecimal, to give some common values. Arbitrary
341 /// radices are supported.
342 ///
343 /// 'Digit' is defined to be only the following characters:
344 ///
345 /// * `0-9`
346 /// * `a-z`
347 /// * `A-Z`
348 ///
349 /// # Errors
350 ///
351 /// Returns `None` if the `char` does not refer to a digit in the given radix.
352 ///
353 /// # Panics
354 ///
355 /// Panics if given a radix smaller than 2 or larger than 36.
356 ///
357 /// # Examples
358 ///
359 /// Basic usage:
360 ///
361 /// ```
362 /// assert_eq!('1'.to_digit(10), Some(1));
363 /// assert_eq!('f'.to_digit(16), Some(15));
364 /// ```
365 ///
366 /// Passing a non-digit results in failure:
367 ///
368 /// ```
369 /// assert_eq!('f'.to_digit(10), None);
370 /// assert_eq!('z'.to_digit(16), None);
371 /// ```
372 ///
373 /// Passing a large radix, causing a panic:
374 ///
375 /// ```should_panic
376 /// // this panics
377 /// let _ = '1'.to_digit(37);
378 /// ```
379 /// Passing a small radix, causing a panic:
380 ///
381 /// ```should_panic
382 /// // this panics
383 /// let _ = '1'.to_digit(1);
384 /// ```
385 #[stable(feature = "rust1", since = "1.0.0")]
386 #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
387 #[must_use = "this returns the result of the operation, \
388 without modifying the original"]
389 #[inline]
390 pub const fn to_digit(self, radix: u32) -> Option<u32> {
391 assert!(
392 radix >= 2 && radix <= 36,
393 "to_digit: invalid radix -- radix must be in the range 2 to 36 inclusive"
394 );
395 // check radix to remove letter handling code when radix is a known constant
396 let value = if self > '9' && radix > 10 {
397 // mask to convert ASCII letters to uppercase
398 const TO_UPPERCASE_MASK: u32 = !0b0010_0000;
399 // Converts an ASCII letter to its corresponding integer value:
400 // A-Z => 10-35, a-z => 10-35. Other characters produce values >= 36.
401 //
402 // Add Overflow Safety:
403 // By applying the mask after the subtraction, the first addendum is
404 // constrained such that it never exceeds u32::MAX - 0x20.
405 ((self as u32).wrapping_sub('A' as u32) & TO_UPPERCASE_MASK) + 10
406 } else {
407 // convert digit to value, non-digits wrap to values > 36
408 (self as u32).wrapping_sub('0' as u32)
409 };
410 // FIXME(const-hack): once then_some is const fn, use it here
411 if value < radix { Some(value) } else { None }
412 }
413
414 /// Returns an iterator that yields the hexadecimal Unicode escape of a
415 /// character as `char`s.
416 ///
417 /// This will escape characters with the Rust syntax of the form
418 /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation.
419 ///
420 /// # Examples
421 ///
422 /// As an iterator:
423 ///
424 /// ```
425 /// for c in '❤'.escape_unicode() {
426 /// print!("{c}");
427 /// }
428 /// println!();
429 /// ```
430 ///
431 /// Using `println!` directly:
432 ///
433 /// ```
434 /// println!("{}", '❤'.escape_unicode());
435 /// ```
436 ///
437 /// Both are equivalent to:
438 ///
439 /// ```
440 /// println!("\\u{{2764}}");
441 /// ```
442 ///
443 /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
444 ///
445 /// ```
446 /// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
447 /// ```
448 #[must_use = "this returns the escaped char as an iterator, \
449 without modifying the original"]
450 #[stable(feature = "rust1", since = "1.0.0")]
451 #[inline]
452 pub fn escape_unicode(self) -> EscapeUnicode {
453 EscapeUnicode::new(self)
454 }
455
456 /// An extended version of `escape_debug` that optionally permits escaping
457 /// Extended Grapheme codepoints, single quotes, and double quotes. This
458 /// allows us to format characters like nonspacing marks better when they're
459 /// at the start of a string, and allows escaping single quotes in
460 /// characters, and double quotes in strings.
461 #[inline]
462 pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
463 match self {
464 '\0' => EscapeDebug::backslash(ascii::Char::Digit0),
465 '\t' => EscapeDebug::backslash(ascii::Char::SmallT),
466 '\r' => EscapeDebug::backslash(ascii::Char::SmallR),
467 '\n' => EscapeDebug::backslash(ascii::Char::SmallN),
468 '\\' => EscapeDebug::backslash(ascii::Char::ReverseSolidus),
469 '\"' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark),
470 '\'' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe),
471 _ if args.escape_grapheme_extended && self.is_grapheme_extended() => {
472 EscapeDebug::unicode(self)
473 }
474 _ if is_printable(self) => EscapeDebug::printable(self),
475 _ => EscapeDebug::unicode(self),
476 }
477 }
478
479 /// Returns an iterator that yields the literal escape code of a character
480 /// as `char`s.
481 ///
482 /// This will escape the characters similar to the [`Debug`](core::fmt::Debug) implementations
483 /// of `str` or `char`.
484 ///
485 /// # Examples
486 ///
487 /// As an iterator:
488 ///
489 /// ```
490 /// for c in '\n'.escape_debug() {
491 /// print!("{c}");
492 /// }
493 /// println!();
494 /// ```
495 ///
496 /// Using `println!` directly:
497 ///
498 /// ```
499 /// println!("{}", '\n'.escape_debug());
500 /// ```
501 ///
502 /// Both are equivalent to:
503 ///
504 /// ```
505 /// println!("\\n");
506 /// ```
507 ///
508 /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
509 ///
510 /// ```
511 /// assert_eq!('\n'.escape_debug().to_string(), "\\n");
512 /// ```
513 #[must_use = "this returns the escaped char as an iterator, \
514 without modifying the original"]
515 #[stable(feature = "char_escape_debug", since = "1.20.0")]
516 #[inline]
517 pub fn escape_debug(self) -> EscapeDebug {
518 self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
519 }
520
521 /// Returns an iterator that yields the literal escape code of a character
522 /// as `char`s.
523 ///
524 /// The default is chosen with a bias toward producing literals that are
525 /// legal in a variety of languages, including C++11 and similar C-family
526 /// languages. The exact rules are:
527 ///
528 /// * Tab is escaped as `\t`.
529 /// * Carriage return is escaped as `\r`.
530 /// * Line feed is escaped as `\n`.
531 /// * Single quote is escaped as `\'`.
532 /// * Double quote is escaped as `\"`.
533 /// * Backslash is escaped as `\\`.
534 /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e`
535 /// inclusive is not escaped.
536 /// * All other characters are given hexadecimal Unicode escapes; see
537 /// [`escape_unicode`].
538 ///
539 /// [`escape_unicode`]: #method.escape_unicode
540 ///
541 /// # Examples
542 ///
543 /// As an iterator:
544 ///
545 /// ```
546 /// for c in '"'.escape_default() {
547 /// print!("{c}");
548 /// }
549 /// println!();
550 /// ```
551 ///
552 /// Using `println!` directly:
553 ///
554 /// ```
555 /// println!("{}", '"'.escape_default());
556 /// ```
557 ///
558 /// Both are equivalent to:
559 ///
560 /// ```
561 /// println!("\\\"");
562 /// ```
563 ///
564 /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
565 ///
566 /// ```
567 /// assert_eq!('"'.escape_default().to_string(), "\\\"");
568 /// ```
569 #[must_use = "this returns the escaped char as an iterator, \
570 without modifying the original"]
571 #[stable(feature = "rust1", since = "1.0.0")]
572 #[inline]
573 pub fn escape_default(self) -> EscapeDefault {
574 match self {
575 '\t' => EscapeDefault::backslash(ascii::Char::SmallT),
576 '\r' => EscapeDefault::backslash(ascii::Char::SmallR),
577 '\n' => EscapeDefault::backslash(ascii::Char::SmallN),
578 '\\' | '\'' | '\"' => EscapeDefault::backslash(self.as_ascii().unwrap()),
579 '\x20'..='\x7e' => EscapeDefault::printable(self.as_ascii().unwrap()),
580 _ => EscapeDefault::unicode(self),
581 }
582 }
583
584 /// Returns the number of bytes this `char` would need if encoded in UTF-8.
585 ///
586 /// That number of bytes is always between 1 and 4, inclusive.
587 ///
588 /// # Examples
589 ///
590 /// Basic usage:
591 ///
592 /// ```
593 /// let len = 'A'.len_utf8();
594 /// assert_eq!(len, 1);
595 ///
596 /// let len = 'ß'.len_utf8();
597 /// assert_eq!(len, 2);
598 ///
599 /// let len = 'ℝ'.len_utf8();
600 /// assert_eq!(len, 3);
601 ///
602 /// let len = '💣'.len_utf8();
603 /// assert_eq!(len, 4);
604 /// ```
605 ///
606 /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it
607 /// would take if each code point was represented as a `char` vs in the `&str` itself:
608 ///
609 /// ```
610 /// // as chars
611 /// let eastern = '東';
612 /// let capital = '京';
613 ///
614 /// // both can be represented as three bytes
615 /// assert_eq!(3, eastern.len_utf8());
616 /// assert_eq!(3, capital.len_utf8());
617 ///
618 /// // as a &str, these two are encoded in UTF-8
619 /// let tokyo = "東京";
620 ///
621 /// let len = eastern.len_utf8() + capital.len_utf8();
622 ///
623 /// // we can see that they take six bytes total...
624 /// assert_eq!(6, tokyo.len());
625 ///
626 /// // ... just like the &str
627 /// assert_eq!(len, tokyo.len());
628 /// ```
629 #[stable(feature = "rust1", since = "1.0.0")]
630 #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
631 #[inline]
632 #[must_use]
633 pub const fn len_utf8(self) -> usize {
634 len_utf8(self as u32)
635 }
636
637 /// Returns the number of 16-bit code units this `char` would need if
638 /// encoded in UTF-16.
639 ///
640 /// That number of code units is always either 1 or 2, for unicode scalar values in
641 /// the [basic multilingual plane] or [supplementary planes] respectively.
642 ///
643 /// See the documentation for [`len_utf8()`] for more explanation of this
644 /// concept. This function is a mirror, but for UTF-16 instead of UTF-8.
645 ///
646 /// [basic multilingual plane]: http://www.unicode.org/glossary/#basic_multilingual_plane
647 /// [supplementary planes]: http://www.unicode.org/glossary/#supplementary_planes
648 /// [`len_utf8()`]: #method.len_utf8
649 ///
650 /// # Examples
651 ///
652 /// Basic usage:
653 ///
654 /// ```
655 /// let n = 'ß'.len_utf16();
656 /// assert_eq!(n, 1);
657 ///
658 /// let len = '💣'.len_utf16();
659 /// assert_eq!(len, 2);
660 /// ```
661 #[stable(feature = "rust1", since = "1.0.0")]
662 #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
663 #[inline]
664 #[must_use]
665 pub const fn len_utf16(self) -> usize {
666 len_utf16(self as u32)
667 }
668
669 /// Encodes this character as UTF-8 into the provided byte buffer,
670 /// and then returns the subslice of the buffer that contains the encoded character.
671 ///
672 /// # Panics
673 ///
674 /// Panics if the buffer is not large enough.
675 /// A buffer of length four is large enough to encode any `char`.
676 ///
677 /// # Examples
678 ///
679 /// In both of these examples, 'ß' takes two bytes to encode.
680 ///
681 /// ```
682 /// let mut b = [0; 2];
683 ///
684 /// let result = 'ß'.encode_utf8(&mut b);
685 ///
686 /// assert_eq!(result, "ß");
687 ///
688 /// assert_eq!(result.len(), 2);
689 /// ```
690 ///
691 /// A buffer that's too small:
692 ///
693 /// ```should_panic
694 /// let mut b = [0; 1];
695 ///
696 /// // this panics
697 /// 'ß'.encode_utf8(&mut b);
698 /// ```
699 #[stable(feature = "unicode_encode_char", since = "1.15.0")]
700 #[rustc_const_stable(feature = "const_char_encode_utf8", since = "1.83.0")]
701 #[inline]
702 pub const fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
703 // SAFETY: `char` is not a surrogate, so this is valid UTF-8.
704 unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
705 }
706
707 /// Encodes this character as native endian UTF-16 into the provided `u16` buffer,
708 /// and then returns the subslice of the buffer that contains the encoded character.
709 ///
710 /// # Panics
711 ///
712 /// Panics if the buffer is not large enough.
713 /// A buffer of length 2 is large enough to encode any `char`.
714 ///
715 /// # Examples
716 ///
717 /// In both of these examples, '𝕊' takes two `u16`s to encode.
718 ///
719 /// ```
720 /// let mut b = [0; 2];
721 ///
722 /// let result = '𝕊'.encode_utf16(&mut b);
723 ///
724 /// assert_eq!(result.len(), 2);
725 /// ```
726 ///
727 /// A buffer that's too small:
728 ///
729 /// ```should_panic
730 /// let mut b = [0; 1];
731 ///
732 /// // this panics
733 /// '𝕊'.encode_utf16(&mut b);
734 /// ```
735 #[stable(feature = "unicode_encode_char", since = "1.15.0")]
736 #[rustc_const_stable(feature = "const_char_encode_utf16", since = "1.84.0")]
737 #[inline]
738 pub const fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
739 encode_utf16_raw(self as u32, dst)
740 }
741
742 /// Returns `true` if this `char` has the `Alphabetic` property.
743 ///
744 /// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
745 /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
746 ///
747 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
748 /// [ucd]: https://www.unicode.org/reports/tr44/
749 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
750 ///
751 /// # Examples
752 ///
753 /// Basic usage:
754 ///
755 /// ```
756 /// assert!('a'.is_alphabetic());
757 /// assert!('京'.is_alphabetic());
758 ///
759 /// let c = '💝';
760 /// // love is many things, but it is not alphabetic
761 /// assert!(!c.is_alphabetic());
762 /// ```
763 #[must_use]
764 #[stable(feature = "rust1", since = "1.0.0")]
765 #[inline]
766 pub fn is_alphabetic(self) -> bool {
767 match self {
768 'a'..='z' | 'A'..='Z' => true,
769 c => c > '\x7f' && unicode::Alphabetic(c),
770 }
771 }
772
773 /// Returns `true` if this `char` has the `Lowercase` property.
774 ///
775 /// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
776 /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
777 ///
778 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
779 /// [ucd]: https://www.unicode.org/reports/tr44/
780 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
781 ///
782 /// # Examples
783 ///
784 /// Basic usage:
785 ///
786 /// ```
787 /// assert!('a'.is_lowercase());
788 /// assert!('δ'.is_lowercase());
789 /// assert!(!'A'.is_lowercase());
790 /// assert!(!'Δ'.is_lowercase());
791 ///
792 /// // The various Chinese scripts and punctuation do not have case, and so:
793 /// assert!(!'中'.is_lowercase());
794 /// assert!(!' '.is_lowercase());
795 /// ```
796 ///
797 /// In a const context:
798 ///
799 /// ```
800 /// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ'.is_lowercase();
801 /// assert!(!CAPITAL_DELTA_IS_LOWERCASE);
802 /// ```
803 #[must_use]
804 #[stable(feature = "rust1", since = "1.0.0")]
805 #[rustc_const_stable(feature = "const_unicode_case_lookup", since = "1.84.0")]
806 #[inline]
807 pub const fn is_lowercase(self) -> bool {
808 match self {
809 'a'..='z' => true,
810 c => c > '\x7f' && unicode::Lowercase(c),
811 }
812 }
813
814 /// Returns `true` if this `char` has the `Uppercase` property.
815 ///
816 /// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
817 /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
818 ///
819 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
820 /// [ucd]: https://www.unicode.org/reports/tr44/
821 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
822 ///
823 /// # Examples
824 ///
825 /// Basic usage:
826 ///
827 /// ```
828 /// assert!(!'a'.is_uppercase());
829 /// assert!(!'δ'.is_uppercase());
830 /// assert!('A'.is_uppercase());
831 /// assert!('Δ'.is_uppercase());
832 ///
833 /// // The various Chinese scripts and punctuation do not have case, and so:
834 /// assert!(!'中'.is_uppercase());
835 /// assert!(!' '.is_uppercase());
836 /// ```
837 ///
838 /// In a const context:
839 ///
840 /// ```
841 /// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ'.is_uppercase();
842 /// assert!(CAPITAL_DELTA_IS_UPPERCASE);
843 /// ```
844 #[must_use]
845 #[stable(feature = "rust1", since = "1.0.0")]
846 #[rustc_const_stable(feature = "const_unicode_case_lookup", since = "1.84.0")]
847 #[inline]
848 pub const fn is_uppercase(self) -> bool {
849 match self {
850 'A'..='Z' => true,
851 c => c > '\x7f' && unicode::Uppercase(c),
852 }
853 }
854
855 /// Returns `true` if this `char` has the `White_Space` property.
856 ///
857 /// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`].
858 ///
859 /// [ucd]: https://www.unicode.org/reports/tr44/
860 /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
861 ///
862 /// # Examples
863 ///
864 /// Basic usage:
865 ///
866 /// ```
867 /// assert!(' '.is_whitespace());
868 ///
869 /// // line break
870 /// assert!('\n'.is_whitespace());
871 ///
872 /// // a non-breaking space
873 /// assert!('\u{A0}'.is_whitespace());
874 ///
875 /// assert!(!'越'.is_whitespace());
876 /// ```
877 #[must_use]
878 #[stable(feature = "rust1", since = "1.0.0")]
879 #[rustc_const_unstable(feature = "const_char_classify", issue = "132241")]
880 #[inline]
881 pub const fn is_whitespace(self) -> bool {
882 match self {
883 ' ' | '\x09'..='\x0d' => true,
884 c => c > '\x7f' && unicode::White_Space(c),
885 }
886 }
887
888 /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
889 ///
890 /// [`is_alphabetic()`]: #method.is_alphabetic
891 /// [`is_numeric()`]: #method.is_numeric
892 ///
893 /// # Examples
894 ///
895 /// Basic usage:
896 ///
897 /// ```
898 /// assert!('٣'.is_alphanumeric());
899 /// assert!('7'.is_alphanumeric());
900 /// assert!('৬'.is_alphanumeric());
901 /// assert!('¾'.is_alphanumeric());
902 /// assert!('①'.is_alphanumeric());
903 /// assert!('K'.is_alphanumeric());
904 /// assert!('و'.is_alphanumeric());
905 /// assert!('藏'.is_alphanumeric());
906 /// ```
907 #[must_use]
908 #[stable(feature = "rust1", since = "1.0.0")]
909 #[inline]
910 pub fn is_alphanumeric(self) -> bool {
911 self.is_alphabetic() || self.is_numeric()
912 }
913
914 /// Returns `true` if this `char` has the general category for control codes.
915 ///
916 /// Control codes (code points with the general category of `Cc`) are described in Chapter 4
917 /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
918 /// Database][ucd] [`UnicodeData.txt`].
919 ///
920 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
921 /// [ucd]: https://www.unicode.org/reports/tr44/
922 /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
923 ///
924 /// # Examples
925 ///
926 /// Basic usage:
927 ///
928 /// ```
929 /// // U+009C, STRING TERMINATOR
930 /// assert!(''.is_control());
931 /// assert!(!'q'.is_control());
932 /// ```
933 #[must_use]
934 #[stable(feature = "rust1", since = "1.0.0")]
935 #[inline]
936 pub fn is_control(self) -> bool {
937 unicode::Cc(self)
938 }
939
940 /// Returns `true` if this `char` has the `Grapheme_Extend` property.
941 ///
942 /// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
943 /// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
944 /// [`DerivedCoreProperties.txt`].
945 ///
946 /// [uax29]: https://www.unicode.org/reports/tr29/
947 /// [ucd]: https://www.unicode.org/reports/tr44/
948 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
949 #[must_use]
950 #[inline]
951 pub(crate) fn is_grapheme_extended(self) -> bool {
952 unicode::Grapheme_Extend(self)
953 }
954
955 /// Returns `true` if this `char` has one of the general categories for numbers.
956 ///
957 /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
958 /// characters, and `No` for other numeric characters) are specified in the [Unicode Character
959 /// Database][ucd] [`UnicodeData.txt`].
960 ///
961 /// This method doesn't cover everything that could be considered a number, e.g. ideographic numbers like '三'.
962 /// If you want everything including characters with overlapping purposes then you might want to use
963 /// a unicode or language-processing library that exposes the appropriate character properties instead
964 /// of looking at the unicode categories.
965 ///
966 /// If you want to parse ASCII decimal digits (0-9) or ASCII base-N, use
967 /// `is_ascii_digit` or `is_digit` instead.
968 ///
969 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
970 /// [ucd]: https://www.unicode.org/reports/tr44/
971 /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
972 ///
973 /// # Examples
974 ///
975 /// Basic usage:
976 ///
977 /// ```
978 /// assert!('٣'.is_numeric());
979 /// assert!('7'.is_numeric());
980 /// assert!('৬'.is_numeric());
981 /// assert!('¾'.is_numeric());
982 /// assert!('①'.is_numeric());
983 /// assert!(!'K'.is_numeric());
984 /// assert!(!'و'.is_numeric());
985 /// assert!(!'藏'.is_numeric());
986 /// assert!(!'三'.is_numeric());
987 /// ```
988 #[must_use]
989 #[stable(feature = "rust1", since = "1.0.0")]
990 #[inline]
991 pub fn is_numeric(self) -> bool {
992 match self {
993 '0'..='9' => true,
994 c => c > '\x7f' && unicode::N(c),
995 }
996 }
997
998 /// Returns an iterator that yields the lowercase mapping of this `char` as one or more
999 /// `char`s.
1000 ///
1001 /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
1002 ///
1003 /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
1004 /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1005 ///
1006 /// [ucd]: https://www.unicode.org/reports/tr44/
1007 /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1008 ///
1009 /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
1010 /// the `char`(s) given by [`SpecialCasing.txt`].
1011 ///
1012 /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1013 ///
1014 /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1015 /// is independent of context and language.
1016 ///
1017 /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1018 /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1019 ///
1020 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1021 ///
1022 /// # Examples
1023 ///
1024 /// As an iterator:
1025 ///
1026 /// ```
1027 /// for c in 'İ'.to_lowercase() {
1028 /// print!("{c}");
1029 /// }
1030 /// println!();
1031 /// ```
1032 ///
1033 /// Using `println!` directly:
1034 ///
1035 /// ```
1036 /// println!("{}", 'İ'.to_lowercase());
1037 /// ```
1038 ///
1039 /// Both are equivalent to:
1040 ///
1041 /// ```
1042 /// println!("i\u{307}");
1043 /// ```
1044 ///
1045 /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1046 ///
1047 /// ```
1048 /// assert_eq!('C'.to_lowercase().to_string(), "c");
1049 ///
1050 /// // Sometimes the result is more than one character:
1051 /// assert_eq!('İ'.to_lowercase().to_string(), "i\u{307}");
1052 ///
1053 /// // Characters that do not have both uppercase and lowercase
1054 /// // convert into themselves.
1055 /// assert_eq!('山'.to_lowercase().to_string(), "山");
1056 /// ```
1057 #[must_use = "this returns the lowercase character as a new iterator, \
1058 without modifying the original"]
1059 #[stable(feature = "rust1", since = "1.0.0")]
1060 #[inline]
1061 pub fn to_lowercase(self) -> ToLowercase {
1062 ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
1063 }
1064
1065 /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
1066 /// `char`s.
1067 ///
1068 /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
1069 ///
1070 /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
1071 /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1072 ///
1073 /// [ucd]: https://www.unicode.org/reports/tr44/
1074 /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1075 ///
1076 /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
1077 /// the `char`(s) given by [`SpecialCasing.txt`].
1078 ///
1079 /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1080 ///
1081 /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1082 /// is independent of context and language.
1083 ///
1084 /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1085 /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1086 ///
1087 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1088 ///
1089 /// # Examples
1090 ///
1091 /// As an iterator:
1092 ///
1093 /// ```
1094 /// for c in 'ß'.to_uppercase() {
1095 /// print!("{c}");
1096 /// }
1097 /// println!();
1098 /// ```
1099 ///
1100 /// Using `println!` directly:
1101 ///
1102 /// ```
1103 /// println!("{}", 'ß'.to_uppercase());
1104 /// ```
1105 ///
1106 /// Both are equivalent to:
1107 ///
1108 /// ```
1109 /// println!("SS");
1110 /// ```
1111 ///
1112 /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1113 ///
1114 /// ```
1115 /// assert_eq!('c'.to_uppercase().to_string(), "C");
1116 ///
1117 /// // Sometimes the result is more than one character:
1118 /// assert_eq!('ß'.to_uppercase().to_string(), "SS");
1119 ///
1120 /// // Characters that do not have both uppercase and lowercase
1121 /// // convert into themselves.
1122 /// assert_eq!('山'.to_uppercase().to_string(), "山");
1123 /// ```
1124 ///
1125 /// # Note on locale
1126 ///
1127 /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
1128 ///
1129 /// * 'Dotless': I / ı, sometimes written ï
1130 /// * 'Dotted': İ / i
1131 ///
1132 /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
1133 ///
1134 /// ```
1135 /// let upper_i = 'i'.to_uppercase().to_string();
1136 /// ```
1137 ///
1138 /// The value of `upper_i` here relies on the language of the text: if we're
1139 /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should
1140 /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
1141 ///
1142 /// ```
1143 /// let upper_i = 'i'.to_uppercase().to_string();
1144 ///
1145 /// assert_eq!(upper_i, "I");
1146 /// ```
1147 ///
1148 /// holds across languages.
1149 #[must_use = "this returns the uppercase character as a new iterator, \
1150 without modifying the original"]
1151 #[stable(feature = "rust1", since = "1.0.0")]
1152 #[inline]
1153 pub fn to_uppercase(self) -> ToUppercase {
1154 ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
1155 }
1156
1157 /// Checks if the value is within the ASCII range.
1158 ///
1159 /// # Examples
1160 ///
1161 /// ```
1162 /// let ascii = 'a';
1163 /// let non_ascii = '❤';
1164 ///
1165 /// assert!(ascii.is_ascii());
1166 /// assert!(!non_ascii.is_ascii());
1167 /// ```
1168 #[must_use]
1169 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1170 #[rustc_const_stable(feature = "const_char_is_ascii", since = "1.32.0")]
1171 #[inline]
1172 pub const fn is_ascii(&self) -> bool {
1173 *self as u32 <= 0x7F
1174 }
1175
1176 /// Returns `Some` if the value is within the ASCII range,
1177 /// or `None` if it's not.
1178 ///
1179 /// This is preferred to [`Self::is_ascii`] when you're passing the value
1180 /// along to something else that can take [`ascii::Char`] rather than
1181 /// needing to check again for itself whether the value is in ASCII.
1182 #[must_use]
1183 #[unstable(feature = "ascii_char", issue = "110998")]
1184 #[inline]
1185 pub const fn as_ascii(&self) -> Option<ascii::Char> {
1186 if self.is_ascii() {
1187 // SAFETY: Just checked that this is ASCII.
1188 Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) })
1189 } else {
1190 None
1191 }
1192 }
1193
1194 /// Makes a copy of the value in its ASCII upper case equivalent.
1195 ///
1196 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1197 /// but non-ASCII letters are unchanged.
1198 ///
1199 /// To uppercase the value in-place, use [`make_ascii_uppercase()`].
1200 ///
1201 /// To uppercase ASCII characters in addition to non-ASCII characters, use
1202 /// [`to_uppercase()`].
1203 ///
1204 /// # Examples
1205 ///
1206 /// ```
1207 /// let ascii = 'a';
1208 /// let non_ascii = '❤';
1209 ///
1210 /// assert_eq!('A', ascii.to_ascii_uppercase());
1211 /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
1212 /// ```
1213 ///
1214 /// [`make_ascii_uppercase()`]: #method.make_ascii_uppercase
1215 /// [`to_uppercase()`]: #method.to_uppercase
1216 #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`"]
1217 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1218 #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1219 #[inline]
1220 pub const fn to_ascii_uppercase(&self) -> char {
1221 if self.is_ascii_lowercase() {
1222 (*self as u8).ascii_change_case_unchecked() as char
1223 } else {
1224 *self
1225 }
1226 }
1227
1228 /// Makes a copy of the value in its ASCII lower case equivalent.
1229 ///
1230 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1231 /// but non-ASCII letters are unchanged.
1232 ///
1233 /// To lowercase the value in-place, use [`make_ascii_lowercase()`].
1234 ///
1235 /// To lowercase ASCII characters in addition to non-ASCII characters, use
1236 /// [`to_lowercase()`].
1237 ///
1238 /// # Examples
1239 ///
1240 /// ```
1241 /// let ascii = 'A';
1242 /// let non_ascii = '❤';
1243 ///
1244 /// assert_eq!('a', ascii.to_ascii_lowercase());
1245 /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
1246 /// ```
1247 ///
1248 /// [`make_ascii_lowercase()`]: #method.make_ascii_lowercase
1249 /// [`to_lowercase()`]: #method.to_lowercase
1250 #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`"]
1251 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1252 #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1253 #[inline]
1254 pub const fn to_ascii_lowercase(&self) -> char {
1255 if self.is_ascii_uppercase() {
1256 (*self as u8).ascii_change_case_unchecked() as char
1257 } else {
1258 *self
1259 }
1260 }
1261
1262 /// Checks that two values are an ASCII case-insensitive match.
1263 ///
1264 /// Equivalent to <code>[to_ascii_lowercase]\(a) == [to_ascii_lowercase]\(b)</code>.
1265 ///
1266 /// # Examples
1267 ///
1268 /// ```
1269 /// let upper_a = 'A';
1270 /// let lower_a = 'a';
1271 /// let lower_z = 'z';
1272 ///
1273 /// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
1274 /// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
1275 /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
1276 /// ```
1277 ///
1278 /// [to_ascii_lowercase]: #method.to_ascii_lowercase
1279 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1280 #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1281 #[inline]
1282 pub const fn eq_ignore_ascii_case(&self, other: &char) -> bool {
1283 self.to_ascii_lowercase() == other.to_ascii_lowercase()
1284 }
1285
1286 /// Converts this type to its ASCII upper case equivalent in-place.
1287 ///
1288 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1289 /// but non-ASCII letters are unchanged.
1290 ///
1291 /// To return a new uppercased value without modifying the existing one, use
1292 /// [`to_ascii_uppercase()`].
1293 ///
1294 /// # Examples
1295 ///
1296 /// ```
1297 /// let mut ascii = 'a';
1298 ///
1299 /// ascii.make_ascii_uppercase();
1300 ///
1301 /// assert_eq!('A', ascii);
1302 /// ```
1303 ///
1304 /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
1305 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1306 #[rustc_const_stable(feature = "const_make_ascii", since = "1.84.0")]
1307 #[inline]
1308 pub const fn make_ascii_uppercase(&mut self) {
1309 *self = self.to_ascii_uppercase();
1310 }
1311
1312 /// Converts this type to its ASCII lower case equivalent in-place.
1313 ///
1314 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1315 /// but non-ASCII letters are unchanged.
1316 ///
1317 /// To return a new lowercased value without modifying the existing one, use
1318 /// [`to_ascii_lowercase()`].
1319 ///
1320 /// # Examples
1321 ///
1322 /// ```
1323 /// let mut ascii = 'A';
1324 ///
1325 /// ascii.make_ascii_lowercase();
1326 ///
1327 /// assert_eq!('a', ascii);
1328 /// ```
1329 ///
1330 /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
1331 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1332 #[rustc_const_stable(feature = "const_make_ascii", since = "1.84.0")]
1333 #[inline]
1334 pub const fn make_ascii_lowercase(&mut self) {
1335 *self = self.to_ascii_lowercase();
1336 }
1337
1338 /// Checks if the value is an ASCII alphabetic character:
1339 ///
1340 /// - U+0041 'A' ..= U+005A 'Z', or
1341 /// - U+0061 'a' ..= U+007A 'z'.
1342 ///
1343 /// # Examples
1344 ///
1345 /// ```
1346 /// let uppercase_a = 'A';
1347 /// let uppercase_g = 'G';
1348 /// let a = 'a';
1349 /// let g = 'g';
1350 /// let zero = '0';
1351 /// let percent = '%';
1352 /// let space = ' ';
1353 /// let lf = '\n';
1354 /// let esc = '\x1b';
1355 ///
1356 /// assert!(uppercase_a.is_ascii_alphabetic());
1357 /// assert!(uppercase_g.is_ascii_alphabetic());
1358 /// assert!(a.is_ascii_alphabetic());
1359 /// assert!(g.is_ascii_alphabetic());
1360 /// assert!(!zero.is_ascii_alphabetic());
1361 /// assert!(!percent.is_ascii_alphabetic());
1362 /// assert!(!space.is_ascii_alphabetic());
1363 /// assert!(!lf.is_ascii_alphabetic());
1364 /// assert!(!esc.is_ascii_alphabetic());
1365 /// ```
1366 #[must_use]
1367 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1368 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1369 #[inline]
1370 pub const fn is_ascii_alphabetic(&self) -> bool {
1371 matches!(*self, 'A'..='Z' | 'a'..='z')
1372 }
1373
1374 /// Checks if the value is an ASCII uppercase character:
1375 /// U+0041 'A' ..= U+005A 'Z'.
1376 ///
1377 /// # Examples
1378 ///
1379 /// ```
1380 /// let uppercase_a = 'A';
1381 /// let uppercase_g = 'G';
1382 /// let a = 'a';
1383 /// let g = 'g';
1384 /// let zero = '0';
1385 /// let percent = '%';
1386 /// let space = ' ';
1387 /// let lf = '\n';
1388 /// let esc = '\x1b';
1389 ///
1390 /// assert!(uppercase_a.is_ascii_uppercase());
1391 /// assert!(uppercase_g.is_ascii_uppercase());
1392 /// assert!(!a.is_ascii_uppercase());
1393 /// assert!(!g.is_ascii_uppercase());
1394 /// assert!(!zero.is_ascii_uppercase());
1395 /// assert!(!percent.is_ascii_uppercase());
1396 /// assert!(!space.is_ascii_uppercase());
1397 /// assert!(!lf.is_ascii_uppercase());
1398 /// assert!(!esc.is_ascii_uppercase());
1399 /// ```
1400 #[must_use]
1401 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1402 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1403 #[inline]
1404 pub const fn is_ascii_uppercase(&self) -> bool {
1405 matches!(*self, 'A'..='Z')
1406 }
1407
1408 /// Checks if the value is an ASCII lowercase character:
1409 /// U+0061 'a' ..= U+007A 'z'.
1410 ///
1411 /// # Examples
1412 ///
1413 /// ```
1414 /// let uppercase_a = 'A';
1415 /// let uppercase_g = 'G';
1416 /// let a = 'a';
1417 /// let g = 'g';
1418 /// let zero = '0';
1419 /// let percent = '%';
1420 /// let space = ' ';
1421 /// let lf = '\n';
1422 /// let esc = '\x1b';
1423 ///
1424 /// assert!(!uppercase_a.is_ascii_lowercase());
1425 /// assert!(!uppercase_g.is_ascii_lowercase());
1426 /// assert!(a.is_ascii_lowercase());
1427 /// assert!(g.is_ascii_lowercase());
1428 /// assert!(!zero.is_ascii_lowercase());
1429 /// assert!(!percent.is_ascii_lowercase());
1430 /// assert!(!space.is_ascii_lowercase());
1431 /// assert!(!lf.is_ascii_lowercase());
1432 /// assert!(!esc.is_ascii_lowercase());
1433 /// ```
1434 #[must_use]
1435 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1436 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1437 #[inline]
1438 pub const fn is_ascii_lowercase(&self) -> bool {
1439 matches!(*self, 'a'..='z')
1440 }
1441
1442 /// Checks if the value is an ASCII alphanumeric character:
1443 ///
1444 /// - U+0041 'A' ..= U+005A 'Z', or
1445 /// - U+0061 'a' ..= U+007A 'z', or
1446 /// - U+0030 '0' ..= U+0039 '9'.
1447 ///
1448 /// # Examples
1449 ///
1450 /// ```
1451 /// let uppercase_a = 'A';
1452 /// let uppercase_g = 'G';
1453 /// let a = 'a';
1454 /// let g = 'g';
1455 /// let zero = '0';
1456 /// let percent = '%';
1457 /// let space = ' ';
1458 /// let lf = '\n';
1459 /// let esc = '\x1b';
1460 ///
1461 /// assert!(uppercase_a.is_ascii_alphanumeric());
1462 /// assert!(uppercase_g.is_ascii_alphanumeric());
1463 /// assert!(a.is_ascii_alphanumeric());
1464 /// assert!(g.is_ascii_alphanumeric());
1465 /// assert!(zero.is_ascii_alphanumeric());
1466 /// assert!(!percent.is_ascii_alphanumeric());
1467 /// assert!(!space.is_ascii_alphanumeric());
1468 /// assert!(!lf.is_ascii_alphanumeric());
1469 /// assert!(!esc.is_ascii_alphanumeric());
1470 /// ```
1471 #[must_use]
1472 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1473 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1474 #[inline]
1475 pub const fn is_ascii_alphanumeric(&self) -> bool {
1476 matches!(*self, '0'..='9') | matches!(*self, 'A'..='Z') | matches!(*self, 'a'..='z')
1477 }
1478
1479 /// Checks if the value is an ASCII decimal digit:
1480 /// U+0030 '0' ..= U+0039 '9'.
1481 ///
1482 /// # Examples
1483 ///
1484 /// ```
1485 /// let uppercase_a = 'A';
1486 /// let uppercase_g = 'G';
1487 /// let a = 'a';
1488 /// let g = 'g';
1489 /// let zero = '0';
1490 /// let percent = '%';
1491 /// let space = ' ';
1492 /// let lf = '\n';
1493 /// let esc = '\x1b';
1494 ///
1495 /// assert!(!uppercase_a.is_ascii_digit());
1496 /// assert!(!uppercase_g.is_ascii_digit());
1497 /// assert!(!a.is_ascii_digit());
1498 /// assert!(!g.is_ascii_digit());
1499 /// assert!(zero.is_ascii_digit());
1500 /// assert!(!percent.is_ascii_digit());
1501 /// assert!(!space.is_ascii_digit());
1502 /// assert!(!lf.is_ascii_digit());
1503 /// assert!(!esc.is_ascii_digit());
1504 /// ```
1505 #[must_use]
1506 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1507 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1508 #[inline]
1509 pub const fn is_ascii_digit(&self) -> bool {
1510 matches!(*self, '0'..='9')
1511 }
1512
1513 /// Checks if the value is an ASCII octal digit:
1514 /// U+0030 '0' ..= U+0037 '7'.
1515 ///
1516 /// # Examples
1517 ///
1518 /// ```
1519 /// #![feature(is_ascii_octdigit)]
1520 ///
1521 /// let uppercase_a = 'A';
1522 /// let a = 'a';
1523 /// let zero = '0';
1524 /// let seven = '7';
1525 /// let nine = '9';
1526 /// let percent = '%';
1527 /// let lf = '\n';
1528 ///
1529 /// assert!(!uppercase_a.is_ascii_octdigit());
1530 /// assert!(!a.is_ascii_octdigit());
1531 /// assert!(zero.is_ascii_octdigit());
1532 /// assert!(seven.is_ascii_octdigit());
1533 /// assert!(!nine.is_ascii_octdigit());
1534 /// assert!(!percent.is_ascii_octdigit());
1535 /// assert!(!lf.is_ascii_octdigit());
1536 /// ```
1537 #[must_use]
1538 #[unstable(feature = "is_ascii_octdigit", issue = "101288")]
1539 #[inline]
1540 pub const fn is_ascii_octdigit(&self) -> bool {
1541 matches!(*self, '0'..='7')
1542 }
1543
1544 /// Checks if the value is an ASCII hexadecimal digit:
1545 ///
1546 /// - U+0030 '0' ..= U+0039 '9', or
1547 /// - U+0041 'A' ..= U+0046 'F', or
1548 /// - U+0061 'a' ..= U+0066 'f'.
1549 ///
1550 /// # Examples
1551 ///
1552 /// ```
1553 /// let uppercase_a = 'A';
1554 /// let uppercase_g = 'G';
1555 /// let a = 'a';
1556 /// let g = 'g';
1557 /// let zero = '0';
1558 /// let percent = '%';
1559 /// let space = ' ';
1560 /// let lf = '\n';
1561 /// let esc = '\x1b';
1562 ///
1563 /// assert!(uppercase_a.is_ascii_hexdigit());
1564 /// assert!(!uppercase_g.is_ascii_hexdigit());
1565 /// assert!(a.is_ascii_hexdigit());
1566 /// assert!(!g.is_ascii_hexdigit());
1567 /// assert!(zero.is_ascii_hexdigit());
1568 /// assert!(!percent.is_ascii_hexdigit());
1569 /// assert!(!space.is_ascii_hexdigit());
1570 /// assert!(!lf.is_ascii_hexdigit());
1571 /// assert!(!esc.is_ascii_hexdigit());
1572 /// ```
1573 #[must_use]
1574 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1575 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1576 #[inline]
1577 pub const fn is_ascii_hexdigit(&self) -> bool {
1578 matches!(*self, '0'..='9') | matches!(*self, 'A'..='F') | matches!(*self, 'a'..='f')
1579 }
1580
1581 /// Checks if the value is an ASCII punctuation character:
1582 ///
1583 /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /`, or
1584 /// - U+003A ..= U+0040 `: ; < = > ? @`, or
1585 /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``, or
1586 /// - U+007B ..= U+007E `{ | } ~`
1587 ///
1588 /// # Examples
1589 ///
1590 /// ```
1591 /// let uppercase_a = 'A';
1592 /// let uppercase_g = 'G';
1593 /// let a = 'a';
1594 /// let g = 'g';
1595 /// let zero = '0';
1596 /// let percent = '%';
1597 /// let space = ' ';
1598 /// let lf = '\n';
1599 /// let esc = '\x1b';
1600 ///
1601 /// assert!(!uppercase_a.is_ascii_punctuation());
1602 /// assert!(!uppercase_g.is_ascii_punctuation());
1603 /// assert!(!a.is_ascii_punctuation());
1604 /// assert!(!g.is_ascii_punctuation());
1605 /// assert!(!zero.is_ascii_punctuation());
1606 /// assert!(percent.is_ascii_punctuation());
1607 /// assert!(!space.is_ascii_punctuation());
1608 /// assert!(!lf.is_ascii_punctuation());
1609 /// assert!(!esc.is_ascii_punctuation());
1610 /// ```
1611 #[must_use]
1612 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1613 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1614 #[inline]
1615 pub const fn is_ascii_punctuation(&self) -> bool {
1616 matches!(*self, '!'..='/')
1617 | matches!(*self, ':'..='@')
1618 | matches!(*self, '['..='`')
1619 | matches!(*self, '{'..='~')
1620 }
1621
1622 /// Checks if the value is an ASCII graphic character:
1623 /// U+0021 '!' ..= U+007E '~'.
1624 ///
1625 /// # Examples
1626 ///
1627 /// ```
1628 /// let uppercase_a = 'A';
1629 /// let uppercase_g = 'G';
1630 /// let a = 'a';
1631 /// let g = 'g';
1632 /// let zero = '0';
1633 /// let percent = '%';
1634 /// let space = ' ';
1635 /// let lf = '\n';
1636 /// let esc = '\x1b';
1637 ///
1638 /// assert!(uppercase_a.is_ascii_graphic());
1639 /// assert!(uppercase_g.is_ascii_graphic());
1640 /// assert!(a.is_ascii_graphic());
1641 /// assert!(g.is_ascii_graphic());
1642 /// assert!(zero.is_ascii_graphic());
1643 /// assert!(percent.is_ascii_graphic());
1644 /// assert!(!space.is_ascii_graphic());
1645 /// assert!(!lf.is_ascii_graphic());
1646 /// assert!(!esc.is_ascii_graphic());
1647 /// ```
1648 #[must_use]
1649 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1650 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1651 #[inline]
1652 pub const fn is_ascii_graphic(&self) -> bool {
1653 matches!(*self, '!'..='~')
1654 }
1655
1656 /// Checks if the value is an ASCII whitespace character:
1657 /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
1658 /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
1659 ///
1660 /// Rust uses the WhatWG Infra Standard's [definition of ASCII
1661 /// whitespace][infra-aw]. There are several other definitions in
1662 /// wide use. For instance, [the POSIX locale][pct] includes
1663 /// U+000B VERTICAL TAB as well as all the above characters,
1664 /// but—from the very same specification—[the default rule for
1665 /// "field splitting" in the Bourne shell][bfs] considers *only*
1666 /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
1667 ///
1668 /// If you are writing a program that will process an existing
1669 /// file format, check what that format's definition of whitespace is
1670 /// before using this function.
1671 ///
1672 /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
1673 /// [pct]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
1674 /// [bfs]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
1675 ///
1676 /// # Examples
1677 ///
1678 /// ```
1679 /// let uppercase_a = 'A';
1680 /// let uppercase_g = 'G';
1681 /// let a = 'a';
1682 /// let g = 'g';
1683 /// let zero = '0';
1684 /// let percent = '%';
1685 /// let space = ' ';
1686 /// let lf = '\n';
1687 /// let esc = '\x1b';
1688 ///
1689 /// assert!(!uppercase_a.is_ascii_whitespace());
1690 /// assert!(!uppercase_g.is_ascii_whitespace());
1691 /// assert!(!a.is_ascii_whitespace());
1692 /// assert!(!g.is_ascii_whitespace());
1693 /// assert!(!zero.is_ascii_whitespace());
1694 /// assert!(!percent.is_ascii_whitespace());
1695 /// assert!(space.is_ascii_whitespace());
1696 /// assert!(lf.is_ascii_whitespace());
1697 /// assert!(!esc.is_ascii_whitespace());
1698 /// ```
1699 #[must_use]
1700 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1701 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1702 #[inline]
1703 pub const fn is_ascii_whitespace(&self) -> bool {
1704 matches!(*self, '\t' | '\n' | '\x0C' | '\r' | ' ')
1705 }
1706
1707 /// Checks if the value is an ASCII control character:
1708 /// U+0000 NUL ..= U+001F UNIT SEPARATOR, or U+007F DELETE.
1709 /// Note that most ASCII whitespace characters are control
1710 /// characters, but SPACE is not.
1711 ///
1712 /// # Examples
1713 ///
1714 /// ```
1715 /// let uppercase_a = 'A';
1716 /// let uppercase_g = 'G';
1717 /// let a = 'a';
1718 /// let g = 'g';
1719 /// let zero = '0';
1720 /// let percent = '%';
1721 /// let space = ' ';
1722 /// let lf = '\n';
1723 /// let esc = '\x1b';
1724 ///
1725 /// assert!(!uppercase_a.is_ascii_control());
1726 /// assert!(!uppercase_g.is_ascii_control());
1727 /// assert!(!a.is_ascii_control());
1728 /// assert!(!g.is_ascii_control());
1729 /// assert!(!zero.is_ascii_control());
1730 /// assert!(!percent.is_ascii_control());
1731 /// assert!(!space.is_ascii_control());
1732 /// assert!(lf.is_ascii_control());
1733 /// assert!(esc.is_ascii_control());
1734 /// ```
1735 #[must_use]
1736 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1737 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1738 #[inline]
1739 pub const fn is_ascii_control(&self) -> bool {
1740 matches!(*self, '\0'..='\x1F' | '\x7F')
1741 }
1742}
1743
/// Flags choosing which optional kinds of characters get escaped.
pub(crate) struct EscapeDebugExtArgs {
    /// Whether grapheme-extending code points are escaped.
    pub(crate) escape_grapheme_extended: bool,

    /// Whether the single quote (`'`) is escaped.
    pub(crate) escape_single_quote: bool,

    /// Whether the double quote (`"`) is escaped.
    pub(crate) escape_double_quote: bool,
}

impl EscapeDebugExtArgs {
    /// The configuration with every optional escape switched on.
    pub(crate) const ESCAPE_ALL: Self = EscapeDebugExtArgs {
        escape_grapheme_extended: true,
        escape_single_quote: true,
        escape_double_quote: true,
    };
}
1762
1763#[inline]
1764#[must_use]
1765const fn len_utf8(code: u32) -> usize {
1766 match code {
1767 ..MAX_ONE_B => 1,
1768 ..MAX_TWO_B => 2,
1769 ..MAX_THREE_B => 3,
1770 _ => 4,
1771 }
1772}
1773
/// Returns the number of `u16` code units (1 or 2) that the UTF-16 encoding of `code`
/// occupies.
///
/// A value that fits in 16 bits takes a single unit; anything larger takes two.
#[inline]
#[must_use]
const fn len_utf16(code: u32) -> usize {
    match code {
        0..=0xFFFF => 1,
        _ => 2,
    }
}
1779
1780/// Encodes a raw `u32` value as UTF-8 into the provided byte buffer,
1781/// and then returns the subslice of the buffer that contains the encoded character.
1782///
1783/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
1784/// (Creating a `char` in the surrogate range is UB.)
1785/// The result is valid [generalized UTF-8] but not valid UTF-8.
1786///
1787/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
1788///
1789/// # Panics
1790///
1791/// Panics if the buffer is not large enough.
1792/// A buffer of length four is large enough to encode any `char`.
1793#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1794#[doc(hidden)]
1795#[inline]
1796pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
1797 let len = len_utf8(code);
1798 match (len, &mut *dst) {
1799 (1, [a, ..]) => {
1800 *a = code as u8;
1801 }
1802 (2, [a, b, ..]) => {
1803 *a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
1804 *b = (code & 0x3F) as u8 | TAG_CONT;
1805 }
1806 (3, [a, b, c, ..]) => {
1807 *a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
1808 *b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
1809 *c = (code & 0x3F) as u8 | TAG_CONT;
1810 }
1811 (4, [a, b, c, d, ..]) => {
1812 *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
1813 *b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
1814 *c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
1815 *d = (code & 0x3F) as u8 | TAG_CONT;
1816 }
1817 _ => {
1818 const_panic!(
1819 "encode_utf8: buffer does not have enough bytes to encode code point",
1820 "encode_utf8: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
1821 code: u32 = code,
1822 len: usize = len,
1823 dst_len: usize = dst.len(),
1824 )
1825 }
1826 };
1827 // SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
1828 unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
1829}
1830
1831/// Encodes a raw `u32` value as native endian UTF-16 into the provided `u16` buffer,
1832/// and then returns the subslice of the buffer that contains the encoded character.
1833///
1834/// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
1835/// (Creating a `char` in the surrogate range is UB.)
1836///
1837/// # Panics
1838///
1839/// Panics if the buffer is not large enough.
1840/// A buffer of length 2 is large enough to encode any `char`.
1841#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1842#[doc(hidden)]
1843#[inline]
1844pub const fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
1845 let len = len_utf16(code);
1846 match (len, &mut *dst) {
1847 (1, [a, ..]) => {
1848 *a = code as u16;
1849 }
1850 (2, [a, b, ..]) => {
1851 code -= 0x1_0000;
1852 *a = (code >> 10) as u16 | 0xD800;
1853 *b = (code & 0x3FF) as u16 | 0xDC00;
1854 }
1855 _ => {
1856 const_panic!(
1857 "encode_utf16: buffer does not have enough bytes to encode code point",
1858 "encode_utf16: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
1859 code: u32 = code,
1860 len: usize = len,
1861 dst_len: usize = dst.len(),
1862 )
1863 }
1864 };
1865 // SAFETY: `<&mut [u16]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
1866 unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
1867}