core/char/convert.rs
1//! Character conversions.
2
3use crate::char::TryFromCharError;
4use crate::error::Error;
5use crate::fmt;
6use crate::mem::transmute;
7use crate::str::FromStr;
8use crate::ub_checks::assert_unsafe_precondition;
9
10/// Converts a `u32` to a `char`. See [`char::from_u32`].
11#[must_use]
12#[inline]
13pub(super) const fn from_u32(i: u32) -> Option<char> {
14 // FIXME(const-hack): once Result::ok is const fn, use it here
15 match char_try_from_u32(i) {
16 Ok(c) => Some(c),
17 Err(_) => None,
18 }
19}
20
21/// Converts a `u32` to a `char`, ignoring validity. See [`char::from_u32_unchecked`].
22#[inline]
23#[must_use]
24#[allow(unnecessary_transmutes)]
25#[track_caller]
26pub(super) const unsafe fn from_u32_unchecked(i: u32) -> char {
27 // SAFETY: the caller must guarantee that `i` is a valid char value.
28 unsafe {
29 assert_unsafe_precondition!(
30 check_language_ub,
31 "invalid value for `char`",
32 (i: u32 = i) => char_try_from_u32(i).is_ok()
33 );
34 transmute(i)
35 }
36}
37
38#[stable(feature = "char_convert", since = "1.13.0")]
39#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
40impl const From<char> for u32 {
41 /// Converts a [`char`] into a [`u32`].
42 ///
43 /// # Examples
44 ///
45 /// ```
46 /// let c = 'c';
47 /// let u = u32::from(c);
48 /// assert!(4 == size_of_val(&u))
49 /// ```
50 #[inline]
51 fn from(c: char) -> Self {
52 c as u32
53 }
54}
55
56#[stable(feature = "more_char_conversions", since = "1.51.0")]
57#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
58impl const From<char> for u64 {
59 /// Converts a [`char`] into a [`u64`].
60 ///
61 /// # Examples
62 ///
63 /// ```
64 /// let c = '👤';
65 /// let u = u64::from(c);
66 /// assert!(8 == size_of_val(&u))
67 /// ```
68 #[inline]
69 fn from(c: char) -> Self {
70 // The char is casted to the value of the code point, then zero-extended to 64 bit.
71 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
72 c as u64
73 }
74}
75
76#[stable(feature = "more_char_conversions", since = "1.51.0")]
77#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
78impl const From<char> for u128 {
79 /// Converts a [`char`] into a [`u128`].
80 ///
81 /// # Examples
82 ///
83 /// ```
84 /// let c = 'âš™';
85 /// let u = u128::from(c);
86 /// assert!(16 == size_of_val(&u))
87 /// ```
88 #[inline]
89 fn from(c: char) -> Self {
90 // The char is casted to the value of the code point, then zero-extended to 128 bit.
91 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
92 c as u128
93 }
94}
95
96/// Maps a `char` with code point in U+0000..=U+00FF to a byte in 0x00..=0xFF with same value,
97/// failing if the code point is greater than U+00FF.
98///
99/// See [`impl From<u8> for char`](char#impl-From<u8>-for-char) for details on the encoding.
100#[stable(feature = "u8_from_char", since = "1.59.0")]
101#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
102impl const TryFrom<char> for u8 {
103 type Error = TryFromCharError;
104
105 /// Tries to convert a [`char`] into a [`u8`].
106 ///
107 /// # Examples
108 ///
109 /// ```
110 /// let a = 'ÿ'; // U+00FF
111 /// let b = 'Ä€'; // U+0100
112 /// assert_eq!(u8::try_from(a), Ok(0xFF_u8));
113 /// assert!(u8::try_from(b).is_err());
114 /// ```
115 #[inline]
116 fn try_from(c: char) -> Result<u8, Self::Error> {
117 // FIXME(const-hack): this should use map_err instead
118 match u8::try_from(u32::from(c)) {
119 Ok(b) => Ok(b),
120 Err(_) => Err(TryFromCharError(())),
121 }
122 }
123}
124
125/// Maps a `char` with code point in U+0000..=U+FFFF to a `u16` in 0x0000..=0xFFFF with same value,
126/// failing if the code point is greater than U+FFFF.
127///
128/// This corresponds to the UCS-2 encoding, as specified in ISO/IEC 10646:2003.
129#[stable(feature = "u16_from_char", since = "1.74.0")]
130#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
131impl const TryFrom<char> for u16 {
132 type Error = TryFromCharError;
133
134 /// Tries to convert a [`char`] into a [`u16`].
135 ///
136 /// # Examples
137 ///
138 /// ```
139 /// let trans_rights = 'âš§'; // U+26A7
140 /// let ninjas = '🥷'; // U+1F977
141 /// assert_eq!(u16::try_from(trans_rights), Ok(0x26A7_u16));
142 /// assert!(u16::try_from(ninjas).is_err());
143 /// ```
144 #[inline]
145 fn try_from(c: char) -> Result<u16, Self::Error> {
146 // FIXME(const-hack): this should use map_err instead
147 match u16::try_from(u32::from(c)) {
148 Ok(x) => Ok(x),
149 Err(_) => Err(TryFromCharError(())),
150 }
151 }
152}
153
154/// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF.
155///
156/// Unicode is designed such that this effectively decodes bytes
157/// with the character encoding that IANA calls ISO-8859-1.
158/// This encoding is compatible with ASCII.
159///
160/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
161/// which leaves some "blanks", byte values that are not assigned to any character.
162/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
163///
164/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
165/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks
166/// to punctuation and various Latin characters.
167///
168/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
169/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
170/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
171/// C0 and C1 control codes.
172#[stable(feature = "char_convert", since = "1.13.0")]
173#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
174impl const From<u8> for char {
175 /// Converts a [`u8`] into a [`char`].
176 ///
177 /// # Examples
178 ///
179 /// ```
180 /// let u = 32 as u8;
181 /// let c = char::from(u);
182 /// assert!(4 == size_of_val(&c))
183 /// ```
184 #[inline]
185 fn from(i: u8) -> Self {
186 i as char
187 }
188}
189
190/// An error which can be returned when parsing a char.
191///
192/// This `struct` is created when using the [`char::from_str`] method.
193#[stable(feature = "char_from_str", since = "1.20.0")]
194#[derive(Clone, Debug, PartialEq, Eq)]
195pub struct ParseCharError {
196 kind: CharErrorKind,
197}
198
/// The reason a string could not be parsed as a single `char`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CharErrorKind {
    /// The input string contained no characters.
    EmptyString,
    /// The input string contained more than one character.
    TooManyChars,
}
204
205#[stable(feature = "char_from_str", since = "1.20.0")]
206impl Error for ParseCharError {}
207
208#[stable(feature = "char_from_str", since = "1.20.0")]
209impl fmt::Display for ParseCharError {
210 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
211 match self.kind {
212 CharErrorKind::EmptyString => "cannot parse char from empty string",
213 CharErrorKind::TooManyChars => "too many characters in string",
214 }
215 .fmt(f)
216 }
217}
218
219#[stable(feature = "char_from_str", since = "1.20.0")]
220impl FromStr for char {
221 type Err = ParseCharError;
222
223 #[inline]
224 fn from_str(s: &str) -> Result<Self, Self::Err> {
225 let mut chars = s.chars();
226 match (chars.next(), chars.next()) {
227 (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }),
228 (Some(c), None) => Ok(c),
229 _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }),
230 }
231 }
232}
233
234#[inline]
235#[allow(unnecessary_transmutes)]
236const fn char_try_from_u32(i: u32) -> Result<char, CharTryFromError> {
237 // This is an optimized version of the check
238 // (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF),
239 // which can also be written as
240 // i >= 0x110000 || (i >= 0xD800 && i < 0xE000).
241 //
242 // The XOR with 0xD800 permutes the ranges such that 0xD800..0xE000 is
243 // mapped to 0x0000..0x0800, while keeping all the high bits outside 0xFFFF the same.
244 // In particular, numbers >= 0x110000 stay in this range.
245 //
246 // Subtracting 0x800 causes 0x0000..0x0800 to wrap, meaning that a single
247 // unsigned comparison against 0x110000 - 0x800 will detect both the wrapped
248 // surrogate range as well as the numbers originally larger than 0x110000.
249 //
250 if (i ^ 0xD800).wrapping_sub(0x800) >= 0x110000 - 0x800 {
251 Err(CharTryFromError(()))
252 } else {
253 // SAFETY: checked that it's a legal unicode value
254 Ok(unsafe { transmute(i) })
255 }
256}
257
258#[stable(feature = "try_from", since = "1.34.0")]
259#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
260impl const TryFrom<u32> for char {
261 type Error = CharTryFromError;
262
263 #[inline]
264 fn try_from(i: u32) -> Result<Self, Self::Error> {
265 char_try_from_u32(i)
266 }
267}
268
269/// The error type returned when a conversion from [`prim@u32`] to [`prim@char`] fails.
270///
271/// This `struct` is created by the [`char::try_from<u32>`](char#impl-TryFrom<u32>-for-char) method.
272/// See its documentation for more.
273#[stable(feature = "try_from", since = "1.34.0")]
274#[derive(Copy, Clone, Debug, PartialEq, Eq)]
275pub struct CharTryFromError(());
276
277#[stable(feature = "try_from", since = "1.34.0")]
278impl fmt::Display for CharTryFromError {
279 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
280 "converted integer out of range for `char`".fmt(f)
281 }
282}
283
284/// Converts a digit in the given radix to a `char`. See [`char::from_digit`].
285#[inline]
286#[must_use]
287pub(super) const fn from_digit(num: u32, radix: u32) -> Option<char> {
288 if radix > 36 {
289 panic!("from_digit: radix is too high (maximum 36)");
290 }
291 if num < radix {
292 let num = num as u8;
293 if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) }
294 } else {
295 None
296 }
297}