// vendor/zerovec/src/ule/chars.rs - toolchain/rustc - Git at Google

 // This file is part of ICU4X. For terms of use, please see the file
 // called LICENSE at the top level of the ICU4X source tree
 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

 #![allow(clippy::upper_case_acronyms)]
 //! ULE implementation for the `char` type.

 use super::*;
 use crate::impl_ule_from_array;
 use core::cmp::Ordering;
 use core::convert::TryFrom;

 /// The unaligned (ULE) form of a [`char`]: three bytes holding a Unicode scalar value in
 /// little-endian order.
 ///
 /// A `CharULE` always stores the low three bytes of the little-endian `u32` encoding of a
 /// valid `char` (the fourth byte is implicitly zero), so it can be converted back to a
 /// `char` without validation.
 ///
 /// # Examples
 ///
 /// Convert a `char` to a `CharULE` and back again:
 ///
 /// ```
 /// use zerovec::ule::{AsULE, CharULE, ULE};
 ///
 /// let c1 = '𑄃';
 /// let ule = c1.to_unaligned();
 /// assert_eq!(CharULE::as_byte_slice(&[ule]), &[0x03, 0x11, 0x01]);
 /// let c2 = char::from_unaligned(ule);
 /// assert_eq!(c1, c2);
 /// ```
 ///
 /// Attempt to parse invalid bytes to a `CharULE`:
 ///
 /// ```
 /// use zerovec::ule::{CharULE, ULE};
 ///
 /// let bytes: &[u8] = &[0xFF, 0xFF, 0xFF, 0xFF];
 /// CharULE::parse_byte_slice(bytes).expect_err("Invalid bytes");
 /// ```
 #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
 #[repr(transparent)]
 pub struct CharULE([u8; 3]);

 impl CharULE {
     /// Builds the [`CharULE`] for a [`char`]. This does the same thing as
     /// [`AsULE::to_unaligned()`], but as a `const fn`.
     ///
     /// See the type-level documentation for [`CharULE`] for more information.
     #[inline]
     pub const fn from_aligned(c: char) -> Self {
         // A `char` never exceeds 0x10FFFF, so the most significant byte of its
         // little-endian u32 encoding is always zero and does not need to be stored.
         let [low, mid, high, _] = u32::to_le_bytes(c as u32);
         Self([low, mid, high])
     }

     // NOTE(review): presumably expands to the `from_array` constructor that the tests in
     // this file call in `const` context; confirm against the macro definition.
     impl_ule_from_array!(char, CharULE, Self([0; 3]));
 }

 // Safety (based on the safety checklist on the ULE trait):
 //  1. CharULE does not include any uninitialized or padding bytes.
 //     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
 //  2. CharULE is aligned to 1 byte.
 //     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
 //  3. The impl of validate_byte_slice() returns an error if any byte is not valid.
 //  4. The impl of validate_byte_slice() returns an error if there are extra bytes.
 //  5. The other ULE methods use the default impl.
 //  6. CharULE byte equality is semantic equality
 unsafe impl ULE for CharULE {
     /// Checks that `bytes` is a whole number of 3-byte chunks and that every chunk decodes
     /// to a valid `char`.
     ///
     /// Returns a length error when `bytes.len()` is not a multiple of 3, and a parse error
     /// at the first chunk that is not a Unicode scalar value.
     #[inline]
     fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
         // Each CharULE occupies exactly 3 bytes, so leftover bytes are an error.
         if bytes.len() % 3 != 0 {
             return Err(ZeroVecError::length::<Self>(bytes.len()));
         }
         // TODO: Use slice::as_chunks() when stabilized
         bytes.chunks_exact(3).try_for_each(|triple| {
             // Won't panic because the chunks are always 3 bytes long
             #[allow(clippy::indexing_slicing)]
             let scalar = u32::from_le_bytes([triple[0], triple[1], triple[2], 0]);
             // Only validity matters here; the decoded `char` itself is discarded.
             char::try_from(scalar)
                 .map(|_| ())
                 .map_err(|_| ZeroVecError::parse::<Self>())
         })
     }
 }

 impl AsULE for char {
     type ULE = CharULE;

     #[inline]
     fn to_unaligned(self) -> Self::ULE {
         CharULE::from_aligned(self)
     }

     #[inline]
     fn from_unaligned(unaligned: Self::ULE) -> Self {
         // Safe because the bytes of CharULE are defined to represent a valid Unicode scalar value.
         unsafe {
             Self::from_u32_unchecked(u32::from_le_bytes([
                 unaligned.0[0],
                 unaligned.0[1],
                 unaligned.0[2],
                 0,
             ]))
         }
     }
 }

 impl PartialOrd for CharULE {
     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
         char::from_unaligned(*self).partial_cmp(&char::from_unaligned(*other))
     }
 }

 impl Ord for CharULE {
     /// Orders two `CharULE`s by the `char` values they decode to.
     fn cmp(&self, other: &Self) -> Ordering {
         // Decode first: the bytes are stored little-endian, so comparing the raw arrays
         // lexicographically would not match the numeric order of the scalar values.
         let lhs = char::from_unaligned(*self);
         let rhs = char::from_unaligned(*other);
         lhs.cmp(&rhs)
     }
 }

 #[cfg(test)]
 mod test {
     use super::*;

     /// `from_array` works in a `const` context and yields three little-endian bytes per char.
     #[test]
     fn test_from_array() {
         const CHARS: [char; 2] = ['a', '🙃'];
         const CHARS_ULE: [CharULE; 2] = CharULE::from_array(CHARS);
         assert_eq!(
             CharULE::as_byte_slice(&CHARS_ULE),
             &[0x61, 0x00, 0x00, 0x43, 0xF6, 0x01]
         );
     }

     /// `from_array` on an empty array produces an empty byte slice.
     #[test]
     fn test_from_array_zst() {
         const CHARS: [char; 0] = [];
         const CHARS_ULE: [CharULE; 0] = CharULE::from_array(CHARS);
         let bytes = CharULE::as_byte_slice(&CHARS_ULE);
         let empty: &[u8] = &[];
         assert_eq!(bytes, empty);
     }

     /// Round-trips chars through their byte representation and checks the golden bytes.
     #[test]
     fn test_parse() {
         // 1-byte, 2-byte, 3-byte, and two 4-byte characters in UTF-8 (not as relevant in UTF-32)
         let chars = ['w', 'ω', '文', '𑄃', '🙃'];
         let char_ules: Vec<CharULE> = chars.iter().copied().map(char::to_unaligned).collect();
         let char_bytes: &[u8] = CharULE::as_byte_slice(&char_ules);

         // Check parsing
         let parsed_ules: &[CharULE] = CharULE::parse_byte_slice(char_bytes).unwrap();
         assert_eq!(char_ules, parsed_ules);
         let parsed_chars: Vec<char> = parsed_ules
             .iter()
             .copied()
             .map(char::from_unaligned)
             .collect();
         assert_eq!(&chars, parsed_chars.as_slice());

         // Compare to golden expected data
         assert_eq!(
             &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1],
             char_bytes
         );
     }

     /// Invalid input must be rejected.
     ///
     /// The inputs are written as 3-byte little-endian groups, matching the width of
     /// `CharULE`, so that each case fails for the reason it names rather than because of
     /// chunk misalignment.
     #[test]
     fn test_failures() {
         // 119 ('w') and 120 ('x') are valid, but not 0xD800 (high surrogate)
         let surrogate_bytes: &[u8] = &[119, 0, 0, 0x00, 0xD8, 0x00, 120, 0, 0];
         assert!(CharULE::parse_byte_slice(surrogate_bytes).is_err());

         // 0x20FFFF fits in three bytes but is out of range for a char (max 0x10FFFF)
         let out_of_range_bytes: &[u8] = &[0xFF, 0xFF, 0x20];
         assert!(CharULE::parse_byte_slice(out_of_range_bytes).is_err());

         // A length that is not a multiple of 3 is rejected even if the leading bytes are valid
         let wrong_length_bytes: &[u8] = &[119, 0, 0, 0];
         assert!(CharULE::parse_byte_slice(wrong_length_bytes).is_err());
     }
 }
	// This file is part of ICU4X. For terms of use, please see the file
	// called LICENSE at the top level of the ICU4X source tree
	// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

	#![allow(clippy::upper_case_acronyms)]
	//! ULE implementation for the `char` type.

	use super::*;
	use crate::impl_ule_from_array;
	use core::cmp::Ordering;
	use core::convert::TryFrom;

	/// The unaligned (ULE) form of a [`char`]: three bytes holding a Unicode scalar value in
	/// little-endian order.
	///
	/// A `CharULE` always stores the low three bytes of the little-endian `u32` encoding of a
	/// valid `char` (the fourth byte is implicitly zero), so it can be converted back to a
	/// `char` without validation.
	///
	/// # Examples
	///
	/// Convert a `char` to a `CharULE` and back again:
	///
	/// ```
	/// use zerovec::ule::{AsULE, CharULE, ULE};
	///
	/// let c1 = '𑄃';
	/// let ule = c1.to_unaligned();
	/// assert_eq!(CharULE::as_byte_slice(&[ule]), &[0x03, 0x11, 0x01]);
	/// let c2 = char::from_unaligned(ule);
	/// assert_eq!(c1, c2);
	/// ```
	///
	/// Attempt to parse invalid bytes to a `CharULE`:
	///
	/// ```
	/// use zerovec::ule::{CharULE, ULE};
	///
	/// let bytes: &[u8] = &[0xFF, 0xFF, 0xFF, 0xFF];
	/// CharULE::parse_byte_slice(bytes).expect_err("Invalid bytes");
	/// ```
	#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
	#[repr(transparent)]
	pub struct CharULE([u8; 3]);

	impl CharULE {
	    /// Builds the [`CharULE`] for a [`char`]. This does the same thing as
	    /// [`AsULE::to_unaligned()`], but as a `const fn`.
	    ///
	    /// See the type-level documentation for [`CharULE`] for more information.
	    #[inline]
	    pub const fn from_aligned(c: char) -> Self {
	        // A `char` never exceeds 0x10FFFF, so the most significant byte of its
	        // little-endian u32 encoding is always zero and does not need to be stored.
	        let [low, mid, high, _] = u32::to_le_bytes(c as u32);
	        Self([low, mid, high])
	    }

	    // NOTE(review): presumably expands to the `from_array` constructor that the tests in
	    // this file call in `const` context; confirm against the macro definition.
	    impl_ule_from_array!(char, CharULE, Self([0; 3]));
	}

	// Safety (based on the safety checklist on the ULE trait):
	//  1. CharULE does not include any uninitialized or padding bytes.
	//     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
	//  2. CharULE is aligned to 1 byte.
	//     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
	//  3. The impl of validate_byte_slice() returns an error if any byte is not valid.
	//  4. The impl of validate_byte_slice() returns an error if there are extra bytes.
	//  5. The other ULE methods use the default impl.
	//  6. CharULE byte equality is semantic equality
	unsafe impl ULE for CharULE {
	    /// Checks that `bytes` is a whole number of 3-byte chunks and that every chunk decodes
	    /// to a valid `char`.
	    ///
	    /// Returns a length error when `bytes.len()` is not a multiple of 3, and a parse error
	    /// at the first chunk that is not a Unicode scalar value.
	    #[inline]
	    fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
	        if bytes.len() % 3 != 0 {
	            return Err(ZeroVecError::length::<Self>(bytes.len()));
	        }
	        // Validate the bytes
	        for chunk in bytes.chunks_exact(3) {
	            // TODO: Use slice::as_chunks() when stabilized
	            #[allow(clippy::indexing_slicing)]
	            // Won't panic because the chunks are always 3 bytes long
	            let u = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], 0]);
	            char::try_from(u).map_err(|_| ZeroVecError::parse::<Self>())?;
	        }
	        Ok(())
	    }
	}

	impl AsULE for char {
	type ULE = CharULE;

	#[inline]
	fn to_unaligned(self) -> Self::ULE {
	CharULE::from_aligned(self)
	}

	#[inline]
	fn from_unaligned(unaligned: Self::ULE) -> Self {
	// Safe because the bytes of CharULE are defined to represent a valid Unicode scalar value.
	unsafe {
	Self::from_u32_unchecked(u32::from_le_bytes([
	unaligned.0[0],
	unaligned.0[1],
	unaligned.0[2],
	0,
	]))
	}
	}
	}

	impl PartialOrd for CharULE {
	fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
	char::from_unaligned(self).partial_cmp(&char::from_unaligned(other))
	}
	}

	impl Ord for CharULE {
	    /// Orders two `CharULE`s by the `char` values they decode to.
	    fn cmp(&self, other: &Self) -> Ordering {
	        // `from_unaligned` takes the ULE by value, so the references must be dereferenced
	        // (`CharULE` is `Copy`). Decoding is needed because the bytes are little-endian:
	        // comparing the raw arrays would not match the numeric order of the scalar values.
	        char::from_unaligned(*self).cmp(&char::from_unaligned(*other))
	    }
	}

	#[cfg(test)]
	mod test {
	    use super::*;

	    /// `from_array` works in a `const` context and yields three little-endian bytes per char.
	    #[test]
	    fn test_from_array() {
	        const CHARS: [char; 2] = ['a', '🙃'];
	        const CHARS_ULE: [CharULE; 2] = CharULE::from_array(CHARS);
	        assert_eq!(
	            CharULE::as_byte_slice(&CHARS_ULE),
	            &[0x61, 0x00, 0x00, 0x43, 0xF6, 0x01]
	        );
	    }

	    /// `from_array` on an empty array produces an empty byte slice.
	    #[test]
	    fn test_from_array_zst() {
	        const CHARS: [char; 0] = [];
	        const CHARS_ULE: [CharULE; 0] = CharULE::from_array(CHARS);
	        let bytes = CharULE::as_byte_slice(&CHARS_ULE);
	        let empty: &[u8] = &[];
	        assert_eq!(bytes, empty);
	    }

	    /// Round-trips chars through their byte representation and checks the golden bytes.
	    #[test]
	    fn test_parse() {
	        // 1-byte, 2-byte, 3-byte, and two 4-byte characters in UTF-8 (not as relevant in UTF-32)
	        let chars = ['w', 'ω', '文', '𑄃', '🙃'];
	        let char_ules: Vec<CharULE> = chars.iter().copied().map(char::to_unaligned).collect();
	        let char_bytes: &[u8] = CharULE::as_byte_slice(&char_ules);

	        // Check parsing
	        let parsed_ules: &[CharULE] = CharULE::parse_byte_slice(char_bytes).unwrap();
	        assert_eq!(char_ules, parsed_ules);
	        let parsed_chars: Vec<char> = parsed_ules
	            .iter()
	            .copied()
	            .map(char::from_unaligned)
	            .collect();
	        assert_eq!(&chars, parsed_chars.as_slice());

	        // Compare to golden expected data
	        assert_eq!(
	            &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1],
	            char_bytes
	        );
	    }

	    /// Invalid input must be rejected.
	    ///
	    /// The inputs are written as 3-byte little-endian groups, matching the width of
	    /// `CharULE`, so that each case fails for the reason it names rather than because of
	    /// chunk misalignment.
	    #[test]
	    fn test_failures() {
	        // 119 ('w') and 120 ('x') are valid, but not 0xD800 (high surrogate)
	        let surrogate_bytes: &[u8] = &[119, 0, 0, 0x00, 0xD8, 0x00, 120, 0, 0];
	        assert!(CharULE::parse_byte_slice(surrogate_bytes).is_err());

	        // 0x20FFFF fits in three bytes but is out of range for a char (max 0x10FFFF)
	        let out_of_range_bytes: &[u8] = &[0xFF, 0xFF, 0x20];
	        assert!(CharULE::parse_byte_slice(out_of_range_bytes).is_err());

	        // A length that is not a multiple of 3 is rejected even if the leading bytes are valid
	        let wrong_length_bytes: &[u8] = &[119, 0, 0, 0];
	        assert!(CharULE::parse_byte_slice(wrong_length_bytes).is_err());
	    }
	}