| // This file is part of ICU4X. For terms of use, please see the file |
| // called LICENSE at the top level of the ICU4X source tree |
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| |
| use crate::ordering::SubtagOrderingResult; |
| use crate::parser::{ |
| parse_locale, parse_locale_with_single_variant_single_keyword_unicode_keyword_extension, |
| ParserError, ParserMode, SubtagIterator, |
| }; |
| use crate::{extensions, subtags, LanguageIdentifier}; |
| use alloc::string::String; |
| use core::cmp::Ordering; |
| use core::str::FromStr; |
| use tinystr::TinyAsciiStr; |
| use writeable::Writeable; |
| |
| /// A core struct representing a [`Unicode Locale Identifier`]. |
| /// |
| /// A locale is made of two parts: |
| /// * Unicode Language Identifier |
| /// * A set of Unicode Extensions |
| /// |
| /// [`Locale`] exposes all of the same fields and methods as [`LanguageIdentifier`], and |
| /// on top of that is able to parse, manipulate and serialize unicode extension fields. |
| /// |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use icu_locid::{ |
| /// extensions::unicode::{key, value}, |
| /// locale, |
| /// subtags::{language, region}, |
| /// }; |
| /// |
| /// let loc = locale!("en-US-u-ca-buddhist"); |
| /// |
| /// assert_eq!(loc.id.language, language!("en")); |
| /// assert_eq!(loc.id.script, None); |
| /// assert_eq!(loc.id.region, Some(region!("US"))); |
| /// assert_eq!(loc.id.variants.len(), 0); |
| /// assert_eq!( |
| /// loc.extensions.unicode.keywords.get(&key!("ca")), |
| /// Some(&value!("buddhist")) |
| /// ); |
| /// ``` |
| /// |
| /// # Parsing |
| /// |
| /// Unicode recognizes three levels of standard conformance for a locale: |
| /// |
| /// * *well-formed* - syntactically correct |
| /// * *valid* - well-formed and only uses registered language subtags, extensions, keywords, types... |
| /// * *canonical* - valid and no deprecated codes or structure. |
| /// |
| /// At the moment parsing normalizes a well-formed locale identifier converting |
| /// `_` separators to `-` and adjusting casing to conform to the Unicode standard. |
| /// |
| /// Any bogus subtags will cause the parsing to fail with an error. |
| /// |
| /// No subtag validation or alias resolution is performed. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use icu::locid::{subtags::*, Locale}; |
| /// |
| /// let loc: Locale = "eN_latn_Us-Valencia_u-hC-H12" |
| /// .parse() |
| /// .expect("Failed to parse."); |
| /// |
| /// assert_eq!(loc.id.language, "en".parse::<Language>().unwrap()); |
| /// assert_eq!(loc.id.script, "Latn".parse::<Script>().ok()); |
| /// assert_eq!(loc.id.region, "US".parse::<Region>().ok()); |
| /// assert_eq!( |
| /// loc.id.variants.get(0), |
| /// "valencia".parse::<Variant>().ok().as_ref() |
| /// ); |
| /// ``` |
| /// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/tr35.html#Unicode_locale_identifier |
| #[derive(Default, PartialEq, Eq, Clone, Hash)] |
| #[allow(clippy::exhaustive_structs)] // This struct is stable (and invoked by a macro) |
| pub struct Locale { |
| /// The basic language/script/region components in the locale identifier along with any variants. |
| pub id: LanguageIdentifier, |
| /// Any extensions present in the locale identifier. |
| pub extensions: extensions::Extensions, |
| } |
| |
| #[test] |
| fn test_sizes() { |
| assert_eq!(core::mem::size_of::<subtags::Language>(), 3); |
| assert_eq!(core::mem::size_of::<subtags::Script>(), 4); |
| assert_eq!(core::mem::size_of::<subtags::Region>(), 3); |
| assert_eq!(core::mem::size_of::<subtags::Variant>(), 8); |
| assert_eq!(core::mem::size_of::<subtags::Variants>(), 16); |
| assert_eq!(core::mem::size_of::<LanguageIdentifier>(), 32); |
| |
| assert_eq!(core::mem::size_of::<extensions::transform::Transform>(), 56); |
| assert_eq!(core::mem::size_of::<Option<LanguageIdentifier>>(), 32); |
| assert_eq!(core::mem::size_of::<extensions::transform::Fields>(), 24); |
| |
| assert_eq!(core::mem::size_of::<extensions::unicode::Attributes>(), 16); |
| assert_eq!(core::mem::size_of::<extensions::unicode::Keywords>(), 24); |
| assert_eq!(core::mem::size_of::<Vec<extensions::other::Other>>(), 24); |
| assert_eq!(core::mem::size_of::<extensions::private::Private>(), 16); |
| assert_eq!(core::mem::size_of::<extensions::Extensions>(), 136); |
| |
| assert_eq!(core::mem::size_of::<Locale>(), 168); |
| } |
| |
| impl Locale { |
| /// A constructor which takes a utf8 slice, parses it and |
| /// produces a well-formed [`Locale`]. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use icu::locid::Locale; |
| /// |
| /// Locale::try_from_bytes(b"en-US-u-hc-h12").unwrap(); |
| /// ``` |
| pub fn try_from_bytes(v: &[u8]) -> Result<Self, ParserError> { |
| parse_locale(v) |
| } |
| |
| /// The default undefined locale "und". Same as [`default()`](Default::default()). |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use icu::locid::Locale; |
| /// |
| /// assert_eq!(Locale::default(), Locale::UND); |
| /// ``` |
| pub const UND: Self = Self { |
| id: LanguageIdentifier::UND, |
| extensions: extensions::Extensions::new(), |
| }; |
| |
| /// This is a best-effort operation that performs all available levels of canonicalization. |
| /// |
| /// At the moment the operation will normalize casing and the separator, but in the future |
| /// it may also validate and update from deprecated subtags to canonical ones. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use icu::locid::Locale; |
| /// |
| /// assert_eq!( |
| /// Locale::canonicalize("pL_latn_pl-U-HC-H12").as_deref(), |
| /// Ok("pl-Latn-PL-u-hc-h12") |
| /// ); |
| /// ``` |
| pub fn canonicalize<S: AsRef<[u8]>>(input: S) -> Result<String, ParserError> { |
| let locale = Self::try_from_bytes(input.as_ref())?; |
| Ok(locale.write_to_string().into_owned()) |
| } |
| |
| /// Compare this [`Locale`] with BCP-47 bytes. |
| /// |
| /// The return value is equivalent to what would happen if you first converted this |
| /// [`Locale`] to a BCP-47 string and then performed a byte comparison. |
| /// |
| /// This function is case-sensitive and results in a *total order*, so it is appropriate for |
| /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use icu::locid::Locale; |
| /// use std::cmp::Ordering; |
| /// |
| /// let bcp47_strings: &[&str] = &[ |
| /// "pl-Latn-PL", |
| /// "und", |
| /// "und-fonipa", |
| /// "und-t-m0-true", |
| /// "und-u-ca-hebrew", |
| /// "und-u-ca-japanese", |
| /// "zh", |
| /// ]; |
| /// |
| /// for ab in bcp47_strings.windows(2) { |
| /// let a = ab[0]; |
| /// let b = ab[1]; |
| /// assert!(a.cmp(b) == Ordering::Less); |
| /// let a_loc = a.parse::<Locale>().unwrap(); |
| /// assert!(a_loc.strict_cmp(a.as_bytes()) == Ordering::Equal); |
| /// assert!(a_loc.strict_cmp(b.as_bytes()) == Ordering::Less); |
| /// } |
| /// ``` |
| pub fn strict_cmp(&self, other: &[u8]) -> Ordering { |
| self.strict_cmp_iter(other.split(|b| *b == b'-')).end() |
| } |
| |
| /// Compare this [`Locale`] with an iterator of BCP-47 subtags. |
| /// |
| /// This function has the same equality semantics as [`Locale::strict_cmp`]. It is intended as |
| /// a more modular version that allows multiple subtag iterators to be chained together. |
| /// |
| /// For an additional example, see [`SubtagOrderingResult`]. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use icu::locid::locale; |
| /// use std::cmp::Ordering; |
| /// |
| /// let subtags: &[&[u8]] = |
| /// &[b"ca", b"ES", b"valencia", b"u", b"ca", b"hebrew"]; |
| /// |
| /// let loc = locale!("ca-ES-valencia-u-ca-hebrew"); |
| /// assert_eq!( |
| /// Ordering::Equal, |
| /// loc.strict_cmp_iter(subtags.iter().copied()).end() |
| /// ); |
| /// |
| /// let loc = locale!("ca-ES-valencia"); |
| /// assert_eq!( |
| /// Ordering::Less, |
| /// loc.strict_cmp_iter(subtags.iter().copied()).end() |
| /// ); |
| /// |
| /// let loc = locale!("ca-ES-valencia-u-nu-arab"); |
| /// assert_eq!( |
| /// Ordering::Greater, |
| /// loc.strict_cmp_iter(subtags.iter().copied()).end() |
| /// ); |
| /// ``` |
| pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I> |
| where |
| I: Iterator<Item = &'l [u8]>, |
| { |
| let r = self.for_each_subtag_str(&mut |subtag| { |
| if let Some(other) = subtags.next() { |
| match subtag.as_bytes().cmp(other) { |
| Ordering::Equal => Ok(()), |
| not_equal => Err(not_equal), |
| } |
| } else { |
| Err(Ordering::Greater) |
| } |
| }); |
| match r { |
| Ok(_) => SubtagOrderingResult::Subtags(subtags), |
| Err(o) => SubtagOrderingResult::Ordering(o), |
| } |
| } |
| |
| /// Compare this `Locale` with a potentially unnormalized BCP-47 string. |
| /// |
| /// The return value is equivalent to what would happen if you first parsed the |
| /// BCP-47 string to a `Locale` and then performed a structural comparison. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use icu::locid::Locale; |
| /// use std::cmp::Ordering; |
| /// |
| /// let bcp47_strings: &[&str] = &[ |
| /// "pl-LaTn-pL", |
| /// "uNd", |
| /// "UND-FONIPA", |
| /// "UnD-t-m0-TrUe", |
| /// "uNd-u-CA-Japanese", |
| /// "ZH", |
| /// ]; |
| /// |
| /// for a in bcp47_strings { |
| /// assert!(a.parse::<Locale>().unwrap().normalizing_eq(a)); |
| /// } |
| /// ``` |
| pub fn normalizing_eq(&self, other: &str) -> bool { |
| macro_rules! subtag_matches { |
| ($T:ty, $iter:ident, $expected:expr) => { |
| $iter |
| .next() |
| .map(|b| <$T>::try_from_bytes(b) == Ok($expected)) |
| .unwrap_or(false) |
| }; |
| } |
| |
| let mut iter = SubtagIterator::new(other.as_bytes()); |
| if !subtag_matches!(subtags::Language, iter, self.id.language) { |
| return false; |
| } |
| if let Some(ref script) = self.id.script { |
| if !subtag_matches!(subtags::Script, iter, *script) { |
| return false; |
| } |
| } |
| if let Some(ref region) = self.id.region { |
| if !subtag_matches!(subtags::Region, iter, *region) { |
| return false; |
| } |
| } |
| for variant in self.id.variants.iter() { |
| if !subtag_matches!(subtags::Variant, iter, *variant) { |
| return false; |
| } |
| } |
| if !self.extensions.is_empty() { |
| match extensions::Extensions::try_from_iter(&mut iter) { |
| Ok(exts) => { |
| if self.extensions != exts { |
| return false; |
| } |
| } |
| Err(_) => { |
| return false; |
| } |
| } |
| } |
| iter.next().is_none() |
| } |
| |
| #[doc(hidden)] |
| #[allow(clippy::type_complexity)] |
| pub const fn try_from_bytes_with_single_variant_single_keyword_unicode_extension( |
| v: &[u8], |
| ) -> Result< |
| ( |
| subtags::Language, |
| Option<subtags::Script>, |
| Option<subtags::Region>, |
| Option<subtags::Variant>, |
| Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>, |
| ), |
| ParserError, |
| > { |
| parse_locale_with_single_variant_single_keyword_unicode_keyword_extension( |
| v, |
| ParserMode::Locale, |
| ) |
| } |
| |
| pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> |
| where |
| F: FnMut(&str) -> Result<(), E>, |
| { |
| self.id.for_each_subtag_str(f)?; |
| self.extensions.for_each_subtag_str(f)?; |
| Ok(()) |
| } |
| } |
| |
| impl FromStr for Locale { |
| type Err = ParserError; |
| |
| fn from_str(source: &str) -> Result<Self, Self::Err> { |
| Self::try_from_bytes(source.as_bytes()) |
| } |
| } |
| |
| impl From<LanguageIdentifier> for Locale { |
| fn from(id: LanguageIdentifier) -> Self { |
| Self { |
| id, |
| extensions: extensions::Extensions::default(), |
| } |
| } |
| } |
| |
| impl From<Locale> for LanguageIdentifier { |
| fn from(loc: Locale) -> Self { |
| loc.id |
| } |
| } |
| |
| impl AsRef<LanguageIdentifier> for Locale { |
| fn as_ref(&self) -> &LanguageIdentifier { |
| &self.id |
| } |
| } |
| |
| impl AsMut<LanguageIdentifier> for Locale { |
| fn as_mut(&mut self) -> &mut LanguageIdentifier { |
| &mut self.id |
| } |
| } |
| |
| impl core::fmt::Debug for Locale { |
| fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
| writeable::Writeable::write_to(self, f) |
| } |
| } |
| |
| impl_writeable_for_each_subtag_str_no_test!(Locale, selff, selff.extensions.is_empty() => selff.id.write_to_string()); |
| |
| #[test] |
| fn test_writeable() { |
| use writeable::assert_writeable_eq; |
| assert_writeable_eq!(Locale::UND, "und"); |
| assert_writeable_eq!("und-001".parse::<Locale>().unwrap(), "und-001"); |
| assert_writeable_eq!("und-Mymr".parse::<Locale>().unwrap(), "und-Mymr"); |
| assert_writeable_eq!("my-Mymr-MM".parse::<Locale>().unwrap(), "my-Mymr-MM"); |
| assert_writeable_eq!( |
| "my-Mymr-MM-posix".parse::<Locale>().unwrap(), |
| "my-Mymr-MM-posix", |
| ); |
| assert_writeable_eq!( |
| "zh-macos-posix".parse::<Locale>().unwrap(), |
| "zh-macos-posix", |
| ); |
| assert_writeable_eq!( |
| "my-t-my-d0-zawgyi".parse::<Locale>().unwrap(), |
| "my-t-my-d0-zawgyi", |
| ); |
| assert_writeable_eq!( |
| "ar-SA-u-ca-islamic-civil".parse::<Locale>().unwrap(), |
| "ar-SA-u-ca-islamic-civil", |
| ); |
| assert_writeable_eq!( |
| "en-001-x-foo-bar".parse::<Locale>().unwrap(), |
| "en-001-x-foo-bar", |
| ); |
| assert_writeable_eq!("und-t-m0-true".parse::<Locale>().unwrap(), "und-t-m0-true",); |
| } |
| |
| /// # Examples |
| /// |
| /// ``` |
| /// use icu::locid::Locale; |
| /// use icu::locid::{locale, subtags::language}; |
| /// |
| /// assert_eq!(Locale::from(language!("en")), locale!("en")); |
| /// ``` |
| impl From<subtags::Language> for Locale { |
| fn from(language: subtags::Language) -> Self { |
| Self { |
| id: language.into(), |
| ..Default::default() |
| } |
| } |
| } |
| |
| /// # Examples |
| /// |
| /// ``` |
| /// use icu::locid::Locale; |
| /// use icu::locid::{locale, subtags::script}; |
| /// |
| /// assert_eq!(Locale::from(Some(script!("latn"))), locale!("und-Latn")); |
| /// ``` |
| impl From<Option<subtags::Script>> for Locale { |
| fn from(script: Option<subtags::Script>) -> Self { |
| Self { |
| id: script.into(), |
| ..Default::default() |
| } |
| } |
| } |
| |
| /// # Examples |
| /// |
| /// ``` |
| /// use icu::locid::Locale; |
| /// use icu::locid::{locale, subtags::region}; |
| /// |
| /// assert_eq!(Locale::from(Some(region!("US"))), locale!("und-US")); |
| /// ``` |
| impl From<Option<subtags::Region>> for Locale { |
| fn from(region: Option<subtags::Region>) -> Self { |
| Self { |
| id: region.into(), |
| ..Default::default() |
| } |
| } |
| } |
| |
| /// # Examples |
| /// |
| /// ``` |
| /// use icu::locid::Locale; |
| /// use icu::locid::{ |
| /// locale, |
| /// subtags::{language, region, script}, |
| /// }; |
| /// |
| /// assert_eq!( |
| /// Locale::from(( |
| /// language!("en"), |
| /// Some(script!("Latn")), |
| /// Some(region!("US")) |
| /// )), |
| /// locale!("en-Latn-US") |
| /// ); |
| /// ``` |
| impl |
| From<( |
| subtags::Language, |
| Option<subtags::Script>, |
| Option<subtags::Region>, |
| )> for Locale |
| { |
| fn from( |
| lsr: ( |
| subtags::Language, |
| Option<subtags::Script>, |
| Option<subtags::Region>, |
| ), |
| ) -> Self { |
| Self { |
| id: lsr.into(), |
| ..Default::default() |
| } |
| } |
| } |