blob: 15a6ce831c80262a08ddfcc0829bc97a69ced64e [file] [log] [blame]
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use crate::{DataError, DataErrorKind};
use core::cmp::Ordering;
use core::default::Default;
use core::fmt;
use core::fmt::Debug;
use core::hash::Hash;
use core::str::FromStr;
use icu_locid::extensions::unicode as unicode_ext;
use icu_locid::subtags::{Language, Region, Script, Variants};
use icu_locid::{LanguageIdentifier, Locale, SubtagOrderingResult};
use writeable::{LengthHint, Writeable};
#[cfg(feature = "experimental")]
use alloc::string::String;
#[cfg(feature = "experimental")]
use core::ops::Deref;
#[cfg(feature = "experimental")]
use tinystr::TinyAsciiStr;
#[cfg(doc)]
use icu_locid::subtags::Variant;
/// Byte that separates the BCP-47 locale portion from the auxiliary keys, and the auxiliary
/// keys from each other, in the string form of a [`DataLocale`].
const AUXILIARY_KEY_SEPARATOR: u8 = b'+';
/// The request type passed into all data provider implementations.
///
/// A request pairs a borrowed [`DataLocale`] with [`DataRequestMetadata`]. Its
/// [`Display`](fmt::Display) output is that of the locale alone.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
#[allow(clippy::exhaustive_structs)] // this type is stable
pub struct DataRequest<'a> {
/// The locale for which to load data.
///
/// If locale fallback is enabled, the resulting data may be from a different locale
/// than the one requested here.
pub locale: &'a DataLocale,
/// Metadata that may affect the behavior of the data provider.
pub metadata: DataRequestMetadata,
}
/// A [`DataRequest`] is displayed as its locale; the metadata is not printed.
impl fmt::Display for DataRequest<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self { locale, .. } = self;
        fmt::Display::fmt(locale, f)
    }
}
/// Metadata for data requests. This currently carries only the [`silent`](Self::silent) flag,
/// but it may be extended with options for tuning locale fallback, buffer layout, and so forth.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[non_exhaustive]
pub struct DataRequestMetadata {
/// Silent requests do not log errors. This can be used for exploratory querying, such as fallbacks.
pub silent: bool,
}
/// A locale type optimized for use in fallbacking and the ICU4X data pipeline.
///
/// [`DataLocale`] contains less functionality than [`Locale`] but more than
/// [`LanguageIdentifier`] for better size and performance while still meeting
/// the needs of the ICU4X data pipeline.
///
/// # Examples
///
/// Convert a [`Locale`] to a [`DataLocale`] and back:
///
/// ```
/// use icu_locid::locale;
/// use icu_provider::DataLocale;
///
/// let locale = locale!("en-u-ca-buddhist");
/// let data_locale = DataLocale::from(locale);
/// let locale = data_locale.into_locale();
///
/// assert_eq!(locale, locale!("en-u-ca-buddhist"));
/// ```
///
/// You can alternatively create a [`DataLocale`] from a borrowed [`Locale`], which is more
/// efficient than cloning the [`Locale`], but less efficient than converting an owned
/// [`Locale`]:
///
/// ```
/// use icu_locid::locale;
/// use icu_provider::DataLocale;
///
/// let locale1 = locale!("en-u-ca-buddhist");
/// let data_locale = DataLocale::from(&locale1);
/// let locale2 = data_locale.into_locale();
///
/// assert_eq!(locale1, locale2);
/// ```
///
/// If you are sure that you have no Unicode keywords, start with [`LanguageIdentifier`]:
///
/// ```
/// use icu_locid::langid;
/// use icu_provider::DataLocale;
///
/// let langid = langid!("es-CA-valencia");
/// let data_locale = DataLocale::from(langid);
/// let langid = data_locale.get_langid();
///
/// assert_eq!(langid, langid!("es-CA-valencia"));
/// ```
///
/// [`DataLocale`] only supports `-u` keywords, to reflect the current state of CLDR data
/// lookup and fallback. This may change in the future.
///
/// ```
/// use icu_locid::{locale, Locale};
/// use icu_provider::DataLocale;
///
/// let locale = "hi-t-en-h0-hybrid-u-attr-ca-buddhist"
/// .parse::<Locale>()
/// .unwrap();
/// let data_locale = DataLocale::from(locale);
///
/// assert_eq!(data_locale.into_locale(), locale!("hi-u-ca-buddhist"));
/// ```
#[derive(PartialEq, Clone, Default, Eq, Hash)]
pub struct DataLocale {
// The language identifier portion; always written first in the string form.
langid: LanguageIdentifier,
// Unicode extension keywords; written after a `-u-` marker when non-empty.
keywords: unicode_ext::Keywords,
// Optional auxiliary keys; written last, after the auxiliary key separator.
#[cfg(feature = "experimental")]
aux: Option<AuxiliaryKeys>,
}
/// Hands out a reference to a shared, statically allocated empty (`und`) [`DataLocale`],
/// so that types holding a `&DataLocale` can derive `Default`.
impl Default for &DataLocale {
    fn default() -> Self {
        static UND_LOCALE: DataLocale = DataLocale {
            langid: LanguageIdentifier::UND,
            keywords: unicode_ext::Keywords::new(),
            #[cfg(feature = "experimental")]
            aux: None,
        };
        &UND_LOCALE
    }
}
/// Debug output is the `Display` form wrapped as `DataLocale{...}`.
impl fmt::Debug for DataLocale {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("DataLocale{")?;
        write!(f, "{self}")?;
        f.write_str("}")
    }
}
impl Writeable for DataLocale {
    /// Writes the language identifier, then `-u-` and the keywords when any are present,
    /// then (with the "experimental" feature) the separator and the auxiliary keys when set.
    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
        self.langid.write_to(sink)?;
        if self.has_unicode_ext() {
            sink.write_str("-u-")?;
            self.keywords.write_to(sink)?;
        }
        #[cfg(feature = "experimental")]
        if let Some(aux) = &self.aux {
            sink.write_char(char::from(AuxiliaryKeys::separator()))?;
            aux.write_to(sink)?;
        }
        Ok(())
    }

    /// Sums the hints of every component that `write_to` would emit.
    fn writeable_length_hint(&self) -> LengthHint {
        let mut hint = self.langid.writeable_length_hint();
        if self.has_unicode_ext() {
            // The extra 3 accounts for the "-u-" marker.
            hint += self.keywords.writeable_length_hint() + 3;
        }
        #[cfg(feature = "experimental")]
        if let Some(aux) = &self.aux {
            // The extra 1 accounts for the separator byte.
            hint += aux.writeable_length_hint() + 1;
        }
        hint
    }

    /// Delegates to the language identifier when nothing else needs to be written;
    /// otherwise writes into a freshly allocated, pre-sized string.
    fn write_to_string(&self) -> alloc::borrow::Cow<str> {
        #[cfg(feature = "experimental")]
        let has_aux = self.aux.is_some();
        #[cfg(not(feature = "experimental"))]
        let has_aux = false;

        if !has_aux && self.keywords.is_empty() {
            return self.langid.write_to_string();
        }
        let capacity = self.writeable_length_hint().capacity();
        let mut out = alloc::string::String::with_capacity(capacity);
        let _ = self.write_to(&mut out);
        alloc::borrow::Cow::Owned(out)
    }
}
// Provide `Display` for `DataLocale` based on its `Writeable` implementation.
writeable::impl_display_with_writeable!(DataLocale);
impl From<LanguageIdentifier> for DataLocale {
fn from(langid: LanguageIdentifier) -> Self {
Self {
langid,
keywords: unicode_ext::Keywords::new(),
#[cfg(feature = "experimental")]
aux: None,
}
}
}
impl From<Locale> for DataLocale {
fn from(locale: Locale) -> Self {
Self {
langid: locale.id,
keywords: locale.extensions.unicode.keywords,
#[cfg(feature = "experimental")]
aux: None,
}
}
}
impl From<&LanguageIdentifier> for DataLocale {
fn from(langid: &LanguageIdentifier) -> Self {
Self {
langid: langid.clone(),
keywords: unicode_ext::Keywords::new(),
#[cfg(feature = "experimental")]
aux: None,
}
}
}
impl From<&Locale> for DataLocale {
fn from(locale: &Locale) -> Self {
Self {
langid: locale.id.clone(),
keywords: locale.extensions.unicode.keywords.clone(),
#[cfg(feature = "experimental")]
aux: None,
}
}
}
/// Parses the string form of a [`DataLocale`]: a BCP-47 locale, optionally followed by the
/// auxiliary key separator and auxiliary keys.
///
/// Without the "experimental" feature, any input containing the separator is rejected.
impl FromStr for DataLocale {
    type Err = DataError;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Everything after the first separator is the auxiliary-key portion.
        let (locale_part, aux_part) = match s.split_once(AUXILIARY_KEY_SEPARATOR as char) {
            Some((head, tail)) => (head, Some(tail)),
            None => (s, None),
        };
        let locale = Locale::from_str(locale_part).map_err(|e| {
            DataErrorKind::KeyLocaleSyntax
                .into_error()
                .with_display_context(s)
                .with_display_context(&e)
        })?;
        #[cfg_attr(not(feature = "experimental"), allow(unused_mut))]
        let mut data_locale = DataLocale::from(locale);
        #[cfg(feature = "experimental")]
        if let Some(aux_str) = aux_part {
            data_locale.set_aux(AuxiliaryKeys::from_str(aux_str)?);
        }
        #[cfg(not(feature = "experimental"))]
        if aux_part.is_some() {
            return Err(DataErrorKind::KeyLocaleSyntax
                .into_error()
                .with_display_context(s));
        }
        Ok(data_locale)
    }
}
impl DataLocale {
/// Compare this [`DataLocale`] with BCP-47 bytes.
///
/// The return value is equivalent to what would happen if you first converted this
/// [`DataLocale`] to a BCP-47 string and then performed a byte comparison.
///
/// This function is case-sensitive and results in a *total order*, so it is appropriate for
/// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
///
/// # Examples
///
/// ```
/// use icu_locid::Locale;
/// use icu_provider::DataLocale;
/// use std::cmp::Ordering;
///
/// let bcp47_strings: &[&str] = &[
/// "ca",
/// "ca+EUR",
/// "ca-ES",
/// "ca-ES+GBP",
/// "ca-ES+GBP+short",
/// "ca-ES+USD",
/// "ca-ES-u-ca-buddhist",
/// "ca-ES-valencia",
/// "cat",
/// "pl-Latn-PL",
/// "und",
/// "und+MXN",
/// "und-fonipa",
/// "und-u-ca-hebrew",
/// "und-u-ca-japanese",
/// "zh",
/// ];
///
/// for ab in bcp47_strings.windows(2) {
/// let a = ab[0];
/// let b = ab[1];
/// assert_eq!(a.cmp(b), Ordering::Less, "strings: {} < {}", a, b);
/// let a_loc: DataLocale = a.parse().unwrap();
/// assert_eq!(
/// a_loc.strict_cmp(a.as_bytes()),
/// Ordering::Equal,
/// "strict_cmp: {} == {}",
/// a_loc,
/// a
/// );
/// assert_eq!(
/// a_loc.strict_cmp(b.as_bytes()),
/// Ordering::Less,
/// "strict_cmp: {} < {}",
/// a_loc,
/// b
/// );
/// let b_loc: DataLocale = b.parse().unwrap();
/// assert_eq!(
/// b_loc.strict_cmp(b.as_bytes()),
/// Ordering::Equal,
/// "strict_cmp: {} == {}",
/// b_loc,
/// b
/// );
/// assert_eq!(
/// b_loc.strict_cmp(a.as_bytes()),
/// Ordering::Greater,
/// "strict_cmp: {} > {}",
/// b_loc,
/// a
/// );
/// }
/// ```
///
/// Comparison against invalid strings:
///
/// ```
/// use icu_provider::DataLocale;
///
/// let invalid_strings: &[&str] = &[
/// // Less than "ca-ES"
/// "CA",
/// "ar+GBP+FOO",
/// // Greater than "ca-ES+GBP"
/// "ca_ES",
/// "ca-ES+GBP+FOO",
/// ];
///
/// let data_locale = "ca-ES+GBP".parse::<DataLocale>().unwrap();
///
/// for s in invalid_strings.iter() {
/// let expected_ordering = "ca-ES+GBP".cmp(s);
/// let actual_ordering = data_locale.strict_cmp(s.as_bytes());
/// assert_eq!(expected_ordering, actual_ordering, "{}", s);
/// }
/// ```
pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
    // Split `other` into its locale portion and its optional auxiliary-key portion.
    let mut parts = other.splitn(2, |byte| *byte == AUXILIARY_KEY_SEPARATOR);
    let Some(other_locale) = parts.next() else {
        debug_assert!(other.is_empty());
        return Ordering::Greater;
    };
    let other_aux = parts.next();

    // Compare the language identifier, then the `-u-` keywords, subtag by subtag.
    let subtags = other_locale.split(|byte| *byte == b'-');
    let mut progress = self.langid.strict_cmp_iter(subtags);
    if self.has_unicode_ext() {
        let mut remaining = match progress {
            SubtagOrderingResult::Subtags(iter) => iter,
            SubtagOrderingResult::Ordering(ordering) => return ordering,
        };
        match remaining.next() {
            Some(b"u") => (),
            Some(subtag) => return subtag.cmp(b"u").reverse(),
            None => return Ordering::Greater,
        }
        progress = self.keywords.strict_cmp_iter(remaining);
    }
    let other_has_more_subtags = match progress {
        SubtagOrderingResult::Subtags(mut iter) => iter.next().is_some(),
        SubtagOrderingResult::Ordering(ordering) => return ordering,
    };

    // From here on, `self` has equal or fewer subtags than `other`.
    if other_has_more_subtags {
        // foo < foo-bar
        // foo < foo-bar+BAR
        // foo+BAR < foo-bar
        // foo+BAR < foo-bar+BAR
        return Ordering::Less;
    }
    match (self.get_aux(), other_aux) {
        // foo == foo
        (None, None) => Ordering::Equal,
        // foo+BAR1 ?= foo+BAR2
        (Some(mine), Some(theirs)) => mine.as_bytes().cmp(theirs),
        // foo+BAR > foo
        (Some(_), None) => Ordering::Greater,
        // foo < foo+BAR
        (None, Some(_)) => Ordering::Less,
    }
}
}
impl DataLocale {
/// Returns whether this [`DataLocale`] has all empty fields (no components).
///
/// See also:
///
/// - [`DataLocale::is_und()`]
/// - [`DataLocale::is_langid_und()`]
///
/// # Examples
///
/// ```
/// use icu_provider::DataLocale;
///
/// assert!("und".parse::<DataLocale>().unwrap().is_empty());
/// assert!(!"und-u-ca-buddhist"
/// .parse::<DataLocale>()
/// .unwrap()
/// .is_empty());
/// assert!(!"und+auxiliary".parse::<DataLocale>().unwrap().is_empty());
/// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_empty());
/// ```
pub fn is_empty(&self) -> bool {
    // The `Default` impl for `&DataLocale` yields the locale with every field empty.
    let empty: &DataLocale = Default::default();
    self == empty
}
/// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion.
///
/// This ignores auxiliary keys.
///
/// See also:
///
/// - [`DataLocale::is_empty()`]
/// - [`DataLocale::is_langid_und()`]
///
/// # Examples
///
/// ```
/// use icu_provider::DataLocale;
///
/// assert!("und".parse::<DataLocale>().unwrap().is_und());
/// assert!(!"und-u-ca-buddhist".parse::<DataLocale>().unwrap().is_und());
/// assert!("und+auxiliary".parse::<DataLocale>().unwrap().is_und());
/// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_und());
/// ```
pub fn is_und(&self) -> bool {
self.langid == LanguageIdentifier::UND && self.keywords.is_empty()
}
/// Returns whether the [`LanguageIdentifier`] associated with this request is `und`.
///
/// This ignores extension keywords and auxiliary keys.
///
/// See also:
///
/// - [`DataLocale::is_empty()`]
/// - [`DataLocale::is_und()`]
///
/// # Examples
///
/// ```
/// use icu_provider::DataLocale;
///
/// assert!("und".parse::<DataLocale>().unwrap().is_langid_und());
/// assert!("und-u-ca-buddhist"
/// .parse::<DataLocale>()
/// .unwrap()
/// .is_langid_und());
/// assert!("und+auxiliary"
/// .parse::<DataLocale>()
/// .unwrap()
/// .is_langid_und());
/// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_langid_und());
/// ```
pub fn is_langid_und(&self) -> bool {
    // Only the language identifier is inspected; keywords and auxiliary keys are ignored.
    let langid = &self.langid;
    *langid == LanguageIdentifier::UND
}
/// Gets the [`LanguageIdentifier`] for this [`DataLocale`].
///
/// This may allocate memory if there are variant subtags. If you need only the language,
/// script, and/or region subtag, use the specific getters for those subtags:
///
/// - [`DataLocale::language()`]
/// - [`DataLocale::script()`]
/// - [`DataLocale::region()`]
///
/// If you have ownership over the `DataLocale`, use [`DataLocale::into_locale()`]
/// and then access the `id` field.
///
/// # Examples
///
/// ```
/// use icu_locid::langid;
/// use icu_provider::prelude::*;
///
/// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1");
///
/// let req_no_langid = DataRequest {
/// locale: &Default::default(),
/// metadata: Default::default(),
/// };
///
/// let req_with_langid = DataRequest {
/// locale: &langid!("ar-EG").into(),
/// metadata: Default::default(),
/// };
///
/// assert_eq!(req_no_langid.locale.get_langid(), langid!("und"));
/// assert_eq!(req_with_langid.locale.get_langid(), langid!("ar-EG"));
/// ```
pub fn get_langid(&self) -> LanguageIdentifier {
self.langid.clone()
}
/// Replaces the whole [`LanguageIdentifier`] portion of this [`DataLocale`] with `lid`.
///
/// Unicode extension keywords are not modified.
#[inline]
pub fn set_langid(&mut self, lid: LanguageIdentifier) {
    let slot = &mut self.langid;
    *slot = lid;
}
/// Converts this [`DataLocale`] into a [`Locale`].
///
/// See also [`DataLocale::get_langid()`].
///
/// # Examples
///
/// ```
/// use icu_locid::{
/// langid, locale,
/// subtags::{language, region},
/// Locale,
/// };
/// use icu_provider::prelude::*;
///
/// let locale: DataLocale = locale!("it-IT-u-ca-coptic").into();
///
/// assert_eq!(locale.get_langid(), langid!("it-IT"));
/// assert_eq!(locale.language(), language!("it"));
/// assert_eq!(locale.script(), None);
/// assert_eq!(locale.region(), Some(region!("IT")));
///
/// let locale = locale.into_locale();
/// assert_eq!(locale, locale!("it-IT-u-ca-coptic"));
/// ```
pub fn into_locale(self) -> Locale {
let mut loc = Locale {
id: self.langid,
..Default::default()
};
loc.extensions.unicode.keywords = self.keywords;
loc
}
/// Returns the [`Language`] subtag of this [`DataLocale`], by value.
#[inline]
pub fn language(&self) -> Language {
    let langid = &self.langid;
    langid.language
}
/// Sets the [`Language`] for this [`DataLocale`].
#[inline]
pub fn set_language(&mut self, language: Language) {
self.langid.language = language;
}
/// Returns the [`Script`] subtag of this [`DataLocale`], or `None` if it has none.
#[inline]
pub fn script(&self) -> Option<Script> {
    let langid = &self.langid;
    langid.script
}
/// Sets the [`Script`] subtag of this [`DataLocale`]; passing `None` removes it.
#[inline]
pub fn set_script(&mut self, script: Option<Script>) {
    let langid = &mut self.langid;
    langid.script = script;
}
/// Returns the [`Region`] subtag of this [`DataLocale`], or `None` if it has none.
#[inline]
pub fn region(&self) -> Option<Region> {
    let langid = &self.langid;
    langid.region
}
/// Sets the [`Region`] subtag of this [`DataLocale`]; passing `None` removes it.
#[inline]
pub fn set_region(&mut self, region: Option<Region>) {
    let langid = &mut self.langid;
    langid.region = region;
}
/// Returns `true` if this [`DataLocale`] carries at least one [`Variant`] subtag.
#[inline]
pub fn has_variants(&self) -> bool {
    let variants = &self.langid.variants;
    !variants.is_empty()
}
/// Replaces the [`Variants`] of this [`DataLocale`] with `variants`, discarding any that
/// were set before.
#[inline]
pub fn set_variants(&mut self, variants: Variants) {
    let langid = &mut self.langid;
    langid.variants = variants;
}
/// Removes all [`Variant`] subtags from this [`DataLocale`].
///
/// Returns the value produced by clearing the stored [`Variants`]
/// (presumably the removed variants; see the `icu_locid` documentation).
#[inline]
pub fn clear_variants(&mut self) -> Variants {
    let variants = &mut self.langid.variants;
    variants.clear()
}
/// Looks up the Unicode extension keyword `key` in this [`DataLocale`] and returns a clone
/// of its value, or `None` if the keyword is absent.
#[inline]
pub fn get_unicode_ext(&self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> {
    let found = self.keywords.get(key);
    found.cloned()
}
/// Returns `true` if this [`DataLocale`] has at least one Unicode extension keyword.
#[inline]
pub fn has_unicode_ext(&self) -> bool {
    let keywords = &self.keywords;
    !keywords.is_empty()
}
/// Returns `true` if the Unicode extension keyword `key` is present in this [`DataLocale`],
/// whatever its value.
#[inline]
pub fn contains_unicode_ext(&self, key: &unicode_ext::Key) -> bool {
    let keywords = &self.keywords;
    keywords.contains_key(key)
}
/// Returns whether this [`DataLocale`] contains a Unicode extension keyword
/// with the specified key and value.
///
/// # Examples
///
/// ```
/// use icu_locid::{
/// extensions::unicode::{key, value},
/// Locale,
/// };
/// use icu_provider::prelude::*;
///
/// let locale: Locale = "it-IT-u-ca-coptic".parse().expect("Valid BCP-47");
/// let locale: DataLocale = locale.into();
///
/// assert_eq!(locale.get_unicode_ext(&key!("hc")), None);
/// assert_eq!(locale.get_unicode_ext(&key!("ca")), Some(value!("coptic")));
/// assert!(locale.matches_unicode_ext(&key!("ca"), &value!("coptic"),));
/// ```
#[inline]
pub fn matches_unicode_ext(&self, key: &unicode_ext::Key, value: &unicode_ext::Value) -> bool {
    // A keyword that is absent never matches.
    matches!(self.keywords.get(key), Some(found) if found == value)
}
/// Sets the Unicode extension keyword `key` to `value` on this [`DataLocale`].
///
/// Returns whatever the underlying keyword map returns for the insertion
/// (presumably the previous value for `key`, if any).
#[inline]
pub fn set_unicode_ext(
    &mut self,
    key: unicode_ext::Key,
    value: unicode_ext::Value,
) -> Option<unicode_ext::Value> {
    let keywords = &mut self.keywords;
    keywords.set(key, value)
}
/// Removes the Unicode extension keyword `key` from this [`DataLocale`].
///
/// Returns the removed value, or `None` if the keyword was not present.
#[inline]
pub fn remove_unicode_ext(&mut self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> {
    let keywords = &mut self.keywords;
    keywords.remove(key)
}
/// Keeps only the Unicode extension keywords selected by `predicate`, which is called with
/// each keyword's key.
#[inline]
pub fn retain_unicode_ext<F>(&mut self, predicate: F)
where
    F: FnMut(&unicode_ext::Key) -> bool,
{
    let keywords = &mut self.keywords;
    keywords.retain_by_key(predicate)
}
/// Borrows the auxiliary keys of this [`DataLocale`], or returns `None` if none are set.
///
/// For more information and examples, see [`AuxiliaryKeys`].
#[cfg(feature = "experimental")]
pub fn get_aux(&self) -> Option<&AuxiliaryKeys> {
    let Self { aux, .. } = self;
    aux.as_ref()
}
// Stand-in used when the "experimental" feature is disabled: `DataLocale` then has no
// auxiliary-key field, so this always returns `None`.
#[cfg(not(feature = "experimental"))]
pub(crate) fn get_aux(&self) -> Option<&str> {
None
}
/// Returns `true` if auxiliary keys are set on this [`DataLocale`].
///
/// For more information and examples, see [`AuxiliaryKeys`].
#[cfg(feature = "experimental")]
pub fn has_aux(&self) -> bool {
    self.get_aux().is_some()
}
/// Stores `value` as the auxiliary keys of this [`DataLocale`].
///
/// Returns the auxiliary keys that were set before, if any.
///
/// For more information and examples, see [`AuxiliaryKeys`].
#[cfg(feature = "experimental")]
pub fn set_aux(&mut self, value: AuxiliaryKeys) -> Option<AuxiliaryKeys> {
    core::mem::replace(&mut self.aux, Some(value))
}
/// Remove an auxiliary key, if present. Returns the removed auxiliary key.
///
/// # Examples
///
/// ```
/// use icu_locid::locale;
/// use icu_provider::prelude::*;
/// use writeable::assert_writeable_eq;
///
/// let mut data_locale: DataLocale = locale!("ar-EG").into();
/// let aux = "GBP"
/// .parse::<AuxiliaryKeys>()
/// .expect("contains valid characters");
/// data_locale.set_aux(aux);
/// assert_writeable_eq!(data_locale, "ar-EG+GBP");
///
/// let maybe_aux = data_locale.remove_aux();
/// assert_writeable_eq!(data_locale, "ar-EG");
/// assert_writeable_eq!(maybe_aux.unwrap(), "GBP");
/// ```
#[cfg(feature = "experimental")]
pub fn remove_aux(&mut self) -> Option<AuxiliaryKeys> {
    // Leaves `None` behind and hands the previous value to the caller.
    core::mem::take(&mut self.aux)
}
}
/// The "auxiliary key" is an annotation on [`DataLocale`] that can contain arbitrary
/// information that does not fit into the [`LanguageIdentifier`] or [`Keywords`].
///
/// A [`DataLocale`] can have multiple auxiliary keys, represented by this struct. The auxiliary
/// keys are separated from the BCP-47 locale and from each other with the character returned by
/// [`AuxiliaryKeys::separator()`].
///
/// An auxiliary key currently allows alphanumerics and `-`.
///
/// <div class="stab unstable">
/// 🚧 This code is experimental; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. It can be enabled with the "experimental" Cargo feature
/// of the `icu_provider` crate. Use with caution.
/// <a href="https://github.com/unicode-org/icu4x/issues/3632">#3632</a>
/// </div>
///
/// # Examples
///
/// ```
/// use icu_locid::locale;
/// use icu_provider::prelude::*;
/// use writeable::assert_writeable_eq;
///
/// let mut data_locale: DataLocale = locale!("ar-EG").into();
/// assert_writeable_eq!(data_locale, "ar-EG");
/// assert!(!data_locale.has_aux());
/// assert_eq!(data_locale.get_aux(), None);
///
/// let aux = "GBP"
/// .parse::<AuxiliaryKeys>()
/// .expect("contains valid characters");
///
/// data_locale.set_aux(aux);
/// assert_writeable_eq!(data_locale, "ar-EG+GBP");
/// assert!(data_locale.has_aux());
/// assert_eq!(data_locale.get_aux(), Some(&"GBP".parse().unwrap()));
/// ```
///
/// Multiple auxiliary keys are allowed:
///
/// ```
/// use icu_locid::locale;
/// use icu_provider::prelude::*;
/// use writeable::assert_writeable_eq;
///
/// let data_locale = "ar-EG+GBP+long".parse::<DataLocale>().unwrap();
/// assert_writeable_eq!(data_locale, "ar-EG+GBP+long");
/// assert_eq!(data_locale.get_aux().unwrap().iter().count(), 2);
/// ```
///
/// Not all strings are valid auxiliary keys:
///
/// ```
/// use icu_provider::prelude::*;
///
/// assert!("abcdefg".parse::<AuxiliaryKeys>().is_ok());
/// assert!("ABC123".parse::<AuxiliaryKeys>().is_ok());
/// assert!("abc-xyz".parse::<AuxiliaryKeys>().is_ok());
///
/// assert!("".parse::<AuxiliaryKeys>().is_err());
/// assert!("!@#$%".parse::<AuxiliaryKeys>().is_err());
/// assert!("abc_xyz".parse::<AuxiliaryKeys>().is_err());
/// ```
///
/// [`Keywords`]: unicode_ext::Keywords
#[derive(Debug, PartialEq, Clone, Eq, Hash)]
#[cfg(feature = "experimental")]
pub struct AuxiliaryKeys {
// DISCUSS: SmallStr? TinyStrAuto?
// DISCUSS: Make this a dynamically sized type so references can be taken?
// All auxiliary keys stored as one string, joined by the auxiliary key separator.
value: AuxiliaryKeysInner,
}
// Backing storage for `AuxiliaryKeys`. Both variants dereference to the same `str` form,
// and equality, hashing, and debug output are all defined on that string.
#[cfg(feature = "experimental")]
#[derive(Clone)]
enum AuxiliaryKeysInner {
// Heap-allocated string; the constructors in this file use it for values longer than 23 bytes.
Boxed(alloc::boxed::Box<str>),
// Inline ASCII string; the constructors in this file use it for values of at most 23 bytes.
Stack(TinyAsciiStr<23>),
// NOTE: In the future, a `Static` variant could be added to allow `data_locale!("...")`
// Static(&'static str),
}
/// Exposes the stored auxiliary keys as a `str`, whichever variant holds them.
#[cfg(feature = "experimental")]
impl Deref for AuxiliaryKeysInner {
    type Target = str;
    #[inline]
    fn deref(&self) -> &Self::Target {
        match self {
            Self::Boxed(boxed) => &**boxed,
            Self::Stack(tiny) => tiny.as_str(),
        }
    }
}
/// Two values are equal when their string forms are equal, regardless of which variant
/// stores them.
#[cfg(feature = "experimental")]
impl PartialEq for AuxiliaryKeysInner {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        let lhs: &str = self;
        let rhs: &str = other;
        lhs == rhs
    }
}
// Marker impl: equality is plain string equality (see the `PartialEq` impl).
#[cfg(feature = "experimental")]
impl Eq for AuxiliaryKeysInner {}
/// Debug-formats the string form, so the variant in use is not visible in the output.
#[cfg(feature = "experimental")]
impl Debug for AuxiliaryKeysInner {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = self;
        Debug::fmt(text, f)
    }
}
/// Hashes the string form, keeping `Hash` consistent with the string-based `PartialEq`.
#[cfg(feature = "experimental")]
impl Hash for AuxiliaryKeysInner {
    #[inline]
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        let text: &str = self;
        text.hash(state)
    }
}
// Provide `Display` for `AuxiliaryKeys` based on its `Writeable` implementation.
#[cfg(feature = "experimental")]
writeable::impl_display_with_writeable!(AuxiliaryKeys);
/// Writes the auxiliary keys exactly as stored: the individual keys joined by the separator.
/// Every method forwards to the `Writeable` implementation of the underlying `str`.
#[cfg(feature = "experimental")]
impl Writeable for AuxiliaryKeys {
    fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
        let text: &str = &self.value;
        text.write_to(sink)
    }
    fn writeable_length_hint(&self) -> LengthHint {
        let text: &str = &self.value;
        text.writeable_length_hint()
    }
    fn write_to_string(&self) -> alloc::borrow::Cow<str> {
        let text: &str = &self.value;
        text.write_to_string()
    }
}
/// Parses one or more auxiliary keys joined by the separator; see
/// `AuxiliaryKeys::try_from_str` for the accepted syntax.
#[cfg(feature = "experimental")]
impl FromStr for AuxiliaryKeys {
    type Err = DataError;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        AuxiliaryKeys::try_from_str(s)
    }
}
#[cfg(feature = "experimental")]
impl AuxiliaryKeys {
/// Returns the bytes of this [`AuxiliaryKeys`] in its single-string form.
///
/// NOTE: Do not make this public because we might not always store these in a single string.
/// External clients who need this can use `<Self as Writeable>::write_to_string`.
#[inline]
pub(crate) fn as_bytes(&self) -> &[u8] {
    let text: &str = &self.value;
    text.as_bytes()
}
/// Creates an [`AuxiliaryKeys`] from an iterator of individual keys.
///
/// # Examples
///
/// ```
/// use icu_provider::prelude::*;
///
/// // Single auxiliary key:
/// let a = AuxiliaryKeys::try_from_iter(["abc"]).unwrap();
/// let b = "abc".parse::<AuxiliaryKeys>().unwrap();
/// assert_eq!(a, b);
///
/// // Multiple auxiliary keys:
/// let a = AuxiliaryKeys::try_from_iter(["abc", "defg"]).unwrap();
/// let b = "abc+defg".parse::<AuxiliaryKeys>().unwrap();
/// assert_eq!(a, b);
/// ```
///
/// Don't include the auxiliary key separator or other invalid chars in the iterator strings:
///
/// ```
/// use icu_provider::prelude::*;
///
/// assert!(AuxiliaryKeys::try_from_iter(["abc+defg"]).is_err());
/// assert!(AuxiliaryKeys::try_from_iter(["AB$C"]).is_err());
/// ```
///
/// # Errors
///
/// Returns a [`DataErrorKind::KeyLocaleSyntax`] error if any item is empty or contains a
/// byte other than an ASCII alphanumeric or `-`, or if the iterator yields no items at all
/// (an [`AuxiliaryKeys`] is never empty).
pub fn try_from_iter<'a>(iter: impl IntoIterator<Item = &'a str>) -> Result<Self, DataError> {
    // TODO: Avoid the allocation when possible
    let mut builder = String::new();
    for item in iter {
        let is_valid = !item.is_empty()
            && item
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'-'));
        if !is_valid {
            return Err(DataErrorKind::KeyLocaleSyntax
                .into_error()
                .with_display_context(item));
        }
        if !builder.is_empty() {
            builder.push(AuxiliaryKeys::separator() as char);
        }
        builder.push_str(item);
    }
    // Without this check an empty iterator would produce an empty value, which the
    // string parser rejects and which would be written as a dangling separator.
    if builder.is_empty() {
        return Err(DataErrorKind::KeyLocaleSyntax
            .into_error()
            .with_display_context("empty aux iterator"));
    }
    let value = if builder.len() <= 23 {
        #[allow(clippy::unwrap_used)] // every byte was checked to be ASCII above
        let stack = builder.parse().unwrap();
        AuxiliaryKeysInner::Stack(stack)
    } else {
        AuxiliaryKeysInner::Boxed(builder.into())
    };
    Ok(Self { value })
}
/// Parses one or more auxiliary keys joined by `+` (the auxiliary key separator).
///
/// Each key must be non-empty and consist only of ASCII alphanumerics and `-`, which are
/// the same rules [`AuxiliaryKeys::try_from_iter`] applies to its items. Inputs with an
/// empty component, such as `""`, `"+abc"`, `"abc+"`, or `"abc++def"`, are therefore
/// rejected.
///
/// # Errors
///
/// Returns a [`DataErrorKind::KeyLocaleSyntax`] error, with `s` as context, if any
/// component is invalid.
pub(crate) fn try_from_str(s: &str) -> Result<Self, DataError> {
    let is_valid_key = |key: &str| {
        !key.is_empty()
            && key
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'-'))
    };
    // Splitting an empty string yields one empty component, so `""` is rejected too.
    if !s.split('+').all(is_valid_key) {
        return Err(DataErrorKind::KeyLocaleSyntax
            .into_error()
            .with_display_context(s));
    }
    let value = if s.len() <= 23 {
        #[allow(clippy::unwrap_used)] // every byte was checked to be ASCII above
        let stack = s.parse().unwrap();
        AuxiliaryKeysInner::Stack(stack)
    } else {
        AuxiliaryKeysInner::Boxed(s.into())
    };
    Ok(Self { value })
}
/// Iterates over the components of the auxiliary key.
///
/// # Example
///
/// ```
/// use icu_provider::AuxiliaryKeys;
///
/// let aux: AuxiliaryKeys = "abc+defg".parse().unwrap();
/// assert_eq!(aux.iter().collect::<Vec<_>>(), vec!["abc", "defg"]);
/// ```
pub fn iter(&self) -> impl Iterator<Item = &str> + '_ {
    // The keys are stored as one string; split it back apart at each separator.
    let text: &str = &self.value;
    text.split(char::from(Self::separator()))
}
/// Returns the byte that separates auxiliary keys from the locale, and from each other,
/// in the string form of a data locale.
///
/// # Examples
///
/// ```
/// use icu_provider::AuxiliaryKeys;
///
/// assert_eq!(AuxiliaryKeys::separator(), b'+');
/// ```
#[inline]
pub const fn separator() -> u8 {
    const SEPARATOR: u8 = AUXILIARY_KEY_SEPARATOR;
    SEPARATOR
}
}
/// Checks the string form of a [`DataLocale`] built from a locale plus an optional
/// auxiliary key.
#[test]
fn test_data_locale_to_string() {
    use icu_locid::locale;

    // One check: the locale to convert, an optional auxiliary key, and the expected output.
    struct TestCase {
        pub locale: Locale,
        pub aux: Option<&'static str>,
        pub expected: &'static str,
    }

    let cases = [
        TestCase {
            locale: Locale::UND,
            aux: None,
            expected: "und",
        },
        TestCase {
            locale: locale!("und-u-cu-gbp"),
            aux: None,
            expected: "und-u-cu-gbp",
        },
        TestCase {
            locale: locale!("en-ZA-u-cu-gbp"),
            aux: None,
            expected: "en-ZA-u-cu-gbp",
        },
        #[cfg(feature = "experimental")]
        TestCase {
            locale: locale!("en-ZA-u-nu-arab"),
            aux: Some("GBP"),
            expected: "en-ZA-u-nu-arab+GBP",
        },
    ];

    for case in cases {
        let mut data_locale = DataLocale::from(case.locale);
        #[cfg(feature = "experimental")]
        if let Some(aux) = case.aux {
            data_locale.set_aux(aux.parse().unwrap());
        }
        writeable::assert_writeable_eq!(data_locale, case.expected);
    }
}
/// Checks which inputs parse as a [`DataLocale`], and that every successfully parsed value
/// writes back to exactly the input string.
#[test]
fn test_data_locale_from_string() {
    // One check: the input string and whether parsing it is expected to succeed.
    #[derive(Debug)]
    struct TestCase {
        pub input: &'static str,
        pub success: bool,
    }

    let cases = [
        TestCase {
            input: "und",
            success: true,
        },
        TestCase {
            input: "und-u-cu-gbp",
            success: true,
        },
        TestCase {
            input: "en-ZA-u-cu-gbp",
            success: true,
        },
        TestCase {
            input: "en...",
            success: false,
        },
        #[cfg(feature = "experimental")]
        TestCase {
            input: "en-ZA-u-nu-arab+GBP",
            success: true,
        },
        #[cfg(not(feature = "experimental"))]
        TestCase {
            input: "en-ZA-u-nu-arab+GBP",
            success: false,
        },
    ];

    for case in cases {
        match DataLocale::from_str(case.input) {
            Ok(data_locale) => {
                assert!(
                    case.success,
                    "DataLocale parsed but it was supposed to fail: {case:?}"
                );
                writeable::assert_writeable_eq!(data_locale, case.input);
            }
            Err(_) => {
                assert!(
                    !case.success,
                    "DataLocale was supposed to parse but it failed: {case:?}"
                );
            }
        }
    }
}