| // This file is part of ICU4X. For terms of use, please see the file |
| // called LICENSE at the top level of the ICU4X source tree |
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| |
| //! Documentation on zero-copy deserialization of locale types. |
| //! |
| //! [`Locale`] and [`LanguageIdentifier`] are highly structured types that cannot be directly |
| //! stored in a zero-copy data structure, such as those provided by the [`zerovec`] crate. |
| //! This page explains how to indirectly store these types in a [`zerovec`] data structure. |
| //! |
| //! There are two main use cases, which have different solutions: |
| //! |
| //! 1. **Lookup:** You need to locate a locale in a zero-copy vector, such as when querying a map. |
| //! 2. **Obtain:** You have a locale stored in a zero-copy vector, and you need to obtain a proper |
| //! [`Locale`] or [`LanguageIdentifier`] for use elsewhere in your program. |
| //! |
| //! # Lookup |
| //! |
| //! To perform lookup, store the stringified locale in a canonical BCP-47 form as a byte array, |
| //! and then use [`Locale::strict_cmp()`] to perform an efficient, zero-allocation lookup. |
| //! |
| //! To produce more human-readable serialized output, you can use [`UnvalidatedStr`]. |
| //! |
| //! ``` |
| //! use icu_locid::Locale; |
| //! use zerovec::ule::UnvalidatedStr; |
| //! use zerovec::ZeroMap; |
| //! |
| //! // ZeroMap from locales to integers |
| //! let data: &[(&UnvalidatedStr, u32)] = &[ |
| //! ("de-DE-u-hc-h12".into(), 5), |
| //! ("en-US-u-ca-buddhist".into(), 10), |
| //! ("my-MM".into(), 15), |
| //! ("sr-Cyrl-ME".into(), 20), |
| //! ("zh-TW".into(), 25), |
| //! ]; |
| //! let zm: ZeroMap<UnvalidatedStr, u32> = data.iter().copied().collect(); |
| //! |
| //! // Get the value associated with a locale |
| //! let loc: Locale = "en-US-u-ca-buddhist".parse().unwrap(); |
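| //! // `strict_cmp` orders `loc` relative to the key, but the predicate must order the key |
| //! // relative to `loc`, so the result is reversed. |
| //! let value = zm.get_copied_by(|uvstr| loc.strict_cmp(uvstr).reverse()); |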
| //! assert_eq!(value, Some(10)); |
| //! ``` |
| //! |
| //! # Obtain |
| //! |
| //! Obtaining a [`Locale`] or [`LanguageIdentifier`] is not generally a zero-copy operation, since |
| //! both of these types may require memory allocation. If possible, architect your code such that |
| //! you do not need to obtain a structured type. |
| //! |
| //! If you do need the structured type, for example in order to manipulate it, there are two |
| //! options: storing subtags, or storing a string for parsing. |
| //! |
| //! ## Storing Subtags |
| //! |
| //! If the data being stored only contains a limited number of subtags, you can store them as a |
| //! tuple, and then construct the [`LanguageIdentifier`] externally. |
| //! |
| //! ``` |
| //! use icu_locid::subtags::{Language, Region, Script}; |
| //! use icu_locid::LanguageIdentifier; |
| //! use icu_locid::{ |
| //! langid, |
| //! subtags::{language, region, script}, |
| //! }; |
| //! use zerovec::ZeroMap; |
| //! |
| //! // ZeroMap from integer to LSR (language-script-region) |
| //! let zm: ZeroMap<u32, (Language, Option<Script>, Option<Region>)> = [ |
| //! (5, (language!("de"), None, Some(region!("DE")))), |
| //! (10, (language!("en"), None, Some(region!("US")))), |
| //! (15, (language!("my"), None, Some(region!("MM")))), |
| //! ( |
| //! 20, |
| //! (language!("sr"), Some(script!("Cyrl")), Some(region!("ME"))), |
| //! ), |
| //! (25, (language!("zh"), None, Some(region!("TW")))), |
| //! ] |
| //! .into_iter() |
| //! .collect(); |
| //! |
| //! // Construct a LanguageIdentifier from a tuple entry |
| //! let lid: LanguageIdentifier = |
| //! zm.get_copied(&25).expect("element is present").into(); |
| //! |
| //! assert_eq!(lid, langid!("zh-TW")); |
| //! ``` |
| //! |
| //! ## Storing Strings |
| //! |
| //! If it is necessary to store and obtain an arbitrary locale, it is currently recommended to |
| //! store a BCP-47 string and parse it when needed. |
| //! |
| //! Since the string is stored in an unparsed state, parsing may fail, so you should handle the |
| //! error from `Locale::try_from_bytes()` instead of calling `unwrap`. See |
| //! [icu4x#831](https://github.com/unicode-org/icu4x/issues/831) |
| //! for a discussion on potential data models that could ensure that the locale is valid during |
| //! deserialization. |
| //! |
| //! As above, to produce more human-readable serialized output, you can use [`UnvalidatedStr`]. |
| //! |
| //! ``` |
| //! use icu_locid::langid; |
| //! use icu_locid::Locale; |
| //! use zerovec::ule::UnvalidatedStr; |
| //! use zerovec::ZeroMap; |
| //! |
| //! // ZeroMap from integer to locale string |
| //! let data: &[(u32, &UnvalidatedStr)] = &[ |
| //! (5, "de-DE-u-hc-h12".into()), |
| //! (10, "en-US-u-ca-buddhist".into()), |
| //! (15, "my-MM".into()), |
| //! (20, "sr-Cyrl-ME".into()), |
| //! (25, "zh-TW".into()), |
| //! (30, "INVALID".into()), |
| //! ]; |
| //! let zm: ZeroMap<u32, UnvalidatedStr> = data.iter().copied().collect(); |
| //! |
| //! // Construct a Locale by parsing the string. |
| //! let value = zm.get(&25).expect("element is present"); |
| //! let loc = Locale::try_from_bytes(value); |
| //! assert_eq!(loc, Ok(langid!("zh-TW").into())); |
| //! |
| //! // Parsing an invalid entry returns an error |
| //! let err_value = zm.get(&30).expect("element is present"); |
| //! let err_loc = Locale::try_from_bytes(err_value); |
| //! assert!(matches!(err_loc, Err(_))); |
| //! ``` |
| //! |
| //! [`Locale`]: crate::Locale |
| //! [`Locale::strict_cmp()`]: crate::Locale::strict_cmp() |
| //! [`LanguageIdentifier`]: crate::LanguageIdentifier |
| //! [`UnvalidatedStr`]: zerovec::ule::UnvalidatedStr |