// blob: bfa58d6dbb05e4b80a4ea7b122398079d6629b62 [file] [log] [blame]
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Query character Unicode properties according to
//! [Unicode Standard Annex #44](https://www.unicode.org/reports/tr44/)
//! and [Unicode Technical Standard #51](https://www.unicode.org/reports/tr51/)
//! rules.
//!
//! Currently we support the `General_Category` property as well as `Emoji` and `Emoji_Component`.
//!
//! Future properties can be added as requested.
//!
//! ```rust
//! use unicode_properties::UnicodeEmoji;
//! use unicode_properties::UnicodeGeneralCategory;
//!
//! fn main() {
//!     let ch = '🦀'; // U+1F980 CRAB
//!     let is_emoji = ch.is_emoji_char();
//!     let group = ch.general_category_group();
//!     println!("{}({:?})", ch, group);
//!     println!("The above char {} for use as emoji char.",
//!              if is_emoji { "is recommended" } else { "is not recommended" });
//! }
//! ```
//!
//! # Features
//!
//! ## `general-category`
//!
//! Provides the most general classification of a character,
//! based on its primary characteristic.
//!
//! ## `emoji`
//!
//! Provides the emoji character properties of a character.
//!
#![deny(missing_docs)]
#[rustfmt::skip]
mod tables;
#[cfg(feature = "emoji")]
/// Query the emoji character properties of a character.
pub mod emoji {
pub use crate::tables::emoji::EmojiStatus;
/// Emoji-related Unicode property queries for a character-like value.
///
/// Implementors only have to supply `emoji_status`; every other method is a
/// provided method that classifies the status returned by it.
pub trait UnicodeEmoji: Sized {
    /// Returns the emoji character properties in a status enum.
    fn emoji_status(self) -> EmojiStatus;

    /// Checks whether this character is recommended for use as emoji, i.e. `Emoji=YES`.
    fn is_emoji_char(self) -> bool {
        use crate::tables::emoji::is_emoji_status_for_emoji_char as is_emoji;
        is_emoji(self.emoji_status())
    }

    /// Checks whether this character is used in emoji sequences where it is not
    /// intended for independent, direct input, i.e. `Emoji_Component=YES`.
    fn is_emoji_component(self) -> bool {
        use crate::tables::emoji::is_emoji_status_for_emoji_component as is_component;
        is_component(self.emoji_status())
    }

    /// Checks whether this character occurs in emoji sequences,
    /// i.e. `Emoji=YES | Emoji_Component=YES`.
    fn is_emoji_char_or_emoji_component(self) -> bool {
        let status = self.emoji_status();
        crate::tables::emoji::is_emoji_status_for_emoji_char_or_emoji_component(status)
    }
}
// A `char` obtains its emoji status from the crate's `tables::emoji` lookup.
impl UnicodeEmoji for char {
    fn emoji_status(self) -> EmojiStatus {
        use crate::tables::emoji::emoji_status as status_of;
        status_of(self)
    }
}
/// Tests whether `c` is U+200D ZERO WIDTH JOINER (ZWJ).
///
/// A ZWJ can be placed between the elements of a sequence of characters to
/// indicate that a single glyph should be presented if available.
#[inline]
pub fn is_zwj(c: char) -> bool {
    matches!(c, '\u{200D}')
}
/// Tests whether `c` is U+FE0F VARIATION SELECTOR-16 (VS16), the selector used
/// to request an emoji presentation for an emoji character.
#[inline]
pub fn is_emoji_presentation_selector(c: char) -> bool {
    const VS16: char = '\u{FE0F}';
    c == VS16
}
/// Tests whether `c` is U+FE0E VARIATION SELECTOR-15 (VS15), the selector used
/// to request a text presentation for an emoji character.
#[inline]
pub fn is_text_presentation_selector(c: char) -> bool {
    matches!(c, '\u{FE0E}')
}
/// Tests whether `c` is one of the Regional Indicator characters
/// (U+1F1E6 through U+1F1FF, both ends included).
///
/// A pair of REGIONAL INDICATOR symbols is referred to as an emoji_flag_sequence.
#[inline]
pub fn is_regional_indicator(c: char) -> bool {
    ('\u{1F1E6}'..='\u{1F1FF}').contains(&c)
}
/// Tests whether `c` is one of the Tag Characters, taken here as the range
/// U+E0020 through U+E007F (both ends included).
///
/// These can be used in indicating variants or extensions of emoji characters.
#[inline]
pub fn is_tag_character(c: char) -> bool {
    ('\u{E0020}'..='\u{E007F}').contains(&c)
}
}
#[cfg(feature = "general-category")]
/// Query the general category property of a character.
pub mod general_category {
pub use crate::tables::general_category::{GeneralCategory, GeneralCategoryGroup};
/// General-category queries for a character-like value.
///
/// Implementors only have to supply `general_category`; the remaining methods
/// are provided and classify the category returned by it.
///
/// See [General Category Values](https://www.unicode.org/reports/tr44/#General_Category_Values) for more info.
pub trait UnicodeGeneralCategory: Sized {
    /// Queries the most general classification of a character.
    fn general_category(self) -> GeneralCategory;

    /// Queries the grouping of the most general classification of a character.
    fn general_category_group(self) -> GeneralCategoryGroup {
        use crate::tables::general_category::general_category_group as group_of;
        group_of(self.general_category())
    }

    /// Queries whether the most general classification of a character belongs
    /// to the `LetterCased` group.
    ///
    /// The `LetterCased` group includes `LetterUppercase`, `LetterLowercase`, and `LetterTitlecase`
    /// categories, and is a subset of the `Letter` group.
    fn is_letter_cased(self) -> bool {
        let category = self.general_category();
        crate::tables::general_category::general_category_is_letter_cased(category)
    }
}
// A `char` obtains its category from the crate's `tables::general_category` lookup.
impl UnicodeGeneralCategory for char {
    fn general_category(self) -> GeneralCategory {
        use crate::tables::general_category::general_category_of_char as category_of;
        category_of(self)
    }
}
}
pub use tables::UNICODE_VERSION;
#[cfg(feature = "emoji")]
#[doc(inline)]
pub use emoji::UnicodeEmoji;
#[cfg(feature = "emoji")]
#[doc(inline)]
pub use emoji::EmojiStatus;
#[cfg(feature = "general-category")]
#[doc(inline)]
pub use general_category::GeneralCategory;
#[cfg(feature = "general-category")]
#[doc(inline)]
pub use general_category::GeneralCategoryGroup;
#[cfg(feature = "general-category")]
#[doc(inline)]
pub use general_category::UnicodeGeneralCategory;