// Copyright The rust-url developers. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! This crate abstracts over a Unicode back end for the [`idna`][1] //! crate. //! //! To work around the lack of [`global-features`][2] in Cargo, this //! crate allows the top level `Cargo.lock` to choose an alternative //! Unicode back end for the `idna` crate by pinning a version of this //! crate. //! //! See the [README of the latest version][3] for more details. //! //! [1]: https://docs.rs/crate/idna/latest //! [2]: https://internals.rust-lang.org/t/pre-rfc-mutually-excusive-global-features/19618 //! [3]: https://docs.rs/crate/idna_adapter/latest #![no_std] use icu_normalizer::properties::CanonicalCombiningClassMap; use icu_normalizer::uts46::Uts46Mapper; use icu_properties::maps::CodePointMapDataBorrowed; use icu_properties::CanonicalCombiningClass; use icu_properties::GeneralCategory; /// Turns a joining type into a mask for comparing with multiple type at once. const fn joining_type_to_mask(jt: icu_properties::JoiningType) -> u32 { 1u32 << jt.0 } /// Mask for checking for both left and dual joining. pub const LEFT_OR_DUAL_JOINING_MASK: JoiningTypeMask = JoiningTypeMask( joining_type_to_mask(icu_properties::JoiningType::LeftJoining) | joining_type_to_mask(icu_properties::JoiningType::DualJoining), ); /// Mask for checking for both left and dual joining. pub const RIGHT_OR_DUAL_JOINING_MASK: JoiningTypeMask = JoiningTypeMask( joining_type_to_mask(icu_properties::JoiningType::RightJoining) | joining_type_to_mask(icu_properties::JoiningType::DualJoining), ); /// Turns a bidi class into a mask for comparing with multiple classes at once. const fn bidi_class_to_mask(bc: icu_properties::BidiClass) -> u32 { 1u32 << bc.0 } /// Mask for checking if the domain is a bidi domain. 
pub const RTL_MASK: BidiClassMask = {
    let r = bidi_class_to_mask(icu_properties::BidiClass::RightToLeft);
    let al = bidi_class_to_mask(icu_properties::BidiClass::ArabicLetter);
    let an = bidi_class_to_mask(icu_properties::BidiClass::ArabicNumber);
    BidiClassMask(r | al | an)
};

/// Mask for allowable bidi classes in the first character of a label
/// (either LTR or RTL) in a bidi domain.
pub const FIRST_BC_MASK: BidiClassMask = {
    let l = bidi_class_to_mask(icu_properties::BidiClass::LeftToRight);
    let r = bidi_class_to_mask(icu_properties::BidiClass::RightToLeft);
    let al = bidi_class_to_mask(icu_properties::BidiClass::ArabicLetter);
    BidiClassMask(l | r | al)
};

/// Mask for allowable bidi classes of the last (non-Non-Spacing Mark)
/// character in an LTR label in a bidi domain.
pub const LAST_LTR_MASK: BidiClassMask = {
    let l = bidi_class_to_mask(icu_properties::BidiClass::LeftToRight);
    let en = bidi_class_to_mask(icu_properties::BidiClass::EuropeanNumber);
    BidiClassMask(l | en)
};

/// Mask for allowable bidi classes of the last (non-Non-Spacing Mark)
/// character in an RTL label in a bidi domain.
pub const LAST_RTL_MASK: BidiClassMask = {
    let r = bidi_class_to_mask(icu_properties::BidiClass::RightToLeft);
    let al = bidi_class_to_mask(icu_properties::BidiClass::ArabicLetter);
    let en = bidi_class_to_mask(icu_properties::BidiClass::EuropeanNumber);
    let an = bidi_class_to_mask(icu_properties::BidiClass::ArabicNumber);
    BidiClassMask(r | al | en | an)
};

/// Mask for allowable bidi classes of the middle characters in an LTR label in a bidi domain.
pub const MIDDLE_LTR_MASK: BidiClassMask = BidiClassMask( bidi_class_to_mask(icu_properties::BidiClass::LeftToRight) | bidi_class_to_mask(icu_properties::BidiClass::EuropeanNumber) | bidi_class_to_mask(icu_properties::BidiClass::EuropeanSeparator) | bidi_class_to_mask(icu_properties::BidiClass::CommonSeparator) | bidi_class_to_mask(icu_properties::BidiClass::EuropeanTerminator) | bidi_class_to_mask(icu_properties::BidiClass::OtherNeutral) | bidi_class_to_mask(icu_properties::BidiClass::BoundaryNeutral) | bidi_class_to_mask(icu_properties::BidiClass::NonspacingMark), ); // Mask for allowable bidi classes of the middle characters in an RTL label in a bidi domain. pub const MIDDLE_RTL_MASK: BidiClassMask = BidiClassMask( bidi_class_to_mask(icu_properties::BidiClass::RightToLeft) | bidi_class_to_mask(icu_properties::BidiClass::ArabicLetter) | bidi_class_to_mask(icu_properties::BidiClass::ArabicNumber) | bidi_class_to_mask(icu_properties::BidiClass::EuropeanNumber) | bidi_class_to_mask(icu_properties::BidiClass::EuropeanSeparator) | bidi_class_to_mask(icu_properties::BidiClass::CommonSeparator) | bidi_class_to_mask(icu_properties::BidiClass::EuropeanTerminator) | bidi_class_to_mask(icu_properties::BidiClass::OtherNeutral) | bidi_class_to_mask(icu_properties::BidiClass::BoundaryNeutral) | bidi_class_to_mask(icu_properties::BidiClass::NonspacingMark), ); /// Turns a genecal category into a mask for comparing with multiple categories at once. const fn general_category_to_mask(gc: GeneralCategory) -> u32 { 1 << (gc as u32) } /// Mask for the disallowed general categories of the first character in a label. const MARK_MASK: u32 = general_category_to_mask(GeneralCategory::NonspacingMark) | general_category_to_mask(GeneralCategory::SpacingMark) | general_category_to_mask(GeneralCategory::EnclosingMark); /// Value for the Joining_Type Unicode property. 
#[repr(transparent)] #[derive(Clone, Copy)] pub struct JoiningType(icu_properties::JoiningType); impl JoiningType { /// Returns the corresponding `JoiningTypeMask`. #[inline(always)] pub fn to_mask(self) -> JoiningTypeMask { JoiningTypeMask(joining_type_to_mask(self.0)) } // `true` iff this value is the Transparent value. #[inline(always)] pub fn is_transparent(self) -> bool { self.0 == icu_properties::JoiningType::Transparent } } /// A mask representing potentially multiple `JoiningType` /// values. #[repr(transparent)] #[derive(Clone, Copy)] pub struct JoiningTypeMask(u32); impl JoiningTypeMask { /// `true` iff both masks have at `JoiningType` in common. #[inline(always)] pub fn intersects(self, other: JoiningTypeMask) -> bool { self.0 & other.0 != 0 } } /// Value for the Bidi_Class Unicode property. #[repr(transparent)] #[derive(Clone, Copy)] pub struct BidiClass(icu_properties::BidiClass); impl BidiClass { /// Returns the corresponding `BidiClassMask`. #[inline(always)] pub fn to_mask(self) -> BidiClassMask { BidiClassMask(bidi_class_to_mask(self.0)) } /// `true` iff this value is Left_To_Right #[inline(always)] pub fn is_ltr(self) -> bool { self.0 == icu_properties::BidiClass::LeftToRight } /// `true` iff this value is Nonspacing_Mark #[inline(always)] pub fn is_nonspacing_mark(self) -> bool { self.0 == icu_properties::BidiClass::NonspacingMark } /// `true` iff this value is European_Number #[inline(always)] pub fn is_european_number(self) -> bool { self.0 == icu_properties::BidiClass::EuropeanNumber } /// `true` iff this value is Arabic_Number #[inline(always)] pub fn is_arabic_number(self) -> bool { self.0 == icu_properties::BidiClass::ArabicNumber } } /// A mask representing potentially multiple `BidiClass` /// values. #[repr(transparent)] #[derive(Clone, Copy)] pub struct BidiClassMask(u32); impl BidiClassMask { /// `true` iff both masks have at `BidiClass` in common. 
#[inline(always)] pub fn intersects(self, other: BidiClassMask) -> bool { self.0 & other.0 != 0 } } /// An adapter between a Unicode back end an the `idna` crate. pub struct Adapter { mapper: Uts46Mapper, canonical_combining_class: CanonicalCombiningClassMap, general_category: CodePointMapDataBorrowed<'static, GeneralCategory>, bidi_class: CodePointMapDataBorrowed<'static, icu_properties::BidiClass>, joining_type: CodePointMapDataBorrowed<'static, icu_properties::JoiningType>, } #[cfg(feature = "compiled_data")] impl Default for Adapter { fn default() -> Self { Self::new() } } impl Adapter { /// Constructor using data compiled into the binary. #[cfg(feature = "compiled_data")] #[inline(always)] pub const fn new() -> Self { Self { mapper: Uts46Mapper::new(), canonical_combining_class: CanonicalCombiningClassMap::new(), general_category: icu_properties::maps::general_category(), bidi_class: icu_properties::maps::bidi_class(), joining_type: icu_properties::maps::joining_type(), } } /// `true` iff the Canonical_Combining_Class of `c` is Virama. #[inline(always)] pub fn is_virama(&self, c: char) -> bool { self.canonical_combining_class.get(c) == CanonicalCombiningClass::Virama } /// `true` iff the General_Category of `c` is Mark, i.e. any of Nonspacing_Mark, /// Spacing_Mark, or Enclosing_Mark. #[inline(always)] pub fn is_mark(&self, c: char) -> bool { (general_category_to_mask(self.general_category.get(c)) & MARK_MASK) != 0 } /// Returns the Bidi_Class of `c`. #[inline(always)] pub fn bidi_class(&self, c: char) -> BidiClass { BidiClass(self.bidi_class.get(c)) } /// Returns the Joining_Type of `c`. #[inline(always)] pub fn joining_type(&self, c: char) -> JoiningType { JoiningType(self.joining_type.get(c)) } /// See the [method of the same name in `icu_normalizer`][1] for the /// exact semantics. 
/// /// [1]: https://docs.rs/icu_normalizer/latest/icu_normalizer/uts46/struct.Uts46Mapper.html#method.map_normalize #[inline(always)] pub fn map_normalize<'delegate, I: Iterator + 'delegate>( &'delegate self, iter: I, ) -> impl Iterator + 'delegate { self.mapper.map_normalize(iter) } /// See the [method of the same name in `icu_normalizer`][1] for the /// exact semantics. /// /// [1]: https://docs.rs/icu_normalizer/latest/icu_normalizer/uts46/struct.Uts46Mapper.html#method.normalize_validate #[inline(always)] pub fn normalize_validate<'delegate, I: Iterator + 'delegate>( &'delegate self, iter: I, ) -> impl Iterator + 'delegate { self.mapper.normalize_validate(iter) } }