Type (camomile.CamomileLibrary.UCharInfo.Type)

type general_category_type = [

`\|` `Lu	Letter, Uppercase
`\|` `Ll	Letter, Lowercase
`\|` `Lt	Letter, Titlecase
`\|` `Mn	Mark, Non-Spacing
`\|` `Mc	Mark, Spacing Combining
`\|` `Me	Mark, Enclosing
`\|` `Nd	Number, Decimal Digit
`\|` `Nl	Number, Letter
`\|` `No	Number, Other
`\|` `Zs	Separator, Space
`\|` `Zl	Separator, Line
`\|` `Zp	Separator, Paragraph
`\|` `Cc	Other, Control
`\|` `Cf	Other, Format
`\|` `Cs	Other, Surrogate
`\|` `Co	Other, Private Use
`\|` `Cn	Other, Not Assigned
`\|` `Lm	Letter, Modifier
`\|` `Lo	Letter, Other
`\|` `Pc	Punctuation, Connector
`\|` `Pd	Punctuation, Dash
`\|` `Ps	Punctuation, Open
`\|` `Pe	Punctuation, Close
`\|` `Pi	Punctuation, Initial quote
`\|` `Pf	Punctuation, Final quote
`\|` `Po	Punctuation, Other
`\|` `Sm	Symbol, Math
`\|` `Sc	Symbol, Currency
`\|` `Sk	Symbol, Modifier
`\|` `So	Symbol, Other

]

Type of Unicode general character categories. Each variant specifies the category listed below:

`Lu : Letter, Uppercase
`Ll : Letter, Lowercase
`Lt : Letter, Titlecase
`Mn : Mark, Non-Spacing
`Mc : Mark, Spacing Combining
`Me : Mark, Enclosing
`Nd : Number, Decimal Digit
`Nl : Number, Letter
`No : Number, Other
`Zs : Separator, Space
`Zl : Separator, Line
`Zp : Separator, Paragraph
`Cc : Other, Control
`Cf : Other, Format
`Cs : Other, Surrogate
`Co : Other, Private Use
`Cn : Other, Not Assigned
`Lm : Letter, Modifier
`Lo : Letter, Other
`Pc : Punctuation, Connector
`Pd : Punctuation, Dash
`Ps : Punctuation, Open
`Pe : Punctuation, Close
`Pi : Punctuation, Initial quote
`Pf : Punctuation, Final quote
`Po : Punctuation, Other
`Sm : Symbol, Math
`Sc : Symbol, Currency
`Sk : Symbol, Modifier
`So : Symbol, Other

val general_category : UChar.t -> general_category_type

val load_general_category_map : unit -> general_category_type UMap.t

type character_property_type = [

`\|` `Math	Derived Core Properties
`\|` `Alphabetic
`\|` `Lowercase
`\|` `Uppercase
`\|` `ID_Start
`\|` `ID_Continue
`\|` `XID_Start
`\|` `XID_Continue
`\|` `Default_Ignorable_Code_Point
`\|` `Grapheme_Extend
`\|` `Grapheme_Base
`\|` `Bidi_Control	Extended Properties
`\|` `White_Space
`\|` `Hyphen
`\|` `Quotation_Mark
`\|` `Terminal_Punctuation
`\|` `Other_Math
`\|` `Hex_Digit
`\|` `Ascii_Hex_Digit
`\|` `Other_Alphabetic
`\|` `Ideographic
`\|` `Diacritic
`\|` `Extender
`\|` `Other_Lowercase
`\|` `Other_Uppercase
`\|` `Noncharacter_Code_Point
`\|` `Other_Grapheme_Extend
`\|` `Grapheme_Link
`\|` `IDS_Binary_Operator
`\|` `IDS_Trinary_Operator
`\|` `Radical
`\|` `Unified_Ideograph
`\|` `Other_default_Ignorable_Code_Point
`\|` `Deprecated
`\|` `Soft_Dotted
`\|` `Logical_Order_Exception

]

Type of character properties

val load_property_tbl : character_property_type -> UCharTbl.Bool.t

Load the table for the given character type.

val load_property_tbl_by_name : string -> UCharTbl.Bool.t

Load the table for the character type with the given name. The name is the polymorphic variant tag without its leading backquote; for example, the tag `Math has the name "Math".

val load_property_set : character_property_type -> USet.t

Load the set of characters of the given character type.

val load_property_set_by_name : string -> USet.t

Load the set of characters for the character type with the given name. The name is the polymorphic variant tag without its leading backquote; for example, the tag `Math has the name "Math".

type script_type = [

| `Common

| `Inherited

| `Latin

| `Greek

| `Cyrillic

| `Armenian

| `Hebrew

| `Arabic

| `Syriac

| `Thaana

| `Devanagari

| `Bengali

| `Gurmukhi

| `Gujarati

| `Oriya

| `Tamil

| `Telugu

| `Kannada

| `Malayalam

| `Sinhala

| `Thai

| `Lao

| `Tibetan

| `Myanmar

| `Georgian

| `Hangul

| `Ethiopic

| `Cherokee

| `Canadian_Aboriginal

| `Ogham

| `Runic

| `Khmer

| `Mongolian

| `Hiragana

| `Katakana

| `Bopomofo

| `Han

| `Yi

| `Old_Italic

| `Gothic

| `Deseret

| `Tagalog

| `Hanunoo

| `Buhid

| `Tagbanwa

]

Type of scripts.

val script : UChar.t -> script_type

val load_script_map : unit -> script_type UMap.t

type version_type = [

`\|` `Nc	undefined code point
`\|` `v1_0
`\|` `v1_1
`\|` `v2_0
`\|` `v2_1
`\|` `v3_0
`\|` `v3_1
`\|` `v3_2

]

Type of Unicode versions, as returned by age. `Nc denotes an undefined code point.

val age : UChar.t -> version_type

age c is the Unicode version in which c was introduced.

val older : version_type -> version_type -> bool

older v1 v2 is true if v1 is older than, or the same version as, v2. Everything is older than `Nc.

val load_to_lower1_tbl : unit -> UChar.t UCharTbl.t

val load_to_upper1_tbl : unit -> UChar.t UCharTbl.t

val load_to_title1_tbl : unit -> UChar.t UCharTbl.t

type casemap_condition = [

| `Locale of string

| `FinalSigma

| `AfterSoftDotted

| `MoreAbove

| `BeforeDot

| `Not of casemap_condition

]

type special_casing_property = {

lower : UChar.t list;

title : UChar.t list;

upper : UChar.t list;

condition : casemap_condition list;

}

val load_conditional_casing_tbl : unit -> special_casing_property list UCharTbl.t

val load_casefolding_tbl : unit -> UChar.t list UCharTbl.t

val combined_class : UChar.t -> int

Combined class. A combined class is an integer in the range 0 -- 255 that shows how this character interacts with other combined characters.

type decomposition_type = [

| `Canon

| `Font

| `NoBreak

| `Initial

| `Medial

| `Final

| `Isolated

| `Circle

| `Super

| `Sub

| `Vertical

| `Wide

| `Narrow

| `Small

| `Square

| `Fraction

| `Compat

]

Types of decomposition.

type decomposition_info = [

`\|` `Canonform	Already in the canonical form
`\|` `HangulSyllable	Hangul is treated algorithmically.
`\|` `Composite of decomposition_type * UChar.t list	`Composite (dtype, text) means the given character is decomposed into text by dtype decomposition.

]

val load_decomposition_tbl : unit -> decomposition_info UCharTbl.t

val load_composition_tbl : unit -> (UChar.t * UChar.t) list UCharTbl.t

The return value [(u_1, u'_1); ... (u_n, u'_n)] means that, for the given character u, the sequence u u_i forms the canonical composition u'_i. If u is a Hangul jamo, composition returns the empty list [].

val load_composition_exclusion_tbl : unit -> UCharTbl.Bool.t

Whether the given composed character is used in NFC or NFKC

`\|` `Math	Derived Core Properties
`\|` `Alphabetic
`\|` `Lowercase
`\|` `Uppercase
`\|` `ID_Start
`\|` `ID_Continue
`\|` `XID_Start
`\|` `XID_Continue
`\|` `Default_Ignorable_Code_Point
`\|` `Grapheme_Extend
`\|` `Grapheme_Base
`\|` `Bidi_Control	Extended Properties
`\|` `White_Space
`\|` `Hyphen
`\|` `Quotation_Mark
`\|` `Terminal_Punctuation
`\|` `Other_Math
`\|` `Hex_Digit
`\|` `Ascii_Hex_Digit
`\|` `Other_Alphabetic
`\|` `Ideographic
`\|` `Diacritic
`\|` `Extender
`\|` `Other_Lowercase
`\|` `Other_Uppercase
`\|` `Noncharacter_Code_Point
`\|` `Other_Grapheme_Extend
`\|` `Grapheme_Link
`\|` `IDS_Binary_Operator
`\|` `IDS_Trinary_Operator
`\|` `Radical
`\|` `Unified_Ideograph
`\|` `Other_default_Ignorable_Code_Point
`\|` `Deprecated
`\|` `Soft_Dotted
`\|` `Logical_Order_Exception

`\|` `Nc	undefined code point
`\|` `v1_0
`\|` `v1_1
`\|` `v2_0
`\|` `v2_1
`\|` `v3_0
`\|` `v3_1
`\|` `v3_2