public class UnicodeUtilImpl extends AbstractLoggableComponent implements UnicodeUtil
This is the implementation of the `UnicodeUtil` interface.
See also: `getInstance()`
Modifier and Type | Field and Description |
---|---|
`private static Map<Character,String>` | `CHARACTER_TO_ASCII_MAP` |
`private static UnicodeUtil` | `instance` |
`private static Map<Character,String>` | `TRANSLITERATION_MAP` |
ACUTE_ACCENT, ALEF_SYMBOL, ALL_AROUND_PROFILE, ALL_EQUAL_TO, ALMOST_EQUAL_OR_EQUAL_TO, ALMOST_EQUAL_TO, AMALGAMATION_OR_COPRODUCT, AMPERSAND, ANGLE, ANGLE_WITH_UNDERBAR, ANGSTROM_SIGN, ANTICLOCKWISE_CONTOUR_INTEGRAL, ANTICLOCKWISE_INTEGRATION, ANTICLOCKWISE_OPEN_CIRCLE_ARROW, ANTICLOCKWISE_TOP_SEMICIRCLE_ARROW, APL_FUNCTIONAL_SYMBOL_CIRCLE_STILE, APL_FUNCTIONAL_SYMBOL_I_BEAM, APL_FUNCTIONAL_SYMBOL_SLASH_BAR, APOSTROPHE, APPROACHES_THE_LIMIT, APPROXIMATELY_BUT_NOT_ACTUALLY_EQUAL_TO, APPROXIMATELY_EQUAL_OR_EQUAL_TO, APPROXIMATELY_EQUAL_TO, APPROXIMATELY_EQUAL_TO_OR_THE_IMAGE_OF, ARC, ARROW_POINTING_DOWNWARDS_THEN_CURVING_LEFTWARDS, ARROW_POINTING_DOWNWARDS_THEN_CURVING_RIGHTWARDS, ARROW_POINTING_RIGHTWARDS_THEN_CURVING_DOWNWARDS, ASTERISK, ASTERISK_OPERATOR, ASYMPTOTICALLY_EQUAL_TO, BALLOT_X, BECAUSE, BET_SYMBOL, BETWEEN, BLACK_CLUB_SUIT, BLACK_DIAMOND_SUIT, BLACK_DOWN_POINTING_SMALL_TRIANGLE, BLACK_HEART_SUIT, BLACK_LEFT_POINTING_SMALL_TRIANGLE, BLACK_LETTER_CAPITAL_C, BLACK_LETTER_CAPITAL_H, BLACK_LETTER_CAPITAL_I, BLACK_LETTER_CAPITAL_R, BLACK_LETTER_CAPITAL_Z, BLACK_LOZENGE, BLACK_MEDIUM_SQUARE, BLACK_RIGHT_POINTING_SMALL_TRIANGLE, BLACK_SMALL_SQUARE, BLACK_SPADE_SUIT, BLACK_STAR, BLACK_TELEPHONE, BLACK_UP_POINTING_SMALL_TRIANGLE, BLACK_VERTICAL_RECTANGLE, BOTTOM_LEFT_CORNER, BOTTOM_LEFT_CROP, BOTTOM_RIGHT_CORNER, BOTTOM_RIGHT_CROP, BOTTOM_SQUARE_BRACKET, BOTTOM_SQUARE_BRACKET_OVER_TOP_SQUARE_BRACKET, BOWTIE, BOX_DRAWINGS_DOUBLE_DOWN_AND_HORIZONTAL, BOX_DRAWINGS_DOUBLE_DOWN_AND_LEFT, BOX_DRAWINGS_DOUBLE_DOWN_AND_RIGHT, BOX_DRAWINGS_DOUBLE_HORIZONTAL, BOX_DRAWINGS_DOUBLE_UP_AND_HORIZONTAL, BOX_DRAWINGS_DOUBLE_UP_AND_LEFT, BOX_DRAWINGS_DOUBLE_UP_AND_RIGHT, BOX_DRAWINGS_DOUBLE_VERTICAL, BOX_DRAWINGS_DOUBLE_VERTICAL_AND_HORIZONTAL, BOX_DRAWINGS_DOUBLE_VERTICAL_AND_LEFT, BOX_DRAWINGS_DOUBLE_VERTICAL_AND_RIGHT, BOX_DRAWINGS_DOWN_DOUBLE_AND_HORIZONTAL_SINGLE, BOX_DRAWINGS_DOWN_DOUBLE_AND_LEFT_SINGLE, BOX_DRAWINGS_DOWN_DOUBLE_AND_RIGHT_SINGLE, 
BOX_DRAWINGS_DOWN_SINGLE_AND_HORIZONTAL_DOUBLE, BOX_DRAWINGS_DOWN_SINGLE_AND_LEFT_DOUBLE, BOX_DRAWINGS_DOWN_SINGLE_AND_RIGHT_DOUBLE, BOX_DRAWINGS_LIGHT_DOWN_AND_HORIZONTAL, BOX_DRAWINGS_LIGHT_DOWN_AND_LEFT, BOX_DRAWINGS_LIGHT_DOWN_AND_RIGHT, BOX_DRAWINGS_LIGHT_HORIZONTAL, BOX_DRAWINGS_LIGHT_UP_AND_HORIZONTAL, BOX_DRAWINGS_LIGHT_UP_AND_LEFT, BOX_DRAWINGS_LIGHT_UP_AND_RIGHT, BOX_DRAWINGS_LIGHT_VERTICAL, BOX_DRAWINGS_LIGHT_VERTICAL_AND_HORIZONTAL, BOX_DRAWINGS_LIGHT_VERTICAL_AND_LEFT, BOX_DRAWINGS_LIGHT_VERTICAL_AND_RIGHT, BOX_DRAWINGS_UP_DOUBLE_AND_HORIZONTAL_SINGLE, BOX_DRAWINGS_UP_DOUBLE_AND_LEFT_SINGLE, BOX_DRAWINGS_UP_DOUBLE_AND_RIGHT_SINGLE, BOX_DRAWINGS_UP_SINGLE_AND_HORIZONTAL_DOUBLE, BOX_DRAWINGS_UP_SINGLE_AND_LEFT_DOUBLE, BOX_DRAWINGS_UP_SINGLE_AND_RIGHT_DOUBLE, BOX_DRAWINGS_VERTICAL_DOUBLE_AND_HORIZONTAL_SINGLE, BOX_DRAWINGS_VERTICAL_DOUBLE_AND_LEFT_SINGLE, BOX_DRAWINGS_VERTICAL_DOUBLE_AND_RIGHT_SINGLE, BOX_DRAWINGS_VERTICAL_SINGLE_AND_HORIZONTAL_DOUBLE, BOX_DRAWINGS_VERTICAL_SINGLE_AND_LEFT_DOUBLE, BOX_DRAWINGS_VERTICAL_SINGLE_AND_RIGHT_DOUBLE, BREVE, BROKEN_BAR, BULLET, CARE_OF, CARET_INSERTION_POINT, CARON, CARRIAGE_RETURN, CEDILLA, CENT_SIGN, CHARACTER_TABULATION, CHECK_MARK, CIRCLE_WITH_HORIZONTAL_BAR, CIRCLE_WITH_SMALL_CIRCLE_TO_THE_RIGHT, CIRCLE_WITH_SUPERIMPOSED_X, CIRCLE_WITH_TWO_HORIZONTAL_STROKES_TO_THE_RIGHT, CIRCLED_ANTICLOCKWISE_ROTATED_DIVISION_SIGN, CIRCLED_ASTERISK_OPERATOR, CIRCLED_BULLET, CIRCLED_DASH, CIRCLED_DIVISION_SIGN, CIRCLED_DIVISION_SLASH, CIRCLED_DOT_OPERATOR, CIRCLED_GREATER_THAN, CIRCLED_LATIN_CAPITAL_LETTER_S, CIRCLED_LESS_THAN, CIRCLED_MINUS, CIRCLED_MULTIPLICATION_SIGN_WITH_CIRCUMFLEX_ACCENT, CIRCLED_PARALLEL, CIRCLED_PERPENDICULAR, CIRCLED_PLUS, CIRCLED_RING_OPERATOR, CIRCLED_TIMES, CIRCLED_VERTICAL_BAR, CIRCLED_WHITE_BULLET, CIRCULATION_FUNCTION, CIRCUMFLEX_ACCENT, CLOCKWISE_CONTOUR_INTEGRAL, CLOCKWISE_INTEGRAL, CLOCKWISE_OPEN_CIRCLE_ARROW, CLOCKWISE_TOP_SEMICIRCLE_ARROW, CLOSED_INTERSECTION_WITH_SERIFS, CLOSED_SUBSET, 
CLOSED_SUBSET_OR_EQUAL_TO, CLOSED_SUPERSET, CLOSED_SUPERSET_OR_EQUAL_TO, CLOSED_UNION_WITH_SERIFS, CLOSED_UNION_WITH_SERIFS_AND_SMASH_PRODUCT, COLON, COLON_EQUALS, COMBINING_ACUTE_ACCENT, COMBINING_ACUTE_ACCENT_BELOW, COMBINING_ACUTE_TONE_MARK, COMBINING_ALMOST_EQUAL_TO_ABOVE, COMBINING_BREVE, COMBINING_BREVE_BELOW, COMBINING_BRIDGE_ABOVE, COMBINING_BRIDGE_BELOW, COMBINING_CANDRABINDU, COMBINING_CARON, COMBINING_CARON_BELOW, COMBINING_CEDILLA, COMBINING_CIRCUMFLEX_ACCENT, COMBINING_CIRCUMFLEX_ACCENT_BELOW, COMBINING_COMMA_ABOVE, COMBINING_COMMA_ABOVE_RIGHT, COMBINING_COMMA_BELOW, COMBINING_CYRILLIC_DASIA_PNEUMATA, COMBINING_CYRILLIC_HUNDRED_THOUSANDS_SIGN, COMBINING_CYRILLIC_MILLIONS_SIGN, COMBINING_CYRILLIC_PALATALIZATION, COMBINING_CYRILLIC_PSILI_PNEUMATA, COMBINING_CYRILLIC_TITLO, COMBINING_DIAERESIS, COMBINING_DIAERESIS_BELOW, COMBINING_DOT_ABOVE, COMBINING_DOT_BELOW, COMBINING_DOUBLE_ACUTE_ACCENT, COMBINING_DOUBLE_GRAVE_ACCENT, COMBINING_DOUBLE_INVERTED_BREVE, COMBINING_DOUBLE_LOW_LINE, COMBINING_DOUBLE_OVERLINE, COMBINING_DOUBLE_RIGHTWARDS_ARROW_BELOW, COMBINING_DOUBLE_TILDE, COMBINING_DOUBLE_VERTICAL_LINE_ABOVE, COMBINING_DOUBLE_VERTICAL_LINE_BELOW, COMBINING_DOWN_TACK_BELOW, COMBINING_EQUALS_SIGN_BELOW, COMBINING_FOUR_DOTS_ABOVE, COMBINING_GRAVE_ACCENT, COMBINING_GRAVE_ACCENT_BELOW, COMBINING_GRAVE_TONE_MARK, COMBINING_GREEK_DIALYTIKA_TONOS, COMBINING_GREEK_KORONIS, COMBINING_GREEK_PERISPOMENI, COMBINING_GREEK_YPOGEGRAMMENI, COMBINING_HOMOTHETIC_ABOVE, COMBINING_HOOK_ABOVE, COMBINING_HORN, COMBINING_INVERTED_BREVE, COMBINING_INVERTED_BREVE_BELOW, COMBINING_INVERTED_BRIDGE_BELOW, COMBINING_INVERTED_DOUBLE_ARCH_BELOW, COMBINING_LEFT_ANGLE_ABOVE, COMBINING_LEFT_ANGLE_BELOW, COMBINING_LEFT_HALF_RING_BELOW, COMBINING_LEFT_RIGHT_ARROW_BELOW, COMBINING_LEFT_TACK_BELOW, COMBINING_LONG_SOLIDUS_OVERLAY, COMBINING_LONG_STROKE_OVERLAY, COMBINING_LOW_LINE, COMBINING_MACRON, COMBINING_MACRON_BELOW, COMBINING_MINUS_SIGN_BELOW, COMBINING_NOT_TILDE_ABOVE, COMBINING_OGONEK, 
COMBINING_OVERLINE, COMBINING_PALATALIZED_HOOK_BELOW, COMBINING_PLUS_SIGN_BELOW, COMBINING_RETROFLEX_HOOK_BELOW, COMBINING_REVERSED_COMMA_ABOVE, COMBINING_RIGHT_HALF_RING_BELOW, COMBINING_RIGHT_TACK_BELOW, COMBINING_RING_ABOVE, COMBINING_RING_BELOW, COMBINING_SEAGULL_BELOW, COMBINING_SHORT_SOLIDUS_OVERLAY, COMBINING_SHORT_STROKE_OVERLAY, COMBINING_SQUARE_BELOW, COMBINING_THREE_DOTS_ABOVE, COMBINING_TILDE, COMBINING_TILDE_BELOW, COMBINING_TILDE_OVERLAY, COMBINING_TURNED_COMMA_ABOVE, COMBINING_UP_TACK_BELOW, COMBINING_UPWARDS_ARROW_BELOW, COMBINING_VERTICAL_LINE_ABOVE, COMBINING_VERTICAL_LINE_BELOW, COMBINING_VERTICAL_TILDE, COMBINING_X_ABOVE, COMMA, COMMERCIAL_AT, COMPLEMENT, CONGRUENT_WITH_DOT_ABOVE, CONTAINS_AS_MEMBER, CONTAINS_AS_NORMAL_SUBGROUP, CONTAINS_AS_NORMAL_SUBGROUP_OR_EQUAL_TO, CONTAINS_WITH_LONG_HORIZONTAL_STROKE, CONTAINS_WITH_OVERBAR, CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, CONTOUR_INTEGRAL, COPTIC_CAPITAL_LETTER_DEI, COPTIC_CAPITAL_LETTER_FEI, COPTIC_CAPITAL_LETTER_GANGIA, COPTIC_CAPITAL_LETTER_HORI, COPTIC_CAPITAL_LETTER_KHEI, COPTIC_CAPITAL_LETTER_SHEI, COPTIC_CAPITAL_LETTER_SHIMA, COPTIC_SMALL_LETTER_DEI, COPTIC_SMALL_LETTER_FEI, COPTIC_SMALL_LETTER_GANGIA, COPTIC_SMALL_LETTER_HORI, COPTIC_SMALL_LETTER_KHEI, COPTIC_SMALL_LETTER_SHEI, COPTIC_SMALL_LETTER_SHIMA, COPYRIGHT_SIGN, CURLY_LOGICAL_AND, CURLY_LOGICAL_OR, CURRENCY_SIGN, CYLINDRICITY, CYRILLIC_CAPITAL_LETTER_A, CYRILLIC_CAPITAL_LETTER_A_WITH_BREVE, CYRILLIC_CAPITAL_LETTER_A_WITH_DIAERESIS, CYRILLIC_CAPITAL_LETTER_ABKHASIAN_CHE, CYRILLIC_CAPITAL_LETTER_ABKHASIAN_CHE_WITH_DESCENDER, CYRILLIC_CAPITAL_LETTER_ABKHASIAN_DZE, CYRILLIC_CAPITAL_LETTER_ABKHASIAN_HA, CYRILLIC_CAPITAL_LETTER_BARRED_O, CYRILLIC_CAPITAL_LETTER_BARRED_O_WITH_DIAERESIS, CYRILLIC_CAPITAL_LETTER_BASHKIR_KA, CYRILLIC_CAPITAL_LETTER_BE, CYRILLIC_CAPITAL_LETTER_BIG_YUS, CYRILLIC_CAPITAL_LETTER_BYELORUSSIAN_UKRAINIAN_I, CYRILLIC_CAPITAL_LETTER_CHE, CYRILLIC_CAPITAL_LETTER_CHE_WITH_DESCENDER, 
CYRILLIC_CAPITAL_LETTER_CHE_WITH_DIAERESIS, CYRILLIC_CAPITAL_LETTER_CHE_WITH_VERTICAL_STROKE, CYRILLIC_CAPITAL_LETTER_DE, CYRILLIC_CAPITAL_LETTER_DJE, CYRILLIC_CAPITAL_LETTER_DZE, CYRILLIC_CAPITAL_LETTER_DZHE, CYRILLIC_CAPITAL_LETTER_E, CYRILLIC_CAPITAL_LETTER_E_WITH_DIAERESIS, CYRILLIC_CAPITAL_LETTER_EF, CYRILLIC_CAPITAL_LETTER_EL, CYRILLIC_CAPITAL_LETTER_EM, CYRILLIC_CAPITAL_LETTER_EN, CYRILLIC_CAPITAL_LETTER_EN_WITH_DESCENDER, CYRILLIC_CAPITAL_LETTER_EN_WITH_HOOK, CYRILLIC_CAPITAL_LETTER_ER, CYRILLIC_CAPITAL_LETTER_ER_WITH_TICK, CYRILLIC_CAPITAL_LETTER_ES, CYRILLIC_CAPITAL_LETTER_ES_WITH_DESCENDER, CYRILLIC_CAPITAL_LETTER_FITA, CYRILLIC_CAPITAL_LETTER_GHE, CYRILLIC_CAPITAL_LETTER_GHE_WITH_MIDDLE_HOOK, CYRILLIC_CAPITAL_LETTER_GHE_WITH_STROKE, CYRILLIC_CAPITAL_LETTER_GHE_WITH_UPTURN, CYRILLIC_CAPITAL_LETTER_GJE, CYRILLIC_CAPITAL_LETTER_HA, CYRILLIC_CAPITAL_LETTER_HA_WITH_DESCENDER, CYRILLIC_CAPITAL_LETTER_HARD_SIGN, CYRILLIC_CAPITAL_LETTER_I, CYRILLIC_CAPITAL_LETTER_I_WITH_DIAERESIS, CYRILLIC_CAPITAL_LETTER_IE, CYRILLIC_CAPITAL_LETTER_IE_WITH_BREVE, CYRILLIC_CAPITAL_LETTER_IE_WITH_GRAVE, CYRILLIC_CAPITAL_LETTER_IO, CYRILLIC_CAPITAL_LETTER_IOTIFIED_BIG_YUS, CYRILLIC_CAPITAL_LETTER_IOTIFIED_E, CYRILLIC_CAPITAL_LETTER_IOTIFIED_LITTLE_YUS, CYRILLIC_CAPITAL_LETTER_IZHITSA, CYRILLIC_CAPITAL_LETTER_IZHITSA_WITH_DOUBLE_GRAVE_ACCENT, CYRILLIC_CAPITAL_LETTER_JE, CYRILLIC_CAPITAL_LETTER_KA, CYRILLIC_CAPITAL_LETTER_KA_WITH_DESCENDER, CYRILLIC_CAPITAL_LETTER_KA_WITH_HOOK, CYRILLIC_CAPITAL_LETTER_KA_WITH_STROKE, CYRILLIC_CAPITAL_LETTER_KA_WITH_VERTICAL_STROKE, CYRILLIC_CAPITAL_LETTER_KHAKASSIAN_CHE, CYRILLIC_CAPITAL_LETTER_KJE, CYRILLIC_CAPITAL_LETTER_KOPPA, CYRILLIC_CAPITAL_LETTER_KSI, CYRILLIC_CAPITAL_LETTER_LITTLE_YUS, CYRILLIC_CAPITAL_LETTER_LJE, CYRILLIC_CAPITAL_LETTER_NJE, CYRILLIC_CAPITAL_LETTER_O, CYRILLIC_CAPITAL_LETTER_O_WITH_DIAERESIS, CYRILLIC_CAPITAL_LETTER_OMEGA, CYRILLIC_CAPITAL_LETTER_OMEGA_WITH_TITLO, CYRILLIC_CAPITAL_LETTER_OT, CYRILLIC_CAPITAL_LETTER_PE, 
CYRILLIC_CAPITAL_LETTER_PE_WITH_MIDDLE_HOOK, CYRILLIC_CAPITAL_LETTER_PSI, CYRILLIC_CAPITAL_LETTER_ROUND_OMEGA, CYRILLIC_CAPITAL_LETTER_SCHWA, CYRILLIC_CAPITAL_LETTER_SCHWA_WITH_DIAERESIS, CYRILLIC_CAPITAL_LETTER_SEMISOFT_SIGN, CYRILLIC_CAPITAL_LETTER_SHA, CYRILLIC_CAPITAL_LETTER_SHCHA, CYRILLIC_CAPITAL_LETTER_SHHA, CYRILLIC_CAPITAL_LETTER_SHORT_I, CYRILLIC_CAPITAL_LETTER_SHORT_U, CYRILLIC_CAPITAL_LETTER_SOFT_SIGN, CYRILLIC_CAPITAL_LETTER_STRAIGHT_U, CYRILLIC_CAPITAL_LETTER_STRAIGHT_U_WITH_STROKE, CYRILLIC_CAPITAL_LETTER_TE, CYRILLIC_CAPITAL_LETTER_TE_WITH_DESCENDER, CYRILLIC_CAPITAL_LETTER_TSE, CYRILLIC_CAPITAL_LETTER_TSHE, CYRILLIC_CAPITAL_LETTER_U, CYRILLIC_CAPITAL_LETTER_U_WITH_DIAERESIS, CYRILLIC_CAPITAL_LETTER_U_WITH_DOUBLE_ACUTE, CYRILLIC_CAPITAL_LETTER_U_WITH_MACRON, CYRILLIC_CAPITAL_LETTER_UK, CYRILLIC_CAPITAL_LETTER_UKRAINIAN_IE, CYRILLIC_CAPITAL_LETTER_VE, CYRILLIC_CAPITAL_LETTER_YA, CYRILLIC_CAPITAL_LETTER_YAT, CYRILLIC_CAPITAL_LETTER_YERU, CYRILLIC_CAPITAL_LETTER_YERU_WITH_DIAERESIS, CYRILLIC_CAPITAL_LETTER_YI, CYRILLIC_CAPITAL_LETTER_YU, CYRILLIC_CAPITAL_LETTER_ZE, CYRILLIC_CAPITAL_LETTER_ZE_WITH_DESCENDER, CYRILLIC_CAPITAL_LETTER_ZE_WITH_DIAERESIS, CYRILLIC_CAPITAL_LETTER_ZHE, CYRILLIC_CAPITAL_LETTER_ZHE_WITH_BREVE, CYRILLIC_CAPITAL_LETTER_ZHE_WITH_DESCENDER, CYRILLIC_CAPITAL_LETTER_ZHE_WITH_DIAERESIS, CYRILLIC_CAPITAL_LIGATURE_A_IE, CYRILLIC_CAPITAL_LIGATURE_EN_GHE, CYRILLIC_CAPITAL_LIGATURE_TE_TSE, CYRILLIC_LETTER_PALOCHKA, CYRILLIC_SMALL_LETTER_A, CYRILLIC_SMALL_LETTER_A_WITH_BREVE, CYRILLIC_SMALL_LETTER_A_WITH_DIAERESIS, CYRILLIC_SMALL_LETTER_ABKHASIAN_CHE, CYRILLIC_SMALL_LETTER_ABKHASIAN_CHE_WITH_DESCENDER, CYRILLIC_SMALL_LETTER_ABKHASIAN_DZE, CYRILLIC_SMALL_LETTER_ABKHASIAN_HA, CYRILLIC_SMALL_LETTER_BARRED_O, CYRILLIC_SMALL_LETTER_BARRED_O_WITH_DIAERESIS, CYRILLIC_SMALL_LETTER_BASHKIR_KA, CYRILLIC_SMALL_LETTER_BE, CYRILLIC_SMALL_LETTER_BIG_YUS, CYRILLIC_SMALL_LETTER_BYELORUSSIAN_UKRAINIAN_I, CYRILLIC_SMALL_LETTER_CHE, 
CYRILLIC_SMALL_LETTER_CHE_WITH_DESCENDER, CYRILLIC_SMALL_LETTER_CHE_WITH_DIAERESIS, CYRILLIC_SMALL_LETTER_CHE_WITH_VERTICAL_STROKE, CYRILLIC_SMALL_LETTER_DE, CYRILLIC_SMALL_LETTER_DJE, CYRILLIC_SMALL_LETTER_DZE, CYRILLIC_SMALL_LETTER_DZHE, CYRILLIC_SMALL_LETTER_E, CYRILLIC_SMALL_LETTER_E_WITH_DIAERESIS, CYRILLIC_SMALL_LETTER_EF, CYRILLIC_SMALL_LETTER_EL, CYRILLIC_SMALL_LETTER_EM, CYRILLIC_SMALL_LETTER_EN, CYRILLIC_SMALL_LETTER_EN_WITH_DESCENDER, CYRILLIC_SMALL_LETTER_EN_WITH_HOOK, CYRILLIC_SMALL_LETTER_ER, CYRILLIC_SMALL_LETTER_ER_WITH_TICK, CYRILLIC_SMALL_LETTER_ES, CYRILLIC_SMALL_LETTER_ES_WITH_DESCENDER, CYRILLIC_SMALL_LETTER_FITA, CYRILLIC_SMALL_LETTER_GHE, CYRILLIC_SMALL_LETTER_GHE_WITH_MIDDLE_HOOK, CYRILLIC_SMALL_LETTER_GHE_WITH_STROKE, CYRILLIC_SMALL_LETTER_GHE_WITH_UPTURN, CYRILLIC_SMALL_LETTER_GJE, CYRILLIC_SMALL_LETTER_HA, CYRILLIC_SMALL_LETTER_HA_WITH_DESCENDER, CYRILLIC_SMALL_LETTER_HARD_SIGN, CYRILLIC_SMALL_LETTER_I, CYRILLIC_SMALL_LETTER_I_WITH_DIAERESIS, CYRILLIC_SMALL_LETTER_I_WITH_MACRON, CYRILLIC_SMALL_LETTER_IE, CYRILLIC_SMALL_LETTER_IE_WITH_BREVE, CYRILLIC_SMALL_LETTER_IO, CYRILLIC_SMALL_LETTER_IOTIFIED_BIG_YUS, CYRILLIC_SMALL_LETTER_IOTIFIED_E, CYRILLIC_SMALL_LETTER_IOTIFIED_LITTLE_YUS, CYRILLIC_SMALL_LETTER_IZHITSA, CYRILLIC_SMALL_LETTER_IZHITSA_WITH_DOUBLE_GRAVE_ACCENT, CYRILLIC_SMALL_LETTER_JE, CYRILLIC_SMALL_LETTER_KA, CYRILLIC_SMALL_LETTER_KA_WITH_DESCENDER, CYRILLIC_SMALL_LETTER_KA_WITH_HOOK, CYRILLIC_SMALL_LETTER_KA_WITH_STROKE, CYRILLIC_SMALL_LETTER_KA_WITH_VERTICAL_STROKE, CYRILLIC_SMALL_LETTER_KHAKASSIAN_CHE, CYRILLIC_SMALL_LETTER_KJE, CYRILLIC_SMALL_LETTER_KOPPA, CYRILLIC_SMALL_LETTER_KSI, CYRILLIC_SMALL_LETTER_LITTLE_YUS, CYRILLIC_SMALL_LETTER_LJE, CYRILLIC_SMALL_LETTER_NJE, CYRILLIC_SMALL_LETTER_O, CYRILLIC_SMALL_LETTER_O_WITH_DIAERESIS, CYRILLIC_SMALL_LETTER_OMEGA, CYRILLIC_SMALL_LETTER_OMEGA_WITH_TITLO, CYRILLIC_SMALL_LETTER_OT, CYRILLIC_SMALL_LETTER_PE, CYRILLIC_SMALL_LETTER_PE_WITH_MIDDLE_HOOK, CYRILLIC_SMALL_LETTER_PSI, 
CYRILLIC_SMALL_LETTER_ROUND_OMEGA, CYRILLIC_SMALL_LETTER_SCHWA, CYRILLIC_SMALL_LETTER_SCHWA_WITH_DIAERESIS, CYRILLIC_SMALL_LETTER_SEMISOFT_SIGN, CYRILLIC_SMALL_LETTER_SHA, CYRILLIC_SMALL_LETTER_SHCHA, CYRILLIC_SMALL_LETTER_SHHA, CYRILLIC_SMALL_LETTER_SHORT_I, CYRILLIC_SMALL_LETTER_SHORT_U, CYRILLIC_SMALL_LETTER_SOFT_SIGN, CYRILLIC_SMALL_LETTER_STRAIGHT_U, CYRILLIC_SMALL_LETTER_STRAIGHT_U_WITH_STROKE, CYRILLIC_SMALL_LETTER_TE, CYRILLIC_SMALL_LETTER_TE_WITH_DESCENDER, CYRILLIC_SMALL_LETTER_TSE, CYRILLIC_SMALL_LETTER_TSHE, CYRILLIC_SMALL_LETTER_U, CYRILLIC_SMALL_LETTER_U_WITH_DIAERESIS, CYRILLIC_SMALL_LETTER_U_WITH_DOUBLE_ACUTE, CYRILLIC_SMALL_LETTER_U_WITH_MACRON, CYRILLIC_SMALL_LETTER_UK, CYRILLIC_SMALL_LETTER_UKRAINIAN_IE, CYRILLIC_SMALL_LETTER_VE, CYRILLIC_SMALL_LETTER_YA, CYRILLIC_SMALL_LETTER_YAT, CYRILLIC_SMALL_LETTER_YERU, CYRILLIC_SMALL_LETTER_YERU_WITH_DIAERESIS, CYRILLIC_SMALL_LETTER_YI, CYRILLIC_SMALL_LETTER_YU, CYRILLIC_SMALL_LETTER_ZE, CYRILLIC_SMALL_LETTER_ZE_WITH_DESCENDER, CYRILLIC_SMALL_LETTER_ZE_WITH_DIAERESIS, CYRILLIC_SMALL_LETTER_ZHE, CYRILLIC_SMALL_LETTER_ZHE_WITH_BREVE, CYRILLIC_SMALL_LETTER_ZHE_WITH_DESCENDER, CYRILLIC_SMALL_LETTER_ZHE_WITH_DIAERESIS, CYRILLIC_SMALL_LIGATURE_A_IE, CYRILLIC_SMALL_LIGATURE_EN_GHE, CYRILLIC_SMALL_LIGATURE_TE_TSE, CYRILLIC_THOUSANDS_SIGN, DAGGER, DALET_SYMBOL, DARK_SHADE, DEGREE_SIGN, DELETE, DELTA_EQUAL_TO, DIAERESIS, DIAMOND_OPERATOR, DIFFERENCE_BETWEEN, DIGIT_EIGHT, DIGIT_FIVE, DIGIT_FOUR, DIGIT_NINE, DIGIT_ONE, DIGIT_SEVEN, DIGIT_SIX, DIGIT_THREE, DIGIT_TWO, DIGIT_ZERO, DIVIDES, DIVISION_SIGN, DIVISION_TIMES, DOES_NOT_CONTAIN_AS_MEMBER, DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP, DOES_NOT_CONTAIN_AS_NORMAL_SUBGROUP_OR_EQUAL, DOES_NOT_DIVIDE, DOES_NOT_DIVIDE_WITH_REVERSED_NEGATION_SLASH, DOES_NOT_FORCE, DOES_NOT_PRECEDE, DOES_NOT_PRECEDE_OR_EQUAL, DOES_NOT_PROVE, DOES_NOT_SUCCEED, DOES_NOT_SUCCEED_OR_EQUAL, DOLLAR_SIGN, DOT_ABOVE, DOT_BELOW, DOT_MINUS, DOT_OPERATOR, DOT_PLUS, DOUBLE_ACUTE_ACCENT, DOUBLE_COLON_EQUAL, 
DOUBLE_DAGGER, DOUBLE_GRAVE_ACCENT, DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK, DOUBLE_INTEGRAL, DOUBLE_INTERSECTION, DOUBLE_LEFT_ARC_GREATER_THAN_BRACKET, DOUBLE_LINE_EQUAL_TO_OR_GREATER_THAN, DOUBLE_LINE_EQUAL_TO_OR_LESS_THAN, DOUBLE_LOGICAL_AND, DOUBLE_LOGICAL_OR, DOUBLE_LOW_9_QUOTATION_MARK, DOUBLE_NESTED_GREATER_THAN, DOUBLE_NESTED_LESS_THAN, DOUBLE_PRECEDES, DOUBLE_PRIME, DOUBLE_RIGHT_ARC_LESS_THAN_BRACKET, DOUBLE_SOLIDUS_OPERATOR, DOUBLE_STROKE_NOT_SIGN, DOUBLE_STRUCK_CAPITAL_C, DOUBLE_STRUCK_CAPITAL_H, DOUBLE_STRUCK_CAPITAL_N, DOUBLE_STRUCK_CAPITAL_P, DOUBLE_STRUCK_CAPITAL_Q, DOUBLE_STRUCK_CAPITAL_R, DOUBLE_STRUCK_CAPITAL_Z, DOUBLE_STRUCK_ITALIC_CAPITAL_D, DOUBLE_STRUCK_ITALIC_SMALL_D, DOUBLE_STRUCK_ITALIC_SMALL_E, DOUBLE_STRUCK_ITALIC_SMALL_I, DOUBLE_SUBSET, DOUBLE_SUCCEEDS, DOUBLE_SUPERSET, DOUBLE_UNION, DOUBLE_UP_TACK, DOUBLE_VERTICAL_BAR_DOUBLE_RIGHT_TURNSTILE, DOUBLE_VERTICAL_LINE, DOWN_FISH_TAIL, DOWN_RIGHT_DIAGONAL_ELLIPSIS, DOWN_TACK, DOWN_TACK_WITH_CIRCLE_BELOW, DOWNWARDS_ARROW, DOWNWARDS_ARROW_FROM_BAR, DOWNWARDS_ARROW_LEFTWARDS_OF_UPWARDS_ARROW, DOWNWARDS_ARROW_TO_BAR, DOWNWARDS_ARROW_WITH_TIP_LEFTWARDS, DOWNWARDS_ARROW_WITH_TIP_RIGHTWARDS, DOWNWARDS_DOUBLE_ARROW, DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT, DOWNWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT, DOWNWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR, DOWNWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR, DOWNWARDS_HARPOON_WITH_BARB_LEFTWARDS, DOWNWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR, DOWNWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR, DOWNWARDS_HARPOON_WITH_BARB_RIGHTWARDS, DOWNWARDS_PAIRED_ARROWS, DOWNWARDS_TWO_HEADED_ARROW, EIGHTH_NOTE, ELEMENT_OF, ELEMENT_OF_OPENING_DOWNWARDS, ELEMENT_OF_WITH_DOT_ABOVE, ELEMENT_OF_WITH_LONG_HORIZONTAL_STROKE, ELEMENT_OF_WITH_OVERBAR, ELEMENT_OF_WITH_TWO_HORIZONTAL_STROKES, ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, EM_DASH, EM_SPACE, EMPTY_SET, EMPTY_SET_WITH_LEFT_ARROW_ABOVE, EMPTY_SET_WITH_OVERBAR, 
EMPTY_SET_WITH_RIGHT_ARROW_ABOVE, EMPTY_SET_WITH_SMALL_CIRCLE_ABOVE, EN_DASH, EN_SPACE, EQUAL_AND_PARALLEL_TO, EQUAL_TO_OR_PRECEDES, EQUAL_TO_OR_SUCCEEDS, EQUALS_COLON, EQUALS_SIGN, EQUALS_SIGN_ABOVE_PLUS_SIGN, EQUALS_SIGN_ABOVE_RIGHTWARDS_ARROW, EQUALS_SIGN_ABOVE_TILDE_OPERATOR, EQUALS_SIGN_AND_SLANTED_PARALLEL, EQUALS_SIGN_AND_SLANTED_PARALLEL_WITH_TILDE_ABOVE, EQUALS_SIGN_WITH_BUMPY_ABOVE, EQUALS_SIGN_WITH_DOT_BELOW, EQUALS_SIGN_WITH_TWO_DOTS_ABOVE_AND_TWO_DOTS_BELOW, EQUALS_WITH_ASTERISK, EQUIANGULAR_TO, EQUIVALENT_TO, EQUIVALENT_WITH_FOUR_DOTS_ABOVE, ESTIMATES, EURO_CURRENCY_SIGN, EXCLAMATION_MARK, FEMALE_SIGN, FEMININE_ORDINAL_INDICATOR, FIGURE_DASH, FIGURE_SPACE, FINITE_PART_INTEGRAL, FOR_ALL, FORCES, FORM_FEED, FOUR_PER_EM_SPACE, FROWN, FULL_BLOCK, FULL_STOP, FUNCTION_APPLICATION, GEOMETRIC_PROPORTION, GEOMETRICALLY_EQUAL_TO, GEOMETRICALLY_EQUIVALENT_TO, GIMEL_SYMBOL, GRAVE_ACCENT, GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_LESS_THAN, GREATER_THAN_ABOVE_LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL, GREATER_THAN_ABOVE_RIGHTWARDS_ARROW, GREATER_THAN_ABOVE_SIMILAR_ABOVE_LESS_THAN, GREATER_THAN_ABOVE_SIMILAR_OR_EQUAL, GREATER_THAN_ABOVE_SLANTED_EQUAL_ABOVE_LESS_THAN_ABOVE_SLANTED_EQUAL, GREATER_THAN_AND_NOT_APPROXIMATE, GREATER_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO, GREATER_THAN_BESIDE_LESS_THAN, GREATER_THAN_BUT_NOT_EQUAL_TO, GREATER_THAN_BUT_NOT_EQUIVALENT_TO, GREATER_THAN_CLOSED_BY_CURVE, GREATER_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL, GREATER_THAN_EQUAL_TO_OR_LESS_THAN, GREATER_THAN_OR_APPROXIMATE, GREATER_THAN_OR_EQUAL_TO, GREATER_THAN_OR_EQUIVALENT_TO, GREATER_THAN_OR_LESS_THAN, GREATER_THAN_OR_SLANTED_EQUAL_TO, GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE, GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_LEFT, GREATER_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE, GREATER_THAN_OVER_EQUAL_TO, GREATER_THAN_OVERLAPPING_LESS_THAN, GREATER_THAN_SIGN, GREATER_THAN_WITH_CIRCLE_INSIDE, GREATER_THAN_WITH_DOT, GREATER_THAN_WITH_QUESTION_MARK_ABOVE, GREEK_BETA_SYMBOL, 
GREEK_CAPITAL_KAI_SYMBOL, GREEK_CAPITAL_LETTER_ALPHA, GREEK_CAPITAL_LETTER_BETA, GREEK_CAPITAL_LETTER_CHI, GREEK_CAPITAL_LETTER_DELTA, GREEK_CAPITAL_LETTER_EPSILON, GREEK_CAPITAL_LETTER_ETA, GREEK_CAPITAL_LETTER_GAMMA, GREEK_CAPITAL_LETTER_HETA, GREEK_CAPITAL_LETTER_IOTA, GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA, GREEK_CAPITAL_LETTER_KAPPA, GREEK_CAPITAL_LETTER_LAMDA, GREEK_CAPITAL_LETTER_MU, GREEK_CAPITAL_LETTER_NU, GREEK_CAPITAL_LETTER_OMEGA, GREEK_CAPITAL_LETTER_OMICRON, GREEK_CAPITAL_LETTER_PHI, GREEK_CAPITAL_LETTER_PI, GREEK_CAPITAL_LETTER_PSI, GREEK_CAPITAL_LETTER_RHO, GREEK_CAPITAL_LETTER_SAN, GREEK_CAPITAL_LETTER_SHO, GREEK_CAPITAL_LETTER_SIGMA, GREEK_CAPITAL_LETTER_TAU, GREEK_CAPITAL_LETTER_THETA, GREEK_CAPITAL_LETTER_UPSILON, GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA, GREEK_CAPITAL_LETTER_XI, GREEK_CAPITAL_LETTER_ZETA, GREEK_CAPITAL_LUNATE_SIGMA_SYMBOL, GREEK_CAPITAL_REVERSED_DOTTED_LUNATE_SIGMA_SYMBOL, GREEK_CAPITAL_REVERSED_LUNATE_SIGMA_SYMBOL, GREEK_CAPITAL_THETA_SYMBOL, GREEK_KAI_SYMBOL, GREEK_KAPPA_SYMBOL, GREEK_LETTER_ARCHAIC_KOPPA, GREEK_LETTER_DIGAMMA, GREEK_LETTER_KOPPA, GREEK_LETTER_SAMPI, GREEK_LETTER_STIGMA, GREEK_LETTER_YOT, GREEK_LUNATE_EPSILON_SYMBOL, GREEK_LUNATE_SIGMA_SYMBOL, GREEK_PHI_SYMBOL, GREEK_PI_SYMBOL, GREEK_REVERSED_LUNATE_EPSILON_SYMBOL, GREEK_RHO_SYMBOL, GREEK_RHO_WITH_STROKE_SYMBOL, GREEK_SMALL_LETTER_ALPHA, GREEK_SMALL_LETTER_ALPHA_WITH_TONOS, GREEK_SMALL_LETTER_ARCHAIC_KOPPA, GREEK_SMALL_LETTER_BETA, GREEK_SMALL_LETTER_CHI, GREEK_SMALL_LETTER_DELTA, GREEK_SMALL_LETTER_DIGAMMA, GREEK_SMALL_LETTER_EPSILON, GREEK_SMALL_LETTER_EPSILON_WITH_TONOS, GREEK_SMALL_LETTER_ETA, GREEK_SMALL_LETTER_ETA_WITH_TONOS, GREEK_SMALL_LETTER_FINAL_SIGMA, GREEK_SMALL_LETTER_GAMMA, GREEK_SMALL_LETTER_HETA, GREEK_SMALL_LETTER_IOTA, GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA, GREEK_SMALL_LETTER_IOTA_WITH_TONOS, GREEK_SMALL_LETTER_KAPPA, GREEK_SMALL_LETTER_KOPPA, GREEK_SMALL_LETTER_LAMDA, GREEK_SMALL_LETTER_MU, GREEK_SMALL_LETTER_NU, 
GREEK_SMALL_LETTER_OMEGA, GREEK_SMALL_LETTER_OMEGA_WITH_TONOS, GREEK_SMALL_LETTER_OMICRON, GREEK_SMALL_LETTER_OMICRON_WITH_TONOS, GREEK_SMALL_LETTER_PHI, GREEK_SMALL_LETTER_PI, GREEK_SMALL_LETTER_PSI, GREEK_SMALL_LETTER_RHO, GREEK_SMALL_LETTER_SAMPI, GREEK_SMALL_LETTER_SAN, GREEK_SMALL_LETTER_SHO, GREEK_SMALL_LETTER_SIGMA, GREEK_SMALL_LETTER_STIGMA, GREEK_SMALL_LETTER_TAU, GREEK_SMALL_LETTER_THETA, GREEK_SMALL_LETTER_UPSILON, GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA, GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS, GREEK_SMALL_LETTER_UPSILON_WITH_TONOS, GREEK_SMALL_LETTER_XI, GREEK_SMALL_LETTER_ZETA, GREEK_THETA_SYMBOL, GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL, GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL, GREEK_UPSILON_WITH_HOOK_SYMBOL, HAIR_SPACE, HERMITIAN_CONJUGATE_MATRIX, HOMOTHETIC, HOOK_ABOVE, HORIZONTAL_BAR, HORIZONTAL_ELLIPSIS, HORN, HYPHEN, HYPHEN_BULLET, HYPHEN_MINUS, HYPHENATION_POINT, IDENTICAL_TO, IDENTICAL_TO_AND_SLANTED_PARALLEL, IMAGE_OF, IMAGE_OF_OR_APPROXIMATELY_EQUAL_TO, INCOMPLETE_INFINITY, INFINITY, INFINITY_NEGATED_WITH_VERTICAL_BAR, INTEGRAL, INTEGRAL_AROUND_A_POINT_OPERATOR, INTEGRAL_WITH_LEFTWARDS_ARROW_WITH_HOOK, INTERCALATE, INTERIOR_PRODUCT, INTERSECTION, INTERSECTION_ABOVE_BAR_ABOVE_UNION, INTERSECTION_ABOVE_UNION, INTERSECTION_BESIDE_AND_JOINED_WITH_INTERSECTION, INTERSECTION_WITH_DOT, INTERSECTION_WITH_LOGICAL_AND, INTERSECTION_WITH_OVERBAR, INVERTED_EXCLAMATION_MARK, INVERTED_LAZY_S, INVERTED_LAZY_S_WITH_DOUBLE_UNDERLINE, INVERTED_OHM_SIGN, INVERTED_QUESTION_MARK, INVISIBLE_SEPARATOR, INVISIBLE_TIMES, LARGE_CIRCLE, LARGER_THAN, LARGER_THAN_OR_EQUAL_TO, LATIN_CAPITAL_LETTER_A, LATIN_CAPITAL_LETTER_A_WITH_ACUTE, LATIN_CAPITAL_LETTER_A_WITH_BREVE, LATIN_CAPITAL_LETTER_A_WITH_CARON, LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX, LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS, LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS_AND_MACRON, LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE, LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON, 
LATIN_CAPITAL_LETTER_A_WITH_DOUBLE_GRAVE, LATIN_CAPITAL_LETTER_A_WITH_GRAVE, LATIN_CAPITAL_LETTER_A_WITH_INVERTED_BREVE, LATIN_CAPITAL_LETTER_A_WITH_MACRON, LATIN_CAPITAL_LETTER_A_WITH_OGONEK, LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE, LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE, LATIN_CAPITAL_LETTER_A_WITH_STROKE, LATIN_CAPITAL_LETTER_A_WITH_TILDE, LATIN_CAPITAL_LETTER_AE, LATIN_CAPITAL_LETTER_AE_WITH_ACUTE, LATIN_CAPITAL_LETTER_AE_WITH_MACRON, LATIN_CAPITAL_LETTER_AFRICAN_D, LATIN_CAPITAL_LETTER_B, LATIN_CAPITAL_LETTER_B_WITH_HOOK, LATIN_CAPITAL_LETTER_B_WITH_STROKE, LATIN_CAPITAL_LETTER_B_WITH_TOPBAR, LATIN_CAPITAL_LETTER_C, LATIN_CAPITAL_LETTER_C_WITH_ACUTE, LATIN_CAPITAL_LETTER_C_WITH_CARON, LATIN_CAPITAL_LETTER_C_WITH_CEDILLA, LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX, LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE, LATIN_CAPITAL_LETTER_C_WITH_HOOK, LATIN_CAPITAL_LETTER_C_WITH_STROKE, LATIN_CAPITAL_LETTER_D, LATIN_CAPITAL_LETTER_D_WITH_CARON, LATIN_CAPITAL_LETTER_D_WITH_HOOK, LATIN_CAPITAL_LETTER_D_WITH_SMALL_LETTER_Z, LATIN_CAPITAL_LETTER_D_WITH_SMALL_LETTER_Z_WITH_CARON, LATIN_CAPITAL_LETTER_D_WITH_STROKE, LATIN_CAPITAL_LETTER_D_WITH_TOPBAR, LATIN_CAPITAL_LETTER_DZ, LATIN_CAPITAL_LETTER_DZ_WITH_CARON, LATIN_CAPITAL_LETTER_E, LATIN_CAPITAL_LETTER_E_WITH_ACUTE, LATIN_CAPITAL_LETTER_E_WITH_BREVE, LATIN_CAPITAL_LETTER_E_WITH_CARON, LATIN_CAPITAL_LETTER_E_WITH_CEDILLA, LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX, LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS, LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE, LATIN_CAPITAL_LETTER_E_WITH_DOUBLE_GRAVE, LATIN_CAPITAL_LETTER_E_WITH_GRAVE, LATIN_CAPITAL_LETTER_E_WITH_INVERTED_BREVE, LATIN_CAPITAL_LETTER_E_WITH_MACRON, LATIN_CAPITAL_LETTER_E_WITH_MACRON_AND_ACUTE, LATIN_CAPITAL_LETTER_E_WITH_OGONEK, LATIN_CAPITAL_LETTER_E_WITH_STROKE, LATIN_CAPITAL_LETTER_ENG, LATIN_CAPITAL_LETTER_ESH, LATIN_CAPITAL_LETTER_ETH, LATIN_CAPITAL_LETTER_EZH, LATIN_CAPITAL_LETTER_EZH_REVERSED, LATIN_CAPITAL_LETTER_EZH_WITH_CARON, LATIN_CAPITAL_LETTER_F, 
LATIN_CAPITAL_LETTER_F_WITH_HOOK, LATIN_CAPITAL_LETTER_G, LATIN_CAPITAL_LETTER_G_WITH_ACUTE, LATIN_CAPITAL_LETTER_G_WITH_BREVE, LATIN_CAPITAL_LETTER_G_WITH_CARON, LATIN_CAPITAL_LETTER_G_WITH_CEDILLA, LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX, LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE, LATIN_CAPITAL_LETTER_G_WITH_HOOK, LATIN_CAPITAL_LETTER_G_WITH_STROKE, LATIN_CAPITAL_LETTER_GAMMA, LATIN_CAPITAL_LETTER_GLOTTAL_STOP, LATIN_CAPITAL_LETTER_H, LATIN_CAPITAL_LETTER_H_WITH_CARON, LATIN_CAPITAL_LETTER_H_WITH_CEDILLA, LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX, LATIN_CAPITAL_LETTER_H_WITH_DOT_BELOW, LATIN_CAPITAL_LETTER_H_WITH_STROKE, LATIN_CAPITAL_LETTER_HWAIR, LATIN_CAPITAL_LETTER_I, LATIN_CAPITAL_LETTER_I_WITH_ACUTE, LATIN_CAPITAL_LETTER_I_WITH_BREVE, LATIN_CAPITAL_LETTER_I_WITH_CARON, LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX, LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS, LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE, LATIN_CAPITAL_LETTER_I_WITH_DOUBLE_GRAVE, LATIN_CAPITAL_LETTER_I_WITH_GRAVE, LATIN_CAPITAL_LETTER_I_WITH_INVERTED_BREVE, LATIN_CAPITAL_LETTER_I_WITH_MACRON, LATIN_CAPITAL_LETTER_I_WITH_OGONEK, LATIN_CAPITAL_LETTER_I_WITH_STROKE, LATIN_CAPITAL_LETTER_I_WITH_TILDE, LATIN_CAPITAL_LETTER_IOTA, LATIN_CAPITAL_LETTER_J, LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX, LATIN_CAPITAL_LETTER_J_WITH_STROKE, LATIN_CAPITAL_LETTER_K, LATIN_CAPITAL_LETTER_K_WITH_ACUTE, LATIN_CAPITAL_LETTER_K_WITH_CARON, LATIN_CAPITAL_LETTER_K_WITH_CEDILLA, LATIN_CAPITAL_LETTER_K_WITH_HOOK, LATIN_CAPITAL_LETTER_L, LATIN_CAPITAL_LETTER_L_WITH_ACUTE, LATIN_CAPITAL_LETTER_L_WITH_BAR, LATIN_CAPITAL_LETTER_L_WITH_CARON, LATIN_CAPITAL_LETTER_L_WITH_CEDILLA, LATIN_CAPITAL_LETTER_L_WITH_MIDDLE_DOT, LATIN_CAPITAL_LETTER_L_WITH_SMALL_LETTER_J, LATIN_CAPITAL_LETTER_L_WITH_STROKE, LATIN_CAPITAL_LETTER_LJ, LATIN_CAPITAL_LETTER_M, LATIN_CAPITAL_LETTER_N, LATIN_CAPITAL_LETTER_N_WITH_ACUTE, LATIN_CAPITAL_LETTER_N_WITH_CARON, LATIN_CAPITAL_LETTER_N_WITH_CEDILLA, LATIN_CAPITAL_LETTER_N_WITH_DOT_ABOVE, LATIN_CAPITAL_LETTER_N_WITH_DOT_BELOW, 
LATIN_CAPITAL_LETTER_N_WITH_GRAVE, LATIN_CAPITAL_LETTER_N_WITH_LEFT_HOOK, LATIN_CAPITAL_LETTER_N_WITH_LONG_RIGHT_LEG, LATIN_CAPITAL_LETTER_N_WITH_SMALL_LETTER_J, LATIN_CAPITAL_LETTER_N_WITH_TILDE, LATIN_CAPITAL_LETTER_NJ, LATIN_CAPITAL_LETTER_O, LATIN_CAPITAL_LETTER_O_WITH_ACUTE, LATIN_CAPITAL_LETTER_O_WITH_BREVE, LATIN_CAPITAL_LETTER_O_WITH_CARON, LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX, LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS, LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS_AND_MACRON, LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE, LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON, LATIN_CAPITAL_LETTER_O_WITH_DOUBLE_ACUTE, LATIN_CAPITAL_LETTER_O_WITH_DOUBLE_GRAVE, LATIN_CAPITAL_LETTER_O_WITH_GRAVE, LATIN_CAPITAL_LETTER_O_WITH_HORN, LATIN_CAPITAL_LETTER_O_WITH_INVERTED_BREVE, LATIN_CAPITAL_LETTER_O_WITH_MACRON, LATIN_CAPITAL_LETTER_O_WITH_MIDDLE_TILDE, LATIN_CAPITAL_LETTER_O_WITH_OGONEK, LATIN_CAPITAL_LETTER_O_WITH_OGONEK_AND_MACRON, LATIN_CAPITAL_LETTER_O_WITH_STROKE, LATIN_CAPITAL_LETTER_O_WITH_STROKE_AND_ACUTE, LATIN_CAPITAL_LETTER_O_WITH_TILDE, LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_MACRON, LATIN_CAPITAL_LETTER_OI, LATIN_CAPITAL_LETTER_OPEN_E, LATIN_CAPITAL_LETTER_OPEN_O, LATIN_CAPITAL_LETTER_OU, LATIN_CAPITAL_LETTER_P, LATIN_CAPITAL_LETTER_P_WITH_ACUTE, LATIN_CAPITAL_LETTER_P_WITH_HOOK, LATIN_CAPITAL_LETTER_Q, LATIN_CAPITAL_LETTER_R, LATIN_CAPITAL_LETTER_R_WITH_ACUTE, LATIN_CAPITAL_LETTER_R_WITH_CARON, LATIN_CAPITAL_LETTER_R_WITH_CEDILLA, LATIN_CAPITAL_LETTER_R_WITH_DOUBLE_GRAVE, LATIN_CAPITAL_LETTER_R_WITH_INVERTED_BREVE, LATIN_CAPITAL_LETTER_R_WITH_STROKE, LATIN_CAPITAL_LETTER_REVERSED_E, LATIN_CAPITAL_LETTER_S, LATIN_CAPITAL_LETTER_S_WITH_ACUTE, LATIN_CAPITAL_LETTER_S_WITH_CARON, LATIN_CAPITAL_LETTER_S_WITH_CEDILLA, LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX, LATIN_CAPITAL_LETTER_S_WITH_COMMA_BELOW, LATIN_CAPITAL_LETTER_SCHWA, LATIN_CAPITAL_LETTER_SMALL_Q_WITH_HOOK_TAIL, LATIN_CAPITAL_LETTER_T, LATIN_CAPITAL_LETTER_T_WITH_CARON, LATIN_CAPITAL_LETTER_T_WITH_CEDILLA, 
LATIN_CAPITAL_LETTER_T_WITH_COMMA_BELOW, LATIN_CAPITAL_LETTER_T_WITH_DIAGONAL_STROKE, LATIN_CAPITAL_LETTER_T_WITH_HOOK, LATIN_CAPITAL_LETTER_T_WITH_RETROFLEX_HOOK, LATIN_CAPITAL_LETTER_T_WITH_STROKE, LATIN_CAPITAL_LETTER_THORN, LATIN_CAPITAL_LETTER_TONE_FIVE, LATIN_CAPITAL_LETTER_TONE_SIX, LATIN_CAPITAL_LETTER_TONE_TWO, LATIN_CAPITAL_LETTER_TURNED_M, LATIN_CAPITAL_LETTER_TURNED_V, LATIN_CAPITAL_LETTER_U, LATIN_CAPITAL_LETTER_U_BAR, LATIN_CAPITAL_LETTER_U_WITH_ACUTE, LATIN_CAPITAL_LETTER_U_WITH_BREVE, LATIN_CAPITAL_LETTER_U_WITH_CARON, LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX, LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS, LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_ACUTE, LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_CARON, LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_GRAVE, LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_MACRON, LATIN_CAPITAL_LETTER_U_WITH_DOUBLE_ACUTE, LATIN_CAPITAL_LETTER_U_WITH_DOUBLE_GRAVE, LATIN_CAPITAL_LETTER_U_WITH_GRAVE, LATIN_CAPITAL_LETTER_U_WITH_HORN, LATIN_CAPITAL_LETTER_U_WITH_INVERTED_BREVE, LATIN_CAPITAL_LETTER_U_WITH_MACRON, LATIN_CAPITAL_LETTER_U_WITH_OGONEK, LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE, LATIN_CAPITAL_LETTER_U_WITH_TILDE, LATIN_CAPITAL_LETTER_UPSILON, LATIN_CAPITAL_LETTER_V, LATIN_CAPITAL_LETTER_V_WITH_HOOK, LATIN_CAPITAL_LETTER_W, LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX, LATIN_CAPITAL_LETTER_WYNN, LATIN_CAPITAL_LETTER_X, LATIN_CAPITAL_LETTER_Y, LATIN_CAPITAL_LETTER_Y_WITH_ACUTE, LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX, LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS, LATIN_CAPITAL_LETTER_Y_WITH_GRAVE, LATIN_CAPITAL_LETTER_Y_WITH_HOOK, LATIN_CAPITAL_LETTER_Y_WITH_MACRON, LATIN_CAPITAL_LETTER_Y_WITH_STROKE, LATIN_CAPITAL_LETTER_YOGH, LATIN_CAPITAL_LETTER_Z, LATIN_CAPITAL_LETTER_Z_WITH_ACUTE, LATIN_CAPITAL_LETTER_Z_WITH_CARON, LATIN_CAPITAL_LETTER_Z_WITH_CIRCUMFLEX, LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE, LATIN_CAPITAL_LETTER_Z_WITH_HOOK, LATIN_CAPITAL_LETTER_Z_WITH_STROKE, LATIN_CAPITAL_LIGATURE_IJ, LATIN_CAPITAL_LIGATURE_OE, LATIN_LETTER_ALVEOLAR_CLICK, 
LATIN_LETTER_BIDENTAL_PERCUSSIVE, LATIN_LETTER_BILABIAL_CLICK, LATIN_LETTER_BILABIAL_PERCUSSIVE, LATIN_LETTER_DENTAL_CLICK, LATIN_LETTER_GLOTTAL_STOP, LATIN_LETTER_GLOTTAL_STOP_WITH_STROKE, LATIN_LETTER_INVERTED_GLOTTAL_STOP, LATIN_LETTER_INVERTED_GLOTTAL_STOP_WITH_STROKE, LATIN_LETTER_LATERAL_CLICK, LATIN_LETTER_PHARYNGEAL_VOICED_FRICATIVE, LATIN_LETTER_RETROFLEX_CLICK, LATIN_LETTER_REVERSED_ESH_LOOP, LATIN_LETTER_REVERSED_GLOTTAL_STOP_WITH_STROKE, LATIN_LETTER_SMALL_CAPITAL_B, LATIN_LETTER_SMALL_CAPITAL_G, LATIN_LETTER_SMALL_CAPITAL_G_WITH_HOOK, LATIN_LETTER_SMALL_CAPITAL_H, LATIN_LETTER_SMALL_CAPITAL_I, LATIN_LETTER_SMALL_CAPITAL_INVERTED_R, LATIN_LETTER_SMALL_CAPITAL_L, LATIN_LETTER_SMALL_CAPITAL_N, LATIN_LETTER_SMALL_CAPITAL_OE, LATIN_LETTER_SMALL_CAPITAL_R, LATIN_LETTER_SMALL_CAPITAL_Y, LATIN_LETTER_STRETCHED_C, LATIN_LETTER_TWO_WITH_STROKE, LATIN_LETTER_WYNN, LATIN_LETTER_YR, LATIN_SMALL_LETTER_A, LATIN_SMALL_LETTER_A_WITH_ACUTE, LATIN_SMALL_LETTER_A_WITH_BREVE, LATIN_SMALL_LETTER_A_WITH_CARON, LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX, LATIN_SMALL_LETTER_A_WITH_DIAERESIS, LATIN_SMALL_LETTER_A_WITH_DIAERESIS_AND_MACRON, LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE, LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON, LATIN_SMALL_LETTER_A_WITH_DOUBLE_GRAVE, LATIN_SMALL_LETTER_A_WITH_GRAVE, LATIN_SMALL_LETTER_A_WITH_INVERTED_BREVE, LATIN_SMALL_LETTER_A_WITH_MACRON, LATIN_SMALL_LETTER_A_WITH_OGONEK, LATIN_SMALL_LETTER_A_WITH_RING_ABOVE, LATIN_SMALL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE, LATIN_SMALL_LETTER_A_WITH_TILDE, LATIN_SMALL_LETTER_AE, LATIN_SMALL_LETTER_AE_WITH_ACUTE, LATIN_SMALL_LETTER_AE_WITH_MACRON, LATIN_SMALL_LETTER_ALPHA, LATIN_SMALL_LETTER_B, LATIN_SMALL_LETTER_B_WITH_HOOK, LATIN_SMALL_LETTER_B_WITH_STROKE, LATIN_SMALL_LETTER_B_WITH_TOPBAR, LATIN_SMALL_LETTER_BARRED_O, LATIN_SMALL_LETTER_C, LATIN_SMALL_LETTER_C_WITH_ACUTE, LATIN_SMALL_LETTER_C_WITH_CARON, LATIN_SMALL_LETTER_C_WITH_CEDILLA, LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX, LATIN_SMALL_LETTER_C_WITH_CURL, 
LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE, LATIN_SMALL_LETTER_C_WITH_HOOK, LATIN_SMALL_LETTER_C_WITH_STROKE, LATIN_SMALL_LETTER_CLOSED_OMEGA, LATIN_SMALL_LETTER_CLOSED_OPEN_E, LATIN_SMALL_LETTER_CLOSED_REVERSED_OPEN_E, LATIN_SMALL_LETTER_D, LATIN_SMALL_LETTER_D_WITH_CARON, LATIN_SMALL_LETTER_D_WITH_CURL, LATIN_SMALL_LETTER_D_WITH_HOOK, LATIN_SMALL_LETTER_D_WITH_STROKE, LATIN_SMALL_LETTER_D_WITH_TAIL, LATIN_SMALL_LETTER_D_WITH_TOPBAR, LATIN_SMALL_LETTER_DB_DIGRAPH, LATIN_SMALL_LETTER_DEZH_DIGRAPH, LATIN_SMALL_LETTER_DOTLESS_I, LATIN_SMALL_LETTER_DOTLESS_J, LATIN_SMALL_LETTER_DOTLESS_J_WITH_STROKE, LATIN_SMALL_LETTER_DOTLESS_J_WITH_STROKE_AND_HOOK, LATIN_SMALL_LETTER_DZ, LATIN_SMALL_LETTER_DZ_DIGRAPH, LATIN_SMALL_LETTER_DZ_DIGRAPH_WITH_CURL, LATIN_SMALL_LETTER_DZ_WITH_CARON, LATIN_SMALL_LETTER_E, LATIN_SMALL_LETTER_E_WITH_ACUTE, LATIN_SMALL_LETTER_E_WITH_BREVE, LATIN_SMALL_LETTER_E_WITH_CARON, LATIN_SMALL_LETTER_E_WITH_CEDILLA, LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX, LATIN_SMALL_LETTER_E_WITH_DIAERESIS, LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE, LATIN_SMALL_LETTER_E_WITH_DOUBLE_GRAVE, LATIN_SMALL_LETTER_E_WITH_GRAVE, LATIN_SMALL_LETTER_E_WITH_INVERTED_BREVE, LATIN_SMALL_LETTER_E_WITH_MACRON, LATIN_SMALL_LETTER_E_WITH_MACRON_AND_ACUTE, LATIN_SMALL_LETTER_E_WITH_OGONEK, LATIN_SMALL_LETTER_E_WITH_STROKE, LATIN_SMALL_LETTER_ENG, LATIN_SMALL_LETTER_ESH, LATIN_SMALL_LETTER_ESH_WITH_CURL, LATIN_SMALL_LETTER_ETH, LATIN_SMALL_LETTER_EZH, LATIN_SMALL_LETTER_EZH_REVERSED, LATIN_SMALL_LETTER_EZH_WITH_CARON, LATIN_SMALL_LETTER_EZH_WITH_CURL, LATIN_SMALL_LETTER_EZH_WITH_TAIL, LATIN_SMALL_LETTER_F, LATIN_SMALL_LETTER_F_WITH_HOOK, LATIN_SMALL_LETTER_FENG_DIGRAPH, LATIN_SMALL_LETTER_G, LATIN_SMALL_LETTER_G_WITH_ACUTE, LATIN_SMALL_LETTER_G_WITH_BREVE, LATIN_SMALL_LETTER_G_WITH_CARON, LATIN_SMALL_LETTER_G_WITH_CEDILLA, LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX, LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE, LATIN_SMALL_LETTER_G_WITH_HOOK, LATIN_SMALL_LETTER_G_WITH_STROKE, LATIN_SMALL_LETTER_GAMMA, 
LATIN_SMALL_LETTER_GLOTTAL_STOP, LATIN_SMALL_LETTER_H, LATIN_SMALL_LETTER_H_WITH_CARON, LATIN_SMALL_LETTER_H_WITH_CEDILLA, LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX, LATIN_SMALL_LETTER_H_WITH_HOOK, LATIN_SMALL_LETTER_H_WITH_STROKE, LATIN_SMALL_LETTER_HENG_WITH_HOOK, LATIN_SMALL_LETTER_HV, LATIN_SMALL_LETTER_I, LATIN_SMALL_LETTER_I_WITH_ACUTE, LATIN_SMALL_LETTER_I_WITH_BREVE, LATIN_SMALL_LETTER_I_WITH_CARON, LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX, LATIN_SMALL_LETTER_I_WITH_DIAERESIS, LATIN_SMALL_LETTER_I_WITH_DOUBLE_GRAVE, LATIN_SMALL_LETTER_I_WITH_GRAVE, LATIN_SMALL_LETTER_I_WITH_INVERTED_BREVE, LATIN_SMALL_LETTER_I_WITH_MACRON, LATIN_SMALL_LETTER_I_WITH_OGONEK, LATIN_SMALL_LETTER_I_WITH_STROKE, LATIN_SMALL_LETTER_I_WITH_TILDE, LATIN_SMALL_LETTER_IOTA, LATIN_SMALL_LETTER_J, LATIN_SMALL_LETTER_J_WITH_CARON, LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX, LATIN_SMALL_LETTER_J_WITH_CROSSED_TAIL, LATIN_SMALL_LETTER_J_WITH_STROKE, LATIN_SMALL_LETTER_K, LATIN_SMALL_LETTER_K_WITH_ACUTE, LATIN_SMALL_LETTER_K_WITH_CARON, LATIN_SMALL_LETTER_K_WITH_CEDILLA, LATIN_SMALL_LETTER_K_WITH_HOOK, LATIN_SMALL_LETTER_KRA, LATIN_SMALL_LETTER_L, LATIN_SMALL_LETTER_L_WITH_ACUTE, LATIN_SMALL_LETTER_L_WITH_BAR, LATIN_SMALL_LETTER_L_WITH_BELT, LATIN_SMALL_LETTER_L_WITH_CARON, LATIN_SMALL_LETTER_L_WITH_CEDILLA, LATIN_SMALL_LETTER_L_WITH_CURL, LATIN_SMALL_LETTER_L_WITH_MIDDLE_DOT, LATIN_SMALL_LETTER_L_WITH_MIDDLE_TILDE, LATIN_SMALL_LETTER_L_WITH_RETROFLEX_HOOK, LATIN_SMALL_LETTER_L_WITH_STROKE, LATIN_SMALL_LETTER_LAMBDA_WITH_STROKE, LATIN_SMALL_LETTER_LEZH, LATIN_SMALL_LETTER_LJ, LATIN_SMALL_LETTER_LONG_S, LATIN_SMALL_LETTER_LS_DIGRAPH, LATIN_SMALL_LETTER_LZ_DIGRAPH, LATIN_SMALL_LETTER_M, LATIN_SMALL_LETTER_M_WITH_HOOK, LATIN_SMALL_LETTER_N, LATIN_SMALL_LETTER_N_PRECEDED_BY_APOSTROPHE, LATIN_SMALL_LETTER_N_WITH_ACUTE, LATIN_SMALL_LETTER_N_WITH_CARON, LATIN_SMALL_LETTER_N_WITH_CEDILLA, LATIN_SMALL_LETTER_N_WITH_CURL, LATIN_SMALL_LETTER_N_WITH_DOT_ABOVE, LATIN_SMALL_LETTER_N_WITH_DOT_BELOW, 
LATIN_SMALL_LETTER_N_WITH_GRAVE, LATIN_SMALL_LETTER_N_WITH_LEFT_HOOK, LATIN_SMALL_LETTER_N_WITH_LONG_RIGHT_LEG, LATIN_SMALL_LETTER_N_WITH_RETROFLEX_HOOK, LATIN_SMALL_LETTER_N_WITH_TILDE, LATIN_SMALL_LETTER_NJ, LATIN_SMALL_LETTER_O, LATIN_SMALL_LETTER_O_WITH_ACUTE, LATIN_SMALL_LETTER_O_WITH_BREVE, LATIN_SMALL_LETTER_O_WITH_CARON, LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX, LATIN_SMALL_LETTER_O_WITH_DIAERESIS, LATIN_SMALL_LETTER_O_WITH_DIAERESIS_AND_MACRON, LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE, LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON, LATIN_SMALL_LETTER_O_WITH_DOUBLE_ACUTE, LATIN_SMALL_LETTER_O_WITH_DOUBLE_GRAVE, LATIN_SMALL_LETTER_O_WITH_GRAVE, LATIN_SMALL_LETTER_O_WITH_HORN, LATIN_SMALL_LETTER_O_WITH_INVERTED_BREVE, LATIN_SMALL_LETTER_O_WITH_MACRON, LATIN_SMALL_LETTER_O_WITH_OGONEK, LATIN_SMALL_LETTER_O_WITH_OGONEK_AND_MACRON, LATIN_SMALL_LETTER_O_WITH_STROKE, LATIN_SMALL_LETTER_O_WITH_STROKE_AND_ACUTE, LATIN_SMALL_LETTER_O_WITH_TILDE, LATIN_SMALL_LETTER_O_WITH_TILDE_AND_MACRON, LATIN_SMALL_LETTER_OI, LATIN_SMALL_LETTER_OPEN_E, LATIN_SMALL_LETTER_OPEN_O, LATIN_SMALL_LETTER_OU, LATIN_SMALL_LETTER_P, LATIN_SMALL_LETTER_P_WITH_ACUTE, LATIN_SMALL_LETTER_P_WITH_HOOK, LATIN_SMALL_LETTER_PHI, LATIN_SMALL_LETTER_Q, LATIN_SMALL_LETTER_Q_WITH_HOOK, LATIN_SMALL_LETTER_Q_WITH_HOOK_TAIL, LATIN_SMALL_LETTER_QP_DIGRAPH, LATIN_SMALL_LETTER_R, LATIN_SMALL_LETTER_R_WITH_ACUTE, LATIN_SMALL_LETTER_R_WITH_CARON, LATIN_SMALL_LETTER_R_WITH_CEDILLA, LATIN_SMALL_LETTER_R_WITH_DOUBLE_GRAVE, LATIN_SMALL_LETTER_R_WITH_FISHHOOK, LATIN_SMALL_LETTER_R_WITH_INVERTED_BREVE, LATIN_SMALL_LETTER_R_WITH_LONG_LEG, LATIN_SMALL_LETTER_R_WITH_STROKE, LATIN_SMALL_LETTER_R_WITH_TAIL, LATIN_SMALL_LETTER_RAMS_HORN, LATIN_SMALL_LETTER_REVERSED_E, LATIN_SMALL_LETTER_REVERSED_OPEN_E, LATIN_SMALL_LETTER_REVERSED_OPEN_E_WITH_HOOK, LATIN_SMALL_LETTER_REVERSED_R_WITH_FISHHOOK, LATIN_SMALL_LETTER_S, LATIN_SMALL_LETTER_S_WITH_ACUTE, LATIN_SMALL_LETTER_S_WITH_CARON, LATIN_SMALL_LETTER_S_WITH_CEDILLA, 
LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX, LATIN_SMALL_LETTER_S_WITH_COMMA_BELOW, LATIN_SMALL_LETTER_S_WITH_HOOK, LATIN_SMALL_LETTER_S_WITH_SWASH_TAIL, LATIN_SMALL_LETTER_SCHWA, LATIN_SMALL_LETTER_SCHWA_WITH_HOOK, LATIN_SMALL_LETTER_SCRIPT_G, LATIN_SMALL_LETTER_SHARP_S, LATIN_SMALL_LETTER_SQUAT_REVERSED_ESH, LATIN_SMALL_LETTER_T, LATIN_SMALL_LETTER_T_WITH_CARON, LATIN_SMALL_LETTER_T_WITH_CEDILLA, LATIN_SMALL_LETTER_T_WITH_COMMA_BELOW, LATIN_SMALL_LETTER_T_WITH_CURL, LATIN_SMALL_LETTER_T_WITH_HOOK, LATIN_SMALL_LETTER_T_WITH_PALATAL_HOOK, LATIN_SMALL_LETTER_T_WITH_RETROFLEX_HOOK, LATIN_SMALL_LETTER_T_WITH_STROKE, LATIN_SMALL_LETTER_TC_DIGRAPH_WITH_CURL, LATIN_SMALL_LETTER_TESH_DIGRAPH, LATIN_SMALL_LETTER_THORN, LATIN_SMALL_LETTER_TONE_FIVE, LATIN_SMALL_LETTER_TONE_SIX, LATIN_SMALL_LETTER_TONE_TWO, LATIN_SMALL_LETTER_TS_DIGRAPH, LATIN_SMALL_LETTER_TURNED_A, LATIN_SMALL_LETTER_TURNED_ALPHA, LATIN_SMALL_LETTER_TURNED_DELTA, LATIN_SMALL_LETTER_TURNED_E, LATIN_SMALL_LETTER_TURNED_H, LATIN_SMALL_LETTER_TURNED_H_WITH_FISHHOOK, LATIN_SMALL_LETTER_TURNED_H_WITH_FISHHOOK_AND_TAIL, LATIN_SMALL_LETTER_TURNED_K, LATIN_SMALL_LETTER_TURNED_M, LATIN_SMALL_LETTER_TURNED_M_WITH_LONG_LEG, LATIN_SMALL_LETTER_TURNED_R, LATIN_SMALL_LETTER_TURNED_R_WITH_HOOK, LATIN_SMALL_LETTER_TURNED_R_WITH_LONG_LEG, LATIN_SMALL_LETTER_TURNED_T, LATIN_SMALL_LETTER_TURNED_V, LATIN_SMALL_LETTER_TURNED_W, LATIN_SMALL_LETTER_TURNED_Y, LATIN_SMALL_LETTER_U, LATIN_SMALL_LETTER_U_BAR, LATIN_SMALL_LETTER_U_WITH_ACUTE, LATIN_SMALL_LETTER_U_WITH_BREVE, LATIN_SMALL_LETTER_U_WITH_CARON, LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX, LATIN_SMALL_LETTER_U_WITH_DIAERESIS, LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_ACUTE, LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_CARON, LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_GRAVE, LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_MACRON, LATIN_SMALL_LETTER_U_WITH_DOUBLE_ACUTE, LATIN_SMALL_LETTER_U_WITH_DOUBLE_GRAVE, LATIN_SMALL_LETTER_U_WITH_GRAVE, LATIN_SMALL_LETTER_U_WITH_HORN, 
LATIN_SMALL_LETTER_U_WITH_INVERTED_BREVE, LATIN_SMALL_LETTER_U_WITH_MACRON, LATIN_SMALL_LETTER_U_WITH_OGONEK, LATIN_SMALL_LETTER_U_WITH_RING_ABOVE, LATIN_SMALL_LETTER_U_WITH_TILDE, LATIN_SMALL_LETTER_UPSILON, LATIN_SMALL_LETTER_V, LATIN_SMALL_LETTER_V_WITH_HOOK, LATIN_SMALL_LETTER_W, LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX, LATIN_SMALL_LETTER_X, LATIN_SMALL_LETTER_Y, LATIN_SMALL_LETTER_Y_WITH_ACUTE, LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX, LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, LATIN_SMALL_LETTER_Y_WITH_GRAVE, LATIN_SMALL_LETTER_Y_WITH_HOOK, LATIN_SMALL_LETTER_Y_WITH_MACRON, LATIN_SMALL_LETTER_Y_WITH_STROKE, LATIN_SMALL_LETTER_YOGH, LATIN_SMALL_LETTER_Z, LATIN_SMALL_LETTER_Z_WITH_ACUTE, LATIN_SMALL_LETTER_Z_WITH_CARON, LATIN_SMALL_LETTER_Z_WITH_CIRCUMFLEX, LATIN_SMALL_LETTER_Z_WITH_CURL, LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE, LATIN_SMALL_LETTER_Z_WITH_HOOK, LATIN_SMALL_LETTER_Z_WITH_RETROFLEX_HOOK, LATIN_SMALL_LETTER_Z_WITH_STROKE, LATIN_SMALL_LETTER_Z_WITH_SWASH_TAIL, LATIN_SMALL_LIGATURE_FF, LATIN_SMALL_LIGATURE_FFI, LATIN_SMALL_LIGATURE_FFL, LATIN_SMALL_LIGATURE_FI, LATIN_SMALL_LIGATURE_FL, LATIN_SMALL_LIGATURE_IJ, LATIN_SMALL_LIGATURE_OE, LEFT_ANGLE_BRACKET_WITH_DOT, LEFT_ARC_LESS_THAN_BRACKET, LEFT_BARB_DOWN_RIGHT_BARB_DOWN_HARPOON, LEFT_BARB_DOWN_RIGHT_BARB_UP_HARPOON, LEFT_BARB_UP_RIGHT_BARB_DOWN_HARPOON, LEFT_BARB_UP_RIGHT_BARB_UP_HARPOON, LEFT_CEILING, LEFT_CURLY_BRACKET, LEFT_DOUBLE_ANGLE_BRACKET, LEFT_DOUBLE_QUOTATION_MARK, LEFT_DOUBLE_WIGGLY_FENCE, LEFT_FISH_TAIL, LEFT_FLOOR, LEFT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT, LEFT_PARENTHESIS, LEFT_POINTING_ANGLE_BRACKET, LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK, LEFT_RIGHT_ARROW, LEFT_RIGHT_ARROW_THROUGH_SMALL_CIRCLE, LEFT_RIGHT_ARROW_WITH_STROKE, LEFT_RIGHT_DOUBLE_ARROW, LEFT_RIGHT_DOUBLE_ARROW_WITH_STROKE, LEFT_RIGHT_DOUBLE_ARROW_WITH_VERTICAL_STROKE, LEFT_RIGHT_OPEN_HEADED_ARROW, LEFT_RIGHT_WAVE_ARROW, LEFT_SEMIDIRECT_PRODUCT, LEFT_SIDE_ARC_ANTICLOCKWISE_ARROW, LEFT_SINGLE_QUOTATION_MARK, LEFT_SQUARE_BRACKET, 
LEFT_SQUARE_BRACKET_WITH_TICK_IN_BOTTOM_CORNER, LEFT_SQUARE_BRACKET_WITH_TICK_IN_TOP_CORNER, LEFT_SQUARE_BRACKET_WITH_UNDERBAR, LEFT_TACK, LEFT_TORTOISE_SHELL_BRACKET, LEFT_TRIANGLE_BESIDE_VERTICAL_BAR, LEFT_WHITE_PARENTHESIS, LEFT_WHITE_SQUARE_BRACKET, LEFT_WHITE_TORTOISE_SHELL_BRACKET, LEFTWARDS_ARROW, LEFTWARDS_ARROW_ABOVE_TILDE_OPERATOR, LEFTWARDS_ARROW_FROM_BAR, LEFTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND, LEFTWARDS_ARROW_OVER_RIGHTWARDS_ARROW, LEFTWARDS_ARROW_TAIL, LEFTWARDS_ARROW_TO_BAR, LEFTWARDS_ARROW_TO_BLACK_DIAMOND, LEFTWARDS_ARROW_WITH_HOOK, LEFTWARDS_ARROW_WITH_LOOP, LEFTWARDS_ARROW_WITH_STROKE, LEFTWARDS_ARROW_WITH_TAIL, LEFTWARDS_DOUBLE_ARROW, LEFTWARDS_DOUBLE_ARROW_TAIL, LEFTWARDS_DOUBLE_ARROW_WITH_STROKE, LEFTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE, LEFTWARDS_DOUBLE_DASH_ARROW, LEFTWARDS_HARPOON_OVER_RIGHTWARDS_HARPOON, LEFTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN, LEFTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH, LEFTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR, LEFTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR, LEFTWARDS_HARPOON_WITH_BARB_DOWNWARDS, LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN, LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH, LEFTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_UP, LEFTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR, LEFTWARDS_HARPOON_WITH_BARB_UP_TO_BAR, LEFTWARDS_HARPOON_WITH_BARB_UPWARDS, LEFTWARDS_OPEN_HEADED_ARROW, LEFTWARDS_PAIRED_ARROWS, LEFTWARDS_TRIPLE_ARROW, LEFTWARDS_TRIPLE_DASH_ARROW, LEFTWARDS_TWO_HEADED_ARROW, LESS_THAN_ABOVE_DOUBLE_LINE_EQUAL_ABOVE_GREATER_THAN, LESS_THAN_ABOVE_GREATER_THAN_ABOVE_DOUBLE_LINE_EQUAL, LESS_THAN_ABOVE_LEFTWARDS_ARROW, LESS_THAN_ABOVE_SIMILAR_ABOVE_GREATER_THAN, LESS_THAN_ABOVE_SIMILAR_OR_EQUAL, LESS_THAN_ABOVE_SLANTED_EQUAL_ABOVE_GREATER_THAN_ABOVE_SLANTED_EQUAL, LESS_THAN_AND_NOT_APPROXIMATE, LESS_THAN_AND_SINGLE_LINE_NOT_EQUAL_TO, LESS_THAN_BUT_NOT_EQUAL_TO, LESS_THAN_BUT_NOT_EQUIVALENT_TO, LESS_THAN_CLOSED_BY_CURVE, 
LESS_THAN_CLOSED_BY_CURVE_ABOVE_SLANTED_EQUAL, LESS_THAN_EQUAL_TO_OR_GREATER_THAN, LESS_THAN_OR_APPROXIMATE, LESS_THAN_OR_EQUAL_TO, LESS_THAN_OR_EQUIVALENT_TO, LESS_THAN_OR_GREATER_THAN, LESS_THAN_OR_SLANTED_EQUAL_TO, LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE, LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_ABOVE_RIGHT, LESS_THAN_OR_SLANTED_EQUAL_TO_WITH_DOT_INSIDE, LESS_THAN_OVER_EQUAL_TO, LESS_THAN_SIGN, LESS_THAN_WITH_CIRCLE_INSIDE, LESS_THAN_WITH_DOT, LESS_THAN_WITH_QUESTION_MARK_ABOVE, LIGHT_SHADE, LIGHT_VERTICAL_BAR, LINE_FEED, LINE_INTEGRATION_NOT_INCLUDING_THE_POLE, LINE_INTEGRATION_WITH_RECTANGULAR_PATH_AROUND_POLE, LINE_INTEGRATION_WITH_SEMICIRCULAR_PATH_AROUND_POLE, LOGICAL_AND, LOGICAL_AND_WITH_HORIZONTAL_DASH, LOGICAL_AND_WITH_MIDDLE_STEM, LOGICAL_AND_WITH_UNDERBAR, LOGICAL_OR, LOGICAL_OR_WITH_HORIZONTAL_DASH, LOGICAL_OR_WITH_MIDDLE_STEM, LONG_DASH_FROM_LEFT_MEMBER_OF_DOUBLE_VERTICAL, LONG_LEFT_RIGHT_ARROW, LONG_LEFT_RIGHT_DOUBLE_ARROW, LONG_LEFTWARDS_ARROW, LONG_LEFTWARDS_DOUBLE_ARROW, LONG_RIGHTWARDS_ARROW, LONG_RIGHTWARDS_ARROW_FROM_BAR, LONG_RIGHTWARDS_DOUBLE_ARROW, LONG_RIGHTWARDS_SQUIGGLE_ARROW, LOW_LINE, LOWER_HALF_BLOCK, LOWER_LEFT_TRIANGLE, LOZENGE, MACRON, MALE_SIGN, MALTESE_CROSS, MASCULINE_ORDINAL_INDICATOR, MEASURED_ANGLE, MEASURED_ANGLE_WITH_OPEN_ARM_ENDING_IN_ARROW_POINTING_DOWN_AND_LEFT, MEASURED_ANGLE_WITH_OPEN_ARM_ENDING_IN_ARROW_POINTING_DOWN_AND_RIGHT, MEASURED_ANGLE_WITH_OPEN_ARM_ENDING_IN_ARROW_POINTING_LEFT_AND_DOWN, MEASURED_ANGLE_WITH_OPEN_ARM_ENDING_IN_ARROW_POINTING_LEFT_AND_UP, MEASURED_ANGLE_WITH_OPEN_ARM_ENDING_IN_ARROW_POINTING_RIGHT_AND_DOWN, MEASURED_ANGLE_WITH_OPEN_ARM_ENDING_IN_ARROW_POINTING_RIGHT_AND_UP, MEASURED_ANGLE_WITH_OPEN_ARM_ENDING_IN_ARROW_POINTING_UP_AND_LEFT, MEASURED_ANGLE_WITH_OPEN_ARM_ENDING_IN_ARROW_POINTING_UP_AND_RIGHT, MEASURED_RIGHT_ANGLE_WITH_DOT, MEDIUM_MATHEMATICAL_SPACE, MEDIUM_SHADE, MICRO_SIGN, MIDDLE_DOT, MIDLINE_HORIZONTAL_ELLIPSIS, MINUS_OR_PLUS_SIGN, MINUS_SIGN, MINUS_SIGN_IN_TRIANGLE, 
MINUS_SIGN_WITH_COMMA_ABOVE, MINUS_SIGN_WITH_DOT_BELOW, MINUS_TILDE, MODELS, MODIFIER_LETTER_ACUTE_ACCENT, MODIFIER_LETTER_APOSTROPHE, MODIFIER_LETTER_CENTRED_LEFT_HALF_RING, MODIFIER_LETTER_CENTRED_RIGHT_HALF_RING, MODIFIER_LETTER_CIRCUMFLEX_ACCENT, MODIFIER_LETTER_CROSS_ACCENT, MODIFIER_LETTER_DOUBLE_APOSTROPHE, MODIFIER_LETTER_DOUBLE_PRIME, MODIFIER_LETTER_DOWN_ARROWHEAD, MODIFIER_LETTER_DOWN_TACK, MODIFIER_LETTER_EXTRA_HIGH_TONE_BAR, MODIFIER_LETTER_EXTRA_LOW_TONE_BAR, MODIFIER_LETTER_GLOTTAL_STOP, MODIFIER_LETTER_GRAVE_ACCENT, MODIFIER_LETTER_HALF_TRIANGULAR_COLON, MODIFIER_LETTER_HIGH_TONE_BAR, MODIFIER_LETTER_LEFT_ARROWHEAD, MODIFIER_LETTER_LEFT_HALF_RING, MODIFIER_LETTER_LOW_ACUTE_ACCENT, MODIFIER_LETTER_LOW_GRAVE_ACCENT, MODIFIER_LETTER_LOW_MACRON, MODIFIER_LETTER_LOW_TONE_BAR, MODIFIER_LETTER_LOW_VERTICAL_LINE, MODIFIER_LETTER_MACRON, MODIFIER_LETTER_MID_TONE_BAR, MODIFIER_LETTER_MINUS_SIGN, MODIFIER_LETTER_PLUS_SIGN, MODIFIER_LETTER_PRIME, MODIFIER_LETTER_REVERSED_COMMA, MODIFIER_LETTER_REVERSED_GLOTTAL_STOP, MODIFIER_LETTER_RHOTIC_HOOK, MODIFIER_LETTER_RIGHT_ARROWHEAD, MODIFIER_LETTER_RIGHT_HALF_RING, MODIFIER_LETTER_SMALL_CAPITAL_INVERTED_R, MODIFIER_LETTER_SMALL_GAMMA, MODIFIER_LETTER_SMALL_H, MODIFIER_LETTER_SMALL_H_WITH_HOOK, MODIFIER_LETTER_SMALL_J, MODIFIER_LETTER_SMALL_L, MODIFIER_LETTER_SMALL_R, MODIFIER_LETTER_SMALL_REVERSED_GLOTTAL_STOP, MODIFIER_LETTER_SMALL_S, MODIFIER_LETTER_SMALL_TURNED_R, MODIFIER_LETTER_SMALL_TURNED_R_WITH_HOOK, MODIFIER_LETTER_SMALL_W, MODIFIER_LETTER_SMALL_X, MODIFIER_LETTER_SMALL_Y, MODIFIER_LETTER_TRIANGULAR_COLON, MODIFIER_LETTER_TURNED_COMMA, MODIFIER_LETTER_UNASPIRATED, MODIFIER_LETTER_UP_ARROWHEAD, MODIFIER_LETTER_UP_TACK, MODIFIER_LETTER_VERTICAL_LINE, MODIFIER_LETTER_VOICING, MODIFIER_LETTER_YANG_DEPARTING_TONE_MARK, MODIFIER_LETTER_YIN_DEPARTING_TONE_MARK, MUCH_GREATER_THAN, MUCH_LESS_THAN, MULTIMAP, MULTIPLICATION_SIGN, MULTIPLICATION_SIGN_IN_DOUBLE_CIRCLE, MULTIPLICATION_SIGN_IN_LEFT_HALF_CIRCLE, 
MULTIPLICATION_SIGN_IN_RIGHT_HALF_CIRCLE, MULTIPLICATION_SIGN_IN_TRIANGLE, MULTIPLICATION_SIGN_WITH_DOT_ABOVE, MULTIPLICATION_SIGN_WITH_UNDERBAR, MULTISET_MULTIPLICATION, MULTISET_UNION, MUSIC_FLAT_SIGN, MUSIC_NATURAL_SIGN, MUSIC_SHARP_SIGN, N_ARY_CIRCLED_DOT_OPERATOR, N_ARY_CIRCLED_PLUS_OPERATOR, N_ARY_CIRCLED_TIMES_OPERATOR, N_ARY_COPRODUCT, N_ARY_INTERSECTION, N_ARY_LOGICAL_AND, N_ARY_LOGICAL_OR, N_ARY_PRODUCT, N_ARY_SQUARE_UNION_OPERATOR, N_ARY_SUMMATION, N_ARY_UNION, N_ARY_UNION_OPERATOR_WITH_PLUS, NABLA, NEGATED_DOUBLE_VERTICAL_BAR_DOUBLE_RIGHT_TURNSTILE, NEITHER_A_SUBSET_OF_NOR_EQUAL_TO, NEITHER_A_SUPERSET_OF_NOR_EQUAL_TO, NEITHER_APPROXIMATELY_NOR_ACTUALLY_EQUAL_TO, NEITHER_GREATER_THAN_NOR_EQUAL_TO, NEITHER_GREATER_THAN_NOR_EQUIVALENT_TO, NEITHER_GREATER_THAN_NOR_LESS_THAN, NEITHER_LESS_THAN_NOR_EQUAL_TO, NEITHER_LESS_THAN_NOR_EQUIVALENT_TO, NEITHER_LESS_THAN_NOR_GREATER_THAN, NO_BREAK_SPACE, NON_BREAKING_HYPHEN, NOR, NORMAL_SUBGROUP_OF, NORMAL_SUBGROUP_OF_OR_EQUAL_TO, NORTH_EAST_ARROW, NORTH_EAST_ARROW_AND_SOUTH_EAST_ARROW, NORTH_EAST_ARROW_WITH_HOOK, NORTH_EAST_DOUBLE_ARROW, NORTH_WEST_ARROW, NORTH_WEST_ARROW_AND_NORTH_EAST_ARROW, NORTH_WEST_ARROW_WITH_HOOK, NORTH_WEST_DOUBLE_ARROW, NOT_A_SUBSET_OF, NOT_A_SUPERSET_OF, NOT_ALMOST_EQUAL_TO, NOT_AN_ELEMENT_OF, NOT_ASYMPTOTICALLY_EQUAL_TO, NOT_EQUAL_TO, NOT_EQUIVALENT_TO, NOT_GREATER_THAN, NOT_IDENTICAL_TO, NOT_LESS_THAN, NOT_NORMAL_SUBGROUP_OF, NOT_NORMAL_SUBGROUP_OF_OR_EQUAL_TO, NOT_PARALLEL_TO, NOT_SIGN, NOT_SQUARE_IMAGE_OF_OR_EQUAL_TO, NOT_SQUARE_ORIGINAL_OF_OR_EQUAL_TO, NOT_TILDE, NOT_TRUE, NULL, NUMBER_SIGN, NUMERO_SIGN, OBLIQUE_ANGLE_OPENING_DOWN, OBLIQUE_ANGLE_OPENING_UP, OGONEK, OHM_SIGN, OPEN_BOX, ORIGINAL_OF, PARALLEL_TO, PARALLEL_WITH_HORIZONTAL_STROKE, PARALLEL_WITH_TILDE_OPERATOR, PARTIAL_DIFFERENTIAL, PER_MILLE_SIGN, PER_TEN_THOUSAND_SIGN, PERCENT_SIGN, PERSPECTIVE, PILCROW_SIGN, PITCHFORK, PITCHFORK_WITH_TEE_TOP, PLANCK_CONSTANT, PLANCK_CONSTANT_OVER_TWO_PI, PLUS_MINUS_SIGN, PLUS_SIGN, 
PLUS_SIGN_ABOVE_EQUALS_SIGN, PLUS_SIGN_IN_LEFT_HALF_CIRCLE, PLUS_SIGN_IN_RIGHT_HALF_CIRCLE, PLUS_SIGN_IN_TRIANGLE, PLUS_SIGN_WITH_CIRCUMFLEX_ACCENT_ABOVE, PLUS_SIGN_WITH_DOT_BELOW, PLUS_SIGN_WITH_SMALL_CIRCLE_ABOVE, PLUS_SIGN_WITH_SUBSCRIPT_TWO, PLUS_SIGN_WITH_TILDE_ABOVE, PLUS_SIGN_WITH_TILDE_BELOW, POSITION_INDICATOR, POUND_SIGN, PRECEDES, PRECEDES_ABOVE_ALMOST_EQUAL_TO, PRECEDES_ABOVE_EQUALS_SIGN, PRECEDES_ABOVE_NOT_ALMOST_EQUAL_TO, PRECEDES_ABOVE_NOT_EQUAL_TO, PRECEDES_ABOVE_SINGLE_LINE_EQUALS_SIGN, PRECEDES_BUT_NOT_EQUIVALENT_TO, PRECEDES_OR_EQUAL_TO, PRECEDES_OR_EQUIVALENT_TO, PRECEDES_UNDER_RELATION, PRESCRIPTION_TAKE, PRESENTATION_FORM_FOR_VERTICAL_LEFT_CURLY_BRACKET, PRESENTATION_FORM_FOR_VERTICAL_LEFT_PARENTHESIS, PRESENTATION_FORM_FOR_VERTICAL_RIGHT_CURLY_BRACKET, PRESENTATION_FORM_FOR_VERTICAL_RIGHT_PARENTHESIS, PRIME, PROJECTIVE, PROPORTION, PROPORTIONAL_TO, PUNCTUATION_SPACE, QUADRUPLE_INTEGRAL_OPERATOR, QUADRUPLE_PRIME, QUATERNION_INTEGRAL_OPERATOR, QUESTION_MARK, QUESTIONED_EQUAL_TO, QUOTATION_MARK, RATIO, REGISTERED_SIGN, REPLACEMENT_CHARACTER, REVERSE_SOLIDUS, REVERSED_ANGLE_WITH_UNDERBAR, REVERSED_DOUBLE_STROKE_NOT_SIGN, REVERSED_EMPTY_SET, REVERSED_NOT_SIGN, REVERSED_PRIME, REVERSED_SEMICOLON, REVERSED_TILDE, REVERSED_TILDE_EQUALS, RIGHT_ANGLE, RIGHT_ANGLE_BRACKET_WITH_DOT, RIGHT_ANGLE_VARIANT_WITH_SQUARE, RIGHT_ANGLE_WITH_ARC, RIGHT_ANGLE_WITH_DOWNWARDS_ZIGZAG_ARROW, RIGHT_ARC_GREATER_THAN_BRACKET, RIGHT_CEILING, RIGHT_CURLY_BRACKET, RIGHT_DOUBLE_ANGLE_BRACKET, RIGHT_DOUBLE_ARROW_WITH_ROUNDED_HEAD, RIGHT_DOUBLE_QUOTATION_MARK, RIGHT_FISH_TAIL, RIGHT_FLOOR, RIGHT_NORMAL_FACTOR_SEMIDIRECT_PRODUCT, RIGHT_PARENTHESIS, RIGHT_POINTING_ANGLE_BRACKET, RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK, RIGHT_SEMIDIRECT_PRODUCT, RIGHT_SIDE_ARC_CLOCKWISE_ARROW, RIGHT_SINGLE_QUOTATION_MARK, RIGHT_SQUARE_BRACKET, RIGHT_SQUARE_BRACKET_WITH_TICK_IN_BOTTOM_CORNER, RIGHT_SQUARE_BRACKET_WITH_TICK_IN_TOP_CORNER, RIGHT_SQUARE_BRACKET_WITH_UNDERBAR, RIGHT_TACK, 
RIGHT_TORTOISE_SHELL_BRACKET, RIGHT_TRIANGLE, RIGHT_TRIANGLE_ABOVE_LEFT_TRIANGLE, RIGHT_WHITE_PARENTHESIS, RIGHT_WHITE_SQUARE_BRACKET, RIGHT_WHITE_TORTOISE_SHELL_BRACKET, RIGHTWARDS_ARROW, RIGHTWARDS_ARROW_ABOVE_ALMOST_EQUAL_TO, RIGHTWARDS_ARROW_ABOVE_TILDE_OPERATOR, RIGHTWARDS_ARROW_FROM_BAR, RIGHTWARDS_ARROW_FROM_BAR_TO_BLACK_DIAMOND, RIGHTWARDS_ARROW_OVER_LEFTWARDS_ARROW, RIGHTWARDS_ARROW_TAIL, RIGHTWARDS_ARROW_TO_BAR, RIGHTWARDS_ARROW_TO_BLACK_DIAMOND, RIGHTWARDS_ARROW_WITH_DOTTED_STEM, RIGHTWARDS_ARROW_WITH_HOOK, RIGHTWARDS_ARROW_WITH_LOOP, RIGHTWARDS_ARROW_WITH_PLUS_BELOW, RIGHTWARDS_ARROW_WITH_STROKE, RIGHTWARDS_ARROW_WITH_TAIL, RIGHTWARDS_DOUBLE_ARROW, RIGHTWARDS_DOUBLE_ARROW_TAIL, RIGHTWARDS_DOUBLE_ARROW_WITH_STROKE, RIGHTWARDS_DOUBLE_ARROW_WITH_VERTICAL_STROKE, RIGHTWARDS_DOUBLE_DASH_ARROW, RIGHTWARDS_HARPOON_OVER_LEFTWARDS_HARPOON, RIGHTWARDS_HARPOON_WITH_BARB_DOWN_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_DOWN, RIGHTWARDS_HARPOON_WITH_BARB_DOWN_BELOW_LONG_DASH, RIGHTWARDS_HARPOON_WITH_BARB_DOWN_FROM_BAR, RIGHTWARDS_HARPOON_WITH_BARB_DOWN_TO_BAR, RIGHTWARDS_HARPOON_WITH_BARB_DOWNWARDS, RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LEFTWARDS_HARPOON_WITH_BARB_UP, RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_LONG_DASH, RIGHTWARDS_HARPOON_WITH_BARB_UP_ABOVE_RIGHTWARDS_HARPOON_WITH_BARB_DOWN, RIGHTWARDS_HARPOON_WITH_BARB_UP_FROM_BAR, RIGHTWARDS_HARPOON_WITH_BARB_UP_TO_BAR, RIGHTWARDS_HARPOON_WITH_BARB_UPWARDS, RIGHTWARDS_OPEN_HEADED_ARROW, RIGHTWARDS_PAIRED_ARROWS, RIGHTWARDS_SQUIGGLE_ARROW, RIGHTWARDS_TRIPLE_ARROW, RIGHTWARDS_TRIPLE_DASH_ARROW, RIGHTWARDS_TWO_HEADED_ARROW, RIGHTWARDS_TWO_HEADED_ARROW_FROM_BAR, RIGHTWARDS_TWO_HEADED_ARROW_WITH_TAIL, RIGHTWARDS_TWO_HEADED_TRIPLE_DASH_ARROW, RIGHTWARDS_WAVE_ARROW, RING_ABOVE, RING_EQUAL_TO, RING_IN_EQUAL_TO, RING_OPERATOR, RULE_DELAYED, SCRIPT_CAPITAL_B, SCRIPT_CAPITAL_E, SCRIPT_CAPITAL_F, SCRIPT_CAPITAL_H, SCRIPT_CAPITAL_I, SCRIPT_CAPITAL_L, SCRIPT_CAPITAL_M, SCRIPT_CAPITAL_P, SCRIPT_CAPITAL_R, SCRIPT_SMALL_E, SCRIPT_SMALL_G, 
SCRIPT_SMALL_L, SCRIPT_SMALL_O, SECTION_SIGN, SEGMENT, SEMICOLON, SET_MINUS, SHORT_DOWN_TACK_WITH_OVERBAR, SHORT_UP_TACK_ABOVE_SHORT_DOWN_TACK, SHORT_UP_TACK_WITH_UNDERBAR, SIMILAR_ABOVE_GREATER_THAN_ABOVE_EQUALS_SIGN, SIMILAR_ABOVE_LESS_THAN_ABOVE_EQUALS_SIGN, SIMILAR_OR_GREATER_THAN, SIMILAR_OR_LESS_THAN, SINE_WAVE, SINGLE_HIGH_REVERSED_9_QUOTATION_MARK, SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK, SINGLE_LOW_9_QUOTATION_MARK, SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK, SIX_POINTED_BLACK_STAR, SLANTED_EQUAL_TO_OR_GREATER_THAN, SLANTED_EQUAL_TO_OR_GREATER_THAN_WITH_DOT_INSIDE, SLANTED_EQUAL_TO_OR_LESS_THAN, SLANTED_EQUAL_TO_OR_LESS_THAN_WITH_DOT_INSIDE, SLOPING_LARGE_AND, SLOPING_LARGE_OR, SMALL_CONTAINS_WITH_OVERBAR, SMALL_CONTAINS_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, SMALL_ELEMENT_OF_WITH_OVERBAR, SMALL_ELEMENT_OF_WITH_VERTICAL_BAR_AT_END_OF_HORIZONTAL_STROKE, SMALL_TILDE, SMALLER_THAN, SMALLER_THAN_OR_EQUAL_TO, SMASH_PRODUCT, SMILE, SOFT_HYPHEN, SOLIDUS, SOLIDUS_WITH_OVERBAR, SOUND_RECORDING_COPYRIGHT, SOUTH_EAST_ARROW, SOUTH_EAST_ARROW_AND_SOUTH_WEST_ARROW, SOUTH_EAST_ARROW_WITH_HOOK, SOUTH_EAST_DOUBLE_ARROW, SOUTH_WEST_ARROW, SOUTH_WEST_ARROW_AND_NORTH_WEST_ARROW, SOUTH_WEST_ARROW_WITH_HOOK, SOUTH_WEST_DOUBLE_ARROW, SPACE, SPHERICAL_ANGLE, SQUARE_CAP, SQUARE_CUP, SQUARE_IMAGE_OF, SQUARE_IMAGE_OF_OR_EQUAL_TO, SQUARE_ORIGINAL_OF, SQUARE_ORIGINAL_OF_OR_EQUAL_TO, SQUARE_ROOT, SQUARED_DOT_OPERATOR, SQUARED_FALLING_DIAGONAL_SLASH, SQUARED_MINUS, SQUARED_PLUS, SQUARED_RISING_DIAGONAL_SLASH, SQUARED_TIMES, STAR_OPERATOR, SUBSET_ABOVE_RIGHTWARDS_ARROW, SUBSET_ABOVE_SUBSET, SUBSET_ABOVE_SUPERSET, SUBSET_OF, SUBSET_OF_ABOVE_EQUALS_SIGN, SUBSET_OF_ABOVE_NOT_EQUAL_TO, SUBSET_OF_ABOVE_TILDE_OPERATOR, SUBSET_OF_OR_EQUAL_TO, SUBSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE, SUBSET_OF_WITH_NOT_EQUAL_TO, SUBSET_WITH_DOT, SUBSET_WITH_MULTIPLICATION_SIGN_BELOW, SUBSET_WITH_PLUS_SIGN_BELOW, SUCCEEDS, SUCCEEDS_ABOVE_ALMOST_EQUAL_TO, SUCCEEDS_ABOVE_EQUALS_SIGN, 
SUCCEEDS_ABOVE_NOT_ALMOST_EQUAL_TO, SUCCEEDS_ABOVE_NOT_EQUAL_TO, SUCCEEDS_ABOVE_SINGLE_LINE_EQUALS_SIGN, SUCCEEDS_BUT_NOT_EQUIVALENT_TO, SUCCEEDS_OR_EQUAL_TO, SUCCEEDS_OR_EQUIVALENT_TO, SUPERSCRIPT_ONE, SUPERSCRIPT_THREE, SUPERSCRIPT_TWO, SUPERSET_ABOVE_LEFTWARDS_ARROW, SUPERSET_ABOVE_SUBSET, SUPERSET_ABOVE_SUPERSET, SUPERSET_BESIDE_AND_JOINED_BY_DASH_WITH_SUBSET, SUPERSET_BESIDE_SUBSET, SUPERSET_OF, SUPERSET_OF_ABOVE_EQUALS_SIGN, SUPERSET_OF_ABOVE_NOT_EQUAL_TO, SUPERSET_OF_ABOVE_TILDE_OPERATOR, SUPERSET_OF_OR_EQUAL_TO, SUPERSET_OF_OR_EQUAL_TO_WITH_DOT_ABOVE, SUPERSET_OF_WITH_NOT_EQUAL_TO, SUPERSET_WITH_DOT, SUPERSET_WITH_MULTIPLICATION_SIGN_BELOW, SUPERSET_WITH_PLUS_SIGN_BELOW, SURFACE_INTEGRAL, SWUNG_DASH, TELEPHONE_RECORDER, THERE_DOES_NOT_EXIST, THERE_EXISTS, THEREFORE, THIN_SPACE, THREE_PER_EM_SPACE, TIE_OVER_INFINITY, TILDE, TILDE_OPERATOR, TILDE_OPERATOR_ABOVE_RIGHTWARDS_ARROW, TILDE_OPERATOR_WITH_DOT_ABOVE, TOP_ARC_ANTICLOCKWISE_ARROW_WITH_PLUS, TOP_ARC_CLOCKWISE_ARROW_WITH_MINUS, TOP_LEFT_CORNER, TOP_LEFT_CROP, TOP_RIGHT_CORNER, TOP_RIGHT_CROP, TOP_SQUARE_BRACKET, TRADE_MARK_SIGN, TRANSVERSAL_INTERSECTION, TRIANGLE_WITH_SERIFS_AT_BOTTOM, TRIANGULAR_BULLET, TRIPLE_INTEGRAL, TRIPLE_PRIME, TRIPLE_TILDE, TRIPLE_VERTICAL_BAR_RIGHT_TURNSTILE, TRUE, TURNED_GREEK_SMALL_LETTER_IOTA, TWO_CONSECUTIVE_EQUALS_SIGNS, TWO_DOT_LEADER, TWO_INTERSECTING_LOGICAL_AND, TWO_INTERSECTING_LOGICAL_OR, TWO_JOINED_SQUARES, UNION, UNION_ABOVE_BAR_ABOVE_INTERSECTION, UNION_ABOVE_INTERSECTION, UNION_BESIDE_AND_JOINED_WITH_UNION, UNION_WITH_LOGICAL_OR, UNION_WITH_OVERBAR, UP_BARB_LEFT_DOWN_BARB_LEFT_HARPOON, UP_BARB_RIGHT_DOWN_BARB_RIGHT_HARPOON, UP_DOWN_ARROW, UP_DOWN_DOUBLE_ARROW, UP_FISH_TAIL, UP_RIGHT_DIAGONAL_ELLIPSIS, UP_TACK, UPPER_HALF_BLOCK, UPPER_LEFT_OR_LOWER_RIGHT_CURLY_BRACKET_SECTION, UPPER_LEFT_TRIANGLE, UPPER_RIGHT_OR_LOWER_LEFT_CURLY_BRACKET_SECTION, UPPER_RIGHT_TRIANGLE, UPWARDS_ARROW, UPWARDS_ARROW_FROM_BAR, UPWARDS_ARROW_LEFTWARDS_OF_DOWNWARDS_ARROW, 
UPWARDS_ARROW_TO_BAR, UPWARDS_ARROW_WITH_TIP_LEFTWARDS, UPWARDS_ARROW_WITH_TIP_RIGHTWARDS, UPWARDS_DOUBLE_ARROW, UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_DOWNWARDS_HARPOON_WITH_BARB_RIGHT, UPWARDS_HARPOON_WITH_BARB_LEFT_BESIDE_UPWARDS_HARPOON_WITH_BARB_RIGHT, UPWARDS_HARPOON_WITH_BARB_LEFT_FROM_BAR, UPWARDS_HARPOON_WITH_BARB_LEFT_TO_BAR, UPWARDS_HARPOON_WITH_BARB_LEFTWARDS, UPWARDS_HARPOON_WITH_BARB_RIGHT_FROM_BAR, UPWARDS_HARPOON_WITH_BARB_RIGHT_TO_BAR, UPWARDS_HARPOON_WITH_BARB_RIGHTWARDS, UPWARDS_PAIRED_ARROWS, UPWARDS_TWO_HEADED_ARROW, UPWARDS_TWO_HEADED_ARROW_FROM_SMALL_CIRCLE, VECTOR_OR_CROSS_PRODUCT, VERTICAL_BAR_BESIDE_RIGHT_TRIANGLE, VERTICAL_BAR_DOUBLE_LEFT_TURNSTILE, VERTICAL_ELLIPSIS, VERTICAL_LINE, VERTICAL_LINE_WITH_CIRCLE_ABOVE, VERTICAL_LINE_WITH_CIRCLE_BELOW, VERTICAL_ZIGZAG_LINE, VERY_MUCH_GREATER_THAN, VERY_MUCH_LESS_THAN, VOLUME_INTEGRAL, VULGAR_FRACTION_FIVE_EIGHTHS, VULGAR_FRACTION_FIVE_SIXTHS, VULGAR_FRACTION_FOUR_FIFTHS, VULGAR_FRACTION_ONE_EIGHTH, VULGAR_FRACTION_ONE_FIFTH, VULGAR_FRACTION_ONE_HALF, VULGAR_FRACTION_ONE_QUARTER, VULGAR_FRACTION_ONE_SIXTH, VULGAR_FRACTION_ONE_THIRD, VULGAR_FRACTION_SEVEN_EIGHTHS, VULGAR_FRACTION_THREE_EIGHTHS, VULGAR_FRACTION_THREE_FIFTHS, VULGAR_FRACTION_THREE_QUARTERS, VULGAR_FRACTION_TWO_FIFTHS, VULGAR_FRACTION_TWO_THIRDS, WAVE_ARROW_POINTING_DIRECTLY_RIGHT, WHITE_CIRCLE, WHITE_DOWN_POINTING_SMALL_TRIANGLE, WHITE_DOWN_POINTING_TRIANGLE, WHITE_LEFT_POINTING_SMALL_TRIANGLE, WHITE_MEDIUM_SQUARE, WHITE_PARALLELOGRAM, WHITE_RECTANGLE, WHITE_RIGHT_POINTING_SMALL_TRIANGLE, WHITE_SMALL_SQUARE, WHITE_SQUARE, WHITE_STAR, WHITE_UP_POINTING_SMALL_TRIANGLE, WHITE_UP_POINTING_TRIANGLE, WHITE_UP_POINTING_TRIANGLE_WITH_DOT, WORD_JOINER, WREATH_PRODUCT, XOR, YEN_SIGN, ZERO_WIDTH_SPACE
Constructor and Description |
---|
UnicodeUtilImpl()
The constructor.
|
Modifier and Type | Method and Description |
---|---|
protected void |
doInitialized()
This method is invoked at the end of the actual
initialization . |
static UnicodeUtil |
getInstance()
This method gets the singleton instance of this
UnicodeUtilImpl . |
private static void |
initIso843(Map<Character,String> map)
Implementation of ISO 843 (Greek transliteration).
|
private static void |
initIso9(Map<Character,String> map)
Implementation of ISO 9 (Cyrillic transliteration).
|
boolean |
isDash(char character)
This method determines if the given character is a dash.
|
boolean |
isHyphen(char character)
This method determines if the given character is a hyphen.
|
boolean |
isMinus(char character)
This method determines if the given character is a minus-sign.
|
String |
normalize2Ascii(char character)
This method determines an ASCII-representation for the given character if available.
|
String |
normalize2Ascii(char character,
char nonNormalizableCharaterReplacement) |
String |
normalize2Ascii(CharSequence text)
This method converts the given
text to the best possible ASCII-representation. |
String |
normalize2Ascii(CharSequence text,
char nonNormalizableCharaterReplacement)
This method converts the given
text to the best possible ASCII-representation. |
String |
transliterate(char character)
This method gets the transliteration of the given
character . |
String |
transliterate(String text)
This method gets the transliteration of the given
text . |
createLogger, doInitialize, getLogger
getInitializationState, initialize
private static UnicodeUtil instance
private static void initIso9(Map<Character,String> map)
map
- is where to add the transliteration mapping.
private static void initIso843(Map<Character,String> map)
map
- is where to add the transliteration mapping.
protected void doInitialized()
AbstractComponent
initialization
. It is called when
AbstractComponent.initialize()
is invoked for the first time after AbstractComponent.doInitialize()
is completed and
initialization-state
has changed to initialized
. super.
AbstractComponent.doInitialized()
.doInitialized
in class AbstractComponent
public String normalize2Ascii(char character)
UnicodeUtil
normalize2Ascii
in interface UnicodeUtil
character
- is the character to convert.null
if the character is
already ASCII or there is no ASCII-representation available.Normalizer
public String normalize2Ascii(char character, char nonNormalizableCharaterReplacement)
character
- is the character to convert.nonNormalizableCharaterReplacement
- is the character used to replace Unicode characters that have no
corresponding ASCII representation
. Use UnicodeUtil.NULL
to remove these
characters. A typical character to use is ?
.null
if the character is
already ASCII or there is no ASCII-representation available.normalize2Ascii(char)
public String transliterate(char character)
UnicodeUtil
character
.transliterate
in interface UnicodeUtil
character
- is the character to convert.null
if no such
transliteration is available.Normalizer
,
UnicodeUtil.transliterate(String)
public String transliterate(String text)
UnicodeUtil
text
. This method will support common transliteration standards such as ISO 843 (Greek), ISO 9 (Cyrillic),
etc. However, for some writing systems there are multiple ways of transliteration and some things done by this
method may not officially be called transliteration. So please consider it as a pragmatic way to convert text to
the Latin alphabet. We are looking for help in supporting additional writing systems but not for scientific
discussion about linguistics.
transliterate
in interface UnicodeUtil
text
- is the String
to convert.Normalizer
public String normalize2Ascii(CharSequence text)
UnicodeUtil
text
to the best possible ASCII-representation. All ASCII-characters will
remain unchanged. All other characters are normalized to ASCII
.normalize2Ascii
in interface UnicodeUtil
text
- is the text to convert.UnicodeUtil.normalize2Ascii(CharSequence, char)
,
Normalizer
public String normalize2Ascii(CharSequence text, char nonNormalizableCharaterReplacement)
UnicodeUtil
text
to the best possible ASCII-representation. All ASCII-characters will
remain unchanged. All other characters are normalized to ASCII
and if not possible
replaced by nonNormalizableCharaterReplacement
.normalize2Ascii
in interface UnicodeUtil
text
- is the text to convert.nonNormalizableCharaterReplacement
- is the character used to replace Unicode characters that have no
corresponding ASCII representation
. Use UnicodeUtil.NULL
to remove these
characters. A typical character to use is ?
.Normalizer
public boolean isDash(char character)
UnicodeUtil
isDash
in interface UnicodeUtil
character
- is the character to check.true
if character
is a dash.UnicodeUtil.HYPHEN_MINUS
,
UnicodeUtil.EN_DASH
,
UnicodeUtil.EM_DASH
,
UnicodeUtil.FIGURE_DASH
,
UnicodeUtil.HORIZONTAL_BAR
,
UnicodeUtil.SWUNG_DASH
public boolean isHyphen(char character)
UnicodeUtil
isHyphen
in interface UnicodeUtil
character
- is the character to check.true
if character
is a hyphen.UnicodeUtil.HYPHEN_MINUS
,
UnicodeUtil.HYPHEN
,
UnicodeUtil.HYPHEN_BULLET
,
UnicodeUtil.HYPHENATION_POINT
public boolean isMinus(char character)
UnicodeUtil
isMinus
in interface UnicodeUtil
character
- is the character to check.true
if character
is a minus sign.UnicodeUtil.HYPHEN_MINUS
,
UnicodeUtil.MINUS_SIGN
public static UnicodeUtil getInstance()
UnicodeUtilImpl
. Cdi.GET_INSTANCE
before using.
Copyright © 2001–2016 mmm-Team. All rights reserved.