uchar.h source code [include/x86_64-linux-gnu/unicode/uchar.h]

1	/*
2	**********************************************************************
3	* Copyright (C) 1997-2014, International Business Machines
4	* Corporation and others. All Rights Reserved.
5	**********************************************************************
6	*
7	* File UCHAR.H
8	*
9	* Modification History:
10	*
11	* Date Name Description
12	* 04/02/97 aliu Creation.
13	* 03/29/99 helena Updated for C APIs.
14	* 4/15/99 Madhu Updated for C Implementation and Javadoc
15	* 5/20/99 Madhu Added the function u_getVersion()
16	* 8/19/1999 srl Upgraded scripts to Unicode 3.0
17	* 8/27/1999 schererm UCharDirection constants: U_...
18	* 11/11/1999 weiv added u_isalnum(), cleaned comments
19	* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion().
20	******************************************************************************
21	*/
22
23	#ifndef UCHAR_H
24	#define UCHAR_H
25
26	#include "unicode/utypes.h"
27
28	U_CDECL_BEGIN
29
/*==========================================================================*/
/* Unicode version number                                                   */
/*==========================================================================*/
/**
 * Unicode version number, default for the current ICU version.
 * The actual Unicode Character Database (UCD) data is stored in uprops.dat
 * and may be generated from UCD files from a different Unicode version.
 * Call u_getUnicodeVersion to get the actual Unicode version of the data.
 *
 * This macro is a compile-time string literal; it does not reflect the
 * data that is loaded at run time.
 *
 * @see u_getUnicodeVersion
 * @stable ICU 2.0
 */
#define U_UNICODE_VERSION "7.0"
43
44	/**
45	* \file
46	* \brief C API: Unicode Properties
47	*
48	* This C API provides low-level access to the Unicode Character Database.
49	* In addition to raw property values, some convenience functions calculate
50	* derived properties, for example for Java-style programming.
51	*
52	* Unicode assigns each code point (not just each assigned character) values for
53	* many properties.
54	* Most of them are simple boolean flags, or constants from a small enumerated list.
55	* For some properties, values are strings or other relatively more complex types.
56	*
57	* For more information see
58	* "About the Unicode Character Database" (http://www.unicode.org/ucd/)
59	* and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html).
60	*
61	* Many functions are designed to match java.lang.Character functions.
62	* See the individual function documentation,
63	* and see the JDK 1.4 java.lang.Character documentation
64	* at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html
65	*
66	* There are also functions that provide easy migration from C/POSIX functions
67	* like isblank(). Their use is generally discouraged because the C/POSIX
68	* standards do not define their semantics beyond the ASCII range, which means
69	* that different implementations exhibit very different behavior.
70	* Instead, Unicode properties should be used directly.
71	*
72	* There are also only a few, broad C/POSIX character classes, and they tend
73	* to be used for conflicting purposes. For example, the "isalpha()" class
74	* is sometimes used to determine word boundaries, while a more sophisticated
75	* approach would at least distinguish initial letters from continuation
76	* characters (the latter including combining marks).
77	* (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
78	* Another example: There is no "istitle()" class for titlecase characters.
79	*
80	* ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
81	* ICU implements them according to the Standard Recommendations in
82	* Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
83	* (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
84	*
85	* API access for C/POSIX character classes is as follows:
86	* - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC)
87	* - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE)
88	* - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE)
89	* - punct: u_ispunct(c)
90	* - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER
91	* - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT)
92	* - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM)
93	* - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)
94	* - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK)
95	* - cntrl: u_charType(c)==U_CONTROL_CHAR
96	* - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH)
97	* - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT)
98	*
99	* Note: Some of the u_isxyz() functions in uchar.h predate, and do not match,
100	* the Standard Recommendations in UTS #18. Instead, they match Java
101	* functions according to their API documentation.
102	*
103	* \htmlonly
104	* The C/POSIX character classes are also available in UnicodeSet patterns,
105	* using patterns like [:graph:] or \p{graph}.
106	* \endhtmlonly
107	*
108	* Note: There are several ICU whitespace functions.
109	* Comparison:
110	* - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
111	* most of general categories "Z" (separators) + most whitespace ISO controls
112	* (including no-break spaces, but excluding IS1..IS4 and ZWSP)
113	* - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
114	* - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
115	* - u_isspace: Z + whitespace ISO controls (including no-break spaces)
116	* - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP
117	*/
118
119	/**
120	* Constants.
121	*/
122
/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */
#define UCHAR_MIN_VALUE 0
125
/**
 * The highest Unicode code point value (scalar value) according to
 * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).
 * For a single character, UChar32 is a simple type that can hold any code point value.
 *
 * @see UChar32
 * @stable ICU 2.0
 */
#define UCHAR_MAX_VALUE 0x10ffff
135
/**
 * Get a single-bit bit set (a flag) from a bit number 0..31.
 *
 * The result has type uint32_t. The argument is evaluated exactly once.
 * Bit numbers outside 0..31 are not supported: shifting a 32-bit value by
 * 32 or more positions (or by a negative amount) is undefined in C.
 *
 * @stable ICU 2.1
 */
#define U_MASK(x) ((uint32_t)1<<(x))
141
/**
 * Selection constants for Unicode properties.
 * These constants are used in functions like u_hasBinaryProperty to select
 * one of the Unicode properties.
 *
 * The properties APIs are intended to reflect Unicode properties as defined
 * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
 * For details about the properties see http://www.unicode.org/ucd/ .
 * For names of Unicode properties see the UCD file PropertyAliases.txt.
 *
 * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
 * then properties marked with "new in Unicode 3.2" are not or not fully available.
 * Check u_getUnicodeVersion to be sure.
 *
 * The constants are grouped by value type; each group has a *_START constant
 * equal to its first member and a *_LIMIT constant one past its last member:
 * binary (0..), enumerated/integer (0x1000..), bit-mask (0x2000..),
 * double (0x3000..), string (0x4000..), other (0x7000..).
 *
 * @see u_hasBinaryProperty
 * @see u_getIntPropertyValue
 * @see u_getUnicodeVersion
 * @stable ICU 2.1
 */
typedef enum UProperty {
    /*
     * Note: UProperty constants are parsed by preparseucd.py.
     * It matches lines like
     *     UCHAR_<Unicode property name>=<integer>,
     */

    /*  Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
        debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
        rather than UCHAR_BINARY_START.  Likewise for other *_START
        identifiers. */

    /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.
        Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */
    UCHAR_ALPHABETIC=0,
    /** First constant for binary Unicode properties. @stable ICU 2.1 */
    UCHAR_BINARY_START=UCHAR_ALPHABETIC,
    /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */
    UCHAR_ASCII_HEX_DIGIT=1,
    /** Binary property Bidi_Control.
        Format controls which have specific functions
        in the Bidi Algorithm. @stable ICU 2.1 */
    UCHAR_BIDI_CONTROL=2,
    /** Binary property Bidi_Mirrored.
        Characters that may change display in RTL text.
        Same as u_isMirrored.
        See Bidi Algorithm, UTR 9. @stable ICU 2.1 */
    UCHAR_BIDI_MIRRORED=3,
    /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */
    UCHAR_DASH=4,
    /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
        Ignorable in most processing.
        <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */
    UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5,
    /** Binary property Deprecated (new in Unicode 3.2).
        The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */
    UCHAR_DEPRECATED=6,
    /** Binary property Diacritic. Characters that linguistically modify
        the meaning of another character to which they apply. @stable ICU 2.1 */
    UCHAR_DIACRITIC=7,
    /** Binary property Extender.
        Extend the value or shape of a preceding alphabetic character,
        e.g., length and iteration marks. @stable ICU 2.1 */
    UCHAR_EXTENDER=8,
    /** Binary property Full_Composition_Exclusion.
        CompositionExclusions.txt+Singleton Decompositions+
        Non-Starter Decompositions. @stable ICU 2.1 */
    UCHAR_FULL_COMPOSITION_EXCLUSION=9,
    /** Binary property Grapheme_Base (new in Unicode 3.2).
        For programmatic determination of grapheme cluster boundaries.
        [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */
    UCHAR_GRAPHEME_BASE=10,
    /** Binary property Grapheme_Extend (new in Unicode 3.2).
        For programmatic determination of grapheme cluster boundaries.
        Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */
    UCHAR_GRAPHEME_EXTEND=11,
    /** Binary property Grapheme_Link (new in Unicode 3.2).
        For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */
    UCHAR_GRAPHEME_LINK=12,
    /** Binary property Hex_Digit.
        Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */
    UCHAR_HEX_DIGIT=13,
    /** Binary property Hyphen. Dashes used to mark connections
        between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */
    UCHAR_HYPHEN=14,
    /** Binary property ID_Continue.
        Characters that can continue an identifier.
        DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out."
        ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */
    UCHAR_ID_CONTINUE=15,
    /** Binary property ID_Start.
        Characters that can start an identifier.
        Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */
    UCHAR_ID_START=16,
    /** Binary property Ideographic.
        CJKV ideographs. @stable ICU 2.1 */
    UCHAR_IDEOGRAPHIC=17,
    /** Binary property IDS_Binary_Operator (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_IDS_BINARY_OPERATOR=18,
    /** Binary property IDS_Trinary_Operator (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_IDS_TRINARY_OPERATOR=19,
    /** Binary property Join_Control.
        Format controls for cursive joining and ligation. @stable ICU 2.1 */
    UCHAR_JOIN_CONTROL=20,
    /** Binary property Logical_Order_Exception (new in Unicode 3.2).
        Characters that do not use logical order and
        require special handling in most processing. @stable ICU 2.1 */
    UCHAR_LOGICAL_ORDER_EXCEPTION=21,
    /** Binary property Lowercase. Same as u_isULowercase, different from u_islower.
        Ll+Other_Lowercase @stable ICU 2.1 */
    UCHAR_LOWERCASE=22,
    /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */
    UCHAR_MATH=23,
    /** Binary property Noncharacter_Code_Point.
        Code points that are explicitly defined as illegal
        for the encoding of characters. @stable ICU 2.1 */
    UCHAR_NONCHARACTER_CODE_POINT=24,
    /** Binary property Quotation_Mark. @stable ICU 2.1 */
    UCHAR_QUOTATION_MARK=25,
    /** Binary property Radical (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_RADICAL=26,
    /** Binary property Soft_Dotted (new in Unicode 3.2).
        Characters with a "soft dot", like i or j.
        An accent placed on these characters causes
        the dot to disappear. @stable ICU 2.1 */
    UCHAR_SOFT_DOTTED=27,
    /** Binary property Terminal_Punctuation.
        Punctuation characters that generally mark
        the end of textual units. @stable ICU 2.1 */
    UCHAR_TERMINAL_PUNCTUATION=28,
    /** Binary property Unified_Ideograph (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_UNIFIED_IDEOGRAPH=29,
    /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper.
        Lu+Other_Uppercase @stable ICU 2.1 */
    UCHAR_UPPERCASE=30,
    /** Binary property White_Space.
        Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace.
        Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */
    UCHAR_WHITE_SPACE=31,
    /** Binary property XID_Continue.
        ID_Continue modified to allow closure under
        normalization forms NFKC and NFKD. @stable ICU 2.1 */
    UCHAR_XID_CONTINUE=32,
    /** Binary property XID_Start. ID_Start modified to allow
        closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */
    UCHAR_XID_START=33,
    /** Binary property Case_Sensitive. Either the source of a case
        mapping or _in_ the target of a case mapping. Not the same as
        the general category Cased_Letter. @stable ICU 2.6 */
    UCHAR_CASE_SENSITIVE=34,
    /** Binary property STerm (new in Unicode 4.0.1).
        Sentence Terminal. Used in UAX #29: Text Boundaries
        (http://www.unicode.org/reports/tr29/)
        @stable ICU 3.0 */
    UCHAR_S_TERM=35,
    /** Binary property Variation_Selector (new in Unicode 4.0.1).
        Indicates all those characters that qualify as Variation Selectors.
        For details on the behavior of these characters,
        see StandardizedVariants.html and 15.6 Variation Selectors.
        @stable ICU 3.0 */
    UCHAR_VARIATION_SELECTOR=36,
    /** Binary property NFD_Inert.
        ICU-specific property for characters that are inert under NFD,
        i.e., they do not interact with adjacent characters.
        See the documentation for the Normalizer2 class and the
        Normalizer2::isInert() method.
        @stable ICU 3.0 */
    UCHAR_NFD_INERT=37,
    /** Binary property NFKD_Inert.
        ICU-specific property for characters that are inert under NFKD,
        i.e., they do not interact with adjacent characters.
        See the documentation for the Normalizer2 class and the
        Normalizer2::isInert() method.
        @stable ICU 3.0 */
    UCHAR_NFKD_INERT=38,
    /** Binary property NFC_Inert.
        ICU-specific property for characters that are inert under NFC,
        i.e., they do not interact with adjacent characters.
        See the documentation for the Normalizer2 class and the
        Normalizer2::isInert() method.
        @stable ICU 3.0 */
    UCHAR_NFC_INERT=39,
    /** Binary property NFKC_Inert.
        ICU-specific property for characters that are inert under NFKC,
        i.e., they do not interact with adjacent characters.
        See the documentation for the Normalizer2 class and the
        Normalizer2::isInert() method.
        @stable ICU 3.0 */
    UCHAR_NFKC_INERT=40,
    /** Binary Property Segment_Starter.
        ICU-specific property for characters that are starters in terms of
        Unicode normalization and combining character sequences.
        They have ccc=0 and do not occur in non-initial position of the
        canonical decomposition of any character
        (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)).
        ICU uses this property for segmenting a string for generating a set of
        canonically equivalent strings, e.g. for canonical closure while
        processing collation tailoring rules.
        @stable ICU 3.0 */
    UCHAR_SEGMENT_STARTER=41,
    /** Binary property Pattern_Syntax (new in Unicode 4.1).
        See UAX #31 Identifier and Pattern Syntax
        (http://www.unicode.org/reports/tr31/)
        @stable ICU 3.4 */
    UCHAR_PATTERN_SYNTAX=42,
    /** Binary property Pattern_White_Space (new in Unicode 4.1).
        See UAX #31 Identifier and Pattern Syntax
        (http://www.unicode.org/reports/tr31/)
        @stable ICU 3.4 */
    UCHAR_PATTERN_WHITE_SPACE=43,
    /** Binary property alnum (a C/POSIX character class).
        Implemented according to the UTS #18 Annex C Standard Recommendation.
        See the uchar.h file documentation.
        @stable ICU 3.4 */
    UCHAR_POSIX_ALNUM=44,
    /** Binary property blank (a C/POSIX character class).
        Implemented according to the UTS #18 Annex C Standard Recommendation.
        See the uchar.h file documentation.
        @stable ICU 3.4 */
    UCHAR_POSIX_BLANK=45,
    /** Binary property graph (a C/POSIX character class).
        Implemented according to the UTS #18 Annex C Standard Recommendation.
        See the uchar.h file documentation.
        @stable ICU 3.4 */
    UCHAR_POSIX_GRAPH=46,
    /** Binary property print (a C/POSIX character class).
        Implemented according to the UTS #18 Annex C Standard Recommendation.
        See the uchar.h file documentation.
        @stable ICU 3.4 */
    UCHAR_POSIX_PRINT=47,
    /** Binary property xdigit (a C/POSIX character class).
        Implemented according to the UTS #18 Annex C Standard Recommendation.
        See the uchar.h file documentation.
        @stable ICU 3.4 */
    UCHAR_POSIX_XDIGIT=48,
    /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */
    UCHAR_CASED=49,
    /** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */
    UCHAR_CASE_IGNORABLE=50,
    /** Binary property Changes_When_Lowercased. @stable ICU 4.4 */
    UCHAR_CHANGES_WHEN_LOWERCASED=51,
    /** Binary property Changes_When_Uppercased. @stable ICU 4.4 */
    UCHAR_CHANGES_WHEN_UPPERCASED=52,
    /** Binary property Changes_When_Titlecased. @stable ICU 4.4 */
    UCHAR_CHANGES_WHEN_TITLECASED=53,
    /** Binary property Changes_When_Casefolded. @stable ICU 4.4 */
    UCHAR_CHANGES_WHEN_CASEFOLDED=54,
    /** Binary property Changes_When_Casemapped. @stable ICU 4.4 */
    UCHAR_CHANGES_WHEN_CASEMAPPED=55,
    /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */
    UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56,
    /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
    UCHAR_BINARY_LIMIT=57,

    /** Enumerated property Bidi_Class.
        Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
    UCHAR_BIDI_CLASS=0x1000,
    /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
    UCHAR_INT_START=UCHAR_BIDI_CLASS,
    /** Enumerated property Block.
        Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */
    UCHAR_BLOCK=0x1001,
    /** Enumerated property Canonical_Combining_Class.
        Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */
    UCHAR_CANONICAL_COMBINING_CLASS=0x1002,
    /** Enumerated property Decomposition_Type.
        Returns UDecompositionType values. @stable ICU 2.2 */
    UCHAR_DECOMPOSITION_TYPE=0x1003,
    /** Enumerated property East_Asian_Width.
        See http://www.unicode.org/reports/tr11/
        Returns UEastAsianWidth values. @stable ICU 2.2 */
    UCHAR_EAST_ASIAN_WIDTH=0x1004,
    /** Enumerated property General_Category.
        Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */
    UCHAR_GENERAL_CATEGORY=0x1005,
    /** Enumerated property Joining_Group.
        Returns UJoiningGroup values. @stable ICU 2.2 */
    UCHAR_JOINING_GROUP=0x1006,
    /** Enumerated property Joining_Type.
        Returns UJoiningType values. @stable ICU 2.2 */
    UCHAR_JOINING_TYPE=0x1007,
    /** Enumerated property Line_Break.
        Returns ULineBreak values. @stable ICU 2.2 */
    UCHAR_LINE_BREAK=0x1008,
    /** Enumerated property Numeric_Type.
        Returns UNumericType values. @stable ICU 2.2 */
    UCHAR_NUMERIC_TYPE=0x1009,
    /** Enumerated property Script.
        Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */
    UCHAR_SCRIPT=0x100A,
    /** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
        Returns UHangulSyllableType values. @stable ICU 2.6 */
    UCHAR_HANGUL_SYLLABLE_TYPE=0x100B,
    /** Enumerated property NFD_Quick_Check.
        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
    UCHAR_NFD_QUICK_CHECK=0x100C,
    /** Enumerated property NFKD_Quick_Check.
        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
    UCHAR_NFKD_QUICK_CHECK=0x100D,
    /** Enumerated property NFC_Quick_Check.
        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
    UCHAR_NFC_QUICK_CHECK=0x100E,
    /** Enumerated property NFKC_Quick_Check.
        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
    UCHAR_NFKC_QUICK_CHECK=0x100F,
    /** Enumerated property Lead_Canonical_Combining_Class.
        ICU-specific property for the ccc of the first code point
        of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
        Useful for checking for canonically ordered text;
        see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
        Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
    UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010,
    /** Enumerated property Trail_Canonical_Combining_Class.
        ICU-specific property for the ccc of the last code point
        of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
        Useful for checking for canonically ordered text;
        see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
        Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
    UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011,
    /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
        Used in UAX #29: Text Boundaries
        (http://www.unicode.org/reports/tr29/)
        Returns UGraphemeClusterBreak values. @stable ICU 3.4 */
    UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012,
    /** Enumerated property Sentence_Break (new in Unicode 4.1).
        Used in UAX #29: Text Boundaries
        (http://www.unicode.org/reports/tr29/)
        Returns USentenceBreak values. @stable ICU 3.4 */
    UCHAR_SENTENCE_BREAK=0x1013,
    /** Enumerated property Word_Break (new in Unicode 4.1).
        Used in UAX #29: Text Boundaries
        (http://www.unicode.org/reports/tr29/)
        Returns UWordBreakValues values. @stable ICU 3.4 */
    UCHAR_WORD_BREAK=0x1014,
    /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
        Used in UAX #9: Unicode Bidirectional Algorithm
        (http://www.unicode.org/reports/tr9/)
        Returns UBidiPairedBracketType values. @stable ICU 52 */
    UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015,
    /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
    UCHAR_INT_LIMIT=0x1016,

    /** Bitmask property General_Category_Mask.
        This is the General_Category property returned as a bit mask.
        When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)),
        returns bit masks for UCharCategory values where exactly one bit is set.
        When used with u_getPropertyValueName() and u_getPropertyValueEnum(),
        a multi-bit mask is used for sets of categories like "Letters".
        Mask values should be cast to uint32_t.
        @stable ICU 2.4 */
    UCHAR_GENERAL_CATEGORY_MASK=0x2000,
    /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */
    UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,
    /** One more than the last constant for bit-mask Unicode properties. @stable ICU 2.4 */
    UCHAR_MASK_LIMIT=0x2001,

    /** Double property Numeric_Value.
        Corresponds to u_getNumericValue. @stable ICU 2.4 */
    UCHAR_NUMERIC_VALUE=0x3000,
    /** First constant for double Unicode properties. @stable ICU 2.4 */
    UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,
    /** One more than the last constant for double Unicode properties. @stable ICU 2.4 */
    UCHAR_DOUBLE_LIMIT=0x3001,

    /** String property Age.
        Corresponds to u_charAge. @stable ICU 2.4 */
    UCHAR_AGE=0x4000,
    /** First constant for string Unicode properties. @stable ICU 2.4 */
    UCHAR_STRING_START=UCHAR_AGE,
    /** String property Bidi_Mirroring_Glyph.
        Corresponds to u_charMirror. @stable ICU 2.4 */
    UCHAR_BIDI_MIRRORING_GLYPH=0x4001,
    /** String property Case_Folding.
        Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
    UCHAR_CASE_FOLDING=0x4002,
#ifndef U_HIDE_DEPRECATED_API
    /** Deprecated string property ISO_Comment.
        Corresponds to u_getISOComment. @deprecated ICU 49 */
    UCHAR_ISO_COMMENT=0x4003,
#endif  /* U_HIDE_DEPRECATED_API */
    /** String property Lowercase_Mapping.
        Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
    UCHAR_LOWERCASE_MAPPING=0x4004,
    /** String property Name.
        Corresponds to u_charName. @stable ICU 2.4 */
    UCHAR_NAME=0x4005,
    /** String property Simple_Case_Folding.
        Corresponds to u_foldCase. @stable ICU 2.4 */
    UCHAR_SIMPLE_CASE_FOLDING=0x4006,
    /** String property Simple_Lowercase_Mapping.
        Corresponds to u_tolower. @stable ICU 2.4 */
    UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007,
    /** String property Simple_Titlecase_Mapping.
        Corresponds to u_totitle. @stable ICU 2.4 */
    UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008,
    /** String property Simple_Uppercase_Mapping.
        Corresponds to u_toupper. @stable ICU 2.4 */
    UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009,
    /** String property Titlecase_Mapping.
        Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
    UCHAR_TITLECASE_MAPPING=0x400A,
#ifndef U_HIDE_DEPRECATED_API
    /** String property Unicode_1_Name.
        This property is of little practical value.
        Beginning with ICU 49, ICU APIs return an empty string for this property.
        Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */
    UCHAR_UNICODE_1_NAME=0x400B,
#endif  /* U_HIDE_DEPRECATED_API */
    /** String property Uppercase_Mapping.
        Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
    UCHAR_UPPERCASE_MAPPING=0x400C,
    /** String property Bidi_Paired_Bracket (new in Unicode 6.3).
        Corresponds to u_getBidiPairedBracket. @stable ICU 52 */
    UCHAR_BIDI_PAIRED_BRACKET=0x400D,
    /** One more than the last constant for string Unicode properties. @stable ICU 2.4 */
    UCHAR_STRING_LIMIT=0x400E,

    /** Miscellaneous property Script_Extensions (new in Unicode 6.0).
        Some characters are commonly used in multiple scripts.
        For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
        Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.
        @stable ICU 4.6 */
    UCHAR_SCRIPT_EXTENSIONS=0x7000,
    /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */
    UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS,
    /** One more than the last constant for Unicode properties with unusual value types.
     * @stable ICU 4.6 */
    UCHAR_OTHER_PROPERTY_LIMIT=0x7001,
    /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
    UCHAR_INVALID_CODE = -1
} UProperty;
580
/**
 * Data for enumerated Unicode general category types.
 * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
 *
 * The numeric value of each constant is also the bit number used by the
 * corresponding U_GC_XX_MASK flag.
 *
 * @stable ICU 2.0
 */
typedef enum UCharCategory
{
    /*
     * Note: UCharCategory constants and their API comments are parsed by preparseucd.py.
     * It matches pairs of lines like
     *     / ** <Unicode 2-letter General_Category value> comment... * /
     *     U_<[A-Z_]+> = <integer>,
     */

    /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */
    U_UNASSIGNED              = 0,
    /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */
    U_GENERAL_OTHER_TYPES     = 0,
    /** Lu @stable ICU 2.0 */
    U_UPPERCASE_LETTER        = 1,
    /** Ll @stable ICU 2.0 */
    U_LOWERCASE_LETTER        = 2,
    /** Lt @stable ICU 2.0 */
    U_TITLECASE_LETTER        = 3,
    /** Lm @stable ICU 2.0 */
    U_MODIFIER_LETTER         = 4,
    /** Lo @stable ICU 2.0 */
    U_OTHER_LETTER            = 5,
    /** Mn @stable ICU 2.0 */
    U_NON_SPACING_MARK        = 6,
    /** Me @stable ICU 2.0 */
    U_ENCLOSING_MARK          = 7,
    /** Mc @stable ICU 2.0 */
    U_COMBINING_SPACING_MARK  = 8,
    /** Nd @stable ICU 2.0 */
    U_DECIMAL_DIGIT_NUMBER    = 9,
    /** Nl @stable ICU 2.0 */
    U_LETTER_NUMBER           = 10,
    /** No @stable ICU 2.0 */
    U_OTHER_NUMBER            = 11,
    /** Zs @stable ICU 2.0 */
    U_SPACE_SEPARATOR         = 12,
    /** Zl @stable ICU 2.0 */
    U_LINE_SEPARATOR          = 13,
    /** Zp @stable ICU 2.0 */
    U_PARAGRAPH_SEPARATOR     = 14,
    /** Cc @stable ICU 2.0 */
    U_CONTROL_CHAR            = 15,
    /** Cf @stable ICU 2.0 */
    U_FORMAT_CHAR             = 16,
    /** Co @stable ICU 2.0 */
    U_PRIVATE_USE_CHAR        = 17,
    /** Cs @stable ICU 2.0 */
    U_SURROGATE               = 18,
    /** Pd @stable ICU 2.0 */
    U_DASH_PUNCTUATION        = 19,
    /** Ps @stable ICU 2.0 */
    U_START_PUNCTUATION       = 20,
    /** Pe @stable ICU 2.0 */
    U_END_PUNCTUATION         = 21,
    /** Pc @stable ICU 2.0 */
    U_CONNECTOR_PUNCTUATION   = 22,
    /** Po @stable ICU 2.0 */
    U_OTHER_PUNCTUATION       = 23,
    /** Sm @stable ICU 2.0 */
    U_MATH_SYMBOL             = 24,
    /** Sc @stable ICU 2.0 */
    U_CURRENCY_SYMBOL         = 25,
    /** Sk @stable ICU 2.0 */
    U_MODIFIER_SYMBOL         = 26,
    /** So @stable ICU 2.0 */
    U_OTHER_SYMBOL            = 27,
    /** Pi @stable ICU 2.0 */
    U_INITIAL_PUNCTUATION     = 28,
    /** Pf @stable ICU 2.0 */
    U_FINAL_PUNCTUATION       = 29,
    /** One higher than the last enum UCharCategory constant. @stable ICU 2.0 */
    U_CHAR_CATEGORY_COUNT
} UCharCategory;
660
/**
 * U_GC_XX_MASK constants are bit flags corresponding to Unicode
 * general category values.
 * For each category, the nth bit is set if the numeric value of the
 * corresponding UCharCategory constant is n.
 *
 * There are also some U_GC_Y_MASK constants for groups of general categories
 * like L for all letter categories.
 *
 * Each single-category mask below is simply U_MASK() applied to the
 * UCharCategory constant of the same general category.
 *
 * @see u_charType
 * @see U_GET_GC_MASK
 * @see UCharCategory
 * @stable ICU 2.1
 */
#define U_GC_CN_MASK    U_MASK(U_GENERAL_OTHER_TYPES)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LU_MASK    U_MASK(U_UPPERCASE_LETTER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LL_MASK    U_MASK(U_LOWERCASE_LETTER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LT_MASK    U_MASK(U_TITLECASE_LETTER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LM_MASK    U_MASK(U_MODIFIER_LETTER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LO_MASK    U_MASK(U_OTHER_LETTER)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_MN_MASK    U_MASK(U_NON_SPACING_MARK)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ME_MASK    U_MASK(U_ENCLOSING_MARK)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_MC_MASK    U_MASK(U_COMBINING_SPACING_MARK)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ND_MASK    U_MASK(U_DECIMAL_DIGIT_NUMBER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_NL_MASK    U_MASK(U_LETTER_NUMBER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_NO_MASK    U_MASK(U_OTHER_NUMBER)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ZS_MASK    U_MASK(U_SPACE_SEPARATOR)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ZL_MASK    U_MASK(U_LINE_SEPARATOR)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ZP_MASK    U_MASK(U_PARAGRAPH_SEPARATOR)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CC_MASK    U_MASK(U_CONTROL_CHAR)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CF_MASK    U_MASK(U_FORMAT_CHAR)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CO_MASK    U_MASK(U_PRIVATE_USE_CHAR)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CS_MASK    U_MASK(U_SURROGATE)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PD_MASK    U_MASK(U_DASH_PUNCTUATION)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PS_MASK    U_MASK(U_START_PUNCTUATION)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PE_MASK    U_MASK(U_END_PUNCTUATION)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PC_MASK    U_MASK(U_CONNECTOR_PUNCTUATION)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PO_MASK    U_MASK(U_OTHER_PUNCTUATION)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SM_MASK    U_MASK(U_MATH_SYMBOL)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SC_MASK    U_MASK(U_CURRENCY_SYMBOL)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SK_MASK    U_MASK(U_MODIFIER_SYMBOL)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SO_MASK    U_MASK(U_OTHER_SYMBOL)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PI_MASK    U_MASK(U_INITIAL_PUNCTUATION)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PF_MASK    U_MASK(U_FINAL_PUNCTUATION)
742
743
/*
 * Group masks: each one is the bitwise OR of the single-category
 * U_GC_XX_MASK constants that belong to the named major category.
 */

/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */
#define U_GC_L_MASK \
            (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)

/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */
#define U_GC_LC_MASK \
            (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)

/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */
#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)

/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */
#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)

/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */
#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)

/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */
#define U_GC_C_MASK \
            (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)

/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */
#define U_GC_P_MASK \
            (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
             U_GC_PI_MASK|U_GC_PF_MASK)

/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */
#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
772
/**
 * This specifies the language directional property of a character set.
 *
 * Values are assigned explicitly from 0 through 22; U_CHAR_DIRECTION_COUNT
 * follows the last value and therefore equals the number of constants.
 * The leading token of each constant's comment is its Bidi_Class value.
 * @stable ICU 2.0
 */
typedef enum UCharDirection {
    /*
     * Note: UCharDirection constants and their API comments are parsed by preparseucd.py.
     * It matches pairs of lines like
     *     / ** <Unicode 1..3-letter Bidi_Class value> comment... * /
     *     U_<[A-Z_]+> = <integer>,
     */

    /** L @stable ICU 2.0 */
    U_LEFT_TO_RIGHT               = 0,
    /** R @stable ICU 2.0 */
    U_RIGHT_TO_LEFT               = 1,
    /** EN @stable ICU 2.0 */
    U_EUROPEAN_NUMBER             = 2,
    /** ES @stable ICU 2.0 */
    U_EUROPEAN_NUMBER_SEPARATOR   = 3,
    /** ET @stable ICU 2.0 */
    U_EUROPEAN_NUMBER_TERMINATOR  = 4,
    /** AN @stable ICU 2.0 */
    U_ARABIC_NUMBER               = 5,
    /** CS @stable ICU 2.0 */
    U_COMMON_NUMBER_SEPARATOR     = 6,
    /** B @stable ICU 2.0 */
    U_BLOCK_SEPARATOR             = 7,
    /** S @stable ICU 2.0 */
    U_SEGMENT_SEPARATOR           = 8,
    /** WS @stable ICU 2.0 */
    U_WHITE_SPACE_NEUTRAL         = 9,
    /** ON @stable ICU 2.0 */
    U_OTHER_NEUTRAL               = 10,
    /** LRE @stable ICU 2.0 */
    U_LEFT_TO_RIGHT_EMBEDDING     = 11,
    /** LRO @stable ICU 2.0 */
    U_LEFT_TO_RIGHT_OVERRIDE      = 12,
    /** AL @stable ICU 2.0 */
    U_RIGHT_TO_LEFT_ARABIC        = 13,
    /** RLE @stable ICU 2.0 */
    U_RIGHT_TO_LEFT_EMBEDDING     = 14,
    /** RLO @stable ICU 2.0 */
    U_RIGHT_TO_LEFT_OVERRIDE      = 15,
    /** PDF @stable ICU 2.0 */
    U_POP_DIRECTIONAL_FORMAT      = 16,
    /** NSM @stable ICU 2.0 */
    U_DIR_NON_SPACING_MARK        = 17,
    /** BN @stable ICU 2.0 */
    U_BOUNDARY_NEUTRAL            = 18,
    /** FSI @stable ICU 52 */
    U_FIRST_STRONG_ISOLATE        = 19,
    /** LRI @stable ICU 52 */
    U_LEFT_TO_RIGHT_ISOLATE       = 20,
    /** RLI @stable ICU 52 */
    U_RIGHT_TO_LEFT_ISOLATE       = 21,
    /** PDI @stable ICU 52 */
    U_POP_DIRECTIONAL_ISOLATE     = 22,
    /** @stable ICU 2.0 */
    U_CHAR_DIRECTION_COUNT
} UCharDirection;
834
/**
 * Bidi Paired Bracket Type constants.
 *
 * The constants are numbered implicitly starting at 0, so U_BPT_COUNT
 * equals the number of bracket types (3).
 *
 * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
 * @stable ICU 52
 */
typedef enum UBidiPairedBracketType {
    /*
     * Note: UBidiPairedBracketType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_BPT_<Unicode Bidi_Paired_Bracket_Type value name>
     */

    /** Not a paired bracket. @stable ICU 52 */
    U_BPT_NONE,
    /** Open paired bracket. @stable ICU 52 */
    U_BPT_OPEN,
    /** Close paired bracket. @stable ICU 52 */
    U_BPT_CLOSE,
    /** @stable ICU 52 */
    U_BPT_COUNT /* 3 */
} UBidiPairedBracketType;
857
/**
 * Constants for Unicode blocks, see the Unicode Data file Blocks.txt
 *
 * Every block has an explicit, fixed integer value. The bracketed
 * hexadecimal number after each constant is the block's starting code
 * point as listed in Blocks.txt. UBLOCK_COUNT is one more than the highest
 * block value, and UBLOCK_INVALID_CODE (-1) is outside the valid range.
 * @stable ICU 2.0
 */
enum UBlockCode {
    /*
     * Note: UBlockCode constants are parsed by preparseucd.py.
     * It matches lines like
     *     UBLOCK_<Unicode Block value name> = <integer>,
     */

    /** New No_Block value in Unicode 4. @stable ICU 2.6 */
    UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */

    /** @stable ICU 2.0 */
    UBLOCK_BASIC_LATIN = 1, /*[0000]*/

    /** @stable ICU 2.0 */
    UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/

    /** @stable ICU 2.0 */
    UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/

    /** @stable ICU 2.0 */
    UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/

    /** @stable ICU 2.0 */
    UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/

    /** @stable ICU 2.0 */
    UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/

    /** @stable ICU 2.0 */
    UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/

    /**
     * Unicode 3.2 renames this block to "Greek and Coptic".
     * @stable ICU 2.0
     */
    UBLOCK_GREEK =8, /*[0370]*/

    /** @stable ICU 2.0 */
    UBLOCK_CYRILLIC =9, /*[0400]*/

    /** @stable ICU 2.0 */
    UBLOCK_ARMENIAN =10, /*[0530]*/

    /** @stable ICU 2.0 */
    UBLOCK_HEBREW =11, /*[0590]*/

    /** @stable ICU 2.0 */
    UBLOCK_ARABIC =12, /*[0600]*/

    /** @stable ICU 2.0 */
    UBLOCK_SYRIAC =13, /*[0700]*/

    /** @stable ICU 2.0 */
    UBLOCK_THAANA =14, /*[0780]*/

    /** @stable ICU 2.0 */
    UBLOCK_DEVANAGARI =15, /*[0900]*/

    /** @stable ICU 2.0 */
    UBLOCK_BENGALI =16, /*[0980]*/

    /** @stable ICU 2.0 */
    UBLOCK_GURMUKHI =17, /*[0A00]*/

    /** @stable ICU 2.0 */
    UBLOCK_GUJARATI =18, /*[0A80]*/

    /** @stable ICU 2.0 */
    UBLOCK_ORIYA =19, /*[0B00]*/

    /** @stable ICU 2.0 */
    UBLOCK_TAMIL =20, /*[0B80]*/

    /** @stable ICU 2.0 */
    UBLOCK_TELUGU =21, /*[0C00]*/

    /** @stable ICU 2.0 */
    UBLOCK_KANNADA =22, /*[0C80]*/

    /** @stable ICU 2.0 */
    UBLOCK_MALAYALAM =23, /*[0D00]*/

    /** @stable ICU 2.0 */
    UBLOCK_SINHALA =24, /*[0D80]*/

    /** @stable ICU 2.0 */
    UBLOCK_THAI =25, /*[0E00]*/

    /** @stable ICU 2.0 */
    UBLOCK_LAO =26, /*[0E80]*/

    /** @stable ICU 2.0 */
    UBLOCK_TIBETAN =27, /*[0F00]*/

    /** @stable ICU 2.0 */
    UBLOCK_MYANMAR =28, /*[1000]*/

    /** @stable ICU 2.0 */
    UBLOCK_GEORGIAN =29, /*[10A0]*/

    /** @stable ICU 2.0 */
    UBLOCK_HANGUL_JAMO =30, /*[1100]*/

    /** @stable ICU 2.0 */
    UBLOCK_ETHIOPIC =31, /*[1200]*/

    /** @stable ICU 2.0 */
    UBLOCK_CHEROKEE =32, /*[13A0]*/

    /** @stable ICU 2.0 */
    UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/

    /** @stable ICU 2.0 */
    UBLOCK_OGHAM =34, /*[1680]*/

    /** @stable ICU 2.0 */
    UBLOCK_RUNIC =35, /*[16A0]*/

    /** @stable ICU 2.0 */
    UBLOCK_KHMER =36, /*[1780]*/

    /** @stable ICU 2.0 */
    UBLOCK_MONGOLIAN =37, /*[1800]*/

    /** @stable ICU 2.0 */
    UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/

    /** @stable ICU 2.0 */
    UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/

    /** @stable ICU 2.0 */
    UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/

    /** @stable ICU 2.0 */
    UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/

    /** @stable ICU 2.0 */
    UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/

    /**
     * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
     * @stable ICU 2.0
     */
    UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/

    /** @stable ICU 2.0 */
    UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/

    /** @stable ICU 2.0 */
    UBLOCK_NUMBER_FORMS =45, /*[2150]*/

    /** @stable ICU 2.0 */
    UBLOCK_ARROWS =46, /*[2190]*/

    /** @stable ICU 2.0 */
    UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/

    /** @stable ICU 2.0 */
    UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/

    /** @stable ICU 2.0 */
    UBLOCK_CONTROL_PICTURES =49, /*[2400]*/

    /** @stable ICU 2.0 */
    UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/

    /** @stable ICU 2.0 */
    UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/

    /** @stable ICU 2.0 */
    UBLOCK_BOX_DRAWING =52, /*[2500]*/

    /** @stable ICU 2.0 */
    UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/

    /** @stable ICU 2.0 */
    UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/

    /** @stable ICU 2.0 */
    UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/

    /** @stable ICU 2.0 */
    UBLOCK_DINGBATS =56, /*[2700]*/

    /** @stable ICU 2.0 */
    UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/

    /** @stable ICU 2.0 */
    UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/

    /** @stable ICU 2.0 */
    UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/

    /** @stable ICU 2.0 */
    UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/

    /** @stable ICU 2.0 */
    UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/

    /** @stable ICU 2.0 */
    UBLOCK_HIRAGANA =62, /*[3040]*/

    /** @stable ICU 2.0 */
    UBLOCK_KATAKANA =63, /*[30A0]*/

    /** @stable ICU 2.0 */
    UBLOCK_BOPOMOFO =64, /*[3100]*/

    /** @stable ICU 2.0 */
    UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/

    /** @stable ICU 2.0 */
    UBLOCK_KANBUN =66, /*[3190]*/

    /** @stable ICU 2.0 */
    UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/

    /** @stable ICU 2.0 */
    UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/

    /** @stable ICU 2.0 */
    UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/

    /** @stable ICU 2.0 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/

    /** @stable ICU 2.0 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/

    /** @stable ICU 2.0 */
    UBLOCK_YI_SYLLABLES =72, /*[A000]*/

    /** @stable ICU 2.0 */
    UBLOCK_YI_RADICALS =73, /*[A490]*/

    /** @stable ICU 2.0 */
    UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/

    /** @stable ICU 2.0 */
    UBLOCK_HIGH_SURROGATES =75, /*[D800]*/

    /** @stable ICU 2.0 */
    UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/

    /** @stable ICU 2.0 */
    UBLOCK_LOW_SURROGATES =77, /*[DC00]*/

    /**
     * Same as UBLOCK_PRIVATE_USE.
     * Until Unicode 3.1.1, the corresponding block name was "Private Use",
     * and multiple code point ranges had this block.
     * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
     * adds separate blocks for the supplementary PUAs.
     *
     * @stable ICU 2.0
     */
    UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/
    /**
     * Same as UBLOCK_PRIVATE_USE_AREA.
     * Until Unicode 3.1.1, the corresponding block name was "Private Use",
     * and multiple code point ranges had this block.
     * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
     * adds separate blocks for the supplementary PUAs.
     *
     * @stable ICU 2.0
     */
    UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA,

    /** @stable ICU 2.0 */
    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/

    /** @stable ICU 2.0 */
    UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/

    /** @stable ICU 2.0 */
    UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/

    /** @stable ICU 2.0 */
    UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/

    /** @stable ICU 2.0 */
    UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/

    /** @stable ICU 2.0 */
    UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/

    /** @stable ICU 2.0 */
    UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/

    /** @stable ICU 2.0 */
    UBLOCK_SPECIALS =86, /*[FFF0]*/

    /** @stable ICU 2.0 */
    UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/

    /* New blocks in Unicode 3.1 */

    /** @stable ICU 2.0 */
    UBLOCK_OLD_ITALIC = 88, /*[10300]*/
    /** @stable ICU 2.0 */
    UBLOCK_GOTHIC = 89, /*[10330]*/
    /** @stable ICU 2.0 */
    UBLOCK_DESERET = 90, /*[10400]*/
    /** @stable ICU 2.0 */
    UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/
    /** @stable ICU 2.0 */
    UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/
    /** @stable ICU 2.0 */
    UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/
    /** @stable ICU 2.0 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/
    /** @stable ICU 2.0 */
    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/
    /** @stable ICU 2.0 */
    UBLOCK_TAGS = 96, /*[E0000]*/

    /* New blocks in Unicode 3.2 */

    /** @stable ICU 3.0 */
    UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/
    /**
     * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
     * @stable ICU 2.2
     */
    UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT,
    /** @stable ICU 2.2 */
    UBLOCK_TAGALOG = 98, /*[1700]*/
    /** @stable ICU 2.2 */
    UBLOCK_HANUNOO = 99, /*[1720]*/
    /** @stable ICU 2.2 */
    UBLOCK_BUHID = 100, /*[1740]*/
    /** @stable ICU 2.2 */
    UBLOCK_TAGBANWA = 101, /*[1760]*/
    /** @stable ICU 2.2 */
    UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/
    /** @stable ICU 2.2 */
    UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/
    /** @stable ICU 2.2 */
    UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/
    /** @stable ICU 2.2 */
    UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/
    /** @stable ICU 2.2 */
    UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/
    /** @stable ICU 2.2 */
    UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/
    /** @stable ICU 2.2 */
    UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
    /** @stable ICU 2.2 */
    UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/
    /** @stable ICU 2.2 */
    UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/

    /* New blocks in Unicode 4 */

    /** @stable ICU 2.6 */
    UBLOCK_LIMBU = 111, /*[1900]*/
    /** @stable ICU 2.6 */
    UBLOCK_TAI_LE = 112, /*[1950]*/
    /** @stable ICU 2.6 */
    UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/
    /** @stable ICU 2.6 */
    UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/
    /** @stable ICU 2.6 */
    UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/
    /** @stable ICU 2.6 */
    UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/
    /** @stable ICU 2.6 */
    UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/
    /** @stable ICU 2.6 */
    UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/
    /** @stable ICU 2.6 */
    UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/
    /** @stable ICU 2.6 */
    UBLOCK_UGARITIC = 120, /*[10380]*/
    /** @stable ICU 2.6 */
    UBLOCK_SHAVIAN = 121, /*[10450]*/
    /** @stable ICU 2.6 */
    UBLOCK_OSMANYA = 122, /*[10480]*/
    /** @stable ICU 2.6 */
    UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/
    /** @stable ICU 2.6 */
    UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/
    /** @stable ICU 2.6 */
    UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/

    /* New blocks in Unicode 4.1 */

    /** @stable ICU 3.4 */
    UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/
    /** @stable ICU 3.4 */
    UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/
    /** @stable ICU 3.4 */
    UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/
    /** @stable ICU 3.4 */
    UBLOCK_BUGINESE = 129, /*[1A00]*/
    /** @stable ICU 3.4 */
    UBLOCK_CJK_STROKES = 130, /*[31C0]*/
    /** @stable ICU 3.4 */
    UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/
    /** @stable ICU 3.4 */
    UBLOCK_COPTIC = 132, /*[2C80]*/
    /** @stable ICU 3.4 */
    UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/
    /** @stable ICU 3.4 */
    UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/
    /** @stable ICU 3.4 */
    UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/
    /** @stable ICU 3.4 */
    UBLOCK_GLAGOLITIC = 136, /*[2C00]*/
    /** @stable ICU 3.4 */
    UBLOCK_KHAROSHTHI = 137, /*[10A00]*/
    /** @stable ICU 3.4 */
    UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/
    /** @stable ICU 3.4 */
    UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/
    /** @stable ICU 3.4 */
    UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/
    /** @stable ICU 3.4 */
    UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/
    /** @stable ICU 3.4 */
    UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/
    /** @stable ICU 3.4 */
    UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/
    /** @stable ICU 3.4 */
    UBLOCK_TIFINAGH = 144, /*[2D30]*/
    /** @stable ICU 3.4 */
    UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/

    /* New blocks in Unicode 5.0 */

    /** @stable ICU 3.6 */
    UBLOCK_NKO = 146, /*[07C0]*/
    /** @stable ICU 3.6 */
    UBLOCK_BALINESE = 147, /*[1B00]*/
    /** @stable ICU 3.6 */
    UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/
    /** @stable ICU 3.6 */
    UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/
    /** @stable ICU 3.6 */
    UBLOCK_PHAGS_PA = 150, /*[A840]*/
    /** @stable ICU 3.6 */
    UBLOCK_PHOENICIAN = 151, /*[10900]*/
    /** @stable ICU 3.6 */
    UBLOCK_CUNEIFORM = 152, /*[12000]*/
    /** @stable ICU 3.6 */
    UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/
    /** @stable ICU 3.6 */
    UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/

    /* New blocks in Unicode 5.1 */

    /** @stable ICU 4.0 */
    UBLOCK_SUNDANESE = 155, /*[1B80]*/
    /** @stable ICU 4.0 */
    UBLOCK_LEPCHA = 156, /*[1C00]*/
    /** @stable ICU 4.0 */
    UBLOCK_OL_CHIKI = 157, /*[1C50]*/
    /** @stable ICU 4.0 */
    UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/
    /** @stable ICU 4.0 */
    UBLOCK_VAI = 159, /*[A500]*/
    /** @stable ICU 4.0 */
    UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/
    /** @stable ICU 4.0 */
    UBLOCK_SAURASHTRA = 161, /*[A880]*/
    /** @stable ICU 4.0 */
    UBLOCK_KAYAH_LI = 162, /*[A900]*/
    /** @stable ICU 4.0 */
    UBLOCK_REJANG = 163, /*[A930]*/
    /** @stable ICU 4.0 */
    UBLOCK_CHAM = 164, /*[AA00]*/
    /** @stable ICU 4.0 */
    UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/
    /** @stable ICU 4.0 */
    UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/
    /** @stable ICU 4.0 */
    UBLOCK_LYCIAN = 167, /*[10280]*/
    /** @stable ICU 4.0 */
    UBLOCK_CARIAN = 168, /*[102A0]*/
    /** @stable ICU 4.0 */
    UBLOCK_LYDIAN = 169, /*[10920]*/
    /** @stable ICU 4.0 */
    UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/
    /** @stable ICU 4.0 */
    UBLOCK_DOMINO_TILES = 171, /*[1F030]*/

    /* New blocks in Unicode 5.2 */

    /** @stable ICU 4.4 */
    UBLOCK_SAMARITAN = 172, /*[0800]*/
    /** @stable ICU 4.4 */
    UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/
    /** @stable ICU 4.4 */
    UBLOCK_TAI_THAM = 174, /*[1A20]*/
    /** @stable ICU 4.4 */
    UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/
    /** @stable ICU 4.4 */
    UBLOCK_LISU = 176, /*[A4D0]*/
    /** @stable ICU 4.4 */
    UBLOCK_BAMUM = 177, /*[A6A0]*/
    /** @stable ICU 4.4 */
    UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/
    /** @stable ICU 4.4 */
    UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/
    /** @stable ICU 4.4 */
    UBLOCK_HANGUL_JAMO_EXTENDED_A = 180, /*[A960]*/
    /** @stable ICU 4.4 */
    UBLOCK_JAVANESE = 181, /*[A980]*/
    /** @stable ICU 4.4 */
    UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/
    /** @stable ICU 4.4 */
    UBLOCK_TAI_VIET = 183, /*[AA80]*/
    /** @stable ICU 4.4 */
    UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/
    /** @stable ICU 4.4 */
    UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/
    /** @stable ICU 4.4 */
    UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/
    /** @stable ICU 4.4 */
    UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/
    /** @stable ICU 4.4 */
    UBLOCK_AVESTAN = 188, /*[10B00]*/
    /** @stable ICU 4.4 */
    UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/
    /** @stable ICU 4.4 */
    UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/
    /** @stable ICU 4.4 */
    UBLOCK_OLD_TURKIC = 191, /*[10C00]*/
    /** @stable ICU 4.4 */
    UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/
    /** @stable ICU 4.4 */
    UBLOCK_KAITHI = 193, /*[11080]*/
    /** @stable ICU 4.4 */
    UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/
    /** @stable ICU 4.4 */
    UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/
    /** @stable ICU 4.4 */
    UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/
    /** @stable ICU 4.4 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/

    /* New blocks in Unicode 6.0 */

    /** @stable ICU 4.6 */
    UBLOCK_MANDAIC = 198, /*[0840]*/
    /** @stable ICU 4.6 */
    UBLOCK_BATAK = 199, /*[1BC0]*/
    /** @stable ICU 4.6 */
    UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/
    /** @stable ICU 4.6 */
    UBLOCK_BRAHMI = 201, /*[11000]*/
    /** @stable ICU 4.6 */
    UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/
    /** @stable ICU 4.6 */
    UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/
    /** @stable ICU 4.6 */
    UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/
    /** @stable ICU 4.6 */
    UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/
    /** @stable ICU 4.6 */
    UBLOCK_EMOTICONS = 206, /*[1F600]*/
    /** @stable ICU 4.6 */
    UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/
    /** @stable ICU 4.6 */
    UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/
    /** @stable ICU 4.6 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/

    /* New blocks in Unicode 6.1 */

    /** @stable ICU 49 */
    UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/
    /** @stable ICU 49 */
    UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/
    /** @stable ICU 49 */
    UBLOCK_CHAKMA = 212, /*[11100]*/
    /** @stable ICU 49 */
    UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/
    /** @stable ICU 49 */
    UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/
    /** @stable ICU 49 */
    UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/
    /** @stable ICU 49 */
    UBLOCK_MIAO = 216, /*[16F00]*/
    /** @stable ICU 49 */
    UBLOCK_SHARADA = 217, /*[11180]*/
    /** @stable ICU 49 */
    UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/
    /** @stable ICU 49 */
    UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/
    /** @stable ICU 49 */
    UBLOCK_TAKRI = 220, /*[11680]*/

    /* New blocks in Unicode 7.0 */

    /** @stable ICU 54 */
    UBLOCK_BASSA_VAH = 221, /*[16AD0]*/
    /** @stable ICU 54 */
    UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/
    /** @stable ICU 54 */
    UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/
    /** @stable ICU 54 */
    UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224, /*[1AB0]*/
    /** @stable ICU 54 */
    UBLOCK_DUPLOYAN = 225, /*[1BC00]*/
    /** @stable ICU 54 */
    UBLOCK_ELBASAN = 226, /*[10500]*/
    /** @stable ICU 54 */
    UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227, /*[1F780]*/
    /** @stable ICU 54 */
    UBLOCK_GRANTHA = 228, /*[11300]*/
    /** @stable ICU 54 */
    UBLOCK_KHOJKI = 229, /*[11200]*/
    /** @stable ICU 54 */
    UBLOCK_KHUDAWADI = 230, /*[112B0]*/
    /** @stable ICU 54 */
    UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/
    /** @stable ICU 54 */
    UBLOCK_LINEAR_A = 232, /*[10600]*/
    /** @stable ICU 54 */
    UBLOCK_MAHAJANI = 233, /*[11150]*/
    /** @stable ICU 54 */
    UBLOCK_MANICHAEAN = 234, /*[10AC0]*/
    /** @stable ICU 54 */
    UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/
    /** @stable ICU 54 */
    UBLOCK_MODI = 236, /*[11600]*/
    /** @stable ICU 54 */
    UBLOCK_MRO = 237, /*[16A40]*/
    /** @stable ICU 54 */
    UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/
    /** @stable ICU 54 */
    UBLOCK_NABATAEAN = 239, /*[10880]*/
    /** @stable ICU 54 */
    UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/
    /** @stable ICU 54 */
    UBLOCK_OLD_PERMIC = 241, /*[10350]*/
    /** @stable ICU 54 */
    UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/
    /** @stable ICU 54 */
    UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/
    /** @stable ICU 54 */
    UBLOCK_PALMYRENE = 244, /*[10860]*/
    /** @stable ICU 54 */
    UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/
    /** @stable ICU 54 */
    UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/
    /** @stable ICU 54 */
    UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247, /*[1BCA0]*/
    /** @stable ICU 54 */
    UBLOCK_SIDDHAM = 248, /*[11580]*/
    /** @stable ICU 54 */
    UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249, /*[111E0]*/
    /** @stable ICU 54 */
    UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/
    /** @stable ICU 54 */
    UBLOCK_TIRHUTA = 251, /*[11480]*/
    /** @stable ICU 54 */
    UBLOCK_WARANG_CITI = 252, /*[118A0]*/

    /** @stable ICU 2.0 */
    UBLOCK_COUNT = 253,

    /** @stable ICU 2.0 */
    UBLOCK_INVALID_CODE=-1
};

/** @stable ICU 2.0 */
typedef enum UBlockCode UBlockCode;
1531
/**
 * East Asian Width constants.
 *
 * The constants are numbered implicitly starting at 0; the bracketed
 * comment after each one is the short property value name.
 * U_EA_COUNT follows the last value and equals the number of width values.
 *
 * @see UCHAR_EAST_ASIAN_WIDTH
 * @see u_getIntPropertyValue
 * @stable ICU 2.2
 */
typedef enum UEastAsianWidth {
    /*
     * Note: UEastAsianWidth constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_EA_<Unicode East_Asian_Width value name>
     */

    U_EA_NEUTRAL,   /*[N]*/
    U_EA_AMBIGUOUS, /*[A]*/
    U_EA_HALFWIDTH, /*[H]*/
    U_EA_FULLWIDTH, /*[F]*/
    U_EA_NARROW,    /*[Na]*/
    U_EA_WIDE,      /*[W]*/
    U_EA_COUNT
} UEastAsianWidth;
1554
/**
 * Selector constants for u_charName().
 * u_charName() returns the "modern" name of a
 * Unicode character; or the name that was defined in
 * Unicode version 1.0, before the Unicode standard merged
 * with ISO-10646; or an "extended" name that gives each
 * Unicode code point a unique name.
 *
 * U_EXTENDED_CHAR_NAME is pinned to U_UNICODE_CHAR_NAME+2 so that its
 * value, and the values after it, stay the same whether or not the
 * deprecated U_UNICODE_10_CHAR_NAME is hidden by U_HIDE_DEPRECATED_API.
 *
 * @see u_charName
 * @stable ICU 2.0
 */
typedef enum UCharNameChoice {
    /** Unicode character name (Name property). @stable ICU 2.0 */
    U_UNICODE_CHAR_NAME,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * The Unicode_1_Name property value which is of little practical value.
     * Beginning with ICU 49, ICU APIs return an empty string for this name choice.
     * @deprecated ICU 49
     */
    U_UNICODE_10_CHAR_NAME,
#endif  /* U_HIDE_DEPRECATED_API */
    /** Standard or synthetic character name. @stable ICU 2.0 */
    U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2,
    /** Corrected name from NameAliases.txt. @stable ICU 4.4 */
    U_CHAR_NAME_ALIAS,
    /** @stable ICU 2.0 */
    U_CHAR_NAME_CHOICE_COUNT
} UCharNameChoice;
1584
/**
 * Selector constants for u_getPropertyName() and
 * u_getPropertyValueName().  These selectors are used to choose which
 * name is returned for a given property or value.  All properties and
 * values have a long name.  Most have a short name, but some do not.
 * Unicode allows for additional names, beyond the long and short
 * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where
 * i=1, 2,...
 *
 * @see u_getPropertyName()
 * @see u_getPropertyValueName()
 * @stable ICU 2.4
 */
typedef enum UPropertyNameChoice {
    /** Selects the short name (value 0). */
    U_SHORT_PROPERTY_NAME,
    /** Selects the long name (value 1). */
    U_LONG_PROPERTY_NAME,
    /** Follows the last selector defined here (value 2). */
    U_PROPERTY_NAME_CHOICE_COUNT
} UPropertyNameChoice;
1603
/**
 * Decomposition Type constants.
 *
 * The constants are numbered implicitly starting at 0; the bracketed
 * comment after each one is the short property value name.
 * U_DT_COUNT follows the last value and equals the number of types (18).
 *
 * @see UCHAR_DECOMPOSITION_TYPE
 * @stable ICU 2.2
 */
typedef enum UDecompositionType {
    /*
     * Note: UDecompositionType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_DT_<Unicode Decomposition_Type value name>
     */

    U_DT_NONE,      /*[none]*/
    U_DT_CANONICAL, /*[can]*/
    U_DT_COMPAT,    /*[com]*/
    U_DT_CIRCLE,    /*[enc]*/
    U_DT_FINAL,     /*[fin]*/
    U_DT_FONT,      /*[font]*/
    U_DT_FRACTION,  /*[fra]*/
    U_DT_INITIAL,   /*[init]*/
    U_DT_ISOLATED,  /*[iso]*/
    U_DT_MEDIAL,    /*[med]*/
    U_DT_NARROW,    /*[nar]*/
    U_DT_NOBREAK,   /*[nb]*/
    U_DT_SMALL,     /*[sml]*/
    U_DT_SQUARE,    /*[sqr]*/
    U_DT_SUB,       /*[sub]*/
    U_DT_SUPER,     /*[sup]*/
    U_DT_VERTICAL,  /*[vert]*/
    U_DT_WIDE,      /*[wide]*/
    U_DT_COUNT /* 18 */
} UDecompositionType;
1637
/**
 * Joining Type constants.
 *
 * The constants are numbered implicitly starting at 0; the bracketed
 * comment after each one is the short property value name.
 * U_JT_COUNT follows the last value and equals the number of types (6).
 *
 * @see UCHAR_JOINING_TYPE
 * @stable ICU 2.2
 */
typedef enum UJoiningType {
    /*
     * Note: UJoiningType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_JT_<Unicode Joining_Type value name>
     */

    U_JT_NON_JOINING,   /*[U]*/
    U_JT_JOIN_CAUSING,  /*[C]*/
    U_JT_DUAL_JOINING,  /*[D]*/
    U_JT_LEFT_JOINING,  /*[L]*/
    U_JT_RIGHT_JOINING, /*[R]*/
    U_JT_TRANSPARENT,   /*[T]*/
    U_JT_COUNT /* 6 */
} UJoiningType;
1659
/**
 * Joining Group constants.
 *
 * The constants are numbered implicitly starting at 0.
 * U_JG_HAMZA_ON_HEH_GOAL is an alias with the same value as
 * U_JG_TEH_MARBUTA_GOAL, so it does not consume a number of its own.
 * U_JG_COUNT follows the last value.
 *
 * @see UCHAR_JOINING_GROUP
 * @stable ICU 2.2
 */
typedef enum UJoiningGroup {
    /*
     * Note: UJoiningGroup constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_JG_<Unicode Joining_Group value name>
     */

    U_JG_NO_JOINING_GROUP,
    U_JG_AIN,
    U_JG_ALAPH,
    U_JG_ALEF,
    U_JG_BEH,
    U_JG_BETH,
    U_JG_DAL,
    U_JG_DALATH_RISH,
    U_JG_E,
    U_JG_FEH,
    U_JG_FINAL_SEMKATH,
    U_JG_GAF,
    U_JG_GAMAL,
    U_JG_HAH,
    U_JG_TEH_MARBUTA_GOAL,  /**< @stable ICU 4.6 */
    U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL,
    U_JG_HE,
    U_JG_HEH,
    U_JG_HEH_GOAL,
    U_JG_HETH,
    U_JG_KAF,
    U_JG_KAPH,
    U_JG_KNOTTED_HEH,
    U_JG_LAM,
    U_JG_LAMADH,
    U_JG_MEEM,
    U_JG_MIM,
    U_JG_NOON,
    U_JG_NUN,
    U_JG_PE,
    U_JG_QAF,
    U_JG_QAPH,
    U_JG_REH,
    U_JG_REVERSED_PE,
    U_JG_SAD,
    U_JG_SADHE,
    U_JG_SEEN,
    U_JG_SEMKATH,
    U_JG_SHIN,
    U_JG_SWASH_KAF,
    U_JG_SYRIAC_WAW,
    U_JG_TAH,
    U_JG_TAW,
    U_JG_TEH_MARBUTA,
    U_JG_TETH,
    U_JG_WAW,
    U_JG_YEH,
    U_JG_YEH_BARREE,
    U_JG_YEH_WITH_TAIL,
    U_JG_YUDH,
    U_JG_YUDH_HE,
    U_JG_ZAIN,
    U_JG_FE,                    /**< @stable ICU 2.6 */
    U_JG_KHAPH,                 /**< @stable ICU 2.6 */
    U_JG_ZHAIN,                 /**< @stable ICU 2.6 */
    U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */
    U_JG_FARSI_YEH,             /**< @stable ICU 4.4 */
    U_JG_NYA,                   /**< @stable ICU 4.4 */
    U_JG_ROHINGYA_YEH,          /**< @stable ICU 49 */
    U_JG_MANICHAEAN_ALEPH,      /**< @stable ICU 54 */
    U_JG_MANICHAEAN_AYIN,       /**< @stable ICU 54 */
    U_JG_MANICHAEAN_BETH,       /**< @stable ICU 54 */
    U_JG_MANICHAEAN_DALETH,     /**< @stable ICU 54 */
    U_JG_MANICHAEAN_DHAMEDH,    /**< @stable ICU 54 */
    U_JG_MANICHAEAN_FIVE,       /**< @stable ICU 54 */
    U_JG_MANICHAEAN_GIMEL,      /**< @stable ICU 54 */
    U_JG_MANICHAEAN_HETH,       /**< @stable ICU 54 */
    U_JG_MANICHAEAN_HUNDRED,    /**< @stable ICU 54 */
    U_JG_MANICHAEAN_KAPH,       /**< @stable ICU 54 */
    U_JG_MANICHAEAN_LAMEDH,     /**< @stable ICU 54 */
    U_JG_MANICHAEAN_MEM,        /**< @stable ICU 54 */
    U_JG_MANICHAEAN_NUN,        /**< @stable ICU 54 */
    U_JG_MANICHAEAN_ONE,        /**< @stable ICU 54 */
    U_JG_MANICHAEAN_PE,         /**< @stable ICU 54 */
    U_JG_MANICHAEAN_QOPH,       /**< @stable ICU 54 */
    U_JG_MANICHAEAN_RESH,       /**< @stable ICU 54 */
    U_JG_MANICHAEAN_SADHE,      /**< @stable ICU 54 */
    U_JG_MANICHAEAN_SAMEKH,     /**< @stable ICU 54 */
    U_JG_MANICHAEAN_TAW,        /**< @stable ICU 54 */
    U_JG_MANICHAEAN_TEN,        /**< @stable ICU 54 */
    U_JG_MANICHAEAN_TETH,       /**< @stable ICU 54 */
    U_JG_MANICHAEAN_THAMEDH,    /**< @stable ICU 54 */
    U_JG_MANICHAEAN_TWENTY,     /**< @stable ICU 54 */
    U_JG_MANICHAEAN_WAW,        /**< @stable ICU 54 */
    U_JG_MANICHAEAN_YODH,       /**< @stable ICU 54 */
    U_JG_MANICHAEAN_ZAYIN,      /**< @stable ICU 54 */
    U_JG_STRAIGHT_WAW,          /**< @stable ICU 54 */
    U_JG_COUNT
} UJoiningGroup;
1762
/**
 * Grapheme Cluster Break constants.
 *
 * Every constant carries an explicit value; U_GCB_COUNT is one more than
 * the highest assigned value.
 *
 * @see UCHAR_GRAPHEME_CLUSTER_BREAK
 * @stable ICU 3.4
 */
typedef enum UGraphemeClusterBreak {
    /*
     * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_GCB_<Unicode Grapheme_Cluster_Break value name>
     */

    U_GCB_OTHER = 0,                /*[XX]*/
    U_GCB_CONTROL = 1,              /*[CN]*/
    U_GCB_CR = 2,                   /*[CR]*/
    U_GCB_EXTEND = 3,               /*[EX]*/
    U_GCB_L = 4,                    /*[L]*/
    U_GCB_LF = 5,                   /*[LF]*/
    U_GCB_LV = 6,                   /*[LV]*/
    U_GCB_LVT = 7,                  /*[LVT]*/
    U_GCB_T = 8,                    /*[T]*/
    U_GCB_V = 9,                    /*[V]*/
    U_GCB_SPACING_MARK = 10,        /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
    U_GCB_PREPEND = 11,             /*[PP]*/
    U_GCB_REGIONAL_INDICATOR = 12,  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
    U_GCB_COUNT = 13
} UGraphemeClusterBreak;
1791
/**
 * Word Break constants.
 * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)
 *
 * Every constant carries an explicit value; U_WB_COUNT is one more than
 * the highest assigned value.
 *
 * @see UCHAR_WORD_BREAK
 * @stable ICU 3.4
 */
typedef enum UWordBreakValues {
    /*
     * Note: UWordBreakValues constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_WB_<Unicode Word_Break value name>
     */

    U_WB_OTHER = 0,                 /*[XX]*/
    U_WB_ALETTER = 1,               /*[LE]*/
    U_WB_FORMAT = 2,                /*[FO]*/
    U_WB_KATAKANA = 3,              /*[KA]*/
    U_WB_MIDLETTER = 4,             /*[ML]*/
    U_WB_MIDNUM = 5,                /*[MN]*/
    U_WB_NUMERIC = 6,               /*[NU]*/
    U_WB_EXTENDNUMLET = 7,          /*[EX]*/
    U_WB_CR = 8,                    /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
    U_WB_EXTEND = 9,                /*[Extend]*/
    U_WB_LF = 10,                   /*[LF]*/
    U_WB_MIDNUMLET = 11,            /*[MB]*/
    U_WB_NEWLINE = 12,              /*[NL]*/
    U_WB_REGIONAL_INDICATOR = 13,   /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
    U_WB_HEBREW_LETTER = 14,        /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
    U_WB_SINGLE_QUOTE = 15,         /*[SQ]*/
    U_WB_DOUBLE_QUOTE = 16,         /*[DQ]*/
    U_WB_COUNT = 17
} UWordBreakValues;
1825
/**
 * Sentence Break constants.
 *
 * Every constant carries an explicit value; U_SB_COUNT is one more than
 * the highest assigned value.
 *
 * @see UCHAR_SENTENCE_BREAK
 * @stable ICU 3.4
 */
typedef enum USentenceBreak {
    /*
     * Note: USentenceBreak constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_SB_<Unicode Sentence_Break value name>
     */

    U_SB_OTHER = 0,             /*[XX]*/
    U_SB_ATERM = 1,             /*[AT]*/
    U_SB_CLOSE = 2,             /*[CL]*/
    U_SB_FORMAT = 3,            /*[FO]*/
    U_SB_LOWER = 4,             /*[LO]*/
    U_SB_NUMERIC = 5,           /*[NU]*/
    U_SB_OLETTER = 6,           /*[LE]*/
    U_SB_SEP = 7,               /*[SE]*/
    U_SB_SP = 8,                /*[SP]*/
    U_SB_STERM = 9,             /*[ST]*/
    U_SB_UPPER = 10,            /*[UP]*/
    U_SB_CR = 11,               /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
    U_SB_EXTEND = 12,           /*[EX]*/
    U_SB_LF = 13,               /*[LF]*/
    U_SB_SCONTINUE = 14,        /*[SC]*/
    U_SB_COUNT = 15
} USentenceBreak;
1856
/**
 * Line Break constants.
 *
 * Every constant carries an explicit value. U_LB_INSEPERABLE is declared
 * as an alias of U_LB_INSEPARABLE and shares its value. U_LB_COUNT is one
 * more than the highest assigned value.
 *
 * @see UCHAR_LINE_BREAK
 * @stable ICU 2.2
 */
typedef enum ULineBreak {
    /*
     * Note: ULineBreak constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_LB_<Unicode Line_Break value name>
     */

    U_LB_UNKNOWN = 0,           /*[XX]*/
    U_LB_AMBIGUOUS = 1,         /*[AI]*/
    U_LB_ALPHABETIC = 2,        /*[AL]*/
    U_LB_BREAK_BOTH = 3,        /*[B2]*/
    U_LB_BREAK_AFTER = 4,       /*[BA]*/
    U_LB_BREAK_BEFORE = 5,      /*[BB]*/
    U_LB_MANDATORY_BREAK = 6,   /*[BK]*/
    U_LB_CONTINGENT_BREAK = 7,  /*[CB]*/
    U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/
    U_LB_COMBINING_MARK = 9,    /*[CM]*/
    U_LB_CARRIAGE_RETURN = 10,  /*[CR]*/
    U_LB_EXCLAMATION = 11,      /*[EX]*/
    U_LB_GLUE = 12,             /*[GL]*/
    U_LB_HYPHEN = 13,           /*[HY]*/
    U_LB_IDEOGRAPHIC = 14,      /*[ID]*/
    /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */
    U_LB_INSEPARABLE = 15,      /*[IN]*/
    U_LB_INSEPERABLE = U_LB_INSEPARABLE,
    U_LB_INFIX_NUMERIC = 16,    /*[IS]*/
    U_LB_LINE_FEED = 17,        /*[LF]*/
    U_LB_NONSTARTER = 18,       /*[NS]*/
    U_LB_NUMERIC = 19,          /*[NU]*/
    U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/
    U_LB_POSTFIX_NUMERIC = 21,  /*[PO]*/
    U_LB_PREFIX_NUMERIC = 22,   /*[PR]*/
    U_LB_QUOTATION = 23,        /*[QU]*/
    U_LB_COMPLEX_CONTEXT = 24,  /*[SA]*/
    U_LB_SURROGATE = 25,        /*[SG]*/
    U_LB_SPACE = 26,            /*[SP]*/
    U_LB_BREAK_SYMBOLS = 27,    /*[SY]*/
    U_LB_ZWSPACE = 28,          /*[ZW]*/
    U_LB_NEXT_LINE = 29,        /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
    U_LB_WORD_JOINER = 30,      /*[WJ]*/
    U_LB_H2 = 31,               /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
    U_LB_H3 = 32,               /*[H3]*/
    U_LB_JL = 33,               /*[JL]*/
    U_LB_JT = 34,               /*[JT]*/
    U_LB_JV = 35,               /*[JV]*/
    U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
    U_LB_CONDITIONAL_JAPANESE_STARTER = 37, /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
    U_LB_HEBREW_LETTER = 38,    /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
    U_LB_REGIONAL_INDICATOR = 39, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
    U_LB_COUNT = 40
} ULineBreak;
1914
/**
 * Numeric Type constants.
 *
 * Values are assigned implicitly in declaration order, starting at 0;
 * U_NT_COUNT is the last enumerator and equals the number of values.
 *
 * @see UCHAR_NUMERIC_TYPE
 * @stable ICU 2.2
 */
typedef enum UNumericType {
    /*
     * Note: UNumericType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_NT_<Unicode Numeric_Type value name>
     */

    U_NT_NONE,              /*[None]*/
    U_NT_DECIMAL,           /*[de]*/
    U_NT_DIGIT,             /*[di]*/
    U_NT_NUMERIC,           /*[nu]*/
    U_NT_COUNT
} UNumericType;
1934
/**
 * Hangul Syllable Type constants.
 *
 * Values are assigned implicitly in declaration order, starting at 0;
 * U_HST_COUNT is the last enumerator and equals the number of values.
 *
 * @see UCHAR_HANGUL_SYLLABLE_TYPE
 * @stable ICU 2.6
 */
typedef enum UHangulSyllableType {
    /*
     * Note: UHangulSyllableType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_HST_<Unicode Hangul_Syllable_Type value name>
     */

    U_HST_NOT_APPLICABLE,   /*[NA]*/
    U_HST_LEADING_JAMO,     /*[L]*/
    U_HST_VOWEL_JAMO,       /*[V]*/
    U_HST_TRAILING_JAMO,    /*[T]*/
    U_HST_LV_SYLLABLE,      /*[LV]*/
    U_HST_LVT_SYLLABLE,     /*[LVT]*/
    U_HST_COUNT
} UHangulSyllableType;
1956
1957	/**
1958	* Check a binary Unicode property for a code point.
1959	*
1960	* Unicode, especially in version 3.2, defines many more properties than the
1961	* original set in UnicodeData.txt.
1962	*
1963	* The properties APIs are intended to reflect Unicode properties as defined
1964	* in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
1965	* For details about the properties see http://www.unicode.org/ucd/ .
1966	* For names of Unicode properties see the UCD file PropertyAliases.txt.
1967	*
1968	* Important: If ICU is built with UCD files from Unicode versions below 3.2,
1969	* then properties marked with "new in Unicode 3.2" are not or not fully available.
1970	*
1971	* @param c Code point to test.
1972	* @param which UProperty selector constant, identifies which binary property to check.
1973	* Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT.
1974	* @return TRUE or FALSE according to the binary Unicode property value for c.
1975	* Also FALSE if 'which' is out of bounds or if the Unicode version
1976	* does not have data for the property at all, or not for this code point.
1977	*
1978	* @see UProperty
1979	* @see u_getIntPropertyValue
1980	* @see u_getUnicodeVersion
1981	* @stable ICU 2.1
1982	*/
1983	U_STABLE UBool U_EXPORT2
1984	u_hasBinaryProperty(UChar32 c, UProperty which);
1985
1986	/**
1987	* Check if a code point has the Alphabetic Unicode property.
1988	* Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
1989	* This is different from u_isalpha!
1990	* @param c Code point to test
1991	* @return true if the code point has the Alphabetic Unicode property, false otherwise
1992	*
1993	* @see UCHAR_ALPHABETIC
1994	* @see u_isalpha
1995	* @see u_hasBinaryProperty
1996	* @stable ICU 2.1
1997	*/
1998	U_STABLE UBool U_EXPORT2
1999	u_isUAlphabetic(UChar32 c);
2000
2001	/**
2002	* Check if a code point has the Lowercase Unicode property.
2003	* Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE).
2004	* This is different from u_islower!
2005	* @param c Code point to test
2006	* @return true if the code point has the Lowercase Unicode property, false otherwise
2007	*
2008	* @see UCHAR_LOWERCASE
2009	* @see u_islower
2010	* @see u_hasBinaryProperty
2011	* @stable ICU 2.1
2012	*/
2013	U_STABLE UBool U_EXPORT2
2014	u_isULowercase(UChar32 c);
2015
2016	/**
2017	* Check if a code point has the Uppercase Unicode property.
2018	* Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE).
2019	* This is different from u_isupper!
2020	* @param c Code point to test
2021	* @return true if the code point has the Uppercase Unicode property, false otherwise
2022	*
2023	* @see UCHAR_UPPERCASE
2024	* @see u_isupper
2025	* @see u_hasBinaryProperty
2026	* @stable ICU 2.1
2027	*/
2028	U_STABLE UBool U_EXPORT2
2029	u_isUUppercase(UChar32 c);
2030
2031	/**
2032	* Check if a code point has the White_Space Unicode property.
2033	* Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE).
2034	* This is different from both u_isspace and u_isWhitespace!
2035	*
2036	* Note: There are several ICU whitespace functions; please see the uchar.h
2037	* file documentation for a detailed comparison.
2038	*
2039	* @param c Code point to test
2040	* @return true if the code point has the White_Space Unicode property, false otherwise.
2041	*
2042	* @see UCHAR_WHITE_SPACE
2043	* @see u_isWhitespace
2044	* @see u_isspace
2045	* @see u_isJavaSpaceChar
2046	* @see u_hasBinaryProperty
2047	* @stable ICU 2.1
2048	*/
2049	U_STABLE UBool U_EXPORT2
2050	u_isUWhiteSpace(UChar32 c);
2051
2052	/**
2053	* Get the property value for an enumerated or integer Unicode property for a code point.
2054	* Also returns binary and mask property values.
2055	*
2056	* Unicode, especially in version 3.2, defines many more properties than the
2057	* original set in UnicodeData.txt.
2058	*
2059	* The properties APIs are intended to reflect Unicode properties as defined
2060	* in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
2061	* For details about the properties see http://www.unicode.org/ .
2062	* For names of Unicode properties see the UCD file PropertyAliases.txt.
2063	*
2064	* Sample usage:
2065	* UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH);
2066	* UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);
2067	*
2068	* @param c Code point to test.
2069	* @param which UProperty selector constant, identifies which property to check.
2070	* Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
2071	* or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
2072	* or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
2073	* @return Numeric value that is directly the property value or,
2074	* for enumerated properties, corresponds to the numeric value of the enumerated
2075	* constant of the respective property value enumeration type
2076	* (cast to enum type if necessary).
2077	* Returns 0 or 1 (for FALSE/TRUE) for binary Unicode properties.
2078	* Returns a bit-mask for mask properties.
2079	* Returns 0 if 'which' is out of bounds or if the Unicode version
2080	* does not have data for the property at all, or not for this code point.
2081	*
2082	* @see UProperty
2083	* @see u_hasBinaryProperty
2084	* @see u_getIntPropertyMinValue
2085	* @see u_getIntPropertyMaxValue
2086	* @see u_getUnicodeVersion
2087	* @stable ICU 2.2
2088	*/
2089	U_STABLE int32_t U_EXPORT2
2090	u_getIntPropertyValue(UChar32 c, UProperty which);
2091
2092	/**
2093	* Get the minimum value for an enumerated/integer/binary Unicode property.
2094	* Can be used together with u_getIntPropertyMaxValue
2095	* to allocate arrays of UnicodeSet or similar.
2096	*
2097	* @param which UProperty selector constant, identifies which property to check.
2098	* Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
2099	* or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
2100	* @return Minimum value returned by u_getIntPropertyValue for a Unicode property.
2101	* 0 if the property selector is out of range.
2102	*
2103	* @see UProperty
2104	* @see u_hasBinaryProperty
2105	* @see u_getUnicodeVersion
2106	* @see u_getIntPropertyMaxValue
2107	* @see u_getIntPropertyValue
2108	* @stable ICU 2.2
2109	*/
2110	U_STABLE int32_t U_EXPORT2
2111	u_getIntPropertyMinValue(UProperty which);
2112
2113	/**
2114	* Get the maximum value for an enumerated/integer/binary Unicode property.
2115	* Can be used together with u_getIntPropertyMinValue
2116	* to allocate arrays of UnicodeSet or similar.
2117	*
2118	* Examples for min/max values (for Unicode 3.2):
2119	*
2120	* - UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL)
2121	* - UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA)
2122	* - UCHAR_IDEOGRAPHIC: 0/1 (FALSE/TRUE)
2123	*
2124	* For undefined UProperty constant values, min/max values will be 0/-1.
2125	*
2126	* @param which UProperty selector constant, identifies which property to check.
2127	* Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
2128	* or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
2129	* @return Maximum value returned by u_getIntPropertyValue for a Unicode property.
2130	* <=0 if the property selector is out of range.
2131	*
2132	* @see UProperty
2133	* @see u_hasBinaryProperty
2134	* @see u_getUnicodeVersion
2135	* @see u_getIntPropertyMinValue
2136	* @see u_getIntPropertyValue
2137	* @stable ICU 2.2
2138	*/
2139	U_STABLE int32_t U_EXPORT2
2140	u_getIntPropertyMaxValue(UProperty which);
2141
2142	/**
2143	* Get the numeric value for a Unicode code point as defined in the
2144	* Unicode Character Database.
2145	*
2146	* A "double" return type is necessary because
2147	* some numeric values are fractions, negative, or too large for int32_t.
2148	*
2149	* For characters without any numeric values in the Unicode Character Database,
2150	* this function will return U_NO_NUMERIC_VALUE.
2151	* Note: This is different from the Unicode Standard which specifies NaN as the default value.
2152	* (NaN is not available on all platforms.)
2153	*
2154	* Similar to java.lang.Character.getNumericValue(), but u_getNumericValue()
2155	* also supports negative values, large values, and fractions,
2156	* while Java's getNumericValue() returns values 10..35 for ASCII letters.
2157	*
2158	* @param c Code point to get the numeric value for.
2159	* @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.
2160	*
2161	* @see U_NO_NUMERIC_VALUE
2162	* @stable ICU 2.2
2163	*/
2164	U_STABLE double U_EXPORT2
2165	u_getNumericValue(UChar32 c);
2166
2167	/**
2168	* Special value that is returned by u_getNumericValue when
2169	* no numeric value is defined for a code point.
2170	*
2171	* @see u_getNumericValue
2172	* @stable ICU 2.2
2173	*/
2174	#define U_NO_NUMERIC_VALUE ((double)-123456789.)
2175
2176	/**
2177	* Determines whether the specified code point has the general category "Ll"
2178	* (lowercase letter).
2179	*
2180	* Same as java.lang.Character.isLowerCase().
2181	*
2182	* This misses some characters that are also lowercase but
2183	* have a different general category value.
2184	* In order to include those, use UCHAR_LOWERCASE.
2185	*
2186	* In addition to being equivalent to a Java function, this also serves
2187	* as a C/POSIX migration function.
2188	* See the comments about C/POSIX character classification functions in the
2189	* documentation at the top of this header file.
2190	*
2191	* @param c the code point to be tested
2192	* @return TRUE if the code point is an Ll lowercase letter
2193	*
2194	* @see UCHAR_LOWERCASE
2195	* @see u_isupper
2196	* @see u_istitle
2197	* @stable ICU 2.0
2198	*/
2199	U_STABLE UBool U_EXPORT2
2200	u_islower(UChar32 c);
2201
2202	/**
2203	* Determines whether the specified code point has the general category "Lu"
2204	* (uppercase letter).
2205	*
2206	* Same as java.lang.Character.isUpperCase().
2207	*
2208	* This misses some characters that are also uppercase but
2209	* have a different general category value.
2210	* In order to include those, use UCHAR_UPPERCASE.
2211	*
2212	* In addition to being equivalent to a Java function, this also serves
2213	* as a C/POSIX migration function.
2214	* See the comments about C/POSIX character classification functions in the
2215	* documentation at the top of this header file.
2216	*
2217	* @param c the code point to be tested
2218	* @return TRUE if the code point is an Lu uppercase letter
2219	*
2220	* @see UCHAR_UPPERCASE
2221	* @see u_islower
2222	* @see u_istitle
2223	* @see u_tolower
2224	* @stable ICU 2.0
2225	*/
2226	U_STABLE UBool U_EXPORT2
2227	u_isupper(UChar32 c);
2228
2229	/**
2230	* Determines whether the specified code point is a titlecase letter.
2231	* True for general category "Lt" (titlecase letter).
2232	*
2233	* Same as java.lang.Character.isTitleCase().
2234	*
2235	* @param c the code point to be tested
2236	* @return TRUE if the code point is an Lt titlecase letter
2237	*
2238	* @see u_isupper
2239	* @see u_islower
2240	* @see u_totitle
2241	* @stable ICU 2.0
2242	*/
2243	U_STABLE UBool U_EXPORT2
2244	u_istitle(UChar32 c);
2245
2246	/**
2247	* Determines whether the specified code point is a digit character according to Java.
2248	* True for characters with general category "Nd" (decimal digit numbers).
2249	* Beginning with Unicode 4, this is the same as
2250	* testing for the Numeric_Type of Decimal.
2251	*
2252	* Same as java.lang.Character.isDigit().
2253	*
2254	* In addition to being equivalent to a Java function, this also serves
2255	* as a C/POSIX migration function.
2256	* See the comments about C/POSIX character classification functions in the
2257	* documentation at the top of this header file.
2258	*
2259	* @param c the code point to be tested
2260	* @return TRUE if the code point is a digit character according to Character.isDigit()
2261	*
2262	* @stable ICU 2.0
2263	*/
2264	U_STABLE UBool U_EXPORT2
2265	u_isdigit(UChar32 c);
2266
2267	/**
2268	* Determines whether the specified code point is a letter character.
2269	* True for general categories "L" (letters).
2270	*
2271	* Same as java.lang.Character.isLetter().
2272	*
2273	* In addition to being equivalent to a Java function, this also serves
2274	* as a C/POSIX migration function.
2275	* See the comments about C/POSIX character classification functions in the
2276	* documentation at the top of this header file.
2277	*
2278	* @param c the code point to be tested
2279	* @return TRUE if the code point is a letter character
2280	*
2281	* @see u_isdigit
2282	* @see u_isalnum
2283	* @stable ICU 2.0
2284	*/
2285	U_STABLE UBool U_EXPORT2
2286	u_isalpha(UChar32 c);
2287
2288	/**
2289	* Determines whether the specified code point is an alphanumeric character
2290	* (letter or digit) according to Java.
2291	* True for characters with general categories
2292	* "L" (letters) and "Nd" (decimal digit numbers).
2293	*
2294	* Same as java.lang.Character.isLetterOrDigit().
2295	*
2296	* In addition to being equivalent to a Java function, this also serves
2297	* as a C/POSIX migration function.
2298	* See the comments about C/POSIX character classification functions in the
2299	* documentation at the top of this header file.
2300	*
2301	* @param c the code point to be tested
2302	* @return TRUE if the code point is an alphanumeric character according to Character.isLetterOrDigit()
2303	*
2304	* @stable ICU 2.0
2305	*/
2306	U_STABLE UBool U_EXPORT2
2307	u_isalnum(UChar32 c);
2308
2309	/**
2310	* Determines whether the specified code point is a hexadecimal digit.
2311	* This is equivalent to u_digit(c, 16)>=0.
2312	* True for characters with general category "Nd" (decimal digit numbers)
2313	* as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII.
2314	* (That is, for letters with code points
2315	* 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)
2316	*
2317	* In order to narrow the definition of hexadecimal digits to only ASCII
2318	* characters, use (c<=0x7f && u_isxdigit(c)).
2319	*
2320	* This is a C/POSIX migration function.
2321	* See the comments about C/POSIX character classification functions in the
2322	* documentation at the top of this header file.
2323	*
2324	* @param c the code point to be tested
2325	* @return TRUE if the code point is a hexadecimal digit
2326	*
2327	* @stable ICU 2.6
2328	*/
2329	U_STABLE UBool U_EXPORT2
2330	u_isxdigit(UChar32 c);
2331
2332	/**
2333	* Determines whether the specified code point is a punctuation character.
2334	* True for characters with general categories "P" (punctuation).
2335	*
2336	* This is a C/POSIX migration function.
2337	* See the comments about C/POSIX character classification functions in the
2338	* documentation at the top of this header file.
2339	*
2340	* @param c the code point to be tested
2341	* @return TRUE if the code point is a punctuation character
2342	*
2343	* @stable ICU 2.6
2344	*/
2345	U_STABLE UBool U_EXPORT2
2346	u_ispunct(UChar32 c);
2347
2348	/**
2349	* Determines whether the specified code point is a "graphic" character
2350	* (printable, excluding spaces).
2351	* TRUE for all characters except those with general categories
2352	* "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates),
2353	* "Cn" (unassigned), and "Z" (separators).
2354	*
2355	* This is a C/POSIX migration function.
2356	* See the comments about C/POSIX character classification functions in the
2357	* documentation at the top of this header file.
2358	*
2359	* @param c the code point to be tested
2360	* @return TRUE if the code point is a "graphic" character
2361	*
2362	* @stable ICU 2.6
2363	*/
2364	U_STABLE UBool U_EXPORT2
2365	u_isgraph(UChar32 c);
2366
2367	/**
2368	* Determines whether the specified code point is a "blank" or "horizontal space",
2369	* a character that visibly separates words on a line.
2370	* The following are equivalent definitions:
2371	*
2372	* TRUE for Unicode White_Space characters except for "vertical space controls"
2373	* where "vertical space controls" are the following characters:
2374	* U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS)
2375	*
2376	* same as
2377	*
2378	* TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators)
2379	* except Zero Width Space (ZWSP, U+200B).
2380	*
2381	* Note: There are several ICU whitespace functions; please see the uchar.h
2382	* file documentation for a detailed comparison.
2383	*
2384	* This is a C/POSIX migration function.
2385	* See the comments about C/POSIX character classification functions in the
2386	* documentation at the top of this header file.
2387	*
2388	* @param c the code point to be tested
2389	* @return TRUE if the code point is a "blank"
2390	*
2391	* @stable ICU 2.6
2392	*/
2393	U_STABLE UBool U_EXPORT2
2394	u_isblank(UChar32 c);
2395
2396	/**
2397	* Determines whether the specified code point is "defined",
2398	* which usually means that it is assigned a character.
2399	* True for general categories other than "Cn" (other, not assigned),
2400	* i.e., true for all code points mentioned in UnicodeData.txt.
2401	*
2402	* Note that non-character code points (e.g., U+FDD0) are not "defined"
2403	* (they are Cn), but surrogate code points are "defined" (Cs).
2404	*
2405	* Same as java.lang.Character.isDefined().
2406	*
2407	* @param c the code point to be tested
2408	* @return TRUE if the code point is assigned a character
2409	*
2410	* @see u_isdigit
2411	* @see u_isalpha
2412	* @see u_isalnum
2413	* @see u_isupper
2414	* @see u_islower
2415	* @see u_istitle
2416	* @stable ICU 2.0
2417	*/
2418	U_STABLE UBool U_EXPORT2
2419	u_isdefined(UChar32 c);
2420
2421	/**
2422	* Determines if the specified character is a space character or not.
2423	*
2424	* Note: There are several ICU whitespace functions; please see the uchar.h
2425	* file documentation for a detailed comparison.
2426	*
2427	* This is a C/POSIX migration function.
2428	* See the comments about C/POSIX character classification functions in the
2429	* documentation at the top of this header file.
2430	*
2431	* @param c the character to be tested
2432	* @return true if the character is a space character; false otherwise.
2433	*
2434	* @see u_isJavaSpaceChar
2435	* @see u_isWhitespace
2436	* @see u_isUWhiteSpace
2437	* @stable ICU 2.0
2438	*/
2439	U_STABLE UBool U_EXPORT2
2440	u_isspace(UChar32 c);
2441
2442	/**
2443	* Determine if the specified code point is a space character according to Java.
2444	* True for characters with general categories "Z" (separators),
2445	* which does not include control codes (e.g., TAB or Line Feed).
2446	*
2447	* Same as java.lang.Character.isSpaceChar().
2448	*
2449	* Note: There are several ICU whitespace functions; please see the uchar.h
2450	* file documentation for a detailed comparison.
2451	*
2452	* @param c the code point to be tested
2453	* @return TRUE if the code point is a space character according to Character.isSpaceChar()
2454	*
2455	* @see u_isspace
2456	* @see u_isWhitespace
2457	* @see u_isUWhiteSpace
2458	* @stable ICU 2.6
2459	*/
2460	U_STABLE UBool U_EXPORT2
2461	u_isJavaSpaceChar(UChar32 c);
2462
2463	/**
2464	* Determines if the specified code point is a whitespace character according to Java/ICU.
2465	* A character is considered to be a Java whitespace character if and only
2466	* if it satisfies one of the following criteria:
2467	*
2468	* - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
2469	* also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
2470	* - It is U+0009 HORIZONTAL TABULATION.
2471	* - It is U+000A LINE FEED.
2472	* - It is U+000B VERTICAL TABULATION.
2473	* - It is U+000C FORM FEED.
2474	* - It is U+000D CARRIAGE RETURN.
2475	* - It is U+001C FILE SEPARATOR.
2476	* - It is U+001D GROUP SEPARATOR.
2477	* - It is U+001E RECORD SEPARATOR.
2478	* - It is U+001F UNIT SEPARATOR.
2479	*
2480	* This API tries to sync with the semantics of Java's
2481	* java.lang.Character.isWhitespace(), but it may not return
2482	* the exact same results because of the Unicode version
2483	* difference.
2484	*
2485	* Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
2486	* to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
2487	* See http://www.unicode.org/versions/Unicode4.0.1/
2488	*
2489	* Note: There are several ICU whitespace functions; please see the uchar.h
2490	* file documentation for a detailed comparison.
2491	*
2492	* @param c the code point to be tested
2493	* @return TRUE if the code point is a whitespace character according to Java/ICU
2494	*
2495	* @see u_isspace
2496	* @see u_isJavaSpaceChar
2497	* @see u_isUWhiteSpace
2498	* @stable ICU 2.0
2499	*/
2500	U_STABLE UBool U_EXPORT2
2501	u_isWhitespace(UChar32 c);
2502
2503	/**
2504	* Determines whether the specified code point is a control character
2505	* (as defined by this function).
2506	* A control character is one of the following:
2507	* - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
2508	* - U_CONTROL_CHAR (Cc)
2509	* - U_FORMAT_CHAR (Cf)
2510	* - U_LINE_SEPARATOR (Zl)
2511	* - U_PARAGRAPH_SEPARATOR (Zp)
2512	*
2513	* This is a C/POSIX migration function.
2514	* See the comments about C/POSIX character classification functions in the
2515	* documentation at the top of this header file.
2516	*
2517	* @param c the code point to be tested
2518	* @return TRUE if the code point is a control character
2519	*
2520	* @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
2521	* @see u_isprint
2522	* @stable ICU 2.0
2523	*/
2524	U_STABLE UBool U_EXPORT2
2525	u_iscntrl(UChar32 c);
2526
2527	/**
2528	* Determines whether the specified code point is an ISO control code.
2529	* True for U+0000..U+001f and U+007f..U+009f (general category "Cc").
2530	*
2531	* Same as java.lang.Character.isISOControl().
2532	*
2533	* @param c the code point to be tested
2534	* @return TRUE if the code point is an ISO control code
2535	*
2536	* @see u_iscntrl
2537	* @stable ICU 2.6
2538	*/
2539	U_STABLE UBool U_EXPORT2
2540	u_isISOControl(UChar32 c);
2541
2542	/**
2543	* Determines whether the specified code point is a printable character.
2544	* True for general categories <em>other</em> than "C" (controls).
2545	*
2546	* This is a C/POSIX migration function.
2547	* See the comments about C/POSIX character classification functions in the
2548	* documentation at the top of this header file.
2549	*
2550	* @param c the code point to be tested
2551	* @return TRUE if the code point is a printable character
2552	*
2553	* @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
2554	* @see u_iscntrl
2555	* @stable ICU 2.0
2556	*/
2557	U_STABLE UBool U_EXPORT2
2558	u_isprint(UChar32 c);
2559
2560	/**
2561	* Determines whether the specified code point is a base character.
2562	* True for general categories "L" (letters), "N" (numbers),
2563	* "Mc" (spacing combining marks), and "Me" (enclosing marks).
2564	*
2565	* Note that this is different from the Unicode definition in
2566	* chapter 3.5, conformance clause D13,
2567	* which defines base characters to be all characters (not Cn)
2568	* that do not graphically combine with preceding characters (M)
2569	* and that are neither control (Cc) nor format (Cf) characters.
2570	*
2571	* @param c the code point to be tested
2572	* @return TRUE if the code point is a base character according to this function
2573	*
2574	* @see u_isalpha
2575	* @see u_isdigit
2576	* @stable ICU 2.0
2577	*/
2578	U_STABLE UBool U_EXPORT2
2579	u_isbase(UChar32 c);
2580
2581	/**
2582	* Returns the bidirectional category value for the code point,
2583	* which is used in the Unicode bidirectional algorithm
2584	* (UAX #9 http://www.unicode.org/reports/tr9/).
2585	* Note that some <em>unassigned</em> code points have bidi values
2586	* of R or AL because they are in blocks that are reserved
2587	* for Right-To-Left scripts.
2588	*
2589	* Same as java.lang.Character.getDirectionality()
2590	*
2591	* @param c the code point to be tested
2592	* @return the bidirectional category (UCharDirection) value
2593	*
2594	* @see UCharDirection
2595	* @stable ICU 2.0
2596	*/
2597	U_STABLE UCharDirection U_EXPORT2
2598	u_charDirection(UChar32 c);
2599
2600	/**
2601	* Determines whether the code point has the Bidi_Mirrored property.
2602	* This property is set for characters that are commonly used in
2603	* Right-To-Left contexts and need to be displayed with a "mirrored"
2604	* glyph.
2605	*
2606	* Same as java.lang.Character.isMirrored().
2607	* Same as UCHAR_BIDI_MIRRORED
2608	*
2609	* @param c the code point to be tested
2610	* @return TRUE if the character has the Bidi_Mirrored property
2611	*
2612	* @see UCHAR_BIDI_MIRRORED
2613	* @stable ICU 2.0
2614	*/
2615	U_STABLE UBool U_EXPORT2
2616	u_isMirrored(UChar32 c);
2617
2618	/**
2619	* Maps the specified character to a "mirror-image" character.
2620	* For characters with the Bidi_Mirrored property, implementations
2621	* sometimes need a "poor man's" mapping to another Unicode
2622	* character (code point) such that the default glyph may serve
2623	* as the mirror-image of the default glyph of the specified
2624	* character. This is useful for text conversion to and from
2625	* codepages with visual order, and for displays without glyph
2626	* selection capabilities.
2627	*
2628	* @param c the code point to be mapped
2629	* @return another Unicode code point that may serve as a mirror-image
2630	* substitute, or c itself if there is no such mapping or c
2631	* does not have the Bidi_Mirrored property
2632	*
2633	* @see UCHAR_BIDI_MIRRORED
2634	* @see u_isMirrored
2635	* @stable ICU 2.0
2636	*/
2637	U_STABLE UChar32 U_EXPORT2
2638	u_charMirror(UChar32 c);
2639
2640	/**
2641	* Maps the specified character to its paired bracket character.
2642	* For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror().
2643	* Otherwise c itself is returned.
2644	* See http://www.unicode.org/reports/tr9/
2645	*
2646	* @param c the code point to be mapped
2647	* @return the paired bracket code point,
2648	* or c itself if there is no such mapping
2649	* (Bidi_Paired_Bracket_Type=None)
2650	*
2651	* @see UCHAR_BIDI_PAIRED_BRACKET
2652	* @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
2653	* @see u_charMirror
2654	* @stable ICU 52
2655	*/
2656	U_STABLE UChar32 U_EXPORT2
2657	u_getBidiPairedBracket(UChar32 c);
2658
2659	/**
2660	* Returns the general category value for the code point.
2661	*
2662	* Same as java.lang.Character.getType().
2663	*
2664	* @param c the code point to be tested
2665	* @return the general category (UCharCategory) value
2666	*
2667	* @see UCharCategory
2668	* @stable ICU 2.0
2669	*/
2670	U_STABLE int8_t U_EXPORT2
2671	u_charType(UChar32 c);
2672
2673	/**
2674	* Get a single-bit bit set for the general category of a character.
2675	* This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc.
2676	* Same as U_MASK(u_charType(c)).
2677	*
2678	* @param c the code point to be tested
2679	* @return a single-bit mask corresponding to the general category (UCharCategory) value
2680	*
2681	* @see u_charType
2682	* @see UCharCategory
2683	* @see U_GC_CN_MASK
2684	* @stable ICU 2.1
2685	*/
2686	#define U_GET_GC_MASK(c) U_MASK(u_charType(c))
2687
2688	/**
2689	* Callback from u_enumCharTypes(), is called for each contiguous range
2690	* of code points c (where start<=c<limit)
2691	* with the same Unicode general category ("character type").
2692	*
2693	* The callback function can stop the enumeration by returning FALSE.
2694	*
2695	* @param context an opaque pointer, as passed into u_enumCharTypes()
2696	* @param start the first code point in a contiguous range with value
2697	* @param limit one past the last code point in a contiguous range with value
2698	* @param type the general category for all code points in [start..limit[
2699	* @return FALSE to stop the enumeration
2700	*
2701	* @stable ICU 2.1
2702	* @see UCharCategory
2703	* @see u_enumCharTypes
2704	*/
2705	typedef UBool U_CALLCONV
2706	UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
2707
2708	/**
2709	* Enumerate efficiently all code points with their Unicode general categories.
2710	*
2711	* This is useful for building data structures (e.g., UnicodeSets),
2712	* for enumerating all assigned code points (type!=U_UNASSIGNED), etc.
2713	*
2714	* For each contiguous range of code points with a given general category ("character type"),
2715	* the UCharEnumTypeRange function is called; it can stop the enumeration by returning FALSE.
2716	* Adjacent ranges have different types.
2717	* The Unicode Standard guarantees that the numeric value of the type is 0..31.
2718	*
2719	* @param enumRange a pointer to a function that is called for each contiguous range
2720	* of code points with the same general category
2721	* @param context an opaque pointer that is passed on to the callback function
2722	*
2723	* @stable ICU 2.1
2724	* @see UCharCategory
2725	* @see UCharEnumTypeRange
2726	*/
2727	U_STABLE void U_EXPORT2
2728	u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
2729
2730	#if !UCONFIG_NO_NORMALIZATION
2731
2732	/**
2733	* Returns the combining class of the code point as specified in UnicodeData.txt.
2734	*
2735	* @param c the code point of the character
2736	* @return the combining class of the character
2737	* @stable ICU 2.0
2738	*/
2739	U_STABLE uint8_t U_EXPORT2
2740	u_getCombiningClass(UChar32 c);
2741
2742	#endif
2743
2744	/**
2745	* Returns the decimal digit value of a decimal digit character.
2746	* Such characters have the general category "Nd" (decimal digit numbers)
2747	* and a Numeric_Type of Decimal.
2748	*
2749	* Unlike ICU releases before 2.6, no digit values are returned for any
2750	* Han characters because Han number characters are often used with a special
2751	* Chinese-style number format (with characters for powers of 10 in between)
2752	* instead of in decimal-positional notation.
2753	* Unicode 4 explicitly assigns Han number characters the Numeric_Type
2754	* Numeric instead of Decimal.
2755	* See Jitterbug 1483 for more details.
2756	*
2757	* Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue()
2758	* for complete numeric Unicode properties.
2759	*
2760	* @param c the code point for which to get the decimal digit value
2761	* @return the decimal digit value of c,
2762	* or -1 if c is not a decimal digit character
2763	*
2764	* @see u_getNumericValue
2765	* @stable ICU 2.0
2766	*/
2767	U_STABLE int32_t U_EXPORT2
2768	u_charDigitValue(UChar32 c);
2769
2770	/**
2771	* Returns the Unicode allocation block that contains the character.
2772	*
2773	* @param c the code point to be tested
2774	* @return the block value (UBlockCode) for c
2775	*
2776	* @see UBlockCode
2777	* @stable ICU 2.0
2778	*/
2779	U_STABLE UBlockCode U_EXPORT2
2780	ublock_getCode(UChar32 c);
2781
2782	/**
2783	* Retrieve the name of a Unicode character.
2784	* Depending on <code>nameChoice</code>, the character name written
2785	* into the buffer is the "modern" name or the name that was defined
2786	* in Unicode version 1.0.
2787	* The name contains only "invariant" characters
2788	* like A-Z, 0-9, space, and '-'.
2789	* Unicode 1.0 names are only retrieved if they are different from the modern
2790	* names and if the data file contains the data for them. gennames may or may
2791	* not be called with a command line option to include 1.0 names in unames.dat.
2792	*
2793	* @param code The character (code point) for which to get the name.
2794	* It must be <code>0<=code<=0x10ffff</code>.
2795	* @param nameChoice Selector for which name to get.
2796	* @param buffer Destination address for copying the name.
2797	* The name will always be zero-terminated.
2798	* If there is no name, then the buffer will be set to the empty string.
2799	* @param bufferLength <code>==sizeof(buffer)</code>
2800	* @param pErrorCode Pointer to a UErrorCode variable;
2801	* check for <code>U_SUCCESS()</code> after <code>u_charName()</code>
2802	* returns.
2803	* @return The length of the name, or 0 if there is no name for this character.
2804	* If the bufferLength is less than or equal to the length, then the buffer
2805	* contains the truncated name and the returned length indicates the full
2806	* length of the name.
2807	* The length does not include the zero-termination.
2808	*
2809	* @see UCharNameChoice
2810	* @see u_charFromName
2811	* @see u_enumCharNames
2812	* @stable ICU 2.0
2813	*/
2814	U_STABLE int32_t U_EXPORT2
2815	u_charName(UChar32 code, UCharNameChoice nameChoice,
2816	char *buffer, int32_t bufferLength,
2817	UErrorCode *pErrorCode);
2818
2819	#ifndef U_HIDE_DEPRECATED_API
2820	/**
2821	* Returns an empty string.
2822	* Used to return the ISO 10646 comment for a character.
2823	* The Unicode ISO_Comment property is deprecated and has no values.
2824	*
2825	* @param c The character (code point) for which to get the ISO comment.
2826	* It must be <code>0<=c<=0x10ffff</code>.
2827	* @param dest Destination address for copying the comment.
2828	* The comment will be zero-terminated if possible.
2829	* If there is no comment, then the buffer will be set to the empty string.
2830	* @param destCapacity <code>==sizeof(dest)</code>
2831	* @param pErrorCode Pointer to a UErrorCode variable;
2832	* check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code>
2833	* returns.
2834	* @return 0
2835	*
2836	* @deprecated ICU 49
2837	*/
2838	U_DEPRECATED int32_t U_EXPORT2
2839	u_getISOComment(UChar32 c,
2840	char *dest, int32_t destCapacity,
2841	UErrorCode *pErrorCode);
2842	#endif /* U_HIDE_DEPRECATED_API */
2843
2844	/**
2845	* Find a Unicode character by its name and return its code point value.
2846	* The name is matched exactly and completely.
2847	* If the name does not correspond to a code point, <i>pErrorCode</i>
2848	* is set to <code>U_INVALID_CHAR_FOUND</code>.
2849	* A Unicode 1.0 name is matched only if it differs from the modern name.
2850	* Unicode names are all uppercase. Extended names are lowercase followed
2851	* by an uppercase hexadecimal number, and within angle brackets.
2852	*
2853	* @param nameChoice Selector for which name to match.
2854	* @param name The name to match.
2855	* @param pErrorCode Pointer to a UErrorCode variable
2856	* @return The Unicode value of the code point with the given name,
2857	* or an undefined value if there is no such code point.
2858	*
2859	* @see UCharNameChoice
2860	* @see u_charName
2861	* @see u_enumCharNames
2862	* @stable ICU 1.7
2863	*/
2864	U_STABLE UChar32 U_EXPORT2
2865	u_charFromName(UCharNameChoice nameChoice,
2866	const char *name,
2867	UErrorCode *pErrorCode);
2868
2869	/**
2870	* Type of a callback function for u_enumCharNames() that gets called
2871	* for each Unicode character with the code point value and
2872	* the character name.
2873	* If such a function returns FALSE, then the enumeration is stopped.
2874	*
2875	* @param context The context pointer that was passed to u_enumCharNames().
2876	* @param code The Unicode code point for the character with this name.
2877	* @param nameChoice Selector for which kind of names is enumerated.
2878	* @param name The character's name, zero-terminated.
2879	* @param length The length of the name.
2880	* @return TRUE if the enumeration should continue, FALSE to stop it.
2881	*
2882	* @see UCharNameChoice
2883	* @see u_enumCharNames
2884	* @stable ICU 1.7
2885	*/
2886	typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,
2887	UChar32 code,
2888	UCharNameChoice nameChoice,
2889	const char *name,
2890	int32_t length);
2891
2892	/**
2893	* Enumerate all assigned Unicode characters between the start and limit
2894	* code points (start inclusive, limit exclusive) and call a function
2895	* for each, passing the code point value and the character name.
2896	* For Unicode 1.0 names, only those are enumerated that differ from the
2897	* modern names.
2898	*
2899	* @param start The first code point in the enumeration range.
2900	* @param limit One more than the last code point in the enumeration range
2901	* (the first one after the range).
2902	* @param fn The function that is to be called for each character name.
2903	* @param context An arbitrary pointer that is passed to the function.
2904	* @param nameChoice Selector for which kind of names to enumerate.
2905	* @param pErrorCode Pointer to a UErrorCode variable
2906	*
2907	* @see UCharNameChoice
2908	* @see UEnumCharNamesFn
2909	* @see u_charName
2910	* @see u_charFromName
2911	* @stable ICU 1.7
2912	*/
2913	U_STABLE void U_EXPORT2
2914	u_enumCharNames(UChar32 start, UChar32 limit,
2915	UEnumCharNamesFn *fn,
2916	void *context,
2917	UCharNameChoice nameChoice,
2918	UErrorCode *pErrorCode);
2919
2920	/**
2921	* Return the Unicode name for a given property, as given in the
2922	* Unicode database file PropertyAliases.txt.
2923	*
2924	* In addition, this function maps the property
2925	* UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
2926	* "General_Category_Mask". These names are not in
2927	* PropertyAliases.txt.
2928	*
2929	* @param property UProperty selector other than UCHAR_INVALID_CODE.
2930	* If out of range, NULL is returned.
2931	*
2932	* @param nameChoice selector for which name to get. If out of range,
2933	* NULL is returned. All properties have a long name. Most
2934	* have a short name, but some do not. Unicode allows for
2935	* additional names; if present these will be returned by
2936	* U_LONG_PROPERTY_NAME + i, where i=1, 2,...
2937	*
2938	* @return a pointer to the name, or NULL if either the
2939	* property or the nameChoice is out of range. If a given
2940	* nameChoice returns NULL, then all larger values of
2941	* nameChoice will return NULL, with one exception: if NULL is
2942	* returned for U_SHORT_PROPERTY_NAME, then
2943	* U_LONG_PROPERTY_NAME (and higher) may still return a
2944	* non-NULL value. The returned pointer is valid until
2945	* u_cleanup() is called.
2946	*
2947	* @see UProperty
2948	* @see UPropertyNameChoice
2949	* @stable ICU 2.4
2950	*/
2951	U_STABLE const char* U_EXPORT2
2952	u_getPropertyName(UProperty property,
2953	UPropertyNameChoice nameChoice);
2954
2955	/**
2956	* Return the UProperty enum for a given property name, as specified
2957	* in the Unicode database file PropertyAliases.txt. Short, long, and
2958	* any other variants are recognized.
2959	*
2960	* In addition, this function maps the synthetic names "gcm" /
2961	* "General_Category_Mask" to the property
2962	* UCHAR_GENERAL_CATEGORY_MASK. These names are not in
2963	* PropertyAliases.txt.
2964	*
2965	* @param alias the property name to be matched. The name is compared
2966	* using "loose matching" as described in PropertyAliases.txt.
2967	*
2968	* @return a UProperty enum, or UCHAR_INVALID_CODE if the given name
2969	* does not match any property.
2970	*
2971	* @see UProperty
2972	* @stable ICU 2.4
2973	*/
2974	U_STABLE UProperty U_EXPORT2
2975	u_getPropertyEnum(const char* alias);
2976
2977	/**
2978	* Return the Unicode name for a given property value, as given in the
2979	* Unicode database file PropertyValueAliases.txt.
2980	*
2981	* Note: Some of the names in PropertyValueAliases.txt can only be
2982	* retrieved using UCHAR_GENERAL_CATEGORY_MASK, not
2983	* UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
2984	* "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
2985	* / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
2986	*
2987	* @param property UProperty selector constant.
2988	* Must be UCHAR_BINARY_START<=property<UCHAR_BINARY_LIMIT
2989	* or UCHAR_INT_START<=property<UCHAR_INT_LIMIT
2990	* or UCHAR_MASK_START<=property<UCHAR_MASK_LIMIT.
2991	* If out of range, NULL is returned.
2992	*
2993	* @param value selector for a value for the given property. If out
2994	* of range, NULL is returned. In general, valid values range
2995	* from 0 up to some maximum. There are a few exceptions:
2996	* (1.) UCHAR_BLOCK values begin at the non-zero value
2997	* UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS
2998	* values are not contiguous and range from 0..240. (3.)
2999	* UCHAR_GENERAL_CATEGORY_MASK values are not values of
3000	* UCharCategory, but rather mask values produced by
3001	* U_GET_GC_MASK(). This allows grouped categories such as
3002	* [:L:] to be represented. Mask values range
3003	* non-contiguously from 1..U_GC_P_MASK.
3004	*
3005	* @param nameChoice selector for which name to get. If out of range,
3006	* NULL is returned. All values have a long name. Most have
3007	* a short name, but some do not. Unicode allows for
3008	* additional names; if present these will be returned by
3009	* U_LONG_PROPERTY_NAME + i, where i=1, 2,...
3010
3011	* @return a pointer to the name, or NULL if either the
3012	* property or the nameChoice is out of range. If a given
3013	* nameChoice returns NULL, then all larger values of
3014	* nameChoice will return NULL, with one exception: if NULL is
3015	* returned for U_SHORT_PROPERTY_NAME, then
3016	* U_LONG_PROPERTY_NAME (and higher) may still return a
3017	* non-NULL value. The returned pointer is valid until
3018	* u_cleanup() is called.
3019	*
3020	* @see UProperty
3021	* @see UPropertyNameChoice
3022	* @stable ICU 2.4
3023	*/
3024	U_STABLE const char* U_EXPORT2
3025	u_getPropertyValueName(UProperty property,
3026	int32_t value,
3027	UPropertyNameChoice nameChoice);
3028
3029	/**
3030	* Return the property value integer for a given value name, as
3031	* specified in the Unicode database file PropertyValueAliases.txt.
3032	* Short, long, and any other variants are recognized.
3033	*
3034	* Note: Some of the names in PropertyValueAliases.txt will only be
3035	* recognized with UCHAR_GENERAL_CATEGORY_MASK, not
3036	* UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
3037	* "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
3038	* / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
3039	*
3040	* @param property UProperty selector constant.
3041	* Must be UCHAR_BINARY_START<=property<UCHAR_BINARY_LIMIT
3042	* or UCHAR_INT_START<=property<UCHAR_INT_LIMIT
3043	* or UCHAR_MASK_START<=property<UCHAR_MASK_LIMIT.
3044	* If out of range, UCHAR_INVALID_CODE is returned.
3045	*
3046	* @param alias the value name to be matched. The name is compared
3047	* using "loose matching" as described in
3048	* PropertyValueAliases.txt.
3049	*
3050	* @return a value integer or UCHAR_INVALID_CODE if the given name
3051	* does not match any value of the given property, or if the
3052	* property is invalid. Note: UCHAR_GENERAL_CATEGORY_MASK values
3053	* are not values of UCharCategory, but rather mask values
3054	* produced by U_GET_GC_MASK(). This allows grouped
3055	* categories such as [:L:] to be represented.
3056	*
3057	* @see UProperty
3058	* @stable ICU 2.4
3059	*/
3060	U_STABLE int32_t U_EXPORT2
3061	u_getPropertyValueEnum(UProperty property,
3062	const char* alias);
3063
3064	/**
3065	* Determines if the specified character is permissible as the
3066	* first character in an identifier according to Unicode
3067	* (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).
3068	* True for characters with general categories "L" (letters) and "Nl" (letter numbers).
3069	*
3070	* Same as java.lang.Character.isUnicodeIdentifierStart().
3071	* Same as UCHAR_ID_START
3072	*
3073	* @param c the code point to be tested
3074	* @return TRUE if the code point may start an identifier
3075	*
3076	* @see UCHAR_ID_START
3077	* @see u_isalpha
3078	* @see u_isIDPart
3079	* @stable ICU 2.0
3080	*/
3081	U_STABLE UBool U_EXPORT2
3082	u_isIDStart(UChar32 c);
3083
3084	/**
3085	* Determines if the specified character is permissible
3086	* in an identifier according to Java.
3087	* True for characters with general categories "L" (letters),
3088	* "Nl" (letter numbers), "Nd" (decimal digits),
3089	* "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and
3090	* u_isIDIgnorable(c).
3091	*
3092	* Same as java.lang.Character.isUnicodeIdentifierPart().
3093	* Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE)
3094	* except that Unicode recommends ignoring only Cf, which is a smaller set than
3095	* u_isIDIgnorable(c).
3096	*
3097	* @param c the code point to be tested
3098	* @return TRUE if the code point may occur in an identifier according to Java
3099	*
3100	* @see UCHAR_ID_CONTINUE
3101	* @see u_isIDStart
3102	* @see u_isIDIgnorable
3103	* @stable ICU 2.0
3104	*/
3105	U_STABLE UBool U_EXPORT2
3106	u_isIDPart(UChar32 c);
3107
3108	/**
3109	* Determines if the specified character should be regarded
3110	* as an ignorable character in an identifier,
3111	* according to Java.
3112	* True for characters with general category "Cf" (format controls) as well as
3113	* non-whitespace ISO controls
3114	* (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F).
3115	*
3116	* Same as java.lang.Character.isIdentifierIgnorable().
3117	*
3118	* Note that Unicode just recommends to ignore Cf (format controls).
3119	*
3120	* @param c the code point to be tested
3121	* @return TRUE if the code point is ignorable in identifiers according to Java
3122	*
3123	* @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
3124	* @see u_isIDStart
3125	* @see u_isIDPart
3126	* @stable ICU 2.0
3127	*/
3128	U_STABLE UBool U_EXPORT2
3129	u_isIDIgnorable(UChar32 c);
3130
3131	/**
3132	* Determines if the specified character is permissible as the
3133	* first character in a Java identifier.
3134	* In addition to u_isIDStart(c), true for characters with
3135	* general categories "Sc" (currency symbols) and "Pc" (connecting punctuation).
3136	*
3137	* Same as java.lang.Character.isJavaIdentifierStart().
3138	*
3139	* @param c the code point to be tested
3140	* @return TRUE if the code point may start a Java identifier
3141	*
3142	* @see u_isJavaIDPart
3143	* @see u_isalpha
3144	* @see u_isIDStart
3145	* @stable ICU 2.0
3146	*/
3147	U_STABLE UBool U_EXPORT2
3148	u_isJavaIDStart(UChar32 c);
3149
3150	/**
3151	* Determines if the specified character is permissible
3152	* in a Java identifier.
3153	* In addition to u_isIDPart(c), true for characters with
3154	* general category "Sc" (currency symbols).
3155	*
3156	* Same as java.lang.Character.isJavaIdentifierPart().
3157	*
3158	* @param c the code point to be tested
3159	* @return TRUE if the code point may occur in a Java identifier
3160	*
3161	* @see u_isIDIgnorable
3162	* @see u_isJavaIDStart
3163	* @see u_isalpha
3164	* @see u_isdigit
3165	* @see u_isIDPart
3166	* @stable ICU 2.0
3167	*/
3168	U_STABLE UBool U_EXPORT2
3169	u_isJavaIDPart(UChar32 c);
3170
3171	/**
3172	* The given character is mapped to its lowercase equivalent according to
3173	* UnicodeData.txt; if the character has no lowercase equivalent, the character
3174	* itself is returned.
3175	*
3176	* Same as java.lang.Character.toLowerCase().
3177	*
3178	* This function only returns the simple, single-code point case mapping.
3179	* Full case mappings should be used whenever possible because they produce
3180	* better results by working on whole strings.
3181	* They take into account the string context and the language and can map
3182	* to a result string with a different length as appropriate.
3183	* Full case mappings are applied by the string case mapping functions,
3184	* see ustring.h and the UnicodeString class.
3185	* See also the User Guide chapter on C/POSIX migration:
3186	* http://icu-project.org/userguide/posix.html#case_mappings
3187	*
3188	* @param c the code point to be mapped
3189	* @return the Simple_Lowercase_Mapping of the code point, if any;
3190	* otherwise the code point itself.
3191	* @stable ICU 2.0
3192	*/
3193	U_STABLE UChar32 U_EXPORT2
3194	u_tolower(UChar32 c);
3195
3196	/**
3197	* The given character is mapped to its uppercase equivalent according to UnicodeData.txt;
3198	* if the character has no uppercase equivalent, the character itself is
3199	* returned.
3200	*
3201	* Same as java.lang.Character.toUpperCase().
3202	*
3203	* This function only returns the simple, single-code point case mapping.
3204	* Full case mappings should be used whenever possible because they produce
3205	* better results by working on whole strings.
3206	* They take into account the string context and the language and can map
3207	* to a result string with a different length as appropriate.
3208	* Full case mappings are applied by the string case mapping functions,
3209	* see ustring.h and the UnicodeString class.
3210	* See also the User Guide chapter on C/POSIX migration:
3211	* http://icu-project.org/userguide/posix.html#case_mappings
3212	*
3213	* @param c the code point to be mapped
3214	* @return the Simple_Uppercase_Mapping of the code point, if any;
3215	* otherwise the code point itself.
3216	* @stable ICU 2.0
3217	*/
3218	U_STABLE UChar32 U_EXPORT2
3219	u_toupper(UChar32 c);
3220
3221	/**
3222	* The given character is mapped to its titlecase equivalent
3223	* according to UnicodeData.txt;
3224	* if none is defined, the character itself is returned.
3225	*
3226	* Same as java.lang.Character.toTitleCase().
3227	*
3228	* This function only returns the simple, single-code point case mapping.
3229	* Full case mappings should be used whenever possible because they produce
3230	* better results by working on whole strings.
3231	* They take into account the string context and the language and can map
3232	* to a result string with a different length as appropriate.
3233	* Full case mappings are applied by the string case mapping functions,
3234	* see ustring.h and the UnicodeString class.
3235	* See also the User Guide chapter on C/POSIX migration:
3236	* http://icu-project.org/userguide/posix.html#case_mappings
3237	*
3238	* @param c the code point to be mapped
3239	* @return the Simple_Titlecase_Mapping of the code point, if any;
3240	* otherwise the code point itself.
3241	* @stable ICU 2.0
3242	*/
3243	U_STABLE UChar32 U_EXPORT2
3244	u_totitle(UChar32 c);
3245
3246	/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */
3247	#define U_FOLD_CASE_DEFAULT 0
3248
3249	/**
3250	* Option value for case folding:
3251	*
3252	* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
3253	* and dotless i appropriately for Turkic languages (tr, az).
3254	*
3255	* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
3256	* are to be included for default mappings and
3257	* excluded for the Turkic-specific mappings.
3258	*
3259	* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
3260	* are to be excluded for default mappings and
3261	* included for the Turkic-specific mappings.
3262	*
3263	* @stable ICU 2.0
3264	*/
3265	#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
3266
3267	/**
3268	* The given character is mapped to its case folding equivalent according to
3269	* UnicodeData.txt and CaseFolding.txt;
3270	* if the character has no case folding equivalent, the character
3271	* itself is returned.
3272	*
3273	* This function only returns the simple, single-code point case mapping.
3274	* Full case mappings should be used whenever possible because they produce
3275	* better results by working on whole strings.
3276	* They take into account the string context and the language and can map
3277	* to a result string with a different length as appropriate.
3278	* Full case mappings are applied by the string case mapping functions,
3279	* see ustring.h and the UnicodeString class.
3280	* See also the User Guide chapter on C/POSIX migration:
3281	* http://icu-project.org/userguide/posix.html#case_mappings
3282	*
3283	* @param c the code point to be mapped
3284	* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
3285	* @return the Simple_Case_Folding of the code point, if any;
3286	* otherwise the code point itself.
3287	* @stable ICU 2.0
3288	*/
3289	U_STABLE UChar32 U_EXPORT2
3290	u_foldCase(UChar32 c, uint32_t options);
3291
3292	/**
3293	* Returns the decimal digit value of the code point in the
3294	* specified radix.
3295	*
3296	* If the radix is not in the range <code>2<=radix<=36</code> or if the
3297	* value of <code>ch</code> is not a valid digit in the specified
3298	* radix, <code>-1</code> is returned. A character is a valid digit
3299	* if at least one of the following is true:
3300	* <ul>
3301	* <li>The character has a decimal digit value.
3302	* Such characters have the general category "Nd" (decimal digit numbers)
3303	* and a Numeric_Type of Decimal.
3304	* In this case the value is the character's decimal digit value.</li>
3305	* <li>The character is one of the uppercase Latin letters
3306	* <code>'A'</code> through <code>'Z'</code>.
3307	* In this case the value is <code>ch-'A'+10</code>.</li>
3308	* <li>The character is one of the lowercase Latin letters
3309	* <code>'a'</code> through <code>'z'</code>.
3310	* In this case the value is <code>ch-'a'+10</code>.</li>
3311	* <li>Latin letters from both the ASCII range (0061..007A, 0041..005A)
3312	* as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A)
3313	* are recognized.</li>
3314	* </ul>
3315	*
3316	* Same as java.lang.Character.digit().
3317	*
3318	* @param ch the code point to be tested.
3319	* @param radix the radix.
3320	* @return the numeric value represented by the character in the
3321	* specified radix,
3322	* or -1 if there is no value or if the value exceeds the radix.
3323	*
3324	* @see UCHAR_NUMERIC_TYPE
3325	* @see u_forDigit
3326	* @see u_charDigitValue
3327	* @see u_isdigit
3328	* @stable ICU 2.0
3329	*/
3330	U_STABLE int32_t U_EXPORT2
3331	u_digit(UChar32 ch, int8_t radix);
3332
3333	/**
3334	* Determines the character representation for a specific digit in
3335	* the specified radix. If the value of <code>radix</code> is not a
3336	* valid radix, or the value of <code>digit</code> is not a valid
3337	* digit in the specified radix, the null character
3338	* (<code>U+0000</code>) is returned.
3339	* <p>
3340	* The <code>radix</code> argument is valid if it is greater than or
3341	* equal to 2 and less than or equal to 36.
3342	* The <code>digit</code> argument is valid if
3343	* <code>0 <= digit < radix</code>.
3344	* <p>
3345	* If the digit is less than 10, then
3346	* <code>'0' + digit</code> is returned. Otherwise, the value
3347	* <code>'a' + digit - 10</code> is returned.
3348	*
3349	* Same as java.lang.Character.forDigit().
3350	*
3351	* @param digit the number to convert to a character.
3352	* @param radix the radix.
3353	* @return the <code>char</code> representation of the specified digit
3354	* in the specified radix.
3355	*
3356	* @see u_digit
3357	* @see u_charDigitValue
3358	* @see u_isdigit
3359	* @stable ICU 2.0
3360	*/
3361	U_STABLE UChar32 U_EXPORT2
3362	u_forDigit(int32_t digit, int8_t radix);
3363
3364	/**
3365	* Get the "age" of the code point.
3366	* The "age" is the Unicode version when the code point was first
3367	* designated (as a non-character or for Private Use)
3368	* or assigned a character.
3369	* This can be useful to avoid emitting code points to receiving
3370	* processes that do not accept newer characters.
3371	* The data is from the UCD file DerivedAge.txt.
3372	*
3373	* @param c The code point.
3374	* @param versionArray The Unicode version number array, to be filled in.
3375	*
3376	* @stable ICU 2.1
3377	*/
3378	U_STABLE void U_EXPORT2
3379	u_charAge(UChar32 c, UVersionInfo versionArray);
3380
3381	/**
3382	* Gets the Unicode version information.
3383	* The version array is filled in with the version information
3384	* for the Unicode standard that is currently used by ICU.
3385	* For example, Unicode version 3.1.1 is represented as an array with
3386	* the values { 3, 1, 1, 0 }.
3387	*
3388	* @param versionArray an output array that will be filled in with
3389	* the Unicode version number
3390	* @stable ICU 2.0
3391	*/
3392	U_STABLE void U_EXPORT2
3393	u_getUnicodeVersion(UVersionInfo versionArray);
3394
3395	#if !UCONFIG_NO_NORMALIZATION
3396	/**
3397	* Get the FC_NFKC_Closure property string for a character.
3398	* See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure"
3399	* or for "FNC": http://www.unicode.org/reports/tr15/
3400	*
3401	* @param c The character (code point) for which to get the FC_NFKC_Closure string.
3402	* It must be <code>0<=c<=0x10ffff</code>.
3403	* @param dest Destination address for copying the string.
3404	* The string will be zero-terminated if possible.
3405	* If there is no FC_NFKC_Closure string,
3406	* then the buffer will be set to the empty string.
3407	* @param destCapacity <code>==sizeof(dest)</code>
3408	* @param pErrorCode Pointer to a UErrorCode variable.
3409	* @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character.
3410	* If the destCapacity is less than or equal to the length, then the buffer
3411	* contains the truncated string and the returned length indicates the full
3412	* length of the string.
3413	* The length does not include the zero-termination.
3414	*
3415	* @stable ICU 2.2
3416	*/
3417	U_STABLE int32_t U_EXPORT2
3418	u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
3419
3420	#endif
3421
3422
3423	U_CDECL_END
3424
3425	#endif /* UCHAR_H */
3426	/* eof */
3427

Browse the source code of include/x86_64-linux-gnu/unicode/uchar.h