unistr.h source code [include/unicode/unistr.h]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 1998-2016, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	*
9	* File unistr.h
10	*
11	* Modification History:
12	*
13	* Date Name Description
14	* 09/25/98 stephen Creation.
15	* 11/11/98 stephen Changed per 11/9 code review.
16	* 04/20/99 stephen Overhauled per 4/16 code review.
17	* 11/18/99 aliu Made to inherit from Replaceable. Added method
18	* handleReplaceBetween(); other methods unchanged.
19	* 06/25/01 grhoten Remove dependency on iostream.
20	******************************************************************************
21	*/
22
23	#ifndef UNISTR_H
24	#define UNISTR_H
25
26	/**
27	* \file
28	* \brief C++ API: Unicode String
29	*/
30
31	#include <cstddef>
32	#include "unicode/utypes.h"
33	#include "unicode/char16ptr.h"
34	#include "unicode/rep.h"
35	#include "unicode/std_string.h"
36	#include "unicode/stringpiece.h"
37	#include "unicode/bytestream.h"
38
39	struct UConverter; // unicode/ucnv.h
40
41	#ifndef USTRING_H
42	/**
43	* \ingroup ustring_ustrlen
44	*/
45	U_STABLE int32_t U_EXPORT2
46	u_strlen(const UChar *s);
47	#endif
48
49	U_NAMESPACE_BEGIN
50
51	#if !UCONFIG_NO_BREAK_ITERATION
52	class BreakIterator; // unicode/brkiter.h
53	#endif
54	class Edits;
55
56	U_NAMESPACE_END
57
58	// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
59	/**
60	* Internal function type for string case mapping implementations.
61	* All error checking must be done (NOTE(review): presumably by the caller, before the call -- confirm).
62	* The src and dest buffers must not overlap.
63	* @internal
64	*/
65	typedef int32_t U_CALLCONV
66	UStringCaseMapper(int32_t caseLocale, uint32_t options,
67	#if !UCONFIG_NO_BREAK_ITERATION
68	icu::BreakIterator *iter, // parameter is declared only when UCONFIG_NO_BREAK_ITERATION is 0
69	#endif // !UCONFIG_NO_BREAK_ITERATION
70	char16_t *dest, int32_t destCapacity,
71	const char16_t *src, int32_t srcLength,
72	icu::Edits *edits,
73	UErrorCode &errorCode);
74
75	U_NAMESPACE_BEGIN
76
77	class Locale; // unicode/locid.h
78	class StringCharacterIterator;
79	class UnicodeStringAppendable; // unicode/appendable.h
80
81	/* The <iostream> include has been moved to unicode/ustream.h */
82
83	/**
84	* Shorthand for the constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor,
85	* which constructs a Unicode string from an invariant-character char * string.
86	* For details about invariant characters, see utypes.h.
87	* This constructor has no runtime dependency on conversion code and is
88	* therefore recommended over the ones taking a charset name string
89	* (where the empty string "" indicates invariant-character conversion).
90	*
91	* @stable ICU 3.2
92	*/
93	#define US_INV icu::UnicodeString::kInvariant
94
95	/**
96	* Unicode String literals in C++.
97	*
98	* Note: these macros are not recommended for new code.
99	* Prior to the availability of C++11 and u"unicode string literals",
100	* these macros were provided for portability and efficiency when
101	* initializing UnicodeStrings from literals.
102	*
103	* They work only for strings that contain "invariant characters", i.e.,
104	* only Latin letters, digits, and some punctuation.
105	* See utypes.h for details.
106	*
107	* The string parameter (cs) must be a C string literal; the macro pastes a u prefix onto it (u ## cs).
108	* The length of the string (_length), not including the terminating
109	* `NUL`, must be specified as a constant.
110	* @stable ICU 2.0
111	*/
112	#if !U_CHAR16_IS_TYPEDEF
113	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length)
114	#else // U_CHAR16_IS_TYPEDEF: the u"..." literal is cast to const char16_t* explicitly
115	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const char16_t*)u ## cs, _length)
116	#endif // !U_CHAR16_IS_TYPEDEF
117
118	/**
119	* Unicode String literals in C++, without an explicit length argument.
120	* Dependent on the platform properties, different UnicodeString
121	* constructors should be used to create a UnicodeString object from
122	* a string literal.
123	* The macros are defined for improved performance.
124	* They work only for strings that contain "invariant characters", i.e.,
125	* only Latin letters, digits, and some punctuation.
126	* See utypes.h for details.
127	*
128	* The string parameter must be a C string literal. Expands to UNICODE_STRING(cs, -1).
129	* @stable ICU 2.0
130	*/
131	#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
132
133	/**
134	* \def UNISTR_FROM_CHAR_EXPLICIT
135	* This can be defined (before this header is included) to be empty or "explicit".
136	* If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32)
137	* constructors are marked as explicit, preventing their inadvertent use.
138	* @stable ICU 49
139	*/
140	#ifndef UNISTR_FROM_CHAR_EXPLICIT
141	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
142	// Auto-"explicit" in ICU library code.
143	# define UNISTR_FROM_CHAR_EXPLICIT explicit
144	# else
145	// Empty by default for source code compatibility.
146	# define UNISTR_FROM_CHAR_EXPLICIT
147	# endif
148	#endif // UNISTR_FROM_CHAR_EXPLICIT
149
150	/**
151	* \def UNISTR_FROM_STRING_EXPLICIT
152	* This can be defined (before this header is included) to be empty or "explicit".
153	* If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *)
154	* constructors are marked as explicit, preventing their inadvertent use.
155	*
156	* In particular, this helps prevent accidentally depending on ICU conversion code
157	* by passing a string literal into an API with a const UnicodeString & parameter.
158	* @stable ICU 49
159	*/
160	#ifndef UNISTR_FROM_STRING_EXPLICIT
161	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
162	// Auto-"explicit" in ICU library code.
163	# define UNISTR_FROM_STRING_EXPLICIT explicit
164	# else
165	// Empty by default for source code compatibility.
166	# define UNISTR_FROM_STRING_EXPLICIT
167	# endif
168	#endif // UNISTR_FROM_STRING_EXPLICIT
169
170	/**
171	* \def UNISTR_OBJECT_SIZE
172	* Desired sizeof(UnicodeString) in bytes.
173	* It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
174	* The object size may want to be a multiple of 16 bytes,
175	* which is a common granularity for heap allocation.
176	*
177	* Any space inside the object beyond sizeof(vtable pointer) + 2
178	* is available for storing short strings inside the object.
179	* The bigger the object, the longer a string that can be stored inside the object,
180	* without additional heap allocation.
181	*
182	* Depending on a platform's pointer size, pointer alignment requirements,
183	* and struct padding, the compiler will usually round up sizeof(UnicodeString)
184	* to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
185	* to hold the fields for heap-allocated strings.
186	* Such a minimum size also ensures that the object is easily large enough
187	* to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH).
188	*
189	* sizeof(UnicodeString) >= 48 should work for all known platforms.
190	*
191	* For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
192	* sizeof(UnicodeString) = 64 would leave space for
193	* (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
194	* char16_ts stored inside the object.
195	*
196	* The minimum object size on a 64-bit machine would be
197	* 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
198	* and the internal buffer would hold up to 11 char16_ts in that case.
199	*
200	* @see U16_MAX_LENGTH
201	* @stable ICU 56
202	*/
203	#ifndef UNISTR_OBJECT_SIZE
204	# define UNISTR_OBJECT_SIZE 64 // default, used only when the macro was not already defined
205	#endif // UNISTR_OBJECT_SIZE
206
207	/**
208	* UnicodeString is a string class that stores Unicode characters directly and provides
209	* similar functionality as the Java String and StringBuffer/StringBuilder classes.
210	* It is a concrete implementation of the abstract class Replaceable (for transliteration).
211	*
212	* A UnicodeString may also "alias" an external array of characters
213	* (that is, point to it, rather than own the array)
214	* whose lifetime must then at least match the lifetime of the aliasing object.
215	* This aliasing may be preserved when returning a UnicodeString by value,
216	* depending on the compiler and the function implementation,
217	* via Return Value Optimization (RVO) or the move assignment operator.
218	* (However, the copy assignment operator does not preserve aliasing.)
219	* For details see the description of storage models at the end of the class API docs
220	* and in the User Guide chapter linked from there.
221	*
222	* The UnicodeString class is not suitable for subclassing.
223	*
224	* For an overview of Unicode strings in C and C++ see the
225	* [User Guide Strings chapter](http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-).
226	*
227	* In ICU, a Unicode string consists of 16-bit Unicode code units.
228	* A Unicode character may be stored with either one code unit
229	* (the most common case) or with a matched pair of special code units
230	* ("surrogates"). The data type for code units is char16_t.
231	* For single-character handling, a Unicode character code point is a value
232	* in the range 0..0x10ffff. ICU uses the UChar32 type for code points.
233	*
234	* Indexes and offsets into and lengths of strings always count code units, not code points.
235	* This is the same as with multi-byte char* strings in traditional string handling.
236	* Operations on partial strings typically do not test for code point boundaries.
237	* If necessary, the user needs to take care of such boundaries by testing for the code unit
238	* values or by using functions like
239	* UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
240	* (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).
241	*
242	* UnicodeString methods are more lenient with regard to input parameter values
243	* than other ICU APIs. In particular:
244	* - If indexes are out of bounds for a UnicodeString object
245	* (< 0 or > length()) then they are "pinned" to the nearest boundary.
246	* - If the buffer passed to an insert/append/replace operation is owned by the
247	* target object, e.g., calling str.append(str), an extra copy may take place
248	* to ensure safety.
249	* - If primitive string pointer values (e.g., const char16_t * or char *)
250	* for input strings are NULL, then those input string parameters are treated
251	* as if they pointed to an empty string.
252	* However, this is not the case for char * parameters for charset names
253	* or other IDs.
254	* - Most UnicodeString methods do not take a UErrorCode parameter because
255	* there are usually very few opportunities for failure other than a shortage
256	* of memory, error codes in low-level C++ string methods would be inconvenient,
257	* and the error code as the last parameter (ICU convention) would prevent
258	* the use of default parameter values.
259	* Instead, such methods set the UnicodeString into a "bogus" state
260	* (see isBogus()) if an error occurs.
261	*
262	* In string comparisons, two UnicodeString objects that are both "bogus"
263	* compare equal (to be transitive and prevent endless loops in sorting),
264	* and a "bogus" string compares less than any non-"bogus" one.
265	*
266	* Const UnicodeString methods are thread-safe. Multiple threads can use
267	* const methods on the same UnicodeString object simultaneously,
268	* but non-const methods must not be called concurrently (in multiple threads)
269	* with any other (const or non-const) methods.
270	*
271	* Similarly, const UnicodeString & parameters are thread-safe.
272	* One object may be passed in as such a parameter concurrently in multiple threads.
273	* This includes the const UnicodeString & parameters for
274	* copy construction, assignment, and cloning.
275	*
276	* UnicodeString uses several storage methods.
277	* String contents can be stored inside the UnicodeString object itself,
278	* in an allocated and shared buffer, or in an outside buffer that is "aliased".
279	* Most of this is done transparently, but careful aliasing in particular provides
280	* significant performance improvements.
281	* Also, the internal buffer is accessible via special functions.
282	* For details see the
283	* [User Guide Strings chapter](http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model).
284	*
285	* @see utf.h
286	* @see CharacterIterator
287	* @stable ICU 2.0
288	*/
289	class U_COMMON_API UnicodeString : public Replaceable
290	{
291	public:
292
293	/**
294	* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
295	* which constructs a Unicode string from an invariant-character char * string.
296	* Use the macro US_INV instead of the full qualification for this value.
297	*
298	* @see US_INV
299	* @stable ICU 3.2
300	*/
301	enum EInvariant {
302	/**
303	* @see EInvariant
304	* @stable ICU 3.2
305	*/
306	kInvariant
307	};
308
309	//========================================
310	// Read-only operations
311	//========================================
312
313	/* Comparison - bitwise only - for international comparison use collation */
314
315	/**
316	* Equality operator. Performs only bitwise comparison.
317	* @param text The UnicodeString to compare to this one.
318	* @return TRUE if `text` contains the same characters as this one,
319	* FALSE otherwise.
320	* @stable ICU 2.0
321	*/
322	inline UBool operator== (const UnicodeString& text) const;
323
324	/**
325	* Inequality operator. Performs only bitwise comparison.
326	* @param text The UnicodeString to compare to this one.
327	* @return FALSE if `text` contains the same characters as this one,
328	* TRUE otherwise.
329	* @stable ICU 2.0
330	*/
331	inline UBool operator!= (const UnicodeString& text) const;
332
333	/**
334	* Greater than operator. Performs only bitwise comparison.
335	* @param text The UnicodeString to compare to this one.
336	* @return TRUE if the characters in this are bitwise
337	* greater than the characters in `text`, FALSE otherwise
338	* @stable ICU 2.0
339	*/
340	inline UBool operator> (const UnicodeString& text) const;
341
342	/**
343	* Less than operator. Performs only bitwise comparison.
344	* @param text The UnicodeString to compare to this one.
345	* @return TRUE if the characters in this are bitwise
346	* less than the characters in `text`, FALSE otherwise
347	* @stable ICU 2.0
348	*/
349	inline UBool operator< (const UnicodeString& text) const;
350
351	/**
352	* Greater than or equal operator. Performs only bitwise comparison.
353	* @param text The UnicodeString to compare to this one.
354	* @return TRUE if the characters in this are bitwise
355	* greater than or equal to the characters in `text`, FALSE otherwise
356	* @stable ICU 2.0
357	*/
358	inline UBool operator>= (const UnicodeString& text) const;
359
360	/**
361	* Less than or equal operator. Performs only bitwise comparison.
362	* @param text The UnicodeString to compare to this one.
363	* @return TRUE if the characters in this are bitwise
364	* less than or equal to the characters in `text`, FALSE otherwise
365	* @stable ICU 2.0
366	*/
367	inline UBool operator<= (const UnicodeString& text) const;
368
369	/**
370	* Compare the characters bitwise in this UnicodeString to
371	* the characters in `text`.
372	* @param text The UnicodeString to compare to this one.
373	* @return The result of bitwise character comparison: 0 if this
374	* contains the same characters as `text`, -1 if the characters in
375	* this are bitwise less than the characters in `text`, +1 if the
376	* characters in this are bitwise greater than the characters
377	* in `text`.
378	* @stable ICU 2.0
379	*/
380	inline int8_t compare(const UnicodeString& text) const;
381
382	/**
383	* Compare the characters bitwise in the range
384	* [`start`, `start + length`) with the characters
385	* in the entire string `text`.
386	* (The parameters "start" and "length" are not applied to the other text "text".)
387	* @param start the offset at which the compare operation begins
388	* @param length the number of characters of text to compare.
389	* @param text the other text to be compared against this string.
390	* @return The result of bitwise character comparison: 0 if this
391	* contains the same characters as `text`, -1 if the characters in
392	* this are bitwise less than the characters in `text`, +1 if the
393	* characters in this are bitwise greater than the characters
394	* in `text`.
395	* @stable ICU 2.0
396	*/
397	inline int8_t compare(int32_t start,
398	int32_t length,
399	const UnicodeString& text) const;
400
401	/**
402	* Compare the characters bitwise in the range
403	* [`start`, `start + length`) with the characters
404	* in `srcText` in the range
405	* [`srcStart`, `srcStart + srcLength`).
406	* @param start the offset at which the compare operation begins
407	* @param length the number of characters in this to compare.
408	* @param srcText the text to be compared
409	* @param srcStart the offset into `srcText` to start comparison
410	* @param srcLength the number of characters in `srcText` to compare
411	* @return The result of bitwise character comparison: 0 if this
412	* contains the same characters as `srcText`, -1 if the characters in
413	* this are bitwise less than the characters in `srcText`, +1 if the
414	* characters in this are bitwise greater than the characters
415	* in `srcText`.
416	* @stable ICU 2.0
417	*/
418	inline int8_t compare(int32_t start,
419	int32_t length,
420	const UnicodeString& srcText,
421	int32_t srcStart,
422	int32_t srcLength) const;
423
424	/**
425	* Compare the characters bitwise in this UnicodeString with the first
426	* `srcLength` characters in `srcChars`.
427	* @param srcChars The characters to compare to this UnicodeString.
428	* @param srcLength the number of characters in `srcChars` to compare
429	* @return The result of bitwise character comparison: 0 if this
430	* contains the same characters as `srcChars`, -1 if the characters in
431	* this are bitwise less than the characters in `srcChars`, +1 if the
432	* characters in this are bitwise greater than the characters
433	* in `srcChars`.
434	* @stable ICU 2.0
435	*/
436	inline int8_t compare(ConstChar16Ptr srcChars,
437	int32_t srcLength) const;
438
439	/**
440	* Compare the characters bitwise in the range
441	* [`start`, `start + length`) with the first
442	* `length` characters in `srcChars`
443	* @param start the offset at which the compare operation begins
444	* @param length the number of characters to compare.
445	* @param srcChars the characters to be compared
446	* @return The result of bitwise character comparison: 0 if this
447	* contains the same characters as `srcChars`, -1 if the characters in
448	* this are bitwise less than the characters in `srcChars`, +1 if the
449	* characters in this are bitwise greater than the characters
450	* in `srcChars`.
451	* @stable ICU 2.0
452	*/
453	inline int8_t compare(int32_t start,
454	int32_t length,
455	const char16_t *srcChars) const; // srcChars is a pointer to the code units to compare against
456
457	/**
458	* Compare the characters bitwise in the range
459	* [`start`, `start + length`) with the characters
460	* in `srcChars` in the range
461	* [`srcStart`, `srcStart + srcLength`).
462	* @param start the offset at which the compare operation begins
463	* @param length the number of characters in this to compare
464	* @param srcChars the characters to be compared
465	* @param srcStart the offset into `srcChars` to start comparison
466	* @param srcLength the number of characters in `srcChars` to compare
467	* @return The result of bitwise character comparison: 0 if this
468	* contains the same characters as `srcChars`, -1 if the characters in
469	* this are bitwise less than the characters in `srcChars`, +1 if the
470	* characters in this are bitwise greater than the characters
471	* in `srcChars`.
472	* @stable ICU 2.0
473	*/
474	inline int8_t compare(int32_t start,
475	int32_t length,
476	const char16_t *srcChars,
477	int32_t srcStart,
478	int32_t srcLength) const;
479
480	/**
481	* Compare the characters bitwise in the range
482	* [`start`, `limit`) with the characters
483	* in `srcText` in the range
484	* [`srcStart`, `srcLimit`).
485	* @param start the offset at which the compare operation begins
486	* @param limit the offset immediately following the compare operation
487	* @param srcText the text to be compared
488	* @param srcStart the offset into `srcText` to start comparison
489	* @param srcLimit the offset into `srcText` to limit comparison
490	* @return The result of bitwise character comparison: 0 if this
491	* contains the same characters as `srcText`, -1 if the characters in
492	* this are bitwise less than the characters in `srcText`, +1 if the
493	* characters in this are bitwise greater than the characters
494	* in `srcText`.
495	* @stable ICU 2.0
496	*/
497	inline int8_t compareBetween(int32_t start,
498	int32_t limit,
499	const UnicodeString& srcText,
500	int32_t srcStart,
501	int32_t srcLimit) const;
502
503	/**
504	* Compare two Unicode strings in code point order.
505	* The result may be different from the results of compare(), operator<, etc.
506	* if supplementary characters are present:
507	*
508	* In UTF-16, supplementary characters (with code points U+10000 and above) are
509	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
510	* which means that they compare as less than some other BMP characters like U+feff.
511	* This function compares Unicode strings in code point order.
512	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
513	*
514	* @param text Another string to compare this one to.
515	* @return a negative/zero/positive integer corresponding to whether
516	* this string is less than/equal to/greater than the second one
517	* in code point order
518	* @stable ICU 2.0
519	*/
520	inline int8_t compareCodePointOrder(const UnicodeString& text) const;
521
522	/**
523	* Compare two Unicode strings in code point order.
524	* The result may be different from the results of compare(), operator<, etc.
525	* if supplementary characters are present:
526	*
527	* In UTF-16, supplementary characters (with code points U+10000 and above) are
528	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
529	* which means that they compare as less than some other BMP characters like U+feff.
530	* This function compares Unicode strings in code point order.
531	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
532	*
533	* @param start The start offset in this string at which the compare operation begins.
534	* @param length The number of code units from this string to compare.
535	* @param srcText Another string to compare this one to.
536	* @return a negative/zero/positive integer corresponding to whether
537	* this string is less than/equal to/greater than the second one
538	* in code point order
539	* @stable ICU 2.0
540	*/
541	inline int8_t compareCodePointOrder(int32_t start,
542	int32_t length,
543	const UnicodeString& srcText) const;
544
545	/**
546	* Compare two Unicode strings in code point order.
547	* The result may be different from the results of compare(), operator<, etc.
548	* if supplementary characters are present:
549	*
550	* In UTF-16, supplementary characters (with code points U+10000 and above) are
551	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
552	* which means that they compare as less than some other BMP characters like U+feff.
553	* This function compares Unicode strings in code point order.
554	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
555	*
556	* @param start The start offset in this string at which the compare operation begins.
557	* @param length The number of code units from this string to compare.
558	* @param srcText Another string to compare this one to.
559	* @param srcStart The start offset in that string at which the compare operation begins.
560	* @param srcLength The number of code units from that string to compare.
561	* @return a negative/zero/positive integer corresponding to whether
562	* this string is less than/equal to/greater than the second one
563	* in code point order
564	* @stable ICU 2.0
565	*/
566	inline int8_t compareCodePointOrder(int32_t start,
567	int32_t length,
568	const UnicodeString& srcText,
569	int32_t srcStart,
570	int32_t srcLength) const;
571
572	/**
573	* Compare two Unicode strings in code point order.
574	* The result may be different from the results of compare(), operator<, etc.
575	* if supplementary characters are present:
576	*
577	* In UTF-16, supplementary characters (with code points U+10000 and above) are
578	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
579	* which means that they compare as less than some other BMP characters like U+feff.
580	* This function compares Unicode strings in code point order.
581	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
582	*
583	* @param srcChars A pointer to another string to compare this one to.
584	* @param srcLength The number of code units from that string to compare.
585	* @return a negative/zero/positive integer corresponding to whether
586	* this string is less than/equal to/greater than the second one
587	* in code point order
588	* @stable ICU 2.0
589	*/
590	inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
591	int32_t srcLength) const;
592
593	/**
594	* Compare two Unicode strings in code point order.
595	* The result may be different from the results of compare(), operator<, etc.
596	* if supplementary characters are present:
597	*
598	* In UTF-16, supplementary characters (with code points U+10000 and above) are
599	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
600	* which means that they compare as less than some other BMP characters like U+feff.
601	* This function compares Unicode strings in code point order.
602	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
603	*
604	* @param start The start offset in this string at which the compare operation begins.
605	* @param length The number of code units from this string to compare.
606	* @param srcChars A pointer to another string to compare this one to.
607	* @return a negative/zero/positive integer corresponding to whether
608	* this string is less than/equal to/greater than the second one
609	* in code point order
610	* @stable ICU 2.0
611	*/
612	inline int8_t compareCodePointOrder(int32_t start,
613	int32_t length,
614	const char16_t *srcChars) const; // srcChars is a pointer to the code units to compare against
615
616	/**
617	* Compare two Unicode strings in code point order.
618	* The result may be different from the results of compare(), operator<, etc.
619	* if supplementary characters are present:
620	*
621	* In UTF-16, supplementary characters (with code points U+10000 and above) are
622	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
623	* which means that they compare as less than some other BMP characters like U+feff.
624	* This function compares Unicode strings in code point order.
625	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
626	*
627	* @param start The start offset in this string at which the compare operation begins.
628	* @param length The number of code units from this string to compare.
629	* @param srcChars A pointer to another string to compare this one to.
630	* @param srcStart The start offset in that string at which the compare operation begins.
631	* @param srcLength The number of code units from that string to compare.
632	* @return a negative/zero/positive integer corresponding to whether
633	* this string is less than/equal to/greater than the second one
634	* in code point order
635	* @stable ICU 2.0
636	*/
637	inline int8_t compareCodePointOrder(int32_t start,
638	int32_t length,
639	const char16_t *srcChars,
640	int32_t srcStart,
641	int32_t srcLength) const;
642
643	/**
644	* Compare two Unicode strings in code point order.
645	* The result may be different from the results of compare(), operator<, etc.
646	* if supplementary characters are present:
647	*
648	* In UTF-16, supplementary characters (with code points U+10000 and above) are
649	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
650	* which means that they compare as less than some other BMP characters like U+feff.
651	* This function compares Unicode strings in code point order.
652	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
653	*
654	* @param start The start offset in this string at which the compare operation begins.
655	* @param limit The offset after the last code unit from this string to compare.
656	* @param srcText Another string to compare this one to.
657	* @param srcStart The start offset in that string at which the compare operation begins.
658	* @param srcLimit The offset after the last code unit from that string to compare.
659	* @return a negative/zero/positive integer corresponding to whether
660	* this string is less than/equal to/greater than the second one
661	* in code point order
662	* @stable ICU 2.0
663	*/
664	inline int8_t compareCodePointOrderBetween(int32_t start,
665	int32_t limit,
666	const UnicodeString& srcText,
667	int32_t srcStart,
668	int32_t srcLimit) const;
669
670	/**
671	* Compare two strings case-insensitively using full case folding.
672	* This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
673	*
674	* @param text Another string to compare this one to.
675	* @param options A bit set of options:
676	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
677	* Comparison in code unit order with default case folding.
678	*
679	* - U_COMPARE_CODE_POINT_ORDER
680	* Set to choose code point order instead of code unit order
681	* (see u_strCompare for details).
682	*
683	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
684	*
685	* @return A negative, zero, or positive integer indicating the comparison result.
686	* @stable ICU 2.0
687	*/
688	inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
689
690	/**
691	* Compare two strings case-insensitively using full case folding.
692	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
693	*
694	* @param start The start offset in this string at which the compare operation begins.
695	* @param length The number of code units from this string to compare.
696	* @param srcText Another string to compare this one to.
697	* @param options A bit set of options:
698	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
699	* Comparison in code unit order with default case folding.
700	*
701	* - U_COMPARE_CODE_POINT_ORDER
702	* Set to choose code point order instead of code unit order
703	* (see u_strCompare for details).
704	*
705	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
706	*
707	* @return A negative, zero, or positive integer indicating the comparison result.
708	* @stable ICU 2.0
709	*/
710	inline int8_t caseCompare(int32_t start,
711	int32_t length,
712	const UnicodeString& srcText,
713	uint32_t options) const;
714
715	/**
716	* Compare two strings case-insensitively using full case folding.
717	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
718	*
719	* @param start The start offset in this string at which the compare operation begins.
720	* @param length The number of code units from this string to compare.
721	* @param srcText Another string to compare this one to.
722	* @param srcStart The start offset in that string at which the compare operation begins.
723	* @param srcLength The number of code units from that string to compare.
724	* @param options A bit set of options:
725	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
726	* Comparison in code unit order with default case folding.
727	*
728	* - U_COMPARE_CODE_POINT_ORDER
729	* Set to choose code point order instead of code unit order
730	* (see u_strCompare for details).
731	*
732	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
733	*
734	* @return A negative, zero, or positive integer indicating the comparison result.
735	* @stable ICU 2.0
736	*/
737	inline int8_t caseCompare(int32_t start,
738	int32_t length,
739	const UnicodeString& srcText,
740	int32_t srcStart,
741	int32_t srcLength,
742	uint32_t options) const;
743
744	/**
745	* Compare two strings case-insensitively using full case folding.
746	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
747	*
748	* @param srcChars A pointer to another string to compare this one to.
749	* @param srcLength The number of code units from that string to compare.
750	* @param options A bit set of options:
751	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
752	* Comparison in code unit order with default case folding.
753	*
754	* - U_COMPARE_CODE_POINT_ORDER
755	* Set to choose code point order instead of code unit order
756	* (see u_strCompare for details).
757	*
758	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
759	*
760	* @return A negative, zero, or positive integer indicating the comparison result.
761	* @stable ICU 2.0
762	*/
763	inline int8_t caseCompare(ConstChar16Ptr srcChars,
764	int32_t srcLength,
765	uint32_t options) const;
766
767	/**
768	* Compare two strings case-insensitively using full case folding.
769	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
770	*
771	* @param start The start offset in this string at which the compare operation begins.
772	* @param length The number of code units from this string to compare.
773	* @param srcChars A pointer to another string to compare this one to.
774	* @param options A bit set of options:
775	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
776	* Comparison in code unit order with default case folding.
777	*
778	* - U_COMPARE_CODE_POINT_ORDER
779	* Set to choose code point order instead of code unit order
780	* (see u_strCompare for details).
781	*
782	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
783	*
784	* @return A negative, zero, or positive integer indicating the comparison result.
785	* @stable ICU 2.0
786	*/
787	inline int8_t caseCompare(int32_t start,
788	int32_t length,
789	const char16_t *srcChars,
790	uint32_t options) const;
791
792	/**
793	* Compare two strings case-insensitively using full case folding.
794	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
795	*
796	* @param start The start offset in this string at which the compare operation begins.
797	* @param length The number of code units from this string to compare.
798	* @param srcChars A pointer to another string to compare this one to.
799	* @param srcStart The start offset in that string at which the compare operation begins.
800	* @param srcLength The number of code units from that string to compare.
801	* @param options A bit set of options:
802	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
803	* Comparison in code unit order with default case folding.
804	*
805	* - U_COMPARE_CODE_POINT_ORDER
806	* Set to choose code point order instead of code unit order
807	* (see u_strCompare for details).
808	*
809	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
810	*
811	* @return A negative, zero, or positive integer indicating the comparison result.
812	* @stable ICU 2.0
813	*/
814	inline int8_t caseCompare(int32_t start,
815	int32_t length,
816	const char16_t *srcChars,
817	int32_t srcStart,
818	int32_t srcLength,
819	uint32_t options) const;
820
821	/**
822	* Compare two strings case-insensitively using full case folding.
823	* This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
824	*
825	* @param start The start offset in this string at which the compare operation begins.
826	* @param limit The offset after the last code unit from this string to compare.
827	* @param srcText Another string to compare this one to.
828	* @param srcStart The start offset in that string at which the compare operation begins.
829	* @param srcLimit The offset after the last code unit from that string to compare.
830	* @param options A bit set of options:
831	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
832	* Comparison in code unit order with default case folding.
833	*
834	* - U_COMPARE_CODE_POINT_ORDER
835	* Set to choose code point order instead of code unit order
836	* (see u_strCompare for details).
837	*
838	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
839	*
840	* @return A negative, zero, or positive integer indicating the comparison result.
841	* @stable ICU 2.0
842	*/
843	inline int8_t caseCompareBetween(int32_t start,
844	int32_t limit,
845	const UnicodeString& srcText,
846	int32_t srcStart,
847	int32_t srcLimit,
848	uint32_t options) const;
849
850	/**
851	* Determine if this starts with the characters in `text`
852	* @param text The text to match.
853	* @return TRUE if this starts with the characters in `text`,
854	* FALSE otherwise
855	* @stable ICU 2.0
856	*/
857	inline UBool startsWith(const UnicodeString& text) const;
858
859	/**
860	* Determine if this starts with the characters in `srcText`
861	* in the range [`srcStart`, `srcStart + srcLength`).
862	* @param srcText The text to match.
863	* @param srcStart the offset into `srcText` to start matching
864	* @param srcLength the number of characters in `srcText` to match
865	* @return TRUE if this starts with the characters in that range of `srcText`,
866	* FALSE otherwise
867	* @stable ICU 2.0
868	*/
869	inline UBool startsWith(const UnicodeString& srcText,
870	int32_t srcStart,
871	int32_t srcLength) const;
872
873	/**
874	* Determine if this starts with the characters in `srcChars`
875	* @param srcChars The characters to match.
876	* @param srcLength the number of characters in `srcChars`
877	* @return TRUE if this starts with the characters in `srcChars`,
878	* FALSE otherwise
879	* @stable ICU 2.0
880	*/
881	inline UBool startsWith(ConstChar16Ptr srcChars,
882	int32_t srcLength) const;
883
884	/**
885	* Determine if this starts with the characters in `srcChars`
886	* in the range [`srcStart`, `srcStart + srcLength`).
887	* @param srcChars The characters to match.
888	* @param srcStart the offset into `srcChars` to start matching
889	* @param srcLength the number of characters in `srcChars` to match
890	* @return TRUE if this starts with the characters in `srcChars`, FALSE otherwise
891	* @stable ICU 2.0
892	*/
893	inline UBool startsWith(const char16_t *srcChars,
894	int32_t srcStart,
895	int32_t srcLength) const;
896
897	/**
898	* Determine if this ends with the characters in `text`
899	* @param text The text to match.
900	* @return TRUE if this ends with the characters in `text`,
901	* FALSE otherwise
902	* @stable ICU 2.0
903	*/
904	inline UBool endsWith(const UnicodeString& text) const;
905
906	/**
907	* Determine if this ends with the characters in `srcText`
908	* in the range [`srcStart`, `srcStart + srcLength`).
909	* @param srcText The text to match.
910	* @param srcStart the offset into `srcText` to start matching
911	* @param srcLength the number of characters in `srcText` to match
912	* @return TRUE if this ends with the characters in that range of `srcText`,
913	* FALSE otherwise
914	* @stable ICU 2.0
915	*/
916	inline UBool endsWith(const UnicodeString& srcText,
917	int32_t srcStart,
918	int32_t srcLength) const;
919
920	/**
921	* Determine if this ends with the characters in `srcChars`
922	* @param srcChars The characters to match.
923	* @param srcLength the number of characters in `srcChars`
924	* @return TRUE if this ends with the characters in `srcChars`,
925	* FALSE otherwise
926	* @stable ICU 2.0
927	*/
928	inline UBool endsWith(ConstChar16Ptr srcChars,
929	int32_t srcLength) const;
930
931	/**
932	* Determine if this ends with the characters in `srcChars`
933	* in the range [`srcStart`, `srcStart + srcLength`).
934	* @param srcChars The characters to match.
935	* @param srcStart the offset into `srcChars` to start matching
936	* @param srcLength the number of characters in `srcChars` to match
937	* @return TRUE if this ends with the characters in `srcChars`,
938	* FALSE otherwise
939	* @stable ICU 2.0
940	*/
941	inline UBool endsWith(const char16_t *srcChars,
942	int32_t srcStart,
943	int32_t srcLength) const;
944
945
946	/* Searching - bitwise only */
947
948	/**
949	* Locate in this the first occurrence of the characters in `text`,
950	* using bitwise comparison.
951	* @param text The text to search for.
952	* @return The offset into this of the start of `text`,
953	* or -1 if not found.
954	* @stable ICU 2.0
955	*/
956	inline int32_t indexOf(const UnicodeString& text) const;
957
958	/**
959	* Locate in this the first occurrence of the characters in `text`
960	* starting at offset `start`, using bitwise comparison.
961	* @param text The text to search for.
962	* @param start The offset at which searching will start.
963	* @return The offset into this of the start of `text`,
964	* or -1 if not found.
965	* @stable ICU 2.0
966	*/
967	inline int32_t indexOf(const UnicodeString& text,
968	int32_t start) const;
969
970	/**
971	* Locate in this the first occurrence in the range
972	* [`start`, `start + length`) of the characters
973	* in `text`, using bitwise comparison.
974	* @param text The text to search for.
975	* @param start The offset at which searching will start.
976	* @param length The number of characters to search
977	* @return The offset into this of the start of `text`,
978	* or -1 if not found.
979	* @stable ICU 2.0
980	*/
981	inline int32_t indexOf(const UnicodeString& text,
982	int32_t start,
983	int32_t length) const;
984
985	/**
986	* Locate in this the first occurrence in the range
987	* [`start`, `start + length`) of the characters
988	* in `srcText` in the range
989	* [`srcStart`, `srcStart + srcLength`),
990	* using bitwise comparison.
991	* @param srcText The text to search for.
992	* @param srcStart the offset into `srcText` at which
993	* to start matching
994	* @param srcLength the number of characters in `srcText` to match
995	* @param start the offset into this at which to start matching
996	* @param length the number of characters in this to search
997	* @return The offset into this of the start of the matched `srcText` characters,
998	* or -1 if not found.
999	* @stable ICU 2.0
1000	*/
1001	inline int32_t indexOf(const UnicodeString& srcText,
1002	int32_t srcStart,
1003	int32_t srcLength,
1004	int32_t start,
1005	int32_t length) const;
1006
1007	/**
1008	* Locate in this the first occurrence of the characters in
1009	* `srcChars`
1010	* starting at offset `start`, using bitwise comparison.
1011	* @param srcChars The text to search for.
1012	* @param srcLength the number of characters in `srcChars` to match
1013	* @param start the offset into this at which to start matching
1014	* @return The offset into this of the start of `srcChars`,
1015	* or -1 if not found.
1016	* @stable ICU 2.0
1017	*/
1018	inline int32_t indexOf(const char16_t *srcChars,
1019	int32_t srcLength,
1020	int32_t start) const;
1021
1022	/**
1023	* Locate in this the first occurrence in the range
1024	* [`start`, `start + length`) of the characters
1025	* in `srcChars`, using bitwise comparison.
1026	* @param srcChars The text to search for.
1027	* @param srcLength the number of characters in `srcChars`
1028	* @param start The offset at which searching will start.
1029	* @param length The number of characters to search
1030	* @return The offset into this of the start of `srcChars`,
1031	* or -1 if not found.
1032	* @stable ICU 2.0
1033	*/
1034	inline int32_t indexOf(ConstChar16Ptr srcChars,
1035	int32_t srcLength,
1036	int32_t start,
1037	int32_t length) const;
1038
1039	/**
1040	* Locate in this the first occurrence in the range
1041	* [`start`, `start + length`) of the characters
1042	* in `srcChars` in the range
1043	* [`srcStart`, `srcStart + srcLength`),
1044	* using bitwise comparison.
1045	* @param srcChars The text to search for.
1046	* @param srcStart the offset into `srcChars` at which
1047	* to start matching
1048	* @param srcLength the number of characters in `srcChars` to match
1049	* @param start the offset into this at which to start matching
1050	* @param length the number of characters in this to search
1051	* @return The offset into this of the start of the matched `srcChars` characters,
1052	* or -1 if not found.
1053	* @stable ICU 2.0
1054	*/
1055	int32_t indexOf(const char16_t *srcChars,
1056	int32_t srcStart,
1057	int32_t srcLength,
1058	int32_t start,
1059	int32_t length) const;
1060
1061	/**
1062	* Locate in this the first occurrence of the BMP code point `c`,
1063	* using bitwise comparison.
1064	* @param c The code unit to search for.
1065	* @return The offset into this of `c`, or -1 if not found.
1066	* @stable ICU 2.0
1067	*/
1068	inline int32_t indexOf(char16_t c) const;
1069
1070	/**
1071	* Locate in this the first occurrence of the code point `c`,
1072	* using bitwise comparison.
1073	*
1074	* @param c The code point to search for.
1075	* @return The offset into this of `c`, or -1 if not found.
1076	* @stable ICU 2.0
1077	*/
1078	inline int32_t indexOf(UChar32 c) const;
1079
1080	/**
1081	* Locate in this the first occurrence of the BMP code point `c`,
1082	* starting at offset `start`, using bitwise comparison.
1083	* @param c The code unit to search for.
1084	* @param start The offset at which searching will start.
1085	* @return The offset into this of `c`, or -1 if not found.
1086	* @stable ICU 2.0
1087	*/
1088	inline int32_t indexOf(char16_t c,
1089	int32_t start) const;
1090
1091	/**
1092	* Locate in this the first occurrence of the code point `c`
1093	* starting at offset `start`, using bitwise comparison.
1094	*
1095	* @param c The code point to search for.
1096	* @param start The offset at which searching will start.
1097	* @return The offset into this of `c`, or -1 if not found.
1098	* @stable ICU 2.0
1099	*/
1100	inline int32_t indexOf(UChar32 c,
1101	int32_t start) const;
1102
1103	/**
1104	* Locate in this the first occurrence of the BMP code point `c`
1105	* in the range [`start`, `start + length`),
1106	* using bitwise comparison.
1107	* @param c The code unit to search for.
1108	* @param start the offset into this at which to start matching
1109	* @param length the number of characters in this to search
1110	* @return The offset into this of `c`, or -1 if not found.
1111	* @stable ICU 2.0
1112	*/
1113	inline int32_t indexOf(char16_t c,
1114	int32_t start,
1115	int32_t length) const;
1116
1117	/**
1118	* Locate in this the first occurrence of the code point `c`
1119	* in the range [`start`, `start + length`),
1120	* using bitwise comparison.
1121	*
1122	* @param c The code point to search for.
1123	* @param start the offset into this at which to start matching
1124	* @param length the number of characters in this to search
1125	* @return The offset into this of `c`, or -1 if not found.
1126	* @stable ICU 2.0
1127	*/
1128	inline int32_t indexOf(UChar32 c,
1129	int32_t start,
1130	int32_t length) const;
1131
1132	/**
1133	* Locate in this the last occurrence of the characters in `text`,
1134	* using bitwise comparison.
1135	* @param text The text to search for.
1136	* @return The offset into this of the start of `text`,
1137	* or -1 if not found.
1138	* @stable ICU 2.0
1139	*/
1140	inline int32_t lastIndexOf(const UnicodeString& text) const;
1141
1142	/**
1143	* Locate in this the last occurrence of the characters in `text`
1144	* starting at offset `start`, using bitwise comparison.
1145	* @param text The text to search for.
1146	* @param start The offset at which searching will start.
1147	* @return The offset into this of the start of `text`,
1148	* or -1 if not found.
1149	* @stable ICU 2.0
1150	*/
1151	inline int32_t lastIndexOf(const UnicodeString& text,
1152	int32_t start) const;
1153
1154	/**
1155	* Locate in this the last occurrence in the range
1156	* [`start`, `start + length`) of the characters
1157	* in `text`, using bitwise comparison.
1158	* @param text The text to search for.
1159	* @param start The offset at which searching will start.
1160	* @param length The number of characters to search
1161	* @return The offset into this of the start of `text`,
1162	* or -1 if not found.
1163	* @stable ICU 2.0
1164	*/
1165	inline int32_t lastIndexOf(const UnicodeString& text,
1166	int32_t start,
1167	int32_t length) const;
1168
1169	/**
1170	* Locate in this the last occurrence in the range
1171	* [`start`, `start + length`) of the characters
1172	* in `srcText` in the range
1173	* [`srcStart`, `srcStart + srcLength`),
1174	* using bitwise comparison.
1175	* @param srcText The text to search for.
1176	* @param srcStart the offset into `srcText` at which
1177	* to start matching
1178	* @param srcLength the number of characters in `srcText` to match
1179	* @param start the offset into this at which to start matching
1180	* @param length the number of characters in this to search
1181	* @return The offset into this of the start of the matched `srcText` characters,
1182	* or -1 if not found.
1183	* @stable ICU 2.0
1184	*/
1185	inline int32_t lastIndexOf(const UnicodeString& srcText,
1186	int32_t srcStart,
1187	int32_t srcLength,
1188	int32_t start,
1189	int32_t length) const;
1190
1191	/**
1192	* Locate in this the last occurrence of the characters in `srcChars`
1193	* starting at offset `start`, using bitwise comparison.
1194	* @param srcChars The text to search for.
1195	* @param srcLength the number of characters in `srcChars` to match
1196	* @param start the offset into this at which to start matching
1197	* @return The offset into this of the start of `srcChars`,
1198	* or -1 if not found.
1199	* @stable ICU 2.0
1200	*/
1201	inline int32_t lastIndexOf(const char16_t *srcChars,
1202	int32_t srcLength,
1203	int32_t start) const;
1204
1205	/**
1206	* Locate in this the last occurrence in the range
1207	* [`start`, `start + length`) of the characters
1208	* in `srcChars`, using bitwise comparison.
1209	* @param srcChars The text to search for.
1210	* @param srcLength the number of characters in `srcChars`
1211	* @param start The offset at which searching will start.
1212	* @param length The number of characters to search
1213	* @return The offset into this of the start of `srcChars`,
1214	* or -1 if not found.
1215	* @stable ICU 2.0
1216	*/
1217	inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1218	int32_t srcLength,
1219	int32_t start,
1220	int32_t length) const;
1221
1222	/**
1223	* Locate in this the last occurrence in the range
1224	* [`start`, `start + length`) of the characters
1225	* in `srcChars` in the range
1226	* [`srcStart`, `srcStart + srcLength`),
1227	* using bitwise comparison.
1228	* @param srcChars The text to search for.
1229	* @param srcStart the offset into `srcChars` at which
1230	* to start matching
1231	* @param srcLength the number of characters in `srcChars` to match
1232	* @param start the offset into this at which to start matching
1233	* @param length the number of characters in this to search
1234	* @return The offset into this of the start of the matched `srcChars` characters,
1235	* or -1 if not found.
1236	* @stable ICU 2.0
1237	*/
1238	int32_t lastIndexOf(const char16_t *srcChars,
1239	int32_t srcStart,
1240	int32_t srcLength,
1241	int32_t start,
1242	int32_t length) const;
1243
1244	/**
1245	* Locate in this the last occurrence of the BMP code point `c`,
1246	* using bitwise comparison.
1247	* @param c The code unit to search for.
1248	* @return The offset into this of `c`, or -1 if not found.
1249	* @stable ICU 2.0
1250	*/
1251	inline int32_t lastIndexOf(char16_t c) const;
1252
1253	/**
1254	* Locate in this the last occurrence of the code point `c`,
1255	* using bitwise comparison.
1256	*
1257	* @param c The code point to search for.
1258	* @return The offset into this of `c`, or -1 if not found.
1259	* @stable ICU 2.0
1260	*/
1261	inline int32_t lastIndexOf(UChar32 c) const;
1262
1263	/**
1264	* Locate in this the last occurrence of the BMP code point `c`
1265	* starting at offset `start`, using bitwise comparison.
1266	* @param c The code unit to search for.
1267	* @param start The offset at which searching will start.
1268	* @return The offset into this of `c`, or -1 if not found.
1269	* @stable ICU 2.0
1270	*/
1271	inline int32_t lastIndexOf(char16_t c,
1272	int32_t start) const;
1273
1274	/**
1275	* Locate in this the last occurrence of the code point `c`
1276	* starting at offset `start`, using bitwise comparison.
1277	*
1278	* @param c The code point to search for.
1279	* @param start The offset at which searching will start.
1280	* @return The offset into this of `c`, or -1 if not found.
1281	* @stable ICU 2.0
1282	*/
1283	inline int32_t lastIndexOf(UChar32 c,
1284	int32_t start) const;
1285
1286	/**
1287	* Locate in this the last occurrence of the BMP code point `c`
1288	* in the range [`start`, `start + length`),
1289	* using bitwise comparison.
1290	* @param c The code unit to search for.
1291	* @param start the offset into this at which to start matching
1292	* @param length the number of characters in this to search
1293	* @return The offset into this of `c`, or -1 if not found.
1294	* @stable ICU 2.0
1295	*/
1296	inline int32_t lastIndexOf(char16_t c,
1297	int32_t start,
1298	int32_t length) const;
1299
1300	/**
1301	* Locate in this the last occurrence of the code point `c`
1302	* in the range [`start`, `start + length`),
1303	* using bitwise comparison.
1304	*
1305	* @param c The code point to search for.
1306	* @param start the offset into this at which to start matching
1307	* @param length the number of characters in this to search
1308	* @return The offset into this of `c`, or -1 if not found.
1309	* @stable ICU 2.0
1310	*/
1311	inline int32_t lastIndexOf(UChar32 c,
1312	int32_t start,
1313	int32_t length) const;
1314
1315
1316	/* Character access */
1317
1318	/**
1319	* Return the code unit at offset `offset`.
1320	* If the offset is not valid (0..length()-1) then U+ffff is returned.
1321	* @param offset a valid offset into the text
1322	* @return the code unit at offset `offset`
1323	* or 0xffff if the offset is not valid for this string
1324	* @stable ICU 2.0
1325	*/
1326	inline char16_t charAt(int32_t offset) const;
1327
1328	/**
1329	* Return the code unit at offset `offset`.
1330	* If the offset is not valid (0..length()-1) then U+ffff is returned.
1331	* @param offset a valid offset into the text
1332	* @return the code unit at offset `offset`, or 0xffff if the offset is not valid for this string
1333	* @stable ICU 2.0
1334	*/
1335	inline char16_t operator[] (int32_t offset) const;
1336
1337	/**
1338	* Return the code point that contains the code unit
1339	* at offset `offset`.
1340	* If the offset is not valid (0..length()-1) then U+ffff is returned.
1341	* @param offset a valid offset into the text
1342	* that indicates the text offset of any of the code units
1343	* that will be assembled into a code point (21-bit value) and returned
1344	* @return the code point of text at `offset`
1345	* or 0xffff if the offset is not valid for this string
1346	* @stable ICU 2.0
1347	*/
1348	UChar32 char32At(int32_t offset) const;
1349
1350	/**
1351	* Adjust a random-access offset so that
1352	* it points to the beginning of a Unicode character.
1353	* The offset that is passed in points to
1354	* any code unit of a code point,
1355	* while the returned offset will point to the first code unit
1356	* of the same code point.
1357	* In UTF-16, if the input offset points to a second surrogate
1358	* of a surrogate pair, then the returned offset will point
1359	* to the first surrogate.
1360	* @param offset a valid offset into one code point of the text
1361	* @return offset of the first code unit of the same code point
1362	* @see U16_SET_CP_START
1363	* @stable ICU 2.0
1364	*/
1365	int32_t getChar32Start(int32_t offset) const;
1366
1367	/**
1368	* Adjust a random-access offset so that
1369	* it points behind a Unicode character.
1370	* The offset that is passed in points behind
1371	* any code unit of a code point,
1372	* while the returned offset will point behind the last code unit
1373	* of the same code point.
1374	* In UTF-16, if the input offset points behind the first surrogate
1375	* (i.e., to the second surrogate)
1376	* of a surrogate pair, then the returned offset will point
1377	* behind the second surrogate (i.e., to the first code unit after the pair).
1378	* @param offset a valid offset after any code unit of a code point of the text
1379	* @return offset of the first code unit after the same code point
1380	* @see U16_SET_CP_LIMIT
1381	* @stable ICU 2.0
1382	*/
1383	int32_t getChar32Limit(int32_t offset) const;
1384
1385	/**
1386	* Move the code unit index along the string by delta code points.
1387	* Interpret the input index as a code unit-based offset into the string,
1388	* move the index forward or backward by delta code points, and
1389	* return the resulting index.
1390	* The input index should point to the first code unit of a code point,
1391	* if there is more than one.
1392	*
1393	* Both input and output indexes are code unit-based as for all
1394	* string indexes/offsets in ICU (and other libraries, like MBCS char*).
1395	* If delta<0 then the index is moved backward (toward the start of the string).
1396	* If delta>0 then the index is moved forward (toward the end of the string).
1397	*
1398	* This behaves like CharacterIterator::move32(delta, kCurrent).
1399	*
1400	* Behavior for out-of-bounds indexes:
1401	* `moveIndex32` pins the input index to 0..length(), i.e.,
1402	* if the input index<0 then it is pinned to 0;
1403	* if it is index>length() then it is pinned to length().
1404	* Afterwards, the index is moved by `delta` code points
1405	* forward or backward,
1406	* but no further backward than to 0 and no further forward than to length().
1407	* The resulting index return value will be in between 0 and length(), inclusively.
1408	*
1409	* Examples:
1410	* \code
1411	* // s has code points 'a' U+10000 'b' U+10ffff U+2029
1412	* UnicodeString s(u"a\U00010000b\U0010ffff\u2029");
1413	*
1414	* // initial index: position of U+10000
1415	* int32_t index=1;
1416	*
1417	* // the following examples will all result in index==4, position of U+10ffff
1418	*
1419	* // skip 2 code points from some position in the string
1420	* index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1421	*
1422	* // go to the 3rd code point from the start of s (0-based)
1423	* index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1424	*
1425	* // go to the next-to-last code point of s
1426	* index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1427	* \endcode
1428	*
1429	* @param index input code unit index
1430	* @param delta (signed) code point count to move the index forward or backward
1431	* in the string
1432	* @return the resulting code unit index
1433	* @stable ICU 2.0
1434	*/
1435	int32_t moveIndex32(int32_t index, int32_t delta) const;
1436
1437	/* Substring extraction */
1438
1439	/**
1440	* Copy the characters in the range
1441	* [`start`, `start + length`) into the array `dst`,
1442	* beginning at `dstStart`.
1443	* If the string aliases to `dst` itself as an external buffer,
1444	* then extract() will not copy the contents.
1445	*
1446	* @param start offset of first character which will be copied into the array
1447	* @param length the number of characters to extract
1448	* @param dst array in which to copy characters. The length of `dst`
1449	* must be at least (`dstStart + length`).
1450	* @param dstStart the offset in `dst` where the first character
1451	* will be extracted; defaults to 0
1452	* @stable ICU 2.0
1453	*/
1454	inline void extract(int32_t start,
1455	int32_t length,
1456	Char16Ptr dst,
1457	int32_t dstStart = 0) const;
1458
1459	/**
1460	* Copy the contents of the string into dest.
1461	* This is a convenience function that
1462	* checks if there is enough space in dest,
1463	* extracts the entire string if possible,
1464	* and NUL-terminates dest if possible.
1465	*
1466	* If the string fits into dest but cannot be NUL-terminated
1467	* (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1468	* If the string itself does not fit into dest
1469	* (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1470	*
1471	* If the string aliases to `dest` itself as an external buffer,
1472	* then extract() will not copy the contents.
1473	*
1474	* @param dest Destination string buffer.
1475	* @param destCapacity Number of char16_ts available at dest.
1476	* @param errorCode ICU error code.
1477	* @return length()
1478	* @stable ICU 2.0
1479	*/
1480	int32_t
1481	extract(Char16Ptr dest, int32_t destCapacity,
1482	UErrorCode &errorCode) const;
1483
1484	/**
1485	* Copy the characters in the range
1486	* [`start`, `start + length`) into the UnicodeString
1487	* `target`.
1488	* @param start offset of first character which will be copied
1489	* @param length the number of characters to extract
1490	* @param target UnicodeString into which to copy characters.
1491	* @stable ICU 2.0
1492	*/
1493	inline void extract(int32_t start,
1494	int32_t length,
1495	UnicodeString& target) const;
1496
1497	/**
1498	* Copy the characters in the range [`start`, `limit`)
1499	* into the array `dst`, beginning at `dstStart`.
1500	* @param start offset of first character which will be copied into the array
1501	* @param limit offset immediately following the last character to be copied
1502	* @param dst array in which to copy characters. The length of `dst`
1503	* must be at least (`dstStart + (limit - start)`).
1504	* @param dstStart the offset in `dst` where the first character
1505	* will be extracted; defaults to 0
1506	* @stable ICU 2.0
1507	*/
1508	inline void extractBetween(int32_t start,
1509	int32_t limit,
1510	char16_t *dst,
1511	int32_t dstStart = 0) const;
1512
1513	/**
1514	* Copy the characters in the range [`start`, `limit`)
1515	* into the UnicodeString `target`. Replaceable API.
1516	* @param start offset of first character which will be copied
1517	* @param limit offset immediately following the last character to be copied
1518	* @param target UnicodeString into which to copy characters.
1519	* @stable ICU 2.0
1520	*/
1521	virtual void extractBetween(int32_t start,
1522	int32_t limit,
1523	UnicodeString& target) const;
1524
1525	/**
1526	* Copy the characters in the range
1527	* [`start`, `start + startLength`) into an array of characters.
1528	* All characters must be invariant (see utypes.h).
1529	* Use US_INV as the last, signature-distinguishing parameter.
1530	*
1531	* This function does not write any more than `targetCapacity`
1532	* characters but returns the length of the entire output string
1533	* so that one can allocate a larger buffer and call the function again
1534	* if necessary.
1535	* The output string is NUL-terminated if possible.
1536	*
1537	* @param start offset of first character which will be copied
1538	* @param startLength the number of characters to extract
1539	* @param target the target buffer for extraction, can be NULL
1540	* if targetCapacity is 0
1541	* @param targetCapacity the length of the target buffer
1542	* @param inv Signature-distinguishing parameter, use US_INV.
1543	* @return the output string length, not including the terminating NUL
1544	* @stable ICU 3.2
1545	*/
1546	int32_t extract(int32_t start,
1547	int32_t startLength,
1548	char *target,
1549	int32_t targetCapacity,
1550	enum EInvariant inv) const;
1551
1552	#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1553
1554	/**
1555	* Copy the characters in the range
1556	* [`start`, `start + startLength`) into an array of characters
1557	* in the platform's default codepage.
1558	* This function does not write any more than `targetLength`
1559	* characters but returns the length of the entire output string
1560	* so that one can allocate a larger buffer and call the function again
1561	* if necessary.
1562	* The output string is NUL-terminated if possible.
1563	*
1564	* @param start offset of first character which will be copied
1565	* @param startLength the number of characters to extract
1566	* @param target the target buffer for extraction
1567	* @param targetLength the length of the target buffer
1568	* If `target` is NULL, then the number of bytes required for
1569	* `target` is returned.
1570	* @return the output string length, not including the terminating NUL
1571	* @stable ICU 2.0
1572	*/
1573	int32_t extract(int32_t start,
1574	int32_t startLength,
1575	char *target,
1576	uint32_t targetLength) const;
1577
1578	#endif
1579
1580	#if !UCONFIG_NO_CONVERSION
1581
1582	/**
1583	* Copy the characters in the range
1584	* [`start`, `start + startLength`) into an array of characters
1585	* in a specified codepage.
1586	* The output string is NUL-terminated.
1587	*
1588	* Recommendation: For invariant-character strings use
1589	* extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
1590	* because it avoids object code dependencies of UnicodeString on
1591	* the conversion code.
1592	*
1593	* @param start offset of first character which will be copied
1594	* @param startLength the number of characters to extract
1595	* @param target the target buffer for extraction
1596	* @param codepage the desired codepage for the characters. 0 has
1597	* the special meaning of the default codepage
1598	* If `codepage` is an empty string (`""`),
1599	* then a simple conversion is performed on the codepage-invariant
1600	* subset ("invariant characters") of the platform encoding. See utypes.h.
1601	* If `target` is NULL, then the number of bytes required for
1602	* `target` is returned. It is assumed that the target is big enough
1603	* to fit all of the characters.
1604	* @return the output string length, not including the terminating NUL
1605	* @stable ICU 2.0
1606	*/
1607	inline int32_t extract(int32_t start,
1608	int32_t startLength,
1609	char *target,
1610	const char *codepage = 0) const;
1611
1612	/**
1613	* Copy the characters in the range
1614	* [`start`, `start + startLength`) into an array of characters
1615	* in a specified codepage.
1616	* This function does not write any more than `targetLength`
1617	* characters but returns the length of the entire output string
1618	* so that one can allocate a larger buffer and call the function again
1619	* if necessary.
1620	* The output string is NUL-terminated if possible.
1621	*
1622	* Recommendation: For invariant-character strings use
1623	* extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
1624	* because it avoids object code dependencies of UnicodeString on
1625	* the conversion code.
1626	*
1627	* @param start offset of first character which will be copied
1628	* @param startLength the number of characters to extract
1629	* @param target the target buffer for extraction
1630	* @param targetLength the length of the target buffer
1631	* @param codepage the desired codepage for the characters. 0 has
1632	* the special meaning of the default codepage
1633	* If `codepage` is an empty string (`""`),
1634	* then a simple conversion is performed on the codepage-invariant
1635	* subset ("invariant characters") of the platform encoding. See utypes.h.
1636	* If `target` is NULL, then the number of bytes required for
1637	* `target` is returned.
1638	* @return the output string length, not including the terminating NUL
1639	* @stable ICU 2.0
1640	*/
1641	int32_t extract(int32_t start,
1642	int32_t startLength,
1643	char *target,
1644	uint32_t targetLength,
1645	const char *codepage) const;
1646
1647	/**
1648	* Convert the UnicodeString into a codepage string using an existing UConverter.
1649	* The output string is NUL-terminated if possible.
1650	*
1651	* This function avoids the overhead of opening and closing a converter if
1652	* multiple strings are extracted.
1653	*
1654	* @param dest destination string buffer, can be NULL if destCapacity==0
1655	* @param destCapacity the number of chars available at dest
1656	* @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1657	* or NULL for the default converter
1658	* @param errorCode normal ICU error code
1659	* @return the length of the output string, not counting the terminating NUL;
1660	* if the length is greater than destCapacity, then the string will not fit
1661	* and a buffer of the indicated length would need to be passed in
1662	* @stable ICU 2.0
1663	*/
1664	int32_t extract(char *dest, int32_t destCapacity,
1665	UConverter *cnv,
1666	UErrorCode &errorCode) const;
1667
1668	#endif
1669
1670	/**
1671	* Create a temporary substring for the specified range.
1672	* Unlike the substring constructor and setTo() functions,
1673	* the object returned here will be a read-only alias (using getBuffer())
1674	* rather than copying the text.
1675	* As a result, this substring operation is much faster but requires
1676	* that the original string not be modified or deleted during the lifetime
1677	* of the returned substring object.
1678	* @param start offset of the first character visible in the substring
1679	* @param length length of the substring
1680	* @return a read-only alias UnicodeString object for the substring
1681	* @stable ICU 4.4
1682	*/
1683	UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1684
1685	/**
1686	* Create a temporary substring for the specified range.
1687	* Same as tempSubString(start, length) except that the substring range
1688	* is specified as a (start, limit) pair (with an exclusive limit index)
1689	* rather than a (start, length) pair.
1690	* @param start offset of the first character visible in the substring
1691	* @param limit offset immediately following the last character visible in the substring
1692	* @return a read-only alias UnicodeString object for the substring
1693	* @stable ICU 4.4
1694	*/
1695	inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1696
1697	/**
1698	* Convert the UnicodeString to UTF-8 and write the result
1699	* to a ByteSink. This is called by toUTF8String().
1700	* Unpaired surrogates are replaced with U+FFFD.
1701	* Calls u_strToUTF8WithSub().
1702	*
1703	* @param sink A ByteSink to which the UTF-8 version of the string is written.
1704	* sink.Flush() is called at the end.
1705	* @stable ICU 4.2
1706	* @see toUTF8String
1707	*/
1708	void toUTF8(ByteSink &sink) const;
1709
1710	/**
1711	* Append the UTF-8 form of this UnicodeString to a standard string.
1712	* The conversion itself is done by toUTF8(), writing through a
1713	* StringByteSink that wraps `result`.
1714	* Unpaired surrogates are replaced with U+FFFD.
1715	*
1716	* @param result A standard string (or a compatible object)
1717	* to which the UTF-8 version of the string is appended.
1718	* @return The string object, i.e. `result`.
1719	* @stable ICU 4.2
1720	* @see toUTF8
1721	*/
1722	template<typename StringClass>
1723	StringClass &toUTF8String(StringClass &result) const {
1724	StringByteSink<StringClass> utf8Sink(&result, this->length());
1725	this->toUTF8(utf8Sink);
1726	return result;
1727	}
1728
1729	/**
1730	* Convert the UnicodeString to UTF-32.
1731	* Unpaired surrogates are replaced with U+FFFD.
1732	* Calls u_strToUTF32WithSub().
1733	*
1734	* @param utf32 destination string buffer, can be NULL if capacity==0
1735	* @param capacity the number of UChar32s available at utf32
1736	* @param errorCode Standard ICU error code. Its input value must
1737	* pass the U_SUCCESS() test, or else the function returns
1738	* immediately. Check for U_FAILURE() on output or use with
1739	* function chaining. (See User Guide for details.)
1740	* @return The length of the UTF-32 string.
1741	* @see fromUTF32
1742	* @stable ICU 4.2
1743	*/
1744	int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1745
1746	/* Length operations */
1747
1748	/**
1749	* Return the length of the UnicodeString object.
1750	* The length is the number of char16_t code units in the UnicodeString.
1751	* If you want the number of code points, please use countChar32().
1752	* @return the length of the UnicodeString object
1753	* @see countChar32
1754	* @stable ICU 2.0
1755	*/
1756	inline int32_t length(void) const;
1757
1758	/**
1759	* Count Unicode code points in the length char16_t code units of the string.
1760	* A code point may occupy either one or two char16_t code units.
1761	* Counting code points involves reading all code units.
1762	*
1763	* This function is basically the inverse of moveIndex32().
1764	*
1765	* @param start the index of the first code unit to check
1766	* @param length the number of char16_t code units to check
1767	* @return the number of code points in the specified code units
1768	* @see length
1769	* @stable ICU 2.0
1770	*/
1771	int32_t
1772	countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1773
1774	/**
1775	* Check if the length char16_t code units of the string
1776	* contain more Unicode code points than a certain number.
1777	* This is more efficient than counting all code points in this part of the string
1778	* and comparing that number with a threshold.
1779	* This function may not need to scan the string at all if the length
1780	* falls within a certain range, and
1781	* never needs to count more than 'number+1' code points.
1782	* Logically equivalent to (countChar32(start, length)>number).
1783	* A Unicode code point may occupy either one or two char16_t code units.
1784	*
1785	* @param start the index of the first code unit to check (0 for the entire string)
1786	* @param length the number of char16_t code units to check
1787	* (use INT32_MAX for the entire string; remember that start/length
1788	* values are pinned)
1789	* @param number The number of code points in the (sub)string is compared against
1790	* the 'number' parameter.
1791	* @return Boolean value for whether the string contains more Unicode code points
1792	* than 'number'. Same as (u_countChar32(s, length)>number).
1793	* @see countChar32
1794	* @see u_strHasMoreChar32Than
1795	* @stable ICU 2.4
1796	*/
1797	UBool
1798	hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1799
1800	/**
1801	* Determine if this string is empty.
1802	* @return TRUE if this string contains 0 characters (this includes a bogus string, see isBogus()), FALSE otherwise.
1803	* @stable ICU 2.0
1804	*/
1805	inline UBool isEmpty(void) const;
1806
1807	/**
1808	* Return the capacity of the internal buffer of the UnicodeString object.
1809	* This is useful together with the getBuffer functions.
1810	* See there for details.
1811	*
1812	* @return the number of char16_ts available in the internal buffer
1813	* @see getBuffer
1814	* @stable ICU 2.0
1815	*/
1816	inline int32_t getCapacity(void) const;
1817
1818	/* Other operations */
1819
1820	/**
1821	* Generate a hash code for this object.
1822	* @return The hash code of this UnicodeString.
1823	* @stable ICU 2.0
1824	*/
1825	inline int32_t hashCode(void) const;
1826
1827	/**
1828	* Determine if this object contains a valid string.
1829	* A bogus string has no value. It is different from an empty string,
1830	* although in both cases isEmpty() returns TRUE and length() returns 0.
1831	* setToBogus() and isBogus() can be used to indicate that no string value is available.
1832	* For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
1833	* length() returns 0.
1834	*
1835	* @return TRUE if the string is bogus/invalid, FALSE otherwise
1836	* @see setToBogus()
1837	* @stable ICU 2.0
1838	*/
1839	inline UBool isBogus(void) const;
1840
1841
1842	//========================================
1843	// Write operations
1844	//========================================
1845
1846	/* Assignment operations */
1847
1848	/**
1849	* Assignment operator. Replace the characters in this UnicodeString
1850	* with the characters from `srcText`.
1851	*
1852	* Starting with ICU 2.4, the assignment operator and the copy constructor
1853	* allocate a new buffer and copy the buffer contents even for readonly aliases.
1854	* By contrast, the fastCopyFrom() function implements the old,
1855	* more efficient but less safe behavior
1856	* of making this string also a readonly alias to the same buffer.
1857	*
1858	* If the source object has an "open" buffer from getBuffer(minCapacity),
1859	* then the copy is an empty string.
1860	*
1861	* @param srcText The text containing the characters to replace
1862	* @return a reference to this
1863	* @stable ICU 2.0
1864	* @see fastCopyFrom
1865	*/
1866	UnicodeString &operator=(const UnicodeString &srcText);
1867
1868	/**
1869	* Almost the same as the assignment operator.
1870	* Replace the characters in this UnicodeString
1871	* with the characters from `srcText`.
1872	*
1873	* This function works the same as the assignment operator
1874	* for all strings except for ones that are readonly aliases.
1875	*
1876	* Starting with ICU 2.4, the assignment operator and the copy constructor
1877	* allocate a new buffer and copy the buffer contents even for readonly aliases.
1878	* This function implements the old, more efficient but less safe behavior
1879	* of making this string also a readonly alias to the same buffer.
1880	*
1881	* The fastCopyFrom function must be used only if it is known that the lifetime of
1882	* this UnicodeString does not exceed the lifetime of the aliased buffer
1883	* including its contents, for example for strings from resource bundles
1884	* or aliases to string constants.
1885	*
1886	* If the source object has an "open" buffer from getBuffer(minCapacity),
1887	* then the copy is an empty string.
1888	*
1889	* @param src The text containing the characters to replace.
1890	* @return a reference to this
1891	* @stable ICU 2.4
1892	*/
1893	UnicodeString &fastCopyFrom(const UnicodeString &src);
1894
1895	/**
1896	* Move assignment operator; might leave src in bogus state.
1897	* This string will have the same contents and state that the source string had.
1898	* The behavior is undefined if *this and src are the same object.
1899	* @param src source string
1900	* @return *this
1901	* @stable ICU 56
1902	*/
1903	UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT;
1904
1905	/**
1906	* Swap strings.
1907	* @param other other string
1908	* @stable ICU 56
1909	*/
1910	void swap(UnicodeString &other) U_NOEXCEPT;
1911
1912	/**
1913	* Non-member UnicodeString swap function; forwards to s1.swap(s2).
1914	* @param s1 will get s2's contents and state
1915	* @param s2 will get s1's contents and state
1916	* @stable ICU 56
1917	*/
1918	friend inline void U_EXPORT2
1919	swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT {
1920	s1.swap(s2);
1921	}
1922
1923	/**
1924	* Assignment operator. Replace the characters in this UnicodeString
1925	* with the code unit `ch`.
1926	* @param ch the code unit to replace
1927	* @return a reference to this
1928	* @stable ICU 2.0
1929	*/
1930	inline UnicodeString& operator= (char16_t ch);
1931
1932	/**
1933	* Assignment operator. Replace the characters in this UnicodeString
1934	* with the code point `ch`.
1935	* @param ch the code point to replace
1936	* @return a reference to this
1937	* @stable ICU 2.0
1938	*/
1939	inline UnicodeString& operator= (UChar32 ch);
1940
1941	/**
1942	* Set the text in the UnicodeString object to the characters
1943	* in `srcText` in the range
1944	* [`srcStart`, `srcText.length()`).
1945	* `srcText` is not modified.
1946	* @param srcText the source for the new characters
1947	* @param srcStart the offset into `srcText` where new characters
1948	* will be obtained
1949	* @return a reference to this
1950	* @stable ICU 2.2
1951	*/
1952	inline UnicodeString& setTo(const UnicodeString& srcText,
1953	int32_t srcStart);
1954
1955	/**
1956	* Set the text in the UnicodeString object to the characters
1957	* in `srcText` in the range
1958	* [`srcStart`, `srcStart + srcLength`).
1959	* `srcText` is not modified.
1960	* @param srcText the source for the new characters
1961	* @param srcStart the offset into `srcText` where new characters
1962	* will be obtained
1963	* @param srcLength the number of characters in `srcText` in the
1964	* replace string.
1965	* @return a reference to this
1966	* @stable ICU 2.0
1967	*/
1968	inline UnicodeString& setTo(const UnicodeString& srcText,
1969	int32_t srcStart,
1970	int32_t srcLength);
1971
1972	/**
1973	* Set the text in the UnicodeString object to the characters in
1974	* `srcText`.
1975	* `srcText` is not modified.
1976	* @param srcText the source for the new characters
1977	* @return a reference to this
1978	* @stable ICU 2.0
1979	*/
1980	inline UnicodeString& setTo(const UnicodeString& srcText);
1981
1982	/**
1983	* Set the characters in the UnicodeString object to the characters
1984	* in `srcChars`. `srcChars` is not modified.
1985	* @param srcChars the source for the new characters
1986	* @param srcLength the number of Unicode characters in srcChars.
1987	* @return a reference to this
1988	* @stable ICU 2.0
1989	*/
1990	inline UnicodeString& setTo(const char16_t *srcChars,
1991	int32_t srcLength);
1992
1993	/**
1994	* Set the characters in the UnicodeString object to the code unit
1995	* `srcChar`.
1996	* @param srcChar the code unit which becomes the UnicodeString's character
1997	* content
1998	* @return a reference to this
1999	* @stable ICU 2.0
2000	*/
2001	inline UnicodeString& setTo(char16_t srcChar);
2002
2003	/**
2004	* Set the characters in the UnicodeString object to the code point
2005	* `srcChar`.
2006	* @param srcChar the code point which becomes the UnicodeString's character
2007	* content
2008	* @return a reference to this
2009	* @stable ICU 2.0
2010	*/
2011	inline UnicodeString& setTo(UChar32 srcChar);
2012
2013	/**
2014	* Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
2015	* The text will be used for the UnicodeString object, but
2016	* it will not be released when the UnicodeString is destroyed.
2017	* This has copy-on-write semantics:
2018	* When the string is modified, then the buffer is first copied into
2019	* newly allocated memory.
2020	* The aliased buffer is never modified.
2021	*
2022	* In an assignment to another UnicodeString, when using the copy constructor
2023	* or the assignment operator, the text will be copied.
2024	* When using fastCopyFrom(), the text will be aliased again,
2025	* so that both strings then alias the same readonly-text.
2026	*
2027	* @param isTerminated specifies if `text` is `NUL`-terminated.
2028	* This must be true if `textLength==-1`.
2029	* @param text The characters to alias for the UnicodeString.
2030	* @param textLength The number of Unicode characters in `text` to alias.
2031	* If -1, then this function will determine the length
2032	* by calling `u_strlen()`.
2033	* @return a reference to this
2034	* @stable ICU 2.0
2035	*/
2036	UnicodeString &setTo(UBool isTerminated,
2037	ConstChar16Ptr text,
2038	int32_t textLength);
2039
2040	/**
2041	* Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
2042	* The text will be used for the UnicodeString object, but
2043	* it will not be released when the UnicodeString is destroyed.
2044	* This has write-through semantics:
2045	* For as long as the capacity of the buffer is sufficient, write operations
2046	* will directly affect the buffer. When more capacity is necessary, then
2047	* a new buffer will be allocated and the contents copied as with regularly
2048	* constructed strings.
2049	* In an assignment to another UnicodeString, the buffer will be copied.
2050	* The extract(Char16Ptr dst) function detects whether the dst pointer is the same
2051	* as the string buffer itself and will in this case not copy the contents.
2052	*
2053	* @param buffer The characters to alias for the UnicodeString.
2054	* @param buffLength The number of Unicode characters in `buffer` to alias.
2055	* @param buffCapacity The size of `buffer` in char16_ts.
2056	* @return a reference to this
2057	* @stable ICU 2.0
2058	*/
2059	UnicodeString &setTo(char16_t *buffer,
2060	int32_t buffLength,
2061	int32_t buffCapacity);
2062
2063	/**
2064	* Make this UnicodeString object invalid.
2065	* The string will test TRUE with isBogus().
2066	*
2067	* A bogus string has no value. It is different from an empty string.
2068	* It can be used to indicate that no string value is available.
2069	* getBuffer() and getTerminatedBuffer() return NULL, and
2070	* length() returns 0.
2071	*
2072	* This utility function is used throughout the UnicodeString
2073	* implementation to indicate that a UnicodeString operation failed,
2074	* and may be used in other functions,
2075	* especially but not exclusively when such functions do not
2076	* take a UErrorCode for simplicity.
2077	*
2078	* The following methods, and no others, will clear a string object's bogus flag:
2079	* - remove()
2080	* - remove(0, INT32_MAX)
2081	* - truncate(0)
2082	* - operator=() (assignment operator)
2083	* - setTo(...)
2084	*
2085	* The simplest way to turn a bogus string into an empty one
2086	* is to use the remove() function.
2087	* Examples for other functions that are equivalent to "set to empty string":
2088	* \code
2089	* if(s.isBogus()) {
2090	* s.remove(); // set to an empty string (remove all), or
2091	* s.remove(0, INT32_MAX); // set to an empty string (remove all), or
2092	* s.truncate(0); // set to an empty string (complete truncation), or
2093	* s=UnicodeString(); // assign an empty string, or
2094	* s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
2095	* static const char16_t nul=0;
2096	* s.setTo(&nul, 0); // set to an empty C Unicode string
2097	* }
2098	* \endcode
2099	*
2100	* @see isBogus()
2101	* @stable ICU 2.0
2102	*/
2103	void setToBogus();
2104
2105	/**
2106	* Set the character at the specified offset to the specified character.
2107	* @param offset A valid offset into the text of the character to set
2108	* @param ch The new character
2109	* @return A reference to this
2110	* @stable ICU 2.0
2111	*/
2112	UnicodeString& setCharAt(int32_t offset,
2113	char16_t ch);
2114
2115
2116	/* Append operations */
2117
2118	/**
2119	* Append operator. Append the code unit `ch` to the UnicodeString
2120	* object.
2121	* @param ch the code unit to be appended
2122	* @return a reference to this
2123	* @stable ICU 2.0
2124	*/
2125	inline UnicodeString& operator+= (char16_t ch);
2126
2127	/**
2128	* Append operator. Append the code point `ch` to the UnicodeString
2129	* object.
2130	* @param ch the code point to be appended
2131	* @return a reference to this
2132	* @stable ICU 2.0
2133	*/
2134	inline UnicodeString& operator+= (UChar32 ch);
2135
2136	/**
2137	* Append operator. Append the characters in `srcText` to the
2138	* UnicodeString object. `srcText` is not modified.
2139	* @param srcText the source for the new characters
2140	* @return a reference to this
2141	* @stable ICU 2.0
2142	*/
2143	inline UnicodeString& operator+= (const UnicodeString& srcText);
2144
2145	/**
2146	* Append the characters
2147	* in `srcText` in the range
2148	* [`srcStart`, `srcStart + srcLength`) to the
2149	* UnicodeString object. `srcText`
2150	* is not modified.
2151	* @param srcText the source for the new characters
2152	* @param srcStart the offset into `srcText` where new characters
2153	* will be obtained
2154	* @param srcLength the number of characters in `srcText` in
2155	* the append string
2156	* @return a reference to this
2157	* @stable ICU 2.0
2158	*/
2159	inline UnicodeString& append(const UnicodeString& srcText,
2160	int32_t srcStart,
2161	int32_t srcLength);
2162
2163	/**
2164	* Append the characters in `srcText` to the UnicodeString object.
2165	* `srcText` is not modified.
2166	* @param srcText the source for the new characters
2167	* @return a reference to this
2168	* @stable ICU 2.0
2169	*/
2170	inline UnicodeString& append(const UnicodeString& srcText);
2171
2172	/**
2173	* Append the characters in `srcChars` in the range
2174	* [`srcStart`, `srcStart + srcLength`) to the UnicodeString
2175	* object.
2176	* `srcChars` is not modified.
2177	* @param srcChars the source for the new characters
2178	* @param srcStart the offset into `srcChars` where new characters
2179	* will be obtained
2180	* @param srcLength the number of characters in `srcChars` in
2181	* the append string; can be -1 if `srcChars` is NUL-terminated
2182	* @return a reference to this
2183	* @stable ICU 2.0
2184	*/
2185	inline UnicodeString& append(const char16_t *srcChars,
2186	int32_t srcStart,
2187	int32_t srcLength);
2188
2189	/**
2190	* Append the characters in `srcChars` to the UnicodeString object.
2191	* `srcChars` is not modified.
2192	* @param srcChars the source for the new characters
2193	* @param srcLength the number of Unicode characters in `srcChars`;
2194	* can be -1 if `srcChars` is NUL-terminated
2195	* @return a reference to this
2196	* @stable ICU 2.0
2197	*/
2198	inline UnicodeString& append(ConstChar16Ptr srcChars,
2199	int32_t srcLength);
2200
2201	/**
2202	* Append the code unit `srcChar` to the UnicodeString object.
2203	* @param srcChar the code unit to append
2204	* @return a reference to this
2205	* @stable ICU 2.0
2206	*/
2207	inline UnicodeString& append(char16_t srcChar);
2208
2209	/**
2210	* Append the code point `srcChar` to the UnicodeString object.
2211	* @param srcChar the code point to append
2212	* @return a reference to this
2213	* @stable ICU 2.0
2214	*/
2215	UnicodeString& append(UChar32 srcChar);
2216
2217
2218	/* Insert operations */
2219
2220	/**
2221	* Insert the characters in `srcText` in the range
2222	* [`srcStart`, `srcStart + srcLength`) into the UnicodeString
2223	* object at offset `start`. `srcText` is not modified.
2224	* @param start the offset where the insertion begins
2225	* @param srcText the source for the new characters
2226	* @param srcStart the offset into `srcText` where new characters
2227	* will be obtained
2228	* @param srcLength the number of characters in `srcText` in
2229	* the insert string
2230	* @return a reference to this
2231	* @stable ICU 2.0
2232	*/
2233	inline UnicodeString& insert(int32_t start,
2234	const UnicodeString& srcText,
2235	int32_t srcStart,
2236	int32_t srcLength);
2237
2238	/**
2239	* Insert the characters in `srcText` into the UnicodeString object
2240	* at offset `start`. `srcText` is not modified.
2241	* @param start the offset where the insertion begins
2242	* @param srcText the source for the new characters
2243	* @return a reference to this
2244	* @stable ICU 2.0
2245	*/
2246	inline UnicodeString& insert(int32_t start,
2247	const UnicodeString& srcText);
2248
2249	/**
2250	* Insert the characters in `srcChars` in the range
2251	* [`srcStart`, `srcStart + srcLength`) into the UnicodeString
2252	* object at offset `start`. `srcChars` is not modified.
2253	* @param start the offset at which the insertion begins
2254	* @param srcChars the source for the new characters
2255	* @param srcStart the offset into `srcChars` where new characters
2256	* will be obtained
2257	* @param srcLength the number of characters in `srcChars`
2258	* in the insert string
2259	* @return a reference to this
2260	* @stable ICU 2.0
2261	*/
2262	inline UnicodeString& insert(int32_t start,
2263	const char16_t *srcChars,
2264	int32_t srcStart,
2265	int32_t srcLength);
2266
2267	/**
2268	* Insert the characters in `srcChars` into the UnicodeString object
2269	* at offset `start`. `srcChars` is not modified.
2270	* @param start the offset where the insertion begins
2271	* @param srcChars the source for the new characters
2272	* @param srcLength the number of Unicode characters in srcChars.
2273	* @return a reference to this
2274	* @stable ICU 2.0
2275	*/
2276	inline UnicodeString& insert(int32_t start,
2277	ConstChar16Ptr srcChars,
2278	int32_t srcLength);
2279
2280	/**
2281	* Insert the code unit `srcChar` into the UnicodeString object at
2282	* offset `start`.
2283	* @param start the offset at which the insertion occurs
2284	* @param srcChar the code unit to insert
2285	* @return a reference to this
2286	* @stable ICU 2.0
2287	*/
2288	inline UnicodeString& insert(int32_t start,
2289	char16_t srcChar);
2290
2291	/**
2292	* Insert the code point `srcChar` into the UnicodeString object at
2293	* offset `start`.
2294	* @param start the offset at which the insertion occurs
2295	* @param srcChar the code point to insert
2296	* @return a reference to this
2297	* @stable ICU 2.0
2298	*/
2299	inline UnicodeString& insert(int32_t start,
2300	UChar32 srcChar);
2301
2302
2303	/* Replace operations */
2304
2305	/**
2306	* Replace the characters in the range
2307	* [`start`, `start + length`) with the characters in
2308	* `srcText` in the range
2309	* [`srcStart`, `srcStart + srcLength`).
2310	* `srcText` is not modified.
2311	* @param start the offset at which the replace operation begins
2312	* @param length the number of characters to replace. The character at
2313	* `start + length` is not modified.
2314	* @param srcText the source for the new characters
2315	* @param srcStart the offset into `srcText` where new characters
2316	* will be obtained
2317	* @param srcLength the number of characters in `srcText` in
2318	* the replace string
2319	* @return a reference to this
2320	* @stable ICU 2.0
2321	*/
2322	inline UnicodeString& replace(int32_t start,
2323	int32_t length,
2324	const UnicodeString& srcText,
2325	int32_t srcStart,
2326	int32_t srcLength);
2327
2328	/**
2329	* Replace the characters in the range
2330	* [`start`, `start + length`)
2331	* with the characters in `srcText`. `srcText` is
2332	* not modified.
2333	* @param start the offset at which the replace operation begins
2334	* @param length the number of characters to replace. The character at
2335	* `start + length` is not modified.
2336	* @param srcText the source for the new characters
2337	* @return a reference to this
2338	* @stable ICU 2.0
2339	*/
2340	inline UnicodeString& replace(int32_t start,
2341	int32_t length,
2342	const UnicodeString& srcText);
2343
2344	/**
2345	* Replace the characters in the range
2346	* [`start`, `start + length`) with the characters in
2347	* `srcChars` in the range
2348	* [`srcStart`, `srcStart + srcLength`). `srcChars`
2349	* is not modified.
2350	* @param start the offset at which the replace operation begins
2351	* @param length the number of characters to replace. The character at
2352	* `start + length` is not modified.
2353	* @param srcChars the source for the new characters
2354	* @param srcStart the offset into `srcChars` where new characters
2355	* will be obtained
2356	* @param srcLength the number of characters in `srcChars`
2357	* in the replace string
2358	* @return a reference to this
2359	* @stable ICU 2.0
2360	*/
2361	inline UnicodeString& replace(int32_t start,
2362	int32_t length,
2363	const char16_t *srcChars,
2364	int32_t srcStart,
2365	int32_t srcLength);
2366
2367	/**
2368	* Replace the characters in the range
2369	* [`start`, `start + length`) with the characters in
2370	* `srcChars`. `srcChars` is not modified.
2371	* @param start the offset at which the replace operation begins
2372	* @param length number of characters to replace. The character at
2373	* `start + length` is not modified.
2374	* @param srcChars the source for the new characters
2375	* @param srcLength the number of Unicode characters in srcChars
2376	* @return a reference to this
2377	* @stable ICU 2.0
2378	*/
2379	inline UnicodeString& replace(int32_t start,
2380	int32_t length,
2381	ConstChar16Ptr srcChars,
2382	int32_t srcLength);
2383
2384	/**
2385	* Replace the characters in the range
2386	* [`start`, `start + length`) with the code unit
2387	* `srcChar`.
2388	* @param start the offset at which the replace operation begins
2389	* @param length the number of characters to replace. The character at
2390	* `start + length` is not modified.
2391	* @param srcChar the new code unit
2392	* @return a reference to this
2393	* @stable ICU 2.0
2394	*/
2395	inline UnicodeString& replace(int32_t start,
2396	int32_t length,
2397	char16_t srcChar);
2398
2399	/**
2400	* Replace the characters in the range
2401	* [`start`, `start + length`) with the code point
2402	* `srcChar`.
2403	* @param start the offset at which the replace operation begins
2404	* @param length the number of characters to replace. The character at
2405	* `start + length` is not modified.
2406	* @param srcChar the new code point
2407	* @return a reference to this
2408	* @stable ICU 2.0
2409	*/
2410	UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2411
2412	/**
2413	* Replace the characters in the range [`start`, `limit`)
2414	* with the characters in `srcText`. `srcText` is not modified.
2415	* @param start the offset at which the replace operation begins
2416	* @param limit the offset immediately following the replace range
2417	* @param srcText the source for the new characters
2418	* @return a reference to this
2419	* @stable ICU 2.0
2420	*/
2421	inline UnicodeString& replaceBetween(int32_t start,
2422	int32_t limit,
2423	const UnicodeString& srcText);
2424
2425	/**
2426	* Replace the characters in the range [`start`, `limit`)
2427	* with the characters in `srcText` in the range
2428	* [`srcStart`, `srcLimit`). `srcText` is not modified.
2429	* @param start the offset at which the replace operation begins
2430	* @param limit the offset immediately following the replace range
2431	* @param srcText the source for the new characters
2432	* @param srcStart the offset into `srcText` where new characters
2433	* will be obtained
2434	* @param srcLimit the offset immediately following the range to copy
2435	* in `srcText`
2436	* @return a reference to this
2437	* @stable ICU 2.0
2438	*/
2439	inline UnicodeString& replaceBetween(int32_t start,
2440	int32_t limit,
2441	const UnicodeString& srcText,
2442	int32_t srcStart,
2443	int32_t srcLimit);
2444
2445	/**
2446	* Replace a substring of this object with the given text.
2447	* @param start the beginning index, inclusive; `0 <= start <= limit`.
2448	* @param limit the ending index, exclusive; `start <= limit <= length()`.
2449	* @param text the text to replace characters `start` to `limit - 1`
2450	* @stable ICU 2.0
2451	*/
2452	virtual void handleReplaceBetween(int32_t start,
2453	int32_t limit,
2454	const UnicodeString& text);
2455
2456	/**
2457	* Replaceable API
2458	* @return TRUE if it has MetaData
2459	* @stable ICU 2.4
2460	*/
2461	virtual UBool hasMetaData() const;
2462
2463	/**
2464	* Copy a substring of this object, retaining attribute (out-of-band)
2465	* information. This method is used to duplicate or reorder substrings.
2466	* The destination index must not overlap the source range.
2467	*
2468	* @param start the beginning index, inclusive; `0 <= start <= limit`.
2469	* @param limit the ending index, exclusive; `start <= limit <= length()`.
2470	* @param dest the destination index. The characters from
2471	* `start..limit-1` will be copied to `dest`.
2472	* Implementations of this method may assume that `dest <= start ||
2473	* dest >= limit`.
2474	* @stable ICU 2.0
2475	*/
2476	virtual void copy(int32_t start, int32_t limit, int32_t dest);
2477
2478	/* Search and replace operations */
2479
2480	/**
2481	* Replace all occurrences of characters in oldText with the characters
2482	* in newText
2483	* @param oldText the text containing the search text
2484	* @param newText the text containing the replacement text
2485	* @return a reference to this
2486	* @stable ICU 2.0
2487	*/
2488	inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2489	const UnicodeString& newText);
2490
2491	/**
2492	* Replace all occurrences of characters in oldText with characters
2493	* in newText
2494	* in the range [`start`, `start + length`).
2495	* @param start the start of the range in which replace will be performed
2496	* @param length the length of the range in which replace will be performed
2497	* @param oldText the text containing the search text
2498	* @param newText the text containing the replacement text
2499	* @return a reference to this
2500	* @stable ICU 2.0
2501	*/
2502	inline UnicodeString& findAndReplace(int32_t start,
2503	int32_t length,
2504	const UnicodeString& oldText,
2505	const UnicodeString& newText);
2506
2507	/**
2508	* Replace all occurrences of characters in oldText in the range
2509	* [`oldStart`, `oldStart + oldLength`) with the characters
2510	* in newText in the range
2511	* [`newStart`, `newStart + newLength`)
2512	* in the range [`start`, `start + length`).
2513	* @param start the start of the range in which replace will be performed
2514	* @param length the length of the range in which replace will be performed
2515	* @param oldText the text containing the search text
2516	* @param oldStart the start of the search range in `oldText`
2517	* @param oldLength the length of the search range in `oldText`
2518	* @param newText the text containing the replacement text
2519	* @param newStart the start of the replacement range in `newText`
2520	* @param newLength the length of the replacement range in `newText`
2521	* @return a reference to this
2522	* @stable ICU 2.0
2523	*/
2524	UnicodeString& findAndReplace(int32_t start,
2525	int32_t length,
2526	const UnicodeString& oldText,
2527	int32_t oldStart,
2528	int32_t oldLength,
2529	const UnicodeString& newText,
2530	int32_t newStart,
2531	int32_t newLength);
2532
2533
2534	/* Remove operations */
2535
2536	/**
2537	* Remove all characters from the UnicodeString object.
2538	* @return a reference to this
2539	* @stable ICU 2.0
2540	*/
2541	inline UnicodeString& remove(void);
2542
2543	/**
2544	* Remove the characters in the range
2545	* [`start`, `start + length`) from the UnicodeString object.
2546	* @param start the offset of the first character to remove
2547	* @param length the number of characters to remove
2548	* @return a reference to this
2549	* @stable ICU 2.0
2550	*/
2551	inline UnicodeString& remove(int32_t start,
2552	int32_t length = (int32_t)INT32_MAX);
2553
2554	/**
2555	* Remove the characters in the range
2556	* [`start`, `limit`) from the UnicodeString object.
2557	* @param start the offset of the first character to remove
2558	* @param limit the offset immediately following the range to remove
2559	* @return a reference to this
2560	* @stable ICU 2.0
2561	*/
2562	inline UnicodeString& removeBetween(int32_t start,
2563	int32_t limit = (int32_t)INT32_MAX);
2564
2565	/**
2566	* Retain only the characters in the range
2567	* [`start`, `limit`) from the UnicodeString object.
2568	* Removes characters before `start` and at and after `limit`.
2569	* @param start the offset of the first character to retain
2570	* @param limit the offset immediately following the range to retain
2571	* @return a reference to this
2572	* @stable ICU 4.4
2573	*/
2574	inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2575
2576	/* Length operations */
2577
2578	/**
2579	* Pad the start of this UnicodeString with the character `padChar`.
2580	* If the length of this UnicodeString is less than targetLength,
2581	* targetLength - length() copies of padChar will be added to the
2582	* beginning of this UnicodeString.
2583	* @param targetLength the desired length of the string
2584	* @param padChar the character to use for padding. Defaults to
2585	* space (U+0020)
2586	* @return TRUE if the text was padded, FALSE otherwise.
2587	* @stable ICU 2.0
2588	*/
2589	UBool padLeading(int32_t targetLength,
2590	char16_t padChar = 0x0020);
2591
2592	/**
2593	* Pad the end of this UnicodeString with the character `padChar`.
2594	* If the length of this UnicodeString is less than targetLength,
2595	* targetLength - length() copies of padChar will be added to the
2596	* end of this UnicodeString.
2597	* @param targetLength the desired length of the string
2598	* @param padChar the character to use for padding. Defaults to
2599	* space (U+0020)
2600	* @return TRUE if the text was padded, FALSE otherwise.
2601	* @stable ICU 2.0
2602	*/
2603	UBool padTrailing(int32_t targetLength,
2604	char16_t padChar = 0x0020);
2605
2606	/**
2607	* Truncate this UnicodeString to the `targetLength`.
2608	* @param targetLength the desired length of this UnicodeString.
2609	* @return TRUE if the text was truncated, FALSE otherwise
2610	* @stable ICU 2.0
2611	*/
2612	inline UBool truncate(int32_t targetLength);
2613
2614	/**
2615	* Trims leading and trailing whitespace from this UnicodeString.
2616	* @return a reference to this
2617	* @stable ICU 2.0
2618	*/
2619	UnicodeString& trim(void);
2620
2621
2622	/* Miscellaneous operations */
2623
2624	/**
2625	* Reverse this UnicodeString in place.
2626	* @return a reference to this
2627	* @stable ICU 2.0
2628	*/
2629	inline UnicodeString& reverse(void);
2630
2631	/**
2632	* Reverse the range [`start`, `start + length`) in
2633	* this UnicodeString.
2634	* @param start the start of the range to reverse
2635	* @param length the number of characters to reverse
2636	* @return a reference to this
2637	* @stable ICU 2.0
2638	*/
2639	inline UnicodeString& reverse(int32_t start,
2640	int32_t length);
2641
2642	/**
2643	* Convert the characters in this to UPPER CASE following the conventions of
2644	* the default locale.
2645	* @return A reference to this.
2646	* @stable ICU 2.0
2647	*/
2648	UnicodeString& toUpper(void);
2649
2650	/**
2651	* Convert the characters in this to UPPER CASE following the conventions of
2652	* a specific locale.
2653	* @param locale The locale containing the conventions to use.
2654	* @return A reference to this.
2655	* @stable ICU 2.0
2656	*/
2657	UnicodeString& toUpper(const Locale& locale);
2658
2659	/**
2660	* Convert the characters in this to lower case following the conventions of
2661	* the default locale.
2662	* @return A reference to this.
2663	* @stable ICU 2.0
2664	*/
2665	UnicodeString& toLower(void);
2666
2667	/**
2668	* Convert the characters in this to lower case following the conventions of
2669	* a specific locale.
2670	* @param locale The locale containing the conventions to use.
2671	* @return A reference to this.
2672	* @stable ICU 2.0
2673	*/
2674	UnicodeString& toLower(const Locale& locale);
2675
2676	#if !UCONFIG_NO_BREAK_ITERATION
2677
2678	/**
2679	* Titlecase this string, convenience function using the default locale.
2680	*
2681	* Casing is locale-dependent and context-sensitive.
2682	* Titlecasing uses a break iterator to find the first characters of words
2683	* that are to be titlecased. It titlecases those characters and lowercases
2684	* all others.
2685	*
2686	* The titlecase break iterator can be provided to customize for arbitrary
2687	* styles, using rules and dictionaries beyond the standard iterators.
2688	* It may be more efficient to always provide an iterator to avoid
2689	* opening and closing one for each string.
2690	* The standard titlecase iterator for the root locale implements the
2691	* algorithm of Unicode TR 21.
2692	*
2693	* This function uses only the setText(), first() and next() methods of the
2694	* provided break iterator.
2695	*
2696	* @param titleIter A break iterator to find the first characters of words
2697	* that are to be titlecased.
2698	* If none is provided (0), then a standard titlecase
2699	* break iterator is opened.
2700	* Otherwise the provided iterator is set to the string's text.
2701	* @return A reference to this.
2702	* @stable ICU 2.1
2703	*/
2704	UnicodeString &toTitle(BreakIterator *titleIter);
2705
2706	/**
2707	* Titlecase this string.
2708	*
2709	* Casing is locale-dependent and context-sensitive.
2710	* Titlecasing uses a break iterator to find the first characters of words
2711	* that are to be titlecased. It titlecases those characters and lowercases
2712	* all others.
2713	*
2714	* The titlecase break iterator can be provided to customize for arbitrary
2715	* styles, using rules and dictionaries beyond the standard iterators.
2716	* It may be more efficient to always provide an iterator to avoid
2717	* opening and closing one for each string.
2718	* The standard titlecase iterator for the root locale implements the
2719	* algorithm of Unicode TR 21.
2720	*
2721	* This function uses only the setText(), first() and next() methods of the
2722	* provided break iterator.
2723	*
2724	* @param titleIter A break iterator to find the first characters of words
2725	* that are to be titlecased.
2726	* If none is provided (0), then a standard titlecase
2727	* break iterator is opened.
2728	* Otherwise the provided iterator is set to the string's text.
2729	* @param locale The locale to consider.
2730	* @return A reference to this.
2731	* @stable ICU 2.1
2732	*/
2733	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2734
2735	/**
2736	* Titlecase this string, with options.
2737	*
2738	* Casing is locale-dependent and context-sensitive.
2739	* Titlecasing uses a break iterator to find the first characters of words
2740	* that are to be titlecased. It titlecases those characters and lowercases
2741	* all others. (This can be modified with options.)
2742	*
2743	* The titlecase break iterator can be provided to customize for arbitrary
2744	* styles, using rules and dictionaries beyond the standard iterators.
2745	* It may be more efficient to always provide an iterator to avoid
2746	* opening and closing one for each string.
2747	* The standard titlecase iterator for the root locale implements the
2748	* algorithm of Unicode TR 21.
2749	*
2750	* This function uses only the setText(), first() and next() methods of the
2751	* provided break iterator.
2752	*
2753	* @param titleIter A break iterator to find the first characters of words
2754	* that are to be titlecased.
2755	* If none is provided (0), then a standard titlecase
2756	* break iterator is opened.
2757	* Otherwise the provided iterator is set to the string's text.
2758	* @param locale The locale to consider.
2759	* @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
2760	* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
2761	* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES;
2762	* see also ucasemap_open().
2763	* @return A reference to this.
2764	* @stable ICU 3.8
2765	*/
2766	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2767
2768	#endif
2769
2770	/**
2771	* Case-folds the characters in this string.
2772	*
2773	* Case-folding is locale-independent and not context-sensitive,
2774	* but there is an option for whether to include or exclude mappings for dotted I
2775	* and dotless i that are marked with 'T' in CaseFolding.txt.
2776	*
2777	* The result may be longer or shorter than the original.
2778	*
2779	* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2780	* @return A reference to this.
2781	* @stable ICU 2.0
2782	*/
2783	UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2784
2785	//========================================
2786	// Access to the internal buffer
2787	//========================================
2788
2789	/**
2790	* Get a read/write pointer to the internal buffer.
2791	* The buffer is guaranteed to be large enough for at least minCapacity char16_ts,
2792	* writable, and is still owned by the UnicodeString object.
2793	* Calls to getBuffer(minCapacity) must not be nested, and
2794	* must be matched with calls to releaseBuffer(newLength).
2795	* If the string buffer was read-only or shared,
2796	* then it will be reallocated and copied.
2797	*
2798	* An attempted nested call will return 0, and will not further modify the
2799	* state of the UnicodeString object.
2800	* It also returns 0 if the string is bogus.
2801	*
2802	* The actual capacity of the string buffer may be larger than minCapacity.
2803	* getCapacity() returns the actual capacity.
2804	* For many operations, the full capacity should be used to avoid reallocations.
2805	*
2806	* While the buffer is "open" between getBuffer(minCapacity)
2807	* and releaseBuffer(newLength), the following applies:
2808	* - The string length is set to 0.
2809	* - Any read API call on the UnicodeString object will behave like on a 0-length string.
2810	* - Any write API call on the UnicodeString object is disallowed and will have no effect.
2811	* - You can read from and write to the returned buffer.
2812	* - The previous string contents will still be in the buffer;
2813	* if you want to use it, then you need to call length() before getBuffer(minCapacity).
2814	* If the length() was greater than minCapacity, then any contents after minCapacity
2815	* may be lost.
2816	* The buffer contents is not NUL-terminated by getBuffer().
2817	* If length() < getCapacity() then you can terminate it by writing a NUL
2818	* at index length().
2819	* - You must call releaseBuffer(newLength) before and in order to
2820	* return to normal UnicodeString operation.
2821	*
2822	* @param minCapacity the minimum number of char16_ts that are to be available
2823	* in the buffer, starting at the returned pointer;
2824	* default to the current string capacity if minCapacity==-1
2825	* @return a writable pointer to the internal string buffer,
2826	* or nullptr if an error occurs (nested calls, out of memory)
2827	*
2828	* @see releaseBuffer
2829	* @see getTerminatedBuffer()
2830	* @stable ICU 2.0
2831	*/
2832	char16_t *getBuffer(int32_t minCapacity);
2833
2834	/**
2835	* Release a read/write buffer on a UnicodeString object with an
2836	* "open" getBuffer(minCapacity).
2837	* This function must be called in a matched pair with getBuffer(minCapacity).
2838	* releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2839	*
2840	* It will set the string length to newLength, at most to the current capacity.
2841	* If newLength==-1 then it will set the length according to the
2842	* first NUL in the buffer, or to the capacity if there is no NUL.
2843	*
2844	* After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2845	*
2846	* @param newLength the new length of the UnicodeString object;
2847	* defaults to the current capacity if newLength is greater than that;
2848	* if newLength==-1, it defaults to u_strlen(buffer) but not more than
2849	* the current capacity of the string
2850	*
2851	* @see getBuffer(int32_t minCapacity)
2852	* @stable ICU 2.0
2853	*/
2854	void releaseBuffer(int32_t newLength=-1);
2855
2856	/**
2857	* Get a read-only pointer to the internal buffer.
2858	* This can be called at any time on a valid UnicodeString.
2859	*
2860	* It returns 0 if the string is bogus, or
2861	* during an "open" getBuffer(minCapacity).
2862	*
2863	* It can be called as many times as desired.
2864	* The pointer that it returns will remain valid until the UnicodeString object is modified,
2865	* at which time the pointer is semantically invalidated and must not be used any more.
2866	*
2867	* The capacity of the buffer can be determined with getCapacity().
2868	* The part after length() may or may not be initialized and valid,
2869	* depending on the history of the UnicodeString object.
2870	*
2871	* The buffer contents is (probably) not NUL-terminated.
2872	* You can check if it is with
2873	* `(s.length() < s.getCapacity() && buffer[s.length()]==0)`.
2874	* (See getTerminatedBuffer().)
2875	*
2876	* The buffer may reside in read-only memory. Its contents must not
2877	* be modified.
2878	*
2879	* @return a read-only pointer to the internal string buffer,
2880	* or nullptr if the string is empty or bogus
2881	*
2882	* @see getBuffer(int32_t minCapacity)
2883	* @see getTerminatedBuffer()
2884	* @stable ICU 2.0
2885	*/
2886	inline const char16_t *getBuffer() const;
2887
2888	/**
2889	* Get a read-only pointer to the internal buffer,
2890	* making sure that it is NUL-terminated.
2891	* This can be called at any time on a valid UnicodeString.
2892	*
2893	* It returns 0 if the string is bogus, or
2894	* during an "open" getBuffer(minCapacity), or if the buffer cannot
2895	* be NUL-terminated (because memory allocation failed).
2896	*
2897	* It can be called as many times as desired.
2898	* The pointer that it returns will remain valid until the UnicodeString object is modified,
2899	* at which time the pointer is semantically invalidated and must not be used any more.
2900	*
2901	* The capacity of the buffer can be determined with getCapacity().
2902	* The part after length()+1 may or may not be initialized and valid,
2903	* depending on the history of the UnicodeString object.
2904	*
2905	* The buffer contents is guaranteed to be NUL-terminated.
2906	* getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2907	* is written.
2908	* For this reason, this function is not const, unlike getBuffer().
2909	* Note that a UnicodeString may also contain NUL characters as part of its contents.
2910	*
2911	* The buffer may reside in read-only memory. Its contents must not
2912	* be modified.
2913	*
2914	* @return a read-only pointer to the internal string buffer,
2915	* or 0 if the string is empty or bogus
2916	*
2917	* @see getBuffer(int32_t minCapacity)
2918	* @see getBuffer()
2919	* @stable ICU 2.2
2920	*/
2921	const char16_t *getTerminatedBuffer();
2922
2923	//========================================
2924	// Constructors
2925	//========================================
2926
2927	/** Construct an empty UnicodeString.
2928	* @stable ICU 2.0
2929	*/
2930	inline UnicodeString();
2931
2932	/**
2933	* Construct a UnicodeString with capacity to hold `capacity` char16_ts
2934	* @param capacity the number of char16_ts this UnicodeString should hold
2935	* before a resize is necessary; if count is greater than 0 and count
2936	* code points c take up more space than capacity, then capacity is adjusted
2937	* accordingly.
2938	* @param c is used to initially fill the string
2939	* @param count specifies how many code points c are to be written in the
2940	* string
2941	* @stable ICU 2.0
2942	*/
2943	UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2944
2945	/**
2946	* Single char16_t (code unit) constructor.
2947	*
2948	* It is recommended to mark this constructor "explicit" by
2949	* `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
2950	* on the compiler command line or similar.
2951	* @param ch the character to place in the UnicodeString
2952	* @stable ICU 2.0
2953	*/
2954	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch);
2955
2956	/**
2957	* Single UChar32 (code point) constructor.
2958	*
2959	* It is recommended to mark this constructor "explicit" by
2960	* `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
2961	* on the compiler command line or similar.
2962	* @param ch the character to place in the UnicodeString
2963	* @stable ICU 2.0
2964	*/
2965	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
2966
2967	/**
2968	* char16_t* constructor.
2969	*
2970	* It is recommended to mark this constructor "explicit" by
2971	* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
2972	* on the compiler command line or similar.
2973	* @param text The characters to place in the UnicodeString. `text`
2974	* must be NUL (U+0000) terminated.
2975	* @stable ICU 2.0
2976	*/
2977	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
2978
2979	#if !U_CHAR16_IS_TYPEDEF
2980	/**
2981	* Constructor taking a uint16_t * string.
2982	* Wraps `text` in a ConstChar16Ptr and forwards to UnicodeString(const char16_t *).
2983	*
2984	* Marking this constructor "explicit" is recommended; do so by passing
2985	* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
2986	* on the compiler command line or by an equivalent mechanism.
2987	* @param text UTF-16 string, NUL-terminated
2988	* @stable ICU 59
2989	*/
2990	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text)
2991	: UnicodeString(ConstChar16Ptr(text)) {}
2992	#endif
2993
2994	#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
2995	/**
2996	* wchar_t * constructor.
2997	* (Only defined if U_SIZEOF_WCHAR_T==2.)
2998	* Delegates to UnicodeString(const char16_t *).
2999	*
3000	* It is recommended to mark this constructor "explicit" by
3001	* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3002	* on the compiler command line or similar.
3003	* @param text NUL-terminated UTF-16 string
3004	* @stable ICU 59
3005	*/
3006	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
3007	UnicodeString(ConstChar16Ptr(text)) {}
3008	#endif
3009
3010	/**
3011	* nullptr_t constructor.
3012	* Effectively the same as the default constructor, makes an empty string object.
3013	*
3014	* It is recommended to mark this constructor "explicit" by
3015	* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3016	* on the compiler command line or similar.
3017	* @param text nullptr
3018	* @stable ICU 59
3019	*/
3020	UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3021
3022	/**
3023	* char16_t* constructor.
3024	* @param text The characters to place in the UnicodeString.
3025	* @param textLength The number of Unicode characters in `text`
3026	* to copy.
3027	* @stable ICU 2.0
3028	*/
3029	UnicodeString(const char16_t *text,
3030	int32_t textLength);
3031
3032	#if !U_CHAR16_IS_TYPEDEF
3033	/**
3034	* Constructor taking a uint16_t * string and an explicit length.
3035	* Wraps `text` in a ConstChar16Ptr and forwards to UnicodeString(const char16_t *, int32_t).
3036	* @param text the UTF-16 string
3037	* @param length the length of the string
3038	* @stable ICU 59
3039	*/
3040	UnicodeString(const uint16_t *text, int32_t length)
3041	: UnicodeString(ConstChar16Ptr(text), length) {}
3042	#endif
3043
3044	#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3045	/**
3046	* wchar_t * constructor.
3047	* (Only defined if U_SIZEOF_WCHAR_T==2.)
3048	* Delegates to UnicodeString(const char16_t *, int32_t).
3049	* @param text UTF-16 string
3050	* @param length string length
3051	* @stable ICU 59
3052	*/
3053	UnicodeString(const wchar_t *text, int32_t length) :
3054	UnicodeString(ConstChar16Ptr(text), length) {}
3055	#endif
3056
3057	/**
3058	* nullptr_t constructor.
3059	* Effectively the same as the default constructor, makes an empty string object.
3060	* @param text nullptr
3061	* @param length ignored
3062	* @stable ICU 59
3063	*/
3064	inline UnicodeString(const std::nullptr_t text, int32_t length);
3065
3066	/**
3067	* Readonly-aliasing char16_t* constructor.
3068	* The text will be used for the UnicodeString object, but
3069	* it will not be released when the UnicodeString is destroyed.
3070	* This has copy-on-write semantics:
3071	* When the string is modified, then the buffer is first copied into
3072	* newly allocated memory.
3073	* The aliased buffer is never modified.
3074	*
3075	* In an assignment to another UnicodeString, when using the copy constructor
3076	* or the assignment operator, the text will be copied.
3077	* When using fastCopyFrom(), the text will be aliased again,
3078	* so that both strings then alias the same readonly-text.
3079	*
3080	* @param isTerminated specifies if `text` is `NUL`-terminated.
3081	* This must be true if `textLength==-1`.
3082	* @param text The characters to alias for the UnicodeString.
3083	* @param textLength The number of Unicode characters in `text` to alias.
3084	* If -1, then this constructor will determine the length
3085	* by calling `u_strlen()`.
3086	* @stable ICU 2.0
3087	*/
3088	UnicodeString(UBool isTerminated,
3089	ConstChar16Ptr text,
3090	int32_t textLength);
3091
3092	/**
3093	* Writable-aliasing char16_t* constructor.
3094	* The text will be used for the UnicodeString object, but
3095	* it will not be released when the UnicodeString is destroyed.
3096	* This has write-through semantics:
3097	* For as long as the capacity of the buffer is sufficient, write operations
3098	* will directly affect the buffer. When more capacity is necessary, then
3099	* a new buffer will be allocated and the contents copied as with regularly
3100	* constructed strings.
3101	* In an assignment to another UnicodeString, the buffer will be copied.
3102	* The extract(Char16Ptr dst) function detects whether the dst pointer is the same
3103	* as the string buffer itself and will in this case not copy the contents.
3104	*
3105	* @param buffer The characters to alias for the UnicodeString.
3106	* @param buffLength The number of Unicode characters in `buffer` to alias.
3107	* @param buffCapacity The size of `buffer` in char16_ts.
3108	* @stable ICU 2.0
3109	*/
3110	UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
3111
3112	#if !U_CHAR16_IS_TYPEDEF
3113	/**
3114	* Writable-aliasing uint16_t * constructor.
3115	* Delegates to UnicodeString(char16_t *, int32_t, int32_t).
3116	* @param buffer writable buffer of/for UTF-16 text
3117	* @param buffLength length of the current buffer contents
3118	* @param buffCapacity buffer capacity
3119	* @stable ICU 59
3120	*/
3121	UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3122	UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3123	#endif
3124
3125	#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3126	/**
3127	* Writable-aliasing wchar_t * constructor.
3128	* (Only defined if U_SIZEOF_WCHAR_T==2.)
3129	* Delegates to UnicodeString(char16_t *, int32_t, int32_t).
3130	* @param buffer writable buffer of/for UTF-16 text
3131	* @param buffLength length of the current buffer contents
3132	* @param buffCapacity buffer capacity
3133	* @stable ICU 59
3134	*/
3135	UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3136	UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3137	#endif
3138
3139	/**
3140	* Writable-aliasing nullptr_t constructor.
3141	* Effectively the same as the default constructor, makes an empty string object.
3142	* @param buffer nullptr
3143	* @param buffLength ignored
3144	* @param buffCapacity ignored
3145	* @stable ICU 59
3146	*/
3147	inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3148
3149	#if U_CHARSET_IS_UTF8 \|\| !UCONFIG_NO_CONVERSION
3150
3151	/**
3152	* char* constructor.
3153	* Uses the default converter (and thus depends on the ICU conversion code)
3154	* unless U_CHARSET_IS_UTF8 is set to 1.
3155	*
3156	* For ASCII (really "invariant character") strings it is more efficient to use
3157	* the constructor that takes a US_INV (for its enum EInvariant).
3158	* For ASCII (invariant-character) string literals, see UNICODE_STRING and
3159	* UNICODE_STRING_SIMPLE.
3160	*
3161	* It is recommended to mark this constructor "explicit" by
3162	* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3163	* on the compiler command line or similar.
3164	* @param codepageData an array of bytes, null-terminated,
3165	* in the platform's default codepage.
3166	* @stable ICU 2.0
3167	* @see UNICODE_STRING
3168	* @see UNICODE_STRING_SIMPLE
3169	*/
3170	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
3171
3172	/**
3173	* char* constructor.
3174	* Uses the default converter (and thus depends on the ICU conversion code)
3175	* unless U_CHARSET_IS_UTF8 is set to 1.
3176	* @param codepageData an array of bytes in the platform's default codepage.
3177	* @param dataLength The number of bytes in `codepageData`.
3178	* @stable ICU 2.0
3179	*/
3180	UnicodeString(const char *codepageData, int32_t dataLength);
3181
3182	#endif
3183
3184	#if !UCONFIG_NO_CONVERSION
3185
3186	/**
3187	* char* constructor.
3188	* @param codepageData an array of bytes, null-terminated
3189	* @param codepage the encoding of `codepageData`. The special
3190	* value 0 for `codepage` indicates that the text is in the
3191	* platform's default codepage.
3192	*
3193	* If `codepage` is an empty string (`""`),
3194	* then a simple conversion is performed on the codepage-invariant
3195	* subset ("invariant characters") of the platform encoding. See utypes.h.
3196	* Recommendation: For invariant-character strings use the constructor
3197	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3198	* because it avoids object code dependencies of UnicodeString on
3199	* the conversion code.
3200	*
3201	* @stable ICU 2.0
3202	*/
3203	UnicodeString(const char codepageData, const* char *codepage);
3204
3205	/**
3206	* char* constructor.
3207	* @param codepageData an array of bytes.
3208	* @param dataLength The number of bytes in `codepageData`.
3209	* @param codepage the encoding of `codepageData`. The special
3210	* value 0 for `codepage` indicates that the text is in the
3211	* platform's default codepage.
3212	* If `codepage` is an empty string (`""`),
3213	* then a simple conversion is performed on the codepage-invariant
3214	* subset ("invariant characters") of the platform encoding. See utypes.h.
3215	* Recommendation: For invariant-character strings use the constructor
3216	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3217	* because it avoids object code dependencies of UnicodeString on
3218	* the conversion code.
3219	*
3220	* @stable ICU 2.0
3221	*/
3222	UnicodeString(const char codepageData, int32_t dataLength, const* char *codepage);
3223
3224	/**
3225	* char * / UConverter constructor.
3226	* This constructor uses an existing UConverter object to
3227	* convert the codepage string to Unicode and construct a UnicodeString
3228	* from that.
3229	*
3230	* The converter is reset at first.
3231	* If the error code indicates a failure before this constructor is called,
3232	* or if an error occurs during conversion or construction,
3233	* then the string will be bogus.
3234	*
3235	* This function avoids the overhead of opening and closing a converter if
3236	* multiple strings are constructed.
3237	*
3238	* @param src input codepage string
3239	* @param srcLength length of the input string, can be -1 for NUL-terminated strings
3240	* @param cnv converter object (ucnv_resetToUnicode() will be called),
3241	* can be NULL for the default converter
3242	* @param errorCode normal ICU error code
3243	* @stable ICU 2.0
3244	*/
3245	UnicodeString(
3246	const char *src, int32_t srcLength,
3247	UConverter *cnv,
3248	UErrorCode &errorCode);
3249
3250	#endif
3251
3252	/**
3253	* Constructs a Unicode string from an invariant-character char * string.
3254	* About invariant characters see utypes.h.
3255	* This constructor has no runtime dependency on conversion code and is
3256	* therefore recommended over ones taking a charset name string
3257	* (where the empty string "" indicates invariant-character conversion).
3258	*
3259	* Use the macro US_INV as the third, signature-distinguishing parameter.
3260	*
3261	* For example:
3262	* \code
3263	* void fn(const char *s) {
3264	* UnicodeString ustr(s, -1, US_INV);
3265	* // use ustr ...
3266	* }
3267	* \endcode
3268	* @param src String using only invariant characters.
3269	* @param length Length of src, or -1 if NUL-terminated.
3270	* @param inv Signature-distinguishing paramater, use US_INV.
3271	*
3272	* @see US_INV
3273	* @stable ICU 3.2
3274	*/
3275	UnicodeString(const char src, int32_t length, enum* EInvariant inv);
3276
3277
3278	/**
3279	* Copy constructor.
3280	*
3281	* Starting with ICU 2.4, the assignment operator and the copy constructor
3282	* allocate a new buffer and copy the buffer contents even for readonly aliases.
3283	* By contrast, the fastCopyFrom() function implements the old,
3284	* more efficient but less safe behavior
3285	* of making this string also a readonly alias to the same buffer.
3286	*
3287	* If the source object has an "open" buffer from getBuffer(minCapacity),
3288	* then the copy is an empty string.
3289	*
3290	* @param that The UnicodeString object to copy.
3291	* @stable ICU 2.0
3292	* @see fastCopyFrom
3293	*/
3294	UnicodeString(const UnicodeString& that);
3295
3296	/**
3297	* Move constructor; might leave src in bogus state.
3298	* This string will have the same contents and state that the source string had.
3299	* @param src source string
3300	* @stable ICU 56
3301	*/
3302	UnicodeString(UnicodeString &&src) U_NOEXCEPT;
3303
3304	/**
3305	* 'Substring' constructor from tail of source string.
3306	* @param src The UnicodeString object to copy.
3307	* @param srcStart The offset into `src` at which to start copying.
3308	* @stable ICU 2.2
3309	*/
3310	UnicodeString(const UnicodeString& src, int32_t srcStart);
3311
3312	/**
3313	* 'Substring' constructor from subrange of source string.
3314	* @param src The UnicodeString object to copy.
3315	* @param srcStart The offset into `src` at which to start copying.
3316	* @param srcLength The number of characters from `src` to copy.
3317	* @stable ICU 2.2
3318	*/
3319	UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3320
3321	/**
3322	* Clone this object, an instance of a subclass of Replaceable.
3323	* Clones can be used concurrently in multiple threads.
3324	* If a subclass does not implement clone(), or if an error occurs,
3325	* then NULL is returned.
3326	* The clone functions in all subclasses return a pointer to a Replaceable
3327	* because some compilers do not support covariant (same-as-this)
3328	* return types; cast to the appropriate subclass if necessary.
3329	* The caller must delete the clone.
3330	*
3331	* @return a clone of this object
3332	*
3333	* @see Replaceable::clone
3334	* @see getDynamicClassID
3335	* @stable ICU 2.6
3336	*/
3337	virtual Replaceable clone() const*;
3338
/** Destructor.
 * @stable ICU 2.0
 */
3342	virtual ~UnicodeString();
3343
3344	/**
3345	* Create a UnicodeString from a UTF-8 string.
3346	* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3347	* Calls u_strFromUTF8WithSub().
3348	*
3349	* @param utf8 UTF-8 input string.
3350	* Note that a StringPiece can be implicitly constructed
3351	* from a std::string or a NUL-terminated const char * string.
3352	* @return A UnicodeString with equivalent UTF-16 contents.
3353	* @see toUTF8
3354	* @see toUTF8String
3355	* @stable ICU 4.2
3356	*/
3357	static UnicodeString fromUTF8(StringPiece utf8);
3358
3359	/**
3360	* Create a UnicodeString from a UTF-32 string.
3361	* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3362	* Calls u_strFromUTF32WithSub().
3363	*
3364	* @param utf32 UTF-32 input string. Must not be NULL.
3365	* @param length Length of the input string, or -1 if NUL-terminated.
3366	* @return A UnicodeString with equivalent UTF-16 contents.
3367	* @see toUTF32
3368	* @stable ICU 4.2
3369	*/
3370	static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3371
/* Miscellaneous operations */
3373
3374	/**
3375	* Unescape a string of characters and return a string containing
3376	* the result. The following escape sequences are recognized:
3377	*
3378	* \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
3379	* \\Uhhhhhhhh 8 hex digits
3380	* \\xhh 1-2 hex digits
3381	* \\ooo 1-3 octal digits; o in [0-7]
3382	* \\cX control-X; X is masked with 0x1F
3383	*
3384	* as well as the standard ANSI C escapes:
3385	*
3386	* \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3387	* \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3388	* \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3389	*
3390	* Anything else following a backslash is generically escaped. For
3391	* example, "[a\\-z]" returns "[a-z]".
3392	*
3393	* If an escape sequence is ill-formed, this method returns an empty
3394	* string. An example of an ill-formed sequence is "\\u" followed by
3395	* fewer than 4 hex digits.
3396	*
3397	* This function is similar to u_unescape() but not identical to it.
3398	* The latter takes a source char*, so it does escape recognition
3399	* and also invariant conversion.
3400	*
3401	* @return a string with backslash escapes interpreted, or an
3402	* empty string on error.
3403	* @see UnicodeString#unescapeAt()
3404	* @see u_unescape()
3405	* @see u_unescapeAt()
3406	* @stable ICU 2.0
3407	*/
3408	UnicodeString unescape() const;
3409
3410	/**
3411	* Unescape a single escape sequence and return the represented
3412	* character. See unescape() for a listing of the recognized escape
3413	* sequences. The character at offset-1 is assumed (without
3414	* checking) to be a backslash. If the escape sequence is
3415	* ill-formed, or the offset is out of range, U_SENTINEL=-1 is
3416	* returned.
3417	*
3418	* @param offset an input output parameter. On input, it is the
3419	* offset into this string where the escape sequence is located,
3420	* after the initial backslash. On output, it is advanced after the
3421	* last character parsed. On error, it is not advanced at all.
3422	* @return the character represented by the escape sequence at
3423	* offset, or U_SENTINEL=-1 on error.
3424	* @see UnicodeString#unescape()
3425	* @see u_unescape()
3426	* @see u_unescapeAt()
3427	* @stable ICU 2.0
3428	*/
3429	UChar32 unescapeAt(int32_t &offset) const;
3430
3431	/**
3432	* ICU "poor man's RTTI", returns a UClassID for this class.
3433	*
3434	* @stable ICU 2.2
3435	*/
3436	static UClassID U_EXPORT2 getStaticClassID();
3437
3438	/**
3439	* ICU "poor man's RTTI", returns a UClassID for the actual class.
3440	*
3441	* @stable ICU 2.2
3442	*/
3443	virtual UClassID getDynamicClassID() const;
3444
3445	//========================================
3446	// Implementation methods
3447	//========================================
3448
3449	protected:
3450	/**
3451	* Implement Replaceable::getLength() (see jitterbug 1027).
3452	* @stable ICU 2.4
3453	*/
3454	virtual int32_t getLength() const;
3455
3456	/**
3457	* The change in Replaceable to use virtual getCharAt() allows
3458	* UnicodeString::charAt() to be inline again (see jitterbug 709).
3459	* @stable ICU 2.4
3460	*/
3461	virtual char16_t getCharAt(int32_t offset) const;
3462
3463	/**
3464	* The change in Replaceable to use virtual getChar32At() allows
3465	* UnicodeString::char32At() to be inline again (see jitterbug 709).
3466	* @stable ICU 2.4
3467	*/
3468	virtual UChar32 getChar32At(int32_t offset) const;
3469
3470	private:
// For char* constructors. Could be made public.
3472	UnicodeString &setToUTF8(StringPiece utf8);
3473	// For extract(char).*
3474	// We could make a toUTF8(target, capacity, errorCode) public but not
3475	// this version: New API will be cleaner if we make callers create substrings
3476	// rather than having start+length on every method,
3477	// and it should take a UErrorCode&.
3478	int32_t
3479	toUTF8(int32_t start, int32_t len,
3480	char target, int32_t capacity) const*;
3481
3482	/**
3483	* Internal string contents comparison, called by operator==.
3484	* Requires: this & text not bogus and have same lengths.
3485	*/
3486	UBool doEquals(const UnicodeString &text, int32_t len) const;
3487
3488	inline int8_t
3489	doCompare(int32_t start,
3490	int32_t length,
3491	const UnicodeString& srcText,
3492	int32_t srcStart,
3493	int32_t srcLength) const;
3494
3495	int8_t doCompare(int32_t start,
3496	int32_t length,
3497	const char16_t *srcChars,
3498	int32_t srcStart,
3499	int32_t srcLength) const;
3500
3501	inline int8_t
3502	doCompareCodePointOrder(int32_t start,
3503	int32_t length,
3504	const UnicodeString& srcText,
3505	int32_t srcStart,
3506	int32_t srcLength) const;
3507
3508	int8_t doCompareCodePointOrder(int32_t start,
3509	int32_t length,
3510	const char16_t *srcChars,
3511	int32_t srcStart,
3512	int32_t srcLength) const;
3513
3514	inline int8_t
3515	doCaseCompare(int32_t start,
3516	int32_t length,
3517	const UnicodeString &srcText,
3518	int32_t srcStart,
3519	int32_t srcLength,
3520	uint32_t options) const;
3521
3522	int8_t
3523	doCaseCompare(int32_t start,
3524	int32_t length,
3525	const char16_t *srcChars,
3526	int32_t srcStart,
3527	int32_t srcLength,
3528	uint32_t options) const;
3529
3530	int32_t doIndexOf(char16_t c,
3531	int32_t start,
3532	int32_t length) const;
3533
3534	int32_t doIndexOf(UChar32 c,
3535	int32_t start,
3536	int32_t length) const;
3537
3538	int32_t doLastIndexOf(char16_t c,
3539	int32_t start,
3540	int32_t length) const;
3541
3542	int32_t doLastIndexOf(UChar32 c,
3543	int32_t start,
3544	int32_t length) const;
3545
3546	void doExtract(int32_t start,
3547	int32_t length,
3548	char16_t *dst,
3549	int32_t dstStart) const;
3550
3551	inline void doExtract(int32_t start,
3552	int32_t length,
3553	UnicodeString& target) const;
3554
3555	inline char16_t doCharAt(int32_t offset) const;
3556
3557	UnicodeString& doReplace(int32_t start,
3558	int32_t length,
3559	const UnicodeString& srcText,
3560	int32_t srcStart,
3561	int32_t srcLength);
3562
3563	UnicodeString& doReplace(int32_t start,
3564	int32_t length,
3565	const char16_t *srcChars,
3566	int32_t srcStart,
3567	int32_t srcLength);
3568
3569	UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3570	UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3571
3572	UnicodeString& doReverse(int32_t start,
3573	int32_t length);
3574
3575	// calculate hash code
3576	int32_t doHashCode(void) const;
3577
3578	// get pointer to start of array
3579	// these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3580	inline char16_t* getArrayStart(void);
3581	inline const char16_t* getArrayStart(void) const;
3582
3583	inline UBool hasShortLength() const;
3584	inline int32_t getShortLength() const;
3585
3586	// A UnicodeString object (not necessarily its current buffer)
3587	// is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3588	inline UBool isWritable() const;
3589
3590	// Is the current buffer writable?
3591	inline UBool isBufferWritable() const;
3592
3593	// None of the following does releaseArray().
3594	inline void setZeroLength();
3595	inline void setShortLength(int32_t len);
3596	inline void setLength(int32_t len);
3597	inline void setToEmpty();
inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
3599
3600	// allocate the array; result may be the stack buffer
3601	// sets refCount to 1 if appropriate
3602	// sets fArray, fCapacity, and flags
3603	// sets length to 0
3604	// returns boolean for success or failure
3605	UBool allocate(int32_t capacity);
3606
3607	// release the array if owned
3608	void releaseArray(void);
3609
3610	// turn a bogus string into an empty one
3611	void unBogus();
3612
// implements assignment operator, copy constructor, and fastCopyFrom()
3614	UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3615
3616	// Copies just the fields without memory management.
3617	void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;
3618
3619	// Pin start and limit to acceptable values.
3620	inline void pinIndex(int32_t& start) const;
3621	inline void pinIndices(int32_t& start,
3622	int32_t& length) const;
3623
3624	#if !UCONFIG_NO_CONVERSION
3625
/* Internal extract() using UConverter. */
3627	int32_t doExtract(int32_t start, int32_t length,
3628	char *dest, int32_t destCapacity,
3629	UConverter *cnv,
3630	UErrorCode &errorCode) const;
3631
3632	/*
3633	* Real constructor for converting from codepage data.
3634	* It assumes that it is called with !fRefCounted.
3635	*
3636	* If `codepage==0`, then the default converter
3637	* is used for the platform encoding.
3638	* If `codepage` is an empty string (`""`),
3639	* then a simple conversion is performed on the codepage-invariant
3640	* subset ("invariant characters") of the platform encoding. See utypes.h.
3641	*/
3642	void doCodepageCreate(const char *codepageData,
3643	int32_t dataLength,
3644	const char *codepage);
3645
3646	/*
3647	* Worker function for creating a UnicodeString from
3648	* a codepage string using a UConverter.
3649	*/
3650	void
3651	doCodepageCreate(const char *codepageData,
3652	int32_t dataLength,
3653	UConverter *converter,
3654	UErrorCode &status);
3655
3656	#endif
3657
3658	/*
3659	* This function is called when write access to the array
3660	* is necessary.
3661	*
3662	* We need to make a copy of the array if
3663	* the buffer is read-only, or
3664	* the buffer is refCounted (shared), and refCount>1, or
3665	* the buffer is too small.
3666	*
3667	* Return FALSE if memory could not be allocated.
3668	*/
3669	UBool cloneArrayIfNeeded(int32_t newCapacity = -`1`,
3670	int32_t growCapacity = -`1`,
3671	UBool doCopyArray = TRUE,
3672	int32_t **pBufferToDelete = `0`,
3673	UBool forceClone = FALSE);
3674
3675	/**
3676	* Common function for UnicodeString case mappings.
3677	* The stringCaseMapper has the same type UStringCaseMapper
3678	* as in ustr_imp.h for ustrcase_map().
3679	*/
3680	UnicodeString &
3681	caseMap(int32_t caseLocale, uint32_t options,
3682	#if !UCONFIG_NO_BREAK_ITERATION
3683	BreakIterator *iter,
3684	#endif
3685	UStringCaseMapper *stringCaseMapper);
3686
3687	// ref counting
3688	void addRef(void);
3689	int32_t removeRef(void);
3690	int32_t refCount(void) const;
3691
3692	// constants
3693	enum {
3694	/**
3695	* Size of stack buffer for short strings.
3696	* Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
3697	* @see UNISTR_OBJECT_SIZE
3698	*/
3699	US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-`2`)/U_SIZEOF_UCHAR,
3700	kInvalidUChar=`0xffff`, // U+FFFF returned by charAt(invalid index)
3701	kInvalidHashCode=`0`, // invalid hash code
3702	kEmptyHashCode=`1`, // hash code for empty string
3703
3704	// bit flag values for fLengthAndFlags
3705	kIsBogus=`1`, // this string is bogus, i.e., not valid or NULL
3706	kUsingStackBuffer=`2`,// using fUnion.fStackFields instead of fUnion.fFields
3707	kRefCounted=`4`, // there is a refCount field before the characters in fArray
3708	kBufferIsReadonly=`8`,// do not write to this buffer
3709	kOpenGetBuffer=`16`, // getBuffer(minCapacity) was called (is "open"),
3710	// and releaseBuffer(newLength) must be called
3711	kAllStorageFlags=`0x1f`,
3712
3713	kLengthShift=`5`, // remaining 11 bits for non-negative short length, or negative if long
3714	kLength1=`1`<<kLengthShift,
3715	kMaxShortLength=`0x3ff`, // max non-negative short length (leaves top bit 0)
3716	kLengthIsLarge=`0xffe0`, // short length < 0, real length is in fUnion.fFields.fLength
3717
3718	// combined values for convenience
3719	kShortString=kUsingStackBuffer,
3720	kLongString=kRefCounted,
3721	kReadonlyAlias=kBufferIsReadonly,
3722	kWritableAlias=`0`
3723	};
3724
3725	friend class UnicodeStringAppendable;
3726
3727	union StackBufferOrFields; // forward declaration necessary before friend declaration
3728	friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3729
3730	/*
3731	* The following are all the class fields that are stored
3732	* in each UnicodeString object.
3733	* Note that UnicodeString has virtual functions,
3734	* therefore there is an implicit vtable pointer
3735	* as the first real field.
3736	* The fields should be aligned such that no padding is necessary.
3737	* On 32-bit machines, the size should be 32 bytes,
3738	* on 64-bit machines (8-byte pointers), it should be 40 bytes.
3739	*
3740	* We use a hack to achieve this.
3741	*
3742	* With at least some compilers, each of the following is forced to
3743	* a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3744	* rounded up with additional padding if the fields do not already fit that requirement:
3745	* - sizeof(class UnicodeString)
3746	* - offsetof(UnicodeString, fUnion)
3747	* - sizeof(fUnion)
3748	* - sizeof(fStackFields)
3749	*
3750	* We optimize for the longest possible internal buffer for short strings.
3751	* fUnion.fStackFields begins with 2 bytes for storage flags
3752	* and the length of relatively short strings,
3753	* followed by the buffer for short string contents.
3754	* There is no padding inside fStackFields.
3755	*
3756	* Heap-allocated and aliased strings use fUnion.fFields.
3757	* Both fStackFields and fFields must begin with the same fields for flags and short length,
3758	* that is, those must have the same memory offsets inside the object,
3759	* because the flags must be inspected in order to decide which half of fUnion is being used.
3760	* We assume that the compiler does not reorder the fields.
3761	*
3762	* (Padding at the end of fFields is ok:
3763	* As long as it is no larger than fStackFields, it is not wasted space.)
3764	*
3765	* For some of the history of the UnicodeString class fields layout, see
3766	* - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
3767	* - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
3768	* - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
3769	*/
3770	// (implicit) vtable;*
3771	union StackBufferOrFields {
3772	// fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
3773	// Each struct of the union must begin with fLengthAndFlags.
3774	struct {
3775	int16_t fLengthAndFlags; // bit fields: see constants above
3776	char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
3777	} fStackFields;
3778	struct {
3779	int16_t fLengthAndFlags; // bit fields: see constants above
3780	int32_t fLength; // number of characters in fArray if >127; else undefined
3781	int32_t fCapacity; // capacity of fArray (in char16_ts)
3782	// array pointer last to minimize padding for machines with P128 data model
3783	// or pointer sizes that are not a power of 2
3784	char16_t fArray; // the Unicode data*
3785	} fFields;
3786	} fUnion;
3787	};
3788
3789	/**
3790	* Create a new UnicodeString with the concatenation of two others.
3791	*
3792	* @param s1 The first string to be copied to the new one.
3793	* @param s2 The second string to be copied to the new one, after s1.
3794	* @return UnicodeString(s1).append(s2)
3795	* @stable ICU 2.8
3796	*/
3797	U_COMMON_API UnicodeString U_EXPORT2
3798	operator+ (const UnicodeString &s1, const UnicodeString &s2);
3799
3800	//========================================
3801	// Inline members
3802	//========================================
3803
3804	//========================================
3805	// Privates
3806	//========================================
3807
3808	inline void
3809	UnicodeString::pinIndex(int32_t& start) const
3810	{
3811	// pin index
3812	if(start < `0`) {
3813	start = `0`;
3814	} else if(start > length()) {
3815	start = length();
3816	}
3817	}
3818
3819	inline void
3820	UnicodeString::pinIndices(int32_t& start,
3821	int32_t& _length) const
3822	{
3823	// pin indices
3824	int32_t len = length();
3825	if(start < `0`) {
3826	start = `0`;
3827	} else if(start > len) {
3828	start = len;
3829	}
3830	if(_length < `0`) {
3831	_length = `0`;
3832	} else if(_length > (len - start)) {
3833	_length = (len - start);
3834	}
3835	}
3836
3837	inline char16_t*
3838	UnicodeString::getArrayStart() {
3839	return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3840	fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3841	}
3842
3843	inline const char16_t*
3844	UnicodeString::getArrayStart() const {
3845	return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3846	fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3847	}
3848
3849	//========================================
3850	// Default constructor
3851	//========================================
3852
3853	inline
3854	UnicodeString::UnicodeString() {
3855	fUnion.fStackFields.fLengthAndFlags=kShortString;
3856	}
3857
3858	inline UnicodeString::UnicodeString(const std::nullptr_t /text/) {
3859	fUnion.fStackFields.fLengthAndFlags=kShortString;
3860	}
3861
3862	inline UnicodeString::UnicodeString(const std::nullptr_t /text/, int32_t /length/) {
3863	fUnion.fStackFields.fLengthAndFlags=kShortString;
3864	}
3865
3866	inline UnicodeString::UnicodeString(std::nullptr_t /buffer/, int32_t /buffLength/, int32_t /buffCapacity/) {
3867	fUnion.fStackFields.fLengthAndFlags=kShortString;
3868	}
3869
3870	//========================================
3871	// Read-only implementation methods
3872	//========================================
3873	inline UBool
3874	UnicodeString::hasShortLength() const {
3875	return fUnion.fFields.fLengthAndFlags>=`0`;
3876	}
3877
3878	inline int32_t
3879	UnicodeString::getShortLength() const {
3880	// fLengthAndFlags must be non-negative -> short length >= 0
3881	// and arithmetic or logical shift does not matter.
3882	return fUnion.fFields.fLengthAndFlags>>kLengthShift;
3883	}
3884
3885	inline int32_t
3886	UnicodeString::length() const {
3887	return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
3888	}
3889
3890	inline int32_t
3891	UnicodeString::getCapacity() const {
3892	return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3893	US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
3894	}
3895
3896	inline int32_t
3897	UnicodeString::hashCode() const
3898	{ return doHashCode(); }
3899
3900	inline UBool
3901	UnicodeString::isBogus() const
3902	{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
3903
3904	inline UBool
3905	UnicodeString::isWritable() const
3906	{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer\|kIsBogus)); }
3907
3908	inline UBool
3909	UnicodeString::isBufferWritable() const
3910	{
3911	return (UBool)(
3912	!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer\|kIsBogus\|kBufferIsReadonly)) &&
3913	(!(fUnion.fFields.fLengthAndFlags&kRefCounted) \|\| refCount()==`1`));
3914	}
3915
3916	inline const char16_t *
3917	UnicodeString::getBuffer() const {
3918	if(fUnion.fFields.fLengthAndFlags&(kIsBogus\|kOpenGetBuffer)) {
3919	return nullptr;
3920	} else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
3921	return fUnion.fStackFields.fBuffer;
3922	} else {
3923	return fUnion.fFields.fArray;
3924	}
3925	}
3926
3927	//========================================
3928	// Read-only alias methods
3929	//========================================
3930	inline int8_t
3931	UnicodeString::doCompare(int32_t start,
3932	int32_t thisLength,
3933	const UnicodeString& srcText,
3934	int32_t srcStart,
3935	int32_t srcLength) const
3936	{
3937	if(srcText.isBogus()) {
3938	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3939	} else {
3940	srcText.pinIndices(srcStart, srcLength);
3941	return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3942	}
3943	}
3944
3945	inline UBool
3946	UnicodeString::operator== (const UnicodeString& text) const
3947	{
3948	if(isBogus()) {
3949	return text.isBogus();
3950	} else {
3951	int32_t len = length(), textLength = text.length();
3952	return !text.isBogus() && len == textLength && doEquals(text, len);
3953	}
3954	}
3955
3956	inline UBool
3957	UnicodeString::operator!= (const UnicodeString& text) const
3958	{ return (! operator==(text)); }
3959
3960	inline UBool
3961	UnicodeString::operator> (const UnicodeString& text) const
3962	{ return doCompare(`0`, length(), text, `0`, text.length()) == `1`; }
3963
3964	inline UBool
3965	UnicodeString::operator< (const UnicodeString& text) const
3966	{ return doCompare(`0`, length(), text, `0`, text.length()) == -`1`; }
3967
3968	inline UBool
3969	UnicodeString::operator>= (const UnicodeString& text) const
3970	{ return doCompare(`0`, length(), text, `0`, text.length()) != -`1`; }
3971
3972	inline UBool
3973	UnicodeString::operator<= (const UnicodeString& text) const
3974	{ return doCompare(`0`, length(), text, `0`, text.length()) != `1`; }
3975
3976	inline int8_t
3977	UnicodeString::compare(const UnicodeString& text) const
3978	{ return doCompare(`0`, length(), text, `0`, text.length()); }
3979
3980	inline int8_t
3981	UnicodeString::compare(int32_t start,
3982	int32_t _length,
3983	const UnicodeString& srcText) const
3984	{ return doCompare(start, _length, srcText, `0`, srcText.length()); }
3985
3986	inline int8_t
3987	UnicodeString::compare(ConstChar16Ptr srcChars,
3988	int32_t srcLength) const
3989	{ return doCompare(`0`, length(), srcChars, `0`, srcLength); }
3990
3991	inline int8_t
3992	UnicodeString::compare(int32_t start,
3993	int32_t _length,
3994	const UnicodeString& srcText,
3995	int32_t srcStart,
3996	int32_t srcLength) const
3997	{ return doCompare(start, _length, srcText, srcStart, srcLength); }
3998
3999	inline int8_t
4000	UnicodeString::compare(int32_t start,
4001	int32_t _length,
4002	const char16_t srcChars) const*
4003	{ return doCompare(start, _length, srcChars, `0`, _length); }
4004
4005	inline int8_t
4006	UnicodeString::compare(int32_t start,
4007	int32_t _length,
4008	const char16_t *srcChars,
4009	int32_t srcStart,
4010	int32_t srcLength) const
4011	{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
4012
4013	inline int8_t
4014	UnicodeString::compareBetween(int32_t start,
4015	int32_t limit,
4016	const UnicodeString& srcText,
4017	int32_t srcStart,
4018	int32_t srcLimit) const
4019	{ return doCompare(start, limit - start,
4020	srcText, srcStart, srcLimit - srcStart); }
4021
4022	inline int8_t
4023	UnicodeString::doCompareCodePointOrder(int32_t start,
4024	int32_t thisLength,
4025	const UnicodeString& srcText,
4026	int32_t srcStart,
4027	int32_t srcLength) const
4028	{
4029	if(srcText.isBogus()) {
4030	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4031	} else {
4032	srcText.pinIndices(srcStart, srcLength);
4033	return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4034	}
4035	}
4036
4037	inline int8_t
4038	UnicodeString::compareCodePointOrder(const UnicodeString& text) const
4039	{ return doCompareCodePointOrder(`0`, length(), text, `0`, text.length()); }
4040
4041	inline int8_t
4042	UnicodeString::compareCodePointOrder(int32_t start,
4043	int32_t _length,
4044	const UnicodeString& srcText) const
4045	{ return doCompareCodePointOrder(start, _length, srcText, `0`, srcText.length()); }
4046
4047	inline int8_t
4048	UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
4049	int32_t srcLength) const
4050	{ return doCompareCodePointOrder(`0`, length(), srcChars, `0`, srcLength); }
4051
4052	inline int8_t
4053	UnicodeString::compareCodePointOrder(int32_t start,
4054	int32_t _length,
4055	const UnicodeString& srcText,
4056	int32_t srcStart,
4057	int32_t srcLength) const
4058	{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4059
4060	inline int8_t
4061	UnicodeString::compareCodePointOrder(int32_t start,
4062	int32_t _length,
4063	const char16_t srcChars) const*
4064	{ return doCompareCodePointOrder(start, _length, srcChars, `0`, _length); }
4065
4066	inline int8_t
4067	UnicodeString::compareCodePointOrder(int32_t start,
4068	int32_t _length,
4069	const char16_t *srcChars,
4070	int32_t srcStart,
4071	int32_t srcLength) const
4072	{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4073
4074	inline int8_t
4075	UnicodeString::compareCodePointOrderBetween(int32_t start,
4076	int32_t limit,
4077	const UnicodeString& srcText,
4078	int32_t srcStart,
4079	int32_t srcLimit) const
4080	{ return doCompareCodePointOrder(start, limit - start,
4081	srcText, srcStart, srcLimit - srcStart); }
4082
4083	inline int8_t
4084	UnicodeString::doCaseCompare(int32_t start,
4085	int32_t thisLength,
4086	const UnicodeString &srcText,
4087	int32_t srcStart,
4088	int32_t srcLength,
4089	uint32_t options) const
4090	{
4091	if(srcText.isBogus()) {
4092	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4093	} else {
4094	srcText.pinIndices(srcStart, srcLength);
4095	return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4096	}
4097	}
4098
4099	inline int8_t
4100	UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4101	return doCaseCompare(`0`, length(), text, `0`, text.length(), options);
4102	}
4103
4104	inline int8_t
4105	UnicodeString::caseCompare(int32_t start,
4106	int32_t _length,
4107	const UnicodeString &srcText,
4108	uint32_t options) const {
4109	return doCaseCompare(start, _length, srcText, `0`, srcText.length(), options);
4110	}
4111
4112	inline int8_t
4113	UnicodeString::caseCompare(ConstChar16Ptr srcChars,
4114	int32_t srcLength,
4115	uint32_t options) const {
4116	return doCaseCompare(`0`, length(), srcChars, `0`, srcLength, options);
4117	}
4118
4119	inline int8_t
4120	UnicodeString::caseCompare(int32_t start,
4121	int32_t _length,
4122	const UnicodeString &srcText,
4123	int32_t srcStart,
4124	int32_t srcLength,
4125	uint32_t options) const {
4126	return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4127	}
4128
4129	inline int8_t
4130	UnicodeString::caseCompare(int32_t start,
4131	int32_t _length,
4132	const char16_t *srcChars,
4133	uint32_t options) const {
4134	return doCaseCompare(start, _length, srcChars, `0`, _length, options);
4135	}
4136
4137	inline int8_t
4138	UnicodeString::caseCompare(int32_t start,
4139	int32_t _length,
4140	const char16_t *srcChars,
4141	int32_t srcStart,
4142	int32_t srcLength,
4143	uint32_t options) const {
4144	return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4145	}
4146
4147	inline int8_t
4148	UnicodeString::caseCompareBetween(int32_t start,
4149	int32_t limit,
4150	const UnicodeString &srcText,
4151	int32_t srcStart,
4152	int32_t srcLimit,
4153	uint32_t options) const {
4154	return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4155	}
4156
4157	inline int32_t
4158	UnicodeString::indexOf(const UnicodeString& srcText,
4159	int32_t srcStart,
4160	int32_t srcLength,
4161	int32_t start,
4162	int32_t _length) const
4163	{
4164	if(!srcText.isBogus()) {
4165	srcText.pinIndices(srcStart, srcLength);
4166	if(srcLength > `0`) {
4167	return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4168	}
4169	}
4170	return -`1`;
4171	}
4172
4173	inline int32_t
4174	UnicodeString::indexOf(const UnicodeString& text) const
4175	{ return indexOf(text, `0`, text.length(), `0`, length()); }
4176
4177	inline int32_t
4178	UnicodeString::indexOf(const UnicodeString& text,
4179	int32_t start) const {
4180	pinIndex(start);
4181	return indexOf(text, `0`, text.length(), start, length() - start);
4182	}
4183
4184	inline int32_t
4185	UnicodeString::indexOf(const UnicodeString& text,
4186	int32_t start,
4187	int32_t _length) const
4188	{ return indexOf(text, `0`, text.length(), start, _length); }
4189
4190	inline int32_t
4191	UnicodeString::indexOf(const char16_t *srcChars,
4192	int32_t srcLength,
4193	int32_t start) const {
4194	pinIndex(start);
4195	return indexOf(srcChars, `0`, srcLength, start, length() - start);
4196	}
4197
4198	inline int32_t
4199	UnicodeString::indexOf(ConstChar16Ptr srcChars,
4200	int32_t srcLength,
4201	int32_t start,
4202	int32_t _length) const
4203	{ return indexOf(srcChars, `0`, srcLength, start, _length); }
4204
4205	inline int32_t
4206	UnicodeString::indexOf(char16_t c,
4207	int32_t start,
4208	int32_t _length) const
4209	{ return doIndexOf(c, start, _length); }
4210
4211	inline int32_t
4212	UnicodeString::indexOf(UChar32 c,
4213	int32_t start,
4214	int32_t _length) const
4215	{ return doIndexOf(c, start, _length); }
4216
4217	inline int32_t
4218	UnicodeString::indexOf(char16_t c) const
4219	{ return doIndexOf(c, `0`, length()); }
4220
4221	inline int32_t
4222	UnicodeString::indexOf(UChar32 c) const
4223	{ return indexOf(c, `0`, length()); }
4224
4225	inline int32_t
4226	UnicodeString::indexOf(char16_t c,
4227	int32_t start) const {
4228	pinIndex(start);
4229	return doIndexOf(c, start, length() - start);
4230	}
4231
4232	inline int32_t
4233	UnicodeString::indexOf(UChar32 c,
4234	int32_t start) const {
4235	pinIndex(start);
4236	return indexOf(c, start, length() - start);
4237	}
4238
4239	inline int32_t
4240	UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
4241	int32_t srcLength,
4242	int32_t start,
4243	int32_t _length) const
4244	{ return lastIndexOf(srcChars, `0`, srcLength, start, _length); }
4245
4246	inline int32_t
4247	UnicodeString::lastIndexOf(const char16_t *srcChars,
4248	int32_t srcLength,
4249	int32_t start) const {
4250	pinIndex(start);
4251	return lastIndexOf(srcChars, `0`, srcLength, start, length() - start);
4252	}
4253
4254	inline int32_t
4255	UnicodeString::lastIndexOf(const UnicodeString& srcText,
4256	int32_t srcStart,
4257	int32_t srcLength,
4258	int32_t start,
4259	int32_t _length) const
4260	{
4261	if(!srcText.isBogus()) {
4262	srcText.pinIndices(srcStart, srcLength);
4263	if(srcLength > `0`) {
4264	return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4265	}
4266	}
4267	return -`1`;
4268	}
4269
4270	inline int32_t
4271	UnicodeString::lastIndexOf(const UnicodeString& text,
4272	int32_t start,
4273	int32_t _length) const
4274	{ return lastIndexOf(text, `0`, text.length(), start, _length); }
4275
4276	inline int32_t
4277	UnicodeString::lastIndexOf(const UnicodeString& text,
4278	int32_t start) const {
4279	pinIndex(start);
4280	return lastIndexOf(text, `0`, text.length(), start, length() - start);
4281	}
4282
4283	inline int32_t
4284	UnicodeString::lastIndexOf(const UnicodeString& text) const
4285	{ return lastIndexOf(text, `0`, text.length(), `0`, length()); }
4286
4287	inline int32_t
4288	UnicodeString::lastIndexOf(char16_t c,
4289	int32_t start,
4290	int32_t _length) const
4291	{ return doLastIndexOf(c, start, _length); }
4292
4293	inline int32_t
4294	UnicodeString::lastIndexOf(UChar32 c,
4295	int32_t start,
4296	int32_t _length) const {
4297	return doLastIndexOf(c, start, _length);
4298	}
4299
4300	inline int32_t
4301	UnicodeString::lastIndexOf(char16_t c) const
4302	{ return doLastIndexOf(c, `0`, length()); }
4303
4304	inline int32_t
4305	UnicodeString::lastIndexOf(UChar32 c) const {
4306	return lastIndexOf(c, `0`, length());
4307	}
4308
4309	inline int32_t
4310	UnicodeString::lastIndexOf(char16_t c,
4311	int32_t start) const {
4312	pinIndex(start);
4313	return doLastIndexOf(c, start, length() - start);
4314	}
4315
4316	inline int32_t
4317	UnicodeString::lastIndexOf(UChar32 c,
4318	int32_t start) const {
4319	pinIndex(start);
4320	return lastIndexOf(c, start, length() - start);
4321	}
4322
4323	inline UBool
4324	UnicodeString::startsWith(const UnicodeString& text) const
4325	{ return compare(`0`, text.length(), text, `0`, text.length()) == `0`; }
4326
4327	inline UBool
4328	UnicodeString::startsWith(const UnicodeString& srcText,
4329	int32_t srcStart,
4330	int32_t srcLength) const
4331	{ return doCompare(`0`, srcLength, srcText, srcStart, srcLength) == `0`; }
4332
4333	inline UBool
4334	UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4335	if(srcLength < `0`) {
4336	srcLength = u_strlen(toUCharPtr(srcChars));
4337	}
4338	return doCompare(`0`, srcLength, srcChars, `0`, srcLength) == `0`;
4339	}
4340
4341	inline UBool
4342	UnicodeString::startsWith(const char16_t srcChars, int32_t srcStart, int32_t srcLength) const* {
4343	if(srcLength < `0`) {
4344	srcLength = u_strlen(toUCharPtr(srcChars));
4345	}
4346	return doCompare(`0`, srcLength, srcChars, srcStart, srcLength) == `0`;
4347	}
4348
4349	inline UBool
4350	UnicodeString::endsWith(const UnicodeString& text) const
4351	{ return doCompare(length() - text.length(), text.length(),
4352	text, `0`, text.length()) == `0`; }
4353
4354	inline UBool
4355	UnicodeString::endsWith(const UnicodeString& srcText,
4356	int32_t srcStart,
4357	int32_t srcLength) const {
4358	srcText.pinIndices(srcStart, srcLength);
4359	return doCompare(length() - srcLength, srcLength,
4360	srcText, srcStart, srcLength) == `0`;
4361	}
4362
4363	inline UBool
4364	UnicodeString::endsWith(ConstChar16Ptr srcChars,
4365	int32_t srcLength) const {
4366	if(srcLength < `0`) {
4367	srcLength = u_strlen(toUCharPtr(srcChars));
4368	}
4369	return doCompare(length() - srcLength, srcLength,
4370	srcChars, `0`, srcLength) == `0`;
4371	}
4372
4373	inline UBool
4374	UnicodeString::endsWith(const char16_t *srcChars,
4375	int32_t srcStart,
4376	int32_t srcLength) const {
4377	if(srcLength < `0`) {
4378	srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
4379	}
4380	return doCompare(length() - srcLength, srcLength,
4381	srcChars, srcStart, srcLength) == `0`;
4382	}
4383
4384	//========================================
4385	// replace
4386	//========================================
4387	inline UnicodeString&
4388	UnicodeString::replace(int32_t start,
4389	int32_t _length,
4390	const UnicodeString& srcText)
4391	{ return doReplace(start, _length, srcText, `0`, srcText.length()); }
4392
4393	inline UnicodeString&
4394	UnicodeString::replace(int32_t start,
4395	int32_t _length,
4396	const UnicodeString& srcText,
4397	int32_t srcStart,
4398	int32_t srcLength)
4399	{ return doReplace(start, _length, srcText, srcStart, srcLength); }
4400
4401	inline UnicodeString&
4402	UnicodeString::replace(int32_t start,
4403	int32_t _length,
4404	ConstChar16Ptr srcChars,
4405	int32_t srcLength)
4406	{ return doReplace(start, _length, srcChars, `0`, srcLength); }
4407
4408	inline UnicodeString&
4409	UnicodeString::replace(int32_t start,
4410	int32_t _length,
4411	const char16_t *srcChars,
4412	int32_t srcStart,
4413	int32_t srcLength)
4414	{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
4415
4416	inline UnicodeString&
4417	UnicodeString::replace(int32_t start,
4418	int32_t _length,
4419	char16_t srcChar)
4420	{ return doReplace(start, _length, &srcChar, `0`, `1`); }
4421
4422	inline UnicodeString&
4423	UnicodeString::replaceBetween(int32_t start,
4424	int32_t limit,
4425	const UnicodeString& srcText)
4426	{ return doReplace(start, limit - start, srcText, `0`, srcText.length()); }
4427
4428	inline UnicodeString&
4429	UnicodeString::replaceBetween(int32_t start,
4430	int32_t limit,
4431	const UnicodeString& srcText,
4432	int32_t srcStart,
4433	int32_t srcLimit)
4434	{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4435
4436	inline UnicodeString&
4437	UnicodeString::findAndReplace(const UnicodeString& oldText,
4438	const UnicodeString& newText)
4439	{ return findAndReplace(`0`, length(), oldText, `0`, oldText.length(),
4440	newText, `0`, newText.length()); }
4441
4442	inline UnicodeString&
4443	UnicodeString::findAndReplace(int32_t start,
4444	int32_t _length,
4445	const UnicodeString& oldText,
4446	const UnicodeString& newText)
4447	{ return findAndReplace(start, _length, oldText, `0`, oldText.length(),
4448	newText, `0`, newText.length()); }
4449
4450	// ============================
4451	// extract
4452	// ============================
4453	inline void
4454	UnicodeString::doExtract(int32_t start,
4455	int32_t _length,
4456	UnicodeString& target) const
4457	{ target.replace(`0`, target.length(), *this, start, _length); }
4458
4459	inline void
4460	UnicodeString::extract(int32_t start,
4461	int32_t _length,
4462	Char16Ptr target,
4463	int32_t targetStart) const
4464	{ doExtract(start, _length, target, targetStart); }
4465
4466	inline void
4467	UnicodeString::extract(int32_t start,
4468	int32_t _length,
4469	UnicodeString& target) const
4470	{ doExtract(start, _length, target); }
4471
4472	#if !UCONFIG_NO_CONVERSION
4473
4474	inline int32_t
4475	UnicodeString::extract(int32_t start,
4476	int32_t _length,
4477	char *dst,
4478	const char codepage) const*
4479
4480	{
4481	// This dstSize value will be checked explicitly
4482	return extract(start, _length, dst, dst!=`0` ? `0xffffffff` : `0`, codepage);
4483	}
4484
4485	#endif
4486
4487	inline void
4488	UnicodeString::extractBetween(int32_t start,
4489	int32_t limit,
4490	char16_t *dst,
4491	int32_t dstStart) const {
4492	pinIndex(start);
4493	pinIndex(limit);
4494	doExtract(start, limit - start, dst, dstStart);
4495	}
4496
4497	inline UnicodeString
4498	UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4499	return tempSubString(start, limit - start);
4500	}
4501
4502	inline char16_t
4503	UnicodeString::doCharAt(int32_t offset) const
4504	{
4505	if((uint32_t)offset < (uint32_t)length()) {
4506	return getArrayStart()[offset];
4507	} else {
4508	return kInvalidUChar;
4509	}
4510	}
4511
4512	inline char16_t
4513	UnicodeString::charAt(int32_t offset) const
4514	{ return doCharAt(offset); }
4515
4516	inline char16_t
4517	UnicodeString::operator[] (int32_t offset) const
4518	{ return doCharAt(offset); }
4519
4520	inline UBool
4521	UnicodeString::isEmpty() const {
4522	// Arithmetic or logical right shift does not matter: only testing for 0.
4523	return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == `0`;
4524	}
4525
4526	//========================================
4527	// Write implementation methods
4528	//========================================
4529	inline void
4530	UnicodeString::setZeroLength() {
4531	fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4532	}
4533
4534	inline void
4535	UnicodeString::setShortLength(int32_t len) {
4536	// requires 0 <= len <= kMaxShortLength
4537	fUnion.fFields.fLengthAndFlags =
4538	(int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) \| (len << kLengthShift));
4539	}
4540
4541	inline void
4542	UnicodeString::setLength(int32_t len) {
4543	if(len <= kMaxShortLength) {
4544	setShortLength(len);
4545	} else {
4546	fUnion.fFields.fLengthAndFlags \|= kLengthIsLarge;
4547	fUnion.fFields.fLength = len;
4548	}
4549	}
4550
4551	inline void
4552	UnicodeString::setToEmpty() {
4553	fUnion.fFields.fLengthAndFlags = kShortString;
4554	}
4555
4556	inline void
4557	UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4558	setLength(len);
4559	fUnion.fFields.fArray = array;
4560	fUnion.fFields.fCapacity = capacity;
4561	}
4562
4563	inline UnicodeString&
4564	UnicodeString::operator= (char16_t ch)
4565	{ return doReplace(`0`, length(), &ch, `0`, `1`); }
4566
4567	inline UnicodeString&
4568	UnicodeString::operator= (UChar32 ch)
4569	{ return replace(`0`, length(), ch); }
4570
4571	inline UnicodeString&
4572	UnicodeString::setTo(const UnicodeString& srcText,
4573	int32_t srcStart,
4574	int32_t srcLength)
4575	{
4576	unBogus();
4577	return doReplace(`0`, length(), srcText, srcStart, srcLength);
4578	}
4579
4580	inline UnicodeString&
4581	UnicodeString::setTo(const UnicodeString& srcText,
4582	int32_t srcStart)
4583	{
4584	unBogus();
4585	srcText.pinIndex(srcStart);
4586	return doReplace(`0`, length(), srcText, srcStart, srcText.length() - srcStart);
4587	}
4588
4589	inline UnicodeString&
4590	UnicodeString::setTo(const UnicodeString& srcText)
4591	{
4592	return copyFrom(srcText);
4593	}
4594
4595	inline UnicodeString&
4596	UnicodeString::setTo(const char16_t *srcChars,
4597	int32_t srcLength)
4598	{
4599	unBogus();
4600	return doReplace(`0`, length(), srcChars, `0`, srcLength);
4601	}
4602
4603	inline UnicodeString&
4604	UnicodeString::setTo(char16_t srcChar)
4605	{
4606	unBogus();
4607	return doReplace(`0`, length(), &srcChar, `0`, `1`);
4608	}
4609
4610	inline UnicodeString&
4611	UnicodeString::setTo(UChar32 srcChar)
4612	{
4613	unBogus();
4614	return replace(`0`, length(), srcChar);
4615	}
4616
4617	inline UnicodeString&
4618	UnicodeString::append(const UnicodeString& srcText,
4619	int32_t srcStart,
4620	int32_t srcLength)
4621	{ return doAppend(srcText, srcStart, srcLength); }
4622
4623	inline UnicodeString&
4624	UnicodeString::append(const UnicodeString& srcText)
4625	{ return doAppend(srcText, `0`, srcText.length()); }
4626
4627	inline UnicodeString&
4628	UnicodeString::append(const char16_t *srcChars,
4629	int32_t srcStart,
4630	int32_t srcLength)
4631	{ return doAppend(srcChars, srcStart, srcLength); }
4632
4633	inline UnicodeString&
4634	UnicodeString::append(ConstChar16Ptr srcChars,
4635	int32_t srcLength)
4636	{ return doAppend(srcChars, `0`, srcLength); }
4637
4638	inline UnicodeString&
4639	UnicodeString::append(char16_t srcChar)
4640	{ return doAppend(&srcChar, `0`, `1`); }
4641
4642	inline UnicodeString&
4643	UnicodeString::operator+= (char16_t ch)
4644	{ return doAppend(&ch, `0`, `1`); }
4645
4646	inline UnicodeString&
4647	UnicodeString::operator+= (UChar32 ch) {
4648	return append(ch);
4649	}
4650
4651	inline UnicodeString&
4652	UnicodeString::operator+= (const UnicodeString& srcText)
4653	{ return doAppend(srcText, `0`, srcText.length()); }
4654
4655	inline UnicodeString&
4656	UnicodeString::insert(int32_t start,
4657	const UnicodeString& srcText,
4658	int32_t srcStart,
4659	int32_t srcLength)
4660	{ return doReplace(start, `0`, srcText, srcStart, srcLength); }
4661
4662	inline UnicodeString&
4663	UnicodeString::insert(int32_t start,
4664	const UnicodeString& srcText)
4665	{ return doReplace(start, `0`, srcText, `0`, srcText.length()); }
4666
4667	inline UnicodeString&
4668	UnicodeString::insert(int32_t start,
4669	const char16_t *srcChars,
4670	int32_t srcStart,
4671	int32_t srcLength)
4672	{ return doReplace(start, `0`, srcChars, srcStart, srcLength); }
4673
4674	inline UnicodeString&
4675	UnicodeString::insert(int32_t start,
4676	ConstChar16Ptr srcChars,
4677	int32_t srcLength)
4678	{ return doReplace(start, `0`, srcChars, `0`, srcLength); }
4679
4680	inline UnicodeString&
4681	UnicodeString::insert(int32_t start,
4682	char16_t srcChar)
4683	{ return doReplace(start, `0`, &srcChar, `0`, `1`); }
4684
4685	inline UnicodeString&
4686	UnicodeString::insert(int32_t start,
4687	UChar32 srcChar)
4688	{ return replace(start, `0`, srcChar); }
4689
4690
4691	inline UnicodeString&
4692	UnicodeString::remove()
4693	{
4694	// remove() of a bogus string makes the string empty and non-bogus
4695	if(isBogus()) {
4696	setToEmpty();
4697	} else {
4698	setZeroLength();
4699	}
4700	return *this;
4701	}
4702
4703	inline UnicodeString&
4704	UnicodeString::remove(int32_t start,
4705	int32_t _length)
4706	{
4707	if(start <= `0` && _length == INT32_MAX) {
4708	// remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4709	return remove();
4710	}
4711	return doReplace(start, _length, NULL, `0`, `0`);
4712	}
4713
4714	inline UnicodeString&
4715	UnicodeString::removeBetween(int32_t start,
4716	int32_t limit)
4717	{ return doReplace(start, limit - start, NULL, `0`, `0`); }
4718
4719	inline UnicodeString &
4720	UnicodeString::retainBetween(int32_t start, int32_t limit) {
4721	truncate(limit);
4722	return doReplace(`0`, start, NULL, `0`, `0`);
4723	}
4724
4725	inline UBool
4726	UnicodeString::truncate(int32_t targetLength)
4727	{
4728	if(isBogus() && targetLength == `0`) {
4729	// truncate(0) of a bogus string makes the string empty and non-bogus
4730	unBogus();
4731	return FALSE;
4732	} else if((uint32_t)targetLength < (uint32_t)length()) {
4733	setLength(targetLength);
4734	return TRUE;
4735	} else {
4736	return FALSE;
4737	}
4738	}
4739
4740	inline UnicodeString&
4741	UnicodeString::reverse()
4742	{ return doReverse(`0`, length()); }
4743
4744	inline UnicodeString&
4745	UnicodeString::reverse(int32_t start,
4746	int32_t _length)
4747	{ return doReverse(start, _length); }
4748
4749	U_NAMESPACE_END
4750
4751	#endif
4752

Browse the source code of include/unicode/unistr.h