1 | /* |
2 | ******************************************************************************* |
3 | * |
4 | * Copyright (C) 2004-2012, International Business Machines |
5 | * Corporation and others. All Rights Reserved. |
6 | * |
7 | ******************************************************************************* |
8 | * file name: utext.h |
9 | * encoding: US-ASCII |
10 | * tab size: 8 (not used) |
11 | * indentation:4 |
12 | * |
13 | * created on: 2004oct06 |
14 | * created by: Markus W. Scherer |
15 | */ |
16 | |
17 | #ifndef __UTEXT_H__ |
18 | #define __UTEXT_H__ |
19 | |
20 | /** |
21 | * \file |
22 | * \brief C API: Abstract Unicode Text API |
23 | * |
24 | * The Text Access API provides a means to allow text that is stored in alternative |
25 | * formats to work with ICU services. ICU normally operates on text that is |
26 | * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type |
27 | * UnicodeString for C++ APIs. |
28 | * |
29 | * ICU Text Access allows other formats, such as UTF-8 or non-contiguous |
30 | * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services. |
31 | * |
32 | * There are three general classes of usage for UText: |
33 | * |
34 | * Application Level Use. This is the simplest usage - applications would |
35 | * use one of the utext_open() functions on their input text, and pass |
36 | * the resulting UText to the desired ICU service. |
37 | * |
38 | * Second is usage in ICU Services, such as break iteration, that will need to |
39 | * operate on input presented to them as a UText. These implementations |
40 | * will need to use the iteration and related UText functions to gain |
41 | * access to the actual text. |
42 | * |
43 | * The third class of UText users are "text providers." These are the |
44 | * UText implementations for the various text storage formats. An application |
45 | * or system with a unique text storage format can implement a set of |
46 | * UText provider functions for that format, which will then allow |
47 | * ICU services to operate on that format. |
48 | * |
49 | * |
50 | * <em>Iterating over text</em> |
51 | * |
52 | * Here is sample code for a forward iteration over the contents of a UText |
53 | * |
54 | * \code |
55 | * UChar32 c; |
56 | * UText *ut = whatever(); |
57 | * |
58 | * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) { |
59 | * // do whatever with the codepoint c here. |
60 | * } |
61 | * \endcode |
62 | * |
63 | * And here is similar code to iterate in the reverse direction, from the end |
64 | * of the text towards the beginning. |
65 | * |
66 | * \code |
67 | * UChar32 c; |
68 | * UText *ut = whatever(); |
69 | * int64_t textLength = utext_nativeLength(ut); |
70 | * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) { |
71 | * // do whatever with the codepoint c here. |
72 | * } |
73 | * \endcode |
74 | * |
75 | * <em>Characters and Indexing</em> |
76 | * |
77 | * Indexing into text by UText functions is nearly always in terms of the native |
78 | * indexing of the underlying text storage. The storage format could be UTF-8 |
79 | * or UTF-32, for example. When coding to the UText access API, no assumptions |
80 | * can be made regarding the size of characters, or how far an index |
81 | * may move when iterating between characters. |
82 | * |
83 | * All indices supplied to UText functions are pinned to the length of the |
84 | * text. An out-of-bounds index is not considered to be an error, but is |
85 | * adjusted to be in the range 0 <= index <= length of input text. |
86 | * |
87 | * |
88 | * When an index position is returned from a UText function, it will be |
89 | * a native index to the underlying text. In the case of multi-unit characters, |
90 | * it will always refer to the first position of the character, |
91 | * never to the interior. This is essentially the same thing as saying that |
92 | * a returned index will always point to a boundary between characters. |
93 | * |
94 | * When a native index is supplied to a UText function, all indices that |
95 | * refer to any part of a multi-unit character representation are considered |
96 | * to be equivalent. In the case of multi-unit characters, an incoming index |
97 | * will be logically normalized to refer to the start of the character. |
98 | * |
99 | * It is possible to test whether a native index is on a code point boundary |
100 | * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex(). |
101 | * If the index is returned unchanged, it was on a code point boundary. If |
102 | * an adjusted index is returned, the original index referred to the |
103 | * interior of a character. |
104 | * |
105 | * <em>Conventions for calling UText functions</em> |
106 | * |
107 | * Most UText access functions have as their first parameter a (UText *) pointer, |
108 | * which specifies the UText to be used. Unless otherwise noted, the |
109 | * pointer must refer to a valid, open UText. Attempting to |
110 | * use a closed UText or passing a NULL pointer is a programming error and |
111 | * will produce undefined results or NULL pointer exceptions. |
112 | * |
113 | * The utext_open family of functions can either open an existing (closed) |
114 | * UText, or heap allocate a new UText. Here is sample code for creating |
115 | * a stack-allocated UText. |
116 | * |
117 | * \code |
118 | * char *s = whatever(); // A utf-8 string |
119 | * UErrorCode status = U_ZERO_ERROR; |
120 | * UText ut = UTEXT_INITIALIZER; |
121 | * utext_openUTF8(&ut, s, -1, &status); |
122 | * if (U_FAILURE(status)) { |
123 | * // error handling |
124 | * } else { |
125 | * // work with the UText |
126 | * } |
127 | * \endcode |
128 | * |
129 | * Any existing UText passed to an open function _must_ have been initialized, |
130 | * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated |
131 | * by an open function. Passing NULL will cause the open function to |
132 | * heap-allocate and fully initialize a new UText. |
133 | * |
134 | */ |
135 | |
136 | |
137 | |
138 | #include "unicode/utypes.h" |
139 | #include "unicode/uchar.h" |
140 | #if U_SHOW_CPLUSPLUS_API |
141 | #include "unicode/localpointer.h" |
142 | #include "unicode/rep.h" |
143 | #include "unicode/unistr.h" |
144 | #include "unicode/chariter.h" |
145 | #endif |
146 | |
147 | |
148 | U_CDECL_BEGIN |
149 | |
150 | struct UText; |
151 | typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */ |
152 | |
153 | |
154 | /*************************************************************************************** |
155 | * |
156 | * C Functions for creating UText wrappers around various kinds of text strings. |
157 | * |
158 | ****************************************************************************************/ |
159 | |
160 | |
161 | /** |
162 | * Close function for UText instances. |
163 | * Cleans up, releases any resources being held by an open UText. |
164 | * <p> |
165 | * If the UText was originally allocated by one of the utext_open functions, |
166 | * the storage associated with the UText will also be freed. |
167 | * If the UText storage originated with the application, as it would with |
168 | * a local or static instance, the storage will not be deleted. |
169 | * |
170 | * An open UText can be reset to refer to a new string by using one of the utext_open() |
171 | * functions without first closing the UText. |
172 | * |
173 | * @param ut The UText to be closed. |
174 | * @return NULL if the UText struct was deleted by the close. If the UText struct |
175 | * was originally provided by the caller to the open function, it is |
176 | * returned by this function, and may be safely used again in |
177 | * a subsequent utext_open. |
178 | * |
179 | * @stable ICU 3.4 |
180 | */ |
181 | U_STABLE UText * U_EXPORT2 |
182 | utext_close(UText *ut); |
183 | |
184 | #if U_SHOW_CPLUSPLUS_API |
185 | |
186 | U_NAMESPACE_BEGIN |
187 | |
188 | /** |
189 | * \class LocalUTextPointer |
190 | * "Smart pointer" class, closes a UText via utext_close(). |
191 | * For most methods see the LocalPointerBase base class. |
192 | * |
193 | * @see LocalPointerBase |
194 | * @see LocalPointer |
195 | * @stable ICU 4.4 |
196 | */ |
197 | U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close); |
198 | |
199 | U_NAMESPACE_END |
200 | |
201 | #endif |
202 | |
203 | /** |
204 | * Open a read-only UText implementation for UTF-8 strings. |
205 | * |
206 | * \htmlonly |
207 | * Any invalid UTF-8 in the input will be handled in this way: |
208 | * a sequence of bytes that has the form of a truncated, but otherwise valid, |
209 | * UTF-8 sequence will be replaced by a single Unicode replacement character, \uFFFD. |
210 | * Any other illegal bytes will each be replaced by a \uFFFD. |
211 | * \endhtmlonly |
212 | * |
213 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
214 | * If non-NULL, must refer to an initialized UText struct, which will then |
215 | * be reset to reference the specified UTF-8 string. |
216 | * @param s A UTF-8 string. Must not be NULL. |
217 | * @param length The length of the UTF-8 string in bytes, or -1 if the string is |
218 | * NUL-terminated. |
219 | * @param status Errors are returned here. |
220 | * @return A pointer to the UText. If a pre-allocated UText was provided, it |
221 | * will always be used and returned. |
222 | * @stable ICU 3.4 |
223 | */ |
224 | U_STABLE UText * U_EXPORT2 |
225 | utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status); |
226 | |
227 | |
228 | /** |
229 | * Open a read-only UText for a UChar * string. |
230 | * |
231 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
232 | * If non-NULL, must refer to an initialized UText struct, which will then |
233 | * be reset to reference the specified UChar string. |
234 | * @param s A UChar (UTF-16) string |
235 | * @param length The number of UChars in the input string, or -1 if the string is |
236 | * NUL-terminated. |
237 | * @param status Errors are returned here. |
238 | * @return A pointer to the UText. If a pre-allocated UText was provided, it |
239 | * will always be used and returned. |
240 | * @stable ICU 3.4 |
241 | */ |
242 | U_STABLE UText * U_EXPORT2 |
243 | utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status); |
244 | |
245 | |
246 | #if U_SHOW_CPLUSPLUS_API |
247 | /** |
248 | * Open a writable UText for a non-const UnicodeString. |
249 | * |
250 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
251 | * If non-NULL, must refer to an initialized UText struct, which will then |
252 | * be reset to reference the specified input string. |
253 | * @param s A UnicodeString. |
254 | * @param status Errors are returned here. |
255 | * @return Pointer to the UText. If a UText was supplied as input, this |
256 | * will always be used and returned. |
257 | * @stable ICU 3.4 |
258 | */ |
259 | U_STABLE UText * U_EXPORT2 |
260 | utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status); |
261 | |
262 | |
263 | /** |
264 | * Open a UText for a const UnicodeString. The resulting UText will not be writable. |
265 | * |
266 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
267 | * If non-NULL, must refer to an initialized UText struct, which will then |
268 | * be reset to reference the specified input string. |
269 | * @param s A const UnicodeString to be wrapped. |
270 | * @param status Errors are returned here. |
271 | * @return Pointer to the UText. If a UText was supplied as input, this |
272 | * will always be used and returned. |
273 | * @stable ICU 3.4 |
274 | */ |
275 | U_STABLE UText * U_EXPORT2 |
276 | utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status); |
277 | |
278 | |
279 | /** |
280 | * Open a writable UText implementation for an ICU Replaceable object. |
281 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
282 | * If non-NULL, must refer to an initialized UText struct, which will then |
283 | * be reset to reference the specified replaceable text. |
284 | * @param rep A Replaceable text object. |
285 | * @param status Errors are returned here. |
286 | * @return Pointer to the UText. If a UText was supplied as input, this |
287 | * will always be used and returned. |
288 | * @see Replaceable |
289 | * @stable ICU 3.4 |
290 | */ |
291 | U_STABLE UText * U_EXPORT2 |
292 | utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status); |
293 | |
294 | /** |
295 | * Open a UText implementation over an ICU CharacterIterator. |
296 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
297 | * If non-NULL, must refer to an initialized UText struct, which will then |
298 | * be reset to reference the specified character iterator. |
299 | * @param ci A Character Iterator. |
300 | * @param status Errors are returned here. |
301 | * @return Pointer to the UText. If a UText was supplied as input, this |
302 | * will always be used and returned. |
303 | * @see CharacterIterator |
304 | * @stable ICU 3.4 |
305 | */ |
306 | U_STABLE UText * U_EXPORT2 |
307 | utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status); |
308 | |
309 | #endif |
310 | |
311 | |
312 | /** |
313 | * Clone a UText. This is much like opening a UText where the source text is itself |
314 | * another UText. |
315 | * |
316 | * A deep clone will copy both the UText data structures and the underlying text. |
317 | * The original and cloned UText will operate completely independently; modifications |
318 | * made to the text in one will not affect the other. Text providers are not |
319 | * required to support deep clones. The user of clone() must check the status return |
320 | * and be prepared to handle failures. |
321 | * |
322 | * The standard UText implementations for UTF8, UChar *, UnicodeString and |
323 | * Replaceable all support deep cloning. |
324 | * |
325 | * The UText returned from a deep clone will be writable, assuming that the text |
326 | * provider is able to support writing, even if the source UText had been made |
327 | * non-writable by means of utext_freeze(). |
328 | * |
329 | * A shallow clone replicates only the UText data structures; it does not make |
330 | * a copy of the underlying text. Shallow clones can be used as an efficient way to |
331 | * have multiple iterators active in a single text string that is not being |
332 | * modified. |
333 | * |
334 | * A shallow clone operation will not fail, barring truly exceptional conditions such |
335 | * as memory allocation failures. |
336 | * |
337 | * Shallow UText clones should be avoided if the UText functions that modify the |
338 | * text are expected to be used, either on the original or the cloned UText. |
339 | * Any such modifications can cause unpredictable behavior. Read Only |
340 | * shallow clones provide some protection against errors of this type by |
341 | * disabling text modification via the cloned UText. |
342 | * |
343 | * A shallow clone made with the readOnly parameter == FALSE will preserve the |
344 | * utext_isWritable() state of the source object. Note, however, that |
345 | * write operations must be avoided while more than one UText exists that refers |
346 | * to the same underlying text. |
347 | * |
348 | * A UText and its clone may be safely concurrently accessed by separate threads. |
349 | * This is true for read access only with shallow clones, and for both read and |
350 | * write access with deep clones. |
351 | * It is the responsibility of the Text Provider to ensure that this thread safety |
352 | * constraint is met. |
353 | * |
354 | * @param dest A UText struct to be filled in with the result of the clone operation, |
355 | * or NULL if the clone function should heap-allocate a new UText struct. |
356 | * If non-NULL, must refer to an already existing UText, which will then |
357 | * be reset to become the clone. |
358 | * @param src The UText to be cloned. |
359 | * @param deep TRUE to request a deep clone, FALSE for a shallow clone. |
360 | * @param readOnly TRUE to request that the cloned UText have read only access to the |
361 | * underlying text. |
362 | * |
363 | * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR |
364 | * will be returned if the text provider is unable to clone the |
365 | * original text. |
366 | * @return The newly created clone, or NULL if the clone operation failed. |
367 | * @stable ICU 3.4 |
368 | */ |
369 | U_STABLE UText * U_EXPORT2 |
370 | utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status); |
371 | |
372 | |
373 | /** |
374 | * Compare two UText objects for equality. |
375 | * UTexts are equal if they are iterating over the same text, and |
376 | * have the same iteration position within the text. |
377 | * If either or both of the parameters are NULL, the comparison is FALSE. |
378 | * |
379 | * @param a The first of the two UTexts to compare. |
380 | * @param b The other UText to be compared. |
381 | * @return TRUE if the two UTexts are equal. |
382 | * @stable ICU 3.6 |
383 | */ |
384 | U_STABLE UBool U_EXPORT2 |
385 | utext_equals(const UText *a, const UText *b); |
386 | |
387 | |
388 | /***************************************************************************** |
389 | * |
390 | * Functions to work with the text represented by a UText wrapper |
391 | * |
392 | *****************************************************************************/ |
393 | |
394 | /** |
395 | * Get the length of the text. Depending on the characteristics |
396 | * of the underlying text representation, this may be expensive. |
397 | * @see utext_isLengthExpensive() |
398 | * |
399 | * |
400 | * @param ut the text to be accessed. |
401 | * @return the length of the text, expressed in native units. |
402 | * |
403 | * @stable ICU 3.4 |
404 | */ |
405 | U_STABLE int64_t U_EXPORT2 |
406 | utext_nativeLength(UText *ut); |
407 | |
408 | /** |
409 | * Return TRUE if calculating the length of the text could be expensive. |
410 | * Finding the length of NUL terminated strings is considered to be expensive. |
411 | * |
412 | * Note that the value of this function may change |
413 | * as the result of other operations on a UText. |
414 | * Once the length of a string has been discovered, it will no longer |
415 | * be expensive to report it. |
416 | * |
417 | * @param ut the text to be accessed. |
418 | * @return TRUE if determining the length of the text could be time consuming. |
419 | * @stable ICU 3.4 |
420 | */ |
421 | U_STABLE UBool U_EXPORT2 |
422 | utext_isLengthExpensive(const UText *ut); |
423 | |
424 | /** |
425 | * Returns the code point at the requested index, |
426 | * or U_SENTINEL (-1) if it is out of bounds. |
427 | * |
428 | * If the specified index points to the interior of a multi-unit |
429 | * character - one of the trail bytes of a UTF-8 sequence, for example - |
430 | * the complete code point will be returned. |
431 | * |
432 | * The iteration position will be set to the start of the returned code point. |
433 | * |
434 | * This function is roughly equivalent to the sequence |
435 | * utext_setNativeIndex(index); |
436 | * utext_current32(); |
437 | * (There is a subtle difference if the index is out of bounds by being less than zero - |
438 | * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32() |
439 | * will return the char at zero. utext_char32At(negative index), on the other hand, will |
440 | * return the U_SENTINEL value of -1.) |
441 | * |
442 | * @param ut the text to be accessed |
443 | * @param nativeIndex the native index of the character to be accessed. If the index points |
444 | * to other than the first unit of a multi-unit character, it will be adjusted |
445 | * to the start of the character. |
446 | * @return the code point at the specified index, or U_SENTINEL (-1) if the index is out of bounds. |
447 | * @stable ICU 3.4 |
448 | */ |
449 | U_STABLE UChar32 U_EXPORT2 |
450 | utext_char32At(UText *ut, int64_t nativeIndex); |
451 | |
452 | |
453 | /** |
454 | * Get the code point at the current iteration position, |
455 | * or U_SENTINEL (-1) if the iteration has reached the end of |
456 | * the input text. |
457 | * |
458 | * @param ut the text to be accessed. |
459 | * @return the Unicode code point at the current iteration position, |
460 | * or U_SENTINEL (-1) if the iteration has reached the end of the text. |
461 | * @stable ICU 3.4 |
462 | */ |
463 | U_STABLE UChar32 U_EXPORT2 |
464 | utext_current32(UText *ut); |
465 | |
466 | |
467 | /** |
468 | * Get the code point at the current iteration position of the UText, and |
469 | * advance the position to the first index following the character. |
470 | * |
471 | * If the position is at the end of the text (the index following |
472 | * the last character, which is also the length of the text), |
473 | * return U_SENTINEL (-1) and do not advance the index. |
474 | * |
475 | * This is a post-increment operation. |
476 | * |
477 | * An inline macro version of this function, UTEXT_NEXT32(), |
478 | * is available for performance critical use. |
479 | * |
480 | * @param ut the text to be accessed. |
481 | * @return the Unicode code point at the iteration position, or U_SENTINEL (-1) at the end of the text. |
482 | * @see UTEXT_NEXT32 |
483 | * @stable ICU 3.4 |
484 | */ |
485 | U_STABLE UChar32 U_EXPORT2 |
486 | utext_next32(UText *ut); |
487 | |
488 | |
489 | /** |
490 | * Move the iterator position to the character (code point) whose |
491 | * index precedes the current position, and return that character. |
492 | * This is a pre-decrement operation. |
493 | * |
494 | * If the initial position is at the start of the text (index of 0) |
495 | * return U_SENTINEL (-1), and leave the position unchanged. |
496 | * |
497 | * An inline macro version of this function, UTEXT_PREVIOUS32(), |
498 | * is available for performance critical use. |
499 | * |
500 | * @param ut the text to be accessed. |
501 | * @return the previous UChar32 code point, or U_SENTINEL (-1) |
502 | * if the iteration has reached the start of the text. |
503 | * @see UTEXT_PREVIOUS32 |
504 | * @stable ICU 3.4 |
505 | */ |
506 | U_STABLE UChar32 U_EXPORT2 |
507 | utext_previous32(UText *ut); |
508 | |
509 | |
510 | /** |
511 | * Set the iteration index and return the code point at that index. |
512 | * Leave the iteration index at the start of the following code point. |
513 | * |
514 | * This function is the most efficient and convenient way to |
515 | * begin a forward iteration. The results are identical to those |
516 | * from the sequence |
517 | * \code |
518 | * utext_setNativeIndex(); |
519 | * utext_next32(); |
520 | * \endcode |
521 | * |
522 | * @param ut the text to be accessed. |
523 | * @param nativeIndex Iteration index, in the native units of the text provider. |
524 | * @return Code point which starts at or before index, |
525 | * or U_SENTINEL (-1) if it is out of bounds. |
526 | * @stable ICU 3.4 |
527 | */ |
528 | U_STABLE UChar32 U_EXPORT2 |
529 | utext_next32From(UText *ut, int64_t nativeIndex); |
530 | |
531 | |
532 | |
533 | /** |
534 | * Set the iteration index, and return the code point preceding the |
535 | * one specified by the initial index. Leave the iteration position |
536 | * at the start of the returned code point. |
537 | * |
538 | * This function is the most efficient and convenient way to |
539 | * begin a backwards iteration. |
540 | * |
541 | * @param ut the text to be accessed. |
542 | * @param nativeIndex Iteration index in the native units of the text provider. |
543 | * @return Code point preceding the one at the initial index, |
544 | * or U_SENTINEL (-1) if it is out of bounds. |
545 | * |
546 | * @stable ICU 3.4 |
547 | */ |
548 | U_STABLE UChar32 U_EXPORT2 |
549 | utext_previous32From(UText *ut, int64_t nativeIndex); |
550 | |
551 | /** |
552 | * Get the current iterator position, which can range from 0 to |
553 | * the length of the text. |
554 | * The position is a native index into the input text, in whatever format it |
555 | * may have (possibly UTF-8 for example), and may not always be the same as |
556 | * the corresponding UChar (UTF-16) index. |
557 | * The returned position will always be aligned to a code point boundary. |
558 | * |
559 | * @param ut the text to be accessed. |
560 | * @return the current index position, in the native units of the text provider. |
561 | * @stable ICU 3.4 |
562 | */ |
563 | U_STABLE int64_t U_EXPORT2 |
564 | utext_getNativeIndex(const UText *ut); |
565 | |
566 | /** |
567 | * Set the current iteration position to the nearest code point |
568 | * boundary at or preceding the specified index. |
569 | * The index is in the native units of the original input text. |
570 | * If the index is out of range, it will be pinned to be within |
571 | * the range of the input text. |
572 | * <p> |
573 | * It will usually be more efficient to begin an iteration |
574 | * using the functions utext_next32From() or utext_previous32From() |
575 | * rather than utext_setNativeIndex(). |
576 | * <p> |
577 | * Moving the index position to an adjacent character is best done |
578 | * with utext_next32(), utext_previous32() or utext_moveIndex32(). |
579 | * Attempting to do direct arithmetic on the index position is |
580 | * complicated by the fact that the size (in native units) of a |
581 | * character depends on the underlying representation of the character |
582 | * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not |
583 | * easily knowable. |
584 | * |
585 | * @param ut the text to be accessed. |
586 | * @param nativeIndex the native unit index of the new iteration position. |
587 | * @stable ICU 3.4 |
588 | */ |
589 | U_STABLE void U_EXPORT2 |
590 | utext_setNativeIndex(UText *ut, int64_t nativeIndex); |
591 | |
592 | /** |
593 | * Move the iterator position by delta code points. The number of code points |
594 | * is a signed number; a negative delta will move the iterator backwards, |
595 | * towards the start of the text. |
596 | * <p> |
597 | * The index is moved by <code>delta</code> code points |
598 | * forward or backward, but no further backward than to 0 and |
599 | * no further forward than to utext_nativeLength(). |
600 | * The resulting index value will be in between 0 and length, inclusive. |
601 | * |
602 | * @param ut the text to be accessed. |
603 | * @param delta the signed number of code points to move the iteration position. |
604 | * @return TRUE if the position could be moved the requested number of positions while |
605 | * staying within the range [0 - text length]. |
606 | * @stable ICU 3.4 |
607 | */ |
608 | U_STABLE UBool U_EXPORT2 |
609 | utext_moveIndex32(UText *ut, int32_t delta); |
610 | |
611 | /** |
612 | * Get the native index of the character preceding the current position. |
613 | * If the iteration position is already at the start of the text, zero |
614 | * is returned. |
615 | * The value returned is the same as that obtained from the following sequence, |
616 | * but without the side effect of changing the iteration position. |
617 | * |
618 | * \code |
619 | * UText *ut = whatever; |
620 | * ... |
621 | * utext_previous32(ut); |
622 | * utext_getNativeIndex(ut); |
623 | * \endcode |
624 | * |
625 | * This function is most useful during forwards iteration, where it will get the |
626 | * native index of the character most recently returned from utext_next32(). |
627 | * |
628 | * @param ut the text to be accessed |
629 | * @return the native index of the character preceding the current index position, |
630 | * or zero if the current position is at the start of the text. |
631 | * @stable ICU 3.6 |
632 | */ |
633 | U_STABLE int64_t U_EXPORT2 |
634 | utext_getPreviousNativeIndex(UText *ut); |
635 | |
636 | |
637 | /** |
638 | * |
639 | * Extract text from a UText into a UChar buffer. The range of text to be extracted |
640 | * is specified in the native indices of the UText provider. These may not necessarily |
641 | * be UTF-16 indices. |
642 | * <p> |
643 | * The size (number of 16 bit UChars) of the data to be extracted is returned. The |
644 | * full number of UChars is returned, even when the extracted text is truncated |
645 | * because the specified buffer size is too small. |
646 | * <p> |
647 | * The extracted string will (if you are a user) / must (if you are a text provider) |
648 | * be NUL-terminated if there is sufficient space in the destination buffer. This |
649 | * terminating NUL is not included in the returned length. |
650 | * <p> |
651 | * The iteration index is left at the position following the last extracted character. |
652 | * |
653 | * @param ut the UText from which to extract data. |
654 | * @param nativeStart the native index of the first character to extract.\ |
655 | * If the specified index is out of range, |
656 | * it will be pinned to be within 0 <= index <= textLength |
657 | * @param nativeLimit the native string index of the position following the last |
658 | * character to extract. If the specified index is out of range, |
659 | * it will be pinned to be within 0 <= index <= textLength. |
660 | * nativeLimit must be >= nativeStart. |
661 | * @param dest the UChar (UTF-16) buffer into which the extracted text is placed |
662 | * @param destCapacity The size, in UChars, of the destination buffer. May be zero |
663 | * for precomputing the required size. |
664 | * @param status receives any error status. |
665 | * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the |
666 | * buffer was too small. Returns number of UChars for preflighting. |
667 | * @return Number of UChars in the data to be extracted. Does not include a trailing NUL. |
668 | * |
669 | * @stable ICU 3.4 |
670 | */ |
671 | U_STABLE int32_t U_EXPORT2 |
672 | utext_extract(UText *ut, |
673 | int64_t nativeStart, int64_t nativeLimit, |
674 | UChar *dest, int32_t destCapacity, |
675 | UErrorCode *status); |
676 | |
677 | |
678 | |
679 | /************************************************************************************ |
680 | * |
681 | * #define inline versions of selected performance-critical text access functions |
682 | * Caution: do not use auto increment++ or decrement-- expressions |
683 | * as parameters to these macros. |
684 | * |
685 | * For most use, where there is no extreme performance constraint, the |
686 | * normal, non-inline functions are a better choice. The resulting code |
687 | * will be smaller, and, if the need ever arises, easier to debug. |
688 | * |
689 | * These are implemented as #defines rather than real functions |
690 | * because there is no fully portable way to do inline functions in plain C. |
691 | * |
692 | ************************************************************************************/ |
693 | |
694 | #ifndef U_HIDE_INTERNAL_API |
695 | /** |
696 | * Inline version of utext_current32(), for performance-critical situations. |
697 | * |
698 | * Get the code point at the current iteration position of the UText. |
699 | * Returns U_SENTINEL (-1) if the position is at the end of the text. |
700 | * Reads the chunkContents entry at chunkOffset directly when chunkOffset < chunkLength and the |
701 | * entry is below 0xd800; otherwise calls utext_current32(). The argument is evaluated more than once. |
702 | * @internal ICU 4.4 technology preview |
703 | */ |
704 | #define UTEXT_CURRENT32(ut) \ |
705 | ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ |
706 | ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut)) |
707 | #endif /* U_HIDE_INTERNAL_API */ |
708 | |
709 | /** |
710 | * Inline version of utext_next32(), for performance-critical situations. |
711 | * |
712 | * Get the code point at the current iteration position of the UText, and |
713 | * advance the position to the first index following the character. |
714 | * This is a post-increment operation. |
715 | * Returns U_SENTINEL (-1) if the position is at the end of the text. |
716 | * When chunkOffset < chunkLength and the chunkContents entry there is below 0xd800, that entry is |
717 | * returned and chunkOffset incremented; otherwise utext_next32() is called. The argument is evaluated more than once. |
718 | * @stable ICU 3.4 |
719 | */ |
720 | #define UTEXT_NEXT32(ut) \ |
721 | ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ |
722 | ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut)) |
723 | |
/**
 * Macro form of utext_previous32(), intended for performance-critical code.
 *
 * Moves the iteration position back to the character (code point) that
 * precedes the current position and yields that character (pre-decrement).
 * The fast path is taken when the chunk offset is greater than zero and the
 * preceding UTF-16 unit is below 0xd800; every other case is delegated to
 * utext_previous32().
 * The result is U_SENTINEL (-1) when the position is at the start of the text.
 *
 * The argument is expanded more than once: do not pass an expression that
 * has side effects.
 *
 * @stable ICU 3.4
 */
#define UTEXT_PREVIOUS32(ut) \
    ((0 < (ut)->chunkOffset && \
      0xd800 > (ut)->chunkContents[(ut)->chunkOffset - 1]) \
        ? (ut)->chunkContents[--(ut)->chunkOffset] \
        : utext_previous32(ut))
738 | |
/**
 * Macro form of utext_getNativeIndex(), intended for performance-critical code.
 *
 * Yields the current iterator position, which ranges from 0 to the length of
 * the text.  The position is a native index into the input text, expressed in
 * whatever format that text has (UTF-8, for example), so it is not necessarily
 * the same as the corresponding UChar (UTF-16) index.
 * The returned position is always aligned to a code point boundary.
 *
 * While the chunk offset does not exceed nativeIndexingLimit the result is
 * computed directly as chunkNativeStart + chunkOffset; otherwise the
 * provider's mapOffsetToNative() function is called.
 *
 * The argument is expanded more than once: do not pass an expression that
 * has side effects.
 *
 * @stable ICU 3.6
 */
#define UTEXT_GETNATIVEINDEX(ut) \
    (((ut)->nativeIndexingLimit >= (ut)->chunkOffset) \
        ? ((ut)->chunkNativeStart + (ut)->chunkOffset) \
        : (ut)->pFuncs->mapOffsetToNative(ut))
755 | |
/**
 * Macro form of utext_setNativeIndex(), intended for performance-critical code.
 *
 * Sets the current iteration position to the nearest code point boundary at
 * or preceding the specified index.  The index is in the native units of the
 * original input text.  If the index is out of range, it will be pinned to be
 * within the range of the input text.
 *
 * When the requested index falls inside the part of the current chunk where
 * native and UTF-16 indexing coincide (offset 0..nativeIndexingLimit), only
 * chunkOffset is updated; otherwise utext_setNativeIndex() is called.
 *
 * The index argument is evaluated exactly once.  The UText argument is still
 * expanded several times, so it must not have side effects.
 *
 * The expansion is a brace-enclosed compound statement, not an expression.
 * Do not use it as the unbraced body of an if that is followed by an else.
 *
 * @param ut  the UText whose position is to be set.
 * @param ix  the requested native index.
 *
 * @stable ICU 3.8
 */
#define UTEXT_SETNATIVEINDEX(ut, ix) \
    { int64_t utextSniIndex = (ix); \
      int64_t utextSniOffset = utextSniIndex - (ut)->chunkNativeStart; \
      if (utextSniOffset >= 0 && utextSniOffset <= (int64_t)(ut)->nativeIndexingLimit) { \
          (ut)->chunkOffset = (int32_t)utextSniOffset; \
      } else { \
          utext_setNativeIndex((ut), utextSniIndex); } }
773 | |
774 | |
775 | |
776 | /************************************************************************************ |
777 | * |
778 | * Functions related to writing or modifying the text. |
779 | * These will work only with modifiable UTexts. Attempting to |
780 | * modify a read-only UText will return an error status. |
781 | * |
782 | ************************************************************************************/ |
783 | |
784 | |
785 | /** |
786 | * Return TRUE if the text can be written (modified) with utext_replace() or |
787 | * utext_copy(). For the text to be writable, the text provider must |
788 | * be of a type that supports writing and the UText must not be frozen. |
789 | * |
790 | * Attempting to modify text when utext_isWriteable() is FALSE will fail - |
791 | * the text will not be modified, and an error will be returned from the function |
792 | * that attempted the modification. |
793 | * |
794 | * @param ut the UText to be tested. |
795 | * @return TRUE if the text is modifiable. |
796 | * |
797 | * @see utext_freeze() |
798 | * @see utext_replace() |
799 | * @see utext_copy() |
800 | * @stable ICU 3.4 |
801 | * |
802 | */ |
803 | U_STABLE UBool U_EXPORT2 |
804 | utext_isWritable(const UText *ut); |
805 | |
806 | |
807 | /** |
808 | * Test whether there is meta data associated with the text. |
809 | * @see Replaceable::hasMetaData() |
810 | * |
811 | * @param ut The UText to be tested |
812 | * @return TRUE if the underlying text includes meta data. |
813 | * @stable ICU 3.4 |
814 | */ |
815 | U_STABLE UBool U_EXPORT2 |
816 | utext_hasMetaData(const UText *ut); |
817 | |
818 | |
819 | /** |
820 | * Replace a range of the original text with a replacement text. |
821 | * |
822 | * Leaves the current iteration position at the position following the |
823 | * newly inserted replacement text. |
824 | * |
825 | * This function is only available on UText types that support writing, |
826 | * that is, ones where utext_isWritable() returns TRUE. |
827 | * |
828 | * When using this function, there should be only a single UText opened onto the |
829 | * underlying native text string. Behavior after a replace operation |
830 | * on a UText is undefined for any other additional UTexts that refer to the |
831 | * modified string. |
832 | * |
833 | * @param ut the UText representing the text to be operated on. |
834 | * @param nativeStart the native index of the start of the region to be replaced |
835 | * @param nativeLimit the native index of the character following the region to be replaced. |
836 | * @param replacementText pointer to the replacement text |
837 | * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated. |
838 | * @param status receives any error status. Possible errors include |
839 | * U_NO_WRITE_PERMISSION |
840 | * |
841 | * @return The signed number of (native) storage units by which |
842 | * the length of the text expanded or contracted. |
843 | * |
844 | * @stable ICU 3.4 |
845 | */ |
846 | U_STABLE int32_t U_EXPORT2 |
847 | utext_replace(UText *ut, |
848 | int64_t nativeStart, int64_t nativeLimit, |
849 | const UChar *replacementText, int32_t replacementLength, |
850 | UErrorCode *status); |
851 | |
852 | |
853 | |
854 | /** |
855 | * |
856 | * Copy or move a substring from one position to another within the text, |
857 | * while retaining any metadata associated with the text. |
858 | * This function is used to duplicate or reorder substrings. |
859 | * The destination index must not overlap the source range. |
860 | * |
861 | * The text to be copied or moved is inserted at destIndex; |
862 | * it does not replace or overwrite any existing text. |
863 | * |
864 | * The iteration position is left following the newly inserted text |
865 | * at the destination position. |
866 | * |
867 | * This function is only available on UText types that support writing, |
868 | * that is, ones where utext_isWritable() returns TRUE. |
869 | * |
870 | * When using this function, there should be only a single UText opened onto the |
871 | * underlying native text string. Behavior after a copy operation |
872 | * on a UText is undefined in any other additional UTexts that refer to the |
873 | * modified string. |
874 | * |
875 | * @param ut The UText representing the text to be operated on. |
876 | * @param nativeStart The native index of the start of the region to be copied or moved |
877 | * @param nativeLimit The native index of the character position following the region |
878 | * to be copied. |
879 | * @param destIndex The native destination index to which the source substring is |
880 | * copied or moved. |
881 | * @param move If TRUE, then the substring is moved, not copied/duplicated. |
882 | * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION |
883 | * |
884 | * @stable ICU 3.4 |
885 | */ |
886 | U_STABLE void U_EXPORT2 |
887 | utext_copy(UText *ut, |
888 | int64_t nativeStart, int64_t nativeLimit, |
889 | int64_t destIndex, |
890 | UBool move, |
891 | UErrorCode *status); |
892 | |
893 | |
894 | /** |
895 | * <p> |
896 | * Freeze a UText. This prevents any modification to the underlying text itself |
897 | * by means of functions operating on this UText. |
898 | * </p> |
899 | * <p> |
900 | * Once frozen, a UText can not be unfrozen. The intent is to ensure |
901 | * that a the text underlying a frozen UText wrapper cannot be modified via that UText. |
902 | * </p> |
903 | * <p> |
904 | * Caution: freezing a UText will disable changes made via the specific |
905 | * frozen UText wrapper only; it will not have any effect on the ability to |
906 | * directly modify the text by bypassing the UText. Any such backdoor modifications |
907 | * are always an error while UText access is occuring because the underlying |
908 | * text can get out of sync with UText's buffering. |
909 | * </p> |
910 | * |
911 | * @param ut The UText to be frozen. |
912 | * @see utext_isWritable() |
913 | * @stable ICU 3.6 |
914 | */ |
915 | U_STABLE void U_EXPORT2 |
916 | utext_freeze(UText *ut); |
917 | |
918 | |
/**
 * UText provider properties (bit field indexes).
 *
 * @see UText
 * @stable ICU 3.4
 */
enum {
    /**
     * Determining the length of the text is potentially time consuming
     * for the provider.
     * @stable ICU 3.4
     */
    UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1,

    /**
     * Text chunks stay valid and usable until the text object is modified
     * or deleted, rather than only until the next call to the access()
     * function (which is the default).
     * @stable ICU 3.4
     */
    UTEXT_PROVIDER_STABLE_CHUNKS       = 2,

    /**
     * The provider supports modifying the text through the replace() and
     * copy() functions.
     * @see Replaceable
     * @stable ICU 3.4
     */
    UTEXT_PROVIDER_WRITABLE            = 3,

    /**
     * Meta data is associated with the text.
     * @see Replaceable::hasMetaData()
     * @stable ICU 3.4
     */
    UTEXT_PROVIDER_HAS_META_DATA       = 4,

    /**
     * The text provider owns the text storage.
     * Generally the result of a deep clone of the UText.
     * When the UText is closed, the associated text must also be
     * closed/deleted/freed, whatever is appropriate.
     * @stable ICU 3.6
     */
    UTEXT_PROVIDER_OWNS_TEXT           = 5
};
960 | |
961 | /** |
962 | * Function type declaration for UText.clone(). |
963 | * |
964 | * clone a UText. Much like opening a UText where the source text is itself |
965 | * another UText. |
966 | * |
967 | * A deep clone will copy both the UText data structures and the underlying text. |
968 | * The original and cloned UText will operate completely independently; modifications |
969 | * made to the text in one will not effect the other. Text providers are not |
970 | * required to support deep clones. The user of clone() must check the status return |
971 | * and be prepared to handle failures. |
972 | * |
973 | * A shallow clone replicates only the UText data structures; it does not make |
974 | * a copy of the underlying text. Shallow clones can be used as an efficient way to |
975 | * have multiple iterators active in a single text string that is not being |
976 | * modified. |
977 | * |
978 | * A shallow clone operation must not fail except for truly exceptional conditions such |
979 | * as memory allocation failures. |
980 | * |
981 | * A UText and its clone may be safely concurrently accessed by separate threads. |
982 | * This is true for both shallow and deep clones. |
983 | * It is the responsibility of the Text Provider to ensure that this thread safety |
984 | * constraint is met. |
985 | |
986 | * |
987 | * @param dest A UText struct to be filled in with the result of the clone operation, |
988 | * or NULL if the clone function should heap-allocate a new UText struct. |
989 | * @param src The UText to be cloned. |
990 | * @param deep TRUE to request a deep clone, FALSE for a shallow clone. |
991 | * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR |
992 | * should be returned if the text provider is unable to clone the |
993 | * original text. |
994 | * @return The newly created clone, or NULL if the clone operation failed. |
995 | * |
996 | * @stable ICU 3.4 |
997 | */ |
998 | typedef UText * U_CALLCONV |
999 | UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status); |
1000 | |
1001 | |
1002 | /** |
1003 | * Function type declaration for UText.nativeLength(). |
1004 | * |
1005 | * @param ut the UText to get the length of. |
1006 | * @return the length, in the native units of the original text string. |
1007 | * @see UText |
1008 | * @stable ICU 3.4 |
1009 | */ |
1010 | typedef int64_t U_CALLCONV |
1011 | UTextNativeLength(UText *ut); |
1012 | |
1013 | /** |
1014 | * Function type declaration for UText.access(). Get the description of the text chunk |
1015 | * containing the text at a requested native index. The UText's iteration |
1016 | * position will be left at the requested index. If the index is out |
1017 | * of bounds, the iteration position will be left at the start or end |
1018 | * of the string, as appropriate. |
1019 | * |
1020 | * Chunks must begin and end on code point boundaries. A single code point |
1021 | * comprised of multiple storage units must never span a chunk boundary. |
1022 | * |
1023 | * |
1024 | * @param ut the UText being accessed. |
1025 | * @param nativeIndex Requested index of the text to be accessed. |
1026 | * @param forward If TRUE, then the returned chunk must contain text |
1027 | * starting from the index, so that start<=index<limit. |
1028 | * If FALSE, then the returned chunk must contain text |
1029 | * before the index, so that start<index<=limit. |
1030 | * @return True if the requested index could be accessed. The chunk |
1031 | * will contain the requested text. |
1032 | * False value if a chunk cannot be accessed |
1033 | * (the requested index is out of bounds). |
1034 | * |
1035 | * @see UText |
1036 | * @stable ICU 3.4 |
1037 | */ |
1038 | typedef UBool U_CALLCONV |
1039 | UTextAccess(UText *ut, int64_t nativeIndex, UBool forward); |
1040 | |
1041 | /** |
1042 | * Function type declaration for UText.extract(). |
1043 | * |
1044 | * Extract text from a UText into a UChar buffer. The range of text to be extracted |
1045 | * is specified in the native indices of the UText provider. These may not necessarily |
1046 | * be UTF-16 indices. |
1047 | * <p> |
1048 | * The size (number of 16 bit UChars) in the data to be extracted is returned. The |
1049 | * full amount is returned, even when the specified buffer size is smaller. |
1050 | * <p> |
1051 | * The extracted string will (if you are a user) / must (if you are a text provider) |
1052 | * be NUL-terminated if there is sufficient space in the destination buffer. |
1053 | * |
1054 | * @param ut the UText from which to extract data. |
1055 | * @param nativeStart the native index of the first characer to extract. |
1056 | * @param nativeLimit the native string index of the position following the last |
1057 | * character to extract. |
1058 | * @param dest the UChar (UTF-16) buffer into which the extracted text is placed |
1059 | * @param destCapacity The size, in UChars, of the destination buffer. May be zero |
1060 | * for precomputing the required size. |
1061 | * @param status receives any error status. |
1062 | * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for |
1063 | * preflighting. |
1064 | * @return Number of UChars in the data. Does not include a trailing NUL. |
1065 | * |
1066 | * @stable ICU 3.4 |
1067 | */ |
1068 | typedef int32_t U_CALLCONV |
1069 | (UText *ut, |
1070 | int64_t nativeStart, int64_t nativeLimit, |
1071 | UChar *dest, int32_t destCapacity, |
1072 | UErrorCode *status); |
1073 | |
1074 | /** |
1075 | * Function type declaration for UText.replace(). |
1076 | * |
1077 | * Replace a range of the original text with a replacement text. |
1078 | * |
1079 | * Leaves the current iteration position at the position following the |
1080 | * newly inserted replacement text. |
1081 | * |
1082 | * This function need only be implemented on UText types that support writing. |
1083 | * |
1084 | * When using this function, there should be only a single UText opened onto the |
1085 | * underlying native text string. The function is responsible for updating the |
1086 | * text chunk within the UText to reflect the updated iteration position, |
1087 | * taking into account any changes to the underlying string's structure caused |
1088 | * by the replace operation. |
1089 | * |
1090 | * @param ut the UText representing the text to be operated on. |
1091 | * @param nativeStart the index of the start of the region to be replaced |
1092 | * @param nativeLimit the index of the character following the region to be replaced. |
1093 | * @param replacementText pointer to the replacement text |
1094 | * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated. |
1095 | * @param status receives any error status. Possible errors include |
1096 | * U_NO_WRITE_PERMISSION |
1097 | * |
1098 | * @return The signed number of (native) storage units by which |
1099 | * the length of the text expanded or contracted. |
1100 | * |
1101 | * @stable ICU 3.4 |
1102 | */ |
1103 | typedef int32_t U_CALLCONV |
1104 | UTextReplace(UText *ut, |
1105 | int64_t nativeStart, int64_t nativeLimit, |
1106 | const UChar *replacementText, int32_t replacmentLength, |
1107 | UErrorCode *status); |
1108 | |
1109 | /** |
1110 | * Function type declaration for UText.copy(). |
1111 | * |
1112 | * Copy or move a substring from one position to another within the text, |
1113 | * while retaining any metadata associated with the text. |
1114 | * This function is used to duplicate or reorder substrings. |
1115 | * The destination index must not overlap the source range. |
1116 | * |
1117 | * The text to be copied or moved is inserted at destIndex; |
1118 | * it does not replace or overwrite any existing text. |
1119 | * |
1120 | * This function need only be implemented for UText types that support writing. |
1121 | * |
1122 | * When using this function, there should be only a single UText opened onto the |
1123 | * underlying native text string. The function is responsible for updating the |
1124 | * text chunk within the UText to reflect the updated iteration position, |
1125 | * taking into account any changes to the underlying string's structure caused |
1126 | * by the replace operation. |
1127 | * |
1128 | * @param ut The UText representing the text to be operated on. |
1129 | * @param nativeStart The index of the start of the region to be copied or moved |
1130 | * @param nativeLimit The index of the character following the region to be replaced. |
1131 | * @param nativeDest The destination index to which the source substring is copied or moved. |
1132 | * @param move If TRUE, then the substring is moved, not copied/duplicated. |
1133 | * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION |
1134 | * |
1135 | * @stable ICU 3.4 |
1136 | */ |
1137 | typedef void U_CALLCONV |
1138 | UTextCopy(UText *ut, |
1139 | int64_t nativeStart, int64_t nativeLimit, |
1140 | int64_t nativeDest, |
1141 | UBool move, |
1142 | UErrorCode *status); |
1143 | |
1144 | /** |
1145 | * Function type declaration for UText.mapOffsetToNative(). |
1146 | * Map from the current UChar offset within the current text chunk to |
1147 | * the corresponding native index in the original source text. |
1148 | * |
1149 | * This is required only for text providers that do not use native UTF-16 indexes. |
1150 | * |
1151 | * @param ut the UText. |
1152 | * @return Absolute (native) index corresponding to chunkOffset in the current chunk. |
1153 | * The returned native index should always be to a code point boundary. |
1154 | * |
1155 | * @stable ICU 3.4 |
1156 | */ |
1157 | typedef int64_t U_CALLCONV |
1158 | UTextMapOffsetToNative(const UText *ut); |
1159 | |
1160 | /** |
1161 | * Function type declaration for UText.mapIndexToUTF16(). |
1162 | * Map from a native index to a UChar offset within a text chunk. |
1163 | * Behavior is undefined if the native index does not fall within the |
1164 | * current chunk. |
1165 | * |
1166 | * This function is required only for text providers that do not use native UTF-16 indexes. |
1167 | * |
1168 | * @param ut The UText containing the text chunk. |
1169 | * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit. |
1170 | * @return Chunk-relative UTF-16 offset corresponding to the specified native |
1171 | * index. |
1172 | * |
1173 | * @stable ICU 3.4 |
1174 | */ |
1175 | typedef int32_t U_CALLCONV |
1176 | UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex); |
1177 | |
1178 | |
1179 | /** |
1180 | * Function type declaration for UText.utextClose(). |
1181 | * |
1182 | * A Text Provider close function is only required for provider types that make |
1183 | * allocations in their open function (or other functions) that must be |
1184 | * cleaned when the UText is closed. |
1185 | * |
1186 | * The allocation of the UText struct itself and any "extra" storage |
1187 | * associated with the UText is handled by the common UText implementation |
1188 | * and does not require provider specific cleanup in a close function. |
1189 | * |
1190 | * Most UText provider implementations do not need to implement this function. |
1191 | * |
1192 | * @param ut A UText object to be closed. |
1193 | * |
1194 | * @stable ICU 3.4 |
1195 | */ |
1196 | typedef void U_CALLCONV |
1197 | UTextClose(UText *ut); |
1198 | |
1199 | |
1200 | /** |
1201 | * (public) Function dispatch table for UText. |
1202 | * Conceptually very much like a C++ Virtual Function Table. |
1203 | * This struct defines the organization of the table. |
1204 | * Each text provider implementation must provide an |
1205 | * actual table that is initialized with the appropriate functions |
1206 | * for the type of text being handled. |
1207 | * @stable ICU 3.6 |
1208 | */ |
1209 | struct UTextFuncs { |
1210 | /** |
1211 | * (public) Function table size, sizeof(UTextFuncs) |
1212 | * Intended for use should the table grow to accomodate added |
1213 | * functions in the future, to allow tests for older format |
1214 | * function tables that do not contain the extensions. |
1215 | * |
1216 | * Fields are placed for optimal alignment on |
1217 | * 32/64/128-bit-pointer machines, by normally grouping together |
1218 | * 4 32-bit fields, |
1219 | * 4 pointers, |
1220 | * 2 64-bit fields |
1221 | * in sequence. |
1222 | * @stable ICU 3.6 |
1223 | */ |
1224 | int32_t tableSize; |
1225 | |
1226 | /** |
1227 | * (private) Alignment padding. |
1228 | * Do not use, reserved for use by the UText framework only. |
1229 | * @internal |
1230 | */ |
1231 | int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3; |
1232 | |
1233 | |
1234 | /** |
1235 | * (public) Function pointer for UTextClone |
1236 | * |
1237 | * @see UTextClone |
1238 | * @stable ICU 3.6 |
1239 | */ |
1240 | UTextClone *clone; |
1241 | |
1242 | /** |
1243 | * (public) function pointer for UTextLength |
1244 | * May be expensive to compute! |
1245 | * |
1246 | * @see UTextLength |
1247 | * @stable ICU 3.6 |
1248 | */ |
1249 | UTextNativeLength *nativeLength; |
1250 | |
1251 | /** |
1252 | * (public) Function pointer for UTextAccess. |
1253 | * |
1254 | * @see UTextAccess |
1255 | * @stable ICU 3.6 |
1256 | */ |
1257 | UTextAccess *access; |
1258 | |
1259 | /** |
1260 | * (public) Function pointer for UTextExtract. |
1261 | * |
1262 | * @see UTextExtract |
1263 | * @stable ICU 3.6 |
1264 | */ |
1265 | UTextExtract *; |
1266 | |
1267 | /** |
1268 | * (public) Function pointer for UTextReplace. |
1269 | * |
1270 | * @see UTextReplace |
1271 | * @stable ICU 3.6 |
1272 | */ |
1273 | UTextReplace *replace; |
1274 | |
1275 | /** |
1276 | * (public) Function pointer for UTextCopy. |
1277 | * |
1278 | * @see UTextCopy |
1279 | * @stable ICU 3.6 |
1280 | */ |
1281 | UTextCopy *copy; |
1282 | |
1283 | /** |
1284 | * (public) Function pointer for UTextMapOffsetToNative. |
1285 | * |
1286 | * @see UTextMapOffsetToNative |
1287 | * @stable ICU 3.6 |
1288 | */ |
1289 | UTextMapOffsetToNative *mapOffsetToNative; |
1290 | |
1291 | /** |
1292 | * (public) Function pointer for UTextMapNativeIndexToUTF16. |
1293 | * |
1294 | * @see UTextMapNativeIndexToUTF16 |
1295 | * @stable ICU 3.6 |
1296 | */ |
1297 | UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16; |
1298 | |
1299 | /** |
1300 | * (public) Function pointer for UTextClose. |
1301 | * |
1302 | * @see UTextClose |
1303 | * @stable ICU 3.6 |
1304 | */ |
1305 | UTextClose *close; |
1306 | |
1307 | /** |
1308 | * (private) Spare function pointer |
1309 | * @internal |
1310 | */ |
1311 | UTextClose *spare1; |
1312 | |
1313 | /** |
1314 | * (private) Spare function pointer |
1315 | * @internal |
1316 | */ |
1317 | UTextClose *spare2; |
1318 | |
1319 | /** |
1320 | * (private) Spare function pointer |
1321 | * @internal |
1322 | */ |
1323 | UTextClose *spare3; |
1324 | |
1325 | }; |
1326 | /** |
1327 | * Function dispatch table for UText |
1328 | * @see UTextFuncs |
1329 | */ |
1330 | typedef struct UTextFuncs UTextFuncs; |
1331 | |
1332 | /** |
1333 | * UText struct. Provides the interface between the generic UText access code |
1334 | * and the UText provider code that works on specific kinds of |
1335 | * text (UTF-8, noncontiguous UTF-16, whatever.) |
1336 | * |
1337 | * Applications that are using predefined types of text providers |
1338 | * to pass text data to ICU services will have no need to view the |
1339 | * internals of the UText structs that they open. |
1340 | * |
1341 | * @stable ICU 3.6 |
1342 | */ |
1343 | struct UText { |
1344 | /** |
1345 | * (private) Magic. Used to help detect when UText functions are handed |
1346 | * invalid or unitialized UText structs. |
1347 | * utext_openXYZ() functions take an initialized, |
1348 | * but not necessarily open, UText struct as an |
1349 | * optional fill-in parameter. This magic field |
1350 | * is used to check for that initialization. |
1351 | * Text provider close functions must NOT clear |
1352 | * the magic field because that would prevent |
1353 | * reuse of the UText struct. |
1354 | * @internal |
1355 | */ |
1356 | uint32_t magic; |
1357 | |
1358 | |
1359 | /** |
1360 | * (private) Flags for managing the allocation and freeing of |
1361 | * memory associated with this UText. |
1362 | * @internal |
1363 | */ |
1364 | int32_t flags; |
1365 | |
1366 | |
1367 | /** |
1368 | * Text provider properties. This set of flags is maintainted by the |
1369 | * text provider implementation. |
1370 | * @stable ICU 3.4 |
1371 | */ |
1372 | int32_t providerProperties; |
1373 | |
1374 | /** |
1375 | * (public) sizeOfStruct=sizeof(UText) |
1376 | * Allows possible backward compatible extension. |
1377 | * |
1378 | * @stable ICU 3.4 |
1379 | */ |
1380 | int32_t sizeOfStruct; |
1381 | |
1382 | /* ------ 16 byte alignment boundary ----------- */ |
1383 | |
1384 | |
1385 | /** |
1386 | * (protected) Native index of the first character position following |
1387 | * the current chunk. |
1388 | * @stable ICU 3.6 |
1389 | */ |
1390 | int64_t chunkNativeLimit; |
1391 | |
1392 | /** |
1393 | * (protected) Size in bytes of the extra space (pExtra). |
1394 | * @stable ICU 3.4 |
1395 | */ |
1396 | int32_t ; |
1397 | |
1398 | /** |
1399 | * (protected) The highest chunk offset where native indexing and |
1400 | * chunk (UTF-16) indexing correspond. For UTF-16 sources, value |
1401 | * will be equal to chunkLength. |
1402 | * |
1403 | * @stable ICU 3.6 |
1404 | */ |
1405 | int32_t nativeIndexingLimit; |
1406 | |
1407 | /* ---- 16 byte alignment boundary------ */ |
1408 | |
1409 | /** |
1410 | * (protected) Native index of the first character in the text chunk. |
1411 | * @stable ICU 3.6 |
1412 | */ |
1413 | int64_t chunkNativeStart; |
1414 | |
1415 | /** |
1416 | * (protected) Current iteration position within the text chunk (UTF-16 buffer). |
1417 | * This is the index to the character that will be returned by utext_next32(). |
1418 | * @stable ICU 3.6 |
1419 | */ |
1420 | int32_t chunkOffset; |
1421 | |
1422 | /** |
1423 | * (protected) Length the text chunk (UTF-16 buffer), in UChars. |
1424 | * @stable ICU 3.6 |
1425 | */ |
1426 | int32_t chunkLength; |
1427 | |
1428 | /* ---- 16 byte alignment boundary-- */ |
1429 | |
1430 | |
1431 | /** |
1432 | * (protected) pointer to a chunk of text in UTF-16 format. |
1433 | * May refer either to original storage of the source of the text, or |
1434 | * if conversion was required, to a buffer owned by the UText. |
1435 | * @stable ICU 3.6 |
1436 | */ |
1437 | const UChar *chunkContents; |
1438 | |
1439 | /** |
1440 | * (public) Pointer to Dispatch table for accessing functions for this UText. |
1441 | * @stable ICU 3.6 |
1442 | */ |
1443 | const UTextFuncs *pFuncs; |
1444 | |
1445 | /** |
1446 | * (protected) Pointer to additional space requested by the |
1447 | * text provider during the utext_open operation. |
1448 | * @stable ICU 3.4 |
1449 | */ |
1450 | void *; |
1451 | |
1452 | /** |
1453 | * (protected) Pointer to string or text-containin object or similar. |
1454 | * This is the source of the text that this UText is wrapping, in a format |
1455 | * that is known to the text provider functions. |
1456 | * @stable ICU 3.4 |
1457 | */ |
1458 | const void *context; |
1459 | |
1460 | /* --- 16 byte alignment boundary--- */ |
1461 | |
1462 | /** |
1463 | * (protected) Pointer fields available for use by the text provider. |
1464 | * Not used by UText common code. |
1465 | * @stable ICU 3.6 |
1466 | */ |
1467 | const void *p; |
1468 | /** |
1469 | * (protected) Pointer fields available for use by the text provider. |
1470 | * Not used by UText common code. |
1471 | * @stable ICU 3.6 |
1472 | */ |
1473 | const void *q; |
1474 | /** |
1475 | * (protected) Pointer fields available for use by the text provider. |
1476 | * Not used by UText common code. |
1477 | * @stable ICU 3.6 |
1478 | */ |
1479 | const void *r; |
1480 | |
1481 | /** |
1482 | * Private field reserved for future use by the UText framework |
1483 | * itself. This is not to be touched by the text providers. |
1484 | * @internal ICU 3.4 |
1485 | */ |
1486 | void *privP; |
1487 | |
1488 | |
1489 | /* --- 16 byte alignment boundary--- */ |
1490 | |
1491 | |
1492 | /** |
1493 | * (protected) Integer field reserved for use by the text provider. |
1494 | * Not used by the UText framework, or by the client (user) of the UText. |
1495 | * @stable ICU 3.4 |
1496 | */ |
1497 | int64_t a; |
1498 | |
1499 | /** |
1500 | * (protected) Integer field reserved for use by the text provider. |
1501 | * Not used by the UText framework, or by the client (user) of the UText. |
1502 | * @stable ICU 3.4 |
1503 | */ |
1504 | int32_t b; |
1505 | |
1506 | /** |
1507 | * (protected) Integer field reserved for use by the text provider. |
1508 | * Not used by the UText framework, or by the client (user) of the UText. |
1509 | * @stable ICU 3.4 |
1510 | */ |
1511 | int32_t c; |
1512 | |
1513 | /* ---- 16 byte alignment boundary---- */ |
1514 | |
1515 | |
1516 | /** |
1517 | * Private field reserved for future use by the UText framework |
1518 | * itself. This is not to be touched by the text providers. |
1519 | * @internal ICU 3.4 |
1520 | */ |
1521 | int64_t privA; |
1522 | /** |
1523 | * Private field reserved for future use by the UText framework |
1524 | * itself. This is not to be touched by the text providers. |
1525 | * @internal ICU 3.4 |
1526 | */ |
1527 | int32_t privB; |
1528 | /** |
1529 | * Private field reserved for future use by the UText framework |
1530 | * itself. This is not to be touched by the text providers. |
1531 | * @internal ICU 3.4 |
1532 | */ |
1533 | int32_t privC; |
1534 | }; |
1535 | |
1536 | |
1537 | /** |
1538 | * Common function for use by Text Provider implementations to allocate and/or initialize |
1539 | * a new UText struct. To be called in the implementation of utext_open() functions. |
1540 | * If the supplied UText parameter is null, a new UText struct will be allocated on the heap. |
1541 | * If the supplied UText is already open, the provider's close function will be called |
1542 | * so that the struct can be reused by the open that is in progress. |
1543 | * |
1544 | * @param ut pointer to a UText struct to be re-used, or null if a new UText |
1545 | * should be allocated. |
1546 | * @param extraSpace The amount of additional space to be allocated as part |
1547 | * of this UText, for use by types of providers that require |
1548 | * additional storage. |
1549 | * @param status Errors are returned here. |
1550 | * @return pointer to the UText, allocated if necessary, with extra space set up if requested. |
1551 | * @stable ICU 3.4 |
1552 | */ |
1553 | U_STABLE UText * U_EXPORT2 |
1554 | utext_setup(UText *ut, int32_t , UErrorCode *status); |
1555 | |
/* Deliberately NOT wrapped in #ifndef U_HIDE_INTERNAL_API: the stable macro
 * UTEXT_INITIALIZER expands to UTEXT_MAGIC, so the constant has to stay
 * visible even when internal API is hidden. */
/**
 * @internal
 * Value used to help identify correctly initialized UText structs.
 * Note: must be publicly visible so that UTEXT_INITIALIZER can access it.
 */
enum {
    UTEXT_MAGIC = 0x345ad82c
};
1566 | |
/**
 * Initializer for local (stack) instances of a UText struct.
 * A UText struct must be initialized before it is passed to one of the
 * utext_open functions.
 *
 * The values are positional and follow the member order of struct UText:
 * the magic value, the struct size, and zero / NULL for everything else.
 *
 * @stable ICU 3.6
 */
#define UTEXT_INITIALIZER {                          \
    UTEXT_MAGIC,    /* magic                */       \
    0,              /* flags                */       \
    0,              /* providerProps        */       \
    sizeof(UText),  /* sizeOfStruct         */       \
    0,              /* chunkNativeLimit     */       \
    0,              /* extraSize            */       \
    0,              /* nativeIndexingLimit  */       \
    0,              /* chunkNativeStart     */       \
    0,              /* chunkOffset          */       \
    0,              /* chunkLength          */       \
    NULL,           /* chunkContents        */       \
    NULL,           /* pFuncs               */       \
    NULL,           /* pExtra               */       \
    NULL,           /* context              */       \
    NULL,           /* p                    */       \
    NULL,           /* q                    */       \
    NULL,           /* r                    */       \
    NULL,           /* privP                */       \
    0,              /* a                    */       \
    0,              /* b                    */       \
    0,              /* c                    */       \
    0,              /* privA                */       \
    0,              /* privB                */       \
    0               /* privC                */       \
}
1594 | |
1595 | |
1596 | U_CDECL_END |
1597 | |
1598 | |
1599 | |
1600 | #endif |
1601 | |