1 | /* |
2 | ******************************************************************************* |
3 | * |
4 | * Copyright (C) 2002-2011 International Business Machines |
5 | * Corporation and others. All Rights Reserved. |
6 | * |
7 | ******************************************************************************* |
8 | * file name: uiter.h |
9 | * encoding: US-ASCII |
10 | * tab size: 8 (not used) |
11 | * indentation:4 |
12 | * |
13 | * created on: 2002jan18 |
14 | * created by: Markus W. Scherer |
15 | */ |
16 | |
17 | #ifndef __UITER_H__ |
18 | #define __UITER_H__ |
19 | |
20 | /** |
21 | * \file |
22 | * \brief C API: Unicode Character Iteration |
23 | * |
24 | * @see UCharIterator |
25 | */ |
26 | |
27 | #include "unicode/utypes.h" |
28 | |
29 | #if U_SHOW_CPLUSPLUS_API |
30 | U_NAMESPACE_BEGIN |
31 | /* Forward declarations of the C++ classes named in the uiter_setCharacterIterator() and uiter_setReplaceable() prototypes. */ |
32 | class CharacterIterator; |
33 | class Replaceable; |
34 | |
35 | U_NAMESPACE_END |
36 | #endif |
37 | |
38 | U_CDECL_BEGIN |
39 | |
40 | struct UCharIterator; /* Forward declaration: the function types declared below take a UCharIterator pointer before the structure itself is defined. */ |
41 | typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator, so that the type can be named without the "struct" keyword. @stable ICU 2.1 */ |
42 | |
43 | /** |
44 | * Origin constants for UCharIterator.getIndex() and UCharIterator.move(): they select the start, current, limit, 0 (zero), or length index. |
45 | * @see UCharIteratorGetIndex, UCharIteratorMove |
46 | * @see UCharIterator |
47 | * @stable ICU 2.1 |
48 | */ |
49 | typedef enum UCharIteratorOrigin { |
50 | UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH |
51 | } UCharIteratorOrigin; |
52 | |
53 | /** Constants for UCharIterator. @stable ICU 2.6 */ |
54 | enum { |
55 | /** |
56 | * Constant value that may be returned by UCharIteratorMove |
57 | * indicating that the final UTF-16 index is not known, but that the move succeeded. |
58 | * This can occur when moving relative to limit or length, or |
59 | * when moving relative to the current index after a setState() |
60 | * when the current UTF-16 index is not known. |
61 | * |
62 | * It would be very inefficient to have to count from the beginning of the text |
63 | * just to get the current/limit/length index after moving relative to it. |
64 | * The actual index can be determined with getIndex(UITER_CURRENT) |
65 | * which will count the UChars if necessary. |
66 | * This value does not indicate an error; move() reports an error condition with U_SENTINEL. |
67 | * @stable ICU 2.6 |
68 | */ |
69 | UITER_UNKNOWN_INDEX=-2 |
70 | }; |
71 | |
72 | |
73 | /** |
74 | * Constant for UCharIterator getState() indicating an error or |
75 | * an unknown state. |
76 | * Returned by uiter_getState()/UCharIteratorGetState |
77 | * when an error occurs; uiter_getState() also returns it when iter->getState is NULL. |
78 | * Also, some UCharIterator implementations may not be able to return |
79 | * a valid state for each position. This will be clearly documented |
80 | * for each such iterator (none of the public ones here). |
81 | * |
82 | * @stable ICU 2.6 |
83 | */ |
84 | #define UITER_NO_STATE ((uint32_t)0xffffffff) |
85 | |
86 | /** |
87 | * Function type declaration for UCharIterator.getIndex(). |
88 | * |
89 | * Gets the current position, or the zero, start, limit, or length index |
90 | * of the iteration range, as selected by the origin argument. |
91 | * |
92 | * This function may perform slowly for UITER_CURRENT after setState() was called, |
93 | * or for UITER_LENGTH, because an iterator implementation may have to count |
94 | * UChars if the underlying storage is not UTF-16. |
95 | * |
96 | * @param iter the UCharIterator structure ("this pointer") |
97 | * @param origin selects which index to get: the 0, start, limit, length, or current index |
98 | * @return the requested index, or U_SENTINEL in an error condition |
99 | * |
100 | * @see UCharIteratorOrigin |
101 | * @see UCharIterator |
102 | * @stable ICU 2.1 |
103 | */ |
104 | typedef int32_t U_CALLCONV |
105 | UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); |
106 | |
107 | /** |
108 | * Function type declaration for UCharIterator.move(). |
109 | * |
110 | * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). |
111 | * |
112 | * Moves the current position relative to the start or limit of the |
113 | * iteration range, or relative to the current position itself. |
114 | * The movement is expressed in numbers of code units forward |
115 | * or backward by specifying a positive or negative delta. |
116 | * Out of bounds movement will be pinned to the start or limit. |
117 | * |
118 | * This function may perform slowly for moving relative to UITER_LENGTH |
119 | * because an iterator implementation may have to count the rest of the |
120 | * UChars if the native storage is not UTF-16. |
121 | * |
122 | * When moving relative to the limit or length, or |
123 | * relative to the current position after setState() was called, |
124 | * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient |
125 | * determination of the actual UTF-16 index. |
126 | * The actual index can be determined with getIndex(UITER_CURRENT) |
127 | * which will count the UChars if necessary. |
128 | * See UITER_UNKNOWN_INDEX for details. |
129 | * |
130 | * @param iter the UCharIterator structure ("this pointer") |
131 | * @param delta number of code units to move from the origin: positive (forward), zero, or negative (backward) |
132 | * @param origin move relative to the 0, start, limit, length, or current index |
133 | * @return the new index, or U_SENTINEL on an error condition, |
134 | * or UITER_UNKNOWN_INDEX when the index is not known. |
135 | * |
136 | * @see UCharIteratorOrigin |
137 | * @see UCharIterator |
138 | * @see UITER_UNKNOWN_INDEX |
139 | * @stable ICU 2.1 |
140 | */ |
141 | typedef int32_t U_CALLCONV |
142 | UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); |
143 | |
144 | /** |
145 | * Function type declaration for UCharIterator.hasNext(). |
146 | * |
147 | * Check if current() and next() can still |
148 | * return another code unit; if not, they return U_SENTINEL (index is at the limit). |
149 | * |
150 | * @param iter the UCharIterator structure ("this pointer") |
151 | * @return boolean value for whether current() and next() can still return another code unit |
152 | * |
153 | * @see UCharIterator |
154 | * @stable ICU 2.1 |
155 | */ |
156 | typedef UBool U_CALLCONV |
157 | UCharIteratorHasNext(UCharIterator *iter); |
158 | |
159 | /** |
160 | * Function type declaration for UCharIterator.hasPrevious(). |
161 | * |
162 | * Check if previous() can still return another code unit; if not, it returns U_SENTINEL (index is at the start). |
163 | * |
164 | * @param iter the UCharIterator structure ("this pointer") |
165 | * @return boolean value for whether previous() can still return another code unit |
166 | * |
167 | * @see UCharIterator |
168 | * @stable ICU 2.1 |
169 | */ |
170 | typedef UBool U_CALLCONV |
171 | UCharIteratorHasPrevious(UCharIterator *iter); |
172 | |
173 | /** |
174 | * Function type declaration for UCharIterator.current(). |
175 | * |
176 | * Return the code unit at the current position, |
177 | * or U_SENTINEL if there is none (index is at the limit). |
178 | * |
179 | * @param iter the UCharIterator structure ("this pointer") |
180 | * @return the current code unit, or U_SENTINEL if the index is at the limit |
181 | * |
182 | * @see UCharIterator |
183 | * @stable ICU 2.1 |
184 | */ |
185 | typedef UChar32 U_CALLCONV |
186 | UCharIteratorCurrent(UCharIterator *iter); |
187 | |
188 | /** |
189 | * Function type declaration for UCharIterator.next(). |
190 | * |
191 | * Return the code unit at the current index and increment |
192 | * the index (post-increment, like s[i++]), |
193 | * or return U_SENTINEL if there is none (index is at the limit). |
194 | * |
195 | * @param iter the UCharIterator structure ("this pointer") |
196 | * @return the current code unit (and post-increment the current index), or U_SENTINEL if the index is at the limit |
197 | * |
198 | * @see UCharIterator |
199 | * @stable ICU 2.1 |
200 | */ |
201 | typedef UChar32 U_CALLCONV |
202 | UCharIteratorNext(UCharIterator *iter); |
203 | |
204 | /** |
205 | * Function type declaration for UCharIterator.previous(). |
206 | * |
207 | * Decrement the index and return the code unit from there |
208 | * (pre-decrement, like s[--i]), |
209 | * or return U_SENTINEL if there is none (index is at the start). |
210 | * |
211 | * @param iter the UCharIterator structure ("this pointer") |
212 | * @return the previous code unit (after pre-decrementing the current index), or U_SENTINEL if the index is at the start |
213 | * |
214 | * @see UCharIterator |
215 | * @stable ICU 2.1 |
216 | */ |
217 | typedef UChar32 U_CALLCONV |
218 | UCharIteratorPrevious(UCharIterator *iter); |
219 | |
220 | /** |
221 | * Function type declaration for UCharIterator.reservedFn(). |
222 | * Reserved for future use; the UCharIterator.reservedFn field is documented as currently NULL. |
223 | * |
224 | * @param iter the UCharIterator structure ("this pointer") |
225 | * @param something some integer argument |
226 | * @return some integer |
227 | * |
228 | * @see UCharIterator |
229 | * @stable ICU 2.1 |
230 | */ |
231 | typedef int32_t U_CALLCONV |
232 | UCharIteratorReserved(UCharIterator *iter, int32_t something); |
233 | |
234 | /** |
235 | * Function type declaration for UCharIterator.getState(). |
236 | * |
237 | * Get the "state" of the iterator in the form of a single 32-bit word. |
238 | * It is recommended that the state value be calculated to be as small as |
239 | * is feasible. For strings with limited lengths, fewer than 32 bits may |
240 | * be sufficient. |
241 | * |
242 | * This is used together with setState()/UCharIteratorSetState |
243 | * to save and restore the iterator position more efficiently than with |
244 | * getIndex()/move(). |
245 | * |
246 | * The iterator state is defined as a uint32_t value because it is designed |
247 | * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state |
248 | * of the character iterator. |
249 | * |
250 | * With some UCharIterator implementations (e.g., UTF-8), |
251 | * getting and setting the UTF-16 index with existing functions |
252 | * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but |
253 | * relatively slow because the iterator has to "walk" from a known index |
254 | * to the requested one. |
255 | * This takes more time the farther it needs to go. |
256 | * |
257 | * An opaque state value allows an iterator implementation to provide |
258 | * an internal index (UTF-8: the source byte array index) for |
259 | * fast, constant-time restoration. |
260 | * |
261 | * After calling setState(), a getIndex(UITER_CURRENT) may be slow because |
262 | * the UTF-16 index may not be restored as well, but the iterator can deliver |
263 | * the correct text contents and move relative to the current position |
264 | * without performance degradation. |
265 | * |
266 | * Some UCharIterator implementations may not be able to return |
267 | * a valid state for each position, in which case they return UITER_NO_STATE instead. |
268 | * This will be clearly documented for each such iterator (none of the public ones here). |
269 | * |
270 | * @param iter the UCharIterator structure ("this pointer") |
271 | * @return the state word, or UITER_NO_STATE if no valid state can be returned for the current position |
272 | * |
273 | * @see UCharIterator |
274 | * @see UCharIteratorSetState |
275 | * @see UITER_NO_STATE |
276 | * @stable ICU 2.6 |
277 | */ |
278 | typedef uint32_t U_CALLCONV |
279 | UCharIteratorGetState(const UCharIterator *iter); |
280 | |
281 | /** |
282 | * Function type declaration for UCharIterator.setState(). |
283 | * |
284 | * Restore the "state" of the iterator using a state word from a getState() call. |
285 | * The iterator object need not be the same one on which getState() was called, |
286 | * but it must be of the same type (set up using the same uiter_setXYZ function) |
287 | * and it must iterate over the same string |
288 | * (binary identical regardless of memory address). |
289 | * For more about the state word see UCharIteratorGetState. |
290 | * |
291 | * After calling setState(), a getIndex(UITER_CURRENT) may be slow because |
292 | * the UTF-16 index may not be restored as well, but the iterator can deliver |
293 | * the correct text contents and move relative to the current position |
294 | * without performance degradation. |
295 | * |
296 | * @param iter the UCharIterator structure ("this pointer") |
297 | * @param state the state word from a getState() call |
298 | * on a same-type, same-string iterator |
299 | * @param pErrorCode Must be a valid pointer to an error code value, |
300 | * which must not indicate a failure before the function call. |
301 | * |
302 | * @see UCharIterator |
303 | * @see UCharIteratorGetState |
304 | * @stable ICU 2.6 |
305 | */ |
306 | typedef void U_CALLCONV |
307 | UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); |
308 | |
309 | |
310 | /** |
311 | * C API for code unit iteration. |
312 | * This can be used as a C wrapper around |
313 | * CharacterIterator, Replaceable, or implemented using simple strings, etc. |
314 | * |
315 | * There are two roles for using UCharIterator: |
316 | * |
317 | * A "provider" sets the necessary function pointers and controls the "protected" |
318 | * fields of the UCharIterator structure. A "provider" passes a UCharIterator |
319 | * into C APIs that need a UCharIterator as an abstract, flexible string interface. |
320 | * |
321 | * Implementations of such C APIs are "callers" of UCharIterator functions; |
322 | * they only use the "public" function pointers and never access the "protected" |
323 | * fields directly. |
324 | * |
325 | * The current() and next() functions only check the current index against the |
326 | * limit, and previous() only checks the current index against the start, |
327 | * to see if the iterator already reached the end of the iteration range. |
328 | * |
329 | * The assumption - in all iterators - is that the index is moved via the API, |
330 | * which means it won't go out of bounds, or the index is modified by |
331 | * user code that knows enough about the iterator implementation to set valid |
332 | * index values. |
333 | * |
334 | * UCharIterator functions return code unit values 0..0xffff, |
335 | * or U_SENTINEL if the iteration bounds are reached. |
336 | * |
337 | * @stable ICU 2.1 |
338 | */ |
339 | struct UCharIterator { |
340 | /** |
341 | * (protected) Pointer to string or wrapped object or similar. |
342 | * Not used by caller. |
343 | * @stable ICU 2.1 |
344 | */ |
345 | const void *context; |
346 | |
347 | /** |
348 | * (protected) Length of string or similar. |
349 | * Not used by caller. |
350 | * @stable ICU 2.1 |
351 | */ |
352 | int32_t length; |
353 | |
354 | /** |
355 | * (protected) Start index or similar. |
356 | * Not used by caller. |
357 | * @stable ICU 2.1 |
358 | */ |
359 | int32_t start; |
360 | |
361 | /** |
362 | * (protected) Current index or similar. |
363 | * Not used by caller. |
364 | * @stable ICU 2.1 |
365 | */ |
366 | int32_t index; |
367 | |
368 | /** |
369 | * (protected) Limit index or similar. |
370 | * Not used by caller. |
371 | * @stable ICU 2.1 |
372 | */ |
373 | int32_t limit; |
374 | |
375 | /** |
376 | * (protected) Used by UTF-8 iterators and possibly others. |
377 | * @stable ICU 2.1 |
378 | */ |
379 | int32_t reservedField; |
380 | |
381 | /** |
382 | * (public) Returns the current position or the |
383 | * start or limit index of the iteration range. |
384 | * |
385 | * @see UCharIteratorGetIndex |
386 | * @stable ICU 2.1 |
387 | */ |
388 | UCharIteratorGetIndex *getIndex; |
389 | |
390 | /** |
391 | * (public) Moves the current position relative to the start or limit of the |
392 | * iteration range, or relative to the current position itself. |
393 | * The movement is expressed in numbers of code units forward |
394 | * or backward by specifying a positive or negative delta. |
395 | * |
396 | * @see UCharIteratorMove |
397 | * @stable ICU 2.1 |
398 | */ |
399 | UCharIteratorMove *move; |
400 | |
401 | /** |
402 | * (public) Check if current() and next() can still |
403 | * return another code unit. |
404 | * |
405 | * @see UCharIteratorHasNext |
406 | * @stable ICU 2.1 |
407 | */ |
408 | UCharIteratorHasNext *hasNext; |
409 | |
410 | /** |
411 | * (public) Check if previous() can still return another code unit. |
412 | * |
413 | * @see UCharIteratorHasPrevious |
414 | * @stable ICU 2.1 |
415 | */ |
416 | UCharIteratorHasPrevious *hasPrevious; |
417 | |
418 | /** |
419 | * (public) Return the code unit at the current position, |
420 | * or U_SENTINEL if there is none (index is at the limit). |
421 | * |
422 | * @see UCharIteratorCurrent |
423 | * @stable ICU 2.1 |
424 | */ |
425 | UCharIteratorCurrent *current; |
426 | |
427 | /** |
428 | * (public) Return the code unit at the current index and increment |
429 | * the index (post-increment, like s[i++]), |
430 | * or return U_SENTINEL if there is none (index is at the limit). |
431 | * |
432 | * @see UCharIteratorNext |
433 | * @stable ICU 2.1 |
434 | */ |
435 | UCharIteratorNext *next; |
436 | |
437 | /** |
438 | * (public) Decrement the index and return the code unit from there |
439 | * (pre-decrement, like s[--i]), |
440 | * or return U_SENTINEL if there is none (index is at the start). |
441 | * |
442 | * @see UCharIteratorPrevious |
443 | * @stable ICU 2.1 |
444 | */ |
445 | UCharIteratorPrevious *previous; |
446 | |
447 | /** |
448 | * (public) Reserved for future use. Currently NULL. |
449 | * |
450 | * @see UCharIteratorReserved |
451 | * @stable ICU 2.1 |
452 | */ |
453 | UCharIteratorReserved *reservedFn; |
454 | |
455 | /** |
456 | * (public) Return the state of the iterator, to be restored later with setState(). |
457 | * This function pointer is NULL if the iterator does not implement it. |
458 | * |
459 | * @see UCharIteratorGetState |
460 | * @stable ICU 2.6 |
461 | */ |
462 | UCharIteratorGetState *getState; |
463 | |
464 | /** |
465 | * (public) Restore the iterator state from the state word from a call |
466 | * to getState(). |
467 | * This function pointer is NULL if the iterator does not implement it. |
468 | * |
469 | * @see UCharIteratorSetState |
470 | * @stable ICU 2.6 |
471 | */ |
472 | UCharIteratorSetState *setState; |
473 | }; |
474 | |
475 | /** |
476 | * Helper function for UCharIterator to get the code point |
477 | * at the current index. |
478 | * |
479 | * Return the code point that includes the code unit at the current position, |
480 | * or U_SENTINEL if there is none (index is at the limit). |
481 | * If the current code unit is a lead or trail surrogate, |
482 | * then the following or preceding surrogate is used to form |
483 | * the code point value. |
484 | * |
485 | * @param iter the UCharIterator structure ("this pointer") |
486 | * @return the current code point, or U_SENTINEL if the index is at the limit |
487 | * |
488 | * @see UCharIterator |
489 | * @see U16_GET |
490 | * @see UnicodeString::char32At() |
491 | * @stable ICU 2.1 |
492 | */ |
493 | U_STABLE UChar32 U_EXPORT2 |
494 | uiter_current32(UCharIterator *iter); |
495 | |
496 | /** |
497 | * Helper function for UCharIterator to get the next code point. |
498 | * |
499 | * Return the code point at the current index and increment |
500 | * the index (post-increment, like s[i++]), |
501 | * or return U_SENTINEL if there is none (index is at the limit). |
502 | * |
503 | * @param iter the UCharIterator structure ("this pointer") |
504 | * @return the current code point (and post-increment the current index), or U_SENTINEL if the index is at the limit |
505 | * |
506 | * @see UCharIterator |
507 | * @see U16_NEXT |
508 | * @stable ICU 2.1 |
509 | */ |
510 | U_STABLE UChar32 U_EXPORT2 |
511 | uiter_next32(UCharIterator *iter); |
512 | |
513 | /** |
514 | * Helper function for UCharIterator to get the previous code point. |
515 | * |
516 | * Decrement the index and return the code point from there |
517 | * (pre-decrement, like s[--i]), |
518 | * or return U_SENTINEL if there is none (index is at the start). |
519 | * |
520 | * @param iter the UCharIterator structure ("this pointer") |
521 | * @return the previous code point (after pre-decrementing the current index), or U_SENTINEL if the index is at the start |
522 | * |
523 | * @see UCharIterator |
524 | * @see U16_PREV |
525 | * @stable ICU 2.1 |
526 | */ |
527 | U_STABLE UChar32 U_EXPORT2 |
528 | uiter_previous32(UCharIterator *iter); |
529 | |
530 | /** |
531 | * Get the "state" of the iterator in the form of a single 32-bit word. |
532 | * This is a convenience function that calls iter->getState(iter) |
533 | * if iter->getState is not NULL; |
534 | * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. |
535 | * |
536 | * Some UCharIterator implementations may not be able to return |
537 | * a valid state for each position, in which case they return UITER_NO_STATE instead. |
538 | * This will be clearly documented for each such iterator (none of the public ones here). |
539 | * |
540 | * @param iter the UCharIterator structure ("this pointer") |
541 | * @return the state word, or UITER_NO_STATE if iter->getState is NULL or an error occurs |
542 | * @see uiter_setState |
543 | * @see UCharIterator |
544 | * @see UCharIteratorGetState |
545 | * @see UITER_NO_STATE |
546 | * @stable ICU 2.6 |
547 | */ |
548 | U_STABLE uint32_t U_EXPORT2 |
549 | uiter_getState(const UCharIterator *iter); |
550 | |
551 | /** |
552 | * Restore the "state" of the iterator using a state word from a getState() call. |
553 | * This is a convenience function that calls iter->setState(iter, state, pErrorCode) |
554 | * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. |
555 | * |
556 | * @param iter the UCharIterator structure ("this pointer") |
557 | * @param state the state word from a getState() call |
558 | * on a same-type, same-string iterator |
559 | * @param pErrorCode Must be a valid pointer to an error code value, |
560 | * which must not indicate a failure before the function call. |
561 | * @see uiter_getState |
562 | * @see UCharIterator |
563 | * @see UCharIteratorSetState |
564 | * @stable ICU 2.6 |
565 | */ |
566 | U_STABLE void U_EXPORT2 |
567 | uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); |
568 | |
569 | /** |
570 | * Set up a UCharIterator to iterate over a string. |
571 | * |
572 | * Sets the UCharIterator function pointers for iteration over the string s |
573 | * with iteration boundaries start=index=0 and length=limit=string length. |
574 | * The "provider" may set the start, index, and limit values at any time |
575 | * within the range 0..length. |
576 | * The length field will be ignored. |
577 | * |
578 | * The string pointer s is set into UCharIterator.context without copying |
579 | * or reallocating the string contents. |
580 | * |
581 | * getState() simply returns the current index. |
582 | * move() will always return the final index. |
583 | * |
584 | * @param iter UCharIterator structure to be set for iteration |
585 | * @param s String to iterate over; only the pointer is stored, the contents are not copied |
586 | * @param length Length of s in UChars, or -1 if NUL-terminated |
587 | * |
588 | * @see UCharIterator |
589 | * @stable ICU 2.1 |
590 | */ |
591 | U_STABLE void U_EXPORT2 |
592 | uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); |
593 | |
594 | /** |
595 | * Set up a UCharIterator to iterate over a UTF-16BE string |
596 | * (byte vector with a big-endian pair of bytes per UChar). |
597 | * |
598 | * Everything works just like with a normal UChar iterator (uiter_setString), |
599 | * except that UChars are assembled from byte pairs, |
600 | * and that the length argument here indicates an even number of bytes. |
601 | * |
602 | * getState() simply returns the current index. |
603 | * move() will always return the final index. |
604 | * |
605 | * @param iter UCharIterator structure to be set for iteration |
606 | * @param s UTF-16BE string (byte vector) to iterate over |
607 | * @param length Length of s as an even number of bytes, or -1 if NUL-terminated |
608 | * (NUL means pair of 0 bytes at even index from s) |
609 | * |
610 | * @see UCharIterator |
611 | * @see uiter_setString |
612 | * @stable ICU 2.6 |
613 | */ |
614 | U_STABLE void U_EXPORT2 |
615 | uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); |
616 | |
617 | /** |
618 | * Set up a UCharIterator to iterate over a UTF-8 string. |
619 | * |
620 | * Sets the UCharIterator function pointers for iteration over the UTF-8 string s |
621 | * with UTF-8 iteration boundaries 0 and length. |
622 | * The implementation counts the UTF-16 index on the fly and |
623 | * lazily evaluates the UTF-16 length of the text. |
624 | * |
625 | * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. |
626 | * When the reservedField is not 0, then it contains a supplementary code point |
627 | * and the UTF-16 index is between the two corresponding surrogates. |
628 | * At that point, the UTF-8 index is behind that code point. |
629 | * |
630 | * The UTF-8 string pointer s is set into UCharIterator.context without copying |
631 | * or reallocating the string contents. |
632 | * |
633 | * getState() returns a state value consisting of |
634 | * - the current UTF-8 source byte index (bits 31..1) |
635 | * - a flag (bit 0) that indicates whether the UChar position is in the middle |
636 | * of a surrogate pair |
637 | * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) |
638 | * |
639 | * getState() cannot also encode the UTF-16 index in the state value. |
640 | * move(relative to limit or length), or |
641 | * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. |
642 | * |
643 | * @param iter UCharIterator structure to be set for iteration |
644 | * @param s UTF-8 string to iterate over; only the pointer is stored, the contents are not copied |
645 | * @param length Length of s in bytes, or -1 if NUL-terminated |
646 | * @see UITER_UNKNOWN_INDEX |
647 | * @see UCharIterator |
648 | * @stable ICU 2.6 |
649 | */ |
650 | U_STABLE void U_EXPORT2 |
651 | uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); |
652 | |
653 | #if U_SHOW_CPLUSPLUS_API |
654 | |
655 | /** |
656 | * Set up a UCharIterator to wrap around a C++ CharacterIterator. |
657 | * |
658 | * Sets the UCharIterator function pointers for iteration using the |
659 | * CharacterIterator charIter. |
660 | * |
661 | * The CharacterIterator pointer charIter is set into UCharIterator.context |
662 | * without copying or cloning the CharacterIterator object. |
663 | * The other "protected" UCharIterator fields are set to 0 and will be ignored. |
664 | * The iteration index and boundaries are controlled by the CharacterIterator. |
665 | * |
666 | * getState() simply returns the current index. |
667 | * move() will always return the final index. |
668 | * |
669 | * @param iter UCharIterator structure to be set for iteration |
670 | * @param charIter CharacterIterator to wrap; only the pointer is stored, the object is not copied or cloned |
671 | * |
672 | * @see UCharIterator |
673 | * @stable ICU 2.1 |
674 | */ |
675 | U_STABLE void U_EXPORT2 |
676 | uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter); |
677 | |
678 | /** |
679 | * Set up a UCharIterator to iterate over a C++ Replaceable. |
680 | * |
681 | * Sets the UCharIterator function pointers for iteration over the |
682 | * Replaceable rep with iteration boundaries start=index=0 and |
683 | * length=limit=rep->length(). |
684 | * The "provider" may set the start, index, and limit values at any time |
685 | * within the range 0..length=rep->length(). |
686 | * The length field will be ignored. |
687 | * |
688 | * The Replaceable pointer rep is set into UCharIterator.context without copying |
689 | * or cloning/reallocating the Replaceable object. |
690 | * |
691 | * getState() simply returns the current index. |
692 | * move() will always return the final index. |
693 | * |
694 | * @param iter UCharIterator structure to be set for iteration |
695 | * @param rep Replaceable to iterate over; only the pointer is stored, the object is not copied or cloned |
696 | * |
697 | * @see UCharIterator |
698 | * @stable ICU 2.1 |
699 | */ |
700 | U_STABLE void U_EXPORT2 |
701 | uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep); |
702 | |
703 | #endif |
704 | |
705 | U_CDECL_END |
706 | |
707 | #endif |
708 | |