1/*
2 * Copyright (C) 2017 Apple Inc. All rights reserved.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
13 *
14 * You should have received a copy of the GNU Library General Public License
15 * along with this library; see the file COPYING.LIB. If not, write to
16 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 *
19 */
20
21#pragma once
22
23#include <unicode/ubrk.h>
24#include <wtf/Optional.h>
25#include <wtf/text/icu/UTextProviderLatin1.h>
26
27namespace WTF {
28
29class TextBreakIteratorICU {
30public:
31 enum class Mode {
32 Line,
33 Character,
34 };
35
36 void set8BitText(const LChar* buffer, unsigned length)
37 {
38 UTextWithBuffer textLocal;
39 textLocal.text = UTEXT_INITIALIZER;
40 textLocal.text.extraSize = sizeof(textLocal.buffer);
41 textLocal.text.pExtra = textLocal.buffer;
42
43 UErrorCode status = U_ZERO_ERROR;
44 UText* text = openLatin1UTextProvider(&textLocal, buffer, length, &status);
45 ASSERT(U_SUCCESS(status));
46 ASSERT(text);
47
48 ubrk_setUText(m_iterator, text, &status);
49 ASSERT(U_SUCCESS(status));
50
51 utext_close(text);
52 }
53
54 TextBreakIteratorICU(StringView string, Mode mode, const char *locale)
55 {
56 UBreakIteratorType type;
57 switch (mode) {
58 case Mode::Line:
59 type = UBRK_LINE;
60 break;
61 case Mode::Character:
62 type = UBRK_CHARACTER;
63 break;
64 default:
65 ASSERT_NOT_REACHED();
66 type = UBRK_CHARACTER;
67 break;
68 }
69
70 bool requiresSet8BitText = string.is8Bit();
71
72 const UChar *text = requiresSet8BitText ? nullptr : string.characters16();
73 int32_t textLength = requiresSet8BitText ? 0 : string.length();
74
75 // FIXME: Handle weak / normal / strict line breaking.
76 UErrorCode status = U_ZERO_ERROR;
77 m_iterator = ubrk_open(type, locale, text, textLength, &status);
78 ASSERT(U_SUCCESS(status));
79
80 if (requiresSet8BitText)
81 set8BitText(string.characters8(), string.length());
82 }
83
84 TextBreakIteratorICU() = delete;
85 TextBreakIteratorICU(const TextBreakIteratorICU&) = delete;
86
87 TextBreakIteratorICU(TextBreakIteratorICU&& other)
88 : m_iterator(other.m_iterator)
89 {
90 other.m_iterator = nullptr;
91 }
92
93 TextBreakIteratorICU& operator=(const TextBreakIteratorICU&) = delete;
94
95 TextBreakIteratorICU& operator=(TextBreakIteratorICU&& other)
96 {
97 if (m_iterator)
98 ubrk_close(m_iterator);
99 m_iterator = other.m_iterator;
100 other.m_iterator = nullptr;
101 return *this;
102 }
103
104 ~TextBreakIteratorICU()
105 {
106 if (m_iterator)
107 ubrk_close(m_iterator);
108 }
109
110 void setText(StringView string)
111 {
112 if (string.is8Bit()) {
113 set8BitText(string.characters8(), string.length());
114 return;
115 }
116 UErrorCode status = U_ZERO_ERROR;
117 ubrk_setText(m_iterator, string.characters16(), string.length(), &status);
118 ASSERT(U_SUCCESS(status));
119 }
120
121 Optional<unsigned> preceding(unsigned location) const
122 {
123 auto result = ubrk_preceding(m_iterator, location);
124 if (result == UBRK_DONE)
125 return { };
126 return result;
127 }
128
129 Optional<unsigned> following(unsigned location) const
130 {
131 auto result = ubrk_following(m_iterator, location);
132 if (result == UBRK_DONE)
133 return { };
134 return result;
135 }
136
137 bool isBoundary(unsigned location) const
138 {
139 return ubrk_isBoundary(m_iterator, location);
140 }
141
142private:
143 UBreakIterator* m_iterator;
144};
145
146}
147