1/*
2 * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#pragma once
27
28// FIXME: Move each iterator class into a separate header file.
29
30#include "FindOptions.h"
31#include "Range.h"
32#include "TextIteratorBehavior.h"
33#include <wtf/Vector.h>
34#include <wtf/text/StringView.h>
35
36namespace WebCore {
37
38class InlineTextBox;
39class RenderText;
40class RenderTextFragment;
41
42namespace SimpleLineLayout {
43class RunResolver;
44}
45
46WEBCORE_EXPORT String plainText(Position start, Position end, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false);
47WEBCORE_EXPORT String plainTextReplacingNoBreakSpace(Position start, Position end, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false);
48
49WEBCORE_EXPORT String plainText(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false);
50WEBCORE_EXPORT String plainTextReplacingNoBreakSpace(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false);
51WEBCORE_EXPORT String plainTextUsingBackwardsTextIteratorForTesting(const Range&);
52
53Ref<Range> findPlainText(const Range&, const String&, FindOptions);
54WEBCORE_EXPORT Ref<Range> findClosestPlainText(const Range&, const String&, FindOptions, unsigned);
55WEBCORE_EXPORT bool hasAnyPlainText(const Range&, TextIteratorBehavior = TextIteratorDefaultBehavior);
56bool findPlainText(const String& document, const String&, FindOptions); // Lets us use the search algorithm on a string.
57
58// FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
59bool isRendererReplacedElement(RenderObject*);
60
61class BitStack {
62public:
63 BitStack();
64 ~BitStack();
65
66 void push(bool);
67 void pop();
68
69 bool top() const;
70 unsigned size() const;
71
72private:
73 unsigned m_size;
74 Vector<unsigned, 1> m_words;
75};
76
77class TextIteratorCopyableText {
78public:
79 TextIteratorCopyableText()
80 : m_singleCharacter(0)
81 , m_offset(0)
82 , m_length(0)
83 {
84 }
85
86 StringView text() const { return m_singleCharacter ? StringView(&m_singleCharacter, 1) : StringView(m_string).substring(m_offset, m_length); }
87 void appendToStringBuilder(StringBuilder&) const;
88
89 void reset();
90 void set(String&&);
91 void set(String&&, unsigned offset, unsigned length);
92 void set(UChar);
93
94private:
95 UChar m_singleCharacter;
96 String m_string;
97 unsigned m_offset;
98 unsigned m_length;
99};
100
101// Iterates through the DOM range, returning all the text, and 0-length boundaries
102// at points where replaced elements break up the text flow. The text is delivered in
103// the chunks it's already stored in, to avoid copying any text.
104
105class TextIterator {
106 WTF_MAKE_FAST_ALLOCATED;
107public:
108 WEBCORE_EXPORT explicit TextIterator(Position start, Position end, TextIteratorBehavior = TextIteratorDefaultBehavior);
109 WEBCORE_EXPORT explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
110 WEBCORE_EXPORT ~TextIterator();
111
112 bool atEnd() const { return !m_positionNode; }
113 WEBCORE_EXPORT void advance();
114
115 StringView text() const { ASSERT(!atEnd()); return m_text; }
116 WEBCORE_EXPORT Ref<Range> range() const;
117 WEBCORE_EXPORT Node* node() const;
118
119 const TextIteratorCopyableText& copyableText() const { ASSERT(!atEnd()); return m_copyableText; }
120 void appendTextToStringBuilder(StringBuilder& builder) const { copyableText().appendToStringBuilder(builder); }
121
122 WEBCORE_EXPORT static int rangeLength(const Range*, bool spacesForReplacedElements = false);
123 WEBCORE_EXPORT static RefPtr<Range> rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
124 WEBCORE_EXPORT static bool getLocationAndLengthFromRange(Node* scope, const Range*, size_t& location, size_t& length);
125 WEBCORE_EXPORT static Ref<Range> subrange(Range& entireRange, int characterOffset, int characterCount);
126
127private:
128 void init();
129 void exitNode(Node*);
130 bool shouldRepresentNodeOffsetZero();
131 bool shouldEmitSpaceBeforeAndAfterNode(Node&);
132 void representNodeOffsetZero();
133 bool handleTextNode();
134 bool handleReplacedElement();
135 bool handleNonTextNode();
136 void handleTextBox();
137 void handleTextNodeFirstLetter(RenderTextFragment&);
138 void emitCharacter(UChar, Node& characterNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
139 void emitText(Text& textNode, RenderText&, int textStartOffset, int textEndOffset);
140
141 Node* baseNodeForEmittingNewLine() const;
142
143 const TextIteratorBehavior m_behavior { TextIteratorDefaultBehavior };
144
145 // Current position, not necessarily of the text being returned, but position as we walk through the DOM tree.
146 Node* m_node { nullptr };
147 int m_offset { 0 };
148 bool m_handledNode { false };
149 bool m_handledChildren { false };
150 BitStack m_fullyClippedStack;
151
152 // The range.
153 Node* m_startContainer { nullptr };
154 int m_startOffset { 0 };
155 Node* m_endContainer { nullptr };
156 int m_endOffset { 0 };
157 Node* m_pastEndNode { nullptr };
158
159 // The current text and its position, in the form to be returned from the iterator.
160 Node* m_positionNode { nullptr };
161 mutable Node* m_positionOffsetBaseNode { nullptr };
162 mutable int m_positionStartOffset { 0 };
163 mutable int m_positionEndOffset { 0 };
164 TextIteratorCopyableText m_copyableText;
165 StringView m_text;
166
167 // Used when there is still some pending text from the current node; when these are false and null, we go back to normal iterating.
168 Node* m_nodeForAdditionalNewline { nullptr };
169 InlineTextBox* m_textBox { nullptr };
170
171 // Used when iterating over :first-letter text to save pointer to remaining text box.
172 InlineTextBox* m_remainingTextBox { nullptr };
173
174 // Used to point to RenderText object for :first-letter.
175 RenderText* m_firstLetterText { nullptr };
176
177 // Used to do the whitespace collapsing logic.
178 Text* m_lastTextNode { nullptr };
179 bool m_lastTextNodeEndedWithCollapsedSpace { false };
180 UChar m_lastCharacter { 0 };
181
182 // Used to do simple line layout run logic.
183 bool m_nextRunNeedsWhitespace { false };
184 unsigned m_accumulatedSimpleTextLengthInFlow { 0 };
185 Text* m_previousSimpleTextNodeInFlow { nullptr };
186 std::unique_ptr<SimpleLineLayout::RunResolver> m_flowRunResolverCache;
187
188 // Used when text boxes are out of order (Hebrew/Arabic with embedded LTR text)
189 Vector<InlineTextBox*> m_sortedTextBoxes;
190 size_t m_sortedTextBoxesPosition { 0 };
191
192 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
193 bool m_hasEmitted { false };
194
195 // Used when deciding text fragment created by :first-letter should be looked into.
196 bool m_handledFirstLetter { false };
197};
198
199// Iterates through the DOM range, returning all the text, and 0-length boundaries
200// at points where replaced elements break up the text flow. The text comes back in
201// chunks so as to optimize for performance of the iteration.
202class SimplifiedBackwardsTextIterator {
203public:
204 explicit SimplifiedBackwardsTextIterator(const Range&);
205
206 bool atEnd() const { return !m_positionNode; }
207 void advance();
208
209 StringView text() const { ASSERT(!atEnd()); return m_text; }
210 WEBCORE_EXPORT Ref<Range> range() const;
211 Node* node() const { ASSERT(!atEnd()); return m_node; }
212
213private:
214 void exitNode();
215 bool handleTextNode();
216 RenderText* handleFirstLetter(int& startOffset, int& offsetInNode);
217 bool handleReplacedElement();
218 bool handleNonTextNode();
219 void emitCharacter(UChar, Node&, int startOffset, int endOffset);
220 bool advanceRespectingRange(Node*);
221
222 const TextIteratorBehavior m_behavior { TextIteratorDefaultBehavior };
223
224 // Current position, not necessarily of the text being returned, but position as we walk through the DOM tree.
225 Node* m_node { nullptr };
226 int m_offset { 0 };
227 bool m_handledNode { false };
228 bool m_handledChildren { false };
229 BitStack m_fullyClippedStack;
230
231 // The range.
232 Node* m_startContainer { nullptr };
233 int m_startOffset { 0 };
234 Node* m_endContainer { nullptr };
235 int m_endOffset { 0 };
236
237 // The current text and its position, in the form to be returned from the iterator.
238 Node* m_positionNode { nullptr };
239 int m_positionStartOffset { 0 };
240 int m_positionEndOffset { 0 };
241 TextIteratorCopyableText m_copyableText;
242 StringView m_text;
243
244 // Used to do the whitespace logic.
245 Text* m_lastTextNode { nullptr };
246 UChar m_lastCharacter { 0 };
247
248 // Whether m_node has advanced beyond the iteration range (i.e. m_startContainer).
249 bool m_havePassedStartContainer { false };
250
251 // Should handle first-letter renderer in the next call to handleTextNode.
252 bool m_shouldHandleFirstLetter { false };
253};
254
255// Builds on the text iterator, adding a character position so we can walk one
256// character at a time, or faster, as needed. Useful for searching.
257class CharacterIterator {
258public:
259 explicit CharacterIterator(const Range&, TextIteratorBehavior = TextIteratorDefaultBehavior);
260 WEBCORE_EXPORT explicit CharacterIterator(Position start, Position end, TextIteratorBehavior = TextIteratorDefaultBehavior);
261
262 bool atEnd() const { return m_underlyingIterator.atEnd(); }
263 WEBCORE_EXPORT void advance(int numCharacters);
264
265 StringView text() const { return m_underlyingIterator.text().substring(m_runOffset); }
266 WEBCORE_EXPORT Ref<Range> range() const;
267
268 bool atBreak() const { return m_atBreak; }
269 int characterOffset() const { return m_offset; }
270
271private:
272 TextIterator m_underlyingIterator;
273
274 int m_offset { 0 };
275 int m_runOffset { 0 };
276 bool m_atBreak { true };
277};
278
279class BackwardsCharacterIterator {
280public:
281 explicit BackwardsCharacterIterator(const Range&);
282
283 bool atEnd() const { return m_underlyingIterator.atEnd(); }
284 void advance(int numCharacters);
285
286 Ref<Range> range() const;
287
288private:
289 SimplifiedBackwardsTextIterator m_underlyingIterator;
290
291 int m_offset;
292 int m_runOffset;
293 bool m_atBreak;
294};
295
296// Similar to the TextIterator, except that the chunks of text returned are "well behaved", meaning
297// they never split up a word. This is useful for spell checking and perhaps one day for searching as well.
298class WordAwareIterator {
299public:
300 explicit WordAwareIterator(const Range&);
301
302 bool atEnd() const { return !m_didLookAhead && m_underlyingIterator.atEnd(); }
303 void advance();
304
305 StringView text() const;
306
307private:
308 TextIterator m_underlyingIterator;
309
310 // Text from the previous chunk from the text iterator.
311 TextIteratorCopyableText m_previousText;
312
313 // Many chunks from text iterator concatenated.
314 Vector<UChar> m_buffer;
315
316 // Did we have to look ahead in the text iterator to confirm the current chunk?
317 bool m_didLookAhead;
318};
319
320} // namespace WebCore
321