1 | /* |
2 | * Copyright (C) 2004-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
24 | */ |
25 | |
26 | #pragma once |
27 | |
28 | // FIXME: Move each iterator class into a separate header file. |
29 | |
30 | #include "FindOptions.h" |
31 | #include "Range.h" |
32 | #include "TextIteratorBehavior.h" |
33 | #include <wtf/Vector.h> |
34 | #include <wtf/text/StringView.h> |
35 | |
36 | namespace WebCore { |
37 | |
// Forward declarations. This header only refers to these rendering types
// through pointers, references, or a std::unique_ptr member, so their full
// definitions are not required here.
class InlineTextBox;
class RenderText;
class RenderTextFragment;

namespace SimpleLineLayout {
class RunResolver;
}
45 | |
46 | WEBCORE_EXPORT String plainText(Position start, Position end, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false); |
47 | WEBCORE_EXPORT String plainTextReplacingNoBreakSpace(Position start, Position end, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false); |
48 | |
49 | WEBCORE_EXPORT String plainText(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false); |
50 | WEBCORE_EXPORT String plainTextReplacingNoBreakSpace(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false); |
51 | WEBCORE_EXPORT String plainTextUsingBackwardsTextIteratorForTesting(const Range&); |
52 | |
53 | Ref<Range> findPlainText(const Range&, const String&, FindOptions); |
54 | WEBCORE_EXPORT Ref<Range> findClosestPlainText(const Range&, const String&, FindOptions, unsigned); |
55 | WEBCORE_EXPORT bool hasAnyPlainText(const Range&, TextIteratorBehavior = TextIteratorDefaultBehavior); |
56 | bool findPlainText(const String& document, const String&, FindOptions); // Lets us use the search algorithm on a string. |
57 | |
58 | // FIXME: Move this somewhere else in the editing directory. It doesn't belong here. |
59 | bool isRendererReplacedElement(RenderObject*); |
60 | |
61 | class BitStack { |
62 | public: |
63 | BitStack(); |
64 | ~BitStack(); |
65 | |
66 | void push(bool); |
67 | void pop(); |
68 | |
69 | bool top() const; |
70 | unsigned size() const; |
71 | |
72 | private: |
73 | unsigned m_size; |
74 | Vector<unsigned, 1> m_words; |
75 | }; |
76 | |
77 | class TextIteratorCopyableText { |
78 | public: |
79 | TextIteratorCopyableText() |
80 | : m_singleCharacter(0) |
81 | , m_offset(0) |
82 | , m_length(0) |
83 | { |
84 | } |
85 | |
86 | StringView text() const { return m_singleCharacter ? StringView(&m_singleCharacter, 1) : StringView(m_string).substring(m_offset, m_length); } |
87 | void appendToStringBuilder(StringBuilder&) const; |
88 | |
89 | void reset(); |
90 | void set(String&&); |
91 | void set(String&&, unsigned offset, unsigned length); |
92 | void set(UChar); |
93 | |
94 | private: |
95 | UChar m_singleCharacter; |
96 | String m_string; |
97 | unsigned m_offset; |
98 | unsigned m_length; |
99 | }; |
100 | |
101 | // Iterates through the DOM range, returning all the text, and 0-length boundaries |
102 | // at points where replaced elements break up the text flow. The text is delivered in |
103 | // the chunks it's already stored in, to avoid copying any text. |
104 | |
105 | class TextIterator { |
106 | WTF_MAKE_FAST_ALLOCATED; |
107 | public: |
108 | WEBCORE_EXPORT explicit TextIterator(Position start, Position end, TextIteratorBehavior = TextIteratorDefaultBehavior); |
109 | WEBCORE_EXPORT explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); |
110 | WEBCORE_EXPORT ~TextIterator(); |
111 | |
112 | bool atEnd() const { return !m_positionNode; } |
113 | WEBCORE_EXPORT void advance(); |
114 | |
115 | StringView text() const { ASSERT(!atEnd()); return m_text; } |
116 | WEBCORE_EXPORT Ref<Range> range() const; |
117 | WEBCORE_EXPORT Node* node() const; |
118 | |
119 | const TextIteratorCopyableText& copyableText() const { ASSERT(!atEnd()); return m_copyableText; } |
120 | void appendTextToStringBuilder(StringBuilder& builder) const { copyableText().appendToStringBuilder(builder); } |
121 | |
122 | WEBCORE_EXPORT static int rangeLength(const Range*, bool spacesForReplacedElements = false); |
123 | WEBCORE_EXPORT static RefPtr<Range> rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false); |
124 | WEBCORE_EXPORT static bool getLocationAndLengthFromRange(Node* scope, const Range*, size_t& location, size_t& length); |
125 | WEBCORE_EXPORT static Ref<Range> subrange(Range& entireRange, int characterOffset, int characterCount); |
126 | |
127 | private: |
128 | void init(); |
129 | void exitNode(Node*); |
130 | bool shouldRepresentNodeOffsetZero(); |
131 | bool shouldEmitSpaceBeforeAndAfterNode(Node&); |
132 | void representNodeOffsetZero(); |
133 | bool handleTextNode(); |
134 | bool handleReplacedElement(); |
135 | bool handleNonTextNode(); |
136 | void handleTextBox(); |
137 | void handleTextNodeFirstLetter(RenderTextFragment&); |
138 | void emitCharacter(UChar, Node& characterNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset); |
139 | void emitText(Text& textNode, RenderText&, int textStartOffset, int textEndOffset); |
140 | |
141 | Node* baseNodeForEmittingNewLine() const; |
142 | |
143 | const TextIteratorBehavior m_behavior { TextIteratorDefaultBehavior }; |
144 | |
145 | // Current position, not necessarily of the text being returned, but position as we walk through the DOM tree. |
146 | Node* m_node { nullptr }; |
147 | int m_offset { 0 }; |
148 | bool m_handledNode { false }; |
149 | bool m_handledChildren { false }; |
150 | BitStack m_fullyClippedStack; |
151 | |
152 | // The range. |
153 | Node* m_startContainer { nullptr }; |
154 | int m_startOffset { 0 }; |
155 | Node* m_endContainer { nullptr }; |
156 | int m_endOffset { 0 }; |
157 | Node* m_pastEndNode { nullptr }; |
158 | |
159 | // The current text and its position, in the form to be returned from the iterator. |
160 | Node* m_positionNode { nullptr }; |
161 | mutable Node* m_positionOffsetBaseNode { nullptr }; |
162 | mutable int m_positionStartOffset { 0 }; |
163 | mutable int m_positionEndOffset { 0 }; |
164 | TextIteratorCopyableText m_copyableText; |
165 | StringView m_text; |
166 | |
167 | // Used when there is still some pending text from the current node; when these are false and null, we go back to normal iterating. |
168 | Node* m_nodeForAdditionalNewline { nullptr }; |
169 | InlineTextBox* m_textBox { nullptr }; |
170 | |
171 | // Used when iterating over :first-letter text to save pointer to remaining text box. |
172 | InlineTextBox* m_remainingTextBox { nullptr }; |
173 | |
174 | // Used to point to RenderText object for :first-letter. |
175 | RenderText* m_firstLetterText { nullptr }; |
176 | |
177 | // Used to do the whitespace collapsing logic. |
178 | Text* m_lastTextNode { nullptr }; |
179 | bool m_lastTextNodeEndedWithCollapsedSpace { false }; |
180 | UChar m_lastCharacter { 0 }; |
181 | |
182 | // Used to do simple line layout run logic. |
183 | bool m_nextRunNeedsWhitespace { false }; |
184 | unsigned m_accumulatedSimpleTextLengthInFlow { 0 }; |
185 | Text* m_previousSimpleTextNodeInFlow { nullptr }; |
186 | std::unique_ptr<SimpleLineLayout::RunResolver> m_flowRunResolverCache; |
187 | |
188 | // Used when text boxes are out of order (Hebrew/Arabic with embedded LTR text) |
189 | Vector<InlineTextBox*> m_sortedTextBoxes; |
190 | size_t m_sortedTextBoxesPosition { 0 }; |
191 | |
192 | // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content |
193 | bool m_hasEmitted { false }; |
194 | |
195 | // Used when deciding text fragment created by :first-letter should be looked into. |
196 | bool m_handledFirstLetter { false }; |
197 | }; |
198 | |
199 | // Iterates through the DOM range, returning all the text, and 0-length boundaries |
200 | // at points where replaced elements break up the text flow. The text comes back in |
201 | // chunks so as to optimize for performance of the iteration. |
202 | class SimplifiedBackwardsTextIterator { |
203 | public: |
204 | explicit SimplifiedBackwardsTextIterator(const Range&); |
205 | |
206 | bool atEnd() const { return !m_positionNode; } |
207 | void advance(); |
208 | |
209 | StringView text() const { ASSERT(!atEnd()); return m_text; } |
210 | WEBCORE_EXPORT Ref<Range> range() const; |
211 | Node* node() const { ASSERT(!atEnd()); return m_node; } |
212 | |
213 | private: |
214 | void exitNode(); |
215 | bool handleTextNode(); |
216 | RenderText* handleFirstLetter(int& startOffset, int& offsetInNode); |
217 | bool handleReplacedElement(); |
218 | bool handleNonTextNode(); |
219 | void emitCharacter(UChar, Node&, int startOffset, int endOffset); |
220 | bool advanceRespectingRange(Node*); |
221 | |
222 | const TextIteratorBehavior m_behavior { TextIteratorDefaultBehavior }; |
223 | |
224 | // Current position, not necessarily of the text being returned, but position as we walk through the DOM tree. |
225 | Node* m_node { nullptr }; |
226 | int m_offset { 0 }; |
227 | bool m_handledNode { false }; |
228 | bool m_handledChildren { false }; |
229 | BitStack m_fullyClippedStack; |
230 | |
231 | // The range. |
232 | Node* m_startContainer { nullptr }; |
233 | int m_startOffset { 0 }; |
234 | Node* m_endContainer { nullptr }; |
235 | int m_endOffset { 0 }; |
236 | |
237 | // The current text and its position, in the form to be returned from the iterator. |
238 | Node* m_positionNode { nullptr }; |
239 | int m_positionStartOffset { 0 }; |
240 | int m_positionEndOffset { 0 }; |
241 | TextIteratorCopyableText m_copyableText; |
242 | StringView m_text; |
243 | |
244 | // Used to do the whitespace logic. |
245 | Text* m_lastTextNode { nullptr }; |
246 | UChar m_lastCharacter { 0 }; |
247 | |
248 | // Whether m_node has advanced beyond the iteration range (i.e. m_startContainer). |
249 | bool m_havePassedStartContainer { false }; |
250 | |
251 | // Should handle first-letter renderer in the next call to handleTextNode. |
252 | bool m_shouldHandleFirstLetter { false }; |
253 | }; |
254 | |
255 | // Builds on the text iterator, adding a character position so we can walk one |
256 | // character at a time, or faster, as needed. Useful for searching. |
257 | class CharacterIterator { |
258 | public: |
259 | explicit CharacterIterator(const Range&, TextIteratorBehavior = TextIteratorDefaultBehavior); |
260 | WEBCORE_EXPORT explicit CharacterIterator(Position start, Position end, TextIteratorBehavior = TextIteratorDefaultBehavior); |
261 | |
262 | bool atEnd() const { return m_underlyingIterator.atEnd(); } |
263 | WEBCORE_EXPORT void advance(int numCharacters); |
264 | |
265 | StringView text() const { return m_underlyingIterator.text().substring(m_runOffset); } |
266 | WEBCORE_EXPORT Ref<Range> range() const; |
267 | |
268 | bool atBreak() const { return m_atBreak; } |
269 | int characterOffset() const { return m_offset; } |
270 | |
271 | private: |
272 | TextIterator m_underlyingIterator; |
273 | |
274 | int m_offset { 0 }; |
275 | int m_runOffset { 0 }; |
276 | bool m_atBreak { true }; |
277 | }; |
278 | |
279 | class BackwardsCharacterIterator { |
280 | public: |
281 | explicit BackwardsCharacterIterator(const Range&); |
282 | |
283 | bool atEnd() const { return m_underlyingIterator.atEnd(); } |
284 | void advance(int numCharacters); |
285 | |
286 | Ref<Range> range() const; |
287 | |
288 | private: |
289 | SimplifiedBackwardsTextIterator m_underlyingIterator; |
290 | |
291 | int m_offset; |
292 | int m_runOffset; |
293 | bool m_atBreak; |
294 | }; |
295 | |
296 | // Similar to the TextIterator, except that the chunks of text returned are "well behaved", meaning |
297 | // they never split up a word. This is useful for spell checking and perhaps one day for searching as well. |
298 | class WordAwareIterator { |
299 | public: |
300 | explicit WordAwareIterator(const Range&); |
301 | |
302 | bool atEnd() const { return !m_didLookAhead && m_underlyingIterator.atEnd(); } |
303 | void advance(); |
304 | |
305 | StringView text() const; |
306 | |
307 | private: |
308 | TextIterator m_underlyingIterator; |
309 | |
310 | // Text from the previous chunk from the text iterator. |
311 | TextIteratorCopyableText m_previousText; |
312 | |
313 | // Many chunks from text iterator concatenated. |
314 | Vector<UChar> m_buffer; |
315 | |
316 | // Did we have to look ahead in the text iterator to confirm the current chunk? |
317 | bool m_didLookAhead; |
318 | }; |
319 | |
320 | } // namespace WebCore |
321 | |