1 | /* |
2 | * Copyright (C) 2011 Google Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are |
6 | * met: |
7 | * |
8 | * 1. Redistributions of source code must retain the above copyright |
9 | * notice, this list of conditions and the following disclaimer. |
10 | * |
11 | * 2. Redistributions in binary form must reproduce the above |
12 | * copyright notice, this list of conditions and the following disclaimer |
13 | * in the documentation and/or other materials provided with the |
14 | * distribution. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. AND ITS CONTRIBUTORS |
17 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
18 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
19 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. |
20 | * OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
22 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
23 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
24 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
26 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "config.h" |
30 | #include "ContentSearchUtilities.h" |
31 | |
32 | #include "RegularExpression.h" |
33 | #include "Yarr.h" |
34 | #include "YarrFlags.h" |
35 | #include "YarrInterpreter.h" |
36 | #include <wtf/BumpPointerAllocator.h> |
37 | #include <wtf/StdLibExtras.h> |
38 | #include <wtf/text/StringBuilder.h> |
39 | #include <wtf/text/TextPosition.h> |
40 | |
41 | using namespace JSC::Yarr; |
42 | |
43 | namespace Inspector { |
44 | namespace ContentSearchUtilities { |
45 | |
46 | static const char regexSpecialCharacters[] = "[](){}+-*.,?\\^$|" ; |
47 | |
48 | static String createSearchRegexSource(const String& text) |
49 | { |
50 | StringBuilder result; |
51 | |
52 | for (unsigned i = 0; i < text.length(); i++) { |
53 | UChar character = text[i]; |
54 | if (isASCII(character) && strchr(regexSpecialCharacters, character)) |
55 | result.append('\\'); |
56 | result.append(character); |
57 | } |
58 | |
59 | return result.toString(); |
60 | } |
61 | |
// Key extractor passed to approximateBinarySearch(): the searched elements are
// themselves the keys, so this just dereferences the element pointer.
static inline size_t sizetExtractor(const size_t* value)
{
    return *value;
}
66 | |
67 | TextPosition textPositionFromOffset(size_t offset, const Vector<size_t>& lineEndings) |
68 | { |
69 | const size_t* foundNextStart = approximateBinarySearch<size_t, size_t>(lineEndings, lineEndings.size(), offset, sizetExtractor); |
70 | size_t lineIndex = foundNextStart - &lineEndings.at(0); |
71 | if (offset >= *foundNextStart) |
72 | ++lineIndex; |
73 | size_t lineStartOffset = lineIndex > 0 ? lineEndings.at(lineIndex - 1) : 0; |
74 | size_t column = offset - lineStartOffset; |
75 | return TextPosition(OrdinalNumber::fromZeroBasedInt(lineIndex), OrdinalNumber::fromZeroBasedInt(column)); |
76 | } |
77 | |
78 | static Vector<std::pair<size_t, String>> getRegularExpressionMatchesByLines(const RegularExpression& regex, const String& text) |
79 | { |
80 | Vector<std::pair<size_t, String>> result; |
81 | if (text.isEmpty()) |
82 | return result; |
83 | |
84 | auto endings = lineEndings(text); |
85 | size_t size = endings.size(); |
86 | size_t start = 0; |
87 | |
88 | for (size_t lineNumber = 0; lineNumber < size; ++lineNumber) { |
89 | size_t nextStart = endings[lineNumber]; |
90 | String line = text.substring(start, nextStart - start); |
91 | |
92 | int matchLength; |
93 | if (regex.match(line, 0, &matchLength) != -1) |
94 | result.append(std::pair<size_t, String>(lineNumber, line)); |
95 | |
96 | start = nextStart; |
97 | } |
98 | |
99 | return result; |
100 | } |
101 | |
102 | Vector<size_t> lineEndings(const String& text) |
103 | { |
104 | Vector<size_t> result; |
105 | |
106 | size_t start = 0; |
107 | while (start < text.length()) { |
108 | size_t nextStart = text.find('\n', start); |
109 | if (nextStart == notFound || nextStart == (text.length() - 1)) { |
110 | result.append(text.length()); |
111 | break; |
112 | } |
113 | |
114 | nextStart += 1; |
115 | result.append(nextStart); |
116 | start = nextStart; |
117 | } |
118 | |
119 | result.append(text.length()); |
120 | |
121 | return result; |
122 | } |
123 | |
124 | static Ref<Protocol::GenericTypes::SearchMatch> buildObjectForSearchMatch(size_t lineNumber, const String& lineContent) |
125 | { |
126 | return Protocol::GenericTypes::SearchMatch::create() |
127 | .setLineNumber(lineNumber) |
128 | .setLineContent(lineContent) |
129 | .release(); |
130 | } |
131 | |
132 | RegularExpression createSearchRegex(const String& query, bool caseSensitive, bool isRegex) |
133 | { |
134 | return RegularExpression { isRegex ? query : createSearchRegexSource(query), caseSensitive ? TextCaseSensitive : TextCaseInsensitive }; |
135 | } |
136 | |
137 | int countRegularExpressionMatches(const RegularExpression& regex, const String& content) |
138 | { |
139 | if (content.isEmpty()) |
140 | return 0; |
141 | |
142 | int result = 0; |
143 | int position; |
144 | unsigned start = 0; |
145 | int matchLength; |
146 | while ((position = regex.match(content, start, &matchLength)) != -1) { |
147 | if (start >= content.length()) |
148 | break; |
149 | if (matchLength > 0) |
150 | ++result; |
151 | start = position + 1; |
152 | } |
153 | return result; |
154 | } |
155 | |
156 | Ref<JSON::ArrayOf<Protocol::GenericTypes::SearchMatch>> searchInTextByLines(const String& text, const String& query, const bool caseSensitive, const bool isRegex) |
157 | { |
158 | auto result = JSON::ArrayOf<Protocol::GenericTypes::SearchMatch>::create(); |
159 | auto regex = ContentSearchUtilities::createSearchRegex(query, caseSensitive, isRegex); |
160 | for (const auto& match : getRegularExpressionMatchesByLines(regex, text)) |
161 | result->addItem(buildObjectForSearchMatch(match.first, match.second)); |
162 | return result; |
163 | } |
164 | |
165 | static String (const String& content, const String& patternString) |
166 | { |
167 | if (content.isEmpty()) |
168 | return String(); |
169 | |
170 | JSC::Yarr::ErrorCode error { JSC::Yarr::ErrorCode::NoError }; |
171 | YarrPattern pattern(patternString, JSC::Yarr::Flags::Multiline, error); |
172 | ASSERT(!hasError(error)); |
173 | BumpPointerAllocator regexAllocator; |
174 | JSC::Yarr::ErrorCode ignoredErrorCode = JSC::Yarr::ErrorCode::NoError; |
175 | auto bytecodePattern = byteCompile(pattern, ®exAllocator, ignoredErrorCode); |
176 | RELEASE_ASSERT(bytecodePattern); |
177 | |
178 | ASSERT(pattern.m_numSubpatterns == 1); |
179 | std::array<unsigned, 4> matches; |
180 | unsigned result = interpret(bytecodePattern.get(), content, 0, matches.data()); |
181 | if (result == offsetNoMatch) |
182 | return String(); |
183 | |
184 | ASSERT(matches[2] > 0 && matches[3] > 0); |
185 | return content.substring(matches[2], matches[3] - matches[2]); |
186 | } |
187 | |
188 | String findStylesheetSourceMapURL(const String& content) |
189 | { |
190 | // "/*# <name>=<value> */" and deprecated "/*@" |
191 | return findMagicComment(content, "/\\*[#@][\040\t]sourceMappingURL=[\040\t]*([^\\s\'\"]*)[\040\t]*\\*/"_s ); |
192 | } |
193 | |
194 | } // namespace ContentSearchUtilities |
195 | } // namespace Inspector |
196 | |