1/*
2 * Copyright (C) 2011 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following disclaimer
13 * in the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. AND ITS CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC.
20 * OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "config.h"
30#include "ContentSearchUtilities.h"
31
32#include "RegularExpression.h"
33#include "Yarr.h"
34#include "YarrFlags.h"
35#include "YarrInterpreter.h"
36#include <wtf/BumpPointerAllocator.h>
37#include <wtf/StdLibExtras.h>
38#include <wtf/text/StringBuilder.h>
39#include <wtf/text/TextPosition.h>
40
41using namespace JSC::Yarr;
42
43namespace Inspector {
44namespace ContentSearchUtilities {
45
// Characters that createSearchRegexSource() prefixes with a backslash so that a
// plain-text query is matched literally instead of being read as regex syntax.
static const char regexSpecialCharacters[] = "[](){}+-*.,?\\^$|";
47
48static String createSearchRegexSource(const String& text)
49{
50 StringBuilder result;
51
52 for (unsigned i = 0; i < text.length(); i++) {
53 UChar character = text[i];
54 if (isASCII(character) && strchr(regexSpecialCharacters, character))
55 result.append('\\');
56 result.append(character);
57 }
58
59 return result.toString();
60}
61
// Key extractor passed to approximateBinarySearch() by textPositionFromOffset():
// the stored element is itself the key, so it is simply read through the pointer.
static inline size_t sizetExtractor(const size_t* value)
{
    const size_t& key = *value;
    return key;
}
66
67TextPosition textPositionFromOffset(size_t offset, const Vector<size_t>& lineEndings)
68{
69 const size_t* foundNextStart = approximateBinarySearch<size_t, size_t>(lineEndings, lineEndings.size(), offset, sizetExtractor);
70 size_t lineIndex = foundNextStart - &lineEndings.at(0);
71 if (offset >= *foundNextStart)
72 ++lineIndex;
73 size_t lineStartOffset = lineIndex > 0 ? lineEndings.at(lineIndex - 1) : 0;
74 size_t column = offset - lineStartOffset;
75 return TextPosition(OrdinalNumber::fromZeroBasedInt(lineIndex), OrdinalNumber::fromZeroBasedInt(column));
76}
77
78static Vector<std::pair<size_t, String>> getRegularExpressionMatchesByLines(const RegularExpression& regex, const String& text)
79{
80 Vector<std::pair<size_t, String>> result;
81 if (text.isEmpty())
82 return result;
83
84 auto endings = lineEndings(text);
85 size_t size = endings.size();
86 size_t start = 0;
87
88 for (size_t lineNumber = 0; lineNumber < size; ++lineNumber) {
89 size_t nextStart = endings[lineNumber];
90 String line = text.substring(start, nextStart - start);
91
92 int matchLength;
93 if (regex.match(line, 0, &matchLength) != -1)
94 result.append(std::pair<size_t, String>(lineNumber, line));
95
96 start = nextStart;
97 }
98
99 return result;
100}
101
102Vector<size_t> lineEndings(const String& text)
103{
104 Vector<size_t> result;
105
106 size_t start = 0;
107 while (start < text.length()) {
108 size_t nextStart = text.find('\n', start);
109 if (nextStart == notFound || nextStart == (text.length() - 1)) {
110 result.append(text.length());
111 break;
112 }
113
114 nextStart += 1;
115 result.append(nextStart);
116 start = nextStart;
117 }
118
119 result.append(text.length());
120
121 return result;
122}
123
// Creates a protocol SearchMatch object carrying the given line number and the
// content of that line.
static Ref<Protocol::GenericTypes::SearchMatch> buildObjectForSearchMatch(size_t lineNumber, const String& lineContent)
{
    return Protocol::GenericTypes::SearchMatch::create()
        .setLineNumber(lineNumber)
        .setLineContent(lineContent)
        .release();
}
131
132RegularExpression createSearchRegex(const String& query, bool caseSensitive, bool isRegex)
133{
134 return RegularExpression { isRegex ? query : createSearchRegexSource(query), caseSensitive ? TextCaseSensitive : TextCaseInsensitive };
135}
136
137int countRegularExpressionMatches(const RegularExpression& regex, const String& content)
138{
139 if (content.isEmpty())
140 return 0;
141
142 int result = 0;
143 int position;
144 unsigned start = 0;
145 int matchLength;
146 while ((position = regex.match(content, start, &matchLength)) != -1) {
147 if (start >= content.length())
148 break;
149 if (matchLength > 0)
150 ++result;
151 start = position + 1;
152 }
153 return result;
154}
155
156Ref<JSON::ArrayOf<Protocol::GenericTypes::SearchMatch>> searchInTextByLines(const String& text, const String& query, const bool caseSensitive, const bool isRegex)
157{
158 auto result = JSON::ArrayOf<Protocol::GenericTypes::SearchMatch>::create();
159 auto regex = ContentSearchUtilities::createSearchRegex(query, caseSensitive, isRegex);
160 for (const auto& match : getRegularExpressionMatchesByLines(regex, text))
161 result->addItem(buildObjectForSearchMatch(match.first, match.second));
162 return result;
163}
164
165static String findMagicComment(const String& content, const String& patternString)
166{
167 if (content.isEmpty())
168 return String();
169
170 JSC::Yarr::ErrorCode error { JSC::Yarr::ErrorCode::NoError };
171 YarrPattern pattern(patternString, JSC::Yarr::Flags::Multiline, error);
172 ASSERT(!hasError(error));
173 BumpPointerAllocator regexAllocator;
174 JSC::Yarr::ErrorCode ignoredErrorCode = JSC::Yarr::ErrorCode::NoError;
175 auto bytecodePattern = byteCompile(pattern, &regexAllocator, ignoredErrorCode);
176 RELEASE_ASSERT(bytecodePattern);
177
178 ASSERT(pattern.m_numSubpatterns == 1);
179 std::array<unsigned, 4> matches;
180 unsigned result = interpret(bytecodePattern.get(), content, 0, matches.data());
181 if (result == offsetNoMatch)
182 return String();
183
184 ASSERT(matches[2] > 0 && matches[3] > 0);
185 return content.substring(matches[2], matches[3] - matches[2]);
186}
187
188String findStylesheetSourceMapURL(const String& content)
189{
190 // "/*# <name>=<value> */" and deprecated "/*@"
191 return findMagicComment(content, "/\\*[#@][\040\t]sourceMappingURL=[\040\t]*([^\\s\'\"]*)[\040\t]*\\*/"_s);
192}
193
194} // namespace ContentSearchUtilities
195} // namespace Inspector
196