1/*
2 * Copyright (C) 1999-2001, 2004 Harri Porten ([email protected])
3 * Copyright (c) 2007, 2008, 2016 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile, Inc.
5 * Copyright (C) 2010 Peter Varga ([email protected]), University of Szeged
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 *
21 */
22
23#pragma once
24
25#include "RegExp.h"
26#include "JSCInlines.h"
27#include "Yarr.h"
28#include "YarrInterpreter.h"
29#include "YarrJIT.h"
30
31#define REGEXP_FUNC_TEST_DATA_GEN 0
32
33#if REGEXP_FUNC_TEST_DATA_GEN
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#endif
38
39namespace JSC {
40
41#if REGEXP_FUNC_TEST_DATA_GEN
42class RegExpFunctionalTestCollector {
43 // This class is not thread safe.
44protected:
45 static const char* const s_fileName;
46
47public:
48 static RegExpFunctionalTestCollector* get();
49
50 ~RegExpFunctionalTestCollector();
51
52 void outputOneTest(RegExp*, String, int, int*, int);
53 void clearRegExp(RegExp* regExp)
54 {
55 if (regExp == m_lastRegExp)
56 m_lastRegExp = 0;
57 }
58
59private:
60 RegExpFunctionalTestCollector();
61
62 void outputEscapedString(const String&, bool escapeSlash = false);
63
64 static RegExpFunctionalTestCollector* s_instance;
65 FILE* m_file;
66 RegExp* m_lastRegExp;
67};
68#endif // REGEXP_FUNC_TEST_DATA_GEN
69
70ALWAYS_INLINE bool RegExp::hasCodeFor(Yarr::YarrCharSize charSize)
71{
72 if (hasCode()) {
73#if ENABLE(YARR_JIT)
74 if (m_state != JITCode)
75 return true;
76 ASSERT(m_regExpJITCode);
77 if ((charSize == Yarr::Char8) && (m_regExpJITCode->has8BitCode()))
78 return true;
79 if ((charSize == Yarr::Char16) && (m_regExpJITCode->has16BitCode()))
80 return true;
81#else
82 UNUSED_PARAM(charSize);
83 return true;
84#endif
85 }
86 return false;
87}
88
89class PatternContextBufferHolder {
90 WTF_FORBID_HEAP_ALLOCATION;
91public:
92 PatternContextBufferHolder(VM& vm, bool needBuffer)
93 : m_vm(vm)
94 {
95#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
96 if (needBuffer)
97 m_buffer = m_vm.acquireRegExpPatternContexBuffer();
98#else
99 UNUSED_PARAM(needBuffer);
100#endif
101 }
102
103 ~PatternContextBufferHolder()
104 {
105#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
106 if (buffer())
107 m_vm.releaseRegExpPatternContexBuffer();
108#else
109 UNUSED_PARAM(m_vm);
110#endif
111 }
112
113 void* buffer() { return m_buffer; }
114 unsigned size() { return buffer() ? VM::patternContextBufferSize : 0; }
115
116private:
117 VM& m_vm;
118 void* m_buffer { nullptr };
119};
120
121ALWAYS_INLINE void RegExp::compileIfNecessary(VM& vm, Yarr::YarrCharSize charSize)
122{
123 if (hasCodeFor(charSize))
124 return;
125
126 if (m_state == ParseError)
127 return;
128
129 compile(&vm, charSize);
130}
131
132template<typename VectorType>
133ALWAYS_INLINE int RegExp::matchInline(VM& vm, const String& s, unsigned startOffset, VectorType& ovector)
134{
135#if ENABLE(REGEXP_TRACING)
136 m_rtMatchCallCount++;
137 m_rtMatchTotalSubjectStringLen += (double)(s.length() - startOffset);
138#endif
139
140 compileIfNecessary(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
141
142 auto throwError = [&] {
143 auto throwScope = DECLARE_THROW_SCOPE(vm);
144 // FIXME: Revisit JSGlobalObject.
145 // https://bugs.webkit.org/show_bug.cgi?id=203204
146 JSGlobalObject* globalObject = vm.topCallFrame->lexicalGlobalObject(vm);
147 throwScope.throwException(globalObject, errorToThrow(globalObject));
148 if (!hasHardError(m_constructionErrorCode))
149 reset();
150 return -1;
151 };
152
153 if (m_state == ParseError)
154 return throwError();
155
156 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
157 ovector.resize(offsetVectorSize);
158 int* offsetVector = ovector.data();
159
160 int result;
161#if ENABLE(YARR_JIT)
162 if (m_state == JITCode) {
163 {
164 ASSERT(m_regExpJITCode);
165 PatternContextBufferHolder patternContextBufferHolder(vm, m_regExpJITCode->usesPatternContextBuffer());
166
167 if (s.is8Bit())
168 result = m_regExpJITCode->execute(s.characters8(), startOffset, s.length(), offsetVector, patternContextBufferHolder.buffer(), patternContextBufferHolder.size()).start;
169 else
170 result = m_regExpJITCode->execute(s.characters16(), startOffset, s.length(), offsetVector, patternContextBufferHolder.buffer(), patternContextBufferHolder.size()).start;
171 }
172
173 if (result == Yarr::JSRegExpJITCodeFailure) {
174 // JIT'ed code couldn't handle expression, so punt back to the interpreter.
175 byteCodeCompileIfNecessary(&vm);
176 if (m_state == ParseError)
177 return throwError();
178 result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
179 }
180
181#if ENABLE(YARR_JIT_DEBUG)
182 if (m_state == JITCode) {
183 byteCodeCompileIfNecessary(&vm);
184 if (m_state == ParseError)
185 return throwError();
186 matchCompareWithInterpreter(s, startOffset, offsetVector, result);
187 }
188#endif
189 } else
190#endif
191 result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
192
193 // FIXME: The YARR engine should handle unsigned or size_t length matches.
194 // The YARR Interpreter is "unsigned" clean, while the YARR JIT hasn't been addressed.
195 // The offset vector handling needs to change as well.
196 // Right now we convert a match where the offsets overflowed into match failure.
197 // There are two places in WebCore that call the interpreter directly that need to
198 // have their offsets changed to int as well. They are yarr/RegularExpression.cpp
199 // and inspector/ContentSearchUtilities.cpp
200 if (s.length() > INT_MAX) {
201 bool overflowed = false;
202
203 if (result < -1)
204 overflowed = true;
205
206 for (unsigned i = 0; i <= m_numSubpatterns; i++) {
207 if ((offsetVector[i*2] < -1) || ((offsetVector[i*2] >= 0) && (offsetVector[i*2+1] < -1))) {
208 overflowed = true;
209 offsetVector[i*2] = -1;
210 offsetVector[i*2+1] = -1;
211 }
212 }
213
214 if (overflowed)
215 result = -1;
216 }
217
218 ASSERT(result >= -1);
219
220#if REGEXP_FUNC_TEST_DATA_GEN
221 RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result);
222#endif
223
224#if ENABLE(REGEXP_TRACING)
225 if (result != -1)
226 m_rtMatchFoundCount++;
227#endif
228
229 return result;
230}
231
232ALWAYS_INLINE bool RegExp::hasMatchOnlyCodeFor(Yarr::YarrCharSize charSize)
233{
234 if (hasCode()) {
235#if ENABLE(YARR_JIT)
236 if (m_state != JITCode)
237 return true;
238 ASSERT(m_regExpJITCode);
239 if ((charSize == Yarr::Char8) && (m_regExpJITCode->has8BitCodeMatchOnly()))
240 return true;
241 if ((charSize == Yarr::Char16) && (m_regExpJITCode->has16BitCodeMatchOnly()))
242 return true;
243#else
244 UNUSED_PARAM(charSize);
245 return true;
246#endif
247 }
248
249 return false;
250}
251
252ALWAYS_INLINE void RegExp::compileIfNecessaryMatchOnly(VM& vm, Yarr::YarrCharSize charSize)
253{
254 if (hasMatchOnlyCodeFor(charSize))
255 return;
256
257 if (m_state == ParseError)
258 return;
259
260 compileMatchOnly(&vm, charSize);
261}
262
263ALWAYS_INLINE MatchResult RegExp::matchInline(VM& vm, const String& s, unsigned startOffset)
264{
265#if ENABLE(REGEXP_TRACING)
266 m_rtMatchOnlyCallCount++;
267 m_rtMatchOnlyTotalSubjectStringLen += (double)(s.length() - startOffset);
268#endif
269
270 compileIfNecessaryMatchOnly(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
271
272 auto throwError = [&] {
273 auto throwScope = DECLARE_THROW_SCOPE(vm);
274 // FIXME: Revisit JSGlobalObject.
275 // https://bugs.webkit.org/show_bug.cgi?id=203204
276 JSGlobalObject* globalObject = vm.topCallFrame->lexicalGlobalObject(vm);
277 throwScope.throwException(globalObject, errorToThrow(globalObject));
278 if (!hasHardError(m_constructionErrorCode))
279 reset();
280 return MatchResult::failed();
281 };
282
283 if (m_state == ParseError)
284 return throwError();
285
286#if ENABLE(YARR_JIT)
287 MatchResult result;
288
289 if (m_state == JITCode) {
290 {
291 ASSERT(m_regExpJITCode);
292 PatternContextBufferHolder patternContextBufferHolder(vm, m_regExpJITCode->usesPatternContextBuffer());
293 if (s.is8Bit())
294 result = m_regExpJITCode->execute(s.characters8(), startOffset, s.length(), patternContextBufferHolder.buffer(), patternContextBufferHolder.size());
295 else
296 result = m_regExpJITCode->execute(s.characters16(), startOffset, s.length(), patternContextBufferHolder.buffer(), patternContextBufferHolder.size());
297 }
298
299#if ENABLE(REGEXP_TRACING)
300 if (!result)
301 m_rtMatchOnlyFoundCount++;
302#endif
303 if (result.start != static_cast<size_t>(Yarr::JSRegExpJITCodeFailure))
304 return result;
305
306 // JIT'ed code couldn't handle expression, so punt back to the interpreter.
307 byteCodeCompileIfNecessary(&vm);
308 if (m_state == ParseError)
309 return throwError();
310 }
311#endif
312
313 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
314 int* offsetVector;
315 Vector<int, 32> nonReturnedOvector;
316 nonReturnedOvector.grow(offsetVectorSize);
317 offsetVector = nonReturnedOvector.data();
318 int r = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
319#if REGEXP_FUNC_TEST_DATA_GEN
320 RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result);
321#endif
322
323 if (r >= 0) {
324#if ENABLE(REGEXP_TRACING)
325 m_rtMatchOnlyFoundCount++;
326#endif
327 return MatchResult(r, reinterpret_cast<unsigned*>(offsetVector)[1]);
328 }
329
330 return MatchResult::failed();
331}
332
333} // namespace JSC
334