1 | /* |
2 | * Copyright (C) 2011-2019 Apple Inc. All rights reserved. |
3 | * |
4 | * This library is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU Library General Public |
6 | * License as published by the Free Software Foundation; either |
7 | * version 2 of the License, or (at your option) any later version. |
8 | * |
9 | * This library is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | * Library General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU Library General Public License |
15 | * along with this library; see the file COPYING.LIB. If not, write to |
16 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
17 | * Boston, MA 02110-1301, USA. |
18 | * |
19 | */ |
20 | |
21 | #include "config.h" |
22 | #include "RegExp.h" |
23 | |
24 | #include "InitializeThreading.h" |
25 | #include "JSCInlines.h" |
26 | #include "JSGlobalObject.h" |
27 | #include "YarrFlags.h" |
28 | #include <errno.h> |
29 | #include <stdio.h> |
30 | #include <stdlib.h> |
31 | #include <string.h> |
32 | #include <wtf/Vector.h> |
33 | #include <wtf/text/StringBuilder.h> |
34 | |
35 | #if !OS(WINDOWS) |
36 | #include <unistd.h> |
37 | #endif |
38 | |
39 | #if HAVE(SYS_TIME_H) |
40 | #include <sys/time.h> |
41 | #endif |
42 | |
43 | #if COMPILER(MSVC) |
44 | #include <crtdbg.h> |
45 | #include <mmsystem.h> |
46 | #include <windows.h> |
47 | #endif |
48 | |
// Size handed to fgets() when reading one line of a test data file; the line
// buffer itself is allocated one byte larger than this.
constexpr int MaxLineLength = 100 * 1024;
50 | |
51 | using namespace JSC; |
52 | |
53 | struct CommandLine { |
54 | CommandLine() |
55 | : interactive(false) |
56 | , verbose(false) |
57 | { |
58 | } |
59 | |
60 | bool interactive; |
61 | bool verbose; |
62 | Vector<String> arguments; |
63 | Vector<String> files; |
64 | }; |
65 | |
66 | class StopWatch { |
67 | public: |
68 | void start(); |
69 | void stop(); |
70 | long getElapsedMS(); // call stop() first |
71 | |
72 | private: |
73 | MonotonicTime m_startTime; |
74 | MonotonicTime m_stopTime; |
75 | }; |
76 | |
77 | void StopWatch::start() |
78 | { |
79 | m_startTime = MonotonicTime::now(); |
80 | } |
81 | |
82 | void StopWatch::stop() |
83 | { |
84 | m_stopTime = MonotonicTime::now(); |
85 | } |
86 | |
87 | long StopWatch::getElapsedMS() |
88 | { |
89 | return (m_stopTime - m_startTime).millisecondsAs<long>(); |
90 | } |
91 | |
92 | struct RegExpTest { |
93 | RegExpTest() |
94 | : offset(0) |
95 | , result(0) |
96 | { |
97 | } |
98 | |
99 | String subject; |
100 | int offset; |
101 | int result; |
102 | Vector<int, 32> expectVector; |
103 | }; |
104 | |
105 | class GlobalObject : public JSGlobalObject { |
106 | private: |
107 | GlobalObject(VM&, Structure*, const Vector<String>& arguments); |
108 | |
109 | public: |
110 | typedef JSGlobalObject Base; |
111 | |
112 | static GlobalObject* create(VM& vm, Structure* structure, const Vector<String>& arguments) |
113 | { |
114 | GlobalObject* globalObject = new (NotNull, allocateCell<GlobalObject>(vm.heap)) GlobalObject(vm, structure, arguments); |
115 | return globalObject; |
116 | } |
117 | |
118 | DECLARE_INFO; |
119 | |
120 | static constexpr bool needsDestructor = false; |
121 | |
122 | static Structure* createStructure(VM& vm, JSValue prototype) |
123 | { |
124 | return Structure::create(vm, 0, prototype, TypeInfo(GlobalObjectType, StructureFlags), info()); |
125 | } |
126 | |
127 | protected: |
128 | void finishCreation(VM& vm, const Vector<String>& arguments) |
129 | { |
130 | Base::finishCreation(vm); |
131 | UNUSED_PARAM(arguments); |
132 | } |
133 | }; |
134 | |
135 | const ClassInfo GlobalObject::s_info = { "global" , &JSGlobalObject::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(GlobalObject) }; |
136 | |
137 | GlobalObject::GlobalObject(VM& vm, Structure* structure, const Vector<String>& arguments) |
138 | : JSGlobalObject(vm, structure) |
139 | { |
140 | finishCreation(vm, arguments); |
141 | } |
142 | |
// Use SEH for Release builds only to get rid of the crash report dialog
// (luckily the same tests fail in Release and Debug builds so far). Need to
// be in a separate main function because the realMain function requires object
// unwinding.
//
// With MSVC in non-debug builds, TRY opens a __try block and EXCEPT(x) closes
// it with an __except handler that executes x. In every other configuration
// both macros expand to nothing, so the statement between them runs unguarded.

#if COMPILER(MSVC) && !defined(_DEBUG)
#define TRY __try {
#define EXCEPT(x) } __except (EXCEPTION_EXECUTE_HANDLER) { x; }
#else
#define TRY
#define EXCEPT(x)
#endif
155 | |
156 | int realMain(int argc, char** argv); |
157 | |
158 | int main(int argc, char** argv) |
159 | { |
160 | #if OS(WINDOWS) |
161 | // Cygwin calls ::SetErrorMode(SEM_FAILCRITICALERRORS), which we will inherit. This is bad for |
162 | // testing/debugging, as it causes the post-mortem debugger not to be invoked. We reset the |
163 | // error mode here to work around Cygwin's behavior. See <http://webkit.org/b/55222>. |
164 | ::SetErrorMode(0); |
165 | |
166 | #if defined(_DEBUG) |
167 | _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR); |
168 | _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE); |
169 | _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR); |
170 | _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE); |
171 | _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR); |
172 | _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE); |
173 | #endif |
174 | |
175 | timeBeginPeriod(1); |
176 | #endif |
177 | |
178 | // Initialize JSC before getting VM. |
179 | JSC::initializeThreading(); |
180 | |
181 | // We can't use destructors in the following code because it uses Windows |
182 | // Structured Exception Handling |
183 | int res = 0; |
184 | TRY |
185 | res = realMain(argc, argv); |
186 | EXCEPT(res = 3) |
187 | return res; |
188 | } |
189 | |
190 | static bool testOneRegExp(VM& vm, RegExp* regexp, RegExpTest* regExpTest, bool verbose, unsigned int lineNumber) |
191 | { |
192 | bool result = true; |
193 | Vector<int> outVector; |
194 | outVector.resize(regExpTest->expectVector.size()); |
195 | int matchResult = regexp->match(vm, regExpTest->subject, regExpTest->offset, outVector); |
196 | |
197 | if (matchResult != regExpTest->result) { |
198 | result = false; |
199 | if (verbose) |
200 | printf("Line %d: results mismatch - expected %d got %d\n" , lineNumber, regExpTest->result, matchResult); |
201 | } else if (matchResult != -1) { |
202 | if (outVector.size() != regExpTest->expectVector.size()) { |
203 | result = false; |
204 | if (verbose) { |
205 | #if OS(WINDOWS) |
206 | printf("Line %d: output vector size mismatch - expected %Iu got %Iu\n" , lineNumber, regExpTest->expectVector.size(), outVector.size()); |
207 | #else |
208 | printf("Line %d: output vector size mismatch - expected %zu got %zu\n" , lineNumber, regExpTest->expectVector.size(), outVector.size()); |
209 | #endif |
210 | } |
211 | } else if (outVector.size() % 2) { |
212 | result = false; |
213 | if (verbose) { |
214 | #if OS(WINDOWS) |
215 | printf("Line %d: output vector size is odd (%Iu), should be even\n" , lineNumber, outVector.size()); |
216 | #else |
217 | printf("Line %d: output vector size is odd (%zu), should be even\n" , lineNumber, outVector.size()); |
218 | #endif |
219 | } |
220 | } else { |
221 | // Check in pairs since the first value of the pair could be -1 in which case the second doesn't matter. |
222 | size_t pairCount = outVector.size() / 2; |
223 | for (size_t i = 0; i < pairCount; ++i) { |
224 | size_t startIndex = i*2; |
225 | if (outVector[startIndex] != regExpTest->expectVector[startIndex]) { |
226 | result = false; |
227 | if (verbose) { |
228 | #if OS(WINDOWS) |
229 | printf("Line %d: output vector mismatch at index %Iu - expected %d got %d\n" , lineNumber, startIndex, regExpTest->expectVector[startIndex], outVector[startIndex]); |
230 | #else |
231 | printf("Line %d: output vector mismatch at index %zu - expected %d got %d\n" , lineNumber, startIndex, regExpTest->expectVector[startIndex], outVector[startIndex]); |
232 | #endif |
233 | } |
234 | } |
235 | if ((i > 0) && (regExpTest->expectVector[startIndex] != -1) && (outVector[startIndex+1] != regExpTest->expectVector[startIndex+1])) { |
236 | result = false; |
237 | if (verbose) { |
238 | #if OS(WINDOWS) |
239 | printf("Line %d: output vector mismatch at index %Iu - expected %d got %d\n" , lineNumber, startIndex + 1, regExpTest->expectVector[startIndex + 1], outVector[startIndex + 1]); |
240 | #else |
241 | printf("Line %d: output vector mismatch at index %zu - expected %d got %d\n" , lineNumber, startIndex + 1, regExpTest->expectVector[startIndex + 1], outVector[startIndex + 1]); |
242 | #endif |
243 | } |
244 | } |
245 | } |
246 | } |
247 | } |
248 | |
249 | return result; |
250 | } |
251 | |
252 | static int scanString(char* buffer, int bufferLength, StringBuilder& builder, char termChar) |
253 | { |
254 | bool escape = false; |
255 | |
256 | for (int i = 0; i < bufferLength; ++i) { |
257 | UChar c = buffer[i]; |
258 | |
259 | if (escape) { |
260 | switch (c) { |
261 | case '0': |
262 | c = '\0'; |
263 | break; |
264 | case 'a': |
265 | c = '\a'; |
266 | break; |
267 | case 'b': |
268 | c = '\b'; |
269 | break; |
270 | case 'f': |
271 | c = '\f'; |
272 | break; |
273 | case 'n': |
274 | c = '\n'; |
275 | break; |
276 | case 'r': |
277 | c = '\r'; |
278 | break; |
279 | case 't': |
280 | c = '\t'; |
281 | break; |
282 | case 'v': |
283 | c = '\v'; |
284 | break; |
285 | case '\\': |
286 | c = '\\'; |
287 | break; |
288 | case '?': |
289 | c = '\?'; |
290 | break; |
291 | case 'u': |
292 | if ((i + 4) >= bufferLength) |
293 | return -1; |
294 | unsigned int charValue; |
295 | if (sscanf(buffer+i+1, "%04x" , &charValue) != 1) |
296 | return -1; |
297 | c = static_cast<UChar>(charValue); |
298 | i += 4; |
299 | break; |
300 | } |
301 | |
302 | builder.append(c); |
303 | escape = false; |
304 | } else { |
305 | if (c == termChar) |
306 | return i; |
307 | |
308 | if (c == '\\') |
309 | escape = true; |
310 | else |
311 | builder.append(c); |
312 | } |
313 | } |
314 | |
315 | return -1; |
316 | } |
317 | |
318 | static RegExp* parseRegExpLine(VM& vm, char* line, int lineLength, const char** regexpError) |
319 | { |
320 | StringBuilder pattern; |
321 | |
322 | if (line[0] != '/') |
323 | return 0; |
324 | |
325 | int i = scanString(line + 1, lineLength - 1, pattern, '/') + 1; |
326 | |
327 | if ((i >= lineLength) || (line[i] != '/')) |
328 | return 0; |
329 | |
330 | ++i; |
331 | |
332 | auto flags = Yarr::parseFlags(line + i); |
333 | if (!flags) { |
334 | *regexpError = Yarr::errorMessage(Yarr::ErrorCode::InvalidRegularExpressionFlags); |
335 | return nullptr; |
336 | } |
337 | |
338 | RegExp* r = RegExp::create(vm, pattern.toString(), flags.value()); |
339 | if (!r->isValid()) { |
340 | *regexpError = r->errorMessage(); |
341 | return nullptr; |
342 | } |
343 | |
344 | return r; |
345 | } |
346 | |
347 | static RegExpTest* parseTestLine(char* line, int lineLength) |
348 | { |
349 | StringBuilder subjectString; |
350 | |
351 | if ((line[0] != ' ') || (line[1] != '"')) |
352 | return 0; |
353 | |
354 | int i = scanString(line + 2, lineLength - 2, subjectString, '"') + 2; |
355 | |
356 | if ((i >= (lineLength - 2)) || (line[i] != '"') || (line[i+1] != ',') || (line[i+2] != ' ')) |
357 | return 0; |
358 | |
359 | i += 3; |
360 | |
361 | int offset; |
362 | |
363 | if (sscanf(line + i, "%d, " , &offset) != 1) |
364 | return 0; |
365 | |
366 | while (line[i] && line[i] != ' ') |
367 | ++i; |
368 | |
369 | ++i; |
370 | |
371 | int matchResult; |
372 | |
373 | if (sscanf(line + i, "%d, " , &matchResult) != 1) |
374 | return 0; |
375 | |
376 | while (line[i] && line[i] != ' ') |
377 | ++i; |
378 | |
379 | ++i; |
380 | |
381 | if (line[i++] != '(') |
382 | return 0; |
383 | |
384 | int start, end; |
385 | |
386 | RegExpTest* result = new RegExpTest(); |
387 | |
388 | result->subject = subjectString.toString(); |
389 | result->offset = offset; |
390 | result->result = matchResult; |
391 | |
392 | while (line[i] && line[i] != ')') { |
393 | if (sscanf(line + i, "%d, %d" , &start, &end) != 2) { |
394 | delete result; |
395 | return 0; |
396 | } |
397 | |
398 | result->expectVector.append(start); |
399 | result->expectVector.append(end); |
400 | |
401 | while (line[i] && (line[i] != ',') && (line[i] != ')')) |
402 | i++; |
403 | i++; |
404 | while (line[i] && (line[i] != ',') && (line[i] != ')')) |
405 | i++; |
406 | |
407 | if (line[i] == ')') |
408 | break; |
409 | if (!line[i] || (line[i] != ',')) { |
410 | delete result; |
411 | return 0; |
412 | } |
413 | i++; |
414 | } |
415 | |
416 | return result; |
417 | } |
418 | |
419 | static bool runFromFiles(GlobalObject* globalObject, const Vector<String>& files, bool verbose) |
420 | { |
421 | String script; |
422 | String fileName; |
423 | Vector<char> scriptBuffer; |
424 | unsigned tests = 0; |
425 | unsigned failures = 0; |
426 | Vector<char> lineBuffer(MaxLineLength + 1); |
427 | |
428 | VM& vm = globalObject->vm(); |
429 | |
430 | bool success = true; |
431 | for (size_t i = 0; i < files.size(); i++) { |
432 | FILE* testCasesFile = fopen(files[i].utf8().data(), "rb" ); |
433 | |
434 | if (!testCasesFile) { |
435 | printf("Unable to open test data file \"%s\"\n" , files[i].utf8().data()); |
436 | continue; |
437 | } |
438 | |
439 | RegExp* regexp = 0; |
440 | size_t lineLength = 0; |
441 | char* linePtr = 0; |
442 | unsigned int lineNumber = 0; |
443 | const char* regexpError = nullptr; |
444 | |
445 | while ((linePtr = fgets(lineBuffer.data(), MaxLineLength, testCasesFile))) { |
446 | lineLength = strlen(linePtr); |
447 | if (linePtr[lineLength - 1] == '\n') { |
448 | linePtr[lineLength - 1] = '\0'; |
449 | --lineLength; |
450 | } |
451 | ++lineNumber; |
452 | |
453 | if (linePtr[0] == '#') |
454 | continue; |
455 | |
456 | if (linePtr[0] == '/') { |
457 | regexp = parseRegExpLine(vm, linePtr, lineLength, ®expError); |
458 | if (!regexp) { |
459 | failures++; |
460 | fprintf(stderr, "Failure on line %u. '%s' %s\n" , lineNumber, linePtr, regexpError); |
461 | } |
462 | } else if (linePtr[0] == ' ') { |
463 | RegExpTest* regExpTest = parseTestLine(linePtr, lineLength); |
464 | |
465 | if (regexp && regExpTest) { |
466 | ++tests; |
467 | if (!testOneRegExp(vm, regexp, regExpTest, verbose, lineNumber)) { |
468 | failures++; |
469 | printf("Failure on line %u\n" , lineNumber); |
470 | } |
471 | } |
472 | |
473 | if (regExpTest) |
474 | delete regExpTest; |
475 | } else if (linePtr[0] == '-') { |
476 | tests++; |
477 | regexp = 0; // Reset the live regexp to avoid confusing other subsequent tests |
478 | bool successfullyParsed = parseRegExpLine(vm, linePtr + 1, lineLength - 1, ®expError); |
479 | if (successfullyParsed) { |
480 | failures++; |
481 | fprintf(stderr, "Failure on line %u. '%s' %s\n" , lineNumber, linePtr + 1, regexpError); |
482 | } |
483 | } |
484 | } |
485 | |
486 | fclose(testCasesFile); |
487 | } |
488 | |
489 | if (failures) |
490 | printf("%u tests run, %u failures\n" , tests, failures); |
491 | else |
492 | printf("%u tests passed\n" , tests); |
493 | |
494 | #if ENABLE(REGEXP_TRACING) |
495 | vm.dumpRegExpTrace(); |
496 | #endif |
497 | return success; |
498 | } |
499 | |
500 | #define RUNNING_FROM_XCODE 0 |
501 | |
502 | static NO_RETURN void printUsageStatement(bool help = false) |
503 | { |
504 | fprintf(stderr, "Usage: regexp_test [options] file\n" ); |
505 | fprintf(stderr, " -h|--help Prints this help message\n" ); |
506 | fprintf(stderr, " -v|--verbose Verbose output\n" ); |
507 | |
508 | exit(help ? EXIT_SUCCESS : EXIT_FAILURE); |
509 | } |
510 | |
511 | static void parseArguments(int argc, char** argv, CommandLine& options) |
512 | { |
513 | int i = 1; |
514 | for (; i < argc; ++i) { |
515 | const char* arg = argv[i]; |
516 | if (!strcmp(arg, "-h" ) || !strcmp(arg, "--help" )) |
517 | printUsageStatement(true); |
518 | if (!strcmp(arg, "-v" ) || !strcmp(arg, "--verbose" )) |
519 | options.verbose = true; |
520 | else |
521 | options.files.append(argv[i]); |
522 | } |
523 | |
524 | for (; i < argc; ++i) |
525 | options.arguments.append(argv[i]); |
526 | } |
527 | |
528 | int realMain(int argc, char** argv) |
529 | { |
530 | VM* vm = &VM::create(LargeHeap).leakRef(); |
531 | JSLockHolder locker(vm); |
532 | |
533 | CommandLine options; |
534 | parseArguments(argc, argv, options); |
535 | |
536 | GlobalObject* globalObject = GlobalObject::create(*vm, GlobalObject::createStructure(*vm, jsNull()), options.arguments); |
537 | bool success = runFromFiles(globalObject, options.files, options.verbose); |
538 | |
539 | return success ? 0 : 3; |
540 | } |
541 | |
542 | #if OS(WINDOWS) |
543 | extern "C" __declspec(dllexport) int WINAPI dllLauncherEntryPoint(int argc, const char* argv[]) |
544 | { |
545 | return main(argc, const_cast<char**>(argv)); |
546 | } |
547 | #endif |
548 | |