1/*
2 * Copyright (C) 2011, 2015 Apple Inc. All rights reserved.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
13 *
14 * You should have received a copy of the GNU Library General Public License
15 * along with this library; see the file COPYING.LIB. If not, write to
16 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 *
19 */
20
21#include "config.h"
22#include "RegExp.h"
23
24#include "InitializeThreading.h"
25#include "JSCInlines.h"
26#include "JSGlobalObject.h"
27#include "YarrFlags.h"
28#include <errno.h>
29#include <stdio.h>
30#include <stdlib.h>
31#include <string.h>
32#include <wtf/Vector.h>
33#include <wtf/text/StringBuilder.h>
34
35#if !OS(WINDOWS)
36#include <unistd.h>
37#endif
38
39#if HAVE(SYS_TIME_H)
40#include <sys/time.h>
41#endif
42
43#if COMPILER(MSVC)
44#include <crtdbg.h>
45#include <mmsystem.h>
46#include <windows.h>
47#endif
48
// Largest number of bytes requested per line when reading a test data file.
constexpr int MaxLineLength = 100 * 1024;
50
51using namespace JSC;
52
53struct CommandLine {
54 CommandLine()
55 : interactive(false)
56 , verbose(false)
57 {
58 }
59
60 bool interactive;
61 bool verbose;
62 Vector<String> arguments;
63 Vector<String> files;
64};
65
66class StopWatch {
67public:
68 void start();
69 void stop();
70 long getElapsedMS(); // call stop() first
71
72private:
73 MonotonicTime m_startTime;
74 MonotonicTime m_stopTime;
75};
76
77void StopWatch::start()
78{
79 m_startTime = MonotonicTime::now();
80}
81
82void StopWatch::stop()
83{
84 m_stopTime = MonotonicTime::now();
85}
86
87long StopWatch::getElapsedMS()
88{
89 return (m_stopTime - m_startTime).millisecondsAs<long>();
90}
91
92struct RegExpTest {
93 RegExpTest()
94 : offset(0)
95 , result(0)
96 {
97 }
98
99 String subject;
100 int offset;
101 int result;
102 Vector<int, 32> expectVector;
103};
104
105class GlobalObject : public JSGlobalObject {
106private:
107 GlobalObject(VM&, Structure*, const Vector<String>& arguments);
108
109public:
110 typedef JSGlobalObject Base;
111
112 static GlobalObject* create(VM& vm, Structure* structure, const Vector<String>& arguments)
113 {
114 GlobalObject* globalObject = new (NotNull, allocateCell<GlobalObject>(vm.heap)) GlobalObject(vm, structure, arguments);
115 return globalObject;
116 }
117
118 DECLARE_INFO;
119
120 static const bool needsDestructor = false;
121
122 static Structure* createStructure(VM& vm, JSValue prototype)
123 {
124 return Structure::create(vm, 0, prototype, TypeInfo(GlobalObjectType, StructureFlags), info());
125 }
126
127protected:
128 void finishCreation(VM& vm, const Vector<String>& arguments)
129 {
130 Base::finishCreation(vm);
131 UNUSED_PARAM(arguments);
132 }
133};
134
135const ClassInfo GlobalObject::s_info = { "global", &JSGlobalObject::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(GlobalObject) };
136
137GlobalObject::GlobalObject(VM& vm, Structure* structure, const Vector<String>& arguments)
138 : JSGlobalObject(vm, structure)
139{
140 finishCreation(vm, arguments);
141}
142
143// Use SEH for Release builds only to get rid of the crash report dialog
144// (luckily the same tests fail in Release and Debug builds so far). Need to
145// be in a separate main function because the realMain function requires object
146// unwinding.
147
148#if COMPILER(MSVC) && !defined(_DEBUG)
149#define TRY __try {
150#define EXCEPT(x) } __except (EXCEPTION_EXECUTE_HANDLER) { x; }
151#else
152#define TRY
153#define EXCEPT(x)
154#endif
155
156int realMain(int argc, char** argv);
157
158int main(int argc, char** argv)
159{
160#if OS(WINDOWS)
161 // Cygwin calls ::SetErrorMode(SEM_FAILCRITICALERRORS), which we will inherit. This is bad for
162 // testing/debugging, as it causes the post-mortem debugger not to be invoked. We reset the
163 // error mode here to work around Cygwin's behavior. See <http://webkit.org/b/55222>.
164 ::SetErrorMode(0);
165
166#if defined(_DEBUG)
167 _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR);
168 _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE);
169 _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR);
170 _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE);
171 _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
172 _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE);
173#endif
174
175 timeBeginPeriod(1);
176#endif
177
178 // Initialize JSC before getting VM.
179 JSC::initializeThreading();
180
181 // We can't use destructors in the following code because it uses Windows
182 // Structured Exception Handling
183 int res = 0;
184 TRY
185 res = realMain(argc, argv);
186 EXCEPT(res = 3)
187 return res;
188}
189
190static bool testOneRegExp(VM& vm, RegExp* regexp, RegExpTest* regExpTest, bool verbose, unsigned int lineNumber)
191{
192 bool result = true;
193 Vector<int> outVector;
194 outVector.resize(regExpTest->expectVector.size());
195 int matchResult = regexp->match(vm, regExpTest->subject, regExpTest->offset, outVector);
196
197 if (matchResult != regExpTest->result) {
198 result = false;
199 if (verbose)
200 printf("Line %d: results mismatch - expected %d got %d\n", lineNumber, regExpTest->result, matchResult);
201 } else if (matchResult != -1) {
202 if (outVector.size() != regExpTest->expectVector.size()) {
203 result = false;
204 if (verbose) {
205#if OS(WINDOWS)
206 printf("Line %d: output vector size mismatch - expected %Iu got %Iu\n", lineNumber, regExpTest->expectVector.size(), outVector.size());
207#else
208 printf("Line %d: output vector size mismatch - expected %zu got %zu\n", lineNumber, regExpTest->expectVector.size(), outVector.size());
209#endif
210 }
211 } else if (outVector.size() % 2) {
212 result = false;
213 if (verbose) {
214#if OS(WINDOWS)
215 printf("Line %d: output vector size is odd (%Iu), should be even\n", lineNumber, outVector.size());
216#else
217 printf("Line %d: output vector size is odd (%zu), should be even\n", lineNumber, outVector.size());
218#endif
219 }
220 } else {
221 // Check in pairs since the first value of the pair could be -1 in which case the second doesn't matter.
222 size_t pairCount = outVector.size() / 2;
223 for (size_t i = 0; i < pairCount; ++i) {
224 size_t startIndex = i*2;
225 if (outVector[startIndex] != regExpTest->expectVector[startIndex]) {
226 result = false;
227 if (verbose) {
228#if OS(WINDOWS)
229 printf("Line %d: output vector mismatch at index %Iu - expected %d got %d\n", lineNumber, startIndex, regExpTest->expectVector[startIndex], outVector[startIndex]);
230#else
231 printf("Line %d: output vector mismatch at index %zu - expected %d got %d\n", lineNumber, startIndex, regExpTest->expectVector[startIndex], outVector[startIndex]);
232#endif
233 }
234 }
235 if ((i > 0) && (regExpTest->expectVector[startIndex] != -1) && (outVector[startIndex+1] != regExpTest->expectVector[startIndex+1])) {
236 result = false;
237 if (verbose) {
238#if OS(WINDOWS)
239 printf("Line %d: output vector mismatch at index %Iu - expected %d got %d\n", lineNumber, startIndex + 1, regExpTest->expectVector[startIndex + 1], outVector[startIndex + 1]);
240#else
241 printf("Line %d: output vector mismatch at index %zu - expected %d got %d\n", lineNumber, startIndex + 1, regExpTest->expectVector[startIndex + 1], outVector[startIndex + 1]);
242#endif
243 }
244 }
245 }
246 }
247 }
248
249 return result;
250}
251
252static int scanString(char* buffer, int bufferLength, StringBuilder& builder, char termChar)
253{
254 bool escape = false;
255
256 for (int i = 0; i < bufferLength; ++i) {
257 UChar c = buffer[i];
258
259 if (escape) {
260 switch (c) {
261 case '0':
262 c = '\0';
263 break;
264 case 'a':
265 c = '\a';
266 break;
267 case 'b':
268 c = '\b';
269 break;
270 case 'f':
271 c = '\f';
272 break;
273 case 'n':
274 c = '\n';
275 break;
276 case 'r':
277 c = '\r';
278 break;
279 case 't':
280 c = '\t';
281 break;
282 case 'v':
283 c = '\v';
284 break;
285 case '\\':
286 c = '\\';
287 break;
288 case '?':
289 c = '\?';
290 break;
291 case 'u':
292 if ((i + 4) >= bufferLength)
293 return -1;
294 unsigned int charValue;
295 if (sscanf(buffer+i+1, "%04x", &charValue) != 1)
296 return -1;
297 c = static_cast<UChar>(charValue);
298 i += 4;
299 break;
300 }
301
302 builder.append(c);
303 escape = false;
304 } else {
305 if (c == termChar)
306 return i;
307
308 if (c == '\\')
309 escape = true;
310 else
311 builder.append(c);
312 }
313 }
314
315 return -1;
316}
317
318static RegExp* parseRegExpLine(VM& vm, char* line, int lineLength, const char** regexpError)
319{
320 StringBuilder pattern;
321
322 if (line[0] != '/')
323 return 0;
324
325 int i = scanString(line + 1, lineLength - 1, pattern, '/') + 1;
326
327 if ((i >= lineLength) || (line[i] != '/'))
328 return 0;
329
330 ++i;
331
332 auto flags = Yarr::parseFlags(line + i);
333 if (!flags) {
334 *regexpError = Yarr::errorMessage(Yarr::ErrorCode::InvalidRegularExpressionFlags);
335 return nullptr;
336 }
337
338 RegExp* r = RegExp::create(vm, pattern.toString(), flags.value());
339 if (!r->isValid()) {
340 *regexpError = r->errorMessage();
341 return nullptr;
342 }
343
344 return r;
345}
346
347static RegExpTest* parseTestLine(char* line, int lineLength)
348{
349 StringBuilder subjectString;
350
351 if ((line[0] != ' ') || (line[1] != '"'))
352 return 0;
353
354 int i = scanString(line + 2, lineLength - 2, subjectString, '"') + 2;
355
356 if ((i >= (lineLength - 2)) || (line[i] != '"') || (line[i+1] != ',') || (line[i+2] != ' '))
357 return 0;
358
359 i += 3;
360
361 int offset;
362
363 if (sscanf(line + i, "%d, ", &offset) != 1)
364 return 0;
365
366 while (line[i] && line[i] != ' ')
367 ++i;
368
369 ++i;
370
371 int matchResult;
372
373 if (sscanf(line + i, "%d, ", &matchResult) != 1)
374 return 0;
375
376 while (line[i] && line[i] != ' ')
377 ++i;
378
379 ++i;
380
381 if (line[i++] != '(')
382 return 0;
383
384 int start, end;
385
386 RegExpTest* result = new RegExpTest();
387
388 result->subject = subjectString.toString();
389 result->offset = offset;
390 result->result = matchResult;
391
392 while (line[i] && line[i] != ')') {
393 if (sscanf(line + i, "%d, %d", &start, &end) != 2) {
394 delete result;
395 return 0;
396 }
397
398 result->expectVector.append(start);
399 result->expectVector.append(end);
400
401 while (line[i] && (line[i] != ',') && (line[i] != ')'))
402 i++;
403 i++;
404 while (line[i] && (line[i] != ',') && (line[i] != ')'))
405 i++;
406
407 if (line[i] == ')')
408 break;
409 if (!line[i] || (line[i] != ',')) {
410 delete result;
411 return 0;
412 }
413 i++;
414 }
415
416 return result;
417}
418
419static bool runFromFiles(GlobalObject* globalObject, const Vector<String>& files, bool verbose)
420{
421 String script;
422 String fileName;
423 Vector<char> scriptBuffer;
424 unsigned tests = 0;
425 unsigned failures = 0;
426 Vector<char> lineBuffer(MaxLineLength + 1);
427
428 VM& vm = globalObject->vm();
429
430 bool success = true;
431 for (size_t i = 0; i < files.size(); i++) {
432 FILE* testCasesFile = fopen(files[i].utf8().data(), "rb");
433
434 if (!testCasesFile) {
435 printf("Unable to open test data file \"%s\"\n", files[i].utf8().data());
436 continue;
437 }
438
439 RegExp* regexp = 0;
440 size_t lineLength = 0;
441 char* linePtr = 0;
442 unsigned int lineNumber = 0;
443 const char* regexpError = nullptr;
444
445 while ((linePtr = fgets(lineBuffer.data(), MaxLineLength, testCasesFile))) {
446 lineLength = strlen(linePtr);
447 if (linePtr[lineLength - 1] == '\n') {
448 linePtr[lineLength - 1] = '\0';
449 --lineLength;
450 }
451 ++lineNumber;
452
453 if (linePtr[0] == '#')
454 continue;
455
456 if (linePtr[0] == '/') {
457 regexp = parseRegExpLine(vm, linePtr, lineLength, &regexpError);
458 if (!regexp) {
459 failures++;
460 fprintf(stderr, "Failure on line %u. '%s' %s\n", lineNumber, linePtr, regexpError);
461 }
462 } else if (linePtr[0] == ' ') {
463 RegExpTest* regExpTest = parseTestLine(linePtr, lineLength);
464
465 if (regexp && regExpTest) {
466 ++tests;
467 if (!testOneRegExp(vm, regexp, regExpTest, verbose, lineNumber)) {
468 failures++;
469 printf("Failure on line %u\n", lineNumber);
470 }
471 }
472
473 if (regExpTest)
474 delete regExpTest;
475 } else if (linePtr[0] == '-') {
476 tests++;
477 regexp = 0; // Reset the live regexp to avoid confusing other subsequent tests
478 bool successfullyParsed = parseRegExpLine(vm, linePtr + 1, lineLength - 1, &regexpError);
479 if (successfullyParsed) {
480 failures++;
481 fprintf(stderr, "Failure on line %u. '%s' %s\n", lineNumber, linePtr + 1, regexpError);
482 }
483 }
484 }
485
486 fclose(testCasesFile);
487 }
488
489 if (failures)
490 printf("%u tests run, %u failures\n", tests, failures);
491 else
492 printf("%u tests passed\n", tests);
493
494#if ENABLE(REGEXP_TRACING)
495 vm.dumpRegExpTrace();
496#endif
497 return success;
498}
499
500#define RUNNING_FROM_XCODE 0
501
502static NO_RETURN void printUsageStatement(bool help = false)
503{
504 fprintf(stderr, "Usage: regexp_test [options] file\n");
505 fprintf(stderr, " -h|--help Prints this help message\n");
506 fprintf(stderr, " -v|--verbose Verbose output\n");
507
508 exit(help ? EXIT_SUCCESS : EXIT_FAILURE);
509}
510
511static void parseArguments(int argc, char** argv, CommandLine& options)
512{
513 int i = 1;
514 for (; i < argc; ++i) {
515 const char* arg = argv[i];
516 if (!strcmp(arg, "-h") || !strcmp(arg, "--help"))
517 printUsageStatement(true);
518 if (!strcmp(arg, "-v") || !strcmp(arg, "--verbose"))
519 options.verbose = true;
520 else
521 options.files.append(argv[i]);
522 }
523
524 for (; i < argc; ++i)
525 options.arguments.append(argv[i]);
526}
527
528int realMain(int argc, char** argv)
529{
530 VM* vm = &VM::create(LargeHeap).leakRef();
531 JSLockHolder locker(vm);
532
533 CommandLine options;
534 parseArguments(argc, argv, options);
535
536 GlobalObject* globalObject = GlobalObject::create(*vm, GlobalObject::createStructure(*vm, jsNull()), options.arguments);
537 bool success = runFromFiles(globalObject, options.files, options.verbose);
538
539 return success ? 0 : 3;
540}
541
542#if OS(WINDOWS)
543extern "C" __declspec(dllexport) int WINAPI dllLauncherEntryPoint(int argc, const char* argv[])
544{
545 return main(argc, const_cast<char**>(argv));
546}
547#endif
548