1/*
2 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2008 Collabora Ltd.
4 * Copyright (C) 2011 Peter Varga ([email protected]), University of Szeged
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "config.h"
29#include "RegularExpression.h"
30
31#include "Yarr.h"
32#include "YarrFlags.h"
33#include "YarrInterpreter.h"
34#include <wtf/Assertions.h>
35#include <wtf/BumpPointerAllocator.h>
36
37namespace JSC { namespace Yarr {
38
39class RegularExpression::Private : public RefCounted<RegularExpression::Private> {
40public:
41 static Ref<Private> create(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode, UnicodeMode unicodeMode)
42 {
43 return adoptRef(*new Private(pattern, caseSensitivity, multilineMode, unicodeMode));
44 }
45
46private:
47 Private(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode, UnicodeMode unicodeMode)
48 : m_regExpByteCode(compile(pattern, caseSensitivity, multilineMode, unicodeMode))
49 {
50 }
51
52 std::unique_ptr<JSC::Yarr::BytecodePattern> compile(const String& patternString, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode, UnicodeMode unicodeMode)
53 {
54 OptionSet<JSC::Yarr::Flags> flags;
55
56 if (caseSensitivity == TextCaseInsensitive)
57 flags.add(Flags::IgnoreCase);
58
59 if (multilineMode == MultilineEnabled)
60 flags.add(Flags::Multiline);
61
62 if (unicodeMode == UnicodeAwareMode)
63 flags.add(Flags::Unicode);
64
65 JSC::Yarr::YarrPattern pattern(patternString, flags, m_constructionErrorCode);
66 if (JSC::Yarr::hasError(m_constructionErrorCode)) {
67 LOG_ERROR("RegularExpression: YARR compile failed with '%s'", JSC::Yarr::errorMessage(m_constructionErrorCode));
68 return nullptr;
69 }
70
71 m_numSubpatterns = pattern.m_numSubpatterns;
72
73 return JSC::Yarr::byteCompile(pattern, &m_regexAllocator, m_constructionErrorCode);
74 }
75
76 JSC::Yarr::ErrorCode m_constructionErrorCode { Yarr::ErrorCode::NoError };
77 BumpPointerAllocator m_regexAllocator;
78
79public:
80 int lastMatchLength { -1 };
81 unsigned m_numSubpatterns;
82 std::unique_ptr<JSC::Yarr::BytecodePattern> m_regExpByteCode;
83};
84
85RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode, UnicodeMode unicodeMode)
86 : d(Private::create(pattern, caseSensitivity, multilineMode, unicodeMode))
87{
88}
89
90RegularExpression::RegularExpression(const RegularExpression& re)
91 : d(re.d)
92{
93}
94
95RegularExpression::~RegularExpression()
96{
97}
98
99RegularExpression& RegularExpression::operator=(const RegularExpression& re)
100{
101 d = re.d;
102 return *this;
103}
104
105int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
106{
107 if (!d->m_regExpByteCode)
108 return -1;
109
110 if (str.isNull())
111 return -1;
112
113 int offsetVectorSize = (d->m_numSubpatterns + 1) * 2;
114 unsigned* offsetVector;
115 Vector<unsigned, 32> nonReturnedOvector;
116
117 nonReturnedOvector.grow(offsetVectorSize);
118 offsetVector = nonReturnedOvector.data();
119
120 ASSERT(offsetVector);
121 for (unsigned j = 0, i = 0; i < d->m_numSubpatterns + 1; j += 2, i++)
122 offsetVector[j] = JSC::Yarr::offsetNoMatch;
123
124 unsigned result;
125 if (str.length() <= INT_MAX)
126 result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), str, startFrom, offsetVector);
127 else {
128 // This code can't handle unsigned offsets. Limit our processing to strings with offsets that
129 // can be represented as ints.
130 result = JSC::Yarr::offsetNoMatch;
131 }
132
133 if (result == JSC::Yarr::offsetNoMatch) {
134 d->lastMatchLength = -1;
135 return -1;
136 }
137
138 // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector.
139 d->lastMatchLength = offsetVector[1] - offsetVector[0];
140 if (matchLength)
141 *matchLength = d->lastMatchLength;
142 return offsetVector[0];
143}
144
145int RegularExpression::searchRev(const String& str) const
146{
147 // FIXME: This could be faster if it actually searched backwards.
148 // Instead, it just searches forwards, multiple times until it finds the last match.
149
150 int start = 0;
151 int pos;
152 int lastPos = -1;
153 int lastMatchLength = -1;
154 do {
155 int matchLength;
156 pos = match(str, start, &matchLength);
157 if (pos >= 0) {
158 if (pos + matchLength > lastPos + lastMatchLength) {
159 // replace last match if this one is later and not a subset of the last match
160 lastPos = pos;
161 lastMatchLength = matchLength;
162 }
163 start = pos + 1;
164 }
165 } while (pos != -1);
166 d->lastMatchLength = lastMatchLength;
167 return lastPos;
168}
169
170int RegularExpression::matchedLength() const
171{
172 return d->lastMatchLength;
173}
174
175void replace(String& string, const RegularExpression& target, const String& replacement)
176{
177 int index = 0;
178 while (index < static_cast<int>(string.length())) {
179 int matchLength;
180 index = target.match(string, index, &matchLength);
181 if (index < 0)
182 break;
183 string.replace(index, matchLength, replacement);
184 index += replacement.length();
185 if (!matchLength)
186 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
187 }
188}
189
190bool RegularExpression::isValid() const
191{
192 return d->m_regExpByteCode.get();
193}
194
195} } // namespace JSC::Yarr
196