1 | /* |
2 | * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) |
3 | * Copyright (C) 2007, 2008, 2009, 2016 Apple Inc. All rights reserved. |
4 | * Copyright (C) 2009 Torch Mobile, Inc. |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with this library; if not, write to the Free Software |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | * |
20 | */ |
21 | |
22 | #pragma once |
23 | |
24 | #include "ConcurrentJSLock.h" |
25 | #include "MatchResult.h" |
26 | #include "RegExpKey.h" |
27 | #include "Structure.h" |
28 | #include "Yarr.h" |
29 | #include <wtf/Forward.h> |
30 | #include <wtf/text/WTFString.h> |
31 | |
32 | #if ENABLE(YARR_JIT) |
33 | #include "YarrJIT.h" |
34 | #endif |
35 | |
36 | namespace JSC { |
37 | |
38 | struct RegExpRepresentation; |
39 | class VM; |
40 | |
41 | class RegExp final : public JSCell { |
42 | friend class CachedRegExp; |
43 | |
44 | public: |
45 | typedef JSCell Base; |
46 | static const unsigned StructureFlags = Base::StructureFlags | StructureIsImmortal; |
47 | |
48 | JS_EXPORT_PRIVATE static RegExp* create(VM&, const String& pattern, OptionSet<Yarr::Flags>); |
49 | static const bool needsDestruction = true; |
50 | static void destroy(JSCell*); |
51 | static size_t estimatedSize(JSCell*, VM&); |
52 | JS_EXPORT_PRIVATE static void dumpToStream(const JSCell*, PrintStream&); |
53 | |
54 | bool global() const { return m_flags.contains(Yarr::Flags::Global); } |
55 | bool ignoreCase() const { return m_flags.contains(Yarr::Flags::IgnoreCase); } |
56 | bool multiline() const { return m_flags.contains(Yarr::Flags::Multiline); } |
57 | bool sticky() const { return m_flags.contains(Yarr::Flags::Sticky); } |
58 | bool globalOrSticky() const { return global() || sticky(); } |
59 | bool unicode() const { return m_flags.contains(Yarr::Flags::Unicode); } |
60 | bool dotAll() const { return m_flags.contains(Yarr::Flags::DotAll); } |
61 | |
62 | const String& pattern() const { return m_patternString; } |
63 | |
64 | bool isValid() const { return !Yarr::hasError(m_constructionErrorCode); } |
65 | const char* errorMessage() const { return Yarr::errorMessage(m_constructionErrorCode); } |
66 | JSObject* errorToThrow(ExecState* exec) { return Yarr::errorToThrow(exec, m_constructionErrorCode); } |
67 | void reset() |
68 | { |
69 | m_state = NotCompiled; |
70 | m_constructionErrorCode = Yarr::ErrorCode::NoError; |
71 | } |
72 | |
73 | JS_EXPORT_PRIVATE int match(VM&, const String&, unsigned startOffset, Vector<int>& ovector); |
74 | |
75 | // Returns false if we couldn't run the regular expression for any reason. |
76 | bool matchConcurrently(VM&, const String&, unsigned startOffset, int& position, Vector<int>& ovector); |
77 | |
78 | JS_EXPORT_PRIVATE MatchResult match(VM&, const String&, unsigned startOffset); |
79 | |
80 | bool matchConcurrently(VM&, const String&, unsigned startOffset, MatchResult&); |
81 | |
82 | // Call these versions of the match functions if you're desperate for performance. |
83 | template<typename VectorType> |
84 | int matchInline(VM&, const String&, unsigned startOffset, VectorType& ovector); |
85 | MatchResult matchInline(VM&, const String&, unsigned startOffset); |
86 | |
87 | unsigned numSubpatterns() const { return m_numSubpatterns; } |
88 | |
89 | bool hasNamedCaptures() |
90 | { |
91 | return m_rareData && !m_rareData->m_captureGroupNames.isEmpty(); |
92 | } |
93 | |
94 | String getCaptureGroupName(unsigned i) |
95 | { |
96 | if (!i || !m_rareData || m_rareData->m_captureGroupNames.size() <= i) |
97 | return String(); |
98 | ASSERT(m_rareData); |
99 | return m_rareData->m_captureGroupNames[i]; |
100 | } |
101 | |
102 | unsigned subpatternForName(String groupName) |
103 | { |
104 | if (!m_rareData) |
105 | return 0; |
106 | auto it = m_rareData->m_namedGroupToParenIndex.find(groupName); |
107 | if (it == m_rareData->m_namedGroupToParenIndex.end()) |
108 | return 0; |
109 | return it->value; |
110 | } |
111 | |
112 | bool hasCode() |
113 | { |
114 | return m_state == JITCode || m_state == ByteCode; |
115 | } |
116 | |
117 | bool hasCodeFor(Yarr::YarrCharSize); |
118 | bool hasMatchOnlyCodeFor(Yarr::YarrCharSize); |
119 | |
120 | void deleteCode(); |
121 | |
122 | #if ENABLE(REGEXP_TRACING) |
123 | void printTraceData(); |
124 | #endif |
125 | |
126 | static Structure* createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype) |
127 | { |
128 | return Structure::create(vm, globalObject, prototype, TypeInfo(CellType, StructureFlags), info()); |
129 | } |
130 | |
131 | DECLARE_INFO; |
132 | |
133 | RegExpKey key() { return RegExpKey(m_flags, m_patternString); } |
134 | |
135 | protected: |
136 | void finishCreation(VM&); |
137 | |
138 | private: |
139 | friend class RegExpCache; |
140 | RegExp(VM&, const String&, OptionSet<Yarr::Flags>); |
141 | |
142 | static RegExp* createWithoutCaching(VM&, const String&, OptionSet<Yarr::Flags>); |
143 | |
144 | enum RegExpState : uint8_t { |
145 | ParseError, |
146 | JITCode, |
147 | ByteCode, |
148 | NotCompiled |
149 | }; |
150 | |
151 | void byteCodeCompileIfNecessary(VM*); |
152 | |
153 | void compile(VM*, Yarr::YarrCharSize); |
154 | void compileIfNecessary(VM&, Yarr::YarrCharSize); |
155 | |
156 | void compileMatchOnly(VM*, Yarr::YarrCharSize); |
157 | void compileIfNecessaryMatchOnly(VM&, Yarr::YarrCharSize); |
158 | |
159 | #if ENABLE(YARR_JIT_DEBUG) |
160 | void matchCompareWithInterpreter(const String&, int startOffset, int* offsetVector, int jitResult); |
161 | #endif |
162 | |
163 | #if ENABLE(YARR_JIT) |
164 | Yarr::YarrCodeBlock& ensureRegExpJITCode() |
165 | { |
166 | if (!m_regExpJITCode) |
167 | m_regExpJITCode = std::make_unique<Yarr::YarrCodeBlock>(); |
168 | return *m_regExpJITCode.get(); |
169 | } |
170 | #endif |
171 | |
172 | struct RareData { |
173 | WTF_MAKE_STRUCT_FAST_ALLOCATED; |
174 | Vector<String> m_captureGroupNames; |
175 | HashMap<String, unsigned> m_namedGroupToParenIndex; |
176 | }; |
177 | |
178 | String m_patternString; |
179 | RegExpState m_state { NotCompiled }; |
180 | OptionSet<Yarr::Flags> m_flags; |
181 | Yarr::ErrorCode m_constructionErrorCode { Yarr::ErrorCode::NoError }; |
182 | unsigned m_numSubpatterns { 0 }; |
183 | std::unique_ptr<Yarr::BytecodePattern> m_regExpBytecode; |
184 | #if ENABLE(YARR_JIT) |
185 | std::unique_ptr<Yarr::YarrCodeBlock> m_regExpJITCode; |
186 | #endif |
187 | std::unique_ptr<RareData> m_rareData; |
188 | #if ENABLE(REGEXP_TRACING) |
189 | double m_rtMatchOnlyTotalSubjectStringLen { 0.0 }; |
190 | double m_rtMatchTotalSubjectStringLen { 0.0 }; |
191 | unsigned m_rtMatchOnlyCallCount { 0 }; |
192 | unsigned m_rtMatchOnlyFoundCount { 0 }; |
193 | unsigned m_rtMatchCallCount { 0 }; |
194 | unsigned m_rtMatchFoundCount { 0 }; |
195 | #endif |
196 | }; |
197 | |
198 | } // namespace JSC |
199 | |