1 | /* |
2 | * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2011, 2012, 2013 Apple Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
24 | */ |
25 | |
26 | #pragma once |
27 | |
28 | #include <wtf/Forward.h> |
29 | #include <wtf/RetainPtr.h> |
30 | #include <wtf/text/WTFString.h> |
31 | |
32 | #if USE(CF) |
33 | typedef const struct __CFURL* CFURLRef; |
34 | #endif |
35 | |
36 | #if USE(FOUNDATION) |
37 | OBJC_CLASS NSURL; |
38 | #endif |
39 | |
40 | namespace WTF { |
41 | class TextStream; |
42 | |
43 | class URLTextEncoding { |
44 | public: |
45 | virtual Vector<uint8_t> encodeForURLParsing(StringView) const = 0; |
46 | virtual ~URLTextEncoding() { }; |
47 | }; |
48 | |
49 | struct URLHash; |
50 | |
// A parsed URL. The complete URL text is held in m_string; the remaining data
// members are flag bits and unsigned positions that the inline accessors defined
// after the class compare with each other and with m_string.length().
class WTF_EXPORT_PRIVATE URL {
    WTF_MAKE_FAST_ALLOCATED;
public:
    // Generates a URL which contains a null string.
    URL() { invalidate(); }

    // Builds the value used to mark deleted hash table slots. Only m_string is
    // initialized by this constructor; the other data members are left unset.
    explicit URL(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { }
    bool isHashTableDeletedValue() const { return string().isHashTableDeletedValue(); }

    // Resolves the relative URL with the given base URL. If provided, the
    // URLTextEncoding is used to encode non-ASCII characters. The base URL can be
    // null or empty, in which case the relative URL will be interpreted as
    // absolute.
    // FIXME: If the base URL is invalid, this always creates an invalid
    // URL. Instead I think it would be better to treat all invalid base URLs
    // the same way we treat null and empty base URLs.
    URL(const URL& base, const String& relative, const URLTextEncoding* = nullptr);

    static URL fakeURLWithRelativePart(const String&);
    static URL fileURLWithFileSystemPath(const String&);

    String strippedForUseAsReferrer() const;

    // FIXME: The above functions should be harmonized so that passing a
    // base of null or the empty string gives the same result as the
    // standard String constructor.

    // Makes a deep copy. Helpful only if you need to use a URL on another
    // thread. Since the underlying StringImpl objects are immutable, there's
    // no other reason to ever prefer isolatedCopy() over plain old assignment.
    URL isolatedCopy() const;

    // isNull() and isEmpty() forward to the underlying string; isValid() reports
    // the m_isValid flag. All three are defined inline after the class.
    bool isNull() const;
    bool isEmpty() const;
    bool isValid() const;

    // Returns true if you can set the host and port for the URL.
    // Non-hierarchical URLs don't have a host and port.
    bool canSetHostOrPort() const { return isHierarchical(); }

    // Same condition as canSetHostOrPort(): both forward to isHierarchical().
    bool canSetPathname() const { return isHierarchical(); }
    bool isHierarchical() const;

    // Returns the underlying string by const reference.
    const String& string() const { return m_string; }

    String stringCenterEllipsizedToLength(unsigned length = 1024) const;

    // Component accessors. protocol() and host() return StringView, port() returns
    // an Optional, and the others return String.
    StringView protocol() const;
    StringView host() const;
    Optional<uint16_t> port() const;
    String hostAndPort() const;
    String protocolHostAndPort() const;
    String user() const;
    String pass() const;
    String path() const;
    String lastPathComponent() const;
    String query() const;
    String fragmentIdentifier() const;
    bool hasFragmentIdentifier() const;

    // Component presence checks, defined inline after the class in terms of the
    // stored positions.
    bool hasUsername() const;
    bool hasPassword() const;
    bool hasQuery() const;
    bool hasFragment() const;
    bool hasPath() const;

    // Unlike user() and pass(), these functions don't decode escape sequences.
    // This is necessary for accurate round-tripping, because encoding doesn't encode '%' characters.
    String encodedUser() const;
    String encodedPass() const;

    String baseAsString() const;

    String fileSystemPath() const;

    // Returns true if the current URL's protocol is the same as the null-
    // terminated ASCII argument. The argument must be lower-case.
    bool protocolIs(const char*) const;
    bool protocolIs(StringView) const;
    bool protocolIsBlob() const { return protocolIs("blob" ); }
    bool protocolIsData() const { return protocolIs("data" ); }
    bool protocolIsAbout() const;
    bool protocolIsInHTTPFamily() const;
    bool isLocalFile() const;
    bool isBlankURL() const;
    // Reports the m_cannotBeABaseURL flag.
    bool cannotBeABaseURL() const { return m_cannotBeABaseURL; }

    bool isMatchingDomain(const String&) const;

    // Unlike the other setters, setProtocol() returns a bool.
    // NOTE(review): presumably false means the protocol was rejected; confirm in the implementation.
    bool setProtocol(const String&);
    void setHost(const String&);

    void removePort();
    void setPort(unsigned short);

    // Input is like "foo.com" or "foo.com:8000".
    void setHostAndPort(const String&);

    void setUser(const String&);
    void setPass(const String&);

    // If you pass an empty path for HTTP or HTTPS URLs, the resulting path
    // will be "/".
    void setPath(const String&);

    // The query may begin with a question mark, or, if not, one will be added
    // for you. Setting the query to the empty string will leave a "?" in the
    // URL (with nothing after it). To clear the query, pass a null string.
    void setQuery(const String&);

    void setFragmentIdentifier(StringView);
    void removeFragmentIdentifier();

    void removeQueryAndFragmentIdentifier();

    static bool hostIsIPAddress(StringView);

    // Raw positions; defined inline after the class. pathStart() is computed as
    // m_hostEnd + m_portLength, the other two return the stored member directly.
    unsigned pathStart() const;
    unsigned pathEnd() const;
    unsigned pathAfterLastSlash() const;

    // Implicit conversion to the underlying string; same result as string().
    operator const String&() const { return string(); }

#if USE(CF)
    URL(CFURLRef);
    RetainPtr<CFURLRef> createCFURL() const;
#endif

#if USE(FOUNDATION)
    URL(NSURL*);
    operator NSURL*() const;
#endif
#ifdef __OBJC__
    // Only declared when compiling as Objective-C; converts the result of string().
    operator NSString*() const { return string(); }
#endif

#ifndef NDEBUG
    // Only declared when NDEBUG is not defined.
    void print() const;
#endif

    // Serialization hooks, defined as templates after the class. encode() writes
    // only m_string; decode() reads a String and rebuilds the URL from it through
    // the (base, relative) constructor with a default-constructed base.
    template <class Encoder> void encode(Encoder&) const;
    template <class Decoder> static bool decode(Decoder&, URL&);
    template <class Decoder> static Optional<URL> decode(Decoder&);

private:
    // URLParser has access to the private members below.
    friend class URLParser;
    void invalidate();
    static bool protocolIs(const String&, const char*);
    void copyToBuffer(Vector<char, 512>& buffer) const;
    unsigned hostStart() const;

    friend WTF_EXPORT_PRIVATE bool equalIgnoringFragmentIdentifier(const URL&, const URL&);
    friend WTF_EXPORT_PRIVATE bool protocolHostAndPortAreEqual(const URL&, const URL&);
    friend WTF_EXPORT_PRIVATE bool hostsAreEqual(const URL&, const URL&);

    String m_string;

    // The five bit-fields below (1 + 1 + 1 + 3 + 26) add up to 32 bits. The
    // static_assert after the class depends on this layout, so do not reorder or
    // resize them without updating it.
    unsigned m_isValid : 1;
    unsigned m_protocolIsInHTTPFamily : 1;
    unsigned m_cannotBeABaseURL : 1;

    // This is out of order to align the bits better. The port is after the host.
    unsigned m_portLength : 3;
    // (1 << 3) - 1 == 7, the largest value a 3-bit field can hold.
    static constexpr unsigned maxPortLength = (1 << 3) - 1;

    // (1 << 26) - 1, the largest value the 26-bit m_schemeEnd field can hold.
    static constexpr unsigned maxSchemeLength = (1 << 26) - 1;
    unsigned m_schemeEnd : 26;
    // Seven full-width positions. hasFragment() compares m_queryEnd with
    // m_string.length(), so these presumably index into m_string; confirm with URLParser.
    unsigned m_userStart;
    unsigned m_userEnd;
    unsigned m_passwordEnd;
    unsigned m_hostEnd;
    unsigned m_pathAfterLastSlash;
    unsigned m_pathEnd;
    unsigned m_queryEnd;
};
226 | |
// Pins the size of URL to one String plus eight unsigned-sized words: the packed
// bit-fields (1 + 1 + 1 + 3 + 26 bits) and the seven unsigned position members.
// NOTE(review): the bit-fields total 32 bits, so this presumably assumes a 32-bit
// unsigned; confirm when porting to a new target.
static_assert(sizeof(URL) == sizeof(String) + 8 * sizeof(unsigned), "URL should stay small" );
228 | |
229 | template <class Encoder> |
230 | void URL::encode(Encoder& encoder) const |
231 | { |
232 | encoder << m_string; |
233 | } |
234 | |
235 | template <class Decoder> |
236 | bool URL::decode(Decoder& decoder, URL& url) |
237 | { |
238 | auto optionalURL = URL::decode(decoder); |
239 | if (!optionalURL) |
240 | return false; |
241 | url = WTFMove(*optionalURL); |
242 | return true; |
243 | } |
244 | |
245 | template <class Decoder> |
246 | Optional<URL> URL::decode(Decoder& decoder) |
247 | { |
248 | String string; |
249 | if (!decoder.decode(string)) |
250 | return WTF::nullopt; |
251 | return URL(URL(), string); |
252 | } |
253 | |
// URL comparison helpers; only declared here. All of them except
// equalIgnoringQueryAndFragment are also declared as friends inside URL.
WTF_EXPORT_PRIVATE bool equalIgnoringFragmentIdentifier(const URL&, const URL&);
WTF_EXPORT_PRIVATE bool equalIgnoringQueryAndFragment(const URL&, const URL&);
WTF_EXPORT_PRIVATE bool protocolHostAndPortAreEqual(const URL&, const URL&);
WTF_EXPORT_PRIVATE bool hostsAreEqual(const URL&, const URL&);

// Returns a const reference to a URL.
// NOTE(review): presumably the "about:blank" URL, judging by the name; confirm in the implementation.
WTF_EXPORT_PRIVATE const URL& blankURL();

// Functions to do URL operations on strings.
// These are operations that aren't faster on a parsed URL.
// These are also different from the URL functions in that they don't require the string to be a valid and parsable URL.
// This is especially important because valid javascript URLs are not necessarily considered valid by URL.

WTF_EXPORT_PRIVATE bool protocolIs(const String& url, const char* protocol);
WTF_EXPORT_PRIVATE bool protocolIsJavaScript(const String& url);
WTF_EXPORT_PRIVATE bool protocolIsJavaScript(StringView url);
WTF_EXPORT_PRIVATE bool protocolIsInHTTPFamily(const String& url);

// Default-port queries, keyed by protocol.
WTF_EXPORT_PRIVATE Optional<uint16_t> defaultPortForProtocol(StringView protocol);
WTF_EXPORT_PRIVATE bool isDefaultPortForProtocol(uint16_t port, StringView protocol);
WTF_EXPORT_PRIVATE bool portAllowed(const URL&); // Blacklist ports that should never be used for Web resources.

// NOTE(review): test-only hooks, judging by the "ForTesting" suffix; confirm they have no production callers.
WTF_EXPORT_PRIVATE void registerDefaultPortForProtocolForTesting(uint16_t port, const String& protocol);
WTF_EXPORT_PRIVATE void clearDefaultPortForProtocolMapForTesting();

WTF_EXPORT_PRIVATE bool isValidProtocol(const String&);

WTF_EXPORT_PRIVATE String mimeTypeFromDataURL(const String& url);

// FIXME: This is a wrong concept to expose, different parts of a URL need different escaping per the URL Standard.
WTF_EXPORT_PRIVATE String encodeWithURLEscapeSequences(const String&);
284 | |
285 | // Inlines. |
286 | |
287 | inline bool operator==(const URL& a, const URL& b) |
288 | { |
289 | return a.string() == b.string(); |
290 | } |
291 | |
292 | inline bool operator==(const URL& a, const String& b) |
293 | { |
294 | return a.string() == b; |
295 | } |
296 | |
297 | inline bool operator==(const String& a, const URL& b) |
298 | { |
299 | return a == b.string(); |
300 | } |
301 | |
302 | inline bool operator!=(const URL& a, const URL& b) |
303 | { |
304 | return a.string() != b.string(); |
305 | } |
306 | |
307 | inline bool operator!=(const URL& a, const String& b) |
308 | { |
309 | return a.string() != b; |
310 | } |
311 | |
312 | inline bool operator!=(const String& a, const URL& b) |
313 | { |
314 | return a != b.string(); |
315 | } |
316 | |
317 | // Inline versions of some non-GoogleURL functions so we can get inlining |
318 | // without having to have a lot of ugly ifdefs in the class definition. |
319 | |
320 | inline bool URL::isNull() const |
321 | { |
322 | return m_string.isNull(); |
323 | } |
324 | |
325 | inline bool URL::isEmpty() const |
326 | { |
327 | return m_string.isEmpty(); |
328 | } |
329 | |
330 | inline bool URL::isValid() const |
331 | { |
332 | return m_isValid; |
333 | } |
334 | |
335 | inline bool URL::hasPath() const |
336 | { |
337 | return m_pathEnd != m_hostEnd + m_portLength; |
338 | } |
339 | |
340 | inline bool URL::hasUsername() const |
341 | { |
342 | return m_userEnd > m_userStart; |
343 | } |
344 | |
345 | inline bool URL::hasPassword() const |
346 | { |
347 | return m_passwordEnd > (m_userEnd + 1); |
348 | } |
349 | |
350 | inline bool URL::hasQuery() const |
351 | { |
352 | return m_queryEnd > m_pathEnd; |
353 | } |
354 | |
355 | inline bool URL::hasFragment() const |
356 | { |
357 | return m_isValid && m_string.length() > m_queryEnd; |
358 | } |
359 | |
360 | inline bool URL::protocolIsInHTTPFamily() const |
361 | { |
362 | return m_protocolIsInHTTPFamily; |
363 | } |
364 | |
365 | inline unsigned URL::pathStart() const |
366 | { |
367 | return m_hostEnd + m_portLength; |
368 | } |
369 | |
// Returns the stored m_pathEnd position unchanged.
inline unsigned URL::pathEnd() const
{
    return m_pathEnd;
}
374 | |
// Returns the stored m_pathAfterLastSlash position unchanged.
// NOTE(review): presumably the position just past the final '/' of the path,
// judging by the member name; confirm with URLParser.
inline unsigned URL::pathAfterLastSlash() const
{
    return m_pathAfterLastSlash;
}
379 | |
// Stream-insertion operator for writing a URL to a TextStream; only declared
// here, so the output format is determined by the implementation.
WTF_EXPORT_PRIVATE WTF::TextStream& operator<<(WTF::TextStream&, const URL&);

// Declarations of the DefaultHash and HashTraits specializations for URL; their
// definitions are not visible in this header.
template<> struct DefaultHash<URL>;
template<> struct HashTraits<URL>;
384 | |
385 | } // namespace WTF |
386 | |