1/*
2 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2011, 2012, 2013 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#pragma once
27
28#include <wtf/Forward.h>
29#include <wtf/RetainPtr.h>
30#include <wtf/text/WTFString.h>
31
32#if USE(CF)
33typedef const struct __CFURL* CFURLRef;
34#endif
35
36#if USE(FOUNDATION)
37OBJC_CLASS NSURL;
38#endif
39
40namespace WTF {
41class TextStream;
42
43class URLTextEncoding {
44public:
45 virtual Vector<uint8_t> encodeForURLParsing(StringView) const = 0;
46 virtual ~URLTextEncoding() { };
47};
48
49struct URLHash;
50
// A URL value: the URL text (m_string) plus a handful of flags and offsets into
// that text which delimit its components. Equality and hashing-related helpers
// in this header operate on the string form.
class WTF_EXPORT_PRIVATE URL {
    WTF_MAKE_FAST_ALLOCATED;
public:
    // Generates a URL which contains a null string.
    URL() { invalidate(); }

    // Hash-table support: builds the "deleted" sentinel value. Only m_string is
    // initialized by this constructor; the flag and offset members are not set.
    explicit URL(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { }
    bool isHashTableDeletedValue() const { return string().isHashTableDeletedValue(); }

    // Resolves the relative URL with the given base URL. If provided, the
    // URLTextEncoding is used to encode non-ASCII characters. The base URL can be
    // null or empty, in which case the relative URL will be interpreted as
    // absolute.
    // FIXME: If the base URL is invalid, this always creates an invalid
    // URL. Instead I think it would be better to treat all invalid base URLs
    // the same way we treat null and empty base URLs.
    URL(const URL& base, const String& relative, const URLTextEncoding* = nullptr);

    static URL fakeURLWithRelativePart(const String&);
    static URL fileURLWithFileSystemPath(const String&);

    String strippedForUseAsReferrer() const;

    // FIXME: The above functions should be harmonized so that passing a
    // base of null or the empty string gives the same result as the
    // standard String constructor.

    // Makes a deep copy. Helpful only if you need to use a URL on another
    // thread. Since the underlying StringImpl objects are immutable, there's
    // no other reason to ever prefer isolatedCopy() over plain old assignment.
    URL isolatedCopy() const;

    // isNull() and isEmpty() report on the underlying string; isValid() reports
    // the stored validity flag. All three are defined inline below the class.
    bool isNull() const;
    bool isEmpty() const;
    bool isValid() const;

    // Returns true if you can set the host and port for the URL.
    // Non-hierarchical URLs don't have a host and port.
    bool canSetHostOrPort() const { return isHierarchical(); }

    // Same condition as canSetHostOrPort(): the URL must be hierarchical.
    bool canSetPathname() const { return isHierarchical(); }
    bool isHierarchical() const;

    // The full URL text, returned by reference without copying.
    const String& string() const { return m_string; }

    String stringCenterEllipsizedToLength(unsigned length = 1024) const;

    // Component accessors. protocol() and host() return views rather than
    // owning strings; port() is empty when there is no port to report.
    StringView protocol() const;
    StringView host() const;
    Optional<uint16_t> port() const;
    String hostAndPort() const;
    String protocolHostAndPort() const;
    String user() const;
    String pass() const;
    String path() const;
    String lastPathComponent() const;
    String query() const;
    String fragmentIdentifier() const;
    bool hasFragmentIdentifier() const;

    // Component-presence checks computed from the stored offsets
    // (defined inline below the class).
    bool hasUsername() const;
    bool hasPassword() const;
    bool hasQuery() const;
    bool hasFragment() const;
    bool hasPath() const;

    // Unlike user() and pass(), these functions don't decode escape sequences.
    // This is necessary for accurate round-tripping, because encoding doesn't encode '%' characters.
    String encodedUser() const;
    String encodedPass() const;

    String baseAsString() const;

    String fileSystemPath() const;

    // Returns true if the current URL's protocol is the same as the null-
    // terminated ASCII argument. The argument must be lower-case.
    bool protocolIs(const char*) const;
    bool protocolIs(StringView) const;
    // Convenience wrappers around protocolIs() for the "blob" and "data" schemes.
    bool protocolIsBlob() const { return protocolIs("blob"); }
    bool protocolIsData() const { return protocolIs("data"); }
    bool protocolIsAbout() const;
    bool protocolIsInHTTPFamily() const;
    bool isLocalFile() const;
    bool isBlankURL() const;
    // Exposes the stored m_cannotBeABaseURL flag.
    bool cannotBeABaseURL() const { return m_cannotBeABaseURL; }

    bool isMatchingDomain(const String&) const;

    // Mutators. setProtocol() is the only one that reports a result to the caller.
    bool setProtocol(const String&);
    void setHost(const String&);

    void removePort();
    void setPort(unsigned short);

    // Input is like "foo.com" or "foo.com:8000".
    void setHostAndPort(const String&);

    void setUser(const String&);
    void setPass(const String&);

    // If you pass an empty path for HTTP or HTTPS URLs, the resulting path
    // will be "/".
    void setPath(const String&);

    // The query may begin with a question mark, or, if not, one will be added
    // for you. Setting the query to the empty string will leave a "?" in the
    // URL (with nothing after it). To clear the query, pass a null string.
    void setQuery(const String&);

    void setFragmentIdentifier(StringView);
    void removeFragmentIdentifier();

    void removeQueryAndFragmentIdentifier();

    static bool hostIsIPAddress(StringView);

    // Offsets into the URL string for the path component
    // (defined inline below the class).
    unsigned pathStart() const;
    unsigned pathEnd() const;
    unsigned pathAfterLastSlash() const;

    // Implicit conversion to the URL text; lets a URL be passed where a
    // const String& is expected.
    operator const String&() const { return string(); }

#if USE(CF)
    URL(CFURLRef);
    RetainPtr<CFURLRef> createCFURL() const;
#endif

#if USE(FOUNDATION)
    URL(NSURL*);
    operator NSURL*() const;
#endif
#ifdef __OBJC__
    // Only compiled as Objective-C++; relies on String's own conversion to NSString*.
    operator NSString*() const { return string(); }
#endif

#ifndef NDEBUG
    // Debug-only helper; not declared when NDEBUG is defined.
    void print() const;
#endif

    // Serialization hooks, defined after the class in this header. Only the
    // URL string is encoded; decoding rebuilds the URL from that string.
    template <class Encoder> void encode(Encoder&) const;
    template <class Decoder> static bool decode(Decoder&, URL&);
    template <class Decoder> static Optional<URL> decode(Decoder&);

private:
    friend class URLParser;
    // Called by the default constructor to put the object into its initial state.
    void invalidate();
    static bool protocolIs(const String&, const char*);
    void copyToBuffer(Vector<char, 512>& buffer) const;
    unsigned hostStart() const;

    friend WTF_EXPORT_PRIVATE bool equalIgnoringFragmentIdentifier(const URL&, const URL&);
    friend WTF_EXPORT_PRIVATE bool protocolHostAndPortAreEqual(const URL&, const URL&);
    friend WTF_EXPORT_PRIVATE bool hostsAreEqual(const URL&, const URL&);

    String m_string;

    // The five bit-fields below add up to 32 bits (1 + 1 + 1 + 3 + 26). The
    // static_assert following the class depends on the members keeping this
    // size, so do not widen or add fields without revisiting it.
    unsigned m_isValid : 1;
    unsigned m_protocolIsInHTTPFamily : 1;
    unsigned m_cannotBeABaseURL : 1;

    // This is out of order to align the bits better. The port is after the host.
    unsigned m_portLength : 3;
    // Largest value that fits in the 3-bit m_portLength field.
    static constexpr unsigned maxPortLength = (1 << 3) - 1;

    // Largest value that fits in the 26-bit m_schemeEnd field.
    static constexpr unsigned maxSchemeLength = (1 << 26) - 1;
    unsigned m_schemeEnd : 26;
    // Component boundaries, stored as offsets into m_string.
    // NOTE(review): presumably filled in by URLParser (a friend) — confirm in the implementation.
    unsigned m_userStart;
    unsigned m_userEnd;
    unsigned m_passwordEnd;
    unsigned m_hostEnd;
    unsigned m_pathAfterLastSlash;
    unsigned m_pathEnd;
    unsigned m_queryEnd;
};
226
// Guards URL's memory footprint: one String plus eight unsigned-sized words.
// URL's five bit-fields total 32 bits (1 + 1 + 1 + 3 + 26) and are expected to
// share a single word; its seven plain unsigned offsets account for the rest.
static_assert(sizeof(URL) == sizeof(String) + 8 * sizeof(unsigned), "URL should stay small");
228
229template <class Encoder>
230void URL::encode(Encoder& encoder) const
231{
232 encoder << m_string;
233}
234
235template <class Decoder>
236bool URL::decode(Decoder& decoder, URL& url)
237{
238 auto optionalURL = URL::decode(decoder);
239 if (!optionalURL)
240 return false;
241 url = WTFMove(*optionalURL);
242 return true;
243}
244
245template <class Decoder>
246Optional<URL> URL::decode(Decoder& decoder)
247{
248 String string;
249 if (!decoder.decode(string))
250 return WTF::nullopt;
251 return URL(URL(), string);
252}
253
// Exported comparison helpers that take two URLs and return a bool. All of
// them except equalIgnoringQueryAndFragment are also declared as friends of URL.
// NOTE(review): exact comparison rules live in the implementation file — confirm
// there before relying on edge-case behavior (e.g. invalid or null URLs).
WTF_EXPORT_PRIVATE bool equalIgnoringFragmentIdentifier(const URL&, const URL&);
WTF_EXPORT_PRIVATE bool equalIgnoringQueryAndFragment(const URL&, const URL&);
WTF_EXPORT_PRIVATE bool protocolHostAndPortAreEqual(const URL&, const URL&);
WTF_EXPORT_PRIVATE bool hostsAreEqual(const URL&, const URL&);

// Returns a reference to a shared URL object rather than a fresh copy.
// NOTE(review): presumably the "about:blank" URL — confirm in the implementation.
WTF_EXPORT_PRIVATE const URL& blankURL();
260
// Functions to do URL operations on strings.
// These are operations that aren't faster on a parsed URL.
// These also differ from the URL member functions in that they don't require the string to be a valid, parsable URL.
// This is especially important because valid javascript: URLs are not necessarily considered valid by URL.

// Protocol checks that take the URL as raw text instead of a parsed URL.
WTF_EXPORT_PRIVATE bool protocolIs(const String& url, const char* protocol);
WTF_EXPORT_PRIVATE bool protocolIsJavaScript(const String& url);
WTF_EXPORT_PRIVATE bool protocolIsJavaScript(StringView url);
WTF_EXPORT_PRIVATE bool protocolIsInHTTPFamily(const String& url);

// Default-port lookups keyed by protocol; defaultPortForProtocol returns an
// empty Optional when it has no port to report.
WTF_EXPORT_PRIVATE Optional<uint16_t> defaultPortForProtocol(StringView protocol);
WTF_EXPORT_PRIVATE bool isDefaultPortForProtocol(uint16_t port, StringView protocol);
WTF_EXPORT_PRIVATE bool portAllowed(const URL&); // Blacklist ports that should never be used for Web resources.

// Test-only hooks, as their names indicate.
// NOTE(review): presumably these mutate the table consulted by
// defaultPortForProtocol — confirm in the implementation.
WTF_EXPORT_PRIVATE void registerDefaultPortForProtocolForTesting(uint16_t port, const String& protocol);
WTF_EXPORT_PRIVATE void clearDefaultPortForProtocolMapForTesting();

WTF_EXPORT_PRIVATE bool isValidProtocol(const String&);

WTF_EXPORT_PRIVATE String mimeTypeFromDataURL(const String& url);

// FIXME: This is a wrong concept to expose, different parts of a URL need different escaping per the URL Standard.
WTF_EXPORT_PRIVATE String encodeWithURLEscapeSequences(const String&);
284
285// Inlines.
286
287inline bool operator==(const URL& a, const URL& b)
288{
289 return a.string() == b.string();
290}
291
292inline bool operator==(const URL& a, const String& b)
293{
294 return a.string() == b;
295}
296
297inline bool operator==(const String& a, const URL& b)
298{
299 return a == b.string();
300}
301
302inline bool operator!=(const URL& a, const URL& b)
303{
304 return a.string() != b.string();
305}
306
307inline bool operator!=(const URL& a, const String& b)
308{
309 return a.string() != b;
310}
311
312inline bool operator!=(const String& a, const URL& b)
313{
314 return a != b.string();
315}
316
317// Inline versions of some non-GoogleURL functions so we can get inlining
318// without having to have a lot of ugly ifdefs in the class definition.
319
320inline bool URL::isNull() const
321{
322 return m_string.isNull();
323}
324
325inline bool URL::isEmpty() const
326{
327 return m_string.isEmpty();
328}
329
// Returns the validity flag (m_isValid) stored on this URL; no re-parsing happens here.
inline bool URL::isValid() const
{
    return m_isValid;
}
334
335inline bool URL::hasPath() const
336{
337 return m_pathEnd != m_hostEnd + m_portLength;
338}
339
340inline bool URL::hasUsername() const
341{
342 return m_userEnd > m_userStart;
343}
344
345inline bool URL::hasPassword() const
346{
347 return m_passwordEnd > (m_userEnd + 1);
348}
349
350inline bool URL::hasQuery() const
351{
352 return m_queryEnd > m_pathEnd;
353}
354
355inline bool URL::hasFragment() const
356{
357 return m_isValid && m_string.length() > m_queryEnd;
358}
359
// Returns the stored m_protocolIsInHTTPFamily flag; the protocol is not re-examined here.
inline bool URL::protocolIsInHTTPFamily() const
{
    return m_protocolIsInHTTPFamily;
}
364
365inline unsigned URL::pathStart() const
366{
367 return m_hostEnd + m_portLength;
368}
369
// Returns the stored offset marking the end of the path component.
inline unsigned URL::pathEnd() const
{
    return m_pathEnd;
}
374
// Returns the stored m_pathAfterLastSlash offset.
// NOTE(review): by its name, the position just past the final '/' of the path — confirm against the parser.
inline unsigned URL::pathAfterLastSlash() const
{
    return m_pathAfterLastSlash;
}
379
// Stream insertion of a URL into a TextStream; defined out of line.
// NOTE(review): the exact output format is not visible from this header.
WTF_EXPORT_PRIVATE WTF::TextStream& operator<<(WTF::TextStream&, const URL&);
381
// Declares that DefaultHash and HashTraits are explicitly specialized for URL.
// Only the declarations appear here; the definitions live elsewhere.
// NOTE(review): presumably alongside URLHash, which is forward-declared above — confirm.
template<> struct DefaultHash<URL>;
template<> struct HashTraits<URL>;
384
385} // namespace WTF
386