1 | /* |
2 | * Copyright (C) 2004-2019 Apple Inc. All rights reserved. |
3 | * Copyright (C) 2012 Research In Motion Limited. All rights reserved. |
4 | * |
5 | * Redistribution and use in source and binary forms, with or without |
6 | * modification, are permitted provided that the following conditions |
7 | * are met: |
8 | * 1. Redistributions of source code must retain the above copyright |
9 | * notice, this list of conditions and the following disclaimer. |
10 | * 2. Redistributions in binary form must reproduce the above copyright |
11 | * notice, this list of conditions and the following disclaimer in the |
12 | * documentation and/or other materials provided with the distribution. |
13 | * |
14 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
15 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
17 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
18 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | */ |
26 | |
27 | #include "config.h" |
28 | #include <wtf/URL.h> |
29 | |
30 | #include "URLParser.h" |
31 | #include <stdio.h> |
32 | #include <unicode/uidna.h> |
33 | #include <wtf/HashMap.h> |
34 | #include <wtf/NeverDestroyed.h> |
35 | #include <wtf/StdLibExtras.h> |
36 | #include <wtf/UUID.h> |
37 | #include <wtf/text/CString.h> |
38 | #include <wtf/text/StringBuilder.h> |
39 | #include <wtf/text/StringConcatenateNumbers.h> |
40 | #include <wtf/text/StringHash.h> |
41 | #include <wtf/text/TextStream.h> |
42 | |
43 | namespace WTF { |
44 | |
45 | typedef Vector<char, 512> CharBuffer; |
46 | typedef Vector<UChar, 512> UCharBuffer; |
47 | |
48 | static const unsigned invalidPortNumber = 0xFFFF; |
49 | |
50 | // Copies the source to the destination, assuming all the source characters are |
51 | // ASCII. The destination buffer must be large enough. Null characters are allowed |
52 | // in the source string, and no attempt is made to null-terminate the result. |
53 | static void copyASCII(const String& string, char* dest) |
54 | { |
55 | if (string.isEmpty()) |
56 | return; |
57 | |
58 | if (string.is8Bit()) |
59 | memcpy(dest, string.characters8(), string.length()); |
60 | else { |
61 | const UChar* src = string.characters16(); |
62 | size_t length = string.length(); |
63 | for (size_t i = 0; i < length; i++) |
64 | dest[i] = static_cast<char>(src[i]); |
65 | } |
66 | } |
67 | |
68 | void URL::invalidate() |
69 | { |
70 | m_isValid = false; |
71 | m_protocolIsInHTTPFamily = false; |
72 | m_cannotBeABaseURL = false; |
73 | m_schemeEnd = 0; |
74 | m_userStart = 0; |
75 | m_userEnd = 0; |
76 | m_passwordEnd = 0; |
77 | m_hostEnd = 0; |
78 | m_portLength = 0; |
79 | m_pathEnd = 0; |
80 | m_pathAfterLastSlash = 0; |
81 | m_queryEnd = 0; |
82 | } |
83 | |
84 | URL::URL(const URL& base, const String& relative, const URLTextEncoding* encoding) |
85 | { |
86 | URLParser parser(relative, base, encoding); |
87 | *this = parser.result(); |
88 | } |
89 | |
90 | static bool shouldTrimFromURL(UChar c) |
91 | { |
92 | // Browsers ignore leading/trailing whitespace and control |
93 | // characters from URLs. Note that c is an *unsigned* char here |
94 | // so this comparison should only catch control characters. |
95 | return c <= ' '; |
96 | } |
97 | |
98 | URL URL::isolatedCopy() const |
99 | { |
100 | URL result = *this; |
101 | result.m_string = result.m_string.isolatedCopy(); |
102 | return result; |
103 | } |
104 | |
105 | String URL::lastPathComponent() const |
106 | { |
107 | if (!hasPath()) |
108 | return String(); |
109 | |
110 | unsigned end = m_pathEnd - 1; |
111 | if (m_string[end] == '/') |
112 | --end; |
113 | |
114 | size_t start = m_string.reverseFind('/', end); |
115 | if (start < static_cast<unsigned>(m_hostEnd + m_portLength)) |
116 | return String(); |
117 | ++start; |
118 | |
119 | return m_string.substring(start, end - start + 1); |
120 | } |
121 | |
122 | StringView URL::protocol() const |
123 | { |
124 | return StringView(m_string).substring(0, m_schemeEnd); |
125 | } |
126 | |
127 | StringView URL::host() const |
128 | { |
129 | unsigned start = hostStart(); |
130 | return StringView(m_string).substring(start, m_hostEnd - start); |
131 | } |
132 | |
133 | Optional<uint16_t> URL::port() const |
134 | { |
135 | if (!m_portLength) |
136 | return WTF::nullopt; |
137 | |
138 | bool ok = false; |
139 | unsigned number; |
140 | if (m_string.is8Bit()) |
141 | number = charactersToUIntStrict(m_string.characters8() + m_hostEnd + 1, m_portLength - 1, &ok); |
142 | else |
143 | number = charactersToUIntStrict(m_string.characters16() + m_hostEnd + 1, m_portLength - 1, &ok); |
144 | if (!ok || number > std::numeric_limits<uint16_t>::max()) |
145 | return WTF::nullopt; |
146 | return number; |
147 | } |
148 | |
149 | String URL::hostAndPort() const |
150 | { |
151 | if (auto port = this->port()) |
152 | return makeString(host(), ':', static_cast<unsigned>(port.value())); |
153 | return host().toString(); |
154 | } |
155 | |
156 | String URL::protocolHostAndPort() const |
157 | { |
158 | String result = m_string.substring(0, m_hostEnd + m_portLength); |
159 | |
160 | if (m_passwordEnd - m_userStart > 0) { |
161 | const int allowForTrailingAtSign = 1; |
162 | result.remove(m_userStart, m_passwordEnd - m_userStart + allowForTrailingAtSign); |
163 | } |
164 | |
165 | return result; |
166 | } |
167 | |
168 | static String decodeEscapeSequencesFromParsedURL(StringView input) |
169 | { |
170 | auto inputLength = input.length(); |
171 | if (!inputLength) |
172 | return emptyString(); |
173 | Vector<LChar> percentDecoded; |
174 | percentDecoded.reserveInitialCapacity(inputLength); |
175 | for (unsigned i = 0; i < inputLength; ++i) { |
176 | if (input[i] == '%' |
177 | && inputLength > 2 |
178 | && i < inputLength - 2 |
179 | && isASCIIHexDigit(input[i + 1]) |
180 | && isASCIIHexDigit(input[i + 2])) { |
181 | percentDecoded.uncheckedAppend(toASCIIHexValue(input[i + 1], input[i + 2])); |
182 | i += 2; |
183 | } else |
184 | percentDecoded.uncheckedAppend(input[i]); |
185 | } |
186 | return String::fromUTF8(percentDecoded.data(), percentDecoded.size()); |
187 | } |
188 | |
189 | String URL::user() const |
190 | { |
191 | return decodeEscapeSequencesFromParsedURL(StringView(m_string).substring(m_userStart, m_userEnd - m_userStart)); |
192 | } |
193 | |
194 | String URL::pass() const |
195 | { |
196 | if (m_passwordEnd == m_userEnd) |
197 | return String(); |
198 | |
199 | return decodeEscapeSequencesFromParsedURL(StringView(m_string).substring(m_userEnd + 1, m_passwordEnd - m_userEnd - 1)); |
200 | } |
201 | |
202 | String URL::encodedUser() const |
203 | { |
204 | return m_string.substring(m_userStart, m_userEnd - m_userStart); |
205 | } |
206 | |
207 | String URL::encodedPass() const |
208 | { |
209 | if (m_passwordEnd == m_userEnd) |
210 | return String(); |
211 | |
212 | return m_string.substring(m_userEnd + 1, m_passwordEnd - m_userEnd - 1); |
213 | } |
214 | |
215 | String URL::fragmentIdentifier() const |
216 | { |
217 | if (!hasFragmentIdentifier()) |
218 | return String(); |
219 | |
220 | return m_string.substring(m_queryEnd + 1); |
221 | } |
222 | |
223 | bool URL::hasFragmentIdentifier() const |
224 | { |
225 | return m_isValid && m_string.length() != m_queryEnd; |
226 | } |
227 | |
228 | String URL::baseAsString() const |
229 | { |
230 | return m_string.left(m_pathAfterLastSlash); |
231 | } |
232 | |
233 | #if !USE(CF) |
234 | |
235 | String URL::fileSystemPath() const |
236 | { |
237 | if (!isValid() || !isLocalFile()) |
238 | return String(); |
239 | |
240 | return decodeEscapeSequencesFromParsedURL(StringView(path())); |
241 | } |
242 | |
243 | #endif |
244 | |
245 | #ifdef NDEBUG |
246 | |
247 | static inline void assertProtocolIsGood(StringView) |
248 | { |
249 | } |
250 | |
251 | #else |
252 | |
253 | static void assertProtocolIsGood(StringView protocol) |
254 | { |
255 | // FIXME: We probably don't need this function any more. |
256 | // The isASCIIAlphaCaselessEqual function asserts that passed-in characters |
257 | // are ones it can handle; the older code did not and relied on these checks. |
258 | for (auto character : protocol.codeUnits()) { |
259 | ASSERT(isASCII(character)); |
260 | ASSERT(character > ' '); |
261 | ASSERT(!isASCIIUpper(character)); |
262 | ASSERT(toASCIILowerUnchecked(character) == character); |
263 | } |
264 | } |
265 | |
266 | #endif |
267 | |
268 | static Lock defaultPortForProtocolMapForTestingLock; |
269 | |
270 | using DefaultPortForProtocolMapForTesting = HashMap<String, uint16_t>; |
271 | static DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMapForTesting() |
272 | { |
273 | static DefaultPortForProtocolMapForTesting* defaultPortForProtocolMap; |
274 | return defaultPortForProtocolMap; |
275 | } |
276 | |
277 | static DefaultPortForProtocolMapForTesting& ensureDefaultPortForProtocolMapForTesting() |
278 | { |
279 | DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMap = defaultPortForProtocolMapForTesting(); |
280 | if (!defaultPortForProtocolMap) |
281 | defaultPortForProtocolMap = new DefaultPortForProtocolMapForTesting; |
282 | return *defaultPortForProtocolMap; |
283 | } |
284 | |
285 | void registerDefaultPortForProtocolForTesting(uint16_t port, const String& protocol) |
286 | { |
287 | auto locker = holdLock(defaultPortForProtocolMapForTestingLock); |
288 | ensureDefaultPortForProtocolMapForTesting().add(protocol, port); |
289 | } |
290 | |
291 | void clearDefaultPortForProtocolMapForTesting() |
292 | { |
293 | auto locker = holdLock(defaultPortForProtocolMapForTestingLock); |
294 | if (auto* map = defaultPortForProtocolMapForTesting()) |
295 | map->clear(); |
296 | } |
297 | |
298 | Optional<uint16_t> defaultPortForProtocol(StringView protocol) |
299 | { |
300 | if (auto* overrideMap = defaultPortForProtocolMapForTesting()) { |
301 | auto locker = holdLock(defaultPortForProtocolMapForTestingLock); |
302 | ASSERT(overrideMap); // No need to null check again here since overrideMap cannot become null after being non-null. |
303 | auto iterator = overrideMap->find(protocol.toStringWithoutCopying()); |
304 | if (iterator != overrideMap->end()) |
305 | return iterator->value; |
306 | } |
307 | return URLParser::defaultPortForProtocol(protocol); |
308 | } |
309 | |
310 | bool isDefaultPortForProtocol(uint16_t port, StringView protocol) |
311 | { |
312 | return defaultPortForProtocol(protocol) == port; |
313 | } |
314 | |
315 | bool URL::protocolIs(const char* protocol) const |
316 | { |
317 | assertProtocolIsGood(StringView(reinterpret_cast<const LChar*>(protocol), strlen(protocol))); |
318 | |
319 | // JavaScript URLs are "valid" and should be executed even if URL decides they are invalid. |
320 | // The free function protocolIsJavaScript() should be used instead. |
321 | ASSERT(!equalLettersIgnoringASCIICase(StringView(protocol), "javascript" )); |
322 | |
323 | if (!m_isValid) |
324 | return false; |
325 | |
326 | // Do the comparison without making a new string object. |
327 | for (unsigned i = 0; i < m_schemeEnd; ++i) { |
328 | if (!protocol[i] || !isASCIIAlphaCaselessEqual(m_string[i], protocol[i])) |
329 | return false; |
330 | } |
331 | return !protocol[m_schemeEnd]; // We should have consumed all characters in the argument. |
332 | } |
333 | |
334 | bool URL::protocolIs(StringView protocol) const |
335 | { |
336 | assertProtocolIsGood(protocol); |
337 | |
338 | if (!m_isValid) |
339 | return false; |
340 | |
341 | if (m_schemeEnd != protocol.length()) |
342 | return false; |
343 | |
344 | // Do the comparison without making a new string object. |
345 | for (unsigned i = 0; i < m_schemeEnd; ++i) { |
346 | if (!isASCIIAlphaCaselessEqual(m_string[i], protocol[i])) |
347 | return false; |
348 | } |
349 | return true; |
350 | } |
351 | |
352 | String URL::query() const |
353 | { |
354 | if (m_queryEnd == m_pathEnd) |
355 | return String(); |
356 | |
357 | return m_string.substring(m_pathEnd + 1, m_queryEnd - (m_pathEnd + 1)); |
358 | } |
359 | |
360 | String URL::path() const |
361 | { |
362 | unsigned portEnd = m_hostEnd + m_portLength; |
363 | return m_string.substring(portEnd, m_pathEnd - portEnd); |
364 | } |
365 | |
366 | bool URL::setProtocol(const String& s) |
367 | { |
368 | // Firefox and IE remove everything after the first ':'. |
369 | size_t separatorPosition = s.find(':'); |
370 | String newProtocol = s.substring(0, separatorPosition); |
371 | auto canonicalized = URLParser::maybeCanonicalizeScheme(newProtocol); |
372 | if (!canonicalized) |
373 | return false; |
374 | |
375 | if (!m_isValid) { |
376 | URLParser parser(makeString(*canonicalized, ":" , m_string)); |
377 | *this = parser.result(); |
378 | return true; |
379 | } |
380 | |
381 | URLParser parser(makeString(*canonicalized, m_string.substring(m_schemeEnd))); |
382 | *this = parser.result(); |
383 | return true; |
384 | } |
385 | |
386 | static bool isAllASCII(StringView string) |
387 | { |
388 | if (string.is8Bit()) |
389 | return charactersAreAllASCII(string.characters8(), string.length()); |
390 | return charactersAreAllASCII(string.characters16(), string.length()); |
391 | } |
392 | |
393 | // Appends the punycoded hostname identified by the given string and length to |
394 | // the output buffer. The result will not be null terminated. |
395 | // Return value of false means error in encoding. |
396 | static bool appendEncodedHostname(UCharBuffer& buffer, StringView string) |
397 | { |
398 | // Needs to be big enough to hold an IDN-encoded name. |
399 | // For host names bigger than this, we won't do IDN encoding, which is almost certainly OK. |
400 | const unsigned hostnameBufferLength = 2048; |
401 | |
402 | if (string.length() > hostnameBufferLength || isAllASCII(string)) { |
403 | append(buffer, string); |
404 | return true; |
405 | } |
406 | |
407 | UChar hostnameBuffer[hostnameBufferLength]; |
408 | UErrorCode error = U_ZERO_ERROR; |
409 | UIDNAInfo processingDetails = UIDNA_INFO_INITIALIZER; |
410 | int32_t numCharactersConverted = uidna_nameToASCII(&URLParser::internationalDomainNameTranscoder(), |
411 | string.upconvertedCharacters(), string.length(), hostnameBuffer, hostnameBufferLength, &processingDetails, &error); |
412 | |
413 | if (U_SUCCESS(error) && !processingDetails.errors) { |
414 | buffer.append(hostnameBuffer, numCharactersConverted); |
415 | return true; |
416 | } |
417 | return false; |
418 | } |
419 | |
420 | unsigned URL::hostStart() const |
421 | { |
422 | return (m_passwordEnd == m_userStart) ? m_passwordEnd : m_passwordEnd + 1; |
423 | } |
424 | |
425 | void URL::setHost(const String& s) |
426 | { |
427 | if (!m_isValid) |
428 | return; |
429 | |
430 | auto colonIndex = s.find(':'); |
431 | if (colonIndex != notFound) |
432 | return; |
433 | |
434 | UCharBuffer encodedHostName; |
435 | if (!appendEncodedHostname(encodedHostName, s)) |
436 | return; |
437 | |
438 | bool slashSlashNeeded = m_userStart == static_cast<unsigned>(m_schemeEnd + 1); |
439 | |
440 | StringBuilder builder; |
441 | builder.append(m_string.left(hostStart())); |
442 | if (slashSlashNeeded) |
443 | builder.appendLiteral("//" ); |
444 | builder.append(StringView(encodedHostName.data(), encodedHostName.size())); |
445 | builder.append(m_string.substring(m_hostEnd)); |
446 | |
447 | URLParser parser(builder.toString()); |
448 | *this = parser.result(); |
449 | } |
450 | |
451 | void URL::removePort() |
452 | { |
453 | if (!m_portLength) |
454 | return; |
455 | URLParser parser(makeString(StringView(m_string).left(m_hostEnd), StringView(m_string).substring(m_hostEnd + m_portLength))); |
456 | *this = parser.result(); |
457 | } |
458 | |
459 | void URL::setPort(unsigned short i) |
460 | { |
461 | if (!m_isValid) |
462 | return; |
463 | |
464 | bool colonNeeded = !m_portLength; |
465 | unsigned portStart = (colonNeeded ? m_hostEnd : m_hostEnd + 1); |
466 | |
467 | URLParser parser(makeString(StringView(m_string).left(portStart), (colonNeeded ? ":" : "" ), static_cast<unsigned>(i), StringView(m_string).substring(m_hostEnd + m_portLength))); |
468 | *this = parser.result(); |
469 | } |
470 | |
471 | void URL::setHostAndPort(const String& hostAndPort) |
472 | { |
473 | if (!m_isValid) |
474 | return; |
475 | |
476 | StringView hostName(hostAndPort); |
477 | StringView port; |
478 | |
479 | auto colonIndex = hostName.find(':'); |
480 | if (colonIndex != notFound) { |
481 | port = hostName.substring(colonIndex + 1); |
482 | bool ok; |
483 | int portInt = port.toIntStrict(ok); |
484 | if (!ok || portInt < 0) |
485 | return; |
486 | hostName = hostName.substring(0, colonIndex); |
487 | } |
488 | |
489 | if (hostName.isEmpty()) |
490 | return; |
491 | |
492 | UCharBuffer encodedHostName; |
493 | if (!appendEncodedHostname(encodedHostName, hostName)) |
494 | return; |
495 | |
496 | bool slashSlashNeeded = m_userStart == static_cast<unsigned>(m_schemeEnd + 1); |
497 | |
498 | StringBuilder builder; |
499 | builder.append(m_string.left(hostStart())); |
500 | if (slashSlashNeeded) |
501 | builder.appendLiteral("//" ); |
502 | builder.append(StringView(encodedHostName.data(), encodedHostName.size())); |
503 | if (!port.isEmpty()) { |
504 | builder.appendLiteral(":" ); |
505 | builder.append(port); |
506 | } |
507 | builder.append(StringView(m_string).substring(m_hostEnd + m_portLength)); |
508 | |
509 | URLParser parser(builder.toString()); |
510 | *this = parser.result(); |
511 | } |
512 | |
513 | static String percentEncodeCharacters(const String& input, bool(*shouldEncode)(UChar)) |
514 | { |
515 | auto encode = [shouldEncode] (const String& input) { |
516 | CString utf8 = input.utf8(); |
517 | auto* data = utf8.data(); |
518 | StringBuilder builder; |
519 | auto length = utf8.length(); |
520 | for (unsigned j = 0; j < length; j++) { |
521 | auto c = data[j]; |
522 | if (shouldEncode(c)) { |
523 | builder.append('%'); |
524 | builder.append(upperNibbleToASCIIHexDigit(c)); |
525 | builder.append(lowerNibbleToASCIIHexDigit(c)); |
526 | } else |
527 | builder.append(c); |
528 | } |
529 | return builder.toString(); |
530 | }; |
531 | |
532 | for (size_t i = 0; i < input.length(); ++i) { |
533 | if (UNLIKELY(shouldEncode(input[i]))) |
534 | return encode(input); |
535 | } |
536 | return input; |
537 | } |
538 | |
539 | void URL::setUser(const String& user) |
540 | { |
541 | if (!m_isValid) |
542 | return; |
543 | |
544 | // FIXME: Non-ASCII characters must be encoded and escaped to match parse() expectations, |
545 | // and to avoid changing more than just the user login. |
546 | |
547 | unsigned end = m_userEnd; |
548 | if (!user.isEmpty()) { |
549 | String u = percentEncodeCharacters(user, URLParser::isInUserInfoEncodeSet); |
550 | if (m_userStart == static_cast<unsigned>(m_schemeEnd + 1)) |
551 | u = "//" + u; |
552 | // Add '@' if we didn't have one before. |
553 | if (end == m_hostEnd || (end == m_passwordEnd && m_string[end] != '@')) |
554 | u.append('@'); |
555 | URLParser parser(makeString(StringView(m_string).left(m_userStart), u, StringView(m_string).substring(end))); |
556 | *this = parser.result(); |
557 | } else { |
558 | // Remove '@' if we now have neither user nor password. |
559 | if (m_userEnd == m_passwordEnd && end != m_hostEnd && m_string[end] == '@') |
560 | end += 1; |
561 | // We don't want to parse in the extremely common case where we are not going to make a change. |
562 | if (m_userStart != end) { |
563 | URLParser parser(makeString(StringView(m_string).left(m_userStart), StringView(m_string).substring(end))); |
564 | *this = parser.result(); |
565 | } |
566 | } |
567 | } |
568 | |
569 | void URL::setPass(const String& password) |
570 | { |
571 | if (!m_isValid) |
572 | return; |
573 | |
574 | unsigned end = m_passwordEnd; |
575 | if (!password.isEmpty()) { |
576 | String p = ":" + percentEncodeCharacters(password, URLParser::isInUserInfoEncodeSet) + "@" ; |
577 | if (m_userEnd == static_cast<unsigned>(m_schemeEnd + 1)) |
578 | p = "//" + p; |
579 | // Eat the existing '@' since we are going to add our own. |
580 | if (end != m_hostEnd && m_string[end] == '@') |
581 | end += 1; |
582 | URLParser parser(makeString(StringView(m_string).left(m_userEnd), p, StringView(m_string).substring(end))); |
583 | *this = parser.result(); |
584 | } else { |
585 | // Remove '@' if we now have neither user nor password. |
586 | if (m_userStart == m_userEnd && end != m_hostEnd && m_string[end] == '@') |
587 | end += 1; |
588 | // We don't want to parse in the extremely common case where we are not going to make a change. |
589 | if (m_userEnd != end) { |
590 | URLParser parser(makeString(StringView(m_string).left(m_userEnd), StringView(m_string).substring(end))); |
591 | *this = parser.result(); |
592 | } |
593 | } |
594 | } |
595 | |
596 | void URL::setFragmentIdentifier(StringView identifier) |
597 | { |
598 | if (!m_isValid) |
599 | return; |
600 | |
601 | // FIXME: Optimize the case where the identifier already happens to be equal to what was passed? |
602 | // FIXME: Is it correct to do this without encoding and escaping non-ASCII characters? |
603 | *this = URLParser { makeString(StringView { m_string }.substring(0, m_queryEnd), '#', identifier) }.result(); |
604 | } |
605 | |
606 | void URL::removeFragmentIdentifier() |
607 | { |
608 | if (!m_isValid) { |
609 | ASSERT(!m_queryEnd); |
610 | return; |
611 | } |
612 | if (m_isValid && m_string.length() > m_queryEnd) |
613 | m_string = m_string.left(m_queryEnd); |
614 | } |
615 | |
616 | void URL::removeQueryAndFragmentIdentifier() |
617 | { |
618 | if (!m_isValid) |
619 | return; |
620 | |
621 | m_string = m_string.left(m_pathEnd); |
622 | m_queryEnd = m_pathEnd; |
623 | } |
624 | |
625 | void URL::setQuery(const String& query) |
626 | { |
627 | if (!m_isValid) |
628 | return; |
629 | |
630 | // FIXME: '#' and non-ASCII characters must be encoded and escaped. |
631 | // Usually, the query is encoded using document encoding, not UTF-8, but we don't have |
632 | // access to the document in this function. |
633 | // https://webkit.org/b/161176 |
634 | if ((query.isEmpty() || query[0] != '?') && !query.isNull()) { |
635 | URLParser parser(makeString(StringView(m_string).left(m_pathEnd), "?" , query, StringView(m_string).substring(m_queryEnd))); |
636 | *this = parser.result(); |
637 | } else { |
638 | URLParser parser(makeString(StringView(m_string).left(m_pathEnd), query, StringView(m_string).substring(m_queryEnd))); |
639 | *this = parser.result(); |
640 | } |
641 | |
642 | } |
643 | |
644 | void URL::setPath(const String& s) |
645 | { |
646 | if (!m_isValid) |
647 | return; |
648 | |
649 | String path = s; |
650 | if (path.isEmpty() || path[0] != '/') |
651 | path = "/" + path; |
652 | |
653 | auto questionMarkOrNumberSign = [] (UChar character) { |
654 | return character == '?' || character == '#'; |
655 | }; |
656 | URLParser parser(makeString(StringView(m_string).left(m_hostEnd + m_portLength), percentEncodeCharacters(path, questionMarkOrNumberSign), StringView(m_string).substring(m_pathEnd))); |
657 | *this = parser.result(); |
658 | } |
659 | |
660 | bool equalIgnoringFragmentIdentifier(const URL& a, const URL& b) |
661 | { |
662 | if (a.m_queryEnd != b.m_queryEnd) |
663 | return false; |
664 | unsigned queryLength = a.m_queryEnd; |
665 | for (unsigned i = 0; i < queryLength; ++i) |
666 | if (a.string()[i] != b.string()[i]) |
667 | return false; |
668 | return true; |
669 | } |
670 | |
671 | bool equalIgnoringQueryAndFragment(const URL& a, const URL& b) |
672 | { |
673 | if (a.pathEnd() != b.pathEnd()) |
674 | return false; |
675 | unsigned pathEnd = a.pathEnd(); |
676 | for (unsigned i = 0; i < pathEnd; ++i) { |
677 | if (a.string()[i] != b.string()[i]) |
678 | return false; |
679 | } |
680 | return true; |
681 | } |
682 | |
683 | bool protocolHostAndPortAreEqual(const URL& a, const URL& b) |
684 | { |
685 | if (a.m_schemeEnd != b.m_schemeEnd) |
686 | return false; |
687 | |
688 | unsigned hostStartA = a.hostStart(); |
689 | unsigned hostLengthA = a.m_hostEnd - hostStartA; |
690 | unsigned hostStartB = b.hostStart(); |
691 | unsigned hostLengthB = b.m_hostEnd - b.hostStart(); |
692 | if (hostLengthA != hostLengthB) |
693 | return false; |
694 | |
695 | // Check the scheme |
696 | for (unsigned i = 0; i < a.m_schemeEnd; ++i) { |
697 | if (a.string()[i] != b.string()[i]) |
698 | return false; |
699 | } |
700 | |
701 | // And the host |
702 | for (unsigned i = 0; i < hostLengthA; ++i) { |
703 | if (a.string()[hostStartA + i] != b.string()[hostStartB + i]) |
704 | return false; |
705 | } |
706 | |
707 | if (a.port() != b.port()) |
708 | return false; |
709 | |
710 | return true; |
711 | } |
712 | |
713 | bool hostsAreEqual(const URL& a, const URL& b) |
714 | { |
715 | unsigned hostStartA = a.hostStart(); |
716 | unsigned hostLengthA = a.m_hostEnd - hostStartA; |
717 | unsigned hostStartB = b.hostStart(); |
718 | unsigned hostLengthB = b.m_hostEnd - hostStartB; |
719 | if (hostLengthA != hostLengthB) |
720 | return false; |
721 | |
722 | for (unsigned i = 0; i < hostLengthA; ++i) { |
723 | if (a.string()[hostStartA + i] != b.string()[hostStartB + i]) |
724 | return false; |
725 | } |
726 | |
727 | return true; |
728 | } |
729 | |
730 | bool URL::isMatchingDomain(const String& domain) const |
731 | { |
732 | if (isNull()) |
733 | return false; |
734 | |
735 | if (domain.isEmpty()) |
736 | return true; |
737 | |
738 | if (!protocolIsInHTTPFamily()) |
739 | return false; |
740 | |
741 | auto host = this->host(); |
742 | if (!host.endsWith(domain)) |
743 | return false; |
744 | |
745 | return host.length() == domain.length() || host[host.length() - domain.length() - 1] == '.'; |
746 | } |
747 | |
748 | String encodeWithURLEscapeSequences(const String& input) |
749 | { |
750 | return percentEncodeCharacters(input, URLParser::isInUserInfoEncodeSet); |
751 | } |
752 | |
753 | bool URL::isHierarchical() const |
754 | { |
755 | if (!m_isValid) |
756 | return false; |
757 | ASSERT(m_string[m_schemeEnd] == ':'); |
758 | return m_string[m_schemeEnd + 1] == '/'; |
759 | } |
760 | |
761 | void URL::copyToBuffer(Vector<char, 512>& buffer) const |
762 | { |
763 | // FIXME: This throws away the high bytes of all the characters in the string! |
764 | // That's fine for a valid URL, which is all ASCII, but not for invalid URLs. |
765 | buffer.resize(m_string.length()); |
766 | copyASCII(m_string, buffer.data()); |
767 | } |
768 | |
769 | template<typename StringClass> |
770 | bool protocolIsInternal(const StringClass& url, const char* protocol) |
771 | { |
772 | // Do the comparison without making a new string object. |
773 | assertProtocolIsGood(StringView(reinterpret_cast<const LChar*>(protocol), strlen(protocol))); |
774 | bool isLeading = true; |
775 | for (unsigned i = 0, j = 0; url[i]; ++i) { |
776 | // Skip leading whitespace and control characters. |
777 | if (isLeading && shouldTrimFromURL(url[i])) |
778 | continue; |
779 | isLeading = false; |
780 | |
781 | // Skip any tabs and newlines. |
782 | if (url[i] == '\t' || url[i] == '\r' || url[i] == '\n') |
783 | continue; |
784 | |
785 | if (!protocol[j]) |
786 | return url[i] == ':'; |
787 | if (!isASCIIAlphaCaselessEqual(url[i], protocol[j])) |
788 | return false; |
789 | |
790 | ++j; |
791 | } |
792 | |
793 | return false; |
794 | } |
795 | |
796 | bool protocolIs(const String& url, const char* protocol) |
797 | { |
798 | return protocolIsInternal(url, protocol); |
799 | } |
800 | |
801 | inline bool URL::protocolIs(const String& string, const char* protocol) |
802 | { |
803 | return WTF::protocolIsInternal(string, protocol); |
804 | } |
805 | |
806 | #ifndef NDEBUG |
807 | |
808 | void URL::print() const |
809 | { |
810 | printf("%s\n" , m_string.utf8().data()); |
811 | } |
812 | |
813 | #endif |
814 | |
815 | String URL::strippedForUseAsReferrer() const |
816 | { |
817 | URL referrer(*this); |
818 | referrer.setUser(String()); |
819 | referrer.setPass(String()); |
820 | referrer.removeFragmentIdentifier(); |
821 | return referrer.string(); |
822 | } |
823 | |
824 | bool URL::isLocalFile() const |
825 | { |
826 | // Including feed here might be a bad idea since drag and drop uses this check |
827 | // and including feed would allow feeds to potentially let someone's blog |
828 | // read the contents of the clipboard on a drag, even without a drop. |
829 | // Likewise with using the FrameLoader::shouldTreatURLAsLocal() function. |
830 | return protocolIs("file" ); |
831 | } |
832 | |
833 | bool protocolIsJavaScript(const String& url) |
834 | { |
835 | return protocolIsInternal(url, "javascript" ); |
836 | } |
837 | |
838 | bool protocolIsJavaScript(StringView url) |
839 | { |
840 | return protocolIsInternal(url, "javascript" ); |
841 | } |
842 | |
843 | bool protocolIsInHTTPFamily(const String& url) |
844 | { |
845 | auto length = url.length(); |
846 | // Do the comparison without making a new string object. |
847 | return length >= 5 |
848 | && isASCIIAlphaCaselessEqual(url[0], 'h') |
849 | && isASCIIAlphaCaselessEqual(url[1], 't') |
850 | && isASCIIAlphaCaselessEqual(url[2], 't') |
851 | && isASCIIAlphaCaselessEqual(url[3], 'p') |
852 | && (url[4] == ':' || (isASCIIAlphaCaselessEqual(url[4], 's') && length >= 6 && url[5] == ':')); |
853 | } |
854 | |
855 | const URL& blankURL() |
856 | { |
857 | static NeverDestroyed<URL> staticBlankURL(URL(), "about:blank" ); |
858 | return staticBlankURL; |
859 | } |
860 | |
861 | bool URL::protocolIsAbout() const |
862 | { |
863 | return protocolIs("about" ); |
864 | } |
865 | |
866 | bool portAllowed(const URL& url) |
867 | { |
868 | Optional<uint16_t> port = url.port(); |
869 | |
870 | // Since most URLs don't have a port, return early for the "no port" case. |
871 | if (!port) |
872 | return true; |
873 | |
874 | // This blocked port list matches the port blocking that Mozilla implements. |
875 | // See http://www.mozilla.org/projects/netlib/PortBanning.html for more information. |
876 | static const uint16_t blockedPortList[] = { |
877 | 1, // tcpmux |
878 | 7, // echo |
879 | 9, // discard |
880 | 11, // systat |
881 | 13, // daytime |
882 | 15, // netstat |
883 | 17, // qotd |
884 | 19, // chargen |
885 | 20, // FTP-data |
886 | 21, // FTP-control |
887 | 22, // SSH |
888 | 23, // telnet |
889 | 25, // SMTP |
890 | 37, // time |
891 | 42, // name |
892 | 43, // nicname |
893 | 53, // domain |
894 | 77, // priv-rjs |
895 | 79, // finger |
896 | 87, // ttylink |
897 | 95, // supdup |
898 | 101, // hostriame |
899 | 102, // iso-tsap |
900 | 103, // gppitnp |
901 | 104, // acr-nema |
902 | 109, // POP2 |
903 | 110, // POP3 |
904 | 111, // sunrpc |
905 | 113, // auth |
906 | 115, // SFTP |
907 | 117, // uucp-path |
908 | 119, // nntp |
909 | 123, // NTP |
910 | 135, // loc-srv / epmap |
911 | 139, // netbios |
912 | 143, // IMAP2 |
913 | 179, // BGP |
914 | 389, // LDAP |
915 | 427, // SLP (Also used by Apple Filing Protocol) |
916 | 465, // SMTP+SSL |
917 | 512, // print / exec |
918 | 513, // login |
919 | 514, // shell |
920 | 515, // printer |
921 | 526, // tempo |
922 | 530, // courier |
923 | 531, // Chat |
924 | 532, // netnews |
925 | 540, // UUCP |
926 | 548, // afpovertcp [Apple addition] |
927 | 556, // remotefs |
928 | 563, // NNTP+SSL |
929 | 587, // ESMTP |
930 | 601, // syslog-conn |
931 | 636, // LDAP+SSL |
932 | 993, // IMAP+SSL |
933 | 995, // POP3+SSL |
934 | 2049, // NFS |
935 | 3659, // apple-sasl / PasswordServer [Apple addition] |
936 | 4045, // lockd |
937 | 4190, // ManageSieve [Apple addition] |
938 | 6000, // X11 |
939 | 6665, // Alternate IRC [Apple addition] |
940 | 6666, // Alternate IRC [Apple addition] |
941 | 6667, // Standard IRC [Apple addition] |
942 | 6668, // Alternate IRC [Apple addition] |
943 | 6669, // Alternate IRC [Apple addition] |
944 | 6679, // Alternate IRC SSL [Apple addition] |
945 | 6697, // IRC+SSL [Apple addition] |
946 | invalidPortNumber, // Used to block all invalid port numbers |
947 | }; |
948 | |
949 | // If the port is not in the blocked port list, allow it. |
950 | ASSERT(std::is_sorted(std::begin(blockedPortList), std::end(blockedPortList))); |
951 | if (!std::binary_search(std::begin(blockedPortList), std::end(blockedPortList), port.value())) |
952 | return true; |
953 | |
954 | // Allow ports 21 and 22 for FTP URLs, as Mozilla does. |
955 | if ((port.value() == 21 || port.value() == 22) && url.protocolIs("ftp" )) |
956 | return true; |
957 | |
958 | // Allow any port number in a file URL, since the port number is ignored. |
959 | if (url.protocolIs("file" )) |
960 | return true; |
961 | |
962 | return false; |
963 | } |
964 | |
965 | String mimeTypeFromDataURL(const String& url) |
966 | { |
967 | ASSERT(protocolIsInternal(url, "data" )); |
968 | |
969 | // FIXME: What's the right behavior when the URL has a comma first, but a semicolon later? |
970 | // Currently this code will break at the semicolon in that case. Not sure that's correct. |
971 | auto index = url.find(';', 5); |
972 | if (index == notFound) |
973 | index = url.find(',', 5); |
974 | if (index == notFound) { |
975 | // FIXME: There was an old comment here that made it sound like this should be returning text/plain. |
976 | // But we have been returning empty string here for some time, so not changing its behavior at this time. |
977 | return emptyString(); |
978 | } |
979 | if (index == 5) |
980 | return "text/plain"_s ; |
981 | ASSERT(index >= 5); |
982 | return url.substring(5, index - 5).convertToASCIILowercase(); |
983 | } |
984 | |
985 | String URL::stringCenterEllipsizedToLength(unsigned length) const |
986 | { |
987 | if (string().length() <= length) |
988 | return string(); |
989 | |
990 | return string().left(length / 2 - 1) + "..." + string().right(length / 2 - 2); |
991 | } |
992 | |
993 | URL URL::fakeURLWithRelativePart(const String& relativePart) |
994 | { |
995 | return URL(URL(), "webkit-fake-url://" + createCanonicalUUIDString() + '/' + relativePart); |
996 | } |
997 | |
998 | URL URL::fileURLWithFileSystemPath(const String& filePath) |
999 | { |
1000 | return URL(URL(), "file:///" + filePath); |
1001 | } |
1002 | |
1003 | TextStream& operator<<(TextStream& ts, const URL& url) |
1004 | { |
1005 | ts << url.string(); |
1006 | return ts; |
1007 | } |
1008 | |
1009 | #if !PLATFORM(COCOA) && !USE(SOUP) |
1010 | static bool isIPv4Address(StringView string) |
1011 | { |
1012 | auto count = 0; |
1013 | |
1014 | for (const auto octet : string.splitAllowingEmptyEntries('.')) { |
1015 | if (count >= 4) |
1016 | return false; |
1017 | |
1018 | const auto length = octet.length(); |
1019 | if (!length || length > 3) |
1020 | return false; |
1021 | |
1022 | auto value = 0; |
1023 | for (auto i = 0u; i < length; ++i) { |
1024 | const auto digit = octet[i]; |
1025 | |
1026 | // Prohibit leading zeroes. |
1027 | if (digit > '9' || digit < (!i && length > 1 ? '1' : '0')) |
1028 | return false; |
1029 | |
1030 | value = 10 * value + (digit - '0'); |
1031 | } |
1032 | |
1033 | if (value > 255) |
1034 | return false; |
1035 | |
1036 | count++; |
1037 | } |
1038 | |
1039 | return (count == 4); |
1040 | } |
1041 | |
1042 | static bool isIPv6Address(StringView string) |
1043 | { |
1044 | enum SkipState { None, WillSkip, Skipping, Skipped, Final }; |
1045 | auto skipState = None; |
1046 | auto count = 0; |
1047 | |
1048 | for (const auto hextet : string.splitAllowingEmptyEntries(':')) { |
1049 | if (count >= 8 || skipState == Final) |
1050 | return false; |
1051 | |
1052 | const auto length = hextet.length(); |
1053 | if (!length) { |
1054 | // :: may be used anywhere to skip 1 to 8 hextets, but only once. |
1055 | if (skipState == Skipped) |
1056 | return false; |
1057 | |
1058 | if (skipState == None) |
1059 | skipState = !count ? WillSkip : Skipping; |
1060 | else if (skipState == WillSkip) |
1061 | skipState = Skipping; |
1062 | else |
1063 | skipState = Final; |
1064 | continue; |
1065 | } |
1066 | |
1067 | if (skipState == WillSkip) |
1068 | return false; |
1069 | |
1070 | if (skipState == Skipping) |
1071 | skipState = Skipped; |
1072 | |
1073 | if (length > 4) { |
1074 | // An IPv4 address may be used in place of the final two hextets. |
1075 | if ((skipState == None && count != 6) || (skipState == Skipped && count >= 6) || !isIPv4Address(hextet)) |
1076 | return false; |
1077 | |
1078 | skipState = Final; |
1079 | continue; |
1080 | } |
1081 | |
1082 | for (const auto codeUnit : hextet.codeUnits()) { |
1083 | // IPv6 allows leading zeroes. |
1084 | if (!isASCIIHexDigit(codeUnit)) |
1085 | return false; |
1086 | } |
1087 | |
1088 | count++; |
1089 | } |
1090 | |
1091 | return (count == 8 && skipState == None) || skipState == Skipped || skipState == Final; |
1092 | } |
1093 | |
1094 | bool URL::hostIsIPAddress(StringView host) |
1095 | { |
1096 | if (host.find(':') == notFound) |
1097 | return isIPv4Address(host); |
1098 | |
1099 | return isIPv6Address(host); |
1100 | } |
1101 | #endif |
1102 | |
1103 | } // namespace WTF |
1104 | |