1/*
2 * Copyright (C) 2004-2019 Apple Inc. All rights reserved.
3 * Copyright (C) 2012 Research In Motion Limited. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include <wtf/URL.h>
29
30#include "URLParser.h"
31#include <stdio.h>
32#include <unicode/uidna.h>
33#include <wtf/HashMap.h>
34#include <wtf/NeverDestroyed.h>
35#include <wtf/StdLibExtras.h>
36#include <wtf/UUID.h>
37#include <wtf/text/CString.h>
38#include <wtf/text/StringBuilder.h>
39#include <wtf/text/StringConcatenateNumbers.h>
40#include <wtf/text/StringHash.h>
41#include <wtf/text/TextStream.h>
42
43namespace WTF {
44
45typedef Vector<char, 512> CharBuffer;
46typedef Vector<UChar, 512> UCharBuffer;
47
48static const unsigned invalidPortNumber = 0xFFFF;
49
50// Copies the source to the destination, assuming all the source characters are
51// ASCII. The destination buffer must be large enough. Null characters are allowed
52// in the source string, and no attempt is made to null-terminate the result.
53static void copyASCII(const String& string, char* dest)
54{
55 if (string.isEmpty())
56 return;
57
58 if (string.is8Bit())
59 memcpy(dest, string.characters8(), string.length());
60 else {
61 const UChar* src = string.characters16();
62 size_t length = string.length();
63 for (size_t i = 0; i < length; i++)
64 dest[i] = static_cast<char>(src[i]);
65 }
66}
67
68void URL::invalidate()
69{
70 m_isValid = false;
71 m_protocolIsInHTTPFamily = false;
72 m_cannotBeABaseURL = false;
73 m_schemeEnd = 0;
74 m_userStart = 0;
75 m_userEnd = 0;
76 m_passwordEnd = 0;
77 m_hostEnd = 0;
78 m_portLength = 0;
79 m_pathEnd = 0;
80 m_pathAfterLastSlash = 0;
81 m_queryEnd = 0;
82}
83
84URL::URL(const URL& base, const String& relative, const URLTextEncoding* encoding)
85{
86 URLParser parser(relative, base, encoding);
87 *this = parser.result();
88}
89
90static bool shouldTrimFromURL(UChar c)
91{
92 // Browsers ignore leading/trailing whitespace and control
93 // characters from URLs. Note that c is an *unsigned* char here
94 // so this comparison should only catch control characters.
95 return c <= ' ';
96}
97
98URL URL::isolatedCopy() const
99{
100 URL result = *this;
101 result.m_string = result.m_string.isolatedCopy();
102 return result;
103}
104
105String URL::lastPathComponent() const
106{
107 if (!hasPath())
108 return String();
109
110 unsigned end = m_pathEnd - 1;
111 if (m_string[end] == '/')
112 --end;
113
114 size_t start = m_string.reverseFind('/', end);
115 if (start < static_cast<unsigned>(m_hostEnd + m_portLength))
116 return String();
117 ++start;
118
119 return m_string.substring(start, end - start + 1);
120}
121
122StringView URL::protocol() const
123{
124 return StringView(m_string).substring(0, m_schemeEnd);
125}
126
127StringView URL::host() const
128{
129 unsigned start = hostStart();
130 return StringView(m_string).substring(start, m_hostEnd - start);
131}
132
133Optional<uint16_t> URL::port() const
134{
135 if (!m_portLength)
136 return WTF::nullopt;
137
138 bool ok = false;
139 unsigned number;
140 if (m_string.is8Bit())
141 number = charactersToUIntStrict(m_string.characters8() + m_hostEnd + 1, m_portLength - 1, &ok);
142 else
143 number = charactersToUIntStrict(m_string.characters16() + m_hostEnd + 1, m_portLength - 1, &ok);
144 if (!ok || number > std::numeric_limits<uint16_t>::max())
145 return WTF::nullopt;
146 return number;
147}
148
149String URL::hostAndPort() const
150{
151 if (auto port = this->port())
152 return makeString(host(), ':', static_cast<unsigned>(port.value()));
153 return host().toString();
154}
155
156String URL::protocolHostAndPort() const
157{
158 String result = m_string.substring(0, m_hostEnd + m_portLength);
159
160 if (m_passwordEnd - m_userStart > 0) {
161 const int allowForTrailingAtSign = 1;
162 result.remove(m_userStart, m_passwordEnd - m_userStart + allowForTrailingAtSign);
163 }
164
165 return result;
166}
167
168static String decodeEscapeSequencesFromParsedURL(StringView input)
169{
170 auto inputLength = input.length();
171 if (!inputLength)
172 return emptyString();
173 Vector<LChar> percentDecoded;
174 percentDecoded.reserveInitialCapacity(inputLength);
175 for (unsigned i = 0; i < inputLength; ++i) {
176 if (input[i] == '%'
177 && inputLength > 2
178 && i < inputLength - 2
179 && isASCIIHexDigit(input[i + 1])
180 && isASCIIHexDigit(input[i + 2])) {
181 percentDecoded.uncheckedAppend(toASCIIHexValue(input[i + 1], input[i + 2]));
182 i += 2;
183 } else
184 percentDecoded.uncheckedAppend(input[i]);
185 }
186 return String::fromUTF8(percentDecoded.data(), percentDecoded.size());
187}
188
189String URL::user() const
190{
191 return decodeEscapeSequencesFromParsedURL(StringView(m_string).substring(m_userStart, m_userEnd - m_userStart));
192}
193
194String URL::pass() const
195{
196 if (m_passwordEnd == m_userEnd)
197 return String();
198
199 return decodeEscapeSequencesFromParsedURL(StringView(m_string).substring(m_userEnd + 1, m_passwordEnd - m_userEnd - 1));
200}
201
202String URL::encodedUser() const
203{
204 return m_string.substring(m_userStart, m_userEnd - m_userStart);
205}
206
207String URL::encodedPass() const
208{
209 if (m_passwordEnd == m_userEnd)
210 return String();
211
212 return m_string.substring(m_userEnd + 1, m_passwordEnd - m_userEnd - 1);
213}
214
215String URL::fragmentIdentifier() const
216{
217 if (!hasFragmentIdentifier())
218 return String();
219
220 return m_string.substring(m_queryEnd + 1);
221}
222
223bool URL::hasFragmentIdentifier() const
224{
225 return m_isValid && m_string.length() != m_queryEnd;
226}
227
228String URL::baseAsString() const
229{
230 return m_string.left(m_pathAfterLastSlash);
231}
232
233#if !USE(CF)
234
235String URL::fileSystemPath() const
236{
237 if (!isValid() || !isLocalFile())
238 return String();
239
240 return decodeEscapeSequencesFromParsedURL(StringView(path()));
241}
242
243#endif
244
245#ifdef NDEBUG
246
247static inline void assertProtocolIsGood(StringView)
248{
249}
250
251#else
252
253static void assertProtocolIsGood(StringView protocol)
254{
255 // FIXME: We probably don't need this function any more.
256 // The isASCIIAlphaCaselessEqual function asserts that passed-in characters
257 // are ones it can handle; the older code did not and relied on these checks.
258 for (auto character : protocol.codeUnits()) {
259 ASSERT(isASCII(character));
260 ASSERT(character > ' ');
261 ASSERT(!isASCIIUpper(character));
262 ASSERT(toASCIILowerUnchecked(character) == character);
263 }
264}
265
266#endif
267
268static Lock defaultPortForProtocolMapForTestingLock;
269
270using DefaultPortForProtocolMapForTesting = HashMap<String, uint16_t>;
271static DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMapForTesting()
272{
273 static DefaultPortForProtocolMapForTesting* defaultPortForProtocolMap;
274 return defaultPortForProtocolMap;
275}
276
277static DefaultPortForProtocolMapForTesting& ensureDefaultPortForProtocolMapForTesting()
278{
279 DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMap = defaultPortForProtocolMapForTesting();
280 if (!defaultPortForProtocolMap)
281 defaultPortForProtocolMap = new DefaultPortForProtocolMapForTesting;
282 return *defaultPortForProtocolMap;
283}
284
285void registerDefaultPortForProtocolForTesting(uint16_t port, const String& protocol)
286{
287 auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
288 ensureDefaultPortForProtocolMapForTesting().add(protocol, port);
289}
290
291void clearDefaultPortForProtocolMapForTesting()
292{
293 auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
294 if (auto* map = defaultPortForProtocolMapForTesting())
295 map->clear();
296}
297
298Optional<uint16_t> defaultPortForProtocol(StringView protocol)
299{
300 if (auto* overrideMap = defaultPortForProtocolMapForTesting()) {
301 auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
302 ASSERT(overrideMap); // No need to null check again here since overrideMap cannot become null after being non-null.
303 auto iterator = overrideMap->find(protocol.toStringWithoutCopying());
304 if (iterator != overrideMap->end())
305 return iterator->value;
306 }
307 return URLParser::defaultPortForProtocol(protocol);
308}
309
310bool isDefaultPortForProtocol(uint16_t port, StringView protocol)
311{
312 return defaultPortForProtocol(protocol) == port;
313}
314
315bool URL::protocolIs(const char* protocol) const
316{
317 assertProtocolIsGood(StringView(reinterpret_cast<const LChar*>(protocol), strlen(protocol)));
318
319 // JavaScript URLs are "valid" and should be executed even if URL decides they are invalid.
320 // The free function protocolIsJavaScript() should be used instead.
321 ASSERT(!equalLettersIgnoringASCIICase(StringView(protocol), "javascript"));
322
323 if (!m_isValid)
324 return false;
325
326 // Do the comparison without making a new string object.
327 for (unsigned i = 0; i < m_schemeEnd; ++i) {
328 if (!protocol[i] || !isASCIIAlphaCaselessEqual(m_string[i], protocol[i]))
329 return false;
330 }
331 return !protocol[m_schemeEnd]; // We should have consumed all characters in the argument.
332}
333
334bool URL::protocolIs(StringView protocol) const
335{
336 assertProtocolIsGood(protocol);
337
338 if (!m_isValid)
339 return false;
340
341 if (m_schemeEnd != protocol.length())
342 return false;
343
344 // Do the comparison without making a new string object.
345 for (unsigned i = 0; i < m_schemeEnd; ++i) {
346 if (!isASCIIAlphaCaselessEqual(m_string[i], protocol[i]))
347 return false;
348 }
349 return true;
350}
351
352String URL::query() const
353{
354 if (m_queryEnd == m_pathEnd)
355 return String();
356
357 return m_string.substring(m_pathEnd + 1, m_queryEnd - (m_pathEnd + 1));
358}
359
360String URL::path() const
361{
362 unsigned portEnd = m_hostEnd + m_portLength;
363 return m_string.substring(portEnd, m_pathEnd - portEnd);
364}
365
366bool URL::setProtocol(const String& s)
367{
368 // Firefox and IE remove everything after the first ':'.
369 size_t separatorPosition = s.find(':');
370 String newProtocol = s.substring(0, separatorPosition);
371 auto canonicalized = URLParser::maybeCanonicalizeScheme(newProtocol);
372 if (!canonicalized)
373 return false;
374
375 if (!m_isValid) {
376 URLParser parser(makeString(*canonicalized, ":", m_string));
377 *this = parser.result();
378 return true;
379 }
380
381 URLParser parser(makeString(*canonicalized, m_string.substring(m_schemeEnd)));
382 *this = parser.result();
383 return true;
384}
385
386static bool isAllASCII(StringView string)
387{
388 if (string.is8Bit())
389 return charactersAreAllASCII(string.characters8(), string.length());
390 return charactersAreAllASCII(string.characters16(), string.length());
391}
392
393// Appends the punycoded hostname identified by the given string and length to
394// the output buffer. The result will not be null terminated.
395// Return value of false means error in encoding.
396static bool appendEncodedHostname(UCharBuffer& buffer, StringView string)
397{
398 // Needs to be big enough to hold an IDN-encoded name.
399 // For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.
400 const unsigned hostnameBufferLength = 2048;
401
402 if (string.length() > hostnameBufferLength || isAllASCII(string)) {
403 append(buffer, string);
404 return true;
405 }
406
407 UChar hostnameBuffer[hostnameBufferLength];
408 UErrorCode error = U_ZERO_ERROR;
409 UIDNAInfo processingDetails = UIDNA_INFO_INITIALIZER;
410 int32_t numCharactersConverted = uidna_nameToASCII(&URLParser::internationalDomainNameTranscoder(),
411 string.upconvertedCharacters(), string.length(), hostnameBuffer, hostnameBufferLength, &processingDetails, &error);
412
413 if (U_SUCCESS(error) && !processingDetails.errors) {
414 buffer.append(hostnameBuffer, numCharactersConverted);
415 return true;
416 }
417 return false;
418}
419
420unsigned URL::hostStart() const
421{
422 return (m_passwordEnd == m_userStart) ? m_passwordEnd : m_passwordEnd + 1;
423}
424
425void URL::setHost(const String& s)
426{
427 if (!m_isValid)
428 return;
429
430 auto colonIndex = s.find(':');
431 if (colonIndex != notFound)
432 return;
433
434 UCharBuffer encodedHostName;
435 if (!appendEncodedHostname(encodedHostName, s))
436 return;
437
438 bool slashSlashNeeded = m_userStart == static_cast<unsigned>(m_schemeEnd + 1);
439
440 StringBuilder builder;
441 builder.append(m_string.left(hostStart()));
442 if (slashSlashNeeded)
443 builder.appendLiteral("//");
444 builder.append(StringView(encodedHostName.data(), encodedHostName.size()));
445 builder.append(m_string.substring(m_hostEnd));
446
447 URLParser parser(builder.toString());
448 *this = parser.result();
449}
450
451void URL::removePort()
452{
453 if (!m_portLength)
454 return;
455 URLParser parser(makeString(StringView(m_string).left(m_hostEnd), StringView(m_string).substring(m_hostEnd + m_portLength)));
456 *this = parser.result();
457}
458
459void URL::setPort(unsigned short i)
460{
461 if (!m_isValid)
462 return;
463
464 bool colonNeeded = !m_portLength;
465 unsigned portStart = (colonNeeded ? m_hostEnd : m_hostEnd + 1);
466
467 URLParser parser(makeString(StringView(m_string).left(portStart), (colonNeeded ? ":" : ""), static_cast<unsigned>(i), StringView(m_string).substring(m_hostEnd + m_portLength)));
468 *this = parser.result();
469}
470
471void URL::setHostAndPort(const String& hostAndPort)
472{
473 if (!m_isValid)
474 return;
475
476 StringView hostName(hostAndPort);
477 StringView port;
478
479 auto colonIndex = hostName.find(':');
480 if (colonIndex != notFound) {
481 port = hostName.substring(colonIndex + 1);
482 bool ok;
483 int portInt = port.toIntStrict(ok);
484 if (!ok || portInt < 0)
485 return;
486 hostName = hostName.substring(0, colonIndex);
487 }
488
489 if (hostName.isEmpty())
490 return;
491
492 UCharBuffer encodedHostName;
493 if (!appendEncodedHostname(encodedHostName, hostName))
494 return;
495
496 bool slashSlashNeeded = m_userStart == static_cast<unsigned>(m_schemeEnd + 1);
497
498 StringBuilder builder;
499 builder.append(m_string.left(hostStart()));
500 if (slashSlashNeeded)
501 builder.appendLiteral("//");
502 builder.append(StringView(encodedHostName.data(), encodedHostName.size()));
503 if (!port.isEmpty()) {
504 builder.appendLiteral(":");
505 builder.append(port);
506 }
507 builder.append(StringView(m_string).substring(m_hostEnd + m_portLength));
508
509 URLParser parser(builder.toString());
510 *this = parser.result();
511}
512
513static String percentEncodeCharacters(const String& input, bool(*shouldEncode)(UChar))
514{
515 auto encode = [shouldEncode] (const String& input) {
516 CString utf8 = input.utf8();
517 auto* data = utf8.data();
518 StringBuilder builder;
519 auto length = utf8.length();
520 for (unsigned j = 0; j < length; j++) {
521 auto c = data[j];
522 if (shouldEncode(c)) {
523 builder.append('%');
524 builder.append(upperNibbleToASCIIHexDigit(c));
525 builder.append(lowerNibbleToASCIIHexDigit(c));
526 } else
527 builder.append(c);
528 }
529 return builder.toString();
530 };
531
532 for (size_t i = 0; i < input.length(); ++i) {
533 if (UNLIKELY(shouldEncode(input[i])))
534 return encode(input);
535 }
536 return input;
537}
538
539void URL::setUser(const String& user)
540{
541 if (!m_isValid)
542 return;
543
544 // FIXME: Non-ASCII characters must be encoded and escaped to match parse() expectations,
545 // and to avoid changing more than just the user login.
546
547 unsigned end = m_userEnd;
548 if (!user.isEmpty()) {
549 String u = percentEncodeCharacters(user, URLParser::isInUserInfoEncodeSet);
550 if (m_userStart == static_cast<unsigned>(m_schemeEnd + 1))
551 u = "//" + u;
552 // Add '@' if we didn't have one before.
553 if (end == m_hostEnd || (end == m_passwordEnd && m_string[end] != '@'))
554 u.append('@');
555 URLParser parser(makeString(StringView(m_string).left(m_userStart), u, StringView(m_string).substring(end)));
556 *this = parser.result();
557 } else {
558 // Remove '@' if we now have neither user nor password.
559 if (m_userEnd == m_passwordEnd && end != m_hostEnd && m_string[end] == '@')
560 end += 1;
561 // We don't want to parse in the extremely common case where we are not going to make a change.
562 if (m_userStart != end) {
563 URLParser parser(makeString(StringView(m_string).left(m_userStart), StringView(m_string).substring(end)));
564 *this = parser.result();
565 }
566 }
567}
568
569void URL::setPass(const String& password)
570{
571 if (!m_isValid)
572 return;
573
574 unsigned end = m_passwordEnd;
575 if (!password.isEmpty()) {
576 String p = ":" + percentEncodeCharacters(password, URLParser::isInUserInfoEncodeSet) + "@";
577 if (m_userEnd == static_cast<unsigned>(m_schemeEnd + 1))
578 p = "//" + p;
579 // Eat the existing '@' since we are going to add our own.
580 if (end != m_hostEnd && m_string[end] == '@')
581 end += 1;
582 URLParser parser(makeString(StringView(m_string).left(m_userEnd), p, StringView(m_string).substring(end)));
583 *this = parser.result();
584 } else {
585 // Remove '@' if we now have neither user nor password.
586 if (m_userStart == m_userEnd && end != m_hostEnd && m_string[end] == '@')
587 end += 1;
588 // We don't want to parse in the extremely common case where we are not going to make a change.
589 if (m_userEnd != end) {
590 URLParser parser(makeString(StringView(m_string).left(m_userEnd), StringView(m_string).substring(end)));
591 *this = parser.result();
592 }
593 }
594}
595
596void URL::setFragmentIdentifier(StringView identifier)
597{
598 if (!m_isValid)
599 return;
600
601 // FIXME: Optimize the case where the identifier already happens to be equal to what was passed?
602 // FIXME: Is it correct to do this without encoding and escaping non-ASCII characters?
603 *this = URLParser { makeString(StringView { m_string }.substring(0, m_queryEnd), '#', identifier) }.result();
604}
605
606void URL::removeFragmentIdentifier()
607{
608 if (!m_isValid) {
609 ASSERT(!m_queryEnd);
610 return;
611 }
612 if (m_isValid && m_string.length() > m_queryEnd)
613 m_string = m_string.left(m_queryEnd);
614}
615
616void URL::removeQueryAndFragmentIdentifier()
617{
618 if (!m_isValid)
619 return;
620
621 m_string = m_string.left(m_pathEnd);
622 m_queryEnd = m_pathEnd;
623}
624
625void URL::setQuery(const String& query)
626{
627 if (!m_isValid)
628 return;
629
630 // FIXME: '#' and non-ASCII characters must be encoded and escaped.
631 // Usually, the query is encoded using document encoding, not UTF-8, but we don't have
632 // access to the document in this function.
633 // https://webkit.org/b/161176
634 if ((query.isEmpty() || query[0] != '?') && !query.isNull()) {
635 URLParser parser(makeString(StringView(m_string).left(m_pathEnd), "?", query, StringView(m_string).substring(m_queryEnd)));
636 *this = parser.result();
637 } else {
638 URLParser parser(makeString(StringView(m_string).left(m_pathEnd), query, StringView(m_string).substring(m_queryEnd)));
639 *this = parser.result();
640 }
641
642}
643
644void URL::setPath(const String& s)
645{
646 if (!m_isValid)
647 return;
648
649 String path = s;
650 if (path.isEmpty() || path[0] != '/')
651 path = "/" + path;
652
653 auto questionMarkOrNumberSign = [] (UChar character) {
654 return character == '?' || character == '#';
655 };
656 URLParser parser(makeString(StringView(m_string).left(m_hostEnd + m_portLength), percentEncodeCharacters(path, questionMarkOrNumberSign), StringView(m_string).substring(m_pathEnd)));
657 *this = parser.result();
658}
659
660bool equalIgnoringFragmentIdentifier(const URL& a, const URL& b)
661{
662 if (a.m_queryEnd != b.m_queryEnd)
663 return false;
664 unsigned queryLength = a.m_queryEnd;
665 for (unsigned i = 0; i < queryLength; ++i)
666 if (a.string()[i] != b.string()[i])
667 return false;
668 return true;
669}
670
671bool equalIgnoringQueryAndFragment(const URL& a, const URL& b)
672{
673 if (a.pathEnd() != b.pathEnd())
674 return false;
675 unsigned pathEnd = a.pathEnd();
676 for (unsigned i = 0; i < pathEnd; ++i) {
677 if (a.string()[i] != b.string()[i])
678 return false;
679 }
680 return true;
681}
682
683bool protocolHostAndPortAreEqual(const URL& a, const URL& b)
684{
685 if (a.m_schemeEnd != b.m_schemeEnd)
686 return false;
687
688 unsigned hostStartA = a.hostStart();
689 unsigned hostLengthA = a.m_hostEnd - hostStartA;
690 unsigned hostStartB = b.hostStart();
691 unsigned hostLengthB = b.m_hostEnd - b.hostStart();
692 if (hostLengthA != hostLengthB)
693 return false;
694
695 // Check the scheme
696 for (unsigned i = 0; i < a.m_schemeEnd; ++i) {
697 if (a.string()[i] != b.string()[i])
698 return false;
699 }
700
701 // And the host
702 for (unsigned i = 0; i < hostLengthA; ++i) {
703 if (a.string()[hostStartA + i] != b.string()[hostStartB + i])
704 return false;
705 }
706
707 if (a.port() != b.port())
708 return false;
709
710 return true;
711}
712
713bool hostsAreEqual(const URL& a, const URL& b)
714{
715 unsigned hostStartA = a.hostStart();
716 unsigned hostLengthA = a.m_hostEnd - hostStartA;
717 unsigned hostStartB = b.hostStart();
718 unsigned hostLengthB = b.m_hostEnd - hostStartB;
719 if (hostLengthA != hostLengthB)
720 return false;
721
722 for (unsigned i = 0; i < hostLengthA; ++i) {
723 if (a.string()[hostStartA + i] != b.string()[hostStartB + i])
724 return false;
725 }
726
727 return true;
728}
729
730bool URL::isMatchingDomain(const String& domain) const
731{
732 if (isNull())
733 return false;
734
735 if (domain.isEmpty())
736 return true;
737
738 if (!protocolIsInHTTPFamily())
739 return false;
740
741 auto host = this->host();
742 if (!host.endsWith(domain))
743 return false;
744
745 return host.length() == domain.length() || host[host.length() - domain.length() - 1] == '.';
746}
747
748String encodeWithURLEscapeSequences(const String& input)
749{
750 return percentEncodeCharacters(input, URLParser::isInUserInfoEncodeSet);
751}
752
753bool URL::isHierarchical() const
754{
755 if (!m_isValid)
756 return false;
757 ASSERT(m_string[m_schemeEnd] == ':');
758 return m_string[m_schemeEnd + 1] == '/';
759}
760
761void URL::copyToBuffer(Vector<char, 512>& buffer) const
762{
763 // FIXME: This throws away the high bytes of all the characters in the string!
764 // That's fine for a valid URL, which is all ASCII, but not for invalid URLs.
765 buffer.resize(m_string.length());
766 copyASCII(m_string, buffer.data());
767}
768
769template<typename StringClass>
770bool protocolIsInternal(const StringClass& url, const char* protocol)
771{
772 // Do the comparison without making a new string object.
773 assertProtocolIsGood(StringView(reinterpret_cast<const LChar*>(protocol), strlen(protocol)));
774 bool isLeading = true;
775 for (unsigned i = 0, j = 0; url[i]; ++i) {
776 // Skip leading whitespace and control characters.
777 if (isLeading && shouldTrimFromURL(url[i]))
778 continue;
779 isLeading = false;
780
781 // Skip any tabs and newlines.
782 if (url[i] == '\t' || url[i] == '\r' || url[i] == '\n')
783 continue;
784
785 if (!protocol[j])
786 return url[i] == ':';
787 if (!isASCIIAlphaCaselessEqual(url[i], protocol[j]))
788 return false;
789
790 ++j;
791 }
792
793 return false;
794}
795
796bool protocolIs(const String& url, const char* protocol)
797{
798 return protocolIsInternal(url, protocol);
799}
800
801inline bool URL::protocolIs(const String& string, const char* protocol)
802{
803 return WTF::protocolIsInternal(string, protocol);
804}
805
806#ifndef NDEBUG
807
808void URL::print() const
809{
810 printf("%s\n", m_string.utf8().data());
811}
812
813#endif
814
815String URL::strippedForUseAsReferrer() const
816{
817 URL referrer(*this);
818 referrer.setUser(String());
819 referrer.setPass(String());
820 referrer.removeFragmentIdentifier();
821 return referrer.string();
822}
823
824bool URL::isLocalFile() const
825{
826 // Including feed here might be a bad idea since drag and drop uses this check
827 // and including feed would allow feeds to potentially let someone's blog
828 // read the contents of the clipboard on a drag, even without a drop.
829 // Likewise with using the FrameLoader::shouldTreatURLAsLocal() function.
830 return protocolIs("file");
831}
832
833bool protocolIsJavaScript(const String& url)
834{
835 return protocolIsInternal(url, "javascript");
836}
837
838bool protocolIsJavaScript(StringView url)
839{
840 return protocolIsInternal(url, "javascript");
841}
842
843bool protocolIsInHTTPFamily(const String& url)
844{
845 auto length = url.length();
846 // Do the comparison without making a new string object.
847 return length >= 5
848 && isASCIIAlphaCaselessEqual(url[0], 'h')
849 && isASCIIAlphaCaselessEqual(url[1], 't')
850 && isASCIIAlphaCaselessEqual(url[2], 't')
851 && isASCIIAlphaCaselessEqual(url[3], 'p')
852 && (url[4] == ':' || (isASCIIAlphaCaselessEqual(url[4], 's') && length >= 6 && url[5] == ':'));
853}
854
855const URL& blankURL()
856{
857 static NeverDestroyed<URL> staticBlankURL(URL(), "about:blank");
858 return staticBlankURL;
859}
860
861bool URL::protocolIsAbout() const
862{
863 return protocolIs("about");
864}
865
866bool portAllowed(const URL& url)
867{
868 Optional<uint16_t> port = url.port();
869
870 // Since most URLs don't have a port, return early for the "no port" case.
871 if (!port)
872 return true;
873
874 // This blocked port list matches the port blocking that Mozilla implements.
875 // See http://www.mozilla.org/projects/netlib/PortBanning.html for more information.
876 static const uint16_t blockedPortList[] = {
877 1, // tcpmux
878 7, // echo
879 9, // discard
880 11, // systat
881 13, // daytime
882 15, // netstat
883 17, // qotd
884 19, // chargen
885 20, // FTP-data
886 21, // FTP-control
887 22, // SSH
888 23, // telnet
889 25, // SMTP
890 37, // time
891 42, // name
892 43, // nicname
893 53, // domain
894 77, // priv-rjs
895 79, // finger
896 87, // ttylink
897 95, // supdup
898 101, // hostriame
899 102, // iso-tsap
900 103, // gppitnp
901 104, // acr-nema
902 109, // POP2
903 110, // POP3
904 111, // sunrpc
905 113, // auth
906 115, // SFTP
907 117, // uucp-path
908 119, // nntp
909 123, // NTP
910 135, // loc-srv / epmap
911 139, // netbios
912 143, // IMAP2
913 179, // BGP
914 389, // LDAP
915 427, // SLP (Also used by Apple Filing Protocol)
916 465, // SMTP+SSL
917 512, // print / exec
918 513, // login
919 514, // shell
920 515, // printer
921 526, // tempo
922 530, // courier
923 531, // Chat
924 532, // netnews
925 540, // UUCP
926 548, // afpovertcp [Apple addition]
927 556, // remotefs
928 563, // NNTP+SSL
929 587, // ESMTP
930 601, // syslog-conn
931 636, // LDAP+SSL
932 993, // IMAP+SSL
933 995, // POP3+SSL
934 2049, // NFS
935 3659, // apple-sasl / PasswordServer [Apple addition]
936 4045, // lockd
937 4190, // ManageSieve [Apple addition]
938 6000, // X11
939 6665, // Alternate IRC [Apple addition]
940 6666, // Alternate IRC [Apple addition]
941 6667, // Standard IRC [Apple addition]
942 6668, // Alternate IRC [Apple addition]
943 6669, // Alternate IRC [Apple addition]
944 6679, // Alternate IRC SSL [Apple addition]
945 6697, // IRC+SSL [Apple addition]
946 invalidPortNumber, // Used to block all invalid port numbers
947 };
948
949 // If the port is not in the blocked port list, allow it.
950 ASSERT(std::is_sorted(std::begin(blockedPortList), std::end(blockedPortList)));
951 if (!std::binary_search(std::begin(blockedPortList), std::end(blockedPortList), port.value()))
952 return true;
953
954 // Allow ports 21 and 22 for FTP URLs, as Mozilla does.
955 if ((port.value() == 21 || port.value() == 22) && url.protocolIs("ftp"))
956 return true;
957
958 // Allow any port number in a file URL, since the port number is ignored.
959 if (url.protocolIs("file"))
960 return true;
961
962 return false;
963}
964
965String mimeTypeFromDataURL(const String& url)
966{
967 ASSERT(protocolIsInternal(url, "data"));
968
969 // FIXME: What's the right behavior when the URL has a comma first, but a semicolon later?
970 // Currently this code will break at the semicolon in that case. Not sure that's correct.
971 auto index = url.find(';', 5);
972 if (index == notFound)
973 index = url.find(',', 5);
974 if (index == notFound) {
975 // FIXME: There was an old comment here that made it sound like this should be returning text/plain.
976 // But we have been returning empty string here for some time, so not changing its behavior at this time.
977 return emptyString();
978 }
979 if (index == 5)
980 return "text/plain"_s;
981 ASSERT(index >= 5);
982 return url.substring(5, index - 5).convertToASCIILowercase();
983}
984
985String URL::stringCenterEllipsizedToLength(unsigned length) const
986{
987 if (string().length() <= length)
988 return string();
989
990 return string().left(length / 2 - 1) + "..." + string().right(length / 2 - 2);
991}
992
993URL URL::fakeURLWithRelativePart(const String& relativePart)
994{
995 return URL(URL(), "webkit-fake-url://" + createCanonicalUUIDString() + '/' + relativePart);
996}
997
998URL URL::fileURLWithFileSystemPath(const String& filePath)
999{
1000 return URL(URL(), "file:///" + filePath);
1001}
1002
1003TextStream& operator<<(TextStream& ts, const URL& url)
1004{
1005 ts << url.string();
1006 return ts;
1007}
1008
1009#if !PLATFORM(COCOA) && !USE(SOUP)
1010static bool isIPv4Address(StringView string)
1011{
1012 auto count = 0;
1013
1014 for (const auto octet : string.splitAllowingEmptyEntries('.')) {
1015 if (count >= 4)
1016 return false;
1017
1018 const auto length = octet.length();
1019 if (!length || length > 3)
1020 return false;
1021
1022 auto value = 0;
1023 for (auto i = 0u; i < length; ++i) {
1024 const auto digit = octet[i];
1025
1026 // Prohibit leading zeroes.
1027 if (digit > '9' || digit < (!i && length > 1 ? '1' : '0'))
1028 return false;
1029
1030 value = 10 * value + (digit - '0');
1031 }
1032
1033 if (value > 255)
1034 return false;
1035
1036 count++;
1037 }
1038
1039 return (count == 4);
1040}
1041
// Returns true when |string| looks like a textual IPv6 address: ':'-separated
// groups of one to four hex digits ("hextets"), with at most one "::" gap and
// optionally a dotted IPv4 address in place of the last two hextets.
//
// The string is split on ':' keeping empty pieces, so a "::" shows up as one or
// more empty pieces. |skipState| tracks where we are relative to that gap:
//   None     - no empty piece seen so far.
//   WillSkip - the very first piece was empty (the string starts with ':').
//   Skipping - inside a "::" gap; a non-empty piece is expected next.
//   Skipped  - a gap has been completed and hextets have resumed.
//   Final    - nothing more may follow (set after an IPv4 tail, or when another
//              empty piece is seen while in the Skipping state).
// |count| is the number of plain hextets accepted so far.
static bool isIPv6Address(StringView string)
{
    enum SkipState { None, WillSkip, Skipping, Skipped, Final };
    auto skipState = None;
    auto count = 0;

    for (const auto hextet : string.splitAllowingEmptyEntries(':')) {
        // Reject a ninth hextet, and anything that follows a terminal piece.
        if (count >= 8 || skipState == Final)
            return false;

        const auto length = hextet.length();
        if (!length) {
            // :: may be used anywhere to skip 1 to 8 hextets, but only once.
            if (skipState == Skipped)
                return false;

            // Advance the gap state machine for this empty piece.
            if (skipState == None)
                skipState = !count ? WillSkip : Skipping;
            else if (skipState == WillSkip)
                skipState = Skipping;
            else
                skipState = Final;
            continue;
        }

        // A single leading ':' followed directly by a hextet is not a valid gap.
        if (skipState == WillSkip)
            return false;

        // First non-empty piece after the gap: the gap is now complete.
        if (skipState == Skipping)
            skipState = Skipped;

        if (length > 4) {
            // An IPv4 address may be used in place of the final two hextets.
            // Without a gap exactly six hextets must precede it; with a gap
            // fewer than six must.
            if ((skipState == None && count != 6) || (skipState == Skipped && count >= 6) || !isIPv4Address(hextet))
                return false;

            // The IPv4 tail must be the last piece.
            skipState = Final;
            continue;
        }

        for (const auto codeUnit : hextet.codeUnits()) {
            // IPv6 allows leading zeroes.
            if (!isASCIIHexDigit(codeUnit))
                return false;
        }

        count++;
    }

    // Valid endings: a full eight hextets with no gap, or any address that
    // used a gap or finished on a terminal piece.
    return (count == 8 && skipState == None) || skipState == Skipped || skipState == Final;
}
1093
1094bool URL::hostIsIPAddress(StringView host)
1095{
1096 if (host.find(':') == notFound)
1097 return isIPv4Address(host);
1098
1099 return isIPv6Address(host);
1100}
1101#endif
1102
1103} // namespace WTF
1104