1/*
2 * Copyright (C) 2004-2019 Apple Inc. All rights reserved.
3 * Copyright (C) 2012 Research In Motion Limited. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include <wtf/URL.h>
29
30#include "URLParser.h"
31#include <stdio.h>
32#include <unicode/uidna.h>
33#include <wtf/HashMap.h>
34#include <wtf/NeverDestroyed.h>
35#include <wtf/StdLibExtras.h>
36#include <wtf/UUID.h>
37#include <wtf/text/CString.h>
38#include <wtf/text/StringBuilder.h>
39#include <wtf/text/StringConcatenateNumbers.h>
40#include <wtf/text/StringHash.h>
41#include <wtf/text/TextStream.h>
42
43namespace WTF {
44
45typedef Vector<char, 512> CharBuffer;
46typedef Vector<UChar, 512> UCharBuffer;
47
48static constexpr unsigned invalidPortNumber = 0xFFFF;
49
50// Copies the source to the destination, assuming all the source characters are
51// ASCII. The destination buffer must be large enough. Null characters are allowed
52// in the source string, and no attempt is made to null-terminate the result.
53static void copyASCII(const String& string, char* dest)
54{
55 if (string.isEmpty())
56 return;
57
58 if (string.is8Bit())
59 memcpy(dest, string.characters8(), string.length());
60 else {
61 const UChar* src = string.characters16();
62 size_t length = string.length();
63 for (size_t i = 0; i < length; i++)
64 dest[i] = static_cast<char>(src[i]);
65 }
66}
67
68void URL::invalidate()
69{
70 m_isValid = false;
71 m_protocolIsInHTTPFamily = false;
72 m_cannotBeABaseURL = false;
73 m_schemeEnd = 0;
74 m_userStart = 0;
75 m_userEnd = 0;
76 m_passwordEnd = 0;
77 m_hostEnd = 0;
78 m_portLength = 0;
79 m_pathEnd = 0;
80 m_pathAfterLastSlash = 0;
81 m_queryEnd = 0;
82}
83
84URL::URL(const URL& base, const String& relative, const URLTextEncoding* encoding)
85{
86 URLParser parser(relative, base, encoding);
87 *this = parser.result();
88}
89
90static bool shouldTrimFromURL(UChar c)
91{
92 // Browsers ignore leading/trailing whitespace and control
93 // characters from URLs. Note that c is an *unsigned* char here
94 // so this comparison should only catch control characters.
95 return c <= ' ';
96}
97
98URL URL::isolatedCopy() const
99{
100 URL result = *this;
101 result.m_string = result.m_string.isolatedCopy();
102 return result;
103}
104
105String URL::lastPathComponent() const
106{
107 if (!hasPath())
108 return String();
109
110 unsigned end = m_pathEnd - 1;
111 if (m_string[end] == '/')
112 --end;
113
114 size_t start = m_string.reverseFind('/', end);
115 if (start < static_cast<unsigned>(m_hostEnd + m_portLength))
116 return String();
117 ++start;
118
119 return m_string.substring(start, end - start + 1);
120}
121
122StringView URL::protocol() const
123{
124 return StringView(m_string).substring(0, m_schemeEnd);
125}
126
127StringView URL::host() const
128{
129 unsigned start = hostStart();
130 return StringView(m_string).substring(start, m_hostEnd - start);
131}
132
133Optional<uint16_t> URL::port() const
134{
135 if (!m_portLength)
136 return WTF::nullopt;
137
138 bool ok = false;
139 unsigned number;
140 if (m_string.is8Bit())
141 number = charactersToUIntStrict(m_string.characters8() + m_hostEnd + 1, m_portLength - 1, &ok);
142 else
143 number = charactersToUIntStrict(m_string.characters16() + m_hostEnd + 1, m_portLength - 1, &ok);
144 if (!ok || number > std::numeric_limits<uint16_t>::max())
145 return WTF::nullopt;
146 return number;
147}
148
149String URL::hostAndPort() const
150{
151 if (auto port = this->port())
152 return makeString(host(), ':', static_cast<unsigned>(port.value()));
153 return host().toString();
154}
155
156String URL::protocolHostAndPort() const
157{
158 String result = m_string.substring(0, m_hostEnd + m_portLength);
159
160 if (m_passwordEnd - m_userStart > 0) {
161 const int allowForTrailingAtSign = 1;
162 result.remove(m_userStart, m_passwordEnd - m_userStart + allowForTrailingAtSign);
163 }
164
165 return result;
166}
167
168static String decodeEscapeSequencesFromParsedURL(StringView input)
169{
170 auto inputLength = input.length();
171 if (!inputLength)
172 return emptyString();
173 Vector<LChar> percentDecoded;
174 percentDecoded.reserveInitialCapacity(inputLength);
175 for (unsigned i = 0; i < inputLength; ++i) {
176 if (input[i] == '%'
177 && inputLength > 2
178 && i < inputLength - 2
179 && isASCIIHexDigit(input[i + 1])
180 && isASCIIHexDigit(input[i + 2])) {
181 percentDecoded.uncheckedAppend(toASCIIHexValue(input[i + 1], input[i + 2]));
182 i += 2;
183 } else
184 percentDecoded.uncheckedAppend(input[i]);
185 }
186 return String::fromUTF8(percentDecoded.data(), percentDecoded.size());
187}
188
189String URL::user() const
190{
191 return decodeEscapeSequencesFromParsedURL(StringView(m_string).substring(m_userStart, m_userEnd - m_userStart));
192}
193
194String URL::pass() const
195{
196 if (m_passwordEnd == m_userEnd)
197 return String();
198
199 return decodeEscapeSequencesFromParsedURL(StringView(m_string).substring(m_userEnd + 1, m_passwordEnd - m_userEnd - 1));
200}
201
202String URL::encodedUser() const
203{
204 return m_string.substring(m_userStart, m_userEnd - m_userStart);
205}
206
207String URL::encodedPass() const
208{
209 if (m_passwordEnd == m_userEnd)
210 return String();
211
212 return m_string.substring(m_userEnd + 1, m_passwordEnd - m_userEnd - 1);
213}
214
215String URL::fragmentIdentifier() const
216{
217 if (!hasFragmentIdentifier())
218 return String();
219
220 return m_string.substring(m_queryEnd + 1);
221}
222
223bool URL::hasFragmentIdentifier() const
224{
225 return m_isValid && m_string.length() != m_queryEnd;
226}
227
228String URL::baseAsString() const
229{
230 return m_string.left(m_pathAfterLastSlash);
231}
232
233#if !USE(CF)
234
235String URL::fileSystemPath() const
236{
237 if (!isValid() || !isLocalFile())
238 return String();
239
240 return decodeEscapeSequencesFromParsedURL(StringView(path()));
241}
242
243#endif
244
245#ifdef NDEBUG
246
247static inline void assertProtocolIsGood(StringView)
248{
249}
250
251#else
252
253static void assertProtocolIsGood(StringView protocol)
254{
255 // FIXME: We probably don't need this function any more.
256 // The isASCIIAlphaCaselessEqual function asserts that passed-in characters
257 // are ones it can handle; the older code did not and relied on these checks.
258 for (auto character : protocol.codeUnits()) {
259 ASSERT(isASCII(character));
260 ASSERT(character > ' ');
261 ASSERT(!isASCIIUpper(character));
262 ASSERT(toASCIILowerUnchecked(character) == character);
263 }
264}
265
266#endif
267
268static Lock defaultPortForProtocolMapForTestingLock;
269
270using DefaultPortForProtocolMapForTesting = HashMap<String, uint16_t>;
271static DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMapForTesting()
272{
273 static DefaultPortForProtocolMapForTesting* defaultPortForProtocolMap;
274 return defaultPortForProtocolMap;
275}
276
277static DefaultPortForProtocolMapForTesting& ensureDefaultPortForProtocolMapForTesting()
278{
279 DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMap = defaultPortForProtocolMapForTesting();
280 if (!defaultPortForProtocolMap)
281 defaultPortForProtocolMap = new DefaultPortForProtocolMapForTesting;
282 return *defaultPortForProtocolMap;
283}
284
285void registerDefaultPortForProtocolForTesting(uint16_t port, const String& protocol)
286{
287 auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
288 ensureDefaultPortForProtocolMapForTesting().add(protocol, port);
289}
290
291void clearDefaultPortForProtocolMapForTesting()
292{
293 auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
294 if (auto* map = defaultPortForProtocolMapForTesting())
295 map->clear();
296}
297
298Optional<uint16_t> defaultPortForProtocol(StringView protocol)
299{
300 if (auto* overrideMap = defaultPortForProtocolMapForTesting()) {
301 auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
302 ASSERT(overrideMap); // No need to null check again here since overrideMap cannot become null after being non-null.
303 auto iterator = overrideMap->find(protocol.toStringWithoutCopying());
304 if (iterator != overrideMap->end())
305 return iterator->value;
306 }
307 return URLParser::defaultPortForProtocol(protocol);
308}
309
310bool isDefaultPortForProtocol(uint16_t port, StringView protocol)
311{
312 return defaultPortForProtocol(protocol) == port;
313}
314
315bool URL::protocolIs(const char* protocol) const
316{
317 assertProtocolIsGood(StringView { protocol });
318
319 // JavaScript URLs are "valid" and should be executed even if URL decides they are invalid.
320 // The free function protocolIsJavaScript() should be used instead.
321 ASSERT(!equalLettersIgnoringASCIICase(StringView(protocol), "javascript"));
322
323 if (!m_isValid)
324 return false;
325
326 // Do the comparison without making a new string object.
327 for (unsigned i = 0; i < m_schemeEnd; ++i) {
328 if (!protocol[i] || !isASCIIAlphaCaselessEqual(m_string[i], protocol[i]))
329 return false;
330 }
331 return !protocol[m_schemeEnd]; // We should have consumed all characters in the argument.
332}
333
334bool URL::protocolIs(StringView protocol) const
335{
336 assertProtocolIsGood(protocol);
337
338 if (!m_isValid)
339 return false;
340
341 if (m_schemeEnd != protocol.length())
342 return false;
343
344 // Do the comparison without making a new string object.
345 for (unsigned i = 0; i < m_schemeEnd; ++i) {
346 if (!isASCIIAlphaCaselessEqual(m_string[i], protocol[i]))
347 return false;
348 }
349 return true;
350}
351
352String URL::query() const
353{
354 if (m_queryEnd == m_pathEnd)
355 return String();
356
357 return m_string.substring(m_pathEnd + 1, m_queryEnd - (m_pathEnd + 1));
358}
359
360String URL::path() const
361{
362 unsigned portEnd = m_hostEnd + m_portLength;
363 return m_string.substring(portEnd, m_pathEnd - portEnd);
364}
365
366bool URL::setProtocol(const String& s)
367{
368 // Firefox and IE remove everything after the first ':'.
369 size_t separatorPosition = s.find(':');
370 String newProtocol = s.substring(0, separatorPosition);
371 auto canonicalized = URLParser::maybeCanonicalizeScheme(newProtocol);
372 if (!canonicalized)
373 return false;
374
375 if (!m_isValid) {
376 URLParser parser(makeString(*canonicalized, ":", m_string));
377 *this = parser.result();
378 return true;
379 }
380
381 if ((m_passwordEnd != m_userStart || port()) && *canonicalized == "file")
382 return true;
383
384 if (isLocalFile() && host().isEmpty())
385 return true;
386
387 URLParser parser(makeString(*canonicalized, m_string.substring(m_schemeEnd)));
388 *this = parser.result();
389 return true;
390}
391
392static bool isAllASCII(StringView string)
393{
394 if (string.is8Bit())
395 return charactersAreAllASCII(string.characters8(), string.length());
396 return charactersAreAllASCII(string.characters16(), string.length());
397}
398
399// Appends the punycoded hostname identified by the given string and length to
400// the output buffer. The result will not be null terminated.
401// Return value of false means error in encoding.
402static bool appendEncodedHostname(UCharBuffer& buffer, StringView string)
403{
404 // Needs to be big enough to hold an IDN-encoded name.
405 // For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.
406 const unsigned hostnameBufferLength = 2048;
407
408 if (string.length() > hostnameBufferLength || isAllASCII(string)) {
409 append(buffer, string);
410 return true;
411 }
412
413 UChar hostnameBuffer[hostnameBufferLength];
414 UErrorCode error = U_ZERO_ERROR;
415 UIDNAInfo processingDetails = UIDNA_INFO_INITIALIZER;
416 int32_t numCharactersConverted = uidna_nameToASCII(&URLParser::internationalDomainNameTranscoder(),
417 string.upconvertedCharacters(), string.length(), hostnameBuffer, hostnameBufferLength, &processingDetails, &error);
418
419 if (U_SUCCESS(error) && !processingDetails.errors) {
420 buffer.append(hostnameBuffer, numCharactersConverted);
421 return true;
422 }
423 return false;
424}
425
426unsigned URL::hostStart() const
427{
428 return (m_passwordEnd == m_userStart) ? m_passwordEnd : m_passwordEnd + 1;
429}
430
431void URL::setHost(const String& s)
432{
433 if (!m_isValid)
434 return;
435
436 auto colonIndex = s.find(':');
437 if (colonIndex != notFound)
438 return;
439
440 UCharBuffer encodedHostName;
441 if (!appendEncodedHostname(encodedHostName, s))
442 return;
443
444 bool slashSlashNeeded = m_userStart == static_cast<unsigned>(m_schemeEnd + 1);
445
446 StringBuilder builder;
447 builder.append(m_string.left(hostStart()));
448 if (slashSlashNeeded)
449 builder.appendLiteral("//");
450 builder.append(StringView(encodedHostName.data(), encodedHostName.size()));
451 builder.append(m_string.substring(m_hostEnd));
452
453 URLParser parser(builder.toString());
454 *this = parser.result();
455}
456
457void URL::removePort()
458{
459 if (!m_portLength)
460 return;
461 URLParser parser(makeString(StringView(m_string).left(m_hostEnd), StringView(m_string).substring(m_hostEnd + m_portLength)));
462 *this = parser.result();
463}
464
465void URL::setPort(unsigned short i)
466{
467 if (!m_isValid)
468 return;
469
470 bool colonNeeded = !m_portLength;
471 unsigned portStart = (colonNeeded ? m_hostEnd : m_hostEnd + 1);
472
473 URLParser parser(makeString(StringView(m_string).left(portStart), (colonNeeded ? ":" : ""), static_cast<unsigned>(i), StringView(m_string).substring(m_hostEnd + m_portLength)));
474 *this = parser.result();
475}
476
477void URL::setHostAndPort(const String& hostAndPort)
478{
479 if (!m_isValid)
480 return;
481
482 StringView hostName(hostAndPort);
483 StringView port;
484
485 auto colonIndex = hostName.find(':');
486 if (colonIndex != notFound) {
487 port = hostName.substring(colonIndex + 1);
488 bool ok;
489 int portInt = port.toIntStrict(ok);
490 if (!ok || portInt < 0)
491 return;
492 hostName = hostName.substring(0, colonIndex);
493 }
494
495 if (hostName.isEmpty())
496 return;
497
498 UCharBuffer encodedHostName;
499 if (!appendEncodedHostname(encodedHostName, hostName))
500 return;
501
502 bool slashSlashNeeded = m_userStart == static_cast<unsigned>(m_schemeEnd + 1);
503
504 StringBuilder builder;
505 builder.append(m_string.left(hostStart()));
506 if (slashSlashNeeded)
507 builder.appendLiteral("//");
508 builder.append(StringView(encodedHostName.data(), encodedHostName.size()));
509 if (!port.isEmpty()) {
510 builder.appendLiteral(":");
511 builder.append(port);
512 }
513 builder.append(StringView(m_string).substring(m_hostEnd + m_portLength));
514
515 URLParser parser(builder.toString());
516 *this = parser.result();
517}
518
519static String percentEncodeCharacters(const String& input, bool(*shouldEncode)(UChar))
520{
521 auto encode = [shouldEncode] (const String& input) {
522 CString utf8 = input.utf8();
523 auto* data = utf8.data();
524 StringBuilder builder;
525 auto length = utf8.length();
526 for (unsigned j = 0; j < length; j++) {
527 auto c = data[j];
528 if (shouldEncode(c)) {
529 builder.append('%');
530 builder.append(upperNibbleToASCIIHexDigit(c));
531 builder.append(lowerNibbleToASCIIHexDigit(c));
532 } else
533 builder.append(c);
534 }
535 return builder.toString();
536 };
537
538 for (size_t i = 0; i < input.length(); ++i) {
539 if (UNLIKELY(shouldEncode(input[i])))
540 return encode(input);
541 }
542 return input;
543}
544
545void URL::setUser(const String& user)
546{
547 if (!m_isValid)
548 return;
549
550 // FIXME: Non-ASCII characters must be encoded and escaped to match parse() expectations,
551 // and to avoid changing more than just the user login.
552
553 unsigned end = m_userEnd;
554 if (!user.isEmpty()) {
555 String u = percentEncodeCharacters(user, URLParser::isInUserInfoEncodeSet);
556 if (m_userStart == static_cast<unsigned>(m_schemeEnd + 1))
557 u = "//" + u;
558 // Add '@' if we didn't have one before.
559 if (end == m_hostEnd || (end == m_passwordEnd && m_string[end] != '@'))
560 u.append('@');
561 URLParser parser(makeString(StringView(m_string).left(m_userStart), u, StringView(m_string).substring(end)));
562 *this = parser.result();
563 } else {
564 // Remove '@' if we now have neither user nor password.
565 if (m_userEnd == m_passwordEnd && end != m_hostEnd && m_string[end] == '@')
566 end += 1;
567 // We don't want to parse in the extremely common case where we are not going to make a change.
568 if (m_userStart != end) {
569 URLParser parser(makeString(StringView(m_string).left(m_userStart), StringView(m_string).substring(end)));
570 *this = parser.result();
571 }
572 }
573}
574
575void URL::setPass(const String& password)
576{
577 if (!m_isValid)
578 return;
579
580 unsigned end = m_passwordEnd;
581 if (!password.isEmpty()) {
582 String p = ":" + percentEncodeCharacters(password, URLParser::isInUserInfoEncodeSet) + "@";
583 if (m_userEnd == static_cast<unsigned>(m_schemeEnd + 1))
584 p = "//" + p;
585 // Eat the existing '@' since we are going to add our own.
586 if (end != m_hostEnd && m_string[end] == '@')
587 end += 1;
588 URLParser parser(makeString(StringView(m_string).left(m_userEnd), p, StringView(m_string).substring(end)));
589 *this = parser.result();
590 } else {
591 // Remove '@' if we now have neither user nor password.
592 if (m_userStart == m_userEnd && end != m_hostEnd && m_string[end] == '@')
593 end += 1;
594 // We don't want to parse in the extremely common case where we are not going to make a change.
595 if (m_userEnd != end) {
596 URLParser parser(makeString(StringView(m_string).left(m_userEnd), StringView(m_string).substring(end)));
597 *this = parser.result();
598 }
599 }
600}
601
602void URL::setFragmentIdentifier(StringView identifier)
603{
604 if (!m_isValid)
605 return;
606
607 // FIXME: Optimize the case where the identifier already happens to be equal to what was passed?
608 // FIXME: Is it correct to do this without encoding and escaping non-ASCII characters?
609 *this = URLParser { makeString(StringView { m_string }.substring(0, m_queryEnd), '#', identifier) }.result();
610}
611
612void URL::removeFragmentIdentifier()
613{
614 if (!m_isValid) {
615 ASSERT(!m_queryEnd);
616 return;
617 }
618 if (m_isValid && m_string.length() > m_queryEnd)
619 m_string = m_string.left(m_queryEnd);
620}
621
622void URL::removeQueryAndFragmentIdentifier()
623{
624 if (!m_isValid)
625 return;
626
627 m_string = m_string.left(m_pathEnd);
628 m_queryEnd = m_pathEnd;
629}
630
631void URL::setQuery(const String& query)
632{
633 if (!m_isValid)
634 return;
635
636 // FIXME: '#' and non-ASCII characters must be encoded and escaped.
637 // Usually, the query is encoded using document encoding, not UTF-8, but we don't have
638 // access to the document in this function.
639 // https://webkit.org/b/161176
640 if ((query.isEmpty() || query[0] != '?') && !query.isNull()) {
641 URLParser parser(makeString(StringView(m_string).left(m_pathEnd), "?", query, StringView(m_string).substring(m_queryEnd)));
642 *this = parser.result();
643 } else {
644 URLParser parser(makeString(StringView(m_string).left(m_pathEnd), query, StringView(m_string).substring(m_queryEnd)));
645 *this = parser.result();
646 }
647
648}
649
650void URL::setPath(const String& s)
651{
652 if (!m_isValid)
653 return;
654
655 String path = s;
656 if (path.isEmpty() || path[0] != '/')
657 path = "/" + path;
658
659 auto questionMarkOrNumberSign = [] (UChar character) {
660 return character == '?' || character == '#';
661 };
662 URLParser parser(makeString(StringView(m_string).left(m_hostEnd + m_portLength), percentEncodeCharacters(path, questionMarkOrNumberSign), StringView(m_string).substring(m_pathEnd)));
663 *this = parser.result();
664}
665
666bool equalIgnoringFragmentIdentifier(const URL& a, const URL& b)
667{
668 if (a.m_queryEnd != b.m_queryEnd)
669 return false;
670 unsigned queryLength = a.m_queryEnd;
671 for (unsigned i = 0; i < queryLength; ++i)
672 if (a.string()[i] != b.string()[i])
673 return false;
674 return true;
675}
676
677bool equalIgnoringQueryAndFragment(const URL& a, const URL& b)
678{
679 if (a.pathEnd() != b.pathEnd())
680 return false;
681 unsigned pathEnd = a.pathEnd();
682 for (unsigned i = 0; i < pathEnd; ++i) {
683 if (a.string()[i] != b.string()[i])
684 return false;
685 }
686 return true;
687}
688
689bool protocolHostAndPortAreEqual(const URL& a, const URL& b)
690{
691 if (a.m_schemeEnd != b.m_schemeEnd)
692 return false;
693
694 unsigned hostStartA = a.hostStart();
695 unsigned hostLengthA = a.m_hostEnd - hostStartA;
696 unsigned hostStartB = b.hostStart();
697 unsigned hostLengthB = b.m_hostEnd - b.hostStart();
698 if (hostLengthA != hostLengthB)
699 return false;
700
701 // Check the scheme
702 for (unsigned i = 0; i < a.m_schemeEnd; ++i) {
703 if (a.string()[i] != b.string()[i])
704 return false;
705 }
706
707 // And the host
708 for (unsigned i = 0; i < hostLengthA; ++i) {
709 if (a.string()[hostStartA + i] != b.string()[hostStartB + i])
710 return false;
711 }
712
713 if (a.port() != b.port())
714 return false;
715
716 return true;
717}
718
719bool hostsAreEqual(const URL& a, const URL& b)
720{
721 unsigned hostStartA = a.hostStart();
722 unsigned hostLengthA = a.m_hostEnd - hostStartA;
723 unsigned hostStartB = b.hostStart();
724 unsigned hostLengthB = b.m_hostEnd - hostStartB;
725 if (hostLengthA != hostLengthB)
726 return false;
727
728 for (unsigned i = 0; i < hostLengthA; ++i) {
729 if (a.string()[hostStartA + i] != b.string()[hostStartB + i])
730 return false;
731 }
732
733 return true;
734}
735
736bool URL::isMatchingDomain(const String& domain) const
737{
738 if (isNull())
739 return false;
740
741 if (domain.isEmpty())
742 return true;
743
744 if (!protocolIsInHTTPFamily())
745 return false;
746
747 auto host = this->host();
748 if (!host.endsWith(domain))
749 return false;
750
751 return host.length() == domain.length() || host[host.length() - domain.length() - 1] == '.';
752}
753
754String encodeWithURLEscapeSequences(const String& input)
755{
756 return percentEncodeCharacters(input, URLParser::isInUserInfoEncodeSet);
757}
758
759bool URL::isHierarchical() const
760{
761 if (!m_isValid)
762 return false;
763 ASSERT(m_string[m_schemeEnd] == ':');
764 return m_string[m_schemeEnd + 1] == '/';
765}
766
767void URL::copyToBuffer(Vector<char, 512>& buffer) const
768{
769 // FIXME: This throws away the high bytes of all the characters in the string!
770 // That's fine for a valid URL, which is all ASCII, but not for invalid URLs.
771 buffer.resize(m_string.length());
772 copyASCII(m_string, buffer.data());
773}
774
775template<typename StringClass>
776bool protocolIsInternal(const StringClass& url, const char* protocol)
777{
778 // Do the comparison without making a new string object.
779 assertProtocolIsGood(StringView { protocol });
780 bool isLeading = true;
781 for (unsigned i = 0, j = 0; url[i]; ++i) {
782 // Skip leading whitespace and control characters.
783 if (isLeading && shouldTrimFromURL(url[i]))
784 continue;
785 isLeading = false;
786
787 // Skip any tabs and newlines.
788 if (url[i] == '\t' || url[i] == '\r' || url[i] == '\n')
789 continue;
790
791 if (!protocol[j])
792 return url[i] == ':';
793 if (!isASCIIAlphaCaselessEqual(url[i], protocol[j]))
794 return false;
795
796 ++j;
797 }
798
799 return false;
800}
801
802bool protocolIs(const String& url, const char* protocol)
803{
804 return protocolIsInternal(url, protocol);
805}
806
807inline bool URL::protocolIs(const String& string, const char* protocol)
808{
809 return WTF::protocolIsInternal(string, protocol);
810}
811
812#ifndef NDEBUG
813
814void URL::print() const
815{
816 printf("%s\n", m_string.utf8().data());
817}
818
819#endif
820
821String URL::strippedForUseAsReferrer() const
822{
823 URL referrer(*this);
824 referrer.setUser(String());
825 referrer.setPass(String());
826 referrer.removeFragmentIdentifier();
827 return referrer.string();
828}
829
830bool URL::isLocalFile() const
831{
832 // Including feed here might be a bad idea since drag and drop uses this check
833 // and including feed would allow feeds to potentially let someone's blog
834 // read the contents of the clipboard on a drag, even without a drop.
835 // Likewise with using the FrameLoader::shouldTreatURLAsLocal() function.
836 return protocolIs("file");
837}
838
839bool protocolIsJavaScript(const String& url)
840{
841 return protocolIsInternal(url, "javascript");
842}
843
844bool protocolIsJavaScript(StringView url)
845{
846 return protocolIsInternal(url, "javascript");
847}
848
849bool protocolIsInHTTPFamily(const String& url)
850{
851 auto length = url.length();
852 // Do the comparison without making a new string object.
853 return length >= 5
854 && isASCIIAlphaCaselessEqual(url[0], 'h')
855 && isASCIIAlphaCaselessEqual(url[1], 't')
856 && isASCIIAlphaCaselessEqual(url[2], 't')
857 && isASCIIAlphaCaselessEqual(url[3], 'p')
858 && (url[4] == ':' || (isASCIIAlphaCaselessEqual(url[4], 's') && length >= 6 && url[5] == ':'));
859}
860
861const URL& blankURL()
862{
863 static NeverDestroyed<URL> staticBlankURL(URL(), "about:blank");
864 return staticBlankURL;
865}
866
867bool URL::protocolIsAbout() const
868{
869 return protocolIs("about");
870}
871
872bool portAllowed(const URL& url)
873{
874 Optional<uint16_t> port = url.port();
875
876 // Since most URLs don't have a port, return early for the "no port" case.
877 if (!port)
878 return true;
879
880 // This blocked port list matches the port blocking that Mozilla implements.
881 // See http://www.mozilla.org/projects/netlib/PortBanning.html for more information.
882 static const uint16_t blockedPortList[] = {
883 1, // tcpmux
884 7, // echo
885 9, // discard
886 11, // systat
887 13, // daytime
888 15, // netstat
889 17, // qotd
890 19, // chargen
891 20, // FTP-data
892 21, // FTP-control
893 22, // SSH
894 23, // telnet
895 25, // SMTP
896 37, // time
897 42, // name
898 43, // nicname
899 53, // domain
900 77, // priv-rjs
901 79, // finger
902 87, // ttylink
903 95, // supdup
904 101, // hostriame
905 102, // iso-tsap
906 103, // gppitnp
907 104, // acr-nema
908 109, // POP2
909 110, // POP3
910 111, // sunrpc
911 113, // auth
912 115, // SFTP
913 117, // uucp-path
914 119, // nntp
915 123, // NTP
916 135, // loc-srv / epmap
917 139, // netbios
918 143, // IMAP2
919 179, // BGP
920 389, // LDAP
921 427, // SLP (Also used by Apple Filing Protocol)
922 465, // SMTP+SSL
923 512, // print / exec
924 513, // login
925 514, // shell
926 515, // printer
927 526, // tempo
928 530, // courier
929 531, // Chat
930 532, // netnews
931 540, // UUCP
932 548, // afpovertcp [Apple addition]
933 556, // remotefs
934 563, // NNTP+SSL
935 587, // ESMTP
936 601, // syslog-conn
937 636, // LDAP+SSL
938 993, // IMAP+SSL
939 995, // POP3+SSL
940 2049, // NFS
941 3659, // apple-sasl / PasswordServer [Apple addition]
942 4045, // lockd
943 4190, // ManageSieve [Apple addition]
944 6000, // X11
945 6665, // Alternate IRC [Apple addition]
946 6666, // Alternate IRC [Apple addition]
947 6667, // Standard IRC [Apple addition]
948 6668, // Alternate IRC [Apple addition]
949 6669, // Alternate IRC [Apple addition]
950 6679, // Alternate IRC SSL [Apple addition]
951 6697, // IRC+SSL [Apple addition]
952 invalidPortNumber, // Used to block all invalid port numbers
953 };
954
955 // If the port is not in the blocked port list, allow it.
956 ASSERT(std::is_sorted(std::begin(blockedPortList), std::end(blockedPortList)));
957 if (!std::binary_search(std::begin(blockedPortList), std::end(blockedPortList), port.value()))
958 return true;
959
960 // Allow ports 21 and 22 for FTP URLs, as Mozilla does.
961 if ((port.value() == 21 || port.value() == 22) && url.protocolIs("ftp"))
962 return true;
963
964 // Allow any port number in a file URL, since the port number is ignored.
965 if (url.protocolIs("file"))
966 return true;
967
968 return false;
969}
970
971String mimeTypeFromDataURL(const String& url)
972{
973 ASSERT(protocolIsInternal(url, "data"));
974
975 // FIXME: What's the right behavior when the URL has a comma first, but a semicolon later?
976 // Currently this code will break at the semicolon in that case. Not sure that's correct.
977 auto index = url.find(';', 5);
978 if (index == notFound)
979 index = url.find(',', 5);
980 if (index == notFound) {
981 // FIXME: There was an old comment here that made it sound like this should be returning text/plain.
982 // But we have been returning empty string here for some time, so not changing its behavior at this time.
983 return emptyString();
984 }
985 if (index == 5)
986 return "text/plain"_s;
987 ASSERT(index >= 5);
988 return url.substring(5, index - 5).convertToASCIILowercase();
989}
990
991String URL::stringCenterEllipsizedToLength(unsigned length) const
992{
993 if (string().length() <= length)
994 return string();
995
996 return string().left(length / 2 - 1) + "..." + string().right(length / 2 - 2);
997}
998
999URL URL::fakeURLWithRelativePart(const String& relativePart)
1000{
1001 return URL(URL(), "webkit-fake-url://" + createCanonicalUUIDString() + '/' + relativePart);
1002}
1003
1004URL URL::fileURLWithFileSystemPath(const String& filePath)
1005{
1006 return URL(URL(), "file:///" + filePath);
1007}
1008
1009TextStream& operator<<(TextStream& ts, const URL& url)
1010{
1011 ts << url.string();
1012 return ts;
1013}
1014
1015#if !PLATFORM(COCOA) && !USE(SOUP)
1016static bool isIPv4Address(StringView string)
1017{
1018 auto count = 0;
1019
1020 for (const auto octet : string.splitAllowingEmptyEntries('.')) {
1021 if (count >= 4)
1022 return false;
1023
1024 const auto length = octet.length();
1025 if (!length || length > 3)
1026 return false;
1027
1028 auto value = 0;
1029 for (auto i = 0u; i < length; ++i) {
1030 const auto digit = octet[i];
1031
1032 // Prohibit leading zeroes.
1033 if (digit > '9' || digit < (!i && length > 1 ? '1' : '0'))
1034 return false;
1035
1036 value = 10 * value + (digit - '0');
1037 }
1038
1039 if (value > 255)
1040 return false;
1041
1042 count++;
1043 }
1044
1045 return (count == 4);
1046}
1047
// Returns true when |string| has the textual shape of an IPv6 address: pieces of
// up to four hex digits separated by ':', at most one run of empty pieces ("::"),
// and optionally a dotted IPv4 address as the last piece.
// NOTE(review): assumes splitAllowingEmptyEntries(':') yields an empty piece for
// adjacent colons and for a leading or trailing colon — confirm.
static bool isIPv6Address(StringView string)
{
    // Tracks how far handling of empty pieces has progressed:
    //   None     - no empty piece has been seen.
    //   WillSkip - the first piece was empty; another empty piece must follow.
    //   Skipping - an empty piece that can act as the "::" run has been seen.
    //   Skipped  - a non-empty piece followed the run; another empty piece is an error.
    //   Final    - no further piece may follow (set by an extra empty piece while
    //              Skipping, or by an IPv4 tail).
    enum SkipState { None, WillSkip, Skipping, Skipped, Final };
    auto skipState = None;
    // Number of hex pieces accepted so far; an IPv4 tail is not counted.
    auto count = 0;

    for (const auto hextet : string.splitAllowingEmptyEntries(':')) {
        // Reject a ninth hex piece, or any piece after the state became Final.
        if (count >= 8 || skipState == Final)
            return false;

        const auto length = hextet.length();
        if (!length) {
            // :: may be used anywhere to skip 1 to 8 hextets, but only once.
            if (skipState == Skipped)
                return false;

            if (skipState == None)
                skipState = !count ? WillSkip : Skipping;
            else if (skipState == WillSkip)
                skipState = Skipping;
            else
                skipState = Final;
            continue;
        }

        // A non-empty piece directly after a single leading empty piece is invalid.
        if (skipState == WillSkip)
            return false;

        // The first non-empty piece after the run completes it.
        if (skipState == Skipping)
            skipState = Skipped;

        if (length > 4) {
            // An IPv4 address may be used in place of the final two hextets.
            // Without a skip it must come after exactly six hex pieces; after a
            // skip, fewer than six hex pieces must have been seen.
            if ((skipState == None && count != 6) || (skipState == Skipped && count >= 6) || !isIPv4Address(hextet))
                return false;

            skipState = Final;
            continue;
        }

        for (const auto codeUnit : hextet.codeUnits()) {
            // IPv6 allows leading zeroes.
            if (!isASCIIHexDigit(codeUnit))
                return false;
        }

        count++;
    }

    // Accept eight hex pieces with no skip, or any input that ended in the
    // Skipped or Final state.
    return (count == 8 && skipState == None) || skipState == Skipped || skipState == Final;
}
1099
1100bool URL::hostIsIPAddress(StringView host)
1101{
1102 if (host.find(':') == notFound)
1103 return isIPv4Address(host);
1104
1105 return isIPv6Address(host);
1106}
1107#endif
1108
1109} // namespace WTF
1110