URL.cpp source code [jsc/Source/WTF/wtf/URL.cpp]

1	/*
2	* Copyright (C) 2004-2019 Apple Inc. All rights reserved.
3	* Copyright (C) 2012 Research In Motion Limited. All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	* 1. Redistributions of source code must retain the above copyright
9	* notice, this list of conditions and the following disclaimer.
10	* 2. Redistributions in binary form must reproduce the above copyright
11	* notice, this list of conditions and the following disclaimer in the
12	* documentation and/or other materials provided with the distribution.
13	*
14	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25	*/
26
27	#include "config.h"
28	#include <wtf/URL.h>
29
30	#include "URLParser.h"
31	#include <stdio.h>
32	#include <unicode/uidna.h>
33	#include <wtf/HashMap.h>
34	#include <wtf/NeverDestroyed.h>
35	#include <wtf/StdLibExtras.h>
36	#include <wtf/UUID.h>
37	#include <wtf/text/CString.h>
38	#include <wtf/text/StringBuilder.h>
39	#include <wtf/text/StringConcatenateNumbers.h>
40	#include <wtf/text/StringHash.h>
41	#include <wtf/text/TextStream.h>
42
43	namespace WTF {
44
45	typedef Vector<char, `512`> CharBuffer;
46	typedef Vector<UChar, `512`> UCharBuffer;
47
48	static constexpr unsigned invalidPortNumber = `0xFFFF`;
49
50	// Copies the source to the destination, assuming all the source characters are
51	// ASCII. The destination buffer must be large enough. Null characters are allowed
52	// in the source string, and no attempt is made to null-terminate the result.
53	static void copyASCII(const String& string, char* dest)
54	{
55	if (string.isEmpty())
56	return;
57
58	if (string.is8Bit())
59	memcpy(dest, string.characters8(), string.length());
60	else {
61	const UChar* src = string.characters16();
62	size_t length = string.length();
63	for (size_t i = `0`; i < length; i++)
64	dest[i] = static_cast<char>(src[i]);
65	}
66	}
67
68	void URL::invalidate()
69	{
70	m_isValid = false;
71	m_protocolIsInHTTPFamily = false;
72	m_cannotBeABaseURL = false;
73	m_schemeEnd = `0`;
74	m_userStart = `0`;
75	m_userEnd = `0`;
76	m_passwordEnd = `0`;
77	m_hostEnd = `0`;
78	m_portLength = `0`;
79	m_pathEnd = `0`;
80	m_pathAfterLastSlash = `0`;
81	m_queryEnd = `0`;
82	}
83
84	URL::URL(const URL& base, const String& relative, const URLTextEncoding* encoding)
85	{
86	URLParser parser(relative, base, encoding);
87	*this = parser.result();
88	}
89
90	static bool shouldTrimFromURL(UChar c)
91	{
92	// Browsers ignore leading/trailing whitespace and control
93	// characters from URLs. Note that c is an unsigned* char here*
94	// so this comparison should only catch control characters.
95	return c <= `' '`;
96	}
97
98	URL URL::isolatedCopy() const
99	{
100	URL result = *this;
101	result.m_string = result.m_string.isolatedCopy();
102	return result;
103	}
104
105	String URL::lastPathComponent() const
106	{
107	if (!hasPath())
108	return String ();
109
110	unsigned end = m_pathEnd - `1`;
111	if (m_string [end] == `'/'`)
112	--end;
113
114	size_t start = m_string.reverseFind(`'/'`, end);
115	if (start < static_cast<unsigned>(m_hostEnd + m_portLength))
116	return String ();
117	++start;
118
119	return m_string.substring(start, end - start + `1`);
120	}
121
122	StringView URL::protocol() const
123	{
124	return StringView (m_string).substring(`0`, m_schemeEnd);
125	}
126
127	StringView URL::host() const
128	{
129	unsigned start = hostStart();
130	return StringView (m_string).substring(start, m_hostEnd - start);
131	}
132
133	Optional<uint16_t> URL::port() const
134	{
135	if (!m_portLength)
136	return WTF::nullopt;
137
138	bool ok = false;
139	unsigned number;
140	if (m_string.is8Bit())
141	number = charactersToUIntStrict(m_string.characters8() + m_hostEnd + `1`, m_portLength - `1`, &ok);
142	else
143	number = charactersToUIntStrict(m_string.characters16() + m_hostEnd + `1`, m_portLength - `1`, &ok);
144	if (!ok \|\| number > std::numeric_limits<uint16_t>::max())
145	return WTF::nullopt;
146	return number;
147	}
148
149	String URL::hostAndPort() const
150	{
151	if (auto port = this->port())
152	return makeString(host(), `':'`, static_cast<unsigned>(port.value()));
153	return host().toString();
154	}
155
156	String URL::protocolHostAndPort() const
157	{
158	String result = m_string.substring(`0`, m_hostEnd + m_portLength);
159
160	if (m_passwordEnd - m_userStart > `0`) {
161	const int allowForTrailingAtSign = `1`;
162	result.remove(m_userStart, m_passwordEnd - m_userStart + allowForTrailingAtSign);
163	}
164
165	return result;
166	}
167
168	static String decodeEscapeSequencesFromParsedURL(StringView input)
169	{
170	auto inputLength = input.length();
171	if (!inputLength)
172	return emptyString();
173	Vector<LChar> percentDecoded;
174	percentDecoded.reserveInitialCapacity(inputLength);
175	for (unsigned i = `0`; i < inputLength; ++i) {
176	if (input [i] == `'%'`
177	&& inputLength > `2`
178	&& i < inputLength - `2`
179	&& isASCIIHexDigit(input [i + `1`])
180	&& isASCIIHexDigit(input [i + `2`])) {
181	percentDecoded.uncheckedAppend(toASCIIHexValue(input [i + `1`], input [i + `2`]));
182	i += `2`;
183	} else
184	percentDecoded.uncheckedAppend(input [i]);
185	}
186	return String::fromUTF8(percentDecoded.data(), percentDecoded.size());
187	}
188
189	String URL::user() const
190	{
191	return decodeEscapeSequencesFromParsedURL(StringView (m_string).substring(m_userStart, m_userEnd - m_userStart));
192	}
193
194	String URL::pass() const
195	{
196	if (m_passwordEnd == m_userEnd)
197	return String ();
198
199	return decodeEscapeSequencesFromParsedURL(StringView (m_string).substring(m_userEnd + `1`, m_passwordEnd - m_userEnd - `1`));
200	}
201
202	String URL::encodedUser() const
203	{
204	return m_string.substring(m_userStart, m_userEnd - m_userStart);
205	}
206
207	String URL::encodedPass() const
208	{
209	if (m_passwordEnd == m_userEnd)
210	return String ();
211
212	return m_string.substring(m_userEnd + `1`, m_passwordEnd - m_userEnd - `1`);
213	}
214
215	String URL::fragmentIdentifier() const
216	{
217	if (!hasFragmentIdentifier())
218	return String ();
219
220	return m_string.substring(m_queryEnd + `1`);
221	}
222
223	bool URL::hasFragmentIdentifier() const
224	{
225	return m_isValid && m_string.length() != m_queryEnd;
226	}
227
228	String URL::baseAsString() const
229	{
230	return m_string.left(m_pathAfterLastSlash);
231	}
232
233	#if !USE(CF)
234
235	String URL::fileSystemPath() const
236	{
237	if (!isValid() \|\| !isLocalFile())
238	return String ();
239
240	return decodeEscapeSequencesFromParsedURL(StringView (path()));
241	}
242
243	#endif
244
245	#ifdef NDEBUG
246
247	static inline void assertProtocolIsGood(StringView)
248	{
249	}
250
251	#else
252
253	static void assertProtocolIsGood(StringView protocol)
254	{
255	// FIXME: We probably don't need this function any more.
256	// The isASCIIAlphaCaselessEqual function asserts that passed-in characters
257	// are ones it can handle; the older code did not and relied on these checks.
258	for (auto character : protocol.codeUnits()) {
259	ASSERT(isASCII(character));
260	ASSERT(character > `' '`);
261	ASSERT(!isASCIIUpper(character));
262	ASSERT(toASCIILowerUnchecked(character) == character);
263	}
264	}
265
266	#endif
267
268	static Lock defaultPortForProtocolMapForTestingLock;
269
270	using DefaultPortForProtocolMapForTesting = HashMap<String, uint16_t>;
271	static DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMapForTesting()
272	{
273	static DefaultPortForProtocolMapForTesting* defaultPortForProtocolMap;
274	return defaultPortForProtocolMap;
275	}
276
277	static DefaultPortForProtocolMapForTesting& ensureDefaultPortForProtocolMapForTesting()
278	{
279	DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMap = defaultPortForProtocolMapForTesting();
280	if (!defaultPortForProtocolMap)
281	defaultPortForProtocolMap = new DefaultPortForProtocolMapForTesting;
282	return *defaultPortForProtocolMap;
283	}
284
285	void registerDefaultPortForProtocolForTesting(uint16_t port, const String& protocol)
286	{
287	auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
288	ensureDefaultPortForProtocolMapForTesting().add(protocol, port);
289	}
290
291	void clearDefaultPortForProtocolMapForTesting()
292	{
293	auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
294	if (auto* map = defaultPortForProtocolMapForTesting())
295	map->clear();
296	}
297
298	Optional<uint16_t> defaultPortForProtocol(StringView protocol)
299	{
300	if (auto* overrideMap = defaultPortForProtocolMapForTesting()) {
301	auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
302	ASSERT(overrideMap); // No need to null check again here since overrideMap cannot become null after being non-null.
303	auto iterator = overrideMap->find(protocol.toStringWithoutCopying());
304	if (iterator != overrideMap->end())
305	return iterator ->value;
306	}
307	return URLParser::defaultPortForProtocol(protocol);
308	}
309
310	bool isDefaultPortForProtocol(uint16_t port, StringView protocol)
311	{
312	return defaultPortForProtocol(protocol) == port;
313	}
314
315	bool URL::protocolIs(const char* protocol) const
316	{
317	assertProtocolIsGood(StringView { protocol });
318
319	// JavaScript URLs are "valid" and should be executed even if URL decides they are invalid.
320	// The free function protocolIsJavaScript() should be used instead.
321	ASSERT(!equalLettersIgnoringASCIICase(StringView (protocol), "javascript"));
322
323	if (!m_isValid)
324	return false;
325
326	// Do the comparison without making a new string object.
327	for (unsigned i = `0`; i < m_schemeEnd; ++i) {
328	if (!protocol[i] \|\| !isASCIIAlphaCaselessEqual(m_string [i], protocol[i]))
329	return false;
330	}
331	return !protocol[m_schemeEnd]; // We should have consumed all characters in the argument.
332	}
333
334	bool URL::protocolIs(StringView protocol) const
335	{
336	assertProtocolIsGood(protocol);
337
338	if (!m_isValid)
339	return false;
340
341	if (m_schemeEnd != protocol.length())
342	return false;
343
344	// Do the comparison without making a new string object.
345	for (unsigned i = `0`; i < m_schemeEnd; ++i) {
346	if (!isASCIIAlphaCaselessEqual(m_string [i], protocol [i]))
347	return false;
348	}
349	return true;
350	}
351
352	String URL::query() const
353	{
354	if (m_queryEnd == m_pathEnd)
355	return String ();
356
357	return m_string.substring(m_pathEnd + `1`, m_queryEnd - (m_pathEnd + `1`));
358	}
359
360	String URL::path() const
361	{
362	unsigned portEnd = m_hostEnd + m_portLength;
363	return m_string.substring(portEnd, m_pathEnd - portEnd);
364	}
365
366	bool URL::setProtocol(const String& s)
367	{
368	// Firefox and IE remove everything after the first ':'.
369	size_t separatorPosition = s.find(`':'`);
370	String newProtocol = s.substring(`0`, separatorPosition);
371	auto canonicalized = URLParser::maybeCanonicalizeScheme(newProtocol);
372	if (!canonicalized)
373	return false;
374
375	if (!m_isValid) {
376	URLParser parser(makeString(*canonicalized, ":", m_string));
377	*this = parser.result();
378	return true;
379	}
380
381	if ((m_passwordEnd != m_userStart \|\| port()) && *canonicalized == "file")
382	return true;
383
384	if (isLocalFile() && host().isEmpty())
385	return true;
386
387	URLParser parser(makeString(*canonicalized, m_string.substring(m_schemeEnd)));
388	*this = parser.result();
389	return true;
390	}
391
392	static bool isAllASCII(StringView string)
393	{
394	if (string.is8Bit())
395	return charactersAreAllASCII(string.characters8(), string.length());
396	return charactersAreAllASCII(string.characters16(), string.length());
397	}
398
399	// Appends the punycoded hostname identified by the given string and length to
400	// the output buffer. The result will not be null terminated.
401	// Return value of false means error in encoding.
402	static bool appendEncodedHostname(UCharBuffer& buffer, StringView string)
403	{
404	// Needs to be big enough to hold an IDN-encoded name.
405	// For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.
406	const unsigned hostnameBufferLength = `2048`;
407
408	if (string.length() > hostnameBufferLength \|\| isAllASCII(string)) {
409	append(buffer, string);
410	return true;
411	}
412
413	UChar hostnameBuffer[hostnameBufferLength];
414	UErrorCode error = U_ZERO_ERROR;
415	UIDNAInfo processingDetails = UIDNA_INFO_INITIALIZER;
416	int32_t numCharactersConverted = uidna_nameToASCII(&URLParser::internationalDomainNameTranscoder(),
417	string.upconvertedCharacters(), string.length(), hostnameBuffer, hostnameBufferLength, &processingDetails, &error);
418
419	if (U_SUCCESS(error) && !processingDetails.errors) {
420	buffer.append(hostnameBuffer, numCharactersConverted);
421	return true;
422	}
423	return false;
424	}
425
426	unsigned URL::hostStart() const
427	{
428	return (m_passwordEnd == m_userStart) ? m_passwordEnd : m_passwordEnd + `1`;
429	}
430
431	void URL::setHost(const String& s)
432	{
433	if (!m_isValid)
434	return;
435
436	auto colonIndex = s.find(`':'`);
437	if (colonIndex != notFound)
438	return;
439
440	UCharBuffer encodedHostName;
441	if (!appendEncodedHostname(encodedHostName, s))
442	return;
443
444	bool slashSlashNeeded = m_userStart == static_cast<unsigned>(m_schemeEnd + `1`);
445
446	StringBuilder builder;
447	builder.append(m_string.left(hostStart()));
448	if (slashSlashNeeded)
449	builder.appendLiteral("//");
450	builder.append(StringView (encodedHostName.data(), encodedHostName.size()));
451	builder.append(m_string.substring(m_hostEnd));
452
453	URLParser parser(builder.toString());
454	*this = parser.result();
455	}
456
457	void URL::removePort()
458	{
459	if (!m_portLength)
460	return;
461	URLParser parser(makeString(StringView (m_string).left(m_hostEnd), StringView (m_string).substring(m_hostEnd + m_portLength)));
462	*this = parser.result();
463	}
464
465	void URL::setPort(unsigned short i)
466	{
467	if (!m_isValid)
468	return;
469
470	bool colonNeeded = !m_portLength;
471	unsigned portStart = (colonNeeded ? m_hostEnd : m_hostEnd + `1`);
472
473	URLParser parser(makeString(StringView (m_string).left(portStart), (colonNeeded ? ":" : ""), static_cast<unsigned>(i), StringView (m_string).substring(m_hostEnd + m_portLength)));
474	*this = parser.result();
475	}
476
477	void URL::setHostAndPort(const String& hostAndPort)
478	{
479	if (!m_isValid)
480	return;
481
482	StringView hostName(hostAndPort);
483	StringView port;
484
485	auto colonIndex = hostName.find(`':'`);
486	if (colonIndex != notFound) {
487	port = hostName.substring(colonIndex + `1`);
488	bool ok;
489	int portInt = port.toIntStrict(ok);
490	if (!ok \|\| portInt < `0`)
491	return;
492	hostName = hostName.substring(`0`, colonIndex);
493	}
494
495	if (hostName.isEmpty())
496	return;
497
498	UCharBuffer encodedHostName;
499	if (!appendEncodedHostname(encodedHostName, hostName))
500	return;
501
502	bool slashSlashNeeded = m_userStart == static_cast<unsigned>(m_schemeEnd + `1`);
503
504	StringBuilder builder;
505	builder.append(m_string.left(hostStart()));
506	if (slashSlashNeeded)
507	builder.appendLiteral("//");
508	builder.append(StringView (encodedHostName.data(), encodedHostName.size()));
509	if (!port.isEmpty()) {
510	builder.appendLiteral(":");
511	builder.append(port);
512	}
513	builder.append(StringView (m_string).substring(m_hostEnd + m_portLength));
514
515	URLParser parser(builder.toString());
516	*this = parser.result();
517	}
518
519	static String percentEncodeCharacters(const String& input, bool(*shouldEncode)(UChar))
520	{
521	auto encode = [shouldEncode] (const String& input) {
522	CString utf8 = input.utf8();
523	auto* data = utf8.data();
524	StringBuilder builder;
525	auto length = utf8.length();
526	for (unsigned j = `0`; j < length; j++) {
527	auto c = data[j];
528	if (shouldEncode(c)) {
529	builder.append(`'%'`);
530	builder.append(upperNibbleToASCIIHexDigit(c));
531	builder.append(lowerNibbleToASCIIHexDigit(c));
532	} else
533	builder.append(c);
534	}
535	return builder.toString();
536	};
537
538	for (size_t i = `0`; i < input.length(); ++i) {
539	if (UNLIKELY(shouldEncode(input[i])))
540	return encode (input);
541	}
542	return input;
543	}
544
545	void URL::setUser(const String& user)
546	{
547	if (!m_isValid)
548	return;
549
550	// FIXME: Non-ASCII characters must be encoded and escaped to match parse() expectations,
551	// and to avoid changing more than just the user login.
552
553	unsigned end = m_userEnd;
554	if (!user.isEmpty()) {
555	String u = percentEncodeCharacters(user, URLParser::isInUserInfoEncodeSet);
556	if (m_userStart == static_cast<unsigned>(m_schemeEnd + `1`))
557	u = "//" + u;
558	// Add '@' if we didn't have one before.
559	if (end == m_hostEnd \|\| (end == m_passwordEnd && m_string [end] != `'@'`))
560	u.append(`'@'`);
561	URLParser parser(makeString(StringView (m_string).left(m_userStart), u, StringView (m_string).substring(end)));
562	*this = parser.result();
563	} else {
564	// Remove '@' if we now have neither user nor password.
565	if (m_userEnd == m_passwordEnd && end != m_hostEnd && m_string [end] == `'@'`)
566	end += `1`;
567	// We don't want to parse in the extremely common case where we are not going to make a change.
568	if (m_userStart != end) {
569	URLParser parser(makeString(StringView (m_string).left(m_userStart), StringView (m_string).substring(end)));
570	*this = parser.result();
571	}
572	}
573	}
574
575	void URL::setPass(const String& password)
576	{
577	if (!m_isValid)
578	return;
579
580	unsigned end = m_passwordEnd;
581	if (!password.isEmpty()) {
582	String p = ":" + percentEncodeCharacters(password, URLParser::isInUserInfoEncodeSet) + "@";
583	if (m_userEnd == static_cast<unsigned>(m_schemeEnd + `1`))
584	p = "//" + p;
585	// Eat the existing '@' since we are going to add our own.
586	if (end != m_hostEnd && m_string [end] == `'@'`)
587	end += `1`;
588	URLParser parser(makeString(StringView (m_string).left(m_userEnd), p, StringView (m_string).substring(end)));
589	*this = parser.result();
590	} else {
591	// Remove '@' if we now have neither user nor password.
592	if (m_userStart == m_userEnd && end != m_hostEnd && m_string [end] == `'@'`)
593	end += `1`;
594	// We don't want to parse in the extremely common case where we are not going to make a change.
595	if (m_userEnd != end) {
596	URLParser parser(makeString(StringView (m_string).left(m_userEnd), StringView (m_string).substring(end)));
597	*this = parser.result();
598	}
599	}
600	}
601
602	void URL::setFragmentIdentifier(StringView identifier)
603	{
604	if (!m_isValid)
605	return;
606
607	// FIXME: Optimize the case where the identifier already happens to be equal to what was passed?
608	// FIXME: Is it correct to do this without encoding and escaping non-ASCII characters?
609	*this = URLParser { makeString(StringView { m_string }.substring(`0`, m_queryEnd), `'#'`, identifier) }.result();
610	}
611
612	void URL::removeFragmentIdentifier()
613	{
614	if (!m_isValid) {
615	ASSERT(!m_queryEnd);
616	return;
617	}
618	if (m_isValid && m_string.length() > m_queryEnd)
619	m_string = m_string.left(m_queryEnd);
620	}
621
622	void URL::removeQueryAndFragmentIdentifier()
623	{
624	if (!m_isValid)
625	return;
626
627	m_string = m_string.left(m_pathEnd);
628	m_queryEnd = m_pathEnd;
629	}
630
631	void URL::setQuery(const String& query)
632	{
633	if (!m_isValid)
634	return;
635
636	// FIXME: '#' and non-ASCII characters must be encoded and escaped.
637	// Usually, the query is encoded using document encoding, not UTF-8, but we don't have
638	// access to the document in this function.
639	// https://webkit.org/b/161176
640	if ((query.isEmpty() \|\| query [`0`] != `'?'`) && !query.isNull()) {
641	URLParser parser(makeString(StringView (m_string).left(m_pathEnd), "?", query, StringView (m_string).substring(m_queryEnd)));
642	*this = parser.result();
643	} else {
644	URLParser parser(makeString(StringView (m_string).left(m_pathEnd), query, StringView (m_string).substring(m_queryEnd)));
645	*this = parser.result();
646	}
647
648	}
649
650	void URL::setPath(const String& s)
651	{
652	if (!m_isValid)
653	return;
654
655	String path = s;
656	if (path.isEmpty() \|\| path [`0`] != `'/'`)
657	path = "/" + path;
658
659	auto questionMarkOrNumberSign = [] (UChar character) {
660	return character == `'?'` \|\| character == `'#'`;
661	};
662	URLParser parser(makeString(StringView (m_string).left(m_hostEnd + m_portLength), percentEncodeCharacters(path, questionMarkOrNumberSign), StringView (m_string).substring(m_pathEnd)));
663	*this = parser.result();
664	}
665
666	bool equalIgnoringFragmentIdentifier(const URL& a, const URL& b)
667	{
668	if (a.m_queryEnd != b.m_queryEnd)
669	return false;
670	unsigned queryLength = a.m_queryEnd;
671	for (unsigned i = `0`; i < queryLength; ++i)
672	if (a.string()[i] != b.string()[i])
673	return false;
674	return true;
675	}
676
677	bool equalIgnoringQueryAndFragment(const URL& a, const URL& b)
678	{
679	if (a.pathEnd() != b.pathEnd())
680	return false;
681	unsigned pathEnd = a.pathEnd();
682	for (unsigned i = `0`; i < pathEnd; ++i) {
683	if (a.string()[i] != b.string()[i])
684	return false;
685	}
686	return true;
687	}
688
689	bool protocolHostAndPortAreEqual(const URL& a, const URL& b)
690	{
691	if (a.m_schemeEnd != b.m_schemeEnd)
692	return false;
693
694	unsigned hostStartA = a.hostStart();
695	unsigned hostLengthA = a.m_hostEnd - hostStartA;
696	unsigned hostStartB = b.hostStart();
697	unsigned hostLengthB = b.m_hostEnd - b.hostStart();
698	if (hostLengthA != hostLengthB)
699	return false;
700
701	// Check the scheme
702	for (unsigned i = `0`; i < a.m_schemeEnd; ++i) {
703	if (a.string()[i] != b.string()[i])
704	return false;
705	}
706
707	// And the host
708	for (unsigned i = `0`; i < hostLengthA; ++i) {
709	if (a.string()[hostStartA + i] != b.string()[hostStartB + i])
710	return false;
711	}
712
713	if (a.port() != b.port())
714	return false;
715
716	return true;
717	}
718
719	bool hostsAreEqual(const URL& a, const URL& b)
720	{
721	unsigned hostStartA = a.hostStart();
722	unsigned hostLengthA = a.m_hostEnd - hostStartA;
723	unsigned hostStartB = b.hostStart();
724	unsigned hostLengthB = b.m_hostEnd - hostStartB;
725	if (hostLengthA != hostLengthB)
726	return false;
727
728	for (unsigned i = `0`; i < hostLengthA; ++i) {
729	if (a.string()[hostStartA + i] != b.string()[hostStartB + i])
730	return false;
731	}
732
733	return true;
734	}
735
736	bool URL::isMatchingDomain(const String& domain) const
737	{
738	if (isNull())
739	return false;
740
741	if (domain.isEmpty())
742	return true;
743
744	if (!protocolIsInHTTPFamily())
745	return false;
746
747	auto host = this->host();
748	if (!host.endsWith(domain))
749	return false;
750
751	return host.length() == domain.length() \|\| host [host.length() - domain.length() - `1`] == `'.'`;
752	}
753
754	String encodeWithURLEscapeSequences(const String& input)
755	{
756	return percentEncodeCharacters(input, URLParser::isInUserInfoEncodeSet);
757	}
758
759	bool URL::isHierarchical() const
760	{
761	if (!m_isValid)
762	return false;
763	ASSERT(m_string[m_schemeEnd] == `':'`);
764	return m_string [m_schemeEnd + `1`] == `'/'`;
765	}
766
767	void URL::copyToBuffer(Vector<char, `512`>& buffer) const
768	{
769	// FIXME: This throws away the high bytes of all the characters in the string!
770	// That's fine for a valid URL, which is all ASCII, but not for invalid URLs.
771	buffer.resize(m_string.length());
772	copyASCII(m_string, buffer.data());
773	}
774
775	template<typename StringClass>
776	bool protocolIsInternal(const StringClass& url, const char* protocol)
777	{
778	// Do the comparison without making a new string object.
779	assertProtocolIsGood(StringView { protocol });
780	bool isLeading = true;
781	for (unsigned i = `0`, j = `0`; url[i]; ++i) {
782	// Skip leading whitespace and control characters.
783	if (isLeading && shouldTrimFromURL(url[i]))
784	continue;
785	isLeading = false;
786
787	// Skip any tabs and newlines.
788	if (url[i] == `'\t'` \|\| url[i] == `'\r'` \|\| url[i] == `'\n'`)
789	continue;
790
791	if (!protocol[j])
792	return url[i] == `':'`;
793	if (!isASCIIAlphaCaselessEqual(url[i], protocol[j]))
794	return false;
795
796	++j;
797	}
798
799	return false;
800	}
801
802	bool protocolIs(const String& url, const char* protocol)
803	{
804	return protocolIsInternal(url, protocol);
805	}
806
807	inline bool URL::protocolIs(const String& string, const char* protocol)
808	{
809	return WTF::protocolIsInternal(string, protocol);
810	}
811
812	#ifndef NDEBUG
813
814	void URL::print() const
815	{
816	printf("%s\n", m_string.utf8().data());
817	}
818
819	#endif
820
821	String URL::strippedForUseAsReferrer() const
822	{
823	URL referrer(*this);
824	referrer.setUser(String ());
825	referrer.setPass(String ());
826	referrer.removeFragmentIdentifier();
827	return referrer.string();
828	}
829
830	bool URL::isLocalFile() const
831	{
832	// Including feed here might be a bad idea since drag and drop uses this check
833	// and including feed would allow feeds to potentially let someone's blog
834	// read the contents of the clipboard on a drag, even without a drop.
835	// Likewise with using the FrameLoader::shouldTreatURLAsLocal() function.
836	return protocolIs("file");
837	}
838
839	bool protocolIsJavaScript(const String& url)
840	{
841	return protocolIsInternal(url, "javascript");
842	}
843
844	bool protocolIsJavaScript(StringView url)
845	{
846	return protocolIsInternal(url, "javascript");
847	}
848
849	bool protocolIsInHTTPFamily(const String& url)
850	{
851	auto length = url.length();
852	// Do the comparison without making a new string object.
853	return length >= `5`
854	&& isASCIIAlphaCaselessEqual(url [`0`], `'h'`)
855	&& isASCIIAlphaCaselessEqual(url [`1`], `'t'`)
856	&& isASCIIAlphaCaselessEqual(url [`2`], `'t'`)
857	&& isASCIIAlphaCaselessEqual(url [`3`], `'p'`)
858	&& (url [`4`] == `':'` \|\| (isASCIIAlphaCaselessEqual(url [`4`], `'s'`) && length >= `6` && url [`5`] == `':'`));
859	}
860
861	const URL& blankURL()
862	{
863	static NeverDestroyed<URL> staticBlankURL(URL (), "about:blank");
864	return staticBlankURL;
865	}
866
867	bool URL::protocolIsAbout() const
868	{
869	return protocolIs("about");
870	}
871
872	bool portAllowed(const URL& url)
873	{
874	Optional<uint16_t> port = url.port();
875
876	// Since most URLs don't have a port, return early for the "no port" case.
877	if (!port)
878	return true;
879
880	// This blocked port list matches the port blocking that Mozilla implements.
881	// See http://www.mozilla.org/projects/netlib/PortBanning.html for more information.
882	static const uint16_t blockedPortList[] = {
883	`1`, // tcpmux
884	`7`, // echo
885	`9`, // discard
886	`11`, // systat
887	`13`, // daytime
888	`15`, // netstat
889	`17`, // qotd
890	`19`, // chargen
891	`20`, // FTP-data
892	`21`, // FTP-control
893	`22`, // SSH
894	`23`, // telnet
895	`25`, // SMTP
896	`37`, // time
897	`42`, // name
898	`43`, // nicname
899	`53`, // domain
900	`77`, // priv-rjs
901	`79`, // finger
902	`87`, // ttylink
903	`95`, // supdup
904	`101`, // hostriame
905	`102`, // iso-tsap
906	`103`, // gppitnp
907	`104`, // acr-nema
908	`109`, // POP2
909	`110`, // POP3
910	`111`, // sunrpc
911	`113`, // auth
912	`115`, // SFTP
913	`117`, // uucp-path
914	`119`, // nntp
915	`123`, // NTP
916	`135`, // loc-srv / epmap
917	`139`, // netbios
918	`143`, // IMAP2
919	`179`, // BGP
920	`389`, // LDAP
921	`427`, // SLP (Also used by Apple Filing Protocol)
922	`465`, // SMTP+SSL
923	`512`, // print / exec
924	`513`, // login
925	`514`, // shell
926	`515`, // printer
927	`526`, // tempo
928	`530`, // courier
929	`531`, // Chat
930	`532`, // netnews
931	`540`, // UUCP
932	`548`, // afpovertcp [Apple addition]
933	`556`, // remotefs
934	`563`, // NNTP+SSL
935	`587`, // ESMTP
936	`601`, // syslog-conn
937	`636`, // LDAP+SSL
938	`993`, // IMAP+SSL
939	`995`, // POP3+SSL
940	`2049`, // NFS
941	`3659`, // apple-sasl / PasswordServer [Apple addition]
942	`4045`, // lockd
943	`4190`, // ManageSieve [Apple addition]
944	`6000`, // X11
945	`6665`, // Alternate IRC [Apple addition]
946	`6666`, // Alternate IRC [Apple addition]
947	`6667`, // Standard IRC [Apple addition]
948	`6668`, // Alternate IRC [Apple addition]
949	`6669`, // Alternate IRC [Apple addition]
950	`6679`, // Alternate IRC SSL [Apple addition]
951	`6697`, // IRC+SSL [Apple addition]
952	invalidPortNumber, // Used to block all invalid port numbers
953	};
954
955	// If the port is not in the blocked port list, allow it.
956	ASSERT(std::is_sorted(std::begin(blockedPortList), std::end(blockedPortList)));
957	if (!std::binary_search(std::begin(blockedPortList), std::end(blockedPortList), port.value()))
958	return true;
959
960	// Allow ports 21 and 22 for FTP URLs, as Mozilla does.
961	if ((port.value() == `21` \|\| port.value() == `22`) && url.protocolIs("ftp"))
962	return true;
963
964	// Allow any port number in a file URL, since the port number is ignored.
965	if (url.protocolIs("file"))
966	return true;
967
968	return false;
969	}
970
971	String mimeTypeFromDataURL(const String& url)
972	{
973	ASSERT(protocolIsInternal(url, "data"));
974
975	// FIXME: What's the right behavior when the URL has a comma first, but a semicolon later?
976	// Currently this code will break at the semicolon in that case. Not sure that's correct.
977	auto index = url.find(`';'`, `5`);
978	if (index == notFound)
979	index = url.find(`','`, `5`);
980	if (index == notFound) {
981	// FIXME: There was an old comment here that made it sound like this should be returning text/plain.
982	// But we have been returning empty string here for some time, so not changing its behavior at this time.
983	return emptyString();
984	}
985	if (index == `5`)
986	return "text/plain"_s;
987	ASSERT(index >= `5`);
988	return url.substring(`5`, index - `5`).convertToASCIILowercase();
989	}
990
991	String URL::stringCenterEllipsizedToLength(unsigned length) const
992	{
993	if (string().length() <= length)
994	return string();
995
996	return string().left(length / `2` - `1`) + "..." + string().right(length / `2` - `2`);
997	}
998
999	URL URL::fakeURLWithRelativePart(const String& relativePart)
1000	{
1001	return URL (URL (), "webkit-fake-url://" + createCanonicalUUIDString() + `'/'` + relativePart);
1002	}
1003
1004	URL URL::fileURLWithFileSystemPath(const String& filePath)
1005	{
1006	return URL (URL (), "file:///" + filePath);
1007	}
1008
1009	TextStream& operator<<(TextStream& ts, const URL& url)
1010	{
1011	ts << url.string();
1012	return ts;
1013	}
1014
1015	#if !PLATFORM(COCOA) && !USE(SOUP)
1016	static bool isIPv4Address(StringView string)
1017	{
1018	auto count = `0`;
1019
1020	for (const auto octet : string.splitAllowingEmptyEntries(`'.'`)) {
1021	if (count >= `4`)
1022	return false;
1023
1024	const auto length = octet.length();
1025	if (!length \|\| length > `3`)
1026	return false;
1027
1028	auto value = `0`;
1029	for (auto i = `0u`; i < length; ++i) {
1030	const auto digit = octet [i];
1031
1032	// Prohibit leading zeroes.
1033	if (digit > `'9'` \|\| digit < (!i && length > `1` ? `'1'` : `'0'`))
1034	return false;
1035
1036	value = `10` * value + (digit - `'0'`);
1037	}
1038
1039	if (value > `255`)
1040	return false;
1041
1042	count++;
1043	}
1044
1045	return (count == `4`);
1046	}
1047
1048	static bool isIPv6Address(StringView string)
1049	{
1050	enum SkipState { None, WillSkip, Skipping, Skipped, Final };
1051	auto skipState = None;
1052	auto count = `0`;
1053
1054	for (const auto hextet : string.splitAllowingEmptyEntries(`':'`)) {
1055	if (count >= `8` \|\| skipState == Final)
1056	return false;
1057
1058	const auto length = hextet.length();
1059	if (!length) {
1060	// :: may be used anywhere to skip 1 to 8 hextets, but only once.
1061	if (skipState == Skipped)
1062	return false;
1063
1064	if (skipState == None)
1065	skipState = !count ? WillSkip : Skipping;
1066	else if (skipState == WillSkip)
1067	skipState = Skipping;
1068	else
1069	skipState = Final;
1070	continue;
1071	}
1072
1073	if (skipState == WillSkip)
1074	return false;
1075
1076	if (skipState == Skipping)
1077	skipState = Skipped;
1078
1079	if (length > `4`) {
1080	// An IPv4 address may be used in place of the final two hextets.
1081	if ((skipState == None && count != `6`) \|\| (skipState == Skipped && count >= `6`) \|\| !isIPv4Address(hextet))
1082	return false;
1083
1084	skipState = Final;
1085	continue;
1086	}
1087
1088	for (const auto codeUnit : hextet.codeUnits()) {
1089	// IPv6 allows leading zeroes.
1090	if (!isASCIIHexDigit(codeUnit))
1091	return false;
1092	}
1093
1094	count++;
1095	}
1096
1097	return (count == `8` && skipState == None) \|\| skipState == Skipped \|\| skipState == Final;
1098	}
1099
1100	bool URL::hostIsIPAddress(StringView host)
1101	{
1102	if (host.find(`':'`) == notFound)
1103	return isIPv4Address(host);
1104
1105	return isIPv6Address(host);
1106	}
1107	#endif
1108
1109	} // namespace WTF
1110

Browse the source code of jsc/Source/WTF/wtf/URL.cpp