1 | /* |
2 | * Copyright (C) 2004-2019 Apple Inc. All rights reserved. |
3 | * Copyright (C) 2012 Research In Motion Limited. All rights reserved. |
4 | * |
5 | * Redistribution and use in source and binary forms, with or without |
6 | * modification, are permitted provided that the following conditions |
7 | * are met: |
8 | * 1. Redistributions of source code must retain the above copyright |
9 | * notice, this list of conditions and the following disclaimer. |
10 | * 2. Redistributions in binary form must reproduce the above copyright |
11 | * notice, this list of conditions and the following disclaimer in the |
12 | * documentation and/or other materials provided with the distribution. |
13 | * |
14 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
15 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
17 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
18 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | */ |
26 | |
27 | #include "config.h" |
28 | #include <wtf/URL.h> |
29 | |
30 | #include "URLParser.h" |
31 | #include <stdio.h> |
32 | #include <unicode/uidna.h> |
33 | #include <wtf/HashMap.h> |
34 | #include <wtf/NeverDestroyed.h> |
35 | #include <wtf/StdLibExtras.h> |
36 | #include <wtf/UUID.h> |
37 | #include <wtf/text/CString.h> |
38 | #include <wtf/text/StringBuilder.h> |
39 | #include <wtf/text/StringConcatenateNumbers.h> |
40 | #include <wtf/text/StringHash.h> |
41 | #include <wtf/text/TextStream.h> |
42 | |
43 | namespace WTF { |
44 | |
45 | typedef Vector<char, 512> CharBuffer; |
46 | typedef Vector<UChar, 512> UCharBuffer; |
47 | |
48 | static constexpr unsigned invalidPortNumber = 0xFFFF; |
49 | |
50 | // Copies the source to the destination, assuming all the source characters are |
51 | // ASCII. The destination buffer must be large enough. Null characters are allowed |
52 | // in the source string, and no attempt is made to null-terminate the result. |
53 | static void copyASCII(const String& string, char* dest) |
54 | { |
55 | if (string.isEmpty()) |
56 | return; |
57 | |
58 | if (string.is8Bit()) |
59 | memcpy(dest, string.characters8(), string.length()); |
60 | else { |
61 | const UChar* src = string.characters16(); |
62 | size_t length = string.length(); |
63 | for (size_t i = 0; i < length; i++) |
64 | dest[i] = static_cast<char>(src[i]); |
65 | } |
66 | } |
67 | |
68 | void URL::invalidate() |
69 | { |
70 | m_isValid = false; |
71 | m_protocolIsInHTTPFamily = false; |
72 | m_cannotBeABaseURL = false; |
73 | m_schemeEnd = 0; |
74 | m_userStart = 0; |
75 | m_userEnd = 0; |
76 | m_passwordEnd = 0; |
77 | m_hostEnd = 0; |
78 | m_portLength = 0; |
79 | m_pathEnd = 0; |
80 | m_pathAfterLastSlash = 0; |
81 | m_queryEnd = 0; |
82 | } |
83 | |
84 | URL::URL(const URL& base, const String& relative, const URLTextEncoding* encoding) |
85 | { |
86 | URLParser parser(relative, base, encoding); |
87 | *this = parser.result(); |
88 | } |
89 | |
90 | static bool shouldTrimFromURL(UChar c) |
91 | { |
92 | // Browsers ignore leading/trailing whitespace and control |
93 | // characters from URLs. Note that c is an *unsigned* char here |
94 | // so this comparison should only catch control characters. |
95 | return c <= ' '; |
96 | } |
97 | |
98 | URL URL::isolatedCopy() const |
99 | { |
100 | URL result = *this; |
101 | result.m_string = result.m_string.isolatedCopy(); |
102 | return result; |
103 | } |
104 | |
105 | String URL::lastPathComponent() const |
106 | { |
107 | if (!hasPath()) |
108 | return String(); |
109 | |
110 | unsigned end = m_pathEnd - 1; |
111 | if (m_string[end] == '/') |
112 | --end; |
113 | |
114 | size_t start = m_string.reverseFind('/', end); |
115 | if (start < static_cast<unsigned>(m_hostEnd + m_portLength)) |
116 | return String(); |
117 | ++start; |
118 | |
119 | return m_string.substring(start, end - start + 1); |
120 | } |
121 | |
122 | StringView URL::protocol() const |
123 | { |
124 | return StringView(m_string).substring(0, m_schemeEnd); |
125 | } |
126 | |
127 | StringView URL::host() const |
128 | { |
129 | unsigned start = hostStart(); |
130 | return StringView(m_string).substring(start, m_hostEnd - start); |
131 | } |
132 | |
133 | Optional<uint16_t> URL::port() const |
134 | { |
135 | if (!m_portLength) |
136 | return WTF::nullopt; |
137 | |
138 | bool ok = false; |
139 | unsigned number; |
140 | if (m_string.is8Bit()) |
141 | number = charactersToUIntStrict(m_string.characters8() + m_hostEnd + 1, m_portLength - 1, &ok); |
142 | else |
143 | number = charactersToUIntStrict(m_string.characters16() + m_hostEnd + 1, m_portLength - 1, &ok); |
144 | if (!ok || number > std::numeric_limits<uint16_t>::max()) |
145 | return WTF::nullopt; |
146 | return number; |
147 | } |
148 | |
149 | String URL::hostAndPort() const |
150 | { |
151 | if (auto port = this->port()) |
152 | return makeString(host(), ':', static_cast<unsigned>(port.value())); |
153 | return host().toString(); |
154 | } |
155 | |
156 | String URL::protocolHostAndPort() const |
157 | { |
158 | String result = m_string.substring(0, m_hostEnd + m_portLength); |
159 | |
160 | if (m_passwordEnd - m_userStart > 0) { |
161 | const int allowForTrailingAtSign = 1; |
162 | result.remove(m_userStart, m_passwordEnd - m_userStart + allowForTrailingAtSign); |
163 | } |
164 | |
165 | return result; |
166 | } |
167 | |
168 | static String decodeEscapeSequencesFromParsedURL(StringView input) |
169 | { |
170 | auto inputLength = input.length(); |
171 | if (!inputLength) |
172 | return emptyString(); |
173 | Vector<LChar> percentDecoded; |
174 | percentDecoded.reserveInitialCapacity(inputLength); |
175 | for (unsigned i = 0; i < inputLength; ++i) { |
176 | if (input[i] == '%' |
177 | && inputLength > 2 |
178 | && i < inputLength - 2 |
179 | && isASCIIHexDigit(input[i + 1]) |
180 | && isASCIIHexDigit(input[i + 2])) { |
181 | percentDecoded.uncheckedAppend(toASCIIHexValue(input[i + 1], input[i + 2])); |
182 | i += 2; |
183 | } else |
184 | percentDecoded.uncheckedAppend(input[i]); |
185 | } |
186 | return String::fromUTF8(percentDecoded.data(), percentDecoded.size()); |
187 | } |
188 | |
189 | String URL::user() const |
190 | { |
191 | return decodeEscapeSequencesFromParsedURL(StringView(m_string).substring(m_userStart, m_userEnd - m_userStart)); |
192 | } |
193 | |
194 | String URL::pass() const |
195 | { |
196 | if (m_passwordEnd == m_userEnd) |
197 | return String(); |
198 | |
199 | return decodeEscapeSequencesFromParsedURL(StringView(m_string).substring(m_userEnd + 1, m_passwordEnd - m_userEnd - 1)); |
200 | } |
201 | |
202 | String URL::encodedUser() const |
203 | { |
204 | return m_string.substring(m_userStart, m_userEnd - m_userStart); |
205 | } |
206 | |
207 | String URL::encodedPass() const |
208 | { |
209 | if (m_passwordEnd == m_userEnd) |
210 | return String(); |
211 | |
212 | return m_string.substring(m_userEnd + 1, m_passwordEnd - m_userEnd - 1); |
213 | } |
214 | |
215 | String URL::fragmentIdentifier() const |
216 | { |
217 | if (!hasFragmentIdentifier()) |
218 | return String(); |
219 | |
220 | return m_string.substring(m_queryEnd + 1); |
221 | } |
222 | |
223 | bool URL::hasFragmentIdentifier() const |
224 | { |
225 | return m_isValid && m_string.length() != m_queryEnd; |
226 | } |
227 | |
228 | String URL::baseAsString() const |
229 | { |
230 | return m_string.left(m_pathAfterLastSlash); |
231 | } |
232 | |
233 | #if !USE(CF) |
234 | |
235 | String URL::fileSystemPath() const |
236 | { |
237 | if (!isValid() || !isLocalFile()) |
238 | return String(); |
239 | |
240 | return decodeEscapeSequencesFromParsedURL(StringView(path())); |
241 | } |
242 | |
243 | #endif |
244 | |
245 | #ifdef NDEBUG |
246 | |
247 | static inline void assertProtocolIsGood(StringView) |
248 | { |
249 | } |
250 | |
251 | #else |
252 | |
253 | static void assertProtocolIsGood(StringView protocol) |
254 | { |
255 | // FIXME: We probably don't need this function any more. |
256 | // The isASCIIAlphaCaselessEqual function asserts that passed-in characters |
257 | // are ones it can handle; the older code did not and relied on these checks. |
258 | for (auto character : protocol.codeUnits()) { |
259 | ASSERT(isASCII(character)); |
260 | ASSERT(character > ' '); |
261 | ASSERT(!isASCIIUpper(character)); |
262 | ASSERT(toASCIILowerUnchecked(character) == character); |
263 | } |
264 | } |
265 | |
266 | #endif |
267 | |
268 | static Lock defaultPortForProtocolMapForTestingLock; |
269 | |
270 | using DefaultPortForProtocolMapForTesting = HashMap<String, uint16_t>; |
271 | static DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMapForTesting() |
272 | { |
273 | static DefaultPortForProtocolMapForTesting* defaultPortForProtocolMap; |
274 | return defaultPortForProtocolMap; |
275 | } |
276 | |
277 | static DefaultPortForProtocolMapForTesting& ensureDefaultPortForProtocolMapForTesting() |
278 | { |
279 | DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMap = defaultPortForProtocolMapForTesting(); |
280 | if (!defaultPortForProtocolMap) |
281 | defaultPortForProtocolMap = new DefaultPortForProtocolMapForTesting; |
282 | return *defaultPortForProtocolMap; |
283 | } |
284 | |
285 | void registerDefaultPortForProtocolForTesting(uint16_t port, const String& protocol) |
286 | { |
287 | auto locker = holdLock(defaultPortForProtocolMapForTestingLock); |
288 | ensureDefaultPortForProtocolMapForTesting().add(protocol, port); |
289 | } |
290 | |
291 | void clearDefaultPortForProtocolMapForTesting() |
292 | { |
293 | auto locker = holdLock(defaultPortForProtocolMapForTestingLock); |
294 | if (auto* map = defaultPortForProtocolMapForTesting()) |
295 | map->clear(); |
296 | } |
297 | |
298 | Optional<uint16_t> defaultPortForProtocol(StringView protocol) |
299 | { |
300 | if (auto* overrideMap = defaultPortForProtocolMapForTesting()) { |
301 | auto locker = holdLock(defaultPortForProtocolMapForTestingLock); |
302 | ASSERT(overrideMap); // No need to null check again here since overrideMap cannot become null after being non-null. |
303 | auto iterator = overrideMap->find(protocol.toStringWithoutCopying()); |
304 | if (iterator != overrideMap->end()) |
305 | return iterator->value; |
306 | } |
307 | return URLParser::defaultPortForProtocol(protocol); |
308 | } |
309 | |
310 | bool isDefaultPortForProtocol(uint16_t port, StringView protocol) |
311 | { |
312 | return defaultPortForProtocol(protocol) == port; |
313 | } |
314 | |
315 | bool URL::protocolIs(const char* protocol) const |
316 | { |
317 | assertProtocolIsGood(StringView { protocol }); |
318 | |
319 | // JavaScript URLs are "valid" and should be executed even if URL decides they are invalid. |
320 | // The free function protocolIsJavaScript() should be used instead. |
321 | ASSERT(!equalLettersIgnoringASCIICase(StringView(protocol), "javascript" )); |
322 | |
323 | if (!m_isValid) |
324 | return false; |
325 | |
326 | // Do the comparison without making a new string object. |
327 | for (unsigned i = 0; i < m_schemeEnd; ++i) { |
328 | if (!protocol[i] || !isASCIIAlphaCaselessEqual(m_string[i], protocol[i])) |
329 | return false; |
330 | } |
331 | return !protocol[m_schemeEnd]; // We should have consumed all characters in the argument. |
332 | } |
333 | |
334 | bool URL::protocolIs(StringView protocol) const |
335 | { |
336 | assertProtocolIsGood(protocol); |
337 | |
338 | if (!m_isValid) |
339 | return false; |
340 | |
341 | if (m_schemeEnd != protocol.length()) |
342 | return false; |
343 | |
344 | // Do the comparison without making a new string object. |
345 | for (unsigned i = 0; i < m_schemeEnd; ++i) { |
346 | if (!isASCIIAlphaCaselessEqual(m_string[i], protocol[i])) |
347 | return false; |
348 | } |
349 | return true; |
350 | } |
351 | |
352 | String URL::query() const |
353 | { |
354 | if (m_queryEnd == m_pathEnd) |
355 | return String(); |
356 | |
357 | return m_string.substring(m_pathEnd + 1, m_queryEnd - (m_pathEnd + 1)); |
358 | } |
359 | |
360 | String URL::path() const |
361 | { |
362 | unsigned portEnd = m_hostEnd + m_portLength; |
363 | return m_string.substring(portEnd, m_pathEnd - portEnd); |
364 | } |
365 | |
366 | bool URL::setProtocol(const String& s) |
367 | { |
368 | // Firefox and IE remove everything after the first ':'. |
369 | size_t separatorPosition = s.find(':'); |
370 | String newProtocol = s.substring(0, separatorPosition); |
371 | auto canonicalized = URLParser::maybeCanonicalizeScheme(newProtocol); |
372 | if (!canonicalized) |
373 | return false; |
374 | |
375 | if (!m_isValid) { |
376 | URLParser parser(makeString(*canonicalized, ":" , m_string)); |
377 | *this = parser.result(); |
378 | return true; |
379 | } |
380 | |
381 | if ((m_passwordEnd != m_userStart || port()) && *canonicalized == "file" ) |
382 | return true; |
383 | |
384 | if (isLocalFile() && host().isEmpty()) |
385 | return true; |
386 | |
387 | URLParser parser(makeString(*canonicalized, m_string.substring(m_schemeEnd))); |
388 | *this = parser.result(); |
389 | return true; |
390 | } |
391 | |
392 | static bool isAllASCII(StringView string) |
393 | { |
394 | if (string.is8Bit()) |
395 | return charactersAreAllASCII(string.characters8(), string.length()); |
396 | return charactersAreAllASCII(string.characters16(), string.length()); |
397 | } |
398 | |
399 | // Appends the punycoded hostname identified by the given string and length to |
400 | // the output buffer. The result will not be null terminated. |
401 | // Return value of false means error in encoding. |
402 | static bool appendEncodedHostname(UCharBuffer& buffer, StringView string) |
403 | { |
404 | // Needs to be big enough to hold an IDN-encoded name. |
405 | // For host names bigger than this, we won't do IDN encoding, which is almost certainly OK. |
406 | const unsigned hostnameBufferLength = 2048; |
407 | |
408 | if (string.length() > hostnameBufferLength || isAllASCII(string)) { |
409 | append(buffer, string); |
410 | return true; |
411 | } |
412 | |
413 | UChar hostnameBuffer[hostnameBufferLength]; |
414 | UErrorCode error = U_ZERO_ERROR; |
415 | UIDNAInfo processingDetails = UIDNA_INFO_INITIALIZER; |
416 | int32_t numCharactersConverted = uidna_nameToASCII(&URLParser::internationalDomainNameTranscoder(), |
417 | string.upconvertedCharacters(), string.length(), hostnameBuffer, hostnameBufferLength, &processingDetails, &error); |
418 | |
419 | if (U_SUCCESS(error) && !processingDetails.errors) { |
420 | buffer.append(hostnameBuffer, numCharactersConverted); |
421 | return true; |
422 | } |
423 | return false; |
424 | } |
425 | |
426 | unsigned URL::hostStart() const |
427 | { |
428 | return (m_passwordEnd == m_userStart) ? m_passwordEnd : m_passwordEnd + 1; |
429 | } |
430 | |
431 | void URL::setHost(const String& s) |
432 | { |
433 | if (!m_isValid) |
434 | return; |
435 | |
436 | auto colonIndex = s.find(':'); |
437 | if (colonIndex != notFound) |
438 | return; |
439 | |
440 | UCharBuffer encodedHostName; |
441 | if (!appendEncodedHostname(encodedHostName, s)) |
442 | return; |
443 | |
444 | bool slashSlashNeeded = m_userStart == static_cast<unsigned>(m_schemeEnd + 1); |
445 | |
446 | StringBuilder builder; |
447 | builder.append(m_string.left(hostStart())); |
448 | if (slashSlashNeeded) |
449 | builder.appendLiteral("//" ); |
450 | builder.append(StringView(encodedHostName.data(), encodedHostName.size())); |
451 | builder.append(m_string.substring(m_hostEnd)); |
452 | |
453 | URLParser parser(builder.toString()); |
454 | *this = parser.result(); |
455 | } |
456 | |
457 | void URL::removePort() |
458 | { |
459 | if (!m_portLength) |
460 | return; |
461 | URLParser parser(makeString(StringView(m_string).left(m_hostEnd), StringView(m_string).substring(m_hostEnd + m_portLength))); |
462 | *this = parser.result(); |
463 | } |
464 | |
465 | void URL::setPort(unsigned short i) |
466 | { |
467 | if (!m_isValid) |
468 | return; |
469 | |
470 | bool colonNeeded = !m_portLength; |
471 | unsigned portStart = (colonNeeded ? m_hostEnd : m_hostEnd + 1); |
472 | |
473 | URLParser parser(makeString(StringView(m_string).left(portStart), (colonNeeded ? ":" : "" ), static_cast<unsigned>(i), StringView(m_string).substring(m_hostEnd + m_portLength))); |
474 | *this = parser.result(); |
475 | } |
476 | |
477 | void URL::setHostAndPort(const String& hostAndPort) |
478 | { |
479 | if (!m_isValid) |
480 | return; |
481 | |
482 | StringView hostName(hostAndPort); |
483 | StringView port; |
484 | |
485 | auto colonIndex = hostName.find(':'); |
486 | if (colonIndex != notFound) { |
487 | port = hostName.substring(colonIndex + 1); |
488 | bool ok; |
489 | int portInt = port.toIntStrict(ok); |
490 | if (!ok || portInt < 0) |
491 | return; |
492 | hostName = hostName.substring(0, colonIndex); |
493 | } |
494 | |
495 | if (hostName.isEmpty()) |
496 | return; |
497 | |
498 | UCharBuffer encodedHostName; |
499 | if (!appendEncodedHostname(encodedHostName, hostName)) |
500 | return; |
501 | |
502 | bool slashSlashNeeded = m_userStart == static_cast<unsigned>(m_schemeEnd + 1); |
503 | |
504 | StringBuilder builder; |
505 | builder.append(m_string.left(hostStart())); |
506 | if (slashSlashNeeded) |
507 | builder.appendLiteral("//" ); |
508 | builder.append(StringView(encodedHostName.data(), encodedHostName.size())); |
509 | if (!port.isEmpty()) { |
510 | builder.appendLiteral(":" ); |
511 | builder.append(port); |
512 | } |
513 | builder.append(StringView(m_string).substring(m_hostEnd + m_portLength)); |
514 | |
515 | URLParser parser(builder.toString()); |
516 | *this = parser.result(); |
517 | } |
518 | |
519 | static String percentEncodeCharacters(const String& input, bool(*shouldEncode)(UChar)) |
520 | { |
521 | auto encode = [shouldEncode] (const String& input) { |
522 | CString utf8 = input.utf8(); |
523 | auto* data = utf8.data(); |
524 | StringBuilder builder; |
525 | auto length = utf8.length(); |
526 | for (unsigned j = 0; j < length; j++) { |
527 | auto c = data[j]; |
528 | if (shouldEncode(c)) { |
529 | builder.append('%'); |
530 | builder.append(upperNibbleToASCIIHexDigit(c)); |
531 | builder.append(lowerNibbleToASCIIHexDigit(c)); |
532 | } else |
533 | builder.append(c); |
534 | } |
535 | return builder.toString(); |
536 | }; |
537 | |
538 | for (size_t i = 0; i < input.length(); ++i) { |
539 | if (UNLIKELY(shouldEncode(input[i]))) |
540 | return encode(input); |
541 | } |
542 | return input; |
543 | } |
544 | |
545 | void URL::setUser(const String& user) |
546 | { |
547 | if (!m_isValid) |
548 | return; |
549 | |
550 | // FIXME: Non-ASCII characters must be encoded and escaped to match parse() expectations, |
551 | // and to avoid changing more than just the user login. |
552 | |
553 | unsigned end = m_userEnd; |
554 | if (!user.isEmpty()) { |
555 | String u = percentEncodeCharacters(user, URLParser::isInUserInfoEncodeSet); |
556 | if (m_userStart == static_cast<unsigned>(m_schemeEnd + 1)) |
557 | u = "//" + u; |
558 | // Add '@' if we didn't have one before. |
559 | if (end == m_hostEnd || (end == m_passwordEnd && m_string[end] != '@')) |
560 | u.append('@'); |
561 | URLParser parser(makeString(StringView(m_string).left(m_userStart), u, StringView(m_string).substring(end))); |
562 | *this = parser.result(); |
563 | } else { |
564 | // Remove '@' if we now have neither user nor password. |
565 | if (m_userEnd == m_passwordEnd && end != m_hostEnd && m_string[end] == '@') |
566 | end += 1; |
567 | // We don't want to parse in the extremely common case where we are not going to make a change. |
568 | if (m_userStart != end) { |
569 | URLParser parser(makeString(StringView(m_string).left(m_userStart), StringView(m_string).substring(end))); |
570 | *this = parser.result(); |
571 | } |
572 | } |
573 | } |
574 | |
575 | void URL::setPass(const String& password) |
576 | { |
577 | if (!m_isValid) |
578 | return; |
579 | |
580 | unsigned end = m_passwordEnd; |
581 | if (!password.isEmpty()) { |
582 | String p = ":" + percentEncodeCharacters(password, URLParser::isInUserInfoEncodeSet) + "@" ; |
583 | if (m_userEnd == static_cast<unsigned>(m_schemeEnd + 1)) |
584 | p = "//" + p; |
585 | // Eat the existing '@' since we are going to add our own. |
586 | if (end != m_hostEnd && m_string[end] == '@') |
587 | end += 1; |
588 | URLParser parser(makeString(StringView(m_string).left(m_userEnd), p, StringView(m_string).substring(end))); |
589 | *this = parser.result(); |
590 | } else { |
591 | // Remove '@' if we now have neither user nor password. |
592 | if (m_userStart == m_userEnd && end != m_hostEnd && m_string[end] == '@') |
593 | end += 1; |
594 | // We don't want to parse in the extremely common case where we are not going to make a change. |
595 | if (m_userEnd != end) { |
596 | URLParser parser(makeString(StringView(m_string).left(m_userEnd), StringView(m_string).substring(end))); |
597 | *this = parser.result(); |
598 | } |
599 | } |
600 | } |
601 | |
602 | void URL::setFragmentIdentifier(StringView identifier) |
603 | { |
604 | if (!m_isValid) |
605 | return; |
606 | |
607 | // FIXME: Optimize the case where the identifier already happens to be equal to what was passed? |
608 | // FIXME: Is it correct to do this without encoding and escaping non-ASCII characters? |
609 | *this = URLParser { makeString(StringView { m_string }.substring(0, m_queryEnd), '#', identifier) }.result(); |
610 | } |
611 | |
612 | void URL::removeFragmentIdentifier() |
613 | { |
614 | if (!m_isValid) { |
615 | ASSERT(!m_queryEnd); |
616 | return; |
617 | } |
618 | if (m_isValid && m_string.length() > m_queryEnd) |
619 | m_string = m_string.left(m_queryEnd); |
620 | } |
621 | |
622 | void URL::removeQueryAndFragmentIdentifier() |
623 | { |
624 | if (!m_isValid) |
625 | return; |
626 | |
627 | m_string = m_string.left(m_pathEnd); |
628 | m_queryEnd = m_pathEnd; |
629 | } |
630 | |
631 | void URL::setQuery(const String& query) |
632 | { |
633 | if (!m_isValid) |
634 | return; |
635 | |
636 | // FIXME: '#' and non-ASCII characters must be encoded and escaped. |
637 | // Usually, the query is encoded using document encoding, not UTF-8, but we don't have |
638 | // access to the document in this function. |
639 | // https://webkit.org/b/161176 |
640 | if ((query.isEmpty() || query[0] != '?') && !query.isNull()) { |
641 | URLParser parser(makeString(StringView(m_string).left(m_pathEnd), "?" , query, StringView(m_string).substring(m_queryEnd))); |
642 | *this = parser.result(); |
643 | } else { |
644 | URLParser parser(makeString(StringView(m_string).left(m_pathEnd), query, StringView(m_string).substring(m_queryEnd))); |
645 | *this = parser.result(); |
646 | } |
647 | |
648 | } |
649 | |
650 | void URL::setPath(const String& s) |
651 | { |
652 | if (!m_isValid) |
653 | return; |
654 | |
655 | String path = s; |
656 | if (path.isEmpty() || path[0] != '/') |
657 | path = "/" + path; |
658 | |
659 | auto questionMarkOrNumberSign = [] (UChar character) { |
660 | return character == '?' || character == '#'; |
661 | }; |
662 | URLParser parser(makeString(StringView(m_string).left(m_hostEnd + m_portLength), percentEncodeCharacters(path, questionMarkOrNumberSign), StringView(m_string).substring(m_pathEnd))); |
663 | *this = parser.result(); |
664 | } |
665 | |
666 | bool equalIgnoringFragmentIdentifier(const URL& a, const URL& b) |
667 | { |
668 | if (a.m_queryEnd != b.m_queryEnd) |
669 | return false; |
670 | unsigned queryLength = a.m_queryEnd; |
671 | for (unsigned i = 0; i < queryLength; ++i) |
672 | if (a.string()[i] != b.string()[i]) |
673 | return false; |
674 | return true; |
675 | } |
676 | |
677 | bool equalIgnoringQueryAndFragment(const URL& a, const URL& b) |
678 | { |
679 | if (a.pathEnd() != b.pathEnd()) |
680 | return false; |
681 | unsigned pathEnd = a.pathEnd(); |
682 | for (unsigned i = 0; i < pathEnd; ++i) { |
683 | if (a.string()[i] != b.string()[i]) |
684 | return false; |
685 | } |
686 | return true; |
687 | } |
688 | |
689 | bool protocolHostAndPortAreEqual(const URL& a, const URL& b) |
690 | { |
691 | if (a.m_schemeEnd != b.m_schemeEnd) |
692 | return false; |
693 | |
694 | unsigned hostStartA = a.hostStart(); |
695 | unsigned hostLengthA = a.m_hostEnd - hostStartA; |
696 | unsigned hostStartB = b.hostStart(); |
697 | unsigned hostLengthB = b.m_hostEnd - b.hostStart(); |
698 | if (hostLengthA != hostLengthB) |
699 | return false; |
700 | |
701 | // Check the scheme |
702 | for (unsigned i = 0; i < a.m_schemeEnd; ++i) { |
703 | if (a.string()[i] != b.string()[i]) |
704 | return false; |
705 | } |
706 | |
707 | // And the host |
708 | for (unsigned i = 0; i < hostLengthA; ++i) { |
709 | if (a.string()[hostStartA + i] != b.string()[hostStartB + i]) |
710 | return false; |
711 | } |
712 | |
713 | if (a.port() != b.port()) |
714 | return false; |
715 | |
716 | return true; |
717 | } |
718 | |
719 | bool hostsAreEqual(const URL& a, const URL& b) |
720 | { |
721 | unsigned hostStartA = a.hostStart(); |
722 | unsigned hostLengthA = a.m_hostEnd - hostStartA; |
723 | unsigned hostStartB = b.hostStart(); |
724 | unsigned hostLengthB = b.m_hostEnd - hostStartB; |
725 | if (hostLengthA != hostLengthB) |
726 | return false; |
727 | |
728 | for (unsigned i = 0; i < hostLengthA; ++i) { |
729 | if (a.string()[hostStartA + i] != b.string()[hostStartB + i]) |
730 | return false; |
731 | } |
732 | |
733 | return true; |
734 | } |
735 | |
736 | bool URL::isMatchingDomain(const String& domain) const |
737 | { |
738 | if (isNull()) |
739 | return false; |
740 | |
741 | if (domain.isEmpty()) |
742 | return true; |
743 | |
744 | if (!protocolIsInHTTPFamily()) |
745 | return false; |
746 | |
747 | auto host = this->host(); |
748 | if (!host.endsWith(domain)) |
749 | return false; |
750 | |
751 | return host.length() == domain.length() || host[host.length() - domain.length() - 1] == '.'; |
752 | } |
753 | |
754 | String encodeWithURLEscapeSequences(const String& input) |
755 | { |
756 | return percentEncodeCharacters(input, URLParser::isInUserInfoEncodeSet); |
757 | } |
758 | |
759 | bool URL::isHierarchical() const |
760 | { |
761 | if (!m_isValid) |
762 | return false; |
763 | ASSERT(m_string[m_schemeEnd] == ':'); |
764 | return m_string[m_schemeEnd + 1] == '/'; |
765 | } |
766 | |
767 | void URL::copyToBuffer(Vector<char, 512>& buffer) const |
768 | { |
769 | // FIXME: This throws away the high bytes of all the characters in the string! |
770 | // That's fine for a valid URL, which is all ASCII, but not for invalid URLs. |
771 | buffer.resize(m_string.length()); |
772 | copyASCII(m_string, buffer.data()); |
773 | } |
774 | |
775 | template<typename StringClass> |
776 | bool protocolIsInternal(const StringClass& url, const char* protocol) |
777 | { |
778 | // Do the comparison without making a new string object. |
779 | assertProtocolIsGood(StringView { protocol }); |
780 | bool isLeading = true; |
781 | for (unsigned i = 0, j = 0; url[i]; ++i) { |
782 | // Skip leading whitespace and control characters. |
783 | if (isLeading && shouldTrimFromURL(url[i])) |
784 | continue; |
785 | isLeading = false; |
786 | |
787 | // Skip any tabs and newlines. |
788 | if (url[i] == '\t' || url[i] == '\r' || url[i] == '\n') |
789 | continue; |
790 | |
791 | if (!protocol[j]) |
792 | return url[i] == ':'; |
793 | if (!isASCIIAlphaCaselessEqual(url[i], protocol[j])) |
794 | return false; |
795 | |
796 | ++j; |
797 | } |
798 | |
799 | return false; |
800 | } |
801 | |
802 | bool protocolIs(const String& url, const char* protocol) |
803 | { |
804 | return protocolIsInternal(url, protocol); |
805 | } |
806 | |
807 | inline bool URL::protocolIs(const String& string, const char* protocol) |
808 | { |
809 | return WTF::protocolIsInternal(string, protocol); |
810 | } |
811 | |
812 | #ifndef NDEBUG |
813 | |
814 | void URL::print() const |
815 | { |
816 | printf("%s\n" , m_string.utf8().data()); |
817 | } |
818 | |
819 | #endif |
820 | |
821 | String URL::strippedForUseAsReferrer() const |
822 | { |
823 | URL referrer(*this); |
824 | referrer.setUser(String()); |
825 | referrer.setPass(String()); |
826 | referrer.removeFragmentIdentifier(); |
827 | return referrer.string(); |
828 | } |
829 | |
830 | bool URL::isLocalFile() const |
831 | { |
832 | // Including feed here might be a bad idea since drag and drop uses this check |
833 | // and including feed would allow feeds to potentially let someone's blog |
834 | // read the contents of the clipboard on a drag, even without a drop. |
835 | // Likewise with using the FrameLoader::shouldTreatURLAsLocal() function. |
836 | return protocolIs("file" ); |
837 | } |
838 | |
839 | bool protocolIsJavaScript(const String& url) |
840 | { |
841 | return protocolIsInternal(url, "javascript" ); |
842 | } |
843 | |
844 | bool protocolIsJavaScript(StringView url) |
845 | { |
846 | return protocolIsInternal(url, "javascript" ); |
847 | } |
848 | |
849 | bool protocolIsInHTTPFamily(const String& url) |
850 | { |
851 | auto length = url.length(); |
852 | // Do the comparison without making a new string object. |
853 | return length >= 5 |
854 | && isASCIIAlphaCaselessEqual(url[0], 'h') |
855 | && isASCIIAlphaCaselessEqual(url[1], 't') |
856 | && isASCIIAlphaCaselessEqual(url[2], 't') |
857 | && isASCIIAlphaCaselessEqual(url[3], 'p') |
858 | && (url[4] == ':' || (isASCIIAlphaCaselessEqual(url[4], 's') && length >= 6 && url[5] == ':')); |
859 | } |
860 | |
861 | const URL& blankURL() |
862 | { |
863 | static NeverDestroyed<URL> staticBlankURL(URL(), "about:blank" ); |
864 | return staticBlankURL; |
865 | } |
866 | |
867 | bool URL::protocolIsAbout() const |
868 | { |
869 | return protocolIs("about" ); |
870 | } |
871 | |
872 | bool portAllowed(const URL& url) |
873 | { |
874 | Optional<uint16_t> port = url.port(); |
875 | |
876 | // Since most URLs don't have a port, return early for the "no port" case. |
877 | if (!port) |
878 | return true; |
879 | |
880 | // This blocked port list matches the port blocking that Mozilla implements. |
881 | // See http://www.mozilla.org/projects/netlib/PortBanning.html for more information. |
882 | static const uint16_t blockedPortList[] = { |
883 | 1, // tcpmux |
884 | 7, // echo |
885 | 9, // discard |
886 | 11, // systat |
887 | 13, // daytime |
888 | 15, // netstat |
889 | 17, // qotd |
890 | 19, // chargen |
891 | 20, // FTP-data |
892 | 21, // FTP-control |
893 | 22, // SSH |
894 | 23, // telnet |
895 | 25, // SMTP |
896 | 37, // time |
897 | 42, // name |
898 | 43, // nicname |
899 | 53, // domain |
900 | 77, // priv-rjs |
901 | 79, // finger |
902 | 87, // ttylink |
903 | 95, // supdup |
904 | 101, // hostriame |
905 | 102, // iso-tsap |
906 | 103, // gppitnp |
907 | 104, // acr-nema |
908 | 109, // POP2 |
909 | 110, // POP3 |
910 | 111, // sunrpc |
911 | 113, // auth |
912 | 115, // SFTP |
913 | 117, // uucp-path |
914 | 119, // nntp |
915 | 123, // NTP |
916 | 135, // loc-srv / epmap |
917 | 139, // netbios |
918 | 143, // IMAP2 |
919 | 179, // BGP |
920 | 389, // LDAP |
921 | 427, // SLP (Also used by Apple Filing Protocol) |
922 | 465, // SMTP+SSL |
923 | 512, // print / exec |
924 | 513, // login |
925 | 514, // shell |
926 | 515, // printer |
927 | 526, // tempo |
928 | 530, // courier |
929 | 531, // Chat |
930 | 532, // netnews |
931 | 540, // UUCP |
932 | 548, // afpovertcp [Apple addition] |
933 | 556, // remotefs |
934 | 563, // NNTP+SSL |
935 | 587, // ESMTP |
936 | 601, // syslog-conn |
937 | 636, // LDAP+SSL |
938 | 993, // IMAP+SSL |
939 | 995, // POP3+SSL |
940 | 2049, // NFS |
941 | 3659, // apple-sasl / PasswordServer [Apple addition] |
942 | 4045, // lockd |
943 | 4190, // ManageSieve [Apple addition] |
944 | 6000, // X11 |
945 | 6665, // Alternate IRC [Apple addition] |
946 | 6666, // Alternate IRC [Apple addition] |
947 | 6667, // Standard IRC [Apple addition] |
948 | 6668, // Alternate IRC [Apple addition] |
949 | 6669, // Alternate IRC [Apple addition] |
950 | 6679, // Alternate IRC SSL [Apple addition] |
951 | 6697, // IRC+SSL [Apple addition] |
952 | invalidPortNumber, // Used to block all invalid port numbers |
953 | }; |
954 | |
955 | // If the port is not in the blocked port list, allow it. |
956 | ASSERT(std::is_sorted(std::begin(blockedPortList), std::end(blockedPortList))); |
957 | if (!std::binary_search(std::begin(blockedPortList), std::end(blockedPortList), port.value())) |
958 | return true; |
959 | |
960 | // Allow ports 21 and 22 for FTP URLs, as Mozilla does. |
961 | if ((port.value() == 21 || port.value() == 22) && url.protocolIs("ftp" )) |
962 | return true; |
963 | |
964 | // Allow any port number in a file URL, since the port number is ignored. |
965 | if (url.protocolIs("file" )) |
966 | return true; |
967 | |
968 | return false; |
969 | } |
970 | |
971 | String mimeTypeFromDataURL(const String& url) |
972 | { |
973 | ASSERT(protocolIsInternal(url, "data" )); |
974 | |
975 | // FIXME: What's the right behavior when the URL has a comma first, but a semicolon later? |
976 | // Currently this code will break at the semicolon in that case. Not sure that's correct. |
977 | auto index = url.find(';', 5); |
978 | if (index == notFound) |
979 | index = url.find(',', 5); |
980 | if (index == notFound) { |
981 | // FIXME: There was an old comment here that made it sound like this should be returning text/plain. |
982 | // But we have been returning empty string here for some time, so not changing its behavior at this time. |
983 | return emptyString(); |
984 | } |
985 | if (index == 5) |
986 | return "text/plain"_s ; |
987 | ASSERT(index >= 5); |
988 | return url.substring(5, index - 5).convertToASCIILowercase(); |
989 | } |
990 | |
991 | String URL::stringCenterEllipsizedToLength(unsigned length) const |
992 | { |
993 | if (string().length() <= length) |
994 | return string(); |
995 | |
996 | return string().left(length / 2 - 1) + "..." + string().right(length / 2 - 2); |
997 | } |
998 | |
999 | URL URL::fakeURLWithRelativePart(const String& relativePart) |
1000 | { |
1001 | return URL(URL(), "webkit-fake-url://" + createCanonicalUUIDString() + '/' + relativePart); |
1002 | } |
1003 | |
1004 | URL URL::fileURLWithFileSystemPath(const String& filePath) |
1005 | { |
1006 | return URL(URL(), "file:///" + filePath); |
1007 | } |
1008 | |
1009 | TextStream& operator<<(TextStream& ts, const URL& url) |
1010 | { |
1011 | ts << url.string(); |
1012 | return ts; |
1013 | } |
1014 | |
1015 | #if !PLATFORM(COCOA) && !USE(SOUP) |
1016 | static bool isIPv4Address(StringView string) |
1017 | { |
1018 | auto count = 0; |
1019 | |
1020 | for (const auto octet : string.splitAllowingEmptyEntries('.')) { |
1021 | if (count >= 4) |
1022 | return false; |
1023 | |
1024 | const auto length = octet.length(); |
1025 | if (!length || length > 3) |
1026 | return false; |
1027 | |
1028 | auto value = 0; |
1029 | for (auto i = 0u; i < length; ++i) { |
1030 | const auto digit = octet[i]; |
1031 | |
1032 | // Prohibit leading zeroes. |
1033 | if (digit > '9' || digit < (!i && length > 1 ? '1' : '0')) |
1034 | return false; |
1035 | |
1036 | value = 10 * value + (digit - '0'); |
1037 | } |
1038 | |
1039 | if (value > 255) |
1040 | return false; |
1041 | |
1042 | count++; |
1043 | } |
1044 | |
1045 | return (count == 4); |
1046 | } |
1047 | |
1048 | static bool isIPv6Address(StringView string) |
1049 | { |
1050 | enum SkipState { None, WillSkip, Skipping, Skipped, Final }; |
1051 | auto skipState = None; |
1052 | auto count = 0; |
1053 | |
1054 | for (const auto hextet : string.splitAllowingEmptyEntries(':')) { |
1055 | if (count >= 8 || skipState == Final) |
1056 | return false; |
1057 | |
1058 | const auto length = hextet.length(); |
1059 | if (!length) { |
1060 | // :: may be used anywhere to skip 1 to 8 hextets, but only once. |
1061 | if (skipState == Skipped) |
1062 | return false; |
1063 | |
1064 | if (skipState == None) |
1065 | skipState = !count ? WillSkip : Skipping; |
1066 | else if (skipState == WillSkip) |
1067 | skipState = Skipping; |
1068 | else |
1069 | skipState = Final; |
1070 | continue; |
1071 | } |
1072 | |
1073 | if (skipState == WillSkip) |
1074 | return false; |
1075 | |
1076 | if (skipState == Skipping) |
1077 | skipState = Skipped; |
1078 | |
1079 | if (length > 4) { |
1080 | // An IPv4 address may be used in place of the final two hextets. |
1081 | if ((skipState == None && count != 6) || (skipState == Skipped && count >= 6) || !isIPv4Address(hextet)) |
1082 | return false; |
1083 | |
1084 | skipState = Final; |
1085 | continue; |
1086 | } |
1087 | |
1088 | for (const auto codeUnit : hextet.codeUnits()) { |
1089 | // IPv6 allows leading zeroes. |
1090 | if (!isASCIIHexDigit(codeUnit)) |
1091 | return false; |
1092 | } |
1093 | |
1094 | count++; |
1095 | } |
1096 | |
1097 | return (count == 8 && skipState == None) || skipState == Skipped || skipState == Final; |
1098 | } |
1099 | |
1100 | bool URL::hostIsIPAddress(StringView host) |
1101 | { |
1102 | if (host.find(':') == notFound) |
1103 | return isIPv4Address(host); |
1104 | |
1105 | return isIPv6Address(host); |
1106 | } |
1107 | #endif |
1108 | |
1109 | } // namespace WTF |
1110 | |