/*
 * Copyright (C) 2016-2018 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#pragma once

#include <array>
#include <wtf/Expected.h>
#include <wtf/Forward.h>
#include <wtf/URL.h>

// Forward declaration only: this header refers to UIDNA solely by reference,
// so the complete type is not required here.
// NOTE(review): presumably ICU's opaque IDNA handle (unicode/uidna.h) — confirm.
struct UIDNA;

34namespace WTF {
35
36template<typename CharacterType> class CodePointIterator;
37
38class URLParser {
39 WTF_MAKE_FAST_ALLOCATED;
40public:
41 WTF_EXPORT_PRIVATE static bool allValuesEqual(const URL&, const URL&);
42 WTF_EXPORT_PRIVATE static bool internalValuesConsistent(const URL&);
43
44 using URLEncodedForm = Vector<WTF::KeyValuePair<String, String>>;
45 WTF_EXPORT_PRIVATE static URLEncodedForm parseURLEncodedForm(StringView);
46 WTF_EXPORT_PRIVATE static String serialize(const URLEncodedForm&);
47
48 WTF_EXPORT_PRIVATE static bool isSpecialScheme(const String& scheme);
49 WTF_EXPORT_PRIVATE static Optional<String> maybeCanonicalizeScheme(const String& scheme);
50
51 static const UIDNA& internationalDomainNameTranscoder();
52 static bool isInUserInfoEncodeSet(UChar);
53
54 static Optional<uint16_t> defaultPortForProtocol(StringView);
55
56private:
57 URLParser(const String&, const URL& = { }, const URLTextEncoding* = nullptr);
58 URL result() { return m_url; }
59
60 friend class URL;
61
62 URL m_url;
63 Vector<LChar> m_asciiBuffer;
64 bool m_urlIsSpecial { false };
65 bool m_urlIsFile { false };
66 bool m_hostHasPercentOrNonASCII { false };
67 String m_inputString;
68 const void* m_inputBegin { nullptr };
69
70 bool m_didSeeSyntaxViolation { false };
71 static constexpr size_t defaultInlineBufferSize = 2048;
72 using LCharBuffer = Vector<LChar, defaultInlineBufferSize>;
73
74 template<typename CharacterType> void parse(const CharacterType*, const unsigned length, const URL&, const URLTextEncoding*);
75 template<typename CharacterType> void parseAuthority(CodePointIterator<CharacterType>);
76 template<typename CharacterType> bool parseHostAndPort(CodePointIterator<CharacterType>);
77 template<typename CharacterType> bool parsePort(CodePointIterator<CharacterType>&);
78
79 void failure();
80 enum class ReportSyntaxViolation { No, Yes };
81 template<typename CharacterType, ReportSyntaxViolation reportSyntaxViolation = ReportSyntaxViolation::Yes>
82 void advance(CodePointIterator<CharacterType>& iterator) { advance<CharacterType, reportSyntaxViolation>(iterator, iterator); }
83 template<typename CharacterType, ReportSyntaxViolation = ReportSyntaxViolation::Yes>
84 void advance(CodePointIterator<CharacterType>&, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition);
85 template<typename CharacterType> bool takesTwoAdvancesUntilEnd(CodePointIterator<CharacterType>);
86 template<typename CharacterType> void syntaxViolation(const CodePointIterator<CharacterType>&);
87 template<typename CharacterType> bool isPercentEncodedDot(CodePointIterator<CharacterType>);
88 template<typename CharacterType> bool isWindowsDriveLetter(CodePointIterator<CharacterType>);
89 template<typename CharacterType> bool isSingleDotPathSegment(CodePointIterator<CharacterType>);
90 template<typename CharacterType> bool isDoubleDotPathSegment(CodePointIterator<CharacterType>);
91 template<typename CharacterType> bool shouldCopyFileURL(CodePointIterator<CharacterType>);
92 template<typename CharacterType> bool checkLocalhostCodePoint(CodePointIterator<CharacterType>&, UChar32);
93 template<typename CharacterType> bool isAtLocalhost(CodePointIterator<CharacterType>);
94 bool isLocalhost(StringView);
95 template<typename CharacterType> void consumeSingleDotPathSegment(CodePointIterator<CharacterType>&);
96 template<typename CharacterType> void consumeDoubleDotPathSegment(CodePointIterator<CharacterType>&);
97 template<typename CharacterType> void appendWindowsDriveLetter(CodePointIterator<CharacterType>&);
98 template<typename CharacterType> size_t currentPosition(const CodePointIterator<CharacterType>&);
99 template<typename UnsignedIntegerType> void appendNumberToASCIIBuffer(UnsignedIntegerType);
100 template<bool(*isInCodeSet)(UChar32), typename CharacterType> void utf8PercentEncode(const CodePointIterator<CharacterType>&);
101 template<typename CharacterType> void utf8QueryEncode(const CodePointIterator<CharacterType>&);
102 template<typename CharacterType> Optional<LCharBuffer> domainToASCII(StringImpl&, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition);
103 template<typename CharacterType> LCharBuffer percentDecode(const LChar*, size_t, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition);
104 static LCharBuffer percentDecode(const LChar*, size_t);
105 static Optional<String> formURLDecode(StringView input);
106 static bool hasForbiddenHostCodePoint(const LCharBuffer&);
107 void percentEncodeByte(uint8_t);
108 void appendToASCIIBuffer(UChar32);
109 void appendToASCIIBuffer(const char*, size_t);
110 void appendToASCIIBuffer(const LChar* characters, size_t size) { appendToASCIIBuffer(reinterpret_cast<const char*>(characters), size); }
111 template<typename CharacterType> void encodeNonUTF8Query(const Vector<UChar>& source, const URLTextEncoding&, CodePointIterator<CharacterType>);
112 void copyASCIIStringUntil(const String&, size_t length);
113 bool copyBaseWindowsDriveLetter(const URL&);
114 StringView parsedDataView(size_t start, size_t length);
115 UChar parsedDataView(size_t position);
116
117 using IPv4Address = uint32_t;
118 void serializeIPv4(IPv4Address);
119 enum class IPv4ParsingError;
120 enum class IPv4PieceParsingError;
121 template<typename CharacterTypeForSyntaxViolation, typename CharacterType> Expected<IPv4Address, IPv4ParsingError> parseIPv4Host(const CodePointIterator<CharacterTypeForSyntaxViolation>&, CodePointIterator<CharacterType>);
122 template<typename CharacterType> Expected<uint32_t, URLParser::IPv4PieceParsingError> parseIPv4Piece(CodePointIterator<CharacterType>&, bool& syntaxViolation);
123 using IPv6Address = std::array<uint16_t, 8>;
124 template<typename CharacterType> Optional<IPv6Address> parseIPv6Host(CodePointIterator<CharacterType>);
125 template<typename CharacterType> Optional<uint32_t> parseIPv4PieceInsideIPv6(CodePointIterator<CharacterType>&);
126 template<typename CharacterType> Optional<IPv4Address> parseIPv4AddressInsideIPv6(CodePointIterator<CharacterType>);
127 void serializeIPv6Piece(uint16_t piece);
128 void serializeIPv6(IPv6Address);
129
130 enum class URLPart;
131 template<typename CharacterType> void copyURLPartsUntil(const URL& base, URLPart, const CodePointIterator<CharacterType>&, const URLTextEncoding*&);
132 static size_t urlLengthUntilPart(const URL&, URLPart);
133 void popPath();
134 bool shouldPopPath(unsigned);
135};
136
137}
138