1 | /* |
2 | * Copyright (C) 2016-2018 Apple Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
14 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
15 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
17 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
18 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
19 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
20 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
21 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
22 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
23 | * THE POSSIBILITY OF SUCH DAMAGE. |
24 | */ |
25 | |
26 | #pragma once |
27 | |
28 | #include <wtf/Expected.h> |
29 | #include <wtf/Forward.h> |
30 | #include <wtf/URL.h> |
31 | |
// Forward declaration at global scope (outside namespace WTF) of the type that
// URLParser::internationalDomainNameTranscoder() returns by const reference.
// Only the name is needed by this header; the definition is not visible here.
32 | struct UIDNA; |
33 | |
34 | namespace WTF { |
35 | |
// Forward declaration of the iterator template that URLParser's private
// parsing helpers take as a parameter. It is parameterized on the character
// type of the input; the definition is not visible in this header.
36 | template<typename CharacterType> class CodePointIterator; |
37 | |
// URLParser builds a URL (stored in m_url) from an input String, an optional
// base URL and an optional URLTextEncoding (see the private constructor).
// Only declarations are visible in this header; apart from two one-line
// forwarding helpers, every member function is defined elsewhere.
//
// The constructor and result() are private and URL is a friend, so a parse
// can only be started by URL or by URLParser itself. The public section
// exposes static utilities only.
38 | class URLParser { |
// NOTE(review): presumably routes new/delete for this class through WTF's
// fast allocator; the macro is defined outside this file, confirm there.
39 | WTF_MAKE_FAST_ALLOCATED; |
40 | public: |
// Static checks over URL objects. Judging by the names, the first compares
// two URLs field by field and the second validates one URL's stored
// fields; the definitions are not visible here, so confirm before relying on it.
41 | WTF_EXPORT_PRIVATE static bool allValuesEqual(const URL&, const URL&); |
42 | WTF_EXPORT_PRIVATE static bool internalValuesConsistent(const URL&); |
43 | |
// A form is an ordered Vector of (String, String) key/value pairs.
// parseURLEncodedForm() produces one from a StringView and serialize()
// turns one back into a String.
// NOTE(review): presumably the application/x-www-form-urlencoded format; confirm.
44 | using URLEncodedForm = Vector<WTF::KeyValuePair<String, String>>; |
45 | WTF_EXPORT_PRIVATE static URLEncodedForm parseURLEncodedForm(StringView); |
46 | WTF_EXPORT_PRIVATE static String serialize(const URLEncodedForm&); |
47 | |
// Scheme helpers. maybeCanonicalizeScheme() returns an Optional<String>, so
// callers must handle the empty case (presumably an invalid scheme; confirm).
48 | WTF_EXPORT_PRIVATE static bool isSpecialScheme(const String& scheme); |
49 | WTF_EXPORT_PRIVATE static Optional<String> maybeCanonicalizeScheme(const String& scheme); |
50 | |
// The remaining public statics are not marked WTF_EXPORT_PRIVATE, unlike
// the ones above.
// internationalDomainNameTranscoder() hands out a const reference to a UIDNA
// object; its ownership and lifetime are not visible from this header.
51 | static const UIDNA& internationalDomainNameTranscoder(); |
52 | static bool isInUserInfoEncodeSet(UChar); |
53 | |
// Returns an Optional port number for the given protocol StringView; the
// empty case presumably means "no known default port" (confirm).
54 | static Optional<uint16_t> defaultPortForProtocol(StringView); |
55 | |
56 | private: |
// Parameters: the input string, a base URL (defaults to a value-initialized
// URL) and an optional text encoding pointer (defaults to nullptr).
57 | URLParser(const String&, const URL& = { }, const URLTextEncoding* = nullptr); |
// Returns m_url by value, i.e. a copy of the parser's URL.
58 | URL result() { return m_url; } |
59 | |
// Grants URL access to the private constructor and result() above.
60 | friend class URL; |
61 | |
// Parser state. The declaration order below is also the initialization
// order, so do not reorder members without checking the constructor.
62 | URL m_url; |
63 | Vector<LChar> m_asciiBuffer; |
64 | bool m_urlIsSpecial { false }; |
65 | bool m_urlIsFile { false }; |
66 | bool m_hostHasPercentOrNonASCII { false }; |
67 | String m_inputString; |
// Untyped pointer, initially null.
// NOTE(review): presumably the start of the input characters, kept as
// void* because parse() is templated on the character type; confirm.
68 | const void* m_inputBegin { nullptr }; |
69 | |
70 | bool m_didSeeSyntaxViolation { false }; |
// LCharBuffer is a Vector<LChar, 2048>.
// NOTE(review): the second Vector argument is presumably the inline
// capacity, as the constant's name suggests; confirm.
71 | static constexpr size_t defaultInlineBufferSize = 2048; |
72 | using LCharBuffer = Vector<LChar, defaultInlineBufferSize>; |
73 | |
// Parsing entry point and sub-parsers, templated on the input character
// type. parsePort() takes its iterator by non-const reference, whereas
// parseAuthority() and parseHostAndPort() take a copy.
74 | template<typename CharacterType> void parse(const CharacterType*, const unsigned length, const URL&, const URLTextEncoding*); |
75 | template<typename CharacterType> void parseAuthority(CodePointIterator<CharacterType>); |
76 | template<typename CharacterType> bool parseHostAndPort(CodePointIterator<CharacterType>); |
77 | template<typename CharacterType> bool parsePort(CodePointIterator<CharacterType>&); |
78 | |
79 | void failure(); |
// Compile-time switch for advance(); it defaults to Yes in both overloads.
80 | enum class ReportSyntaxViolation { No, Yes }; |
// One-argument convenience overload: forwards to the two-argument advance()
// below, passing the same iterator as the syntax-violation position.
81 | template<typename CharacterType, ReportSyntaxViolation reportSyntaxViolation = ReportSyntaxViolation::Yes> |
82 | void advance(CodePointIterator<CharacterType>& iterator) { advance<CharacterType, reportSyntaxViolation>(iterator, iterator); } |
// Two-argument overload: the second iterator names the position associated
// with a syntax violation, separately from the iterator being advanced.
83 | template<typename CharacterType, ReportSyntaxViolation = ReportSyntaxViolation::Yes> |
84 | void advance(CodePointIterator<CharacterType>&, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition); |
// Helpers that take the iterator by value (e.g. the is*/should*
// predicates) receive a copy, so the caller's iterator is not moved by the
// call. Helpers that take a non-const reference (checkLocalhostCodePoint,
// consume*, appendWindowsDriveLetter) can modify the caller's iterator.
85 | template<typename CharacterType> bool takesTwoAdvancesUntilEnd(CodePointIterator<CharacterType>); |
86 | template<typename CharacterType> void syntaxViolation(const CodePointIterator<CharacterType>&); |
87 | template<typename CharacterType> bool isPercentEncodedDot(CodePointIterator<CharacterType>); |
88 | template<typename CharacterType> bool isWindowsDriveLetter(CodePointIterator<CharacterType>); |
89 | template<typename CharacterType> bool isSingleDotPathSegment(CodePointIterator<CharacterType>); |
90 | template<typename CharacterType> bool isDoubleDotPathSegment(CodePointIterator<CharacterType>); |
91 | template<typename CharacterType> bool shouldCopyFileURL(CodePointIterator<CharacterType>); |
92 | template<typename CharacterType> bool checkLocalhostCodePoint(CodePointIterator<CharacterType>&, UChar32); |
93 | template<typename CharacterType> bool isAtLocalhost(CodePointIterator<CharacterType>); |
94 | bool isLocalhost(StringView); |
95 | template<typename CharacterType> void consumeSingleDotPathSegment(CodePointIterator<CharacterType>&); |
96 | template<typename CharacterType> void consumeDoubleDotPathSegment(CodePointIterator<CharacterType>&); |
97 | template<typename CharacterType> void appendWindowsDriveLetter(CodePointIterator<CharacterType>&); |
98 | template<typename CharacterType> size_t currentPosition(const CodePointIterator<CharacterType>&); |
99 | template<typename UnsignedIntegerType> void appendNumberToASCIIBuffer(UnsignedIntegerType); |
// Encoding helpers. utf8PercentEncode() takes the "is this code point in the
// encode set" predicate as a function-pointer template argument.
100 | template<bool(*isInCodeSet)(UChar32), typename CharacterType> void utf8PercentEncode(const CodePointIterator<CharacterType>&); |
101 | template<typename CharacterType> void utf8QueryEncode(const CodePointIterator<CharacterType>&); |
// domainToASCII() returns an Optional buffer, so it has an empty result that
// callers must handle.
102 | template<typename CharacterType> Optional<LCharBuffer> domainToASCII(StringImpl&, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition); |
// Two percentDecode() flavours: a member template that also receives a
// syntax-violation position, and a static one that does not.
103 | template<typename CharacterType> LCharBuffer percentDecode(const LChar*, size_t, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition); |
104 | static LCharBuffer percentDecode(const LChar*, size_t); |
105 | static Optional<String> formURLDecode(StringView input); |
106 | static bool hasForbiddenHostCodePoint(const LCharBuffer&); |
107 | void percentEncodeByte(uint8_t); |
// appendToASCIIBuffer overloads; the output buffer is presumably
// m_asciiBuffer (confirm in the definitions). The LChar overload only
// reinterprets its pointer as const char* and forwards to the char overload.
108 | void appendToASCIIBuffer(UChar32); |
109 | void appendToASCIIBuffer(const char*, size_t); |
110 | void appendToASCIIBuffer(const LChar* characters, size_t size) { appendToASCIIBuffer(reinterpret_cast<const char*>(characters), size); } |
111 | template<typename CharacterType> void encodeNonUTF8Query(const Vector<UChar>& source, const URLTextEncoding&, CodePointIterator<CharacterType>); |
112 | void copyASCIIStringUntil(const String&, size_t length); |
113 | bool copyBaseWindowsDriveLetter(const URL&); |
// parsedDataView overloads: a (start, length) range yields a StringView and
// a single position yields one UChar.
114 | StringView parsedDataView(size_t start, size_t length); |
115 | UChar parsedDataView(size_t position); |
116 | |
// IP address support. An IPv4 address is a single 32-bit value and an IPv6
// address is eight 16-bit pieces. The two error enums are declared here
// without enumerators; their enumerator lists are defined elsewhere.
117 | using IPv4Address = uint32_t; |
118 | void serializeIPv4(IPv4Address); |
119 | enum class IPv4ParsingError; |
120 | enum class IPv4PieceParsingError; |
// The IPv4 parsers return Expected<value, error>, i.e. either a parsed value
// or one of the error enums above. parseIPv4Piece() also reports through the
// bool& syntaxViolation out-parameter.
121 | template<typename CharacterTypeForSyntaxViolation, typename CharacterType> Expected<IPv4Address, IPv4ParsingError> parseIPv4Host(const CodePointIterator<CharacterTypeForSyntaxViolation>&, CodePointIterator<CharacterType>); |
122 | template<typename CharacterType> Expected<uint32_t, URLParser::IPv4PieceParsingError> parseIPv4Piece(CodePointIterator<CharacterType>&, bool& syntaxViolation); |
// NOTE(review): std::array is used here but <array> is not among the
// includes visible in this header; presumably it arrives transitively via
// the wtf headers. Consider including it directly.
123 | using IPv6Address = std::array<uint16_t, 8>; |
// The IPv6 parsers return Optional, so they have an empty result that
// callers must handle.
124 | template<typename CharacterType> Optional<IPv6Address> parseIPv6Host(CodePointIterator<CharacterType>); |
125 | template<typename CharacterType> Optional<uint32_t> parseIPv4PieceInsideIPv6(CodePointIterator<CharacterType>&); |
126 | template<typename CharacterType> Optional<IPv4Address> parseIPv4AddressInsideIPv6(CodePointIterator<CharacterType>); |
127 | void serializeIPv6Piece(uint16_t piece); |
// Note: takes the 8-element address by value, not by reference.
128 | void serializeIPv6(IPv6Address); |
129 | |
// URLPart is declared without enumerators here; its enumerator list is
// defined elsewhere. It selects a component of a URL for the two helpers below.
130 | enum class URLPart; |
// copyURLPartsUntil() receives the encoding pointer by reference
// (const URLTextEncoding*&), so it is able to re-point the caller's pointer.
131 | template<typename CharacterType> void copyURLPartsUntil(const URL& base, URLPart, const CodePointIterator<CharacterType>&, const URLTextEncoding*&); |
132 | static size_t urlLengthUntilPart(const URL&, URLPart); |
// Path helpers. The meaning of shouldPopPath()'s unsigned argument is not
// visible from this header; see the definition.
133 | void popPath(); |
134 | bool shouldPopPath(unsigned); |
135 | }; |
136 | |
137 | } |
138 | |