1 | /* |
2 | * Copyright (C) 2016-2018 Apple Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
14 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
15 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
17 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
18 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
19 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
20 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
21 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
22 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
23 | * THE POSSIBILITY OF SUCH DAMAGE. |
24 | */ |
25 | |
26 | #pragma once |
27 | |
28 | #include <wtf/Expected.h> |
29 | #include <wtf/Forward.h> |
30 | #include <wtf/URL.h> |
31 | |
32 | struct UIDNA; |
33 | |
34 | namespace WTF { |
35 | |
36 | template<typename CharacterType> class CodePointIterator; |
37 | |
38 | class URLParser { |
39 | public: |
40 | WTF_EXPORT_PRIVATE static bool allValuesEqual(const URL&, const URL&); |
41 | WTF_EXPORT_PRIVATE static bool internalValuesConsistent(const URL&); |
42 | |
43 | using URLEncodedForm = Vector<WTF::KeyValuePair<String, String>>; |
44 | WTF_EXPORT_PRIVATE static URLEncodedForm parseURLEncodedForm(StringView); |
45 | WTF_EXPORT_PRIVATE static String serialize(const URLEncodedForm&); |
46 | |
47 | WTF_EXPORT_PRIVATE static bool isSpecialScheme(const String& scheme); |
48 | WTF_EXPORT_PRIVATE static Optional<String> maybeCanonicalizeScheme(const String& scheme); |
49 | |
50 | static const UIDNA& internationalDomainNameTranscoder(); |
51 | static bool isInUserInfoEncodeSet(UChar); |
52 | |
53 | static Optional<uint16_t> defaultPortForProtocol(StringView); |
54 | |
55 | private: |
56 | URLParser(const String&, const URL& = { }, const URLTextEncoding* = nullptr); |
57 | URL result() { return m_url; } |
58 | |
59 | friend class URL; |
60 | |
61 | URL m_url; |
62 | Vector<LChar> m_asciiBuffer; |
63 | bool m_urlIsSpecial { false }; |
64 | bool m_urlIsFile { false }; |
65 | bool m_hostHasPercentOrNonASCII { false }; |
66 | String m_inputString; |
67 | const void* m_inputBegin { nullptr }; |
68 | |
69 | bool m_didSeeSyntaxViolation { false }; |
70 | static constexpr size_t defaultInlineBufferSize = 2048; |
71 | using LCharBuffer = Vector<LChar, defaultInlineBufferSize>; |
72 | |
73 | template<typename CharacterType> void parse(const CharacterType*, const unsigned length, const URL&, const URLTextEncoding*); |
74 | template<typename CharacterType> void parseAuthority(CodePointIterator<CharacterType>); |
75 | template<typename CharacterType> bool parseHostAndPort(CodePointIterator<CharacterType>); |
76 | template<typename CharacterType> bool parsePort(CodePointIterator<CharacterType>&); |
77 | |
78 | void failure(); |
79 | enum class ReportSyntaxViolation { No, Yes }; |
80 | template<typename CharacterType, ReportSyntaxViolation reportSyntaxViolation = ReportSyntaxViolation::Yes> |
81 | void advance(CodePointIterator<CharacterType>& iterator) { advance<CharacterType, reportSyntaxViolation>(iterator, iterator); } |
82 | template<typename CharacterType, ReportSyntaxViolation = ReportSyntaxViolation::Yes> |
83 | void advance(CodePointIterator<CharacterType>&, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition); |
84 | template<typename CharacterType> bool takesTwoAdvancesUntilEnd(CodePointIterator<CharacterType>); |
85 | template<typename CharacterType> void syntaxViolation(const CodePointIterator<CharacterType>&); |
86 | template<typename CharacterType> bool isPercentEncodedDot(CodePointIterator<CharacterType>); |
87 | template<typename CharacterType> bool isWindowsDriveLetter(CodePointIterator<CharacterType>); |
88 | template<typename CharacterType> bool isSingleDotPathSegment(CodePointIterator<CharacterType>); |
89 | template<typename CharacterType> bool isDoubleDotPathSegment(CodePointIterator<CharacterType>); |
90 | template<typename CharacterType> bool shouldCopyFileURL(CodePointIterator<CharacterType>); |
91 | template<typename CharacterType> bool checkLocalhostCodePoint(CodePointIterator<CharacterType>&, UChar32); |
92 | template<typename CharacterType> bool isAtLocalhost(CodePointIterator<CharacterType>); |
93 | bool isLocalhost(StringView); |
94 | template<typename CharacterType> void consumeSingleDotPathSegment(CodePointIterator<CharacterType>&); |
95 | template<typename CharacterType> void consumeDoubleDotPathSegment(CodePointIterator<CharacterType>&); |
96 | template<typename CharacterType> void appendWindowsDriveLetter(CodePointIterator<CharacterType>&); |
97 | template<typename CharacterType> size_t currentPosition(const CodePointIterator<CharacterType>&); |
98 | template<typename UnsignedIntegerType> void appendNumberToASCIIBuffer(UnsignedIntegerType); |
99 | template<bool(*isInCodeSet)(UChar32), typename CharacterType> void utf8PercentEncode(const CodePointIterator<CharacterType>&); |
100 | template<typename CharacterType> void utf8QueryEncode(const CodePointIterator<CharacterType>&); |
101 | template<typename CharacterType> Optional<LCharBuffer> domainToASCII(StringImpl&, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition); |
102 | template<typename CharacterType> LCharBuffer percentDecode(const LChar*, size_t, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition); |
103 | static LCharBuffer percentDecode(const LChar*, size_t); |
104 | static Optional<String> formURLDecode(StringView input); |
105 | static bool hasForbiddenHostCodePoint(const LCharBuffer&); |
106 | void percentEncodeByte(uint8_t); |
107 | void appendToASCIIBuffer(UChar32); |
108 | void appendToASCIIBuffer(const char*, size_t); |
109 | void appendToASCIIBuffer(const LChar* characters, size_t size) { appendToASCIIBuffer(reinterpret_cast<const char*>(characters), size); } |
110 | template<typename CharacterType> void encodeNonUTF8Query(const Vector<UChar>& source, const URLTextEncoding&, CodePointIterator<CharacterType>); |
111 | void copyASCIIStringUntil(const String&, size_t length); |
112 | bool copyBaseWindowsDriveLetter(const URL&); |
113 | StringView parsedDataView(size_t start, size_t length); |
114 | UChar parsedDataView(size_t position); |
115 | |
116 | using IPv4Address = uint32_t; |
117 | void serializeIPv4(IPv4Address); |
118 | enum class IPv4ParsingError; |
119 | enum class IPv4PieceParsingError; |
120 | template<typename CharacterTypeForSyntaxViolation, typename CharacterType> Expected<IPv4Address, IPv4ParsingError> parseIPv4Host(const CodePointIterator<CharacterTypeForSyntaxViolation>&, CodePointIterator<CharacterType>); |
121 | template<typename CharacterType> Expected<uint32_t, URLParser::IPv4PieceParsingError> parseIPv4Piece(CodePointIterator<CharacterType>&, bool& syntaxViolation); |
122 | using IPv6Address = std::array<uint16_t, 8>; |
123 | template<typename CharacterType> Optional<IPv6Address> parseIPv6Host(CodePointIterator<CharacterType>); |
124 | template<typename CharacterType> Optional<uint32_t> parseIPv4PieceInsideIPv6(CodePointIterator<CharacterType>&); |
125 | template<typename CharacterType> Optional<IPv4Address> parseIPv4AddressInsideIPv6(CodePointIterator<CharacterType>); |
126 | void serializeIPv6Piece(uint16_t piece); |
127 | void serializeIPv6(IPv6Address); |
128 | |
129 | enum class URLPart; |
130 | template<typename CharacterType> void copyURLPartsUntil(const URL& base, URLPart, const CodePointIterator<CharacterType>&, const URLTextEncoding*&); |
131 | static size_t urlLengthUntilPart(const URL&, URLPart); |
132 | void popPath(); |
133 | bool shouldPopPath(unsigned); |
134 | }; |
135 | |
136 | } |
137 | |