1 | /* |
2 | Copyright (C) 1999 Lars Knoll ([email protected]) |
3 | Copyright (C) 2006 Alexey Proskuryakov ([email protected]) |
4 | Copyright (C) 2006-2017 Apple Inc. All rights reserved. |
5 | |
6 | This library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Library General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2 of the License, or (at your option) any later version. |
10 | |
11 | This library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Library General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Library General Public License |
17 | along with this library; see the file COPYING.LIB. If not, write to |
18 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
19 | Boston, MA 02110-1301, USA. |
20 | |
21 | */ |
22 | |
23 | #pragma once |
24 | |
25 | #include "TextEncoding.h" |
26 | #include <wtf/RefCounted.h> |
27 | |
28 | namespace WebCore { |
29 | |
30 | class HTMLMetaCharsetParser; |
31 | class TextCodec; |
32 | |
33 | class TextResourceDecoder : public RefCounted<TextResourceDecoder> { |
34 | public: |
35 | enum EncodingSource { |
36 | DefaultEncoding, |
37 | AutoDetectedEncoding, |
38 | , |
39 | EncodingFromMetaTag, |
40 | EncodingFromCSSCharset, |
41 | , |
42 | UserChosenEncoding, |
43 | EncodingFromParentFrame |
44 | }; |
45 | |
46 | WEBCORE_EXPORT static Ref<TextResourceDecoder> create(const String& mimeType, const TextEncoding& defaultEncoding = { }, bool usesEncodingDetector = false); |
47 | WEBCORE_EXPORT ~TextResourceDecoder(); |
48 | |
49 | void setEncoding(const TextEncoding&, EncodingSource); |
50 | const TextEncoding& encoding() const { return m_encoding; } |
51 | const TextEncoding* encodingForURLParsing(); |
52 | |
53 | bool hasEqualEncodingForCharset(const String& charset) const; |
54 | |
55 | WEBCORE_EXPORT String decode(const char* data, size_t length); |
56 | WEBCORE_EXPORT String flush(); |
57 | |
58 | WEBCORE_EXPORT String decodeAndFlush(const char* data, size_t length); |
59 | |
60 | void setHintEncoding(const TextResourceDecoder* parentFrameDecoder); |
61 | |
62 | void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } |
63 | bool sawError() const { return m_sawError; } |
64 | |
65 | private: |
66 | TextResourceDecoder(const String& mimeType, const TextEncoding& defaultEncoding, bool usesEncodingDetector); |
67 | |
68 | enum ContentType { PlainText, HTML, XML, CSS }; // PlainText only checks for BOM. |
69 | static ContentType determineContentType(const String& mimeType); |
70 | static const TextEncoding& defaultEncoding(ContentType, const TextEncoding& defaultEncoding); |
71 | |
72 | size_t checkForBOM(const char*, size_t); |
73 | bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); |
74 | bool checkForHeadCharset(const char*, size_t, bool& movedDataToBuffer); |
75 | bool checkForMetaCharset(const char*, size_t); |
76 | void detectJapaneseEncoding(const char*, size_t); |
77 | bool shouldAutoDetect() const; |
78 | |
79 | ContentType m_contentType; |
80 | TextEncoding m_encoding; |
81 | std::unique_ptr<TextCodec> m_codec; |
82 | std::unique_ptr<HTMLMetaCharsetParser> m_charsetParser; |
83 | EncodingSource m_source { DefaultEncoding }; |
84 | const char* m_parentFrameAutoDetectedEncoding { nullptr }; |
85 | Vector<char> m_buffer; |
86 | bool m_checkedForBOM { false }; |
87 | bool m_checkedForCSSCharset { false }; |
88 | bool m_checkedForHeadCharset { false }; |
89 | bool m_useLenientXMLDecoding { false }; // Don't stop on XML decoding errors. |
90 | bool m_sawError { false }; |
91 | bool m_usesEncodingDetector { false }; |
92 | }; |
93 | |
94 | inline void TextResourceDecoder::setHintEncoding(const TextResourceDecoder* parentFrameDecoder) |
95 | { |
96 | if (parentFrameDecoder && parentFrameDecoder->m_source == AutoDetectedEncoding) |
97 | m_parentFrameAutoDetectedEncoding = parentFrameDecoder->encoding().name(); |
98 | } |
99 | |
100 | } // namespace WebCore |
101 | |