1 | /* |
2 | * Copyright (C) 2011, 2012 Apple Inc. All rights reserved. |
3 | * Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies). |
4 | * |
5 | * This library is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU Library General Public |
7 | * License as published by the Free Software Foundation; either |
8 | * version 2 of the License, or (at your option) any later version. |
9 | * |
10 | * This library is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | * Library General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU Library General Public License |
16 | * along with this library; see the file COPYING.LIB. If not, write to |
17 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
18 | * Boston, MA 02110-1301, USA. |
19 | * |
20 | */ |
21 | |
22 | #pragma once |
23 | |
24 | #include <stdint.h> |
25 | #include <unicode/utypes.h> |
26 | #include <wtf/StdLibExtras.h> |
27 | #include <wtf/text/LChar.h> |
28 | |
29 | #if CPU(X86_SSE2) |
30 | #include <emmintrin.h> |
31 | #endif |
32 | |
33 | namespace WTF { |
34 | |
// Returns true when none of the address bits selected by |mask| are set in
// |pointer|. For an alignment of N bytes (N a power of two) pass N - 1.
template <uintptr_t mask>
inline bool isAlignedTo(const void* pointer)
{
    const uintptr_t address = reinterpret_cast<uintptr_t>(pointer);
    return (address & mask) == 0;
}
40 | |
// A pointer is assumed to be exactly one "machine word" wide, which makes
// uintptr_t an integer type of machine-word size as well.
using MachineWord = uintptr_t;

// Low address bits that must all be zero for a machine-word-aligned address.
const uintptr_t machineWordAlignmentMask = sizeof(MachineWord) - 1;
45 | |
46 | inline bool isAlignedToMachineWord(const void* pointer) |
47 | { |
48 | return isAlignedTo<machineWordAlignmentMask>(pointer); |
49 | } |
50 | |
51 | template<typename T> inline T* alignToMachineWord(T* pointer) |
52 | { |
53 | return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(pointer) & ~machineWordAlignmentMask); |
54 | } |
55 | |
56 | template<size_t size, typename CharacterType> struct NonASCIIMask; |
57 | template<> struct NonASCIIMask<4, UChar> { |
58 | static inline uint32_t value() { return 0xFF80FF80U; } |
59 | }; |
60 | template<> struct NonASCIIMask<4, LChar> { |
61 | static inline uint32_t value() { return 0x80808080U; } |
62 | }; |
63 | template<> struct NonASCIIMask<8, UChar> { |
64 | static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; } |
65 | }; |
66 | template<> struct NonASCIIMask<8, LChar> { |
67 | static inline uint64_t value() { return 0x8080808080808080ULL; } |
68 | }; |
69 | |
70 | |
71 | template<typename CharacterType> |
72 | inline bool isAllASCII(MachineWord word) |
73 | { |
74 | return !(word & NonASCIIMask<sizeof(MachineWord), CharacterType>::value()); |
75 | } |
76 | |
77 | // Note: This function assume the input is likely all ASCII, and |
78 | // does not leave early if it is not the case. |
79 | template<typename CharacterType> |
80 | inline bool charactersAreAllASCII(const CharacterType* characters, size_t length) |
81 | { |
82 | MachineWord allCharBits = 0; |
83 | const CharacterType* end = characters + length; |
84 | |
85 | // Prologue: align the input. |
86 | while (!isAlignedToMachineWord(characters) && characters != end) { |
87 | allCharBits |= *characters; |
88 | ++characters; |
89 | } |
90 | |
91 | // Compare the values of CPU word size. |
92 | const CharacterType* wordEnd = alignToMachineWord(end); |
93 | const size_t loopIncrement = sizeof(MachineWord) / sizeof(CharacterType); |
94 | while (characters < wordEnd) { |
95 | allCharBits |= *(reinterpret_cast_ptr<const MachineWord*>(characters)); |
96 | characters += loopIncrement; |
97 | } |
98 | |
99 | // Process the remaining bytes. |
100 | while (characters != end) { |
101 | allCharBits |= *characters; |
102 | ++characters; |
103 | } |
104 | |
105 | MachineWord nonASCIIBitMask = NonASCIIMask<sizeof(MachineWord), CharacterType>::value(); |
106 | return !(allCharBits & nonASCIIBitMask); |
107 | } |
108 | |
109 | inline void copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length) |
110 | { |
111 | #if CPU(X86_SSE2) |
112 | const uintptr_t memoryAccessSize = 16; // Memory accesses on 16 byte (128 bit) alignment |
113 | const uintptr_t memoryAccessMask = memoryAccessSize - 1; |
114 | |
115 | size_t i = 0; |
116 | for (;i < length && !isAlignedTo<memoryAccessMask>(&source[i]); ++i) { |
117 | ASSERT(!(source[i] & 0xff00)); |
118 | destination[i] = static_cast<LChar>(source[i]); |
119 | } |
120 | |
121 | const uintptr_t sourceLoadSize = 32; // Process 32 bytes (16 UChars) each iteration |
122 | const size_t ucharsPerLoop = sourceLoadSize / sizeof(UChar); |
123 | if (length > ucharsPerLoop) { |
124 | const size_t endLength = length - ucharsPerLoop + 1; |
125 | for (; i < endLength; i += ucharsPerLoop) { |
126 | #ifndef NDEBUG |
127 | for (unsigned checkIndex = 0; checkIndex < ucharsPerLoop; ++checkIndex) |
128 | ASSERT(!(source[i+checkIndex] & 0xff00)); |
129 | #endif |
130 | __m128i first8UChars = _mm_load_si128(reinterpret_cast<const __m128i*>(&source[i])); |
131 | __m128i second8UChars = _mm_load_si128(reinterpret_cast<const __m128i*>(&source[i+8])); |
132 | __m128i packedChars = _mm_packus_epi16(first8UChars, second8UChars); |
133 | _mm_storeu_si128(reinterpret_cast<__m128i*>(&destination[i]), packedChars); |
134 | } |
135 | } |
136 | |
137 | for (; i < length; ++i) { |
138 | ASSERT(!(source[i] & 0xff00)); |
139 | destination[i] = static_cast<LChar>(source[i]); |
140 | } |
141 | #elif COMPILER(GCC_COMPATIBLE) && CPU(ARM64) && !defined(__ILP32__) && defined(NDEBUG) |
142 | const LChar* const end = destination + length; |
143 | const uintptr_t memoryAccessSize = 16; |
144 | |
145 | if (length >= memoryAccessSize) { |
146 | const uintptr_t memoryAccessMask = memoryAccessSize - 1; |
147 | |
148 | // Vector interleaved unpack, we only store the lower 8 bits. |
149 | const uintptr_t lengthLeft = end - destination; |
150 | const LChar* const simdEnd = destination + (lengthLeft & ~memoryAccessMask); |
151 | do { |
152 | asm("ld2 { v0.16B, v1.16B }, [%[SOURCE]], #32\n\t" |
153 | "st1 { v0.16B }, [%[DESTINATION]], #16\n\t" |
154 | : [SOURCE]"+r" (source), [DESTINATION]"+r" (destination) |
155 | : |
156 | : "memory" , "v0" , "v1" ); |
157 | } while (destination != simdEnd); |
158 | } |
159 | |
160 | while (destination != end) |
161 | *destination++ = static_cast<LChar>(*source++); |
162 | #elif COMPILER(GCC_COMPATIBLE) && CPU(ARM_NEON) && !(CPU(BIG_ENDIAN) || CPU(MIDDLE_ENDIAN)) && defined(NDEBUG) |
163 | const LChar* const end = destination + length; |
164 | const uintptr_t memoryAccessSize = 8; |
165 | |
166 | if (length >= (2 * memoryAccessSize) - 1) { |
167 | // Prefix: align dst on 64 bits. |
168 | const uintptr_t memoryAccessMask = memoryAccessSize - 1; |
169 | while (!isAlignedTo<memoryAccessMask>(destination)) |
170 | *destination++ = static_cast<LChar>(*source++); |
171 | |
172 | // Vector interleaved unpack, we only store the lower 8 bits. |
173 | const uintptr_t lengthLeft = end - destination; |
174 | const LChar* const simdEnd = end - (lengthLeft % memoryAccessSize); |
175 | do { |
176 | asm("vld2.8 { d0-d1 }, [%[SOURCE]] !\n\t" |
177 | "vst1.8 { d0 }, [%[DESTINATION],:64] !\n\t" |
178 | : [SOURCE]"+r" (source), [DESTINATION]"+r" (destination) |
179 | : |
180 | : "memory" , "d0" , "d1" ); |
181 | } while (destination != simdEnd); |
182 | } |
183 | |
184 | while (destination != end) |
185 | *destination++ = static_cast<LChar>(*source++); |
186 | #else |
187 | for (size_t i = 0; i < length; ++i) { |
188 | ASSERT(!(source[i] & 0xff00)); |
189 | destination[i] = static_cast<LChar>(source[i]); |
190 | } |
191 | #endif |
192 | } |
193 | |
194 | } // namespace WTF |
195 | |
196 | using WTF::charactersAreAllASCII; |
197 | |