1 | /* |
2 | * Copyright (C) 2012-2019 Apple Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
24 | */ |
25 | |
26 | #include "config.h" |
27 | #include "LinkBuffer.h" |
28 | |
29 | #if ENABLE(ASSEMBLER) |
30 | |
31 | #include "CodeBlock.h" |
32 | #include "Disassembler.h" |
33 | #include "JITCode.h" |
34 | #include "JSCInlines.h" |
35 | #include "Options.h" |
36 | #include "WasmCompilationMode.h" |
37 | #include <wtf/CompilationThread.h> |
38 | |
39 | #if OS(LINUX) |
40 | #include "PerfLog.h" |
41 | #endif |
42 | |
43 | namespace JSC { |
44 | |
45 | bool shouldDumpDisassemblyFor(CodeBlock* codeBlock) |
46 | { |
47 | if (codeBlock && JITCode::isOptimizingJIT(codeBlock->jitType()) && Options::dumpDFGDisassembly()) |
48 | return true; |
49 | return Options::dumpDisassembly(); |
50 | } |
51 | |
52 | bool shouldDumpDisassemblyFor(Wasm::CompilationMode mode) |
53 | { |
54 | if (Options::asyncDisassembly() || Options::dumpDisassembly() || Options::dumpWasmDisassembly()) |
55 | return true; |
56 | switch (mode) { |
57 | case Wasm::CompilationMode::BBQMode: |
58 | return Options::dumpBBQDisassembly(); |
59 | case Wasm::CompilationMode::OMGMode: |
60 | case Wasm::CompilationMode::OMGForOSREntryMode: |
61 | return Options::dumpOMGDisassembly(); |
62 | default: |
63 | break; |
64 | } |
65 | return false; |
66 | } |
67 | |
68 | LinkBuffer::CodeRef<LinkBufferPtrTag> LinkBuffer::finalizeCodeWithoutDisassemblyImpl() |
69 | { |
70 | performFinalization(); |
71 | |
72 | ASSERT(m_didAllocate); |
73 | if (m_executableMemory) |
74 | return CodeRef<LinkBufferPtrTag>(*m_executableMemory); |
75 | |
76 | return CodeRef<LinkBufferPtrTag>::createSelfManagedCodeRef(m_code); |
77 | } |
78 | |
79 | LinkBuffer::CodeRef<LinkBufferPtrTag> LinkBuffer::finalizeCodeWithDisassemblyImpl(bool dumpDisassembly, const char* format, ...) |
80 | { |
81 | CodeRef<LinkBufferPtrTag> result = finalizeCodeWithoutDisassemblyImpl(); |
82 | |
83 | #if OS(LINUX) |
84 | if (Options::logJITCodeForPerf()) { |
85 | StringPrintStream out; |
86 | va_list argList; |
87 | va_start(argList, format); |
88 | va_start(argList, format); |
89 | out.vprintf(format, argList); |
90 | va_end(argList); |
91 | PerfLog::log(out.toCString(), result.code().untaggedExecutableAddress<const uint8_t*>(), result.size()); |
92 | } |
93 | #endif |
94 | |
95 | if (!dumpDisassembly || m_alreadyDisassembled) |
96 | return result; |
97 | |
98 | StringPrintStream out; |
99 | out.printf("Generated JIT code for " ); |
100 | va_list argList; |
101 | va_start(argList, format); |
102 | out.vprintf(format, argList); |
103 | va_end(argList); |
104 | out.printf(":\n" ); |
105 | |
106 | uint8_t* executableAddress = result.code().untaggedExecutableAddress<uint8_t*>(); |
107 | out.printf(" Code at [%p, %p):\n" , executableAddress, executableAddress + result.size()); |
108 | |
109 | CString = out.toCString(); |
110 | |
111 | if (Options::asyncDisassembly()) { |
112 | CodeRef<DisassemblyPtrTag> codeRefForDisassembly = result.retagged<DisassemblyPtrTag>(); |
113 | disassembleAsynchronously(header, WTFMove(codeRefForDisassembly), m_size, " " ); |
114 | return result; |
115 | } |
116 | |
117 | dataLog(header); |
118 | disassemble(result.retaggedCode<DisassemblyPtrTag>(), m_size, " " , WTF::dataFile()); |
119 | |
120 | return result; |
121 | } |
122 | |
123 | #if ENABLE(BRANCH_COMPACTION) |
124 | static ALWAYS_INLINE void recordLinkOffsets(AssemblerData& assemblerData, int32_t regionStart, int32_t regionEnd, int32_t offset) |
125 | { |
126 | int32_t ptr = regionStart / sizeof(int32_t); |
127 | const int32_t end = regionEnd / sizeof(int32_t); |
128 | int32_t* offsets = reinterpret_cast_ptr<int32_t*>(assemblerData.buffer()); |
129 | while (ptr < end) |
130 | offsets[ptr++] = offset; |
131 | } |
132 | |
// Copies the assembled code into its final location, shrinking ("compacting")
// branches that can use a shorter encoding, then links every recorded jump
// against its post-compaction target. InstructionType is the minimal
// instruction unit (uint16_t on Thumb-2, uint32_t on ARM64).
template <typename InstructionType>
void LinkBuffer::copyCompactAndLinkCode(MacroAssembler& macroAssembler, void* ownerUID, JITCompilationEffort effort)
{
    allocate(macroAssembler, ownerUID, effort);
    const size_t initialSize = macroAssembler.m_assembler.codeSize();
    if (didFailToAllocate())
        return;

    Vector<LinkRecord, 0, UnsafeVectorOverflow>& jumpsToLink = macroAssembler.jumpsToLink();
    // Take ownership of the assembler's buffer; after the copy below it is
    // reused by recordLinkOffsets() as scratch space for compaction offsets.
    m_assemblerStorage = macroAssembler.m_assembler.buffer().releaseAssemblerData();
    uint8_t* inData = reinterpret_cast<uint8_t*>(m_assemblerStorage.buffer());

    uint8_t* codeOutData = m_code.dataLocation<uint8_t*>();
#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
    // With fast RWX permission switching we write directly into the executable
    // mapping, and verify the copied stream against the assembler's hash.
    const uint32_t expectedFinalHash = macroAssembler.m_assembler.buffer().hash().finalHash();
    ARM64EHash verifyUncompactedHash;
    uint8_t* outData = codeOutData;
#else
    // Otherwise compact into a side buffer and performJITMemcpy it into place
    // at the end.
    AssemblerData outBuffer(m_size);
    uint8_t* outData = reinterpret_cast<uint8_t*>(outBuffer.buffer());
#endif
#if CPU(ARM64)
    RELEASE_ASSERT(roundUpToMultipleOf<sizeof(unsigned)>(outData) == outData);
    RELEASE_ASSERT(roundUpToMultipleOf<sizeof(unsigned)>(codeOutData) == codeOutData);
#endif

    // readPtr walks the original stream, writePtr the compacted stream;
    // readPtr - writePtr is the number of bytes saved so far.
    int readPtr = 0;
    int writePtr = 0;
    unsigned jumpCount = jumpsToLink.size();

#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
    // Flip this thread's JIT region to writable for the duration of the copy.
    os_thread_self_restrict_rwx_to_rw();
#endif

    if (m_shouldPerformBranchCompaction) {
        for (unsigned i = 0; i < jumpCount; ++i) {
            int offset = readPtr - writePtr;
            ASSERT(!(offset & 1));

            // Copy the instructions from the last jump to the current one.
            size_t regionSize = jumpsToLink[i].from() - readPtr;
            InstructionType* copySource = reinterpret_cast_ptr<InstructionType*>(inData + readPtr);
            InstructionType* copyEnd = reinterpret_cast_ptr<InstructionType*>(inData + readPtr + regionSize);
            InstructionType* copyDst = reinterpret_cast_ptr<InstructionType*>(outData + writePtr);
            ASSERT(!(regionSize % 2));
            ASSERT(!(readPtr % 2));
            ASSERT(!(writePtr % 2));
            while (copySource != copyEnd) {
                InstructionType insn = *copySource++;
#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
                static_assert(sizeof(InstructionType) == 4, "");
                verifyUncompactedHash.update(insn);
#endif
                *copyDst++ = insn;
            }
            recordLinkOffsets(m_assemblerStorage, readPtr, jumpsToLink[i].from(), offset);
            readPtr += regionSize;
            writePtr += regionSize;

            // Calculate absolute address of the jump target, in the case of backwards
            // branches we need to be precise, forward branches we are pessimistic
            const uint8_t* target;
            if (jumpsToLink[i].to() >= jumpsToLink[i].from())
                target = codeOutData + jumpsToLink[i].to() - offset; // Compensate for what we have collapsed so far
            else
                target = codeOutData + jumpsToLink[i].to() - executableOffsetFor(jumpsToLink[i].to());

            JumpLinkType jumpLinkType = MacroAssembler::computeJumpType(jumpsToLink[i], codeOutData + writePtr, target);
            // Compact branch if we can...
            if (MacroAssembler::canCompact(jumpsToLink[i].type())) {
                // Step back in the write stream
                int32_t delta = MacroAssembler::jumpSizeDelta(jumpsToLink[i].type(), jumpLinkType);
                if (delta) {
                    writePtr -= delta;
                    recordLinkOffsets(m_assemblerStorage, jumpsToLink[i].from() - delta, readPtr, readPtr - writePtr);
                }
            }
            // Remember the jump's location in the compacted stream for the
            // final link pass below.
            jumpsToLink[i].setFrom(writePtr);
        }
    } else {
        if (!ASSERT_DISABLED) {
            for (unsigned i = 0; i < jumpCount; ++i)
                ASSERT(!MacroAssembler::canCompact(jumpsToLink[i].type()));
        }
    }

    // Copy everything after the last jump
    {
        InstructionType* dst = bitwise_cast<InstructionType*>(outData + writePtr);
        InstructionType* src = bitwise_cast<InstructionType*>(inData + readPtr);
        size_t bytes = initialSize - readPtr;

        RELEASE_ASSERT(bitwise_cast<uintptr_t>(dst) % sizeof(InstructionType) == 0);
        RELEASE_ASSERT(bitwise_cast<uintptr_t>(src) % sizeof(InstructionType) == 0);
        RELEASE_ASSERT(bytes % sizeof(InstructionType) == 0);

        for (size_t i = 0; i < bytes; i += sizeof(InstructionType)) {
            InstructionType insn = *src++;
#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
            verifyUncompactedHash.update(insn);
#endif
            *dst++ = insn;
        }
    }

#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
    // The instruction stream we just wrote must hash to exactly what the
    // assembler produced; any mismatch means memory corruption, so crash.
    if (verifyUncompactedHash.finalHash() != expectedFinalHash) {
        dataLogLn("Hashes don't match: ", RawPointer(bitwise_cast<void*>(static_cast<uintptr_t>(verifyUncompactedHash.finalHash()))), " ", RawPointer(bitwise_cast<void*>(static_cast<uintptr_t>(expectedFinalHash))));
        dataLogLn("Crashing!");
        CRASH();
    }
#endif

    // Record the offset for the tail region after the last jump.
    recordLinkOffsets(m_assemblerStorage, readPtr, initialSize, readPtr - writePtr);

    // Final pass: patch every jump to its compaction-adjusted target.
    for (unsigned i = 0; i < jumpCount; ++i) {
        uint8_t* location = codeOutData + jumpsToLink[i].from();
        uint8_t* target = codeOutData + jumpsToLink[i].to() - executableOffsetFor(jumpsToLink[i].to());
#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
        MacroAssembler::link<memcpy>(jumpsToLink[i], outData + jumpsToLink[i].from(), location, target);
#else
        MacroAssembler::link<performJITMemcpy>(jumpsToLink[i], outData + jumpsToLink[i].from(), location, target);
#endif
    }

    size_t compactSize = writePtr + initialSize - readPtr;
    if (!m_executableMemory) {
        // Using a caller-supplied buffer we cannot shrink it, so pad the
        // reclaimed space with nops instead.
        size_t nopSizeInBytes = initialSize - compactSize;
#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
        Assembler::fillNops<memcpy>(outData + compactSize, nopSizeInBytes);
#else
        Assembler::fillNops<performJITMemcpy>(outData + compactSize, nopSizeInBytes);
#endif
    }

#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
    // Done writing; restore execute-only permission for this thread.
    os_thread_self_restrict_rwx_to_rx();
#endif

    if (m_executableMemory) {
        m_size = compactSize;
        m_executableMemory->shrink(m_size);
    }

#if !CPU(ARM64E) || !ENABLE(FAST_JIT_PERMISSIONS)
    // Side-buffer path: publish the compacted code into executable memory.
    ASSERT(codeOutData != outData);
    performJITMemcpy(codeOutData, outData, m_size);
#else
    ASSERT(codeOutData == outData);
    if (UNLIKELY(Options::dumpJITMemoryPath()))
        dumpJITMemory(outData, outData, m_size);
#endif

    jumpsToLink.clear();

#if DUMP_LINK_STATISTICS
    dumpLinkStatistics(codeOutData, initialSize, m_size);
#endif
#if DUMP_CODE
    dumpCode(codeOutData, m_size);
#endif
}
295 | #endif // ENABLE(BRANCH_COMPACTION) |
296 | |
297 | |
// Copies the assembled code out of the MacroAssembler into this buffer's
// memory, compacting branches on architectures that support it, and takes
// over the assembler's deferred link tasks (run later in performFinalization).
void LinkBuffer::linkCode(MacroAssembler& macroAssembler, void* ownerUID, JITCompilationEffort effort)
{
    // Ensure that the end of the last invalidation point does not extend beyond the end of the buffer.
    macroAssembler.label();

#if !ENABLE(BRANCH_COMPACTION)
#if defined(ASSEMBLER_HAS_CONSTANT_POOL) && ASSEMBLER_HAS_CONSTANT_POOL
    // Flush any pending constant-pool entries before the code size is measured.
    macroAssembler.m_assembler.buffer().flushConstantPool(false);
#endif
    allocate(macroAssembler, ownerUID, effort);
    if (!m_didAllocate)
        return;
    ASSERT(m_code);
    AssemblerBuffer& buffer = macroAssembler.m_assembler.buffer();
    void* code = m_code.dataLocation();
#if CPU(ARM64)
    RELEASE_ASSERT(roundUpToMultipleOf<Assembler::instructionSize>(code) == code);
#endif
    // Straight copy into place; no branch compaction on this architecture.
    performJITMemcpy(code, buffer.data(), buffer.codeSize());
#if CPU(MIPS)
    // MIPS jumps are position-dependent; re-point them at the new base address.
    macroAssembler.m_assembler.relocateJumps(buffer.data(), code);
#endif
#elif CPU(ARM_THUMB2)
    copyCompactAndLinkCode<uint16_t>(macroAssembler, ownerUID, effort);
#elif CPU(ARM64)
    copyCompactAndLinkCode<uint32_t>(macroAssembler, ownerUID, effort);
#endif // !ENABLE(BRANCH_COMPACTION)

    // Steal the assembler's link tasks; they run in performFinalization().
    m_linkTasks = WTFMove(macroAssembler.m_linkTasks);
}
328 | |
// Obtains memory for the assembled code. If a code pointer was supplied up
// front (re-using an existing allocation), the generated code must fit in it
// and is padded with nops; otherwise fresh executable memory is allocated.
// On failure, m_didAllocate stays false.
void LinkBuffer::allocate(MacroAssembler& macroAssembler, void* ownerUID, JITCompilationEffort effort)
{
    size_t initialSize = macroAssembler.m_assembler.codeSize();
    if (m_code) {
        // Pre-supplied buffer: fail (leave m_didAllocate false) if the code
        // doesn't fit, otherwise nop-fill the slack.
        if (initialSize > m_size)
            return;

        size_t nopsToFillInBytes = m_size - initialSize;
        macroAssembler.emitNops(nopsToFillInBytes);
        m_didAllocate = true;
        return;
    }

    // Pad the code up to the allocator's granule with breakpoint instructions
    // so nothing can fall through past the intended end of the code.
    while (initialSize % jitAllocationGranule) {
        macroAssembler.breakpoint();
        initialSize = macroAssembler.m_assembler.codeSize();
    }

    m_executableMemory = ExecutableAllocator::singleton().allocate(initialSize, ownerUID, effort);
    if (!m_executableMemory)
        return;
    m_code = MacroAssemblerCodePtr<LinkBufferPtrTag>(m_executableMemory->start().retaggedPtr<LinkBufferPtrTag>());
    m_size = initialSize;
    m_didAllocate = true;
}
354 | |
// Runs the deferred link tasks now that the code's final address is known,
// then flushes the instruction cache so the new code is safe to execute.
// Must be called exactly once, and not from a compilation thread.
void LinkBuffer::performFinalization()
{
    for (auto& task : m_linkTasks)
        task->run(*this);

#ifndef NDEBUG
    ASSERT(!isCompilationThread());
    ASSERT(!m_completed);
    ASSERT(isValid());
    m_completed = true; // Guards against double finalization (debug builds only).
#endif

    MacroAssembler::cacheFlush(code(), m_size);
}
369 | |
370 | #if DUMP_LINK_STATISTICS |
371 | void LinkBuffer::dumpLinkStatistics(void* code, size_t initializeSize, size_t finalSize) |
372 | { |
373 | static unsigned linkCount = 0; |
374 | static unsigned totalInitialSize = 0; |
375 | static unsigned totalFinalSize = 0; |
376 | linkCount++; |
377 | totalInitialSize += initialSize; |
378 | totalFinalSize += finalSize; |
379 | dataLogF("link %p: orig %u, compact %u (delta %u, %.2f%%)\n" , |
380 | code, static_cast<unsigned>(initialSize), static_cast<unsigned>(finalSize), |
381 | static_cast<unsigned>(initialSize - finalSize), |
382 | 100.0 * (initialSize - finalSize) / initialSize); |
383 | dataLogF("\ttotal %u: orig %u, compact %u (delta %u, %.2f%%)\n" , |
384 | linkCount, totalInitialSize, totalFinalSize, totalInitialSize - totalFinalSize, |
385 | 100.0 * (totalInitialSize - totalFinalSize) / totalInitialSize); |
386 | } |
387 | #endif |
388 | |
389 | #if DUMP_CODE |
// Dumps the finished machine code as assemblable source for offline
// inspection. Only implemented for ARM Thumb-2; a no-op elsewhere.
void LinkBuffer::dumpCode(void* code, size_t size)
{
#if CPU(ARM_THUMB2)
    // Dump the generated code in an asm file format that can be assembled and then disassembled
    // for debugging purposes. For example, save this output as jit.s:
    // gcc -arch armv7 -c jit.s
    // otool -tv jit.o
    static unsigned codeCount = 0; // Gives each dumped blob a unique symbol name.
    unsigned short* tcode = static_cast<unsigned short*>(code);
    size_t tsize = size / sizeof(short);
    char nameBuf[128];
    snprintf(nameBuf, sizeof(nameBuf), "_jsc_jit%u", codeCount++);
    dataLogF("\t.syntax unified\n"
        "\t.section\t__TEXT,__text,regular,pure_instructions\n"
        "\t.globl\t%s\n"
        "\t.align 2\n"
        "\t.code 16\n"
        "\t.thumb_func\t%s\n"
        "# %p\n"
        "%s:\n", nameBuf, nameBuf, code, nameBuf);

    // Emit each 16-bit Thumb instruction unit as a raw .short directive.
    for (unsigned i = 0; i < tsize; i++)
        dataLogF("\t.short\t0x%x\n", tcode[i]);
#endif
}
415 | #endif |
416 | |
417 | } // namespace JSC |
418 | |
419 | #endif // ENABLE(ASSEMBLER) |
420 | |