/*
 * Copyright (C) 2012-2019 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "LinkBuffer.h"

#if ENABLE(ASSEMBLER)

#include "CodeBlock.h"
#include "Disassembler.h"
#include "JITCode.h"
#include "JSCInlines.h"
#include "Options.h"
#include <wtf/CompilationThread.h>

#if OS(LINUX)
#include "PerfLog.h"
#endif

namespace JSC {

bool shouldDumpDisassemblyFor(CodeBlock* codeBlock)
{
    if (codeBlock && JITCode::isOptimizingJIT(codeBlock->jitType()) && Options::dumpDFGDisassembly())
        return true;
    return Options::dumpDisassembly();
}

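// Finalization runs any deferred link tasks and flushes the instruction
// cache, then hands the code to the returned CodeRef: if this buffer
// allocated its own executable memory, the CodeRef keeps that allocation
// alive; otherwise it wraps caller-managed memory.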
LinkBuffer::CodeRef<LinkBufferPtrTag> LinkBuffer::finalizeCodeWithoutDisassemblyImpl()
{
    performFinalization();

    ASSERT(m_didAllocate);
    if (m_executableMemory)
        return CodeRef<LinkBufferPtrTag>(*m_executableMemory);

    return CodeRef<LinkBufferPtrTag>::createSelfManagedCodeRef(m_code);
}

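// The variadic format string names the generated code in perf logs and in
// the disassembly header. Callers normally reach this through the
// FINALIZE_CODE / FINALIZE_CODE_IF macros in LinkBuffer.h rather than
// calling it directly; an illustrative (not prescriptive) call site:
//
//     LinkBuffer linkBuffer(jit, codeBlock, JITCompilationCanFail);
//     if (!linkBuffer.didFailToAllocate())
//         codeRef = FINALIZE_CODE(linkBuffer, JSEntryPtrTag, "JIT code for %s", name);
//
// where the tag and description are call-site specific.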
LinkBuffer::CodeRef<LinkBufferPtrTag> LinkBuffer::finalizeCodeWithDisassemblyImpl(bool dumpDisassembly, const char* format, ...)
{
    CodeRef<LinkBufferPtrTag> result = finalizeCodeWithoutDisassemblyImpl();

#if OS(LINUX)
    if (Options::logJITCodeForPerf()) {
        StringPrintStream out;
        va_list argList;
        va_start(argList, format);
        out.vprintf(format, argList);
        va_end(argList);
        PerfLog::log(out.toCString(), result.code().untaggedExecutableAddress<const uint8_t*>(), result.size());
    }
#endif

    if (!dumpDisassembly || m_alreadyDisassembled)
        return result;

    StringPrintStream out;
    out.printf("Generated JIT code for ");
    va_list argList;
    va_start(argList, format);
    out.vprintf(format, argList);
    va_end(argList);
    out.printf(":\n");

    uint8_t* executableAddress = result.code().untaggedExecutableAddress<uint8_t*>();
    out.printf("    Code at [%p, %p):\n", executableAddress, executableAddress + result.size());

    CString header = out.toCString();

    if (Options::asyncDisassembly()) {
        CodeRef<DisassemblyPtrTag> codeRefForDisassembly = result.retagged<DisassemblyPtrTag>();
        disassembleAsynchronously(header, WTFMove(codeRefForDisassembly), m_size, "    ");
        return result;
    }

    dataLog(header);
    disassemble(result.retaggedCode<DisassemblyPtrTag>(), m_size, "    ", WTF::dataFile());

    return result;
}

#if ENABLE(BRANCH_COMPACTION)
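// Branch compaction repurposes the original assembler buffer, as it is
// consumed, as a side table of int32_t entries, one per 4-byte region of
// pre-compaction code. Each entry records how far that region has shifted
// left, so executableOffsetFor() can translate a pre-compaction offset into
// its post-compaction location.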
static ALWAYS_INLINE void recordLinkOffsets(AssemblerData& assemblerData, int32_t regionStart, int32_t regionEnd, int32_t offset)
{
    int32_t ptr = regionStart / sizeof(int32_t);
    const int32_t end = regionEnd / sizeof(int32_t);
    int32_t* offsets = reinterpret_cast_ptr<int32_t*>(assemblerData.buffer());
    while (ptr < end)
        offsets[ptr++] = offset;
}

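// Compaction walks the unlinked code with two cursors: readPtr tracks the
// next byte of the original stream, writePtr the next byte of the compacted
// output. Instruction runs between branches are copied verbatim; at each
// branch we may choose a shorter encoding, letting writePtr fall behind
// readPtr and shifting all later code left.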
template <typename InstructionType>
void LinkBuffer::copyCompactAndLinkCode(MacroAssembler& macroAssembler, void* ownerUID, JITCompilationEffort effort)
{
    allocate(macroAssembler, ownerUID, effort);
    const size_t initialSize = macroAssembler.m_assembler.codeSize();
    if (didFailToAllocate())
        return;

    Vector<LinkRecord, 0, UnsafeVectorOverflow>& jumpsToLink = macroAssembler.jumpsToLink();
    m_assemblerStorage = macroAssembler.m_assembler.buffer().releaseAssemblerData();
    uint8_t* inData = reinterpret_cast<uint8_t*>(m_assemblerStorage.buffer());

    uint8_t* codeOutData = m_code.dataLocation<uint8_t*>();
#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
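    // With fast JIT permissions we compact directly into the executable
    // region rather than a scratch buffer, and recompute the assembler
    // buffer's running hash over every instruction we copy so corruption of
    // the in-flight code is caught before it can ever execute.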
    const uint32_t expectedFinalHash = macroAssembler.m_assembler.buffer().hash().finalHash();
    ARM64EHash verifyUncompactedHash;
    uint8_t* outData = codeOutData;
#else
    AssemblerData outBuffer(m_size);
    uint8_t* outData = reinterpret_cast<uint8_t*>(outBuffer.buffer());
#endif
#if CPU(ARM64)
    RELEASE_ASSERT(roundUpToMultipleOf<sizeof(unsigned)>(outData) == outData);
    RELEASE_ASSERT(roundUpToMultipleOf<sizeof(unsigned)>(codeOutData) == codeOutData);
#endif

    int readPtr = 0;
    int writePtr = 0;
    unsigned jumpCount = jumpsToLink.size();

#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
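    // Flip this thread's view of the JIT region from RX to RW for the
    // duration of the copy; it is flipped back to RX below, before the code
    // can run.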
    os_thread_self_restrict_rwx_to_rw();
#endif

    if (m_shouldPerformBranchCompaction) {
        for (unsigned i = 0; i < jumpCount; ++i) {
            int offset = readPtr - writePtr;
            ASSERT(!(offset & 1));

            // Copy the instructions from the last jump to the current one.
            size_t regionSize = jumpsToLink[i].from() - readPtr;
            InstructionType* copySource = reinterpret_cast_ptr<InstructionType*>(inData + readPtr);
            InstructionType* copyEnd = reinterpret_cast_ptr<InstructionType*>(inData + readPtr + regionSize);
            InstructionType* copyDst = reinterpret_cast_ptr<InstructionType*>(outData + writePtr);
            ASSERT(!(regionSize % 2));
            ASSERT(!(readPtr % 2));
            ASSERT(!(writePtr % 2));
            while (copySource != copyEnd) {
                InstructionType insn = *copySource++;
#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
                static_assert(sizeof(InstructionType) == 4, "");
                verifyUncompactedHash.update(insn);
#endif
                *copyDst++ = insn;
            }
            recordLinkOffsets(m_assemblerStorage, readPtr, jumpsToLink[i].from(), offset);
            readPtr += regionSize;
            writePtr += regionSize;

            // Calculate the absolute address of the jump target. For backward
            // branches we must be precise; for forward branches we can be pessimistic.
            const uint8_t* target;
            if (jumpsToLink[i].to() >= jumpsToLink[i].from())
                target = codeOutData + jumpsToLink[i].to() - offset; // Compensate for what we have collapsed so far.
            else
                target = codeOutData + jumpsToLink[i].to() - executableOffsetFor(jumpsToLink[i].to());

            JumpLinkType jumpLinkType = MacroAssembler::computeJumpType(jumpsToLink[i], codeOutData + writePtr, target);
            // Compact branch if we can...
            if (MacroAssembler::canCompact(jumpsToLink[i].type())) {
                // Step back in the write stream.
                int32_t delta = MacroAssembler::jumpSizeDelta(jumpsToLink[i].type(), jumpLinkType);
                if (delta) {
                    writePtr -= delta;
                    recordLinkOffsets(m_assemblerStorage, jumpsToLink[i].from() - delta, readPtr, readPtr - writePtr);
                }
            }
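            // Remember the branch's position in the compacted stream so the
            // final link pass below patches it at its new offset.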
            jumpsToLink[i].setFrom(writePtr);
        }
    } else {
        if (!ASSERT_DISABLED) {
            for (unsigned i = 0; i < jumpCount; ++i)
                ASSERT(!MacroAssembler::canCompact(jumpsToLink[i].type()));
        }
    }

    // Copy everything after the last jump.
    {
        InstructionType* dst = bitwise_cast<InstructionType*>(outData + writePtr);
        InstructionType* src = bitwise_cast<InstructionType*>(inData + readPtr);
        size_t bytes = initialSize - readPtr;

        RELEASE_ASSERT(bitwise_cast<uintptr_t>(dst) % sizeof(InstructionType) == 0);
        RELEASE_ASSERT(bitwise_cast<uintptr_t>(src) % sizeof(InstructionType) == 0);
        RELEASE_ASSERT(bytes % sizeof(InstructionType) == 0);

        for (size_t i = 0; i < bytes; i += sizeof(InstructionType)) {
            InstructionType insn = *src++;
#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
            verifyUncompactedHash.update(insn);
#endif
            *dst++ = insn;
        }
    }

#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
    if (verifyUncompactedHash.finalHash() != expectedFinalHash) {
        dataLogLn("Hashes don't match: ", RawPointer(bitwise_cast<void*>(static_cast<uintptr_t>(verifyUncompactedHash.finalHash()))), " ", RawPointer(bitwise_cast<void*>(static_cast<uintptr_t>(expectedFinalHash))));
        dataLogLn("Crashing!");
        CRASH();
    }
#endif

    recordLinkOffsets(m_assemblerStorage, readPtr, initialSize, readPtr - writePtr);

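    // Every instruction now sits at its final offset, so resolve each branch
    // against its (possibly shifted) target and write the chosen encoding.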
    for (unsigned i = 0; i < jumpCount; ++i) {
#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
        auto memcpyFunction = memcpy;
#else
        auto memcpyFunction = performJITMemcpy;
#endif

        uint8_t* location = codeOutData + jumpsToLink[i].from();
        uint8_t* target = codeOutData + jumpsToLink[i].to() - executableOffsetFor(jumpsToLink[i].to());
        MacroAssembler::link(jumpsToLink[i], outData + jumpsToLink[i].from(), location, target, memcpyFunction);
    }

    size_t compactSize = writePtr + initialSize - readPtr;
    if (!m_executableMemory) {
        size_t nopSizeInBytes = initialSize - compactSize;
        MacroAssembler::AssemblerType_T::fillNops(outData + compactSize, nopSizeInBytes, memcpy);
    }

#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
    os_thread_self_restrict_rwx_to_rx();
#endif

    if (m_executableMemory) {
        m_size = compactSize;
        m_executableMemory->shrink(m_size);
    }

#if !CPU(ARM64E) || !ENABLE(FAST_JIT_PERMISSIONS)
    ASSERT(codeOutData != outData);
    performJITMemcpy(codeOutData, outData, m_size);
#else
    ASSERT(codeOutData == outData);
    if (UNLIKELY(Options::dumpJITMemory()))
        dumpJITMemory(outData, outData, m_size);
#endif

    jumpsToLink.clear();

#if DUMP_LINK_STATISTICS
    dumpLinkStatistics(codeOutData, initialSize, m_size);
#endif
#if DUMP_CODE
    dumpCode(codeOutData, m_size);
#endif
}
#endif // ENABLE(BRANCH_COMPACTION)

void LinkBuffer::linkCode(MacroAssembler& macroAssembler, void* ownerUID, JITCompilationEffort effort)
{
    // Ensure that the end of the last invalidation point does not extend beyond the end of the buffer.
    macroAssembler.label();

#if !ENABLE(BRANCH_COMPACTION)
#if defined(ASSEMBLER_HAS_CONSTANT_POOL) && ASSEMBLER_HAS_CONSTANT_POOL
    macroAssembler.m_assembler.buffer().flushConstantPool(false);
#endif
    allocate(macroAssembler, ownerUID, effort);
    if (!m_didAllocate)
        return;
    ASSERT(m_code);
    AssemblerBuffer& buffer = macroAssembler.m_assembler.buffer();
    void* code = m_code.dataLocation();
#if CPU(ARM64)
    RELEASE_ASSERT(roundUpToMultipleOf<Assembler::instructionSize>(code) == code);
#endif
    performJITMemcpy(code, buffer.data(), buffer.codeSize());
#if CPU(MIPS)
    macroAssembler.m_assembler.relocateJumps(buffer.data(), code);
#endif
#elif CPU(ARM_THUMB2)
    copyCompactAndLinkCode<uint16_t>(macroAssembler, ownerUID, effort);
#elif CPU(ARM64)
    copyCompactAndLinkCode<uint32_t>(macroAssembler, ownerUID, effort);
#endif // !ENABLE(BRANCH_COMPACTION)

    m_linkTasks = WTFMove(macroAssembler.m_linkTasks);
}

void LinkBuffer::allocate(MacroAssembler& macroAssembler, void* ownerUID, JITCompilationEffort effort)
{
    size_t initialSize = macroAssembler.m_assembler.codeSize();
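    // If m_code is already set, this LinkBuffer was constructed over a
    // pre-existing allocation of size m_size (some callers link into memory
    // they already own), so we only pad the tail of the generated code with
    // nops instead of allocating.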
    if (m_code) {
        if (initialSize > m_size)
            return;

        size_t nopsToFillInBytes = m_size - initialSize;
        macroAssembler.emitNops(nopsToFillInBytes);
        m_didAllocate = true;
        return;
    }

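    // Pad the generated code up to the executable allocator's granule size
    // with breakpoint instructions, so allocation sizes stay granule-aligned
    // and any stray execution past the real code traps rather than running
    // leftover bytes.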
    while (initialSize % jitAllocationGranule) {
        macroAssembler.breakpoint();
        initialSize = macroAssembler.m_assembler.codeSize();
    }

    m_executableMemory = ExecutableAllocator::singleton().allocate(initialSize, ownerUID, effort);
    if (!m_executableMemory)
        return;
    m_code = MacroAssemblerCodePtr<LinkBufferPtrTag>(m_executableMemory->start().retaggedPtr<LinkBufferPtrTag>());
    m_size = initialSize;
    m_didAllocate = true;
}

void LinkBuffer::performFinalization()
{
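    // Run the link tasks that were deferred during compilation until the
    // code's final address was known; each task gets this LinkBuffer so it
    // can patch the nearly finished code.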
    for (auto& task : m_linkTasks)
        task->run(*this);

#ifndef NDEBUG
    ASSERT(!isCompilationThread());
    ASSERT(!m_completed);
    ASSERT(isValid());
    m_completed = true;
#endif

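    // Flush the instruction cache so the CPU fetches the newly written
    // instructions instead of stale cache contents.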
    MacroAssembler::cacheFlush(code(), m_size);
}

#if DUMP_LINK_STATISTICS
void LinkBuffer::dumpLinkStatistics(void* code, size_t initialSize, size_t finalSize)
{
    static unsigned linkCount = 0;
    static unsigned totalInitialSize = 0;
    static unsigned totalFinalSize = 0;
    linkCount++;
    totalInitialSize += initialSize;
    totalFinalSize += finalSize;
    dataLogF("link %p: orig %u, compact %u (delta %u, %.2f%%)\n",
        code, static_cast<unsigned>(initialSize), static_cast<unsigned>(finalSize),
        static_cast<unsigned>(initialSize - finalSize),
        100.0 * (initialSize - finalSize) / initialSize);
    dataLogF("\ttotal %u: orig %u, compact %u (delta %u, %.2f%%)\n",
        linkCount, totalInitialSize, totalFinalSize, totalInitialSize - totalFinalSize,
        100.0 * (totalInitialSize - totalFinalSize) / totalInitialSize);
}
#endif

#if DUMP_CODE
void LinkBuffer::dumpCode(void* code, size_t size)
{
#if CPU(ARM_THUMB2)
    // Dump the generated code in an asm file format that can be assembled and then disassembled
    // for debugging purposes. For example, save this output as jit.s:
    //   gcc -arch armv7 -c jit.s
    //   otool -tv jit.o
    static unsigned codeCount = 0;
    unsigned short* tcode = static_cast<unsigned short*>(code);
    size_t tsize = size / sizeof(short);
    char nameBuf[128];
    snprintf(nameBuf, sizeof(nameBuf), "_jsc_jit%u", codeCount++);
    dataLogF("\t.syntax unified\n"
        "\t.section\t__TEXT,__text,regular,pure_instructions\n"
        "\t.globl\t%s\n"
        "\t.align 2\n"
        "\t.code 16\n"
        "\t.thumb_func\t%s\n"
        "# %p\n"
        "%s:\n", nameBuf, nameBuf, code, nameBuf);

    for (unsigned i = 0; i < tsize; i++)
        dataLogF("\t.short\t0x%x\n", tcode[i]);
#endif
}
#endif

} // namespace JSC

#endif // ENABLE(ASSEMBLER)