1 | /* |
2 | * Copyright (C) 2013-2018 Apple Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
24 | */ |
25 | |
26 | #include "config.h" |
27 | |
28 | #if ENABLE(ASSEMBLER) && (CPU(X86) || CPU(X86_64)) |
29 | #include "MacroAssembler.h" |
30 | |
31 | #include "ProbeContext.h" |
32 | #include <wtf/InlineASM.h> |
33 | |
34 | #if COMPILER(MSVC) |
35 | #include <intrin.h> |
36 | #endif |
37 | |
38 | namespace JSC { |
39 | |
40 | #if ENABLE(MASM_PROBE) |
41 | |
42 | extern "C" void ctiMasmProbeTrampoline(); |
43 | |
44 | // Byte offsets of the Probe::State fields accessed by the ctiMasmProbeTrampoline stub. The trampoline pastes them into its assembly text via STRINGIZE_VALUE_OF (GCC-compatible builds) or uses them directly inside __asm (MSVC builds). |
45 | |
46 | #if CPU(X86) |
47 | #define PTR_SIZE 4 |
48 | #else // CPU(X86_64) |
49 | #define PTR_SIZE 8 |
50 | #endif |
51 | |
52 | #define PROBE_PROBE_FUNCTION_OFFSET (0 * PTR_SIZE) |
53 | #define PROBE_ARG_OFFSET (1 * PTR_SIZE) |
54 | #define PROBE_INIT_STACK_FUNCTION_OFFSET (2 * PTR_SIZE) |
55 | #define PROBE_INIT_STACK_ARG_OFFSET (3 * PTR_SIZE) |
56 | |
57 | #define PROBE_FIRST_GPR_OFFSET (4 * PTR_SIZE) |
58 | #define PROBE_CPU_EAX_OFFSET (PROBE_FIRST_GPR_OFFSET + (0 * PTR_SIZE)) |
59 | #define PROBE_CPU_ECX_OFFSET (PROBE_FIRST_GPR_OFFSET + (1 * PTR_SIZE)) |
60 | #define PROBE_CPU_EDX_OFFSET (PROBE_FIRST_GPR_OFFSET + (2 * PTR_SIZE)) |
61 | #define PROBE_CPU_EBX_OFFSET (PROBE_FIRST_GPR_OFFSET + (3 * PTR_SIZE)) |
62 | #define PROBE_CPU_ESP_OFFSET (PROBE_FIRST_GPR_OFFSET + (4 * PTR_SIZE)) |
63 | #define PROBE_CPU_EBP_OFFSET (PROBE_FIRST_GPR_OFFSET + (5 * PTR_SIZE)) |
64 | #define PROBE_CPU_ESI_OFFSET (PROBE_FIRST_GPR_OFFSET + (6 * PTR_SIZE)) |
65 | #define PROBE_CPU_EDI_OFFSET (PROBE_FIRST_GPR_OFFSET + (7 * PTR_SIZE)) |
66 | |
67 | #if CPU(X86) |
68 | #define PROBE_FIRST_SPR_OFFSET (PROBE_FIRST_GPR_OFFSET + (8 * PTR_SIZE)) |
69 | #else // CPU(X86_64) |
70 | #define PROBE_CPU_R8_OFFSET (PROBE_FIRST_GPR_OFFSET + (8 * PTR_SIZE)) |
71 | #define PROBE_CPU_R9_OFFSET (PROBE_FIRST_GPR_OFFSET + (9 * PTR_SIZE)) |
72 | #define PROBE_CPU_R10_OFFSET (PROBE_FIRST_GPR_OFFSET + (10 * PTR_SIZE)) |
73 | #define PROBE_CPU_R11_OFFSET (PROBE_FIRST_GPR_OFFSET + (11 * PTR_SIZE)) |
74 | #define PROBE_CPU_R12_OFFSET (PROBE_FIRST_GPR_OFFSET + (12 * PTR_SIZE)) |
75 | #define PROBE_CPU_R13_OFFSET (PROBE_FIRST_GPR_OFFSET + (13 * PTR_SIZE)) |
76 | #define PROBE_CPU_R14_OFFSET (PROBE_FIRST_GPR_OFFSET + (14 * PTR_SIZE)) |
77 | #define PROBE_CPU_R15_OFFSET (PROBE_FIRST_GPR_OFFSET + (15 * PTR_SIZE)) |
78 | #define PROBE_FIRST_SPR_OFFSET (PROBE_FIRST_GPR_OFFSET + (16 * PTR_SIZE)) |
79 | #endif // CPU(X86_64) |
80 | |
81 | #define PROBE_CPU_EIP_OFFSET (PROBE_FIRST_SPR_OFFSET + (0 * PTR_SIZE)) |
82 | #define PROBE_CPU_EFLAGS_OFFSET (PROBE_FIRST_SPR_OFFSET + (1 * PTR_SIZE)) |
83 | #define PROBE_FIRST_XMM_OFFSET (PROBE_FIRST_SPR_OFFSET + (2 * PTR_SIZE)) |
84 | |
85 | #define XMM_SIZE 8 |
86 | #define PROBE_CPU_XMM0_OFFSET (PROBE_FIRST_XMM_OFFSET + (0 * XMM_SIZE)) |
87 | #define PROBE_CPU_XMM1_OFFSET (PROBE_FIRST_XMM_OFFSET + (1 * XMM_SIZE)) |
88 | #define PROBE_CPU_XMM2_OFFSET (PROBE_FIRST_XMM_OFFSET + (2 * XMM_SIZE)) |
89 | #define PROBE_CPU_XMM3_OFFSET (PROBE_FIRST_XMM_OFFSET + (3 * XMM_SIZE)) |
90 | #define PROBE_CPU_XMM4_OFFSET (PROBE_FIRST_XMM_OFFSET + (4 * XMM_SIZE)) |
91 | #define PROBE_CPU_XMM5_OFFSET (PROBE_FIRST_XMM_OFFSET + (5 * XMM_SIZE)) |
92 | #define PROBE_CPU_XMM6_OFFSET (PROBE_FIRST_XMM_OFFSET + (6 * XMM_SIZE)) |
93 | #define PROBE_CPU_XMM7_OFFSET (PROBE_FIRST_XMM_OFFSET + (7 * XMM_SIZE)) |
94 | |
95 | #if CPU(X86) |
96 | #define PROBE_SIZE (PROBE_CPU_XMM7_OFFSET + XMM_SIZE) |
97 | #else // CPU(X86_64) |
98 | #define PROBE_CPU_XMM8_OFFSET (PROBE_FIRST_XMM_OFFSET + (8 * XMM_SIZE)) |
99 | #define PROBE_CPU_XMM9_OFFSET (PROBE_FIRST_XMM_OFFSET + (9 * XMM_SIZE)) |
100 | #define PROBE_CPU_XMM10_OFFSET (PROBE_FIRST_XMM_OFFSET + (10 * XMM_SIZE)) |
101 | #define PROBE_CPU_XMM11_OFFSET (PROBE_FIRST_XMM_OFFSET + (11 * XMM_SIZE)) |
102 | #define PROBE_CPU_XMM12_OFFSET (PROBE_FIRST_XMM_OFFSET + (12 * XMM_SIZE)) |
103 | #define PROBE_CPU_XMM13_OFFSET (PROBE_FIRST_XMM_OFFSET + (13 * XMM_SIZE)) |
104 | #define PROBE_CPU_XMM14_OFFSET (PROBE_FIRST_XMM_OFFSET + (14 * XMM_SIZE)) |
105 | #define PROBE_CPU_XMM15_OFFSET (PROBE_FIRST_XMM_OFFSET + (15 * XMM_SIZE)) |
106 | #define PROBE_SIZE (PROBE_CPU_XMM15_OFFSET + XMM_SIZE) |
107 | #endif // CPU(X86_64) |
108 | |
109 | #define PROBE_EXECUTOR_OFFSET PROBE_SIZE // Stash the executeProbe function pointer just past the end of the Probe::State. |
110 | |
111 | // The outgoing record to be popped off the stack at the end consists of: |
112 | // eflags, eax, ecx, ebp, eip (one pointer-sized slot each). |
113 | #define OUT_SIZE (5 * PTR_SIZE) |
114 | |
115 | // These static_asserts remind you that if you change the layout of Probe::State, |
116 | // you need to change ctiMasmProbeTrampoline offsets above to match. PROBE_OFFSETOF_REG(x, reg) yields the byte offset of element reg of the register array x; its expansion is fully parenthesized so it composes safely with other operators. |
117 | #define PROBE_OFFSETOF(x) offsetof(struct Probe::State, x) |
118 | #define PROBE_OFFSETOF_REG(x, reg) (offsetof(struct Probe::State, x) + (reg) * sizeof((reinterpret_cast<Probe::State*>(0))->x[(reg)])) |
119 | static_assert(PROBE_OFFSETOF(probeFunction) == PROBE_PROBE_FUNCTION_OFFSET, "Probe::State::probeFunction's offset matches ctiMasmProbeTrampoline" ); |
120 | static_assert(PROBE_OFFSETOF(arg) == PROBE_ARG_OFFSET, "Probe::State::arg's offset matches ctiMasmProbeTrampoline" ); |
121 | static_assert(PROBE_OFFSETOF(initializeStackFunction) == PROBE_INIT_STACK_FUNCTION_OFFSET, "Probe::State::initializeStackFunction's offset matches ctiMasmProbeTrampoline" ); |
122 | static_assert(PROBE_OFFSETOF(initializeStackArg) == PROBE_INIT_STACK_ARG_OFFSET, "Probe::State::initializeStackArg's offset matches ctiMasmProbeTrampoline" ); |
123 | |
124 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::eax) == PROBE_CPU_EAX_OFFSET, "Probe::State::cpu.gprs[eax]'s offset matches ctiMasmProbeTrampoline" ); |
125 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::ecx) == PROBE_CPU_ECX_OFFSET, "Probe::State::cpu.gprs[ecx]'s offset matches ctiMasmProbeTrampoline" ); |
126 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::edx) == PROBE_CPU_EDX_OFFSET, "Probe::State::cpu.gprs[edx]'s offset matches ctiMasmProbeTrampoline" ); |
127 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::ebx) == PROBE_CPU_EBX_OFFSET, "Probe::State::cpu.gprs[ebx]'s offset matches ctiMasmProbeTrampoline" ); |
128 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::esp) == PROBE_CPU_ESP_OFFSET, "Probe::State::cpu.gprs[esp]'s offset matches ctiMasmProbeTrampoline" ); |
129 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::ebp) == PROBE_CPU_EBP_OFFSET, "Probe::State::cpu.gprs[ebp]'s offset matches ctiMasmProbeTrampoline" ); |
130 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::esi) == PROBE_CPU_ESI_OFFSET, "Probe::State::cpu.gprs[esi]'s offset matches ctiMasmProbeTrampoline" ); |
131 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::edi) == PROBE_CPU_EDI_OFFSET, "Probe::State::cpu.gprs[edi]'s offset matches ctiMasmProbeTrampoline" ); |
132 | static_assert(PROBE_OFFSETOF_REG(cpu.sprs, X86Registers::eip) == PROBE_CPU_EIP_OFFSET, "Probe::State::cpu.sprs[eip]'s offset matches ctiMasmProbeTrampoline" ); |
133 | static_assert(PROBE_OFFSETOF_REG(cpu.sprs, X86Registers::eflags) == PROBE_CPU_EFLAGS_OFFSET, "Probe::State::cpu.sprs[eflags]'s offset matches ctiMasmProbeTrampoline" ); |
134 | |
135 | #if CPU(X86_64) |
136 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r8) == PROBE_CPU_R8_OFFSET, "Probe::State::cpu.gprs[r8]'s offset matches ctiMasmProbeTrampoline" ); |
137 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r9) == PROBE_CPU_R9_OFFSET, "Probe::State::cpu.gprs[r9]'s offset matches ctiMasmProbeTrampoline" ); |
138 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r10) == PROBE_CPU_R10_OFFSET, "Probe::State::cpu.gprs[r10]'s offset matches ctiMasmProbeTrampoline" ); |
139 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r11) == PROBE_CPU_R11_OFFSET, "Probe::State::cpu.gprs[r11]'s offset matches ctiMasmProbeTrampoline" ); |
140 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r12) == PROBE_CPU_R12_OFFSET, "Probe::State::cpu.gprs[r12]'s offset matches ctiMasmProbeTrampoline" ); |
141 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r13) == PROBE_CPU_R13_OFFSET, "Probe::State::cpu.gprs[r13]'s offset matches ctiMasmProbeTrampoline" ); |
142 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r14) == PROBE_CPU_R14_OFFSET, "Probe::State::cpu.gprs[r14]'s offset matches ctiMasmProbeTrampoline" ); |
143 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r15) == PROBE_CPU_R15_OFFSET, "Probe::State::cpu.gprs[r15]'s offset matches ctiMasmProbeTrampoline" ); |
144 | #endif // CPU(X86_64) |
145 | |
146 | static_assert(!(PROBE_CPU_XMM0_OFFSET & 0x7), "Probe::State::cpu.fprs[xmm0]'s offset should be 8 byte aligned" ); |
147 | |
148 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm0) == PROBE_CPU_XMM0_OFFSET, "Probe::State::cpu.fprs[xmm0]'s offset matches ctiMasmProbeTrampoline" ); |
149 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm1) == PROBE_CPU_XMM1_OFFSET, "Probe::State::cpu.fprs[xmm1]'s offset matches ctiMasmProbeTrampoline" ); |
150 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm2) == PROBE_CPU_XMM2_OFFSET, "Probe::State::cpu.fprs[xmm2]'s offset matches ctiMasmProbeTrampoline" ); |
151 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm3) == PROBE_CPU_XMM3_OFFSET, "Probe::State::cpu.fprs[xmm3]'s offset matches ctiMasmProbeTrampoline" ); |
152 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm4) == PROBE_CPU_XMM4_OFFSET, "Probe::State::cpu.fprs[xmm4]'s offset matches ctiMasmProbeTrampoline" ); |
153 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm5) == PROBE_CPU_XMM5_OFFSET, "Probe::State::cpu.fprs[xmm5]'s offset matches ctiMasmProbeTrampoline" ); |
154 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm6) == PROBE_CPU_XMM6_OFFSET, "Probe::State::cpu.fprs[xmm6]'s offset matches ctiMasmProbeTrampoline" ); |
155 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm7) == PROBE_CPU_XMM7_OFFSET, "Probe::State::cpu.fprs[xmm7]'s offset matches ctiMasmProbeTrampoline" ); |
156 | |
157 | #if CPU(X86_64) |
158 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm8) == PROBE_CPU_XMM8_OFFSET, "Probe::State::cpu.fprs[xmm8]'s offset matches ctiMasmProbeTrampoline" ); |
159 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm9) == PROBE_CPU_XMM9_OFFSET, "Probe::State::cpu.fprs[xmm9]'s offset matches ctiMasmProbeTrampoline" ); |
160 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm10) == PROBE_CPU_XMM10_OFFSET, "Probe::State::cpu.fprs[xmm10]'s offset matches ctiMasmProbeTrampoline" ); |
161 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm11) == PROBE_CPU_XMM11_OFFSET, "Probe::State::cpu.fprs[xmm11]'s offset matches ctiMasmProbeTrampoline" ); |
162 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm12) == PROBE_CPU_XMM12_OFFSET, "Probe::State::cpu.fprs[xmm12]'s offset matches ctiMasmProbeTrampoline" ); |
163 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm13) == PROBE_CPU_XMM13_OFFSET, "Probe::State::cpu.fprs[xmm13]'s offset matches ctiMasmProbeTrampoline" ); |
164 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm14) == PROBE_CPU_XMM14_OFFSET, "Probe::State::cpu.fprs[xmm14]'s offset matches ctiMasmProbeTrampoline" ); |
165 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm15) == PROBE_CPU_XMM15_OFFSET, "Probe::State::cpu.fprs[xmm15]'s offset matches ctiMasmProbeTrampoline" ); |
166 | #endif // CPU(X86_64) |
167 | |
168 | static_assert(sizeof(Probe::State) == PROBE_SIZE, "Probe::State::size's matches ctiMasmProbeTrampoline" ); |
169 | static_assert((PROBE_EXECUTOR_OFFSET + PTR_SIZE) <= (PROBE_SIZE + OUT_SIZE), "Must have room after ProbeContext to stash the probe handler" ); |
170 | |
171 | #undef PROBE_OFFSETOF |
172 | |
173 | #if CPU(X86) |
174 | #if COMPILER(GCC_COMPATIBLE) |
175 | asm ( |
176 | ".globl " SYMBOL_STRING(ctiMasmProbeTrampoline) "\n" |
177 | HIDE_SYMBOL(ctiMasmProbeTrampoline) "\n" |
178 | SYMBOL_STRING(ctiMasmProbeTrampoline) ":" "\n" |
179 | |
180 | "pushfl" "\n" |
181 | |
182 | // 32-bit x86 probe trampoline: captures the CPU state into a Probe::State carved out of the stack, calls the executor stashed right after it, then reloads every register from the Probe::State. MacroAssemblerX86Common::probe() has already generated code to store some values. |
183 | // Together with the eflags pushed above, the top of stack now looks like |
184 | // this: |
185 | // esp[0 * ptrSize]: eflags |
186 | // esp[1 * ptrSize]: return address / saved eip |
187 | // esp[2 * ptrSize]: saved ebx |
188 | // esp[3 * ptrSize]: saved edx |
189 | // esp[4 * ptrSize]: saved ecx |
190 | // esp[5 * ptrSize]: saved eax |
191 | // |
192 | // Incoming registers contain: |
193 | // ecx: Probe::executeProbe |
194 | // edx: probe function |
195 | // ebx: probe arg |
196 | // eax: scratch (was ctiMasmProbeTrampoline) |
197 | |
198 | "movl %esp, %eax" "\n" |
199 | "subl $" STRINGIZE_VALUE_OF(PROBE_SIZE + OUT_SIZE) ", %esp" "\n" |
200 | |
201 | // The X86_64 ABI specifies that the worst case stack alignment requirement is 32 bytes; the same 32 byte alignment is applied in this 32-bit path. |
202 | "andl $~0x1f, %esp" "\n" |
203 | |
204 | "movl %ebp, " STRINGIZE_VALUE_OF(PROBE_CPU_EBP_OFFSET) "(%esp)" "\n" |
205 | "movl %esp, %ebp" "\n" // Save the Probe::State*. |
206 | |
207 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_EXECUTOR_OFFSET) "(%ebp)" "\n" |
208 | "movl %edx, " STRINGIZE_VALUE_OF(PROBE_PROBE_FUNCTION_OFFSET) "(%ebp)" "\n" |
209 | "movl %ebx, " STRINGIZE_VALUE_OF(PROBE_ARG_OFFSET) "(%ebp)" "\n" |
210 | "movl %esi, " STRINGIZE_VALUE_OF(PROBE_CPU_ESI_OFFSET) "(%ebp)" "\n" |
211 | "movl %edi, " STRINGIZE_VALUE_OF(PROBE_CPU_EDI_OFFSET) "(%ebp)" "\n" |
212 | |
213 | "movl 0 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%eax), %ecx" "\n" |
214 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_CPU_EFLAGS_OFFSET) "(%ebp)" "\n" |
215 | "movl 1 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%eax), %ecx" "\n" |
216 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_CPU_EIP_OFFSET) "(%ebp)" "\n" |
217 | "movl 2 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%eax), %ecx" "\n" |
218 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_CPU_EBX_OFFSET) "(%ebp)" "\n" |
219 | "movl 3 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%eax), %ecx" "\n" |
220 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_CPU_EDX_OFFSET) "(%ebp)" "\n" |
221 | "movl 4 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%eax), %ecx" "\n" |
222 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_CPU_ECX_OFFSET) "(%ebp)" "\n" |
223 | "movl 5 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%eax), %ecx" "\n" |
224 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_CPU_EAX_OFFSET) "(%ebp)" "\n" |
225 | |
226 | "movl %eax, %ecx" "\n" |
227 | "addl $" STRINGIZE_VALUE_OF(6 * PTR_SIZE) ", %ecx" "\n" |
228 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_CPU_ESP_OFFSET) "(%ebp)" "\n" |
229 | |
230 | "movq %xmm0, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%ebp)" "\n" |
231 | "movq %xmm1, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%ebp)" "\n" |
232 | "movq %xmm2, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%ebp)" "\n" |
233 | "movq %xmm3, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%ebp)" "\n" |
234 | "movq %xmm4, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%ebp)" "\n" |
235 | "movq %xmm5, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%ebp)" "\n" |
236 | "movq %xmm6, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%ebp)" "\n" |
237 | "movq %xmm7, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%ebp)" "\n" |
238 | |
239 | // Reserve stack space for the arg while maintaining the required stack |
240 | // pointer 32 byte alignment: |
241 | "subl $0x20, %esp" "\n" |
242 | "movl %ebp, 0(%esp)" "\n" // the Probe::State* arg. |
243 | |
244 | "call *" STRINGIZE_VALUE_OF(PROBE_EXECUTOR_OFFSET) "(%ebp)" "\n" |
245 | |
246 | // Make sure the Probe::State is entirely below the result stack pointer so |
247 | // that register values are still preserved when we call the initializeStack |
248 | // function. NOTE(review): the jge below compares the two addresses as signed values; confirm that is intended. |
249 | "movl $" STRINGIZE_VALUE_OF(PROBE_SIZE + OUT_SIZE) ", %ecx" "\n" |
250 | "movl %ebp, %eax" "\n" |
251 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_ESP_OFFSET) "(%ebp), %edx" "\n" |
252 | "addl %ecx, %eax" "\n" |
253 | "cmpl %eax, %edx" "\n" |
254 | "jge " LOCAL_LABEL_STRING(ctiMasmProbeTrampolineProbeStateIsSafe) "\n" |
255 | |
256 | // Allocate a safe place on the stack below the result stack pointer to stash the Probe::State. |
257 | "subl %ecx, %edx" "\n" |
258 | "andl $~0x1f, %edx" "\n" // Keep the stack pointer 32 bytes aligned. |
259 | "xorl %eax, %eax" "\n" |
260 | "movl %edx, %esp" "\n" |
261 | |
262 | "movl $" STRINGIZE_VALUE_OF(PROBE_SIZE) ", %ecx" "\n" |
263 | |
264 | // Copy the Probe::State to the safe place. |
265 | LOCAL_LABEL_STRING(ctiMasmProbeTrampolineCopyLoop) ":" "\n" |
266 | "movl (%ebp, %eax), %edx" "\n" |
267 | "movl %edx, (%esp, %eax)" "\n" |
268 | "addl $" STRINGIZE_VALUE_OF(PTR_SIZE) ", %eax" "\n" |
269 | "cmpl %eax, %ecx" "\n" |
270 | "jg " LOCAL_LABEL_STRING(ctiMasmProbeTrampolineCopyLoop) "\n" |
271 | |
272 | "movl %esp, %ebp" "\n" |
273 | |
274 | // Call initializeStackFunction if present. |
275 | LOCAL_LABEL_STRING(ctiMasmProbeTrampolineProbeStateIsSafe) ":" "\n" |
276 | "xorl %ecx, %ecx" "\n" |
277 | "addl " STRINGIZE_VALUE_OF(PROBE_INIT_STACK_FUNCTION_OFFSET) "(%ebp), %ecx" "\n" |
278 | "je " LOCAL_LABEL_STRING(ctiMasmProbeTrampolineRestoreRegisters) "\n" |
279 | |
280 | // Reserve stack space for the arg while maintaining the required stack |
281 | // pointer 32 byte alignment: |
282 | "subl $0x20, %esp" "\n" |
283 | "movl %ebp, 0(%esp)" "\n" // the Probe::State* arg. |
284 | "call *%ecx" "\n" |
285 | |
286 | LOCAL_LABEL_STRING(ctiMasmProbeTrampolineRestoreRegisters) ":" "\n" |
287 | |
288 | // To enable probes to modify register state, we copy all registers |
289 | // out of the Probe::State before returning. |
290 | |
291 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_EDX_OFFSET) "(%ebp), %edx" "\n" |
292 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_EBX_OFFSET) "(%ebp), %ebx" "\n" |
293 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_ESI_OFFSET) "(%ebp), %esi" "\n" |
294 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_EDI_OFFSET) "(%ebp), %edi" "\n" |
295 | |
296 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%ebp), %xmm0" "\n" |
297 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%ebp), %xmm1" "\n" |
298 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%ebp), %xmm2" "\n" |
299 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%ebp), %xmm3" "\n" |
300 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%ebp), %xmm4" "\n" |
301 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%ebp), %xmm5" "\n" |
302 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%ebp), %xmm6" "\n" |
303 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%ebp), %xmm7" "\n" |
304 | |
305 | // There are 6 more registers left to restore: |
306 | // eax, ecx, ebp, esp, eip, and eflags. |
307 | |
308 | // The restoration process at the end of this trampoline works by popping |
309 | // 5 words off the stack into eflags, eax, ecx, ebp, and eip. These 5 words need |
310 | // to be pushed on top of the final esp value so that just by popping the 5 words, |
311 | // we'll get the esp that the probe wants to set. Let's call this area (for storing |
312 | // these 5 words) the restore area. |
313 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_ESP_OFFSET) "(%ebp), %ecx" "\n" |
314 | "subl $5 * " STRINGIZE_VALUE_OF(PTR_SIZE) ", %ecx" "\n" |
315 | |
316 | // ecx now points to the restore area. |
317 | |
318 | // Copy remaining restore values from the Probe::State to the restore area. |
319 | // Note: We already ensured above that the Probe::State is in a safe location before |
320 | // calling the initializeStackFunction. The initializeStackFunction is not allowed to |
321 | // change the stack pointer again. |
322 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_EFLAGS_OFFSET) "(%ebp), %eax" "\n" |
323 | "movl %eax, 0 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%ecx)" "\n" |
324 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_EAX_OFFSET) "(%ebp), %eax" "\n" |
325 | "movl %eax, 1 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%ecx)" "\n" |
326 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_ECX_OFFSET) "(%ebp), %eax" "\n" |
327 | "movl %eax, 2 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%ecx)" "\n" |
328 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_EBP_OFFSET) "(%ebp), %eax" "\n" |
329 | "movl %eax, 3 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%ecx)" "\n" |
330 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_EIP_OFFSET) "(%ebp), %eax" "\n" |
331 | "movl %eax, 4 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%ecx)" "\n" |
332 | "movl %ecx, %esp" "\n" |
333 | |
334 | // Do the remaining restoration by popping off the restore area. |
335 | "popfl" "\n" |
336 | "popl %eax" "\n" |
337 | "popl %ecx" "\n" |
338 | "popl %ebp" "\n" |
339 | "ret" "\n" |
340 | ); |
341 | #endif |
342 | |
343 | #if COMPILER(MSVC) |
344 | extern "C" __declspec(naked) void ctiMasmProbeTrampoline() |
345 | { |
346 | __asm { |
347 | pushfd; |
348 | |
349 | // 32-bit x86 probe trampoline (MSVC inline assembly): captures the CPU state into a Probe::State carved out of the stack, calls the executor stashed right after it, then reloads every register from the Probe::State. MacroAssemblerX86Common::probe() has already generated code to store some values. |
350 | // Together with the eflags pushed above, the top of stack now looks like |
351 | // this: |
352 | // esp[0 * ptrSize]: eflags |
353 | // esp[1 * ptrSize]: return address / saved eip |
354 | // esp[2 * ptrSize]: saved ebx |
355 | // esp[3 * ptrSize]: saved edx |
356 | // esp[4 * ptrSize]: saved ecx |
357 | // esp[5 * ptrSize]: saved eax |
358 | // |
359 | // Incoming registers contain: |
360 | // ecx: Probe::executeProbe |
361 | // edx: probe function |
362 | // ebx: probe arg |
363 | // eax: scratch (was ctiMasmProbeTrampoline) |
364 | |
365 | mov eax, esp |
366 | sub esp, PROBE_SIZE + OUT_SIZE |
367 | |
368 | // The X86_64 ABI specifies that the worst case stack alignment requirement is 32 bytes; the same 32 byte alignment is applied in this 32-bit path. |
369 | and esp, ~0x1f |
370 | |
371 | mov [PROBE_CPU_EBP_OFFSET + esp], ebp |
372 | mov ebp, esp // Save the Probe::State*. |
373 | |
374 | mov [PROBE_EXECUTOR_OFFSET + ebp], ecx |
375 | mov [PROBE_PROBE_FUNCTION_OFFSET + ebp], edx |
376 | mov [PROBE_ARG_OFFSET + ebp], ebx |
377 | mov [PROBE_CPU_ESI_OFFSET + ebp], esi |
378 | mov [PROBE_CPU_EDI_OFFSET + ebp], edi |
379 | |
380 | mov ecx, [0 * PTR_SIZE + eax] |
381 | mov [PROBE_CPU_EFLAGS_OFFSET + ebp], ecx |
382 | mov ecx, [1 * PTR_SIZE + eax] |
383 | mov [PROBE_CPU_EIP_OFFSET + ebp], ecx |
384 | mov ecx, [2 * PTR_SIZE + eax] |
385 | mov [PROBE_CPU_EBX_OFFSET + ebp], ecx |
386 | mov ecx, [3 * PTR_SIZE + eax] |
387 | mov [PROBE_CPU_EDX_OFFSET + ebp], ecx |
388 | mov ecx, [4 * PTR_SIZE + eax] |
389 | mov [PROBE_CPU_ECX_OFFSET + ebp], ecx |
390 | mov ecx, [5 * PTR_SIZE + eax] |
391 | mov [PROBE_CPU_EAX_OFFSET + ebp], ecx |
392 | |
393 | mov ecx, eax |
394 | add ecx, 6 * PTR_SIZE |
395 | mov [PROBE_CPU_ESP_OFFSET + ebp], ecx |
396 | |
397 | movq qword ptr[PROBE_CPU_XMM0_OFFSET + ebp], xmm0 |
398 | movq qword ptr[PROBE_CPU_XMM1_OFFSET + ebp], xmm1 |
399 | movq qword ptr[PROBE_CPU_XMM2_OFFSET + ebp], xmm2 |
400 | movq qword ptr[PROBE_CPU_XMM3_OFFSET + ebp], xmm3 |
401 | movq qword ptr[PROBE_CPU_XMM4_OFFSET + ebp], xmm4 |
402 | movq qword ptr[PROBE_CPU_XMM5_OFFSET + ebp], xmm5 |
403 | movq qword ptr[PROBE_CPU_XMM6_OFFSET + ebp], xmm6 |
404 | movq qword ptr[PROBE_CPU_XMM7_OFFSET + ebp], xmm7 |
405 | |
406 | // Reserve stack space for the arg while maintaining the required stack |
407 | // pointer 32 byte alignment: |
408 | sub esp, 0x20 |
409 | mov [0 + esp], ebp // the Probe::State* arg. |
410 | |
411 | call [PROBE_EXECUTOR_OFFSET + ebp] |
412 | |
413 | // Make sure the Probe::State is entirely below the result stack pointer so |
414 | // that register values are still preserved when we call the initializeStack |
415 | // function. NOTE(review): the jge below compares the two addresses as signed values; confirm that is intended. |
416 | mov ecx, PROBE_SIZE + OUT_SIZE |
417 | mov eax, ebp |
418 | mov edx, [PROBE_CPU_ESP_OFFSET + ebp] |
419 | add eax, ecx |
420 | cmp edx, eax |
421 | jge ctiMasmProbeTrampolineProbeContextIsSafe |
422 | |
423 | // Allocate a safe place on the stack below the result stack pointer to stash the Probe::State. |
424 | sub edx, ecx |
425 | and edx, ~0x1f // Keep the stack pointer 32 bytes aligned. |
426 | xor eax, eax |
427 | mov esp, edx |
428 | |
429 | mov ecx, PROBE_SIZE |
430 | |
431 | // Copy the Probe::State to the safe place. |
432 | ctiMasmProbeTrampolineCopyLoop : |
433 | mov edx, [ebp + eax] |
434 | mov [esp + eax], edx |
435 | add eax, PTR_SIZE |
436 | cmp ecx, eax |
437 | jg ctiMasmProbeTrampolineCopyLoop |
438 | |
439 | mov ebp, esp |
440 | |
441 | // Call initializeStackFunction if present. |
442 | ctiMasmProbeTrampolineProbeContextIsSafe : |
443 | xor ecx, ecx |
444 | add ecx, [PROBE_INIT_STACK_FUNCTION_OFFSET + ebp] |
445 | je ctiMasmProbeTrampolineRestoreRegisters |
446 | |
447 | // Reserve stack space for the arg while maintaining the required stack |
448 | // pointer 32 byte alignment: |
449 | sub esp, 0x20 |
450 | mov [0 + esp], ebp // the Probe::State* arg. |
451 | call ecx |
452 | |
453 | ctiMasmProbeTrampolineRestoreRegisters : |
454 | |
455 | // To enable probes to modify register state, we copy all registers |
456 | // out of the Probe::State before returning. |
457 | |
458 | mov edx, [PROBE_CPU_EDX_OFFSET + ebp] |
459 | mov ebx, [PROBE_CPU_EBX_OFFSET + ebp] |
460 | mov esi, [PROBE_CPU_ESI_OFFSET + ebp] |
461 | mov edi, [PROBE_CPU_EDI_OFFSET + ebp] |
462 | |
463 | movq xmm0, qword ptr[PROBE_CPU_XMM0_OFFSET + ebp] |
464 | movq xmm1, qword ptr[PROBE_CPU_XMM1_OFFSET + ebp] |
465 | movq xmm2, qword ptr[PROBE_CPU_XMM2_OFFSET + ebp] |
466 | movq xmm3, qword ptr[PROBE_CPU_XMM3_OFFSET + ebp] |
467 | movq xmm4, qword ptr[PROBE_CPU_XMM4_OFFSET + ebp] |
468 | movq xmm5, qword ptr[PROBE_CPU_XMM5_OFFSET + ebp] |
469 | movq xmm6, qword ptr[PROBE_CPU_XMM6_OFFSET + ebp] |
470 | movq xmm7, qword ptr[PROBE_CPU_XMM7_OFFSET + ebp] |
471 | |
472 | // There are 6 more registers left to restore: |
473 | // eax, ecx, ebp, esp, eip, and eflags. |
474 | |
475 | // The restoration process at the end of this trampoline works by popping |
476 | // 5 words off the stack into eflags, eax, ecx, ebp, and eip. These 5 words need |
477 | // to be pushed on top of the final esp value so that just by popping the 5 words, |
478 | // we'll get the esp that the probe wants to set. Let's call this area (for storing |
479 | // these 5 words) the restore area. |
480 | mov ecx, [PROBE_CPU_ESP_OFFSET + ebp] |
481 | sub ecx, 5 * PTR_SIZE |
482 | |
483 | // ecx now points to the restore area. |
484 | |
485 | // Copy remaining restore values from the Probe::State to the restore area. |
486 | // Note: We already ensured above that the Probe::State is in a safe location before |
487 | // calling the initializeStackFunction. The initializeStackFunction is not allowed to |
488 | // change the stack pointer again. |
489 | mov eax, [PROBE_CPU_EFLAGS_OFFSET + ebp] |
490 | mov [0 * PTR_SIZE + ecx], eax |
491 | mov eax, [PROBE_CPU_EAX_OFFSET + ebp] |
492 | mov [1 * PTR_SIZE + ecx], eax |
493 | mov eax, [PROBE_CPU_ECX_OFFSET + ebp] |
494 | mov [2 * PTR_SIZE + ecx], eax |
495 | mov eax, [PROBE_CPU_EBP_OFFSET + ebp] |
496 | mov [3 * PTR_SIZE + ecx], eax |
497 | mov eax, [PROBE_CPU_EIP_OFFSET + ebp] |
498 | mov [4 * PTR_SIZE + ecx], eax |
499 | mov esp, ecx |
500 | |
501 | // Do the remaining restoration by popping off the restore area. |
502 | popfd |
503 | pop eax |
504 | pop ecx |
505 | pop ebp |
506 | ret |
507 | } |
508 | } |
509 | #endif // COMPILER(MSVC) |
510 | |
511 | #endif // CPU(X86) |
512 | |
513 | #if CPU(X86_64) |
514 | #if COMPILER(GCC_COMPATIBLE) |
515 | asm ( |
516 | ".globl " SYMBOL_STRING(ctiMasmProbeTrampoline) "\n" |
517 | HIDE_SYMBOL(ctiMasmProbeTrampoline) "\n" |
518 | SYMBOL_STRING(ctiMasmProbeTrampoline) ":" "\n" |
519 | |
520 | "pushfq" "\n" |
521 | |
522 | // MacroAssemblerX86Common::probe() has already generated code to store some values. |
523 | // Together with the rflags pushed above, the top of stack now looks like this: |
524 | // rsp[0 * ptrSize]: rflags |
525 | // rsp[1 * ptrSize]: return address / saved rip |
526 | // rsp[2 * ptrSize]: saved rbx |
527 | // rsp[3 * ptrSize]: saved rdx |
528 | // rsp[4 * ptrSize]: saved rcx |
529 | // rsp[5 * ptrSize]: saved rax |
530 | // |
531 | // Incoming registers contain: |
532 | // rcx: Probe::executeProbe |
533 | // rdx: probe function |
534 | // rbx: probe arg |
535 | // rax: scratch (was ctiMasmProbeTrampoline) |
536 | |
537 | "movq %rsp, %rax" "\n" |
538 | "subq $" STRINGIZE_VALUE_OF(PROBE_SIZE + OUT_SIZE) ", %rsp" "\n" |
539 | |
540 | // The X86_64 ABI specifies that the worst case stack alignment requirement is 32 bytes. |
541 | "andq $~0x1f, %rsp" "\n" |
542 | // Since sp points to the Probe::State, we've ensured that it's protected from interrupts before we initialize it. |
543 | |
544 | "movq %rbp, " STRINGIZE_VALUE_OF(PROBE_CPU_EBP_OFFSET) "(%rsp)" "\n" |
545 | "movq %rsp, %rbp" "\n" // Save the Probe::State*. |
546 | |
547 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_EXECUTOR_OFFSET) "(%rbp)" "\n" |
548 | "movq %rdx, " STRINGIZE_VALUE_OF(PROBE_PROBE_FUNCTION_OFFSET) "(%rbp)" "\n" |
549 | "movq %rbx, " STRINGIZE_VALUE_OF(PROBE_ARG_OFFSET) "(%rbp)" "\n" |
550 | "movq %rsi, " STRINGIZE_VALUE_OF(PROBE_CPU_ESI_OFFSET) "(%rbp)" "\n" |
551 | "movq %rdi, " STRINGIZE_VALUE_OF(PROBE_CPU_EDI_OFFSET) "(%rbp)" "\n" |
552 | |
553 | "movq 0 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rax), %rcx" "\n" |
554 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_EFLAGS_OFFSET) "(%rbp)" "\n" |
555 | "movq 1 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rax), %rcx" "\n" |
556 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_EIP_OFFSET) "(%rbp)" "\n" |
557 | "movq 2 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rax), %rcx" "\n" |
558 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_EBX_OFFSET) "(%rbp)" "\n" |
559 | "movq 3 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rax), %rcx" "\n" |
560 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_EDX_OFFSET) "(%rbp)" "\n" |
561 | "movq 4 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rax), %rcx" "\n" |
562 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_ECX_OFFSET) "(%rbp)" "\n" |
563 | "movq 5 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rax), %rcx" "\n" |
564 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_EAX_OFFSET) "(%rbp)" "\n" |
565 | |
566 | "movq %rax, %rcx" "\n" |
567 | "addq $" STRINGIZE_VALUE_OF(6 * PTR_SIZE) ", %rcx" "\n" |
568 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_ESP_OFFSET) "(%rbp)" "\n" |
569 | |
570 | "movq %r8, " STRINGIZE_VALUE_OF(PROBE_CPU_R8_OFFSET) "(%rbp)" "\n" |
571 | "movq %r9, " STRINGIZE_VALUE_OF(PROBE_CPU_R9_OFFSET) "(%rbp)" "\n" |
572 | "movq %r10, " STRINGIZE_VALUE_OF(PROBE_CPU_R10_OFFSET) "(%rbp)" "\n" |
573 | "movq %r11, " STRINGIZE_VALUE_OF(PROBE_CPU_R11_OFFSET) "(%rbp)" "\n" |
574 | "movq %r12, " STRINGIZE_VALUE_OF(PROBE_CPU_R12_OFFSET) "(%rbp)" "\n" |
575 | "movq %r13, " STRINGIZE_VALUE_OF(PROBE_CPU_R13_OFFSET) "(%rbp)" "\n" |
576 | "movq %r14, " STRINGIZE_VALUE_OF(PROBE_CPU_R14_OFFSET) "(%rbp)" "\n" |
577 | "movq %r15, " STRINGIZE_VALUE_OF(PROBE_CPU_R15_OFFSET) "(%rbp)" "\n" |
578 | |
579 | "movq %xmm0, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%rbp)" "\n" |
580 | "movq %xmm1, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%rbp)" "\n" |
581 | "movq %xmm2, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%rbp)" "\n" |
582 | "movq %xmm3, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%rbp)" "\n" |
583 | "movq %xmm4, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%rbp)" "\n" |
584 | "movq %xmm5, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%rbp)" "\n" |
585 | "movq %xmm6, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%rbp)" "\n" |
586 | "movq %xmm7, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%rbp)" "\n" |
587 | "movq %xmm8, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM8_OFFSET) "(%rbp)" "\n" |
588 | "movq %xmm9, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM9_OFFSET) "(%rbp)" "\n" |
589 | "movq %xmm10, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM10_OFFSET) "(%rbp)" "\n" |
590 | "movq %xmm11, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM11_OFFSET) "(%rbp)" "\n" |
591 | "movq %xmm12, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM12_OFFSET) "(%rbp)" "\n" |
592 | "movq %xmm13, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM13_OFFSET) "(%rbp)" "\n" |
593 | "movq %xmm14, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM14_OFFSET) "(%rbp)" "\n" |
594 | "movq %xmm15, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM15_OFFSET) "(%rbp)" "\n" |
595 | |
596 | "movq %rbp, %rdi" "\n" // the Probe::State* arg. |
597 | "call *" STRINGIZE_VALUE_OF(PROBE_EXECUTOR_OFFSET) "(%rbp)" "\n" |
598 | |
599 | // Make sure the Probe::State is entirely below the result stack pointer so |
600 | // that register values are still preserved when we call the initializeStack |
601 | // function. |
602 | "movq $" STRINGIZE_VALUE_OF(PROBE_SIZE + OUT_SIZE) ", %rcx" "\n" |
603 | "movq %rbp, %rax" "\n" |
604 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_ESP_OFFSET) "(%rbp), %rdx" "\n" |
605 | "addq %rcx, %rax" "\n" |
606 | "cmpq %rax, %rdx" "\n" |
607 | "jge " LOCAL_LABEL_STRING(ctiMasmProbeTrampolineProbeStateIsSafe) "\n" |
608 | |
609 | // Allocate a safe place on the stack below the result stack pointer to stash the Probe::State. |
610 | "subq %rcx, %rdx" "\n" |
611 | "andq $~0x1f, %rdx" "\n" // Keep the stack pointer 32 bytes aligned. |
612 | "xorq %rax, %rax" "\n" |
613 | "movq %rdx, %rsp" "\n" |
614 | |
615 | "movq $" STRINGIZE_VALUE_OF(PROBE_SIZE) ", %rcx" "\n" |
616 | |
617 | // Copy the Probe::State to the safe place. |
618 | LOCAL_LABEL_STRING(ctiMasmProbeTrampolineCopyLoop) ":" "\n" |
619 | "movq (%rbp, %rax), %rdx" "\n" |
620 | "movq %rdx, (%rsp, %rax)" "\n" |
621 | "addq $" STRINGIZE_VALUE_OF(PTR_SIZE) ", %rax" "\n" |
622 | "cmpq %rax, %rcx" "\n" |
623 | "jg " LOCAL_LABEL_STRING(ctiMasmProbeTrampolineCopyLoop) "\n" |
624 | |
625 | "movq %rsp, %rbp" "\n" |
626 | |
627 | // Call initializeStackFunction if present. |
628 | LOCAL_LABEL_STRING(ctiMasmProbeTrampolineProbeStateIsSafe) ":" "\n" |
629 | "xorq %rcx, %rcx" "\n" |
630 | "addq " STRINGIZE_VALUE_OF(PROBE_INIT_STACK_FUNCTION_OFFSET) "(%rbp), %rcx" "\n" |
631 | "je " LOCAL_LABEL_STRING(ctiMasmProbeTrampolineRestoreRegisters) "\n" |
632 | |
633 | "movq %rbp, %rdi" "\n" // the Probe::State* arg. |
634 | "call *%rcx" "\n" |
635 | |
636 | LOCAL_LABEL_STRING(ctiMasmProbeTrampolineRestoreRegisters) ":" "\n" |
637 | |
638 | // To enable probes to modify register state, we copy all registers |
639 | // out of the Probe::State before returning. |
640 | |
641 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_EDX_OFFSET) "(%rbp), %rdx" "\n" |
642 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_EBX_OFFSET) "(%rbp), %rbx" "\n" |
643 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_ESI_OFFSET) "(%rbp), %rsi" "\n" |
644 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_EDI_OFFSET) "(%rbp), %rdi" "\n" |
645 | |
646 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R8_OFFSET) "(%rbp), %r8" "\n" |
647 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R9_OFFSET) "(%rbp), %r9" "\n" |
648 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R10_OFFSET) "(%rbp), %r10" "\n" |
649 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R11_OFFSET) "(%rbp), %r11" "\n" |
650 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R12_OFFSET) "(%rbp), %r12" "\n" |
651 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R13_OFFSET) "(%rbp), %r13" "\n" |
652 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R14_OFFSET) "(%rbp), %r14" "\n" |
653 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R15_OFFSET) "(%rbp), %r15" "\n" |
654 | |
655 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%rbp), %xmm0" "\n" |
656 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%rbp), %xmm1" "\n" |
657 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%rbp), %xmm2" "\n" |
658 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%rbp), %xmm3" "\n" |
659 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%rbp), %xmm4" "\n" |
660 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%rbp), %xmm5" "\n" |
661 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%rbp), %xmm6" "\n" |
662 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%rbp), %xmm7" "\n" |
663 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM8_OFFSET) "(%rbp), %xmm8" "\n" |
664 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM9_OFFSET) "(%rbp), %xmm9" "\n" |
665 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM10_OFFSET) "(%rbp), %xmm10" "\n" |
666 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM11_OFFSET) "(%rbp), %xmm11" "\n" |
667 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM12_OFFSET) "(%rbp), %xmm12" "\n" |
668 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM13_OFFSET) "(%rbp), %xmm13" "\n" |
669 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM14_OFFSET) "(%rbp), %xmm14" "\n" |
670 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM15_OFFSET) "(%rbp), %xmm15" "\n" |
671 | |
672 | // There are 6 more registers left to restore: |
673 | // rax, rcx, rbp, rsp, rip, and rflags. |
674 | |
675 | // The restoration process at ctiMasmProbeTrampolineEnd below works by popping |
676 | // 5 words off the stack into rflags, rax, rcx, rbp, and rip. These 5 words need |
677 | // to be pushed on top of the final esp value so that just by popping the 5 words, |
678 | // we'll get the esp that the probe wants to set. Let's call this area (for storing |
679 | // these 5 words) the restore area. |
680 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_ESP_OFFSET) "(%rbp), %rcx" "\n" |
681 | "subq $5 * " STRINGIZE_VALUE_OF(PTR_SIZE) ", %rcx" "\n" |
682 | |
683 | // rcx now points to the restore area. |
684 | |
685 | // Copy remaining restore values from the Probe::State to the restore area. |
686 | // Note: We already ensured above that the Probe::State is in a safe location before |
687 | // calling the initializeStackFunction. The initializeStackFunction is not allowed to |
688 | // change the stack pointer again. |
689 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_EFLAGS_OFFSET) "(%rbp), %rax" "\n" |
690 | "movq %rax, 0 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rcx)" "\n" |
691 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_EAX_OFFSET) "(%rbp), %rax" "\n" |
692 | "movq %rax, 1 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rcx)" "\n" |
693 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_ECX_OFFSET) "(%rbp), %rax" "\n" |
694 | "movq %rax, 2 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rcx)" "\n" |
695 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_EBP_OFFSET) "(%rbp), %rax" "\n" |
696 | "movq %rax, 3 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rcx)" "\n" |
697 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_EIP_OFFSET) "(%rbp), %rax" "\n" |
698 | "movq %rax, 4 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rcx)" "\n" |
699 | "movq %rcx, %rsp" "\n" |
700 | |
701 | // Do the remaining restoration by popping off the restore area. |
702 | "popfq" "\n" |
703 | "popq %rax" "\n" |
704 | "popq %rcx" "\n" |
705 | "popq %rbp" "\n" |
706 | "ret" "\n" |
707 | ); |
708 | #endif // COMPILER(GCC_COMPATIBLE) |
709 | #endif // CPU(X86_64) |
710 | |
711 | // What code is emitted for the probe? |
712 | // ================================== |
713 | // We want to keep the size of the emitted probe invocation code as compact as |
714 | // possible to minimize the perturbation to the JIT generated code. However, |
715 | // we also need to preserve the CPU registers and set up the Probe::State to be |
716 | // passed to the user probe function. |
717 | // |
718 | // Hence, we do only the minimum here to preserve a scratch register (i.e. rax |
719 | // in this case) and the stack pointer (i.e. rsp), and pass the probe arguments. |
720 | // We'll let the ctiMasmProbeTrampoline handle the rest of the probe invocation |
721 | // work i.e. saving the CPUState (and setting up the Probe::State), calling the |
722 | // user probe function, and restoring the CPUState before returning to JIT |
723 | // generated code. |
724 | // |
725 | // What registers need to be saved? |
726 | // =============================== |
727 | // The registers are saved for 2 reasons: |
// 1. To preserve their state in the JITted code. This means that all registers
//    that are not callee saved need to be saved. We also need to save the
730 | // condition code registers because the probe can be inserted between a test |
731 | // and a branch. |
732 | // 2. To allow the probe to inspect the values of the registers for debugging |
733 | // purposes. This means all registers need to be saved. |
734 | // |
735 | // In summary, save everything. But for reasons stated above, we should do the |
736 | // minimum here and let ctiMasmProbeTrampoline do the heavy lifting to save the |
737 | // full set. |
738 | // |
739 | // What values are in the saved registers? |
740 | // ====================================== |
741 | // Conceptually, the saved registers should contain values as if the probe |
742 | // is not present in the JIT generated code. Hence, they should contain values |
743 | // that are expected at the start of the instruction immediately following the |
744 | // probe. |
745 | // |
746 | // Specifically, the saved stack pointer register will point to the stack |
747 | // position before we push the Probe::State frame. The saved rip will point to |
748 | // the address of the instruction immediately following the probe. |
749 | |
750 | void MacroAssembler::probe(Probe::Function function, void* arg) |
751 | { |
752 | push(RegisterID::eax); |
753 | move(TrustedImmPtr(reinterpret_cast<void*>(ctiMasmProbeTrampoline)), RegisterID::eax); |
754 | push(RegisterID::ecx); |
755 | move(TrustedImmPtr(reinterpret_cast<void*>(Probe::executeProbe)), RegisterID::ecx); |
756 | push(RegisterID::edx); |
757 | move(TrustedImmPtr(reinterpret_cast<void*>(function)), RegisterID::edx); |
758 | push(RegisterID::ebx); |
759 | move(TrustedImmPtr(arg), RegisterID::ebx); |
760 | call(RegisterID::eax, CFunctionPtrTag); |
761 | } |
762 | #endif // ENABLE(MASM_PROBE) |
763 | |
764 | MacroAssemblerX86Common::CPUID MacroAssemblerX86Common::getCPUID(unsigned level) |
765 | { |
766 | return getCPUIDEx(level, 0); |
767 | } |
768 | |
769 | MacroAssemblerX86Common::CPUID MacroAssemblerX86Common::getCPUIDEx(unsigned level, unsigned count) |
770 | { |
771 | CPUID result { }; |
772 | #if COMPILER(MSVC) |
773 | __cpuidex(bitwise_cast<int*>(result.data()), level, count); |
774 | #else |
775 | __asm__ ( |
776 | "cpuid\n" |
777 | : "=a" (result[0]), "=b" (result[1]), "=c" (result[2]), "=d" (result[3]) |
778 | : "0" (level), "2" (count) |
779 | ); |
780 | #endif |
781 | return result; |
782 | } |
783 | |
784 | void MacroAssemblerX86Common::collectCPUFeatures() |
785 | { |
786 | static std::once_flag onceKey; |
787 | std::call_once(onceKey, [] { |
788 | { |
789 | CPUID cpuid = getCPUID(0x1); |
790 | s_sse4_1CheckState = (cpuid[2] & (1 << 19)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear; |
791 | s_sse4_2CheckState = (cpuid[2] & (1 << 20)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear; |
792 | s_popcntCheckState = (cpuid[2] & (1 << 23)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear; |
793 | s_avxCheckState = (cpuid[2] & (1 << 28)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear; |
794 | } |
795 | { |
796 | CPUID cpuid = getCPUID(0x7); |
797 | s_bmi1CheckState = (cpuid[2] & (1 << 3)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear; |
798 | } |
799 | { |
800 | CPUID cpuid = getCPUID(0x80000001); |
801 | s_lzcntCheckState = (cpuid[2] & (1 << 5)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear; |
802 | } |
803 | }); |
804 | } |
805 | |
806 | MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_sse4_1CheckState = CPUIDCheckState::NotChecked; |
807 | MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_sse4_2CheckState = CPUIDCheckState::NotChecked; |
808 | MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_avxCheckState = CPUIDCheckState::NotChecked; |
809 | MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_lzcntCheckState = CPUIDCheckState::NotChecked; |
810 | MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_bmi1CheckState = CPUIDCheckState::NotChecked; |
811 | MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_popcntCheckState = CPUIDCheckState::NotChecked; |
812 | |
813 | } // namespace JSC |
814 | |
815 | #endif // ENABLE(ASSEMBLER) && (CPU(X86) || CPU(X86_64)) |
816 | |