1 | /* udis86 - libudis86/decode.c |
2 | * |
3 | * Copyright (c) 2002-2009 Vivek Thampi |
4 | * All rights reserved. |
5 | * |
6 | * Redistribution and use in source and binary forms, with or without modification, |
7 | * are permitted provided that the following conditions are met: |
8 | * |
9 | * * Redistributions of source code must retain the above copyright notice, |
10 | * this list of conditions and the following disclaimer. |
11 | * * Redistributions in binary form must reproduce the above copyright notice, |
12 | * this list of conditions and the following disclaimer in the documentation |
13 | * and/or other materials provided with the distribution. |
14 | * |
15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
17 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
19 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
20 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
22 | * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
24 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | */ |
26 | |
27 | #include "config.h" |
28 | |
29 | #if USE(UDIS86) |
30 | |
31 | #include "udis86_udint.h" |
32 | #include "udis86_types.h" |
33 | #include "udis86_extern.h" |
34 | #include "udis86_decode.h" |
35 | |
36 | #ifndef __UD_STANDALONE__ |
37 | # include <string.h> |
38 | #endif /* __UD_STANDALONE__ */ |
39 | |
/* The max number of prefixes for an instruction */
41 | #define MAX_PREFIXES 15 |
42 | |
43 | /* rex prefix bits */ |
44 | #define REX_W(r) ( ( 0xF & ( r ) ) >> 3 ) |
45 | #define REX_R(r) ( ( 0x7 & ( r ) ) >> 2 ) |
46 | #define REX_X(r) ( ( 0x3 & ( r ) ) >> 1 ) |
47 | #define REX_B(r) ( ( 0x1 & ( r ) ) >> 0 ) |
48 | #define REX_PFX_MASK(n) ( ( P_REXW(n) << 3 ) | \ |
49 | ( P_REXR(n) << 2 ) | \ |
50 | ( P_REXX(n) << 1 ) | \ |
51 | ( P_REXB(n) << 0 ) ) |
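
/* A rex prefix is a single byte of the form 0100wrxb (0x40-0x4f).
 * REX_PFX_MASK builds a wrxb mask of the rex bits that an opcode
 * honors, from the P_REX* flags in its opcode-map entry.
 */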
52 | |
/* scale-index-base bits */
54 | #define SIB_S(b) ( ( b ) >> 6 ) |
55 | #define SIB_I(b) ( ( ( b ) >> 3 ) & 7 ) |
56 | #define SIB_B(b) ( ( b ) & 7 ) |
57 | |
58 | /* modrm bits */ |
59 | #define MODRM_REG(b) ( ( ( b ) >> 3 ) & 7 ) |
60 | #define MODRM_NNN(b) ( ( ( b ) >> 3 ) & 7 ) |
61 | #define MODRM_MOD(b) ( ( ( b ) >> 6 ) & 3 ) |
62 | #define MODRM_RM(b) ( ( b ) & 7 ) |
63 | |
64 | static int decode_ext(struct ud *u, uint16_t ptr); |
65 | static int decode_opcode(struct ud *u); |
66 | |
67 | enum reg_class { /* register classes */ |
68 | REGCLASS_GPR, |
69 | REGCLASS_MMX, |
70 | REGCLASS_CR, |
71 | REGCLASS_DB, |
72 | REGCLASS_SEG, |
73 | REGCLASS_XMM |
74 | }; |
75 | |
76 | /* |
77 | * inp_start |
 *    Should be called before each decode operation.
79 | */ |
80 | static void |
81 | inp_start(struct ud *u) |
82 | { |
83 | u->inp_ctr = 0; |
84 | } |
85 | |
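/*
 * inp_peek
 *    Return the next input byte without consuming it, whether input
 *    comes from the fixed buffer or the input hook. On end-of-input,
 *    flag an error and return 0.
 */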
86 | static uint8_t |
87 | inp_peek(struct ud *u) |
88 | { |
89 | if (u->inp_end == 0) { |
90 | if (u->inp_buf != NULL) { |
91 | if (u->inp_buf_index < u->inp_buf_size) { |
92 | return u->inp_buf[u->inp_buf_index]; |
93 | } |
94 | } else if (u->inp_peek != UD_EOI) { |
95 | return u->inp_peek; |
96 | } else { |
97 | int c; |
98 | if ((c = u->inp_hook(u)) != UD_EOI) { |
99 | u->inp_peek = c; |
100 | return u->inp_peek; |
101 | } |
102 | } |
103 | } |
104 | u->inp_end = 1; |
105 | UDERR(u, "byte expected, eoi received\n" ); |
106 | return 0; |
107 | } |
108 | |
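/*
 * inp_next
 *    Consume and return the next input byte, advancing the
 *    per-instruction byte counter. On end-of-input, flag an error
 *    and return 0.
 */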
109 | static uint8_t |
110 | inp_next(struct ud *u) |
111 | { |
112 | if (u->inp_end == 0) { |
113 | if (u->inp_buf != NULL) { |
114 | if (u->inp_buf_index < u->inp_buf_size) { |
115 | u->inp_ctr++; |
116 | return (u->inp_curr = u->inp_buf[u->inp_buf_index++]); |
117 | } |
118 | } else { |
119 | int c = u->inp_peek; |
120 | if (c != UD_EOI || (c = u->inp_hook(u)) != UD_EOI) { |
121 | u->inp_peek = UD_EOI; |
122 | u->inp_curr = c; |
123 | u->inp_sess[u->inp_ctr++] = u->inp_curr; |
124 | return u->inp_curr; |
125 | } |
126 | } |
127 | } |
128 | u->inp_end = 1; |
129 | UDERR(u, "byte expected, eoi received\n" ); |
130 | return 0; |
131 | } |
132 | |
133 | static uint8_t |
134 | inp_curr(struct ud *u) |
135 | { |
136 | return u->inp_curr; |
137 | } |
138 | |
139 | |
140 | /* |
141 | * inp_uint8 |
 * inp_uint16
 * inp_uint32
 * inp_uint64
145 | * Load little-endian values from input |
146 | */ |
147 | static uint8_t |
148 | inp_uint8(struct ud* u) |
149 | { |
150 | return inp_next(u); |
151 | } |
152 | |
153 | static uint16_t |
154 | inp_uint16(struct ud* u) |
155 | { |
156 | uint16_t r, ret; |
157 | |
158 | ret = inp_next(u); |
159 | r = inp_next(u); |
160 | return ret | (r << 8); |
161 | } |
162 | |
163 | static uint32_t |
164 | inp_uint32(struct ud* u) |
165 | { |
166 | uint32_t r, ret; |
167 | |
168 | ret = inp_next(u); |
169 | r = inp_next(u); |
170 | ret = ret | (r << 8); |
171 | r = inp_next(u); |
172 | ret = ret | (r << 16); |
173 | r = inp_next(u); |
174 | return ret | (r << 24); |
175 | } |
176 | |
177 | static uint64_t |
178 | inp_uint64(struct ud* u) |
179 | { |
180 | uint64_t r, ret; |
181 | |
182 | ret = inp_next(u); |
183 | r = inp_next(u); |
184 | ret = ret | (r << 8); |
185 | r = inp_next(u); |
186 | ret = ret | (r << 16); |
187 | r = inp_next(u); |
188 | ret = ret | (r << 24); |
189 | r = inp_next(u); |
190 | ret = ret | (r << 32); |
191 | r = inp_next(u); |
192 | ret = ret | (r << 40); |
193 | r = inp_next(u); |
194 | ret = ret | (r << 48); |
195 | r = inp_next(u); |
196 | return ret | (r << 56); |
197 | } |
198 | |
199 | |
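/*
 * eff_opr_mode
 *    Compute the effective operand size, in bits, from the
 *    disassembly mode, the rex.w bit, and the 0x66 operand-size
 *    override prefix.
 */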
200 | static UD_INLINE int |
201 | eff_opr_mode(int dis_mode, int rex_w, int pfx_opr) |
202 | { |
203 | if (dis_mode == 64) { |
204 | return rex_w ? 64 : (pfx_opr ? 16 : 32); |
205 | } else if (dis_mode == 32) { |
206 | return pfx_opr ? 16 : 32; |
207 | } else { |
208 | UD_ASSERT(dis_mode == 16); |
209 | return pfx_opr ? 32 : 16; |
210 | } |
211 | } |
212 | |
213 | |
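/*
 * eff_adr_mode
 *    Compute the effective address size, in bits, from the
 *    disassembly mode and the 0x67 address-size override prefix.
 */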
214 | static UD_INLINE int |
215 | eff_adr_mode(int dis_mode, int pfx_adr) |
216 | { |
217 | if (dis_mode == 64) { |
218 | return pfx_adr ? 32 : 64; |
219 | } else if (dis_mode == 32) { |
220 | return pfx_adr ? 16 : 32; |
221 | } else { |
222 | UD_ASSERT(dis_mode == 16); |
223 | return pfx_adr ? 32 : 16; |
224 | } |
225 | } |
226 | |
227 | |
228 | /* |
229 | * decode_prefixes |
230 | * |
231 | * Extracts instruction prefixes. |
232 | */ |
233 | static int |
234 | decode_prefixes(struct ud *u) |
235 | { |
236 | int done = 0; |
237 | uint8_t curr = 0, last = 0; |
238 | UD_RETURN_ON_ERROR(u); |
239 | |
240 | do { |
241 | last = curr; |
242 | curr = inp_next(u); |
243 | UD_RETURN_ON_ERROR(u); |
244 | if (u->inp_ctr == MAX_INSN_LENGTH) { |
245 | UD_RETURN_WITH_ERROR(u, "max instruction length" ); |
246 | } |
247 | |
248 | switch (curr) |
249 | { |
250 | case 0x2E: |
251 | u->pfx_seg = UD_R_CS; |
252 | break; |
253 | case 0x36: |
254 | u->pfx_seg = UD_R_SS; |
255 | break; |
256 | case 0x3E: |
257 | u->pfx_seg = UD_R_DS; |
258 | break; |
259 | case 0x26: |
260 | u->pfx_seg = UD_R_ES; |
261 | break; |
262 | case 0x64: |
263 | u->pfx_seg = UD_R_FS; |
264 | break; |
265 | case 0x65: |
266 | u->pfx_seg = UD_R_GS; |
267 | break; |
    case 0x67: /* address-size override prefix */
269 | u->pfx_adr = 0x67; |
270 | break; |
271 | case 0xF0: |
272 | u->pfx_lock = 0xF0; |
273 | break; |
274 | case 0x66: |
275 | u->pfx_opr = 0x66; |
276 | break; |
277 | case 0xF2: |
278 | u->pfx_str = 0xf2; |
279 | break; |
280 | case 0xF3: |
281 | u->pfx_str = 0xf3; |
282 | break; |
283 | default: |
284 | /* consume if rex */ |
285 | done = (u->dis_mode == 64 && (curr & 0xF0) == 0x40) ? 0 : 1; |
286 | break; |
287 | } |
288 | } while (!done); |
  /* a rex prefix in 64-bit mode must be the last prefix */
290 | if (u->dis_mode == 64 && (last & 0xF0) == 0x40) { |
291 | u->pfx_rex = last; |
292 | } |
293 | return 0; |
294 | } |
295 | |
296 | |
297 | /* |
298 | * vex_l, vex_w |
299 | * Return the vex.L and vex.W bits |
300 | */ |
301 | static UD_INLINE uint8_t |
302 | vex_l(const struct ud *u) |
303 | { |
304 | UD_ASSERT(u->vex_op != 0); |
305 | return ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 2) & 1; |
306 | } |
307 | |
308 | static UD_INLINE uint8_t |
309 | vex_w(const struct ud *u) |
310 | { |
311 | UD_ASSERT(u->vex_op != 0); |
312 | return u->vex_op == 0xc4 ? ((u->vex_b2 >> 7) & 1) : 0; |
313 | } |
314 | |
315 | |
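/*
 * modrm
 *    Fetch the modrm byte of the current instruction, consuming it
 *    from the input on first use and returning the cached value
 *    thereafter.
 */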
316 | static UD_INLINE uint8_t |
317 | modrm(struct ud * u) |
318 | { |
319 | if ( !u->have_modrm ) { |
320 | u->modrm = inp_next( u ); |
321 | u->modrm_offset = (uint8_t) (u->inp_ctr - 1); |
322 | u->have_modrm = 1; |
323 | } |
324 | return u->modrm; |
325 | } |
326 | |
327 | |
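/*
 * resolve_operand_size
 *    Resolve a variable operand-size code (SZ_V, SZ_Z, SZ_Y, ...)
 *    to a concrete size, given the effective operand-size mode.
 */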
328 | static unsigned int |
329 | resolve_operand_size(const struct ud* u, ud_operand_size_t osize) |
330 | { |
331 | switch (osize) { |
332 | case SZ_V: |
333 | return u->opr_mode; |
334 | case SZ_Z: |
335 | return u->opr_mode == 16 ? 16 : 32; |
336 | case SZ_Y: |
337 | return u->opr_mode == 16 ? 32 : u->opr_mode; |
338 | case SZ_RDQ: |
339 | return u->dis_mode == 64 ? 64 : 32; |
340 | case SZ_X: |
341 | UD_ASSERT(u->vex_op != 0); |
342 | return (P_VEXL(u->itab_entry->prefix) && vex_l(u)) ? SZ_QQ : SZ_DQ; |
343 | default: |
344 | return osize; |
345 | } |
346 | } |
347 | |
348 | |
349 | static int resolve_mnemonic( struct ud* u ) |
350 | { |
351 | /* resolve 3dnow weirdness. */ |
352 | if ( u->mnemonic == UD_I3dnow ) { |
353 | u->mnemonic = ud_itab[ u->le->table[ inp_curr( u ) ] ].mnemonic; |
354 | } |
  /* SWAPGS is only valid in 64-bit mode */
  if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
    UDERR(u, "swapgs invalid outside 64-bit mode\n" );
358 | return -1; |
359 | } |
360 | |
361 | if (u->mnemonic == UD_Ixchg) { |
362 | if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX && |
363 | u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) || |
364 | (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX && |
365 | u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) { |
366 | u->operand[0].type = UD_NONE; |
367 | u->operand[1].type = UD_NONE; |
368 | u->mnemonic = UD_Inop; |
369 | } |
370 | } |
371 | |
372 | if (u->mnemonic == UD_Inop && u->pfx_repe) { |
373 | u->pfx_repe = 0; |
374 | u->mnemonic = UD_Ipause; |
375 | } |
376 | return 0; |
377 | } |
378 | |
379 | |
380 | /* ----------------------------------------------------------------------------- |
 * decode_a() - Decodes operands of the type seg:offset
382 | * ----------------------------------------------------------------------------- |
383 | */ |
384 | static void |
385 | decode_a(struct ud* u, struct ud_operand *op) |
386 | { |
387 | if (u->opr_mode == 16) { |
388 | /* seg16:off16 */ |
389 | op->type = UD_OP_PTR; |
390 | op->size = 32; |
391 | op->lval.ptr.off = inp_uint16(u); |
392 | op->lval.ptr.seg = inp_uint16(u); |
393 | } else { |
394 | /* seg16:off32 */ |
395 | op->type = UD_OP_PTR; |
396 | op->size = 48; |
397 | op->lval.ptr.off = inp_uint32(u); |
398 | op->lval.ptr.seg = inp_uint16(u); |
399 | } |
400 | } |
401 | |
402 | /* ----------------------------------------------------------------------------- |
403 | * decode_gpr() - Returns decoded General Purpose Register |
404 | * ----------------------------------------------------------------------------- |
405 | */ |
406 | static enum ud_type |
407 | decode_gpr(register struct ud* u, unsigned int s, unsigned char rm) |
408 | { |
409 | switch (s) { |
410 | case 64: |
411 | return UD_R_RAX + rm; |
412 | case 32: |
413 | return UD_R_EAX + rm; |
414 | case 16: |
415 | return UD_R_AX + rm; |
416 | case 8: |
417 | if (u->dis_mode == 64 && u->pfx_rex) { |
418 | if (rm >= 4) |
419 | return UD_R_SPL + (rm-4); |
420 | return UD_R_AL + rm; |
421 | } else return UD_R_AL + rm; |
422 | case 0: |
423 | /* invalid size in case of a decode error */ |
424 | UD_ASSERT(u->error); |
425 | return UD_NONE; |
426 | default: |
427 | UD_ASSERT(!"invalid operand size" ); |
428 | return UD_NONE; |
429 | } |
430 | } |
431 | |
432 | static void |
433 | decode_reg(struct ud *u, |
434 | struct ud_operand *opr, |
435 | int type, |
436 | int num, |
437 | int size) |
438 | { |
439 | int reg; |
440 | size = resolve_operand_size(u, size); |
441 | switch (type) { |
442 | case REGCLASS_GPR : reg = decode_gpr(u, size, num); break; |
443 | case REGCLASS_MMX : reg = UD_R_MM0 + (num & 7); break; |
444 | case REGCLASS_XMM : |
445 | reg = num + (size == SZ_QQ ? UD_R_YMM0 : UD_R_XMM0); |
446 | break; |
447 | case REGCLASS_CR : reg = UD_R_CR0 + num; break; |
448 | case REGCLASS_DB : reg = UD_R_DR0 + num; break; |
449 | case REGCLASS_SEG : { |
450 | /* |
451 | * Only 6 segment registers, anything else is an error. |
452 | */ |
453 | if ((num & 7) > 5) { |
454 | UDERR(u, "invalid segment register value\n" ); |
455 | return; |
456 | } else { |
457 | reg = UD_R_ES + (num & 7); |
458 | } |
459 | break; |
460 | } |
461 | default: |
462 | UD_ASSERT(!"invalid register type" ); |
463 | return; |
464 | } |
465 | opr->type = UD_OP_REG; |
466 | opr->base = reg; |
467 | opr->size = size; |
468 | } |
469 | |
470 | |
471 | /* |
472 | * decode_imm |
473 | * |
474 | * Decode Immediate values. |
475 | */ |
476 | static void |
477 | decode_imm(struct ud* u, unsigned int size, struct ud_operand *op) |
478 | { |
479 | op->size = resolve_operand_size(u, size); |
480 | op->type = UD_OP_IMM; |
481 | |
482 | switch (op->size) { |
483 | case 8: op->lval.sbyte = inp_uint8(u); break; |
484 | case 16: op->lval.uword = inp_uint16(u); break; |
485 | case 32: op->lval.udword = inp_uint32(u); break; |
486 | case 64: op->lval.uqword = inp_uint64(u); break; |
487 | default: return; |
488 | } |
489 | } |
490 | |
491 | |
492 | /* |
493 | * decode_mem_disp |
494 | * |
495 | * Decode mem address displacement. |
496 | */ |
497 | static void |
498 | decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op) |
499 | { |
500 | switch (size) { |
501 | case 8: |
502 | op->offset = 8; |
503 | op->lval.ubyte = inp_uint8(u); |
504 | break; |
505 | case 16: |
506 | op->offset = 16; |
507 | op->lval.uword = inp_uint16(u); |
508 | break; |
509 | case 32: |
510 | op->offset = 32; |
511 | op->lval.udword = inp_uint32(u); |
512 | break; |
513 | case 64: |
514 | op->offset = 64; |
515 | op->lval.uqword = inp_uint64(u); |
516 | break; |
517 | default: |
518 | return; |
519 | } |
520 | } |
521 | |
522 | |
523 | /* |
524 | * decode_modrm_reg |
525 | * |
526 | * Decodes reg field of mod/rm byte |
527 | * |
528 | */ |
529 | static UD_INLINE void |
530 | decode_modrm_reg(struct ud *u, |
531 | struct ud_operand *operand, |
532 | unsigned int type, |
533 | unsigned int size) |
534 | { |
535 | uint8_t reg = (REX_R(u->_rex) << 3) | MODRM_REG(modrm(u)); |
536 | decode_reg(u, operand, type, reg, size); |
537 | } |
538 | |
539 | |
540 | /* |
541 | * decode_modrm_rm |
542 | * |
543 | * Decodes rm field of mod/rm byte |
544 | * |
545 | */ |
546 | static void |
547 | decode_modrm_rm(struct ud *u, |
548 | struct ud_operand *op, |
549 | unsigned char type, /* register type */ |
550 | unsigned int size) /* operand size */ |
552 | { |
553 | size_t offset = 0; |
554 | unsigned char mod, rm; |
555 | |
556 | /* get mod, r/m and reg fields */ |
557 | mod = MODRM_MOD(modrm(u)); |
558 | rm = (REX_B(u->_rex) << 3) | MODRM_RM(modrm(u)); |
559 | |
560 | /* |
561 | * If mod is 11b, then the modrm.rm specifies a register. |
562 | * |
563 | */ |
564 | if (mod == 3) { |
565 | decode_reg(u, op, type, rm, size); |
566 | return; |
567 | } |
568 | |
569 | /* |
570 | * !11b => Memory Address |
571 | */ |
572 | op->type = UD_OP_MEM; |
573 | op->size = resolve_operand_size(u, size); |
574 | |
575 | if (u->adr_mode == 64) { |
576 | op->base = UD_R_RAX + rm; |
577 | if (mod == 1) { |
578 | offset = 8; |
579 | } else if (mod == 2) { |
580 | offset = 32; |
581 | } else if (mod == 0 && (rm & 7) == 5) { |
582 | op->base = UD_R_RIP; |
583 | offset = 32; |
584 | } else { |
585 | offset = 0; |
586 | } |
587 | /* |
588 | * Scale-Index-Base (SIB) |
589 | */ |
590 | if ((rm & 7) == 4) { |
591 | inp_next(u); |
592 | |
593 | op->base = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->_rex) << 3)); |
594 | op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->_rex) << 3)); |
      /* special case: index 0b100 with rex.x clear (rsp) means no index */
596 | if (op->index == UD_R_RSP) { |
597 | op->index = UD_NONE; |
598 | op->scale = UD_NONE; |
599 | } else { |
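        /* a scale factor of 1 (sib.ss == 0) is canonicalized to
         * UD_NONE by the & ~1 */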
600 | op->scale = (1 << SIB_S(inp_curr(u))) & ~1; |
601 | } |
602 | |
603 | if (op->base == UD_R_RBP || op->base == UD_R_R13) { |
604 | if (mod == 0) { |
605 | op->base = UD_NONE; |
606 | } |
607 | if (mod == 1) { |
608 | offset = 8; |
609 | } else { |
610 | offset = 32; |
611 | } |
612 | } |
613 | } else { |
614 | op->scale = UD_NONE; |
615 | op->index = UD_NONE; |
616 | } |
617 | } else if (u->adr_mode == 32) { |
618 | op->base = UD_R_EAX + rm; |
619 | if (mod == 1) { |
620 | offset = 8; |
621 | } else if (mod == 2) { |
622 | offset = 32; |
623 | } else if (mod == 0 && rm == 5) { |
624 | op->base = UD_NONE; |
625 | offset = 32; |
626 | } else { |
627 | offset = 0; |
628 | } |
629 | |
630 | /* Scale-Index-Base (SIB) */ |
631 | if ((rm & 7) == 4) { |
632 | inp_next(u); |
633 | |
634 | op->scale = (1 << SIB_S(inp_curr(u))) & ~1; |
635 | op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); |
636 | op->base = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); |
637 | |
638 | if (op->index == UD_R_ESP) { |
639 | op->index = UD_NONE; |
640 | op->scale = UD_NONE; |
641 | } |
642 | |
643 | /* special condition for base reference */ |
644 | if (op->base == UD_R_EBP) { |
645 | if (mod == 0) { |
646 | op->base = UD_NONE; |
647 | } |
648 | if (mod == 1) { |
649 | offset = 8; |
650 | } else { |
651 | offset = 32; |
652 | } |
653 | } |
654 | } else { |
655 | op->scale = UD_NONE; |
656 | op->index = UD_NONE; |
657 | } |
658 | } else { |
659 | const unsigned int bases[] = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP, |
660 | UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX }; |
661 | const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI, |
662 | UD_NONE, UD_NONE, UD_NONE, UD_NONE }; |
663 | op->base = bases[rm & 7]; |
664 | op->index = indices[rm & 7]; |
665 | op->scale = UD_NONE; |
666 | if (mod == 0 && rm == 6) { |
667 | offset = 16; |
668 | op->base = UD_NONE; |
669 | } else if (mod == 1) { |
670 | offset = 8; |
671 | } else if (mod == 2) { |
672 | offset = 16; |
673 | } |
674 | } |
675 | |
676 | if (offset) { |
677 | decode_mem_disp(u, offset, op); |
678 | } else { |
679 | op->offset = 0; |
680 | } |
681 | } |
682 | |
683 | |
684 | /* |
685 | * decode_moffset |
686 | * Decode offset-only memory operand |
687 | */ |
688 | static void |
689 | decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr) |
690 | { |
691 | opr->type = UD_OP_MEM; |
692 | opr->base = UD_NONE; |
693 | opr->index = UD_NONE; |
694 | opr->scale = UD_NONE; |
695 | opr->size = resolve_operand_size(u, size); |
696 | decode_mem_disp(u, u->adr_mode, opr); |
697 | } |
698 | |
699 | |
700 | static void |
701 | decode_vex_vvvv(struct ud *u, struct ud_operand *opr, unsigned size) |
702 | { |
703 | uint8_t vvvv; |
704 | UD_ASSERT(u->vex_op != 0); |
705 | vvvv = ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 3) & 0xf; |
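  /* vex.vvvv encodes the register number in one's complement */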
706 | decode_reg(u, opr, REGCLASS_XMM, (0xf & ~vvvv), size); |
707 | } |
708 | |
709 | |
710 | /* |
711 | * decode_vex_immreg |
712 | * Decode source operand encoded in immediate byte [7:4] |
713 | */ |
714 | static int |
715 | decode_vex_immreg(struct ud *u, struct ud_operand *opr, unsigned size) |
716 | { |
717 | uint8_t imm = inp_next(u); |
718 | uint8_t mask = u->dis_mode == 64 ? 0xf : 0x7; |
719 | UD_RETURN_ON_ERROR(u); |
720 | UD_ASSERT(u->vex_op != 0); |
721 | decode_reg(u, opr, REGCLASS_XMM, mask & (imm >> 4), size); |
722 | return 0; |
723 | } |
724 | |
725 | |
726 | /* |
727 | * decode_operand |
728 | * |
729 | * Decodes a single operand. |
730 | * Returns the type of the operand (UD_NONE if none) |
731 | */ |
732 | static int |
733 | decode_operand(struct ud *u, |
734 | struct ud_operand *operand, |
735 | enum ud_operand_code type, |
736 | unsigned int size) |
737 | { |
738 | operand->type = UD_NONE; |
739 | operand->_oprcode = type; |
740 | |
741 | switch (type) { |
742 | case OP_A : |
743 | decode_a(u, operand); |
744 | break; |
745 | case OP_MR: |
746 | decode_modrm_rm(u, operand, REGCLASS_GPR, |
747 | MODRM_MOD(modrm(u)) == 3 ? |
748 | Mx_reg_size(size) : Mx_mem_size(size)); |
749 | break; |
750 | case OP_F: |
751 | u->br_far = 1; |
752 | FALLTHROUGH; |
753 | case OP_M: |
754 | if (MODRM_MOD(modrm(u)) == 3) { |
755 | UDERR(u, "expected modrm.mod != 3\n" ); |
756 | } |
757 | FALLTHROUGH; |
758 | case OP_E: |
759 | decode_modrm_rm(u, operand, REGCLASS_GPR, size); |
760 | break; |
761 | case OP_G: |
762 | decode_modrm_reg(u, operand, REGCLASS_GPR, size); |
763 | break; |
764 | case OP_sI: |
765 | case OP_I: |
766 | decode_imm(u, size, operand); |
767 | break; |
768 | case OP_I1: |
769 | operand->type = UD_OP_CONST; |
770 | operand->lval.udword = 1; |
771 | break; |
772 | case OP_N: |
773 | if (MODRM_MOD(modrm(u)) != 3) { |
774 | UDERR(u, "expected modrm.mod == 3\n" ); |
775 | } |
776 | FALLTHROUGH; |
777 | case OP_Q: |
778 | decode_modrm_rm(u, operand, REGCLASS_MMX, size); |
779 | break; |
780 | case OP_P: |
781 | decode_modrm_reg(u, operand, REGCLASS_MMX, size); |
782 | break; |
783 | case OP_U: |
784 | if (MODRM_MOD(modrm(u)) != 3) { |
785 | UDERR(u, "expected modrm.mod == 3\n" ); |
786 | } |
787 | FALLTHROUGH; |
788 | case OP_W: |
789 | decode_modrm_rm(u, operand, REGCLASS_XMM, size); |
790 | break; |
791 | case OP_V: |
792 | decode_modrm_reg(u, operand, REGCLASS_XMM, size); |
793 | break; |
794 | case OP_H: |
795 | decode_vex_vvvv(u, operand, size); |
796 | break; |
797 | case OP_MU: |
798 | decode_modrm_rm(u, operand, REGCLASS_XMM, |
799 | MODRM_MOD(modrm(u)) == 3 ? |
800 | Mx_reg_size(size) : Mx_mem_size(size)); |
801 | break; |
802 | case OP_S: |
803 | decode_modrm_reg(u, operand, REGCLASS_SEG, size); |
804 | break; |
805 | case OP_O: |
806 | decode_moffset(u, size, operand); |
807 | break; |
808 | case OP_R0: |
809 | case OP_R1: |
810 | case OP_R2: |
811 | case OP_R3: |
812 | case OP_R4: |
813 | case OP_R5: |
814 | case OP_R6: |
815 | case OP_R7: |
816 | decode_reg(u, operand, REGCLASS_GPR, |
817 | (REX_B(u->_rex) << 3) | (type - OP_R0), size); |
818 | break; |
819 | case OP_AL: |
820 | case OP_AX: |
821 | case OP_eAX: |
822 | case OP_rAX: |
823 | decode_reg(u, operand, REGCLASS_GPR, 0, size); |
824 | break; |
825 | case OP_CL: |
826 | case OP_CX: |
827 | case OP_eCX: |
828 | decode_reg(u, operand, REGCLASS_GPR, 1, size); |
829 | break; |
830 | case OP_DL: |
831 | case OP_DX: |
832 | case OP_eDX: |
833 | decode_reg(u, operand, REGCLASS_GPR, 2, size); |
834 | break; |
835 | case OP_ES: |
836 | case OP_CS: |
837 | case OP_DS: |
838 | case OP_SS: |
839 | case OP_FS: |
840 | case OP_GS: |
    /* in 64-bit mode, only fs and gs are allowed */
    if (u->dis_mode == 64) {
      if (type != OP_FS && type != OP_GS) {
        UDERR(u, "invalid segment register in 64-bit mode\n" );
845 | } |
846 | } |
847 | operand->type = UD_OP_REG; |
848 | operand->base = (type - OP_ES) + UD_R_ES; |
849 | operand->size = 16; |
850 | break; |
851 | case OP_J : |
852 | decode_imm(u, size, operand); |
853 | operand->type = UD_OP_JIMM; |
854 | break ; |
855 | case OP_R : |
856 | if (MODRM_MOD(modrm(u)) != 3) { |
857 | UDERR(u, "expected modrm.mod == 3\n" ); |
858 | } |
859 | decode_modrm_rm(u, operand, REGCLASS_GPR, size); |
860 | break; |
861 | case OP_C: |
862 | decode_modrm_reg(u, operand, REGCLASS_CR, size); |
863 | break; |
864 | case OP_D: |
865 | decode_modrm_reg(u, operand, REGCLASS_DB, size); |
866 | break; |
867 | case OP_I3 : |
868 | operand->type = UD_OP_CONST; |
869 | operand->lval.sbyte = 3; |
870 | break; |
871 | case OP_ST0: |
872 | case OP_ST1: |
873 | case OP_ST2: |
874 | case OP_ST3: |
875 | case OP_ST4: |
876 | case OP_ST5: |
877 | case OP_ST6: |
878 | case OP_ST7: |
879 | operand->type = UD_OP_REG; |
880 | operand->base = (type - OP_ST0) + UD_R_ST0; |
881 | operand->size = 80; |
882 | break; |
883 | case OP_L: |
884 | decode_vex_immreg(u, operand, size); |
885 | break; |
886 | default : |
887 | operand->type = UD_NONE; |
888 | break; |
889 | } |
890 | return operand->type; |
891 | } |
892 | |
893 | |
894 | /* |
895 | * decode_operands |
896 | * |
 * Disassemble up to 4 operands of the current instruction being
898 | * disassembled. By the end of the function, the operand fields |
899 | * of the ud structure will have been filled. |
900 | */ |
901 | static int |
902 | decode_operands(struct ud* u) |
903 | { |
904 | decode_operand(u, &u->operand[0], |
905 | u->itab_entry->operand1.type, |
906 | u->itab_entry->operand1.size); |
907 | if (u->operand[0].type != UD_NONE) { |
908 | decode_operand(u, &u->operand[1], |
909 | u->itab_entry->operand2.type, |
910 | u->itab_entry->operand2.size); |
911 | } |
912 | if (u->operand[1].type != UD_NONE) { |
913 | decode_operand(u, &u->operand[2], |
914 | u->itab_entry->operand3.type, |
915 | u->itab_entry->operand3.size); |
916 | } |
917 | if (u->operand[2].type != UD_NONE) { |
918 | decode_operand(u, &u->operand[3], |
919 | u->itab_entry->operand4.type, |
920 | u->itab_entry->operand4.size); |
921 | } |
922 | return 0; |
923 | } |
924 | |
925 | /* ----------------------------------------------------------------------------- |
926 | * clear_insn() - clear instruction structure |
927 | * ----------------------------------------------------------------------------- |
928 | */ |
929 | static void |
930 | clear_insn(register struct ud* u) |
931 | { |
932 | u->error = 0; |
933 | u->pfx_seg = 0; |
934 | u->pfx_opr = 0; |
935 | u->pfx_adr = 0; |
936 | u->pfx_lock = 0; |
937 | u->pfx_repne = 0; |
938 | u->pfx_rep = 0; |
939 | u->pfx_repe = 0; |
940 | u->pfx_rex = 0; |
941 | u->pfx_str = 0; |
942 | u->mnemonic = UD_Inone; |
943 | u->itab_entry = NULL; |
944 | u->have_modrm = 0; |
945 | u->br_far = 0; |
946 | u->vex_op = 0; |
947 | u->_rex = 0; |
948 | u->operand[0].type = UD_NONE; |
949 | u->operand[1].type = UD_NONE; |
950 | u->operand[2].type = UD_NONE; |
951 | u->operand[3].type = UD_NONE; |
952 | } |
953 | |
954 | |
955 | static UD_INLINE int |
956 | resolve_pfx_str(struct ud* u) |
957 | { |
958 | if (u->pfx_str == 0xf3) { |
959 | if (P_STR(u->itab_entry->prefix)) { |
960 | u->pfx_rep = 0xf3; |
961 | } else { |
962 | u->pfx_repe = 0xf3; |
963 | } |
964 | } else if (u->pfx_str == 0xf2) { |
    u->pfx_repne = 0xf2;
966 | } |
967 | return 0; |
968 | } |
969 | |
970 | |
971 | static int |
972 | resolve_mode( struct ud* u ) |
973 | { |
974 | int default64; |
975 | /* if in error state, bail out */ |
976 | if ( u->error ) return -1; |
977 | |
978 | /* propagate prefix effects */ |
979 | if ( u->dis_mode == 64 ) { /* set 64bit-mode flags */ |
980 | |
981 | /* Check validity of instruction m64 */ |
982 | if ( P_INV64( u->itab_entry->prefix ) ) { |
983 | UDERR(u, "instruction invalid in 64bits\n" ); |
984 | return -1; |
985 | } |
986 | |
987 | /* compute effective rex based on, |
988 | * - vex prefix (if any) |
989 | * - rex prefix (if any, and not vex) |
990 | * - allowed prefixes specified by the opcode map |
991 | */ |
992 | if (u->vex_op == 0xc4) { |
993 | /* vex has rex.rxb in 1's complement */ |
994 | u->_rex = ((~(u->vex_b1 >> 5) & 0x7) /* rex.0rxb */ | |
995 | ((u->vex_b2 >> 4) & 0x8) /* rex.w000 */); |
996 | } else if (u->vex_op == 0xc5) { |
997 | /* vex has rex.r in 1's complement */ |
998 | u->_rex = (~(u->vex_b1 >> 5)) & 4; |
999 | } else { |
1000 | UD_ASSERT(u->vex_op == 0); |
1001 | u->_rex = u->pfx_rex; |
1002 | } |
1003 | u->_rex &= REX_PFX_MASK(u->itab_entry->prefix); |
1004 | |
1005 | /* whether this instruction has a default operand size of |
1006 | * 64bit, also hardcoded into the opcode map. |
1007 | */ |
1008 | default64 = P_DEF64( u->itab_entry->prefix ); |
1009 | /* calculate effective operand size */ |
1010 | if (REX_W(u->_rex)) { |
1011 | u->opr_mode = 64; |
1012 | } else if ( u->pfx_opr ) { |
1013 | u->opr_mode = 16; |
1014 | } else { |
1015 | /* unless the default opr size of instruction is 64, |
1016 | * the effective operand size in the absence of rex.w |
1017 | * prefix is 32. |
1018 | */ |
1019 | u->opr_mode = default64 ? 64 : 32; |
1020 | } |
1021 | |
1022 | /* calculate effective address size */ |
1023 | u->adr_mode = (u->pfx_adr) ? 32 : 64; |
1024 | } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */ |
1025 | u->opr_mode = ( u->pfx_opr ) ? 16 : 32; |
1026 | u->adr_mode = ( u->pfx_adr ) ? 16 : 32; |
1027 | } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */ |
1028 | u->opr_mode = ( u->pfx_opr ) ? 32 : 16; |
1029 | u->adr_mode = ( u->pfx_adr ) ? 32 : 16; |
1030 | } |
1031 | |
1032 | return 0; |
1033 | } |
1034 | |
1035 | |
1036 | static UD_INLINE int |
1037 | decode_insn(struct ud *u, uint16_t ptr) |
1038 | { |
1039 | UD_ASSERT((ptr & 0x8000) == 0); |
1040 | u->itab_entry = &ud_itab[ ptr ]; |
1041 | u->mnemonic = u->itab_entry->mnemonic; |
1042 | return (resolve_pfx_str(u) == 0 && |
1043 | resolve_mode(u) == 0 && |
1044 | decode_operands(u) == 0 && |
1045 | resolve_mnemonic(u) == 0) ? 0 : -1; |
1046 | } |
1047 | |
1048 | |
1049 | /* |
1050 | * decode_3dnow() |
1051 | * |
 * Decoding 3dnow is a little tricky because of its strange opcode
 * structure. The final opcode disambiguation depends on the last
 * byte, which comes after the operands have been decoded. Fortunately,
 * all 3dnow instructions have the same set of operand types, so we
 * decode the instruction by picking an arbitrary valid entry in the
 * table, decoding the operands, and then reading the final byte to
 * resolve the mnemonic.
1059 | */ |
1060 | static UD_INLINE int |
1061 | decode_3dnow(struct ud* u) |
1062 | { |
1063 | uint16_t ptr; |
1064 | UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW); |
1065 | UD_ASSERT(u->le->table[0xc] != 0); |
1066 | decode_insn(u, u->le->table[0xc]); |
1067 | inp_next(u); |
1068 | if (u->error) { |
1069 | return -1; |
1070 | } |
1071 | ptr = u->le->table[inp_curr(u)]; |
1072 | UD_ASSERT((ptr & 0x8000) == 0); |
1073 | u->mnemonic = ud_itab[ptr].mnemonic; |
1074 | return 0; |
1075 | } |
1076 | |
1077 | |
1078 | static int |
1079 | decode_ssepfx(struct ud *u) |
1080 | { |
1081 | uint8_t idx; |
1082 | uint8_t pfx; |
1083 | |
1084 | /* |
1085 | * String prefixes (f2, f3) take precedence over operand |
1086 | * size prefix (66). |
1087 | */ |
1088 | pfx = u->pfx_str; |
1089 | if (pfx == 0) { |
1090 | pfx = u->pfx_opr; |
1091 | } |
1092 | idx = ((pfx & 0xf) + 1) / 2; |
1093 | if (u->le->table[idx] == 0) { |
1094 | idx = 0; |
1095 | } |
1096 | if (idx && u->le->table[idx] != 0) { |
1097 | /* |
1098 | * "Consume" the prefix as a part of the opcode, so it is no |
1099 | * longer exported as an instruction prefix. |
1100 | */ |
1101 | u->pfx_str = 0; |
1102 | if (pfx == 0x66) { |
1103 | /* |
1104 | * consume "66" only if it was used for decoding, leaving |
1105 | * it to be used as an operands size override for some |
1106 | * simd instructions. |
1107 | */ |
1108 | u->pfx_opr = 0; |
1109 | } |
1110 | } |
1111 | return decode_ext(u, u->le->table[idx]); |
1112 | } |
1113 | |
1114 | |
1115 | static int |
1116 | decode_vex(struct ud *u) |
1117 | { |
1118 | uint8_t index; |
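  /* in 16- and 32-bit modes, c4/c5 followed by modrm.mod != 3 are
   * the les/lds opcodes rather than a vex prefix; index 0 selects
   * the non-vex table entry */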
1119 | if (u->dis_mode != 64 && MODRM_MOD(inp_peek(u)) != 0x3) { |
1120 | index = 0; |
1121 | } else { |
1122 | u->vex_op = inp_curr(u); |
1123 | u->vex_b1 = inp_next(u); |
1124 | if (u->vex_op == 0xc4) { |
1125 | uint8_t pp, m; |
1126 | /* 3-byte vex */ |
1127 | u->vex_b2 = inp_next(u); |
1128 | UD_RETURN_ON_ERROR(u); |
1129 | m = u->vex_b1 & 0x1f; |
1130 | if (m == 0 || m > 3) { |
1131 | UD_RETURN_WITH_ERROR(u, "reserved vex.m-mmmm value" ); |
1132 | } |
1133 | pp = u->vex_b2 & 0x3; |
1134 | index = (pp << 2) | m; |
1135 | } else { |
1136 | /* 2-byte vex */ |
1137 | UD_ASSERT(u->vex_op == 0xc5); |
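      /* the 2-byte form implies m-mmmm = 1 (the 0f map); pp is in
       * bits 1:0 of the payload byte */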
1138 | index = 0x1 | ((u->vex_b1 & 0x3) << 2); |
1139 | } |
1140 | } |
1141 | return decode_ext(u, u->le->table[index]); |
1142 | } |
1143 | |
1144 | |
1145 | /* |
1146 | * decode_ext() |
1147 | * |
1148 | * Decode opcode extensions (if any) |
1149 | */ |
1150 | static int |
1151 | decode_ext(struct ud *u, uint16_t ptr) |
1152 | { |
1153 | uint8_t idx = 0; |
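  /* entries with the high bit clear index ud_itab directly (a leaf
   * instruction); entries with it set index the lookup table list
   * for further dispatch */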
1154 | if ((ptr & 0x8000) == 0) { |
1155 | return decode_insn(u, ptr); |
1156 | } |
1157 | u->le = &ud_lookup_table_list[(~0x8000 & ptr)]; |
1158 | if (u->le->type == UD_TAB__OPC_3DNOW) { |
1159 | return decode_3dnow(u); |
1160 | } |
1161 | |
1162 | switch (u->le->type) { |
1163 | case UD_TAB__OPC_MOD: |
1164 | /* !11 = 0, 11 = 1 */ |
1165 | idx = (MODRM_MOD(modrm(u)) + 1) / 4; |
1166 | break; |
  /* disassembly mode/operand size/address size based tables;
   * for osize and asize: 16 => 0, 32 => 1, 64 => 2
   */
1170 | case UD_TAB__OPC_MODE: |
1171 | idx = u->dis_mode != 64 ? 0 : 1; |
1172 | break; |
1173 | case UD_TAB__OPC_OSIZE: |
1174 | idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32; |
1175 | break; |
1176 | case UD_TAB__OPC_ASIZE: |
1177 | idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32; |
1178 | break; |
1179 | case UD_TAB__OPC_X87: |
1180 | idx = modrm(u) - 0xC0; |
1181 | break; |
1182 | case UD_TAB__OPC_VENDOR: |
1183 | if (u->vendor == UD_VENDOR_ANY) { |
1184 | /* choose a valid entry */ |
1185 | idx = (u->le->table[idx] != 0) ? 0 : 1; |
1186 | } else if (u->vendor == UD_VENDOR_AMD) { |
1187 | idx = 0; |
1188 | } else { |
1189 | idx = 1; |
1190 | } |
1191 | break; |
1192 | case UD_TAB__OPC_RM: |
1193 | idx = MODRM_RM(modrm(u)); |
1194 | break; |
1195 | case UD_TAB__OPC_REG: |
1196 | idx = MODRM_REG(modrm(u)); |
1197 | break; |
1198 | case UD_TAB__OPC_SSE: |
1199 | return decode_ssepfx(u); |
1200 | case UD_TAB__OPC_VEX: |
1201 | return decode_vex(u); |
1202 | case UD_TAB__OPC_VEX_W: |
1203 | idx = vex_w(u); |
1204 | break; |
1205 | case UD_TAB__OPC_VEX_L: |
1206 | idx = vex_l(u); |
1207 | break; |
1208 | case UD_TAB__OPC_TABLE: |
1209 | inp_next(u); |
1210 | return decode_opcode(u); |
1211 | default: |
1212 | UD_ASSERT(!"not reached" ); |
1213 | break; |
1214 | } |
1215 | |
1216 | return decode_ext(u, u->le->table[idx]); |
1217 | } |
1218 | |
1219 | |
1220 | static int |
1221 | decode_opcode(struct ud *u) |
1222 | { |
1223 | uint16_t ptr; |
1224 | UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE); |
1225 | UD_RETURN_ON_ERROR(u); |
1226 | ptr = u->le->table[inp_curr(u)]; |
1227 | return decode_ext(u, ptr); |
1228 | } |
1229 | |
1230 | |
1231 | /* ============================================================================= |
1232 | * ud_decode() - Instruction decoder. Returns the number of bytes decoded. |
1233 | * ============================================================================= |
1234 | */ |
1235 | unsigned int |
1236 | ud_decode(struct ud *u) |
1237 | { |
1238 | inp_start(u); |
1239 | clear_insn(u); |
1240 | u->le = &ud_lookup_table_list[0]; |
1241 | u->error = decode_prefixes(u) == -1 || |
1242 | decode_opcode(u) == -1 || |
1243 | u->error; |
1244 | /* Handle decode error. */ |
1245 | if (u->error) { |
1246 | /* clear out the decode data. */ |
1247 | clear_insn(u); |
1248 | /* mark the sequence of bytes as invalid. */ |
1249 | u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */ |
1250 | u->mnemonic = u->itab_entry->mnemonic; |
1251 | } |
1252 | |
1253 | /* maybe this stray segment override byte |
1254 | * should be spewed out? |
1255 | */ |
1256 | if ( !P_SEG( u->itab_entry->prefix ) && |
1257 | u->operand[0].type != UD_OP_MEM && |
1258 | u->operand[1].type != UD_OP_MEM ) |
1259 | u->pfx_seg = 0; |
1260 | |
1261 | u->insn_offset = u->pc; /* set offset of instruction */ |
1262 | u->asm_buf_fill = 0; /* set translation buffer index to 0 */ |
1263 | u->pc += u->inp_ctr; /* move program counter by bytes decoded */ |
1264 | |
1265 | /* return number of bytes disassembled. */ |
1266 | return u->inp_ctr; |
1267 | } |
1268 | |
1269 | #endif // USE(UDIS86) |
1270 | |
1271 | /* |
1272 | vim: set ts=2 sw=2 expandtab |
1273 | */ |
1274 | |