1/* udis86 - libudis86/decode.c
2 *
3 * Copyright (c) 2002-2009 Vivek Thampi
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification,
7 * are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28
29#if USE(UDIS86)
30
31#include "udis86_udint.h"
32#include "udis86_types.h"
33#include "udis86_extern.h"
34#include "udis86_decode.h"
35
36#ifndef __UD_STANDALONE__
37# include <string.h>
38#endif /* __UD_STANDALONE__ */
39
/* The max number of prefixes to an instruction */
#define MAX_PREFIXES 15

/* rex prefix bits: W (64-bit operand), R/X/B (high bit of modrm.reg,
 * sib.index and modrm.rm/sib.base respectively) */
#define REX_W(r) ( ( 0xF & ( r ) ) >> 3 )
#define REX_R(r) ( ( 0x7 & ( r ) ) >> 2 )
#define REX_X(r) ( ( 0x3 & ( r ) ) >> 1 )
#define REX_B(r) ( ( 0x1 & ( r ) ) >> 0 )
/* build a W.R.X.B mask of the rex bits an opcode-table entry allows */
#define REX_PFX_MASK(n) ( ( P_REXW(n) << 3 ) | \
                          ( P_REXR(n) << 2 ) | \
                          ( P_REXX(n) << 1 ) | \
                          ( P_REXB(n) << 0 ) )

/* scale-index-base bits */
#define SIB_S(b) ( ( b ) >> 6 )
#define SIB_I(b) ( ( ( b ) >> 3 ) & 7 )
#define SIB_B(b) ( ( b ) & 7 )

/* modrm bits: mod (addressing form), reg/nnn (register or opcode ext), rm */
#define MODRM_REG(b) ( ( ( b ) >> 3 ) & 7 )
#define MODRM_NNN(b) ( ( ( b ) >> 3 ) & 7 )
#define MODRM_MOD(b) ( ( ( b ) >> 6 ) & 3 )
#define MODRM_RM(b) ( ( b ) & 7 )
63
static int decode_ext(struct ud *u, uint16_t ptr);
static int decode_opcode(struct ud *u);

enum reg_class { /* register classes */
  REGCLASS_GPR,  /* general purpose (rax..r15, eax.., ax.., al..) */
  REGCLASS_MMX,  /* mm0..mm7 */
  REGCLASS_CR,   /* control registers cr0.. */
  REGCLASS_DB,   /* debug registers dr0.. */
  REGCLASS_SEG,  /* segment registers es..gs */
  REGCLASS_XMM   /* xmm/ymm registers */
};
75
 /*
  * inp_start
  *    Should be called before each decode operation; resets the
  *    per-instruction byte counter.
  */
static void
inp_start(struct ud *u)
{
  u->inp_ctr = 0;
}
85
/*
 * inp_peek
 *    Return the next input byte without consuming it. Works off either
 *    a caller-supplied buffer (inp_buf) or the input hook (inp_hook),
 *    caching the hook's byte in u->inp_peek until inp_next consumes it.
 *    On end-of-input, sets u->inp_end, raises a decode error and
 *    returns 0.
 */
static uint8_t
inp_peek(struct ud *u)
{
  if (u->inp_end == 0) {
    if (u->inp_buf != NULL) {
      if (u->inp_buf_index < u->inp_buf_size) {
        return u->inp_buf[u->inp_buf_index];
      }
    } else if (u->inp_peek != UD_EOI) {
      /* a previously peeked hook byte is still pending */
      return u->inp_peek;
    } else {
      int c;
      if ((c = u->inp_hook(u)) != UD_EOI) {
        /* cache so a later inp_next returns the same byte */
        u->inp_peek = c;
        return u->inp_peek;
      }
    }
  }
  u->inp_end = 1;
  UDERR(u, "byte expected, eoi received\n");
  return 0;
}
108
/*
 * inp_next
 *    Consume and return the next input byte, updating inp_curr and the
 *    per-instruction counter inp_ctr. Prefers a pending peeked byte
 *    before calling the input hook. On end-of-input, sets u->inp_end,
 *    raises a decode error and returns 0.
 */
static uint8_t
inp_next(struct ud *u)
{
  if (u->inp_end == 0) {
    if (u->inp_buf != NULL) {
      if (u->inp_buf_index < u->inp_buf_size) {
        u->inp_ctr++;
        return (u->inp_curr = u->inp_buf[u->inp_buf_index++]);
      }
    } else {
      int c = u->inp_peek;
      if (c != UD_EOI || (c = u->inp_hook(u)) != UD_EOI) {
        u->inp_peek = UD_EOI;
        u->inp_curr = c;
        /* NOTE(review): only the hook path records bytes into inp_sess;
         * the buffer path above does not — confirm whether inp_sess is
         * unused for buffered input. */
        u->inp_sess[u->inp_ctr++] = u->inp_curr;
        return u->inp_curr;
      }
    }
  }
  u->inp_end = 1;
  UDERR(u, "byte expected, eoi received\n");
  return 0;
}
132
/* inp_curr - return the most recently consumed input byte. */
static uint8_t
inp_curr(struct ud *u)
{
  return u->inp_curr;
}
138
139
/*
 * inp_uint8
 * inp_uint16
 * inp_uint32
 * inp_uint64
 *    Load little-endian values from input
 */
static uint8_t
inp_uint8(struct ud* u)
{
  return inp_next(u);
}
152
/* Read a 16-bit little-endian value from the input stream. */
static uint16_t
inp_uint16(struct ud* u)
{
  uint16_t lo, hi;

  lo = inp_next(u);
  hi = inp_next(u);
  return lo | (hi << 8);
}
162
/* Read a 32-bit little-endian value from the input stream. */
static uint32_t
inp_uint32(struct ud* u)
{
  uint32_t val = 0;
  unsigned shift;

  /* low byte arrives first */
  for (shift = 0; shift < 32; shift += 8) {
    val |= ((uint32_t) inp_next(u)) << shift;
  }
  return val;
}
176
/* Read a 64-bit little-endian value from the input stream. */
static uint64_t
inp_uint64(struct ud* u)
{
  uint64_t val = 0;
  unsigned shift;

  /* low byte arrives first */
  for (shift = 0; shift < 64; shift += 8) {
    val |= ((uint64_t) inp_next(u)) << shift;
  }
  return val;
}
198
199
200static UD_INLINE int
201eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
202{
203 if (dis_mode == 64) {
204 return rex_w ? 64 : (pfx_opr ? 16 : 32);
205 } else if (dis_mode == 32) {
206 return pfx_opr ? 16 : 32;
207 } else {
208 UD_ASSERT(dis_mode == 16);
209 return pfx_opr ? 32 : 16;
210 }
211}
212
213
214static UD_INLINE int
215eff_adr_mode(int dis_mode, int pfx_adr)
216{
217 if (dis_mode == 64) {
218 return pfx_adr ? 32 : 64;
219 } else if (dis_mode == 32) {
220 return pfx_adr ? 16 : 32;
221 } else {
222 UD_ASSERT(dis_mode == 16);
223 return pfx_adr ? 32 : 16;
224 }
225}
226
227
/*
 * decode_prefixes
 *
 * Extracts instruction prefixes: segment overrides, address/operand
 * size overrides, lock, and the string prefixes f2/f3. In 64-bit mode
 * rex bytes (0x40-0x4f) are also consumed; only a rex that is the very
 * last prefix byte takes effect.
 */
static int
decode_prefixes(struct ud *u)
{
  int done = 0;
  uint8_t curr = 0, last = 0;
  UD_RETURN_ON_ERROR(u);

  do {
    last = curr;
    curr = inp_next(u);
    UD_RETURN_ON_ERROR(u);
    if (u->inp_ctr == MAX_INSN_LENGTH) {
      UD_RETURN_WITH_ERROR(u, "max instruction length");
    }

    switch (curr)
    {
    case 0x2E:
      u->pfx_seg = UD_R_CS;
      break;
    case 0x36:
      u->pfx_seg = UD_R_SS;
      break;
    case 0x3E:
      u->pfx_seg = UD_R_DS;
      break;
    case 0x26:
      u->pfx_seg = UD_R_ES;
      break;
    case 0x64:
      u->pfx_seg = UD_R_FS;
      break;
    case 0x65:
      u->pfx_seg = UD_R_GS;
      break;
    case 0x67: /* address-size override prefix */
      u->pfx_adr = 0x67;
      break;
    case 0xF0:
      u->pfx_lock = 0xF0;
      break;
    case 0x66:
      u->pfx_opr = 0x66;
      break;
    case 0xF2:
      u->pfx_str = 0xf2;
      break;
    case 0xF3:
      u->pfx_str = 0xf3;
      break;
    default:
      /* consume if rex; any other byte ends the prefix scan */
      done = (u->dis_mode == 64 && (curr & 0xF0) == 0x40) ? 0 : 1;
      break;
    }
  } while (!done);
  /* rex prefixes in 64bit mode, must be the last prefix */
  if (u->dis_mode == 64 && (last & 0xF0) == 0x40) {
    u->pfx_rex = last;
  }
  return 0;
}
295
296
/*
 * vex_l, vex_w
 *    Return the vex.L and vex.W bits
 */
static UD_INLINE uint8_t
vex_l(const struct ud *u)
{
  UD_ASSERT(u->vex_op != 0);
  /* L lives in the second payload byte for 3-byte (c4) vex,
   * in the first (only) payload byte for 2-byte (c5) vex */
  return ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 2) & 1;
}
307
/* vex.W exists only in the 3-byte (c4) form; 2-byte vex implies W=0 */
static UD_INLINE uint8_t
vex_w(const struct ud *u)
{
  UD_ASSERT(u->vex_op != 0);
  return u->vex_op == 0xc4 ? ((u->vex_b2 >> 7) & 1) : 0;
}
314
315
/*
 * modrm
 *    Fetch (once) and return the instruction's modrm byte; repeated
 *    calls return the cached value without consuming more input.
 */
static UD_INLINE uint8_t
modrm(struct ud * u)
{
  if ( !u->have_modrm ) {
    u->modrm = inp_next( u );
    u->modrm_offset = (uint8_t) (u->inp_ctr - 1);
    u->have_modrm = 1;
  }
  return u->modrm;
}
326
327
/*
 * resolve_operand_size
 *    Map a symbolic operand-size code from the opcode table to a
 *    concrete size. Mode-dependent codes (SZ_V/Z/Y/RDQ/X) resolve
 *    against the effective operand/disassembly mode; anything else is
 *    already a concrete size and passes through unchanged.
 */
static unsigned int
resolve_operand_size(const struct ud* u, ud_operand_size_t osize)
{
  switch (osize) {
  case SZ_V:
    return u->opr_mode;
  case SZ_Z:
    /* word for 16-bit operand mode, dword otherwise */
    return u->opr_mode == 16 ? 16 : 32;
  case SZ_Y:
    /* never narrower than 32 bits */
    return u->opr_mode == 16 ? 32 : u->opr_mode;
  case SZ_RDQ:
    return u->dis_mode == 64 ? 64 : 32;
  case SZ_X:
    /* xmm or ymm depending on vex.L */
    UD_ASSERT(u->vex_op != 0);
    return (P_VEXL(u->itab_entry->prefix) && vex_l(u)) ? SZ_QQ : SZ_DQ;
  default:
    return osize;
  }
}
347
348
349static int resolve_mnemonic( struct ud* u )
350{
351 /* resolve 3dnow weirdness. */
352 if ( u->mnemonic == UD_I3dnow ) {
353 u->mnemonic = ud_itab[ u->le->table[ inp_curr( u ) ] ].mnemonic;
354 }
355 /* SWAPGS is only valid in 64bits mode */
356 if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
357 UDERR(u, "swapgs invalid in 64bits mode\n");
358 return -1;
359 }
360
361 if (u->mnemonic == UD_Ixchg) {
362 if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX &&
363 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
364 (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
365 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
366 u->operand[0].type = UD_NONE;
367 u->operand[1].type = UD_NONE;
368 u->mnemonic = UD_Inop;
369 }
370 }
371
372 if (u->mnemonic == UD_Inop && u->pfx_repe) {
373 u->pfx_repe = 0;
374 u->mnemonic = UD_Ipause;
375 }
376 return 0;
377}
378
379
380/* -----------------------------------------------------------------------------
381 * decode_a()- Decodes operands of the type seg:offset
382 * -----------------------------------------------------------------------------
383 */
384static void
385decode_a(struct ud* u, struct ud_operand *op)
386{
387 if (u->opr_mode == 16) {
388 /* seg16:off16 */
389 op->type = UD_OP_PTR;
390 op->size = 32;
391 op->lval.ptr.off = inp_uint16(u);
392 op->lval.ptr.seg = inp_uint16(u);
393 } else {
394 /* seg16:off32 */
395 op->type = UD_OP_PTR;
396 op->size = 48;
397 op->lval.ptr.off = inp_uint32(u);
398 op->lval.ptr.seg = inp_uint16(u);
399 }
400}
401
402/* -----------------------------------------------------------------------------
403 * decode_gpr() - Returns decoded General Purpose Register
404 * -----------------------------------------------------------------------------
405 */
406static enum ud_type
407decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
408{
409 switch (s) {
410 case 64:
411 return UD_R_RAX + rm;
412 case 32:
413 return UD_R_EAX + rm;
414 case 16:
415 return UD_R_AX + rm;
416 case 8:
417 if (u->dis_mode == 64 && u->pfx_rex) {
418 if (rm >= 4)
419 return UD_R_SPL + (rm-4);
420 return UD_R_AL + rm;
421 } else return UD_R_AL + rm;
422 case 0:
423 /* invalid size in case of a decode error */
424 UD_ASSERT(u->error);
425 return UD_NONE;
426 default:
427 UD_ASSERT(!"invalid operand size");
428 return UD_NONE;
429 }
430}
431
/*
 * decode_reg
 *    Fill `opr` as a register operand of the given class and index,
 *    resolving the symbolic size first. Errors out on an invalid
 *    segment register index.
 */
static void
decode_reg(struct ud *u,
           struct ud_operand *opr,
           int type,
           int num,
           int size)
{
  int reg;
  size = resolve_operand_size(u, size);
  switch (type) {
    case REGCLASS_GPR : reg = decode_gpr(u, size, num); break;
    case REGCLASS_MMX : reg = UD_R_MM0 + (num & 7); break;
    case REGCLASS_XMM :
      /* ymm bank for 256-bit operands, xmm otherwise */
      reg = num + (size == SZ_QQ ? UD_R_YMM0 : UD_R_XMM0);
      break;
    case REGCLASS_CR : reg = UD_R_CR0 + num; break;
    case REGCLASS_DB : reg = UD_R_DR0 + num; break;
    case REGCLASS_SEG : {
      /*
       * Only 6 segment registers, anything else is an error.
       */
      if ((num & 7) > 5) {
        UDERR(u, "invalid segment register value\n");
        return;
      } else {
        reg = UD_R_ES + (num & 7);
      }
      break;
    }
    default:
      UD_ASSERT(!"invalid register type");
      return;
  }
  opr->type = UD_OP_REG;
  opr->base = reg;
  opr->size = size;
}
469
470
/*
 * decode_imm
 *
 * Decode Immediate values. Resolves the symbolic size, then reads a
 * little-endian immediate of that width from the input. Sizes other
 * than 8/16/32/64 leave lval untouched.
 */
static void
decode_imm(struct ud* u, unsigned int size, struct ud_operand *op)
{
  op->size = resolve_operand_size(u, size);
  op->type = UD_OP_IMM;

  switch (op->size) {
  case  8: op->lval.sbyte  = inp_uint8(u);  break;
  case 16: op->lval.uword  = inp_uint16(u); break;
  case 32: op->lval.udword = inp_uint32(u); break;
  case 64: op->lval.uqword = inp_uint64(u); break;
  default: return;
  }
}
490
491
492/*
493 * decode_mem_disp
494 *
495 * Decode mem address displacement.
496 */
497static void
498decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op)
499{
500 switch (size) {
501 case 8:
502 op->offset = 8;
503 op->lval.ubyte = inp_uint8(u);
504 break;
505 case 16:
506 op->offset = 16;
507 op->lval.uword = inp_uint16(u);
508 break;
509 case 32:
510 op->offset = 32;
511 op->lval.udword = inp_uint32(u);
512 break;
513 case 64:
514 op->offset = 64;
515 op->lval.uqword = inp_uint64(u);
516 break;
517 default:
518 return;
519 }
520}
521
522
/*
 * decode_modrm_reg
 *
 * Decodes reg field of mod/rm byte, extended to 4 bits by rex.r.
 *
 */
static UD_INLINE void
decode_modrm_reg(struct ud         *u,
                 struct ud_operand *operand,
                 unsigned int       type,
                 unsigned int       size)
{
  uint8_t reg = (REX_R(u->_rex) << 3) | MODRM_REG(modrm(u));
  decode_reg(u, operand, type, reg, size);
}
538
539
/*
 * decode_modrm_rm
 *
 * Decodes rm field of mod/rm byte. mod == 3 selects a register operand;
 * anything else is a memory operand whose base/index/scale/displacement
 * depend on the effective address mode (16/32/64) and, for rm == 4, on
 * a trailing SIB byte.
 *
 */
static void
decode_modrm_rm(struct ud         *u,
                struct ud_operand *op,
                unsigned char      type,    /* register type */
                unsigned int       size)    /* operand size */

{
  size_t offset = 0;
  unsigned char mod, rm;

  /* get mod, r/m and reg fields */
  mod = MODRM_MOD(modrm(u));
  rm  = (REX_B(u->_rex) << 3) | MODRM_RM(modrm(u));

  /*
   * If mod is 11b, then the modrm.rm specifies a register.
   *
   */
  if (mod == 3) {
    decode_reg(u, op, type, rm, size);
    return;
  }

  /*
   * !11b => Memory Address
   */
  op->type = UD_OP_MEM;
  op->size = resolve_operand_size(u, size);

  if (u->adr_mode == 64) {
    op->base = UD_R_RAX + rm;
    if (mod == 1) {
      offset = 8;
    } else if (mod == 2) {
      offset = 32;
    } else if (mod == 0 && (rm & 7) == 5) {
      /* mod=00, rm=101 is rip-relative with disp32 in 64-bit mode */
      op->base = UD_R_RIP;
      offset = 32;
    } else {
      offset = 0;
    }
    /*
     * Scale-Index-Base (SIB)
     */
    if ((rm & 7) == 4) {
      inp_next(u);

      /* base/index come from the SIB byte, extended by rex.b/rex.x */
      op->base  = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->_rex) << 3));
      op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->_rex) << 3));
      /* special conditions for base reference */
      if (op->index == UD_R_RSP) {
        /* index 100b means "no index" */
        op->index = UD_NONE;
        op->scale = UD_NONE;
      } else {
        /* scale of 1 is canonicalized to UD_NONE (0) by the & ~1 */
        op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
      }

      if (op->base == UD_R_RBP || op->base == UD_R_R13) {
        /* base 101b: no base with mod=00, else disp8/disp32 */
        if (mod == 0) {
          op->base = UD_NONE;
        }
        if (mod == 1) {
          offset = 8;
        } else {
          offset = 32;
        }
      }
    } else {
      op->scale = UD_NONE;
      op->index = UD_NONE;
    }
  } else if (u->adr_mode == 32) {
    op->base = UD_R_EAX + rm;
    if (mod == 1) {
      offset = 8;
    } else if (mod == 2) {
      offset = 32;
    } else if (mod == 0 && rm == 5) {
      /* mod=00, rm=101 is an absolute disp32 */
      op->base = UD_NONE;
      offset = 32;
    } else {
      offset = 0;
    }

    /* Scale-Index-Base (SIB) */
    if ((rm & 7) == 4) {
      inp_next(u);

      /* scale of 1 is canonicalized to UD_NONE (0) by the & ~1 */
      op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
      op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
      op->base  = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));

      if (op->index == UD_R_ESP) {
        /* index 100b means "no index" */
        op->index = UD_NONE;
        op->scale = UD_NONE;
      }

      /* special condition for base reference */
      if (op->base == UD_R_EBP) {
        if (mod == 0) {
          op->base = UD_NONE;
        }
        if (mod == 1) {
          offset = 8;
        } else {
          offset = 32;
        }
      }
    } else {
      op->scale = UD_NONE;
      op->index = UD_NONE;
    }
  } else {
    /* 16-bit addressing: fixed base/index pairs per rm encoding */
    const unsigned int bases[]   = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
                                     UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
    const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
                                     UD_NONE, UD_NONE, UD_NONE, UD_NONE };
    op->base  = bases[rm & 7];
    op->index = indices[rm & 7];
    op->scale = UD_NONE;
    if (mod == 0 && rm == 6) {
      /* mod=00, rm=110 is an absolute disp16 */
      offset = 16;
      op->base = UD_NONE;
    } else if (mod == 1) {
      offset = 8;
    } else if (mod == 2) {
      offset = 16;
    }
  }

  if (offset) {
    decode_mem_disp(u, offset, op);
  } else {
    op->offset = 0;
  }
}
682
683
/*
 * decode_moffset
 *    Decode offset-only memory operand (e.g. mov al, [moffs]): no
 *    base, index or scale; the displacement width equals the effective
 *    address mode.
 */
static void
decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr)
{
  opr->type  = UD_OP_MEM;
  opr->base  = UD_NONE;
  opr->index = UD_NONE;
  opr->scale = UD_NONE;
  opr->size  = resolve_operand_size(u, size);
  decode_mem_disp(u, u->adr_mode, opr);
}
698
699
/*
 * decode_vex_vvvv
 *    Decode the xmm/ymm register named by vex.vvvv. The field is stored
 *    in 1's complement, hence the (0xf & ~vvvv) inversion.
 */
static void
decode_vex_vvvv(struct ud *u, struct ud_operand *opr, unsigned size)
{
  uint8_t vvvv;
  UD_ASSERT(u->vex_op != 0);
  vvvv = ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 3) & 0xf;
  decode_reg(u, opr, REGCLASS_XMM, (0xf & ~vvvv), size);
}
708
709
/*
 * decode_vex_immreg
 *    Decode source operand encoded in immediate byte [7:4]. Only 8
 *    xmm registers are addressable outside 64-bit mode, hence the
 *    mode-dependent mask.
 */
static int
decode_vex_immreg(struct ud *u, struct ud_operand *opr, unsigned size)
{
  uint8_t imm  = inp_next(u);
  uint8_t mask = u->dis_mode == 64 ? 0xf : 0x7;
  UD_RETURN_ON_ERROR(u);
  UD_ASSERT(u->vex_op != 0);
  decode_reg(u, opr, REGCLASS_XMM, mask & (imm >> 4), size);
  return 0;
}
724
725
/*
 * decode_operand
 *
 * Decodes a single operand according to its opcode-table operand code.
 * Returns the type of the operand (UD_NONE if none)
 */
static int
decode_operand(struct ud           *u,
               struct ud_operand   *operand,
               enum ud_operand_code type,
               unsigned int         size)
{
  operand->type = UD_NONE;
  operand->_oprcode = type;

  switch (type) {
    case OP_A :
      /* direct far pointer seg:offset */
      decode_a(u, operand);
      break;
    case OP_MR:
      /* modrm.rm, with distinct register/memory sizes */
      decode_modrm_rm(u, operand, REGCLASS_GPR,
                      MODRM_MOD(modrm(u)) == 3 ?
                        Mx_reg_size(size) : Mx_mem_size(size));
      break;
    case OP_F:
      u->br_far = 1;
      FALLTHROUGH;
    case OP_M:
      /* memory-only form: modrm.mod == 3 is illegal */
      if (MODRM_MOD(modrm(u)) == 3) {
        UDERR(u, "expected modrm.mod != 3\n");
      }
      FALLTHROUGH;
    case OP_E:
      decode_modrm_rm(u, operand, REGCLASS_GPR, size);
      break;
    case OP_G:
      decode_modrm_reg(u, operand, REGCLASS_GPR, size);
      break;
    case OP_sI:
    case OP_I:
      decode_imm(u, size, operand);
      break;
    case OP_I1:
      /* implicit constant 1 (shift/rotate forms) */
      operand->type = UD_OP_CONST;
      operand->lval.udword = 1;
      break;
    case OP_N:
      /* register-only mmx form */
      if (MODRM_MOD(modrm(u)) != 3) {
        UDERR(u, "expected modrm.mod == 3\n");
      }
      FALLTHROUGH;
    case OP_Q:
      decode_modrm_rm(u, operand, REGCLASS_MMX, size);
      break;
    case OP_P:
      decode_modrm_reg(u, operand, REGCLASS_MMX, size);
      break;
    case OP_U:
      /* register-only xmm form */
      if (MODRM_MOD(modrm(u)) != 3) {
        UDERR(u, "expected modrm.mod == 3\n");
      }
      FALLTHROUGH;
    case OP_W:
      decode_modrm_rm(u, operand, REGCLASS_XMM, size);
      break;
    case OP_V:
      decode_modrm_reg(u, operand, REGCLASS_XMM, size);
      break;
    case OP_H:
      /* vex.vvvv-encoded xmm/ymm source */
      decode_vex_vvvv(u, operand, size);
      break;
    case OP_MU:
      decode_modrm_rm(u, operand, REGCLASS_XMM,
                      MODRM_MOD(modrm(u)) == 3 ?
                        Mx_reg_size(size) : Mx_mem_size(size));
      break;
    case OP_S:
      decode_modrm_reg(u, operand, REGCLASS_SEG, size);
      break;
    case OP_O:
      /* offset-only memory operand (moffs) */
      decode_moffset(u, size, operand);
      break;
    case OP_R0:
    case OP_R1:
    case OP_R2:
    case OP_R3:
    case OP_R4:
    case OP_R5:
    case OP_R6:
    case OP_R7:
      /* gpr encoded in the opcode byte itself, extended by rex.b */
      decode_reg(u, operand, REGCLASS_GPR,
                 (REX_B(u->_rex) << 3) | (type - OP_R0), size);
      break;
    case OP_AL:
    case OP_AX:
    case OP_eAX:
    case OP_rAX:
      decode_reg(u, operand, REGCLASS_GPR, 0, size);
      break;
    case OP_CL:
    case OP_CX:
    case OP_eCX:
      decode_reg(u, operand, REGCLASS_GPR, 1, size);
      break;
    case OP_DL:
    case OP_DX:
    case OP_eDX:
      decode_reg(u, operand, REGCLASS_GPR, 2, size);
      break;
    case OP_ES:
    case OP_CS:
    case OP_DS:
    case OP_SS:
    case OP_FS:
    case OP_GS:
      /* in 64bits mode, only fs and gs are allowed */
      if (u->dis_mode == 64) {
        if (type != OP_FS && type != OP_GS) {
          UDERR(u, "invalid segment register in 64bits\n");
        }
      }
      operand->type = UD_OP_REG;
      operand->base = (type - OP_ES) + UD_R_ES;
      operand->size = 16;
      break;
    case OP_J :
      /* relative branch target: an immediate re-tagged as JIMM */
      decode_imm(u, size, operand);
      operand->type = UD_OP_JIMM;
      break ;
    case OP_R :
      if (MODRM_MOD(modrm(u)) != 3) {
        UDERR(u, "expected modrm.mod == 3\n");
      }
      decode_modrm_rm(u, operand, REGCLASS_GPR, size);
      break;
    case OP_C:
      decode_modrm_reg(u, operand, REGCLASS_CR, size);
      break;
    case OP_D:
      decode_modrm_reg(u, operand, REGCLASS_DB, size);
      break;
    case OP_I3 :
      /* implicit constant 3 (int3) */
      operand->type = UD_OP_CONST;
      operand->lval.sbyte = 3;
      break;
    case OP_ST0:
    case OP_ST1:
    case OP_ST2:
    case OP_ST3:
    case OP_ST4:
    case OP_ST5:
    case OP_ST6:
    case OP_ST7:
      /* x87 stack registers, always 80 bits wide */
      operand->type = UD_OP_REG;
      operand->base = (type - OP_ST0) + UD_R_ST0;
      operand->size = 80;
      break;
    case OP_L:
      decode_vex_immreg(u, operand, size);
      break;
    default :
      operand->type = UD_NONE;
      break;
  }
  return operand->type;
}
892
893
894/*
895 * decode_operands
896 *
897 * Disassemble upto 3 operands of the current instruction being
898 * disassembled. By the end of the function, the operand fields
899 * of the ud structure will have been filled.
900 */
901static int
902decode_operands(struct ud* u)
903{
904 decode_operand(u, &u->operand[0],
905 u->itab_entry->operand1.type,
906 u->itab_entry->operand1.size);
907 if (u->operand[0].type != UD_NONE) {
908 decode_operand(u, &u->operand[1],
909 u->itab_entry->operand2.type,
910 u->itab_entry->operand2.size);
911 }
912 if (u->operand[1].type != UD_NONE) {
913 decode_operand(u, &u->operand[2],
914 u->itab_entry->operand3.type,
915 u->itab_entry->operand3.size);
916 }
917 if (u->operand[2].type != UD_NONE) {
918 decode_operand(u, &u->operand[3],
919 u->itab_entry->operand4.type,
920 u->itab_entry->operand4.size);
921 }
922 return 0;
923}
924
/* -----------------------------------------------------------------------------
 * clear_insn() - clear instruction structure; resets all per-instruction
 * decode state (prefixes, mnemonic, operands) before or after a decode.
 * -----------------------------------------------------------------------------
 */
static void
clear_insn(register struct ud* u)
{
  u->error     = 0;
  u->pfx_seg   = 0;
  u->pfx_opr   = 0;
  u->pfx_adr   = 0;
  u->pfx_lock  = 0;
  u->pfx_repne = 0;
  u->pfx_rep   = 0;
  u->pfx_repe  = 0;
  u->pfx_rex   = 0;
  u->pfx_str   = 0;
  u->mnemonic  = UD_Inone;
  u->itab_entry = NULL;
  u->have_modrm = 0;
  u->br_far    = 0;
  u->vex_op    = 0;
  u->_rex      = 0;
  u->operand[0].type = UD_NONE;
  u->operand[1].type = UD_NONE;
  u->operand[2].type = UD_NONE;
  u->operand[3].type = UD_NONE;
}
953
954
955static UD_INLINE int
956resolve_pfx_str(struct ud* u)
957{
958 if (u->pfx_str == 0xf3) {
959 if (P_STR(u->itab_entry->prefix)) {
960 u->pfx_rep = 0xf3;
961 } else {
962 u->pfx_repe = 0xf3;
963 }
964 } else if (u->pfx_str == 0xf2) {
965 u->pfx_repne = 0xf3;
966 }
967 return 0;
968}
969
970
/*
 * resolve_mode
 *    Compute per-instruction mode state: the effective rex bits (_rex,
 *    derived from vex or the rex prefix and masked by what the opcode
 *    allows), the effective operand size (opr_mode) and the effective
 *    address size (adr_mode). Returns 0 on success, -1 on error.
 */
static int
resolve_mode( struct ud* u )
{
  int default64;
  /* if in error state, bail out */
  if ( u->error ) return -1;

  /* propagate prefix effects */
  if ( u->dis_mode == 64 ) {  /* set 64bit-mode flags */

    /* Check validity of instruction m64 */
    if ( P_INV64( u->itab_entry->prefix ) ) {
      UDERR(u, "instruction invalid in 64bits\n");
      return -1;
    }

    /* compute effective rex based on,
     *  - vex prefix (if any)
     *  - rex prefix (if any, and not vex)
     *  - allowed prefixes specified by the opcode map
     */
    if (u->vex_op == 0xc4) {
      /* vex has rex.rxb in 1's complement */
      u->_rex = ((~(u->vex_b1 >> 5) & 0x7) /* rex.0rxb */ |
                 ((u->vex_b2 >> 4) & 0x8)  /* rex.w000 */);
    } else if (u->vex_op == 0xc5) {
      /* vex has rex.r in 1's complement */
      u->_rex = (~(u->vex_b1 >> 5)) & 4;
    } else {
      UD_ASSERT(u->vex_op == 0);
      u->_rex = u->pfx_rex;
    }
    /* drop rex bits the opcode-table entry does not honor */
    u->_rex &= REX_PFX_MASK(u->itab_entry->prefix);

    /* whether this instruction has a default operand size of
     * 64bit, also hardcoded into the opcode map.
     */
    default64 = P_DEF64( u->itab_entry->prefix );
    /* calculate effective operand size */
    if (REX_W(u->_rex)) {
      u->opr_mode = 64;
    } else if ( u->pfx_opr ) {
      u->opr_mode = 16;
    } else {
      /* unless the default opr size of instruction is 64,
       * the effective operand size in the absence of rex.w
       * prefix is 32.
       */
      u->opr_mode = default64 ? 64 : 32;
    }

    /* calculate effective address size */
    u->adr_mode = (u->pfx_adr) ? 32 : 64;
  } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
    u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
    u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
  } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
    u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
    u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
  }

  return 0;
}
1034
1035
/*
 * decode_insn
 *    Finalize decode for a resolved opcode-table entry (`ptr` must be
 *    a leaf, i.e. its extension bit 0x8000 is clear): set the mnemonic,
 *    then run prefix/mode/operand/mnemonic resolution in order.
 *    Returns 0 on success, -1 on error.
 */
static UD_INLINE int
decode_insn(struct ud *u, uint16_t ptr)
{
  UD_ASSERT((ptr & 0x8000) == 0);
  u->itab_entry = &ud_itab[ ptr ];
  u->mnemonic = u->itab_entry->mnemonic;
  return (resolve_pfx_str(u)  == 0 &&
          resolve_mode(u)     == 0 &&
          decode_operands(u)  == 0 &&
          resolve_mnemonic(u) == 0) ? 0 : -1;
}
1047
1048
/*
 * decode_3dnow()
 *
 * Decoding 3dnow is a little tricky because of its strange opcode
 * structure. The final opcode disambiguation depends on the last
 * byte that comes after the operands have been decoded. Fortunately,
 * all 3dnow instructions have the same set of operand types. So we
 * go ahead and decode the instruction by picking an arbitrarily chosen
 * valid entry in the table, decode the operands, and read the final
 * byte to resolve the mnemonic.
 */
static UD_INLINE int
decode_3dnow(struct ud* u)
{
  uint16_t ptr;
  UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
  UD_ASSERT(u->le->table[0xc] != 0);
  /* decode with a stand-in entry first ... */
  decode_insn(u, u->le->table[0xc]);
  /* ... then read the trailing suffix byte and fix the mnemonic */
  inp_next(u);
  if (u->error) {
    return -1;
  }
  ptr = u->le->table[inp_curr(u)];
  UD_ASSERT((ptr & 0x8000) == 0);
  u->mnemonic = ud_itab[ptr].mnemonic;
  return 0;
}
1076
1077
1078static int
1079decode_ssepfx(struct ud *u)
1080{
1081 uint8_t idx;
1082 uint8_t pfx;
1083
1084 /*
1085 * String prefixes (f2, f3) take precedence over operand
1086 * size prefix (66).
1087 */
1088 pfx = u->pfx_str;
1089 if (pfx == 0) {
1090 pfx = u->pfx_opr;
1091 }
1092 idx = ((pfx & 0xf) + 1) / 2;
1093 if (u->le->table[idx] == 0) {
1094 idx = 0;
1095 }
1096 if (idx && u->le->table[idx] != 0) {
1097 /*
1098 * "Consume" the prefix as a part of the opcode, so it is no
1099 * longer exported as an instruction prefix.
1100 */
1101 u->pfx_str = 0;
1102 if (pfx == 0x66) {
1103 /*
1104 * consume "66" only if it was used for decoding, leaving
1105 * it to be used as an operands size override for some
1106 * simd instructions.
1107 */
1108 u->pfx_opr = 0;
1109 }
1110 }
1111 return decode_ext(u, u->le->table[idx]);
1112}
1113
1114
/*
 * decode_vex
 *    Resolve a c4/c5 vex-prefixed opcode. Outside 64-bit mode, c4/c5
 *    with modrm.mod != 3 are the legacy les/lds instructions (table
 *    slot 0). Otherwise the vex payload bytes select the table slot
 *    from vex.pp and (for c4) vex.m-mmmm.
 */
static int
decode_vex(struct ud *u)
{
  uint8_t index;
  if (u->dis_mode != 64 && MODRM_MOD(inp_peek(u)) != 0x3) {
    index = 0;
  } else {
    u->vex_op = inp_curr(u);
    u->vex_b1 = inp_next(u);
    if (u->vex_op == 0xc4) {
      uint8_t pp, m;
      /* 3-byte vex */
      u->vex_b2 = inp_next(u);
      UD_RETURN_ON_ERROR(u);
      m  = u->vex_b1 & 0x1f;
      if (m == 0 || m > 3) {
        UD_RETURN_WITH_ERROR(u, "reserved vex.m-mmmm value");
      }
      pp = u->vex_b2 & 0x3;
      index = (pp << 2) | m;
    } else {
      /* 2-byte vex: implied m-mmmm of 1 (0f map) */
      UD_ASSERT(u->vex_op == 0xc5);
      index = 0x1 | ((u->vex_b1 & 0x3) << 2);
    }
  }
  return decode_ext(u, u->le->table[index]);
}
1143
1144
/*
 * decode_ext()
 *
 * Decode opcode extensions (if any). A `ptr` with bit 0x8000 clear is
 * a leaf opcode-table entry; otherwise it names a lookup table whose
 * type determines which instruction field selects the next entry, and
 * decoding recurses.
 */
static int
decode_ext(struct ud *u, uint16_t ptr)
{
  uint8_t idx = 0;
  if ((ptr & 0x8000) == 0) {
    return decode_insn(u, ptr);
  }
  u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
  if (u->le->type == UD_TAB__OPC_3DNOW) {
    return decode_3dnow(u);
  }

  switch (u->le->type) {
    case UD_TAB__OPC_MOD:
      /* !11 = 0, 11 = 1 */
      idx = (MODRM_MOD(modrm(u)) + 1) / 4;
      break;
      /* disassembly mode/operand size/address size based tables.
       * 16 = 0, 32 = 1, 64 = 2
       */
    case UD_TAB__OPC_MODE:
      idx = u->dis_mode != 64 ? 0 : 1;
      break;
    case UD_TAB__OPC_OSIZE:
      idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
      break;
    case UD_TAB__OPC_ASIZE:
      idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
      break;
    case UD_TAB__OPC_X87:
      /* x87 opcodes with modrm.mod == 3 index by the full modrm byte */
      idx = modrm(u) - 0xC0;
      break;
    case UD_TAB__OPC_VENDOR:
      if (u->vendor == UD_VENDOR_ANY) {
        /* choose a valid entry */
        idx = (u->le->table[idx] != 0) ? 0 : 1;
      } else if (u->vendor == UD_VENDOR_AMD) {
        idx = 0;
      } else {
        idx = 1;
      }
      break;
    case UD_TAB__OPC_RM:
      idx = MODRM_RM(modrm(u));
      break;
    case UD_TAB__OPC_REG:
      idx = MODRM_REG(modrm(u));
      break;
    case UD_TAB__OPC_SSE:
      return decode_ssepfx(u);
    case UD_TAB__OPC_VEX:
      return decode_vex(u);
    case UD_TAB__OPC_VEX_W:
      idx = vex_w(u);
      break;
    case UD_TAB__OPC_VEX_L:
      idx = vex_l(u);
      break;
    case UD_TAB__OPC_TABLE:
      /* multi-byte opcode: consume the next opcode byte and re-dispatch */
      inp_next(u);
      return decode_opcode(u);
    default:
      UD_ASSERT(!"not reached");
      break;
  }

  return decode_ext(u, u->le->table[idx]);
}
1218
1219
/*
 * decode_opcode
 *    Dispatch on the current opcode byte through the active lookup
 *    table, then resolve any opcode extensions.
 */
static int
decode_opcode(struct ud *u)
{
  uint16_t ptr;
  UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE);
  UD_RETURN_ON_ERROR(u);
  ptr = u->le->table[inp_curr(u)];
  return decode_ext(u, ptr);
}
1229
1230
/* =============================================================================
 * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
 * On failure the instruction is reset and marked invalid (itab entry 0).
 * =============================================================================
 */
unsigned int
ud_decode(struct ud *u)
{
  inp_start(u);
  clear_insn(u);
  u->le = &ud_lookup_table_list[0];
  /* note: decode_opcode still runs after a prefix error; u->error is
   * OR-ed in last so either failure marks the instruction invalid */
  u->error = decode_prefixes(u) == -1 ||
             decode_opcode(u)   == -1 ||
             u->error;
  /* Handle decode error. */
  if (u->error) {
    /* clear out the decode data. */
    clear_insn(u);
    /* mark the sequence of bytes as invalid. */
    u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */
    u->mnemonic = u->itab_entry->mnemonic;
  }

    /* maybe this stray segment override byte
     * should be spewed out?
     */
  if ( !P_SEG( u->itab_entry->prefix ) &&
        u->operand[0].type != UD_OP_MEM &&
        u->operand[1].type != UD_OP_MEM )
    u->pfx_seg = 0;

  u->insn_offset = u->pc; /* set offset of instruction */
  u->asm_buf_fill = 0;    /* set translation buffer index to 0 */
  u->pc += u->inp_ctr;    /* move program counter by bytes decoded */

  /* return number of bytes disassembled. */
  return u->inp_ctr;
}
1268
1269#endif // USE(UDIS86)
1270
1271/*
1272vim: set ts=2 sw=2 expandtab
1273*/
1274