/*
 * emu.c -- F-CPU instruction-level emulator core
 * Copyright (C) 2002 Michael Riepe <michael@stud.uni-hannover.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

static const char rcsid[] = "@(#) $Id: emu.c,v 1.3 2002/12/28 18:08:55 michael Exp $";

#if HAVE_CONFIG_H
#include <config.h>
#endif

#if STDC_HEADERS
#include <stdlib.h>
#include <string.h>
#else
void *malloc();
unsigned long strtoul();
#endif

#include <stdio.h>

#if HAVE_UNISTD_H
#include <unistd.h>
#else
int read(), write();
int getopt(), optind;
char *optarg;
#endif

#if HAVE_ERRNO_H
#include <errno.h>
#else
extern int errno;
#endif

#if HAVE_MATH_H
#include <math.h>
#else
int isnan();
double sqrt(), log(), exp();
#endif

#if HAVE_FCNTL_H
#include <fcntl.h>
#else
int open(), close();
#endif
#ifndef O_RDONLY
#define O_RDONLY 0
#endif

#if HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif

#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
#ifndef MAP_FAILED
#define MAP_FAILED (-1)
#endif

#include "../fcpu_opcodes/fcpu_opcodes.h"

#if WORDS_BIGENDIAN
#define HOST_BIG_ENDIAN	1
#else /* WORDS_BIGENDIAN */
#define HOST_BIG_ENDIAN	0
#endif /* WORDS_BIGENDIAN */

#define MAXSIZE				8	/* emulate a 64-bit F-CPU */
#define PARTIAL_WRITES		0	/* new write semantics: high part is cleared */

typedef          char      I8;
typedef unsigned char      U8;
typedef          short     I16;
typedef unsigned short     U16;
typedef          int       I32;
typedef unsigned int       U32;
#if SIZEOF_LONG == 8
typedef          long      I64;
typedef unsigned long      U64;
#else
typedef          long long I64;
typedef unsigned long long U64;
#endif

#define ST_b	I8
#define ST_d	I16
#define ST_q	I32
#define ST_o	I64
#define ST(sz)	ST_##sz

#define UT_b	U8
#define UT_d	U16
#define UT_q	U32
#define UT_o	U64
#define UT(sz)	UT_##sz

#define FT_F	float
#define FT_D	double
#define FT(sz)	FT_##sz

#define BYTES_b		(sizeof(I8))
#define BYTES_d		(sizeof(I16))
#define BYTES_q		(sizeof(I32))
#define BYTES_o		(sizeof(I64))
#define BYTES_F		(sizeof(float))
#define BYTES_D		(sizeof(double))
#define BYTES(sz)	BYTES_##sz

/* number of chunks */
#define CHUNKS(sz)	(MAXSIZE/BYTES(sz))

#if SIZEOF_LONG == 8
/* 64-bit host */
#define best		o
#define ST_best		ST_o
#define UT_best		UT_o
#define CHUNKS_best	CHUNKS_o
#define BYTES_best	BYTES_o
#else
/* 32-bit host */
#define best		q
#define ST_best		ST_q
#define UT_best		UT_q
#define CHUNKS_best	CHUNKS_q
#define BYTES_best	BYTES_q
#endif

#undef BITS
#define BITS(sz)	(8*BYTES(sz))

/* rounding */
#if HAVE_FENV_H && HAVE_FESETROUND
#include <fenv.h>
#else
#define fesetround(x)	/**/
#define FE_TONEAREST	0
#define FE_TOWARDZERO	1
#define FE_DOWNWARD		2
#define FE_UPWARD		3
#endif

int default_rounding = FE_TONEAREST;

#define RND_r	FE_TONEAREST
#define RND_t	FE_TOWARDZERO
#define RND_f	FE_DOWNWARD
#define RND_c	FE_UPWARD
#define RND(rm)	RND_##rm

#define none /**/

/* opcode fields */
#define R1	((opcode >>	 0) & 077)	/* destination */
#define R2	((opcode >>	 6) & 077)	/* first operand */
#define R3	((opcode >> 12) & 077)	/* second operand */
#define RA	(R1 ^ 1)				/* alternate destination */
#define IL	(opcode >>	6)			/* long (16+ bit) immediate */
#define IS	(opcode >> 12)			/* short (8+ bit) immediate */

/* immediate constants */
#define UIMM8	((U8)IS)
#define SIMM8	((I8)IS)
#define SIMM9	((I16)(IS << 7) >> 7)
#define UIMM16	((U16)IL)
#define SIMM16	((I16)IL)
#define SIMM17	((I32)(IL << 15) >> 15)

/* a single register */
union reg {
	U8		b[CHUNKS(b)];
	U16		d[CHUNKS(d)];
	U32		q[CHUNKS(q)];
	U64		o[CHUNKS(o)];
	I8		sb[CHUNKS(b)];
	I16		sd[CHUNKS(d)];
	I32		sq[CHUNKS(q)];
	I64		so[CHUNKS(o)];
	float	F[CHUNKS(F)];
	double	D[CHUNKS(D)];
};

/* the register set */
struct regs {
	union reg	r[64];
	union reg	r_pc;
} regs;

/* shortcuts */
#define r(x)					regs.r[x]
#define for_all_chunks(i,sz)	for (i = 0; i < CHUNKS(sz); i++)

/* chunk selector */
#if HOST_BIG_ENDIAN
#define C(sz,i)		sz[CHUNKS(sz)-1-(i)]
#define C2(sz,i)	sz[2*CHUNKS(sz)-1-(i)]
#define SC(sz,i)	s##sz[CHUNKS(sz)-1-(i)]
#else
#define C(sz,i)		sz[i]
#define C2(sz,i)	sz[i]
#define SC(sz,i)	s##sz[i]
#endif

/* byte `j' of `sz'-sized chunk `i' of register `R' */
#define cbyte(R,sz,i,j)	(((U8*)&R.C(sz,i))[j])

/* result initializer */
#if PARTIAL_WRITES
#define reginit(R)	r(R)	/* preload result with original contents */
#else
#define reginit(R)	{{0}}	/* preload result with zero */
#endif

/* exception codes */
enum {
	EX_NONE,
	EX_ACCESS,
	EX_ADDRESS,
	EX_ALIGNMENT,
	EX_INVALID,
	EX_NULL,
	EX_RANGE,
	EX_HALT,
	EX_number
};

unsigned excode = EX_NONE;

/* raise an exception */
#define ex(x)	(excode = (x))

/* target -> host memory mapping */
unsigned char *addrbase;	/* F-CPU core memory is mapped here */
size_t ramsize;				/* total amount of RAM available for emulation */

void*
memmap(U64 virtaddr, U64 align, U64 len, int write_mode) {
	/* no MMU for now */
	if (len > ramsize) {
		ex(EX_ADDRESS);
		return NULL;
	}
	if (virtaddr >= ramsize) {
		ex(EX_ADDRESS);
		return NULL;
	}
	if (virtaddr + len > ramsize) {
		ex(EX_ADDRESS);
		return NULL;
	}
	if (virtaddr % align) {
		ex(EX_ALIGNMENT);
	}
	return addrbase + virtaddr;
}

/* condition codes */
int
zero(unsigned reg) {
	unsigned i;

	if (reg)
		for_all_chunks(i,best)
			if (r(reg).C(best,i))
				return 0;
	return 1;
}

int
nan(unsigned reg) {
	return reg && isnan(r(reg).C(D,0));
}

int
lsb(unsigned reg) {
	return reg && (r(reg).C(b,0) & 1);
}

/* XXX: always use bit 63? */
int
msb(unsigned reg) {
	return reg && (r(reg).C(b,7) & 0x80);
}

/* the instruction set, (almost) in manual order */

#define ADD(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) + r(R3).C(sz,i); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void add_b(U32 opcode) { if (R1) ADD(b,0); }
void add_d(U32 opcode) { if (R1) ADD(d,0); }
void add_q(U32 opcode) { if (R1) ADD(q,0); }
void add_o(U32 opcode) { if (R1) ADD(o,0); }
void sadd_b(U32 opcode) { if (R1) ADD(b,1); }
void sadd_d(U32 opcode) { if (R1) ADD(d,1); }
void sadd_q(U32 opcode) { if (R1) ADD(q,1); }
void sadd_o(U32 opcode) { if (R1) ADD(o,1); }

#define ADDC(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) + r(R3).C(sz,i); \
			ra.C(sz,i) = r1.C(sz,i) < r(R2).C(sz,i); \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void addc_b(U32 opcode) { ADDC(b,0); }
void addc_d(U32 opcode) { ADDC(d,0); }
void addc_q(U32 opcode) { ADDC(q,0); }
void addc_o(U32 opcode) { ADDC(o,0); }
void saddc_b(U32 opcode) { ADDC(b,1); }
void saddc_d(U32 opcode) { ADDC(d,1); }
void saddc_q(U32 opcode) { ADDC(q,1); }
void saddc_o(U32 opcode) { ADDC(o,1); }

#define ADDS(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) + r(R3).C(sz,i); \
			if (r1.C(sz,i) < r(R2).C(sz,i)) r1.C(sz,i) = ~(UT(sz))0; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void adds_b(U32 opcode) { if (R1) ADDS(b,0); }
void adds_d(U32 opcode) { if (R1) ADDS(d,0); }
void adds_q(U32 opcode) { if (R1) ADDS(q,0); }
void adds_o(U32 opcode) { if (R1) ADDS(o,0); }
void sadds_b(U32 opcode) { if (R1) ADDS(b,1); }
void sadds_d(U32 opcode) { if (R1) ADDS(d,1); }
void sadds_q(U32 opcode) { if (R1) ADDS(q,1); }
void sadds_o(U32 opcode) { if (R1) ADDS(o,1); }

#define ADDI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) + UIMM8; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void addi_b(U32 opcode) { if (R1) ADDI(b,0); }
void addi_d(U32 opcode) { if (R1) ADDI(d,0); }
void addi_q(U32 opcode) { if (R1) ADDI(q,0); }
void addi_o(U32 opcode) { if (R1) ADDI(o,0); }
void saddi_b(U32 opcode) { if (R1) ADDI(b,1); }
void saddi_d(U32 opcode) { if (R1) ADDI(d,1); }
void saddi_q(U32 opcode) { if (R1) ADDI(q,1); }
void saddi_o(U32 opcode) { if (R1) ADDI(o,1); }

#define SUB(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) - r(R3).C(sz,i); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void sub_b(U32 opcode) { if (R1) SUB(b,0); }
void sub_d(U32 opcode) { if (R1) SUB(d,0); }
void sub_q(U32 opcode) { if (R1) SUB(q,0); }
void sub_o(U32 opcode) { if (R1) SUB(o,0); }
void ssub_b(U32 opcode) { if (R1) SUB(b,1); }
void ssub_d(U32 opcode) { if (R1) SUB(d,1); }
void ssub_q(U32 opcode) { if (R1) SUB(q,1); }
void ssub_o(U32 opcode) { if (R1) SUB(o,1); }

#define SUBB(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) - r(R3).C(sz,i); \
			ra.C(sz,i) = r(R2).C(sz,i) < r(R3).C(sz,i); \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void subb_b(U32 opcode) { SUBB(b,0); }
void subb_d(U32 opcode) { SUBB(d,0); }
void subb_q(U32 opcode) { SUBB(q,0); }
void subb_o(U32 opcode) { SUBB(o,0); }
void ssubb_b(U32 opcode) { SUBB(b,1); }
void ssubb_d(U32 opcode) { SUBB(d,1); }
void ssubb_q(U32 opcode) { SUBB(q,1); }
void ssubb_o(U32 opcode) { SUBB(o,1); }

#define SUBF(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).C(sz,i) <= r(R3).C(sz,i)) \
				r1.C(sz,i) = 0; \
			else \
				r1.C(sz,i) = r(R2).C(sz,i) - r(R3).C(sz,i); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void subf_b(U32 opcode) { if (R1) SUBF(b,0); }
void subf_d(U32 opcode) { if (R1) SUBF(d,0); }
void subf_q(U32 opcode) { if (R1) SUBF(q,0); }
void subf_o(U32 opcode) { if (R1) SUBF(o,0); }
void ssubf_b(U32 opcode) { if (R1) SUBF(b,1); }
void ssubf_d(U32 opcode) { if (R1) SUBF(d,1); }
void ssubf_q(U32 opcode) { if (R1) SUBF(q,1); }
void ssubf_o(U32 opcode) { if (R1) SUBF(o,1); }

#define SUBI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) - UIMM8; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void subi_b(U32 opcode) { if (R1) SUBI(b,0); }
void subi_d(U32 opcode) { if (R1) SUBI(d,0); }
void subi_q(U32 opcode) { if (R1) SUBI(q,0); }
void subi_o(U32 opcode) { if (R1) SUBI(o,0); }
void ssubi_b(U32 opcode) { if (R1) SUBI(b,1); }
void ssubi_d(U32 opcode) { if (R1) SUBI(d,1); }
void ssubi_q(U32 opcode) { if (R1) SUBI(q,1); }
void ssubi_o(U32 opcode) { if (R1) SUBI(o,1); }

#define MULMAC(sz,simd,mac) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) * r(R3).C(sz,i); \
			if (mac) r1.C(sz,i) += r(R1).C(sz,i); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

#define MUL(sz,simd)	MULMAC(sz,simd,0)

void mul_b(U32 opcode) { if (R1) MUL(b,0); }
void mul_d(U32 opcode) { if (R1) MUL(d,0); }
void mul_q(U32 opcode) { if (R1) MUL(q,0); }
void mul_o(U32 opcode) { if (R1) MUL(o,0); }
void smul_b(U32 opcode) { if (R1) MUL(b,1); }
void smul_d(U32 opcode) { if (R1) MUL(d,1); }
void smul_q(U32 opcode) { if (R1) MUL(q,1); }
void smul_o(U32 opcode) { if (R1) MUL(o,1); }

#define HI(sz,x)	((x) >> 4*BYTES(sz))
#define LO(sz,x)	HI(sz,(x) << 4*BYTES(sz))

#define MULMACH(sz,simd,mac) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (BITS(sz) > 32) { \
				union reg t; \
				r1.C(sz,i) = r(R2).C(sz,i) * r(R3).C(sz,i); \
				ra.C(sz,i) = HI(sz,r(R2).C(sz,i)) * HI(sz,r(R3).C(sz,i)); \
				t.C(sz,0) = LO(sz,r(R2).C(sz,i)) * LO(sz,r(R3).C(sz,i)); \
				t.C(sz,0) = HI(sz,t.C(sz,0)) \
				        + LO(sz,r(R2).C(sz,i)) * HI(sz,r(R3).C(sz,i)); \
				ra.C(sz,i) += HI(sz,t.C(sz,0)); \
				t.C(sz,0) = LO(sz,t.C(sz,0)) \
				        + HI(sz,r(R2).C(sz,i)) * LO(sz,r(R3).C(sz,i)); \
				ra.C(sz,i) += HI(sz,t.C(sz,0)); \
			} \
			else { \
				U64 t = (U64)r(R2).C(sz,i) * (U64)r(R3).C(sz,i); \
				r1.C(sz,i) = t; \
				ra.C(sz,i) = t >> 1 >> (BITS(sz)-1); \
			} \
			if (mac) { \
				r1.C(sz,i) += r(R1).C(sz,i); \
				ra.C(sz,i) += r1.C(sz,i) < r(R1).C(sz,i); \
			} \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

#define MULH(sz,simd)	MULMACH(sz,simd,0)

void mulh_b(U32 opcode) { MULH(b,0); }
void mulh_d(U32 opcode) { MULH(d,0); }
void mulh_q(U32 opcode) { MULH(q,0); }
void mulh_o(U32 opcode) { MULH(o,0); }
void smulh_b(U32 opcode) { MULH(b,1); }
void smulh_d(U32 opcode) { MULH(d,1); }
void smulh_q(U32 opcode) { MULH(q,1); }
void smulh_o(U32 opcode) { MULH(o,1); }

#define MULMACHS(sz,simd,mac) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (BITS(sz) > 32) { \
				union reg t; \
				r1.C(sz,i) = r(R2).C(sz,i) * r(R3).C(sz,i); \
				ra.C(sz,i) = HI(sz,r(R2).C(sz,i)) * HI(sz,r(R3).C(sz,i)); \
				t.C(sz,0) = LO(sz,r(R2).C(sz,i)) * LO(sz,r(R3).C(sz,i)); \
				t.C(sz,0) = HI(sz,t.C(sz,0)) \
				        + LO(sz,r(R2).C(sz,i)) * HI(sz,r(R3).C(sz,i)); \
				ra.C(sz,i) += HI(sz,t.C(sz,0)); \
				t.C(sz,0) = LO(sz,t.C(sz,0)) \
				        + HI(sz,r(R2).C(sz,i)) * LO(sz,r(R3).C(sz,i)); \
				ra.C(sz,i) += HI(sz,t.C(sz,0)); \
				if (r(R2).SC(sz,i) < 0) \
					ra.C(sz,0) -= r(R3).C(sz,i); \
				if (r(R3).SC(sz,i) < 0) \
					ra.C(sz,0) -= r(R2).C(sz,i); \
			} \
			else { \
				I64 t = (I64)r(R2).C(sz,i) * (I64)r(R3).C(sz,i); \
				r1.C(sz,i) = t; \
				ra.C(sz,i) = t >> 1 >> (BITS(sz)-1); \
			} \
			if (mac) { \
				r1.C(sz,i) += r(R1).C(sz,i); \
				ra.C(sz,i) += r1.C(sz,i) < r(R1).C(sz,i); \
			} \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

#define MULHS(sz,simd)	MULMACHS(sz,simd,0)

void mulhs_b(U32 opcode) { MULHS(b,0); }
void mulhs_d(U32 opcode) { MULHS(d,0); }
void mulhs_q(U32 opcode) { MULHS(q,0); }
void mulhs_o(U32 opcode) { MULHS(o,0); }
void smulhs_b(U32 opcode) { MULHS(b,1); }
void smulhs_d(U32 opcode) { MULHS(d,1); }
void smulhs_q(U32 opcode) { MULHS(q,1); }
void smulhs_o(U32 opcode) { MULHS(o,1); }

#define MULI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) * SIMM8; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void muli_b(U32 opcode) { if (R1) MULI(b,0); }
void muli_d(U32 opcode) { if (R1) MULI(d,0); }
void muli_q(U32 opcode) { if (R1) MULI(q,0); }
void muli_o(U32 opcode) { if (R1) MULI(o,0); }
void smuli_b(U32 opcode) { if (R1) MULI(b,1); }
void smuli_d(U32 opcode) { if (R1) MULI(d,1); }
void smuli_q(U32 opcode) { if (R1) MULI(q,1); }
void smuli_o(U32 opcode) { if (R1) MULI(o,1); }

#define DIV(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (!r(R3).C(sz,i)) { \
				ex(EX_NULL); \
				return; \
			} \
			r1.C(sz,i) = r(R2).C(sz,i) / r(R3).C(sz,i); \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
	} while (0)

void div_b(U32 opcode) { DIV(b,0); }
void div_d(U32 opcode) { DIV(d,0); }
void div_q(U32 opcode) { DIV(q,0); }
void div_o(U32 opcode) { DIV(o,0); }
void sdiv_b(U32 opcode) { DIV(b,1); }
void sdiv_d(U32 opcode) { DIV(d,1); }
void sdiv_q(U32 opcode) { DIV(q,1); }
void sdiv_o(U32 opcode) { DIV(o,1); }

#define DIVS(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (!r(R3).C(sz,i)) { \
				ex(EX_NULL); \
				return; \
			} \
			r1.C(sz,i) = r(R2).SC(sz,i) / r(R3).SC(sz,i); \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
	} while (0)

void divs_b(U32 opcode) { DIVS(b,0); }
void divs_d(U32 opcode) { DIVS(d,0); }
void divs_q(U32 opcode) { DIVS(q,0); }
void divs_o(U32 opcode) { DIVS(o,0); }
void sdivs_b(U32 opcode) { DIVS(b,1); }
void sdivs_d(U32 opcode) { DIVS(d,1); }
void sdivs_q(U32 opcode) { DIVS(q,1); }
void sdivs_o(U32 opcode) { DIVS(o,1); }

#define DIVREM(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (!r(R3).C(sz,i)) { \
				ex(EX_NULL); \
				return; \
			} \
			r1.C(sz,i) = r(R2).C(sz,i) / r(R3).C(sz,i); \
			ra.C(sz,i) = r(R2).C(sz,i) % r(R3).C(sz,i); \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void divrem_b(U32 opcode) { DIVREM(b,0); }
void divrem_d(U32 opcode) { DIVREM(d,0); }
void divrem_q(U32 opcode) { DIVREM(q,0); }
void divrem_o(U32 opcode) { DIVREM(o,0); }
void sdivrem_b(U32 opcode) { DIVREM(b,1); }
void sdivrem_d(U32 opcode) { DIVREM(d,1); }
void sdivrem_q(U32 opcode) { DIVREM(q,1); }
void sdivrem_o(U32 opcode) { DIVREM(o,1); }

#define DIVREMS(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (!r(R3).C(sz,i)) { \
				ex(EX_NULL); \
				return; \
			} \
			r1.C(sz,i) = r(R2).SC(sz,i) / r(R3).SC(sz,i); \
			ra.C(sz,i) = r(R2).SC(sz,i) % r(R3).SC(sz,i); \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void divrems_b(U32 opcode) { DIVREMS(b,0); }
void divrems_d(U32 opcode) { DIVREMS(d,0); }
void divrems_q(U32 opcode) { DIVREMS(q,0); }
void divrems_o(U32 opcode) { DIVREMS(o,0); }
void sdivrems_b(U32 opcode) { DIVREMS(b,1); }
void sdivrems_d(U32 opcode) { DIVREMS(d,1); }
void sdivrems_q(U32 opcode) { DIVREMS(q,1); }
void sdivrems_o(U32 opcode) { DIVREMS(o,1); }

#define DIVI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		if (!SIMM8) { \
			ex(EX_NULL); \
			return; \
		} \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).SC(sz,i) / SIMM8; \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
	} while (0)

void divi_b(U32 opcode) { DIVI(b,0); }
void divi_d(U32 opcode) { DIVI(d,0); }
void divi_q(U32 opcode) { DIVI(q,0); }
void divi_o(U32 opcode) { DIVI(o,0); }
void sdivi_b(U32 opcode) { DIVI(b,1); }
void sdivi_d(U32 opcode) { DIVI(d,1); }
void sdivi_q(U32 opcode) { DIVI(q,1); }
void sdivi_o(U32 opcode) { DIVI(o,1); }

#define DIVREMI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		if (!SIMM8) { \
			ex(EX_NULL); \
			return; \
		} \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).SC(sz,i) / SIMM8; \
			ra.C(sz,i) = r(R2).SC(sz,i) % SIMM8; \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void divremi_b(U32 opcode) { DIVREMI(b,0); }
void divremi_d(U32 opcode) { DIVREMI(d,0); }
void divremi_q(U32 opcode) { DIVREMI(q,0); }
void divremi_o(U32 opcode) { DIVREMI(o,0); }
void sdivremi_b(U32 opcode) { DIVREMI(b,1); }
void sdivremi_d(U32 opcode) { DIVREMI(d,1); }
void sdivremi_q(U32 opcode) { DIVREMI(q,1); }
void sdivremi_o(U32 opcode) { DIVREMI(o,1); }

#define REM(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (!r(R3).C(sz,i)) { \
				ex(EX_NULL); \
				return; \
			} \
			r1.C(sz,i) = r(R2).C(sz,i) % r(R3).C(sz,i); \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
	} while (0)

void rem_b(U32 opcode) { REM(b,0); }
void rem_d(U32 opcode) { REM(d,0); }
void rem_q(U32 opcode) { REM(q,0); }
void rem_o(U32 opcode) { REM(o,0); }
void srem_b(U32 opcode) { REM(b,1); }
void srem_d(U32 opcode) { REM(d,1); }
void srem_q(U32 opcode) { REM(q,1); }
void srem_o(U32 opcode) { REM(o,1); }

#define REMS(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (!r(R3).C(sz,i)) { \
				ex(EX_NULL); \
				return; \
			} \
			r1.C(sz,i) = r(R2).SC(sz,i) % r(R3).SC(sz,i); \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
	} while (0)

void rems_b(U32 opcode) { REMS(b,0); }
void rems_d(U32 opcode) { REMS(d,0); }
void rems_q(U32 opcode) { REMS(q,0); }
void rems_o(U32 opcode) { REMS(o,0); }
void srems_b(U32 opcode) { REMS(b,1); }
void srems_d(U32 opcode) { REMS(d,1); }
void srems_q(U32 opcode) { REMS(q,1); }
void srems_o(U32 opcode) { REMS(o,1); }

#define REMI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		if (!SIMM8) { \
			ex(EX_NULL); \
			return; \
		} \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).SC(sz,i) % SIMM8; \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
	} while (0)

void remi_b(U32 opcode) { REMI(b,0); }
void remi_d(U32 opcode) { REMI(d,0); }
void remi_q(U32 opcode) { REMI(q,0); }
void remi_o(U32 opcode) { REMI(o,0); }
void sremi_b(U32 opcode) { REMI(b,1); }
void sremi_d(U32 opcode) { REMI(d,1); }
void sremi_q(U32 opcode) { REMI(q,1); }
void sremi_o(U32 opcode) { REMI(o,1); }

/* XXX: 64-bit chunks not handled correctly when MAXSIZE != 8 */
#define MACL(ssz,dsz,simd)	\
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,dsz) { \
			r1.C(dsz,i) = r(R1).C(dsz,i) \
				+ (UT(dsz))r(R2).C(ssz,i) * (UT(dsz))r(R3).C(ssz,i); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void macl_b(U32 opcode) { if (R1) MACL(b,d,0); }
void macl_d(U32 opcode) { if (R1) MACL(d,q,0); }
void macl_q(U32 opcode) { if (R1) MACL(q,o,0); }
void macl_o(U32 opcode) { if (R1) MACL(o,o,0); }
void smacl_b(U32 opcode) { if (R1) MACL(b,d,1); }
void smacl_d(U32 opcode) { if (R1) MACL(d,q,1); }
void smacl_q(U32 opcode) { if (R1) MACL(q,o,1); }
void smacl_o(U32 opcode) { if (R1) MACL(o,o,1); }

/* XXX: 64-bit chunks not handled correctly when MAXSIZE != 8 */
#define MACLS(ssz,dsz,simd)	\
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,dsz) { \
			r1.C(dsz,i) = r(R1).C(dsz,i) \
				+ (ST(dsz))r(R2).SC(ssz,i) * (ST(dsz))r(R3).SC(ssz,i); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void macls_b(U32 opcode) { if (R1) MACLS(b,d,0); }
void macls_d(U32 opcode) { if (R1) MACLS(d,q,0); }
void macls_q(U32 opcode) { if (R1) MACLS(q,o,0); }
void macls_o(U32 opcode) { if (R1) MACLS(o,o,0); }
void smacls_b(U32 opcode) { if (R1) MACLS(b,d,1); }
void smacls_d(U32 opcode) { if (R1) MACLS(d,q,1); }
void smacls_q(U32 opcode) { if (R1) MACLS(q,o,1); }
void smacls_o(U32 opcode) { if (R1) MACLS(o,o,1); }

/* XXX: 64-bit chunks not handled correctly when MAXSIZE != 8 */
#define MACH(ssz,dsz,simd)	\
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned j = CHUNKS(ssz) / 2; \
		for_all_chunks(i,dsz) { \
			r1.C(dsz,i) = r(R1).C(dsz,i) \
				+ (UT(dsz))r(R2).C(ssz,j+i) * (UT(dsz))r(R3).C(ssz,j+i); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void mach_b(U32 opcode) { if (R1) MACH(b,d,0); }
void mach_d(U32 opcode) { if (R1) MACH(d,q,0); }
void mach_q(U32 opcode) { if (R1) MACH(q,o,0); }
void mach_o(U32 opcode) { if (R1) MACH(o,o,0); }
void smach_b(U32 opcode) { if (R1) MACH(b,d,1); }
void smach_d(U32 opcode) { if (R1) MACH(d,q,1); }
void smach_q(U32 opcode) { if (R1) MACH(q,o,1); }
void smach_o(U32 opcode) { if (R1) MACH(o,o,1); }

/* XXX: 64-bit chunks not handled correctly when MAXSIZE != 8 */
#define MACHS(ssz,dsz,simd)	\
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned j = CHUNKS(ssz) / 2; \
		for_all_chunks(i,dsz) { \
			r1.C(dsz,i) = r(R1).C(dsz,i) \
				+ (ST(dsz))r(R2).SC(ssz,j+i) * (ST(dsz))r(R3).SC(ssz,j+i); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void machs_b(U32 opcode) { if (R1) MACHS(b,d,0); }
void machs_d(U32 opcode) { if (R1) MACHS(d,q,0); }
void machs_q(U32 opcode) { if (R1) MACHS(q,o,0); }
void machs_o(U32 opcode) { if (R1) MACHS(o,o,0); }
void smachs_b(U32 opcode) { if (R1) MACHS(b,d,1); }
void smachs_d(U32 opcode) { if (R1) MACHS(d,q,1); }
void smachs_q(U32 opcode) { if (R1) MACHS(q,o,1); }
void smachs_o(U32 opcode) { if (R1) MACHS(o,o,1); }

/* XXX: alternative mac (undocumented) */
/* Note: also used for FP mac! */
#define AMAC(sz,simd)	MULMAC(sz,simd,1)

void amac_b(U32 opcode) { if (R1) AMAC(b,0); }
void amac_d(U32 opcode) { if (R1) AMAC(d,0); }
void amac_q(U32 opcode) { if (R1) AMAC(q,0); }
void amac_o(U32 opcode) { if (R1) AMAC(o,0); }
void samac_b(U32 opcode) { if (R1) AMAC(b,1); }
void samac_d(U32 opcode) { if (R1) AMAC(d,1); }
void samac_q(U32 opcode) { if (R1) AMAC(q,1); }
void samac_o(U32 opcode) { if (R1) AMAC(o,1); }

#define AMACH(sz,simd)	MULMACH(sz,simd,1)

void amach_b(U32 opcode) { AMACH(b,0); }
void amach_d(U32 opcode) { AMACH(d,0); }
void amach_q(U32 opcode) { AMACH(q,0); }
void amach_o(U32 opcode) { AMACH(o,0); }
void samach_b(U32 opcode) { AMACH(b,1); }
void samach_d(U32 opcode) { AMACH(d,1); }
void samach_q(U32 opcode) { AMACH(q,1); }
void samach_o(U32 opcode) { AMACH(o,1); }

#define AMACHS(sz,simd)	MULMACHS(sz,simd,1)

void amachs_b(U32 opcode) { AMACHS(b,0); }
void amachs_d(U32 opcode) { AMACHS(d,0); }
void amachs_q(U32 opcode) { AMACHS(q,0); }
void amachs_o(U32 opcode) { AMACHS(o,0); }
void samachs_b(U32 opcode) { AMACHS(b,1); }
void samachs_d(U32 opcode) { AMACHS(d,1); }
void samachs_q(U32 opcode) { AMACHS(q,1); }
void samachs_o(U32 opcode) { AMACHS(o,1); }

#define ADDSUB(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) + r(R3).C(sz,i); \
			ra.C(sz,i) = r(R2).C(sz,i) - r(R3).C(sz,i); \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void addsub_b(U32 opcode) { ADDSUB(b,0); }
void addsub_d(U32 opcode) { ADDSUB(d,0); }
void addsub_q(U32 opcode) { ADDSUB(q,0); }
void addsub_o(U32 opcode) { ADDSUB(o,0); }
void saddsub_b(U32 opcode) { ADDSUB(b,1); }
void saddsub_d(U32 opcode) { ADDSUB(d,1); }
void saddsub_q(U32 opcode) { ADDSUB(q,1); }
void saddsub_o(U32 opcode) { ADDSUB(o,1); }

#define POPC(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			UT(sz) y = 0; \
			unsigned j; \
			for (j = 0; j < BYTES(sz); j++) { \
				U8 x = cbyte(r(R2),sz,i,j); \
				x = (x & 0x55) + ((x >> 1) & 0x55); \
				x = (x & 0x33) + ((x >> 2) & 0x33); \
				x = (x & 0x0f) + ((x >> 4) & 0x0f); \
				y += x; \
			} \
			if (y > r(R3).C(sz,i)) \
				r1.C(sz,i) = y - r(R3).C(sz,i); \
			else \
				r1.C(sz,i) = 0; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void popc_b(U32 opcode) { if (R1) POPC(b,0); }
void popc_d(U32 opcode) { if (R1) POPC(d,0); }
void popc_q(U32 opcode) { if (R1) POPC(q,0); }
void popc_o(U32 opcode) { if (R1) POPC(o,0); }
void spopc_b(U32 opcode) { if (R1) POPC(b,1); }
void spopc_d(U32 opcode) { if (R1) POPC(d,1); }
void spopc_q(U32 opcode) { if (R1) POPC(q,1); }
void spopc_o(U32 opcode) { if (R1) POPC(o,1); }

#if 0
/* XXX: undocumented extension */
#define POPCH(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			UT(sz) y = 0; \
			unsigned j; \
			for (j = 0; j < BYTES(sz); j++) { \
				U8 x = cbyte(r(R2),sz,i,j); \
				x = (x & 0x55) + ((x >> 1) & 0x55); \
				x = (x & 0x33) + ((x >> 2) & 0x33); \
				x = (x & 0x0f) + ((x >> 4) & 0x0f); \
				y += x; \
			} \
			if (y > r(R3).C(sz,0)) \
				r1.C(sz,i) = y - r(R3).C(sz,0); \
			else \
				r1.C(sz,i) = 0; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void spopch_b(U32 opcode) { if (R1) POPCH(b,1); }
void spopch_d(U32 opcode) { if (R1) POPCH(d,1); }
void spopch_q(U32 opcode) { if (R1) POPCH(q,1); }
void spopch_o(U32 opcode) { if (R1) POPCH(o,1); }
#endif

#define POPCI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			UT(sz) y = 0; \
			unsigned j; \
			for (j = 0; j < BYTES(sz); j++) { \
				U8 x = cbyte(r(R2),sz,i,j); \
				x = (x & 0x55) + ((x >> 1) & 0x55); \
				x = (x & 0x33) + ((x >> 2) & 0x33); \
				x = (x & 0x0f) + ((x >> 4) & 0x0f); \
				y += x; \
			} \
			if (y > UIMM8) \
				r1.C(sz,i) = y - UIMM8; \
			else \
				r1.C(sz,i) = 0; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void popci_b(U32 opcode) { if (R1) POPCI(b,0); }
void popci_d(U32 opcode) { if (R1) POPCI(d,0); }
void popci_q(U32 opcode) { if (R1) POPCI(q,0); }
void popci_o(U32 opcode) { if (R1) POPCI(o,0); }
void spopci_b(U32 opcode) { if (R1) POPCI(b,1); }
void spopci_d(U32 opcode) { if (R1) POPCI(d,1); }
void spopci_q(U32 opcode) { if (R1) POPCI(q,1); }
void spopci_o(U32 opcode) { if (R1) POPCI(o,1); }

#define INC(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) + 1; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void inc_b(U32 opcode) { if (R1) INC(b,0); }
void inc_d(U32 opcode) { if (R1) INC(d,0); }
void inc_q(U32 opcode) { if (R1) INC(q,0); }
void inc_o(U32 opcode) { if (R1) INC(o,0); }
void sinc_b(U32 opcode) { if (R1) INC(b,1); }
void sinc_d(U32 opcode) { if (R1) INC(d,1); }
void sinc_q(U32 opcode) { if (R1) INC(q,1); }
void sinc_o(U32 opcode) { if (R1) INC(o,1); }

#define DEC(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) - 1; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void dec_b(U32 opcode) { if (R1) DEC(b,0); }
void dec_d(U32 opcode) { if (R1) DEC(d,0); }
void dec_q(U32 opcode) { if (R1) DEC(q,0); }
void dec_o(U32 opcode) { if (R1) DEC(o,0); }
void sdec_b(U32 opcode) { if (R1) DEC(b,1); }
void sdec_d(U32 opcode) { if (R1) DEC(d,1); }
void sdec_q(U32 opcode) { if (R1) DEC(q,1); }
void sdec_o(U32 opcode) { if (R1) DEC(o,1); }

#define NEG(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = -r(R2).C(sz,i); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void neg_b(U32 opcode) { if (R1) NEG(b,0); }
void neg_d(U32 opcode) { if (R1) NEG(d,0); }
void neg_q(U32 opcode) { if (R1) NEG(q,0); }
void neg_o(U32 opcode) { if (R1) NEG(o,0); }
void sneg_b(U32 opcode) { if (R1) NEG(b,1); }
void sneg_d(U32 opcode) { if (R1) NEG(d,1); }
void sneg_q(U32 opcode) { if (R1) NEG(q,1); }
void sneg_o(U32 opcode) { if (R1) NEG(o,1); }

#define LSB1(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			UT(sz) x = r(R2).C(sz,i); \
			unsigned j; \
			r1.C(sz,i) = 0; \
			if (x) \
				for (j = 1; j <= BITS(sz); j++, x >>= 1) \
					if (x & 1) { \
						r1.C(sz,i) = j; \
						break; \
					} \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void lsb1_b(U32 opcode) { if (R1) LSB1(b,0); }
void lsb1_d(U32 opcode) { if (R1) LSB1(d,0); }
void lsb1_q(U32 opcode) { if (R1) LSB1(q,0); }
void lsb1_o(U32 opcode) { if (R1) LSB1(o,0); }
void slsb1_b(U32 opcode) { if (R1) LSB1(b,1); }
void slsb1_d(U32 opcode) { if (R1) LSB1(d,1); }
void slsb1_q(U32 opcode) { if (R1) LSB1(q,1); }
void slsb1_o(U32 opcode) { if (R1) LSB1(o,1); }

#define LSB0(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			UT(sz) x = ~r(R2).C(sz,i); \
			unsigned j; \
			r1.C(sz,i) = 0; \
			if (x) \
				for (j = 1; j <= BITS(sz); j++, x >>= 1) \
					if (x & 1) { \
						r1.C(sz,i) = j; \
						break; \
					} \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void lsb0_b(U32 opcode) { if (R1) LSB0(b,0); }
void lsb0_d(U32 opcode) { if (R1) LSB0(d,0); }
void lsb0_q(U32 opcode) { if (R1) LSB0(q,0); }
void lsb0_o(U32 opcode) { if (R1) LSB0(o,0); }
void slsb0_b(U32 opcode) { if (R1) LSB1(b,1); }
void slsb0_d(U32 opcode) { if (R1) LSB1(d,1); }
void slsb0_q(U32 opcode) { if (R1) LSB1(q,1); }
void slsb0_o(U32 opcode) { if (R1) LSB1(o,1); }

#define MSB1(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			ST(sz) x = r(R2).C(sz,i); \
			unsigned j; \
			r1.C(sz,i) = 0; \
			if (x) \
				for (j = BITS(sz); j >= 1; j--, x <<= 1) \
					if (x < 0) { \
						r1.C(sz,i) = j; \
						break; \
					} \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void msb1_b(U32 opcode) { if (R1) MSB1(b,0); }
void msb1_d(U32 opcode) { if (R1) MSB1(d,0); }
void msb1_q(U32 opcode) { if (R1) MSB1(q,0); }
void msb1_o(U32 opcode) { if (R1) MSB1(o,0); }
void smsb1_b(U32 opcode) { if (R1) MSB1(b,1); }
void smsb1_d(U32 opcode) { if (R1) MSB1(d,1); }
void smsb1_q(U32 opcode) { if (R1) MSB1(q,1); }
void smsb1_o(U32 opcode) { if (R1) MSB1(o,1); }

#define MSB0(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			ST(sz) x = ~r(R2).C(sz,i); \
			unsigned j; \
			r1.C(sz,i) = 0; \
			if (x) \
				for (j = BITS(sz); j >= 1; j--, x <<= 1) \
					if (x < 0) { \
						r1.C(sz,i) = j; \
						break; \
					} \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void msb0_b(U32 opcode) { if (R1) MSB0(b,0); }
void msb0_d(U32 opcode) { if (R1) MSB0(d,0); }
void msb0_q(U32 opcode) { if (R1) MSB0(q,0); }
void msb0_o(U32 opcode) { if (R1) MSB0(o,0); }
void smsb0_b(U32 opcode) { if (R1) MSB1(b,1); }
void smsb0_d(U32 opcode) { if (R1) MSB1(d,1); }
void smsb0_q(U32 opcode) { if (R1) MSB1(q,1); }
void smsb0_o(U32 opcode) { if (R1) MSB1(o,1); }

/* XXX: verify semantics */
#define CMPG(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = -(r(R2).C(sz,i) > r(R3).C(sz,i)); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void cmpg_b(U32 opcode) { if (R1) CMPG(b,0); }
void cmpg_d(U32 opcode) { if (R1) CMPG(d,0); }
void cmpg_q(U32 opcode) { if (R1) CMPG(q,0); }
void cmpg_o(U32 opcode) { if (R1) CMPG(o,0); }
void scmpg_b(U32 opcode) { if (R1) CMPG(b,1); }
void scmpg_d(U32 opcode) { if (R1) CMPG(d,1); }
void scmpg_q(U32 opcode) { if (R1) CMPG(q,1); }
void scmpg_o(U32 opcode) { if (R1) CMPG(o,1); }

#define CMPLE(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = -(r(R2).C(sz,i) <= r(R3).C(sz,i)); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void cmple_b(U32 opcode) { if (R1) CMPLE(b,0); }
void cmple_d(U32 opcode) { if (R1) CMPLE(d,0); }
void cmple_q(U32 opcode) { if (R1) CMPLE(q,0); }
void cmple_o(U32 opcode) { if (R1) CMPLE(o,0); }
void scmple_b(U32 opcode) { if (R1) CMPLE(b,1); }
void scmple_d(U32 opcode) { if (R1) CMPLE(d,1); }
void scmple_q(U32 opcode) { if (R1) CMPLE(q,1); }
void scmple_o(U32 opcode) { if (R1) CMPLE(o,1); }

#define CMPGI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = -(r(R2).C(sz,i) > UIMM8); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void cmpgi_b(U32 opcode) { if (R1) CMPGI(b,0); }
void cmpgi_d(U32 opcode) { if (R1) CMPGI(d,0); }
void cmpgi_q(U32 opcode) { if (R1) CMPGI(q,0); }
void cmpgi_o(U32 opcode) { if (R1) CMPGI(o,0); }
void scmpgi_b(U32 opcode) { if (R1) CMPGI(b,1); }
void scmpgi_d(U32 opcode) { if (R1) CMPGI(d,1); }
void scmpgi_q(U32 opcode) { if (R1) CMPGI(q,1); }
void scmpgi_o(U32 opcode) { if (R1) CMPGI(o,1); }

#define CMPLEI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = -(r(R2).C(sz,i) <= UIMM8); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void cmplei_b(U32 opcode) { if (R1) CMPLEI(b,0); }
void cmplei_d(U32 opcode) { if (R1) CMPLEI(d,0); }
void cmplei_q(U32 opcode) { if (R1) CMPLEI(q,0); }
void cmplei_o(U32 opcode) { if (R1) CMPLEI(o,0); }
void scmplei_b(U32 opcode) { if (R1) CMPLEI(b,1); }
void scmplei_d(U32 opcode) { if (R1) CMPLEI(d,1); }
void scmplei_q(U32 opcode) { if (R1) CMPLEI(q,1); }
void scmplei_o(U32 opcode) { if (R1) CMPLEI(o,1); }

/* XXX: verify semantics */
#define CMPGS(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = -(r(R2).SC(sz,i) > r(R3).SC(sz,i)); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void cmpgs_b(U32 opcode) { if (R1) CMPGS(b,0); }
void cmpgs_d(U32 opcode) { if (R1) CMPGS(d,0); }
void cmpgs_q(U32 opcode) { if (R1) CMPGS(q,0); }
void cmpgs_o(U32 opcode) { if (R1) CMPGS(o,0); }
void scmpgs_b(U32 opcode) { if (R1) CMPGS(b,1); }
void scmpgs_d(U32 opcode) { if (R1) CMPGS(d,1); }
void scmpgs_q(U32 opcode) { if (R1) CMPGS(q,1); }
void scmpgs_o(U32 opcode) { if (R1) CMPGS(o,1); }

#define CMPLES(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = -(r(R2).SC(sz,i) <= r(R3).SC(sz,i)); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void cmples_b(U32 opcode) { if (R1) CMPLES(b,0); }
void cmples_d(U32 opcode) { if (R1) CMPLES(d,0); }
void cmples_q(U32 opcode) { if (R1) CMPLES(q,0); }
void cmples_o(U32 opcode) { if (R1) CMPLES(o,0); }
void scmples_b(U32 opcode) { if (R1) CMPLES(b,1); }
void scmples_d(U32 opcode) { if (R1) CMPLES(d,1); }
void scmples_q(U32 opcode) { if (R1) CMPLES(q,1); }
void scmples_o(U32 opcode) { if (R1) CMPLES(o,1); }

#define CMPGSI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = -(r(R2).SC(sz,i) > SIMM8); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void cmpgsi_b(U32 opcode) { if (R1) CMPGSI(b,0); }
void cmpgsi_d(U32 opcode) { if (R1) CMPGSI(d,0); }
void cmpgsi_q(U32 opcode) { if (R1) CMPGSI(q,0); }
void cmpgsi_o(U32 opcode) { if (R1) CMPGSI(o,0); }
void scmpgsi_b(U32 opcode) { if (R1) CMPGSI(b,1); }
void scmpgsi_d(U32 opcode) { if (R1) CMPGSI(d,1); }
void scmpgsi_q(U32 opcode) { if (R1) CMPGSI(q,1); }
void scmpgsi_o(U32 opcode) { if (R1) CMPGSI(o,1); }

#define CMPLESI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = -(r(R2).SC(sz,i) <= SIMM8); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void cmplesi_b(U32 opcode) { if (R1) CMPLESI(b,0); }
void cmplesi_d(U32 opcode) { if (R1) CMPLESI(d,0); }
void cmplesi_q(U32 opcode) { if (R1) CMPLESI(q,0); }
void cmplesi_o(U32 opcode) { if (R1) CMPLESI(o,0); }
void scmplesi_b(U32 opcode) { if (R1) CMPLESI(b,1); }
void scmplesi_d(U32 opcode) { if (R1) CMPLESI(d,1); }
void scmplesi_q(U32 opcode) { if (R1) CMPLESI(q,1); }
void scmplesi_o(U32 opcode) { if (R1) CMPLESI(o,1); }

#define ABS(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).SC(sz,i) < 0) \
				r1.C(sz,i) = -r(R2).C(sz,i); \
			else \
				r1.C(sz,i) = r(R2).C(sz,i); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void abs_b(U32 opcode) { if (R1) ABS(b,0); }
void abs_d(U32 opcode) { if (R1) ABS(d,0); }
void abs_q(U32 opcode) { if (R1) ABS(q,0); }
void abs_o(U32 opcode) { if (R1) ABS(o,0); }
void sabs_b(U32 opcode) { if (R1) ABS(b,1); }
void sabs_d(U32 opcode) { if (R1) ABS(d,1); }
void sabs_q(U32 opcode) { if (R1) ABS(q,1); }
void sabs_o(U32 opcode) { if (R1) ABS(o,1); }

#define NABS(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).SC(sz,i) < 0) \
				r1.C(sz,i) = r(R2).C(sz,i); \
			else \
				r1.C(sz,i) = -r(R2).C(sz,i); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void nabs_b(U32 opcode) { if (R1) NABS(b,0); }
void nabs_d(U32 opcode) { if (R1) NABS(d,0); }
void nabs_q(U32 opcode) { if (R1) NABS(q,0); }
void nabs_o(U32 opcode) { if (R1) NABS(o,0); }
void snabs_b(U32 opcode) { if (R1) NABS(b,1); }
void snabs_d(U32 opcode) { if (R1) NABS(d,1); }
void snabs_q(U32 opcode) { if (R1) NABS(q,1); }
void snabs_o(U32 opcode) { if (R1) NABS(o,1); }

#define MAX(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).C(sz,i) > r(R3).C(sz,i)) \
				r1.C(sz,i) = r(R2).C(sz,i); \
			else \
				r1.C(sz,i) = r(R3).C(sz,i); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void max_b(U32 opcode) { if (R1) MAX(b,0); }
void max_d(U32 opcode) { if (R1) MAX(d,0); }
void max_q(U32 opcode) { if (R1) MAX(q,0); }
void max_o(U32 opcode) { if (R1) MAX(o,0); }
void smax_b(U32 opcode) { if (R1) MAX(b,1); }
void smax_d(U32 opcode) { if (R1) MAX(d,1); }
void smax_q(U32 opcode) { if (R1) MAX(q,1); }
void smax_o(U32 opcode) { if (R1) MAX(o,1); }

#define MIN(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).C(sz,i) < r(R3).C(sz,i)) \
				r1.C(sz,i) = r(R2).C(sz,i); \
			else \
				r1.C(sz,i) = r(R3).C(sz,i); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void min_b(U32 opcode) { if (R1) MIN(b,0); }
void min_d(U32 opcode) { if (R1) MIN(d,0); }
void min_q(U32 opcode) { if (R1) MIN(q,0); }
void min_o(U32 opcode) { if (R1) MIN(o,0); }
void smin_b(U32 opcode) { if (R1) MIN(b,1); }
void smin_d(U32 opcode) { if (R1) MIN(d,1); }
void smin_q(U32 opcode) { if (R1) MIN(q,1); }
void smin_o(U32 opcode) { if (R1) MIN(o,1); }

#define MAXI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).C(sz,i) > UIMM8) \
				r1.C(sz,i) = r(R2).C(sz,i); \
			else \
				r1.C(sz,i) = UIMM8; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void maxi_b(U32 opcode) { if (R1) MAXI(b,0); }
void maxi_d(U32 opcode) { if (R1) MAXI(d,0); }
void maxi_q(U32 opcode) { if (R1) MAXI(q,0); }
void maxi_o(U32 opcode) { if (R1) MAXI(o,0); }
void smaxi_b(U32 opcode) { if (R1) MAXI(b,1); }
void smaxi_d(U32 opcode) { if (R1) MAXI(d,1); }
void smaxi_q(U32 opcode) { if (R1) MAXI(q,1); }
void smaxi_o(U32 opcode) { if (R1) MAXI(o,1); }

#define MINI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).C(sz,i) < UIMM8) \
				r1.C(sz,i) = r(R2).C(sz,i); \
			else \
				r1.C(sz,i) = UIMM8; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void mini_b(U32 opcode) { if (R1) MINI(b,0); }
void mini_d(U32 opcode) { if (R1) MINI(d,0); }
void mini_q(U32 opcode) { if (R1) MINI(q,0); }
void mini_o(U32 opcode) { if (R1) MINI(o,0); }
void smini_b(U32 opcode) { if (R1) MINI(b,1); }
void smini_d(U32 opcode) { if (R1) MINI(d,1); }
void smini_q(U32 opcode) { if (R1) MINI(q,1); }
void smini_o(U32 opcode) { if (R1) MINI(o,1); }

/* Note: legacy name is `sort' */
#define MINMAX(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).C(sz,i) < r(R3).C(sz,i)) { \
				r1.C(sz,i) = r(R2).C(sz,i); \
				ra.C(sz,i) = r(R3).C(sz,i); \
			} \
			else { \
				r1.C(sz,i) = r(R3).C(sz,i); \
				ra.C(sz,i) = r(R2).C(sz,i); \
			} \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void minmax_b(U32 opcode) { MINMAX(b,0); }
void minmax_d(U32 opcode) { MINMAX(d,0); }
void minmax_q(U32 opcode) { MINMAX(q,0); }
void minmax_o(U32 opcode) { MINMAX(o,0); }
void sminmax_b(U32 opcode) { MINMAX(b,1); }
void sminmax_d(U32 opcode) { MINMAX(d,1); }
void sminmax_q(U32 opcode) { MINMAX(q,1); }
void sminmax_o(U32 opcode) { MINMAX(o,1); }

/* XXX: undocumented */
#define MINMAXI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).C(sz,i) < UIMM8) { \
				r1.C(sz,i) = r(R2).C(sz,i); \
				ra.C(sz,i) = UIMM8; \
			} \
			else { \
				r1.C(sz,i) = UIMM8; \
				ra.C(sz,i) = r(R2).C(sz,i); \
			} \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void minmaxi_b(U32 opcode) { MINMAXI(b,0); }
void minmaxi_d(U32 opcode) { MINMAXI(d,0); }
void minmaxi_q(U32 opcode) { MINMAXI(q,0); }
void minmaxi_o(U32 opcode) { MINMAXI(o,0); }
void sminmaxi_b(U32 opcode) { MINMAXI(b,1); }
void sminmaxi_d(U32 opcode) { MINMAXI(d,1); }
void sminmaxi_q(U32 opcode) { MINMAXI(q,1); }
void sminmaxi_o(U32 opcode) { MINMAXI(o,1); }

#define MAXS(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).SC(sz,i) > r(R3).SC(sz,i)) \
				r1.C(sz,i) = r(R2).C(sz,i); \
			else \
				r1.C(sz,i) = r(R3).C(sz,i); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void maxs_b(U32 opcode) { if (R1) MAXS(b,0); }
void maxs_d(U32 opcode) { if (R1) MAXS(d,0); }
void maxs_q(U32 opcode) { if (R1) MAXS(q,0); }
void maxs_o(U32 opcode) { if (R1) MAXS(o,0); }
void smaxs_b(U32 opcode) { if (R1) MAXS(b,1); }
void smaxs_d(U32 opcode) { if (R1) MAXS(d,1); }
void smaxs_q(U32 opcode) { if (R1) MAXS(q,1); }
void smaxs_o(U32 opcode) { if (R1) MAXS(o,1); }

#define MINS(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).SC(sz,i) < r(R3).SC(sz,i)) \
				r1.C(sz,i) = r(R2).C(sz,i); \
			else \
				r1.C(sz,i) = r(R3).C(sz,i); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void mins_b(U32 opcode) { if (R1) MINS(b,0); }
void mins_d(U32 opcode) { if (R1) MINS(d,0); }
void mins_q(U32 opcode) { if (R1) MINS(q,0); }
void mins_o(U32 opcode) { if (R1) MINS(o,0); }
void smins_b(U32 opcode) { if (R1) MINS(b,1); }
void smins_d(U32 opcode) { if (R1) MINS(d,1); }
void smins_q(U32 opcode) { if (R1) MINS(q,1); }
void smins_o(U32 opcode) { if (R1) MINS(o,1); }

#define MAXSI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).SC(sz,i) > SIMM8) \
				r1.C(sz,i) = r(R2).C(sz,i); \
			else \
				r1.C(sz,i) = SIMM8; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void maxsi_b(U32 opcode) { if (R1) MAXSI(b,0); }
void maxsi_d(U32 opcode) { if (R1) MAXSI(d,0); }
void maxsi_q(U32 opcode) { if (R1) MAXSI(q,0); }
void maxsi_o(U32 opcode) { if (R1) MAXSI(o,0); }
void smaxsi_b(U32 opcode) { if (R1) MAXSI(b,1); }
void smaxsi_d(U32 opcode) { if (R1) MAXSI(d,1); }
void smaxsi_q(U32 opcode) { if (R1) MAXSI(q,1); }
void smaxsi_o(U32 opcode) { if (R1) MAXSI(o,1); }

#define MINSI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).SC(sz,i) < SIMM8) \
				r1.C(sz,i) = r(R2).C(sz,i); \
			else \
				r1.C(sz,i) = SIMM8; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void minsi_b(U32 opcode) { if (R1) MINSI(b,0); }
void minsi_d(U32 opcode) { if (R1) MINSI(d,0); }
void minsi_q(U32 opcode) { if (R1) MINSI(q,0); }
void minsi_o(U32 opcode) { if (R1) MINSI(o,0); }
void sminsi_b(U32 opcode) { if (R1) MINSI(b,1); }
void sminsi_d(U32 opcode) { if (R1) MINSI(d,1); }
void sminsi_q(U32 opcode) { if (R1) MINSI(q,1); }
void sminsi_o(U32 opcode) { if (R1) MINSI(o,1); }

/* Note: legacy name is `sorts' */
#define MINMAXS(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).SC(sz,i) < r(R3).SC(sz,i)) { \
				r1.C(sz,i) = r(R2).C(sz,i); \
				ra.C(sz,i) = r(R3).C(sz,i); \
			} \
			else { \
				r1.C(sz,i) = r(R3).C(sz,i); \
				ra.C(sz,i) = r(R2).C(sz,i); \
			} \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void minmaxs_b(U32 opcode) { MINMAXS(b,0); }
void minmaxs_d(U32 opcode) { MINMAXS(d,0); }
void minmaxs_q(U32 opcode) { MINMAXS(q,0); }
void minmaxs_o(U32 opcode) { MINMAXS(o,0); }
void sminmaxs_b(U32 opcode) { MINMAXS(b,1); }
void sminmaxs_d(U32 opcode) { MINMAXS(d,1); }
void sminmaxs_q(U32 opcode) { MINMAXS(q,1); }
void sminmaxs_o(U32 opcode) { MINMAXS(o,1); }

/* XXX: undocumented */
#define MINMAXSI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).SC(sz,i) < SIMM8) { \
				r1.C(sz,i) = r(R2).C(sz,i); \
				ra.C(sz,i) = SIMM8; \
			} \
			else { \
				r1.C(sz,i) = SIMM8; \
				ra.C(sz,i) = r(R2).C(sz,i); \
			} \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void minmaxsi_b(U32 opcode) { MINMAXSI(b,0); }
void minmaxsi_d(U32 opcode) { MINMAXSI(d,0); }
void minmaxsi_q(U32 opcode) { MINMAXSI(q,0); }
void minmaxsi_o(U32 opcode) { MINMAXSI(o,0); }
void sminmaxsi_b(U32 opcode) { MINMAXSI(b,1); }
void sminmaxsi_d(U32 opcode) { MINMAXSI(d,1); }
void sminmaxsi_q(U32 opcode) { MINMAXSI(q,1); }
void sminmaxsi_o(U32 opcode) { MINMAXSI(o,1); }

/* XXX: LNS operations are unimplemented */

#define SHIFTL(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned n = r(R3).C(sz,i) % BITS(sz); \
			r1.C(sz,i) = r(R2).C(sz,i) << n; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void shiftl_b(U32 opcode) { if (R1) SHIFTL(b,0); }
void shiftl_d(U32 opcode) { if (R1) SHIFTL(d,0); }
void shiftl_q(U32 opcode) { if (R1) SHIFTL(q,0); }
void shiftl_o(U32 opcode) { if (R1) SHIFTL(o,0); }
void sshiftl_b(U32 opcode) { if (R1) SHIFTL(b,1); }
void sshiftl_d(U32 opcode) { if (R1) SHIFTL(d,1); }
void sshiftl_q(U32 opcode) { if (R1) SHIFTL(q,1); }
void sshiftl_o(U32 opcode) { if (R1) SHIFTL(o,1); }

#define SHIFTLH(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = r(R3).C(sz,0) % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) << n; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void sshiftlh_b(U32 opcode) { if (R1) SHIFTLH(b,1); }
void sshiftlh_d(U32 opcode) { if (R1) SHIFTLH(d,1); }
void sshiftlh_q(U32 opcode) { if (R1) SHIFTLH(q,1); }
void sshiftlh_o(U32 opcode) { if (R1) SHIFTLH(o,1); }

#define DSHIFTL(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned n = r(R3).C(sz,i) % BITS(sz); \
			r1.C(sz,i) = r(R2).C(sz,i) << n; \
			ra.C(sz,i) = n ? r(R2).C(sz,i) >> (BITS(sz) - n) : 0; \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void dshiftl_b(U32 opcode) { DSHIFTL(b,0); }
void dshiftl_d(U32 opcode) { DSHIFTL(d,0); }
void dshiftl_q(U32 opcode) { DSHIFTL(q,0); }
void dshiftl_o(U32 opcode) { DSHIFTL(o,0); }
void sdshiftl_b(U32 opcode) { DSHIFTL(b,1); }
void sdshiftl_d(U32 opcode) { DSHIFTL(d,1); }
void sdshiftl_q(U32 opcode) { DSHIFTL(q,1); }
void sdshiftl_o(U32 opcode) { DSHIFTL(o,1); }

#define DSHIFTLH(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		const unsigned n = r(R3).C(sz,0) % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) << n; \
			ra.C(sz,i) = n ? r(R2).C(sz,i) >> (BITS(sz) - n) : 0; \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void sdshiftlh_b(U32 opcode) { DSHIFTLH(b,1); }
void sdshiftlh_d(U32 opcode) { DSHIFTLH(d,1); }
void sdshiftlh_q(U32 opcode) { DSHIFTLH(q,1); }
void sdshiftlh_o(U32 opcode) { DSHIFTLH(o,1); }

#define SHIFTR(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned n = r(R3).C(sz,i) % BITS(sz); \
			r1.C(sz,i) = r(R2).C(sz,i) >> n; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void shiftr_b(U32 opcode) { if (R1) SHIFTR(b,0); }
void shiftr_d(U32 opcode) { if (R1) SHIFTR(d,0); }
void shiftr_q(U32 opcode) { if (R1) SHIFTR(q,0); }
void shiftr_o(U32 opcode) { if (R1) SHIFTR(o,0); }
void sshiftr_b(U32 opcode) { if (R1) SHIFTR(b,1); }
void sshiftr_d(U32 opcode) { if (R1) SHIFTR(d,1); }
void sshiftr_q(U32 opcode) { if (R1) SHIFTR(q,1); }
void sshiftr_o(U32 opcode) { if (R1) SHIFTR(o,1); }

#define SHIFTRH(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = r(R3).C(sz,0) % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) >> n; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void sshiftrh_b(U32 opcode) { if (R1) SHIFTRH(b,1); }
void sshiftrh_d(U32 opcode) { if (R1) SHIFTRH(d,1); }
void sshiftrh_q(U32 opcode) { if (R1) SHIFTRH(q,1); }
void sshiftrh_o(U32 opcode) { if (R1) SHIFTRH(o,1); }

#define DSHIFTR(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned n = r(R3).C(sz,i) % BITS(sz); \
			r1.C(sz,i) = r(R2).C(sz,i) >> n; \
			ra.C(sz,i) = n ? r(R2).C(sz,i) << (BITS(sz) - n) : 0; \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void dshiftr_b(U32 opcode) { DSHIFTR(b,0); }
void dshiftr_d(U32 opcode) { DSHIFTR(d,0); }
void dshiftr_q(U32 opcode) { DSHIFTR(q,0); }
void dshiftr_o(U32 opcode) { DSHIFTR(o,0); }
void sdshiftr_b(U32 opcode) { DSHIFTR(b,1); }
void sdshiftr_d(U32 opcode) { DSHIFTR(d,1); }
void sdshiftr_q(U32 opcode) { DSHIFTR(q,1); }
void sdshiftr_o(U32 opcode) { DSHIFTR(o,1); }

#define DSHIFTRH(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		const unsigned n = r(R3).C(sz,0) % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) >> n; \
			ra.C(sz,i) = n ? r(R2).C(sz,i) << (BITS(sz) - n) : 0; \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void sdshiftrh_b(U32 opcode) { DSHIFTRH(b,1); }
void sdshiftrh_d(U32 opcode) { DSHIFTRH(d,1); }
void sdshiftrh_q(U32 opcode) { DSHIFTRH(q,1); }
void sdshiftrh_o(U32 opcode) { DSHIFTRH(o,1); }

#define SHIFTLI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = UIMM8 % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) << n; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void shiftli_b(U32 opcode) { if (R1) SHIFTLI(b,0); }
void shiftli_d(U32 opcode) { if (R1) SHIFTLI(d,0); }
void shiftli_q(U32 opcode) { if (R1) SHIFTLI(q,0); }
void shiftli_o(U32 opcode) { if (R1) SHIFTLI(o,0); }
void sshiftli_b(U32 opcode) { if (R1) SHIFTLI(b,1); }
void sshiftli_d(U32 opcode) { if (R1) SHIFTLI(d,1); }
void sshiftli_q(U32 opcode) { if (R1) SHIFTLI(q,1); }
void sshiftli_o(U32 opcode) { if (R1) SHIFTLI(o,1); }

#define DSHIFTLI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		const unsigned n = UIMM8 % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) << n; \
			ra.C(sz,i) = n ? r(R2).C(sz,i) >> (BITS(sz) - n) : 0; \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void dshiftli_b(U32 opcode) { DSHIFTLI(b,0); }
void dshiftli_d(U32 opcode) { DSHIFTLI(d,0); }
void dshiftli_q(U32 opcode) { DSHIFTLI(q,0); }
void dshiftli_o(U32 opcode) { DSHIFTLI(o,0); }
void sdshiftli_b(U32 opcode) { DSHIFTLI(b,1); }
void sdshiftli_d(U32 opcode) { DSHIFTLI(d,1); }
void sdshiftli_q(U32 opcode) { DSHIFTLI(q,1); }
void sdshiftli_o(U32 opcode) { DSHIFTLI(o,1); }

#define SHIFTRI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = UIMM8 % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) >> n; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void shiftri_b(U32 opcode) { if (R1) SHIFTRI(b,0); }
void shiftri_d(U32 opcode) { if (R1) SHIFTRI(d,0); }
void shiftri_q(U32 opcode) { if (R1) SHIFTRI(q,0); }
void shiftri_o(U32 opcode) { if (R1) SHIFTRI(o,0); }
void sshiftri_b(U32 opcode) { if (R1) SHIFTRI(b,1); }
void sshiftri_d(U32 opcode) { if (R1) SHIFTRI(d,1); }
void sshiftri_q(U32 opcode) { if (R1) SHIFTRI(q,1); }
void sshiftri_o(U32 opcode) { if (R1) SHIFTRI(o,1); }

#define DSHIFTRI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		const unsigned n = UIMM8 % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) >> n; \
			ra.C(sz,i) = n ? r(R2).C(sz,i) << (BITS(sz) - n) : 0; \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void dshiftri_b(U32 opcode) { DSHIFTRI(b,0); }
void dshiftri_d(U32 opcode) { DSHIFTRI(d,0); }
void dshiftri_q(U32 opcode) { DSHIFTRI(q,0); }
void dshiftri_o(U32 opcode) { DSHIFTRI(o,0); }
void sdshiftri_b(U32 opcode) { DSHIFTRI(b,1); }
void sdshiftri_d(U32 opcode) { DSHIFTRI(d,1); }
void sdshiftri_q(U32 opcode) { DSHIFTRI(q,1); }
void sdshiftri_o(U32 opcode) { DSHIFTRI(o,1); }

#define SHIFTRA(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned n = r(R3).C(sz,i) % BITS(sz); \
			r1.C(sz,i) = r(R2).SC(sz,i) >> n; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void shiftra_b(U32 opcode) { if (R1) SHIFTRA(b,0); }
void shiftra_d(U32 opcode) { if (R1) SHIFTRA(d,0); }
void shiftra_q(U32 opcode) { if (R1) SHIFTRA(q,0); }
void shiftra_o(U32 opcode) { if (R1) SHIFTRA(o,0); }
void sshiftra_b(U32 opcode) { if (R1) SHIFTRA(b,1); }
void sshiftra_d(U32 opcode) { if (R1) SHIFTRA(d,1); }
void sshiftra_q(U32 opcode) { if (R1) SHIFTRA(q,1); }
void sshiftra_o(U32 opcode) { if (R1) SHIFTRA(o,1); }

#define SHIFTRAH(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = r(R3).C(sz,0) % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).SC(sz,i) >> n; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void sshiftrah_b(U32 opcode) { if (R1) SHIFTRAH(b,1); }
void sshiftrah_d(U32 opcode) { if (R1) SHIFTRAH(d,1); }
void sshiftrah_q(U32 opcode) { if (R1) SHIFTRAH(q,1); }
void sshiftrah_o(U32 opcode) { if (R1) SHIFTRAH(o,1); }

#define DSHIFTRA(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned n = r(R3).C(sz,i) % BITS(sz); \
			r1.C(sz,i) = r(R2).SC(sz,i) >> n; \
			ra.C(sz,i) = n ? r(R2).C(sz,i) << (BITS(sz) - n) : 0; \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void dshiftra_b(U32 opcode) { DSHIFTRA(b,0); }
void dshiftra_d(U32 opcode) { DSHIFTRA(d,0); }
void dshiftra_q(U32 opcode) { DSHIFTRA(q,0); }
void dshiftra_o(U32 opcode) { DSHIFTRA(o,0); }
void sdshiftra_b(U32 opcode) { DSHIFTRA(b,1); }
void sdshiftra_d(U32 opcode) { DSHIFTRA(d,1); }
void sdshiftra_q(U32 opcode) { DSHIFTRA(q,1); }
void sdshiftra_o(U32 opcode) { DSHIFTRA(o,1); }

#define DSHIFTRAH(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		const unsigned n = r(R3).C(sz,0) % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).SC(sz,i) >> n; \
			ra.C(sz,i) = n ? r(R2).C(sz,i) << (BITS(sz) - n) : 0; \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void sdshiftrah_b(U32 opcode) { DSHIFTRAH(b,1); }
void sdshiftrah_d(U32 opcode) { DSHIFTRAH(d,1); }
void sdshiftrah_q(U32 opcode) { DSHIFTRAH(q,1); }
void sdshiftrah_o(U32 opcode) { DSHIFTRAH(o,1); }

#define SHIFTRAI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = UIMM8 % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).SC(sz,i) >> n; \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void shiftrai_b(U32 opcode) { if (R1) SHIFTRAI(b,0); }
void shiftrai_d(U32 opcode) { if (R1) SHIFTRAI(d,0); }
void shiftrai_q(U32 opcode) { if (R1) SHIFTRAI(q,0); }
void shiftrai_o(U32 opcode) { if (R1) SHIFTRAI(o,0); }
void sshiftrai_b(U32 opcode) { if (R1) SHIFTRAI(b,1); }
void sshiftrai_d(U32 opcode) { if (R1) SHIFTRAI(d,1); }
void sshiftrai_q(U32 opcode) { if (R1) SHIFTRAI(q,1); }
void sshiftrai_o(U32 opcode) { if (R1) SHIFTRAI(o,1); }

#define DSHIFTRAI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		const unsigned n = UIMM8 % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).SC(sz,i) >> n; \
			ra.C(sz,i) = n ? r(R2).C(sz,i) << (BITS(sz) - n) : 0; \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void dshiftrai_b(U32 opcode) { DSHIFTRAI(b,0); }
void dshiftrai_d(U32 opcode) { DSHIFTRAI(d,0); }
void dshiftrai_q(U32 opcode) { DSHIFTRAI(q,0); }
void dshiftrai_o(U32 opcode) { DSHIFTRAI(o,0); }
void sdshiftrai_b(U32 opcode) { DSHIFTRAI(b,1); }
void sdshiftrai_d(U32 opcode) { DSHIFTRAI(d,1); }
void sdshiftrai_q(U32 opcode) { DSHIFTRAI(q,1); }
void sdshiftrai_o(U32 opcode) { DSHIFTRAI(o,1); }

#define ROTL(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned n = r(R3).C(sz,i) % BITS(sz); \
			r1.C(sz,i) = n == 0 ? r(R2).C(sz,i) : \
				(r(R2).C(sz,i) << n) | (r(R2).C(sz,i) >> (BITS(sz) - n)); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void rotl_b(U32 opcode) { if (R1) ROTL(b,0); }
void rotl_d(U32 opcode) { if (R1) ROTL(d,0); }
void rotl_q(U32 opcode) { if (R1) ROTL(q,0); }
void rotl_o(U32 opcode) { if (R1) ROTL(o,0); }
void srotl_b(U32 opcode) { if (R1) ROTL(b,1); }
void srotl_d(U32 opcode) { if (R1) ROTL(d,1); }
void srotl_q(U32 opcode) { if (R1) ROTL(q,1); }
void srotl_o(U32 opcode) { if (R1) ROTL(o,1); }

#define ROTLH(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = r(R3).C(sz,0) % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = n == 0 ? r(R2).C(sz,i) : \
				(r(R2).C(sz,i) << n) | (r(R2).C(sz,i) >> (BITS(sz) - n)); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void srotlh_b(U32 opcode) { if (R1) ROTLH(b,1); }
void srotlh_d(U32 opcode) { if (R1) ROTLH(d,1); }
void srotlh_q(U32 opcode) { if (R1) ROTLH(q,1); }
void srotlh_o(U32 opcode) { if (R1) ROTLH(o,1); }

#define ROTR(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned n = r(R3).C(sz,i) % BITS(sz); \
			r1.C(sz,i) = n == 0 ? r(R2).C(sz,i) : \
				(r(R2).C(sz,i) >> n) | (r(R2).C(sz,i) << (BITS(sz) - n)); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void rotr_b(U32 opcode) { if (R1) ROTR(b,0); }
void rotr_d(U32 opcode) { if (R1) ROTR(d,0); }
void rotr_q(U32 opcode) { if (R1) ROTR(q,0); }
void rotr_o(U32 opcode) { if (R1) ROTR(o,0); }
void srotr_b(U32 opcode) { if (R1) ROTR(b,1); }
void srotr_d(U32 opcode) { if (R1) ROTR(d,1); }
void srotr_q(U32 opcode) { if (R1) ROTR(q,1); }
void srotr_o(U32 opcode) { if (R1) ROTR(o,1); }

#define ROTRH(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = r(R3).C(sz,0) % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = n == 0 ? r(R2).C(sz,i) : \
				(r(R2).C(sz,i) >> n) | (r(R2).C(sz,i) << (BITS(sz) - n)); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void srotrh_b(U32 opcode) { if (R1) ROTRH(b,1); }
void srotrh_d(U32 opcode) { if (R1) ROTRH(d,1); }
void srotrh_q(U32 opcode) { if (R1) ROTRH(q,1); }
void srotrh_o(U32 opcode) { if (R1) ROTRH(o,1); }

#define ROTLI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = UIMM8 % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = n == 0 ? r(R2).C(sz,i) : \
				(r(R2).C(sz,i) << n) | (r(R2).C(sz,i) >> (BITS(sz) - n)); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void rotli_b(U32 opcode) { if (R1) ROTLI(b,0); }
void rotli_d(U32 opcode) { if (R1) ROTLI(d,0); }
void rotli_q(U32 opcode) { if (R1) ROTLI(q,0); }
void rotli_o(U32 opcode) { if (R1) ROTLI(o,0); }
void srotli_b(U32 opcode) { if (R1) ROTLI(b,1); }
void srotli_d(U32 opcode) { if (R1) ROTLI(d,1); }
void srotli_q(U32 opcode) { if (R1) ROTLI(q,1); }
void srotli_o(U32 opcode) { if (R1) ROTLI(o,1); }

#define ROTRI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = UIMM8 % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = n == 0 ? r(R2).C(sz,i) : \
				(r(R2).C(sz,i) >> n) | (r(R2).C(sz,i) << (BITS(sz) - n)); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void rotri_b(U32 opcode) { if (R1) ROTRI(b,0); }
void rotri_d(U32 opcode) { if (R1) ROTRI(d,0); }
void rotri_q(U32 opcode) { if (R1) ROTRI(q,0); }
void rotri_o(U32 opcode) { if (R1) ROTRI(o,0); }
void srotri_b(U32 opcode) { if (R1) ROTRI(b,1); }
void srotri_d(U32 opcode) { if (R1) ROTRI(d,1); }
void srotri_q(U32 opcode) { if (R1) ROTRI(q,1); }
void srotri_o(U32 opcode) { if (R1) ROTRI(o,1); }

#define BTST(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned n = r(R3).C(sz,i) % BITS(sz); \
			r1.C(sz,i) = r(R2).C(sz,i) & ((UT(sz))1 << n); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void btst_b(U32 opcode) { if (R1) BTST(b,0); }
void btst_d(U32 opcode) { if (R1) BTST(d,0); }
void btst_q(U32 opcode) { if (R1) BTST(q,0); }
void btst_o(U32 opcode) { if (R1) BTST(o,0); }
void sbtst_b(U32 opcode) { if (R1) BTST(b,1); }
void sbtst_d(U32 opcode) { if (R1) BTST(d,1); }
void sbtst_q(U32 opcode) { if (R1) BTST(q,1); }
void sbtst_o(U32 opcode) { if (R1) BTST(o,1); }

#if 0
/* XXX: undocumented */
#define BTSTH(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = r(R3).C(sz,0) % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) & ((UT(sz))1 << n); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void sbtsth_b(U32 opcode) { if (R1) BTSTH(b,1); }
void sbtsth_d(U32 opcode) { if (R1) BTSTH(d,1); }
void sbtsth_q(U32 opcode) { if (R1) BTSTH(q,1); }
void sbtsth_o(U32 opcode) { if (R1) BTSTH(o,1); }
#endif

#define BCLR(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned n = r(R3).C(sz,i) % BITS(sz); \
			r1.C(sz,i) = r(R2).C(sz,i) & ~((UT(sz))1 << n); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void bclr_b(U32 opcode) { if (R1) BCLR(b,0); }
void bclr_d(U32 opcode) { if (R1) BCLR(d,0); }
void bclr_q(U32 opcode) { if (R1) BCLR(q,0); }
void bclr_o(U32 opcode) { if (R1) BCLR(o,0); }
void sbclr_b(U32 opcode) { if (R1) BCLR(b,1); }
void sbclr_d(U32 opcode) { if (R1) BCLR(d,1); }
void sbclr_q(U32 opcode) { if (R1) BCLR(q,1); }
void sbclr_o(U32 opcode) { if (R1) BCLR(o,1); }

#if 0
/* XXX: undocumented */
#define BCLRH(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = r(R3).C(sz,0) % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) & ~((UT(sz))1 << n); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void sbclrh_b(U32 opcode) { if (R1) BCLRH(b,1); }
void sbclrh_d(U32 opcode) { if (R1) BCLRH(d,1); }
void sbclrh_q(U32 opcode) { if (R1) BCLRH(q,1); }
void sbclrh_o(U32 opcode) { if (R1) BCLRH(o,1); }
#endif

#define BCHG(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned n = r(R3).C(sz,i) % BITS(sz); \
			r1.C(sz,i) = r(R2).C(sz,i) ^ ((UT(sz))1 << n); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void bchg_b(U32 opcode) { if (R1) BCHG(b,0); }
void bchg_d(U32 opcode) { if (R1) BCHG(d,0); }
void bchg_q(U32 opcode) { if (R1) BCHG(q,0); }
void bchg_o(U32 opcode) { if (R1) BCHG(o,0); }
void sbchg_b(U32 opcode) { if (R1) BCHG(b,1); }
void sbchg_d(U32 opcode) { if (R1) BCHG(d,1); }
void sbchg_q(U32 opcode) { if (R1) BCHG(q,1); }
void sbchg_o(U32 opcode) { if (R1) BCHG(o,1); }

#if 0
/* XXX: undocumented */
#define BCHGH(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = r(R3).C(sz,0) % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) ^ ((UT(sz))1 << n); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void sbchgh_b(U32 opcode) { if (R1) BCHGH(b,1); }
void sbchgh_d(U32 opcode) { if (R1) BCHGH(d,1); }
void sbchgh_q(U32 opcode) { if (R1) BCHGH(q,1); }
void sbchgh_o(U32 opcode) { if (R1) BCHGH(o,1); }
#endif

#define BSET(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned n = r(R3).C(sz,i) % BITS(sz); \
			r1.C(sz,i) = r(R2).C(sz,i) | ((UT(sz))1 << n); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void bset_b(U32 opcode) { if (R1) BSET(b,0); }
void bset_d(U32 opcode) { if (R1) BSET(d,0); }
void bset_q(U32 opcode) { if (R1) BSET(q,0); }
void bset_o(U32 opcode) { if (R1) BSET(o,0); }
void sbset_b(U32 opcode) { if (R1) BSET(b,1); }
void sbset_d(U32 opcode) { if (R1) BSET(d,1); }
void sbset_q(U32 opcode) { if (R1) BSET(q,1); }
void sbset_o(U32 opcode) { if (R1) BSET(o,1); }

#if 0
/* XXX: undocumented */
#define BSETH(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = r(R3).C(sz,0) % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) | ((UT(sz))1 << n); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void sbseth_b(U32 opcode) { if (R1) BSETH(b,1); }
void sbseth_d(U32 opcode) { if (R1) BSETH(d,1); }
void sbseth_q(U32 opcode) { if (R1) BSETH(q,1); }
void sbseth_o(U32 opcode) { if (R1) BSETH(o,1); }
#endif

/* XXX: verify immediate size */
#define BTSTI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = UIMM8 % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) & ((UT(sz))1 << n); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void btsti_b(U32 opcode) { if (R1) BTSTI(b,0); }
void btsti_d(U32 opcode) { if (R1) BTSTI(d,0); }
void btsti_q(U32 opcode) { if (R1) BTSTI(q,0); }
void btsti_o(U32 opcode) { if (R1) BTSTI(o,0); }
void sbtsti_b(U32 opcode) { if (R1) BTSTI(b,1); }
void sbtsti_d(U32 opcode) { if (R1) BTSTI(d,1); }
void sbtsti_q(U32 opcode) { if (R1) BTSTI(q,1); }
void sbtsti_o(U32 opcode) { if (R1) BTSTI(o,1); }

/* XXX: verify immediate size */
#define BCLRI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = UIMM8 % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) & ~((UT(sz))1 << n); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void bclri_b(U32 opcode) { if (R1) BCLRI(b,0); }
void bclri_d(U32 opcode) { if (R1) BCLRI(d,0); }
void bclri_q(U32 opcode) { if (R1) BCLRI(q,0); }
void bclri_o(U32 opcode) { if (R1) BCLRI(o,0); }
void sbclri_b(U32 opcode) { if (R1) BCLRI(b,1); }
void sbclri_d(U32 opcode) { if (R1) BCLRI(d,1); }
void sbclri_q(U32 opcode) { if (R1) BCLRI(q,1); }
void sbclri_o(U32 opcode) { if (R1) BCLRI(o,1); }

/* XXX: verify immediate size */
#define BCHGI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = UIMM8 % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) ^ ((UT(sz))1 << n); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void bchgi_b(U32 opcode) { if (R1) BCHGI(b,0); }
void bchgi_d(U32 opcode) { if (R1) BCHGI(d,0); }
void bchgi_q(U32 opcode) { if (R1) BCHGI(q,0); }
void bchgi_o(U32 opcode) { if (R1) BCHGI(o,0); }
void sbchgi_b(U32 opcode) { if (R1) BCHGI(b,1); }
void sbchgi_d(U32 opcode) { if (R1) BCHGI(d,1); }
void sbchgi_q(U32 opcode) { if (R1) BCHGI(q,1); }
void sbchgi_o(U32 opcode) { if (R1) BCHGI(o,1); }

/* XXX: verify immediate size */
#define BSETI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = UIMM8 % BITS(sz); \
		for_all_chunks(i,sz) { \
			r1.C(sz,i) = r(R2).C(sz,i) | ((UT(sz))1 << n); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void bseti_b(U32 opcode) { if (R1) BSETI(b,0); }
void bseti_d(U32 opcode) { if (R1) BSETI(d,0); }
void bseti_q(U32 opcode) { if (R1) BSETI(q,0); }
void bseti_o(U32 opcode) { if (R1) BSETI(o,0); }
void sbseti_b(U32 opcode) { if (R1) BSETI(b,1); }
void sbseti_d(U32 opcode) { if (R1) BSETI(d,1); }
void sbseti_q(U32 opcode) { if (R1) BSETI(q,1); }
void sbseti_o(U32 opcode) { if (R1) BSETI(o,1); }

#define MIXL(sz) \
	do { \
		union reg res[2]; \
		unsigned i; \
		for_all_chunks(i,sz) { \
			res[0].C2(sz,2*i+0) = r(R2).C(sz,i); \
			res[0].C2(sz,2*i+1) = r(R3).C(sz,i); \
		} \
		r(R1) = res[HOST_BIG_ENDIAN != 0]; \
	} while (0)

void mixl_b(U32 opcode) { if (R1) MIXL(b); }
void mixl_d(U32 opcode) { if (R1) MIXL(d); }
void mixl_q(U32 opcode) { if (R1) MIXL(q); }
void mixl_o(U32 opcode) { if (R1) MIXL(o); }

#define MIXH(sz) \
	do { \
		union reg res[2]; \
		unsigned i; \
		for_all_chunks(i,sz) { \
			res[0].C2(sz,2*i+0) = r(R2).C(sz,i); \
			res[0].C2(sz,2*i+1) = r(R3).C(sz,i); \
		} \
		r(R1) = res[HOST_BIG_ENDIAN == 0]; \
	} while (0)

void mixh_b(U32 opcode) { if (R1) MIXH(b); }
void mixh_d(U32 opcode) { if (R1) MIXH(d); }
void mixh_q(U32 opcode) { if (R1) MIXH(q); }
void mixh_o(U32 opcode) { if (R1) MIXH(o); }

/* XXX: undocumented */
#define MIX(sz) \
	do { \
		union reg res[2]; \
		unsigned i; \
		for_all_chunks(i,sz) { \
			res[0].C2(sz,2*i+0) = r(R2).C(sz,i); \
			res[0].C2(sz,2*i+1) = r(R3).C(sz,i); \
		} \
		if (R1) r(R1) = res[HOST_BIG_ENDIAN != 0]; \
		if (RA) r(RA) = res[HOST_BIG_ENDIAN == 0]; \
	} while (0)

void mix_b(U32 opcode) { MIX(b); }
void mix_d(U32 opcode) { MIX(d); }
void mix_q(U32 opcode) { MIX(q); }
void mix_o(U32 opcode) { MIX(o); }

#define EXPANDL(sz) \
	do { \
		union reg res[2]; \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned j = (i % 2) ? CHUNKS(sz) - 1 + i : i; \
			res[0].C2(sz,j+0) = r(R2).C(sz,i); \
			res[0].C2(sz,j+1) = r(R3).C(sz,i); \
		} \
		r(R1) = res[HOST_BIG_ENDIAN != 0]; \
	} while (0)

void expandl_b(U32 opcode) { if (R1) EXPANDL(b); }
void expandl_d(U32 opcode) { if (R1) EXPANDL(d); }
void expandl_q(U32 opcode) { if (R1) EXPANDL(q); }
void expandl_o(U32 opcode) { if (R1) EXPANDL(o); }

#define EXPANDH(sz) \
	do { \
		union reg res[2]; \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned j = (i % 2) ? CHUNKS(sz) - 1 + i : i; \
			res[0].C2(sz,j+0) = r(R2).C(sz,i); \
			res[0].C2(sz,j+1) = r(R3).C(sz,i); \
		} \
		r(R1) = res[HOST_BIG_ENDIAN == 0]; \
	} while (0)

void expandh_b(U32 opcode) { if (R1) EXPANDH(b); }
void expandh_d(U32 opcode) { if (R1) EXPANDH(d); }
void expandh_q(U32 opcode) { if (R1) EXPANDH(q); }
void expandh_o(U32 opcode) { if (R1) EXPANDH(o); }

/* XXX: undocumented */
#define EXPAND(sz) \
	do { \
		union reg res[2]; \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned j = (i % 2) ? CHUNKS(sz) - 1 + i : i; \
			res[0].C2(sz,j+0) = r(R2).C(sz,i); \
			res[0].C2(sz,j+1) = r(R3).C(sz,i); \
		} \
		if (R1) r(R1) = res[HOST_BIG_ENDIAN != 0]; \
		if (RA) r(RA) = res[HOST_BIG_ENDIAN == 0]; \
	} while (0)

void expand_b(U32 opcode) { EXPAND(b); }
void expand_d(U32 opcode) { EXPAND(d); }
void expand_q(U32 opcode) { EXPAND(q); }
void expand_o(U32 opcode) { EXPAND(o); }

/* XXX: 3-operand version? */
void cshiftl(U32 opcode) {
	if (R1) {
		unsigned i;

		for (i = CHUNKS(o) - 1; i > 0; i--)
			r(R1).C(o,i) = r(R2).C(o,i-1);
		r(R1).C(o,0) = 0;
	}
}

/* XXX: 3-operand version? */
void cshiftr(U32 opcode) {
	if (R1) {
		unsigned i;

		for (i = 1; i < CHUNKS(o); i++)
			r(R1).C(o,i-1) = r(R2).C(o,i);
		r(R1).C(o,CHUNKS(o)-1) = 0;
	}
}

#define SDUP(sz) \
	do { \
		union reg r1; \
		unsigned i; \
		for_all_chunks(i,sz) \
			r1.C(sz,i) = r(R2).C(sz,0); \
		r(R1) = r1; \
	} while (0)

void sdup_b(U32 opcode) { if (R1) SDUP(b); }
void sdup_d(U32 opcode) { if (R1) SDUP(d); }
void sdup_q(U32 opcode) { if (R1) SDUP(q); }
void sdup_o(U32 opcode) { if (R1) SDUP(o); }

U64 brev(U64 x, unsigned bits) {
	U64 y = 0;
	unsigned i;

	for (i = 0; i < bits; i++) {
		y = (y << 1) | (x & 1);
		x >>= 1;
	}
	return y;
}

/* XXX: behaves not as documented */
/* XXX: bitrev[i]o is unimplemented */
#define BITREV(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned n = r(R3).C(sz,i) % BITS(sz); \
			UT(sz) t = brev(r(R2).C(sz,i), BITS(sz)); \
			r1.C(sz,i) = t >> (BITS(sz)-1-n); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void bitrev_b(U32 opcode) { if (R1) BITREV(b,0); }
void bitrev_d(U32 opcode) { if (R1) BITREV(d,0); }
void bitrev_q(U32 opcode) { if (R1) BITREV(q,0); }
void bitrev_o(U32 opcode) { if (R1) BITREV(o,0); }
void sbitrev_b(U32 opcode) { if (R1) BITREV(b,1); }
void sbitrev_d(U32 opcode) { if (R1) BITREV(d,1); }
void sbitrev_q(U32 opcode) { if (R1) BITREV(q,1); }
void sbitrev_o(U32 opcode) { if (R1) BITREV(o,1); }

#define BITREVH(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = r(R3).C(sz,0) % BITS(sz); \
		for_all_chunks(i,sz) { \
			UT(sz) t = brev(r(R2).C(sz,i), BITS(sz)); \
			r1.C(sz,i) = t >> (BITS(sz)-1-n); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void sbitrevh_b(U32 opcode) { if (R1) BITREVH(b,1); }
void sbitrevh_d(U32 opcode) { if (R1) BITREVH(d,1); }
void sbitrevh_q(U32 opcode) { if (R1) BITREVH(q,1); }
void sbitrevh_o(U32 opcode) { if (R1) BITREVH(o,1); }

#define BITREVI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		const unsigned n = UIMM8 % BITS(sz); \
		for_all_chunks(i,sz) { \
			UT(sz) t = brev(r(R2).C(sz,i), BITS(sz)); \
			r1.C(sz,i) = t >> (BITS(sz)-1-n); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void bitrevi_b(U32 opcode) { if (R1) BITREVI(b,0); }
void bitrevi_d(U32 opcode) { if (R1) BITREVI(d,0); }
void bitrevi_q(U32 opcode) { if (R1) BITREVI(q,0); }
void bitrevi_o(U32 opcode) { if (R1) BITREVI(o,0); }
void sbitrevi_b(U32 opcode) { if (R1) BITREVI(b,1); }
void sbitrevi_d(U32 opcode) { if (R1) BITREVI(d,1); }
void sbitrevi_q(U32 opcode) { if (R1) BITREVI(q,1); }
void sbitrevi_o(U32 opcode) { if (R1) BITREVI(o,1); }

/* XXX: behaves not as documented */
/* XXX: dbitrev[i]o is unimplemented */
#define DBITREV(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			const unsigned n = r(R3).C(sz,i) % BITS(sz); \
			UT(sz) t = brev(r(R2).C(sz,i), BITS(sz)); \
			r1.C(sz,i) = t >> (BITS(sz)-1-n); \
			ra.C(sz,i) = t << 1 << n; \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void dbitrev_b(U32 opcode) { DBITREV(b,0); }
void dbitrev_d(U32 opcode) { DBITREV(d,0); }
void dbitrev_q(U32 opcode) { DBITREV(q,0); }
void dbitrev_o(U32 opcode) { DBITREV(o,0); }
void sdbitrev_b(U32 opcode) { DBITREV(b,1); }
void sdbitrev_d(U32 opcode) { DBITREV(d,1); }
void sdbitrev_q(U32 opcode) { DBITREV(q,1); }
void sdbitrev_o(U32 opcode) { DBITREV(o,1); }

#define DBITREVH(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		const unsigned n = r(R3).C(sz,0) % BITS(sz); \
		for_all_chunks(i,sz) { \
			UT(sz) t = brev(r(R2).C(sz,i), BITS(sz)); \
			r1.C(sz,i) = t >> (BITS(sz)-1-n); \
			ra.C(sz,i) = t << 1 << n; \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void sdbitrevh_b(U32 opcode) { DBITREVH(b,1); }
void sdbitrevh_d(U32 opcode) { DBITREVH(d,1); }
void sdbitrevh_q(U32 opcode) { DBITREVH(q,1); }
void sdbitrevh_o(U32 opcode) { DBITREVH(o,1); }

#define DBITREVI(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		union reg ra = reginit(RA); \
		unsigned i; \
		const unsigned n = UIMM8 % BITS(sz); \
		for_all_chunks(i,sz) { \
			UT(sz) t = brev(r(R2).C(sz,i), BITS(sz)); \
			r1.C(sz,i) = t >> (BITS(sz)-1-n); \
			ra.C(sz,i) = t << 1 << n; \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
		if (RA) r(RA) = ra; \
	} while (0)

void dbitrevi_b(U32 opcode) { DBITREVI(b,0); }
void dbitrevi_d(U32 opcode) { DBITREVI(d,0); }
void dbitrevi_q(U32 opcode) { DBITREVI(q,0); }
void dbitrevi_o(U32 opcode) { DBITREVI(o,0); }
void sdbitrevi_b(U32 opcode) { DBITREVI(b,1); }
void sdbitrevi_d(U32 opcode) { DBITREVI(d,1); }
void sdbitrevi_q(U32 opcode) { DBITREVI(q,1); }
void sdbitrevi_o(U32 opcode) { DBITREVI(o,1); }

/* XXX: manual example is wrong! */
#define BYTEREV(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		const unsigned n = BYTES(sz); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			unsigned j; \
			for (j = 0; j < n; j++) \
				cbyte(r1,sz,i,n-1-j) = cbyte(r(R2),sz,i,j); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void byterev_b(U32 opcode) { if (R1) BYTEREV(b,0); }
void byterev_d(U32 opcode) { if (R1) BYTEREV(d,0); }
void byterev_q(U32 opcode) { if (R1) BYTEREV(q,0); }
void byterev_o(U32 opcode) { if (R1) BYTEREV(o,0); }
void sbyterev_b(U32 opcode) { if (R1) BYTEREV(b,1); }
void sbyterev_d(U32 opcode) { if (R1) BYTEREV(d,1); }
void sbyterev_q(U32 opcode) { if (R1) BYTEREV(q,1); }
void sbyterev_o(U32 opcode) { if (R1) BYTEREV(o,1); }

#define ROP2(sz,A,B) \
	do { \
		unsigned i; \
		for_all_chunks(i,sz) { \
			r(R1).C(sz,i) = A (r(R2).C(sz,i) B r(R3).C(sz,i)); \
		} \
	} while (0)

void  and(U32 opcode) { if (R1) ROP2(best, none, & ); }
void   or(U32 opcode) { if (R1) ROP2(best, none, | ); }
void  xor(U32 opcode) { if (R1) ROP2(best, none, ^ ); }
void andn(U32 opcode) { if (R1) ROP2(best, none, &~); }
void  orn(U32 opcode) { if (R1) ROP2(best, none, |~); }
void nand(U32 opcode) { if (R1) ROP2(best,	   ~, & ); }
void  nor(U32 opcode) { if (R1) ROP2(best,	   ~, | ); }
void xnor(U32 opcode) { if (R1) ROP2(best,	   ~, ^ ); }

/* XXX: verify semantics */
#define ROP2_AND(sz,A,B) \
	do { \
		unsigned i; \
		for_all_chunks(i,sz) { \
			UT(sz) x = ~ A (r(R2).C(sz,i) B r(R3).C(sz,i)); \
			r(R1).C(sz,i) = -(x == 0); \
		} \
	} while (0)

void  and_and_b(U32 opcode) { if (R1) ROP2_AND(b, none, & ); }
void   or_and_b(U32 opcode) { if (R1) ROP2_AND(b, none, | ); }
void  xor_and_b(U32 opcode) { if (R1) ROP2_AND(b, none, ^ ); }
void andn_and_b(U32 opcode) { if (R1) ROP2_AND(b, none, &~); }
void  orn_and_b(U32 opcode) { if (R1) ROP2_AND(b, none, |~); }
void nand_and_b(U32 opcode) { if (R1) ROP2_AND(b,	  ~, & ); }
void  nor_and_b(U32 opcode) { if (R1) ROP2_AND(b,	  ~, | ); }
void xnor_and_b(U32 opcode) { if (R1) ROP2_AND(b,	  ~, ^ ); }
void  and_and_d(U32 opcode) { if (R1) ROP2_AND(d, none, & ); }
void   or_and_d(U32 opcode) { if (R1) ROP2_AND(d, none, | ); }
void  xor_and_d(U32 opcode) { if (R1) ROP2_AND(d, none, ^ ); }
void andn_and_d(U32 opcode) { if (R1) ROP2_AND(d, none, &~); }
void  orn_and_d(U32 opcode) { if (R1) ROP2_AND(d, none, |~); }
void nand_and_d(U32 opcode) { if (R1) ROP2_AND(d,	  ~, & ); }
void  nor_and_d(U32 opcode) { if (R1) ROP2_AND(d,	  ~, | ); }
void xnor_and_d(U32 opcode) { if (R1) ROP2_AND(d,	  ~, ^ ); }
void  and_and_q(U32 opcode) { if (R1) ROP2_AND(q, none, & ); }
void   or_and_q(U32 opcode) { if (R1) ROP2_AND(q, none, | ); }
void  xor_and_q(U32 opcode) { if (R1) ROP2_AND(q, none, ^ ); }
void andn_and_q(U32 opcode) { if (R1) ROP2_AND(q, none, &~); }
void  orn_and_q(U32 opcode) { if (R1) ROP2_AND(q, none, |~); }
void nand_and_q(U32 opcode) { if (R1) ROP2_AND(q,	  ~, & ); }
void  nor_and_q(U32 opcode) { if (R1) ROP2_AND(q,	  ~, | ); }
void xnor_and_q(U32 opcode) { if (R1) ROP2_AND(q,	  ~, ^ ); }
void  and_and_o(U32 opcode) { if (R1) ROP2_AND(o, none, & ); }
void   or_and_o(U32 opcode) { if (R1) ROP2_AND(o, none, | ); }
void  xor_and_o(U32 opcode) { if (R1) ROP2_AND(o, none, ^ ); }
void andn_and_o(U32 opcode) { if (R1) ROP2_AND(o, none, &~); }
void  orn_and_o(U32 opcode) { if (R1) ROP2_AND(o, none, |~); }
void nand_and_o(U32 opcode) { if (R1) ROP2_AND(o,	  ~, & ); }
void  nor_and_o(U32 opcode) { if (R1) ROP2_AND(o,	  ~, | ); }
void xnor_and_o(U32 opcode) { if (R1) ROP2_AND(o,	  ~, ^ ); }

/* XXX: verify semantics */
#define ROP2_OR(sz,A,B) \
	do { \
		unsigned i; \
		for_all_chunks(i,sz) { \
			UT(sz) x = A (r(R2).C(sz,i) B r(R3).C(sz,i)); \
			r(R1).C(sz,i) = -(x != 0); \
		} \
	} while (0)

void  and_or_b(U32 opcode) { if (R1) ROP2_OR(b, none, & ); }
void   or_or_b(U32 opcode) { if (R1) ROP2_OR(b, none, | ); }
void  xor_or_b(U32 opcode) { if (R1) ROP2_OR(b, none, ^ ); }
void andn_or_b(U32 opcode) { if (R1) ROP2_OR(b, none, &~); }
void  orn_or_b(U32 opcode) { if (R1) ROP2_OR(b, none, |~); }
void nand_or_b(U32 opcode) { if (R1) ROP2_OR(b,	~, & ); }
void  nor_or_b(U32 opcode) { if (R1) ROP2_OR(b,	~, | ); }
void xnor_or_b(U32 opcode) { if (R1) ROP2_OR(b,	~, ^ ); }
void  and_or_d(U32 opcode) { if (R1) ROP2_OR(d, none, & ); }
void   or_or_d(U32 opcode) { if (R1) ROP2_OR(d, none, | ); }
void  xor_or_d(U32 opcode) { if (R1) ROP2_OR(d, none, ^ ); }
void andn_or_d(U32 opcode) { if (R1) ROP2_OR(d, none, &~); }
void  orn_or_d(U32 opcode) { if (R1) ROP2_OR(d, none, |~); }
void nand_or_d(U32 opcode) { if (R1) ROP2_OR(d,	~, & ); }
void  nor_or_d(U32 opcode) { if (R1) ROP2_OR(d,	~, | ); }
void xnor_or_d(U32 opcode) { if (R1) ROP2_OR(d,	~, ^ ); }
void  and_or_q(U32 opcode) { if (R1) ROP2_OR(q, none, & ); }
void   or_or_q(U32 opcode) { if (R1) ROP2_OR(q, none, | ); }
void  xor_or_q(U32 opcode) { if (R1) ROP2_OR(q, none, ^ ); }
void andn_or_q(U32 opcode) { if (R1) ROP2_OR(q, none, &~); }
void  orn_or_q(U32 opcode) { if (R1) ROP2_OR(q, none, |~); }
void nand_or_q(U32 opcode) { if (R1) ROP2_OR(q,	~, & ); }
void  nor_or_q(U32 opcode) { if (R1) ROP2_OR(q,	~, | ); }
void xnor_or_q(U32 opcode) { if (R1) ROP2_OR(q,	~, ^ ); }
void  and_or_o(U32 opcode) { if (R1) ROP2_OR(o, none, & ); }
void   or_or_o(U32 opcode) { if (R1) ROP2_OR(o, none, | ); }
void  xor_or_o(U32 opcode) { if (R1) ROP2_OR(o, none, ^ ); }
void andn_or_o(U32 opcode) { if (R1) ROP2_OR(o, none, &~); }
void  orn_or_o(U32 opcode) { if (R1) ROP2_OR(o, none, |~); }
void nand_or_o(U32 opcode) { if (R1) ROP2_OR(o,	~, & ); }
void  nor_or_o(U32 opcode) { if (R1) ROP2_OR(o,	~, | ); }
void xnor_or_o(U32 opcode) { if (R1) ROP2_OR(o,	~, ^ ); }

/* XXX: verify register usage for MUX */
#define ROP2_MUX(sz) \
	do { \
		unsigned i; \
		for_all_chunks(i,sz) \
			r(RA).C(sz,i) = (r(R1).C(sz,i) &~ r(R3).C(sz,i)) \
						| (r(R2).C(sz,i) &  r(R3).C(sz,i)); \
	} while (0)

void mux(U32 opcode) { if (RA) ROP2_MUX(best); }

/* XXX: verify imm usage */
/* XXX: is there a combine mode? */
#define ROP2I(sz,A,B) \
	do { \
		ST(sz) r3 = SIMM9; \
		unsigned i; \
		for_all_chunks(i,sz) \
			r(R1).C(sz,i) = A (r(R2).C(sz,i) B r3); \
	} while (0)

void  andi_b(U32 opcode) { if (R1) ROP2I(b, none, & ); }
void   ori_b(U32 opcode) { if (R1) ROP2I(b, none, | ); }
void  xori_b(U32 opcode) { if (R1) ROP2I(b, none, ^ ); }
void andni_b(U32 opcode) { if (R1) ROP2I(b, none, &~); }
void  orni_b(U32 opcode) { if (R1) ROP2I(b, none, |~); }
void nandi_b(U32 opcode) { if (R1) ROP2I(b,	~, & ); }
void  nori_b(U32 opcode) { if (R1) ROP2I(b,	~, | ); }
void xnori_b(U32 opcode) { if (R1) ROP2I(b,	~, ^ ); }
void  andi_d(U32 opcode) { if (R1) ROP2I(d, none, & ); }
void   ori_d(U32 opcode) { if (R1) ROP2I(d, none, | ); }
void  xori_d(U32 opcode) { if (R1) ROP2I(d, none, ^ ); }
void andni_d(U32 opcode) { if (R1) ROP2I(d, none, &~); }
void  orni_d(U32 opcode) { if (R1) ROP2I(d, none, |~); }
void nandi_d(U32 opcode) { if (R1) ROP2I(d,	~, & ); }
void  nori_d(U32 opcode) { if (R1) ROP2I(d,	~, | ); }
void xnori_d(U32 opcode) { if (R1) ROP2I(d,	~, ^ ); }
void  andi_q(U32 opcode) { if (R1) ROP2I(q, none, & ); }
void   ori_q(U32 opcode) { if (R1) ROP2I(q, none, | ); }
void  xori_q(U32 opcode) { if (R1) ROP2I(q, none, ^ ); }
void andni_q(U32 opcode) { if (R1) ROP2I(q, none, &~); }
void  orni_q(U32 opcode) { if (R1) ROP2I(q, none, |~); }
void nandi_q(U32 opcode) { if (R1) ROP2I(q,	~, & ); }
void  nori_q(U32 opcode) { if (R1) ROP2I(q,	~, | ); }
void xnori_q(U32 opcode) { if (R1) ROP2I(q,	~, ^ ); }
void  andi_o(U32 opcode) { if (R1) ROP2I(o, none, & ); }
void   ori_o(U32 opcode) { if (R1) ROP2I(o, none, | ); }
void  xori_o(U32 opcode) { if (R1) ROP2I(o, none, ^ ); }
void andni_o(U32 opcode) { if (R1) ROP2I(o, none, &~); }
void  orni_o(U32 opcode) { if (R1) ROP2I(o, none, |~); }
void nandi_o(U32 opcode) { if (R1) ROP2I(o,	~, & ); }
void  nori_o(U32 opcode) { if (R1) ROP2I(o,	~, | ); }
void xnori_o(U32 opcode) { if (R1) ROP2I(o,	~, ^ ); }

/* FP stuff */
/* XXX: -x suffix is currently ignored */

void fadd_f(U32 opcode) { if (R1) ADD(F,0); }
void fadd_d(U32 opcode) { if (R1) ADD(D,0); }
void sfadd_f(U32 opcode) { if (R1) ADD(F,1); }
void sfadd_d(U32 opcode) { if (R1) ADD(D,1); }

void fsub_f(U32 opcode) { if (R1) SUB(F,0); }
void fsub_d(U32 opcode) { if (R1) SUB(D,0); }
void sfsub_f(U32 opcode) { if (R1) SUB(F,1); }
void sfsub_d(U32 opcode) { if (R1) SUB(D,1); }

void fmul_f(U32 opcode) { if (R1) MUL(F,0); }
void fmul_d(U32 opcode) { if (R1) MUL(D,0); }
void sfmul_f(U32 opcode) { if (R1) MUL(F,1); }
void sfmul_d(U32 opcode) { if (R1) MUL(D,1); }

/* XXX: verify semantics */
/* XXX: exception handling */
/* XXX: SIMD version? */
#define F2INT(fsz,isz,rm) \
	do { \
		fesetround(RND(rm)); \
		r(R1).SC(isz,0) = (ST(isz))r(R2).C(fsz,0); \
		fesetround(default_rounding); \
	} while (0)

void f2intr_b(U32 opcode) { if (R1) F2INT(F,b,r); }
void f2intr_d(U32 opcode) { if (R1) F2INT(F,d,r); }
void f2intr_q(U32 opcode) { if (R1) F2INT(F,q,r); }
void f2intr_o(U32 opcode) { if (R1) F2INT(F,o,r); }
void d2intr_b(U32 opcode) { if (R1) F2INT(D,b,r); }
void d2intr_d(U32 opcode) { if (R1) F2INT(D,d,r); }
void d2intr_q(U32 opcode) { if (R1) F2INT(D,q,r); }
void d2intr_o(U32 opcode) { if (R1) F2INT(D,o,r); }
void f2intt_b(U32 opcode) { if (R1) F2INT(F,b,t); }
void f2intt_d(U32 opcode) { if (R1) F2INT(F,d,t); }
void f2intt_q(U32 opcode) { if (R1) F2INT(F,q,t); }
void f2intt_o(U32 opcode) { if (R1) F2INT(F,o,t); }
void d2intt_b(U32 opcode) { if (R1) F2INT(D,b,t); }
void d2intt_d(U32 opcode) { if (R1) F2INT(D,d,t); }
void d2intt_q(U32 opcode) { if (R1) F2INT(D,q,t); }
void d2intt_o(U32 opcode) { if (R1) F2INT(D,o,t); }
void f2intf_b(U32 opcode) { if (R1) F2INT(F,b,f); }
void f2intf_d(U32 opcode) { if (R1) F2INT(F,d,f); }
void f2intf_q(U32 opcode) { if (R1) F2INT(F,q,f); }
void f2intf_o(U32 opcode) { if (R1) F2INT(F,o,f); }
void d2intf_b(U32 opcode) { if (R1) F2INT(D,b,f); }
void d2intf_d(U32 opcode) { if (R1) F2INT(D,d,f); }
void d2intf_q(U32 opcode) { if (R1) F2INT(D,q,f); }
void d2intf_o(U32 opcode) { if (R1) F2INT(D,o,f); }
void f2intc_b(U32 opcode) { if (R1) F2INT(F,b,c); }
void f2intc_d(U32 opcode) { if (R1) F2INT(F,d,c); }
void f2intc_q(U32 opcode) { if (R1) F2INT(F,q,c); }
void f2intc_o(U32 opcode) { if (R1) F2INT(F,o,c); }
void d2intc_b(U32 opcode) { if (R1) F2INT(D,b,c); }
void d2intc_d(U32 opcode) { if (R1) F2INT(D,d,c); }
void d2intc_q(U32 opcode) { if (R1) F2INT(D,q,c); }
void d2intc_o(U32 opcode) { if (R1) F2INT(D,o,c); }

/* XXX: verify semantics */
/* XXX: exception handling */
/* XXX: SIMD version? */
#define INT2F(fsz,isz,rm) \
	do { \
		fesetround(RND(rm)); \
		r(R1).C(fsz,0) = (FT(fsz))r(R2).SC(isz,0); \
		fesetround(default_rounding); \
	} while (0)

void int2fr_b(U32 opcode) { if (R1) INT2F(F,b,r); }
void int2fr_d(U32 opcode) { if (R1) INT2F(F,d,r); }
void int2fr_q(U32 opcode) { if (R1) INT2F(F,q,r); }
void int2fr_o(U32 opcode) { if (R1) INT2F(F,o,r); }
void int2dr_b(U32 opcode) { if (R1) INT2F(D,b,r); }
void int2dr_d(U32 opcode) { if (R1) INT2F(D,d,r); }
void int2dr_q(U32 opcode) { if (R1) INT2F(D,q,r); }
void int2dr_o(U32 opcode) { if (R1) INT2F(D,o,r); }
void int2ft_b(U32 opcode) { if (R1) INT2F(F,b,t); }
void int2ft_d(U32 opcode) { if (R1) INT2F(F,d,t); }
void int2ft_q(U32 opcode) { if (R1) INT2F(F,q,t); }
void int2ft_o(U32 opcode) { if (R1) INT2F(F,o,t); }
void int2dt_b(U32 opcode) { if (R1) INT2F(D,b,t); }
void int2dt_d(U32 opcode) { if (R1) INT2F(D,d,t); }
void int2dt_q(U32 opcode) { if (R1) INT2F(D,q,t); }
void int2dt_o(U32 opcode) { if (R1) INT2F(D,o,t); }
void int2ff_b(U32 opcode) { if (R1) INT2F(F,b,f); }
void int2ff_d(U32 opcode) { if (R1) INT2F(F,d,f); }
void int2ff_q(U32 opcode) { if (R1) INT2F(F,q,f); }
void int2ff_o(U32 opcode) { if (R1) INT2F(F,o,f); }
void int2df_b(U32 opcode) { if (R1) INT2F(D,b,f); }
void int2df_d(U32 opcode) { if (R1) INT2F(D,d,f); }
void int2df_q(U32 opcode) { if (R1) INT2F(D,q,f); }
void int2df_o(U32 opcode) { if (R1) INT2F(D,o,f); }
void int2fc_b(U32 opcode) { if (R1) INT2F(F,b,c); }
void int2fc_d(U32 opcode) { if (R1) INT2F(F,d,c); }
void int2fc_q(U32 opcode) { if (R1) INT2F(F,q,c); }
void int2fc_o(U32 opcode) { if (R1) INT2F(F,o,c); }
void int2dc_b(U32 opcode) { if (R1) INT2F(D,b,c); }
void int2dc_d(U32 opcode) { if (R1) INT2F(D,d,c); }
void int2dc_q(U32 opcode) { if (R1) INT2F(D,q,c); }
void int2dc_o(U32 opcode) { if (R1) INT2F(D,o,c); }

/* simulated approximation table lookup */
/* XXX: depends heavily on host CPU's internal FP representation! */
double aprx(double x) {
#if __i386__ || __sparc__
	union {
		double d;
		U64 n;
	} cv;

	/* truncate mantissa to 3 bits -> 16 table entries */
	cv.d = x;
	cv.n &= 0xfffe000000000000ull;
	cv.n |= 0x0001000000000000ull;	// move to middle of interval
	return cv.d;
#else
#error unknown FP representation; please modify aprx().
#endif
}

#define FIAPRX(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (!r(R2).C(sz,i)) { \
				ex(EX_NULL); \
				return; \
			} \
			r1.C(sz,i) = 1.0 / aprx(r(R2).C(sz,i)); \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
	} while (0)

void fiaprx_f(U32 opcode) { FIAPRX(F,0); }
void fiaprx_d(U32 opcode) { FIAPRX(D,0); }
void sfiaprx_f(U32 opcode) { FIAPRX(F,1); }
void sfiaprx_d(U32 opcode) { FIAPRX(D,1); }

#define FSQRTIAPRX(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).C(sz,i) < 0.0) { \
				ex(EX_RANGE); \
				return; \
			} \
			r1.C(sz,i) = 1.0 / sqrt(aprx(r(R2).C(sz,i))); \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
	} while (0)

void fsqrtiaprx_f(U32 opcode) { FSQRTIAPRX(F,0); }
void fsqrtiaprx_d(U32 opcode) { FSQRTIAPRX(D,0); }
void sfsqrtiaprx_f(U32 opcode) { FSQRTIAPRX(F,1); }
void sfsqrtiaprx_d(U32 opcode) { FSQRTIAPRX(D,1); }

/* XXX: fcmp* is Broken As Designed (tm), skipping... */

void fdiv_f(U32 opcode) { DIV(F,0); }
void fdiv_d(U32 opcode) { DIV(D,0); }
void sfdiv_f(U32 opcode) { DIV(F,1); }
void sfdiv_d(U32 opcode) { DIV(D,1); }

#define FSQRT(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).C(sz,i) < 0.0) { \
				ex(EX_RANGE); \
				return; \
			} \
			r1.C(sz,i) = sqrt(r(R2).C(sz,i)); \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
	} while (0)

void fsqrt_f(U32 opcode) { FSQRT(F,0); }
void fsqrt_d(U32 opcode) { FSQRT(D,0); }
void sfsqrt_f(U32 opcode) { FSQRT(F,1); }
void sfsqrt_d(U32 opcode) { FSQRT(D,1); }

#define log2(x) (log(x)/log(2.0))	/* binary logarithm */
#define FLOG(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (r(R2).C(sz,i) <= 0.0) { \
				ex(EX_RANGE); \
				return; \
			} \
			if (R3) { \
				if (r(R3).C(sz,i) == 0.0) { \
					ex(EX_NULL); \
					return; \
				} \
				r1.C(sz,i) = log2(r(R2).C(sz,i)) / r(R3).C(sz,i); \
			} \
			else \
				r1.C(sz,i) = log2(r(R2).C(sz,i)); \
			if (!simd) break; \
		} \
		if (R1) r(R1) = r1; \
	} while (0)

void flog_f(U32 opcode) { FLOG(F,0); }
void flog_d(U32 opcode) { FLOG(D,0); }
void sflog_f(U32 opcode) { FLOG(F,1); }
void sflog_d(U32 opcode) { FLOG(D,1); }

#define exp2(x) (exp((x)*log(2.0))) /* 2.0**x */
#define FEXP(sz,simd) \
	do { \
		union reg r1 = reginit(R1); \
		unsigned i; \
		for_all_chunks(i,sz) { \
			if (R3) \
				r1.C(sz,i) = exp2(r(R2).C(sz,i) * r(R3).C(sz,i)); \
			else \
				r1.C(sz,i) = exp2(r(R2).C(sz,i)); \
			if (!simd) break; \
		} \
		r(R1) = r1; \
	} while (0)

void fexp_f(U32 opcode) { if (R1) FEXP(F,0); }
void fexp_d(U32 opcode) { if (R1) FEXP(D,0); }
void sfexp_f(U32 opcode) { if (R1) FEXP(F,1); }
void sfexp_d(U32 opcode) { if (R1) FEXP(D,1); }

void fmac_f(U32 opcode) { if (R1) AMAC(F,0); }
void fmac_d(U32 opcode) { if (R1) AMAC(D,0); }
void sfmac_f(U32 opcode) { if (R1) AMAC(F,1); }
void sfmac_d(U32 opcode) { if (R1) AMAC(D,1); }

void faddsub_f(U32 opcode) { ADDSUB(F,0); }
void faddsub_d(U32 opcode) { ADDSUB(D,0); }
void sfaddsub_f(U32 opcode) { ADDSUB(F,1); }
void sfaddsub_d(U32 opcode) { ADDSUB(D,1); }

/* load/store */

/* Note: R1 is updated *after* R2 */
#define LOAD(sz) \
	do { \
		char *p = memmap(r(R2).C(o,0), BYTES(sz), BYTES(sz), 0); \
		union reg r1 = reginit(R1); \
		unsigned j; \
		if (excode) return; \
		if (R3 && R2) r(R2).C(o,0) += r(R3).C(o,0); \
		for (j = 0; j < BYTES(sz); j++) \
			r1.C(b,j) = p[j]; \
		if (R1) r(R1) = r1; \
	} while (0)

void load_b(U32 opcode) { LOAD(b); }
void load_d(U32 opcode) { LOAD(d); }
void load_q(U32 opcode) { LOAD(q); }
void load_o(U32 opcode) { LOAD(o); }

/* Note: R1 is updated *after* R2 */
#define LOADE(sz) \
	do { \
		char *p = memmap(r(R2).C(o,0), BYTES(sz), BYTES(sz), 0); \
		union reg r1 = reginit(R1); \
		unsigned j; \
		if (excode) return; \
		if (R3 && R2) r(R2).C(o,0) += r(R3).C(o,0); \
		for (j = 0; j < BYTES(sz); j++) \
			r1.C(b,BYTES(sz)-1-j) = p[j]; \
		if (R1) r(R1) = r1; \
	} while (0)

void loade_b(U32 opcode) { LOADE(b); }
void loade_d(U32 opcode) { LOADE(d); }
void loade_q(U32 opcode) { LOADE(q); }
void loade_o(U32 opcode) { LOADE(o); }

/* Note: R1 is updated *after* R2 */
#define LOADI(sz) \
	do { \
		char *p = memmap(r(R2).C(o,0), BYTES(sz), BYTES(sz), 0); \
		union reg r1 = reginit(R1); \
		unsigned j; \
		if (excode) return; \
		if (R2) r(R2).C(o,0) += SIMM9; \
		for (j = 0; j < BYTES(sz); j++) \
			r1.C(b,j) = p[j]; \
		if (R1) r(R1) = r1; \
	} while (0)

void loadi_b(U32 opcode) { LOADI(b); }
void loadi_d(U32 opcode) { LOADI(d); }
void loadi_q(U32 opcode) { LOADI(q); }
void loadi_o(U32 opcode) { LOADI(o); }

/* Note: R1 is updated *after* R2 */
#define LOADIE(sz) \
	do { \
		char *p = memmap(r(R2).C(o,0), BYTES(sz), BYTES(sz), 0); \
		union reg r1 = reginit(R1); \
		unsigned j; \
		if (excode) return; \
		if (R2) r(R2).C(o,0) += SIMM9; \
		for (j = 0; j < BYTES(sz); j++) \
			r1.C(b,BYTES(sz)-1-j) = p[j]; \
		if (R1) r(R1) = r1; \
	} while (0)

void loadie_b(U32 opcode) { LOADIE(b); }
void loadie_d(U32 opcode) { LOADIE(d); }
void loadie_q(U32 opcode) { LOADIE(q); }
void loadie_o(U32 opcode) { LOADIE(o); }

/* Note: R2 is updated *after* R1 was written */
#define STORE(sz) \
	do { \
		char *p = memmap(r(R2).C(o,0), BYTES(sz), BYTES(sz), 1); \
		unsigned j; \
		if (excode) return; \
		for (j = 0; j < BYTES(sz); j++) \
			p[j] = r(R1).C(b,j); \
		if (R3 && R2) r(R2).C(o,0) += r(R3).C(o,0); \
	} while (0)

void store_b(U32 opcode) { STORE(b); }
void store_d(U32 opcode) { STORE(d); }
void store_q(U32 opcode) { STORE(q); }
void store_o(U32 opcode) { STORE(o); }

/* Note: R2 is updated *after* R1 was written */
#define STOREE(sz) \
	do { \
		char *p = memmap(r(R2).C(o,0), BYTES(sz), BYTES(sz), 1); \
		unsigned j; \
		if (excode) return; \
		for (j = 0; j < BYTES(sz); j++) \
			p[j] = r(R1).C(b,BYTES(sz)-1-j); \
		if (R3 && R2) r(R2).C(o,0) += r(R3).C(o,0); \
	} while (0)

void storee_b(U32 opcode) { STOREE(b); }
void storee_d(U32 opcode) { STOREE(d); }
void storee_q(U32 opcode) { STOREE(q); }
void storee_o(U32 opcode) { STOREE(o); }

/* Note: R2 is updated *after* R1 was written */
#define STOREI(sz) \
	do { \
		char *p = memmap(r(R2).C(o,0), BYTES(sz), BYTES(sz), 0); \
		unsigned j; \
		if (excode) return; \
		for (j = 0; j < BYTES(sz); j++) \
			p[j] = r(R1).C(b,j); \
		if (R2) r(R2).C(o,0) += SIMM9; \
	} while (0)

void storei_b(U32 opcode) { STOREI(b); }
void storei_d(U32 opcode) { STOREI(d); }
void storei_q(U32 opcode) { STOREI(q); }
void storei_o(U32 opcode) { STOREI(o); }

/* Note: R2 is updated *after* R1 was written */
#define STOREIE(sz) \
	do { \
		char *p = memmap(r(R2).C(o,0), BYTES(sz), BYTES(sz), 0); \
		unsigned j; \
		if (excode) return; \
		for (j = 0; j < BYTES(sz); j++) \
			p[j] = r(R1).C(b,BYTES(sz)-1-j); \
		if (R2) r(R2).C(o,0) += SIMM9; \
	} while (0)

void storeie_b(U32 opcode) { STOREIE(b); }
void storeie_d(U32 opcode) { STOREIE(d); }
void storeie_q(U32 opcode) { STOREIE(q); }
void storeie_o(U32 opcode) { STOREIE(o); }

#if 0 /* use add/sub instead */
void madd(U32 opcode) { if (R1) ADD(o,0); }
void msub(U32 opcode) { if (R1) SUB(o,0); }

/* XXX: mshchg is a NOP during emulation */
void mshchg(U32 opcode) { }
#endif

/* XXX: verify semantics */
#define CSTORE(sz,cond) \
	do { \
		char *p; \
		unsigned j; \
		if (!cond(R3)) break; \
		p = memmap(r(R2).C(o,0), BYTES(sz), BYTES(sz), 1); \
		if (excode) return; \
		for (j = 0; j < BYTES(sz); j++) \
			p[j] = r(R1).C(b,j); \
	} while (0)

void cstorez_b(U32 opcode) { CSTORE(b,zero); }
void cstorez_d(U32 opcode) { CSTORE(d,zero); }
void cstorez_q(U32 opcode) { CSTORE(q,zero); }
void cstorez_o(U32 opcode) { CSTORE(o,zero); }
void cstoren_b(U32 opcode) { CSTORE(b,nan); }
void cstoren_d(U32 opcode) { CSTORE(d,nan); }
void cstoren_q(U32 opcode) { CSTORE(q,nan); }
void cstoren_o(U32 opcode) { CSTORE(o,nan); }
void cstorel_b(U32 opcode) { CSTORE(b,lsb); }
void cstorel_d(U32 opcode) { CSTORE(d,lsb); }
void cstorel_q(U32 opcode) { CSTORE(q,lsb); }
void cstorel_o(U32 opcode) { CSTORE(o,lsb); }
void cstorem_b(U32 opcode) { CSTORE(b,msb); }
void cstorem_d(U32 opcode) { CSTORE(d,msb); }
void cstorem_q(U32 opcode) { CSTORE(q,msb); }
void cstorem_o(U32 opcode) { CSTORE(o,msb); }
void cstorenz_b(U32 opcode) { CSTORE(b,!zero); }
void cstorenz_d(U32 opcode) { CSTORE(d,!zero); }
void cstorenz_q(U32 opcode) { CSTORE(q,!zero); }
void cstorenz_o(U32 opcode) { CSTORE(o,!zero); }
void cstorenn_b(U32 opcode) { CSTORE(b,!nan); }
void cstorenn_d(U32 opcode) { CSTORE(d,!nan); }
void cstorenn_q(U32 opcode) { CSTORE(q,!nan); }
void cstorenn_o(U32 opcode) { CSTORE(o,!nan); }
void cstorenl_b(U32 opcode) { CSTORE(b,!lsb); }
void cstorenl_d(U32 opcode) { CSTORE(d,!lsb); }
void cstorenl_q(U32 opcode) { CSTORE(q,!lsb); }
void cstorenl_o(U32 opcode) { CSTORE(o,!lsb); }
void cstorenm_b(U32 opcode) { CSTORE(b,!msb); }
void cstorenm_d(U32 opcode) { CSTORE(d,!msb); }
void cstorenm_q(U32 opcode) { CSTORE(q,!msb); }
void cstorenm_o(U32 opcode) { CSTORE(o,!msb); }

/* XXX: verify semantics */
#define CSTOREE(sz,cond) \
	do { \
		char *p; \
		unsigned j; \
		if (!cond(R3)) break; \
		p = memmap(r(R2).C(o,0), BYTES(sz), BYTES(sz), 1); \
		if (excode) return; \
		for (j = 0; j < BYTES(sz); j++) \
			p[j] = r(R1).C(b,BYTES(sz)-1-j); \
	} while (0)

void cstoreez_b(U32 opcode) { CSTOREE(b,zero); }
void cstoreez_d(U32 opcode) { CSTOREE(d,zero); }
void cstoreez_q(U32 opcode) { CSTOREE(q,zero); }
void cstoreez_o(U32 opcode) { CSTOREE(o,zero); }
void cstoreen_b(U32 opcode) { CSTOREE(b,nan); }
void cstoreen_d(U32 opcode) { CSTOREE(d,nan); }
void cstoreen_q(U32 opcode) { CSTOREE(q,nan); }
void cstoreen_o(U32 opcode) { CSTOREE(o,nan); }
void cstoreel_b(U32 opcode) { CSTOREE(b,lsb); }
void cstoreel_d(U32 opcode) { CSTOREE(d,lsb); }
void cstoreel_q(U32 opcode) { CSTOREE(q,lsb); }
void cstoreel_o(U32 opcode) { CSTOREE(o,lsb); }
void cstoreem_b(U32 opcode) { CSTOREE(b,msb); }
void cstoreem_d(U32 opcode) { CSTOREE(d,msb); }
void cstoreem_q(U32 opcode) { CSTOREE(q,msb); }
void cstoreem_o(U32 opcode) { CSTOREE(o,msb); }
void cstoreenz_b(U32 opcode) { CSTOREE(b,!zero); }
void cstoreenz_d(U32 opcode) { CSTOREE(d,!zero); }
void cstoreenz_q(U32 opcode) { CSTOREE(q,!zero); }
void cstoreenz_o(U32 opcode) { CSTOREE(o,!zero); }
void cstoreenn_b(U32 opcode) { CSTOREE(b,!nan); }
void cstoreenn_d(U32 opcode) { CSTOREE(d,!nan); }
void cstoreenn_q(U32 opcode) { CSTOREE(q,!nan); }
void cstoreenn_o(U32 opcode) { CSTOREE(o,!nan); }
void cstoreenl_b(U32 opcode) { CSTOREE(b,!lsb); }
void cstoreenl_d(U32 opcode) { CSTOREE(d,!lsb); }
void cstoreenl_q(U32 opcode) { CSTOREE(q,!lsb); }
void cstoreenl_o(U32 opcode) { CSTOREE(o,!lsb); }
void cstoreenm_b(U32 opcode) { CSTOREE(b,!msb); }
void cstoreenm_d(U32 opcode) { CSTOREE(d,!msb); }
void cstoreenm_q(U32 opcode) { CSTOREE(q,!msb); }
void cstoreenm_o(U32 opcode) { CSTOREE(o,!msb); }

/* XXX: verify semantics */
#define CLOAD(sz,cond) \
	do { \
		union reg r1 = reginit(R1); \
		char *p; \
		unsigned j; \
		if (!cond(R3)) break; \
		p = memmap(r(R2).C(o,0), BYTES(sz), BYTES(sz), 0); \
		if (excode) return; \
		for (j = 0; j < BYTES(sz); j++) \
			r1.C(b,j) = p[j]; \
		if (R1) r(R1) = r1; \
	} while (0)

void cloadz_b(U32 opcode) { CLOAD(b,zero); }
void cloadz_d(U32 opcode) { CLOAD(d,zero); }
void cloadz_q(U32 opcode) { CLOAD(q,zero); }
void cloadz_o(U32 opcode) { CLOAD(o,zero); }
void cloadn_b(U32 opcode) { CLOAD(b,nan); }
void cloadn_d(U32 opcode) { CLOAD(d,nan); }
void cloadn_q(U32 opcode) { CLOAD(q,nan); }
void cloadn_o(U32 opcode) { CLOAD(o,nan); }
void cloadl_b(U32 opcode) { CLOAD(b,lsb); }
void cloadl_d(U32 opcode) { CLOAD(d,lsb); }
void cloadl_q(U32 opcode) { CLOAD(q,lsb); }
void cloadl_o(U32 opcode) { CLOAD(o,lsb); }
void cloadm_b(U32 opcode) { CLOAD(b,msb); }
void cloadm_d(U32 opcode) { CLOAD(d,msb); }
void cloadm_q(U32 opcode) { CLOAD(q,msb); }
void cloadm_o(U32 opcode) { CLOAD(o,msb); }
void cloadnz_b(U32 opcode) { CLOAD(b,!zero); }
void cloadnz_d(U32 opcode) { CLOAD(d,!zero); }
void cloadnz_q(U32 opcode) { CLOAD(q,!zero); }
void cloadnz_o(U32 opcode) { CLOAD(o,!zero); }
void cloadnn_b(U32 opcode) { CLOAD(b,!nan); }
void cloadnn_d(U32 opcode) { CLOAD(d,!nan); }
void cloadnn_q(U32 opcode) { CLOAD(q,!nan); }
void cloadnn_o(U32 opcode) { CLOAD(o,!nan); }
void cloadnl_b(U32 opcode) { CLOAD(b,!lsb); }
void cloadnl_d(U32 opcode) { CLOAD(d,!lsb); }
void cloadnl_q(U32 opcode) { CLOAD(q,!lsb); }
void cloadnl_o(U32 opcode) { CLOAD(o,!lsb); }
void cloadnm_b(U32 opcode) { CLOAD(b,!msb); }
void cloadnm_d(U32 opcode) { CLOAD(d,!msb); }
void cloadnm_q(U32 opcode) { CLOAD(q,!msb); }
void cloadnm_o(U32 opcode) { CLOAD(o,!msb); }

/* XXX: verify semantics */
#define CLOADE(sz,cond) \
	do { \
		union reg r1 = reginit(R1); \
		char *p; \
		unsigned j; \
		if (!cond(R3)) break; \
		p = memmap(r(R2).C(o,0), BYTES(sz), BYTES(sz), 0); \
		if (excode) return; \
		for (j = 0; j < BYTES(sz); j++) \
			r1.C(b,BYTES(sz)-1-j) = p[j]; \
		if (R1) r(R1) = r1; \
	} while (0)

void cloadez_b(U32 opcode) { CLOADE(b,zero); }
void cloadez_d(U32 opcode) { CLOADE(d,zero); }
void cloadez_q(U32 opcode) { CLOADE(q,zero); }
void cloadez_o(U32 opcode) { CLOADE(o,zero); }
void cloaden_b(U32 opcode) { CLOADE(b,nan); }
void cloaden_d(U32 opcode) { CLOADE(d,nan); }
void cloaden_q(U32 opcode) { CLOADE(q,nan); }
void cloaden_o(U32 opcode) { CLOADE(o,nan); }
void cloadel_b(U32 opcode) { CLOADE(b,lsb); }
void cloadel_d(U32 opcode) { CLOADE(d,lsb); }
void cloadel_q(U32 opcode) { CLOADE(q,lsb); }
void cloadel_o(U32 opcode) { CLOADE(o,lsb); }
void cloadem_b(U32 opcode) { CLOADE(b,msb); }
void cloadem_d(U32 opcode) { CLOADE(d,msb); }
void cloadem_q(U32 opcode) { CLOADE(q,msb); }
void cloadem_o(U32 opcode) { CLOADE(o,msb); }
void cloadenz_b(U32 opcode) { CLOADE(b,!zero); }
void cloadenz_d(U32 opcode) { CLOADE(d,!zero); }
void cloadenz_q(U32 opcode) { CLOADE(q,!zero); }
void cloadenz_o(U32 opcode) { CLOADE(o,!zero); }
void cloadenn_b(U32 opcode) { CLOADE(b,!nan); }
void cloadenn_d(U32 opcode) { CLOADE(d,!nan); }
void cloadenn_q(U32 opcode) { CLOADE(q,!nan); }
void cloadenn_o(U32 opcode) { CLOADE(o,!nan); }
void cloadenl_b(U32 opcode) { CLOADE(b,!lsb); }
void cloadenl_d(U32 opcode) { CLOADE(d,!lsb); }
void cloadenl_q(U32 opcode) { CLOADE(q,!lsb); }
void cloadenl_o(U32 opcode) { CLOADE(o,!lsb); }
void cloadenm_b(U32 opcode) { CLOADE(b,!msb); }
void cloadenm_d(U32 opcode) { CLOADE(d,!msb); }
void cloadenm_q(U32 opcode) { CLOADE(q,!msb); }
void cloadenm_o(U32 opcode) { CLOADE(o,!msb); }

/* XXX: ss/sl is missing */

/* XXX: cachemm is deprecated */

/* XXX: manual differs */
#define MOVE(sz,cond) \
	do { \
		union reg r1 = reginit(R1); \
		if (cond(R3)) r1.C(sz,0) = r(R2).C(sz,0); \
		r(R1) = r1; \
	} while (0)

void movez_b(U32 opcode) { if (R1) MOVE(b,zero); }
void movez_d(U32 opcode) { if (R1) MOVE(d,zero); }
void movez_q(U32 opcode) { if (R1) MOVE(q,zero); }
void movez_o(U32 opcode) { if (R1) MOVE(o,zero); }
void moven_b(U32 opcode) { if (R1) MOVE(b,nan); }
void moven_d(U32 opcode) { if (R1) MOVE(d,nan); }
void moven_q(U32 opcode) { if (R1) MOVE(q,nan); }
void moven_o(U32 opcode) { if (R1) MOVE(o,nan); }
void movel_b(U32 opcode) { if (R1) MOVE(b,lsb); }
void movel_d(U32 opcode) { if (R1) MOVE(d,lsb); }
void movel_q(U32 opcode) { if (R1) MOVE(q,lsb); }
void movel_o(U32 opcode) { if (R1) MOVE(o,lsb); }
void movem_b(U32 opcode) { if (R1) MOVE(b,msb); }
void movem_d(U32 opcode) { if (R1) MOVE(d,msb); }
void movem_q(U32 opcode) { if (R1) MOVE(q,msb); }
void movem_o(U32 opcode) { if (R1) MOVE(o,msb); }
void movenz_b(U32 opcode) { if (R1) MOVE(b,!zero); }
void movenz_d(U32 opcode) { if (R1) MOVE(d,!zero); }
void movenz_q(U32 opcode) { if (R1) MOVE(q,!zero); }
void movenz_o(U32 opcode) { if (R1) MOVE(o,!zero); }
void movenn_b(U32 opcode) { if (R1) MOVE(b,!nan); }
void movenn_d(U32 opcode) { if (R1) MOVE(d,!nan); }
void movenn_q(U32 opcode) { if (R1) MOVE(q,!nan); }
void movenn_o(U32 opcode) { if (R1) MOVE(o,!nan); }
void movenl_b(U32 opcode) { if (R1) MOVE(b,!lsb); }
void movenl_d(U32 opcode) { if (R1) MOVE(d,!lsb); }
void movenl_q(U32 opcode) { if (R1) MOVE(q,!lsb); }
void movenl_o(U32 opcode) { if (R1) MOVE(o,!lsb); }
void movenm_b(U32 opcode) { if (R1) MOVE(b,!msb); }
void movenm_d(U32 opcode) { if (R1) MOVE(d,!msb); }
void movenm_q(U32 opcode) { if (R1) MOVE(q,!msb); }
void movenm_o(U32 opcode) { if (R1) MOVE(o,!msb); }

/* XXX: undocumented */
/* XXX: make it conditional (that is, `moves{cc}'? */
#define WIDEN(sz) \
	do { \
		union reg r1; \
		unsigned i; \
		r1.SC(o,0) = r(R2).SC(sz,0); \
		for (i = 1; i < CHUNKS(o); i++) \
			r1.SC(o,i) = r1.SC(o,0) >> (BITS(o)-1); \
		r(R1) = r1; \
	} while (0)

void widen_b(U32 opcode) { if (R1) WIDEN(b); }
void widen_d(U32 opcode) { if (R1) WIDEN(d); }
void widen_q(U32 opcode) { if (R1) WIDEN(q); }
void widen_o(U32 opcode) { if (R1) WIDEN(o); }

/* loadcons */

/* Note: partial write */
#define LOADCONS(i) \
	do { \
		r(R1).C(d,i) = UIMM16; \
	} while (0)

void loadcons_0(U32 opcode) { if (R1) LOADCONS(0); }
void loadcons_1(U32 opcode) { if (R1) LOADCONS(1); }
void loadcons_2(U32 opcode) { if (R1) LOADCONS(2); }
void loadcons_3(U32 opcode) { if (R1) LOADCONS(3); }

/* Note: partial write */
/* XXX: sign-extend how much? */
#define LOADCONSX(i) \
	do { \
		unsigned j; \
		r(R1).C(d,i) = SIMM16; \
		for (j = i + 1; j < 4; j++) \
			r(R1).C(d,j) = SIMM16 >> 15; \
	} while (0)

void loadconsx_0(U32 opcode) { if (R1) LOADCONSX(0); }
void loadconsx_1(U32 opcode) { if (R1) LOADCONSX(1); }
void loadconsx_2(U32 opcode) { if (R1) LOADCONSX(2); }
void loadconsx_3(U32 opcode) { if (R1) LOADCONSX(3); }

/* XXX: assumes the PC has already been advanced */
/* XXX: handle upper chunks?! */
#define LOADADDR() \
	do { \
		union reg r1 = regs.r_pc; \
		r1.C(o,0) += r(R2).C(o,0); \
		r(R1) = r1; \
	} while (0)

void loadaddr (U32 opcode) { if (R1) LOADADDR(); }
void loadaddrd(U32 opcode) { if (R1) LOADADDR(); }

/* XXX: assumes the PC has already been advanced */
/* XXX: handle upper chunks?! */
#define LOADADDRI() \
	do { \
		union reg r1 = regs.r_pc; \
		r1.C(o,0) += SIMM17; \
		r(R1) = r1; \
	} while (0)

void loadaddri (U32 opcode) { if (R1) LOADADDRI(); }
void loadaddrid(U32 opcode) { if (R1) LOADADDRI(); }

/* XXX: get[i]/put[i] follow at the end */

/* XXX: differs from manual */
/* Note: loadm/storem use host default endian! */
void loadm(U32 opcode) {
	void *p;

	if (R3 > R1) {
		ex(EX_INVALID);
		return;
	}
	p = memmap(r(R2).C(o,0), BYTES(o), (R1 - R3 + 1) * sizeof(union reg), 0);
	if (excode) return;
	memcpy(&r(R3), p, (R1 - R3 + 1) * sizeof(union reg));
	if (!R3) memset(&r(0), 0, sizeof(union reg));	/* r0 may have changed! */
}

/* XXX: differs from manual */
void storem(U32 opcode) {
	void *p;

	if (R3 > R1) {
		ex(EX_INVALID);
		return;
	}
	p = memmap(r(R2).C(o,0), BYTES(o), (R1 - R3 + 1) * sizeof(union reg), 1);
	if (excode) return;
	memcpy(p, &r(R3), (R1 - R3 + 1) * sizeof(union reg));
}

/* XXX: left-shift immediate operand? */
void nop(U32 opcode) { regs.r_pc.C(o,0) += opcode & 0xffffff; }

/* XXX: assumes the PC has already been advanced */
/* Note: R1 and R2 might be the same! */
#define JMP(cond) \
	do { \
		if (cond(R3)) { \
			union reg t = r(R2); \
			if (R1) r(R1) = regs.r_pc; \
			regs.r_pc = t; \
		} \
	} while (0)

void jmpz(U32 opcode) { JMP(zero); }
void jmpn(U32 opcode) { JMP(nan); }
void jmpl(U32 opcode) { JMP(lsb); }
void jmpm(U32 opcode) { JMP(msb); }
void jmpnz(U32 opcode) { JMP(!zero); }
void jmpnn(U32 opcode) { JMP(!nan); }
void jmpnl(U32 opcode) { JMP(!lsb); }
void jmpnm(U32 opcode) { JMP(!msb); }

/* XXX: no size flags? */
/* XXX: handle upper bits?! */
void loop(U32 opcode) {
	if (!zero(R1)) regs.r_pc = r(R2);
	if (R1) r(R1).C(o,0) -= 1;
}

#define fcpu_io(func, a1, a2, a3)	\
	do { \
		int fd = a1; \
		size_t buf = a2; \
		size_t len = a3; \
		size_t n; \
		if (fd != a1 || len > ramsize || buf + len > ramsize) { \
			a1 = -EINVAL; \
			break; \
		} \
		n = func(fd, addrbase + buf, len); \
		if (n == (size_t)-1) { \
			a1 = -errno; \
			break; \
		} \
		a1 = n; \
	} while (0)

/* provide simple I/O via syscall */
void syscall(U32 opcode) {
	switch (UIMM16) {
		case 0:
			fprintf(stderr, "program exited with status %u\n", r(1).C(b,0));
			exit(0);
		case 1:
			fcpu_io(read, r(1).C(o,0), r(2).C(o,0), r(3).C(o,0));
			break;
		case 2:
			fcpu_io(write, r(1).C(o,0), r(2).C(o,0), r(3).C(o,0));
			break;
		default:
			r(1).C(o,0) = -ENOSYS;
			break;
	}
	if (R1) memset(&r(R1), 0, sizeof(union reg));
}

void trap(U32 opcode) {
	switch (UIMM16) {
		default:
			r(1).C(o,0) = -ENOSYS;
			break;
	}
	if (R1) memset(&r(R1), 0, sizeof(union reg));
}

void halt(U32 opcode) {
	ex(EX_HALT);
}

/* XXX: rfe is unimplemented */
/* XXX: srb_save is a no-op */
/* XXX: srb_restore is a no-op */
/* XXX: serialize is a no-op */

enum {
	SR_NUMBERS,
	SR_FAMILY,
	SR_STEPPING,
	SR_MAX_SIZE,
	SR_SIZE_0,
	SR_SIZE_1,
	SR_SIZE_2,
	SR_SIZE_3,
	SR_MAX_CHUNK_SIZE,
	SR_CYCLE,
	SR_PAGING,
	SR_CONTROL,
	SR_IRQ_BASE,
	SR_IRQ_SIZE,
	SR_TRAP_BASE,
	SR_TRAP_SIZE,
	SR_SYSCALL_BASE,
	SR_SYSCALL_SIZE,
	SR_TLBMISS_BASE,
	SR_URL,
	SR_URL_last = SR_URL + 7,
	SR_LAST_SR
};

#define _SR_RD	(1u << 0)
#define _SR_WR	(1u << 1)
#define _SR_RW	(_SR_RD|_SR_WR)

/* XXX: really limit special registers to 64 bits? */
struct {
	U32 p_super;
	U32 p_user;
	union {
		U64 v;
		char s[8];
	} u;
} sregs[SR_LAST_SR] = {
	[SR_NUMBERS]		= { _SR_RD, _SR_RD, { SR_LAST_SR } },
	[SR_FAMILY]			= { _SR_RD, _SR_RD, { 0xfc0 } },
	[SR_STEPPING]		= { _SR_RD, _SR_RD, { 0 } },
	[SR_MAX_SIZE]		= { _SR_RD, _SR_RD, { MAXSIZE } },
	[SR_SIZE_0]			= { _SR_RD, _SR_RD, { 1 } },
	[SR_SIZE_1]			= { _SR_RD, _SR_RD, { 2 } },
	[SR_SIZE_2]			= { _SR_RD, _SR_RD, { 4 } },
	[SR_SIZE_3]			= { _SR_RD, _SR_RD, { 8 } },
	[SR_MAX_CHUNK_SIZE]	= { _SR_RD, _SR_RD, { 8 } },
	[SR_CYCLE]			= { _SR_RD, _SR_RD, { 0 } },	/* volatile */
	[SR_PAGING]			= { _SR_RW, _SR_RD, { 0 } },
	[SR_CONTROL]		= { _SR_RW, _SR_RD, { 0 } },
	[SR_IRQ_BASE]		= { _SR_RW, _SR_RD, { 0 } },
	[SR_IRQ_SIZE]		= { _SR_RW, _SR_RD, { 0 } },
	[SR_TRAP_BASE]		= { _SR_RW, _SR_RD, { 0 } },
	[SR_TRAP_SIZE]		= { _SR_RW, _SR_RD, { 0 } },
	[SR_SYSCALL_BASE]	= { _SR_RW, _SR_RD, { 0 } },
	[SR_SYSCALL_SIZE]	= { _SR_RW, _SR_RD, { 0 } },
	[SR_TLBMISS_BASE]	= { _SR_RW, _SR_RD, { 0 } },
	[SR_URL]			= { _SR_RD, _SR_RD, { .s = "http://w" } },
						  { _SR_RD, _SR_RD, { .s = "ww.f-cpu" } },
						  { _SR_RD, _SR_RD, { .s = ".org" } },
						  { _SR_RD, _SR_RD, { .s = "" } },
						  { _SR_RD, _SR_RD, { .s = "" } },
						  { _SR_RD, _SR_RD, { .s = "" } },
						  { _SR_RD, _SR_RD, { .s = "" } },
						  { _SR_RD, _SR_RD, { .s = "" } },
};

/* XXX: currently, we always are superuser */

void get(U32 opcode) {
	const U64 n = r(R2).C(o,0);
	union reg r1 = reginit(R1);

	if (n >= SR_LAST_SR) {
		ex(EX_INVALID);
		return;
	}
	if (!(sregs[n].p_super & _SR_RD)) {
		ex(EX_ACCESS);
		return;
	}
	r(1).C(o,0) = sregs[n].u.v;
	if (R1) r(R1) = r1;
}

void geti(U32 opcode) {
	const U64 n = UIMM16;
	union reg r1 = reginit(R1);

	if (n >= SR_LAST_SR) {
		ex(EX_INVALID);
		return;
	}
	if (!(sregs[n].p_super & _SR_RD)) {
		ex(EX_ACCESS);
		return;
	}
	r(1).C(o,0) = sregs[n].u.v;
	if (R1) r(R1) = r1;
}

void put(U32 opcode) {
	const U64 n = r(R2).C(o,0);

	if (n >= SR_LAST_SR) {
		ex(EX_INVALID);
		return;
	}
	if (!(sregs[n].p_super & _SR_WR)) {
		ex(EX_ACCESS);
		return;
	}
	sregs[n].u.v = r(R1).C(o,0);
}

void puti(U32 opcode) {
	const U64 n = UIMM16;

	if (n >= SR_LAST_SR) {
		ex(EX_INVALID);
		return;
	}
	if (!(sregs[n].p_super & _SR_WR)) {
		ex(EX_ACCESS);
		return;
	}
	sregs[n].u.v = r(R1).C(o,0);
}

/* emulator main */

#define X(op)	((op)<<24)

void
initemu(void) {
	memset(&regs, 0, sizeof(regs));
	excode = EX_NONE;
}

int
emulate1(U32 opcode) {
	switch (opcode & 0xff000000) {
		/* no args */
		case X(OP_NOP):  nop(opcode); return 0;
		case X(OP_HALT):  halt(opcode); return 0;
	}
	switch (opcode & 0xff800000) {
		/* imm17, r1 */
		case X(OP_LOADADDRI) | LOADADDR_DATA:  loadaddrid(opcode); return 0;
		case X(OP_LOADADDRI):  loadaddri(opcode); return 0;
	}
	switch (opcode & 0xffc00000) {
		/* imm16, r1 */
		case X(OP_LOADCONS) | (0 << 22):  loadcons_0(opcode); return 0;
		case X(OP_LOADCONS) | (1 << 22):  loadcons_1(opcode); return 0;
		case X(OP_LOADCONS) | (2 << 22):  loadcons_2(opcode); return 0;
		case X(OP_LOADCONS) | (3 << 22):  loadcons_3(opcode); return 0;
		case X(OP_LOADCONSX) | (0 << 22):  loadconsx_0(opcode); return 0;
		case X(OP_LOADCONSX) | (1 << 22):  loadconsx_1(opcode); return 0;
		case X(OP_LOADCONSX) | (2 << 22):  loadconsx_2(opcode); return 0;
		case X(OP_LOADCONSX) | (3 << 22):  loadconsx_3(opcode); return 0;
		case X(OP_GETI):  geti(opcode); return 0;
		case X(OP_PUTI):  puti(opcode); return 0;
		case X(OP_SYSCALL) | SYSCALL_TRAP:  trap(opcode); return 0;
		case X(OP_SYSCALL):  syscall(opcode); return 0;
	}
	switch (opcode & 0xffe00000) {
		/* imm9, r2, r1 */
		case X(OP_ANDI) | ISIZE_16BIT:  andi_d(opcode); return 0;
		case X(OP_ANDI) | ISIZE_32BIT:  andi_q(opcode); return 0;
		case X(OP_ANDI) | ISIZE_64BIT:  andi_o(opcode); return 0;
		case X(OP_ANDI) | ISIZE_8BIT:   andi_b(opcode); return 0;
		case X(OP_ANDNI) | ISIZE_16BIT:  andni_d(opcode); return 0;
		case X(OP_ANDNI) | ISIZE_32BIT:  andni_q(opcode); return 0;
		case X(OP_ANDNI) | ISIZE_64BIT:  andni_o(opcode); return 0;
		case X(OP_ANDNI) | ISIZE_8BIT:   andni_b(opcode); return 0;
		case X(OP_NANDI) | ISIZE_16BIT:  nandi_d(opcode); return 0;
		case X(OP_NANDI) | ISIZE_32BIT:  nandi_q(opcode); return 0;
		case X(OP_NANDI) | ISIZE_64BIT:  nandi_o(opcode); return 0;
		case X(OP_NANDI) | ISIZE_8BIT:   nandi_b(opcode); return 0;
		case X(OP_NORI) | ISIZE_16BIT:  nori_d(opcode); return 0;
		case X(OP_NORI) | ISIZE_32BIT:  nori_q(opcode); return 0;
		case X(OP_NORI) | ISIZE_64BIT:  nori_o(opcode); return 0;
		case X(OP_NORI) | ISIZE_8BIT:   nori_b(opcode); return 0;
		case X(OP_ORI) | ISIZE_16BIT:  ori_d(opcode); return 0;
		case X(OP_ORI) | ISIZE_32BIT:  ori_q(opcode); return 0;
		case X(OP_ORI) | ISIZE_64BIT:  ori_o(opcode); return 0;
		case X(OP_ORI) | ISIZE_8BIT:   ori_b(opcode); return 0;
		case X(OP_ORNI) | ISIZE_16BIT:  orni_d(opcode); return 0;
		case X(OP_ORNI) | ISIZE_32BIT:  orni_q(opcode); return 0;
		case X(OP_ORNI) | ISIZE_64BIT:  orni_o(opcode); return 0;
		case X(OP_ORNI) | ISIZE_8BIT:   orni_b(opcode); return 0;
		case X(OP_XNORI) | ISIZE_16BIT:  xnori_d(opcode); return 0;
		case X(OP_XNORI) | ISIZE_32BIT:  xnori_q(opcode); return 0;
		case X(OP_XNORI) | ISIZE_64BIT:  xnori_o(opcode); return 0;
		case X(OP_XNORI) | ISIZE_8BIT:   xnori_b(opcode); return 0;
		case X(OP_XORI) | ISIZE_16BIT:  xori_d(opcode); return 0;
		case X(OP_XORI) | ISIZE_32BIT:  xori_q(opcode); return 0;
		case X(OP_XORI) | ISIZE_64BIT:  xori_o(opcode); return 0;
		case X(OP_XORI) | ISIZE_8BIT:   xori_b(opcode); return 0;
		/* loadi/storei imm9, r2, r1 */
		case X(OP_LOADI) | ISIZE_16BIT:  loadi_d(opcode); return 0;
		case X(OP_LOADI) | ISIZE_32BIT:  loadi_q(opcode); return 0;
		case X(OP_LOADI) | ISIZE_64BIT:  loadi_o(opcode); return 0;
		case X(OP_LOADI) | ISIZE_8BIT:   loadi_b(opcode); return 0;
		case X(OP_LOADI) | LS_BIG_ENDIAN | ISIZE_16BIT:  loadie_d(opcode); return 0;
		case X(OP_LOADI) | LS_BIG_ENDIAN | ISIZE_32BIT:  loadie_q(opcode); return 0;
		case X(OP_LOADI) | LS_BIG_ENDIAN | ISIZE_64BIT:  loadie_o(opcode); return 0;
		case X(OP_LOADI) | LS_BIG_ENDIAN | ISIZE_8BIT:   loadie_b(opcode); return 0;
		case X(OP_LOADIF) | ISIZE_16BIT:  loadi_d(opcode); return 0;
		case X(OP_LOADIF) | ISIZE_32BIT:  loadi_q(opcode); return 0;
		case X(OP_LOADIF) | ISIZE_64BIT:  loadi_o(opcode); return 0;
		case X(OP_LOADIF) | ISIZE_8BIT:   loadi_b(opcode); return 0;
		case X(OP_LOADIF) | LS_BIG_ENDIAN | ISIZE_16BIT:  loadie_d(opcode); return 0;
		case X(OP_LOADIF) | LS_BIG_ENDIAN | ISIZE_32BIT:  loadie_q(opcode); return 0;
		case X(OP_LOADIF) | LS_BIG_ENDIAN | ISIZE_64BIT:  loadie_o(opcode); return 0;
		case X(OP_LOADIF) | LS_BIG_ENDIAN | ISIZE_8BIT:   loadie_b(opcode); return 0;
		case X(OP_STOREI) | ISIZE_16BIT:  storei_d(opcode); return 0;
		case X(OP_STOREI) | ISIZE_32BIT:  storei_q(opcode); return 0;
		case X(OP_STOREI) | ISIZE_64BIT:  storei_o(opcode); return 0;
		case X(OP_STOREI) | ISIZE_8BIT:   storei_b(opcode); return 0;
		case X(OP_STOREI) | LS_BIG_ENDIAN | ISIZE_16BIT:  storeie_d(opcode); return 0;
		case X(OP_STOREI) | LS_BIG_ENDIAN | ISIZE_32BIT:  storeie_q(opcode); return 0;
		case X(OP_STOREI) | LS_BIG_ENDIAN | ISIZE_64BIT:  storeie_o(opcode); return 0;
		case X(OP_STOREI) | LS_BIG_ENDIAN | ISIZE_8BIT:   storeie_b(opcode); return 0;
		case X(OP_STOREIF) | ISIZE_16BIT:  storei_d(opcode); return 0;
		case X(OP_STOREIF) | ISIZE_32BIT:  storei_q(opcode); return 0;
		case X(OP_STOREIF) | ISIZE_64BIT:  storei_o(opcode); return 0;
		case X(OP_STOREIF) | ISIZE_8BIT:   storei_b(opcode); return 0;
		case X(OP_STOREIF) | LS_BIG_ENDIAN | ISIZE_16BIT:  storeie_d(opcode); return 0;
		case X(OP_STOREIF) | LS_BIG_ENDIAN | ISIZE_32BIT:  storeie_q(opcode); return 0;
		case X(OP_STOREIF) | LS_BIG_ENDIAN | ISIZE_64BIT:  storeie_o(opcode); return 0;
		case X(OP_STOREIF) | LS_BIG_ENDIAN | ISIZE_8BIT:   storeie_b(opcode); return 0;
		/* load/store r3, r2, r1 + 3 bit stream hints */
		case X(OP_LOAD) | ISIZE_16BIT:  load_d(opcode); return 0;
		case X(OP_LOAD) | ISIZE_32BIT:  load_q(opcode); return 0;
		case X(OP_LOAD) | ISIZE_64BIT:  load_o(opcode); return 0;
		case X(OP_LOAD) | ISIZE_8BIT:   load_b(opcode); return 0;
		case X(OP_LOAD) | LS_BIG_ENDIAN | ISIZE_16BIT:  loade_d(opcode); return 0;
		case X(OP_LOAD) | LS_BIG_ENDIAN | ISIZE_32BIT:  loade_q(opcode); return 0;
		case X(OP_LOAD) | LS_BIG_ENDIAN | ISIZE_64BIT:  loade_o(opcode); return 0;
		case X(OP_LOAD) | LS_BIG_ENDIAN | ISIZE_8BIT:   loade_b(opcode); return 0;
		case X(OP_LOADF) | ISIZE_16BIT:  load_d(opcode); return 0;
		case X(OP_LOADF) | ISIZE_32BIT:  load_q(opcode); return 0;
		case X(OP_LOADF) | ISIZE_64BIT:  load_o(opcode); return 0;
		case X(OP_LOADF) | ISIZE_8BIT:   load_b(opcode); return 0;
		case X(OP_LOADF) | LS_BIG_ENDIAN | ISIZE_16BIT:  loade_d(opcode); return 0;
		case X(OP_LOADF) | LS_BIG_ENDIAN | ISIZE_32BIT:  loade_q(opcode); return 0;
		case X(OP_LOADF) | LS_BIG_ENDIAN | ISIZE_64BIT:  loade_o(opcode); return 0;
		case X(OP_LOADF) | LS_BIG_ENDIAN | ISIZE_8BIT:   loade_b(opcode); return 0;
		case X(OP_STORE) | ISIZE_16BIT:  store_d(opcode); return 0;
		case X(OP_STORE) | ISIZE_32BIT:  store_q(opcode); return 0;
		case X(OP_STORE) | ISIZE_64BIT:  store_o(opcode); return 0;
		case X(OP_STORE) | ISIZE_8BIT:   store_b(opcode); return 0;
		case X(OP_STORE) | LS_BIG_ENDIAN | ISIZE_16BIT:  storee_d(opcode); return 0;
		case X(OP_STORE) | LS_BIG_ENDIAN | ISIZE_32BIT:  storee_q(opcode); return 0;
		case X(OP_STORE) | LS_BIG_ENDIAN | ISIZE_64BIT:  storee_o(opcode); return 0;
		case X(OP_STORE) | LS_BIG_ENDIAN | ISIZE_8BIT:   storee_b(opcode); return 0;
		case X(OP_STOREF) | ISIZE_16BIT:  store_d(opcode); return 0;
		case X(OP_STOREF) | ISIZE_32BIT:  store_q(opcode); return 0;
		case X(OP_STOREF) | ISIZE_64BIT:  store_o(opcode); return 0;
		case X(OP_STOREF) | ISIZE_8BIT:   store_b(opcode); return 0;
		case X(OP_STOREF) | LS_BIG_ENDIAN | ISIZE_16BIT:  storee_d(opcode); return 0;
		case X(OP_STOREF) | LS_BIG_ENDIAN | ISIZE_32BIT:  storee_q(opcode); return 0;
		case X(OP_STOREF) | LS_BIG_ENDIAN | ISIZE_64BIT:  storee_o(opcode); return 0;
		case X(OP_STOREF) | LS_BIG_ENDIAN | ISIZE_8BIT:   storee_b(opcode); return 0;
	}
	switch (opcode & 0xfff00000) {
		/* imm8, r2, r1 */
		case X(OP_ADDI) | ISIZE_16BIT:  addi_d(opcode); return 0;
		case X(OP_ADDI) | ISIZE_32BIT:  addi_q(opcode); return 0;
		case X(OP_ADDI) | ISIZE_64BIT:  addi_o(opcode); return 0;
		case X(OP_ADDI) | ISIZE_8BIT:   addi_b(opcode); return 0;
		case X(OP_ADDI) | SIMD_FLAG | ISIZE_16BIT:  saddi_d(opcode); return 0;
		case X(OP_ADDI) | SIMD_FLAG | ISIZE_32BIT:  saddi_q(opcode); return 0;
		case X(OP_ADDI) | SIMD_FLAG | ISIZE_64BIT:  saddi_o(opcode); return 0;
		case X(OP_ADDI) | SIMD_FLAG | ISIZE_8BIT:   saddi_b(opcode); return 0;
		case X(OP_CMPLEI) | CMP_SIGNED | ISIZE_16BIT:  cmplesi_d(opcode); return 0;
		case X(OP_CMPLEI) | CMP_SIGNED | ISIZE_32BIT:  cmplesi_q(opcode); return 0;
		case X(OP_CMPLEI) | CMP_SIGNED | ISIZE_64BIT:  cmplesi_o(opcode); return 0;
		case X(OP_CMPLEI) | CMP_SIGNED | ISIZE_8BIT:   cmplesi_b(opcode); return 0;
		case X(OP_CMPLEI) | ISIZE_16BIT:  cmplei_d(opcode); return 0;
		case X(OP_CMPLEI) | ISIZE_32BIT:  cmplei_q(opcode); return 0;
		case X(OP_CMPLEI) | ISIZE_64BIT:  cmplei_o(opcode); return 0;
		case X(OP_CMPLEI) | ISIZE_8BIT:   cmplei_b(opcode); return 0;
		case X(OP_CMPLEI) | SIMD_FLAG | CMP_SIGNED | ISIZE_16BIT:  scmplesi_d(opcode); return 0;
		case X(OP_CMPLEI) | SIMD_FLAG | CMP_SIGNED | ISIZE_32BIT:  scmplesi_q(opcode); return 0;
		case X(OP_CMPLEI) | SIMD_FLAG | CMP_SIGNED | ISIZE_64BIT:  scmplesi_o(opcode); return 0;
		case X(OP_CMPLEI) | SIMD_FLAG | CMP_SIGNED | ISIZE_8BIT:   scmplesi_b(opcode); return 0;
		case X(OP_CMPLEI) | SIMD_FLAG | ISIZE_16BIT:  scmplei_d(opcode); return 0;
		case X(OP_CMPLEI) | SIMD_FLAG | ISIZE_32BIT:  scmplei_q(opcode); return 0;
		case X(OP_CMPLEI) | SIMD_FLAG | ISIZE_64BIT:  scmplei_o(opcode); return 0;
		case X(OP_CMPLEI) | SIMD_FLAG | ISIZE_8BIT:   scmplei_b(opcode); return 0;
		case X(OP_CMPGI) | CMP_SIGNED | ISIZE_16BIT:  cmpgsi_d(opcode); return 0;
		case X(OP_CMPGI) | CMP_SIGNED | ISIZE_32BIT:  cmpgsi_q(opcode); return 0;
		case X(OP_CMPGI) | CMP_SIGNED | ISIZE_64BIT:  cmpgsi_o(opcode); return 0;
		case X(OP_CMPGI) | CMP_SIGNED | ISIZE_8BIT:   cmpgsi_b(opcode); return 0;
		case X(OP_CMPGI) | ISIZE_16BIT:  cmpgi_d(opcode); return 0;
		case X(OP_CMPGI) | ISIZE_32BIT:  cmpgi_q(opcode); return 0;
		case X(OP_CMPGI) | ISIZE_64BIT:  cmpgi_o(opcode); return 0;
		case X(OP_CMPGI) | ISIZE_8BIT:   cmpgi_b(opcode); return 0;
		case X(OP_CMPGI) | SIMD_FLAG | CMP_SIGNED | ISIZE_16BIT:  scmpgsi_d(opcode); return 0;
		case X(OP_CMPGI) | SIMD_FLAG | CMP_SIGNED | ISIZE_32BIT:  scmpgsi_q(opcode); return 0;
		case X(OP_CMPGI) | SIMD_FLAG | CMP_SIGNED | ISIZE_64BIT:  scmpgsi_o(opcode); return 0;
		case X(OP_CMPGI) | SIMD_FLAG | CMP_SIGNED | ISIZE_8BIT:   scmpgsi_b(opcode); return 0;
		case X(OP_CMPGI) | SIMD_FLAG | ISIZE_16BIT:  scmpgi_d(opcode); return 0;
		case X(OP_CMPGI) | SIMD_FLAG | ISIZE_32BIT:  scmpgi_q(opcode); return 0;
		case X(OP_CMPGI) | SIMD_FLAG | ISIZE_64BIT:  scmpgi_o(opcode); return 0;
		case X(OP_CMPGI) | SIMD_FLAG | ISIZE_8BIT:   scmpgi_b(opcode); return 0;
		case X(OP_DIVI) | DIV_REMAINDER | ISIZE_16BIT:  divremi_d(opcode); return 0;
		case X(OP_DIVI) | DIV_REMAINDER | ISIZE_32BIT:  divremi_q(opcode); return 0;
		case X(OP_DIVI) | DIV_REMAINDER | ISIZE_64BIT:  divremi_o(opcode); return 0;
		case X(OP_DIVI) | DIV_REMAINDER | ISIZE_8BIT:   divremi_b(opcode); return 0;
		case X(OP_DIVI) | ISIZE_16BIT:  divi_d(opcode); return 0;
		case X(OP_DIVI) | ISIZE_32BIT:  divi_q(opcode); return 0;
		case X(OP_DIVI) | ISIZE_64BIT:  divi_o(opcode); return 0;
		case X(OP_DIVI) | ISIZE_8BIT:   divi_b(opcode); return 0;
		case X(OP_DIVI) | SIMD_FLAG | DIV_REMAINDER | ISIZE_16BIT:  sdivremi_d(opcode); return 0;
		case X(OP_DIVI) | SIMD_FLAG | DIV_REMAINDER | ISIZE_32BIT:  sdivremi_q(opcode); return 0;
		case X(OP_DIVI) | SIMD_FLAG | DIV_REMAINDER | ISIZE_64BIT:  sdivremi_o(opcode); return 0;
		case X(OP_DIVI) | SIMD_FLAG | DIV_REMAINDER | ISIZE_8BIT:   sdivremi_b(opcode); return 0;
		case X(OP_DIVI) | SIMD_FLAG | ISIZE_16BIT:  sdivi_d(opcode); return 0;
		case X(OP_DIVI) | SIMD_FLAG | ISIZE_32BIT:  sdivi_q(opcode); return 0;
		case X(OP_DIVI) | SIMD_FLAG | ISIZE_64BIT:  sdivi_o(opcode); return 0;
		case X(OP_DIVI) | SIMD_FLAG | ISIZE_8BIT:   sdivi_b(opcode); return 0;
		case X(OP_MAXI) | CMP_SIGNED | ISIZE_16BIT:  maxsi_d(opcode); return 0;
		case X(OP_MAXI) | CMP_SIGNED | ISIZE_32BIT:  maxsi_q(opcode); return 0;
		case X(OP_MAXI) | CMP_SIGNED | ISIZE_64BIT:  maxsi_o(opcode); return 0;
		case X(OP_MAXI) | CMP_SIGNED | ISIZE_8BIT:   maxsi_b(opcode); return 0;
		case X(OP_MAXI) | ISIZE_16BIT:  maxi_d(opcode); return 0;
		case X(OP_MAXI) | ISIZE_32BIT:  maxi_q(opcode); return 0;
		case X(OP_MAXI) | ISIZE_64BIT:  maxi_o(opcode); return 0;
		case X(OP_MAXI) | ISIZE_8BIT:   maxi_b(opcode); return 0;
		case X(OP_MAXI) | SIMD_FLAG | CMP_SIGNED | ISIZE_16BIT:  smaxsi_d(opcode); return 0;
		case X(OP_MAXI) | SIMD_FLAG | CMP_SIGNED | ISIZE_32BIT:  smaxsi_q(opcode); return 0;
		case X(OP_MAXI) | SIMD_FLAG | CMP_SIGNED | ISIZE_64BIT:  smaxsi_o(opcode); return 0;
		case X(OP_MAXI) | SIMD_FLAG | CMP_SIGNED | ISIZE_8BIT:   smaxsi_b(opcode); return 0;
		case X(OP_MAXI) | SIMD_FLAG | ISIZE_16BIT:  smaxi_d(opcode); return 0;
		case X(OP_MAXI) | SIMD_FLAG | ISIZE_32BIT:  smaxi_q(opcode); return 0;
		case X(OP_MAXI) | SIMD_FLAG | ISIZE_64BIT:  smaxi_o(opcode); return 0;
		case X(OP_MAXI) | SIMD_FLAG | ISIZE_8BIT:   smaxi_b(opcode); return 0;
		case X(OP_MINI) | CMP_SIGNED | ISIZE_16BIT:  minsi_d(opcode); return 0;
		case X(OP_MINI) | CMP_SIGNED | ISIZE_32BIT:  minsi_q(opcode); return 0;
		case X(OP_MINI) | CMP_SIGNED | ISIZE_64BIT:  minsi_o(opcode); return 0;
		case X(OP_MINI) | CMP_SIGNED | ISIZE_8BIT:   minsi_b(opcode); return 0;
		case X(OP_MINI) | ISIZE_16BIT:  mini_d(opcode); return 0;
		case X(OP_MINI) | ISIZE_32BIT:  mini_q(opcode); return 0;
		case X(OP_MINI) | ISIZE_64BIT:  mini_o(opcode); return 0;
		case X(OP_MINI) | ISIZE_8BIT:   mini_b(opcode); return 0;
		case X(OP_MINI) | SIMD_FLAG | CMP_SIGNED | ISIZE_16BIT:  sminsi_d(opcode); return 0;
		case X(OP_MINI) | SIMD_FLAG | CMP_SIGNED | ISIZE_32BIT:  sminsi_q(opcode); return 0;
		case X(OP_MINI) | SIMD_FLAG | CMP_SIGNED | ISIZE_64BIT:  sminsi_o(opcode); return 0;
		case X(OP_MINI) | SIMD_FLAG | CMP_SIGNED | ISIZE_8BIT:   sminsi_b(opcode); return 0;
		case X(OP_MINI) | SIMD_FLAG | ISIZE_16BIT:  smini_d(opcode); return 0;
		case X(OP_MINI) | SIMD_FLAG | ISIZE_32BIT:  smini_q(opcode); return 0;
		case X(OP_MINI) | SIMD_FLAG | ISIZE_64BIT:  smini_o(opcode); return 0;
		case X(OP_MINI) | SIMD_FLAG | ISIZE_8BIT:   smini_b(opcode); return 0;
		case X(OP_MINMAXI) | CMP_SIGNED | ISIZE_16BIT:  minmaxsi_d(opcode); return 0;
		case X(OP_MINMAXI) | CMP_SIGNED | ISIZE_32BIT:  minmaxsi_q(opcode); return 0;
		case X(OP_MINMAXI) | CMP_SIGNED | ISIZE_64BIT:  minmaxsi_o(opcode); return 0;
		case X(OP_MINMAXI) | CMP_SIGNED | ISIZE_8BIT:   minmaxsi_b(opcode); return 0;
		case X(OP_MINMAXI) | ISIZE_16BIT:  minmaxi_d(opcode); return 0;
		case X(OP_MINMAXI) | ISIZE_32BIT:  minmaxi_q(opcode); return 0;
		case X(OP_MINMAXI) | ISIZE_64BIT:  minmaxi_o(opcode); return 0;
		case X(OP_MINMAXI) | ISIZE_8BIT:   minmaxi_b(opcode); return 0;
		case X(OP_MINMAXI) | SIMD_FLAG | CMP_SIGNED | ISIZE_16BIT:  sminmaxsi_d(opcode); return 0;
		case X(OP_MINMAXI) | SIMD_FLAG | CMP_SIGNED | ISIZE_32BIT:  sminmaxsi_q(opcode); return 0;
		case X(OP_MINMAXI) | SIMD_FLAG | CMP_SIGNED | ISIZE_64BIT:  sminmaxsi_o(opcode); return 0;
		case X(OP_MINMAXI) | SIMD_FLAG | CMP_SIGNED | ISIZE_8BIT:   sminmaxsi_b(opcode); return 0;
		case X(OP_MINMAXI) | SIMD_FLAG | ISIZE_16BIT:  sminmaxi_d(opcode); return 0;
		case X(OP_MINMAXI) | SIMD_FLAG | ISIZE_32BIT:  sminmaxi_q(opcode); return 0;
		case X(OP_MINMAXI) | SIMD_FLAG | ISIZE_64BIT:  sminmaxi_o(opcode); return 0;
		case X(OP_MINMAXI) | SIMD_FLAG | ISIZE_8BIT:   sminmaxi_b(opcode); return 0;
		case X(OP_MULI) | ISIZE_16BIT:  muli_d(opcode); return 0;
		case X(OP_MULI) | ISIZE_32BIT:  muli_q(opcode); return 0;
		case X(OP_MULI) | ISIZE_64BIT:  muli_o(opcode); return 0;
		case X(OP_MULI) | ISIZE_8BIT:   muli_b(opcode); return 0;
		case X(OP_MULI) | SIMD_FLAG | ISIZE_16BIT:  smuli_d(opcode); return 0;
		case X(OP_MULI) | SIMD_FLAG | ISIZE_32BIT:  smuli_q(opcode); return 0;
		case X(OP_MULI) | SIMD_FLAG | ISIZE_64BIT:  smuli_o(opcode); return 0;
		case X(OP_MULI) | SIMD_FLAG | ISIZE_8BIT:   smuli_b(opcode); return 0;
		case X(OP_REMI) | ISIZE_16BIT:  remi_d(opcode); return 0;
		case X(OP_REMI) | ISIZE_32BIT:  remi_q(opcode); return 0;
		case X(OP_REMI) | ISIZE_64BIT:  remi_o(opcode); return 0;
		case X(OP_REMI) | ISIZE_8BIT:   remi_b(opcode); return 0;
		case X(OP_REMI) | SIMD_FLAG | ISIZE_16BIT:  sremi_d(opcode); return 0;
		case X(OP_REMI) | SIMD_FLAG | ISIZE_32BIT:  sremi_q(opcode); return 0;
		case X(OP_REMI) | SIMD_FLAG | ISIZE_64BIT:  sremi_o(opcode); return 0;
		case X(OP_REMI) | SIMD_FLAG | ISIZE_8BIT:   sremi_b(opcode); return 0;
		case X(OP_SUBI) | ISIZE_16BIT:  subi_d(opcode); return 0;
		case X(OP_SUBI) | ISIZE_32BIT:  subi_q(opcode); return 0;
		case X(OP_SUBI) | ISIZE_64BIT:  subi_o(opcode); return 0;
		case X(OP_SUBI) | ISIZE_8BIT:   subi_b(opcode); return 0;
		case X(OP_SUBI) | SIMD_FLAG | ISIZE_16BIT:  ssubi_d(opcode); return 0;
		case X(OP_SUBI) | SIMD_FLAG | ISIZE_32BIT:  ssubi_q(opcode); return 0;
		case X(OP_SUBI) | SIMD_FLAG | ISIZE_64BIT:  ssubi_o(opcode); return 0;
		case X(OP_SUBI) | SIMD_FLAG | ISIZE_8BIT:   ssubi_b(opcode); return 0;
		/* XXX: imm6, r2, r1? */
		case X(OP_BCHGI) | ISIZE_16BIT:  bchgi_d(opcode); return 0;
		case X(OP_BCHGI) | ISIZE_32BIT:  bchgi_q(opcode); return 0;
		case X(OP_BCHGI) | ISIZE_64BIT:  bchgi_o(opcode); return 0;
		case X(OP_BCHGI) | ISIZE_8BIT:   bchgi_b(opcode); return 0;
		case X(OP_BCHGI) | SIMD_FLAG | ISIZE_16BIT:  sbchgi_d(opcode); return 0;
		case X(OP_BCHGI) | SIMD_FLAG | ISIZE_32BIT:  sbchgi_q(opcode); return 0;
		case X(OP_BCHGI) | SIMD_FLAG | ISIZE_64BIT:  sbchgi_o(opcode); return 0;
		case X(OP_BCHGI) | SIMD_FLAG | ISIZE_8BIT:   sbchgi_b(opcode); return 0;
		case X(OP_BCLRI) | ISIZE_16BIT:  bclri_d(opcode); return 0;
		case X(OP_BCLRI) | ISIZE_32BIT:  bclri_q(opcode); return 0;
		case X(OP_BCLRI) | ISIZE_64BIT:  bclri_o(opcode); return 0;
		case X(OP_BCLRI) | ISIZE_8BIT:   bclri_b(opcode); return 0;
		case X(OP_BCLRI) | SIMD_FLAG | ISIZE_16BIT:  sbclri_d(opcode); return 0;
		case X(OP_BCLRI) | SIMD_FLAG | ISIZE_32BIT:  sbclri_q(opcode); return 0;
		case X(OP_BCLRI) | SIMD_FLAG | ISIZE_64BIT:  sbclri_o(opcode); return 0;
		case X(OP_BCLRI) | SIMD_FLAG | ISIZE_8BIT:   sbclri_b(opcode); return 0;
		case X(OP_BITREVI) | ISIZE_16BIT:  bitrevi_d(opcode); return 0;
		case X(OP_BITREVI) | ISIZE_32BIT:  bitrevi_q(opcode); return 0;
		case X(OP_BITREVI) | ISIZE_64BIT:  bitrevi_o(opcode); return 0;
		case X(OP_BITREVI) | ISIZE_8BIT:   bitrevi_b(opcode); return 0;
		case X(OP_BITREVI) | SIMD_FLAG | ISIZE_16BIT:  sbitrevi_d(opcode); return 0;
		case X(OP_BITREVI) | SIMD_FLAG | ISIZE_32BIT:  sbitrevi_q(opcode); return 0;
		case X(OP_BITREVI) | SIMD_FLAG | ISIZE_64BIT:  sbitrevi_o(opcode); return 0;
		case X(OP_BITREVI) | SIMD_FLAG | ISIZE_8BIT:   sbitrevi_b(opcode); return 0;
		case X(OP_BSETI) | ISIZE_16BIT:  bseti_d(opcode); return 0;
		case X(OP_BSETI) | ISIZE_32BIT:  bseti_q(opcode); return 0;
		case X(OP_BSETI) | ISIZE_64BIT:  bseti_o(opcode); return 0;
		case X(OP_BSETI) | ISIZE_8BIT:   bseti_b(opcode); return 0;
		case X(OP_BSETI) | SIMD_FLAG | ISIZE_16BIT:  sbseti_d(opcode); return 0;
		case X(OP_BSETI) | SIMD_FLAG | ISIZE_32BIT:  sbseti_q(opcode); return 0;
		case X(OP_BSETI) | SIMD_FLAG | ISIZE_64BIT:  sbseti_o(opcode); return 0;
		case X(OP_BSETI) | SIMD_FLAG | ISIZE_8BIT:   sbseti_b(opcode); return 0;
		case X(OP_BTSTI) | ISIZE_16BIT:  btsti_d(opcode); return 0;
		case X(OP_BTSTI) | ISIZE_32BIT:  btsti_q(opcode); return 0;
		case X(OP_BTSTI) | ISIZE_64BIT:  btsti_o(opcode); return 0;
		case X(OP_BTSTI) | ISIZE_8BIT:   btsti_b(opcode); return 0;
		case X(OP_BTSTI) | SIMD_FLAG | ISIZE_16BIT:  sbtsti_d(opcode); return 0;
		case X(OP_BTSTI) | SIMD_FLAG | ISIZE_32BIT:  sbtsti_q(opcode); return 0;
		case X(OP_BTSTI) | SIMD_FLAG | ISIZE_64BIT:  sbtsti_o(opcode); return 0;
		case X(OP_BTSTI) | SIMD_FLAG | ISIZE_8BIT:   sbtsti_b(opcode); return 0;
		case X(OP_DBITREVI) | ISIZE_16BIT:  dbitrevi_d(opcode); return 0;
		case X(OP_DBITREVI) | ISIZE_32BIT:  dbitrevi_q(opcode); return 0;
		case X(OP_DBITREVI) | ISIZE_64BIT:  dbitrevi_o(opcode); return 0;
		case X(OP_DBITREVI) | ISIZE_8BIT:   dbitrevi_b(opcode); return 0;
		case X(OP_DBITREVI) | SIMD_FLAG | ISIZE_16BIT:  sdbitrevi_d(opcode); return 0;
		case X(OP_DBITREVI) | SIMD_FLAG | ISIZE_32BIT:  sdbitrevi_q(opcode); return 0;
		case X(OP_DBITREVI) | SIMD_FLAG | ISIZE_64BIT:  sdbitrevi_o(opcode); return 0;
		case X(OP_DBITREVI) | SIMD_FLAG | ISIZE_8BIT:   sdbitrevi_b(opcode); return 0;
		case X(OP_DSHIFTLI) | ISIZE_16BIT:  dshiftli_d(opcode); return 0;
		case X(OP_DSHIFTLI) | ISIZE_32BIT:  dshiftli_q(opcode); return 0;
		case X(OP_DSHIFTLI) | ISIZE_64BIT:  dshiftli_o(opcode); return 0;
		case X(OP_DSHIFTLI) | ISIZE_8BIT:   dshiftli_b(opcode); return 0;
		case X(OP_DSHIFTLI) | SIMD_FLAG | ISIZE_16BIT:  sdshiftli_d(opcode); return 0;
		case X(OP_DSHIFTLI) | SIMD_FLAG | ISIZE_32BIT:  sdshiftli_q(opcode); return 0;
		case X(OP_DSHIFTLI) | SIMD_FLAG | ISIZE_64BIT:  sdshiftli_o(opcode); return 0;
		case X(OP_DSHIFTLI) | SIMD_FLAG | ISIZE_8BIT:   sdshiftli_b(opcode); return 0;
		case X(OP_DSHIFTRAI) | ISIZE_16BIT:  dshiftrai_d(opcode); return 0;
		case X(OP_DSHIFTRAI) | ISIZE_32BIT:  dshiftrai_q(opcode); return 0;
		case X(OP_DSHIFTRAI) | ISIZE_64BIT:  dshiftrai_o(opcode); return 0;
		case X(OP_DSHIFTRAI) | ISIZE_8BIT:   dshiftrai_b(opcode); return 0;
		case X(OP_DSHIFTRAI) | SIMD_FLAG | ISIZE_16BIT:  sdshiftrai_d(opcode); return 0;
		case X(OP_DSHIFTRAI) | SIMD_FLAG | ISIZE_32BIT:  sdshiftrai_q(opcode); return 0;
		case X(OP_DSHIFTRAI) | SIMD_FLAG | ISIZE_64BIT:  sdshiftrai_o(opcode); return 0;
		case X(OP_DSHIFTRAI) | SIMD_FLAG | ISIZE_8BIT:   sdshiftrai_b(opcode); return 0;
		case X(OP_DSHIFTRI) | ISIZE_16BIT:  dshiftri_d(opcode); return 0;
		case X(OP_DSHIFTRI) | ISIZE_32BIT:  dshiftri_q(opcode); return 0;
		case X(OP_DSHIFTRI) | ISIZE_64BIT:  dshiftri_o(opcode); return 0;
		case X(OP_DSHIFTRI) | ISIZE_8BIT:   dshiftri_b(opcode); return 0;
		case X(OP_DSHIFTRI) | SIMD_FLAG | ISIZE_16BIT:  sdshiftri_d(opcode); return 0;
		case X(OP_DSHIFTRI) | SIMD_FLAG | ISIZE_32BIT:  sdshiftri_q(opcode); return 0;
		case X(OP_DSHIFTRI) | SIMD_FLAG | ISIZE_64BIT:  sdshiftri_o(opcode); return 0;
		case X(OP_DSHIFTRI) | SIMD_FLAG | ISIZE_8BIT:   sdshiftri_b(opcode); return 0;
		case X(OP_POPCI) | ISIZE_16BIT:  popci_d(opcode); return 0;
		case X(OP_POPCI) | ISIZE_32BIT:  popci_q(opcode); return 0;
		case X(OP_POPCI) | ISIZE_64BIT:  popci_o(opcode); return 0;
		case X(OP_POPCI) | ISIZE_8BIT:   popci_b(opcode); return 0;
		case X(OP_POPCI) | SIMD_FLAG | ISIZE_16BIT:  spopci_d(opcode); return 0;
		case X(OP_POPCI) | SIMD_FLAG | ISIZE_32BIT:  spopci_q(opcode); return 0;
		case X(OP_POPCI) | SIMD_FLAG | ISIZE_64BIT:  spopci_o(opcode); return 0;
		case X(OP_POPCI) | SIMD_FLAG | ISIZE_8BIT:   spopci_b(opcode); return 0;
		case X(OP_ROTLI) | ISIZE_16BIT:  rotli_d(opcode); return 0;
		case X(OP_ROTLI) | ISIZE_32BIT:  rotli_q(opcode); return 0;
		case X(OP_ROTLI) | ISIZE_64BIT:  rotli_o(opcode); return 0;
		case X(OP_ROTLI) | ISIZE_8BIT:   rotli_b(opcode); return 0;
		case X(OP_ROTLI) | SIMD_FLAG | ISIZE_16BIT:  srotli_d(opcode); return 0;
		case X(OP_ROTLI) | SIMD_FLAG | ISIZE_32BIT:  srotli_q(opcode); return 0;
		case X(OP_ROTLI) | SIMD_FLAG | ISIZE_64BIT:  srotli_o(opcode); return 0;
		case X(OP_ROTLI) | SIMD_FLAG | ISIZE_8BIT:   srotli_b(opcode); return 0;
		case X(OP_ROTRI) | ISIZE_16BIT:  rotri_d(opcode); return 0;
		case X(OP_ROTRI) | ISIZE_32BIT:  rotri_q(opcode); return 0;
		case X(OP_ROTRI) | ISIZE_64BIT:  rotri_o(opcode); return 0;
		case X(OP_ROTRI) | ISIZE_8BIT:   rotri_b(opcode); return 0;
		case X(OP_ROTRI) | SIMD_FLAG | ISIZE_16BIT:  srotri_d(opcode); return 0;
		case X(OP_ROTRI) | SIMD_FLAG | ISIZE_32BIT:  srotri_q(opcode); return 0;
		case X(OP_ROTRI) | SIMD_FLAG | ISIZE_64BIT:  srotri_o(opcode); return 0;
		case X(OP_ROTRI) | SIMD_FLAG | ISIZE_8BIT:   srotri_b(opcode); return 0;
		case X(OP_SHIFTLI) | ISIZE_16BIT:  shiftli_d(opcode); return 0;
		case X(OP_SHIFTLI) | ISIZE_32BIT:  shiftli_q(opcode); return 0;
		case X(OP_SHIFTLI) | ISIZE_64BIT:  shiftli_o(opcode); return 0;
		case X(OP_SHIFTLI) | ISIZE_8BIT:   shiftli_b(opcode); return 0;
		case X(OP_SHIFTLI) | SIMD_FLAG | ISIZE_16BIT:  sshiftli_d(opcode); return 0;
		case X(OP_SHIFTLI) | SIMD_FLAG | ISIZE_32BIT:  sshiftli_q(opcode); return 0;
		case X(OP_SHIFTLI) | SIMD_FLAG | ISIZE_64BIT:  sshiftli_o(opcode); return 0;
		case X(OP_SHIFTLI) | SIMD_FLAG | ISIZE_8BIT:   sshiftli_b(opcode); return 0;
		case X(OP_SHIFTRAI) | ISIZE_16BIT:  shiftrai_d(opcode); return 0;
		case X(OP_SHIFTRAI) | ISIZE_32BIT:  shiftrai_q(opcode); return 0;
		case X(OP_SHIFTRAI) | ISIZE_64BIT:  shiftrai_o(opcode); return 0;
		case X(OP_SHIFTRAI) | ISIZE_8BIT:   shiftrai_b(opcode); return 0;
		case X(OP_SHIFTRAI) | SIMD_FLAG | ISIZE_16BIT:  sshiftrai_d(opcode); return 0;
		case X(OP_SHIFTRAI) | SIMD_FLAG | ISIZE_32BIT:  sshiftrai_q(opcode); return 0;
		case X(OP_SHIFTRAI) | SIMD_FLAG | ISIZE_64BIT:  sshiftrai_o(opcode); return 0;
		case X(OP_SHIFTRAI) | SIMD_FLAG | ISIZE_8BIT:   sshiftrai_b(opcode); return 0;
		case X(OP_SHIFTRI) | ISIZE_16BIT:  shiftri_d(opcode); return 0;
		case X(OP_SHIFTRI) | ISIZE_32BIT:  shiftri_q(opcode); return 0;
		case X(OP_SHIFTRI) | ISIZE_64BIT:  shiftri_o(opcode); return 0;
		case X(OP_SHIFTRI) | ISIZE_8BIT:   shiftri_b(opcode); return 0;
		case X(OP_SHIFTRI) | SIMD_FLAG | ISIZE_16BIT:  sshiftri_d(opcode); return 0;
		case X(OP_SHIFTRI) | SIMD_FLAG | ISIZE_32BIT:  sshiftri_q(opcode); return 0;
		case X(OP_SHIFTRI) | SIMD_FLAG | ISIZE_64BIT:  sshiftri_o(opcode); return 0;
		case X(OP_SHIFTRI) | SIMD_FLAG | ISIZE_8BIT:   sshiftri_b(opcode); return 0;
	}
	switch (opcode & 0xfffc0000) {
		/* r3, r2, r1 */
		case X(OP_ADD) | ADD_MODE_CARRY | ISIZE_16BIT:  addc_d(opcode); return 0;
		case X(OP_ADD) | ADD_MODE_CARRY | ISIZE_32BIT:  addc_q(opcode); return 0;
		case X(OP_ADD) | ADD_MODE_CARRY | ISIZE_64BIT:  addc_o(opcode); return 0;
		case X(OP_ADD) | ADD_MODE_CARRY | ISIZE_8BIT:   addc_b(opcode); return 0;
		case X(OP_ADD) | ADD_MODE_NORMAL | ISIZE_16BIT:  add_d(opcode); return 0;
		case X(OP_ADD) | ADD_MODE_NORMAL | ISIZE_32BIT:  add_q(opcode); return 0;
		case X(OP_ADD) | ADD_MODE_NORMAL | ISIZE_64BIT:  add_o(opcode); return 0;
		case X(OP_ADD) | ADD_MODE_NORMAL | ISIZE_8BIT:   add_b(opcode); return 0;
		case X(OP_ADD) | ADD_MODE_SATURATE | ISIZE_16BIT:  adds_d(opcode); return 0;
		case X(OP_ADD) | ADD_MODE_SATURATE | ISIZE_32BIT:  adds_q(opcode); return 0;
		case X(OP_ADD) | ADD_MODE_SATURATE | ISIZE_64BIT:  adds_o(opcode); return 0;
		case X(OP_ADD) | ADD_MODE_SATURATE | ISIZE_8BIT:   adds_b(opcode); return 0;
		case X(OP_ADD) | SIMD_FLAG | ADD_MODE_CARRY | ISIZE_16BIT:  saddc_d(opcode); return 0;
		case X(OP_ADD) | SIMD_FLAG | ADD_MODE_CARRY | ISIZE_32BIT:  saddc_q(opcode); return 0;
		case X(OP_ADD) | SIMD_FLAG | ADD_MODE_CARRY | ISIZE_64BIT:  saddc_o(opcode); return 0;
		case X(OP_ADD) | SIMD_FLAG | ADD_MODE_CARRY | ISIZE_8BIT:   saddc_b(opcode); return 0;
		case X(OP_ADD) | SIMD_FLAG | ADD_MODE_NORMAL | ISIZE_16BIT:  sadd_d(opcode); return 0;
		case X(OP_ADD) | SIMD_FLAG | ADD_MODE_NORMAL | ISIZE_32BIT:  sadd_q(opcode); return 0;
		case X(OP_ADD) | SIMD_FLAG | ADD_MODE_NORMAL | ISIZE_64BIT:  sadd_o(opcode); return 0;
		case X(OP_ADD) | SIMD_FLAG | ADD_MODE_NORMAL | ISIZE_8BIT:   sadd_b(opcode); return 0;
		case X(OP_ADD) | SIMD_FLAG | ADD_MODE_SATURATE | ISIZE_16BIT:  sadds_d(opcode); return 0;
		case X(OP_ADD) | SIMD_FLAG | ADD_MODE_SATURATE | ISIZE_32BIT:  sadds_q(opcode); return 0;
		case X(OP_ADD) | SIMD_FLAG | ADD_MODE_SATURATE | ISIZE_64BIT:  sadds_o(opcode); return 0;
		case X(OP_ADD) | SIMD_FLAG | ADD_MODE_SATURATE | ISIZE_8BIT:   sadds_b(opcode); return 0;
		case X(OP_ADDSUB) | ISIZE_16BIT:  addsub_d(opcode); return 0;
		case X(OP_ADDSUB) | ISIZE_32BIT:  addsub_q(opcode); return 0;
		case X(OP_ADDSUB) | ISIZE_64BIT:  addsub_o(opcode); return 0;
		case X(OP_ADDSUB) | ISIZE_8BIT:   addsub_b(opcode); return 0;
		case X(OP_ADDSUB) | SIMD_FLAG | ISIZE_16BIT:  saddsub_d(opcode); return 0;
		case X(OP_ADDSUB) | SIMD_FLAG | ISIZE_32BIT:  saddsub_q(opcode); return 0;
		case X(OP_ADDSUB) | SIMD_FLAG | ISIZE_64BIT:  saddsub_o(opcode); return 0;
		case X(OP_ADDSUB) | SIMD_FLAG | ISIZE_8BIT:   saddsub_b(opcode); return 0;
		case X(OP_AMAC) | ISIZE_16BIT:  amac_d(opcode); return 0;
		case X(OP_AMAC) | ISIZE_32BIT:  amac_q(opcode); return 0;
		case X(OP_AMAC) | ISIZE_64BIT:  amac_o(opcode); return 0;
		case X(OP_AMAC) | ISIZE_8BIT:   amac_b(opcode); return 0;
		case X(OP_AMAC) | MUL_HIGH | ISIZE_16BIT:  amach_d(opcode); return 0;
		case X(OP_AMAC) | MUL_HIGH | ISIZE_32BIT:  amach_q(opcode); return 0;
		case X(OP_AMAC) | MUL_HIGH | ISIZE_64BIT:  amach_o(opcode); return 0;
		case X(OP_AMAC) | MUL_HIGH | ISIZE_8BIT:   amach_b(opcode); return 0;
		case X(OP_AMAC) | MUL_HIGH | MUL_SIGNED | ISIZE_16BIT:  amachs_d(opcode); return 0;
		case X(OP_AMAC) | MUL_HIGH | MUL_SIGNED | ISIZE_32BIT:  amachs_q(opcode); return 0;
		case X(OP_AMAC) | MUL_HIGH | MUL_SIGNED | ISIZE_64BIT:  amachs_o(opcode); return 0;
		case X(OP_AMAC) | MUL_HIGH | MUL_SIGNED | ISIZE_8BIT:   amachs_b(opcode); return 0;
		case X(OP_AMAC) | MUL_SIGNED | ISIZE_16BIT:  amac_d(opcode); return 0;
		case X(OP_AMAC) | MUL_SIGNED | ISIZE_32BIT:  amac_q(opcode); return 0;
		case X(OP_AMAC) | MUL_SIGNED | ISIZE_64BIT:  amac_o(opcode); return 0;
		case X(OP_AMAC) | MUL_SIGNED | ISIZE_8BIT:   amac_b(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | ISIZE_16BIT:  samac_d(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | ISIZE_32BIT:  samac_q(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | ISIZE_64BIT:  samac_o(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | ISIZE_8BIT:   samac_b(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | MUL_HIGH | ISIZE_16BIT:  samach_d(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | MUL_HIGH | ISIZE_32BIT:  samach_q(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | MUL_HIGH | ISIZE_64BIT:  samach_o(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | MUL_HIGH | ISIZE_8BIT:   samach_b(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | MUL_HIGH | MUL_SIGNED | ISIZE_16BIT:  samachs_d(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | MUL_HIGH | MUL_SIGNED | ISIZE_32BIT:  samachs_q(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | MUL_HIGH | MUL_SIGNED | ISIZE_64BIT:  samachs_o(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | MUL_HIGH | MUL_SIGNED | ISIZE_8BIT:   samachs_b(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | MUL_SIGNED | ISIZE_16BIT:  samac_d(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | MUL_SIGNED | ISIZE_32BIT:  samac_q(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | MUL_SIGNED | ISIZE_64BIT:  samac_o(opcode); return 0;
		case X(OP_AMAC) | SIMD_FLAG | MUL_SIGNED | ISIZE_8BIT:   samac_b(opcode); return 0;
		case X(OP_AND) | ROP2_MODE_AND | ISIZE_16BIT:  and_and_d(opcode); return 0;
		case X(OP_AND) | ROP2_MODE_AND | ISIZE_32BIT:  and_and_q(opcode); return 0;
		case X(OP_AND) | ROP2_MODE_AND | ISIZE_64BIT:  and_and_o(opcode); return 0;
		case X(OP_AND) | ROP2_MODE_AND | ISIZE_8BIT:   and_and_b(opcode); return 0;
		case X(OP_AND) | ROP2_MODE_DIRECT:  and(opcode); return 0;
		case X(OP_AND) | ROP2_MODE_OR | ISIZE_16BIT:  and_or_d(opcode); return 0;
		case X(OP_AND) | ROP2_MODE_OR | ISIZE_32BIT:  and_or_q(opcode); return 0;
		case X(OP_AND) | ROP2_MODE_OR | ISIZE_64BIT:  and_or_o(opcode); return 0;
		case X(OP_AND) | ROP2_MODE_OR | ISIZE_8BIT:   and_or_b(opcode); return 0;
		case X(OP_ANDN) | ROP2_MODE_AND | ISIZE_16BIT:  andn_and_d(opcode); return 0;
		case X(OP_ANDN) | ROP2_MODE_AND | ISIZE_32BIT:  andn_and_q(opcode); return 0;
		case X(OP_ANDN) | ROP2_MODE_AND | ISIZE_64BIT:  andn_and_o(opcode); return 0;
		case X(OP_ANDN) | ROP2_MODE_AND | ISIZE_8BIT:   andn_and_b(opcode); return 0;
		case X(OP_ANDN) | ROP2_MODE_DIRECT:  andn(opcode); return 0;
		case X(OP_ANDN) | ROP2_MODE_OR | ISIZE_16BIT:  andn_or_d(opcode); return 0;
		case X(OP_ANDN) | ROP2_MODE_OR | ISIZE_32BIT:  andn_or_q(opcode); return 0;
		case X(OP_ANDN) | ROP2_MODE_OR | ISIZE_64BIT:  andn_or_o(opcode); return 0;
		case X(OP_ANDN) | ROP2_MODE_OR | ISIZE_8BIT:   andn_or_b(opcode); return 0;
		case X(OP_BCHG) | ISIZE_16BIT:  bchg_d(opcode); return 0;
		case X(OP_BCHG) | ISIZE_32BIT:  bchg_q(opcode); return 0;
		case X(OP_BCHG) | ISIZE_64BIT:  bchg_o(opcode); return 0;
		case X(OP_BCHG) | ISIZE_8BIT:   bchg_b(opcode); return 0;
		case X(OP_BCHG) | SIMD_FLAG | ISIZE_16BIT:  sbchg_d(opcode); return 0;
		case X(OP_BCHG) | SIMD_FLAG | ISIZE_32BIT:  sbchg_q(opcode); return 0;
		case X(OP_BCHG) | SIMD_FLAG | ISIZE_64BIT:  sbchg_o(opcode); return 0;
		case X(OP_BCHG) | SIMD_FLAG | ISIZE_8BIT:   sbchg_b(opcode); return 0;
		case X(OP_BCLR) | ISIZE_16BIT:  bclr_d(opcode); return 0;
		case X(OP_BCLR) | ISIZE_32BIT:  bclr_q(opcode); return 0;
		case X(OP_BCLR) | ISIZE_64BIT:  bclr_o(opcode); return 0;
		case X(OP_BCLR) | ISIZE_8BIT:   bclr_b(opcode); return 0;
		case X(OP_BCLR) | SIMD_FLAG | ISIZE_16BIT:  sbclr_d(opcode); return 0;
		case X(OP_BCLR) | SIMD_FLAG | ISIZE_32BIT:  sbclr_q(opcode); return 0;
		case X(OP_BCLR) | SIMD_FLAG | ISIZE_64BIT:  sbclr_o(opcode); return 0;
		case X(OP_BCLR) | SIMD_FLAG | ISIZE_8BIT:   sbclr_b(opcode); return 0;
		case X(OP_BITREV) | ISIZE_16BIT:  bitrev_d(opcode); return 0;
		case X(OP_BITREV) | ISIZE_32BIT:  bitrev_q(opcode); return 0;
		case X(OP_BITREV) | ISIZE_64BIT:  bitrev_o(opcode); return 0;
		case X(OP_BITREV) | ISIZE_8BIT:   bitrev_b(opcode); return 0;
		case X(OP_BITREV) | SIMD_FLAG | ISIZE_16BIT:  sbitrev_d(opcode); return 0;
		case X(OP_BITREV) | SIMD_FLAG | ISIZE_32BIT:  sbitrev_q(opcode); return 0;
		case X(OP_BITREV) | SIMD_FLAG | ISIZE_64BIT:  sbitrev_o(opcode); return 0;
		case X(OP_BITREV) | SIMD_FLAG | ISIZE_8BIT:   sbitrev_b(opcode); return 0;
		case X(OP_BITREV) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_16BIT:  sbitrevh_d(opcode); return 0;
		case X(OP_BITREV) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_32BIT:  sbitrevh_q(opcode); return 0;
		case X(OP_BITREV) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_64BIT:  sbitrevh_o(opcode); return 0;
		case X(OP_BITREV) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_8BIT:   sbitrevh_b(opcode); return 0;
		case X(OP_BSET) | ISIZE_16BIT:  bset_d(opcode); return 0;
		case X(OP_BSET) | ISIZE_32BIT:  bset_q(opcode); return 0;
		case X(OP_BSET) | ISIZE_64BIT:  bset_o(opcode); return 0;
		case X(OP_BSET) | ISIZE_8BIT:   bset_b(opcode); return 0;
		case X(OP_BSET) | SIMD_FLAG | ISIZE_16BIT:  sbset_d(opcode); return 0;
		case X(OP_BSET) | SIMD_FLAG | ISIZE_32BIT:  sbset_q(opcode); return 0;
		case X(OP_BSET) | SIMD_FLAG | ISIZE_64BIT:  sbset_o(opcode); return 0;
		case X(OP_BSET) | SIMD_FLAG | ISIZE_8BIT:   sbset_b(opcode); return 0;
		case X(OP_BTST) | ISIZE_16BIT:  btst_d(opcode); return 0;
		case X(OP_BTST) | ISIZE_32BIT:  btst_q(opcode); return 0;
		case X(OP_BTST) | ISIZE_64BIT:  btst_o(opcode); return 0;
		case X(OP_BTST) | ISIZE_8BIT:   btst_b(opcode); return 0;
		case X(OP_BTST) | SIMD_FLAG | ISIZE_16BIT:  sbtst_d(opcode); return 0;
		case X(OP_BTST) | SIMD_FLAG | ISIZE_32BIT:  sbtst_q(opcode); return 0;
		case X(OP_BTST) | SIMD_FLAG | ISIZE_64BIT:  sbtst_o(opcode); return 0;
		case X(OP_BTST) | SIMD_FLAG | ISIZE_8BIT:   sbtst_b(opcode); return 0;
		case X(OP_CMPG) | CMP_SIGNED | ISIZE_16BIT:  cmpgs_d(opcode); return 0;
		case X(OP_CMPG) | CMP_SIGNED | ISIZE_32BIT:  cmpgs_q(opcode); return 0;
		case X(OP_CMPG) | CMP_SIGNED | ISIZE_64BIT:  cmpgs_o(opcode); return 0;
		case X(OP_CMPG) | CMP_SIGNED | ISIZE_8BIT:   cmpgs_b(opcode); return 0;
		case X(OP_CMPG) | ISIZE_16BIT:  cmpg_d(opcode); return 0;
		case X(OP_CMPG) | ISIZE_32BIT:  cmpg_q(opcode); return 0;
		case X(OP_CMPG) | ISIZE_64BIT:  cmpg_o(opcode); return 0;
		case X(OP_CMPG) | ISIZE_8BIT:   cmpg_b(opcode); return 0;
		case X(OP_CMPG) | SIMD_FLAG | CMP_SIGNED | ISIZE_16BIT:  scmpgs_d(opcode); return 0;
		case X(OP_CMPG) | SIMD_FLAG | CMP_SIGNED | ISIZE_32BIT:  scmpgs_q(opcode); return 0;
		case X(OP_CMPG) | SIMD_FLAG | CMP_SIGNED | ISIZE_64BIT:  scmpgs_o(opcode); return 0;
		case X(OP_CMPG) | SIMD_FLAG | CMP_SIGNED | ISIZE_8BIT:   scmpgs_b(opcode); return 0;
		case X(OP_CMPG) | SIMD_FLAG | ISIZE_16BIT:  scmpg_d(opcode); return 0;
		case X(OP_CMPG) | SIMD_FLAG | ISIZE_32BIT:  scmpg_q(opcode); return 0;
		case X(OP_CMPG) | SIMD_FLAG | ISIZE_64BIT:  scmpg_o(opcode); return 0;
		case X(OP_CMPG) | SIMD_FLAG | ISIZE_8BIT:   scmpg_b(opcode); return 0;
		case X(OP_CMPLE) | CMP_SIGNED | ISIZE_16BIT:  cmples_d(opcode); return 0;
		case X(OP_CMPLE) | CMP_SIGNED | ISIZE_32BIT:  cmples_q(opcode); return 0;
		case X(OP_CMPLE) | CMP_SIGNED | ISIZE_64BIT:  cmples_o(opcode); return 0;
		case X(OP_CMPLE) | CMP_SIGNED | ISIZE_8BIT:   cmples_b(opcode); return 0;
		case X(OP_CMPLE) | ISIZE_16BIT:  cmple_d(opcode); return 0;
		case X(OP_CMPLE) | ISIZE_32BIT:  cmple_q(opcode); return 0;
		case X(OP_CMPLE) | ISIZE_64BIT:  cmple_o(opcode); return 0;
		case X(OP_CMPLE) | ISIZE_8BIT:   cmple_b(opcode); return 0;
		case X(OP_CMPLE) | SIMD_FLAG | CMP_SIGNED | ISIZE_16BIT:  scmples_d(opcode); return 0;
		case X(OP_CMPLE) | SIMD_FLAG | CMP_SIGNED | ISIZE_32BIT:  scmples_q(opcode); return 0;
		case X(OP_CMPLE) | SIMD_FLAG | CMP_SIGNED | ISIZE_64BIT:  scmples_o(opcode); return 0;
		case X(OP_CMPLE) | SIMD_FLAG | CMP_SIGNED | ISIZE_8BIT:   scmples_b(opcode); return 0;
		case X(OP_CMPLE) | SIMD_FLAG | ISIZE_16BIT:  scmple_d(opcode); return 0;
		case X(OP_CMPLE) | SIMD_FLAG | ISIZE_32BIT:  scmple_q(opcode); return 0;
		case X(OP_CMPLE) | SIMD_FLAG | ISIZE_64BIT:  scmple_o(opcode); return 0;
		case X(OP_CMPLE) | SIMD_FLAG | ISIZE_8BIT:   scmple_b(opcode); return 0;
		case X(OP_DBITREV) | ISIZE_16BIT:  dbitrev_d(opcode); return 0;
		case X(OP_DBITREV) | ISIZE_32BIT:  dbitrev_q(opcode); return 0;
		case X(OP_DBITREV) | ISIZE_64BIT:  dbitrev_o(opcode); return 0;
		case X(OP_DBITREV) | ISIZE_8BIT:   dbitrev_b(opcode); return 0;
		case X(OP_DBITREV) | SIMD_FLAG | ISIZE_16BIT:  sdbitrev_d(opcode); return 0;
		case X(OP_DBITREV) | SIMD_FLAG | ISIZE_32BIT:  sdbitrev_q(opcode); return 0;
		case X(OP_DBITREV) | SIMD_FLAG | ISIZE_64BIT:  sdbitrev_o(opcode); return 0;
		case X(OP_DBITREV) | SIMD_FLAG | ISIZE_8BIT:   sdbitrev_b(opcode); return 0;
		case X(OP_DBITREV) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_16BIT:  sdbitrevh_d(opcode); return 0;
		case X(OP_DBITREV) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_32BIT:  sdbitrevh_q(opcode); return 0;
		case X(OP_DBITREV) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_64BIT:  sdbitrevh_o(opcode); return 0;
		case X(OP_DBITREV) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_8BIT:   sdbitrevh_b(opcode); return 0;
		case X(OP_DIV) | DIV_REMAINDER | DIV_SIGNED | ISIZE_16BIT:  divrems_d(opcode); return 0;
		case X(OP_DIV) | DIV_REMAINDER | DIV_SIGNED | ISIZE_32BIT:  divrems_q(opcode); return 0;
		case X(OP_DIV) | DIV_REMAINDER | DIV_SIGNED | ISIZE_64BIT:  divrems_o(opcode); return 0;
		case X(OP_DIV) | DIV_REMAINDER | DIV_SIGNED | ISIZE_8BIT:   divrems_b(opcode); return 0;
		case X(OP_DIV) | DIV_REMAINDER | ISIZE_16BIT:  divrem_d(opcode); return 0;
		case X(OP_DIV) | DIV_REMAINDER | ISIZE_32BIT:  divrem_q(opcode); return 0;
		case X(OP_DIV) | DIV_REMAINDER | ISIZE_64BIT:  divrem_o(opcode); return 0;
		case X(OP_DIV) | DIV_REMAINDER | ISIZE_8BIT:   divrem_b(opcode); return 0;
		case X(OP_DIV) | DIV_SIGNED | ISIZE_16BIT:  divs_d(opcode); return 0;
		case X(OP_DIV) | DIV_SIGNED | ISIZE_32BIT:  divs_q(opcode); return 0;
		case X(OP_DIV) | DIV_SIGNED | ISIZE_64BIT:  divs_o(opcode); return 0;
		case X(OP_DIV) | DIV_SIGNED | ISIZE_8BIT:   divs_b(opcode); return 0;
		case X(OP_DIV) | ISIZE_16BIT:  div_d(opcode); return 0;
		case X(OP_DIV) | ISIZE_32BIT:  div_q(opcode); return 0;
		case X(OP_DIV) | ISIZE_64BIT:  div_o(opcode); return 0;
		case X(OP_DIV) | ISIZE_8BIT:   div_b(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | DIV_REMAINDER | DIV_SIGNED | ISIZE_16BIT:  sdivrems_d(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | DIV_REMAINDER | DIV_SIGNED | ISIZE_32BIT:  sdivrems_q(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | DIV_REMAINDER | DIV_SIGNED | ISIZE_64BIT:  sdivrems_o(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | DIV_REMAINDER | DIV_SIGNED | ISIZE_8BIT:   sdivrems_b(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | DIV_REMAINDER | ISIZE_16BIT:  sdivrem_d(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | DIV_REMAINDER | ISIZE_32BIT:  sdivrem_q(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | DIV_REMAINDER | ISIZE_64BIT:  sdivrem_o(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | DIV_REMAINDER | ISIZE_8BIT:   sdivrem_b(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | DIV_SIGNED | ISIZE_16BIT:  sdivs_d(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | DIV_SIGNED | ISIZE_32BIT:  sdivs_q(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | DIV_SIGNED | ISIZE_64BIT:  sdivs_o(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | DIV_SIGNED | ISIZE_8BIT:   sdivs_b(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | ISIZE_16BIT:  sdiv_d(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | ISIZE_32BIT:  sdiv_q(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | ISIZE_64BIT:  sdiv_o(opcode); return 0;
		case X(OP_DIV) | SIMD_FLAG | ISIZE_8BIT:   sdiv_b(opcode); return 0;
		case X(OP_DSHIFTL) | ISIZE_16BIT:  dshiftl_d(opcode); return 0;
		case X(OP_DSHIFTL) | ISIZE_32BIT:  dshiftl_q(opcode); return 0;
		case X(OP_DSHIFTL) | ISIZE_64BIT:  dshiftl_o(opcode); return 0;
		case X(OP_DSHIFTL) | ISIZE_8BIT:   dshiftl_b(opcode); return 0;
		case X(OP_DSHIFTL) | SIMD_FLAG | ISIZE_16BIT:  sdshiftl_d(opcode); return 0;
		case X(OP_DSHIFTL) | SIMD_FLAG | ISIZE_32BIT:  sdshiftl_q(opcode); return 0;
		case X(OP_DSHIFTL) | SIMD_FLAG | ISIZE_64BIT:  sdshiftl_o(opcode); return 0;
		case X(OP_DSHIFTL) | SIMD_FLAG | ISIZE_8BIT:   sdshiftl_b(opcode); return 0;
		case X(OP_DSHIFTL) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_16BIT:  sdshiftlh_d(opcode); return 0;
		case X(OP_DSHIFTL) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_32BIT:  sdshiftlh_q(opcode); return 0;
		case X(OP_DSHIFTL) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_64BIT:  sdshiftlh_o(opcode); return 0;
		case X(OP_DSHIFTL) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_8BIT:   sdshiftlh_b(opcode); return 0;
		case X(OP_DSHIFTR) | ISIZE_16BIT:  dshiftr_d(opcode); return 0;
		case X(OP_DSHIFTR) | ISIZE_32BIT:  dshiftr_q(opcode); return 0;
		case X(OP_DSHIFTR) | ISIZE_64BIT:  dshiftr_o(opcode); return 0;
		case X(OP_DSHIFTR) | ISIZE_8BIT:   dshiftr_b(opcode); return 0;
		case X(OP_DSHIFTR) | SIMD_FLAG | ISIZE_16BIT:  sdshiftr_d(opcode); return 0;
		case X(OP_DSHIFTR) | SIMD_FLAG | ISIZE_32BIT:  sdshiftr_q(opcode); return 0;
		case X(OP_DSHIFTR) | SIMD_FLAG | ISIZE_64BIT:  sdshiftr_o(opcode); return 0;
		case X(OP_DSHIFTR) | SIMD_FLAG | ISIZE_8BIT:   sdshiftr_b(opcode); return 0;
		case X(OP_DSHIFTR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_16BIT:  sdshiftrh_d(opcode); return 0;
		case X(OP_DSHIFTR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_32BIT:  sdshiftrh_q(opcode); return 0;
		case X(OP_DSHIFTR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_64BIT:  sdshiftrh_o(opcode); return 0;
		case X(OP_DSHIFTR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_8BIT:   sdshiftrh_b(opcode); return 0;
		case X(OP_DSHIFTRA) | ISIZE_16BIT:  dshiftra_d(opcode); return 0;
		case X(OP_DSHIFTRA) | ISIZE_32BIT:  dshiftra_q(opcode); return 0;
		case X(OP_DSHIFTRA) | ISIZE_64BIT:  dshiftra_o(opcode); return 0;
		case X(OP_DSHIFTRA) | ISIZE_8BIT:   dshiftra_b(opcode); return 0;
		case X(OP_DSHIFTRA) | SIMD_FLAG | ISIZE_16BIT:  sdshiftra_d(opcode); return 0;
		case X(OP_DSHIFTRA) | SIMD_FLAG | ISIZE_32BIT:  sdshiftra_q(opcode); return 0;
		case X(OP_DSHIFTRA) | SIMD_FLAG | ISIZE_64BIT:  sdshiftra_o(opcode); return 0;
		case X(OP_DSHIFTRA) | SIMD_FLAG | ISIZE_8BIT:   sdshiftra_b(opcode); return 0;
		case X(OP_DSHIFTRA) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_16BIT:  sdshiftrah_d(opcode); return 0;
		case X(OP_DSHIFTRA) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_32BIT:  sdshiftrah_q(opcode); return 0;
		case X(OP_DSHIFTRA) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_64BIT:  sdshiftrah_o(opcode); return 0;
		case X(OP_DSHIFTRA) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_8BIT:   sdshiftrah_b(opcode); return 0;
		case X(OP_EXPAND) | EXPAND_BOTH | ISIZE_16BIT:  expand_d(opcode); return 0;
		case X(OP_EXPAND) | EXPAND_BOTH | ISIZE_32BIT:  expand_q(opcode); return 0;
		case X(OP_EXPAND) | EXPAND_BOTH | ISIZE_64BIT:  expand_o(opcode); return 0;
		case X(OP_EXPAND) | EXPAND_BOTH | ISIZE_8BIT:   expand_b(opcode); return 0;
		case X(OP_EXPAND) | EXPAND_HIGH | ISIZE_16BIT:  expandh_d(opcode); return 0;
		case X(OP_EXPAND) | EXPAND_HIGH | ISIZE_32BIT:  expandh_q(opcode); return 0;
		case X(OP_EXPAND) | EXPAND_HIGH | ISIZE_64BIT:  expandh_o(opcode); return 0;
		case X(OP_EXPAND) | EXPAND_HIGH | ISIZE_8BIT:   expandh_b(opcode); return 0;
		case X(OP_EXPAND) | EXPAND_LOW | ISIZE_16BIT:  expandl_d(opcode); return 0;
		case X(OP_EXPAND) | EXPAND_LOW | ISIZE_32BIT:  expandl_q(opcode); return 0;
		case X(OP_EXPAND) | EXPAND_LOW | ISIZE_64BIT:  expandl_o(opcode); return 0;
		case X(OP_EXPAND) | EXPAND_LOW | ISIZE_8BIT:   expandl_b(opcode); return 0;
		case X(OP_FADD) | FSIZE_DOUBLE:  fadd_d(opcode); return 0;
		case X(OP_FADD) | FSIZE_SINGLE:  fadd_f(opcode); return 0;
		case X(OP_FADD) | SIMD_FLAG | FSIZE_DOUBLE:  sfadd_d(opcode); return 0;
		case X(OP_FADD) | SIMD_FLAG | FSIZE_SINGLE:  sfadd_f(opcode); return 0;
		case X(OP_FADDSUB) | FSIZE_DOUBLE:  faddsub_d(opcode); return 0;
		case X(OP_FADDSUB) | FSIZE_SINGLE:  faddsub_f(opcode); return 0;
		case X(OP_FADDSUB) | SIMD_FLAG | FSIZE_DOUBLE:  sfaddsub_d(opcode); return 0;
		case X(OP_FADDSUB) | SIMD_FLAG | FSIZE_SINGLE:  sfaddsub_f(opcode); return 0;
		case X(OP_FDIV) | FSIZE_DOUBLE:  fdiv_d(opcode); return 0;
		case X(OP_FDIV) | FSIZE_SINGLE:  fdiv_f(opcode); return 0;
		case X(OP_FDIV) | SIMD_FLAG | FSIZE_DOUBLE:  sfdiv_d(opcode); return 0;
		case X(OP_FDIV) | SIMD_FLAG | FSIZE_SINGLE:  sfdiv_f(opcode); return 0;
		case X(OP_FEXP) | FSIZE_DOUBLE:  fexp_d(opcode); return 0;
		case X(OP_FEXP) | FSIZE_SINGLE:  fexp_f(opcode); return 0;
		case X(OP_FEXP) | SIMD_FLAG | FSIZE_DOUBLE:  sfexp_d(opcode); return 0;
		case X(OP_FEXP) | SIMD_FLAG | FSIZE_SINGLE:  sfexp_f(opcode); return 0;
		case X(OP_FLOG) | FSIZE_DOUBLE:  flog_d(opcode); return 0;
		case X(OP_FLOG) | FSIZE_SINGLE:  flog_f(opcode); return 0;
		case X(OP_FLOG) | SIMD_FLAG | FSIZE_DOUBLE:  sflog_d(opcode); return 0;
		case X(OP_FLOG) | SIMD_FLAG | FSIZE_SINGLE:  sflog_f(opcode); return 0;
		case X(OP_FMAC) | FSIZE_DOUBLE:  fmac_d(opcode); return 0;
		case X(OP_FMAC) | FSIZE_SINGLE:  fmac_f(opcode); return 0;
		case X(OP_FMAC) | SIMD_FLAG | FSIZE_DOUBLE:  sfmac_d(opcode); return 0;
		case X(OP_FMAC) | SIMD_FLAG | FSIZE_SINGLE:  sfmac_f(opcode); return 0;
		case X(OP_FMUL) | FSIZE_DOUBLE:  fmul_d(opcode); return 0;
		case X(OP_FMUL) | FSIZE_SINGLE:  fmul_f(opcode); return 0;
		case X(OP_FMUL) | SIMD_FLAG | FSIZE_DOUBLE:  sfmul_d(opcode); return 0;
		case X(OP_FMUL) | SIMD_FLAG | FSIZE_SINGLE:  sfmul_f(opcode); return 0;
		case X(OP_FSUB) | FSIZE_DOUBLE:  fsub_d(opcode); return 0;
		case X(OP_FSUB) | FSIZE_SINGLE:  fsub_f(opcode); return 0;
		case X(OP_FSUB) | SIMD_FLAG | FSIZE_DOUBLE:  sfsub_d(opcode); return 0;
		case X(OP_FSUB) | SIMD_FLAG | FSIZE_SINGLE:  sfsub_f(opcode); return 0;
		case X(OP_JMP) | CC_LSB:  jmpl(opcode); return 0;
		case X(OP_JMP) | CC_MSB:  jmpm(opcode); return 0;
		case X(OP_JMP) | CC_NAN:  jmpn(opcode); return 0;
		case X(OP_JMP) | CC_NOT_LSB:  jmpnl(opcode); return 0;
		case X(OP_JMP) | CC_NOT_MSB:  jmpnm(opcode); return 0;
		case X(OP_JMP) | CC_NOT_NAN:  jmpnn(opcode); return 0;
		case X(OP_JMP) | CC_NOT_ZERO:  jmpnz(opcode); return 0;
		case X(OP_JMP) | CC_ZERO:  jmpz(opcode); return 0;
		case X(OP_LOADM):  loadm(opcode); return 0;
		case X(OP_MAC) | ISIZE_16BIT:  macl_d(opcode); return 0;
		case X(OP_MAC) | ISIZE_32BIT:  macl_q(opcode); return 0;
		case X(OP_MAC) | ISIZE_64BIT:  macl_o(opcode); return 0;
		case X(OP_MAC) | ISIZE_8BIT:   macl_b(opcode); return 0;
		case X(OP_MAC) | MAC_HIGH | ISIZE_16BIT:  mach_d(opcode); return 0;
		case X(OP_MAC) | MAC_HIGH | ISIZE_32BIT:  mach_q(opcode); return 0;
		case X(OP_MAC) | MAC_HIGH | ISIZE_64BIT:  mach_o(opcode); return 0;
		case X(OP_MAC) | MAC_HIGH | ISIZE_8BIT:   mach_b(opcode); return 0;
		case X(OP_MAC) | MAC_HIGH | MAC_SIGNED | ISIZE_16BIT:  machs_d(opcode); return 0;
		case X(OP_MAC) | MAC_HIGH | MAC_SIGNED | ISIZE_32BIT:  machs_q(opcode); return 0;
		case X(OP_MAC) | MAC_HIGH | MAC_SIGNED | ISIZE_64BIT:  machs_o(opcode); return 0;
		case X(OP_MAC) | MAC_HIGH | MAC_SIGNED | ISIZE_8BIT:   machs_b(opcode); return 0;
		case X(OP_MAC) | MAC_SIGNED | ISIZE_16BIT:  macls_d(opcode); return 0;
		case X(OP_MAC) | MAC_SIGNED | ISIZE_32BIT:  macls_q(opcode); return 0;
		case X(OP_MAC) | MAC_SIGNED | ISIZE_64BIT:  macls_o(opcode); return 0;
		case X(OP_MAC) | MAC_SIGNED | ISIZE_8BIT:   macls_b(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | ISIZE_16BIT:  smacl_d(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | ISIZE_32BIT:  smacl_q(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | ISIZE_64BIT:  smacl_o(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | ISIZE_8BIT:   smacl_b(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | MAC_HIGH | ISIZE_16BIT:  smach_d(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | MAC_HIGH | ISIZE_32BIT:  smach_q(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | MAC_HIGH | ISIZE_64BIT:  smach_o(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | MAC_HIGH | ISIZE_8BIT:   smach_b(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | MAC_HIGH | MAC_SIGNED | ISIZE_16BIT:  smachs_d(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | MAC_HIGH | MAC_SIGNED | ISIZE_32BIT:  smachs_q(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | MAC_HIGH | MAC_SIGNED | ISIZE_64BIT:  smachs_o(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | MAC_HIGH | MAC_SIGNED | ISIZE_8BIT:   smachs_b(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | MAC_SIGNED | ISIZE_16BIT:  smacls_d(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | MAC_SIGNED | ISIZE_32BIT:  smacls_q(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | MAC_SIGNED | ISIZE_64BIT:  smacls_o(opcode); return 0;
		case X(OP_MAC) | SIMD_FLAG | MAC_SIGNED | ISIZE_8BIT:   smacls_b(opcode); return 0;
		case X(OP_MAX) | CMP_SIGNED | ISIZE_16BIT:  maxs_d(opcode); return 0;
		case X(OP_MAX) | CMP_SIGNED | ISIZE_32BIT:  maxs_q(opcode); return 0;
		case X(OP_MAX) | CMP_SIGNED | ISIZE_64BIT:  maxs_o(opcode); return 0;
		case X(OP_MAX) | CMP_SIGNED | ISIZE_8BIT:   maxs_b(opcode); return 0;
		case X(OP_MAX) | ISIZE_16BIT:  max_d(opcode); return 0;
		case X(OP_MAX) | ISIZE_32BIT:  max_q(opcode); return 0;
		case X(OP_MAX) | ISIZE_64BIT:  max_o(opcode); return 0;
		case X(OP_MAX) | ISIZE_8BIT:   max_b(opcode); return 0;
		case X(OP_MAX) | SIMD_FLAG | CMP_SIGNED | ISIZE_16BIT:  smaxs_d(opcode); return 0;
		case X(OP_MAX) | SIMD_FLAG | CMP_SIGNED | ISIZE_32BIT:  smaxs_q(opcode); return 0;
		case X(OP_MAX) | SIMD_FLAG | CMP_SIGNED | ISIZE_64BIT:  smaxs_o(opcode); return 0;
		case X(OP_MAX) | SIMD_FLAG | CMP_SIGNED | ISIZE_8BIT:   smaxs_b(opcode); return 0;
		case X(OP_MAX) | SIMD_FLAG | ISIZE_16BIT:  smax_d(opcode); return 0;
		case X(OP_MAX) | SIMD_FLAG | ISIZE_32BIT:  smax_q(opcode); return 0;
		case X(OP_MAX) | SIMD_FLAG | ISIZE_64BIT:  smax_o(opcode); return 0;
		case X(OP_MAX) | SIMD_FLAG | ISIZE_8BIT:   smax_b(opcode); return 0;
		case X(OP_MIN) | CMP_SIGNED | ISIZE_16BIT:  mins_d(opcode); return 0;
		case X(OP_MIN) | CMP_SIGNED | ISIZE_32BIT:  mins_q(opcode); return 0;
		case X(OP_MIN) | CMP_SIGNED | ISIZE_64BIT:  mins_o(opcode); return 0;
		case X(OP_MIN) | CMP_SIGNED | ISIZE_8BIT:   mins_b(opcode); return 0;
		case X(OP_MIN) | ISIZE_16BIT:  min_d(opcode); return 0;
		case X(OP_MIN) | ISIZE_32BIT:  min_q(opcode); return 0;
		case X(OP_MIN) | ISIZE_64BIT:  min_o(opcode); return 0;
		case X(OP_MIN) | ISIZE_8BIT:   min_b(opcode); return 0;
		case X(OP_MIN) | SIMD_FLAG | CMP_SIGNED | ISIZE_16BIT:  smins_d(opcode); return 0;
		case X(OP_MIN) | SIMD_FLAG | CMP_SIGNED | ISIZE_32BIT:  smins_q(opcode); return 0;
		case X(OP_MIN) | SIMD_FLAG | CMP_SIGNED | ISIZE_64BIT:  smins_o(opcode); return 0;
		case X(OP_MIN) | SIMD_FLAG | CMP_SIGNED | ISIZE_8BIT:   smins_b(opcode); return 0;
		case X(OP_MIN) | SIMD_FLAG | ISIZE_16BIT:  smin_d(opcode); return 0;
		case X(OP_MIN) | SIMD_FLAG | ISIZE_32BIT:  smin_q(opcode); return 0;
		case X(OP_MIN) | SIMD_FLAG | ISIZE_64BIT:  smin_o(opcode); return 0;
		case X(OP_MIN) | SIMD_FLAG | ISIZE_8BIT:   smin_b(opcode); return 0;
		case X(OP_MINMAX) | CMP_SIGNED | ISIZE_16BIT:  minmaxs_d(opcode); return 0;
		case X(OP_MINMAX) | CMP_SIGNED | ISIZE_32BIT:  minmaxs_q(opcode); return 0;
		case X(OP_MINMAX) | CMP_SIGNED | ISIZE_64BIT:  minmaxs_o(opcode); return 0;
		case X(OP_MINMAX) | CMP_SIGNED | ISIZE_8BIT:   minmaxs_b(opcode); return 0;
		case X(OP_MINMAX) | ISIZE_16BIT:  minmax_d(opcode); return 0;
		case X(OP_MINMAX) | ISIZE_32BIT:  minmax_q(opcode); return 0;
		case X(OP_MINMAX) | ISIZE_64BIT:  minmax_o(opcode); return 0;
		case X(OP_MINMAX) | ISIZE_8BIT:   minmax_b(opcode); return 0;
		case X(OP_MINMAX) | SIMD_FLAG | CMP_SIGNED | ISIZE_16BIT:  sminmaxs_d(opcode); return 0;
		case X(OP_MINMAX) | SIMD_FLAG | CMP_SIGNED | ISIZE_32BIT:  sminmaxs_q(opcode); return 0;
		case X(OP_MINMAX) | SIMD_FLAG | CMP_SIGNED | ISIZE_64BIT:  sminmaxs_o(opcode); return 0;
		case X(OP_MINMAX) | SIMD_FLAG | CMP_SIGNED | ISIZE_8BIT:   sminmaxs_b(opcode); return 0;
		case X(OP_MINMAX) | SIMD_FLAG | ISIZE_16BIT:  sminmax_d(opcode); return 0;
		case X(OP_MINMAX) | SIMD_FLAG | ISIZE_32BIT:  sminmax_q(opcode); return 0;
		case X(OP_MINMAX) | SIMD_FLAG | ISIZE_64BIT:  sminmax_o(opcode); return 0;
		case X(OP_MINMAX) | SIMD_FLAG | ISIZE_8BIT:   sminmax_b(opcode); return 0;
		case X(OP_MIX) | MIX_BOTH | ISIZE_16BIT:  mix_d(opcode); return 0;
		case X(OP_MIX) | MIX_BOTH | ISIZE_32BIT:  mix_q(opcode); return 0;
		case X(OP_MIX) | MIX_BOTH | ISIZE_64BIT:  mix_o(opcode); return 0;
		case X(OP_MIX) | MIX_BOTH | ISIZE_8BIT:   mix_b(opcode); return 0;
		case X(OP_MIX) | MIX_HIGH | ISIZE_16BIT:  mixh_d(opcode); return 0;
		case X(OP_MIX) | MIX_HIGH | ISIZE_32BIT:  mixh_q(opcode); return 0;
		case X(OP_MIX) | MIX_HIGH | ISIZE_64BIT:  mixh_o(opcode); return 0;
		case X(OP_MIX) | MIX_HIGH | ISIZE_8BIT:   mixh_b(opcode); return 0;
		case X(OP_MIX) | MIX_LOW | ISIZE_16BIT:  mixl_d(opcode); return 0;
		case X(OP_MIX) | MIX_LOW | ISIZE_32BIT:  mixl_q(opcode); return 0;
		case X(OP_MIX) | MIX_LOW | ISIZE_64BIT:  mixl_o(opcode); return 0;
		case X(OP_MIX) | MIX_LOW | ISIZE_8BIT:   mixl_b(opcode); return 0;
		case X(OP_MOVE) | CC_LSB | ISIZE_16BIT:  movel_d(opcode); return 0;
		case X(OP_MOVE) | CC_LSB | ISIZE_32BIT:  movel_q(opcode); return 0;
		case X(OP_MOVE) | CC_LSB | ISIZE_64BIT:  movel_o(opcode); return 0;
		case X(OP_MOVE) | CC_LSB | ISIZE_8BIT:   movel_b(opcode); return 0;
		case X(OP_MOVE) | CC_MSB | ISIZE_16BIT:  movem_d(opcode); return 0;
		case X(OP_MOVE) | CC_MSB | ISIZE_32BIT:  movem_q(opcode); return 0;
		case X(OP_MOVE) | CC_MSB | ISIZE_64BIT:  movem_o(opcode); return 0;
		case X(OP_MOVE) | CC_MSB | ISIZE_8BIT:   movem_b(opcode); return 0;
		case X(OP_MOVE) | CC_NAN | ISIZE_16BIT:  moven_d(opcode); return 0;
		case X(OP_MOVE) | CC_NAN | ISIZE_32BIT:  moven_q(opcode); return 0;
		case X(OP_MOVE) | CC_NAN | ISIZE_64BIT:  moven_o(opcode); return 0;
		case X(OP_MOVE) | CC_NAN | ISIZE_8BIT:   moven_b(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_LSB | ISIZE_16BIT:  movenl_d(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_LSB | ISIZE_32BIT:  movenl_q(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_LSB | ISIZE_64BIT:  movenl_o(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_LSB | ISIZE_8BIT:   movenl_b(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_MSB | ISIZE_16BIT:  movenm_d(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_MSB | ISIZE_32BIT:  movenm_q(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_MSB | ISIZE_64BIT:  movenm_o(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_MSB | ISIZE_8BIT:   movenm_b(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_NAN | ISIZE_16BIT:  movenn_d(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_NAN | ISIZE_32BIT:  movenn_q(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_NAN | ISIZE_64BIT:  movenn_o(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_NAN | ISIZE_8BIT:   movenn_b(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_ZERO | ISIZE_16BIT:  movenz_d(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_ZERO | ISIZE_32BIT:  movenz_q(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_ZERO | ISIZE_64BIT:  movenz_o(opcode); return 0;
		case X(OP_MOVE) | CC_NOT_ZERO | ISIZE_8BIT:   movenz_b(opcode); return 0;
		case X(OP_MOVE) | CC_ZERO | ISIZE_16BIT:  movez_d(opcode); return 0;
		case X(OP_MOVE) | CC_ZERO | ISIZE_32BIT:  movez_q(opcode); return 0;
		case X(OP_MOVE) | CC_ZERO | ISIZE_64BIT:  movez_o(opcode); return 0;
		case X(OP_MOVE) | CC_ZERO | ISIZE_8BIT:   movez_b(opcode); return 0;
		case X(OP_MUL) | ISIZE_16BIT:  mul_d(opcode); return 0;
		case X(OP_MUL) | ISIZE_32BIT:  mul_q(opcode); return 0;
		case X(OP_MUL) | ISIZE_64BIT:  mul_o(opcode); return 0;
		case X(OP_MUL) | ISIZE_8BIT:   mul_b(opcode); return 0;
		case X(OP_MUL) | MUL_HIGH | ISIZE_16BIT:  mulh_d(opcode); return 0;
		case X(OP_MUL) | MUL_HIGH | ISIZE_32BIT:  mulh_q(opcode); return 0;
		case X(OP_MUL) | MUL_HIGH | ISIZE_64BIT:  mulh_o(opcode); return 0;
		case X(OP_MUL) | MUL_HIGH | ISIZE_8BIT:   mulh_b(opcode); return 0;
		case X(OP_MUL) | MUL_HIGH | MUL_SIGNED | ISIZE_16BIT:  mulhs_d(opcode); return 0;
		case X(OP_MUL) | MUL_HIGH | MUL_SIGNED | ISIZE_32BIT:  mulhs_q(opcode); return 0;
		case X(OP_MUL) | MUL_HIGH | MUL_SIGNED | ISIZE_64BIT:  mulhs_o(opcode); return 0;
		case X(OP_MUL) | MUL_HIGH | MUL_SIGNED | ISIZE_8BIT:   mulhs_b(opcode); return 0;
		case X(OP_MUL) | MUL_SIGNED | ISIZE_16BIT:  mul_d(opcode); return 0;
		case X(OP_MUL) | MUL_SIGNED | ISIZE_32BIT:  mul_q(opcode); return 0;
		case X(OP_MUL) | MUL_SIGNED | ISIZE_64BIT:  mul_o(opcode); return 0;
		case X(OP_MUL) | MUL_SIGNED | ISIZE_8BIT:   mul_b(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | ISIZE_16BIT:  smul_d(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | ISIZE_32BIT:  smul_q(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | ISIZE_64BIT:  smul_o(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | ISIZE_8BIT:   smul_b(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | MUL_HIGH | ISIZE_16BIT:  smulh_d(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | MUL_HIGH | ISIZE_32BIT:  smulh_q(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | MUL_HIGH | ISIZE_64BIT:  smulh_o(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | MUL_HIGH | ISIZE_8BIT:   smulh_b(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | MUL_HIGH | MUL_SIGNED | ISIZE_16BIT:  smulhs_d(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | MUL_HIGH | MUL_SIGNED | ISIZE_32BIT:  smulhs_q(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | MUL_HIGH | MUL_SIGNED | ISIZE_64BIT:  smulhs_o(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | MUL_HIGH | MUL_SIGNED | ISIZE_8BIT:   smulhs_b(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | MUL_SIGNED | ISIZE_16BIT:  smul_d(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | MUL_SIGNED | ISIZE_32BIT:  smul_q(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | MUL_SIGNED | ISIZE_64BIT:  smul_o(opcode); return 0;
		case X(OP_MUL) | SIMD_FLAG | MUL_SIGNED | ISIZE_8BIT:   smul_b(opcode); return 0;
		case X(OP_NAND) | ROP2_MODE_AND | ISIZE_16BIT:  nand_and_d(opcode); return 0;
		case X(OP_NAND) | ROP2_MODE_AND | ISIZE_32BIT:  nand_and_q(opcode); return 0;
		case X(OP_NAND) | ROP2_MODE_AND | ISIZE_64BIT:  nand_and_o(opcode); return 0;
		case X(OP_NAND) | ROP2_MODE_AND | ISIZE_8BIT:   nand_and_b(opcode); return 0;
		case X(OP_NAND) | ROP2_MODE_DIRECT:  nand(opcode); return 0;
		case X(OP_NAND) | ROP2_MODE_OR | ISIZE_16BIT:  nand_or_d(opcode); return 0;
		case X(OP_NAND) | ROP2_MODE_OR | ISIZE_32BIT:  nand_or_q(opcode); return 0;
		case X(OP_NAND) | ROP2_MODE_OR | ISIZE_64BIT:  nand_or_o(opcode); return 0;
		case X(OP_NAND) | ROP2_MODE_OR | ISIZE_8BIT:   nand_or_b(opcode); return 0;
		case X(OP_NOR) | ROP2_MODE_AND | ISIZE_16BIT:  nor_and_d(opcode); return 0;
		case X(OP_NOR) | ROP2_MODE_AND | ISIZE_32BIT:  nor_and_q(opcode); return 0;
		case X(OP_NOR) | ROP2_MODE_AND | ISIZE_64BIT:  nor_and_o(opcode); return 0;
		case X(OP_NOR) | ROP2_MODE_AND | ISIZE_8BIT:   nor_and_b(opcode); return 0;
		case X(OP_NOR) | ROP2_MODE_DIRECT:  nor(opcode); return 0;
		case X(OP_NOR) | ROP2_MODE_OR | ISIZE_16BIT:  nor_or_d(opcode); return 0;
		case X(OP_NOR) | ROP2_MODE_OR | ISIZE_32BIT:  nor_or_q(opcode); return 0;
		case X(OP_NOR) | ROP2_MODE_OR | ISIZE_64BIT:  nor_or_o(opcode); return 0;
		case X(OP_NOR) | ROP2_MODE_OR | ISIZE_8BIT:   nor_or_b(opcode); return 0;
		case X(OP_OR) | ROP2_MODE_AND | ISIZE_16BIT:  or_and_d(opcode); return 0;
		case X(OP_OR) | ROP2_MODE_AND | ISIZE_32BIT:  or_and_q(opcode); return 0;
		case X(OP_OR) | ROP2_MODE_AND | ISIZE_64BIT:  or_and_o(opcode); return 0;
		case X(OP_OR) | ROP2_MODE_AND | ISIZE_8BIT:   or_and_b(opcode); return 0;
		case X(OP_OR) | ROP2_MODE_DIRECT:  or(opcode); return 0;
		case X(OP_OR) | ROP2_MODE_OR | ISIZE_16BIT:  or_or_d(opcode); return 0;
		case X(OP_OR) | ROP2_MODE_OR | ISIZE_32BIT:  or_or_q(opcode); return 0;
		case X(OP_OR) | ROP2_MODE_OR | ISIZE_64BIT:  or_or_o(opcode); return 0;
		case X(OP_OR) | ROP2_MODE_OR | ISIZE_8BIT:   or_or_b(opcode); return 0;
		case X(OP_ORN) | ROP2_MODE_AND | ISIZE_16BIT:  orn_and_d(opcode); return 0;
		case X(OP_ORN) | ROP2_MODE_AND | ISIZE_32BIT:  orn_and_q(opcode); return 0;
		case X(OP_ORN) | ROP2_MODE_AND | ISIZE_64BIT:  orn_and_o(opcode); return 0;
		case X(OP_ORN) | ROP2_MODE_AND | ISIZE_8BIT:   orn_and_b(opcode); return 0;
		case X(OP_ORN) | ROP2_MODE_DIRECT:  orn(opcode); return 0;
		case X(OP_ORN) | ROP2_MODE_OR | ISIZE_16BIT:  orn_or_d(opcode); return 0;
		case X(OP_ORN) | ROP2_MODE_OR | ISIZE_32BIT:  orn_or_q(opcode); return 0;
		case X(OP_ORN) | ROP2_MODE_OR | ISIZE_64BIT:  orn_or_o(opcode); return 0;
		case X(OP_ORN) | ROP2_MODE_OR | ISIZE_8BIT:   orn_or_b(opcode); return 0;
		case X(OP_POPC) | ISIZE_16BIT:  popc_d(opcode); return 0;
		case X(OP_POPC) | ISIZE_32BIT:  popc_q(opcode); return 0;
		case X(OP_POPC) | ISIZE_64BIT:  popc_o(opcode); return 0;
		case X(OP_POPC) | ISIZE_8BIT:   popc_b(opcode); return 0;
		case X(OP_POPC) | SIMD_FLAG | ISIZE_16BIT:  spopc_d(opcode); return 0;
		case X(OP_POPC) | SIMD_FLAG | ISIZE_32BIT:  spopc_q(opcode); return 0;
		case X(OP_POPC) | SIMD_FLAG | ISIZE_64BIT:  spopc_o(opcode); return 0;
		case X(OP_POPC) | SIMD_FLAG | ISIZE_8BIT:   spopc_b(opcode); return 0;
		case X(OP_REM) | DIV_SIGNED | ISIZE_16BIT:  rems_d(opcode); return 0;
		case X(OP_REM) | DIV_SIGNED | ISIZE_32BIT:  rems_q(opcode); return 0;
		case X(OP_REM) | DIV_SIGNED | ISIZE_64BIT:  rems_o(opcode); return 0;
		case X(OP_REM) | DIV_SIGNED | ISIZE_8BIT:   rems_b(opcode); return 0;
		case X(OP_REM) | ISIZE_16BIT:  rem_d(opcode); return 0;
		case X(OP_REM) | ISIZE_32BIT:  rem_q(opcode); return 0;
		case X(OP_REM) | ISIZE_64BIT:  rem_o(opcode); return 0;
		case X(OP_REM) | ISIZE_8BIT:   rem_b(opcode); return 0;
		case X(OP_REM) | SIMD_FLAG | DIV_SIGNED | ISIZE_16BIT:  srems_d(opcode); return 0;
		case X(OP_REM) | SIMD_FLAG | DIV_SIGNED | ISIZE_32BIT:  srems_q(opcode); return 0;
		case X(OP_REM) | SIMD_FLAG | DIV_SIGNED | ISIZE_64BIT:  srems_o(opcode); return 0;
		case X(OP_REM) | SIMD_FLAG | DIV_SIGNED | ISIZE_8BIT:   srems_b(opcode); return 0;
		case X(OP_REM) | SIMD_FLAG | ISIZE_16BIT:  srem_d(opcode); return 0;
		case X(OP_REM) | SIMD_FLAG | ISIZE_32BIT:  srem_q(opcode); return 0;
		case X(OP_REM) | SIMD_FLAG | ISIZE_64BIT:  srem_o(opcode); return 0;
		case X(OP_REM) | SIMD_FLAG | ISIZE_8BIT:   srem_b(opcode); return 0;
		case X(OP_ROP2) | ROP2_MODE_MUX:  mux(opcode); return 0;
		case X(OP_ROTL) | ISIZE_16BIT:  rotl_d(opcode); return 0;
		case X(OP_ROTL) | ISIZE_32BIT:  rotl_q(opcode); return 0;
		case X(OP_ROTL) | ISIZE_64BIT:  rotl_o(opcode); return 0;
		case X(OP_ROTL) | ISIZE_8BIT:   rotl_b(opcode); return 0;
		case X(OP_ROTL) | SIMD_FLAG | ISIZE_16BIT:  srotl_d(opcode); return 0;
		case X(OP_ROTL) | SIMD_FLAG | ISIZE_32BIT:  srotl_q(opcode); return 0;
		case X(OP_ROTL) | SIMD_FLAG | ISIZE_64BIT:  srotl_o(opcode); return 0;
		case X(OP_ROTL) | SIMD_FLAG | ISIZE_8BIT:   srotl_b(opcode); return 0;
		case X(OP_ROTL) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_16BIT:  srotlh_d(opcode); return 0;
		case X(OP_ROTL) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_32BIT:  srotlh_q(opcode); return 0;
		case X(OP_ROTL) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_64BIT:  srotlh_o(opcode); return 0;
		case X(OP_ROTL) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_8BIT:   srotlh_b(opcode); return 0;
		case X(OP_ROTR) | ISIZE_16BIT:  rotr_d(opcode); return 0;
		case X(OP_ROTR) | ISIZE_32BIT:  rotr_q(opcode); return 0;
		case X(OP_ROTR) | ISIZE_64BIT:  rotr_o(opcode); return 0;
		case X(OP_ROTR) | ISIZE_8BIT:   rotr_b(opcode); return 0;
		case X(OP_ROTR) | SIMD_FLAG | ISIZE_16BIT:  srotr_d(opcode); return 0;
		case X(OP_ROTR) | SIMD_FLAG | ISIZE_32BIT:  srotr_q(opcode); return 0;
		case X(OP_ROTR) | SIMD_FLAG | ISIZE_64BIT:  srotr_o(opcode); return 0;
		case X(OP_ROTR) | SIMD_FLAG | ISIZE_8BIT:   srotr_b(opcode); return 0;
		case X(OP_ROTR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_16BIT:  srotrh_d(opcode); return 0;
		case X(OP_ROTR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_32BIT:  srotrh_q(opcode); return 0;
		case X(OP_ROTR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_64BIT:  srotrh_o(opcode); return 0;
		case X(OP_ROTR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_8BIT:   srotrh_b(opcode); return 0;
		case X(OP_SHIFTL) | ISIZE_16BIT:  shiftl_d(opcode); return 0;
		case X(OP_SHIFTL) | ISIZE_32BIT:  shiftl_q(opcode); return 0;
		case X(OP_SHIFTL) | ISIZE_64BIT:  shiftl_o(opcode); return 0;
		case X(OP_SHIFTL) | ISIZE_8BIT:   shiftl_b(opcode); return 0;
		case X(OP_SHIFTL) | SIMD_FLAG | ISIZE_16BIT:  sshiftl_d(opcode); return 0;
		case X(OP_SHIFTL) | SIMD_FLAG | ISIZE_32BIT:  sshiftl_q(opcode); return 0;
		case X(OP_SHIFTL) | SIMD_FLAG | ISIZE_64BIT:  sshiftl_o(opcode); return 0;
		case X(OP_SHIFTL) | SIMD_FLAG | ISIZE_8BIT:   sshiftl_b(opcode); return 0;
		case X(OP_SHIFTL) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_16BIT:  sshiftlh_d(opcode); return 0;
		case X(OP_SHIFTL) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_32BIT:  sshiftlh_q(opcode); return 0;
		case X(OP_SHIFTL) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_64BIT:  sshiftlh_o(opcode); return 0;
		case X(OP_SHIFTL) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_8BIT:   sshiftlh_b(opcode); return 0;
		case X(OP_SHIFTR) | ISIZE_16BIT:  shiftr_d(opcode); return 0;
		case X(OP_SHIFTR) | ISIZE_32BIT:  shiftr_q(opcode); return 0;
		case X(OP_SHIFTR) | ISIZE_64BIT:  shiftr_o(opcode); return 0;
		case X(OP_SHIFTR) | ISIZE_8BIT:   shiftr_b(opcode); return 0;
		case X(OP_SHIFTR) | SIMD_FLAG | ISIZE_16BIT:  sshiftr_d(opcode); return 0;
		case X(OP_SHIFTR) | SIMD_FLAG | ISIZE_32BIT:  sshiftr_q(opcode); return 0;
		case X(OP_SHIFTR) | SIMD_FLAG | ISIZE_64BIT:  sshiftr_o(opcode); return 0;
		case X(OP_SHIFTR) | SIMD_FLAG | ISIZE_8BIT:   sshiftr_b(opcode); return 0;
		case X(OP_SHIFTR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_16BIT:  sshiftrh_d(opcode); return 0;
		case X(OP_SHIFTR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_32BIT:  sshiftrh_q(opcode); return 0;
		case X(OP_SHIFTR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_64BIT:  sshiftrh_o(opcode); return 0;
		case X(OP_SHIFTR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_8BIT:   sshiftrh_b(opcode); return 0;
		case X(OP_SHIFTRA) | ISIZE_16BIT:  shiftra_d(opcode); return 0;
		case X(OP_SHIFTRA) | ISIZE_32BIT:  shiftra_q(opcode); return 0;
		case X(OP_SHIFTRA) | ISIZE_64BIT:  shiftra_o(opcode); return 0;
		case X(OP_SHIFTRA) | ISIZE_8BIT:   shiftra_b(opcode); return 0;
		case X(OP_SHIFTRA) | SIMD_FLAG | ISIZE_16BIT:  sshiftra_d(opcode); return 0;
		case X(OP_SHIFTRA) | SIMD_FLAG | ISIZE_32BIT:  sshiftra_q(opcode); return 0;
		case X(OP_SHIFTRA) | SIMD_FLAG | ISIZE_64BIT:  sshiftra_o(opcode); return 0;
		case X(OP_SHIFTRA) | SIMD_FLAG | ISIZE_8BIT:   sshiftra_b(opcode); return 0;
		case X(OP_SHIFTRA) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_16BIT:  sshiftrah_d(opcode); return 0;
		case X(OP_SHIFTRA) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_32BIT:  sshiftrah_q(opcode); return 0;
		case X(OP_SHIFTRA) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_64BIT:  sshiftrah_o(opcode); return 0;
		case X(OP_SHIFTRA) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_8BIT:   sshiftrah_b(opcode); return 0;
		case X(OP_STOREM):  storem(opcode); return 0;
		case X(OP_SUB) | SIMD_FLAG | SUB_MODE_BORROW | ISIZE_16BIT:  ssubb_d(opcode); return 0;
		case X(OP_SUB) | SIMD_FLAG | SUB_MODE_BORROW | ISIZE_32BIT:  ssubb_q(opcode); return 0;
		case X(OP_SUB) | SIMD_FLAG | SUB_MODE_BORROW | ISIZE_64BIT:  ssubb_o(opcode); return 0;
		case X(OP_SUB) | SIMD_FLAG | SUB_MODE_BORROW | ISIZE_8BIT:   ssubb_b(opcode); return 0;
		case X(OP_SUB) | SIMD_FLAG | SUB_MODE_FLOOR | ISIZE_16BIT:  ssubf_d(opcode); return 0;
		case X(OP_SUB) | SIMD_FLAG | SUB_MODE_FLOOR | ISIZE_32BIT:  ssubf_q(opcode); return 0;
		case X(OP_SUB) | SIMD_FLAG | SUB_MODE_FLOOR | ISIZE_64BIT:  ssubf_o(opcode); return 0;
		case X(OP_SUB) | SIMD_FLAG | SUB_MODE_FLOOR | ISIZE_8BIT:   ssubf_b(opcode); return 0;
		case X(OP_SUB) | SIMD_FLAG | SUB_MODE_NORMAL | ISIZE_16BIT:  ssub_d(opcode); return 0;
		case X(OP_SUB) | SIMD_FLAG | SUB_MODE_NORMAL | ISIZE_32BIT:  ssub_q(opcode); return 0;
		case X(OP_SUB) | SIMD_FLAG | SUB_MODE_NORMAL | ISIZE_64BIT:  ssub_o(opcode); return 0;
		case X(OP_SUB) | SIMD_FLAG | SUB_MODE_NORMAL | ISIZE_8BIT:   ssub_b(opcode); return 0;
		case X(OP_SUB) | SUB_MODE_BORROW | ISIZE_16BIT:  subb_d(opcode); return 0;
		case X(OP_SUB) | SUB_MODE_BORROW | ISIZE_32BIT:  subb_q(opcode); return 0;
		case X(OP_SUB) | SUB_MODE_BORROW | ISIZE_64BIT:  subb_o(opcode); return 0;
		case X(OP_SUB) | SUB_MODE_BORROW | ISIZE_8BIT:   subb_b(opcode); return 0;
		case X(OP_SUB) | SUB_MODE_FLOOR | ISIZE_16BIT:  subf_d(opcode); return 0;
		case X(OP_SUB) | SUB_MODE_FLOOR | ISIZE_32BIT:  subf_q(opcode); return 0;
		case X(OP_SUB) | SUB_MODE_FLOOR | ISIZE_64BIT:  subf_o(opcode); return 0;
		case X(OP_SUB) | SUB_MODE_FLOOR | ISIZE_8BIT:   subf_b(opcode); return 0;
		case X(OP_SUB) | SUB_MODE_NORMAL | ISIZE_16BIT:  sub_d(opcode); return 0;
		case X(OP_SUB) | SUB_MODE_NORMAL | ISIZE_32BIT:  sub_q(opcode); return 0;
		case X(OP_SUB) | SUB_MODE_NORMAL | ISIZE_64BIT:  sub_o(opcode); return 0;
		case X(OP_SUB) | SUB_MODE_NORMAL | ISIZE_8BIT:   sub_b(opcode); return 0;
		case X(OP_XNOR) | ROP2_MODE_AND | ISIZE_16BIT:  xnor_and_d(opcode); return 0;
		case X(OP_XNOR) | ROP2_MODE_AND | ISIZE_32BIT:  xnor_and_q(opcode); return 0;
		case X(OP_XNOR) | ROP2_MODE_AND | ISIZE_64BIT:  xnor_and_o(opcode); return 0;
		case X(OP_XNOR) | ROP2_MODE_AND | ISIZE_8BIT:   xnor_and_b(opcode); return 0;
		case X(OP_XNOR) | ROP2_MODE_DIRECT:  xnor(opcode); return 0;
		case X(OP_XNOR) | ROP2_MODE_OR | ISIZE_16BIT:  xnor_or_d(opcode); return 0;
		case X(OP_XNOR) | ROP2_MODE_OR | ISIZE_32BIT:  xnor_or_q(opcode); return 0;
		case X(OP_XNOR) | ROP2_MODE_OR | ISIZE_64BIT:  xnor_or_o(opcode); return 0;
		case X(OP_XNOR) | ROP2_MODE_OR | ISIZE_8BIT:   xnor_or_b(opcode); return 0;
		case X(OP_XOR) | ROP2_MODE_AND | ISIZE_16BIT:  xor_and_d(opcode); return 0;
		case X(OP_XOR) | ROP2_MODE_AND | ISIZE_32BIT:  xor_and_q(opcode); return 0;
		case X(OP_XOR) | ROP2_MODE_AND | ISIZE_64BIT:  xor_and_o(opcode); return 0;
		case X(OP_XOR) | ROP2_MODE_AND | ISIZE_8BIT:   xor_and_b(opcode); return 0;
		case X(OP_XOR) | ROP2_MODE_DIRECT:  xor(opcode); return 0;
		case X(OP_XOR) | ROP2_MODE_OR | ISIZE_16BIT:  xor_or_d(opcode); return 0;
		case X(OP_XOR) | ROP2_MODE_OR | ISIZE_32BIT:  xor_or_q(opcode); return 0;
		case X(OP_XOR) | ROP2_MODE_OR | ISIZE_64BIT:  xor_or_o(opcode); return 0;
		case X(OP_XOR) | ROP2_MODE_OR | ISIZE_8BIT:   xor_or_b(opcode); return 0;
	}
	switch (opcode & 0xfffff000) {
		/* r2, r1 */
		case X(OP_ABS) | ISIZE_16BIT:  abs_d(opcode); return 0;
		case X(OP_ABS) | ISIZE_32BIT:  abs_q(opcode); return 0;
		case X(OP_ABS) | ISIZE_64BIT:  abs_o(opcode); return 0;
		case X(OP_ABS) | ISIZE_8BIT:   abs_b(opcode); return 0;
		case X(OP_ABS) | SIMD_FLAG | ISIZE_16BIT:  sabs_d(opcode); return 0;
		case X(OP_ABS) | SIMD_FLAG | ISIZE_32BIT:  sabs_q(opcode); return 0;
		case X(OP_ABS) | SIMD_FLAG | ISIZE_64BIT:  sabs_o(opcode); return 0;
		case X(OP_ABS) | SIMD_FLAG | ISIZE_8BIT:   sabs_b(opcode); return 0;
		case X(OP_BYTEREV) | ISIZE_16BIT:  byterev_d(opcode); return 0;
		case X(OP_BYTEREV) | ISIZE_32BIT:  byterev_q(opcode); return 0;
		case X(OP_BYTEREV) | ISIZE_64BIT:  byterev_o(opcode); return 0;
		case X(OP_BYTEREV) | ISIZE_8BIT:   byterev_b(opcode); return 0;
		case X(OP_BYTEREV) | SIMD_FLAG | ISIZE_16BIT:  sbyterev_d(opcode); return 0;
		case X(OP_BYTEREV) | SIMD_FLAG | ISIZE_32BIT:  sbyterev_q(opcode); return 0;
		case X(OP_BYTEREV) | SIMD_FLAG | ISIZE_64BIT:  sbyterev_o(opcode); return 0;
		case X(OP_BYTEREV) | SIMD_FLAG | ISIZE_8BIT:   sbyterev_b(opcode); return 0;
		case X(OP_CSHIFTL):  cshiftl(opcode); return 0;
		case X(OP_CSHIFTR):  cshiftr(opcode); return 0;
		case X(OP_D2INT) | ROUND_CEIL | ISIZE_16BIT:  d2intc_d(opcode); return 0;
		case X(OP_D2INT) | ROUND_CEIL | ISIZE_32BIT:  d2intc_q(opcode); return 0;
		case X(OP_D2INT) | ROUND_CEIL | ISIZE_64BIT:  d2intc_o(opcode); return 0;
		case X(OP_D2INT) | ROUND_CEIL | ISIZE_8BIT:   d2intc_b(opcode); return 0;
		case X(OP_D2INT) | ROUND_FLOOR | ISIZE_16BIT:  d2intf_d(opcode); return 0;
		case X(OP_D2INT) | ROUND_FLOOR | ISIZE_32BIT:  d2intf_q(opcode); return 0;
		case X(OP_D2INT) | ROUND_FLOOR | ISIZE_64BIT:  d2intf_o(opcode); return 0;
		case X(OP_D2INT) | ROUND_FLOOR | ISIZE_8BIT:   d2intf_b(opcode); return 0;
		case X(OP_D2INT) | ROUND_NEAREST | ISIZE_16BIT:  d2intr_d(opcode); return 0;
		case X(OP_D2INT) | ROUND_NEAREST | ISIZE_32BIT:  d2intr_q(opcode); return 0;
		case X(OP_D2INT) | ROUND_NEAREST | ISIZE_64BIT:  d2intr_o(opcode); return 0;
		case X(OP_D2INT) | ROUND_NEAREST | ISIZE_8BIT:   d2intr_b(opcode); return 0;
		case X(OP_D2INT) | ROUND_TRUNC | ISIZE_16BIT:  d2intt_d(opcode); return 0;
		case X(OP_D2INT) | ROUND_TRUNC | ISIZE_32BIT:  d2intt_q(opcode); return 0;
		case X(OP_D2INT) | ROUND_TRUNC | ISIZE_64BIT:  d2intt_o(opcode); return 0;
		case X(OP_D2INT) | ROUND_TRUNC | ISIZE_8BIT:   d2intt_b(opcode); return 0;
		case X(OP_DEC) | ISIZE_16BIT:  dec_d(opcode); return 0;
		case X(OP_DEC) | ISIZE_32BIT:  dec_q(opcode); return 0;
		case X(OP_DEC) | ISIZE_64BIT:  dec_o(opcode); return 0;
		case X(OP_DEC) | ISIZE_8BIT:   dec_b(opcode); return 0;
		case X(OP_DEC) | SIMD_FLAG | ISIZE_16BIT:  sdec_d(opcode); return 0;
		case X(OP_DEC) | SIMD_FLAG | ISIZE_32BIT:  sdec_q(opcode); return 0;
		case X(OP_DEC) | SIMD_FLAG | ISIZE_64BIT:  sdec_o(opcode); return 0;
		case X(OP_DEC) | SIMD_FLAG | ISIZE_8BIT:   sdec_b(opcode); return 0;
		case X(OP_F2INT) | ROUND_CEIL | ISIZE_16BIT:  f2intc_d(opcode); return 0;
		case X(OP_F2INT) | ROUND_CEIL | ISIZE_32BIT:  f2intc_q(opcode); return 0;
		case X(OP_F2INT) | ROUND_CEIL | ISIZE_64BIT:  f2intc_o(opcode); return 0;
		case X(OP_F2INT) | ROUND_CEIL | ISIZE_8BIT:   f2intc_b(opcode); return 0;
		case X(OP_F2INT) | ROUND_FLOOR | ISIZE_16BIT:  f2intf_d(opcode); return 0;
		case X(OP_F2INT) | ROUND_FLOOR | ISIZE_32BIT:  f2intf_q(opcode); return 0;
		case X(OP_F2INT) | ROUND_FLOOR | ISIZE_64BIT:  f2intf_o(opcode); return 0;
		case X(OP_F2INT) | ROUND_FLOOR | ISIZE_8BIT:   f2intf_b(opcode); return 0;
		case X(OP_F2INT) | ROUND_NEAREST | ISIZE_16BIT:  f2intr_d(opcode); return 0;
		case X(OP_F2INT) | ROUND_NEAREST | ISIZE_32BIT:  f2intr_q(opcode); return 0;
		case X(OP_F2INT) | ROUND_NEAREST | ISIZE_64BIT:  f2intr_o(opcode); return 0;
		case X(OP_F2INT) | ROUND_NEAREST | ISIZE_8BIT:   f2intr_b(opcode); return 0;
		case X(OP_F2INT) | ROUND_TRUNC | ISIZE_16BIT:  f2intt_d(opcode); return 0;
		case X(OP_F2INT) | ROUND_TRUNC | ISIZE_32BIT:  f2intt_q(opcode); return 0;
		case X(OP_F2INT) | ROUND_TRUNC | ISIZE_64BIT:  f2intt_o(opcode); return 0;
		case X(OP_F2INT) | ROUND_TRUNC | ISIZE_8BIT:   f2intt_b(opcode); return 0;
		case X(OP_FIAPRX) | FSIZE_DOUBLE:  fiaprx_d(opcode); return 0;
		case X(OP_FIAPRX) | FSIZE_SINGLE:  fiaprx_f(opcode); return 0;
		case X(OP_FIAPRX) | SIMD_FLAG | FSIZE_DOUBLE:  sfiaprx_d(opcode); return 0;
		case X(OP_FIAPRX) | SIMD_FLAG | FSIZE_SINGLE:  sfiaprx_f(opcode); return 0;
		case X(OP_FSQRT) | FSIZE_DOUBLE:  fsqrt_d(opcode); return 0;
		case X(OP_FSQRT) | FSIZE_SINGLE:  fsqrt_f(opcode); return 0;
		case X(OP_FSQRT) | SIMD_FLAG | FSIZE_DOUBLE:  sfsqrt_d(opcode); return 0;
		case X(OP_FSQRT) | SIMD_FLAG | FSIZE_SINGLE:  sfsqrt_f(opcode); return 0;
		case X(OP_FSQRTIAPRX) | FSIZE_DOUBLE:  fsqrtiaprx_d(opcode); return 0;
		case X(OP_FSQRTIAPRX) | FSIZE_SINGLE:  fsqrtiaprx_f(opcode); return 0;
		case X(OP_FSQRTIAPRX) | SIMD_FLAG | FSIZE_DOUBLE:  sfsqrtiaprx_d(opcode); return 0;
		case X(OP_FSQRTIAPRX) | SIMD_FLAG | FSIZE_SINGLE:  sfsqrtiaprx_f(opcode); return 0;
		case X(OP_GET):  get(opcode); return 0;
		case X(OP_INC) | ISIZE_16BIT:  inc_d(opcode); return 0;
		case X(OP_INC) | ISIZE_32BIT:  inc_q(opcode); return 0;
		case X(OP_INC) | ISIZE_64BIT:  inc_o(opcode); return 0;
		case X(OP_INC) | ISIZE_8BIT:   inc_b(opcode); return 0;
		case X(OP_INC) | SIMD_FLAG | ISIZE_16BIT:  sinc_d(opcode); return 0;
		case X(OP_INC) | SIMD_FLAG | ISIZE_32BIT:  sinc_q(opcode); return 0;
		case X(OP_INC) | SIMD_FLAG | ISIZE_64BIT:  sinc_o(opcode); return 0;
		case X(OP_INC) | SIMD_FLAG | ISIZE_8BIT:   sinc_b(opcode); return 0;
		case X(OP_INT2D) | ROUND_CEIL | ISIZE_16BIT:  int2dc_d(opcode); return 0;
		case X(OP_INT2D) | ROUND_CEIL | ISIZE_32BIT:  int2dc_q(opcode); return 0;
		case X(OP_INT2D) | ROUND_CEIL | ISIZE_64BIT:  int2dc_o(opcode); return 0;
		case X(OP_INT2D) | ROUND_CEIL | ISIZE_8BIT:   int2dc_b(opcode); return 0;
		case X(OP_INT2D) | ROUND_FLOOR | ISIZE_16BIT:  int2df_d(opcode); return 0;
		case X(OP_INT2D) | ROUND_FLOOR | ISIZE_32BIT:  int2df_q(opcode); return 0;
		case X(OP_INT2D) | ROUND_FLOOR | ISIZE_64BIT:  int2df_o(opcode); return 0;
		case X(OP_INT2D) | ROUND_FLOOR | ISIZE_8BIT:   int2df_b(opcode); return 0;
		case X(OP_INT2D) | ROUND_NEAREST | ISIZE_16BIT:  int2dr_d(opcode); return 0;
		case X(OP_INT2D) | ROUND_NEAREST | ISIZE_32BIT:  int2dr_q(opcode); return 0;
		case X(OP_INT2D) | ROUND_NEAREST | ISIZE_64BIT:  int2dr_o(opcode); return 0;
		case X(OP_INT2D) | ROUND_NEAREST | ISIZE_8BIT:   int2dr_b(opcode); return 0;
		case X(OP_INT2D) | ROUND_TRUNC | ISIZE_16BIT:  int2dt_d(opcode); return 0;
		case X(OP_INT2D) | ROUND_TRUNC | ISIZE_32BIT:  int2dt_q(opcode); return 0;
		case X(OP_INT2D) | ROUND_TRUNC | ISIZE_64BIT:  int2dt_o(opcode); return 0;
		case X(OP_INT2D) | ROUND_TRUNC | ISIZE_8BIT:   int2dt_b(opcode); return 0;
		case X(OP_INT2F) | ROUND_CEIL | ISIZE_16BIT:  int2fc_d(opcode); return 0;
		case X(OP_INT2F) | ROUND_CEIL | ISIZE_32BIT:  int2fc_q(opcode); return 0;
		case X(OP_INT2F) | ROUND_CEIL | ISIZE_64BIT:  int2fc_o(opcode); return 0;
		case X(OP_INT2F) | ROUND_CEIL | ISIZE_8BIT:   int2fc_b(opcode); return 0;
		case X(OP_INT2F) | ROUND_FLOOR | ISIZE_16BIT:  int2ff_d(opcode); return 0;
		case X(OP_INT2F) | ROUND_FLOOR | ISIZE_32BIT:  int2ff_q(opcode); return 0;
		case X(OP_INT2F) | ROUND_FLOOR | ISIZE_64BIT:  int2ff_o(opcode); return 0;
		case X(OP_INT2F) | ROUND_FLOOR | ISIZE_8BIT:   int2ff_b(opcode); return 0;
		case X(OP_INT2F) | ROUND_NEAREST | ISIZE_16BIT:  int2fr_d(opcode); return 0;
		case X(OP_INT2F) | ROUND_NEAREST | ISIZE_32BIT:  int2fr_q(opcode); return 0;
		case X(OP_INT2F) | ROUND_NEAREST | ISIZE_64BIT:  int2fr_o(opcode); return 0;
		case X(OP_INT2F) | ROUND_NEAREST | ISIZE_8BIT:   int2fr_b(opcode); return 0;
		case X(OP_INT2F) | ROUND_TRUNC | ISIZE_16BIT:  int2ft_d(opcode); return 0;
		case X(OP_INT2F) | ROUND_TRUNC | ISIZE_32BIT:  int2ft_q(opcode); return 0;
		case X(OP_INT2F) | ROUND_TRUNC | ISIZE_64BIT:  int2ft_o(opcode); return 0;
		case X(OP_INT2F) | ROUND_TRUNC | ISIZE_8BIT:   int2ft_b(opcode); return 0;
		case X(OP_LOADADDR) | LOADADDR_DATA:  loadaddrd(opcode); return 0;
		case X(OP_LOADADDR):  loadaddr(opcode); return 0;
		case X(OP_LOOP):  loop(opcode); return 0;
		case X(OP_NABS) | ISIZE_16BIT:  nabs_d(opcode); return 0;
		case X(OP_NABS) | ISIZE_32BIT:  nabs_q(opcode); return 0;
		case X(OP_NABS) | ISIZE_64BIT:  nabs_o(opcode); return 0;
		case X(OP_NABS) | ISIZE_8BIT:   nabs_b(opcode); return 0;
		case X(OP_NABS) | SIMD_FLAG | ISIZE_16BIT:  snabs_d(opcode); return 0;
		case X(OP_NABS) | SIMD_FLAG | ISIZE_32BIT:  snabs_q(opcode); return 0;
		case X(OP_NABS) | SIMD_FLAG | ISIZE_64BIT:  snabs_o(opcode); return 0;
		case X(OP_NABS) | SIMD_FLAG | ISIZE_8BIT:   snabs_b(opcode); return 0;
		case X(OP_NEG) | ISIZE_16BIT:  neg_d(opcode); return 0;
		case X(OP_NEG) | ISIZE_32BIT:  neg_q(opcode); return 0;
		case X(OP_NEG) | ISIZE_64BIT:  neg_o(opcode); return 0;
		case X(OP_NEG) | ISIZE_8BIT:   neg_b(opcode); return 0;
		case X(OP_NEG) | SIMD_FLAG | ISIZE_16BIT:  sneg_d(opcode); return 0;
		case X(OP_NEG) | SIMD_FLAG | ISIZE_32BIT:  sneg_q(opcode); return 0;
		case X(OP_NEG) | SIMD_FLAG | ISIZE_64BIT:  sneg_o(opcode); return 0;
		case X(OP_NEG) | SIMD_FLAG | ISIZE_8BIT:   sneg_b(opcode); return 0;
		case X(OP_PUT):  put(opcode); return 0;
		case X(OP_SCAN) | ISIZE_16BIT:  lsb1_d(opcode); return 0;
		case X(OP_SCAN) | ISIZE_32BIT:  lsb1_q(opcode); return 0;
		case X(OP_SCAN) | ISIZE_64BIT:  lsb1_o(opcode); return 0;
		case X(OP_SCAN) | ISIZE_8BIT:   lsb1_b(opcode); return 0;
		case X(OP_SCAN) | SCAN_NEGATE | ISIZE_16BIT:  lsb0_d(opcode); return 0;
		case X(OP_SCAN) | SCAN_NEGATE | ISIZE_32BIT:  lsb0_q(opcode); return 0;
		case X(OP_SCAN) | SCAN_NEGATE | ISIZE_64BIT:  lsb0_o(opcode); return 0;
		case X(OP_SCAN) | SCAN_NEGATE | ISIZE_8BIT:   lsb0_b(opcode); return 0;
		case X(OP_SCAN) | SCAN_REVERSE | ISIZE_16BIT:  msb1_d(opcode); return 0;
		case X(OP_SCAN) | SCAN_REVERSE | ISIZE_32BIT:  msb1_q(opcode); return 0;
		case X(OP_SCAN) | SCAN_REVERSE | ISIZE_64BIT:  msb1_o(opcode); return 0;
		case X(OP_SCAN) | SCAN_REVERSE | ISIZE_8BIT:   msb1_b(opcode); return 0;
		case X(OP_SCAN) | SCAN_REVERSE | SCAN_NEGATE | ISIZE_16BIT:  msb0_d(opcode); return 0;
		case X(OP_SCAN) | SCAN_REVERSE | SCAN_NEGATE | ISIZE_32BIT:  msb0_q(opcode); return 0;
		case X(OP_SCAN) | SCAN_REVERSE | SCAN_NEGATE | ISIZE_64BIT:  msb0_o(opcode); return 0;
		case X(OP_SCAN) | SCAN_REVERSE | SCAN_NEGATE | ISIZE_8BIT:   msb0_b(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | ISIZE_16BIT:  slsb1_d(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | ISIZE_32BIT:  slsb1_q(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | ISIZE_64BIT:  slsb1_o(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | ISIZE_8BIT:   slsb1_b(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | SCAN_NEGATE | ISIZE_16BIT:  slsb0_d(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | SCAN_NEGATE | ISIZE_32BIT:  slsb0_q(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | SCAN_NEGATE | ISIZE_64BIT:  slsb0_o(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | SCAN_NEGATE | ISIZE_8BIT:   slsb0_b(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | SCAN_REVERSE | ISIZE_16BIT:  smsb1_d(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | SCAN_REVERSE | ISIZE_32BIT:  smsb1_q(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | SCAN_REVERSE | ISIZE_64BIT:  smsb1_o(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | SCAN_REVERSE | ISIZE_8BIT:   smsb1_b(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | SCAN_REVERSE | SCAN_NEGATE | ISIZE_16BIT:  smsb0_d(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | SCAN_REVERSE | SCAN_NEGATE | ISIZE_32BIT:  smsb0_q(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | SCAN_REVERSE | SCAN_NEGATE | ISIZE_64BIT:  smsb0_o(opcode); return 0;
		case X(OP_SCAN) | SIMD_FLAG | SCAN_REVERSE | SCAN_NEGATE | ISIZE_8BIT:   smsb0_b(opcode); return 0;
		case X(OP_SDUP) | ISIZE_16BIT:  sdup_d(opcode); return 0;
		case X(OP_SDUP) | ISIZE_32BIT:  sdup_q(opcode); return 0;
		case X(OP_SDUP) | ISIZE_64BIT:  sdup_o(opcode); return 0;
		case X(OP_SDUP) | ISIZE_8BIT:   sdup_b(opcode); return 0;
		case X(OP_WIDEN) | ISIZE_16BIT:  widen_d(opcode); return 0;
		case X(OP_WIDEN) | ISIZE_32BIT:  widen_q(opcode); return 0;
		case X(OP_WIDEN) | ISIZE_64BIT:  widen_o(opcode); return 0;
		case X(OP_WIDEN) | ISIZE_8BIT:   widen_b(opcode); return 0;
		/* XXX: subject to change */
		case X(OP_CLOAD) | CC_LSB | ISIZE_16BIT:  cloadl_d(opcode); return 0;
		case X(OP_CLOAD) | CC_LSB | ISIZE_32BIT:  cloadl_q(opcode); return 0;
		case X(OP_CLOAD) | CC_LSB | ISIZE_64BIT:  cloadl_o(opcode); return 0;
		case X(OP_CLOAD) | CC_LSB | ISIZE_8BIT:   cloadl_b(opcode); return 0;
		case X(OP_CLOAD) | CC_MSB | ISIZE_16BIT:  cloadm_d(opcode); return 0;
		case X(OP_CLOAD) | CC_MSB | ISIZE_32BIT:  cloadm_q(opcode); return 0;
		case X(OP_CLOAD) | CC_MSB | ISIZE_64BIT:  cloadm_o(opcode); return 0;
		case X(OP_CLOAD) | CC_MSB | ISIZE_8BIT:   cloadm_b(opcode); return 0;
		case X(OP_CLOAD) | CC_NAN | ISIZE_16BIT:  cloadn_d(opcode); return 0;
		case X(OP_CLOAD) | CC_NAN | ISIZE_32BIT:  cloadn_q(opcode); return 0;
		case X(OP_CLOAD) | CC_NAN | ISIZE_64BIT:  cloadn_o(opcode); return 0;
		case X(OP_CLOAD) | CC_NAN | ISIZE_8BIT:   cloadn_b(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_LSB | ISIZE_16BIT:  cloadnl_d(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_LSB | ISIZE_32BIT:  cloadnl_q(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_LSB | ISIZE_64BIT:  cloadnl_o(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_LSB | ISIZE_8BIT:   cloadnl_b(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_MSB | ISIZE_16BIT:  cloadnm_d(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_MSB | ISIZE_32BIT:  cloadnm_q(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_MSB | ISIZE_64BIT:  cloadnm_o(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_MSB | ISIZE_8BIT:   cloadnm_b(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_NAN | ISIZE_16BIT:  cloadnn_d(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_NAN | ISIZE_32BIT:  cloadnn_q(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_NAN | ISIZE_64BIT:  cloadnn_o(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_NAN | ISIZE_8BIT:   cloadnn_b(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_ZERO | ISIZE_16BIT:  cloadnz_d(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_ZERO | ISIZE_32BIT:  cloadnz_q(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_ZERO | ISIZE_64BIT:  cloadnz_o(opcode); return 0;
		case X(OP_CLOAD) | CC_NOT_ZERO | ISIZE_8BIT:   cloadnz_b(opcode); return 0;
		case X(OP_CLOAD) | CC_ZERO | ISIZE_16BIT:  cloadz_d(opcode); return 0;
		case X(OP_CLOAD) | CC_ZERO | ISIZE_32BIT:  cloadz_q(opcode); return 0;
		case X(OP_CLOAD) | CC_ZERO | ISIZE_64BIT:  cloadz_o(opcode); return 0;
		case X(OP_CLOAD) | CC_ZERO | ISIZE_8BIT:   cloadz_b(opcode); return 0;
		case X(OP_CSTORE) | CC_LSB | ISIZE_16BIT:  cstorel_d(opcode); return 0;
		case X(OP_CSTORE) | CC_LSB | ISIZE_32BIT:  cstorel_q(opcode); return 0;
		case X(OP_CSTORE) | CC_LSB | ISIZE_64BIT:  cstorel_o(opcode); return 0;
		case X(OP_CSTORE) | CC_LSB | ISIZE_8BIT:   cstorel_b(opcode); return 0;
		case X(OP_CSTORE) | CC_MSB | ISIZE_16BIT:  cstorem_d(opcode); return 0;
		case X(OP_CSTORE) | CC_MSB | ISIZE_32BIT:  cstorem_q(opcode); return 0;
		case X(OP_CSTORE) | CC_MSB | ISIZE_64BIT:  cstorem_o(opcode); return 0;
		case X(OP_CSTORE) | CC_MSB | ISIZE_8BIT:   cstorem_b(opcode); return 0;
		case X(OP_CSTORE) | CC_NAN | ISIZE_16BIT:  cstoren_d(opcode); return 0;
		case X(OP_CSTORE) | CC_NAN | ISIZE_32BIT:  cstoren_q(opcode); return 0;
		case X(OP_CSTORE) | CC_NAN | ISIZE_64BIT:  cstoren_o(opcode); return 0;
		case X(OP_CSTORE) | CC_NAN | ISIZE_8BIT:   cstoren_b(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_LSB | ISIZE_16BIT:  cstorenl_d(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_LSB | ISIZE_32BIT:  cstorenl_q(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_LSB | ISIZE_64BIT:  cstorenl_o(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_LSB | ISIZE_8BIT:   cstorenl_b(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_MSB | ISIZE_16BIT:  cstorenm_d(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_MSB | ISIZE_32BIT:  cstorenm_q(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_MSB | ISIZE_64BIT:  cstorenm_o(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_MSB | ISIZE_8BIT:   cstorenm_b(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_NAN | ISIZE_16BIT:  cstorenn_d(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_NAN | ISIZE_32BIT:  cstorenn_q(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_NAN | ISIZE_64BIT:  cstorenn_o(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_NAN | ISIZE_8BIT:   cstorenn_b(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_ZERO | ISIZE_16BIT:  cstorenz_d(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_ZERO | ISIZE_32BIT:  cstorenz_q(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_ZERO | ISIZE_64BIT:  cstorenz_o(opcode); return 0;
		case X(OP_CSTORE) | CC_NOT_ZERO | ISIZE_8BIT:   cstorenz_b(opcode); return 0;
		case X(OP_CSTORE) | CC_ZERO | ISIZE_16BIT:  cstorez_d(opcode); return 0;
		case X(OP_CSTORE) | CC_ZERO | ISIZE_32BIT:  cstorez_q(opcode); return 0;
		case X(OP_CSTORE) | CC_ZERO | ISIZE_64BIT:  cstorez_o(opcode); return 0;
		case X(OP_CSTORE) | CC_ZERO | ISIZE_8BIT:   cstorez_b(opcode); return 0;
	}
	ex(EX_INVALID);
	return 1;
}
/* XXX: not documented
		case X(OP_BCHG) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_16BIT:  sbchgh_d(opcode); return 0;
		case X(OP_BCHG) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_32BIT:  sbchgh_q(opcode); return 0;
		case X(OP_BCHG) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_64BIT:  sbchgh_o(opcode); return 0;
		case X(OP_BCHG) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_8BIT:   sbchgh_b(opcode); return 0;
		case X(OP_BCLR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_16BIT:  sbclrh_d(opcode); return 0;
		case X(OP_BCLR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_32BIT:  sbclrh_q(opcode); return 0;
		case X(OP_BCLR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_64BIT:  sbclrh_o(opcode); return 0;
		case X(OP_BCLR) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_8BIT:   sbclrh_b(opcode); return 0;
		case X(OP_BSET) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_16BIT:  sbseth_d(opcode); return 0;
		case X(OP_BSET) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_32BIT:  sbseth_q(opcode); return 0;
		case X(OP_BSET) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_64BIT:  sbseth_o(opcode); return 0;
		case X(OP_BSET) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_8BIT:   sbseth_b(opcode); return 0;
		case X(OP_BTST) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_16BIT:  sbtsth_d(opcode); return 0;
		case X(OP_BTST) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_32BIT:  sbtsth_q(opcode); return 0;
		case X(OP_BTST) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_64BIT:  sbtsth_o(opcode); return 0;
		case X(OP_BTST) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_8BIT:   sbtsth_b(opcode); return 0;
		case X(OP_POPC) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_16BIT:  spopch_d(opcode); return 0;
		case X(OP_POPC) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_32BIT:  spopch_q(opcode); return 0;
		case X(OP_POPC) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_64BIT:  spopch_o(opcode); return 0;
		case X(OP_POPC) | SIMD_FLAG | SHIFT_HALF_SIMD | ISIZE_8BIT:   spopch_b(opcode); return 0;
*/
/* XXX: encoding problem (clash between CC_* and LS_BIG_ENDIAN)!
*/
/* XXX: subject to change
		case X(OP_MADD):  madd(opcode); return 0;
		case X(OP_MSUB):  msub(opcode); return 0;
		case X(OP_MSHCHG):  mshchg(opcode); return 0;
*/

U32
fetch(U64 ip) {
	unsigned char *p = memmap(ip, 4, 4, 0);
	U32 insn = 0;

	if (!p) return 0xffffffff;
#if INSTRUCTION_BIG_ENDIAN
	insn = (insn << 8) | p[0];
	insn = (insn << 8) | p[1];
	insn = (insn << 8) | p[2];
	insn = (insn << 8) | p[3];
#else
	insn = (insn << 8) | p[3];
	insn = (insn << 8) | p[2];
	insn = (insn << 8) | p[1];
	insn = (insn << 8) | p[0];
#endif
	return insn;
}

void
show(unsigned long ip, unsigned insn) {
	char buf[1024];

	if (fcpu_decode_instruction(buf, sizeof(buf), insn) < 0) {
		abort();
	}
	fprintf(stderr, "%08lx  %02x%02x%02x%02x  %s\n", ip, (U8)(insn>>24),
			(U8)(insn>>16), (U8)(insn>>8), (U8)(insn>>0), buf);
}

void
exception(unsigned long ip, unsigned insn, unsigned code) {
	static const char *msgs[] = {
		[EX_ACCESS]    = "access rights violation",
		[EX_ADDRESS]   = "address out of range",
		[EX_ALIGNMENT] = "misaligned address",
		[EX_INVALID]   = "invalid instruction",
		[EX_NULL]      = "divide by 0",
		[EX_RANGE]     = "value out of range",
		[EX_HALT]      = "halted",
	};

	if (code >= EX_number || !msgs[code]) {
		fprintf(stderr, "\n*** exception %u ***\n", code);
	}
	else {
		fprintf(stderr, "\n*** %s ***\n", msgs[code]);
	}
}

static const char usage[] = "usage: emu [-i] [-m mbytes] binfile\n";

#if INSTRUCTION_BIG_ENDIAN
#define INSN(x)	(char)((x)>>24),(char)((x)>>16),(char)((x)>>8),(char)(x)
#else
#define INSN(x)	(char)(x),(char)((x)>>8),(char)((x)>>16),(char)((x)>>24)
#endif

char smoke_test[] = {
	/* loadaddri $target-.-4, r1 */
	INSN(X(OP_LOADADDRI) | 28 << 6 | 1 << 0),
	/* jmp r1, r2 */
	INSN(X(OP_JMP) | 1 << 6 | 2 << 0),
	/* .string */
	's','m','o','k','e',' ','t','e',
	's','t',' ','s','u','c','c','e',
	's','s','f','u','l','!','\n',0,
	/* inc r0, r1 */
	INSN(X(OP_INC) | ISIZE_64BIT | 0 << 6 | 1 << 0),
	/* loadconsx.0 $23, r3 */
	INSN(X(OP_LOADCONSX) | 23 << 6 | 3 << 0),
	/* syscall $2, r0 */
	INSN(X(OP_SYSCALL) | 2 << 6 | 0 << 0),
	/* nop */
	INSN(X(OP_NOP)),
	/* halt */
	INSN(X(OP_HALT)),
};

int
main(int argc, char **argv) {
	int interactive = 0;
	int trace = 0;
	unsigned char *p;
	struct stat st;
	int x;
	U64 ip;
	U32 insn;

	ramsize = 0;
	while ((x = getopt(argc, argv, "him:")) != EOF) {
		switch (x) {
			case 'i':
				interactive = 1;
				break;
			case 'm':
				ramsize = strtoul(optarg, NULL, 0) << 20;
				break;
			case 'h':
			case '?':
				fprintf(stderr, usage);
				exit(x != 'h');
		}
	}
	if (optind != argc && optind + 1 != argc) {
		fprintf(stderr, usage);
		exit(1);
	}
	if (!ramsize) {
		ramsize = 32 << 20;
	}
	fprintf(stderr, "RAM size set to %lu MB\n", (unsigned long)ramsize >> 20);
#if HAVE_MMAP && defined(MAP_ANONYMOUS)
	addrbase = mmap(0, ramsize, PROT_READ | PROT_WRITE,
					MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (addrbase == (unsigned char*)MAP_FAILED) {
		perror("mmap");
		exit(1);
	}
#else
	addrbase = malloc(ramsize);
	if (addrbase == NULL) {
		perror("malloc");
		exit(1);
	}
#endif
	if (optind < argc) {
		if ((x = open(argv[optind], O_RDONLY)) == -1) {
			perror(argv[optind]);
			exit(1);
		}
		if (fstat(x, &st)) {
			perror("fstat");
			exit(1);
		}
		if (st.st_size == 0) {
			fprintf(stderr, "%s: empty file\n", argv[optind]);
			exit(1);
		}
#if HAVE_MMAP
		p = mmap(addrbase, st.st_size, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_FIXED, x, 0);
		if (p == (unsigned char*)MAP_FAILED) {
#else
		if (1) {
#endif
			size_t n = 0, nread = 0;

			fprintf(stderr, "warning: mmap failed, trying read\n");
			while (nread < ramsize
			&&     (n = read(x, addrbase + nread, ramsize - nread)) > 0) {
				nread += n;
			}
			if (n == (size_t)-1) {
				perror("read");
				exit(1);
			}
		}
		else if (p != addrbase) {
			fprintf(stderr, "mapping failed\n");
			exit(1);
		}
		close(x);
	}
	else {
		memcpy(addrbase, &smoke_test, sizeof(smoke_test));
	}

	fputs("F-CPU instruction-level emulator " VERSION_STRING "\n"
		"Copyright (C) 2002 Michael \"Tired\" Riepe\n"
		"This program is free software; you can redistribute it and/or\n"
		"modify it under the terms of the GNU General Public License.\n"
		"This program is distributed WITHOUT ANY WARRANTY.\n\n",
		stderr);
	fprintf(stderr, "Emulated F-CPU has %u-bit registers\n", 8*MAXSIZE);
	fprintf(stderr, "and uses %s write semantics.\n\n", PARTIAL_WRITES ? "old" : "new");
	fprintf(stderr, "Enter \"?\" for help.\n\n");
	initemu();

	for (;;) {
		int done = 0;
		unsigned i;

		excode = EX_NONE;
		ip = regs.r_pc.C(o,0);
		insn = fetch(ip);
		if (!interactive) {
			while (!excode) {
				if (trace) {
					show(ip, insn);
				}
				regs.r_pc.C(o,0) = ip + 4;
				emulate1(insn);
				if (excode) break;
				ip = regs.r_pc.C(o,0);
				insn = fetch(ip);
			}
			regs.r_pc.C(o,0) = ip;
			interactive = 1;
			trace = 0;
		}
		if (excode) {
			exception(ip, insn, excode);
			excode = EX_NONE;
		}
		show(ip, insn);
		do {
			char line[1024];

			fprintf(stderr, "-");
			if (!fgets(line, sizeof(line), stdin)) {
				exit(0);
			}
			switch (*line) {
				case '\n':
					continue;
				case 'g':
					if (line[1] == '=') {
						regs.r_pc.C(o,0) = strtoul(line + 2, NULL, 16);
					}
					interactive = 0;
					done = 1;
					break;
				case 'l':
					if (line[1] != '\n') {
						ip = strtoul(line + 1, NULL, 16);
						insn = fetch(ip);
					}
					for (i = 0; i < 16; i++) {
						show(ip, insn);
						insn = fetch(ip += 4);
					}
					excode = EX_NONE;
					break;
				case 'q':
					exit(0);
				case 's':
					if (line[1] == '=') {
						regs.r_pc.C(o,0) = strtoul(line + 2, NULL, 16);
					}
					ip = regs.r_pc.C(o,0);
					insn = fetch(ip);
					if (excode) {
						exception(ip, insn, excode);
						excode = EX_NONE;
						show(ip, insn);
						break;
					}
					regs.r_pc.C(o,0) = ip + 4;
					emulate1(insn);
					if (excode) {
						exception(ip, insn, excode);
						regs.r_pc.C(o,0) = ip;
						excode = EX_NONE;
						show(ip, insn);
						break;
					}
					done = 1;
					break;
				case 't':
					if (line[1] == '=') {
						regs.r_pc.C(o,0) = strtoul(line + 2, NULL, 16);
					}
					interactive = 0;
					trace = 1;
					done = 1;
					break;
				case '?':
					fprintf(stderr, "usage:\n"
						"  ?          show this help\n"
						"  g[=addr]   run (from addr)\n"
						"  l[addr]    disassemble (from addr)\n"
						"  q          quit\n"
						"  s[=addr]   single-step (from addr)\n"
						"  t[=addr]   trace (from addr)\n"
					);
					break;
				default:
					fprintf(stderr, "huh?\n");
			}
		}
		while (!done);
	}
}
