// ****************************************************************************
// *
// * Virtual FCPU (64 Bit implementation) V0.2
// *
// ****************************************************************************

#include "fcpu.h"

// Size in bytes of the emulated memory array `mem`; byte accesses reduce the
// address modulo this value.
#define FCPU_MEMORY_SIZE_BYTES          ( 1L * 1024L * 1024L )

// Register file size; register ids are reduced modulo this value before
// indexing p->reg.

#define FCPU_NUM_REGS                   64L
// presumably the program counter value after reset -- not used in this part
// of the file, TODO confirm
#define FCPU_RESET_PC                   0L

// Single-bit mask with bit x set. The result has type long, so x must stay
// below the width of long.

#define BIT( x ) ( 1L << ( x ) )

// Endianness selectors accepted by the fcpu_mem_* accessors (parameter `e`).

#define FCPU_LITTLE_ENDIAN                  0L
#define FCPU_BIG_ENDIAN                     1L

// Operand size codes (presumably the encoded form of the 1/2/4/8-byte
// operand size -- not referenced in this part of the file, TODO confirm).
#define FCPU_SIZECODE_BYTE                  0L
#define FCPU_SIZECODE_DOUBLEBYTE            1L
#define FCPU_SIZECODE_QUADBYTE              2L
#define FCPU_SIZECODE_OCTBYTE               3L

//
#if 1

/* not a real "operation" in the sense that the data are not modified */
#define FCPU_OP_MOVE         0
/* so NOP is detected with an instruction equal to zero */

#define FCPU_OP_NOP 0
/* followed by 3 empty bytes, but the 6 last bits (dest reg) can be tested now to simplify things. */

/* Integer arithmetics */
#define FCPU_OP_ADD          2
#define FCPU_OP_SUB          4
#define FCPU_OP_MUL          6
#define FCPU_OP_DIV          8
#define FCPU_OP_MOD          10
#define FCPU_OP_ADDSUB       12
#define FCPU_OP_MAC          13

/* not really arithmetic... */
#define FCPU_OP_POPCOUNT     14

/* INC-based instructions */
#define FCPU_OP_CMPL         24
#define FCPU_OP_CMPLE        26
#define FCPU_OP_MAX          28
#define FCPU_OP_MIN          30
#define FCPU_OP_SORT         21

/* LNS operations */
#define FCPU_OP_LADD         32
#define FCPU_OP_LSUB         33

/* SHL ("shuffler") operations */
#define FCPU_OP_SHIFTL       36
#define FCPU_OP_SHIFTR       38
#define FCPU_OP_SHIFTRA      40
#define FCPU_OP_ROTL         42
#define FCPU_OP_ROTR         44
#define FCPU_OP_BITOP        46
#define FCPU_OP_BITREV       48
/* SIMD and byte-shuffling */
#define FCPU_OP_MIX          52
#define FCPU_OP_EXPAND       53
#define FCPU_OP_SDUP         54
#define FCPU_OP_PERMUTE      126

/* ROP2 unit (not complete : combine is new) */
#define FCPU_OP_LOGIC        56
#define FCPU_OP_COMBINE_OR   58
#define FCPU_OP_COMBINE_AND  59

/* FP (not complete) */
#define FCPU_OP_FADD         64
#define FCPU_OP_FSUB         65
#define FCPU_OP_FMUL         66
#define FCPU_OP_FDIV         71
#define FCPU_OP_FMAC         75
#define FCPU_OP_FADDSUB      76

/* LSU, some forms are also in intermediary format */
#define FCPU_OP_LOAD         80
#define FCPU_OP_LOADF        82
#define FCPU_OP_STORE        84
#define FCPU_OP_STOREF       86
#define FCPU_OP_CACHEMM      88

#define FCPU_OP_SCATTER      127
#define FCPU_OP_GATHER       128

/* MISC : */
/* SRB : */
#define FCPU_OP_LOADM        108
#define FCPU_OP_STOREM       110

/* control-related */
#define FCPU_OP_JMPA         116


/*
 intermediary format : RR
 ------------------------
 can belong to either RRR or I8RR (RRR seems better)
 count=15
*/

/* INC-based instructions */
#define FCPU_OP_INC          16
/*
   the following functions can be encoded in the bits 12 and 13 !
   #define FCPU_OP_DEC
   #define FCPU_OP_NEG
   #define FCPU_OP_ABS
*/


/* hole here. */

#define FCPU_OP_SCAN         20

/* LNS operations */
#define FCPU_OP_L2INT        34
#define FCPU_OP_INT2L        35

/* SIMD and byte-shuffling */
#define FCPU_OP_BYTEREV      50

/* FP, not complete */
#define FCPU_OP_F2INT        67
#define FCPU_OP_INT2F        68
#define FCPU_OP_FIAPRX       69
#define FCPU_OP_FSQRTIAPRX   70
#define FCPU_OP_FSQRT        72
#define FCPU_OP_FLOG         73
#define FCPU_OP_FEXP         74

/* MISC : */
/* SPR : */
#define FCPU_OP_GET          104
#define FCPU_OP_PUT          106

/* control-related */
#define FCPU_OP_LOOP         117    /* this isn't going to be RR only in some time... */

/*
 Format one : I8RR
 -----------------
 (plus sign bit in a 9th bit)
 count=27

 challenge : map it so it corresponds to the RRR format whenever possible,
so the difference is "only" one bit in most cases -> easier decoding.

*/

/* Integer arithmetics */

#define FCPU_OP_ADDI         3
#define FCPU_OP_SUBI         5
#define FCPU_OP_MULI         7
#define FCPU_OP_DIVI         9
#define FCPU_OP_MODI         11

/* INC-based instructions */

#define FCPU_OP_CMPLI        25
#define FCPU_OP_CMPLEI       27
#define FCPU_OP_MAXI         29
#define FCPU_OP_MINI         31

/* not really arithmetic */
#define FCPU_OP_POPCOUNTI    15

/* SHL ("shuffler") operations */
#define FCPU_OP_SHIFTLI      37
#define FCPU_OP_SHIFTRI      39
#define FCPU_OP_SHIFTRAI     41
#define FCPU_OP_ROTLI        43
#define FCPU_OP_ROTRI        45
#define FCPU_OP_BITOPI       47
#define FCPU_OP_BITREVI      49
/* SIMD and byte-shuffling */
#define FCPU_OP_SDUPI        55

/* ROP2 unit (not complete : combine is new) */
#define FCPU_OP_LOGICI       57
#define FCPU_OP_COMBINE_ORI  60
#define FCPU_OP_COMBINE_ANDI 61

/* LSU */
#define FCPU_OP_LOADI        81
#define FCPU_OP_LOADIF       83
#define FCPU_OP_STOREI       85
#define FCPU_OP_STOREIF      87
/* NOTE(review): this is the same value as FCPU_OP_CACHEMM (88). Most other
   immediate forms in this table are the register form + 1, so 89 may have
   been intended -- confirm against the opcode map before changing. */
#define FCPU_OP_CACHEMMI     88

/* SRB : */
#define FCPU_OP_LOADMI       109
#define FCPU_OP_STOREMI      111

/*
 Format two : I16R
 -----------------
 count=12
 */

/* MISC : */
/* close to FCPU_OP_MOVE */
#define FCPU_OP_LOADCONS     96   /* 4-opcode range so we can create 256-bit immediates */
#define FCPU_OP_LOADCONSX    100  /* idem */
/* total : 8 opcodes */

/* SPR : */
#define FCPU_OP_GETI         105
#define FCPU_OP_PUTI         107

/* control-related */
#define FCPU_OP_LOADADDR     114
#define FCPU_OP_LOADADDRI    115

/*
 Format three : I24
 ------------------
 count=10
*/

/* SRB : */
#define FCPU_OP_SRB_SAVE     112
#define FCPU_OP_SRB_RESTORE  113
/* control-related */
#define FCPU_OP_SYSCALL      118
#define FCPU_OP_RFE          119
#define FCPU_OP_HALT         120
#define FCPU_OP_SERIALIZE    121

#define FCPU_OP_VLIW         122
#define FCPU_OP_VLIW0        122   /* should be rounded on a 2-bit boundary ! */
#define FCPU_OP_VLIW1        123   /* ultimately, would be opcode #252-255 */
#define FCPU_OP_VLIW2        124
#define FCPU_OP_VLIW3        125

// !!! Opcode map differs from manual in the INC-based ops
// !!! As long as emulator is not changed
// !!! these opcodes are defined here
#define FCPU_OP_ABS          200
#define FCPU_OP_DEC          201
#define FCPU_OP_NEG          202

#else

#define FCPU_OP_ADD                     1L
#define FCPU_OP_SUB                     2L
#define FCPU_OP_MUL                     3L
#define FCPU_OP_DIV                     4L
#define FCPU_OP_ABS                     5L
#define FCPU_OP_MIN                     6L
#define FCPU_OP_MAX                     7L
#define FCPU_OP_INC                     8L
#define FCPU_OP_DEC                     9L

#define FCPU_OP_SHIFTL                  10L
#define FCPU_OP_SHIFTR                  11L
#define FCPU_OP_SHIFTRA                 12L
#define FCPU_OP_ROTL                    13L
#define FCPU_OP_ROTR                    14L
#define FCPU_OP_BITOP                   15L

#define FCPU_OP_LOGIC                   20L
#define FCPU_OP_LOGICI                  21L
#define FCPU_OP_SDUP                    22L

#define FCPU_OP_LOAD                    30L
#define FCPU_OP_STORE                   31L
#define FCPU_OP_LOADI                   32L
#define FCPU_OP_STOREI                  33L
#define FCPU_OP_LOADF                   34L
#define FCPU_OP_STOREF                  35L
#define FCPU_OP_LOADIF                  36L
#define FCPU_OP_STOREIF                 37L

#define FCPU_OP_MOVE                    50L
#define FCPU_OP_LOADCONS                51L
#define FCPU_OP_LOADCONSX               52L
#define FCPU_OP_GET                     53L
#define FCPU_OP_PUT                     54L
#define FCPU_OP_GETI                    55L
#define FCPU_OP_PUTI                    56L
#define FCPU_OP_LOADM                   57L
#define FCPU_OP_STOREM                  58L
#define FCPU_OP_CACHEMM                 59L

#define FCPU_OP_JMPA                    70L
#define FCPU_OP_LOADADDR                71L
#define FCPU_OP_LOADADDRI               72L
#define FCPU_OP_LOOP                    73L

#define FCPU_OP_ADDI                    80L
#define FCPU_OP_SUBI                    81L
#define FCPU_OP_MULI                    82L
#define FCPU_OP_DIVI                    83L
#define FCPU_OP_MOD                     84L
#define FCPU_OP_MODI                    85L
#define FCPU_OP_MAC                     86L
#define FCPU_OP_POPCOUNT                87L
#define FCPU_OP_POPCOUNTI               88L
#define FCPU_OP_ADDSUB                  89L

#define FCPU_OP_NEG                     90L
#define FCPU_OP_CMPL                    91L
#define FCPU_OP_CMPLE                   92L
#define FCPU_OP_SORT                    93L
#define FCPU_OP_CMPLI                   94L
#define FCPU_OP_CMPLEI                  95L
#define FCPU_OP_SCAN                    96L

#define FCPU_OP_SHIFTLI                 100L
#define FCPU_OP_SHIFTRI                 101L
#define FCPU_OP_SHIFTRAI                102L
#define FCPU_OP_ROTLI                   103L
#define FCPU_OP_ROTRI                   104L
#define FCPU_OP_BITOPI                  105L

#define FCPU_OP_MAXI                    110L
#define FCPU_OP_MINI                    111L

#define FCPU_OP_SYSCALL                 230L
#define FCPU_OP_HALT                    231L
#define FCPU_OP_RFE                     232L
#define FCPU_OP_SRB_SAVE                233L
#define FCPU_OP_SRB_RESTORE             234L
#define FCPU_OP_SERIALIZE               235L

#endif

//

// Pointer to an operation with the same signature as the fcpu_basic_*
// functions below: ( cpu, op3, op2, &result1, &result0 ).
typedef void (*PCSCRFUNC)( PFCPUCPU, octbyte, octbyte, poctbyte, poctbyte );

//
//
//
// Hook called when the emulator reaches a state it does not know how to
// handle (e.g. an unknown endianness selector). Currently does nothing;
// it is a convenient place for a breakpoint.
void fcpu_eie( void )
{
    // emulator internal error detected!
}

//
// Memory Access
//

// Emulated memory, FCPU_MEMORY_SIZE_BYTES bytes, accessed through the
// fcpu_mem_* functions below.
onebyte mem[ FCPU_MEMORY_SIZE_BYTES ];

// Reads one byte of emulated memory into *pop (zero-extended).
// The address a wraps around at FCPU_MEMORY_SIZE_BYTES; p and e are unused.
void fcpu_mem_get_byte( PFCPUCPU p, octbyte e, octbyte a, poctbyte pop )
{
    const octbyte offset = a % FCPU_MEMORY_SIZE_BYTES;

    *pop = (octbyte)mem[ offset ];
}

// Stores the low byte of op into emulated memory.
// The address a wraps around at FCPU_MEMORY_SIZE_BYTES; p and e are unused.
void fcpu_mem_put_byte( PFCPUCPU p, octbyte e, octbyte a, octbyte op )
{
    const octbyte offset = a % FCPU_MEMORY_SIZE_BYTES;

    mem[ offset ] = (onebyte)op;
}

// Reads n consecutive bytes starting at address a and assembles them into
// *pop.
//
//   e   : FCPU_LITTLE_ENDIAN (byte at a is least significant) or
//         FCPU_BIG_ENDIAN (byte at a is most significant)
//   n   : number of bytes to read
//   pop : receives the assembled value
//
// For any other value of e, fcpu_eie() is called for every byte and the
// result is 0.
void fcpu_mem_get_n_bytes( PFCPUCPU p, octbyte e, octbyte a, octbyte n, poctbyte pop )
{
    octbyte opb;
    octbyte i;
    
    *pop = 0L;
    for( i = 0; i < n; i++ )
    {
        // Cleared on every pass so that an unknown endianness selector
        // contributes zero bits instead of an indeterminate value.
        opb = 0;

        *pop <<= 8L;

        // Bytes are fetched from most significant to least significant.
        switch( e )
        {
            case FCPU_LITTLE_ENDIAN:
                fcpu_mem_get_byte( p, e, a + ( n - 1 - i ), &opb );
                break;

            case FCPU_BIG_ENDIAN:
                fcpu_mem_get_byte( p, e, a + i, &opb );
                break;

            default:
                fcpu_eie();
                break;
        }

        *pop |= opb;
    }
}

// Writes the n low-order bytes of op to memory starting at address a.
// With FCPU_LITTLE_ENDIAN the least significant byte goes to a, with
// FCPU_BIG_ENDIAN it goes to a + n - 1. Any other e calls fcpu_eie() for
// each byte and writes nothing.
void fcpu_mem_put_n_bytes( PFCPUCPU p, octbyte e, octbyte a, octbyte n, octbyte op )
{
    octbyte idx;

    // Bytes are consumed from least significant to most significant.
    for( idx = 0; idx < n; idx++, op >>= 8L )
    {
        const octbyte low = op & 0xFF;

        if( FCPU_LITTLE_ENDIAN == e )
            fcpu_mem_put_byte( p, e, a + idx, low );
        else if( FCPU_BIG_ENDIAN == e )
            fcpu_mem_put_byte( p, e, a + ( n - 1 - idx ), low );
        else
            fcpu_eie();
    }
}

//
// Reads a 2-byte value at address a with endianness e into *pop
// (fcpu_mem_get_n_bytes with n = 2).
void fcpu_mem_get_doublebyte( PFCPUCPU p, octbyte e, octbyte a, poctbyte pop )
{
    fcpu_mem_get_n_bytes( p, e, a, 2L, pop );
}

// Writes the low 2 bytes of op at address a with endianness e
// (fcpu_mem_put_n_bytes with n = 2).
void fcpu_mem_put_doublebyte( PFCPUCPU p, octbyte e, octbyte a, octbyte op )
{
    fcpu_mem_put_n_bytes( p, e, a, 2L, op );
}

//
// Reads a 4-byte value at address a with endianness e into *pop
// (fcpu_mem_get_n_bytes with n = 4).
void fcpu_mem_get_quadbyte( PFCPUCPU p, octbyte e, octbyte a, poctbyte pop )
{
    fcpu_mem_get_n_bytes( p, e, a, 4L, pop );
}

// Writes the low 4 bytes of op at address a with endianness e
// (fcpu_mem_put_n_bytes with n = 4).
void fcpu_mem_put_quadbyte( PFCPUCPU p, octbyte e, octbyte a, octbyte op )
{
    fcpu_mem_put_n_bytes( p, e, a, 4L, op );
}

//
// Reads an 8-byte value at address a with endianness e into *pop
// (fcpu_mem_get_n_bytes with n = 8).
void fcpu_mem_get_octbyte( PFCPUCPU p, octbyte e, octbyte a, poctbyte pop )
{
    fcpu_mem_get_n_bytes( p, e, a, 8L, pop );
}

// Writes all 8 bytes of op at address a with endianness e
// (fcpu_mem_put_n_bytes with n = 8).
void fcpu_mem_put_octbyte( PFCPUCPU p, octbyte e, octbyte a, octbyte op )
{
    fcpu_mem_put_n_bytes( p, e, a, 8L, op );
}

//
// Reads a value of the current instruction's operand size
// (p->instr_flgsize: 1, 2, 4 or 8 bytes) from address a with endianness e
// into *pop.
//
// For any other operand size fcpu_eie() is called and *pop is set to 0, so
// callers that pass the address of an uninitialised local never read an
// indeterminate value.
void fcpu_mem_get_sized( PFCPUCPU p, octbyte e, octbyte a, poctbyte pop )
{
    switch( p->instr_flgsize )
    {
        case 1L:
            fcpu_mem_get_byte( p, e, a, pop );
            break;

        case 2L:
            fcpu_mem_get_doublebyte( p, e, a, pop );
            break;

        case 4L:
            fcpu_mem_get_quadbyte( p, e, a, pop );
            break;

        case 8L:
            fcpu_mem_get_octbyte( p, e, a, pop );
            break;

        default:
            fcpu_eie();
            *pop = 0;
            break;
    }
}

// Writes op to address a using the current instruction's operand size
// (p->instr_flgsize: 1, 2, 4 or 8 bytes) and endianness e.
// Any other operand size writes nothing.
void fcpu_mem_put_sized( PFCPUCPU p, octbyte e, octbyte a, octbyte op )
{
    if( 1L == p->instr_flgsize )
        fcpu_mem_put_byte( p, e, a, op );
    else if( 2L == p->instr_flgsize )
        fcpu_mem_put_doublebyte( p, e, a, op );
    else if( 4L == p->instr_flgsize )
        fcpu_mem_put_quadbyte( p, e, a, op );
    else if( 8L == p->instr_flgsize )
        fcpu_mem_put_octbyte( p, e, a, op );
}

//
// Register Access
//

// Returns the id of the register following r, wrapping around at
// FCPU_NUM_REGS.
octbyte fcpu_get_neighbour_reg_id( octbyte r )
{
    const octbyte next = r + 1;

    return next % FCPU_NUM_REGS;
}

// Reads byte lane b (lane 0 = least significant byte) of register r into
// *pop. Register 0 always reads as zero.
void fcpu_reg_get_byte( PFCPUCPU p, octbyte r, octbyte b, poctbyte pop )
{
    if( 0 != r )
    {
        *pop = ( p->reg[ r % FCPU_NUM_REGS ] >> ( 8L * b ) ) & 0x00000000000000FF;
        return;
    }

    *pop = (octbyte)0;
}

// Replaces byte lane b (0..7, lane 0 = least significant byte) of register
// r with the low byte of op; all other lanes are preserved. Writes to
// register 0 are ignored.
void fcpu_reg_put_byte( PFCPUCPU p, octbyte r, octbyte b, octbyte op )
{
    if( 0 != r )
    {
        // The lane mask is built in a 64-bit unsigned type. The plain
        // literal 0xFF is an int: shifting it into bit 31 or beyond is
        // undefined and in practice wiped the upper half of the register.
        const unsigned long long lane = 0xFFULL << ( 8L * b );

        op &= 0xFF;

        p->reg[ r % FCPU_NUM_REGS ] &= ~lane;
        p->reg[ r % FCPU_NUM_REGS ] |=  ( (unsigned long long)op << ( 8L * b ) );
    }
}

// Reads 16-bit lane b (lane 0 = least significant) of register r into *pop.
// Register 0 always reads as zero.
void fcpu_reg_get_doublebyte( PFCPUCPU p, octbyte r, octbyte b, poctbyte pop )
{
    if( 0 != r )
    {
        *pop = ( p->reg[ r % FCPU_NUM_REGS ] >> ( 16L * b ) ) & 0x000000000000FFFF;
        return;
    }

    *pop = (octbyte)0;
}

// Replaces 16-bit lane b (0..3, lane 0 = least significant) of register r
// with the low 16 bits of op; all other lanes are preserved. Writes to
// register 0 are ignored.
void fcpu_reg_put_doublebyte( PFCPUCPU p, octbyte r, octbyte b, octbyte op )
{
    if( 0 != r )
    {
        // The lane mask is built in a 64-bit unsigned type. The plain
        // literal 0xFFFF is an int: already for b == 1 the shift overflowed
        // and the inverted mask cleared the upper half of the register.
        const unsigned long long lane = 0xFFFFULL << ( 16L * b );

        op &= 0xFFFF;

        p->reg[ r % FCPU_NUM_REGS ] &= ~lane;
        p->reg[ r % FCPU_NUM_REGS ] |=  ( (unsigned long long)op << ( 16L * b ) );
    }
}

// Reads 32-bit lane b (lane 0 = low half) of register r into *pop.
// Register 0 always reads as zero.
void fcpu_reg_get_quadbyte( PFCPUCPU p, octbyte r, octbyte b, poctbyte pop )
{
    if( 0 != r )
    {
        *pop = ( p->reg[ r % FCPU_NUM_REGS ] >> ( 32L * b ) ) & 0x00000000FFFFFFFF;
        return;
    }

    *pop = (octbyte)0;
}

// Replaces 32-bit lane b (0..1, lane 0 = low half) of register r with the
// low 32 bits of op; the other lane is preserved. Writes to register 0 are
// ignored.
void fcpu_reg_put_quadbyte( PFCPUCPU p, octbyte r, octbyte b, octbyte op )
{
    if( 0 != r )
    {
        // The lane mask is built in a 64-bit unsigned type. The plain
        // literal 0xFFFFFFFF is a 32-bit unsigned int: its complement is 0
        // (which cleared the whole register for b == 0) and shifting it by
        // 32 is undefined.
        const unsigned long long lane = 0xFFFFFFFFULL << ( 32L * b );

        op &= 0xFFFFFFFF;

        p->reg[ r % FCPU_NUM_REGS ] &= ~lane;
        p->reg[ r % FCPU_NUM_REGS ] |=  ( (unsigned long long)op << ( 32L * b ) );
    }
}

// Reads the full 64-bit content of register r into *pop.
// Register 0 always reads as zero.
void fcpu_reg_get_octbyte( PFCPUCPU p, octbyte r, poctbyte pop )
{
    if( 0 != r )
    {
        *pop = p->reg[ r % FCPU_NUM_REGS ];
        return;
    }

    *pop = (octbyte)0;
}

// Overwrites the full 64-bit content of register r with op.
// Writes to register 0 are ignored.
void fcpu_reg_put_octbyte( PFCPUCPU p, octbyte r, octbyte op )
{
    if( 0 == r )
        return;

    p->reg[ r % FCPU_NUM_REGS ] = op;
}

// Reads lane s of register r into *pop, where the lane width is the current
// instruction's operand size (p->instr_flgsize: 1, 2, 4 or 8 bytes). For
// the 8-byte size the lane index is ignored and the whole register is read.
//
// For any other operand size fcpu_eie() is called and *pop is set to 0, so
// callers that pass the address of an uninitialised local never read an
// indeterminate value.
void fcpu_reg_get_sized_at( PFCPUCPU p, octbyte r, octbyte s, poctbyte pop )
{
    switch( p->instr_flgsize )
    {
        case 1L:
            fcpu_reg_get_byte( p, r, s, pop );
            break;

        case 2L:
            fcpu_reg_get_doublebyte( p, r, s, pop );
            break;

        case 4L:
            fcpu_reg_get_quadbyte( p, r, s, pop );
            break;

        case 8L:
            fcpu_reg_get_octbyte( p, r, pop );
            break;

        default:
            fcpu_eie();
            *pop = 0;
            break;
    }
}

// Reads lane 0 (the least significant lane of the current operand size) of
// register r into *pop.
void fcpu_reg_get_sized( PFCPUCPU p, octbyte r, poctbyte pop )
{
    fcpu_reg_get_sized_at( p, r, 0, pop );
}

// Writes op into lane s of register r, where the lane width is the current
// instruction's operand size (p->instr_flgsize: 1, 2, 4 or 8 bytes). For
// the 8-byte size the lane index is ignored and the whole register is
// written. Any other operand size writes nothing.
void fcpu_reg_put_sized_at( PFCPUCPU p, octbyte r, octbyte s, octbyte op )
{
    if( 1L == p->instr_flgsize )
        fcpu_reg_put_byte( p, r, s, op );
    else if( 2L == p->instr_flgsize )
        fcpu_reg_put_doublebyte( p, r, s, op );
    else if( 4L == p->instr_flgsize )
        fcpu_reg_put_quadbyte( p, r, s, op );
    else if( 8L == p->instr_flgsize )
        fcpu_reg_put_octbyte( p, r, op );
}

// Writes op into lane 0 (the least significant lane of the current operand
// size) of register r.
void fcpu_reg_put_sized( PFCPUCPU p, octbyte r, octbyte op )
{
    fcpu_reg_put_sized_at( p, r, 0, op );
}

//
//
//

// Sign-extends the s-byte value in *pop to 64 bits, in place.
// For s = 1, 2 or 4 the bits above the operand are set to copies of the
// operand's top bit. For s = 8 (and any other s) *pop is left unchanged.
void fcpu_sign_extend_op( poctbyte pop, octbyte s )
{
    octbyte sign_bit;
    octbyte keep;

    switch( s )
    {
        case 1L:
            sign_bit = 0x80;
            keep     = 0x00000000000000FF;
            break;

        case 2L:
            sign_bit = 0x8000;
            keep     = 0x000000000000FFFF;
            break;

        case 4L:
            sign_bit = 0x80000000;
            keep     = 0x00000000FFFFFFFF;
            break;

        default:
            // octbyte needs no sign extension
            return;
    }

    if( *pop & sign_bit )
        *pop |= ~keep;
    else
        *pop &= keep;
}

// Clears all bits of *pop above the low s bytes, in place.
// For s = 8 (and any s other than 1, 2 or 4) *pop is left unchanged.
void fcpu_trunc_op( poctbyte pop, octbyte s )
{
    octbyte keep;

    switch( s )
    {
        case 1L:
            keep = 0xFF;
            break;

        case 2L:
            keep = 0xFFFF;
            break;

        case 4L:
            keep = 0xFFFFFFFF;
            break;

        default:
            // octbyte needs no truncation
            return;
    }

    *pop &= keep;
}

//
// 6.1 Arithmetic Instructions
//
// For every arithmetic operation there is a basic function which operates on the
// current instruction operand size. The basic function is called by
// fcpu_exe_x_instr which handle simd / non simd processing.
//
// Unsigned addition of op2 and op3 at the current operand size.
//
//   *pop1 : the sum (not truncated to the operand size)
//   *pop0 : carry out of the operand size (0 or 1)
//
// With p->instr_flgsat set, a carry saturates the sum to all ones.
// The carry for sizes below 8 bytes is taken from the bit just above the
// operand, so op2 and op3 are expected to be already truncated to that size.
// For an unsupported operand size the carry is reported as 0.
void fcpu_basic_add( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    octbyte op1;
    octbyte op0 = 0;    // defined even if no case below matches

    op1 = op2 + op3;
    switch( p->instr_flgsize )
    {
        case 1L:
            op0 = ( op1 >> 8 ) & 0x01;
            break;

        case 2L:
            op0 = ( op1 >> 16 ) & 0x01;
            break;

        case 4L:
            op0 = ( op1 >> 32 ) & 0x01;
            break;

        case 8L:
            // The 65th bit is not available, so add the operands halved
            // (plus the carry out of bit 0) and look at bit 63 of that.
            op0 = ( op2 >> 1 ) + ( op3 >> 1 );
            if( ( op2 & 1 ) && ( op3 & 1 ) )
                op0++;
            if( op0 & 0x8000000000000000 )
                op0 = 0x01;
            else
                op0 = 0x00;
            break;

        default:
            fcpu_eie();
            break;
    }

    if( p->instr_flgsat )
    {
        if( op0 > 0 )
            op1 = 0xFFFFFFFFFFFFFFFF;
    }

    *pop1 = op1;
    *pop0 = op0;
}

// Unsigned subtraction op2 - op3 at the current operand size.
//
//   *pop1 : the difference (not truncated to the operand size)
//   *pop0 : borrow (1 if op3 > op2 as unsigned values, else 0)
//
// With p->instr_flgsat set, a borrow saturates the difference to 0.
// For sizes below 8 bytes the borrow is detected from the bits above the
// operand, so op2 and op3 are expected to be already truncated to that size.
void fcpu_basic_sub( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    octbyte op1;
    octbyte op0;

    op1 = op2 - op3;
    op0 = 0;
    switch( p->instr_flgsize )
    {
        case 1L:
            if( ( op1 & 0xFFFFFFFFFFFFFF00 ) != 0 )
                op0 = 0x01;
            break;

        case 2L:
            if( ( op1 & 0xFFFFFFFFFFFF0000 ) != 0 )
                op0 = 0x01;
            break;

        case 4L:
            if( ( op1 & 0xFFFFFFFF00000000 ) != 0 )
                op0 = 0x01;
            break;

        case 8L:
            // No bits above the operand exist, so compare the operands
            // directly (explicitly as unsigned values). Previously the
            // 8-byte borrow was never reported and saturation never applied.
            if( (unsigned long long)op2 < (unsigned long long)op3 )
                op0 = 0x01;
            break;
    }

    if( p->instr_flgsat )
    {
        if( op0 > 0 )
            op1 = 0x0000000000000000;
    }

    *pop1 = op1;
    *pop0 = op0;
}

// Multiplies op3 by op2 at the current operand size.
//
//   *pop1 : the product; its low operand-size bytes are the low part
//   *pop0 : the high part of the product (product shifted right by the
//           operand size)
//
// With p->instr_flgsign set both operands are sign-extended first, which
// makes the product a signed one. The 8-byte case computes the full 128-bit
// product from 32-bit partial products. For an unsupported operand size
// both results are 0.
void fcpu_basic_mul( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    octbyte op1 = 0;
    octbyte op0 = 0;

    if( p->instr_flgsign )
    {
        fcpu_sign_extend_op( &op2, p->instr_flgsize );
        fcpu_sign_extend_op( &op3, p->instr_flgsize );
    }

    switch( p->instr_flgsize )
    {
        case 1L:
            op1 = op3 * op2;
            op0 = op1 >> 8;
            break;

        case 2L:
            op1 = op3 * op2;
            op0 = op1 >> 16;
            break;

        case 4L:
            op1 = op3 * op2;
            op0 = op1 >> 32;
            break;

        case 8L:
        {
            const unsigned long long mask32 = 0xFFFFFFFFULL;
            const unsigned long long a    = (unsigned long long)op3;
            const unsigned long long b    = (unsigned long long)op2;
            const unsigned long long a_lo = a & mask32;
            const unsigned long long a_hi = ( a >> 32 ) & mask32;
            const unsigned long long b_lo = b & mask32;
            const unsigned long long b_hi = ( b >> 32 ) & mask32;
            const unsigned long long ll   = a_lo * b_lo;
            const unsigned long long lh   = a_lo * b_hi;
            const unsigned long long hl   = a_hi * b_lo;
            const unsigned long long hh   = a_hi * b_hi;
            // Bits 32..63 of the product plus the carries into bit 64.
            const unsigned long long mid  = ( ll >> 32 ) + ( lh & mask32 ) + ( hl & mask32 );
            unsigned long long hi         = hh + ( lh >> 32 ) + ( hl >> 32 ) + ( mid >> 32 );

            if( p->instr_flgsign )
            {
                // Turn the unsigned high half into the signed one: a
                // negative operand counts as (value - 2^64), which
                // subtracts the other operand from the high half.
                if( ( a >> 63 ) & 1 )
                    hi -= b;
                if( ( b >> 63 ) & 1 )
                    hi -= a;
            }

            op1 = (octbyte)( ( mid << 32 ) | ( ll & mask32 ) );
            op0 = (octbyte)hi;
        }; break;

        default:
            fcpu_eie();
            break;
    }

    *pop1 = op1;
    *pop0 = op0;
}

// Divides op3 by op2.
//
//   *pop1 : quotient op3 / op2
//   *pop0 : remainder op3 % op2 if p->instr_flgmod is set, otherwise 0
//
// With p->instr_flgsign set the operands are sign-extended from the current
// operand size and divided as signed values; otherwise as unsigned values.
// If op2 is 0, p->exception is set to FCPU_EXCEPTION_DIVISIONBYZERO and
// neither output is written.
//
// NOTE(review): the dividend is op3 and the divisor op2, whereas
// fcpu_basic_sub computes op2 - op3 -- confirm the operand order against
// the instruction set manual.
void fcpu_basic_div( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    octbyte op1;
    octbyte op0;

    if( p->instr_flgsign )
    {
        fcpu_sign_extend_op( &op2, p->instr_flgsize );
        fcpu_sign_extend_op( &op3, p->instr_flgsize );
    }

    if( 0 == op2 )
    {
        p->exception = FCPU_EXCEPTION_DIVISIONBYZERO;
        return;
    }

    // The signedness of the division is made explicit so that it follows
    // instr_flgsign and not whatever type octbyte happens to be.
    if( p->instr_flgsign )
    {
        const long long num = (long long)op3;
        const long long den = (long long)op2;

        if( -1 == den )
        {
            // num / -1 overflows for the most negative value; negate with
            // wrap-around instead. The remainder is always 0.
            op1 = (octbyte)( 0ULL - (unsigned long long)num );
            op0 = 0;
        }
        else
        {
            op1 = (octbyte)( num / den );
            op0 = (octbyte)( num % den );
        }
    }
    else
    {
        const unsigned long long num = (unsigned long long)op3;
        const unsigned long long den = (unsigned long long)op2;

        op1 = (octbyte)( num / den );
        op0 = (octbyte)( num % den );
    }

    if( !p->instr_flgmod )
        op0 = 0;

    *pop1 = op1;
    *pop0 = op0;
}

// Computes the remainder of op3 divided by op2.
//
//   *pop1 : op3 % op2
//   *pop0 : always 0
//
// With p->instr_flgsign set the operands are sign-extended from the current
// operand size and the remainder is a signed one (sign of the dividend);
// otherwise it is unsigned. If op2 is 0, p->exception is set to
// FCPU_EXCEPTION_DIVISIONBYZERO and neither output is written.
void fcpu_basic_mod( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    octbyte op1;

    if( p->instr_flgsign )
    {
        fcpu_sign_extend_op( &op2, p->instr_flgsize );
        fcpu_sign_extend_op( &op3, p->instr_flgsize );
    }

    if( 0 == op2 )
    {
        p->exception = FCPU_EXCEPTION_DIVISIONBYZERO;
        return;
    }

    // The signedness of the operation is made explicit so that it follows
    // instr_flgsign and not whatever type octbyte happens to be.
    if( p->instr_flgsign )
    {
        const long long num = (long long)op3;
        const long long den = (long long)op2;

        // num % -1 is always 0, but evaluating it overflows for the most
        // negative value, so it is special-cased.
        if( -1 == den )
            op1 = 0;
        else
            op1 = (octbyte)( num % den );
    }
    else
    {
        op1 = (octbyte)( (unsigned long long)op3 % (unsigned long long)op2 );
    }

    *pop1 = op1;
    *pop0 = 0L;
}

// Absolute value of op2, interpreted as a signed value of the current
// operand size.
//
//   *pop1 : |op2| as a 64-bit value (the most negative value maps to itself)
//   *pop0 : always 0
//
// op3 is unused.
void fcpu_basic_abs( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{   
    fcpu_sign_extend_op( &op2, p->instr_flgsize );

    // Test the sign bit explicitly: "op2 < 0" is never true when octbyte is
    // an unsigned type.
    if( op2 & 0x8000000000000000 )
        op2 = (octbyte)0 - op2;

    *pop1 = op2;
    *pop0 = 0;
}

// Stores the smaller of op3 and op2 in *pop1 and 0 in *pop0.
void fcpu_basic_min( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    *pop1 = ( op3 < op2 ) ? op3 : op2;
    *pop0 = 0;
}

// Stores the larger of op3 and op2 in *pop1 and 0 in *pop0.
void fcpu_basic_max( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    *pop1 = ( op3 > op2 ) ? op3 : op2;
    *pop0 = 0;
}

// Computes sum and difference in one step:
// *pop1 = op2 + op3, *pop0 = op2 - op3.
void fcpu_basic_addsub( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    const octbyte sum  = op2 + op3;
    const octbyte diff = op2 - op3;

    *pop1 = sum;
    *pop0 = diff;
}

// Increment: *pop1 = op2 + 1, *pop0 = 0. op3 is unused.
void fcpu_basic_inc( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    const octbyte incremented = op2 + 1;

    *pop1 = incremented;
    *pop0 = 0L;
}

// Decrement: *pop1 = op2 - 1, *pop0 = 0. op3 is unused.
void fcpu_basic_dec( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    const octbyte decremented = op2 - 1;

    *pop1 = decremented;
    *pop0 = 0L;
}

// Negation: sign-extends op2 from the current operand size and stores its
// negative in *pop1; *pop0 = 0. op3 is unused.
void fcpu_basic_neg( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    octbyte value = op2;

    fcpu_sign_extend_op( &value, p->instr_flgsize );

    *pop1 = (octbyte)0 - value;
    *pop0 = 0L;
}

// Compare-less: *pop1 is all ones if op2 < op3, otherwise 0; *pop0 = 0.
void fcpu_basic_cmpl( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    *pop1 = ( op2 < op3 ) ? 0xFFFFFFFFFFFFFFFF : 0x0000000000000000;
    *pop0 = 0L;
}

// Compare-less-or-equal: *pop1 is all ones if op2 <= op3, otherwise 0;
// *pop0 = 0.
void fcpu_basic_cmple( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    *pop1 = ( op2 <= op3 ) ? 0xFFFFFFFFFFFFFFFF : 0x0000000000000000;
    *pop0 = 0L;
}

// Orders the two operands: *pop1 receives the larger one, *pop0 the smaller
// one.
void fcpu_basic_sort( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    const int second_is_larger = ( op2 > op3 );
    const octbyte larger  = second_is_larger ? op2 : op3;
    const octbyte smaller = second_is_larger ? op3 : op2;

    *pop1 = larger;
    *pop0 = smaller;
}

// Single-bit operation on op2; op3 is the bit index.
//
//   p->instr_flgbitop = 0 : set the bit          (OR)
//                       1 : clear the bit        (ANDN)
//                       2 : toggle the bit       (XOR)
//                       3 : isolate the bit      (AND)
//
//   *pop1 : result, *pop0 : always 0
//
// A bit index of 64 or more selects no bit (empty mask) instead of
// performing an undefined shift. For any other instr_flgbitop value
// fcpu_eie() is called and *pop1 is left untouched.
void fcpu_basic_bitop( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    octbyte bit = 0;

    if( (unsigned long long)op3 < 64 )
        bit = (octbyte)( 1ULL << op3 );

    switch( p->instr_flgbitop )
    {
        case 0L: // OR
            *pop1 = op2 | bit;
            break;

        case 1L: // ANDN
            *pop1 = op2 & ~bit;
            break;

        case 2L: // XOR
            *pop1 = op2 ^ bit;
            break;

        case 3L: // AND
            *pop1 = op2 & bit;
            break;

        default:
            fcpu_eie();
            break;
    }

    *pop0 = 0;
}

//
// 6.2.1 (Core) Bit Shuffling Instructions
//
// Logical shift left: *pop1 = op2 << op3 (not truncated to the operand
// size), *pop0 = 0.
//
// A shift count of 64 or more yields 0 (all bits shifted out) instead of
// performing an undefined shift.
void fcpu_basic_shiftl( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    if( (unsigned long long)op3 < 64 )
        *pop1 = (octbyte)( (unsigned long long)op2 << op3 );
    else
        *pop1 = 0;

    *pop0 = 0L;
}

// Logical shift right: *pop1 = op2 >> op3 with zeros shifted in, *pop0 = 0.
//
// The shift is done on an explicitly unsigned value. The previous fixed
// mask of bit 63 also cleared that bit for a shift count of 0. A shift
// count of 64 or more yields 0 instead of performing an undefined shift.
void fcpu_basic_shiftr( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    if( (unsigned long long)op3 < 64 )
        *pop1 = (octbyte)( (unsigned long long)op2 >> op3 );
    else
        *pop1 = 0;

    *pop0 = 0L;
}

// Arithmetic shift right: op2 is sign-extended from the current operand
// size and shifted right by op3 with copies of the sign bit shifted in.
//
//   *pop1 : shifted value, *pop0 : always 0
//
// The sign fill is done explicitly so the result does not depend on how the
// compiler shifts the octbyte type. A shift count of 64 or more yields all
// ones for a negative operand and 0 otherwise.
void fcpu_basic_shiftra( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    const unsigned long long all_ones = 0xFFFFFFFFFFFFFFFFULL;
    const unsigned long long count    = (unsigned long long)op3;
    unsigned long long value;
    int negative;

    fcpu_sign_extend_op( &op2, p->instr_flgsize );

    value    = (unsigned long long)op2;
    negative = (int)( ( value >> 63 ) & 1 );

    if( count >= 64 )
    {
        value = negative ? all_ones : 0;
    }
    else if( count > 0 )
    {
        value >>= count;
        if( negative )
            value |= ~( all_ones >> count );    // fill the vacated top bits
    }

    *pop1 = (octbyte)value;
    *pop0 = 0L;
}

// Rotates op2 left by op3 bit positions within the current operand size.
// The rotation count is taken modulo the operand width in bits.
//
//   *pop1 : rotated value, truncated to the operand size
//   *pop0 : always 0
//
// Both outputs are written for every count; previously a count that was a
// multiple of the operand width returned without writing them. For an
// unsupported operand size fcpu_eie() is called and op2 is passed through
// unrotated.
void fcpu_basic_rotl( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    const unsigned long long width = (unsigned long long)( 8L * p->instr_flgsize );
    const unsigned long long n     = (unsigned long long)op3 % width;
    unsigned long long value;

    fcpu_trunc_op( &op2, p->instr_flgsize );
    value = (unsigned long long)op2;

    switch( p->instr_flgsize )
    {
        case 1L:
        case 2L:
        case 4L:
        case 8L:
            // n is in 1..width-1 here, so both shift counts are in range.
            // Bits pushed above the operand are removed by the final
            // truncation.
            if( 0 != n )
                value = ( value << n ) | ( value >> ( width - n ) );
            break;

        default:
            fcpu_eie();
            break;
    }

    op2 = (octbyte)value;
    fcpu_trunc_op( &op2, p->instr_flgsize );

    *pop1 = op2;
    *pop0 = 0;
}

// Rotates op2 right by op3 bit positions within the current operand size.
// The rotation count is taken modulo the operand width in bits.
//
//   *pop1 : rotated value, truncated to the operand size
//   *pop0 : always 0
//
// Both outputs are written for every count; previously a count that was a
// multiple of the operand width returned without writing them. For an
// unsupported operand size fcpu_eie() is called and op2 is passed through
// unrotated.
void fcpu_basic_rotr( PFCPUCPU p, octbyte op3, octbyte op2, poctbyte pop1, poctbyte pop0 )
{
    const unsigned long long width = (unsigned long long)( 8L * p->instr_flgsize );
    const unsigned long long n     = (unsigned long long)op3 % width;
    unsigned long long value;

    fcpu_trunc_op( &op2, p->instr_flgsize );
    value = (unsigned long long)op2;

    switch( p->instr_flgsize )
    {
        case 1L:
        case 2L:
        case 4L:
        case 8L:
            // n is in 1..width-1 here, so both shift counts are in range.
            // Bits pushed above the operand are removed by the final
            // truncation.
            if( 0 != n )
                value = ( value >> n ) | ( value << ( width - n ) );
            break;

        default:
            fcpu_eie();
            break;
    }

    op2 = (octbyte)value;
    fcpu_trunc_op( &op2, p->instr_flgsize );

    *pop1 = op2;
    *pop0 = 0;
}

// SDUP: reads the lowest operand-size lane of register instr_r2, replicates
// it across all lanes of a 64-bit word and writes the word to register
// instr_r1. With an 8-byte operand size the value is copied unchanged.
void fcpu_exec_sdup_instr( PFCPUCPU p )
{
    octbyte value;
    octbyte lane_bits;

    fcpu_reg_get_sized( p, p->instr_r2, &value );

    switch( p->instr_flgsize )
    {
        case 1L:
            lane_bits = 8;
            break;

        case 2L:
            lane_bits = 16;
            break;

        case 4L:
            lane_bits = 32;
            break;

        default:
            // cant dup octbyte
            lane_bits = 64;
            break;
    }

    // Each pass doubles the number of filled lanes.
    for( ; lane_bits < 64; lane_bits <<= 1 )
        value |= ( value << lane_bits );

    fcpu_reg_put_octbyte( p, p->instr_r1, value );
}

//
// 6.3.1 (Core) Logic Instructions
//
// LOGIC: combines the lowest operand-size lanes of registers instr_r1 and
// instr_r2 bit by bit and writes the result to register instr_r3.
//
// The common functions are selected directly by p->instr_logop; every other
// value falls back to a generic truth-table evaluation driven by bits 10..13
// of p->instr_reg (bit 10 = f(0,0), bit 11 = f(1,0), bit 12 = f(0,1),
// bit 13 = f(1,1), written as f(op2 bit, op1 bit)).
//
// NOTE(review): if instr_logop is the same 4-bit field as instr_reg bits
// 10..13, the named cases and the fallback disagree on the bit order (e.g.
// case 1 is AND, but bit 10 alone means f(0,0) = NOR in the fallback) --
// confirm against the decoder.
void fcpu_exec_logic_instr( PFCPUCPU p )
{
    octbyte op1;
    octbyte op2;
    octbyte op3;
    onebyte i;

    fcpu_reg_get_sized( p, p->instr_r1, &op1 );
    fcpu_reg_get_sized( p, p->instr_r2, &op2 );

    switch( p->instr_logop )
    {
        case 1: // 0001 = AND
            op3 = op2 & op1;
            break;

        case 6: // 0110 = XOR
            op3 = op2 ^ op1;
            break;

        case 7: // 0111 = OR
            op3 = op2 | op1;
            break;

        case 8: // 1000 = NOR
            op3 = ~( op2 | op1 );
            break;

        case 14: // 1110 = NAND
            op3 = ~( op2 & op1 );
            break;

        default:

            // Generic path: evaluate the truth table one bit at a time over
            // the width of the current operand size.
            op3 = 0;
            
            for( i = 0; i < ( 8L * p->instr_flgsize ); i++ )
            {
                if( op2 & ( (octbyte)1 << i ) )
                {
                    if( op1 & ( (octbyte)1 << i ) )
                    {
                        // f(1,1)
                        if( p->instr_reg & BIT( 13 ) )
                            op3 |= ( (octbyte)1 << i );
                    }
                    else
                    {
                        // f(1,0)
                        if( p->instr_reg & BIT( 11 ) )
                            op3 |= ( (octbyte)1 << i );
                    }
                }
                else
                {
                    if( op1 & ( (octbyte)1 << i ) )
                    {
                        // f(0,1)
                        if( p->instr_reg & BIT( 12 ) )
                            op3 |= ( (octbyte)1 << i );
                    }
                    else
                    {
                        // f(0,0)
                        if( p->instr_reg & BIT( 10 ) )
                            op3 |= ( (octbyte)1 << i );
                    }
                }
            }
            break;
    }

    fcpu_reg_put_sized( p, p->instr_r3, op3 );
}

// LOGICI: combines the lowest operand-size lane of register instr_r2 with
// the 8-bit immediate and writes the result to register instr_r3.
// Bits 10..11 of p->instr_reg select the function:
//   0 = OR, 1 = AND, 2 = XOR, 3 = AND with the complemented immediate.
void fcpu_exec_logici_instr( PFCPUCPU p )
{
    const octbyte function = ( p->instr_reg >> 10 ) & 0x03;
    octbyte source;
    octbyte result;

    fcpu_reg_get_sized( p, p->instr_r2, &source );

    if( 0 == function )
        result = source | p->instr_imm8;
    else if( 1 == function )
        result = source & p->instr_imm8;
    else if( 2 == function )
        result = source ^ p->instr_imm8;
    else
        result = source & ( ~p->instr_imm8 );

    fcpu_reg_put_sized( p, p->instr_r3, result );
}

//
// 6.5.1 Core Memory Access Instructions
//
// ( load, store with pointer inc. automatically supported too )
//

// LOAD: reads an operand-size value from the address held in register
// instr_r2 into register instr_r1, then adds the content of register
// instr_r3 to the address register (post-increment).
// Bit 10 of p->instr_reg selects big-endian access, otherwise little-endian.
void fcpu_exec_load_instr( PFCPUCPU p )
{
    octbyte loaded;
    octbyte address;
    octbyte step;
    octbyte endian;

    fcpu_reg_get_octbyte( p, p->instr_r2, &address );
    fcpu_reg_get_octbyte( p, p->instr_r3, &step );

    endian = ( p->instr_reg & BIT( 10 ) ) ? FCPU_BIG_ENDIAN : FCPU_LITTLE_ENDIAN;
    fcpu_mem_get_sized( p, endian, address, &loaded );

    fcpu_reg_put_sized( p, p->instr_r1, loaded );

    // The pointer update is written after the data register.
    fcpu_reg_put_octbyte( p, p->instr_r2, address + step );
}

// STORE: writes the lowest operand-size lane of register instr_r1 to the
// address held in register instr_r2, then adds the content of register
// instr_r3 to the address register (post-increment).
// Bit 10 of p->instr_reg selects big-endian access, otherwise little-endian.
void fcpu_exec_store_instr( PFCPUCPU p )
{
    octbyte data;
    octbyte address;
    octbyte step;
    octbyte endian;

    fcpu_reg_get_octbyte( p, p->instr_r2, &address );
    fcpu_reg_get_octbyte( p, p->instr_r3, &step );
    fcpu_reg_get_sized( p, p->instr_r1, &data );

    endian = ( p->instr_reg & BIT( 10 ) ) ? FCPU_BIG_ENDIAN : FCPU_LITTLE_ENDIAN;
    fcpu_mem_put_sized( p, endian, address, data );

    fcpu_reg_put_octbyte( p, p->instr_r2, address + step );
}

// LOADI: like LOAD, but the post-increment is the 8-bit immediate. Bit 11
// of p->instr_reg is the immediate's sign bit: when set, the bits above the
// low byte of the increment are filled with ones.
// Bit 10 of p->instr_reg selects big-endian access, otherwise little-endian.
void fcpu_exec_loadi_instr( PFCPUCPU p )
{
    octbyte loaded;
    octbyte address;
    octbyte step;
    octbyte endian;

    fcpu_reg_get_octbyte( p, p->instr_r2, &address );

    step = p->instr_imm8;
    if( p->instr_reg & BIT( 11 ) )
        step |= 0xFFFFFFFFFFFFFF00;

    endian = ( p->instr_reg & BIT( 10 ) ) ? FCPU_BIG_ENDIAN : FCPU_LITTLE_ENDIAN;
    fcpu_mem_get_sized( p, endian, address, &loaded );

    fcpu_reg_put_sized( p, p->instr_r1, loaded );

    // The pointer update is written after the data register.
    fcpu_reg_put_octbyte( p, p->instr_r2, address + step );
}

// STOREI: like STORE, but the post-increment is the 8-bit immediate. Bit 11
// of p->instr_reg is the immediate's sign bit: when set, the bits above the
// low byte of the increment are filled with ones.
// Bit 10 of p->instr_reg selects big-endian access, otherwise little-endian.
void fcpu_exec_storei_instr( PFCPUCPU p )
{
    octbyte data;
    octbyte address;
    octbyte step;
    octbyte endian;

    fcpu_reg_get_octbyte( p, p->instr_r2, &address );

    step = p->instr_imm8;
    if( p->instr_reg & BIT( 11 ) )
        step |= 0xFFFFFFFFFFFFFF00;

    fcpu_reg_get_sized( p, p->instr_r1, &data );

    endian = ( p->instr_reg & BIT( 10 ) ) ? FCPU_BIG_ENDIAN : FCPU_LITTLE_ENDIAN;
    fcpu_mem_put_sized( p, endian, address, data );

    fcpu_reg_put_octbyte( p, p->instr_r2, address + step );
}

//
// 6.6.1 Core Data Move Instructions
//

// Conditional MOVE executor.
//
// The condition is evaluated on the octbyte value of register r3:
//   bit 11 clear, bit 12 clear : true when r3 == 0
//   bit 11 clear, bit 12 set   : not handled, fcpu_eie() is called and the
//                                condition stays "true"
//   bit 11 set,   bit 12 clear : true when bit 0 of r3 is set
//   bit 11 set,   bit 12 set   : true when bit 63 of r3 is set
// Bit 10 of the instruction word inverts the result.  When the final
// condition holds, the sized value of r2 is copied into r1.
//
// NOTE(review): the assembler in this file encodes "move_iflsb" as 0x3 << 11
// and the msb variants as 0x2 << 11, which does not line up with the bit
// tests below - confirm which side is correct.
void fcpu_exec_move_instr( PFCPUCPU p )
{
    octbyte cond_src;
    octbyte value;
    onebyte take;

    fcpu_reg_get_octbyte( p, p->instr_r3, &cond_src );

    take = 1;

    if( !( p->instr_reg & BIT(11) ) )
    {
        if( p->instr_reg & BIT(12) )
            fcpu_eie();                                         // undefined condition code
        else
            take = ( 0 == cond_src ) ? 1 : 0;                   // nullity
    }
    else if( p->instr_reg & BIT(12) )
        take = ( cond_src & 0x8000000000000000 ) ? 1 : 0;       // most significant bit
    else
        take = ( cond_src & 0x0000000000000001 ) ? 1 : 0;       // least significant bit

    // optional negation of the condition
    if( p->instr_reg & BIT( 10 ) )
        take = ( 0 == take ) ? 1 : 0;

    if( !take )
        return;

    fcpu_reg_get_sized( p, p->instr_r2, &value );

    // NOTE(review): instr_opcode is masked with 0x00FF by the decoder, so the
    // BIT( 13 ) test can never be true here; the sign-extending variant is
    // unimplemented anyway (its branch was empty) - confirm the intended field.
    if( !( p->instr_opcode & BIT( 13 ) ) )
        fcpu_reg_put_sized( p, p->instr_r1, value );
}

// LOADCONS executor: replaces one 16-bit slice of register r1 with the
// 16-bit immediate taken from bits 10..25 of the instruction word.
//
// Bits 8..9 of the instruction word select the slice (0..3); the slice starts
// at bit position n = slice * 16.  All other bits of r1 are preserved.
void fcpu_exec_loadcons_instr( PFCPUCPU p )
{
    octbyte op1;
    octbyte n;
    octbyte i;

    n = ( ( p->instr_reg >>  8 ) & 0x0000000000000003 ) << 4L;
    i = ( p->instr_reg >> 10 ) & 0x000000000000FFFF;

    fcpu_reg_get_octbyte( p, p->instr_r1, &op1 );

    // The mask has to be widened to octbyte BEFORE shifting: the literal
    // 0xFFFF has type int, so shifting it by 16, 32 or 48 would overflow or
    // be undefined and produce a wrong clear mask.
    op1 &= ~( (octbyte)0x000000000000FFFF << n );

    op1 |= i << n;

    fcpu_reg_put_octbyte( p, p->instr_r1, op1 );
}

// LOADCONSX executor: like LOADCONS, but additionally extends the sign of the
// immediate into all bits above the selected 16-bit slice.
//
// Bits 8..9 of the instruction word select the slice (0..3, starting at bit
// n = slice * 16); bits 10..25 hold the 16-bit immediate.  Bits of r1 below
// the slice are preserved; bits above it are set when bit 15 of the immediate
// is set and cleared otherwise.
void fcpu_exec_loadconsx_instr( PFCPUCPU p )
{
    octbyte op1;
    octbyte n;
    octbyte i;

    n = ( ( p->instr_reg >>  8 ) & 0x0000000000000003 ) << 4L;
    i = ( p->instr_reg >> 10 ) & 0x000000000000FFFF;

    fcpu_reg_get_octbyte( p, p->instr_r1, &op1 );

    // The mask has to be widened to octbyte BEFORE shifting: the literal
    // 0xFFFF has type int, so shifting it by 16, 32 or 48 would overflow or
    // be undefined and produce a wrong clear mask.
    op1 &= ~( (octbyte)0x000000000000FFFF << n );

    op1 |= i << n;

    // sign extension into the bits above the slice
    if( i & 0x0000000000008000 )
        op1 |= ( 0xFFFFFFFFFFFF0000 << n );
    else
        op1 &= ~( 0xFFFFFFFFFFFF0000 << n );

    fcpu_reg_put_octbyte( p, p->instr_r1, op1 );
}

// LOADM executor: loads a run of consecutive registers from memory.
//
// instr_r1 is used as the register count (nothing happens when it is 0),
// instr_r3 is the first register number and register r2 holds the start
// address.  The run is clipped at FCPU_NUM_REGS.  Values are read as
// little-endian octbytes.  Register r2 itself is not updated.
void fcpu_exec_loadm_instr( PFCPUCPU p )
{
    octbyte op2;
    octbyte opm;
    octbyte ibegin;
    octbyte iend;
    octbyte i;

    if( 0 == p->instr_r1 )
        return;

    fcpu_reg_get_octbyte( p, p->instr_r2, &op2 );

    ibegin  = p->instr_r3;
    iend    = p->instr_r3 + p->instr_r1;
    if( iend > FCPU_NUM_REGS )
        iend = FCPU_NUM_REGS;

    for( i = ibegin; i < iend; i++ )
    {
        fcpu_mem_get_octbyte( p, FCPU_LITTLE_ENDIAN, op2, &opm );

        // Memory is byte addressed (the instruction pointer advances by 4 per
        // quadbyte instruction), so consecutive octbytes are one octbyte
        // apart; stepping by 1 made the registers receive overlapping data.
        op2 += sizeof( octbyte );

        fcpu_reg_put_octbyte( p, i, opm );
    }
}

// STOREM executor: stores a run of consecutive registers to memory.
//
// instr_r1 is used as the register count (nothing happens when it is 0),
// instr_r3 is the first register number and register r2 holds the start
// address.  The run is clipped at FCPU_NUM_REGS.  Values are written as
// little-endian octbytes.  Register r2 itself is not updated.
void fcpu_exec_storem_instr( PFCPUCPU p )
{
    octbyte op2;
    octbyte opm;
    octbyte ibegin;
    octbyte iend;
    octbyte i;

    if( 0 == p->instr_r1 )
        return;

    fcpu_reg_get_octbyte( p, p->instr_r2, &op2 );

    ibegin  = p->instr_r3;
    iend    = p->instr_r3 + p->instr_r1;
    if( iend > FCPU_NUM_REGS )
        iend = FCPU_NUM_REGS;

    for( i = ibegin; i < iend; i++ )
    {
        fcpu_reg_get_octbyte( p, i, &opm );

        fcpu_mem_put_octbyte( p, FCPU_LITTLE_ENDIAN, op2, opm );

        // Memory is byte addressed (the instruction pointer advances by 4 per
        // quadbyte instruction), so consecutive octbytes are one octbyte
        // apart; stepping by 1 made each store overwrite most of the previous
        // one.
        op2 += sizeof( octbyte );
    }
}

// GET / GETI executor: currently a placeholder that performs no work.
void fcpu_exec_get_geti_instr( PFCPUCPU p )
{
    (void)p;    // intentionally unused
}

// PUT / PUTI executor: currently a placeholder that performs no work.
void fcpu_exec_put_puti_instr( PFCPUCPU p )
{
    (void)p;    // intentionally unused
}

//
// 6.7.1 Core Instruction Flow Control Instructions
//

// Conditional absolute jump (JMPA) executor.
//
// The condition is evaluated on the octbyte value of register r3:
//   bit 11 clear, bit 12 clear : true when r3 == 0
//   bit 11 clear, bit 12 set   : not handled, fcpu_eie() is called and the
//                                condition stays "true"
//   bit 11 set,   bit 12 clear : true when bit 0 of r3 is set
//   bit 11 set,   bit 12 set   : true when bit 63 of r3 is set
// Bit 10 of the instruction word inverts the result.  When the final
// condition holds, instr_ptr is replaced by the value of register r2 and the
// previous instr_ptr is written to register r1.
//
// NOTE(review): the assembler in this file encodes "jmpa_iflsb" as 0x3 << 11
// and "jmpa_ifmsb" as 0x2 << 11, which does not line up with the bit tests
// below - confirm which side is correct.
void fcpu_exec_jmpa_instr( PFCPUCPU p )
{
    octbyte link;
    octbyte target;
    octbyte cond_src;
    onebyte take;

    fcpu_reg_get_octbyte( p, p->instr_r3, &cond_src );

    take = 1L;

    if( !( p->instr_reg & BIT(11) ) )
    {
        if( p->instr_reg & BIT(12) )
            fcpu_eie();                                         // undefined condition code
        else
            take = ( 0 == cond_src ) ? 1 : 0;                   // nullity
    }
    else if( p->instr_reg & BIT(12) )
        take = ( cond_src & 0x8000000000000000 ) ? 1 : 0;       // most significant bit
    else
        take = ( cond_src & 0x0000000000000001 ) ? 1 : 0;       // least significant bit

    // optional negation of the condition
    if( p->instr_reg & BIT( 10 ) )
        take = ( 0 == take ) ? 1L : 0L;

    if( !take )
        return;

    fcpu_reg_get_octbyte( p, p->instr_r2, &target );

    // swap in the new instruction pointer, keep the old one as link value
    link = p->instr_ptr;
    p->instr_ptr = target;

    fcpu_reg_put_octbyte( p, p->instr_r1, link );
}

// LOADADDR executor: r1 = instr_ptr + 4 + r2, i.e. the value of register r2
// added to the address that follows the current instruction word.
void fcpu_exec_loadaddr_instr( PFCPUCPU p )
{
    octbyte displacement;
    octbyte address;

    fcpu_reg_get_octbyte( p, p->instr_r2, &displacement );

    address = p->instr_ptr + 4 + displacement;

    fcpu_reg_put_octbyte( p, p->instr_r1, address );
}

// LOADADDRI executor: r1 = instr_ptr + 4 + instr_imm16.  The immediate is
// used exactly as decoded; no sign extension is applied here.
void fcpu_exec_loadaddri_instr( PFCPUCPU p )
{
    fcpu_reg_put_octbyte( p,
                          p->instr_r1,
                          p->instr_ptr + 4 + p->instr_imm16 );
}

// LOOP executor: when register r1 is non-zero, instr_ptr is set to the value
// of register r2.  In every case r1 is written back decremented by one (so a
// zero counter wraps around).
void fcpu_exec_loop_instr( PFCPUCPU p )
{
    octbyte counter;
    octbyte target;

    fcpu_reg_get_octbyte( p, p->instr_r1, &counter );

    if( 0 != counter )
    {
        fcpu_reg_get_octbyte( p, p->instr_r2, &target );
        p->instr_ptr = target;
    }

    counter--;
    fcpu_reg_put_octbyte( p, p->instr_r1, counter );
}

// HALT executor: flags FCPU_EXCEPTION_HALT on the CPU.  fcpu_clock() only
// fetches and executes while the exception state is FCPU_EXCEPTION_NONE.
void fcpu_exec_halt_instr( PFCPUCPU p )
{
    p->exception = FCPU_EXCEPTION_HALT;
}

//
// Instruction Decoding and Control / Execution
//

// Generic executor for three-register operations.
//
// f is invoked once per lane with the lane values of registers r3 and r2 and
// delivers two results.  The first result always goes to register r1; the
// second one goes to the neighbour register of r1 (see
// fcpu_get_neighbour_reg_id) when the carry or modulo flag of the instruction
// is set.  With the SIMD flag set there are 8 / instr_flgsize lanes,
// otherwise a single one.
void fcpu_exec_x_instr( PFCPUCPU p, PCSCRFUNC f )
{
    onebyte lanes;
    onebyte lane;
    octbyte aux;
    octbyte result;
    octbyte src2;
    octbyte src3;

    lanes = 1L;
    if( p->instr_flgsimd )
        lanes = (onebyte)( 8L / p->instr_flgsize );

    lane = 0;
    while( lane < lanes )
    {
        fcpu_reg_get_sized_at( p, p->instr_r2, lane, &src2 );
        fcpu_reg_get_sized_at( p, p->instr_r3, lane, &src3 );

        f( p, src3, src2, &result, &aux );

        fcpu_reg_put_sized_at( p, p->instr_r1, lane, result );

        // the secondary result is only stored on request
        if( ( p->instr_flgcy ) || ( p->instr_flgmod ) )
            fcpu_reg_put_sized_at( p, fcpu_get_neighbour_reg_id( p->instr_r1 ), lane, aux );

        lane++;
    }
}

// Generic executor for register/immediate operations.
//
// f is invoked once per lane with instr_imm8 and the lane value of register
// r2 and delivers two results.  The first result goes to register r1, the
// second one to the neighbour register of r1 (see
// fcpu_get_neighbour_reg_id).  With the SIMD flag set there are
// 8 / instr_flgsize lanes, otherwise a single one.
//
// NOTE(review): unlike fcpu_exec_x_instr, the neighbour register is written
// unconditionally here - confirm that this is intended.
void fcpu_exec_x_imm_instr( PFCPUCPU p, PCSCRFUNC f )
{
    onebyte lanes;
    onebyte lane;
    octbyte aux;
    octbyte result;
    octbyte src2;

    lanes = 1L;
    if( p->instr_flgsimd )
        lanes = (onebyte)( 8L / p->instr_flgsize );

    lane = 0;
    while( lane < lanes )
    {
        fcpu_reg_get_sized_at( p, p->instr_r2, lane, &src2 );

        f( p, p->instr_imm8, src2, &result, &aux );

        fcpu_reg_put_sized_at( p, p->instr_r1, lane, result );
        fcpu_reg_put_sized_at( p, fcpu_get_neighbour_reg_id( p->instr_r1 ), lane, aux );

        lane++;
    }
}

// Instruction dispatcher: executes the instruction that was decoded into p.
//
// Arithmetic and shift opcodes are routed through the generic executors
// fcpu_exec_x_instr (register operands) and fcpu_exec_x_imm_instr (8-bit
// immediate operand) together with the matching fcpu_basic_* primitive; all
// other opcodes have a dedicated executor.  Opcodes listed under "non
// understood" are accepted and ignored.  Any opcode without a case raises
// FCPU_EXCEPTION_INVALIDINSTRUCTION.
void fcpu_idu_exec_instr( PFCPUCPU p )
{
    switch( p->instr_opcode )
    {
        //
        // 6.1.1 Core Arithmetic Instructions
        //
        case FCPU_OP_ADD:
            fcpu_exec_x_instr( p, &fcpu_basic_add );
            break;

        case FCPU_OP_SUB:
            fcpu_exec_x_instr( p, &fcpu_basic_sub );
            break;

        case FCPU_OP_MUL:
            fcpu_exec_x_instr( p, &fcpu_basic_mul );
            break;

        case FCPU_OP_DIV:
            fcpu_exec_x_instr( p, &fcpu_basic_div );
            break;

        case FCPU_OP_ABS:
            fcpu_exec_x_instr( p, &fcpu_basic_abs );
            break;

        case FCPU_OP_MIN:
            fcpu_exec_x_instr( p, &fcpu_basic_min );
            break;

        case FCPU_OP_MAX:
            fcpu_exec_x_instr( p, &fcpu_basic_max );
            break;

        case FCPU_OP_INC:
            fcpu_exec_x_instr( p, &fcpu_basic_inc );
            break;

        case FCPU_OP_DEC:
            fcpu_exec_x_instr( p, &fcpu_basic_dec );
            break;

        case FCPU_OP_MINI:
            fcpu_exec_x_imm_instr( p, &fcpu_basic_min );
            break;

        case FCPU_OP_MAXI:
            fcpu_exec_x_imm_instr( p, &fcpu_basic_max );
            break;

        case FCPU_OP_NEG:
            fcpu_exec_x_instr( p, &fcpu_basic_neg );
            break;

        case FCPU_OP_CMPL:
            fcpu_exec_x_instr( p, &fcpu_basic_cmpl );
            break;

        case FCPU_OP_CMPLE:
            fcpu_exec_x_instr( p, &fcpu_basic_cmple );
            break;

        case FCPU_OP_MOD:
            fcpu_exec_x_instr( p, &fcpu_basic_mod );
            break;

        case FCPU_OP_SORT:
            fcpu_exec_x_instr( p, &fcpu_basic_sort );
            break;

        case FCPU_OP_ADDI:
            fcpu_exec_x_imm_instr( p, &fcpu_basic_add );
            break;

        case FCPU_OP_SUBI:
            fcpu_exec_x_imm_instr( p, &fcpu_basic_sub );
            break;

        case FCPU_OP_MULI:
            fcpu_exec_x_imm_instr( p, &fcpu_basic_mul );
            break;

        case FCPU_OP_DIVI:
            fcpu_exec_x_imm_instr( p, &fcpu_basic_div );
            break;

        case FCPU_OP_CMPLI:
            fcpu_exec_x_imm_instr( p, &fcpu_basic_cmpl );
            break;

        case FCPU_OP_CMPLEI:
            fcpu_exec_x_imm_instr( p, &fcpu_basic_cmple );
            break;

        case FCPU_OP_MODI:
            fcpu_exec_x_imm_instr( p, &fcpu_basic_mod );
            break;

        //
        // 6.3.1 (Core) Shift And Rotate Instructions
        //
        case FCPU_OP_SHIFTL:
            fcpu_exec_x_instr( p, &fcpu_basic_shiftl );
            break;

        case FCPU_OP_SHIFTR:
            fcpu_exec_x_instr( p, &fcpu_basic_shiftr );
            break;

        case FCPU_OP_SHIFTRA:
            fcpu_exec_x_instr( p, &fcpu_basic_shiftra );
            break;

        case FCPU_OP_ROTL:
            fcpu_exec_x_instr( p, &fcpu_basic_rotl );
            break;

        case FCPU_OP_ROTR:
            fcpu_exec_x_instr( p, &fcpu_basic_rotr );
            break;

        case FCPU_OP_BITOP:
            fcpu_exec_x_instr( p, &fcpu_basic_bitop );
            break;

        case FCPU_OP_SHIFTLI:
            fcpu_exec_x_imm_instr( p, &fcpu_basic_shiftl );
            break;

        case FCPU_OP_SHIFTRI:
            fcpu_exec_x_imm_instr( p, &fcpu_basic_shiftr );
            break;

        case FCPU_OP_SHIFTRAI:
            fcpu_exec_x_imm_instr( p, &fcpu_basic_shiftra );
            break;

        case FCPU_OP_ROTLI:
            fcpu_exec_x_imm_instr( p, &fcpu_basic_rotl );
            break;

        case FCPU_OP_ROTRI:
            fcpu_exec_x_imm_instr( p, &fcpu_basic_rotr );
            break;

        case FCPU_OP_BITOPI:
            fcpu_exec_x_imm_instr( p, &fcpu_basic_bitop );
            break;

        case FCPU_OP_SDUP:
            fcpu_exec_sdup_instr( p );
            break;

        //
        // 6.3.1 Core Logic Instructions
        //
        case FCPU_OP_LOGIC:
            fcpu_exec_logic_instr( p );
            break;

        case FCPU_OP_LOGICI:
            fcpu_exec_logici_instr( p );
            break;

        //
        // 6.5.1 (Core) Memory Access Instructions
        //
        case FCPU_OP_LOAD:
        case FCPU_OP_LOADF:
            fcpu_exec_load_instr( p );
            break;

        case FCPU_OP_STORE:
        case FCPU_OP_STOREF:
            fcpu_exec_store_instr( p );
            break;

        // The immediate forms take their pointer increment from the
        // instruction word, so they need the *i executors; routing them to
        // the register forms would use register r3 as the increment instead.
        case FCPU_OP_LOADI:
        case FCPU_OP_LOADIF:
            fcpu_exec_loadi_instr( p );
            break;

        case FCPU_OP_STOREI:
        case FCPU_OP_STOREIF:
            fcpu_exec_storei_instr( p );
            break;

        //
        // 6.6.1 (Core) Data Move Instructions
        //
        case FCPU_OP_MOVE:
            fcpu_exec_move_instr( p );
            break;

        case FCPU_OP_LOADCONS:
            fcpu_exec_loadcons_instr( p );
            break;

        case FCPU_OP_LOADCONSX:
            fcpu_exec_loadconsx_instr( p );
            break;

        case FCPU_OP_GET:
        case FCPU_OP_GETI:
            fcpu_exec_get_geti_instr( p );
            break;

        case FCPU_OP_PUT:
        case FCPU_OP_PUTI:
            fcpu_exec_put_puti_instr( p );
            break;

        case FCPU_OP_LOADM:
            fcpu_exec_loadm_instr( p );
            break;

        case FCPU_OP_STOREM:
            fcpu_exec_storem_instr( p );
            break;

        case FCPU_OP_CACHEMM:
            // needs to do nothing in the current emulator version
            break;

        //
        // 6.7.1 Core Instruction Flow Control Instructions
        //
        case FCPU_OP_JMPA:
            fcpu_exec_jmpa_instr( p );
            break;

        case FCPU_OP_LOADADDR:
            fcpu_exec_loadaddr_instr( p );
            break;

        case FCPU_OP_LOADADDRI:
            fcpu_exec_loadaddri_instr( p );
            break;

        case FCPU_OP_LOOP:
            fcpu_exec_loop_instr( p );
            break;

        //
        //
        //

        case FCPU_OP_HALT:
            fcpu_exec_halt_instr( p );
            break;

        //
        // non understood / not well defined instructions
        //
        case FCPU_OP_SYSCALL:
            break;

        case FCPU_OP_RFE:
            break;

        case FCPU_OP_SRB_SAVE:
            break;

        case FCPU_OP_SRB_RESTORE:
            break;

        case FCPU_OP_SERIALIZE:
            break;

        //
        //
        //
        default:
            p->exception = FCPU_EXCEPTION_INVALIDINSTRUCTION;
            break;
    }
}

// Advances the instruction pointer by one instruction word (4 bytes) and
// clears its two lowest bits so that it stays aligned to 4 bytes.
void fcpu_ctl_next_instr( PFCPUCPU p )
{
    p->instr_ptr = 0xFFFFFFFFFFFFFFFC & ( 4L + p->instr_ptr );
}

// Fetch stage: reads the little-endian quadbyte at instr_ptr into instr_reg.
void fcpu_idu_fetch_instr( PFCPUCPU p )
{
    fcpu_mem_get_quadbyte( p,
                           FCPU_LITTLE_ENDIAN,
                           p->instr_ptr,
                           &p->instr_reg );
}

// Decode stage: splits instr_reg into every field any executor may need.
//
// Several fields overlap on purpose (for example imm8, imm16, the logic
// operation and the flag bits share bits 10..25); which of them is meaningful
// depends on the opcode and is decided by the executor.  The two size bits
// (8..9) are translated into an operand size in bytes.
void fcpu_idu_decode_instr( PFCPUCPU p )
{
    long sizecode;

    // opcode and register numbers
    p->instr_opcode     = ( p->instr_reg >>  0 ) & 0x00FF;
    p->instr_r1         = ( p->instr_reg >> 26 ) & 0x003F;
    p->instr_r2         = ( p->instr_reg >> 20 ) & 0x003F;
    p->instr_r3         = ( p->instr_reg >> 14 ) & 0x003F;

    // immediates and logic function
    p->instr_imm8       = ( p->instr_reg >> 12 ) & 0x00FF;
    p->instr_imm16      = ( p->instr_reg >> 10 ) & 0xFFFF;
    p->instr_logop      = ( p->instr_reg >> 10 ) & 0x000F;

    // flags
    p->instr_flgsimd    = (onebyte)( ( p->instr_reg >> 10 ) & 0x0001 );
    p->instr_flgsat     = (onebyte)( ( p->instr_reg >> 12 ) & 0x0001 );
    p->instr_flgcy      = (onebyte)( ( p->instr_reg >> 13 ) & 0x0001 );
    p->instr_flgsign    = (onebyte)( ( p->instr_reg >> 12 ) & 0x0001 );
    p->instr_flgmod     = (onebyte)( ( p->instr_reg >> 13 ) & 0x0001 );
    p->instr_flgbitop   = (onebyte)( ( p->instr_reg >> 12 ) & 0x0003 );

    // operand size in bytes
    sizecode = (long)( ( p->instr_reg >>  8 ) & 0x03 );

    if( FCPU_SIZECODE_BYTE == sizecode )
        p->instr_flgsize = 1L;
    else if( FCPU_SIZECODE_DOUBLEBYTE == sizecode )
        p->instr_flgsize = 2L;
    else if( FCPU_SIZECODE_QUADBYTE == sizecode )
        p->instr_flgsize = 4L;
    else if( FCPU_SIZECODE_OCTBYTE == sizecode )
        p->instr_flgsize = 8L;
    else
        fcpu_eie();
}

// Exception handling step, run at the start of every clock.
//
// RESET loads the reset program counter and clears the exception; an invalid
// instruction is turned into HALT.  All other known states are left
// untouched.  An unknown state is reported through fcpu_eie().
void fcpu_ctl_handle_exception( PFCPUCPU p )
{
    switch( p->exception )
    {
        case FCPU_EXCEPTION_RESET:
            p->instr_ptr = FCPU_RESET_PC;
            p->exception = FCPU_EXCEPTION_NONE;
            break;

        case FCPU_EXCEPTION_INVALIDINSTRUCTION:
            p->exception = FCPU_EXCEPTION_HALT;
            break;

        // nothing to do for these states
        case FCPU_EXCEPTION_NONE:
        case FCPU_EXCEPTION_ALIGNMENT_FAULT:
        case FCPU_EXCEPTION_DIVISIONBYZERO:
        case FCPU_EXCEPTION_PAGEFAULT:
        case FCPU_EXCEPTION_HALT:
            break;

        default:
            fcpu_eie();
            break;
    }
}

//
//
//

// Puts the CPU into the reset state: the reset exception is raised and the
// instruction pointer is set to FCPU_RESET_PC.
void fcpu_reset( PFCPUCPU p )
{
    p->exception = FCPU_EXCEPTION_RESET;
    p->instr_ptr = FCPU_RESET_PC;
}

// Runs one clock: handles a pending exception first and, when no exception
// remains, performs fetch, decode, execute and the instruction pointer
// advance for a single instruction.
//
// NOTE(review): the instruction pointer is advanced after execution even when
// the executed instruction has just replaced it (JMPA, LOOP) - confirm that
// jump targets are meant to be "target + 4".
void fcpu_clock( PFCPUCPU p )
{
    fcpu_ctl_handle_exception( p );

    if( FCPU_EXCEPTION_NONE != p->exception )
        return;

    fcpu_idu_fetch_instr( p );
    fcpu_idu_decode_instr( p );
    fcpu_idu_exec_instr( p );
    fcpu_ctl_next_instr( p );
}

// *****************************************************************************
// *
// * CSCR Assembler and Test vectors
// *
// *****************************************************************************

// Returns 1 when c is one of 0-9, a-f or A-F, and 0 otherwise.
onebyte fcpu_asm_is_hex_digit( char c )
{
    int is_decimal = ( c >= '0' ) && ( c <= '9' );
    int is_lower   = ( c >= 'a' ) && ( c <= 'f' );
    int is_upper   = ( c >= 'A' ) && ( c <= 'F' );

    if( is_decimal || is_lower || is_upper )
        return 1L;

    return 0L;
}

// Converts one hexadecimal digit (0-9, a-f, A-F) into its value 0..15.
// Any other character yields 0.
onebyte fcpu_asm_hex_digit_to_number( char c )
{
    onebyte value = 0L;

    if( ( c >= '0' ) && ( c <= '9' ) )
        value = (onebyte)( c - '0' );
    else if( ( c >= 'a' ) && ( c <= 'f' ) )
        value = (onebyte)( c - 'a' + 10 );
    else if( ( c >= 'A' ) && ( c <= 'F' ) )
        value = (onebyte)( c - 'A' + 10 );

    return value;
}

// Checks whether instr starts with the mnemonic ref.
//
// The match only counts when ref is followed in instr by '.', ' ' or the end
// of the string, so that e.g. "add" does not match "adds".  On success 1 is
// returned and *pu receives the number of matched characters; otherwise 0 is
// returned and *pu is 0.
onebyte fcpu_asm_cmp( char* instr, char* ref, ponebyte pu )
{
    onebyte matched = 0;

    *pu = 0;

    for( ; ( *instr ) && ( *ref ); instr++, ref++, matched++ )
    {
        if( *instr != *ref )
            return 0L;
    }

    // instr ended before the whole mnemonic was seen
    if( *ref != 0 )
        return 0L;

    // the mnemonic must end at a delimiter
    if( ( *instr != '.' ) && ( *instr != ' ' ) && ( *instr != 0 ) )
        return 0L;

    *pu = matched;
    return 1L;
}

// Parses the operand part of an assembler line (everything after the
// mnemonic).
//
// Recognised items:
//   .b / .d / .q / .o   operand size suffix (case insensitive), stored in
//                       *size as an FCPU_SIZECODE_* value; default is octbyte
//   hexadecimal number  accumulated into *imm
//   rNN / RNN           register number with up to two hexadecimal digits,
//                       taken modulo FCPU_NUM_REGS
// Items are separated by blanks, tabs or commas.  Every operand shifts the
// register slots (*r3 <- *r2 <- *r1 <- new), so the last register on the line
// ends up in *r1 and the one before it in *r2.  Unknown characters are
// skipped and count as an (empty) operand.
void fcpu_asm_param( char* instr, poctbyte size, poctbyte r3, poctbyte r2, poctbyte r1, poctbyte imm )
{
    *size = FCPU_SIZECODE_OCTBYTE;
    *r3   = 0;
    *r2   = 0;
    *r1   = 0;
    *imm  = 0;

    while( *instr )
    {
        if( *instr == '.' )
        {
            instr++;

            // A recognised size letter is consumed here; otherwise 'b' and
            // 'd' would be read again below as hexadecimal immediates.
            switch( *instr )
            {
                case 'b':
                case 'B':
                    *size = FCPU_SIZECODE_BYTE;
                    instr++;
                    break;

                case 'd':
                case 'D':
                    *size = FCPU_SIZECODE_DOUBLEBYTE;
                    instr++;
                    break;

                case 'q':
                case 'Q':
                    *size = FCPU_SIZECODE_QUADBYTE;
                    instr++;
                    break;

                case 'o':
                case 'O':
                    *size = FCPU_SIZECODE_OCTBYTE;
                    instr++;
                    break;

                default:
                    break;
            }

            continue;
        }

        // separators (8 is kept for compatibility; tab is accepted as well)
        while( ( *instr == ' ' ) || ( *instr == ',' ) || ( *instr == '\t' ) || ( *instr == 8 ) )
            instr++;

        // Trailing separators: stop here instead of shifting the registers
        // once more and stepping past the terminating NUL.
        if( *instr == 0 )
            break;

        *r3 = *r2;
        *r2 = *r1;
        *r1 = 0L;

        if( fcpu_asm_is_hex_digit( *instr ) )
        {
            while( ( *instr ) && ( fcpu_asm_is_hex_digit( *instr ) ) )
            {
                *imm <<= 4;
                *imm |= fcpu_asm_hex_digit_to_number( *instr );

                instr++;
            }
        }
        else if( ( *instr == 'R' ) || ( *instr == 'r' ) )
        {
            instr++;
            if( fcpu_asm_is_hex_digit( *instr ) )
            {
                *r1 |= fcpu_asm_hex_digit_to_number( *instr );
                *r1 %= FCPU_NUM_REGS;
                instr++;
            }
            if( fcpu_asm_is_hex_digit( *instr ) )
            {
                *r1 <<= 4;
                *r1 |= fcpu_asm_hex_digit_to_number( *instr );
                *r1 %= FCPU_NUM_REGS;
                instr++;
            }
        }
        else
            instr++;
    }
}

// ORs two register numbers into the instruction word *o: r2 goes to bits
// 20..25 and r1 to bits 26..31.  Both numbers are reduced modulo
// FCPU_NUM_REGS first.
void fcpu_asm_2r( poctbyte o, octbyte r2, octbyte r1 )
{
    octbyte field_r2;
    octbyte field_r1;

    field_r2 = r2 % FCPU_NUM_REGS;
    field_r1 = r1 % FCPU_NUM_REGS;

    field_r2 = ( field_r2 & 0x3F ) << 20;
    field_r1 = ( field_r1 & 0x3F ) << 26;

    *o |= field_r2;
    *o |= field_r1;
}

// ORs three register numbers into the instruction word *o: r3 goes to bits
// 14..19 (reduced modulo FCPU_NUM_REGS), r2 and r1 are placed by
// fcpu_asm_2r.
void fcpu_asm_3r( poctbyte o, octbyte r3, octbyte r2, octbyte r1 )
{
    octbyte field_r3;

    fcpu_asm_2r( o, r2, r1 );

    field_r3 = r3 % FCPU_NUM_REGS;
    field_r3 = ( field_r3 & 0x3F ) << 14;

    *o |= field_r3;
}

// Builds a three-register instruction word in *o.
//
// Layout: opcode in bits 0..7, size code in bits 8..9, SIMD flag in bit 10,
// saturation flag in bit 12, carry flag in bit 13; the register fields are
// added by fcpu_asm_3r.  Any previous content of *o is discarded.
void fcpu_asm_x_instr( poctbyte o, octbyte oc, octbyte flsize, octbyte flsimd, octbyte flsat, octbyte flcy, octbyte r3, octbyte r2, octbyte r1 )
{
    *o = ( ( oc     & 0xFF ) <<  0 )
       | ( ( flsize & 0x03 ) <<  8 )
       | ( ( flsimd & 0x01 ) << 10 )
       | ( ( flsat  & 0x01 ) << 12 )
       | ( ( flcy   & 0x01 ) << 13 );

    fcpu_asm_3r( o, r3, r2, r1 );
}

// Builds a register/immediate instruction word in *o.
//
// Layout: opcode in bits 0..7, size code in bits 8..9, SIMD flag in bit 10,
// 8-bit immediate in bits 12..19; the register fields are added by
// fcpu_asm_2r.  Any previous content of *o is discarded.
void fcpu_asm_x_imm_instr( poctbyte o, octbyte oc, octbyte flsize, octbyte flsimd, octbyte imm, octbyte r2, octbyte r1 )
{
    *o = ( ( oc     & 0xFF ) <<  0 )
       | ( ( flsize & 0x03 ) <<  8 )
       | ( ( flsimd & 0x01 ) << 10 )
       | ( ( imm    & 0xFF ) << 12 );

    fcpu_asm_2r( o, r2, r1 );
}

void fcpu_asm( PFCPUCPU p, char* instr, poctbyte o, poctbyte a, onebyte flsave )
{
    octbyte r3;
    octbyte r2;
    octbyte r1;
    octbyte imm;
    octbyte size;
    onebyte u;

    // format:  adr opcode paramliste
    // example: 00000000 loadcons.0 FF34, r1

    *a = 0;
    while( ( *instr ) && ( fcpu_asm_is_hex_digit( *instr ) ) )
    {
        *a <<= 4;
        *a |= fcpu_asm_hex_digit_to_number( *instr );

        instr++;
    }

    while( ( *instr == ' ' ) || ( *instr == 8 ) )
        instr++;

    *o = 0;

    // HLT
    if( fcpu_asm_cmp( instr, "halt", &u ) )
    {
        *o = FCPU_OP_HALT;
    }

    // JMPA

    if( fcpu_asm_cmp( instr, "jmpa", &u ) )
    {
        // alias for move.nul.n r0,rx,ry
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_JMPA;
        *o |= ( 0x0 << 11 );
        *o |= BIT( 10 );
        fcpu_asm_3r( o, 0, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "jmpa_ifnul", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_JMPA;
        *o |= ( 0x0 << 11 );
        fcpu_asm_3r( o, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "jmpa_iflsb", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_JMPA;
        *o |= ( 0x3 << 11 );
        fcpu_asm_3r( o, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "jmpa_ifmsb", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_JMPA;
        *o |= ( 0x2 << 11 );
        fcpu_asm_3r( o, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "jmpa_ifnnul", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_JMPA;
        *o |= ( 0x0 << 11 );
        *o |= BIT( 10 );
        fcpu_asm_3r( o, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "jmpa_ifnlsb", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_JMPA;
        *o |= ( 0x3 << 11 );
        *o |= BIT( 10 );
        fcpu_asm_3r( o, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "jmpa_ifnmsb", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_JMPA;
        *o |= ( 0x2 << 11 );
        *o |= BIT( 10 );
        fcpu_asm_3r( o, r3, r2, r1 );
    }

    // LOADADDR
    
    if( fcpu_asm_cmp( instr, "loadaddr", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADADDR;
        fcpu_asm_2r( o, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "loadaddrd", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADADDR;
        *o |= BIT( 8 );
        fcpu_asm_2r( o, r2, r1 );
    }

    // LOOPENTRY
    
    if( fcpu_asm_cmp( instr, "loopentry", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADADDR;
        fcpu_asm_3r( o, 0, 0, r1 );
    }

    // LOADADDRI
    if( fcpu_asm_cmp( instr, "loadaddri", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADADDRI;
        fcpu_asm_2r( o, 0, r1 );
    }
    if( fcpu_asm_cmp( instr, "loadaddrid", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADADDRI;
        *o |= BIT(8);
        fcpu_asm_2r( o, 0, r1 );
    }
    if( fcpu_asm_cmp( instr, "loadaddris", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADADDRI;
        *o |= BIT(9);
        fcpu_asm_2r( o, 0, r1 );
    }
    if( fcpu_asm_cmp( instr, "loadaddrids", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADADDRI;
        *o |= BIT(8);
        *o |= BIT(9);
        fcpu_asm_2r( o, 0, r1 );
    }

    // LOOP
    
    if( fcpu_asm_cmp( instr, "loop", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOOP;
        fcpu_asm_3r( o, 0, 0, r1 );
    }

    // MOVE

    if( fcpu_asm_cmp( instr, "move", &u ) )
    {
        // alias for move.nul.n r0,rx,ry
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_MOVE;
        *o |= ( 0x0 << 11 );
        *o |= BIT( 10 );
        fcpu_asm_3r( o, 0, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "move_ifnul", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_MOVE;
        *o |= ( 0x0 << 11 );
        fcpu_asm_3r( o, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "move_iflsb", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_MOVE;
        *o |= ( 0x3 << 11 );
        fcpu_asm_3r( o, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "move_iflsb", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_MOVE;
        *o |= ( 0x2 << 11 );
        fcpu_asm_3r( o, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "move_ifnnul", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_MOVE;
        *o |= ( 0x0 << 11 );
        *o |= BIT( 10 );
        fcpu_asm_3r( o, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "move_ifnlsb", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_MOVE;
        *o |= ( 0x3 << 11 );
        *o |= BIT( 10 );
        fcpu_asm_3r( o, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "move_ifnmsb", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_MOVE;
        *o |= ( 0x2 << 11 );
        *o |= BIT( 10 );
        fcpu_asm_3r( o, r3, r2, r1 );
    }

    // LOADCONS
    
    if( fcpu_asm_cmp( instr, "loadcons.0", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADCONS;
        *o |= (   0 << 8  );
        *o |= ( imm << 10 );
        *o |= (  r1 << 26 );
    }
    if( fcpu_asm_cmp( instr, "loadcons.1", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADCONS;
        *o |= (   1 << 8  );
        *o |= ( imm << 10 );
        *o |= (  r1 << 26 );
    }
    if( fcpu_asm_cmp( instr, "loadcons.2", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADCONS;
        *o |= (   2 << 8  );
        *o |= ( imm << 10 );
        *o |= (  r1 << 26 );
    }
    if( fcpu_asm_cmp( instr, "loadcons.3", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADCONS;
        *o |= (   3 << 8  );
        *o |= ( imm << 10 );
        *o |= (  r1 << 26 );
    }

    // LOADCONSX
    
    if( fcpu_asm_cmp( instr, "loadconsx.0", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADCONSX;
        *o |= (   0 << 8  );
        *o |= ( imm << 10 );
        *o |= (  r1 << 26 );
    }
    if( fcpu_asm_cmp( instr, "loadconsx.1", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADCONSX;
        *o |= (   1 << 8  );
        *o |= ( imm << 10 );
        *o |= (  r1 << 26 );
    }
    if( fcpu_asm_cmp( instr, "loadconsx.2", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADCONSX;
        *o |= (   2 << 8  );
        *o |= ( imm << 10 );
        *o |= (  r1 << 26 );
    }
    if( fcpu_asm_cmp( instr, "loadconsx.3", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADCONSX;
        *o |= (   3 << 8  );
        *o |= ( imm << 10 );
        *o |= (  r1 << 26 );
    }

    // GET, GETI, PUT, PUTI

    if( fcpu_asm_cmp( instr, "get", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_GET;
        fcpu_asm_2r( o, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "geti", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_GETI;
        *o |= ( imm & 0xFFFF ) << 10L;
        *o |= (  r1 &   0x3F ) << 26L;
    }
    if( fcpu_asm_cmp( instr, "put", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_PUT;
        fcpu_asm_2r( o, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "puti", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_PUTI;
        *o |= ( imm & 0xFFFF ) << 10L;
        *o |= (  r1 &   0x3F ) << 26L;
    }

    // LOADM, STOREM
    if( fcpu_asm_cmp( instr, "loadm", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_LOADM;
        fcpu_asm_3r( o, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "storem", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_STOREM;
        fcpu_asm_3r( o, r3, r2, r1 );
    }

    // LOAD
    if( fcpu_asm_cmp( instr, "load", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOAD, size, 0L, 0L, 0L, 0L, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "loade", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOAD, size, 1L, 0L, 0L, 0L, r2, r1 );
    }

    // LOADI
    if( fcpu_asm_cmp( instr, "loadi", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_LOADI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "loadie", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_LOADI, size, 1L, imm, r2, r1 );
    }

    // STORE
    if( fcpu_asm_cmp( instr, "store", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_STORE, size, 0L, 0L, 0L, 0L, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "storee", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_STORE, size, 1L, 0L, 0L, 0L, r2, r1 );
    }

    // STOREI
    if( fcpu_asm_cmp( instr, "storei", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_STOREI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "storeie", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_STOREI, size, 1L, imm, r2, r1 );
    }

    // LOADF
    if( fcpu_asm_cmp( instr, "loadf", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOADF, size, 0L, 0L, 0L, 0L, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "loadfe", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOADF, size, 1L, 0L, 0L, 0L, r2, r1 );
    }

    // LOADIF
    if( fcpu_asm_cmp( instr, "loadif", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_LOADIF, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "loadife", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_LOADIF, size, 1L, imm, r2, r1 );
    }

    // STOREF
    if( fcpu_asm_cmp( instr, "storef", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_STOREF, size, 0L, 0L, 0L, 0L, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "storefe", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_STOREF, size, 1L, 0L, 0L, 0L, r2, r1 );
    }

    // STOREIF
    if( fcpu_asm_cmp( instr, "storeif", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_STOREIF, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "storeife", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_STOREIF, size, 1L, imm, r2, r1 );
    }

    // CACHEMM
    if( fcpu_asm_cmp( instr, "cachemm", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        *o = FCPU_OP_CACHEMM;
        fcpu_asm_2r( o, r2, r1 );
    }

    //
    // 6.1.1 Core Arithmetic Instructions
    //

    // ADD

    if( fcpu_asm_cmp( instr, "add", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_ADD, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "adds", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_ADD, size, 0L, 1L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "addc", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_ADD, size, 0L, 0L, 1L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sadd", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_ADD, size, 1L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sadds", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_ADD, size, 1L, 1L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "saddc", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_ADD, size, 1L, 0L, 1L, r3, r2, r1 );
    }

    // SUB

    if( fcpu_asm_cmp( instr, "sub", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_SUB, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "subs", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_SUB, size, 0L, 1L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "subc", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_SUB, size, 0L, 0L, 1L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "ssub", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_SUB, size, 1L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "ssubs", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_SUB, size, 1L, 1L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "ssubc", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_SUB, size, 1L, 0L, 1L, r3, r2, r1 );
    }

    // MUL

    if( fcpu_asm_cmp( instr, "mul", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_MUL, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "muls", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_MUL, size, 0L, 1L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "mulh", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_MUL, size, 0L, 0L, 1L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "smul", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_MUL, size, 1L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "smuls", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_MUL, size, 1L, 1L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "smulh", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_MUL, size, 1L, 0L, 1L, r3, r2, r1 );
    }

    // DIV

    if( fcpu_asm_cmp( instr, "div", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_DIV, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "divs", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_DIV, size, 0L, 1L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "divm", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_DIV, size, 0L, 0L, 1L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sdiv", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_DIV, size, 1L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sdivs", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_DIV, size, 1L, 1L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sdivm", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_DIV, size, 1L, 0L, 1L, r3, r2, r1 );
    }

    //
    // 6.1.2 Optional Arithmetic Instructions
    //

    // ADDI

    if( fcpu_asm_cmp( instr, "addi", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_ADDI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "saddi", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_ADDI, size, 1L, imm, r2, r1 );
    }

    // SUBI

    if( fcpu_asm_cmp( instr, "subi", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_SUBI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "ssubi", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_SUBI, size, 1L, imm, r2, r1 );
    }

    // MULI

    if( fcpu_asm_cmp( instr, "muli", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_MULI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "smuli", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_MULI, size, 1L, imm, r2, r1 );
    }

    // DIVI

    if( fcpu_asm_cmp( instr, "divi", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_DIVI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sdivi", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_DIVI, size, 1L, imm, r2, r1 );
    }

    // MOD

    if( fcpu_asm_cmp( instr, "mod", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_MOD, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "mods", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_MOD, size, 0L, 1L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "smod", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_MOD, size, 1L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "smods", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_MOD, size, 1L, 1L, 0L, r3, r2, r1 );
    }

    // MODI

    if( fcpu_asm_cmp( instr, "modi", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_MODI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "smodi", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_MODI, size, 1L, imm, r2, r1 );
    }

    // ADDSUB

    if( fcpu_asm_cmp( instr, "addsub", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_ADDSUB, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "saddsub", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_ADDSUB, size, 1L, 0L, 0L, r3, r2, r1 );
    }

    // INC

    if( fcpu_asm_cmp( instr, "inc", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_INC, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sinc", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_INC, size, 1L, 0L, 0L, r3, r2, r1 );
    }

    // DEC

    if( fcpu_asm_cmp( instr, "dec", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_DEC, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sdec", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_DEC, size, 1L, 0L, 0L, r3, r2, r1 );
    }

    // NEG

    if( fcpu_asm_cmp( instr, "neg", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_NEG, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sneg", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_NEG, size, 1L, 0L, 0L, r3, r2, r1 );
    }

    // CMPL

    if( fcpu_asm_cmp( instr, "cmpl", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_CMPL, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "scmpl", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_CMPL, size, 1L, 0L, 0L, r3, r2, r1 );
    }

    // CMPLE

    if( fcpu_asm_cmp( instr, "cmple", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_CMPLE, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "scmple", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_CMPLE, size, 1L, 0L, 0L, r3, r2, r1 );
    }

    // CMPLI

    if( fcpu_asm_cmp( instr, "cmpli", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_CMPLI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "scmpli", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_CMPLI, size, 1L, imm, r2, r1 );
    }

    // CMPLEI

    if( fcpu_asm_cmp( instr, "cmplei", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_CMPLEI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "scmplei", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_CMPLEI, size, 1L, imm, r2, r1 );
    }

    // ABS

    if( fcpu_asm_cmp( instr, "abs", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_ABS, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sabs", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_ABS, size, 1L, 0L, 0L, r3, r2, r1 );
    }

    // MAX

    if( fcpu_asm_cmp( instr, "max", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_MAX, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "smax", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_MAX, size, 1L, 0L, 0L, r3, r2, r1 );
    }

    // MIN

    if( fcpu_asm_cmp( instr, "min", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_MIN, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "smin", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_MIN, size, 1L, 0L, 0L, r3, r2, r1 );
    }

    // MAXI

    if( fcpu_asm_cmp( instr, "maxi", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_MAXI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "smaxi", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_MAXI, size, 1L, imm, r2, r1 );
    }

    // MINI

    if( fcpu_asm_cmp( instr, "mini", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_MINI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "smini", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_MINI, size, 1L, imm, r2, r1 );
    }

    // SORT

    if( fcpu_asm_cmp( instr, "sort", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_SORT, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "ssort", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_SORT, size, 1L, 0L, 0L, r3, r2, r1 );
    }

    // SHIFTL

    if( fcpu_asm_cmp( instr, "shiftl", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_SHIFTL, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sshiftl", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_SHIFTL, size, 1L, 0L, 0L, r3, r2, r1 );
    }

    // SHIFTR

    if( fcpu_asm_cmp( instr, "shiftr", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_SHIFTR, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sshiftr", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_SHIFTR, size, 1L, 0L, 0L, r3, r2, r1 );
    }

    // SHIFTRA

    if( fcpu_asm_cmp( instr, "shiftra", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_SHIFTRA, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sshiftra", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_SHIFTRA, size, 1L, 0L, 0L, r3, r2, r1 );
    }

    // ROTL

    if( fcpu_asm_cmp( instr, "rotl", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_ROTL, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "srotl", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_ROTL, size, 1L, 0L, 0L, r3, r2, r1 );
    }

    // ROTR

    if( fcpu_asm_cmp( instr, "rotr", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_ROTR, size, 0L, 0L, 0L, r3, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "srotr", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_ROTR, size, 1L, 0L, 0L, r3, r2, r1 );
    }

    // SHIFTLI

    if( fcpu_asm_cmp( instr, "shiftli", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_SHIFTLI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sshiftli", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_SHIFTLI, size, 1L, imm, r2, r1 );
    }

    // SHIFTRI

    if( fcpu_asm_cmp( instr, "shiftri", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_SHIFTRI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sshiftri", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_SHIFTRI, size, 1L, imm, r2, r1 );
    }

    // SHIFTRAI

    if( fcpu_asm_cmp( instr, "shiftrai", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_SHIFTRAI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "sshiftrai", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_SHIFTRAI, size, 1L, imm, r2, r1 );
    }

    // ROTLI

    if( fcpu_asm_cmp( instr, "rotli", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_ROTLI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "srotli", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_ROTLI, size, 1L, imm, r2, r1 );
    }

    // ROTRI

    if( fcpu_asm_cmp( instr, "rotri", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_ROTRI, size, 0L, imm, r2, r1 );
    }
    if( fcpu_asm_cmp( instr, "srotri", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_ROTRI, size, 1L, imm, r2, r1 );
    }

    // BSET

    if( fcpu_asm_cmp( instr, "bset", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_BITOP, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x00 >> 12 );
    }
    if( fcpu_asm_cmp( instr, "sbset", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_BITOP, size, 1L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x00 >> 12 );
    }

    // BCLR

    if( fcpu_asm_cmp( instr, "bclr", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_BITOP, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x01 >> 12 );
    }
    if( fcpu_asm_cmp( instr, "sbclr", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_BITOP, size, 1L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x01 >> 12 );
    }

    // BCHG

    if( fcpu_asm_cmp( instr, "bchg", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_BITOP, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x02 >> 12 );
    }
    if( fcpu_asm_cmp( instr, "sbchg", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_BITOP, size, 1L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x02 >> 12 );
    }

    // BTST

    if( fcpu_asm_cmp( instr, "btst", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_BITOP, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x03 >> 12 );
    }
    if( fcpu_asm_cmp( instr, "sbtst", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_BITOP, size, 1L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x03 >> 12 );
    }

    // BSETI

    if( fcpu_asm_cmp( instr, "bseti", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_BITOPI, size, 0L, imm, r2, r1 );
        *o |= ( 0x00 >> 12 );
    }
    if( fcpu_asm_cmp( instr, "sbseti", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_BITOPI, size, 1L, imm, r2, r1 );
        *o |= ( 0x00 >> 12 );
    }

    // BCLRI

    if( fcpu_asm_cmp( instr, "bclri", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_BITOPI, size, 0L, imm, r2, r1 );
        *o |= ( 0x01 >> 12 );
    }
    if( fcpu_asm_cmp( instr, "sbclri", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_BITOPI, size, 1L, imm, r2, r1 );
        *o |= ( 0x01 >> 12 );
    }

    // BCHGI

    if( fcpu_asm_cmp( instr, "bchgi", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_BITOPI, size, 0L, imm, r2, r1 );
        *o |= ( 0x02 >> 12 );
    }
    if( fcpu_asm_cmp( instr, "sbchgi", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_BITOPI, size, 1L, imm, r2, r1 );
        *o |= ( 0x02 >> 12 );
    }

    // BTSTI

    if( fcpu_asm_cmp( instr, "btsti", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_BITOPI, size, 0L, imm, r2, r1 );
        *o |= ( 0x03 >> 12 );
    }
    if( fcpu_asm_cmp( instr, "sbtsti", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_BITOPI, size, 1L, imm, r2, r1 );
        *o |= ( 0x03 >> 12 );
    }

    // SDUP

    if( fcpu_asm_cmp( instr, "sdup", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_SDUP, size, 0L, 0L, r2, r1 );
    }

    // LOGIC

    if( fcpu_asm_cmp( instr, "logic.0000", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x00 >> 10 );
    }
    if( ( fcpu_asm_cmp( instr, "logic.0001", &u ) ) ||
        ( fcpu_asm_cmp( instr, "and",        &u ) ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x01 >> 10 );
    }
    if( fcpu_asm_cmp( instr, "logic.0010", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x02 >> 10 );
    }
    if( fcpu_asm_cmp( instr, "logic.0011", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x03 >> 10 );
    }
    if( fcpu_asm_cmp( instr, "logic.0100", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x04 >> 10 );
    }
    if( fcpu_asm_cmp( instr, "logic.0101", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x05 >> 10 );
    }
    if( ( fcpu_asm_cmp( instr, "logic.0110", &u ) ) ||
        ( fcpu_asm_cmp( instr, "xor",        &u ) ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x06 >> 10 );
    }
    if( ( fcpu_asm_cmp( instr, "logic.0111", &u ) ) ||
        ( fcpu_asm_cmp( instr, "or",         &u ) ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x07 >> 10 );
    }
    if( ( fcpu_asm_cmp( instr, "logic.1000", &u ) ) ||
        ( fcpu_asm_cmp( instr, "nor",        &u ) ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x08 >> 10 );
    }
    if( fcpu_asm_cmp( instr, "logic.1001", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x09 >> 10 );
    }
    if( ( fcpu_asm_cmp( instr, "logic.1010", &u ) ) ||
        ( fcpu_asm_cmp( instr, "not",        &u ) ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x0A >> 10 );
    }
    if( fcpu_asm_cmp( instr, "logic.1011", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x0B >> 10 );
    }
    if( fcpu_asm_cmp( instr, "logic.1100", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x0C >> 10 );
    }
    if( fcpu_asm_cmp( instr, "logic.1101", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x0D >> 10 );
    }
    if( ( fcpu_asm_cmp( instr, "logic.1110", &u ) ) ||
        ( fcpu_asm_cmp( instr, "nand",         &u ) ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x0E >> 10 );
    }
    if( fcpu_asm_cmp( instr, "logic.1111", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_instr( o, FCPU_OP_LOGIC, size, 0L, 0L, 0L, r3, r2, r1 );
        *o |= ( 0x0F >> 10 );
    }

    // ORI
    if( fcpu_asm_cmp( instr, "ori", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_LOGICI, size, 0L, imm, r2, r1 );
        *o |= ( 0x00 >> 10 ); // different position as in bitopi
    }

    // ANDI
    if( fcpu_asm_cmp( instr, "andi", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_LOGICI, size, 0L, imm, r2, r1 );
        *o |= ( 0x01 >> 10 ); // different position as in bitopi
    }

    // XORI
    if( fcpu_asm_cmp( instr, "xori", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_LOGICI, size, 0L, imm, r2, r1 );
        *o |= ( 0x02 >> 10 ); // different position as in bitopi
    }

    // ANDNI
    if( fcpu_asm_cmp( instr, "andni", &u ) )
    {
        fcpu_asm_param( instr + u, &size, &r3, &r2, &r1, &imm );
        fcpu_asm_x_imm_instr( o, FCPU_OP_LOGICI, size, 0L, imm, r2, r1 );
        *o |= ( 0x03 >> 10 ); // different position as in bitopi
    }

    //

    if( flsave )
        fcpu_mem_put_quadbyte( p, FCPU_LITTLE_ENDIAN, *a, *o );
}
