#include "math.h"
#include "stdio.h"
#include "fcpudefs.h"

#define FCPU_SIMD_FLAG          BIT( 10 )
#define FCPU_BITREV_OR_FLAG     BIT( 10 )
#define FCPU_MULDIVSIGN_FLAG    BIT( 12 )
#define FCPU_SATURATE_FLAG      BIT( 12 )
#define FCPU_CYMODHI_FLAG       BIT( 13 )

//
//
//

// Hook called when the emulator itself reaches an inconsistent state
// (for example an unknown TLB access mode). Currently a placeholder that
// does nothing.
//
// p - CPU state (unused)
void fcpu_cpu_internal_error( PFCPU p )
{
    (void)p;    // no reporting implemented yet
}

//
//
//
// Guards privileged operations.
//
// Returns 0 when FCPU_FLAGS_KERNELMODE is set in p->sr_flags. Otherwise
// raises FCPU_EXCEPTION_PROTECTIONFAULT in p->exception and returns 1.
char fcpu_cpu_protectionfault_if_not_in_kernelmode( PFCPU p )
{
    if( !( p->sr_flags & FCPU_FLAGS_KERNELMODE ) )
    {
        p->exception |= FCPU_EXCEPTION_PROTECTIONFAULT;
        return 1;
    }

    return 0;
}

// Searches the TLB for a valid entry whose page contains log_addr.
//
// Two addresses lie in the same page when they agree in every bit above the
// page offset, i.e. when ( a ^ b ) >> page_size is zero. The previous code
// returned the entry when that value was NON-zero, so it matched every page
// except the right one.
//
// p        - CPU state holding the tlb[] array
// log_addr - logical address to look up
//
// Returns a pointer to the matching entry, or NULL when there is none.
PTLBE fcpu_cpu_find_tlbe( PFCPU p, UL64 log_addr )
{
    UL64 i;     // wide index: cannot wrap before reaching the entry count

    for( i = 0; i < FCPU_NUM_TLB_ENTRIES; i++ )
    {
        if( !p->tlb[ i ].valid )
            continue;

        // same page <=> no differing bits above the page offset
        if( 0 == ( ( log_addr ^ p->tlb[ i ].log_addr ) >> p->tlb[ i ].page_size ) )
            return &( p->tlb[ i ] );
    }

    return NULL;
}

// Translates a logical address into a physical address for an access of
// mode m.
//
// In kernel mode (FCPU_FLAGS_KERNELMODE set in p->sr_flags) the address is
// passed through unchanged. Otherwise the TLB is searched: a miss raises
// FCPU_EXCEPTION_PAGEFAULT, a rights mismatch raises
// FCPU_EXCEPTION_PROTECTIONFAULT. On either fault *pphy_addr is left
// untouched, so callers must test p->exception before using it.
//
// p         - CPU state
// log_addr  - logical address to translate
// pphy_addr - receives the physical address on success
// m         - requested access: FCPU_TLB_ACCESS_MODE_R, _W or _X
void fcpu_cpu_check_tlb( PFCPU p, UL64 log_addr, UL64* pphy_addr, UL64 m )
{
    PTLBE ptlbe;
    UL64  offset_mask;

    if( p->sr_flags & FCPU_FLAGS_KERNELMODE )
    {
        *pphy_addr = log_addr;
        return;
    }

    ptlbe = fcpu_cpu_find_tlbe( p, log_addr );
    if( !ptlbe )
    {
        p->exception |= FCPU_EXCEPTION_PAGEFAULT;
        return;
    }

    switch( m )
    {
        case FCPU_TLB_ACCESS_MODE_R:
            if( ( ptlbe->access_rights == FCPU_TLB_ACCESS_MODE_W ) ||
                ( ptlbe->access_rights == FCPU_TLB_ACCESS_MODE_X ) )
            {
                p->exception |= FCPU_EXCEPTION_PROTECTIONFAULT;
                return;
            }
            break;

        case FCPU_TLB_ACCESS_MODE_W:
            if( ( ptlbe->access_rights == FCPU_TLB_ACCESS_MODE_R ) ||
                ( ptlbe->access_rights == FCPU_TLB_ACCESS_MODE_X ) )
            {
                p->exception |= FCPU_EXCEPTION_PROTECTIONFAULT;
                return;
            }
            break;

        case FCPU_TLB_ACCESS_MODE_X:
            if( ptlbe->access_rights != FCPU_TLB_ACCESS_MODE_X )
            {
                p->exception |= FCPU_EXCEPTION_PROTECTIONFAULT;
                return;
            }
            break;

        default:
            // internal emulator error; translation still proceeds below
            fcpu_cpu_internal_error( p );
            break;
    }

    ptlbe->hit_counter++;

    // page_size is used as a bit count by the TLB lookup, so the in-page
    // offset is the low page_size bits. The previous code used the single
    // bit ( 1 << page_size ) as the mask and combined the two halves the
    // wrong way round, which produced a bogus physical address.
    offset_mask = ( ( (UL64)1 ) << ptlbe->page_size ) - 1;

    *pphy_addr = ( ptlbe->phy_addr & ~offset_mask ) | ( log_addr & offset_mask );
}

// Called on a task switch. Until a VMID mechanism is implemented, every
// TLB entry is simply invalidated; the vimd argument is not used.
void fcpu_cpu_handle_taskswitch_tlb( PFCPU p, UL64 vimd )
{
    UL08 slot = 0;

    while( slot < FCPU_NUM_TLB_ENTRIES )
    {
        p->tlb[ slot ].valid = 0;
        slot++;
    }
}

//
//
//

// Reads a whole register. Register 0 always reads as zero.
//
// p - CPU state (passed as void*, cast to PFCPU)
// r - register number
// n - lane index; unused because the whole register is one lane
UL64 fcpu_cpu_reg_get_octbyte_at( void* p, UL64 r, UL64 n )
{
    PFCPU cpu = (PFCPU)p;

    return ( 0 == r ) ? 0 : cpu->reg[ r ];
}

// Reads the n-th 32-bit lane of register r (lane 0 = least significant).
// Register 0 always reads as zero.
UL64 fcpu_cpu_reg_get_quadbyte_at( void* p, UL64 r, UL64 n )
{
    PFCPU cpu = (PFCPU)p;

    return ( 0 == r ) ? 0 : ( ( cpu->reg[ r ] >> ( 32 * n ) ) & 0xFFFFFFFF );
}

// Reads the n-th 16-bit lane of register r (lane 0 = least significant).
// Register 0 always reads as zero.
UL64 fcpu_cpu_reg_get_doublebyte_at( void* p, UL64 r, UL64 n )
{
    PFCPU cpu = (PFCPU)p;

    return ( 0 == r ) ? 0 : ( ( cpu->reg[ r ] >> ( 16 * n ) ) & 0xFFFF );
}

// Reads the n-th 8-bit lane of register r (lane 0 = least significant).
// Register 0 always reads as zero.
UL64 fcpu_cpu_reg_get_byte_at( void* p, UL64 r, UL64 n )
{
    PFCPU cpu = (PFCPU)p;

    return ( 0 == r ) ? 0 : ( ( cpu->reg[ r ] >> ( 8 * n ) ) & 0xFF );
}

// Overwrites the whole register r with d.
//
// n is unused because the whole register is one lane. Register 0 is written
// like any other; the reg_get_* accessors return zero for it regardless.
void fcpu_cpu_reg_put_octbyte_at( void* p, UL64 r, UL64 n, UL64 d )
{
    PFCPU cpu = (PFCPU)p;

    cpu->reg[ r ] = d;
}

// Replaces the n-th 32-bit lane of register r with the low 32 bits of d,
// leaving the other lane untouched.
//
// The lane mask is built as a UL64. The previous code shifted and
// complemented the literal 0x00000000FFFFFFFF, which has type unsigned int
// where int is 32 bits wide: for n == 0 the complement was 0 and wiped the
// whole register, for n == 1 the shift by 32 was undefined behaviour.
//
// n must be 0 or 1; larger values would shift past the register width.
void fcpu_cpu_reg_put_quadbyte_at( void* p, UL64 r, UL64 n, UL64 d )
{
    const UL64 lane  = (UL64)0xFFFFFFFF;
    const UL64 shift = 32 * n;
    PFCPU      cpu   = (PFCPU)p;

    cpu->reg[ r ] = ( cpu->reg[ r ] & ~( lane << shift ) ) | ( ( d & lane ) << shift );
}

// Replaces the n-th 16-bit lane of register r with the low 16 bits of d,
// leaving the other lanes untouched.
//
// The lane mask is built as a UL64. The previous code shifted the int
// literal 0xFFFF, which overflows for n == 1 and is undefined for n >= 2,
// so the complemented mask cleared bits outside the target lane.
//
// n must be in 0..3; larger values would shift past the register width.
void fcpu_cpu_reg_put_doublebyte_at( void* p, UL64 r, UL64 n, UL64 d )
{
    const UL64 lane  = (UL64)0xFFFF;
    const UL64 shift = 16 * n;
    PFCPU      cpu   = (PFCPU)p;

    cpu->reg[ r ] = ( cpu->reg[ r ] & ~( lane << shift ) ) | ( ( d & lane ) << shift );
}

// Replaces the n-th 8-bit lane of register r with the low 8 bits of d,
// leaving the other lanes untouched.
//
// The lane mask is built as a UL64. The previous code shifted the int
// literal 0xFF, which overflows for n == 3 and is undefined for n >= 4,
// so writes to the upper lanes corrupted the register.
//
// n must be in 0..7; larger values would shift past the register width.
void fcpu_cpu_reg_put_byte_at( void* p, UL64 r, UL64 n, UL64 d )
{
    const UL64 lane  = (UL64)0xFF;
    const UL64 shift = 8 * n;
    PFCPU      cpu   = (PFCPU)p;

    cpu->reg[ r ] = ( cpu->reg[ r ] & ~( lane << shift ) ) | ( ( d & lane ) << shift );
}

// Returns the register number following r, wrapping around within the
// 64-entry range 0..63.
UL64 fcpu_cpu_get_next_reg( UL64 r )
{
    const UL64 successor = r + 1;

    return successor % 64;
}

//
//
//

// Reports whether any exception bit is pending in p->exception.
// Returns TRUE if at least one bit is set, FALSE otherwise.
char fcpu_cpu_check_exception( PFCPU p )
{
    return p->exception ? TRUE : FALSE;
}

//
//
//

// Sign-extends the low 8 bits of op to 64 bits; bits above bit 7 of the
// input are ignored.
UL64 fcpu_cpu_sign_extend_b( UL64 op )
{
    const UL64 low = op & 0xFF;

    return ( low & 0x80 ) ? ( low | 0xFFFFFFFFFFFFFF00 ) : low;
}

// Sign-extends the low 16 bits of op to 64 bits; bits above bit 15 of the
// input are ignored.
UL64 fcpu_cpu_sign_extend_d( UL64 op )
{
    const UL64 low = op & 0xFFFF;

    return ( low & 0x8000 ) ? ( low | 0xFFFFFFFFFFFF0000 ) : low;
}

// Sign-extends the low 32 bits of op to 64 bits; bits above bit 31 of the
// input are ignored.
UL64 fcpu_cpu_sign_extend_q( UL64 op )
{
    const UL64 low = op & 0xFFFFFFFF;

    return ( low & 0x80000000 ) ? ( low | 0xFFFFFFFF00000000 ) : low;
}

// 64-bit counterpart of the sign_extend_b/d/q helpers: a full-width value
// needs no extension, so op is returned unchanged.
UL64 fcpu_cpu_sign_extend_o( UL64 op )
{
    const UL64 unchanged = op;

    return unchanged;
}

//
// 6.1.1 Core Arithmetic operations
//

// ADD: *pop1 = ( op2 + op3 ) masked with p->mask[ p->sizeflg ],
//      *pop0 = carry (1 or 0).
//
// A carry is detected when the masked sum is smaller than either operand.
// With FCPU_SATURATE_FLAG set in p->instr_reg, a carrying sum is forced to
// all ones within the mask.
void fcpu_cpu_exec_add_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64 sum   = ( op2 + op3 ) & p->mask[ p->sizeflg ];
    UL64 carry = 0x00;

    if( ( sum < op2 ) || ( sum < op3 ) )
    {
        carry = 0x01;

        if( p->instr_reg & FCPU_SATURATE_FLAG )
            sum |= p->mask[ p->sizeflg ];
    }

    *pop1 = sum;
    *pop0 = carry;
}

// SUB: *pop1 = ( op2 - op3 ) masked with p->mask[ p->sizeflg ],
//      *pop0 = borrow (1 when op3 > op2, else 0).
//
// With FCPU_SATURATE_FLAG set in p->instr_reg, a borrowing result is
// clamped to zero.
//
// The previous code computed op2 + op3 (copied from the ADD handler), so
// the instruction never actually subtracted.
void fcpu_cpu_exec_sub_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64 op1;
    UL64 op0;

    op1 = ( op2 - op3 ) & p->mask[ p->sizeflg ];

    if( op3 > op2 )
    {
        if( p->instr_reg & FCPU_SATURATE_FLAG )
            op1 &= ~p->mask[ p->sizeflg ];  // saturate at zero

        op0 = 0x01;
    }
    else
        op0 = 0x00;

    *pop1 = op1;
    *pop0 = op0;
}

// MUL.b: multiplies the low bytes of op2 and op3.
//
// With FCPU_MULDIVSIGN_FLAG set in p->instr_reg the operands are treated as
// signed (sign-extended from 8 bits), otherwise as unsigned.
// *pop1 receives bits 0..7 of the product, *pop0 bits 8..15.
void fcpu_cpu_exec_mul_b_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64 product;

    if( p->instr_reg & FCPU_MULDIVSIGN_FLAG )
        product = (UL64)( (I64)fcpu_cpu_sign_extend_b( op2 ) * (I64)fcpu_cpu_sign_extend_b( op3 ) );
    else
        product = ( op2 & 0xFF ) * ( op3 & 0xFF );

    *pop1 = product & 0xFF;
    *pop0 = ( product >> 8 ) & 0xFF;
}

// MUL.d: multiplies the low 16 bits of op2 and op3.
//
// With FCPU_MULDIVSIGN_FLAG set in p->instr_reg the operands are treated as
// signed (sign-extended from 16 bits), otherwise as unsigned.
// *pop1 receives bits 0..15 of the product, *pop0 bits 16..31.
void fcpu_cpu_exec_mul_d_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64 product;

    if( p->instr_reg & FCPU_MULDIVSIGN_FLAG )
        product = (UL64)( (I64)fcpu_cpu_sign_extend_d( op2 ) * (I64)fcpu_cpu_sign_extend_d( op3 ) );
    else
        product = ( op2 & 0xFFFF ) * ( op3 & 0xFFFF );

    *pop1 = product & 0xFFFF;
    *pop0 = ( product >> 16 ) & 0xFFFF;
}

// MUL.q: multiplies the low 32 bits of op2 and op3.
//
// With FCPU_MULDIVSIGN_FLAG set in p->instr_reg the operands are treated as
// signed (sign-extended from 32 bits), otherwise as unsigned.
// *pop1 receives bits 0..31 of the product, *pop0 bits 32..63.
void fcpu_cpu_exec_mul_q_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64 product;

    if( p->instr_reg & FCPU_MULDIVSIGN_FLAG )
        product = (UL64)( (I64)fcpu_cpu_sign_extend_q( op2 ) * (I64)fcpu_cpu_sign_extend_q( op3 ) );
    else
        product = ( op2 & 0xFFFFFFFF ) * ( op3 & 0xFFFFFFFF );

    *pop1 = product & 0xFFFFFFFF;
    *pop0 = ( product >> 32 ) & 0xFFFFFFFF;
}

// MUL.o: 64 x 64 -> 128 bit multiplication.
//
// *pop1 receives the low 64 bits of the product and *pop0 the high 64 bits,
// matching the low/high split used by the .b/.d/.q multiply handlers.
// With FCPU_MULDIVSIGN_FLAG set in p->instr_reg the operands are treated as
// two's-complement signed values, otherwise as unsigned.
//
// The previous body was unfinished: it negated both operands
// unconditionally, built the partial products from unshifted high halves,
// left the sign variables uninitialised and never wrote *pop1 / *pop0.
void fcpu_cpu_exec_mul_o_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 half = (UL64)0xFFFFFFFF;
    const UL64 sign = (UL64)1 << 63;
    UL64 op3l;
    UL64 op3h;
    UL64 op2l;
    UL64 op2h;
    UL64 ll;
    UL64 lh;
    UL64 hl;
    UL64 hh;
    UL64 mid;
    UL64 op1;
    UL64 op0;

    // split both operands into 32-bit halves
    op3l = op3 & half;
    op3h = op3 >> 32;
    op2l = op2 & half;
    op2h = op2 >> 32;

    // four 32 x 32 -> 64 partial products (none can overflow)
    ll = op2l * op3l;
    lh = op2l * op3h;
    hl = op2h * op3l;
    hh = op2h * op3h;

    // middle column: at most three 32-bit values, fits easily in 64 bits
    mid = ( ll >> 32 ) + ( lh & half ) + ( hl & half );

    op1 = ( ll & half ) | ( mid << 32 );
    op0 = hh + ( lh >> 32 ) + ( hl >> 32 ) + ( mid >> 32 );

    if( p->instr_reg & FCPU_MULDIVSIGN_FLAG )
    {
        // Reading a negative operand as unsigned adds 2^64 to it, which
        // adds the other operand to the high word; undo that here.
        if( op2 & sign )
            op0 -= op3;
        if( op3 & sign )
            op0 -= op2;
    }

    *pop1 = op1;
    *pop0 = op0;
}

// DIV: *pop1 = quotient, *pop0 = remainder of op2 / op3 at the current
// operand size.
//
// With FCPU_MULDIVSIGN_FLAG set in p->instr_reg the operands are passed
// through p->psef[ p->sizeflg ] and divided as signed values; otherwise
// they are masked with p->mask[ p->sizeflg ] and divided as unsigned.
//
// Raises FCPU_EXCEPTION_DIVISION_BY_ZERO, leaving the outputs untouched,
// when the effective divisor is zero. The previous code only tested the
// raw op3, so a divisor whose significant bits were zero still reached the
// host division and crashed the emulator. The signed case "most negative
// value / -1" overflows on the host and is now handled explicitly
// (quotient wraps to the dividend, remainder 0).
void fcpu_cpu_exec_div_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64 op1;
    UL64 op0;

    if( 0 == op3 )
    {
        p->exception |= FCPU_EXCEPTION_DIVISION_BY_ZERO;
        return;
    }

    if( p->instr_reg & FCPU_MULDIVSIGN_FLAG )
    {
        I64 num = (I64)p->psef[ p->sizeflg ]( op2 );
        I64 den = (I64)p->psef[ p->sizeflg ]( op3 );

        if( 0 == den )
        {
            p->exception |= FCPU_EXCEPTION_DIVISION_BY_ZERO;
            return;
        }

        if( ( -1 == den ) && ( (UL64)num == ( (UL64)1 << 63 ) ) )
        {
            // result is not representable as a signed 64-bit value
            op1 = (UL64)num;
            op0 = 0;
        }
        else
        {
            op1 = (UL64)( num / den );
            op0 = (UL64)( num % den );
        }
    }
    else
    {
        UL64 num = op2 & p->mask[ p->sizeflg ];
        UL64 den = op3 & p->mask[ p->sizeflg ];

        if( 0 == den )
        {
            p->exception |= FCPU_EXCEPTION_DIVISION_BY_ZERO;
            return;
        }

        op1 = num / den;
        op0 = num % den;
    }

    *pop1 = op1;
    *pop0 = op0;
}

//
// 6.1.2 Optional Arithmetic operations
//

// ADDI: *pop1 = ( op2 + op3 ) masked with p->mask[ p->sizeflg ]; *pop0 = 0.
// No carry or saturation handling.
void fcpu_cpu_exec_addi_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 sum = ( op2 + op3 ) & p->mask[ p->sizeflg ];

    *pop1 = sum;
    *pop0 = 0;
}

// SUBI: *pop1 = ( op2 - op3 ) masked with p->mask[ p->sizeflg ]; *pop0 = 0.
// No borrow or saturation handling.
void fcpu_cpu_exec_subi_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 diff = ( op2 - op3 ) & p->mask[ p->sizeflg ];

    *pop1 = diff;
    *pop0 = 0;
}

// MULI: *pop1 = ( op2 * op3 ) masked with p->mask[ p->sizeflg ]; *pop0 = 0.
// Only the low part of the product is kept.
void fcpu_cpu_exec_muli_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 product = ( op2 * op3 ) & p->mask[ p->sizeflg ];

    *pop1 = product;
    *pop0 = 0;
}

// DIVI: *pop1 = ( op2 / op3 ) masked with p->mask[ p->sizeflg ]; *pop0 = 0.
//
// A zero divisor raises FCPU_EXCEPTION_DIVISION_BY_ZERO and leaves the
// outputs untouched, as the register-form DIV handler does. The previous
// code divided unconditionally, so a zero divisor was undefined behaviour
// on the host (typically a crash of the emulator).
void fcpu_cpu_exec_divi_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    if( 0 == op3 )
    {
        p->exception |= FCPU_EXCEPTION_DIVISION_BY_ZERO;
        return;
    }

    *pop1 = ( op2 / op3 ) & p->mask[ p->sizeflg ];
    *pop0 = 0;
}

// MOD: *pop1 = op2 % op3 at the current operand size; *pop0 = 0.
//
// With FCPU_MULDIVSIGN_FLAG set in p->instr_reg the operands are passed
// through p->psef[ p->sizeflg ] and treated as signed; otherwise they are
// masked with p->mask[ p->sizeflg ] and treated as unsigned.
//
// Raises FCPU_EXCEPTION_DIVISION_BY_ZERO, leaving the outputs untouched,
// when the effective divisor is zero. The previous code only tested the
// raw op3, so a divisor whose significant bits were zero still reached the
// host '%' operator. "Most negative value % -1" overflows on the host and
// now yields 0 explicitly.
void fcpu_cpu_exec_mod_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64 op1;

    if( 0 == op3 )
    {
        p->exception |= FCPU_EXCEPTION_DIVISION_BY_ZERO;
        return;
    }

    if( p->instr_reg & FCPU_MULDIVSIGN_FLAG )
    {
        I64 num = (I64)p->psef[ p->sizeflg ]( op2 );
        I64 den = (I64)p->psef[ p->sizeflg ]( op3 );

        if( 0 == den )
        {
            p->exception |= FCPU_EXCEPTION_DIVISION_BY_ZERO;
            return;
        }

        if( -1 == den )
            op1 = 0;    // x % -1 is always 0; avoids the overflow case
        else
            op1 = (UL64)( num % den );
    }
    else
    {
        UL64 num = op2 & p->mask[ p->sizeflg ];
        UL64 den = op3 & p->mask[ p->sizeflg ];

        if( 0 == den )
        {
            p->exception |= FCPU_EXCEPTION_DIVISION_BY_ZERO;
            return;
        }

        op1 = num % den;
    }

    *pop1 = op1;
    *pop0 = 0;
}

// MAC (multiply-accumulate): *pop1 = *pop1 + op2 * op3; *pop0 = 0.
//
// *pop1 is both the accumulator input and the result. With
// FCPU_MULDIVSIGN_FLAG set in p->instr_reg the operands are passed through
// p->psef[ p->sizeflg ] first; otherwise they are masked with
// p->mask[ p->sizeflg ]. As before, the result is not masked to the
// operand size.
//
// The previous code used '%' where the multiplication belongs (copied from
// the MOD handler), so it accumulated a remainder and hit host undefined
// behaviour for a zero op3. The product is formed in unsigned arithmetic,
// which gives the same low 64 bits as the signed product without signed
// overflow.
void fcpu_cpu_exec_mac_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64 op1;

    op1 = *pop1;

    if( p->instr_reg & FCPU_MULDIVSIGN_FLAG )
    {
        op1 = op1 + (UL64)p->psef[ p->sizeflg ]( op2 ) * (UL64)p->psef[ p->sizeflg ]( op3 );
    }
    else
    {
        op1 = op1 + ( op2 & p->mask[ p->sizeflg ] ) * ( op3 & p->mask[ p->sizeflg ] );
    }

    *pop1 = op1;
    *pop0 = 0;
}

// MODI: *pop1 = ( op2 % op3 ) masked with p->mask[ p->sizeflg ]; *pop0 = 0.
//
// A zero divisor raises FCPU_EXCEPTION_DIVISION_BY_ZERO and leaves the
// outputs untouched, as the register-form MOD handler does. The previous
// code applied '%' unconditionally, so a zero divisor was undefined
// behaviour on the host.
void fcpu_cpu_exec_modi_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    if( 0 == op3 )
    {
        p->exception |= FCPU_EXCEPTION_DIVISION_BY_ZERO;
        return;
    }

    *pop1 = ( op2 % op3 ) & p->mask[ p->sizeflg ];
    *pop0 = 0;
}

// ADDSUB: produces sum and difference in one go.
// *pop1 = ( op2 + op3 ), *pop0 = ( op2 - op3 ), both masked with
// p->mask[ p->sizeflg ].
void fcpu_cpu_exec_addsub_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 sum = op2 + op3;

    *pop1 = sum & p->mask[ p->sizeflg ];

    {
        const UL64 diff = op2 - op3;

        *pop0 = diff & p->mask[ p->sizeflg ];
    }
}

// POPCOUNT: counts the set bits in the low 8 * p->sb[ p->sizeflg ] bits of
// op2 and subtracts op3, clamping at zero. *pop1 = result, *pop0 = 0.
//
// The probe bit is built as a UL64. The previous '1 << i' shifted an int,
// which is undefined for i >= 31 and miscounted 32- and 64-bit operands.
void fcpu_cpu_exec_popcount_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64 i;
    UL64 n;

    n = 0;
    for( i = 0; i < ( 8L * p->sb[ p->sizeflg ] ); i++ )
        if( op2 & ( (UL64)1 << i ) )
            n++;

    // saturating subtraction of op3
    if( op3 > n )
        n = 0;
    else
        n = n - op3;

    *pop1 = n;
    *pop0 = 0;
}

//
// 6.1.3 Optional Increment-based operations
//

// INC: *pop1 = ( op2 + 1 ) masked with p->mask[ p->sizeflg ]; *pop0 = 0.
// op3 is unused.
void fcpu_cpu_exec_inc_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 next = op2 + 1;

    *pop1 = next & p->mask[ p->sizeflg ];
    *pop0 = 0;
}

// DEC: *pop1 = ( op2 - 1 ) masked with p->mask[ p->sizeflg ]; *pop0 = 0.
// op3 is unused.
void fcpu_cpu_exec_dec_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 prev = op2 - 1;

    *pop1 = prev & p->mask[ p->sizeflg ];
    *pop0 = 0;
}

// NEG: two's-complement negation.
// *pop1 = ( ~x + 1 ) masked with p->mask[ p->sizeflg ], where x is op2
// passed through p->psef[ p->sizeflg ]; *pop0 = 0. op3 is unused.
void fcpu_cpu_exec_neg_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64 negated;

    negated = ~p->psef[ p->sizeflg ]( op2 ) + 1;

    *pop1 = negated & p->mask[ p->sizeflg ];
    *pop0 = 0;
}

// SCAN: finds the first set bit of op2 within the low
// 8 * p->sb[ p->sizeflg ] bits and returns its 1-based position counted
// from the least significant bit, or 0 when no bit is set.
//
//   BIT( 12 ) of p->instr_reg: op2 is inverted first (searches for a 0 bit)
//   BIT( 13 ) of p->instr_reg: search from the most significant bit down,
//                              otherwise from bit 0 up
//
// *pop1 = position, *pop0 = 0. op3 is unused.
//
// Probe bits are built as UL64. The previous '1 << ...' shifted an int,
// which is undefined from bit 31 on and broke 32- and 64-bit scans.
void fcpu_cpu_exec_scan_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64 i;
    UL64 n;
    UL64 bits;

    n    = 0;
    bits = 8L * p->sb[ p->sizeflg ];

    if( p->instr_reg & BIT( 12 ) )
        op2 = ~op2;

    if( p->instr_reg & BIT( 13 ) )
    {
        // walk from the top bit of the operand downwards
        for( i = 0; i < bits; i++ )
        {
            if( op2 & ( (UL64)1 << ( bits - 1 - i ) ) )
            {
                n = bits - i;
                break;
            }
        }
    }
    else
    {
        // walk from bit 0 upwards
        for( i = 0; i < bits; i++ )
        {
            if( op2 & ( (UL64)1 << i ) )
            {
                n = i + 1;
                break;
            }
        }
    }

    *pop1 = (UL64)n;
    *pop0 = 0;
}

// CMPL: unsigned "less than" compare.
// *pop1 = p->mask[ p->sizeflg ] when op2 < op3, else 0; *pop0 = 0.
void fcpu_cpu_exec_cmpl_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    *pop1 = ( op2 < op3 ) ? p->mask[ p->sizeflg ] : 0;
    *pop0 = 0;
}

// CMPLE: unsigned "less than or equal" compare.
// *pop1 = p->mask[ p->sizeflg ] when op2 <= op3, else 0; *pop0 = 0.
//
// The previous code tested op2 < op3, which made it identical to CMPL and
// reported "false" for equal operands.
void fcpu_cpu_exec_cmple_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    if( op2 <= op3 )
        *pop1 = p->mask[ p->sizeflg ];
    else
        *pop1 = 0;

    *pop0 = 0;
}

// ABS: absolute value.
// When the bit selected by p->msbmask[ p->sizeflg ] is set in op2, *pop1
// receives the negation of op2 after it has passed through
// p->psef[ p->sizeflg ]; otherwise op2 is passed through unchanged.
// *pop0 = 0. op3 is unused.
//
// The negation is done in unsigned arithmetic. The previous signed
// negation was undefined for the most negative 64-bit value; for every
// other input the result is bit-identical.
void fcpu_cpu_exec_abs_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    if( op2 & p->msbmask[ p->sizeflg ] )
        *pop1 = (UL64)0 - (UL64)p->psef[ p->sizeflg ]( op2 );
    else
        *pop1 = op2;

    *pop0 = 0;
}

// MAX: *pop1 = the larger of op2 and op3 (unsigned comparison); *pop0 = 0.
void fcpu_cpu_exec_max_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    *pop1 = ( op3 > op2 ) ? op3 : op2;
    *pop0 = 0;
}

// MIN: *pop1 = the smaller of op2 and op3 (unsigned comparison); *pop0 = 0.
void fcpu_cpu_exec_min_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    *pop1 = ( op3 < op2 ) ? op3 : op2;
    *pop0 = 0;
}

// SORT: orders the two operands (unsigned comparison).
// *pop1 = the smaller value, *pop0 = the larger value.
void fcpu_cpu_exec_sort_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 smaller = ( op2 > op3 ) ? op3 : op2;
    const UL64 larger  = ( op2 > op3 ) ? op2 : op3;

    *pop1 = smaller;
    *pop0 = larger;
}

//
// 6.2.1 Core Shift and Rotate operations
//

// SHIFTL: *pop1 = op2 << op3; *pop0 = 0.
// The result is not masked to the operand size.
// NOTE(review): a shift count >= 64 is undefined on the host; the intended
// behaviour (count modulo size, or zero result) is not visible here.
void fcpu_cpu_exec_shiftl_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 shifted = op2 << op3;

    *pop1 = shifted;
    *pop0 = 0;
}

// SHIFTR: logical right shift, *pop1 = op2 >> op3; *pop0 = 0.
// NOTE(review): a shift count >= 64 is undefined on the host; the intended
// behaviour (count modulo size, or zero result) is not visible here.
void fcpu_cpu_exec_shiftr_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 shifted = op2 >> op3;

    *pop1 = shifted;
    *pop0 = 0;
}

// SHIFTRA: arithmetic right shift; op2 is reinterpreted as a signed 64-bit
// value before shifting. *pop1 = result, *pop0 = 0.
// NOTE(review): op2 is not sign-extended from the operand size first, and a
// shift count >= 64 is undefined on the host - confirm intended behaviour.
void fcpu_cpu_exec_shiftra_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const I64 as_signed = (I64)op2;

    *pop1 = as_signed >> op3;
    *pop0 = 0;
}

// ROTL: rotates op2 left within the current operand size.
//
// The rotate count is op3 & ( 8 * p->sb[ p->sizeflg ] - 1 ), i.e. op3
// reduced to less than the operand width. *pop1 = rotated value masked
// with p->mask[ p->sizeflg ]; *pop0 = 0.
//
// The previous code performed the rotation on the local copy of op2 and
// then returned without storing anything, so the instruction had no
// effect.
void fcpu_cpu_exec_rotl_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64 i;

    for( i = 0; i < ( op3 & ( 8L * p->sb[ p->sizeflg ] - 1 ) ); i++ )
    {
        // the bit leaving at the top re-enters at bit 0
        if( op2 & p->msbmask[ p->sizeflg ] )
            op2 = ( op2 << 1 ) | 1;
        else
            op2 = ( op2 << 1 );
    }

    // bits pushed above the operand width are discarded here
    *pop1 = op2 & p->mask[ p->sizeflg ];
    *pop0 = 0;
}

// ROTR: rotates op2 right within the current operand size.
//
// The rotate count is op3 & ( 8 * p->sb[ p->sizeflg ] - 1 ), i.e. op3
// reduced to less than the operand width. *pop1 = rotated value;
// *pop0 = 0.
//
// The previous code performed the rotation on the local copy of op2 and
// then returned without storing anything, so the instruction had no
// effect. op2 is also masked to the operand size first so that stray
// higher bits cannot be shifted into the result.
void fcpu_cpu_exec_rotr_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64 i;

    op2 &= p->mask[ p->sizeflg ];

    for( i = 0; i < ( op3 & ( 8L * p->sb[ p->sizeflg ] - 1 ) ); i++ )
    {
        // the bit leaving at bit 0 re-enters at the top of the operand
        if( op2 & 1 )
            op2 = ( op2 >> 1 ) | p->msbmask[ p->sizeflg ];
        else
            op2 = ( op2 >> 1 );
    }

    *pop1 = op2;
    *pop0 = 0;
}

//
// 6.2.2 Optional Bit Shift and Rotate operations
//

// Single-bit operations on bit number op3 of op2. Bits 13 and 12 of
// p->instr_reg select the operation:
//
//   13 12
//    0  0   OR   - BSET, set the bit
//    0  1   ANDN - BCLR, clear the bit
//    1  0   XOR  - BCHG, toggle the bit
//    1  1   AND  - BTST, isolate the bit
//
// *pop1 = result, *pop0 = 0.
//
// The bit mask is built as a UL64. The previous '1 << op3' shifted an int,
// which is undefined for op3 >= 31 and produced a sign-extended mask for
// bit 31.
// NOTE(review): op3 >= 64 is still undefined on the host; the intended
// behaviour is not visible here.
void fcpu_cpu_exec_bitop_bdqo_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 bit = (UL64)1 << op3;

    if( p->instr_reg & BIT( 13 ) )
    {
        if( p->instr_reg & BIT( 12 ) )
            *pop1 = op2 & bit;      // 11 AND, BTST
        else
            *pop1 = op2 ^ bit;      // 10 XOR, BCHG
    }
    else
    {
        if( p->instr_reg & BIT( 12 ) )
            *pop1 = op2 & ( ~bit ); // 01 ANDN, BCLR
        else
            *pop1 = op2 | bit;      // 00 OR, BSET
    }

    *pop0 = 0;
}

//
// 6.2.3 Optional Bit Shuffling operations
//

// BITREV: reverses the bit order of the low 8 * p->sb[ p->sizeflg ] bits
// of op2, then shifts the reversed value right by ( width - op3 ).
//
// With FCPU_BITREV_OR_FLAG set in p->instr_reg the result is ORed into the
// existing *pop1; otherwise it replaces *pop1. *pop0 = 0.
//
// Fixes over the previous version:
//  - probe bits are UL64; '1 << n' shifted an int (undefined from bit 31)
//  - the OR variant was immediately overwritten by an unconditional
//    assignment, so the flag had no effect
//  - a final shift of 64 or more (e.g. op3 == 0 at 64-bit width) was
//    undefined on the host; it now yields 0
void fcpu_cpu_exec_bitrev_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64 i;
    UL64 bits;
    UL64 shift;
    UL64 op1;

    op1  = 0;
    bits = 8L * p->sb[ p->sizeflg ];

    // bit i of the source lands on bit ( bits - 1 - i ) of the result
    for( i = 0; i < bits; i++ )
    {
        if( op2 & ( (UL64)1 << i ) )
            op1 |= ( (UL64)1 << ( bits - 1 - i ) );
    }

    shift = bits - op3;
    if( shift < 64 )
        op1 >>= shift;
    else
        op1 = 0;

    if( p->instr_reg & FCPU_BITREV_OR_FLAG )
        *pop1 |= op1;
    else
        *pop1 = op1;

    *pop0 = 0;
}

// BYTEREV.b: a single byte has nothing to swap, so op2 is passed through
// unchanged. *pop1 = op2, *pop0 = 0.
void fcpu_cpu_exec_byterev_b_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 unchanged = op2;

    *pop1 = unchanged;
    *pop0 = 0;
}

// BYTEREV.d: swaps the two low bytes of op2; higher bits are dropped.
// *pop1 = swapped 16-bit value, *pop0 = 0.
void fcpu_cpu_exec_byterev_d_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 low  = op2 & 0xFF;
    const UL64 high = ( op2 >> 8 ) & 0xFF;

    *pop1 = ( low << 8 ) | high;
    *pop0 = 0;
}

// BYTEREV.q: reverses the order of the four low bytes of op2; higher bits
// are dropped. *pop1 = reversed 32-bit value, *pop0 = 0.
void fcpu_cpu_exec_byterev_q_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64     reversed = 0;
    unsigned k;

    // take the bytes lowest first; each new byte pushes the earlier ones up
    for( k = 0; k < 4; k++ )
        reversed = ( reversed << 8 ) | ( ( op2 >> ( 8 * k ) ) & 0xFF );

    *pop1 = reversed;
    *pop0 = 0;
}

// BYTEREV.o: reverses the order of all eight bytes of op2.
// *pop1 = reversed value, *pop0 = 0.
void fcpu_cpu_exec_byterev_o_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64     reversed = 0;
    unsigned k;

    // take the bytes lowest first; each new byte pushes the earlier ones up
    for( k = 0; k < 8; k++ )
        reversed = ( reversed << 8 ) | ( ( op2 >> ( 8 * k ) ) & 0xFF );

    *pop1 = reversed;
    *pop0 = 0;
}

// MIX: interleaves elements of op2 and op3 into one 64-bit result.
//
// BIT( 12 ) of p->instr_reg selects the source half: set = MIXH (elements
// taken from bits 32..63 of both operands), clear = MIXL (bits 0..31).
// p->sizeflg selects the element width (0 = .b, 1 = .d, 2 = .q, 3 = .o).
// Within each output pair the op2 element sits below the op3 element. For
// .o the result is simply op2. Any other sizeflg value yields 0.
//
// *pop1 = interleaved value, *pop0 = 0.
void fcpu_cpu_exec_mix_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    UL64     mixed = 0;
    unsigned base;      // bit offset of the source half: 32 (MIXH) or 0 (MIXL)
    unsigned k;

    base = ( p->instr_reg & BIT( 12 ) ) ? 32 : 0;

    switch( p->sizeflg )
    {
        case 0: // .b - four byte pairs
            for( k = 0; k < 4; k++ )
            {
                mixed |= ( ( op2 >> ( base + 8 * k ) ) & 0xFF ) << ( 16 * k );
                mixed |= ( ( op3 >> ( base + 8 * k ) ) & 0xFF ) << ( 16 * k + 8 );
            }
            break;

        case 1: // .d - two 16-bit pairs
            for( k = 0; k < 2; k++ )
            {
                mixed |= ( ( op2 >> ( base + 16 * k ) ) & 0xFFFF ) << ( 32 * k );
                mixed |= ( ( op3 >> ( base + 16 * k ) ) & 0xFFFF ) << ( 32 * k + 16 );
            }
            break;

        case 2: // .q - one 32-bit pair
            mixed |= ( op2 >> base ) & 0xFFFFFFFF;
            mixed |= ( ( op3 >> base ) & 0xFFFFFFFF ) << 32;
            break;

        case 3: // .o
            mixed = op2;
            break;
    }

    *pop1 = mixed;
    *pop0 = 0;
}

// EXPAND: not implemented. The handler does nothing and leaves *pop1 and
// *pop0 untouched.
void fcpu_cpu_exec_expand_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    (void)p;
    (void)op3;
    (void)op2;
    (void)pop1;
    (void)pop0;
}

// SDUP.b: replicates the low byte of op2 into all eight byte lanes.
// *pop1 = replicated value, *pop0 = 0.
//
// The previous code filled only seven lanes (the '<< 56' term was
// missing), leaving the top byte of the result zero.
void fcpu_cpu_exec_sdup_b_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    *pop1 = ( ( op2 & 0xFF ) <<  0 ) |
            ( ( op2 & 0xFF ) <<  8 ) |
            ( ( op2 & 0xFF ) << 16 ) |
            ( ( op2 & 0xFF ) << 24 ) |
            ( ( op2 & 0xFF ) << 32 ) |
            ( ( op2 & 0xFF ) << 40 ) |
            ( ( op2 & 0xFF ) << 48 ) |
            ( ( op2 & 0xFF ) << 56 );

    *pop0 = 0;
}

// SDUP.d: replicates the low 16 bits of op2 into all four 16-bit lanes.
// *pop1 = replicated value, *pop0 = 0.
void fcpu_cpu_exec_sdup_d_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 lane = op2 & 0xFFFF;
    UL64       filled = 0;
    unsigned   k;

    for( k = 0; k < 4; k++ )
        filled |= lane << ( 16 * k );

    *pop1 = filled;
    *pop0 = 0;
}

// SDUP.q: replicates the low 32 bits of op2 into both 32-bit lanes.
// *pop1 = replicated value, *pop0 = 0.
void fcpu_cpu_exec_sdup_q_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 lane = op2 & 0xFFFFFFFF;

    *pop1 = lane | ( lane << 32 );
    *pop0 = 0;
}

// SDUP.o: a 64-bit element already fills the register, so op2 is passed
// through unchanged. *pop1 = op2, *pop0 = 0.
void fcpu_cpu_exec_sdup_o_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    const UL64 unchanged = op2;

    *pop1 = unchanged;
    *pop0 = 0;
}

//
// 6.3.1 Core Logic operations
//

// Two-operand bitwise logic: r1 = f( r2, r3 ).
//
// Operands are fetched with p->pigrf[ p->sizeflg ] and the result is
// stored with p->piprf[ p->sizeflg ]. Bits 10..13 of p->instr_reg select
// the function. Five codes are handled directly; every other code is
// evaluated bit by bit over 8 * p->iss[ p->sizeflg ] bits, using the four
// instruction bits as a truth table:
//
//   r2 bit  r3 bit  instruction bit consulted
//     1       1       BIT( 13 )
//     1       0       BIT( 11 )
//     0       1       BIT( 12 )
//     0       0       BIT( 10 )
//
// NOTE(review): the direct cases and the truth table disagree on bit
// order. Read through the table, code 1 would be NOR, 8 AND, 7 NAND and
// 14 OR, while the direct cases treat 1 as AND, 8 as NOR, 7 as OR and 14
// as NAND. One of the two is presumably wrong - confirm against the
// instruction set manual.
void fcpu_cpu_exec_logic_instr( PFCPU p )
{
    UL64 op3;
    UL64 op2;
    UL64 op1;
    UL64 i;
    UL64 probe;
    UL64 selector;

    op3 = p->pigrf[ p->sizeflg ]( p, p->r3, 0 );
    op2 = p->pigrf[ p->sizeflg ]( p, p->r2, 0 );

    switch( ( p->instr_reg >> 10 ) & 0x0F )
    {
        case 1: // 0001 = AND
            op1 = op2 & op3;
            break;

        case 6: // 0110 = XOR
            op1 = op2 ^ op3;
            break;

        case 7: // 0111 = OR
            op1 = op2 | op3;
            break;

        case 8: // 1000 = NOR
            op1 = ~( op2 | op3 );
            break;

        case 14: // 1110 = NAND
            op1 = ~( op2 & op3 );
            break;

        default:

            op1 = 0;

            for( i = 0; i < ( 8L * p->iss[ p->sizeflg ] ); i++ )
            {
                probe = (UL64)1 << i;

                // pick the truth-table entry for this pair of input bits
                if( op2 & probe )
                    selector = ( op3 & probe ) ? BIT( 13 )     // f(1,1)
                                               : BIT( 11 );    // f(1,0)
                else
                    selector = ( op3 & probe ) ? BIT( 12 )     // f(0,1)
                                               : BIT( 10 );    // f(0,0)

                if( p->instr_reg & selector )
                    op1 |= probe;
            }
            break;
    }

    p->piprf[ p->sizeflg ]( p, p->r1, 0, op1 );
}

//
// 6.3.2 Optional Logic operations
//

// Bitwise logic with an 8-bit immediate: r1 = r2 <op> p->imm8.
//
// Bits 10..11 of p->instr_reg select the operation:
//   0 = OR, 1 = AND, 2 = XOR, 3 = AND with the complemented immediate.
// The operand is fetched with p->pigrf[ p->sizeflg ] and the result stored
// with p->piprf[ p->sizeflg ].
void fcpu_cpu_exec_logici_instr( PFCPU p )
{
    UL64 op2;
    UL64 op1;
    UL64 selector;

    op2      = p->pigrf[ p->sizeflg ]( p, p->r2, 0 );
    selector = ( p->instr_reg >> 10 ) & 0x03;

    if( 0 == selector )
        op1 = op2 | p->imm8;
    else if( 1 == selector )
        op1 = op2 & p->imm8;
    else if( 2 == selector )
        op1 = op2 ^ p->imm8;
    else
        op1 = op2 & ( ~p->imm8 );

    p->piprf[ p->sizeflg ]( p, p->r1, 0, op1 );
}

//
// 6.4.1 Level 1 Floating Point operations
//

// FADD (single precision): *pop1 = op2 + op3, *pop0 = 0.
//
// The low 32 bits of each operand hold the bit pattern of a float; the
// result pattern is returned zero-extended.
//
// Bit patterns are converted through a union. The previous pointer casts
// ( *(float*)&op ) violated the strict aliasing rule and read the upper
// half of the operand on big-endian hosts.
void fcpu_cpu_exec_fadd_single_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL32 bits; float value; } a, b, r;

    a.bits = (UL32)op2;
    b.bits = (UL32)op3;

    *pop0 = 0;

    r.value = a.value + b.value;

    *pop1 = (UL64)r.bits;
}

// FADD (double precision): *pop1 = op2 + op3, *pop0 = 0.
//
// Each operand holds the 64-bit pattern of a double.
//
// Bit patterns are converted through a union. The previous pointer casts
// ( *(double*)&op ) violated the strict aliasing rule.
void fcpu_cpu_exec_fadd_double_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL64 bits; double value; } a, b, r;

    a.bits = op2;
    b.bits = op3;

    *pop0 = 0;

    r.value = a.value + b.value;

    *pop1 = r.bits;
}

// FSUB (single precision): *pop1 = op2 - op3, *pop0 = 0.
//
// The low 32 bits of each operand hold the bit pattern of a float; the
// result pattern is returned zero-extended.
//
// Bit patterns are converted through a union. The previous pointer casts
// ( *(float*)&op ) violated the strict aliasing rule and read the upper
// half of the operand on big-endian hosts.
void fcpu_cpu_exec_fsub_single_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL32 bits; float value; } a, b, r;

    a.bits = (UL32)op2;
    b.bits = (UL32)op3;

    *pop0 = 0;

    r.value = a.value - b.value;

    *pop1 = (UL64)r.bits;
}

// FSUB (double precision): *pop1 = op2 - op3, *pop0 = 0.
//
// Each operand holds the 64-bit pattern of a double.
//
// Bit patterns are converted through a union. The previous pointer casts
// ( *(double*)&op ) violated the strict aliasing rule.
void fcpu_cpu_exec_fsub_double_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL64 bits; double value; } a, b, r;

    a.bits = op2;
    b.bits = op3;

    *pop0 = 0;

    r.value = a.value - b.value;

    *pop1 = r.bits;
}

// FMUL (single precision): *pop1 = op2 * op3, *pop0 = 0.
//
// The low 32 bits of each operand hold the bit pattern of a float; the
// result pattern is returned zero-extended.
//
// Bit patterns are converted through a union. The previous pointer casts
// ( *(float*)&op ) violated the strict aliasing rule and read the upper
// half of the operand on big-endian hosts.
void fcpu_cpu_exec_fmul_single_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL32 bits; float value; } a, b, r;

    a.bits = (UL32)op2;
    b.bits = (UL32)op3;

    *pop0 = 0;

    r.value = a.value * b.value;

    *pop1 = (UL64)r.bits;
}

// FMUL (double precision): *pop1 = op2 * op3, *pop0 = 0.
//
// Each operand holds the 64-bit pattern of a double.
//
// Bit patterns are converted through a union. The previous pointer casts
// ( *(double*)&op ) violated the strict aliasing rule.
void fcpu_cpu_exec_fmul_double_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL64 bits; double value; } a, b, r;

    a.bits = op2;
    b.bits = op3;

    *pop0 = 0;

    r.value = a.value * b.value;

    *pop1 = r.bits;
}

// Rounds f to an integer according to rounding mode m and returns it as a
// 64-bit pattern (two's complement for negative results).
//
//   m == 1     : towards zero
//   m == 2     : towards -infinity
//   m == 3     : towards +infinity
//   otherwise  : to nearest, halves away from zero
//
// Negative results are converted through a signed integer first. The
// previous code cast a negative double straight to the unsigned result
// type, which is undefined behaviour in C.
// NOTE(review): NaN and values outside the 64-bit integer range are still
// not handled; the required behaviour is not visible here.
UL64 fcpu_cpu_exec_f2int_round( double f, UL64 m )
{
    double rounded;

    switch( m )
    {
        case 1: // towards 0
            rounded = ( f > 0 ) ? floor( f ) : ceil( f );
            break;

        case 2: // towards -infinity
            rounded = floor( f );
            break;

        case 3: // towards +infinity
            rounded = ceil( f );
            break;

        default: // nearest
            if( f > 0 )
                rounded = ( floor( f ) == floor( f + 0.5 ) ) ? floor( f ) : ceil( f );
            else
                rounded = ( ceil( f ) == ceil( f - 0.5 ) ) ? ceil( f ) : floor( f );
            break;
    }

    if( rounded < 0 )
        return (UL64)(long long)rounded;    // signed detour keeps this defined

    return (UL64)rounded;
}

// F2INT (single precision): converts the float whose bit pattern is in the
// low 32 bits of op2 to an integer. Bits 12..13 of p->instr_reg are passed
// to fcpu_cpu_exec_f2int_round as the rounding mode.
// *pop1 = integer result, *pop0 = 0. op3 is unused.
//
// The bit pattern is converted through a union. The previous pointer cast
// violated the strict aliasing rule and read the upper half of op2 on
// big-endian hosts.
void fcpu_cpu_exec_f2int_single_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL32 bits; float value; } src;

    src.bits = (UL32)op2;

    *pop1 = fcpu_cpu_exec_f2int_round( src.value, ( p->instr_reg >> 12 ) & 3 );
    *pop0 = 0;
}

// F2INT (double precision): converts the double whose bit pattern is in
// op2 to an integer. Bits 12..13 of p->instr_reg are passed to
// fcpu_cpu_exec_f2int_round as the rounding mode.
// *pop1 = integer result, *pop0 = 0. op3 is unused.
//
// The bit pattern is converted through a union. The previous pointer cast
// violated the strict aliasing rule.
void fcpu_cpu_exec_f2int_double_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL64 bits; double value; } src;

    src.bits = op2;

    *pop1 = fcpu_cpu_exec_f2int_round( src.value, ( p->instr_reg >> 12 ) & 3 );
    *pop0 = 0;
}

// INT2F (single precision): converts the low 32 bits of op2, read as a
// signed integer, to a float. *pop1 = float bit pattern (zero-extended),
// *pop0 = 0. op3 is unused.
//
// The bit pattern is extracted through a union. The previous pointer cast
// ( *(UL32*)&fop1 ) violated the strict aliasing rule.
void fcpu_cpu_exec_int2f_single_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL32 bits; float value; } dst;

    dst.value = (float)(I32)op2;

    *pop1 = (UL64)dst.bits;
    *pop0 = 0;
}

// INT2F (double precision): converts op2, read as a signed 64-bit integer,
// to a double. *pop1 = double bit pattern, *pop0 = 0. op3 is unused.
//
// The bit pattern is extracted through a union. The previous pointer cast
// ( *(UL64*)&fop1 ) violated the strict aliasing rule.
void fcpu_cpu_exec_int2f_double_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL64 bits; double value; } dst;

    dst.value = (double)(I64)op2;

    *pop1 = dst.bits;
    *pop0 = 0;
}

//
// 6.4.2 Level 2 Floating Point operations
//

// FDIV (single precision): *pop1 = op2 / op3, *pop0 = 0.
//
// The low 32 bits of each operand hold the bit pattern of a float; the
// result pattern is returned zero-extended. No exception is raised for a
// zero divisor; the host floating-point result is returned as is.
//
// Bit patterns are converted through a union. The previous pointer casts
// ( *(float*)&op ) violated the strict aliasing rule and read the upper
// half of the operand on big-endian hosts.
void fcpu_cpu_exec_fdiv_single_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL32 bits; float value; } a, b, r;

    a.bits = (UL32)op2;
    b.bits = (UL32)op3;

    *pop0 = 0;

    r.value = a.value / b.value;

    *pop1 = (UL64)r.bits;
}

// FDIV (double precision): *pop1 = op2 / op3, *pop0 = 0.
//
// Each operand holds the 64-bit pattern of a double. No exception is
// raised for a zero divisor; the host floating-point result is returned
// as is.
//
// Bit patterns are converted through a union. The previous pointer casts
// ( *(double*)&op ) violated the strict aliasing rule.
void fcpu_cpu_exec_fdiv_double_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL64 bits; double value; } a, b, r;

    a.bits = op2;
    b.bits = op3;

    *pop0 = 0;

    r.value = a.value / b.value;

    *pop1 = r.bits;
}

//
// 6.4.3 Level 3 Floating Point operations
//

// FSQRT (single precision): *pop1 = sqrt( op2 ), *pop0 = 0.
//
// The low 32 bits of op2 hold the bit pattern of a float; the result
// pattern is returned zero-extended. op3 is unused.
//
// The bit pattern is converted through a union. The previous pointer casts
// violated the strict aliasing rule and read the upper half of op2 on
// big-endian hosts. The unused decoding of op3 was dropped.
void fcpu_cpu_exec_fsqrt_single_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL32 bits; float value; } a, r;

    a.bits = (UL32)op2;

    *pop0 = 0;

    r.value = (float)sqrt( a.value );

    *pop1 = (UL64)r.bits;
}

// FSQRT (double precision): *pop1 = sqrt( op2 ), *pop0 = 0.
//
// op2 holds the 64-bit pattern of a double. op3 is unused.
//
// The bit pattern is converted through a union. The previous pointer casts
// violated the strict aliasing rule. The unused decoding of op3 was
// dropped.
void fcpu_cpu_exec_fsqrt_double_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL64 bits; double value; } a, r;

    a.bits = op2;

    *pop0 = 0;

    r.value = sqrt( a.value );

    *pop1 = r.bits;
}

// FMAC (single precision): *pop1 = *pop1 + op2 * op3, *pop0 = 0.
//
// *pop1 is both the accumulator input and the result. The low 32 bits of
// every value hold the bit pattern of a float; the result pattern is
// returned zero-extended.
//
// Bit patterns are converted through a union. The previous pointer casts
// violated the strict aliasing rule and read the upper half of the
// operands on big-endian hosts.
void fcpu_cpu_exec_fmac_single_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL32 bits; float value; } a, b, acc;

    acc.bits = (UL32)( *pop1 );
    a.bits   = (UL32)op2;
    b.bits   = (UL32)op3;

    acc.value = acc.value + a.value * b.value;

    *pop1 = (UL64)acc.bits;
    *pop0 = 0;
}

// FMAC (double precision): *pop1 = *pop1 + op2 * op3, *pop0 = 0.
//
// *pop1 is both the accumulator input and the result. Every value holds
// the 64-bit pattern of a double.
//
// Bit patterns are converted through a union. The previous pointer casts
// violated the strict aliasing rule.
void fcpu_cpu_exec_fmac_double_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL64 bits; double value; } a, b, acc;

    acc.bits = *pop1;
    a.bits   = op2;
    b.bits   = op3;

    acc.value = acc.value + a.value * b.value;

    *pop1 = acc.bits;
    *pop0 = 0;
}

// FADDSUB (single precision): *pop1 = op2 + op3, *pop0 = op2 - op3.
//
// The low 32 bits of each operand hold the bit pattern of a float; both
// result patterns are returned zero-extended.
//
// The previous code computed the difference but then stored the unchanged
// op2 operand in *pop0. Bit patterns are also converted through a union
// now; the pointer casts violated the strict aliasing rule and read the
// upper half of the operands on big-endian hosts.
void fcpu_cpu_exec_faddsub_single_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL32 bits; float value; } a, b, sum, diff;

    a.bits = (UL32)op2;
    b.bits = (UL32)op3;

    sum.value  = a.value + b.value;
    diff.value = a.value - b.value;

    *pop1 = (UL64)sum.bits;
    *pop0 = (UL64)diff.bits;
}

// FADDSUB (double precision): *pop1 = op2 + op3, *pop0 = op2 - op3.
//
// Each operand holds the 64-bit pattern of a double.
//
// The previous code computed the difference but then stored the unchanged
// op2 operand in *pop0. Bit patterns are also converted through a union
// now; the pointer casts violated the strict aliasing rule.
void fcpu_cpu_exec_faddsub_double_instr( PFCPU p, UL64 op3, UL64 op2, UL64* pop1, UL64* pop0 )
{
    union { UL64 bits; double value; } a, b, sum, diff;

    a.bits = op2;
    b.bits = op3;

    sum.value  = a.value + b.value;
    diff.value = a.value - b.value;

    *pop1 = sum.bits;
    *pop0 = diff.bits;
}

//
// 6.5.1 Core Memory Access operations
//
//
// 6.5.2 Optional Memory Access operations
//

// LOADI: loads a value from memory into r1 and post-increments the address
// register r2 by p->imm16.
//
// Steps:
//   1. read the address from r2; if either of its two low bits is set,
//      raise FCPU_EXCEPTION_ALIGNMENTFAULT and stop
//   2. translate it with read permission; stop if an exception is pending
//   3. write address + p->imm16 back to r2
//   4. read p->sb[ p->sizeflg ] bytes from the translated (pre-increment)
//      address - big endian if BIT( 10 ) of p->instr_reg is set, little
//      endian otherwise - and store them in r1
void fcpu_cpu_exec_loadi_instr( PFCPU p )
{
    UL64 address;
    UL64 physical;
    UL64 loaded;

    address = p->pigrf[ p->sizeflg ]( p, p->r2, 0 );
    if( address & 3 )
    {
        p->exception |= FCPU_EXCEPTION_ALIGNMENTFAULT;
        return;
    }

    fcpu_cpu_check_tlb( p, address, &physical, FCPU_TLB_ACCESS_MODE_R );
    if( fcpu_cpu_check_exception( p ) )
        return;

    // post-increment: r2 is updated before the data reaches r1
    p->piprf[ p->sizeflg ]( p, p->r2, 0, address + p->imm16 );

    loaded = ( p->instr_reg & BIT( 10 ) )
                 ? fcpu_mem_get_n_bytes_be( physical, p->sb[ p->sizeflg ] )
                 : fcpu_mem_get_n_bytes_le( physical, p->sb[ p->sizeflg ] );

    p->piprf[ p->sizeflg ]( p, p->r1, 0, loaded );
}

// STOREI: stores r1 to memory and post-increments the address register r2
// by p->imm16.
//
// Steps:
//   1. read the address from r2; if either of its two low bits is set,
//      raise FCPU_EXCEPTION_ALIGNMENTFAULT and stop
//   2. translate it with WRITE permission; stop if an exception is pending
//   3. write address + p->imm16 back to r2
//   4. write p->sb[ p->sizeflg ] bytes of r1 to the translated
//      (pre-increment) address - big endian if BIT( 10 ) of p->instr_reg
//      is set, little endian otherwise
//
// The previous code requested FCPU_TLB_ACCESS_MODE_R for the translation,
// so stores to pages without write permission were let through. The
// multi-register store already checks FCPU_TLB_ACCESS_MODE_W.
void fcpu_cpu_exec_storei_instr( PFCPU p )
{
    UL64 op2;
    UL64 op1;
    UL64 ap;

    op2 = p->pigrf[ p->sizeflg ]( p, p->r2, 0 );
    if( op2 & 3 )
    {
        p->exception |= FCPU_EXCEPTION_ALIGNMENTFAULT;
        return;
    }

    fcpu_cpu_check_tlb( p, op2, &ap, FCPU_TLB_ACCESS_MODE_W );
    if( fcpu_cpu_check_exception( p ) )
        return;

    // post-increment of the address register
    op2 = op2 + p->imm16;

    p->piprf[ p->sizeflg ]( p, p->r2, 0, op2 );

    // r1 is read after the r2 update, as before
    op1 = p->pigrf[ p->sizeflg ]( p, p->r1, 0 );

    if( p->instr_reg & BIT( 10 ) )
        fcpu_mem_put_n_bytes_be( ap, p->sb[ p->sizeflg ], op1 );
    else
        fcpu_mem_put_n_bytes_le( ap, p->sb[ p->sizeflg ], op1 );
}

// LOAD (register form): takes the post-increment amount from register r3,
// places it in p->imm16 and then runs the immediate-form handler.
// Note that p->imm16 is overwritten as a side effect.
void fcpu_cpu_exec_load_instr( PFCPU p )
{
    const UL64 increment = fcpu_cpu_reg_get_octbyte_at( p, p->r3, 0 );

    p->imm16 = increment;
    fcpu_cpu_exec_loadi_instr( p );
}

// STORE (register form): takes the post-increment amount from register r3,
// places it in p->imm16 and then runs the immediate-form handler.
// Note that p->imm16 is overwritten as a side effect.
void fcpu_cpu_exec_store_instr( PFCPU p )
{
    const UL64 increment = fcpu_cpu_reg_get_octbyte_at( p, p->r3, 0 );

    p->imm16 = increment;
    fcpu_cpu_exec_storei_instr( p );
}

// LOADM: loads several consecutive 8-byte words into consecutive registers.
//
//   value of r2 - start address; if either of its two low bits is set,
//                 FCPU_EXCEPTION_ALIGNMENTFAULT is raised
//   value of r3 - number of the first destination register
//   value of r1 - number of words to load
//
// Word i is read little endian from start + 8 * i and written to register
// ( i + first ) & 0x3F. Each address is translated with read permission;
// the loop stops at the first pending exception, leaving the registers
// loaded so far modified.
void fcpu_cpu_exec_loadm_instr( PFCPU p )
{
    UL64 first_reg;
    UL64 start;
    UL64 count;
    UL64 physical;
    UL64 i;

    start = fcpu_cpu_reg_get_octbyte_at( p, p->r2, 0 );
    if( start & 3 )
    {
        p->exception |= FCPU_EXCEPTION_ALIGNMENTFAULT;
        return;
    }

    // all three inputs are captured before any register is overwritten
    first_reg = fcpu_cpu_reg_get_octbyte_at( p, p->r3, 0 );
    count     = fcpu_cpu_reg_get_octbyte_at( p, p->r1, 0 );

    i = 0;
    while( i < count )
    {
        fcpu_cpu_check_tlb( p, start + 8L * i, &physical, FCPU_TLB_ACCESS_MODE_R );
        if( fcpu_cpu_check_exception( p ) )
            return;

        fcpu_cpu_reg_put_octbyte_at( p,
                                     ( i + first_reg ) & 0x3F,
                                     8,
                                     fcpu_mem_get_n_bytes_le( physical, 8 ) );
        i++;
    }
}

//
// STOREM: store multiple registers to memory.
//
//   r2 value : start address (alignment fault if one of the two low bits is set)
//   r3 value : number of the first source register
//   r1 value : number of 8 byte words to transfer
//
// Word k is taken from register ( k + first ) & 0x3F and written little
// endian to (address + 8 * k). Every address is translated as a write access;
// the transfer stops as soon as fcpu_cpu_check_exception() reports a pending
// exception.
//
void fcpu_cpu_exec_storem_instr( PFCPU p )
{
    UL64 first_reg;
    UL64 base;
    UL64 count;
    UL64 phys;
    UL64 word;
    UL64 k;

    base = fcpu_cpu_reg_get_octbyte_at( p, p->r2, 0 );
    if( ( base & 3 ) != 0 )
    {
        p->exception |= FCPU_EXCEPTION_ALIGNMENTFAULT;
        return;
    }

    first_reg = fcpu_cpu_reg_get_octbyte_at( p, p->r3, 0 );
    count     = fcpu_cpu_reg_get_octbyte_at( p, p->r1, 0 );

    for( k = 0; k < count; k++ )
    {
        fcpu_cpu_check_tlb( p, base + 8L * k, &phys, FCPU_TLB_ACCESS_MODE_W );
        if( fcpu_cpu_check_exception( p ) )
            return;

        word = fcpu_cpu_reg_get_octbyte_at( p, ( k + first_reg ) & 0x3F, 0 );
        fcpu_mem_put_n_bytes_le( phys, 8, word );
    }
}

//
// CACHEMM: intentionally a no-operation in the emulator.
//
void fcpu_cpu_exec_cachemm_instr( PFCPU p )
{
    (void)p;    // unused on purpose
}

//
// 6.6.1 Core Data Move operations
//
//
// 6.6.2 Optional Data Move operations
//

//
// Evaluates the condition shared by MOVE and JMPA.
//
// Bits 12..11 of the instruction word select the test applied to 'op':
//   0 : op, masked to the current operand size, is zero
//   2 : the most significant bit of the current operand size is set
//   3 : bit 0 is set
//   1 : no test defined here, evaluates to "not met"
// Bit 10 of the instruction word inverts the outcome.
//
// Returns 1 when the (possibly inverted) condition holds, otherwise 0.
//
char fcpu_cpu_compute_move_jmpa_cond( PFCPU p, UL64 op )
{
    UL64 selector;
    char met;

    selector = ( p->instr_reg >> 11 ) & 0x03;

    if( selector == 0 )
        met = ( ( op & p->mask[ p->sizeflg ] ) == 0 ) ? 1 : 0;
    else if( selector == 2 )
        met = ( op & p->msbmask[ p->sizeflg ] ) ? 1 : 0;
    else if( selector == 3 )
        met = ( op & 1 ) ? 1 : 0;
    else
        met = 0;

    // optional negation of the result
    if( p->instr_reg & BIT( 10 ) )
        met = !met;

    return met;
}

//
// MOVE: conditional register move. The value of r3 is tested with
// fcpu_cpu_compute_move_jmpa_cond(); when the condition holds, r2 is copied
// to r1. All accesses use the current operand size, element 0.
//
void fcpu_cpu_exec_move_instr( PFCPU p )
{
    UL64 tested;
    UL64 source;

    tested = p->pigrf[ p->sizeflg ]( p, p->r3, 0 );
    source = p->pigrf[ p->sizeflg ]( p, p->r2, 0 );

    if( !fcpu_cpu_compute_move_jmpa_cond( p, tested ) )
        return;

    p->piprf[ p->sizeflg ]( p, p->r1, 0, source );
}

//
// LOADCONS: replace one 16 bit field of register r1 with p->imm16.
// The field starts at bit 16 * sizeflg; all other bits of r1 are preserved.
//
void fcpu_cpu_exec_loadcons_instr( PFCPU p )
{
    UL64 op1;
    UL64 n;

    n = 16 * p->sizeflg;

    op1 = fcpu_cpu_reg_get_octbyte_at( p, p->r1, 0 );

    // The mask has to be built in 64 bit arithmetic. A bare 0xFFFF literal
    // has type int, so shifting it by 16 overflows and shifting it by 32 or
    // 48 is undefined, which produced a wrong clear mask for sizeflg > 0.
    op1 &= ~( (UL64)0xFFFF << n );

    op1 |= (UL64)p->imm16 << n;

    fcpu_cpu_reg_put_octbyte_at( p, p->r1, 0, op1 );
}

//
// LOADCONSX: like LOADCONS, but sign extending. p->imm16 is written to the
// 16 bit field starting at bit 16 * sizeflg of register r1; every bit above
// that field is then set to bit 15 of p->imm16. Bits below the field are
// preserved.
//
void fcpu_cpu_exec_loadconsx_instr( PFCPU p )
{
    UL64 op1;
    UL64 n;

    n = 16 * p->sizeflg;

    op1 = fcpu_cpu_reg_get_octbyte_at( p, p->r1, 0 );

    // 64 bit arithmetic is required here: a bare 0xFFFF literal has type int
    // and must not be shifted by 16 or more.
    op1 &= ~( (UL64)0xFFFF << n );

    op1 |= (UL64)p->imm16 << n;

    // propagate the sign bit of the immediate into all higher bits
    if( p->imm16 & 0x0000000000008000 )
        op1 |= ( (UL64)0xFFFFFFFFFFFF0000 << n );
    else
        op1 &= ~( (UL64)0xFFFFFFFFFFFF0000 << n );

    fcpu_cpu_reg_put_octbyte_at( p, p->r1, 0, op1 );
}

//
// GETI: read a special register (SR) into register r1.
//
// The SR number is taken from the 64 bit value of register r2.
// NOTE(review): p->imm16 is not consulted here although the GET wrapper
// fills it in before calling - confirm which of the two is intended.
//
// - SR numbers inside the TLB window address the TLB entries. The window is
//   kernel only. Each entry is viewed as an array of UL64 words; word
//   offsets 0..4 are readable, higher offsets read as 0.
//   NOTE(review): this assumes a TLB entry is laid out as at least five
//   UL64 words - confirm against the TLBE declaration.
// - The identification registers (NUMBERS .. MAX_CHUNK_SIZE) are readable in
//   any mode.
// - Every other known SR is readable in kernel mode only.
// - Unknown SR numbers and privileged reads from user mode raise
//   FCPU_EXCEPTION_PROTECTIONFAULT and leave r1 untouched.
//
void fcpu_cpu_exec_geti_instr( PFCPU p )
{
    UL64    op2;
    UL64    op1;
    UL64    i;
    UL64    o;
    UL64*   ptlbe;

    op2 = fcpu_cpu_reg_get_octbyte_at( p, p->r2, 0 );

    // The upper bound is exclusive: the former "<=" accepted one SR number
    // past the window and indexed p->tlb[ FCPU_NUM_TLB_ENTRIES ].
    if( ( op2 >= FCPU_SR_TLB_BASE ) &&
        ( op2 < FCPU_SR_TLB_BASE + FCPU_TLB_ENTRY_SIZE * FCPU_NUM_TLB_ENTRIES ) )
    {
        if( fcpu_cpu_protectionfault_if_not_in_kernelmode( p ) )
            return;

        i = ( op2 - FCPU_SR_TLB_BASE ) / FCPU_TLB_ENTRY_SIZE;   // entry number
        o = ( op2 - FCPU_SR_TLB_BASE ) % FCPU_TLB_ENTRY_SIZE;   // word inside entry

        ptlbe = (UL64*)&( p->tlb[ i ] );

        if( o <= 4 )
            op1 = *( ptlbe + o );
        else
            op1 = 0;
    }
    else
    {
        switch( op2 )
        {
            //
            // identification registers, readable in any mode
            //
            case FCPU_SR_NUMBERS:           op1 = FCPU_SR_NUMBERS_VAL;          break;
            case FCPU_SR_FAMILY:            op1 = FCPU_SR_FAMILY_VAL;           break;
            case FCPU_SR_STEPPING:          op1 = FCPU_SR_STEPPING_VAL;         break;
            case FCPU_SR_MAX_SIZE:          op1 = FCPU_SR_MAX_SIZE_VAL;         break;
            case FCPU_SR_SIZE_1:            op1 = FCPU_SR_SIZE_1_VAL;           break;
            case FCPU_SR_SIZE_2:            op1 = FCPU_SR_SIZE_2_VAL;           break;
            case FCPU_SR_SIZE_3:            op1 = FCPU_SR_SIZE_3_VAL;           break;
            case FCPU_SR_SIZE_4:            op1 = FCPU_SR_SIZE_4_VAL;           break;
            case FCPU_SR_MAX_CHUNK_SIZE:    op1 = FCPU_SR_MAX_CHUNK_SIZE_VAL;   break;

            default:
                //
                // Everything else is either privileged or unknown. Both
                // cases end in a protection fault when not in kernel mode.
                //
                if( fcpu_cpu_protectionfault_if_not_in_kernelmode( p ) )
                    return;

                switch( op2 )
                {
                    case FCPU_SR_CYCLE:                 op1 = p->sr_cycle;                  break;
                    case FCPU_SR_PAGING:                op1 = p->sr_paging;                 break;
                    case FCPU_SR_IRQ_BASE:              op1 = p->sr_irq_base;               break;
                    case FCPU_SR_IRQ_SIZE:              op1 = p->sr_irq_size;               break;
                    case FCPU_SR_TRAP_BASE:             op1 = p->sr_trap_base;              break;
                    case FCPU_SR_TRAP_SIZE:             op1 = p->sr_trap_size;              break;
                    case FCPU_SR_SYSCALL_BASE:          op1 = p->sr_syscall_base;           break;
                    case FCPU_SR_SYSCALL_SIZE:          op1 = p->sr_syscall_size;           break;
                    case FCPU_SR_TLBMISS_BASE:          op1 = p->sr_tlb_miss_base;          break;
                    case FCPU_SR_TIME_SLICE_COUNTER:    op1 = p->sr_time_slice_counter;     break;
                    case FCPU_SR_TIME_SLICE_LIMIT:      op1 = p->sr_time_slice_limit;       break;
                    case FCPU_SR_CMB_BASE:              op1 = p->sr_cmb_base;               break;
                    case FCPU_SR_EVENT_COUNTER_0_CFG:   op1 = p->sr_event_counter_0_cfg;    break;
                    case FCPU_SR_EVENT_COUNTER_0:       op1 = p->sr_event_counter_0;        break;
                    case FCPU_SR_EVENT_COUNTER_1_CFG:   op1 = p->sr_event_counter_1_cfg;    break;
                    case FCPU_SR_EVENT_COUNTER_1:       op1 = p->sr_event_counter_1;        break;
                    case FCPU_SR_EVENT_COUNTER_2_CFG:   op1 = p->sr_event_counter_2_cfg;    break;
                    case FCPU_SR_EVENT_COUNTER_2:       op1 = p->sr_event_counter_2;        break;
                    case FCPU_SR_EVENT_COUNTER_3_CFG:   op1 = p->sr_event_counter_3_cfg;    break;
                    case FCPU_SR_EVENT_COUNTER_3:       op1 = p->sr_event_counter_3;        break;
                    case FCPU_SR_EVENT_COUNTER_4_CFG:   op1 = p->sr_event_counter_4_cfg;    break;
                    case FCPU_SR_EVENT_COUNTER_4:       op1 = p->sr_event_counter_4;        break;
                    case FCPU_SR_EVENT_COUNTER_5_CFG:   op1 = p->sr_event_counter_5_cfg;    break;
                    case FCPU_SR_EVENT_COUNTER_5:       op1 = p->sr_event_counter_5;        break;
                    case FCPU_SR_EVENT_COUNTER_6_CFG:   op1 = p->sr_event_counter_6_cfg;    break;
                    case FCPU_SR_EVENT_COUNTER_6:       op1 = p->sr_event_counter_6;        break;
                    case FCPU_SR_EVENT_COUNTER_7_CFG:   op1 = p->sr_event_counter_7_cfg;    break;
                    case FCPU_SR_EVENT_COUNTER_7:       op1 = p->sr_event_counter_7;        break;
                    case FCPU_SR_FLAGS:                 op1 = p->sr_flags;                  break;

                    default:
                        // unknown SR number
                        p->exception |= FCPU_EXCEPTION_PROTECTIONFAULT;
                        return;
                }
                break;
        }
    }

    fcpu_cpu_reg_put_octbyte_at( p, p->r1, 0, op1 );
}

//
// GET: register variant of GETI. The low 16 bits of register r3 are placed
// in p->imm16 before the work is delegated to the GETI handler.
//
void fcpu_cpu_exec_get_instr( PFCPU p )
{
    UL64 r3_low16;

    r3_low16 = fcpu_cpu_reg_get_doublebyte_at( p, p->r3, 0 );
    p->imm16 = r3_low16;

    fcpu_cpu_exec_geti_instr( p );
}

//
// PUTI: write register r1 into a special register (SR). Kernel mode only;
// in user mode a protection fault is raised and nothing is written.
//
// The SR number is taken from the 64 bit value of register r2.
// NOTE(review): p->imm16 is not consulted here although the PUT wrapper
// fills it in before calling - confirm which of the two is intended.
//
// - SR numbers inside the TLB window address the TLB entries. Each entry is
//   viewed as an array of UL64 words; word offsets 0..4 are writable, writes
//   to higher offsets are ignored.
//   NOTE(review): this assumes a TLB entry is laid out as at least five
//   UL64 words - confirm against the TLBE declaration.
// - Writes to the identification registers are silently ignored.
// - Unknown SR numbers raise FCPU_EXCEPTION_PROTECTIONFAULT.
//
void fcpu_cpu_exec_puti_instr( PFCPU p )
{
    UL64    op2;
    UL64    op1;
    UL64    i;
    UL64    o;
    UL64*   ptlbe;

    if( fcpu_cpu_protectionfault_if_not_in_kernelmode( p ) )
        return;

    op2 = fcpu_cpu_reg_get_octbyte_at( p, p->r2, 0 );
    op1 = fcpu_cpu_reg_get_octbyte_at( p, p->r1, 0 );

    // The upper bound is exclusive: the former "<=" accepted one SR number
    // past the window and wrote to p->tlb[ FCPU_NUM_TLB_ENTRIES ].
    if( ( op2 >= FCPU_SR_TLB_BASE ) &&
        ( op2 < FCPU_SR_TLB_BASE + FCPU_TLB_ENTRY_SIZE * FCPU_NUM_TLB_ENTRIES ) )
    {
        i = ( op2 - FCPU_SR_TLB_BASE ) / FCPU_TLB_ENTRY_SIZE;   // entry number
        o = ( op2 - FCPU_SR_TLB_BASE ) % FCPU_TLB_ENTRY_SIZE;   // word inside entry

        ptlbe = (UL64*)&( p->tlb[ i ] );

        if( o <= 4 )
            *( ptlbe + o ) = op1;
    }
    else
    {
        switch( op2 )
        {
            //
            // read-only identification registers: the write is dropped
            //
            case FCPU_SR_NUMBERS:
            case FCPU_SR_FAMILY:
            case FCPU_SR_STEPPING:
            case FCPU_SR_MAX_SIZE:
            case FCPU_SR_SIZE_1:
            case FCPU_SR_SIZE_2:
            case FCPU_SR_SIZE_3:
            case FCPU_SR_SIZE_4:
            case FCPU_SR_MAX_CHUNK_SIZE:
                break;

            //
            // writable registers
            //
            case FCPU_SR_CYCLE:                 p->sr_cycle                 = op1;  break;
            case FCPU_SR_PAGING:                p->sr_paging                = op1;  break;
            case FCPU_SR_IRQ_BASE:              p->sr_irq_base              = op1;  break;
            case FCPU_SR_IRQ_SIZE:              p->sr_irq_size              = op1;  break;
            case FCPU_SR_TRAP_BASE:             p->sr_trap_base             = op1;  break;
            case FCPU_SR_TRAP_SIZE:             p->sr_trap_size             = op1;  break;
            case FCPU_SR_SYSCALL_BASE:          p->sr_syscall_base          = op1;  break;
            case FCPU_SR_SYSCALL_SIZE:          p->sr_syscall_size          = op1;  break;
            case FCPU_SR_TLBMISS_BASE:          p->sr_tlb_miss_base         = op1;  break;
            case FCPU_SR_TIME_SLICE_COUNTER:    p->sr_time_slice_counter    = op1;  break;
            case FCPU_SR_TIME_SLICE_LIMIT:      p->sr_time_slice_limit      = op1;  break;
            case FCPU_SR_CMB_BASE:              p->sr_cmb_base              = op1;  break;
            case FCPU_SR_EVENT_COUNTER_0_CFG:   p->sr_event_counter_0_cfg   = op1;  break;
            case FCPU_SR_EVENT_COUNTER_0:       p->sr_event_counter_0       = op1;  break;
            case FCPU_SR_EVENT_COUNTER_1_CFG:   p->sr_event_counter_1_cfg   = op1;  break;
            case FCPU_SR_EVENT_COUNTER_1:       p->sr_event_counter_1       = op1;  break;
            case FCPU_SR_EVENT_COUNTER_2_CFG:   p->sr_event_counter_2_cfg   = op1;  break;
            case FCPU_SR_EVENT_COUNTER_2:       p->sr_event_counter_2       = op1;  break;
            case FCPU_SR_EVENT_COUNTER_3_CFG:   p->sr_event_counter_3_cfg   = op1;  break;
            case FCPU_SR_EVENT_COUNTER_3:       p->sr_event_counter_3       = op1;  break;
            case FCPU_SR_EVENT_COUNTER_4_CFG:   p->sr_event_counter_4_cfg   = op1;  break;
            case FCPU_SR_EVENT_COUNTER_4:       p->sr_event_counter_4       = op1;  break;
            case FCPU_SR_EVENT_COUNTER_5_CFG:   p->sr_event_counter_5_cfg   = op1;  break;
            case FCPU_SR_EVENT_COUNTER_5:       p->sr_event_counter_5       = op1;  break;
            case FCPU_SR_EVENT_COUNTER_6_CFG:   p->sr_event_counter_6_cfg   = op1;  break;
            case FCPU_SR_EVENT_COUNTER_6:       p->sr_event_counter_6       = op1;  break;
            case FCPU_SR_EVENT_COUNTER_7_CFG:   p->sr_event_counter_7_cfg   = op1;  break;
            case FCPU_SR_EVENT_COUNTER_7:       p->sr_event_counter_7       = op1;  break;
            case FCPU_SR_FLAGS:                 p->sr_flags                 = op1;  break;

            default:
                // unknown SR number
                p->exception |= FCPU_EXCEPTION_PROTECTIONFAULT;
                break;
        }
    }
}

//
// PUT: register variant of PUTI. The low 16 bits of register r3 are placed
// in p->imm16 before the work is delegated to the PUTI handler.
//
void fcpu_cpu_exec_put_instr( PFCPU p )
{
    UL64 r3_low16;

    r3_low16 = fcpu_cpu_reg_get_doublebyte_at( p, p->r3, 0 );
    p->imm16 = r3_low16;

    fcpu_cpu_exec_puti_instr( p );
}

//
// 6.7.1 Core Instruction Flow Control instructions
//

//
// Services the syscall numbers that the virtual machine implements itself.
//
//   0xFFFF : write the low byte of register 63 to stdout
//   0xFFFE : read one character from stdin into the low byte of register 63
//            (0 on EOF)
//
// Returns 1 when p->imm16 named a native syscall and it has been serviced,
// 0 when the caller has to dispatch the syscall itself.
//
char fcpu_cpu_check_handle_native_syscall( PFCPU p )
{
    char buffer[ 2 ];
    char c;
    int  ich;

    c = 0;

    switch( p->imm16 )
    {
        case 0xFFFF:
            buffer[ 0 ] = (char)fcpu_cpu_reg_get_byte_at( p, 63, 0 );
            buffer[ 1 ] = 0;
            // The byte comes from the guest program and must never be used
            // as a format string: a '%' would make printf() read arguments
            // that do not exist.
            fputs( buffer, stdout );
            c = 1;
            break;

        case 0xFFFE:
            ich = getchar();
            if( ich == EOF )
                ich = 0;
            else if( ich > 255 )
                ich = 32;

            fcpu_cpu_reg_put_byte_at( p, 63, 0, (UL64)( ich & 0xFF ) );
            // Report the call as handled. Without this the caller went on
            // to dispatch 0xFFFE as a regular syscall after the character
            // had already been read.
            c = 1;
            break;

        default:
            break;
    }

    return c;
}

//
// Writes the CPU context to the context memory block (CMB) at address 'ap'.
// All fields are 8 bytes wide and stored little endian:
//
//   ap +   0        sr_flags (MSR)
//   ap +   8        instr_ptr (PC)
//   ap +  16 ...    registers 1..63, 8 bytes each (register 0 is not stored)
//   ap + 520        sr_time_slice_limit
//   ap + 528        sr_time_slice_counter
//   ap + 536        NEXT_CMB, always written as 0
//   ap + 544        sr_syscall_base
//   ap + 552        sr_syscall_size
//
void fcpu_cpu_save_cpu_into_cmb( PFCPU p, UL64 ap )
{
    UL64 r;
    UL64 slot;
    UL64 tail;

    fcpu_mem_put_n_bytes_le( ap, 8, p->sr_flags );
    fcpu_mem_put_n_bytes_le( ap + 8, 8, p->instr_ptr );

    // general purpose registers 1..63 follow back to back
    slot = ap + 16;
    for( r = 1; r < 64; r++ )
    {
        fcpu_mem_put_n_bytes_le( slot, 8, p->reg[ r ] );
        slot += 8;
    }

    tail = ap + 520;

    fcpu_mem_put_n_bytes_le( tail, 8, p->sr_time_slice_limit );
    fcpu_mem_put_n_bytes_le( tail + 8, 8, p->sr_time_slice_counter );
    fcpu_mem_put_n_bytes_le( tail + 16, 8, 0 );
    fcpu_mem_put_n_bytes_le( tail + 24, 8, p->sr_syscall_base );
    fcpu_mem_put_n_bytes_le( tail + 32, 8, p->sr_syscall_size );
}

//
// Restores the CPU context from the context memory block (CMB) at address
// 'ap'; the counterpart of fcpu_cpu_save_cpu_into_cmb(). All fields are
// 8 bytes wide and read little endian:
//
//   ap +   0        sr_flags
//   ap +   8        instr_ptr
//   ap +  16 ...    registers 1..63
//   ap + 520        sr_time_slice_limit
//   ap + 528        sr_time_slice_counter
//   ap + 544        sr_syscall_base
//   ap + 552        sr_syscall_size
//
// The NEXT_CMB slot at ap + 536 is not read.
//
void fcpu_cpu_load_cpu_from_cmb( PFCPU p, UL64 ap )
{
    UL64 i;

    p->sr_flags  = fcpu_mem_get_n_bytes_le( ap +  0, 8 );
    p->instr_ptr = fcpu_mem_get_n_bytes_le( ap +  8, 8 );

    // Register 0 has no slot in the CMB. Starting at 0 made ( i - 1 ) wrap
    // around, so reg[ 0 ] was loaded from ap + 8, i.e. with the saved
    // instruction pointer. The range now matches the save routine.
    for( i = 1; i < 64; i++ )
        p->reg[ i ] = fcpu_mem_get_n_bytes_le( ap + 16 + 8 * ( i - 1 ), 8 );

    p->sr_time_slice_limit      = fcpu_mem_get_n_bytes_le( ap + 520 +  0, 8 );
    p->sr_time_slice_counter    = fcpu_mem_get_n_bytes_le( ap + 520 +  8, 8 );

    p->sr_syscall_base          = fcpu_mem_get_n_bytes_le( ap + 520 + 24, 8 );
    p->sr_syscall_size          = fcpu_mem_get_n_bytes_le( ap + 520 + 32, 8 );
}

//
// JMPA: conditional jump to the address held in register r2.
//
// The value of register r3 (current operand size, element 0) is tested with
// fcpu_cpu_compute_move_jmpa_cond(). When the condition holds:
//   - a target with one of its two low bits set raises
//     FCPU_EXCEPTION_ALIGNMENTFAULT and nothing else happens
//   - otherwise p->instr_ptr is set to the target and its previous value is
//     written to register r1
//
void fcpu_cpu_exec_jmpa_instr( PFCPU p )
{
    UL64 op3;
    UL64 op2;
    UL64 op1;

    // The tested value was never loaded, so the condition was evaluated on
    // an uninitialized variable. It is read the same way MOVE reads it.
    op3 = p->pigrf[ p->sizeflg ]( p, p->r3, 0 );

    if( !fcpu_cpu_compute_move_jmpa_cond( p, op3 ) )
        return;

    op2 = fcpu_cpu_reg_get_octbyte_at( p, p->r2, 0 );

    if( op2 & 3 )
    {
        p->exception |= FCPU_EXCEPTION_ALIGNMENTFAULT;
        return;
    }

    op1 = p->instr_ptr;
    p->instr_ptr = op2;

    fcpu_cpu_reg_put_octbyte_at( p, p->r1, 0, op1 );
}

//
// LOADADDR: r1 = p->instr_ptr + r2 (64 bit, wrapping).
//
void fcpu_cpu_exec_loadaddr_instr( PFCPU p )
{
    UL64 displacement;

    displacement = fcpu_cpu_reg_get_octbyte_at( p, p->r2, 0 );

    fcpu_cpu_reg_put_octbyte_at( p, p->r1, 0, p->instr_ptr + displacement );
}

//
// LOADADDRI: r1 = p->instr_ptr + immediate.
// With bit 9 of the instruction word set, the 16 bit immediate is passed
// through fcpu_cpu_sign_extend_d() first; otherwise it is added as is.
//
void fcpu_cpu_exec_loadaddri_instr( PFCPU p )
{
    UL64 displacement;

    // original author's note: the instruction pointer already points to the
    // next instruction (PC+4) at this point
    if( p->instr_reg & BIT( 9 ) )
        displacement = fcpu_cpu_sign_extend_d( p->imm16 );
    else
        displacement = p->imm16;

    fcpu_cpu_reg_put_octbyte_at( p, p->r1, 0, p->instr_ptr + displacement );
}

//
// LOOP: counted branch. When the counter in r1 is non-zero, execution
// continues at the address held in r2. The counter is decremented in either
// case, so a counter of 0 wraps around to all ones.
//
void fcpu_cpu_exec_loop_instr( PFCPU p )
{
    UL64 counter;
    UL64 target;

    counter = fcpu_cpu_reg_get_octbyte_at( p, p->r1, 0 );
    target  = fcpu_cpu_reg_get_octbyte_at( p, p->r2, 0 );

    if( counter != 0 )
        p->instr_ptr = target;

    fcpu_cpu_reg_put_octbyte_at( p, p->r1, 0, counter - 1 );
}

//
// SYSCALL: enter the kernel through syscall number p->imm16.
//
// Syscalls handled natively by the virtual machine return immediately.
// Otherwise a number at or beyond p->sr_syscall_size raises a protection
// fault. For a valid number the CPU context is saved to the CMB at
// p->sr_cmb_base, execution continues at sr_syscall_base | ( number << 6 ),
// kernel mode is switched on and interrupts are disabled.
//
void fcpu_cpu_exec_syscall_instr( PFCPU p )
{
    // native syscalls are serviced by the virtual machine itself
    if( fcpu_cpu_check_handle_native_syscall( p ) )
        return;

    if( !( p->imm16 < p->sr_syscall_size ) )
    {
        p->exception |= FCPU_EXCEPTION_PROTECTIONFAULT;
        return;
    }

    // the context is saved before flags and instruction pointer are changed
    fcpu_cpu_save_cpu_into_cmb( p, p->sr_cmb_base );

    p->instr_ptr = p->sr_syscall_base | ( p->imm16 << 6 );

    p->sr_flags = p->sr_flags | FCPU_FLAGS_KERNELMODE;
    p->sr_flags = p->sr_flags & ~FCPU_FLAGS_INTERRUPTENABLE;
}

//
// RFE: reload the CPU context from the CMB at p->sr_cmb_base.
//
void fcpu_cpu_exec_rfe_instr( PFCPU p )
{
    UL64 cmb_address;

    cmb_address = p->sr_cmb_base;

    fcpu_cpu_load_cpu_from_cmb( p, cmb_address );

    // disabled in the original source:
    //    fcpu_cpu_handle_taskswitch_tlb( p, p->sr_vimd );
}

//
// HALT: flag FCPU_EXCEPTION_HALT in the pending exception set.
//
void fcpu_cpu_exec_halt_instr( PFCPU p )
{
    p->exception = p->exception | FCPU_EXCEPTION_HALT;
}

//
// SRB_SAVE: intentionally a no-operation in the emulator.
//
void fcpu_cpu_exec_srb_save_instr( PFCPU p )
{
    (void)p;    // unused on purpose
}

//
// SRB_RESTORE: intentionally a no-operation in the emulator.
//
void fcpu_cpu_exec_srb_restore_instr( PFCPU p )
{
    (void)p;    // unused on purpose
}

//
// SERIALIZE: intentionally a no-operation in the emulator.
//
void fcpu_cpu_exec_serialize_instr( PFCPU p )
{
    (void)p;    // unused on purpose
}

//
// Handler for opcodes without an implementation: flags
// FCPU_EXCEPTION_INVALID_INSTRUCTION in the pending exception set.
//
void fcpu_cpu_exec_unknown_instr( PFCPU p )
{
    p->exception = p->exception | FCPU_EXCEPTION_INVALID_INSTRUCTION;
}

//
//
//

//
// Integer executor: three register reads (r3, r2, r1), one write (r1).
//
// With the SIMD flag set every one of the p->iss[ sizeflg ] elements is
// processed, otherwise only element 0. For each element the worker
// p->psf[ opcode ][ sizeflg ] receives the r3 and r2 values and may update
// the r1 value in place. When fcpu_cpu_check_exception() reports a pending
// exception after the worker ran, the instruction stops without writing the
// current element.
//
void fcpu_cpu_exec_int_mh_3r1w_instr( PFCPU p )
{
    UL64 lane;
    UL64 lanes;
    UL64 in_r3;
    UL64 in_r2;
    UL64 result;
    UL64 extra;     // secondary worker output, not used by this form

    lanes = ( p->instr_reg & FCPU_SIMD_FLAG ) ? p->iss[ p->sizeflg ] : 1;

    for( lane = 0; lane < lanes; lane++ )
    {
        in_r3  = p->pigrf[ p->sizeflg ]( p, p->r3, lane );
        in_r2  = p->pigrf[ p->sizeflg ]( p, p->r2, lane );
        result = p->pigrf[ p->sizeflg ]( p, p->r1, lane );

        p->psf[ p->opcode ][ p->sizeflg ]( p, in_r3, in_r2, &result, &extra );
        if( fcpu_cpu_check_exception( p ) )
            return;

        p->piprf[ p->sizeflg ]( p, p->r1, lane, result );
    }
}

//
// Integer executor: 8 bit immediate plus two register reads (r2, r1), one
// write (r1).
//
// With the SIMD flag set every one of the p->iss[ sizeflg ] elements is
// processed, otherwise only element 0. The worker
// p->psf[ opcode ][ sizeflg ] receives p->imm8 and the r2 value and may
// update the r1 value in place. When fcpu_cpu_check_exception() reports a
// pending exception after the worker ran, the instruction stops without
// writing the current element.
//
void fcpu_cpu_exec_int_mh_2r1w_imm_instr( PFCPU p )
{
    UL64 lane;
    UL64 lanes;
    UL64 immediate;
    UL64 in_r2;
    UL64 result;
    UL64 extra;     // secondary worker output, not used by this form

    lanes = ( p->instr_reg & FCPU_SIMD_FLAG ) ? p->iss[ p->sizeflg ] : 1;

    immediate = p->imm8;

    for( lane = 0; lane < lanes; lane++ )
    {
        in_r2  = p->pigrf[ p->sizeflg ]( p, p->r2, lane );
        result = p->pigrf[ p->sizeflg ]( p, p->r1, lane );

        p->psf[ p->opcode ][ p->sizeflg ]( p, immediate, in_r2, &result, &extra );
        if( fcpu_cpu_check_exception( p ) )
            return;

        p->piprf[ p->sizeflg ]( p, p->r1, lane, result );
    }
}

//
// Integer executor: two register reads (r3, r2), one or two writes.
//
// With the SIMD flag set every one of the p->iss[ sizeflg ] elements is
// processed, otherwise only element 0. The worker
// p->psf[ opcode ][ sizeflg ] produces a primary and a secondary result.
// The primary result goes to r1; the secondary result goes to the register
// returned by fcpu_cpu_get_next_reg( r1 ), but only when
// FCPU_CYMODHI_FLAG is set in the instruction word. When
// fcpu_cpu_check_exception() reports a pending exception after the worker
// ran, the instruction stops without writing the current element.
//
void fcpu_cpu_exec_int_mh_2r2w_instr( PFCPU p )
{
    UL64 lane;
    UL64 lanes;
    UL64 in_r3;
    UL64 in_r2;
    UL64 primary;
    UL64 secondary;

    lanes = ( p->instr_reg & FCPU_SIMD_FLAG ) ? p->iss[ p->sizeflg ] : 1;

    for( lane = 0; lane < lanes; lane++ )
    {
        in_r3 = p->pigrf[ p->sizeflg ]( p, p->r3, lane );
        in_r2 = p->pigrf[ p->sizeflg ]( p, p->r2, lane );

        p->psf[ p->opcode ][ p->sizeflg ]( p, in_r3, in_r2, &primary, &secondary );
        if( fcpu_cpu_check_exception( p ) )
            return;

        p->piprf[ p->sizeflg ]( p, p->r1, lane, primary );

        if( !( p->instr_reg & FCPU_CYMODHI_FLAG ) )
            continue;

        p->piprf[ p->sizeflg ]( p, fcpu_cpu_get_next_reg( p->r1 ), lane, secondary );
    }
}

//
// Integer executor: two register reads (r3, r2), always two writes.
//
// With the SIMD flag set every one of the p->iss[ sizeflg ] elements is
// processed, otherwise only element 0. The worker
// p->psf[ opcode ][ sizeflg ] produces a primary and a secondary result;
// the primary result goes to r1, the secondary result to the register
// returned by fcpu_cpu_get_next_reg( r1 ). When fcpu_cpu_check_exception()
// reports a pending exception after the worker ran, the instruction stops
// without writing the current element.
//
void fcpu_cpu_exec_int_mh_2r2w_x_instr( PFCPU p )
{
    UL64 lane;
    UL64 lanes;
    UL64 in_r3;
    UL64 in_r2;
    UL64 primary;
    UL64 secondary;

    lanes = ( p->instr_reg & FCPU_SIMD_FLAG ) ? p->iss[ p->sizeflg ] : 1;

    for( lane = 0; lane < lanes; lane++ )
    {
        in_r3 = p->pigrf[ p->sizeflg ]( p, p->r3, lane );
        in_r2 = p->pigrf[ p->sizeflg ]( p, p->r2, lane );

        p->psf[ p->opcode ][ p->sizeflg ]( p, in_r3, in_r2, &primary, &secondary );
        if( fcpu_cpu_check_exception( p ) )
            return;

        p->piprf[ p->sizeflg ]( p, p->r1, lane, primary );
        p->piprf[ p->sizeflg ]( p, fcpu_cpu_get_next_reg( p->r1 ), lane, secondary );
    }
}

//
// Integer executor: two register reads (r3, r2), one write (r1).
//
// With the SIMD flag set every one of the p->iss[ sizeflg ] elements is
// processed, otherwise only element 0. The primary result of the worker
// p->psf[ opcode ][ sizeflg ] goes to r1. When fcpu_cpu_check_exception()
// reports a pending exception after the worker ran, the instruction stops
// without writing the current element.
//
void fcpu_cpu_exec_int_mh_2r1w_instr( PFCPU p )
{
    UL64 lane;
    UL64 lanes;
    UL64 in_r3;
    UL64 in_r2;
    UL64 result;
    UL64 extra;     // secondary worker output, not used by this form

    lanes = ( p->instr_reg & FCPU_SIMD_FLAG ) ? p->iss[ p->sizeflg ] : 1;

    for( lane = 0; lane < lanes; lane++ )
    {
        in_r3 = p->pigrf[ p->sizeflg ]( p, p->r3, lane );
        in_r2 = p->pigrf[ p->sizeflg ]( p, p->r2, lane );

        p->psf[ p->opcode ][ p->sizeflg ]( p, in_r3, in_r2, &result, &extra );
        if( fcpu_cpu_check_exception( p ) )
            return;

        p->piprf[ p->sizeflg ]( p, p->r1, lane, result );
    }
}

//
// Integer executor: 8 bit immediate plus one register read (r2), one or two
// writes.
//
// With the SIMD flag set every one of the p->iss[ sizeflg ] elements is
// processed, otherwise only element 0. The worker
// p->psf[ opcode ][ sizeflg ] receives p->imm8 and the r2 value and
// produces a primary and a secondary result. The primary result goes to r1;
// the secondary result goes to the register returned by
// fcpu_cpu_get_next_reg( r1 ), but only when FCPU_CYMODHI_FLAG is set.
// When fcpu_cpu_check_exception() reports a pending exception after the
// worker ran, the instruction stops without writing the current element.
//
void fcpu_cpu_exec_int_mh_1r2w_imm_instr( PFCPU p )
{
    UL64 i;
    UL64 n;
    UL64 op3;
    UL64 op2;
    UL64 op1;
    UL64 op0;

    if( p->instr_reg & FCPU_SIMD_FLAG )
        n = p->iss[ p->sizeflg ];
    else
        n = 1;

    op3 = p->imm8;

    for( i = 0; i < n; i++ )
    {
        op2 = p->pigrf[ p->sizeflg ]( p, p->r2, i );

        p->psf[ p->opcode ][ p->sizeflg ]( p, op3, op2, &op1, &op0 );

        // Check before committing, as the other executors do. The result
        // used to be written to r1 first, so a faulting worker could leave
        // a possibly unset op1 in the register.
        if( fcpu_cpu_check_exception( p ) )
            return;

        p->piprf[ p->sizeflg ]( p, p->r1, i, op1 );

        if( p->instr_reg & FCPU_CYMODHI_FLAG )
            p->piprf[ p->sizeflg ]( p, fcpu_cpu_get_next_reg( p->r1 ), i, op0 );
    }
}

//
// Integer executor: 8 bit immediate plus one register read (r2), one write
// (r1).
//
// With the SIMD flag set every one of the p->iss[ sizeflg ] elements is
// processed, otherwise only element 0. The worker
// p->psf[ opcode ][ sizeflg ] receives p->imm8 and the r2 value; its
// primary result goes to r1. When fcpu_cpu_check_exception() reports a
// pending exception after the worker ran, the instruction stops without
// writing the current element.
//
void fcpu_cpu_exec_int_mh_1r1w_imm_instr( PFCPU p )
{
    UL64 i;
    UL64 n;
    UL64 op3;
    UL64 op2;
    UL64 op1;
    UL64 op0;   // secondary worker output, not used by this form

    if( p->instr_reg & FCPU_SIMD_FLAG )
        n = p->iss[ p->sizeflg ];
    else
        n = 1;

    op3 = p->imm8;

    for( i = 0; i < n; i++ )
    {
        op2 = p->pigrf[ p->sizeflg ]( p, p->r2, i );

        p->psf[ p->opcode ][ p->sizeflg ]( p, op3, op2, &op1, &op0 );

        // Check before committing, as the other executors do. The result
        // used to be written to r1 first, so a faulting worker could leave
        // a possibly unset op1 in the register.
        if( fcpu_cpu_check_exception( p ) )
            return;

        p->piprf[ p->sizeflg ]( p, p->r1, i, op1 );
    }
}

//
// Integer executor: 6 bit immediate plus one register read (r2), one write
// (r1).
//
// With the SIMD flag set every one of the p->iss[ sizeflg ] elements is
// processed, otherwise only element 0. The worker
// p->psf[ opcode ][ sizeflg ] receives p->imm6 and the r2 value; its
// primary result goes to r1. When fcpu_cpu_check_exception() reports a
// pending exception after the worker ran, the instruction stops without
// writing the current element.
//
void fcpu_cpu_exec_int_mh_1r1w_imm6_instr( PFCPU p )
{
    UL64 i;
    UL64 n;
    UL64 op3;
    UL64 op2;
    UL64 op1;
    UL64 op0;   // secondary worker output, not used by this form

    if( p->instr_reg & FCPU_SIMD_FLAG )
        n = p->iss[ p->sizeflg ];
    else
        n = 1;

    op3 = p->imm6;

    for( i = 0; i < n; i++ )
    {
        op2 = p->pigrf[ p->sizeflg ]( p, p->r2, i );

        p->psf[ p->opcode ][ p->sizeflg ]( p, op3, op2, &op1, &op0 );

        // Check before committing, as the other executors do. The result
        // used to be written to r1 first, so a faulting worker could leave
        // a possibly unset op1 in the register.
        if( fcpu_cpu_check_exception( p ) )
            return;

        p->piprf[ p->sizeflg ]( p, p->r1, i, op1 );
    }
}

//
//
//

//
// Floating point executor: three register reads (r3, r2, r1), one write
// (r1).
//
// Registers are accessed through the floating point accessors p->pfgrf /
// p->pfprf. With the SIMD flag set every one of the p->fss[ sizeflg ]
// elements is processed, otherwise only element 0. The worker
// p->psf[ opcode ][ sizeflg ] receives the r3 and r2 values and may update
// the r1 value in place. When fcpu_cpu_check_exception() reports a pending
// exception after the worker ran, the instruction stops without writing the
// current element.
//
void fcpu_cpu_exec_flt_mh_3r1w_instr( PFCPU p )
{
    UL64 lane;
    UL64 lanes;
    UL64 in_r3;
    UL64 in_r2;
    UL64 result;
    UL64 extra;     // secondary worker output, not used by this form

    lanes = ( p->instr_reg & FCPU_SIMD_FLAG ) ? p->fss[ p->sizeflg ] : 1;

    for( lane = 0; lane < lanes; lane++ )
    {
        in_r3  = p->pfgrf[ p->sizeflg ]( p, p->r3, lane );
        in_r2  = p->pfgrf[ p->sizeflg ]( p, p->r2, lane );
        result = p->pfgrf[ p->sizeflg ]( p, p->r1, lane );

        p->psf[ p->opcode ][ p->sizeflg ]( p, in_r3, in_r2, &result, &extra );
        if( fcpu_cpu_check_exception( p ) )
            return;

        p->pfprf[ p->sizeflg ]( p, p->r1, lane, result );
    }
}

//
// Floating point executor: two register reads (r3, r2), one write (r1).
//
// Registers are accessed through the floating point accessors p->pfgrf /
// p->pfprf. With the SIMD flag set every one of the p->fss[ sizeflg ]
// elements is processed, otherwise only element 0. The primary result of
// the worker p->psf[ opcode ][ sizeflg ] goes to r1. When
// fcpu_cpu_check_exception() reports a pending exception after the worker
// ran, the instruction stops without writing the current element.
//
void fcpu_cpu_exec_flt_mh_2r1w_instr( PFCPU p )
{
    UL64 lane;
    UL64 lanes;
    UL64 in_r3;
    UL64 in_r2;
    UL64 result;
    UL64 extra;     // secondary worker output, not used by this form

    lanes = ( p->instr_reg & FCPU_SIMD_FLAG ) ? p->fss[ p->sizeflg ] : 1;

    for( lane = 0; lane < lanes; lane++ )
    {
        in_r3 = p->pfgrf[ p->sizeflg ]( p, p->r3, lane );
        in_r2 = p->pfgrf[ p->sizeflg ]( p, p->r2, lane );

        p->psf[ p->opcode ][ p->sizeflg ]( p, in_r3, in_r2, &result, &extra );
        if( fcpu_cpu_check_exception( p ) )
            return;

        p->pfprf[ p->sizeflg ]( p, p->r1, lane, result );
    }
}

//
// Floating point executor: two register reads (r3, r2), always two writes.
//
// Registers are accessed through the floating point accessors p->pfgrf /
// p->pfprf. With the SIMD flag set every one of the p->fss[ sizeflg ]
// elements is processed, otherwise only element 0. The worker
// p->psf[ opcode ][ sizeflg ] produces a primary and a secondary result;
// the primary result goes to r1, the secondary result to the register
// returned by fcpu_cpu_get_next_reg( r1 ). When fcpu_cpu_check_exception()
// reports a pending exception after the worker ran, the instruction stops
// without writing the current element.
//
void fcpu_cpu_exec_flt_mh_2r2w_x_instr( PFCPU p )
{
    UL64 lane;
    UL64 lanes;
    UL64 in_r3;
    UL64 in_r2;
    UL64 primary;
    UL64 secondary;

    lanes = ( p->instr_reg & FCPU_SIMD_FLAG ) ? p->fss[ p->sizeflg ] : 1;

    for( lane = 0; lane < lanes; lane++ )
    {
        in_r3 = p->pfgrf[ p->sizeflg ]( p, p->r3, lane );
        in_r2 = p->pfgrf[ p->sizeflg ]( p, p->r2, lane );

        p->psf[ p->opcode ][ p->sizeflg ]( p, in_r3, in_r2, &primary, &secondary );
        if( fcpu_cpu_check_exception( p ) )
            return;

        p->pfprf[ p->sizeflg ]( p, p->r1, lane, primary );
        p->pfprf[ p->sizeflg ]( p, fcpu_cpu_get_next_reg( p->r1 ), lane, secondary );
    }
}

//
//
//
//
// Registers an opcode: 'pxf' becomes its executor and psf00 / psf01 /
// psf10 / psf11 become its workers for size flag 0 / 1 / 2 / 3.
// The untyped pointers are cast to PXF and PSF respectively.
//
void fcpu_cpu_def_mh_instr( PFCPU p, UL64 opcode, void* pxf, void* psf00, void* psf01, void* psf10, void* psf11 )
{
    void* workers[ 4 ];
    UL64  size;

    workers[ 0 ] = psf00;
    workers[ 1 ] = psf01;
    workers[ 2 ] = psf10;
    workers[ 3 ] = psf11;

    p->pxf[ opcode ] = (PXF)pxf;

    for( size = 0; size < 4; size++ )
        p->psf[ opcode ][ size ] = (PSF)workers[ size ];
}

//
// Registers an integer opcode with one worker per size flag.
// Plain forwarder to fcpu_cpu_def_mh_instr().
//
void fcpu_cpu_def_int_mh_instr( PFCPU p, UL64 opcode, void* pxf, void* psf00, void* psf01, void* psf10, void* psf11 )
{
    fcpu_cpu_def_mh_instr( p,
                           opcode,
                           pxf,
                           psf00,
                           psf01,
                           psf10,
                           psf11 );
}

//
// Registers an integer opcode whose single worker 'psf' serves all four
// size flags.
//
void fcpu_cpu_def_int_sh_instr( PFCPU p, UL64 opcode, void* pxf, void* psf )
{
    fcpu_cpu_def_mh_instr( p,
                           opcode,
                           pxf,
                           psf,     // size flag 0
                           psf,     // size flag 1
                           psf,     // size flag 2
                           psf );   // size flag 3
}

//
// Registers an opcode that consists of an executor only: all four worker
// slots are set to NULL.
//
void fcpu_cpu_def_atomic_instr( PFCPU p, UL64 opcode, void* pxf )
{
    fcpu_cpu_def_int_mh_instr( p,
                               opcode,
                               pxf,
                               NULL,
                               NULL,
                               NULL,
                               NULL );
}

//
// Registers a floating point opcode with two workers. psf00 serves size
// flags 0 and 2, psf01 serves size flags 1 and 3.
//
void fcpu_cpu_def_flt_mh_instr( PFCPU p, UL64 opcode, void* pxf, void* psf00, void* psf01 )
{
    fcpu_cpu_def_int_mh_instr( p,
                               opcode,
                               pxf,
                               psf00,   // size flag 0
                               psf01,   // size flag 1
                               psf00,   // size flag 2
                               psf01 ); // size flag 3
}

//
//
//
// Initialises the emulator side of the cpu and requests a cpu reset.
//
// The function
//   - points every opcode at fcpu_cpu_exec_unknown_instr and clears its
//     size specific handlers,
//   - fills the per-size-flag lookup tables (sb, iss, fss, mask, msbmask,
//     sign extension and register element accessors),
//   - registers all implemented instructions in the dispatch tables,
//   - sets FCPU_EXCEPTION_RESET in p->exception. The architectural state
//     itself is reset by fcpu_cpu_handle_exception() once it sees that
//     flag.
//
void fcpu_cpu_reset( PFCPU p )
{
    UL64    i;

    //
    // emulator specific reset
    //

    // every opcode starts out as "unknown" until it is registered below
    for( i = 0; i < FCPU_MAX_OPCODES; i++ )
    {
        p->pxf[ i ]         = (PXF)&fcpu_cpu_exec_unknown_instr;
        p->psf[ i ][ 0 ]    = (PSF)NULL;
        p->psf[ i ][ 1 ]    = (PSF)NULL;
        p->psf[ i ][ 2 ]    = (PSF)NULL;
        p->psf[ i ][ 3 ]    = (PSF)NULL;
    }

    // lookup tables indexed by the 2 bit size flag
    // NOTE(review): sb looks like the element size in bytes and iss like
    // the number of integer elements per register - confirm in fcpudefs.h
    p->sb[ 0 ] = 1L;
    p->sb[ 1 ] = 2L;
    p->sb[ 2 ] = 4L;
    p->sb[ 3 ] = 8L;

    p->iss[ 0 ] = 8L;
    p->iss[ 1 ] = 4L;
    p->iss[ 2 ] = 2L;
    p->iss[ 3 ] = 1L;

    // number of elements processed by the floating point executors when
    // the SIMD flag is set
    p->fss[ 0 ] = 2L;
    p->fss[ 1 ] = 1L;
    p->fss[ 2 ] = 2L;
    p->fss[ 3 ] = 1L;

    // all-ones mask covering 8, 16, 32 and 64 bits
    p->mask[ 0 ] = 0x00000000000000FF;
    p->mask[ 1 ] = 0x000000000000FFFF;
    p->mask[ 2 ] = 0x00000000FFFFFFFF;
    p->mask[ 3 ] = 0xFFFFFFFFFFFFFFFF;

    // most significant bit of an 8, 16, 32 and 64 bit value
    p->msbmask[ 0 ] = 0x0000000000000080;
    p->msbmask[ 1 ] = 0x0000000000008000;
    p->msbmask[ 2 ] = 0x0000000080000000;
    p->msbmask[ 3 ] = 0x8000000000000000;

    p->psef[ 0 ] = (PSEF)&fcpu_cpu_sign_extend_b;
    p->psef[ 1 ] = (PSEF)&fcpu_cpu_sign_extend_d;
    p->psef[ 2 ] = (PSEF)&fcpu_cpu_sign_extend_q;
    p->psef[ 3 ] = (PSEF)&fcpu_cpu_sign_extend_o;

    // floating point register element accessors: only quadbyte and
    // octbyte elements exist, selected by the low bit of the size flag
    p->pfgrf[ 0 ] = (PGRF)&fcpu_cpu_reg_get_quadbyte_at;
    p->pfgrf[ 1 ] = (PGRF)&fcpu_cpu_reg_get_octbyte_at;
    p->pfgrf[ 2 ] = (PGRF)&fcpu_cpu_reg_get_quadbyte_at;
    p->pfgrf[ 3 ] = (PGRF)&fcpu_cpu_reg_get_octbyte_at;

    p->pfprf[ 0 ] = (PPRF)&fcpu_cpu_reg_put_quadbyte_at;
    p->pfprf[ 1 ] = (PPRF)&fcpu_cpu_reg_put_octbyte_at;
    p->pfprf[ 2 ] = (PPRF)&fcpu_cpu_reg_put_quadbyte_at;
    p->pfprf[ 3 ] = (PPRF)&fcpu_cpu_reg_put_octbyte_at;

    // integer register element accessors, one per size flag value
    p->pigrf[ 0 ] = (PGRF)&fcpu_cpu_reg_get_byte_at;
    p->pigrf[ 1 ] = (PGRF)&fcpu_cpu_reg_get_doublebyte_at;
    p->pigrf[ 2 ] = (PGRF)&fcpu_cpu_reg_get_quadbyte_at;
    p->pigrf[ 3 ] = (PGRF)&fcpu_cpu_reg_get_octbyte_at;

    p->piprf[ 0 ] = (PPRF)&fcpu_cpu_reg_put_byte_at;
    p->piprf[ 1 ] = (PPRF)&fcpu_cpu_reg_put_doublebyte_at;
    p->piprf[ 2 ] = (PPRF)&fcpu_cpu_reg_put_quadbyte_at;
    p->piprf[ 3 ] = (PPRF)&fcpu_cpu_reg_put_octbyte_at;

    //
    // 6.1.1 Core Arithmetic operations
    //

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_ADD,
                               &fcpu_cpu_exec_int_mh_2r2w_instr,
                               &fcpu_cpu_exec_add_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_SUB,
                               &fcpu_cpu_exec_int_mh_2r2w_instr,
                               &fcpu_cpu_exec_sub_bdqo_instr );

    fcpu_cpu_def_int_mh_instr( p,
                               FCPU_OP_MUL,
                               &fcpu_cpu_exec_int_mh_2r2w_instr,
                               &fcpu_cpu_exec_mul_b_instr,
                               &fcpu_cpu_exec_mul_d_instr,
                               &fcpu_cpu_exec_mul_q_instr,
                               &fcpu_cpu_exec_mul_o_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_DIV,
                               &fcpu_cpu_exec_int_mh_2r2w_instr,
                               &fcpu_cpu_exec_div_bdqo_instr );

    //
    // 6.1.2 Optional Arithmetic operations
    //

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_ADDI,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_addi_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_SUBI,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_subi_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_MULI,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_muli_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_DIVI,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_divi_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_MOD,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_mod_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_MODI,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_modi_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_MAC,
                               &fcpu_cpu_exec_int_mh_3r1w_instr,
                               &fcpu_cpu_exec_mac_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_ADDSUB,
                               &fcpu_cpu_exec_int_mh_2r2w_x_instr,
                               &fcpu_cpu_exec_addsub_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_POPCOUNT,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_popcount_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_POPCOUNTI,
                               &fcpu_cpu_exec_int_mh_1r1w_imm_instr,
                               &fcpu_cpu_exec_popcount_instr );

    //
    // 6.1.3 Optional Increment-based operations
    //

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_INC,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_inc_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_DEC,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_dec_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_NEG,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_neg_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_SCAN,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_scan_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_CMPL,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_cmpl_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_CMPLE,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_cmple_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_CMPLI,
                               &fcpu_cpu_exec_int_mh_1r1w_imm_instr,
                               &fcpu_cpu_exec_cmpl_bdqo_instr );

    // the immediate form shares the handler of its register form, i.e.
    // the "less or equal" compare, not the strict "less" compare
    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_CMPLEI,
                               &fcpu_cpu_exec_int_mh_1r1w_imm_instr,
                               &fcpu_cpu_exec_cmple_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_ABS,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_abs_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_MAX,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_max_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_MIN,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_min_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_MAXI,
                               &fcpu_cpu_exec_int_mh_1r1w_imm_instr,
                               &fcpu_cpu_exec_max_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_MINI,
                               &fcpu_cpu_exec_int_mh_1r1w_imm_instr,
                               &fcpu_cpu_exec_min_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_SORT,
                               &fcpu_cpu_exec_int_mh_2r2w_x_instr,
                               &fcpu_cpu_exec_sort_bdqo_instr );

    //
    // 6.2.1 Core Shift and Rotate operations
    //

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_SHIFTL,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_shiftl_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_SHIFTR,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_shiftr_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_SHIFTRA,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_shiftra_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_ROTL,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_rotl_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_ROTR,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_rotr_bdqo_instr );

    //
    // 6.2.2 Optional Bit Shift and Rotate operations
    //

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_SHIFTLI,
                               &fcpu_cpu_exec_int_mh_1r1w_imm_instr,
                               &fcpu_cpu_exec_shiftl_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_SHIFTRI,
                               &fcpu_cpu_exec_int_mh_1r1w_imm_instr,
                               &fcpu_cpu_exec_shiftr_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_SHIFTRAI,
                               &fcpu_cpu_exec_int_mh_1r1w_imm_instr,
                               &fcpu_cpu_exec_shiftra_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_ROTLI,
                               &fcpu_cpu_exec_int_mh_1r1w_imm_instr,
                               &fcpu_cpu_exec_rotl_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_ROTRI,
                               &fcpu_cpu_exec_int_mh_1r1w_imm_instr,
                               &fcpu_cpu_exec_rotr_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_BITOP,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_bitop_bdqo_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_BITOPI,
                               &fcpu_cpu_exec_int_mh_1r1w_imm6_instr,
                               &fcpu_cpu_exec_bitop_bdqo_instr );

    //
    // 6.2.3 Optional Bit Shuffling operations
    //

    fcpu_cpu_def_int_mh_instr( p,
                               FCPU_OP_BYTEREV,
                               &fcpu_cpu_exec_int_mh_2r2w_instr,
                               &fcpu_cpu_exec_byterev_b_instr,
                               &fcpu_cpu_exec_byterev_d_instr,
                               &fcpu_cpu_exec_byterev_q_instr,
                               &fcpu_cpu_exec_byterev_o_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_BITREV,
                               &fcpu_cpu_exec_int_mh_3r1w_instr,
                               &fcpu_cpu_exec_bitrev_instr );

    fcpu_cpu_def_int_sh_instr( p,
                               FCPU_OP_BITREVI,
                               &fcpu_cpu_exec_int_mh_2r1w_imm_instr,
                               &fcpu_cpu_exec_bitrev_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_MIX,
                               &fcpu_cpu_exec_mix_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_EXPAND,
                               &fcpu_cpu_exec_expand_instr );

    fcpu_cpu_def_int_mh_instr( p,
                               FCPU_OP_SDUP,
                               &fcpu_cpu_exec_int_mh_2r1w_instr,
                               &fcpu_cpu_exec_sdup_b_instr,
                               &fcpu_cpu_exec_sdup_d_instr,
                               &fcpu_cpu_exec_sdup_q_instr,
                               &fcpu_cpu_exec_sdup_o_instr );

    //
    // 6.3.1 Core Logic operations
    //

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_LOGIC,
                               &fcpu_cpu_exec_logic_instr );

    //
    // 6.3.2 Optional Logic operations
    //

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_LOGICI,
                               &fcpu_cpu_exec_logici_instr );

    //
    // 6.4.1 Level 1 Floating Point operations
    //

    fcpu_cpu_def_flt_mh_instr( p,
                               FCPU_OP_FADD,
                               &fcpu_cpu_exec_flt_mh_2r1w_instr,
                               &fcpu_cpu_exec_fadd_single_instr,
                               &fcpu_cpu_exec_fadd_double_instr );

    fcpu_cpu_def_flt_mh_instr( p,
                               FCPU_OP_FSUB,
                               &fcpu_cpu_exec_flt_mh_2r1w_instr,
                               &fcpu_cpu_exec_fsub_single_instr,
                               &fcpu_cpu_exec_fsub_double_instr );

    fcpu_cpu_def_flt_mh_instr( p,
                               FCPU_OP_FMUL,
                               &fcpu_cpu_exec_flt_mh_2r1w_instr,
                               &fcpu_cpu_exec_fmul_single_instr,
                               &fcpu_cpu_exec_fmul_double_instr );

    fcpu_cpu_def_flt_mh_instr( p,
                               FCPU_OP_F2INT,
                               &fcpu_cpu_exec_flt_mh_2r1w_instr,
                               &fcpu_cpu_exec_f2int_single_instr,
                               &fcpu_cpu_exec_f2int_double_instr );

    fcpu_cpu_def_flt_mh_instr( p,
                               FCPU_OP_INT2F,
                               &fcpu_cpu_exec_flt_mh_2r1w_instr,
                               &fcpu_cpu_exec_int2f_single_instr,
                               &fcpu_cpu_exec_int2f_double_instr );

    //
    // 6.4.2 Level 2 Floating Point operations
    //

    fcpu_cpu_def_flt_mh_instr( p,
                               FCPU_OP_FDIV,
                               &fcpu_cpu_exec_flt_mh_2r1w_instr,
                               &fcpu_cpu_exec_fdiv_single_instr,
                               &fcpu_cpu_exec_fdiv_double_instr );

    //
    // 6.4.3 Level 3 Floating Point operations
    //

    // NOTE(review): FSQRT is bound to the fdiv handlers. This looks like
    // a copy/paste leftover - confirm whether dedicated fsqrt handlers
    // exist and should be registered here instead.
    fcpu_cpu_def_flt_mh_instr( p,
                               FCPU_OP_FSQRT,
                               &fcpu_cpu_exec_flt_mh_2r1w_instr,
                               &fcpu_cpu_exec_fdiv_single_instr,
                               &fcpu_cpu_exec_fdiv_double_instr );

    fcpu_cpu_def_flt_mh_instr( p,
                               FCPU_OP_FADDSUB,
                               &fcpu_cpu_exec_flt_mh_2r2w_x_instr,
                               &fcpu_cpu_exec_faddsub_single_instr,
                               &fcpu_cpu_exec_faddsub_double_instr );

    fcpu_cpu_def_flt_mh_instr( p,
                               FCPU_OP_FMAC,
                               &fcpu_cpu_exec_flt_mh_3r1w_instr,
                               &fcpu_cpu_exec_fmac_single_instr,
                               &fcpu_cpu_exec_fmac_double_instr );

    //
    // 6.5.1 Core Memory Access operations
    //
    //
    // 6.5.2 Optional Memory Access operations
    //

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_LOAD,
                               &fcpu_cpu_exec_load_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_STORE,
                               &fcpu_cpu_exec_store_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_LOADI,
                               &fcpu_cpu_exec_loadi_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_STOREI,
                               &fcpu_cpu_exec_storei_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_CACHEMM,
                               &fcpu_cpu_exec_cachemm_instr );

    //
    // 6.6.1 Core Data Move operations
    //

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_MOVE,
                               &fcpu_cpu_exec_move_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_LOADCONS,
                               &fcpu_cpu_exec_loadcons_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_LOADCONSX,
                               &fcpu_cpu_exec_loadconsx_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_PUT,
                               &fcpu_cpu_exec_put_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_GET,
                               &fcpu_cpu_exec_get_instr );

    //
    // 6.6.2 Optional Data Move operations
    //

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_PUTI,
                               &fcpu_cpu_exec_puti_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_GETI,
                               &fcpu_cpu_exec_geti_instr );

    //
    // 6.7.1 Core Instruction Flow Control instructions
    //

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_JMPA,
                               &fcpu_cpu_exec_jmpa_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_LOADADDR,
                               &fcpu_cpu_exec_loadaddr_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_LOADADDRI,
                               &fcpu_cpu_exec_loadaddri_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_LOOP,
                               &fcpu_cpu_exec_loop_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_SYSCALL,
                               &fcpu_cpu_exec_syscall_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_RFE,
                               &fcpu_cpu_exec_rfe_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_HALT,
                               &fcpu_cpu_exec_halt_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_SRB_SAVE,
                               &fcpu_cpu_exec_srb_save_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_SRB_RESTORE,
                               &fcpu_cpu_exec_srb_restore_instr );

    fcpu_cpu_def_atomic_instr( p,
                               FCPU_OP_SERIALIZE,
                               &fcpu_cpu_exec_serialize_instr );

    //
    // request the architectural reset; it is carried out by
    // fcpu_cpu_handle_exception()
    //

    p->exception |= FCPU_EXCEPTION_RESET;
}

//
//
//

//
// Enters the trap handler selected by trap_vector.
//
// A vector greater than p->sr_trap_size is rejected by raising
// FCPU_EXCEPTION_RESET (a vector equal to sr_trap_size is accepted).
// Otherwise the cpu state is saved via fcpu_cpu_save_cpu_into_cmb(), the
// instruction pointer is set to sr_trap_base OR-ed with trap_vector * 64,
// kernel mode is switched on and interrupts are disabled.
//
void fcpu_cpu_exec_trap( PFCPU p, UL64 trap_vector )
{
    if( trap_vector > p->sr_trap_size )
    {
        p->exception |= FCPU_EXCEPTION_RESET;
    }
    else
    {
        fcpu_cpu_save_cpu_into_cmb( p, p->sr_cmb_base );

        p->instr_ptr = p->sr_trap_base | ( trap_vector << 6 );

        p->sr_flags = ( p->sr_flags | FCPU_FLAGS_KERNELMODE ) & ~FCPU_FLAGS_INTERRUPTENABLE;
    }
}

//
// Services at most one pending exception per call.
//
// The flags in p->exception are tested in a fixed order: reset, page
// fault, invalid instruction, protection fault, alignment fault,
// division by zero, timer count zero. The first one found is cleared and
// handled; any others stay pending for a later call.
//
void fcpu_cpu_handle_exception( PFCPU p )
{
    UL64 entry;

    if( p->exception & FCPU_EXCEPTION_RESET )
    {
        //
        // fcpu reset: bring the architectural state to its power-on values
        //
        p->instr_ptr                = 0L;

        p->sr_flags                 = FCPU_FLAGS_KERNELMODE;
        p->exception                = 0L;

        p->sr_cycle                 = 0L;
        p->sr_paging                = 0L;
        p->sr_irq_base              = 0L;
        p->sr_irq_size              = 0L;   /* size = 0 means interrupts disabled */
        p->sr_trap_base             = 0L;
        p->sr_trap_size             = 0L;
        p->sr_syscall_base          = 0L;
        p->sr_syscall_size          = 0L;
        p->sr_tlb_miss_base         = 0L;
        p->sr_time_slice_counter    = 0xFFFFFFFFFFFFFFFF;
        p->sr_time_slice_limit      = 0xFFFFFFFFFFFFFFFF;
        p->sr_cmb_base              = 0L;
        p->sr_event_counter_0_cfg   = 0L;
        p->sr_event_counter_0       = 0L;
        p->sr_event_counter_1_cfg   = 0L;
        p->sr_event_counter_1       = 0L;
        p->sr_event_counter_2_cfg   = 0L;
        p->sr_event_counter_2       = 0L;
        p->sr_event_counter_3_cfg   = 0L;
        p->sr_event_counter_3       = 0L;
        p->sr_event_counter_4_cfg   = 0L;
        p->sr_event_counter_4       = 0L;
        p->sr_event_counter_5_cfg   = 0L;
        p->sr_event_counter_5       = 0L;
        p->sr_event_counter_6_cfg   = 0L;
        p->sr_event_counter_6       = 0L;
        p->sr_event_counter_7_cfg   = 0L;
        p->sr_event_counter_7       = 0L;

        // invalidate every VMID cache entry
        for( entry = 0; entry < FCPU_NUM_VMIDC_ENTRIES; entry++ )
        {
            p->vmidc[ entry ].valid = 0L;
        }

        // invalidate every tlb entry
        for( entry = 0; entry < FCPU_NUM_TLB_ENTRIES; entry++ )
        {
            p->tlb[ entry ].valid = 0L;
        }

        p->exception &= ~FCPU_EXCEPTION_RESET;
        return;
    }

    if( p->exception & FCPU_EXCEPTION_PAGEFAULT )
    {
        // page faults do not go through the trap table: the state is
        // saved and execution continues at sr_tlb_miss_base in kernel
        // mode with interrupts disabled
        fcpu_cpu_save_cpu_into_cmb( p, p->sr_cmb_base );

        p->exception &= ~FCPU_EXCEPTION_PAGEFAULT;
        p->instr_ptr = p->sr_tlb_miss_base;

        p->sr_flags |= FCPU_FLAGS_KERNELMODE;
        p->sr_flags &= ~FCPU_FLAGS_INTERRUPTENABLE;
        return;
    }

    // all remaining exceptions clear their flag and enter their trap vector

    if( p->exception & FCPU_EXCEPTION_INVALID_INSTRUCTION )
    {
        p->exception &= ~FCPU_EXCEPTION_INVALID_INSTRUCTION;
        fcpu_cpu_exec_trap( p, FCPU_TRAP_VECTOR_INVALID_INSTRUCTION );
        return;
    }

    if( p->exception & FCPU_EXCEPTION_PROTECTIONFAULT )
    {
        p->exception &= ~FCPU_EXCEPTION_PROTECTIONFAULT;
        fcpu_cpu_exec_trap( p, FCPU_TRAP_VECTOR_PROTECTIONFAULT );
        return;
    }

    if( p->exception & FCPU_EXCEPTION_ALIGNMENTFAULT )
    {
        p->exception &= ~FCPU_EXCEPTION_ALIGNMENTFAULT;
        fcpu_cpu_exec_trap( p, FCPU_TRAP_VECTOR_ALIGNMENTFAULT );
        return;
    }

    if( p->exception & FCPU_EXCEPTION_DIVISION_BY_ZERO )
    {
        p->exception &= ~FCPU_EXCEPTION_DIVISION_BY_ZERO;
        fcpu_cpu_exec_trap( p, FCPU_TRAP_VECTOR_DIVISION_BY_ZERO );
        return;
    }

    if( p->exception & FCPU_EXCEPTION_TIMER_COUNT_ZERO )
    {
        p->exception &= ~FCPU_EXCEPTION_TIMER_COUNT_ZERO;
        fcpu_cpu_exec_trap( p, FCPU_TRAP_VECTOR_TIMER_COUNT_ZERO );
    }
}

//
// Fetches the 4 byte instruction word at p->instr_ptr into p->instr_reg.
//
// Raises FCPU_EXCEPTION_ALIGNMENTFAULT if the instruction pointer is not
// a multiple of 4. The address is then passed through
// fcpu_cpu_check_tlb() with execute access; if that leaves an exception
// pending, nothing is fetched.
//
void fcpu_cpu_fetch_instr( PFCPU p )
{
    UL64 fetch_addr;

    if( ( p->instr_ptr & 3 ) != 0 )
    {
        p->exception |= FCPU_EXCEPTION_ALIGNMENTFAULT;
        return;
    }

    fcpu_cpu_check_tlb( p, p->instr_ptr, &fetch_addr, FCPU_TLB_ACCESS_MODE_X );

    if( !fcpu_cpu_check_exception( p ) )
    {
        p->instr_reg = fcpu_mem_get_n_bytes_le( fetch_addr, 4 );
    }
}

//
// Splits the instruction word in p->instr_reg into its fields.
//
//   bits  0.. 7  opcode
//   bits  8.. 9  size flag
//   bits 14..19  r3    (same bits as imm6)
//   bits 20..25  r2
//   bits 26..31  r1
//   bits 14..19  imm6
//   bits 12..19  imm8
//   bits 10..25  imm16
//
// All fields are extracted unconditionally; the executor of the
// instruction decides which of them are meaningful.
//
void fcpu_cpu_decode_instr( PFCPU p )
{
    // opcode and operand size
    p->opcode  = (UL08)( p->instr_reg & 0xFF );
    p->sizeflg = (UL08)( ( p->instr_reg >> 8 ) & 0x3 );

    // register numbers
    p->r3 = (UL08)( ( p->instr_reg >> 14 ) & 0x3F );
    p->r2 = (UL08)( ( p->instr_reg >> 20 ) & 0x3F );
    p->r1 = (UL08)( ( p->instr_reg >> 26 ) & 0x3F );

    // immediates of the three supported widths
    p->imm6  = ( p->instr_reg >> 14 ) & 0x3F;
    p->imm8  = ( p->instr_reg >> 12 ) & 0xFF;
    p->imm16 = ( p->instr_reg >> 10 ) & 0xFFFF;
}

//
// Executes the decoded instruction by calling the executor registered
// for p->opcode in the p->pxf dispatch table.
//
void fcpu_cpu_exec_instr( PFCPU p )
{
    ( *p->pxf[ p->opcode ] )( p );
}

//
// Advances the instruction pointer to the following instruction word:
// the two low bits are cleared and 4 is added.
//
void fcpu_cpu_next_instr( PFCPU p )
{
    p->instr_ptr &= 0xFFFFFFFFFFFFFFFC;
    p->instr_ptr += 4;
}

//
// Per-clock counter maintenance.
//
// Increments the cycle counter and counts the time slice counter down.
// When the time slice counter goes from 1 to 0,
// FCPU_EXCEPTION_TIMER_COUNT_ZERO is raised. A counter that is already 0
// is left alone and raises nothing.
//
void fcpu_cpu_update_counters( PFCPU p )
{
    // SR_CYCLE
    p->sr_cycle += 1;

    // SR_TIME_SLICE_COUNTER
    if( 1 == p->sr_time_slice_counter )
    {
        // last tick of the time slice
        p->exception |= FCPU_EXCEPTION_TIMER_COUNT_ZERO;
        p->sr_time_slice_counter = 0;
    }
    else if( 0 != p->sr_time_slice_counter )
    {
        p->sr_time_slice_counter--;
    }
}

//
// Runs one clock of the emulated cpu.
//
// A pending exception is serviced first. Then the stages counter update,
// instruction fetch, instruction pointer advance, decode and execute run
// in that order. After every stage the exception state is checked and
// the clock ends early if an exception is pending; that exception is
// serviced at the start of the next clock.
//
void fcpu_cpu_clock( PFCPU p )
{
    int stage;

    fcpu_cpu_handle_exception( p );

    for( stage = 0; stage < 5; stage++ )
    {
        switch( stage )
        {
            case 0:
                fcpu_cpu_update_counters( p );
                break;

            case 1:
                fcpu_cpu_fetch_instr( p );
                break;

            case 2:
                fcpu_cpu_next_instr( p );
                break;

            case 3:
                fcpu_cpu_decode_instr( p );
                break;

            default:
                fcpu_cpu_exec_instr( p );
                break;
        }

        // an exception raised by this stage ends the clock
        if( fcpu_cpu_check_exception( p ) )
            return;
    }
}