--------------------------------------------------------------------------
-- ROP2.vhdl - ROP2 Execution Unit for the F-CPU
-- Copyright (C) 2000-2001 Yann GUIDON (whygee@f-cpu.org)
--
-- v0.2: Michael Riepe reorganized the main for-generate loop
-- + corrected the lookup table (wrong op for ORN)
-- v0.3: YG replaced UMAX/8 with MAXSIZE :-)
-- v0.4: 11/17/2000, YG wants to rewrite the unit with MR's gate library ...
-- v0.5: 8/12/2001, YG modifies the interface, the names, adds MUX,...
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
--
--------------------------------------------------------------------------
-- This is the first version ever for this unit.
-- It should be easily synthesizable but there is no proof yet.
-- What matters most today is that it compiles and behaves correctly.
-- Warning: this code is and should remain purely combinational;
-- there is no latching here, it must be done at another level.
-- Furthermore, the function lookup table should be moved earlier
-- in the pipeline, in parallel with the Xbar cycle.
-- The big fanout problem (propagation of the opcode from 1 to 64 bits)
-- overlaps the Xbar cycle, so we can make a nice "signal tree".
-- Finally, only byte combines are possible so far. The COMBINE
-- instruction is still not completely re-defined. The manual should
-- be modified.
--------------------------------------------------------------------------
-- Sun Aug 12 01:16:11 2001: rop2_bis.vhdl is still untested but it includes
-- the latest updates to the FC0 core. 
-- Tue Aug 21 08:45:16 2001: trying to make something that works reasonably.

LIBRARY ieee;
    USE ieee.std_logic_1164.ALL;
    USE ieee.numeric_std.all;
LIBRARY work;
    USE work.FCPU_config.ALL;

-- EU_ROP2 : port interface of the ROP2 (bitwise boolean) execution unit.
-- Three operand buses come in, four pre-buffered function-bit buses select
-- the boolean function, a 2-bit mode selects which partial result is
-- driven onto the single result bus.
-- F_VECTOR and MAXSIZE are taken from work.FCPU_config.
Entity EU_ROP2 is
  port(
    -- the 3 operands
    ROP2_in_A : in F_VECTOR;
    ROP2_in_B : in F_VECTOR;
    ROP2_in_C : in F_VECTOR;

    -- pre-buffered boolean function bits (fanout=4 : each element is
    -- duplicated onto 4 adjacent result bits inside the architecture).
    -- NOTE(review): the range below holds MAXSIZE*2+1 elements, while the
    -- FANOUT generate in arch1 reads only indices (MAXSIZE*2)-1 downto 0.
    -- This looks like an off-by-one; confirm with the driving unit before
    -- narrowing the range, since that would change the interface.
    ROP2_function_bit0 : in Std_ulogic_vector((MAXSIZE *2) downto 0);
    ROP2_function_bit1 : in Std_ulogic_vector((MAXSIZE *2) downto 0);
    ROP2_function_bit2 : in Std_ulogic_vector((MAXSIZE *2) downto 0);
    ROP2_function_bit3 : in Std_ulogic_vector((MAXSIZE *2) downto 0);

    -- 2 function bits from the instruction (select the output stage)
    ROP2_mode : in Std_ulogic_vector(1 downto 0);

--    Combine_size : in Std_ulogic_vector(1 downto 0);   -- unused ATM. Byte chunks only.

    -- the result
    ROP2_out : out F_VECTOR
  );
end EU_ROP2;

-- arch1 : purely combinational implementation of the ROP2 unit.
--
-- Data flow:
--   1. FANOUT       : each element of the four function-bit ports is copied
--                     onto 4 adjacent bits of a full-width local vector.
--   2. partial_result : per-bit 2-input boolean function of A and B, selected
--                     by the four local function vectors (a 4-entry truth
--                     table per bit).
--   2b. partial_MUX : per-bit multiplexer between A and B, selected by C.
--   3. BYTE_COMBINE : per-byte OR-reduce / AND-reduce of partial_result,
--                     replicated over the 8 bits of the byte.
--   4. final select : ROP2_mode picks which partial result drives ROP2_out.
--
-- Fixes relative to the previous revision:
--   * partial_result now uses the fanned-out local_function_N vectors instead
--     of the raw ROP2_function_bitN ports (whose length differs from the
--     operands, and which left local_function_N unused).
--   * partial_MUX referenced undeclared ROP2_input_0/1/2; it now uses the
--     declared ports ROP2_in_A/B/C.
--   * the output select referenced undeclared partial_ROP; it now uses
--     partial_result.
Architecture arch1 of EU_ROP2 is
  signal
    local_function_0,   -- function bit 0, one copy per result bit
    local_function_1,   -- function bit 1, one copy per result bit
    local_function_2,   -- function bit 2, one copy per result bit
    local_function_3,   -- function bit 3, one copy per result bit
    partial_result,     -- output of the boolean operator
    partial_OR,         -- byte-wise OR combine of partial_result
    partial_AND,        -- byte-wise AND combine of partial_result
    partial_MUX : F_VECTOR;  -- bitwise multiplexer output

begin

--------------------------------------------------------------------------
-- During Xbar cycle :
--------------------------------------------------------------------------
-- (this part must be moved in another section of the code)
---- lookup table that decodes the function bits
--   with ROP_function select
--     local_function <=
--       "0001" when "000",  -- AND
--       "0010" when "001",  -- ANDN
--       "0110" when "010",  -- XOR
--       "0111" when "011",  -- OR
--       "1000" when "100",  -- NOR
--       "1001" when "101",  -- XNOR
--       "1011" when "110",  -- ORN
--       "1110" when others; -- NAND
---- The signal tree :
--   ROP2_function_bit0 <= F_RANGE => local_function(0);
--   ROP2_function_bit1 <= F_RANGE => local_function(1);
--   ROP2_function_bit2 <= F_RANGE => local_function(2);
--   ROP2_function_bit3 <= F_RANGE => local_function(3);

--------------------------------------------------------------------------
-- ROP2 cycle : (combinational part only)
--------------------------------------------------------------------------

-- 1 : last fanout for the function bits :
-- element j of each function-bit port drives bits 4*j .. 4*j+3 of the
-- matching local vector (same connectivity as the former hand-unrolled
-- version, written as a nested generate).
-- NOTE(review): this indexing assumes F_VECTOR covers at least
-- (MAXSIZE*8)-1 downto 0 -- confirm against FCPU_config.
  FANOUT : for j in (MAXSIZE*2)-1 downto 0 generate
    QUAD : for k in 3 downto 0 generate
      local_function_0(4*j+k) <= ROP2_function_bit0(j);
      local_function_1(4*j+k) <= ROP2_function_bit1(j);
      local_function_2(4*j+k) <= ROP2_function_bit2(j);
      local_function_3(4*j+k) <= ROP2_function_bit3(j);
    end generate QUAD;
  end generate FANOUT;


-- 2 : the ROP2 operator itself.
-- Per bit, (A,B) addresses a 4-entry truth table :
--   A=0,B=0 -> function bit 3      A=0,B=1 -> function bit 2
--   A=1,B=0 -> function bit 1      A=1,B=1 -> function bit 0
-- The full-width local_function_N vectors are used so that every operand
-- of the vector "and"/"or" has the same length.
  partial_result <=
       ((not ROP2_in_A) and (not ROP2_in_B) and local_function_3)
    or ((not ROP2_in_A) and (    ROP2_in_B) and local_function_2)
    or ((    ROP2_in_A) and (not ROP2_in_B) and local_function_1)
    or ((    ROP2_in_A) and (    ROP2_in_B) and local_function_0);

-- 2 bis : the MUX
-- Per bit : result = A where C is '1', B where C is '0'.
-- NOTE(review): the previous text used undeclared names ROP2_input_0/1/2;
-- they are mapped positionally to ROP2_in_A/B/C here -- confirm the operand
-- roles against the instruction definition.
  partial_MUX <=
       (ROP2_in_A and (    ROP2_in_C))
    or (ROP2_in_B and (not ROP2_in_C));


-- 3 : partial ORs and ANDs on the byte chunks :
-- each byte of partial_OR is all ones when the matching byte of
-- partial_result differs from "00000000", otherwise all zeroes;
-- each byte of partial_AND is all ones when the matching byte of
-- partial_result equals "11111111", otherwise all zeroes.
  BYTE_COMBINE : for i in MAXSIZE-1 downto 0 generate
    partial_OR(8*i+7 downto 8*i) <= "11111111" when
      partial_result(8*i+7 downto 8*i) /= "00000000"
      else "00000000";
    partial_AND(8*i+7 downto 8*i) <= "11111111" when
      partial_result(8*i+7 downto 8*i) = "11111111"
      else "00000000";
-- I'm still uncertain about the best way to write a multi-size version.
-- Plus, the latency might explode the ROP2 unit's performance.
-- So the multi-size version is dropped until it becomes necessary.
-- Let's stick to plain bytes...
  end generate BYTE_COMBINE;

-- 4 : final selection stage :
-- NOTE(review): ROP2_DIRECT_MODE, ROP2_AND_MODE and ROP2_OR_MODE are not
-- declared in this file; presumably 2-bit constants from work.FCPU_config
-- (the original note below says that file was not yet updated) -- confirm.
  with ROP2_mode select
    ROP2_out <=
      partial_result when ROP2_DIRECT_MODE,
      partial_AND    when ROP2_AND_MODE,
      partial_OR     when ROP2_OR_MODE,
      partial_MUX    when others; -- MUX
-- warning, f-cpu_config.vhdl is not yet updated
end arch1;
