-- imu.vhdl - Integer Multiply Execution Unit for the F-CPU.
-- Copyright (C) 2000, 2001 Michael Riepe <michael@stud.uni-hannover.de>
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

-- $Id: imu.vhdl,v 1.11 2001/08/22 13:49:36 michael Exp $

library IEEE;
use IEEE.std_logic_1164.all;

library work;
use work.FCPU_config.all;

entity EU_IMU is
	port (
		-- Operand inputs:
		--   Din_0 = multiplicand
		--   Din_1 = multiplier
		--   Din_2 = summand (optional)
		Din_0, Din_1, Din_2 : in F_VECTOR;

		-- MAC flags (should be derived from the opcode)
		MacLo, MacHi, MacAlt : in std_ulogic;

		-- Flag bits, copied directly from the instruction word.
		-- Note that the index range matches the instruction word
		-- (13 downto 8), it does not start at 0.
		Flags : in std_ulogic_vector(13 downto 8);

		-- SIMD mode bits (already decoded)
		Size : in std_ulogic_vector(LOGMAXSIZE-1 downto 0);

		-- Clock, reset and enable inputs
		Clk, Rst, En : in std_ulogic;
	--
		-- Outputs, one pair per operand size:
		--   Dout_0 / Dout_1 :  8-bit result / 8-bit carry output
		--   Dout_2 / Dout_3 : 16-bit result / 16-bit carry output
		--   Dout_4 / Dout_5 : 32-bit result / 32-bit carry output
		--   Dout_6 / Dout_7 : 64-bit result / 64-bit carry output
		-- In architecture Struct_1 the even-numbered outputs are
		-- driven by the Y..l ports of the multiplier cores and the
		-- odd-numbered outputs by the corresponding Y..h ports.
		Dout_0, Dout_1, Dout_2, Dout_3 : out F_VECTOR;
		Dout_4, Dout_5, Dout_6, Dout_7 : out F_VECTOR
	);
end EU_IMU;

architecture Struct_1 of EU_IMU is
	-- 64-bit multiplier core; one instance is created per 64-bit chunk
	-- of the operand vectors.
	component IMul64
		generic (
			-- number of gates per pipeline stage (0 = not pipelined)
			PIPE_AFTER : natural := 0;
			-- additional delay in front of the first stage
			PIPE_DELAY : natural := 0
		);
		port (
			-- operands (A, B) and summand (X)
			A, B, X : in std_ulogic_vector(63 downto 0);
			SignedMode : in std_ulogic;
			-- MAC flags and SIMD size selection
			Mac, U : in std_ulogic_vector(2 downto 0);
			Clk, Rst, En : in std_ulogic;
		--
			-- Low/high outputs per operand size, with their delay:
			--   Y08l : d=18 (3 stages)    Y08h : d=20 (4 stages)
			--   Y16l : d=24 (4 stages)    Y16h : d=26 (5 stages)
			--   Y32l : d=28 (5 stages)    Y32h : d=28 (5 stages)
			--   Y64l : d=32 (6 stages)    Y64h : d=34 (6 stages)
			Y08l, Y08h : out std_ulogic_vector(63 downto 0);
			Y16l, Y16h : out std_ulogic_vector(63 downto 0);
			Y32l, Y32h : out std_ulogic_vector(63 downto 0);
			Y64l, Y64h : out std_ulogic_vector(63 downto 0)
		);
	end component;

	-- Width of one multiplier core.  Only 64 is supported right now,
	-- which is enforced by the assertion below.
	constant CHUNK_W : natural := MAX_CHUNK_SIZE;

	-- The three MAC flags collected into one vector:
	-- bit 2 = MacAlt, bit 1 = MacHi, bit 0 = MacLo
	signal Mac : std_ulogic_vector(2 downto 0);
begin
	-- Simulation-only sanity check of the configuration.
--pragma synthesis_off
	assert CHUNK_W = 64
		report "MAX_CHUNK_SIZE must be 64"
		severity failure;
--pragma synthesis_on

	-- Helper signal: leftmost element goes to Mac(2), rightmost to Mac(0).
	Mac <= MacAlt & MacHi & MacLo;

	-- One IMul64 per chunk; chunk i covers bits
	-- CHUNK_W*(i+1)-1 downto CHUNK_W*i of every data vector.
	instantiate : for i in UMAX/CHUNK_W-1 downto 0 generate
		core : IMul64
			generic map (
				-- standard timing: 6 gates per stage, no extra delay
				PIPE_AFTER => 6,
				PIPE_DELAY => 0
			)
			port map (
				-- operand and summand slices for this chunk
				A => Din_0(CHUNK_W*(i+1)-1 downto CHUNK_W*i),
				B => Din_1(CHUNK_W*(i+1)-1 downto CHUNK_W*i),
				X => Din_2(CHUNK_W*(i+1)-1 downto CHUNK_W*i),
				-- control inputs are shared by all cores
				SignedMode => Flags(12),	-- as defined in the manual
				Mac => Mac,
				U => Size(2 downto 0),
				Clk => Clk,
				Rst => Rst,
				En => En,
				-- result slices, one low/high pair per operand size
				Y08l => Dout_0(CHUNK_W*(i+1)-1 downto CHUNK_W*i),
				Y08h => Dout_1(CHUNK_W*(i+1)-1 downto CHUNK_W*i),
				Y16l => Dout_2(CHUNK_W*(i+1)-1 downto CHUNK_W*i),
				Y16h => Dout_3(CHUNK_W*(i+1)-1 downto CHUNK_W*i),
				Y32l => Dout_4(CHUNK_W*(i+1)-1 downto CHUNK_W*i),
				Y32h => Dout_5(CHUNK_W*(i+1)-1 downto CHUNK_W*i),
				Y64l => Dout_6(CHUNK_W*(i+1)-1 downto CHUNK_W*i),
				Y64h => Dout_7(CHUNK_W*(i+1)-1 downto CHUNK_W*i)
			);
	end generate;
end Struct_1;

-- vi: set ts=4 sw=4 equalprg="fmt -72 -p--": please
