-- inc64.vhdl -- F-CPU 64-bit Increment Unit
-- Copyright (C) 2002, 2003 Michael Riepe <michael@stud.uni-hannover.de>
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

-- @(#) $Id: inc64.vhdl,v 1.9 2003/04/17 16:00:56 michael Exp $

library IEEE;
use IEEE.std_logic_1164.all;

use work.Bit_Manipulation.all;

entity Inc64 is
	generic (
		-- Width in bits of the A, X, Y and Z vectors.  The assertions
		-- below require a non-zero multiple of 64.
		WIDTH : natural := 64
	);
	port (
		-- operand
		A    : in  std_ulogic_vector(WIDTH-1 downto 0);

		-- operating mode flags
		-- (`Xabs' instead of `Abs' because `abs' is a reserved word)
		Inc, Dec, Neg, Xabs, Lsb1, Lsb0 : in std_ulogic;

		-- SIMD mode switches
		U    : in  std_ulogic_vector(2 downto 0);

		-- clock, reset and register enable
		Clk, Rst, En : in std_ulogic;

		-- stage 1 result: inc/dec/neg
		X    : out std_ulogic_vector(WIDTH-1 downto 0);
		-- stage 2 result: abs/lsb
		Y    : out std_ulogic_vector(WIDTH-1 downto 0);
		-- stage 2 result: nabs/mask
		Z    : out std_ulogic_vector(WIDTH-1 downto 0)
	);
--pragma synthesis_off
begin
	-- Simulation-only sanity checks on the generic; hidden from
	-- synthesis by the surrounding pragmas.
	assert (WIDTH >= 64)
		report "width of Inc64 must be at least 64"
		severity failure;
	assert ((WIDTH mod 64) = 0)
		report "width of Inc64 must be an integer multiple of 64"
		severity failure;
--pragma synthesis_on
end Inc64;

-- Operating modes:
--
--  Mode | Y           | Z
--  =====#=============#============
--  Inc  | A + 1   (*) | unspec.
--  Dec  | A - 1   (*) | unspec.
--  Neg  | -A      (*) | unspec.
--  Xabs | abs(A)      | -abs(A)
--  Lsb1 | findlsb(A)  | masklsb(A)
--  Lsb0 | findlsb(~A) | masklsb(~A)
--
--  (*) also available one cycle earlier, at the X output
--
--  `findlsb(x)' returns 1 plus the index of the least significant
--  `1' bit in `x' while `masklsb(x)' returns a bit mask that selects
--  that bit.  If no bit is found, both return 0.

architecture Behave_1 of Inc64 is
	-- Behavioural two-stage model:
	--
	--   stage_1  computes the X output combinationally from A and the
	--            control inputs, and registers its intermediate results
	--            on the rising edge of Clk while En = '1'.  Rst = '1'
	--            clears the registers regardless of Clk.
	--   stage_2  derives Y and Z combinationally from those registers.
	--
	-- NOTE(review): the `d=N' annotations below appear to give the logic
	-- depth (in gate delays) at which a value becomes available -- see
	-- the "6 Gate Rule" remark in stage_1; confirm against the F-CPU
	-- design rules.

	-- pipeline registers written by stage_1 and read by stage_2
	signal r_A : std_ulogic_vector(WIDTH-1 downto 0);	-- copy of A
	signal r_B : std_ulogic_vector(WIDTH-1 downto 0);	-- stage 1 `bb' (lsb mask)
	signal r_Y : std_ulogic_vector(WIDTH-1 downto 0);	-- stage 1 `yy' (same value as X)
	-- control vector (M) and its registered copy (r_M):
	-- bits 2..0 hold U, bits 8..3 hold the mode flags
	signal M, r_M : std_ulogic_vector(8 downto 0);

	-- bit positions of the mode flags inside M / r_M
	constant Mode_Inc  : natural := 3;
	constant Mode_Dec  : natural := 4;
	constant Mode_Neg  : natural := 5;
	constant Mode_Abs  : natural := 6;
	constant Mode_Lsb1 : natural := 7;
	constant Mode_Lsb0 : natural := 8;
begin
	-- pack all control inputs into a single vector so that they can be
	-- normalized and registered together
	M <= (
		0 => U(0),
		1 => U(1),
		2 => U(2),
		Mode_Inc => Inc,
		Mode_Dec => Dec,
		Mode_Neg => Neg,
		Mode_Abs => Xabs,
		Mode_Lsb1 => Lsb1,
		Mode_Lsb0 => Lsb0
	);

	-- Stage 1: carry computation, inc/dec/neg result and pipeline
	-- registers.  The sensitivity list covers both the combinational
	-- inputs (A, M) and the register controls (Clk, Rst, En) because the
	-- process contains both kinds of logic.
	stage_1 : process (A, M, Clk, Rst, En)
		-- single stage of work.Bit_Manipulation.cascade_and
		--
		-- A : input vector (normalized to L-1 downto 0 internally)
		-- N : size of the groups that have already been combined
		-- M : number of adjacent N-bit groups to merge (2 or 4)
		--
		-- Every output bit i is the AND of input bit i with the topmost
		-- bit of each lower-numbered N-bit group inside the same aligned
		-- M*N-bit block.  Consequently, if every input bit already holds
		-- the AND of all bits from its group's base up to itself, every
		-- output bit holds the AND of all bits from its block's base up
		-- to itself.
		function tree_layer (A : in std_ulogic_vector;
							 N, M : in natural) return std_ulogic_vector is
			constant L : natural := A'length;
			variable aa : std_ulogic_vector(L-1 downto 0);
			variable yy : std_ulogic_vector(L-1 downto 0);
			variable j : natural;
		begin
--pragma synthesis_off
			assert L mod 64 = 0;
			assert L >= 64;
			assert N >= 1;
			assert (M = 2) or (M = 4);
--pragma synthesis_on
			aa := A;
			for i in L-1 downto 0 loop
				-- j = index of the topmost bit of the first (lowest)
				-- N-bit group in the M*N-bit block containing bit i
				j := i - i mod (M * N) + N - 1;
				-- (i / N) mod M = number of the group (within its block)
				-- that bit i belongs to = number of lower groups to AND in
				case (i / N) mod M is
					when 3 =>
						yy(i) := aa(i) and aa(j) and aa(j+N) and aa(j+2*N);
					when 2 =>
						yy(i) := aa(i) and aa(j) and aa(j+N);
					when 1 =>
						yy(i) := aa(i) and aa(j);
					when others =>
						-- lowest group of the block: nothing to merge
						yy(i) := aa(i);
				end case;
			end loop;
			return yy;
		end tree_layer;

		-- build increment vector
		--
		-- A : input vector (normalized to L-1 downto 0 internally)
		-- N : chunk size in bits
		--
		-- Shifts A up by one position inside every N-bit chunk; the
		-- lowest bit of each chunk is set to '1' and the topmost bit of
		-- each chunk is dropped.
		function ivec (A : in std_ulogic_vector;
					   N : in natural) return std_ulogic_vector is
			constant L : natural := A'length;
			variable aa : std_ulogic_vector(L-1 downto 0);
			variable yy : std_ulogic_vector(L-1 downto 0);
		begin
--pragma synthesis_off
			assert N > 1;
			assert L mod N = 0;
--pragma synthesis_on
			aa := A;
			for i in L-1 downto 0 loop
				if i mod N = 0 then
					-- chunk base: always '1'
					yy(i) := '1';
				else
					yy(i) := aa(i-1);
				end if;
			end loop;
			return yy;
		end ivec;

		variable aa : std_ulogic_vector(WIDTH-1 downto 0);	-- operand A
		variable na : std_ulogic_vector(WIDTH-1 downto 0);	-- not A
		variable bb : std_ulogic_vector(WIDTH-1 downto 0);	-- AND chain, later lsb mask
		variable cc : std_ulogic_vector(WIDTH-1 downto 0);	-- increment vector
		variable yy : std_ulogic_vector(WIDTH-1 downto 0);	-- inc/dec/neg result
		variable mm : std_ulogic_vector(M'length-1 downto 0);	-- normalized M
		-- AND chains over aligned 4/8/16/32/64-bit blocks
		variable x04 : std_ulogic_vector(WIDTH-1 downto 0);
		variable x08 : std_ulogic_vector(WIDTH-1 downto 0);
		variable x16 : std_ulogic_vector(WIDTH-1 downto 0);
		variable x32 : std_ulogic_vector(WIDTH-1 downto 0);
		variable x64 : std_ulogic_vector(WIDTH-1 downto 0);
	begin
		aa := A;
		-- map weak/unknown levels to 'X', '0' or '1' before comparing
		mm := to_X01(M);

		-- d=1
		na := not aa;

		-- d=2
		-- value that gets its bits flipped: A for Inc/Dec, not A for
		-- every other mode (Neg and Xabs need -A = (not A) + 1)
		if (mm(Mode_Inc) = '1') or (mm(Mode_Dec) = '1') then
			yy := aa;
		else
			yy := na;
		end if;

		-- d=2
		-- input of the AND chain: A for Inc/Lsb0, not A for every other
		-- mode (a bit is flipped when all lower bits of this vector in
		-- the same chunk are '1')
		if (mm(Mode_Inc) = '1') or (mm(Mode_Lsb0) = '1') then
			x04 := aa;
		else
			x04 := na;
		end if;

		-- d=3
		-- AND chain inside every 4-bit block
		x04 := tree_layer(x04, 1, 4);
		-- d=4
		-- merge 4-bit blocks into 8-bit and 16-bit blocks
		x08 := tree_layer(x04, 4, 2);
		x16 := tree_layer(x04, 4, 4);
		-- d=5
		-- merge 16-bit blocks into 32-bit and 64-bit blocks
		x32 := tree_layer(x16, 16, 2);
		x64 := tree_layer(x16, 16, 4);

		-- d=6
		-- select the chain matching the chunk size given by U:
		-- "111" = 64 bits, "011" = 32 bits, "001" = 16 bits,
		-- "000" = 8 bits; any other code yields all-'X' results
		case mm(2 downto 0) is
			when "111" =>
				bb := x64;
				cc := ivec(x64, 64);
			when "011" =>
				bb := x32;
				cc := ivec(x32, 32);
			when "001" =>
				bb := x16;
				cc := ivec(x16, 16);
			when "000" =>
				bb := x08;
				cc := ivec(x08, 8);
			when others =>
				bb := (others => 'X');
				cc := (others => 'X');
		end case;

		-- Note: I violate the 6 Gate Rule here (but not the
		-- 10 Transistor Rule, so everything should be ok).

		-- d=7
		-- yy: flip every bit whose lower bits in the chain input are all
		--     '1' (the lowest bit of a chunk is always flipped)
		-- bb: chain including bit i xor chain below bit i; this leaves a
		--     '1' only at the lowest '0' bit of the chain input in each
		--     chunk, i.e. the lsb mask for the Lsb1/Lsb0 modes
		yy := yy xor cc;
		bb := bb xor cc;

		-- d=7
		-- tap output for inc/dec/neg
		X <= yy;

		-- d=7
		-- pipeline registers: Rst = '1' clears them independently of
		-- Clk; otherwise they load on the rising Clk edge when En = '1'
		-- and hold their value when it is not
		if to_X01(Rst) = '1' then
			r_A <= (others => '0');
			r_B <= (others => '0');
			r_Y <= (others => '0');
			r_M <= (others => '0');
		elsif rising_edge(Clk) then
			if to_X01(En) = '1' then
				r_A <= aa;
				r_B <= bb;
				r_Y <= yy;
				-- the raw control vector is stored; stage_2 applies
				-- to_X01 again when reading it
				r_M <= M;
			end if;
		end if;
	end process;

	-- Stage 2: purely combinational; produces Y and Z from the
	-- registered stage 1 results.
	stage_2 : process (r_A, r_B, r_Y, r_M)
		-- Encode bit positions, separately for every chunk of
		-- N = 2**M bits.
		--
		-- A : input vector (normalized to L-1 downto 0 internally)
		-- M : log2 of the chunk size
		--
		-- Within a chunk, result bit i (0 <= i < M) combines (through
		-- reduce_or) all input bits whose position p has bit i of p+1
		-- set, and result bit M is a copy of input bit N-1.  With at
		-- most one input bit set per chunk this yields the position of
		-- that bit plus one, or 0 if no bit is set.  All remaining
		-- result bits are '0'.
		-- NOTE(review): reduce_or is not defined in this file
		-- (presumably from work.Bit_Manipulation) and is assumed to OR
		-- all elements of its argument -- confirm.
		function bit_encode (A : in std_ulogic_vector;
							 M : in natural) return std_ulogic_vector is
			constant L : natural := A'length;
			constant N : natural := 2 ** M;
			variable aa : std_ulogic_vector(L-1 downto 0);
			variable yy : std_ulogic_vector(L-1 downto 0);
			variable xx : std_ulogic_vector(N-1 downto 0);	-- current chunk
			variable tt : std_ulogic_vector(N/2-1 downto 0);	-- bits feeding one result bit
			variable k : natural;
		begin
--pragma synthesis_off
			assert L mod 8 = 0;
			assert L mod N = 0;
--pragma synthesis_on
			aa := A;
			yy := (others => '0');
			for c in L/N-1 downto 0 loop
				xx := aa(N*c+N-1 downto N*c);
				-- position N-1 encodes as N = 2**M: only bit M is set
				yy(c*N+M) := xx(N-1);
				for i in M-1 downto 0 loop
					k := 2 ** i;
					-- collect the N/2 bits that contribute to result bit
					-- i: slices of k bits ending at positions 2k-2,
					-- 4k-2, ..., N-2
					for j in 0 to N/(2*k)-1 loop
						tt(k*(j+1)-1 downto k*j)
							:= xx(2*k*(j+1)-2 downto 2*k*(j+1)-k-1);
					end loop;
					yy(c*N+i) := reduce_or(tt);
				end loop;
			end loop;
			return yy;
		end bit_encode;

		variable aa : std_ulogic_vector(WIDTH-1 downto 0);	-- registered operand
		variable bb : std_ulogic_vector(WIDTH-1 downto 0);	-- registered lsb mask
		variable cc : std_ulogic_vector(WIDTH-1 downto 0);	-- encoded lsb position
		variable yy : std_ulogic_vector(WIDTH-1 downto 0);	-- value for Y
		variable zz : std_ulogic_vector(WIDTH-1 downto 0);	-- value for Z
		variable mm : std_ulogic_vector(M'length-1 downto 0);	-- normalized r_M
		-- one select bit per byte for the abs/nabs multiplexer
		variable absel : std_ulogic_vector(WIDTH/8-1 downto 0);
	begin
		aa := r_A;
		bb := r_B;
		yy := r_Y;
		mm := to_X01(r_M);

		-- d=1-4 (absel: 1, cc: 4)
		-- chunk size decoding as in stage_1: "111" = 64, "011" = 32,
		-- "001" = 16, "000" = 8 bits; any other code yields 'X'
		-- NOTE(review): bit_extract and bit_duplicate are not defined in
		-- this file (presumably from work.Bit_Manipulation).  The calls
		-- look like they pick the topmost (sign) bit of every chunk and
		-- repeat it once per byte of that chunk -- confirm.
		case mm(2 downto 0) is
			when "111" =>
				absel := bit_duplicate(bit_extract(aa, 64, 63), 8);
				cc := bit_encode(bb, 6);
			when "011" =>
				absel := bit_duplicate(bit_extract(aa, 32, 31), 4);
				cc := bit_encode(bb, 5);
			when "001" =>
				absel := bit_duplicate(bit_extract(aa, 16, 15), 2);
				cc := bit_encode(bb, 4);
			when "000" =>
				absel := bit_duplicate(bit_extract(aa,  8,  7), 1);
				cc := bit_encode(bb, 3);
			when others =>
				absel := (others => 'X');
				cc := (others => 'X');
		end case;

		-- output selector
		-- d=5
		-- the selector is (Lsb0, Lsb1, Xabs), i.e. bits 8 downto 6 of mm
		case mm(Mode_Lsb0 downto Mode_Abs) is
			when "100" | "010" => -- lsb0/lsb1
				-- Y = encoded position, Z = bit mask
				yy := cc;
				zz := bb;
			when "001" => -- nabs/abs
				-- Both outputs start out as the stage 1 result; then,
				-- byte by byte, the original operand replaces it in Z
				-- where absel is '1' and in Y otherwise.  An absel bit
				-- that is not '1' after to_X01 (including 'X') takes
				-- the `else' branch.
				zz := yy;
				for i in WIDTH/8-1 downto 0 loop
					if to_X01(absel(i)) = '1' then
						zz(8*i+7 downto 8*i) := aa(8*i+7 downto 8*i);
					else
						yy(8*i+7 downto 8*i) := aa(8*i+7 downto 8*i);
					end if;
				end loop;
			when others => -- return the same value as in stage 1
				-- Y keeps the registered stage 1 result (r_Y);
				-- Z is driven to all 'X'
				zz := (others => 'X');
		end case;

		-- output
		Y <= yy;
		Z <= zz;
	end process;
end Behave_1;

-- vi: set ts=4 sw=4 equalprg="fmt -72 -p--": please
