-- generic_adder.vhdl -- Generic Adders
-- Copyright (C) 2000 - 2003 Michael Riepe <michael@stud.uni-hannover.de>
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

-- @(#) $Id: generic_adder.vhdl,v 1.11 2003/04/12 15:09:02 michael Exp $

library IEEE;
use IEEE.std_logic_1164.all;

package Generic_Adder is
	-- Public interface of the generic adder package.  All vector
	-- arguments are unconstrained; the bodies copy them into
	-- (length-1 downto 0) locals, so only the lengths have to agree.

	-- add without carry
	-- returns the sum of A and B for carry-in '0'; the result has
	-- A'length bits and the carry-out is dropped
	function add (A, B : in std_ulogic_vector) return std_ulogic_vector;
	-- add with carry-in
	-- as above, but yields the carry-in '1' sum when to_X01(Ci) = '1'
	function add (A, B : in std_ulogic_vector;
				  Ci : in std_ulogic) return std_ulogic_vector;
	-- add with carry in/out
	-- Y receives the sum selected by Ci, Co the carry out of the top bit;
	-- the operand width is taken from Y'length
	procedure add (A, B : in std_ulogic_vector;
				   Ci : in std_ulogic;
				   Y : out std_ulogic_vector;
				   Co : out std_ulogic);

	-- carry-select adder
	-- Y is the sum for carry-in '0', Z the sum for carry-in '1';
	-- G and P are the generate/propagate flags of the whole word
	procedure CSAdd (A, B : in std_ulogic_vector;
				     Y, Z : out std_ulogic_vector;
				     G, P : out std_ulogic);

	-- low-level routines

	-- carry look-ahead
	-- reduces each group of four generate/propagate positions to one
	-- generate/propagate pair; Go/Po hold (Gi'length + 3) / 4 bits
	procedure CLA (Gi, Pi : in std_ulogic_vector;
				   Go, Po : out std_ulogic_vector);
	-- carry select vectors
	-- per position, the carry into it when the carry into its 4-bit
	-- group is '0' (S) or '1' (T); S and T are as long as G
	procedure CSV (G, P : in std_ulogic_vector;
				   S, T : out std_ulogic_vector);
	-- carry increment vectors
	-- S as for CSV; I(i) is the AND of the propagate bits below
	-- position i inside its 4-bit group ('1' for the group's lowest bit)
	procedure CIV (G, P : in std_ulogic_vector;
				   S, I : out std_ulogic_vector);

	-- sequential CIA routines (deprecated)
	-- these operate on variables
	-- CIA_Core: CLA plus CIV for exactly one 4-bit group
	procedure CIA_Core (Gi, Pi : in std_ulogic_vector(3 downto 0);
						Cv, Iv : out std_ulogic_vector(3 downto 0);
						Go, Po : out std_ulogic);
	-- CIA_Row: CLA plus CIV for a vector of arbitrary length
	procedure CIA_Row (Gi, Pi : in std_ulogic_vector;
					   Cv, Iv, Go, Po : out std_ulogic_vector);
	-- CIA_Inc: applies the per-chunk carry (Cv) and increment (Iv) bits
	-- to a sum Yi and its increment vector Ci; chunks are step bits wide
	procedure CIA_Inc (Yi, Ci, Cv, Iv : in std_ulogic_vector;
					   Yo, Co : out std_ulogic_vector;
					   step : in natural);
	-- CIAdd: carry-increment adder; Y is the carry-in '0' sum, C the
	-- matching increment vector, G/P the flags of the whole word
	procedure CIAdd (A, B : in std_ulogic_vector;
				     Y, C : out std_ulogic_vector;
				     G, P : out std_ulogic);

	-- concurrent CIA routines (deprecated)
	-- same computations as the sequential routines above, but the
	-- results are driven onto signals
	procedure S_CIA_Core (Gi, Pi : in std_ulogic_vector(3 downto 0);
						  signal Cv, Iv : out std_ulogic_vector(3 downto 0);
						  signal Go, Po : out std_ulogic);
	procedure S_CIA_Row (Gi, Pi : in std_ulogic_vector;
						 signal Cv, Iv, Go, Po : out std_ulogic_vector);
	procedure S_CIA_Inc (Yi, Ci, Cv, Iv : in std_ulogic_vector;
						 signal Yo, Co : out std_ulogic_vector;
						 step : in natural);
	procedure S_CIAdd (A, B : in std_ulogic_vector;
					   signal Y, C : out std_ulogic_vector;
					   signal G, P : out std_ulogic);
end Generic_Adder;

package body Generic_Adder is
	-- add without carry
	-- Returns the sum of A and B for a carry-in of '0'.  The result is
	-- indexed (A'length-1 downto 0); the carry out of the top bit is
	-- discarded.  B must be as wide as A.
	function add (A, B : in std_ulogic_vector) return std_ulogic_vector is
		constant width : natural := A'length;
		subtype word is std_ulogic_vector(width-1 downto 0);
		-- operands, renumbered to (width-1 downto 0)
		variable lhs, rhs : word;
		-- CSAdd delivers the sums for carry-in '0' and carry-in '1'
		variable sum0, sum1 : word;
		-- word-level generate/propagate; CSAdd needs them, we do not
		variable gen, prop : std_ulogic;
	begin
--pragma synthesis_off
		assert B'length = width;
--pragma synthesis_on
		lhs := A;
		rhs := B;
		CSAdd(A => lhs, B => rhs, Y => sum0, Z => sum1, G => gen, P => prop);
		return sum0;
	end add;

	-- add with carry-in
	-- Returns the sum of A and B, using the carry-in '1' result when
	-- to_X01(Ci) is '1' and the carry-in '0' result for every other
	-- value of Ci.  The result is indexed (A'length-1 downto 0) and the
	-- carry out of the top bit is discarded.  B must be as wide as A.
	function add (A, B : in std_ulogic_vector;
				  Ci : in std_ulogic) return std_ulogic_vector is
		constant width : natural := A'length;
		subtype word is std_ulogic_vector(width-1 downto 0);
		-- operands, renumbered to (width-1 downto 0)
		variable lhs, rhs : word;
		-- both candidate sums as produced by CSAdd
		variable sum0, sum1 : word;
		-- word-level generate/propagate; unused here
		variable gen, prop : std_ulogic;
	begin
--pragma synthesis_off
		assert B'length = width;
--pragma synthesis_on
		lhs := A;
		rhs := B;
		CSAdd(A => lhs, B => rhs, Y => sum0, Z => sum1, G => gen, P => prop);
		-- final select between the two precomputed sums
		if to_X01(Ci) = '1' then
			return sum1;
		end if;
		return sum0;
	end add;

	-- add with carry-in/out
	-- Y receives the sum of A and B including the carry-in Ci, Co the
	-- carry out of the top bit.  The operand width is taken from
	-- Y'length; A and B must have that length too.
	procedure add (A, B : in std_ulogic_vector;
				   Ci : in std_ulogic;
				   Y : out std_ulogic_vector;
				   Co : out std_ulogic) is
		constant width : natural := Y'length;
		subtype word is std_ulogic_vector(width-1 downto 0);
		-- operands, renumbered to (width-1 downto 0)
		variable lhs, rhs : word;
		-- both candidate sums as produced by CSAdd
		variable sum0, sum1 : word;
		-- generate/propagate of the whole word
		variable gen, prop : std_ulogic;
	begin
--pragma synthesis_off
		assert A'length = width;
		assert B'length = width;
--pragma synthesis_on
		lhs := A;
		rhs := B;
		CSAdd(A => lhs, B => rhs, Y => sum0, Z => sum1, G => gen, P => prop);
		-- anything but a clean '1' on Ci selects the carry-in '0' sum
		if to_X01(Ci) /= '1' then
			Y := sum0;
		else
			Y := sum1;
		end if;
		-- carry out: generated inside the word, or carry-in passed through
		Co := gen or (prop and Ci);
	end add;

	-- carry look-ahead
	-- Reduces every group of four generate/propagate positions to one
	-- generate/propagate pair: output bit i covers input positions
	-- 4*i+3 downto 4*i.  If Gi'length is not a multiple of four, the
	-- missing upper positions behave as generate '0' / propagate '1'.
	-- Pi must be as long as Gi; Go and Po must each hold
	-- (Gi'length + 3) / 4 bits.
	-- d=1-2
	procedure CLA (Gi, Pi : in std_ulogic_vector;
				   Go, Po : out std_ulogic_vector) is
		constant n_in : natural := Gi'length;
		constant n_grp : natural := (n_in + 3) / 4;
		-- inputs padded up to a whole number of groups
		variable g_pad, p_pad : std_ulogic_vector(4*n_grp-1 downto 0);
		-- the group currently being reduced
		variable g, p : std_ulogic_vector(3 downto 0);
		variable g_out, p_out : std_ulogic_vector(n_grp-1 downto 0);
	begin
--pragma synthesis_off
		assert Pi'length = n_in;
		assert Go'length = n_grp;
		assert Po'length = n_grp;
--pragma synthesis_on
		-- neutral padding first, then the real inputs on top of it
		g_pad := (others => '0');
		p_pad := (others => '1');
		g_pad(n_in-1 downto 0) := Gi;
		p_pad(n_in-1 downto 0) := Pi;
		for grp in 0 to n_grp-1 loop
			g := g_pad(4*grp+3 downto 4*grp);
			p := p_pad(4*grp+3 downto 4*grp);
			-- d=2
			g_out(grp) := g(3)
				or (p(3) and g(2))
				or (p(3) and p(2) and g(1))
				or (p(3) and p(2) and p(1) and g(0));
			-- d=1
			p_out(grp) := p(3) and p(2) and p(1) and p(0);
		end loop;
		Go := g_out;
		Po := p_out;
	end CLA;

	-- carry select vectors
	-- Works on groups of four positions.  For every position, S holds
	-- the carry into it when the carry into its group is '0', and T the
	-- carry into it when the carry into its group is '1'.  Hence the
	-- lowest position of each group gets S = '0' and T = '1'.
	-- G, P, S and T must all have the same length.
	-- d=2
	procedure CSV (G, P : in std_ulogic_vector;
				   S, T : out std_ulogic_vector) is
		constant n_in : natural := G'length;
		constant n_grp : natural := (n_in + 3) / 4;
		-- inputs and results padded up to a whole number of groups
		variable g_pad, p_pad : std_ulogic_vector(4*n_grp-1 downto 0);
		variable s_pad, t_pad : std_ulogic_vector(4*n_grp-1 downto 0);
		-- the group currently being processed
		variable gn, pn, sn, tn : std_ulogic_vector(3 downto 0);
	begin
--pragma synthesis_off
		assert P'length = n_in;
		assert S'length = n_in;
		assert T'length = n_in;
--pragma synthesis_on
		-- padding positions stay 'U'; they only feed results that are
		-- cut off again below
		g_pad := (others => 'U');
		p_pad := (others => 'U');
		g_pad(n_in-1 downto 0) := G;
		p_pad(n_in-1 downto 0) := P;
		for grp in 0 to n_grp-1 loop
			gn := g_pad(4*grp+3 downto 4*grp);
			pn := p_pad(4*grp+3 downto 4*grp);
			-- carries for group carry-in '0'
			-- d=2
			sn(0) := '0';
			sn(1) := gn(0);
			sn(2) := gn(1)
				or (pn(1) and gn(0));
			sn(3) := gn(2)
				or (pn(2) and gn(1))
				or (pn(2) and pn(1) and gn(0));
			-- carries for group carry-in '1'
			-- d=2
			tn(0) := '1';
			tn(1) := gn(0)
				or pn(0);
			tn(2) := gn(1)
				or (pn(1) and gn(0))
				or (pn(1) and pn(0));
			tn(3) := gn(2)
				or (pn(2) and gn(1))
				or (pn(2) and pn(1) and gn(0))
				or (pn(2) and pn(1) and pn(0));
			s_pad(4*grp+3 downto 4*grp) := sn;
			t_pad(4*grp+3 downto 4*grp) := tn;
		end loop;
		S := s_pad(n_in-1 downto 0);
		T := t_pad(n_in-1 downto 0);
	end CSV;

	-- carry increment vectors
	-- Works on groups of four positions.  For every position, S holds
	-- the carry into it when the carry into its group is '0'.  I(i) is
	-- the AND of all propagate bits below position i inside its group,
	-- and '1' for the lowest position of each group.
	-- G, P, S and I must all have the same length.
	-- d=1-2
	procedure CIV (G, P : in std_ulogic_vector;
				   S, I : out std_ulogic_vector) is
		constant n_in : natural := G'length;
		constant n_grp : natural := (n_in + 3) / 4;
		-- inputs and results padded up to a whole number of groups
		variable g_pad, p_pad : std_ulogic_vector(4*n_grp-1 downto 0);
		variable s_pad, i_pad : std_ulogic_vector(4*n_grp-1 downto 0);
		-- the group currently being processed
		variable gn, pn, sn, inc : std_ulogic_vector(3 downto 0);
	begin
--pragma synthesis_off
		assert P'length = n_in;
		assert S'length = n_in;
		assert I'length = n_in;
--pragma synthesis_on
		-- padding positions stay 'U'; they only feed results that are
		-- cut off again below
		g_pad := (others => 'U');
		p_pad := (others => 'U');
		g_pad(n_in-1 downto 0) := G;
		p_pad(n_in-1 downto 0) := P;
		for grp in 0 to n_grp-1 loop
			gn := g_pad(4*grp+3 downto 4*grp);
			pn := p_pad(4*grp+3 downto 4*grp);
			-- carries for group carry-in '0'
			-- d=2
			sn(0) := '0';
			sn(1) := gn(0);
			sn(2) := gn(1)
				or (pn(1) and gn(0));
			sn(3) := gn(2)
				or (pn(2) and gn(1))
				or (pn(2) and pn(1) and gn(0));
			-- propagate chains from the bottom of the group
			-- d=1
			inc(0) := '1';
			inc(1) := pn(0);
			inc(2) := pn(1) and pn(0);
			inc(3) := pn(2) and pn(1) and pn(0);
			s_pad(4*grp+3 downto 4*grp) := sn;
			i_pad(4*grp+3 downto 4*grp) := inc;
		end loop;
		S := s_pad(n_in-1 downto 0);
		I := i_pad(n_in-1 downto 0);
	end CIV;

	-- carry-select adder
	-- Computes both candidate sums of A and B at once:
	--   Y : sum for carry-in '0'
	--   Z : sum for carry-in '1'
	-- each A'length bits wide, together with the generate (G) and
	-- propagate (P) flags of the whole word, so that a caller can form
	-- the carry-out as G or (P and carry-in).
	-- A, B, Y and Z must all have the same length.  For zero-length
	-- operands Y and Z stay empty and G/P are set to '0'/'1'.
	-- delay (without pipelining):
	-- L <=    4: d= 4  t= 6
	-- L <=    8: d= 5  t= 7
	-- L <=   16: d= 6  t= 7
	-- L <=   32: d= 7  t= 8
	-- L <=   64: d= 8  t= 9
	-- L <=  128: d= 9  t=10
	-- L <=  256: d=10  t=11
	-- L <=  512: d=11  t=12
	-- L <= 1024: d=12  t=13
	procedure CSAdd (A, B : in std_ulogic_vector;
				     Y, Z : out std_ulogic_vector;
				     G, P : out std_ulogic) is
		constant L : natural := A'length;
		variable aa, bb : std_ulogic_vector(L-1 downto 0);
		-- running sums for carry-in '0' (ym) and carry-in '1' (zm)
		variable ym, zm : std_ulogic_vector(L-1 downto 0);
		-- generate/propagate; shrinks by a factor of four per level
		variable gm, pm : std_ulogic_vector(L-1 downto 0);
		-- snapshots, so that inputs and outputs of a step never overlap
		variable yt, zt : std_ulogic_vector(L-1 downto 0);
		variable gt, pt : std_ulogic_vector(L-1 downto 0);
		-- carry select vectors of the current level
		variable sv, tv : std_ulogic_vector(L-1 downto 0);
		variable step, left : natural;
	begin
--pragma synthesis_off
		assert B'length = L;
		assert Y'length = L;
		assert Z'length = L;
--pragma synthesis_on

		-- normalize inputs
		aa := A;
		bb := B;

		-- Zero-width operands: (L-1)/4 truncates to 0, so the slices
		-- and gm(0)/pm(0) below would index an empty vector.  Return
		-- the neutral result instead.  The (empty) assignments keep
		-- the length check on Y and Z.
		if L = 0 then
			Y := ym;
			Z := zm;
			G := '0';
			P := '1';
			return;
		end if;

		-- a row of 4-bit adders
		-- d=1 t=2
		gm := aa and bb;
		pm := aa xor bb;
		-- d=3 t=4
		CSV(gm, pm, sv, tv);
		-- d=4 t=6
		ym := pm xor sv;
		zm := pm xor tv;
		-- d=3 t=4
		-- one generate/propagate pair per 4-bit group, stored in the
		-- low bits of gm/pm
		gt := gm; pt := pm;
		CLA(gt, pt, gm((L-1)/4 downto 0), pm((L-1)/4 downto 0));

		-- carry-select tree
		-- each level merges four chunks of step bits into one
		for level in 1 to 15 loop	-- should be enough...
			step := 4 ** level;
			exit when step >= L;
			-- index of the topmost chunk at this level
			left := (L - 1) / step;

			-- single tree level
			-- d=5/7/9/11/...
			-- t=6/8/10/12/...
			CSV(gm(left downto 0), pm(left downto 0),
				sv(left downto 0), tv(left downto 0));
			gt := gm; pt := pm;
			CLA(gt(left downto 0), pt(left downto 0),
				gm(left/4 downto 0), pm(left/4 downto 0));

			-- level mux
			-- d=6/8/10/12/...
			-- t=7/9/11/13/...
			-- sv(i)/tv(i) are the carries into chunk i for a carry-in
			-- of '0'/'1' to its group of four chunks; each running sum
			-- takes the chunk from the candidate matching that carry.
			yt := ym; zt := zm;
			for i in L/step-1 downto 0 loop
				if to_X01(sv(i)) = '1' then
					ym(step*(i+1)-1 downto step*i) :=
						zt(step*(i+1)-1 downto step*i);
				end if;
				if to_X01(tv(i)) /= '1' then
					zm(step*(i+1)-1 downto step*i) :=
						yt(step*(i+1)-1 downto step*i);
				end if;
			end loop;

			-- last (partial) chunk
			if L mod step /= 0 then
				if to_X01(sv(L/step)) = '1' then
					ym(L-1 downto L - L mod step) :=
						zt(L-1 downto L - L mod step);
				end if;
				if to_X01(tv(L/step)) /= '1' then
					zm(L-1 downto L - L mod step) :=
						yt(L-1 downto L - L mod step);
				end if;
			end if;
		end loop;

		-- outputs
		-- after the last level, bit 0 of gm/pm covers the whole word
		Y := ym;
		Z := zm;
		G := gm(0);
		P := pm(0);
	end CSAdd;

	-- Carry-increment core for a single 4-bit group (sequential version).
	-- Cv and Iv receive the S and I vectors that CIV computes for Gi/Pi;
	-- Go and Po receive the group's generate/propagate pair from CLA.
	procedure CIA_Core (Gi, Pi : in std_ulogic_vector(3 downto 0);
						Cv, Iv : out std_ulogic_vector(3 downto 0);
						Go, Po : out std_ulogic) is
		-- CLA returns vectors; four input bits give exactly one bit each
		variable grp_g, grp_p : std_ulogic_vector(0 downto 0);
	begin
		CLA(Gi => Gi, Pi => Pi, Go => grp_g, Po => grp_p);
		CIV(G => Gi, P => Pi, S => Cv, I => Iv);
		Go := grp_g(0);
		Po := grp_p(0);
	end CIA_Core;

	-- One row of carry-increment cores over a vector of any length
	-- (sequential version).  Go/Po receive one generate/propagate pair
	-- per 4-bit group from CLA; Cv/Iv receive the S and I vectors that
	-- CIV computes for Gi/Pi.
	procedure CIA_Row (Gi, Pi : in std_ulogic_vector;
					   Cv, Iv, Go, Po : out std_ulogic_vector) is
	begin
		CLA(Gi => Gi, Pi => Pi, Go => Go, Po => Po);
		CIV(G => Gi, P => Pi, S => Cv, I => Iv);
	end CIA_Row;

	-- Carry-increment step (sequential version).
	-- The word is split into chunks of step bits; Cv and Iv hold one bit
	-- per chunk.  Every bit of Yi is toggled where both its increment
	-- bit in Ci and the chunk's Cv bit are set; every bit of Ci is
	-- masked with the chunk's Iv bit.  The results go to Yo and Co.
	-- Ci must be as long as Yi; Cv and Iv must each hold
	-- (Yi'length - 1) / step + 1 bits.
	procedure CIA_Inc (Yi, Ci, Cv, Iv : in std_ulogic_vector;
					   Yo, Co : out std_ulogic_vector;
					   step : in natural) is
		constant n_bits : natural := Yi'length;
		constant n_chunks : natural := (n_bits-1)/step + 1;
		-- working copies of the sum and of its increment vector
		variable sum, inc : std_ulogic_vector(n_bits-1 downto 0);
		-- per-chunk carry and increment bits
		variable chunk_c, chunk_i : std_ulogic_vector(n_chunks-1 downto 0);
	begin
--pragma synthesis_off
		assert Ci'length = n_bits;
		assert Cv'length = n_chunks;
		assert Iv'length = n_chunks;
--pragma synthesis_on

		-- normalize inputs
		sum := Yi;
		inc := Ci;
		chunk_c := Cv;
		chunk_i := Iv;

		for k in 0 to n_bits-1 loop
			-- the sum must see the old increment bit, so update it first
			sum(k) := sum(k) xor (inc(k) and chunk_c(k/step));
			inc(k) := inc(k) and chunk_i(k/step);
		end loop;

		-- outputs
		Yo := sum;
		Co := inc;
	end CIA_Inc;

	-- carry-increment adder (sequential version)
	-- Y receives the sum of A and B for carry-in '0'.  C receives the
	-- matching increment vector: after the last tree level, C(i) is the
	-- AND of the propagate bits of all positions below i, in the form
	-- CIA_Inc uses to apply a carry (Y xor C when the carry is '1').
	-- G and P are the generate/propagate flags of the whole word.
	-- A, B, Y and C must all have the same length.  For zero-length
	-- operands Y and C stay empty and G/P are set to '0'/'1'.
	-- delay:
	--   1 ..   4 bit: d=4
	--   5 ..   8 bit: d=5
	--   9 ..  32 bit: d=7
	--  33 .. 128 bit: d=9
	-- 129 .. 512 bit: d=11
	procedure CIAdd (A, B : in std_ulogic_vector;
				     Y, C : out std_ulogic_vector;
				     G, P : out std_ulogic) is
		constant WIDTH : natural := A'length;
		variable aa, bb : std_ulogic_vector(WIDTH-1 downto 0);
		-- running sum and increment vector
		variable ym, cm : std_ulogic_vector(WIDTH-1 downto 0);
		-- generate/propagate; shrinks by a factor of four per level
		variable gm, pm : std_ulogic_vector(WIDTH-1 downto 0);
		-- snapshots, so that inputs and outputs of a step never overlap
		variable yt, ct : std_ulogic_vector(WIDTH-1 downto 0);
		variable gt, pt : std_ulogic_vector(WIDTH-1 downto 0);
		-- carry and increment vectors of the current level
		variable cv, iv : std_ulogic_vector(WIDTH-1 downto 0);
		variable step, left : natural;
	begin
--pragma synthesis_off
		assert B'length = WIDTH;
		assert Y'length = WIDTH;
		assert C'length = WIDTH;
--pragma synthesis_on

		-- normalize inputs
		aa := A;
		bb := B;

		-- Zero-width operands: (WIDTH-1)/4 truncates to 0, so the
		-- slices and gm(0)/pm(0) below would index an empty vector.
		-- Return the neutral result instead.  The (empty) assignments
		-- keep the length check on Y and C.
		if WIDTH = 0 then
			Y := ym;
			C := cm;
			G := '0';
			P := '1';
			return;
		end if;

		-- a row of 4-bit adders
		gm := aa and bb;				-- d=1
		pm := aa xor bb;				-- d=1
		ym := pm;
		gt := gm;
		pt := pm;
		CIA_Row(
			gt, pt, cv, iv,
			gm((WIDTH-1)/4 downto 0),	-- d=3
			pm((WIDTH-1)/4 downto 0)	-- d=2
		);
		cm := iv;						-- d=2
		ym := ym xor cv;				-- d=4

		-- carry-increment tree
		-- each level merges four chunks of step bits into one
		for level in 1 to 15 loop	-- should be enough...
			step := 4 ** level;
			exit when step >= WIDTH;
			-- index of the topmost chunk at this level
			left := (WIDTH - 1) / step;

			-- single level of carry-increment tree
			gt := gm;
			pt := pm;
			CIA_Row(					-- b <= 8  16  32  64 128 256 512
				gt(left downto 0),		-- d =  3   3   5   5   7   7   9
				pt(left downto 0),		-- d =  2   2   3   3   4   4   5
			--
				cv(left downto 0),		-- d =  3   5   5   7   7   9   9
				iv(left downto 0),		-- d =  2   3   3   4   4   5   5
				gm(left/4 downto 0),	-- d =  5   5   7   7   9   9  11
				pm(left/4 downto 0)		-- d =  3   3   4   4   5   5   6
			);

			-- intermediate result
			-- apply this level's per-chunk carries to the running sum
			-- and narrow the increment vector accordingly
			yt := ym;
			ct := cm;
			CIA_Inc(					-- b <= 8  16  32  64 128 256 512
				yt(WIDTH-1 downto 0),	-- d =  4   4   7   7   9   9  11
				ct(WIDTH-1 downto 0),	-- d =  2   2   4   4   5   5   6
				cv(left downto 0),		-- d =  3   5   5   7   7   9   9
				iv(left downto 0),		-- d =  2   3   3   4   4   5   5
			--
				ym(WIDTH-1 downto 0),	-- d =  5   7   8   9  10  11  12
				cm(WIDTH-1 downto 0),	-- d =  3   4   5   5   6   6   7
				step
			);
		end loop;

		-- outputs
		-- after the last level, bit 0 of gm/pm covers the whole word
		Y := ym;
		C := cm;
		G := gm(0);
		P := pm(0);
	end CIAdd;

	-- Signal-driving variant of CIA_Core: runs CIA_Core on variables and
	-- then assigns the four results to the output signals.
	procedure S_CIA_Core (Gi, Pi : in std_ulogic_vector(3 downto 0);
						  signal Cv, Iv : out std_ulogic_vector(3 downto 0);
						  signal Go, Po : out std_ulogic) is
		variable carry_vec, incr_vec : std_ulogic_vector(3 downto 0);
		variable grp_gen, grp_prop : std_ulogic;
	begin
		CIA_Core(
			Gi => Gi, Pi => Pi,
			Cv => carry_vec, Iv => incr_vec,
			Go => grp_gen, Po => grp_prop
		);
		Cv <= carry_vec;
		Iv <= incr_vec;
		Go <= grp_gen;
		Po <= grp_prop;
	end S_CIA_Core;

	-- Signal-driving variant of CIA_Row: runs CIA_Row on variables sized
	-- like the output signals and then assigns the results to them.
	procedure S_CIA_Row (Gi, Pi : in std_ulogic_vector;
						 signal Cv, Iv, Go, Po : out std_ulogic_vector) is
		variable carry_vec : std_ulogic_vector(Cv'length-1 downto 0);
		variable incr_vec : std_ulogic_vector(Iv'length-1 downto 0);
		variable grp_gen : std_ulogic_vector(Go'length-1 downto 0);
		variable grp_prop : std_ulogic_vector(Po'length-1 downto 0);
	begin
		CIA_Row(
			Gi => Gi, Pi => Pi,
			Cv => carry_vec, Iv => incr_vec,
			Go => grp_gen, Po => grp_prop
		);
		Cv <= carry_vec;
		Iv <= incr_vec;
		Go <= grp_gen;
		Po <= grp_prop;
	end S_CIA_Row;

	-- Signal-driving variant of CIA_Inc: runs CIA_Inc on variables sized
	-- like the output signals and then assigns the results to them.
	procedure S_CIA_Inc (Yi, Ci, Cv, Iv : in std_ulogic_vector;
						 signal Yo, Co : out std_ulogic_vector;
						 step : in natural) is
		variable sum_out : std_ulogic_vector(Yo'length-1 downto 0);
		variable incr_out : std_ulogic_vector(Co'length-1 downto 0);
	begin
		CIA_Inc(
			Yi => Yi, Ci => Ci, Cv => Cv, Iv => Iv,
			Yo => sum_out, Co => incr_out,
			step => step
		);
		Yo <= sum_out;
		Co <= incr_out;
	end S_CIA_Inc;

	-- Signal-driving variant of CIAdd: runs CIAdd on variables sized
	-- like the output signals and then assigns the results to them.
	procedure S_CIAdd (A, B : in std_ulogic_vector;
					   signal Y, C : out std_ulogic_vector;
					   signal G, P : out std_ulogic) is
		variable sum_out : std_ulogic_vector(Y'length-1 downto 0);
		variable incr_out : std_ulogic_vector(C'length-1 downto 0);
		variable gen_out, prop_out : std_ulogic;
	begin
		CIAdd(
			A => A, B => B,
			Y => sum_out, C => incr_out,
			G => gen_out, P => prop_out
		);
		Y <= sum_out;
		C <= incr_out;
		G <= gen_out;
		P <= prop_out;
	end S_CIAdd;
end Generic_Adder;

-- vi: set ts=4 sw=4 equalprg="fmt -72 -p--": please
