-- shuffle.vhdl -- bit shuffling unit
-- Copyright (C) 2001 Michael Riepe <michael@stud.uni-hannover.de>
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

-- $Id$

library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.numeric_std.all;

-- Shuffle: interface of the bit shuffling unit.
--
-- One WIDTH-bit data operand (A) and one WIDTH-bit control operand (B)
-- go in, one WIDTH-bit result (Y) comes out.  All control inputs
-- default to '0', so an instantiation may leave unused ones open.
entity Shuffle is
	generic (
		-- operand width in bits; the assertion below rejects anything
		-- other than 64
		WIDTH : natural := 64
	);
	port (
		-- data operand
		A : in std_ulogic_vector(WIDTH-1 downto 0);
		-- control operand; architecture Behave_1 reads bits 5 downto 0 only
		B : in std_ulogic_vector(WIDTH-1 downto 0);
		-- operation selects; architecture Behave_1 tests them in the
		-- order listed here and acts on the first one that is '1'
		ShiftL : in std_ulogic := '0';
		ShiftR : in std_ulogic := '0';
		ShiftRA : in std_ulogic := '0';
		RotL : in std_ulogic := '0';
		RotR : in std_ulogic := '0';
		Bitrev : in std_ulogic := '0';
		Byterev : in std_ulogic := '0';
		Sdup : in std_ulogic := '0';
		-- size flags; in architecture Behave_1 they gate bits 3, 4 and 5
		-- of B, respectively
		U08 : in std_ulogic := '0';
		U16 : in std_ulogic := '0';
		U32 : in std_ulogic := '0';
	--
		-- result
		Y : out std_ulogic_vector(WIDTH-1 downto 0)
	);
begin
	-- the implementation is hard-wired for eight bytes
	assert WIDTH = 64 report "WIDTH must be 64" severity failure;
end Shuffle;

architecture Behave_1 of Shuffle is
	-- Per-byte input stage, instantiated once for every byte of A.
	-- It takes WIDTH data bits and a 3-bit amount and produces an
	-- output that is twice as wide as its data input.
	-- NOTE(review): the meaning of the five control inputs is defined
	-- by the Shl_In entity, which is not declared in this file; this
	-- declaration must stay in step with that entity.
	component Shl_In
		generic (
			WIDTH : natural := 8	-- do not change
		);
		port (
			-- data input (one byte in this design)
			A : in std_ulogic_vector(WIDTH-1 downto 0);
			-- 3-bit amount
			B : in std_ulogic_vector(2 downto 0);
			-- control inputs, all defaulting to '0'
			Left : in std_ulogic := '0';
			Right : in std_ulogic := '0';
			Extend : in std_ulogic := '0';
			Normal : in std_ulogic := '0';
			Reverse : in std_ulogic := '0';
		--
			-- double-width result
			Y : out std_ulogic_vector(2*WIDTH-1 downto 0)
		);
	end component;

	-- Collected Shl_In outputs: instance i drives X1(16*i+15 downto 16*i).
	-- The output process reads X1 as sixteen 8-bit chunks.
	signal X1 : std_ulogic_vector(2*WIDTH-1 downto 0);
	-- Control inputs bundled into one vector for the output process:
	-- bits 2 downto 0 carry U32/U16/U08, bits 10 downto 3 carry the
	-- operation selects.
	signal mode : std_ulogic_vector(10 downto 0);
begin
	-- Input stage: derives the Shl_In control signals from the operation
	-- selects and instantiates one Shl_In per byte of A.  Instance i
	-- reads A(8*i+7 downto 8*i) and drives X1(16*i+15 downto 16*i).
	input : block
		signal Left, Right, Normal, Reverse : std_ulogic;
		signal Extend : std_ulogic_vector(7 downto 0);
	begin
		-- Shared controls, identical for all eight instances.
		Left <= ShiftL or RotL;
		Right <= ShiftR or ShiftRA or RotR;
		Normal <= Byterev or Sdup;
		Reverse <= Bitrev;

		-- Per-byte extend request, active for ShiftRA only.  A byte
		-- requests extension when it is the most significant byte of
		-- its chunk:
		--   byte 7            : every chunk size
		--   byte 3            : 8-, 16- and 32-bit chunks (U32 = '0')
		--   bytes 5 and 1     : 8- and 16-bit chunks      (U16 = '0')
		--   bytes 6, 4, 2, 0  : 8-bit chunks only         (U08 = '0')
		-- This relies on the cumulative size encoding that the output
		-- process uses (8-bit: no flag set; 16-bit: U08; 32-bit: U08
		-- and U16; 64-bit: U08, U16 and U32).
		-- NOTE(review): assumes Shl_In's Extend asks for the vacated
		-- bits of that byte to be filled from its top bit -- confirm
		-- against the Shl_In entity.
		Extend <= (
			7 => ShiftRA,
			3 => ShiftRA and not U32,
			5|1 => ShiftRA and not U16,
			others => ShiftRA and not U08
		);

		input_matrix : for i in WIDTH/8-1 downto 0 generate
			mat : Shl_In
				generic map (WIDTH => 8)
				port map (
					A => A(8*i+7 downto 8*i),
					-- TODO: is B a SIMD operand???
					-- (as written, every byte gets the same amount
					-- B(2 downto 0))
					B => B(2 downto 0),
					Left => Left,
					Right => Right,
					Extend => Extend(i),
					Normal => Normal,
					Reverse => Reverse,
					Y => X1(16*i+15 downto 16*i)
				);
		end generate;
	end block;

	-- Bundle the control inputs for the output process.  The leftmost
	-- operand lands in mode(10), the rightmost in mode(0):
	--   10 Sdup, 9 Byterev, 8 Bitrev, 7 RotR, 6 RotL,
	--   5 ShiftRA, 4 ShiftR, 3 ShiftL, 2 U32, 1 U16, 0 U08
	mode <= Sdup & Byterev & Bitrev
		& RotR & RotL
		& ShiftRA & ShiftR & ShiftL
		& U32 & U16 & U08;

	-- Output stage: byte-level routing of the Shl_In results onto Y.
	--
	-- X1 is treated as sixteen 8-bit chunks, chunk j being
	-- X1(8*j+7 downto 8*j); chunks 2*k and 2*k+1 are the two halves of
	-- the output of Shl_In instance k.  The process builds a 8x16 routing
	-- table: matrix(16*i+j) = '1' means "OR chunk j into output byte i".
	-- The table is cleared first and then filled by the branch of the
	-- first operation select found at '1', tested in the order ShiftL,
	-- ShiftR, ShiftRA, RotL, RotR, Bitrev, Byterev, Sdup.  If no select
	-- is '1', or the selected branch is still a TODO (ShiftRA, Bitrev),
	-- the table stays empty and Y is all zeros.
	--
	-- U08, U16 and U32 are read directly below; they are also elements
	-- of mode (bits 0 to 2), which is in the sensitivity list.
	output : process (X1, B, mode)
		-- routing table, see above
		variable matrix : std_ulogic_vector(8*16-1 downto 0);
		-- byte-granular amount taken from B(5 downto 3)
		variable b2 : std_ulogic_vector(2 downto 0);
		-- decoded amounts for the shift and the rotate tables
		variable sbytes, rbytes : std_ulogic_vector(7 downto 0);
		-- result accumulator
		variable x : std_ulogic_vector(WIDTH-1 downto 0) := (others => '0');
	begin
		-- 3-to-8 decoders
		-- b2: B(3), B(4) and B(5), each passed only when the matching
		-- size flag is set
		b2 := (0 => B(3) and U08, 1 => B(4) and U16, 2 => B(5) and U32);
		-- sbytes: plain one-hot decode of b2 (used by the shift tables)
		sbytes := (
			0 => not b2(2) and not b2(1) and not b2(0),
			1 => not b2(2) and not b2(1) and     b2(0),
			2 => not b2(2) and     b2(1) and not b2(0),
			3 => not b2(2) and     b2(1) and     b2(0),
			4 =>     b2(2) and not b2(1) and not b2(0),
			5 =>     b2(2) and not b2(1) and     b2(0),
			6 =>     b2(2) and     b2(1) and not b2(0),
			7 =>     b2(2) and     b2(1) and     b2(0)
		);
		-- rbytes: decode used by the rotate tables.  Unlike sbytes it
		-- is not one-hot: a cleared size flag makes the corresponding
		-- upper bits of B(5 downto 3) don't-care, so several outputs
		-- can be '1' together (e.g. U08 = '0' sets both rbytes(0) and
		-- rbytes(7)).
		rbytes := (
			0 => (not B(5) and not B(4) and not B(3))
			  or (not U32  and not B(4) and not B(3))
			  or (not U16               and not B(3))
			  or (not U08),
			1 => (not B(5) and not B(4) and     B(3))
			  or (not U32  and not B(4) and     B(3))
			  or (not U16               and     B(3)),
			2 => (not B(5) and     B(4) and not B(3))
			  or (not U32  and     B(4) and not B(3)),
			3 => (not B(5) and     B(4) and     B(3))
			  or (not U32  and     B(4) and     B(3)),
			4 => (    B(5) and not B(4) and not B(3))
			  or (not U32  and not B(4) and not B(3)),
			5 => (    B(5) and not B(4) and     B(3))
			  or (not U32  and not B(4) and     B(3)),
			6 => (    B(5) and     B(4) and not B(3))
			  or (not U32  and     B(4) and not B(3))
			  or (not U16               and not B(3)),
			7 => (    B(5) and     B(4) and     B(3))
			  or (not U32  and     B(4) and     B(3))
			  or (not U16               and     B(3))
			  or (not U08)
		);

		-- transformation matrix
		-- In the shift and rotate tables the entries are grouped by the
		-- size flag that enables them: the "8-bit" entries are
		-- unconditional, the "16-bit" ones are gated with U08, the
		-- "32-bit" ones with U16 and the "64-bit" ones with U32.
		matrix := (others => '0');
		if mode(3) = '1' then	-- ShiftL
			-- 8-bit
			matrix(  0) := sbytes(0);
			matrix( 18) := sbytes(0);
			matrix( 36) := sbytes(0);
			matrix( 54) := sbytes(0);
			matrix( 72) := sbytes(0);
			matrix( 90) := sbytes(0);
			matrix(108) := sbytes(0);
			matrix(126) := sbytes(0);
			-- 16-bit
			matrix( 16) := sbytes(1) and U08;
			matrix( 17) := sbytes(0) and U08;
			matrix( 52) := sbytes(1) and U08;
			matrix( 53) := sbytes(0) and U08;
			matrix( 88) := sbytes(1) and U08;
			matrix( 89) := sbytes(0) and U08;
			matrix(124) := sbytes(1) and U08;
			matrix(125) := sbytes(0) and U08;
			-- 32-bit
			matrix( 32) := sbytes(2) and U16;
			matrix( 33) := sbytes(1) and U16;
			matrix( 34) := sbytes(1) and U16;
			matrix( 35) := sbytes(0) and U16;
			matrix( 48) := sbytes(3) and U16;
			matrix( 49) := sbytes(2) and U16;
			matrix( 50) := sbytes(2) and U16;
			matrix( 51) := sbytes(1) and U16;
			matrix(104) := sbytes(2) and U16;
			matrix(105) := sbytes(1) and U16;
			matrix(106) := sbytes(1) and U16;
			matrix(107) := sbytes(0) and U16;
			matrix(120) := sbytes(3) and U16;
			matrix(121) := sbytes(2) and U16;
			matrix(122) := sbytes(2) and U16;
			matrix(123) := sbytes(1) and U16;
			-- 64-bit
			matrix( 64) := sbytes(4) and U32;
			matrix( 65) := sbytes(3) and U32;
			matrix( 66) := sbytes(3) and U32;
			matrix( 67) := sbytes(2) and U32;
			matrix( 68) := sbytes(2) and U32;
			matrix( 69) := sbytes(1) and U32;
			matrix( 70) := sbytes(1) and U32;
			matrix( 71) := sbytes(0) and U32;
			matrix( 80) := sbytes(5) and U32;
			matrix( 81) := sbytes(4) and U32;
			matrix( 82) := sbytes(4) and U32;
			matrix( 83) := sbytes(3) and U32;
			matrix( 84) := sbytes(3) and U32;
			matrix( 85) := sbytes(2) and U32;
			matrix( 86) := sbytes(2) and U32;
			matrix( 87) := sbytes(1) and U32;
			matrix( 96) := sbytes(6) and U32;
			matrix( 97) := sbytes(5) and U32;
			matrix( 98) := sbytes(5) and U32;
			matrix( 99) := sbytes(4) and U32;
			matrix(100) := sbytes(4) and U32;
			matrix(101) := sbytes(3) and U32;
			matrix(102) := sbytes(3) and U32;
			matrix(103) := sbytes(2) and U32;
			matrix(112) := sbytes(7) and U32;
			matrix(113) := sbytes(6) and U32;
			matrix(114) := sbytes(6) and U32;
			matrix(115) := sbytes(5) and U32;
			matrix(116) := sbytes(5) and U32;
			matrix(117) := sbytes(4) and U32;
			matrix(118) := sbytes(4) and U32;
			matrix(119) := sbytes(3) and U32;
		elsif mode(4) = '1' then	-- ShiftR
			-- 8-bit
			matrix(  1) := sbytes(0);
			matrix( 19) := sbytes(0);
			matrix( 37) := sbytes(0);
			matrix( 55) := sbytes(0);
			matrix( 73) := sbytes(0);
			matrix( 91) := sbytes(0);
			matrix(109) := sbytes(0);
			matrix(127) := sbytes(0);
			-- 16-bit
			matrix(  2) := sbytes(0) and U08;
			matrix(  3) := sbytes(1) and U08;
			matrix( 38) := sbytes(0) and U08;
			matrix( 39) := sbytes(1) and U08;
			matrix( 74) := sbytes(0) and U08;
			matrix( 75) := sbytes(1) and U08;
			matrix(110) := sbytes(0) and U08;
			matrix(111) := sbytes(1) and U08;
			-- 32-bit
			matrix(  4) := sbytes(1) and U16;
			matrix(  5) := sbytes(2) and U16;
			matrix(  6) := sbytes(2) and U16;
			matrix(  7) := sbytes(3) and U16;
			matrix( 20) := sbytes(0) and U16;
			matrix( 21) := sbytes(1) and U16;
			matrix( 22) := sbytes(1) and U16;
			matrix( 23) := sbytes(2) and U16;
			matrix( 76) := sbytes(1) and U16;
			matrix( 77) := sbytes(2) and U16;
			matrix( 78) := sbytes(2) and U16;
			matrix( 79) := sbytes(3) and U16;
			matrix( 92) := sbytes(0) and U16;
			matrix( 93) := sbytes(1) and U16;
			matrix( 94) := sbytes(1) and U16;
			matrix( 95) := sbytes(2) and U16;
			-- 64-bit
			matrix(  8) := sbytes(3) and U32;
			matrix(  9) := sbytes(4) and U32;
			matrix( 10) := sbytes(4) and U32;
			matrix( 11) := sbytes(5) and U32;
			matrix( 12) := sbytes(5) and U32;
			matrix( 13) := sbytes(6) and U32;
			matrix( 14) := sbytes(6) and U32;
			matrix( 15) := sbytes(7) and U32;
			matrix( 24) := sbytes(2) and U32;
			matrix( 25) := sbytes(3) and U32;
			matrix( 26) := sbytes(3) and U32;
			matrix( 27) := sbytes(4) and U32;
			matrix( 28) := sbytes(4) and U32;
			matrix( 29) := sbytes(5) and U32;
			matrix( 30) := sbytes(5) and U32;
			matrix( 31) := sbytes(6) and U32;
			matrix( 40) := sbytes(1) and U32;
			matrix( 41) := sbytes(2) and U32;
			matrix( 42) := sbytes(2) and U32;
			matrix( 43) := sbytes(3) and U32;
			matrix( 44) := sbytes(3) and U32;
			matrix( 45) := sbytes(4) and U32;
			matrix( 46) := sbytes(4) and U32;
			matrix( 47) := sbytes(5) and U32;
			matrix( 56) := sbytes(0) and U32;
			matrix( 57) := sbytes(1) and U32;
			matrix( 58) := sbytes(1) and U32;
			matrix( 59) := sbytes(2) and U32;
			matrix( 60) := sbytes(2) and U32;
			matrix( 61) := sbytes(3) and U32;
			matrix( 62) := sbytes(3) and U32;
			matrix( 63) := sbytes(4) and U32;
		elsif mode(5) = '1' then	-- ShiftRA
			-- TODO
			-- (no routing yet: the table stays empty, so Y is zero)
			null;
		elsif mode(6) = '1' then	-- RotL
			-- 8-bit
			matrix(  0) := rbytes(0);
			matrix(  1) := rbytes(7);
			matrix( 18) := rbytes(0);
			matrix( 19) := rbytes(7);
			matrix( 36) := rbytes(0);
			matrix( 37) := rbytes(7);
			matrix( 54) := rbytes(0);
			matrix( 55) := rbytes(7);
			matrix( 72) := rbytes(0);
			matrix( 73) := rbytes(7);
			matrix( 90) := rbytes(0);
			matrix( 91) := rbytes(7);
			matrix(108) := rbytes(0);
			matrix(109) := rbytes(7);
			matrix(126) := rbytes(0);
			matrix(127) := rbytes(7);
			-- 16-bit
			matrix(  2) := rbytes(7) and U08;
			matrix(  3) := rbytes(6) and U08;
			matrix( 16) := rbytes(1) and U08;
			matrix( 17) := rbytes(0) and U08;
			matrix( 38) := rbytes(7) and U08;
			matrix( 39) := rbytes(6) and U08;
			matrix( 52) := rbytes(1) and U08;
			matrix( 53) := rbytes(0) and U08;
			matrix( 74) := rbytes(7) and U08;
			matrix( 75) := rbytes(6) and U08;
			matrix( 88) := rbytes(1) and U08;
			matrix( 89) := rbytes(0) and U08;
			matrix(110) := rbytes(7) and U08;
			matrix(111) := rbytes(6) and U08;
			matrix(124) := rbytes(1) and U08;
			matrix(125) := rbytes(0) and U08;
			-- 32-bit
			matrix(  4) := rbytes(6) and U16;
			matrix(  5) := rbytes(5) and U16;
			matrix(  6) := rbytes(5) and U16;
			matrix(  7) := rbytes(4) and U16;
			matrix( 20) := rbytes(7) and U16;
			matrix( 21) := rbytes(6) and U16;
			matrix( 22) := rbytes(6) and U16;
			matrix( 23) := rbytes(5) and U16;
			matrix( 32) := rbytes(2) and U16;
			matrix( 33) := rbytes(1) and U16;
			matrix( 34) := rbytes(1) and U16;
			matrix( 35) := rbytes(0) and U16;
			matrix( 48) := rbytes(3) and U16;
			matrix( 49) := rbytes(2) and U16;
			matrix( 50) := rbytes(2) and U16;
			matrix( 51) := rbytes(1) and U16;
			matrix( 76) := rbytes(6) and U16;
			matrix( 77) := rbytes(5) and U16;
			matrix( 78) := rbytes(5) and U16;
			matrix( 79) := rbytes(4) and U16;
			matrix( 92) := rbytes(7) and U16;
			matrix( 93) := rbytes(6) and U16;
			matrix( 94) := rbytes(6) and U16;
			matrix( 95) := rbytes(5) and U16;
			matrix(104) := rbytes(2) and U16;
			matrix(105) := rbytes(1) and U16;
			matrix(106) := rbytes(1) and U16;
			matrix(107) := rbytes(0) and U16;
			matrix(120) := rbytes(3) and U16;
			matrix(121) := rbytes(2) and U16;
			matrix(122) := rbytes(2) and U16;
			matrix(123) := rbytes(1) and U16;
			-- 64-bit
			matrix(  8) := rbytes(4) and U32;
			matrix(  9) := rbytes(3) and U32;
			matrix( 10) := rbytes(3) and U32;
			matrix( 11) := rbytes(2) and U32;
			matrix( 12) := rbytes(2) and U32;
			matrix( 13) := rbytes(1) and U32;
			matrix( 14) := rbytes(1) and U32;
			matrix( 15) := rbytes(0) and U32;
			matrix( 24) := rbytes(5) and U32;
			matrix( 25) := rbytes(4) and U32;
			matrix( 26) := rbytes(4) and U32;
			matrix( 27) := rbytes(3) and U32;
			matrix( 28) := rbytes(3) and U32;
			matrix( 29) := rbytes(2) and U32;
			matrix( 30) := rbytes(2) and U32;
			matrix( 31) := rbytes(1) and U32;
			matrix( 40) := rbytes(6) and U32;
			matrix( 41) := rbytes(5) and U32;
			matrix( 42) := rbytes(5) and U32;
			matrix( 43) := rbytes(4) and U32;
			matrix( 44) := rbytes(4) and U32;
			matrix( 45) := rbytes(3) and U32;
			matrix( 46) := rbytes(3) and U32;
			matrix( 47) := rbytes(2) and U32;
			matrix( 56) := rbytes(7) and U32;
			matrix( 57) := rbytes(6) and U32;
			matrix( 58) := rbytes(6) and U32;
			matrix( 59) := rbytes(5) and U32;
			matrix( 60) := rbytes(5) and U32;
			matrix( 61) := rbytes(4) and U32;
			matrix( 62) := rbytes(4) and U32;
			matrix( 63) := rbytes(3) and U32;
			matrix( 64) := rbytes(4) and U32;
			matrix( 65) := rbytes(3) and U32;
			matrix( 66) := rbytes(3) and U32;
			matrix( 67) := rbytes(2) and U32;
			matrix( 68) := rbytes(2) and U32;
			matrix( 69) := rbytes(1) and U32;
			matrix( 70) := rbytes(1) and U32;
			matrix( 71) := rbytes(0) and U32;
			matrix( 80) := rbytes(5) and U32;
			matrix( 81) := rbytes(4) and U32;
			matrix( 82) := rbytes(4) and U32;
			matrix( 83) := rbytes(3) and U32;
			matrix( 84) := rbytes(3) and U32;
			matrix( 85) := rbytes(2) and U32;
			matrix( 86) := rbytes(2) and U32;
			matrix( 87) := rbytes(1) and U32;
			matrix( 96) := rbytes(6) and U32;
			matrix( 97) := rbytes(5) and U32;
			matrix( 98) := rbytes(5) and U32;
			matrix( 99) := rbytes(4) and U32;
			matrix(100) := rbytes(4) and U32;
			matrix(101) := rbytes(3) and U32;
			matrix(102) := rbytes(3) and U32;
			matrix(103) := rbytes(2) and U32;
			matrix(112) := rbytes(7) and U32;
			matrix(113) := rbytes(6) and U32;
			matrix(114) := rbytes(6) and U32;
			matrix(115) := rbytes(5) and U32;
			matrix(116) := rbytes(5) and U32;
			matrix(117) := rbytes(4) and U32;
			matrix(118) := rbytes(4) and U32;
			matrix(119) := rbytes(3) and U32;
		elsif mode(7) = '1' then	-- RotR
			-- 8-bit
			matrix(  0) := rbytes(7);
			matrix(  1) := rbytes(0);
			matrix( 18) := rbytes(7);
			matrix( 19) := rbytes(0);
			matrix( 36) := rbytes(7);
			matrix( 37) := rbytes(0);
			matrix( 54) := rbytes(7);
			matrix( 55) := rbytes(0);
			matrix( 72) := rbytes(7);
			matrix( 73) := rbytes(0);
			matrix( 90) := rbytes(7);
			matrix( 91) := rbytes(0);
			matrix(108) := rbytes(7);
			matrix(109) := rbytes(0);
			matrix(126) := rbytes(7);
			matrix(127) := rbytes(0);
			-- 16-bit
			matrix(  2) := rbytes(0) and U08;
			matrix(  3) := rbytes(1) and U08;
			matrix( 16) := rbytes(6) and U08;
			matrix( 17) := rbytes(7) and U08;
			matrix( 38) := rbytes(0) and U08;
			matrix( 39) := rbytes(1) and U08;
			matrix( 52) := rbytes(6) and U08;
			matrix( 53) := rbytes(7) and U08;
			matrix( 74) := rbytes(0) and U08;
			matrix( 75) := rbytes(1) and U08;
			matrix( 88) := rbytes(6) and U08;
			matrix( 89) := rbytes(7) and U08;
			matrix(110) := rbytes(0) and U08;
			matrix(111) := rbytes(1) and U08;
			matrix(124) := rbytes(6) and U08;
			matrix(125) := rbytes(7) and U08;
			-- 32-bit
			matrix(  4) := rbytes(1) and U16;
			matrix(  5) := rbytes(2) and U16;
			matrix(  6) := rbytes(2) and U16;
			matrix(  7) := rbytes(3) and U16;
			matrix( 20) := rbytes(0) and U16;
			matrix( 21) := rbytes(1) and U16;
			matrix( 22) := rbytes(1) and U16;
			matrix( 23) := rbytes(2) and U16;
			matrix( 32) := rbytes(5) and U16;
			matrix( 33) := rbytes(6) and U16;
			matrix( 34) := rbytes(6) and U16;
			matrix( 35) := rbytes(7) and U16;
			matrix( 48) := rbytes(4) and U16;
			matrix( 49) := rbytes(5) and U16;
			matrix( 50) := rbytes(5) and U16;
			matrix( 51) := rbytes(6) and U16;
			matrix( 76) := rbytes(1) and U16;
			matrix( 77) := rbytes(2) and U16;
			matrix( 78) := rbytes(2) and U16;
			matrix( 79) := rbytes(3) and U16;
			matrix( 92) := rbytes(0) and U16;
			matrix( 93) := rbytes(1) and U16;
			matrix( 94) := rbytes(1) and U16;
			matrix( 95) := rbytes(2) and U16;
			matrix(104) := rbytes(5) and U16;
			matrix(105) := rbytes(6) and U16;
			matrix(106) := rbytes(6) and U16;
			matrix(107) := rbytes(7) and U16;
			matrix(120) := rbytes(4) and U16;
			matrix(121) := rbytes(5) and U16;
			matrix(122) := rbytes(5) and U16;
			matrix(123) := rbytes(6) and U16;
			-- 64-bit
			matrix(  8) := rbytes(3) and U32;
			matrix(  9) := rbytes(4) and U32;
			matrix( 10) := rbytes(4) and U32;
			matrix( 11) := rbytes(5) and U32;
			matrix( 12) := rbytes(5) and U32;
			matrix( 13) := rbytes(6) and U32;
			matrix( 14) := rbytes(6) and U32;
			matrix( 15) := rbytes(7) and U32;
			matrix( 24) := rbytes(2) and U32;
			matrix( 25) := rbytes(3) and U32;
			matrix( 26) := rbytes(3) and U32;
			matrix( 27) := rbytes(4) and U32;
			matrix( 28) := rbytes(4) and U32;
			matrix( 29) := rbytes(5) and U32;
			matrix( 30) := rbytes(5) and U32;
			matrix( 31) := rbytes(6) and U32;
			matrix( 40) := rbytes(1) and U32;
			matrix( 41) := rbytes(2) and U32;
			matrix( 42) := rbytes(2) and U32;
			matrix( 43) := rbytes(3) and U32;
			matrix( 44) := rbytes(3) and U32;
			matrix( 45) := rbytes(4) and U32;
			matrix( 46) := rbytes(4) and U32;
			matrix( 47) := rbytes(5) and U32;
			matrix( 56) := rbytes(0) and U32;
			matrix( 57) := rbytes(1) and U32;
			matrix( 58) := rbytes(1) and U32;
			matrix( 59) := rbytes(2) and U32;
			matrix( 60) := rbytes(2) and U32;
			matrix( 61) := rbytes(3) and U32;
			matrix( 62) := rbytes(3) and U32;
			matrix( 63) := rbytes(4) and U32;
			matrix( 64) := rbytes(3) and U32;
			matrix( 65) := rbytes(4) and U32;
			matrix( 66) := rbytes(4) and U32;
			matrix( 67) := rbytes(5) and U32;
			matrix( 68) := rbytes(5) and U32;
			matrix( 69) := rbytes(6) and U32;
			matrix( 70) := rbytes(6) and U32;
			matrix( 71) := rbytes(7) and U32;
			matrix( 80) := rbytes(2) and U32;
			matrix( 81) := rbytes(3) and U32;
			matrix( 82) := rbytes(3) and U32;
			matrix( 83) := rbytes(4) and U32;
			matrix( 84) := rbytes(4) and U32;
			matrix( 85) := rbytes(5) and U32;
			matrix( 86) := rbytes(5) and U32;
			matrix( 87) := rbytes(6) and U32;
			matrix( 96) := rbytes(1) and U32;
			matrix( 97) := rbytes(2) and U32;
			matrix( 98) := rbytes(2) and U32;
			matrix( 99) := rbytes(3) and U32;
			matrix(100) := rbytes(3) and U32;
			matrix(101) := rbytes(4) and U32;
			matrix(102) := rbytes(4) and U32;
			matrix(103) := rbytes(5) and U32;
			matrix(112) := rbytes(0) and U32;
			matrix(113) := rbytes(1) and U32;
			matrix(114) := rbytes(1) and U32;
			matrix(115) := rbytes(2) and U32;
			matrix(116) := rbytes(2) and U32;
			matrix(117) := rbytes(3) and U32;
			matrix(118) := rbytes(3) and U32;
			matrix(119) := rbytes(4) and U32;
		elsif mode(8) = '1' then	-- Bitrev
			-- TODO
			-- (no routing yet: the table stays empty, so Y is zero)
			null;
		elsif mode(9) = '1' then	-- Byterev
			-- Only even-numbered chunks are routed here.  Exactly one
			-- of the four groups below is enabled for a cumulative
			-- size setting: not U08 / U08 and not U16 / U16 and not
			-- U32 / U32.
			-- 8-bit
			matrix(  0) := not U08;
			matrix( 18) := not U08;
			matrix( 36) := not U08;
			matrix( 54) := not U08;
			matrix( 72) := not U08;
			matrix( 90) := not U08;
			matrix(108) := not U08;
			matrix(126) := not U08;
			-- 16-bit
			matrix(  2) := U08 and not U16;
			matrix( 16) := U08 and not U16;
			matrix( 38) := U08 and not U16;
			matrix( 52) := U08 and not U16;
			matrix( 74) := U08 and not U16;
			matrix( 88) := U08 and not U16;
			matrix(110) := U08 and not U16;
			matrix(124) := U08 and not U16;
			-- 32-bit
			matrix(  6) := U16 and not U32;
			matrix( 20) := U16 and not U32;
			matrix( 34) := U16 and not U32;
			matrix( 48) := U16 and not U32;
			matrix( 78) := U16 and not U32;
			matrix( 92) := U16 and not U32;
			matrix(106) := U16 and not U32;
			matrix(120) := U16 and not U32;
			-- 64-bit
			matrix( 14) := U32;
			matrix( 28) := U32;
			matrix( 42) := U32;
			matrix( 56) := U32;
			matrix( 70) := U32;
			matrix( 84) := U32;
			matrix( 98) := U32;
			matrix(112) := U32;
		elsif mode(10) = '1' then	-- Sdup
			-- Only even-numbered chunks are routed here.  Output byte
			-- 0 always takes chunk 0; every other output byte picks
			-- one of the chunks 0, 2, 4, ... according to the size
			-- flags.
			matrix(  0) := '1';
			matrix( 16) := not U08;
			matrix( 18) := U08;
			matrix( 32) := not U16;
			matrix( 36) := U16;
			matrix( 48) := not U08;
			matrix( 50) := U08 and not U16;
			matrix( 54) := U16;
			matrix( 64) := not U32;
			matrix( 72) := U32;
			matrix( 80) := not U08;
			matrix( 82) := U08 and not U32;
			matrix( 90) := U32;
			matrix( 96) := not U16;
			matrix(100) := U16 and not U32;
			matrix(108) := U32;
			matrix(112) := not U08;
			matrix(114) := U08 and not U16;
			matrix(118) := U16 and not U32;
			matrix(126) := U32;
		end if;

		-- output
		-- Apply the routing table: each output byte is the OR of all
		-- X1 chunks whose table entry is exactly '1'.
		x := (others => '0');
		for i in 7 downto 0 loop	-- byte loop
			for j in 15 downto 0 loop	-- chunk loop
				if matrix(16*i+j) = '1' then
					x(8*i+7 downto 8*i) :=
						x(8*i+7 downto 8*i) or X1(8*j+7 downto 8*j);
				end if;
			end loop;
		end loop;
		Y <= x;
	end process;
end Behave_1;

-- vi: set ts=4 sw=4 equalprg="fmt -72 -p--": please
