-------------------------------------------------------------------------------
-- Title      : divide component for f-cpu divide unit
-- Project    : F-CPU divmod unit
-------------------------------------------------------------------------------
-- File       : 8bitsDiv.vhdl
-- Author     : Cedric  <bail_c@epita.fr>
-- Company    : 
-- Last update: 2002/01/09
-- Platform   :
-- Licence    : GPL 2.0 or newer
-------------------------------------------------------------------------------
-- Description: The objective is to create a component that can do
--   8-bit divisions in fewer cycles than an SRT divider.
-------------------------------------------------------------------------------
-- Revisions  :
-- Date        Version  Author  Description
-- 2001/11/29  0.1      bail_c	Created
-------------------------------------------------------------------------------

library IEEE;
use IEEE.std_logic_1164.all;

entity divmods is

  generic (
    -- Operand width in bits. The final8_cb architecture hard-codes its
    -- datapath slices for 8 bits, so this must stay at 8.
    WIDTH     : natural := 8;
    -- Not referenced by the final8_cb architecture.
    PIPELINED : boolean := false;
    -- Index of the state bit that is driven onto WillBe.
    LENGTH    : natural := 6);

  port (
    -- Operands: A is the dividend, B the divisor (both unsigned).
    A : in std_ulogic_vector(WIDTH-1 downto 0);
    B : in std_ulogic_vector(WIDTH-1 downto 0);

    -- Requests a new division; taken on a rising clock edge when the unit
    -- is not running and Freeze is '0'.
    Start : in std_ulogic := '0';

    -- Clock (rising edge) and asynchronous reset (active high).
    Clk   : in std_ulogic := '0';
    Reset : in std_ulogic := '0';
    -- When '1', no new operation is accepted and the iteration registers
    -- keep their values.
    Freeze : in std_ulogic := '0';

    -- Results: Yl receives the quotient (A / B), Yh the remainder (A % B).
    Yl : out std_ulogic_vector(WIDTH-1 downto 0);
    Yh : out std_ulogic_vector(WIDTH-1 downto 0);

    -- Status flags: WillBe follows state bit LENGTH, Ready follows state
    -- bit 0 (last step), Running is high while a division is in progress.
    WillBe  : out std_ulogic := '0';
    Ready   : out std_ulogic := '0';
    Running : out std_ulogic := '0');

end divmods;

-- Known limitations:
--
--      1: Only works with unsigned numbers.
--      So to work with signed numbers we must first
--      negate the negative operands and then set Yl to -Yl
--      (the modulo is always positive).
--
--      2: The result is available after WIDTH-1 cycles, so
--      you can transfer it after WIDTH cycles.

-- Modus Operandi:
--
--      This component only does the division/remainder
--  in a non-SIMD way, so you must put several of them in
--  parallel to get the SIMD capability.
--      It puts the result of the division into Yl and
--  the modulo into Yh, like this:
--      Yl := A / B;
--      Yh := A % B;
--
--      The algorithm is:
--  WillBe := '0';
--  Ready := '0';
--  TmpB := B << WIDTH-1;
--  For i from WIDTH-1 downto 0
--      TmpResult := subtraction(A, TmpB);
--      if carry(TmpResult) = true then  (negative result)
--              Yl(i) := 0;
--      else
--              Yl(i) := 1;
--              A := TmpResult;
--      endif
--      if i = LENGTH then
--              WillBe := '1';
--      end if
--      TmpB := TmpB >> 1;
--  next i;
--  Yh := A;
--  Ready := '1';

-- How quick is this unit:
--
--      This unit is only designed to work with
-- 8 bits. A bigger width would take too much
-- time compared to an SRT division. But an SRT
-- divider is too slow for 8 bits.

architecture final8_cb of divmods is

  -- 4-bit subtractor slice, defined outside this file.
  -- NOTE(review): from the way it is wired below, R looks like a borrow-in
  -- and result(4) like the borrow-out / sign of A - B - R; confirm against
  -- the substractor implementation.
  component substractor
    port (
      A      : in  std_ulogic_vector(3 downto 0);
      B      : in  std_ulogic_vector(3 downto 0);
      R      : in  std_ulogic;
      result : out std_ulogic_vector(4 downto 0));
  end component;

  -- Naming convention: t* = registered (current) value,
  --                    c* = combinational next value.
  -- tA/cA : partial remainder, one extra top bit (WIDTH downto 0).
  -- eval  : raw result of the 8-bit subtraction tA - tB(7 downto 0).
  -- tmp   : eval, or all ones when the shifted divisor is out of range.
  signal cA, tA, eval, tmp : std_ulogic_vector(WIDTH downto 0);
  -- tB/cB : divisor, loaded shifted left by WIDTH-1, then shifted right
  --         by one position per step.
  signal cB, tB : std_ulogic_vector((WIDTH*2)-1 downto 0);

  -- Quotient shift register; a new bit enters at position 0 each step.
  signal cresult, result : std_ulogic_vector(WIDTH-1 downto 0);
  -- One-hot step marker: loaded with bit WIDTH-1 set, shifted right each
  -- step; bit 0 marks the last step.
  signal cState, tState : std_ulogic_vector(WIDTH-1 downto 0)
    := (others => '0');
  signal tRunning : std_ulogic := '0';
  -- High-nibble subtraction computed twice for carry-select:
  -- partie(4 downto 0) with R = '0', partie(9 downto 5) with R = '1'.
  signal partie : std_ulogic_vector(9 downto 0);
  -- Low-nibble subtraction; bit 4 selects which half of partie is used.
  signal superieur : std_ulogic_vector(4 downto 0);

begin  -- final8_cb

  -- The slices and literals below are written for 8-bit operands only.
  assert WIDTH = 8
    report "divmods(final8_cb): WIDTH must be 8"
    severity failure;

  Ready <= tState(0);
  WillBe <= tState(LENGTH);
  Running <= tRunning;

  -- purpose: Control the division unit
  -- type   : sequential
  -- inputs : Clk, Reset
  -- NOTE(review): Yh, Yl and result are not part of the reset branch, so
  -- they keep their previous value while Reset is asserted.
  Control: process (Clk, Reset)
  begin  -- process Control
    if Reset = '1' then                 -- asynchronous reset (active high)
      tA <= (others => '0');
      tB <= (others => '0');
      tState <= (others => '0');
      tRunning <= '0';
    elsif Clk'event and Clk = '1' then  -- rising clock edge
      -- Last step: latch remainder and quotient, then return to idle.
      if tState(0) = '1' then
        Yh <= cA(WIDTH-1 downto 0);
        Yl <= cresult;
        tRunning <= '0';
        tState <= (others => '0');
      end if;
      if Start = '1' and Freeze = '0' and tRunning = '0' then
        -- Load operands: A zero-extended by one bit, B placed at bit
        -- positions (WIDTH-1)*2 downto WIDTH-1 (i.e. B << WIDTH-1).
        tRunning <= '1';
        tA(WIDTH) <= '0';
        tA(WIDTH-1 downto 0) <= A;
        tB(WIDTH-2 downto 0) <= (others => '0');
        tB(WIDTH*2 - 1) <= '0';
        tB((WIDTH-1)*2 downto WIDTH-1) <= B;
        tState(WIDTH-2 downto 0) <= (others => '0');
        tState(WIDTH-1) <= '1';
      elsif tRunning = '1' and Freeze = '0' and tState(0) = '0' then
        -- One iteration step: take the combinational next values.
        tB <= cB;
        tA <= cA;
        tState <= cState;
        result <= cresult;
      end if;
      -- In every other case (frozen, idle, or last step) tA, tB and result
      -- are not assigned and therefore hold their value.
    end if;
  end process Control;

  -- 8-bit subtraction tA(7 downto 0) - tB(7 downto 0) built from 4-bit
  -- slices in carry-select fashion.
  partie_basse : substractor port map (
    A      => tA(3 downto 0),
    B      => tB(3 downto 0),
    R      => '0',
    result => superieur);
  partie_h1 : substractor port map (
    A      => tA(7 downto 4),
    B      => tB(7 downto 4),
    R      => '0',
    result => partie(4 downto 0));
  partie_h2 : substractor port map (
    A      => tA(7 downto 4),
    B      => tB(7 downto 4),
    R      => '1',
    result => partie(9 downto 5));

  eval(3 downto 0) <= superieur(3 downto 0);

  -- Pick the high-nibble result that matches the low nibble's bit 4.
  -- A null waveform is only legal for guarded signals, so metavalues on the
  -- select are propagated as unknown instead.
  with superieur(4) select
    eval(8 downto 4) <=
     partie(4 downto 0) when '0',
     partie(9 downto 5) when '1',
     "XXXXX"            when others;

  -- While any bit of the upper half of the shifted divisor is set, force an
  -- all-ones value so that tmp(8) = '1' and the step is treated as
  -- "do not subtract".
  with tB(15 downto 8) select
    tmp <=
    eval        when "00000000",
    "111111111" when others;

  -- New quotient bit: '1' only when tmp(8) = '0'.
  with tmp(8) select
    cresult(0) <=
    '1' when '0',
    '0' when others;

  -- Keep the subtraction result only when tmp(8) = '0'; otherwise the
  -- partial remainder is left unchanged.
  with tmp(8) select
    cA <=
    tmp when '0',
    tA  when others;

  -- Divisor shifted right by one position for the next step.
  cB((WIDTH-1)*2 downto 0) <= tB(WIDTH*2-1 downto 1);
  cB(WIDTH*2-1) <= '0';

  -- Quotient bits collected so far move up by one position.
  cresult(WIDTH-1 downto 1) <= result(WIDTH-2 downto 0);

  -- Step marker shifted right by one position.
  cState(WIDTH-2 downto 0) <= tState(WIDTH-1 downto 1);
  cState(WIDTH-1) <= '0';

end final8_cb;
