/*

f-cpu/qdcpoc2/bist.c
created Sat Aug  4 18:51:03 2001 by Yann Guidon <whygee@f-cpu.org>
version Sun Aug 12 04:15:32 2001

(C) 2001 YG, all rights reserved.
* don't try to work on this, it's really ugly and a waste of time.
* don't spread outside the F-CPU team
* it's not even sure that it will compile.

This is a pre-VHDL study for the Built-In Self Test.
Each unit has a testbench/boot-time test and the BIST is
a collection of these "routines". This is still part of
the QDCPOC so be careful because not everything is definitive !!!


version : Tue Aug  7 05:49:52 2001

the BIST codes can be seen as testbenches as well as power-on or reset-time
"microcode" or "FSM" for verifying the machine's guts. However, the testing
strategy is not definitive or perfect, it's just a first hack.

Note that every test requires 3 phases:
  1) set the external input lines
  2) send a clock cycle
  3) compare the output with the expected result

Sending a clock cycle and reading/writing the latches is somewhat tricky.
There are 2 possibilities :
  * use some builtin (technology dependent) boundary scan
    inside the latches : that's pretty slow and overloads the chip
  * create a special port on the Xbar : i prefer that :-) it's parallel,
    faster and doesn't depend on the technology. However i think that we
    are limited to one read and one write port, so we can't test
    absolutely everything. i'll have to check that... But this lets
    one design a "pipelined" BIST :-)

WARNING ! read all the comments inside the code.
this is also heavily unoptimised and badly coded, as you will see
(but this is not the point of the QDCPOC).

*/



/* reset the register set and verify the integrity.
   Partial writes and the second write port are omitted :
   i don't see how to do that yet. */
void BIST_R7() {
  unsigned int i,j,k;
  UMAX T;

  /* internal function to generate the masks : */
  UMAX create_mask(int i, int j, int k) {
    UMAX M=0;
/*    int l; */
/*
How to decode this algorithm :
each 4032 bits of the register bank has a
(Log2(4032)=) 12-bit "number" (or "position") :
it is composed of the register number and the
position of the bit inside the register.
What the algorithm does is to set each bit of the
bank to the value of the Nth bit of the value of
its position, N being the algorithm step ("i").
we have a bit less than 4096 bits, so we need 12+1 steps
(plus as much to test the negated cases). This means that
we need 26 steps to exhaustively test all the possible errors
(and eventually clear the bank).

example :
for register k=23, the bit bitpos=45 will be
at position p = (k<<6)|bit_pos = 010111101101b
each bit will be set to the values 
 b = ((((k << 6)| bit_pos) >> i)& 1)^(~j)

we can remark that this can be split into 2 parts :
inside the register (because during the first part,
all the registers will have the same value), and between
registers (the registers will be either 0 or -1LL).
so we can simplify this a bit :-)

The first part is :
    if (i<6) {
      for (l=0; l<64; l++) {
	M = M | ((((UMAX)l >> i) & 1) << l);
      }

this can be shortened a lot, in terms of C programming,
but there's no real use for it... because we want to make VHDL.

VHDL version (untested) :
  signal M : std_ulogic_vector(F_RANGE);
  signal l : natural (6 downto 0);
  for l in F_RANGE begin
    M[l] = l[i];
  end;

one-hot encoding for i might be necessary.
 */


switch (i) { /* quick C version */
  case 0: M = 0xAAAAAAAAAAAAAAAALL;
  case 1: M = 0xCCCCCCCCCCCCCCCCLL;
  case 2: M = 0xF0F0F0F0F0F0F0F0LL;
  case 3: M = 0xFF00FF00FF00FF00LL;
  case 4: M = 0xFFFF0000FFFF0000LL;
  case 5: M = 0xFFFFFFFF00000000LL;
  default : M = ~((( k >> (i-6)) & 1)-1);
    } /* i>=6 so (i-6) should not give naughty surprises */
/*
VHDL version of the "default" :
  M (M'high downto 0):=(others=> (k(i-6)) );
in a few words, it propagates the (i-6)th bit of k within all M
 */

    if (j==0) /* test the positive and negative versions */
      M = ~M;

    return M;

    /* nb: in HW, this looks completely different
       and much more straight-forward ! :-D */
  }


  /* 0) init the data, only making sure that the "side effects" (r0=0) are ok in C : */

  R7_bank[0].r = 0; /* register #0 is hardwired to zero */
  R7_LSB = 0;
  R7_MSB = 0;    /* only the bit #0 needs to be cleared, anyway */
  R7_ZERO = 0;
  R7_null_flags[0] = 0;


  /* 1) test the (hardwired) register zero : */
  R7_write_mask_0 = 0;
  R7_write_mask_1 = 0;
  R7_read_address_0 = 0;
  R7_read_address_1 = 0;
  R7_read_address_2 = 0;

  fc0_cycle_register_set();

  if ((R7_read_port_0 != 0)
    ||(R7_read_port_1 != 0)
    ||(R7_read_port_2 != 0)
    ||((R7_ZERO & 1) != 0)
    ||((R7_MSB & 1) != 0)
    ||((R7_LSB & 1) != 0))
    qdcpoc_error("[BIST] register 0 is not 0",14);


  /* 2) test the other 63 registers with a 2*log2 algorithm :
      we have 63*64 bits to test, or 4032 bits if you prefer.
      this requires a 2*13 steps test (*63 register read+write)
      to exhaustively test the bank.

      Remark : this routine both tests and initialises the banks.
      So the patterns start with 010101010101... and end (at
      the last step) with 0000000000 so the routine leaves the
      bank cleared. How smart is that ? :-)

      Remark 2 : at one point, i'll be smart enough to "pipeline"
      the test so the read will overlap the write of another part...
      This way, i will be able to test the R7 through the Xbar.

  */

  /* i: step number */
  for (i=0; i<13; i++) {
    /* j: polarity */
    for (j=0; j<2; j++) {

      /* a : set the values */
      /* WARNING ! only one write port is tested. */

      R7_write_mask_0 = 31; /* write everything */
      R7_write_mask_1 = 0;  /* */
      /* k: register value */
      for (k=1; k<64; k++) {
	R7_write_address_0 = k;
	R7_write_port_0 = create_mask(i,j,k);
	fc0_cycle_register_set();
      }

      /* b : check if we get the original values */
      /* this part could overlap the a) part. this way, we can
         test the 2nd write port with (almost) as many cycles. */

      R7_write_mask_0 = 0; /* write nothing */
      R7_write_mask_1 = 0;
      /* k: register value */
      for (k=1; k<64; k++) {
	/* set the inputs */
	R7_read_address_0 = k;
	R7_read_address_1 = k;
	R7_read_address_2 = k;
	fc0_cycle_register_set();

	/* test the returned values */
	T = create_mask(i,j,k);
	if ((T != R7_read_port_0)
          ||(T != R7_read_port_1)
          ||(T != R7_read_port_2))
	  qdcpoc_error("[BIST] register does not return the expected value",14);

	/* test the zero flags */
	if ((T==0) && (((R7_ZERO >> k) & 1)!=0))
	  qdcpoc_error("[BIST] ZERO flag is altered",15);

	/* test the LSB flags */
	if ((T & 1) != ((R7_LSB >> k) & 1))
	  qdcpoc_error("[BIST] LSB flag is altered",16);

	/* test the MSB flags */
	if (((T >> 63) & 1) != ((R7_MSB >> k) & 1))
	  qdcpoc_error("[BIST] MSB flag is altered",17);

	/* missing : the partial writes and the other write buses */
	/* warning : is "seems" to work but that doesn't mean that it is OK ! */
      }
    } 
  }
}


/* Empty stub : the new version of this routine is not implemented yet,
   so calling it currently does nothing. */
void fc0_cycle_BIST()
{
}
