#include <stdio.h>
#include <string.h>


/* Cache geometry: the number of lines is a power of two derived from DEGREE. */
#define DEGREE    4                 /* log2 of the line count : 16 cache lines */
#define MAX_LINES (1 << DEGREE)     /* number of cache lines */
#define LAST_LINE (MAX_LINES - 1)   /* highest valid line index */

/* Line storage : MAX_LINES lines of 32 bytes each. */
unsigned char Icache[MAX_LINES][32];
/* Staging buffer copied into a line by write_Icache_line(). */
unsigned char IcacheIn[32];
/* Receives a copy of the matching line when read_Icache_line() hits. */
unsigned char IcacheOut[32];

/* One tag per line : the line address with its low 5 bits cleared.
   Bit 0 of the tag is used for the valid bit. */
long int Iaddress_tags[MAX_LINES];

/* Line numbers in LRU order : index 0 is the most recently used line,
   index LAST_LINE is the line that write_Icache_line() replaces next. */
int ILRUtags[MAX_LINES];

/* Inputs, set by the caller before invoking the cache functions. */
long int Iaddress_read;
long int Iaddress_write;

/* Results, set by the cache functions. */
long int Icache_hit;
long int Icache_hit_line;
long int Icache_write_line;

/* Bring the cache bookkeeping to its initial state : every address tag is
   cleared (valid bit included) and the LRU list holds the line numbers
   in ascending order, 0 .. LAST_LINE. */
void reset_LRU(void) {
  int line;

  for (line = 0; line < MAX_LINES; line++) {
    Iaddress_tags[line] = 0;
    ILRUtags[line] = line;
  }
}

/* no input parameter, the variables are externally set before calling this : */
void read_Icache_line (void) {
  int i=0, temp1, temp2;
/* reset, just in case : */
  Icache_hit=0;
  Icache_hit_line=0;

/* 1 : seach in the address tags : */
  temp1=1 | (Iaddress_read & -32); /* check also the valid bit */
  while (temp1^(Iaddress_tags[Icache_hit_line] & -31)){
     Icache_hit_line++;
     if (Icache_hit_line > LAST_LINE)
       return;
  } 
  Icache_hit=1; /* bingo. */
  memcpy(IcacheOut, Icache[Icache_hit_line], 32);

/* 2 : in the hardware, we'll have to encode the parallel comparison's result
            into a normal integer : we get a bit vector instead of an integer.
            With the sequential C algorithm, it's not necessary. */

/*
We could split the algorithm at this point.
*/

/* 3 : LRU update */
  temp2=Icache_hit_line;
  do {
    temp1=ILRUtags[i];
    ILRUtags[i]=temp2;
    i++;
    temp2=temp1;
  } while (temp1 != Icache_hit_line);
/* Stop condition is simplified because we're
   sure to meet the desired value at least once.
   This is a pretty good piece of C code
   but it's far from a HDL description. */
}

/*
 * Store the 32 bytes of IcacheIn in the cache under the address
 * Iaddress_write.
 *
 * The line replaced is the one at the tail of ILRUtags ; its number is left
 * in Icache_write_line and it becomes the head of the LRU list.
 */
void write_Icache_line (void) {
  int slot;

  /* 1 : the line to overwrite is the last entry of the LRU list */
  Icache_write_line = ILRUtags[LAST_LINE];

  /* 2 : copy the data, then record the line address with the valid bit set */
  memcpy(Icache[Icache_write_line], IcacheIn, 32);
  Iaddress_tags[Icache_write_line] = 1 | (Iaddress_write & -32);

  /* 3 : LRU update : shift every entry one place towards the tail,
         then put the freshly written line at the head */
  for (slot = LAST_LINE; slot > 0; slot--) {
    ILRUtags[slot] = ILRUtags[slot - 1];
  }
  ILRUtags[0] = Icache_write_line;
}


/*
 * Invalidate the cache line that holds the address Iaddress_read, if any.
 *
 * No input parameter : the caller sets Iaddress_read before calling this.
 * When no valid line matches, nothing is changed and nothing is printed.
 * Otherwise the valid bit of the matching tag is cleared, a message is
 * printed on stdout and, unless LRU_INVALID is defined, the line is moved
 * to the tail of ILRUtags so that it is the next one to be replaced.
 */
void invalidate_Icache_line (void) {
  int i=0;
  /* long int like the tags : an int key would lose the upper address bits */
  long int wanted=1 | (Iaddress_read & -32);

  /* 1 : scan the tag lines ("& -31" keeps the valid bit and the line
         address of the stored tag, and ignores bits 1 to 4) */
  while (wanted^(Iaddress_tags[i] & -31)){
     i++;
     if (i > LAST_LINE)
       return;
  }
  /* found : */

  Iaddress_tags[i]&=-2; /* remove the valid flag */

  /* Iaddress_read is a long int, hence %ld */
  printf ("Line @%ld removed from cache\n",Iaddress_read);
#ifndef LRU_INVALID
  /* 2 : optional, set the invalidated line as least recently used */
  {
    int pos=0;

    /* search the line number in the LRU queue. Only the first LAST_LINE
       slots are examined : if the line is in none of them it can only be
       in the last one, and the search can never leave the array. */
    while (pos<LAST_LINE && ILRUtags[pos]!=i)
      pos++;

    /* naughty shift here : close the gap by moving the following entries
       one place towards the head, then append the line at the tail */
    while (pos<LAST_LINE) {
      ILRUtags[pos]=ILRUtags[pos+1];
      pos++;
    }
    ILRUtags[pos]=i;
  }

#else
  /* ok, usually, we don't need the previous code because it is also complex. */
#ifndef LRU_INVALID_SMART
  /* but we can do something even smarter ! */

  /* trick : disable the shifting of the LRU tags during the next
     cache read, instead of shifting blindly. This keeps the multiplexer
     2-input and not 3-input, but adds another level */

  /* code will go here. */

#endif
#endif

}
