/*
 * prefetcher.c -- F-CPU instruction-level emulator Instruction Cache L0 methods
 * Copyright (C) 2003 Pierre Tardy <tardyp@free.fr>
 * Copyright (C) 2003 François Vieville <vieville@poste.isima.fr>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <stdio.h>
#include "emu.h"
/* File-local boolean constants: false evaluates to 0 and true to 1. */
enum
{
	false = 0,
	true = 1
};
extern int interactive;

/*
 * Assertion-failure handler used by this file in place of the C library's.
 * Further down, __assert_fail is #defined to this name before <assert.h> is
 * included, so a failing assert() in this file ends up here.
 *
 * Unlike the usual handler, this one does not terminate the program: it
 * prints the failed expression and its location on stderr, sets the
 * `interactive` flag, and returns to the caller.
 * NOTE(review): glibc declares __assert_fail as noreturn; since this handler
 * does return, confirm the compilers in use do not miscompile the code that
 * follows a failing assert().
 *
 * __assertion: text of the expression that failed
 * __file:      source file name of the assert
 * __line:      source line of the assert
 * __function:  name of the enclosing function
 */
void __myassert_fail (const char *__assertion, const char *__file,
			   unsigned int __line, const char *__function)
{
  /* __line is unsigned, so it is printed with %u rather than %d */
  fprintf(stderr,"assertion %s failed %s:l%u, %s\n",__assertion,__file, __line, __function);
  interactive = 1;
}
/*
 * Non-terminating stand-in for an abort: it only sets the `interactive`
 * flag and returns.
 */
void myabort (void)
{
  interactive = 1;
}
/*
 * Route failed assertions to __myassert_fail: the rename must be in effect
 * before <assert.h> is read, so the order of the two lines below matters.
 * NOTE(review): this relies on the C library's assert() expanding to a call
 * to __assert_fail (as glibc's does) -- confirm for other targets.
 */
#define __assert_fail __myassert_fail
#include <assert.h>


// Per-register index into lines[]: pf_loadaddr() and pf_doJmp() store here the slot
// tied to a register; the value 0xff means "no slot remembered for this register".
int lineRef[NUM_REG];
// Set to true by pf_getNextInstruction() when it misses and to false when it
// delivers an instruction (the original author's note: tells the core to sleep).
BOOL waitPrefetcher = false;

// Bits of a lines[] index that select a slot inside one group of IL1_GRANULARITY slots.
// Both masks assume IL1_GRANULARITY is a power of two -- TODO confirm in emu.h.
#define LINEMASK  ((IL1_GRANULARITY)-1)
// Bits of a byte address that select a byte inside one group (IL1_GRANULARITY
// instructions of 4 bytes each). IL1_GRANULARITY is parenthesized so the mask
// stays correct even if the macro is defined as an expression such as 1 << 3.
#define ADDRMASK  (((IL1_GRANULARITY)*4)-1)
// Return addr with the bits covered by ADDRMASK cleared, i.e. the address
// rounded down to the start of its instruction group.
static U64 mask_addr(U64 addr)
{
  const U64 offset_bits = ADDRMASK;

  return addr & ~offset_bits;
}
// Return only the bits of addr covered by ADDRMASK, i.e. the byte offset of
// addr inside its instruction group.
static U64 mask_addr_low(U64 addr)
{
  const U64 offset_bits = ADDRMASK;

  return addr & offset_bits;
}
// Return line with the bits covered by LINEMASK cleared, i.e. the lines[]
// index of the first slot of the group that `line` belongs to.
static U64 mask_line(U64 line)
{
  const U64 slot_bits = LINEMASK;

  return line & ~slot_bits;
}
// Number of instruction slots in the prefetch buffer: 8 groups of IL1_GRANULARITY
// slots. IL1_GRANULARITY is parenthesized so the size stays correct even if the
// macro is defined as an expression.
#define NUM_PREFETCH_LINES ((IL1_GRANULARITY)*8)
// One instruction slot of the prefetch buffer.
typedef struct s_PrefetchLine
{
  int regRef;  // register tied to this slot by pf_loadaddr()/pf_doJmp(); 0 means none
  U32 instr;   // instruction word stored by pf_doPrefetch()
  U64 addr;    // byte address of the instruction, written for every slot when the group
               // is filled; the first slot of a group also holds the address the group
               // is being fetched from (see PrefetchAddr)
  BOOL valid;  // set on fill, cleared when the group is retargeted; pf_getNextInstruction()
               // only consults the flag of the first slot of a group
}PrefetchLine;
PrefetchLine lines[NUM_PREFETCH_LINES];

// Address the prefetcher is currently fetching: the addr field of the slot at PrefetchPC.
#define PrefetchAddr lines[PrefetchPC].addr

// Index into lines[] of the slot expected to hold the instruction at PC.
unsigned int PCRef = 0;
// The prefetcher's own program counter; pf_getNextInstruction() asserts that it
// matches the register-file PC and advances it by 4 per delivered instruction.
U64 PC = 0; 
// Index into lines[] of the first slot of the group the prefetcher fills next.
unsigned int PrefetchPC = 0;

// Retarget the group of slots starting at PrefetchPC to the instruction group
// that contains addr.
// Nothing happens when that group is already targeting the same (rounded)
// address. Otherwise the new address is recorded in the group's first slot and
// every slot of the group is invalidated and detached from its register.
static void pf_prefetch_here (U64 addr)
{
  int i;

  addr = mask_addr(addr);
  if(PrefetchAddr == addr)
    return; // this group is already being prefetched

  lines[PrefetchPC].addr = addr;
  for(i = 0; i < IL1_GRANULARITY; i++)
    {
      PrefetchLine *slot = &lines[PrefetchPC+i];
      int owner = slot->regRef;

      slot->valid = false;
      // forget the register's slot only if the register still designates this very slot
      if(owner != 0 && lineRef[owner] == PrefetchPC+i)
        lineRef[owner] = 0xff;
      slot->regRef = 0;
    }
}

// Called at each clock rising edge.
// Returns the instruction stored for PC and advances PC/PCRef when the slot at
// PCRef holds that address and its group is marked valid. Otherwise it raises
// waitPrefetcher, points the prefetcher at the group containing PC and
// returns 0.
U32 pf_getNextInstruction()
{
  U32 inst;
  int hit;

  assert(PC == regs.r_pc.C(o,0));

  // only the first slot of a group is consulted for the valid flag
  hit = lines[PCRef].addr == PC && lines[mask_line(PCRef)].valid;
  if(!hit)
    {
      // miss: ask the core to wait and fetch the group PC belongs to
      waitPrefetcher = true;
      PrefetchPC = mask_line(PCRef);
      pf_prefetch_here(PC);
      return 0;
    }

  assert(lines[PCRef].addr == PC);
  // hit: hand the instruction out and step to the next slot, wrapping around
  waitPrefetcher = false;
  inst = lines[PCRef].instr;
  PC += 4;
  PCRef = (PCRef + 1) % NUM_PREFETCH_LINES;
  return inst;
}
// Called when the decoder finds a loadAddri (per the original note, addr is on
// the crossbar and regnum comes from the decoder).
// Points the prefetcher at the group containing addr, then ties register
// regnum and the slot that addr maps to inside that group to each other.
void pf_loadaddr(int regnum, U64 addr)
{
  int slot;

  pf_prefetch_here(addr);

  // slot = first slot of the prefetch group + instruction offset inside the group
  slot = PrefetchPC + mask_addr_low(addr)/4;
  lineRef[regnum] = slot;
  lines[slot].regRef = regnum;
}
// Called when the decoder finds an instruction, other than loadAddri, that changes Ri;
// regnum is passed from the decoder to the prefetcher.
// Stores the 0xff "no slot" marker in lineRef[regnum], so that a later
// pf_doJmp() on this register takes its miss path.
// NOTE(review): 0xff doubles as a sentinel and a possible lines[] index if
// NUM_PREFETCH_LINES ever exceeds 255 -- confirm against emu.h.
void pf_RiChanged(int regnum) // an instruction other than loadAddri changed Ri
{
  lineRef[regnum] = 0xff;
}
// Called when execution has to jump to the address held in register regnum.
// Hit: the register still designates a slot that is tied back to it, so PCRef
// and PC are taken from that slot.
// Miss: PC is read from the register; if the target lies in the same
// instruction group as the previous PC only PCRef moves, otherwise the
// prefetcher is retargeted to the target's group. In both miss cases the
// register and the chosen slot are tied to each other.
void pf_doJmp(int regnum)
{
  U64 previousPC;
  int remembered = lineRef[regnum];

  if(remembered != 0xff && lines[remembered].regRef == regnum)
    {
      PCRef = remembered;
      // NOTE(review): for slots other than the first of a group, addr is only
      // written when pf_doPrefetch() fills the group; if a jump can hit before
      // that fill, the value read here may be stale -- confirm.
      PC = lines[PCRef].addr;
      fprintf(stderr,"jmp that hit!\n");
      return; // the next pf_getNextInstruction() takes it from here
    }

  previousPC = PC;
  // original note: the value may be on the crossbar, possibly one cycle late
  PC = regs.r[regnum].o[0];
  fprintf(stderr,"jmp that missed!\n");

  if(mask_addr(PC) != mask_addr(previousPC))
    {
      // different group: abandon what was being prefetched
      PCRef = PrefetchPC + mask_addr_low(PC)/4;
      pf_prefetch_here(PC);
    }
  else
    {
      // same group as before (original note: saves 10% with testmul)
      PCRef = mask_line(PCRef) + mask_addr_low(PC)/4;
    }

  lineRef[regnum] = PCRef;
  lines[PCRef].regRef = regnum;
}
// Called by the emulator debugger when PC is forced.
// Reloads PC from the register file, aims PCRef at the slot the new PC maps to
// inside the group at PrefetchPC, and retargets the prefetcher to that group.
void pf_updatePC()
{
  PC = regs.r_pc.C(o,0);
  // Include the instruction offset inside the group, as pf_doJmp() does:
  // pointing PCRef at the group's first slot for a PC that is not
  // group-aligned would make pf_getNextInstruction() miss forever, since that
  // slot's address could never equal PC.
  PCRef = PrefetchPC + mask_addr_low(PC)/4;
  pf_prefetch_here(PC);
}

// this process is called at each clock rising edge
void pf_doPrefetch()
{
  int i;
  assert((PrefetchPC%IL1_GRANULARITY)==0 && (PrefetchAddr % (IL1_GRANULARITY*4))==0);

  // L1DoLoadMem will return true at the nth times it has been called if its passed addr do not change
  // this is to emulate the slowness of the L1
  //fprintf(stderr,"loadmem at %X\n", (unsigned int)PrefetchAddr);
  if(L1DoLoadMem(PrefetchAddr)) // return 1 if finished
    {
      U64 addr = PrefetchAddr;

      for(i = 0 ; i < IL1_GRANULARITY; i++)
	{
	  lines[PrefetchPC].addr = addr;
	  lines[PrefetchPC].instr = getL1Data(i);
	  lines[PrefetchPC].valid = true;
	  // calculate the next opcode to prefetch
	  PrefetchPC++;
	  PrefetchPC = PrefetchPC  % NUM_PREFETCH_LINES; // loop
	  // calculate the next prefetch addr.
	  addr+=4;
	}
      pf_prefetch_here(addr);
    }  
}
extern int L1Icount;
void pf_showPrefetchLines()
{
  int i;
  char buf[255];
  sprintf(buf,"PrefetchPC:%d ",L1Icount);
  fprintf(stderr,"|Ri|lin| |lin|Ri|     addr       |  instr |valid\n");
  for( i = 0 ; i < /*NUM_PREFETCH_LINES*/30; i++)
    {
      if(i < NUM_REG)
	fprintf(stderr,"|%02d %03X| ", i,lineRef[i]);
      else
	printf("---------");
      fprintf(stderr,"%s%03X %02d %016X %08X %s%s%s%s%s\n",
	      (i%IL1_GRANULARITY)==0?"+":"|",
	     i,
	     lines[i].regRef,
	     (U32)lines[i].addr,
	     lines[i].instr,
	     ((lines[i].valid)?"*":" "),
	      (i%IL1_GRANULARITY)==0?"+":"|",
	     ((PrefetchPC == i)?buf:""),
	     ((PCRef == i)?"PCRef ":""),
	     ((PC == lines[i].addr)?"PC ":"")
	     );
    }
}
