//boveerle
/**
 * veerle derived from ppc.uni.cxx
 * parametrized version for dse
 */

// for external calls area/latency
#define TECHNOLOGY 0.07
//eoveerle

/**
 * Defines the number of cache levels in the hierarchy, L1 caches included.
 *
 * To build up a simulator with L1 and L2 caches, set CACHE_HIERARCHY_LEVEL to 2.
 *
 * Minimum value is 1 (L1 only); maximum value is 4.
 */
//boveerle few lines lower
//#define CACHE_HIERARCHY_LEVEL 2
//eoveerle

#define TEST_LOAD_WRITEBACK

//boveerle pipeliner
// When PIPELINER_STAGES is defined, "topology/pipeliner.sim" is included and a
// Pipeliner module is declared in front of every cache level above L1.
// The value is passed as a template argument of the Pipeliner typedefs
// (x1 for L2, x2 for L3, x3 for L4).
#define PIPELINER_STAGES 10
// Default latencies per cache level; main() may override them from the
// lat:l1 .. lat:l4 command-line options. latency_l2..latency_l4 are handed to
// the matching Pipeliner constructors.
// NOTE(review): latency_l1 is only assigned in this file, never read -- confirm
// whether another translation unit or an included .cxx consumes it.
int latency_l1 = 1;
int latency_l2 = 98;
int latency_l3 = 99;
int latency_l4 = 100;
//eoveerle pipeliner

//#define DUMP_COMMITED_PC // Debug purpose: dump each commited instruction in a log file
//#define STOP 200         // Debug purpose: Stop at specified cycle count
//#define SVG_START        // Debug purpose: Generates a communication timeline

#include <csignal>
#include "unisim/unisim.h"

#include "debug/stop_at_cycle.sim"
//boveerle pipeliner
#ifdef PIPELINER_STAGES
#include "topology/pipeliner.sim"
#endif
//eoveerle pipeliner
//boveerle
// START of includes
#define CACHE_HIERARCHY_LEVEL 1
#include "bus_multiqueue.sim"
#include "CacheWBNB.sim"
#include "dram.sim"
#include "CpuPPC405.sim"
// END of includes
//eoveerle
#include <plugins/debug/gdb_server.hh>
#include <plugins/os/linux/powerpc/powerpc-linux.hh>
#include <utils/services/service.hh>
#include "stat_file/stat_file.h"

#include "debug/svg_memreq_service.h"
 
#if CACHE_HIERARCHY_LEVEL<1
#error "Cache hierarchy level can't be less than 1."
#endif
#if CACHE_HIERARCHY_LEVEL>4
#error "Cache hierarchy level can't be more than 4."
#endif

using full_system::plugins::debug::GDBServer;
using full_system::generic::memory::endian_interface::E_BIG_ENDIAN;
using full_system::utils::services::ServiceManager;

// -----------------------------------------------------------------------------------------------------------------

/**
 * Top-level simulator assembling the whole machine.
 *
 * The constructor instantiates the bus, the DRAM, the L1 cache, the optional
 * L2..L4 caches (each preceded by a Pipeliner when PIPELINER_STAGES is defined)
 * and the PPC405 CPU, attaches every module to global_clock and wires their
 * ports together with operator>>.
 *
 * The set of modules is selected at compile time through CACHE_HIERARCHY_LEVEL
 * and PIPELINER_STAGES; the module types come either from the file named by
 * USE_CONFIG or from the typedefs below.
 *
 * @tparam validate forwarded as a template argument to CpuPPC405; its meaning
 *                  is defined by that module.
 *
 * All modules are allocated with new and exposed as public pointers; no
 * destructor is defined in this class.
 */
template
< bool validate
>
class GeneratedSimulator : public Simulator
{public:

#ifdef USE_CONFIG

// Module typedefs are supplied by an external configuration file.
#include USE_CONFIG

#else

#ifdef STOP
  // Debug helper parametrized by the STOP cycle count.
  typedef StopAtCycle<STOP> MyStopAtCycle;
#endif

// Module types used by the simulator. The __<module>_<param> identifiers are
// not defined in this file -- presumably they are provided by the included
// .sim files or by the exploration tool; verify against the build setup.
// NOTE(review): in the MyCpu typedef __CpuPPC405_nDL1LineSize appears twice
// (5th and 8th argument), the 5th sitting among the IL1 parameters -- confirm
// this is intended and not meant to be an IL1 line size.
//boveerle
// START OF MODULE EXPLORER
typedef BusMultiQueue<Instruction,__bus_multiqueue_nCPU,__bus_multiqueue_nBufferSize,__bus_multiqueue_nRequestWidth>MyBus;
typedef CacheWBNB<Instruction,__CacheWBNB_nCPUtoCacheDataPathSize,__CacheWBNB_nCachetoCPUDataPathSize,__CacheWBNB_nMemtoCacheDataPathSize,__CacheWBNB_nCachetoMemDataPathSize,__CacheWBNB_nLineSize,__CacheWBNB_nCacheLines,__CacheWBNB_nAssociativity,__CacheWBNB_nStages,__CacheWBNB_nDelay,__CacheWBNB_nProg,__CacheWBNB_nMSHR,__CacheWBNB_nMSHRRead,0>MyL1Cache;
typedef DRAM<Instruction,__dram_nBanks,__dram_nRows,__dram_nCols,__dram_TRRD,__dram_TRAS,__dram_TRCD,__dram_CL,__dram_TRP,__dram_TRC,__dram_TREF,__dram_nDataPathSize,__dram_nCacheLineSize,__dram_nCtrlQueueSize,1,0,0>MyDram;
typedef CpuPPC405<__CpuPPC405_nIntegerRegisters,__CpuPPC405_nIL1CachetoCPUDataPathSize,__CpuPPC405_nIL1CachetoMemDataPathSize,__CpuPPC405_nIL1MemtoCacheDataPathSize,__CpuPPC405_nDL1LineSize,__CpuPPC405_nIL1CacheLines,__CpuPPC405_nIL1Associativity,__CpuPPC405_nDL1LineSize,__CpuPPC405_nDL1CachetoCPUDataPathSize,__CpuPPC405_nDL1CPUtoCacheDataPathSize,1,0,validate,0>MyCpu;
// END OF MODULE EXPLORER
//eoveerle
// Pipeliner types, one per cache level above L1. The second template argument
// is PIPELINER_STAGES scaled by 1, 2 and 3 for L2, L3 and L4 respectively.
// NOTE(review): __val_L2_nLineSize / __val_L3_nLineSize and the MyL2Cache,
// MyL3Cache, MyL4Cache types used further down are not defined in the visible
// part of this file; they must come from elsewhere when
// CACHE_HIERARCHY_LEVEL > 1.
//boveerle pipeliner
#ifdef PIPELINER_STAGES
#if CACHE_HIERARCHY_LEVEL>1
typedef Pipeliner<Instruction,PIPELINER_STAGES,__bus_multiqueue_nRequestWidth,false>MyL2Pipeliner;
#endif
#if CACHE_HIERARCHY_LEVEL>2
typedef Pipeliner<Instruction,PIPELINER_STAGES*2,__val_L2_nLineSize,false>MyL3Pipeliner;
#endif
#if CACHE_HIERARCHY_LEVEL>3
typedef Pipeliner<Instruction,PIPELINER_STAGES*3,__val_L3_nLineSize,false>MyL4Pipeliner;
#endif
#endif
//eoveerle pipeliner

#ifdef STOP
  MyStopAtCycle *sac;
#endif

#endif

  // Module instances, created in the constructor and accessed directly by main().
  MyBus   *bus;
  MyDram  *dram;
  MyL1Cache *cache_l1;
#if CACHE_HIERARCHY_LEVEL>1
  MyL2Cache *cache_l2;
//boveerle pipeliner
#ifdef PIPELINER_STAGES
  MyL2Pipeliner *pipeliner_l2;
#endif
//eoveerle pipeliner
#endif
#if CACHE_HIERARCHY_LEVEL>2
  MyL3Cache *cache_l3;
//boveerle pipeliner
#ifdef PIPELINER_STAGES
  MyL3Pipeliner *pipeliner_l3;
#endif
//eoveerle pipeliner
#endif
#if CACHE_HIERARCHY_LEVEL>3
  MyL4Cache *cache_l4;
//boveerle pipeliner
#ifdef PIPELINER_STAGES
  MyL4Pipeliner *pipeliner_l4;
#endif
//eoveerle pipeliner
#endif
  MyCpu   *ppc;

  /**
   * Builds the machine: allocates every module selected by the compile-time
   * configuration, connects each one to global_clock, then wires the memory
   * hierarchy (bus <-> [pipeliner <-> cache]* <-> dram) and the CPU ports.
   */
  GeneratedSimulator() {
#ifdef STOP
    sac = new MyStopAtCycle("sac");
    sac->clock(global_clock);
#endif

    // --- Instantiating modules and binding their clocks ---------------------
    // Note that the clock port is named 'clock' on the bus, the pipeliners and
    // the stop-at-cycle helper, and 'inClock' on the caches, DRAM and CPU.
    bus = new MyBus("bus");
    bus->clock(global_clock);
    dram = new MyDram("dram");
    dram->inClock(global_clock);
    cache_l1 = new MyL1Cache("cache_l1");
    cache_l1->inClock(global_clock);
#if CACHE_HIERARCHY_LEVEL>1
    cache_l2 = new MyL2Cache("cache_l2");
    cache_l2->inClock(global_clock);
//boveerle pipeliner
#ifdef PIPELINER_STAGES
    // The pipeliner receives the (possibly command-line overridden) L2 latency.
    pipeliner_l2 = new MyL2Pipeliner("pipeliner_l2",latency_l2);
    pipeliner_l2->clock(global_clock);
#endif
//eoveerle pipeliner
#endif
#if CACHE_HIERARCHY_LEVEL>2
    cache_l3 = new MyL3Cache("cache_l3");
    cache_l3->inClock(global_clock);
//boveerle pipeliner
#ifdef PIPELINER_STAGES
    pipeliner_l3 = new MyL3Pipeliner("pipeliner_l3",latency_l3);
    pipeliner_l3->clock(global_clock);
#endif
//eoveerle pipeliner
#endif
#if CACHE_HIERARCHY_LEVEL>3
    cache_l4 = new MyL4Cache("cache_l4");
    cache_l4->inClock(global_clock);
//boveerle pipeliner
#ifdef PIPELINER_STAGES
    pipeliner_l4 = new MyL4Pipeliner("pipeliner_l4",latency_l4);
    pipeliner_l4->clock(global_clock);
#endif
//eoveerle pipeliner
#endif

    ppc = new MyCpu("ppc");
    ppc->inClock(global_clock);
    // --- Connecting modules -------------------------------------------------

    // NOTE(review): these two locals are never read in this constructor; they
    // only take the address of the bus memory-side ports. Kept as-is because
    // they sit inside a tool-managed marker region.
//boveerle
    inport  < memreq < Instruction,__bus_multiqueue_nRequestWidth > > *hierarchy_last_in_port = &(bus->inMEM);
    outport < memreq < Instruction,__bus_multiqueue_nRequestWidth > > *hierarchy_last_out_port = &(bus->outMEM);
//eoveerle

    // Exactly one of the four CACHE_HIERARCHY_LEVEL sections below is compiled.
#if CACHE_HIERARCHY_LEVEL==1
    // BUS <-> MEM
    dram->out >> bus->inMEM;
    bus->outMEM >> dram->in;
#endif
#if CACHE_HIERARCHY_LEVEL==2
//boveerle pipeliner
#ifdef PIPELINER_STAGES
    // With a pipeliner, the connection is bus -> pipeliner -> l2
    bus->outMEM >> pipeliner_l2->inCPU;
    pipeliner_l2->outCPU >> bus->inMEM;
    pipeliner_l2->outMEM >> cache_l2->inCPU;
    cache_l2->outCPU >> pipeliner_l2->inMEM;
    // Lmax <-> MEM
    dram->out >> cache_l2->inMEM;
    cache_l2->outMEM >> dram->in;
//eoveerle pipeliner
#else
    // No pipeliner : connect directly bus to l2
    // BUS <-> Lmin
    cache_l2->outCPU >> bus->inMEM;
    bus->outMEM >> cache_l2->inCPU;
    // Lmax <-> MEM
    dram->out >> cache_l2->inMEM;
    cache_l2->outMEM >> dram->in;
#endif
#endif
#if CACHE_HIERARCHY_LEVEL==3
//boveerle pipeliner
#ifdef PIPELINER_STAGES
    // With a pipeliner, the connection is bus -> pipeliner -> l2 -> pipeliner -> L3
    bus->outMEM >> pipeliner_l2->inCPU;
    pipeliner_l2->outCPU >> bus->inMEM;
    pipeliner_l2->outMEM >> cache_l2->inCPU;
    cache_l2->outCPU >> pipeliner_l2->inMEM;
    cache_l2->outMEM >> pipeliner_l3->inCPU;
    pipeliner_l3->outCPU >> cache_l2->inMEM;
    pipeliner_l3->outMEM >> cache_l3->inCPU;
    cache_l3->outCPU >> pipeliner_l3->inMEM;
    // Lmax <-> MEM
    dram->out >> cache_l3->inMEM;
    cache_l3->outMEM >> dram->in;
//eoveerle pipeliner
#else
    // No pipeliner : connect directly bus to l2 to l3
    // BUS <-> Lmin
    cache_l2->outCPU >> bus->inMEM;
    bus->outMEM >> cache_l2->inCPU;
    // Ln <-> Ln+1
    cache_l3->outCPU >> cache_l2->inMEM;
    cache_l2->outMEM >> cache_l3->inCPU;
    // Lmax <-> MEM
    dram->out >> cache_l3->inMEM;
    cache_l3->outMEM >> dram->in;
#endif
#endif
#if CACHE_HIERARCHY_LEVEL==4
    //boveerle pipeliner
#ifdef PIPELINER_STAGES
    // With a pipeliner, the connection is bus -> pipeliner -> l2 -> pipeliner -> l3 -> pipeliner -> l4
    bus->outMEM >> pipeliner_l2->inCPU;
    pipeliner_l2->outCPU >> bus->inMEM;
    pipeliner_l2->outMEM >> cache_l2->inCPU;
    cache_l2->outCPU >> pipeliner_l2->inMEM;
    cache_l2->outMEM >> pipeliner_l3->inCPU;
    pipeliner_l3->outCPU >> cache_l2->inMEM;
    pipeliner_l3->outMEM >> cache_l3->inCPU;
    cache_l3->outCPU >> pipeliner_l3->inMEM;
    cache_l3->outMEM >> pipeliner_l4->inCPU;
    pipeliner_l4->outCPU >> cache_l3->inMEM;
    pipeliner_l4->outMEM >> cache_l4->inCPU;
    cache_l4->outCPU >> pipeliner_l4->inMEM;
    // Lmax <-> MEM
    dram->out >> cache_l4->inMEM;
    cache_l4->outMEM >> dram->in;
//eoveerle pipeliner
#else
    // No pipeliner : connect directly bus to l2 to l3 to l4
    // BUS <-> Lmin
    cache_l2->outCPU >> bus->inMEM;
    bus->outMEM >> cache_l2->inCPU;
    // Ln <-> Ln+1
    cache_l3->outCPU >> cache_l2->inMEM;
    cache_l2->outMEM >> cache_l3->inCPU;
    // Ln <-> Ln+1
    cache_l4->outCPU >> cache_l3->inMEM;
    cache_l3->outMEM >> cache_l4->inCPU;
    // Lmax <-> MEM
    dram->out >> cache_l4->inMEM;
    cache_l4->outMEM >> dram->in;
#endif
#endif
    // The CPU side is identical for every hierarchy depth.
    // L1 <-> BUS (bus CPU-side port 0)
    cache_l1->outMEM >> bus->inCPU[0];
    bus->outCPU[0] >> cache_l1->inMEM;
    // CPU <-> L1 (data memory ports of the CPU)
    cache_l1->outCPU >> ppc->inDMEM;
    ppc->outDMEM >> cache_l1->inCPU;
    // CPU <-> BUS (IL1): the CPU instruction memory ports use bus CPU-side port 1
    bus->outCPU[1] >> ppc->inIMEM;
    ppc->outIMEM >> bus->inCPU[1];
  } //GeneratedSimulator()
}; //class GeneratedSimulator

// -----------------------------------------------------------------------------------------------------------------

/**
 * Simulator entry point.
 *
 * Sequence of operations:
 *  1. declare and parse the command-line options;
 *  2. build the GeneratedSimulator and check its port connections;
 *  3. serve the "dump-*" requests (each one exits immediately);
 *  4. configure the PowerPC Linux loader/OS plugin, the DRAM and the optional
 *     GDB server, then connect the service-level (memory/syscall) interfaces;
 *  5. optionally fast-forward and/or load a checkpoint;
 *  6. run the cycle-level loop until unisim_terminated is set (the loop is
 *     skipped entirely when the "checking" flag is given);
 *  7. optionally dump the statistics.
 *
 * @param argc number of command-line arguments
 * @param argv command-line arguments
 * @param envp host environment (unused)
 * @return 0 on normal termination; exit(1) is called if the service setup fails
 */
int main(int argc, char **argv, char **envp)
{ // Debugger options
  command_line.add_option("gdb-server", "<port>", "Starts a gdb server to be connected on <port>");
  command_line.add_option("gdb-server-arch-file", "<file>", "Uses <arch file> as architecture description file for GDB server.\nDefaults to \"gdb_powerpc.xml\"");
  command_line.add_option("max:inst", "<count>", "Execute <count> instructions then exit");
  // Additional outputs
  command_line.add_option("dump-machine-description", "<file>", "Dump the machine description in <file> then exits.");
  command_line.add_option("dump-dot", "<file>", "Draw the architecture");
  command_line.add_option("dump-latex", "<file>", "Draw the architecture");
  command_line.add_option("dump-statistics", "<file>", "Dump the statistics at the end of the simulation to <file>.");
  // Additional inputs
/*FF*/
  command_line.add_option("fastforward", "<icount>", "Fastforwarding of <icount> instructions before starting simulation");
/*EOFF*/
  command_line.add_option("load-checkpoint", "<file>", "Load a checkpoint from <file>");
  command_line.add_option("load-emulator-statistics", "<file>", "Load the statics obtained while emulating the program.\nAllow additional features such as a progress bar.");
  //Redirection options
  command_line.add_option("stdin", "<file>", "Use <file> as stdin for the simulated program");
  command_line.add_option("stdout", "<file>", "Use <file> as stdout for the simulated program");
  command_line.add_option("stderr", "<file>", "Use <file> as stderr for the simulated program");
  //Simulated program and its parameters
  command_line.add_extra("program", "ELF32 statically linked Linux binary");
  command_line.add_extra_opt("program arguments", "ELF32 binary parameters");
//boveerle
  command_line.add_flag("checking", "Run until configuration can be checked for constraints then exit");
  command_line.add_option("lat:l1", "<latency_l1>", "Specify validated latency for l1");
  command_line.add_option("lat:l2", "<latency_l2>", "Specify validated latency for l2");
  command_line.add_option("lat:l3", "<latency_l3>", "Specify validated latency for l3");
  command_line.add_option("lat:l4", "<latency_l4>", "Specify validated latency for l4");
//eoveerle
  command_line.parse(argc,argv);

  bool use_gdb_server = false;
  int gdb_server_tcp_port = 0;
  // Start from the default advertised in the option help so that the pointer
  // is never read uninitialized when only --gdb-server is given.
  const char *gdb_server_arch_filename = "gdb_powerpc.xml";
  uint64_t maxinst = 0;

  if(command_line["gdb-server"])
  { use_gdb_server = true;
    gdb_server_tcp_port = atoi(command_line["gdb-server"]);
  }
  if(command_line["gdb-server-arch-file"])
  { gdb_server_arch_filename = command_line["gdb-server-arch-file"];
  }
  if(command_line["max:inst"])
  { maxinst = strtoull(command_line["max:inst"], 0, 0);
  }

//boveerle
  // Each latency is overridden only when its own option was supplied.
  if(command_line["lat:l1"])
  { latency_l1 = strtoull(command_line["lat:l1"], 0, 0);
  }
  if(command_line["lat:l2"])
  { latency_l2 = strtoull(command_line["lat:l2"], 0, 0);
  }
  if(command_line["lat:l3"])
  { latency_l3 = strtoull(command_line["lat:l3"], 0, 0);
  }
  if(command_line["lat:l4"])
  { latency_l4 = strtoull(command_line["lat:l4"], 0, 0);
  }
//eoveerle

  // Extra (non-option) arguments: the simulated program followed by its own arguments.
  const char *filename = command_line[0];
  unsigned int sim_argc = command_line.count();

  ///////////////////////////////////////// "From Emulator"  End ///////////////////////

  const bool validate = false;

  // The latencies above must be final at this point: the simulator constructor
  // passes latency_l2..latency_l4 to the pipeliners it creates.
  GeneratedSimulator <validate> s;
  unisim_port::check_connections();

#ifdef SVG_START
  SVGmemreq_service my_svg("my_svg",0);
#endif

  // The dump requests only need the constructed machine: serve them and exit.
  if(command_line["dump-machine-description"])
  { s.dump_machine_description(command_line["dump-machine-description"]);
    exit(0);
  }

  if(command_line["dump-dot"])
  { s.dump_dot(command_line["dump-dot"]);
    exit(0);
  }
  if(command_line["dump-latex"])
  { s.dump_latex(command_line["dump-latex"]);
    exit(0);
  }

  if(command_line["load-emulator-statistics"])
  { StatisticFile statfile(command_line["load-emulator-statistics"]);
    cerr << "loaded emulator statistics:" << endl;
    cerr << statfile;
    s.set_progress_max(statfile["instruction_count"]);
  }

  // No program to simulate: print the usage and leave.
  if(command_line.count()==0)
  { command_line.help();
    return 0;
  }


/*
  if(command_line["fastforward"])
  { s.ppc->fastforward = command_line["fastforward"];
  }
*/

  ///////////////////////////////////////// "From Emulator"  Start ///////////////////////
  // PPC Linux OS
  full_system::plugins::os::linux_os::powerpc::PowerPCLinux PPCLinux("my-ppclinux",0);
  // Debuggers
  GDBServer<address_t> *gdb_server = use_gdb_server ? new GDBServer<address_t>("gdb-server") : 0;

  // Setting MEMORY parameters :
  (*((s.dram)->emulator_mem))["memory.org"] = 0x00000000UL;
  (*((s.dram)->emulator_mem))["memory.bytesize"] = (uint32_t)-1;

  // Setup PPCLinux parameter :
  PPCLinux["elf32-loader.filename"] = filename;
  PPCLinux["ppc-linux-os.endianess"] = E_BIG_ENDIAN;
  PPCLinux["ppc-linux-os.memory-page-size"] = 4096;
  PPCLinux["linux-os.endianess"] = E_BIG_ENDIAN;
  PPCLinux["linux-os.verbose"] = false;
  PPCLinux["linux-loader.endianess"] = E_BIG_ENDIAN;
  PPCLinux["linux-loader.stack-base"] = 0xc0000000;
  PPCLinux["linux-loader.max-environ"] = 16 * 1024;
  PPCLinux["linux-loader.argc"] = sim_argc;
  for(unsigned int i = 0; i < sim_argc; i++)
  { PPCLinux["linux-loader.argv"][i] = command_line[i];
  }
  PPCLinux["linux-loader.envc"] = 0;

//redirection
  if(command_line["stdin"])
  { PPCLinux.set_stdin(command_line["stdin"]);
  }
  if(command_line["stdout"])
  { PPCLinux.set_stdout(command_line["stdout"]);
  }
  if(command_line["stderr"])
  { PPCLinux.set_stderr(command_line["stderr"]);
  }
//eoredirection

  // Setting CPU parameters :
  if(maxinst)
  { s.ppc->set_maxinst(maxinst);
    s.set_progress_max(maxinst);
  }

  /**********************************************************************************
   * New Connections  : we are now using the memory broadcasting !!!  *
   *********************************************************************************/

  if(validate)
  { // Connect validation processor to validation memory
    s.ppc->cpu_emulator->memory >> s.dram->emulator_mem->memory_export;
  }

  s.ppc->cpu_simulator->memory >> s.cache_l1->syscall_MemExp;        // CPU_SIM -> DCACHE
  s.ppc->cpu_simulator->icache_MemImp >>  s.dram->syscall_MemExp;    // CPU_SIM instruction side -> DRAM

  // Chain the service-level memory interfaces down the cache hierarchy.
#if CACHE_HIERARCHY_LEVEL==1
  s.cache_l1->syscall_MemImp >> s.dram->syscall_MemExp;              // DL1 -> DRAM
#endif
#if CACHE_HIERARCHY_LEVEL==2
  s.cache_l1->syscall_MemImp >> s.cache_l2->syscall_MemExp;          // DL1 -> Lmin
  s.cache_l2->syscall_MemImp >> s.dram->syscall_MemExp;              // Lmax -> DRAM
#endif
#if CACHE_HIERARCHY_LEVEL==3
  s.cache_l1->syscall_MemImp >> s.cache_l2->syscall_MemExp;           // DL1 -> Lmin
  s.cache_l2->syscall_MemImp >> s.cache_l3->syscall_MemExp;           // Ln -> Ln+1
  s.cache_l3->syscall_MemImp >> s.dram->syscall_MemExp;               // Lmax -> DRAM
#endif
#if CACHE_HIERARCHY_LEVEL==4
  s.cache_l1->syscall_MemImp >> s.cache_l2->syscall_MemExp;           // DL1 -> Lmin
  s.cache_l2->syscall_MemImp >> s.cache_l3->syscall_MemExp;           // Ln -> Ln+1
  s.cache_l3->syscall_MemImp >> s.cache_l4->syscall_MemExp;           // Ln -> Ln+1
  s.cache_l4->syscall_MemImp >> s.dram->syscall_MemExp;               // Lmax -> DRAM
#endif

  PPCLinux.memory_import >> s.dram->syscall_MemExp;               // Loader -> DRAM
  s.ppc->cpu_simulator->linux_os >> PPCLinux.ppclinuxos_exp;      // CPU -> SysCall

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////

  // Debugger connections
  if(gdb_server)
  { (*gdb_server)["tcp-port"] = gdb_server_tcp_port;
    (*gdb_server)["architecture-description-filename"] = gdb_server_arch_filename;
    s.ppc->cpu_emulator->debugger >> gdb_server->exp;
  }

#ifdef SVG_START
  s.ppc->svg >> my_svg.port;
  s.cache_l1->svg >> my_svg.port;
  s.bus->svg >> my_svg.port;
  s.dram->svg >> my_svg.port;

  my_svg.set_mincycle(0);
  my_svg.set_step(20);
  my_svg.set_skewing(10);
#endif

  if(!ServiceManager::Setup())
  { cerr << "Error during ServiceManager::Setup() !" << endl;
    exit(1);
  }
  // ServiceManager::Dump(cout);

/*FF*/
  if(command_line["fastforward"])
  { s.ppc->cpu_simulator->ff_interval_len = command_line["fastforward"]; // number of instructions to fastforward
    //s.cache->SetFastForwarding(true); // uncomment to have perfect warmup caches
    cerr << "--- starting functional simulation -------------------" << endl;
    cerr << "Fastforwarding " << s.ppc->cpu_simulator->ff_interval_len << " instructions" << endl;
    s.ppc->cpu_simulator->FastForward();
    s.ppc->NotifyPipeline();
  }
/*EOFF*/

  if(command_line["load-checkpoint"])
  { cerr << "--- loading checkpoint -------------------------------" << endl;
    // The whole file is buffered once, then consumed in order by the CPU,
    // the DRAM and the Linux OS plugin.
    ifstream ckp(command_line["load-checkpoint"]);
    stringstream ckp2;
    ckp2 << ckp.rdbuf();
    ckp.close();
    s.ppc->load_checkpoint(ckp2);
    s.dram->load_checkpoint(ckp2);
    PPCLinux.linux_os->load_checkpoint(ckp2);
  }

  s.ppc->Reset();

  cerr << "--- memory architecture details ----------------------" << endl;
  cerr << CACHE_HIERARCHY_LEVEL << " level(s) in cache hierarchy" << endl;
  cerr << "--- starting cycle-level simulation ------------------" << endl;

  signal(SIGINT,Simulator::at_ctrlc);
  signal(SIGTSTP,Simulator::at_ctrlz);
//boveerle
#include "before_simloop.cxx"
  // With the "checking" flag the loop body never runs: the program stops once
  // the code included above has been executed.
  while(!unisim_terminated && !command_line["checking"].is_set)
//eoveerle
  { fsc_phase();
    fsc_phase();
    if(s._unknown_display) unisim_port::check_knowness(s.stream_knowness(),s._unknown_fatal);
  }
//boveerle
//#include "after_simloop.cxx"
//eoveerle

  if(command_line["dump-statistics"])
  { // "-" selects the standard error stream instead of a file.
    string stats_filename = command_line["dump-statistics"];
    if(stats_filename=="-")
    { StatisticService::dump_statistics(cerr);
    }
    else
    { ofstream stats(stats_filename.c_str(),ios_base::trunc);
      StatisticService::dump_statistics(stats);
      stats.close();
    }
  }

  return 0;
} //main()

